76 files changed, 33997 insertions, 0 deletions
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
new file mode 100644
index 000000000000..4bb68801d3a9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -0,0 +1,3279 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#ifndef _LUSTRE_CL_OBJECT_H
+#define _LUSTRE_CL_OBJECT_H
+
+/** \defgroup clio clio
+ *
+ * Client objects implement io operations and cache pages.
+ *
+ * Examples: lov and osc are implementations of cl interface.
+ *
+ * Big Theory Statement.
+ *
+ * Layered objects.
+ *
+ * Client implementation is based on the following data-types:
+ *
+ *   - cl_object
+ *
+ *   - cl_page
+ *
+ *   - cl_lock     represents an extent lock on an object.
+ *
+ *   - cl_io       represents high-level i/o activity such as whole read/write
+ *		 system call, or write-out of pages from under the lock being
+ *		 canceled. cl_io has sub-ios that can be stopped and resumed
+ *		 independently, thus achieving high degree of transfer
+ *		 parallelism. Single cl_io can be advanced forward by
+ *		 the multiple threads (although in the most usual case of
+ *		 read/write system call it is associated with the single user
+ *		 thread, that issued the system call).
+ *
+ *   - cl_req      represents a collection of pages for a transfer. cl_req is
+ *		 constructed by req-forming engine that tries to saturate
+ *		 transport with large and continuous transfers.
+ *
+ * Terminology
+ *
+ *     - to avoid confusion high-level I/O operation like read or write system
+ *     call is referred to as "an io", whereas low-level I/O operation, like
+ *     RPC, is referred to as "a transfer"
+ *
+ *     - "generic code" means generic (not file system specific) code in the
+ *     hosting environment. "cl-code" means code (mostly in cl_*.c files) that
+ *     is not layer specific.
+ *
+ * Locking.
+ *
+ *  - i_mutex
+ *      - PG_locked
+ *	  - cl_object_header::coh_page_guard
+ *	  - cl_object_header::coh_lock_guard
+ *	  - lu_site::ls_guard
+ *
+ * See the top comment in cl_object.c for the description of overall locking and
+ * reference-counting design.
+ *
+ * See comments below for the description of i/o, page, and dlm-locking
+ * design.
+ *
+ * @{
+ */
+
+/*
+ * super-class definitions.
+ */
+#include <lu_object.h>
+#include <lvfs.h>
+#	include <linux/mutex.h>
+#	include <linux/radix-tree.h>
+
+struct inode;
+
+struct cl_device;
+struct cl_device_operations;
+
+struct cl_object;
+struct cl_object_page_operations;
+struct cl_object_lock_operations;
+
+struct cl_page;
+struct cl_page_slice;
+struct cl_lock;
+struct cl_lock_slice;
+
+struct cl_lock_operations;
+struct cl_page_operations;
+
+struct cl_io;
+struct cl_io_slice;
+
+struct cl_req;
+struct cl_req_slice;
+
+/**
+ * Operations for each data device in the client stack.
+ *
+ * \see vvp_cl_ops, lov_cl_ops, lovsub_cl_ops, osc_cl_ops
+ */
+struct cl_device_operations {
+	/**
+	 * Initialize cl_req. This method is called top-to-bottom on all
+	 * devices in the stack to get them a chance to allocate layer-private
+	 * data, and to attach them to the cl_req by calling
+	 * cl_req_slice_add().
+	 *
+	 * \see osc_req_init(), lov_req_init(), lovsub_req_init()
+	 * \see ccc_req_init()
+	 */
+	int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev,
+			    struct cl_req *req);
+};
+
+/**
+ * Device in the client stack.
+ *
+ * \see ccc_device, lov_device, lovsub_device, osc_device
+ */
+struct cl_device {
+	/** Super-class. */
+	struct lu_device		   cd_lu_dev;
+	/** Per-layer operation vector. */
+	const struct cl_device_operations *cd_ops;
+};
+
+/** \addtogroup cl_object cl_object
+ * @{ */
+/**
+ * "Data attributes" of cl_object. Data attributes can be updated
+ * independently for a sub-object, and top-object's attributes are calculated
+ * from sub-objects' ones.
+ */
+struct cl_attr {
+	/** Object size, in bytes */
+	loff_t cat_size;
+	/**
+	 * Known minimal size, in bytes.
+	 *
+	 * This is only valid when at least one DLM lock is held.
+	 */
+	loff_t cat_kms;
+	/** Modification time. Measured in seconds since epoch. */
+	time_t cat_mtime;
+	/** Access time. Measured in seconds since epoch. */
+	time_t cat_atime;
+	/** Change time. Measured in seconds since epoch. */
+	time_t cat_ctime;
+	/**
+	 * Blocks allocated to this cl_object on the server file system.
+	 *
+	 * \todo XXX An interface for block size is needed.
+	 */
+	__u64  cat_blocks;
+	/**
+	 * User identifier for quota purposes.
+	 */
+	uid_t  cat_uid;
+	/**
+	 * Group identifier for quota purposes.
+	 */
+	gid_t  cat_gid;
+};
+
+/**
+ * Fields in cl_attr that are being set.
+ */
+enum cl_attr_valid {
+	CAT_SIZE   = 1 << 0,
+	CAT_KMS    = 1 << 1,
+	CAT_MTIME  = 1 << 3,
+	CAT_ATIME  = 1 << 4,
+	CAT_CTIME  = 1 << 5,
+	CAT_BLOCKS = 1 << 6,
+	CAT_UID    = 1 << 7,
+	CAT_GID    = 1 << 8
+};
+
+/**
+ * Sub-class of lu_object with methods common for objects on the client
+ * stacks.
+ *
+ * cl_object: represents a regular file system object, both a file and a
+ *    stripe. cl_object is based on lu_object: it is identified by a fid,
+ *    layered, cached, hashed, and lrued. Important distinction with the server
+ *    side, where md_object and dt_object are used, is that cl_object "fans out"
+ *    at the lov/sns level: depending on the file layout, single file is
+ *    represented as a set of "sub-objects" (stripes). At the implementation
+ *    level, struct lov_object contains an array of cl_objects. Each sub-object
+ *    is a full-fledged cl_object, having its fid, living in the lru and hash
+ *    table.
+ *
+ *    This leads to the next important difference with the server side: on the
+ *    client, it's quite usual to have objects with the different sequence of
+ *    layers. For example, typical top-object is composed of the following
+ *    layers:
+ *
+ *	- vvp
+ *	- lov
+ *
+ *    whereas its sub-objects are composed of
+ *
+ *	- lovsub
+ *	- osc
+ *
+ *    layers. Here "lovsub" is a mostly dummy layer, whose purpose is to keep
+ *    track of the object-subobject relationship.
+ *
+ *    Sub-objects are not cached independently: when top-object is about to
+ *    be discarded from the memory, all its sub-objects are torn-down and
+ *    destroyed too.
+ *
+ * \see ccc_object, lov_object, lovsub_object, osc_object
+ */
+struct cl_object {
+	/** super class */
+	struct lu_object		   co_lu;
+	/** per-object-layer operations */
+	const struct cl_object_operations *co_ops;
+	/** offset of page slice in cl_page buffer */
+	int				   co_slice_off;
+};
+
+/**
+ * Description of the client object configuration. This is used for the
+ * creation of a new client object that is identified by a more state than
+ * fid.
+ */
+struct cl_object_conf {
+	/** Super-class. */
+	struct lu_object_conf     coc_lu;
+	union {
+		/**
+		 * Object layout. This is consumed by lov.
+		 */
+		struct lustre_md *coc_md;
+		/**
+		 * Description of particular stripe location in the
+		 * cluster. This is consumed by osc.
+		 */
+		struct lov_oinfo *coc_oinfo;
+	} u;
+	/**
+	 * VFS inode. This is consumed by vvp.
+	 */
+	struct inode	     *coc_inode;
+	/**
+	 * Layout lock handle.
+	 */
+	struct ldlm_lock	 *coc_lock;
+	/**
+	 * Operation to handle layout, OBJECT_CONF_XYZ.
+	 */
+	int			  coc_opc;
+};
+
+enum {
+	/** configure layout, set up a new stripe, must be called while
+	 * holding layout lock. */
+	OBJECT_CONF_SET = 0,
+	/** invalidate the current stripe configuration due to losing
+	 * layout lock. */
+	OBJECT_CONF_INVALIDATE = 1,
+	/** wait for old layout to go away so that new layout can be
+	 * set up. */
+	OBJECT_CONF_WAIT = 2
+};
+
+/**
+ * Operations implemented for each cl object layer.
+ *
+ * \see vvp_ops, lov_ops, lovsub_ops, osc_ops
+ */
+struct cl_object_operations {
+	/**
+	 * Initialize page slice for this layer. Called top-to-bottom through
+	 * every object layer when a new cl_page is instantiated. Layer
+	 * keeping private per-page data, or requiring its own page operations
+	 * vector should allocate these data here, and attach then to the page
+	 * by calling cl_page_slice_add(). \a vmpage is locked (in the VM
+	 * sense). Optional.
+	 *
+	 * \retval NULL success.
+	 *
+	 * \retval ERR_PTR(errno) failure code.
+	 *
+	 * \retval valid-pointer pointer to already existing referenced page
+	 *	 to be used instead of newly created.
+	 */
+	int  (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
+				struct cl_page *page, struct page *vmpage);
+	/**
+	 * Initialize lock slice for this layer. Called top-to-bottom through
+	 * every object layer when a new cl_lock is instantiated. Layer
+	 * keeping private per-lock data, or requiring its own lock operations
+	 * vector should allocate these data here, and attach then to the lock
+	 * by calling cl_lock_slice_add(). Mandatory.
+	 */
+	int  (*coo_lock_init)(const struct lu_env *env,
+			      struct cl_object *obj, struct cl_lock *lock,
+			      const struct cl_io *io);
+	/**
+	 * Initialize io state for a given layer.
+	 *
+	 * called top-to-bottom once per io existence to initialize io
+	 * state. If layer wants to keep some state for this type of io, it
+	 * has to embed struct cl_io_slice in lu_env::le_ses, and register
+	 * slice with cl_io_slice_add(). It is guaranteed that all threads
+	 * participating in this io share the same session.
+	 */
+	int  (*coo_io_init)(const struct lu_env *env,
+			    struct cl_object *obj, struct cl_io *io);
+	/**
+	 * Fill portion of \a attr that this layer controls. This method is
+	 * called top-to-bottom through all object layers.
+	 *
+	 * \pre cl_object_header::coh_attr_guard of the top-object is locked.
+	 *
+	 * \return   0: to continue
+	 * \return +ve: to stop iterating through layers (but 0 is returned
+	 * from enclosing cl_object_attr_get())
+	 * \return -ve: to signal error
+	 */
+	int (*coo_attr_get)(const struct lu_env *env, struct cl_object *obj,
+			    struct cl_attr *attr);
+	/**
+	 * Update attributes.
+	 *
+	 * \a valid is a bitmask composed from enum #cl_attr_valid, and
+	 * indicating what attributes are to be set.
+	 *
+	 * \pre cl_object_header::coh_attr_guard of the top-object is locked.
+	 *
+	 * \return the same convention as for
+	 * cl_object_operations::coo_attr_get() is used.
+	 */
+	int (*coo_attr_set)(const struct lu_env *env, struct cl_object *obj,
+			    const struct cl_attr *attr, unsigned valid);
+	/**
+	 * Update object configuration. Called top-to-bottom to modify object
+	 * configuration.
+	 *
+	 * XXX error conditions and handling.
+	 */
+	int (*coo_conf_set)(const struct lu_env *env, struct cl_object *obj,
+			    const struct cl_object_conf *conf);
+	/**
+	 * Glimpse ast. Executed when glimpse ast arrives for a lock on this
+	 * object. Layers are supposed to fill parts of \a lvb that will be
+	 * shipped to the glimpse originator as a glimpse result.
+	 *
+	 * \see ccc_object_glimpse(), lovsub_object_glimpse(),
+	 * \see osc_object_glimpse()
+	 */
+	int (*coo_glimpse)(const struct lu_env *env,
+			   const struct cl_object *obj, struct ost_lvb *lvb);
+};
+
+/**
+ * Extended header for client object.
+ */
+struct cl_object_header {
+	/** Standard lu_object_header. cl_object::co_lu::lo_header points
+	 * here. */
+	struct lu_object_header  coh_lu;
+	/** \name locks
+	 * \todo XXX move locks below to the separate cache-lines, they are
+	 * mostly useless otherwise.
+	 */
+	/** @{ */
+	/** Lock protecting page tree. */
+	spinlock_t		 coh_page_guard;
+	/** Lock protecting lock list. */
+	spinlock_t		 coh_lock_guard;
+	/** @} locks */
+	/** Radix tree of cl_page's, cached for this object. */
+	struct radix_tree_root   coh_tree;
+	/** # of pages in radix tree. */
+	unsigned long	    coh_pages;
+	/** List of cl_lock's granted for this object. */
+	struct list_head	       coh_locks;
+
+	/**
+	 * Parent object. It is assumed that an object has a well-defined
+	 * parent, but not a well-defined child (there may be multiple
+	 * sub-objects, for the same top-object). cl_object_header::coh_parent
+	 * field allows certain code to be written generically, without
+	 * limiting possible cl_object layouts unduly.
+	 */
+	struct cl_object_header *coh_parent;
+	/**
+	 * Protects consistency between cl_attr of parent object and
+	 * attributes of sub-objects, that the former is calculated ("merged")
+	 * from.
+	 *
+	 * \todo XXX this can be read/write lock if needed.
+	 */
+	spinlock_t		 coh_attr_guard;
+	/**
+	 * Size of cl_page + page slices
+	 */
+	unsigned short		 coh_page_bufsize;
+	/**
+	 * Number of objects above this one: 0 for a top-object, 1 for its
+	 * sub-object, etc.
+	 */
+	unsigned char		 coh_nesting;
+};
+
+/**
+ * Helper macro: iterate over all layers of the object \a obj, assigning every
+ * layer top-to-bottom to \a slice.
+ */
+#define cl_object_for_each(slice, obj)				      \
+	list_for_each_entry((slice),				    \
+				&(obj)->co_lu.lo_header->loh_layers,	\
+				co_lu.lo_linkage)
+/**
+ * Helper macro: iterate over all layers of the object \a obj, assigning every
+ * layer bottom-to-top to \a slice.
+ */
+#define cl_object_for_each_reverse(slice, obj)			       \
+	list_for_each_entry_reverse((slice),			     \
+					&(obj)->co_lu.lo_header->loh_layers, \
+					co_lu.lo_linkage)
+/** @} cl_object */
+
+#ifndef pgoff_t
+#define pgoff_t unsigned long
+#endif
+
+#define CL_PAGE_EOF ((pgoff_t)~0ull)
+
+/** \addtogroup cl_page cl_page
+ * @{ */
+
+/** \struct cl_page
+ * Layered client page.
+ *
+ * cl_page: represents a portion of a file, cached in the memory. All pages
+ *    of the given file are of the same size, and are kept in the radix tree
+ *    hanging off the cl_object. cl_page doesn't fan out, but as sub-objects
+ *    of the top-level file object are first class cl_objects, they have their
+ *    own radix trees of pages and hence page is implemented as a sequence of
+ *    struct cl_pages's, linked into double-linked list through
+ *    cl_page::cp_parent and cl_page::cp_child pointers, each residing in the
+ *    corresponding radix tree at the corresponding logical offset.
+ *
+ * cl_page is associated with VM page of the hosting environment (struct
+ *    page in Linux kernel, for example), struct page. It is assumed, that this
+ *    association is implemented by one of cl_page layers (top layer in the
+ *    current design) that
+ *
+ *	- intercepts per-VM-page call-backs made by the environment (e.g.,
+ *	  memory pressure),
+ *
+ *	- translates state (page flag bits) and locking between lustre and
+ *	  environment.
+ *
+ *    The association between cl_page and struct page is immutable and
+ *    established when cl_page is created.
+ *
+ * cl_page can be "owned" by a particular cl_io (see below), guaranteeing
+ *    this io an exclusive access to this page w.r.t. other io attempts and
+ *    various events changing page state (such as transfer completion, or
+ *    eviction of the page from the memory). Note, that in general cl_io
+ *    cannot be identified with a particular thread, and page ownership is not
+ *    exactly equal to the current thread holding a lock on the page. Layer
+ *    implementing association between cl_page and struct page has to implement
+ *    ownership on top of available synchronization mechanisms.
+ *
+ *    While lustre client maintains the notion of an page ownership by io,
+ *    hosting MM/VM usually has its own page concurrency control
+ *    mechanisms. For example, in Linux, page access is synchronized by the
+ *    per-page PG_locked bit-lock, and generic kernel code (generic_file_*())
+ *    takes care to acquire and release such locks as necessary around the
+ *    calls to the file system methods (->readpage(), ->prepare_write(),
+ *    ->commit_write(), etc.). This leads to the situation when there are two
+ *    different ways to own a page in the client:
+ *
+ *	- client code explicitly and voluntary owns the page (cl_page_own());
+ *
+ *	- VM locks a page and then calls the client, that has "to assume"
+ *	  the ownership from the VM (cl_page_assume()).
+ *
+ *    Dual methods to release ownership are cl_page_disown() and
+ *    cl_page_unassume().
+ *
+ * cl_page is reference counted (cl_page::cp_ref). When reference counter
+ *    drops to 0, the page is returned to the cache, unless it is in
+ *    cl_page_state::CPS_FREEING state, in which case it is immediately
+ *    destroyed.
+ *
+ *    The general logic guaranteeing the absence of "existential races" for
+ *    pages is the following:
+ *
+ *	- there are fixed known ways for a thread to obtain a new reference
+ *	  to a page:
+ *
+ *	    - by doing a lookup in the cl_object radix tree, protected by the
+ *	      spin-lock;
+ *
+ *	    - by starting from VM-locked struct page and following some
+ *	      hosting environment method (e.g., following ->private pointer in
+ *	      the case of Linux kernel), see cl_vmpage_page();
+ *
+ *	- when the page enters cl_page_state::CPS_FREEING state, all these
+ *	  ways are severed with the proper synchronization
+ *	  (cl_page_delete());
+ *
+ *	- entry into cl_page_state::CPS_FREEING is serialized by the VM page
+ *	  lock;
+ *
+ *	- no new references to the page in cl_page_state::CPS_FREEING state
+ *	  are allowed (checked in cl_page_get()).
+ *
+ *    Together this guarantees that when last reference to a
+ *    cl_page_state::CPS_FREEING page is released, it is safe to destroy the
+ *    page, as neither references to it can be acquired at that point, nor
+ *    ones exist.
+ *
+ * cl_page is a state machine. States are enumerated in enum
+ *    cl_page_state. Possible state transitions are enumerated in
+ *    cl_page_state_set(). State transition process (i.e., actual changing of
+ *    cl_page::cp_state field) is protected by the lock on the underlying VM
+ *    page.
+ *
+ * Linux Kernel implementation.
+ *
+ *    Binding between cl_page and struct page (which is a typedef for
+ *    struct page) is implemented in the vvp layer. cl_page is attached to the
+ *    ->private pointer of the struct page, together with the setting of
+ *    PG_private bit in page->flags, and acquiring additional reference on the
+ *    struct page (much like struct buffer_head, or any similar file system
+ *    private data structures).
+ *
+ *    PG_locked lock is used to implement both ownership and transfer
+ *    synchronization, that is, page is VM-locked in CPS_{OWNED,PAGE{IN,OUT}}
+ *    states. No additional references are acquired for the duration of the
+ *    transfer.
+ *
+ * \warning *THIS IS NOT* the behavior expected by the Linux kernel, where
+ *	  write-out is "protected" by the special PG_writeback bit.
+ */
+
+/**
+ * States of cl_page. cl_page.c assumes particular order here.
+ *
+ * The page state machine is rather crude, as it doesn't recognize finer page
+ * states like "dirty" or "up to date". This is because such states are not
+ * always well defined for the whole stack (see, for example, the
+ * implementation of the read-ahead, that hides page up-to-dateness to track
+ * cache hits accurately). Such sub-states are maintained by the layers that
+ * are interested in them.
+ */
+enum cl_page_state {
+	/**
+	 * Page is in the cache, un-owned. Page leaves cached state in the
+	 * following cases:
+	 *
+	 *     - [cl_page_state::CPS_OWNED] io comes across the page and
+	 *     owns it;
+	 *
+	 *     - [cl_page_state::CPS_PAGEOUT] page is dirty, the
+	 *     req-formation engine decides that it wants to include this page
+	 *     into an cl_req being constructed, and yanks it from the cache;
+	 *
+	 *     - [cl_page_state::CPS_FREEING] VM callback is executed to
+	 *     evict the page form the memory;
+	 *
+	 * \invariant cl_page::cp_owner == NULL && cl_page::cp_req == NULL
+	 */
+	CPS_CACHED,
+	/**
+	 * Page is exclusively owned by some cl_io. Page may end up in this
+	 * state as a result of
+	 *
+	 *     - io creating new page and immediately owning it;
+	 *
+	 *     - [cl_page_state::CPS_CACHED] io finding existing cached page
+	 *     and owning it;
+	 *
+	 *     - [cl_page_state::CPS_OWNED] io finding existing owned page
+	 *     and waiting for owner to release the page;
+	 *
+	 * Page leaves owned state in the following cases:
+	 *
+	 *     - [cl_page_state::CPS_CACHED] io decides to leave the page in
+	 *     the cache, doing nothing;
+	 *
+	 *     - [cl_page_state::CPS_PAGEIN] io starts read transfer for
+	 *     this page;
+	 *
+	 *     - [cl_page_state::CPS_PAGEOUT] io starts immediate write
+	 *     transfer for this page;
+	 *
+	 *     - [cl_page_state::CPS_FREEING] io decides to destroy this
+	 *     page (e.g., as part of truncate or extent lock cancellation).
+	 *
+	 * \invariant cl_page::cp_owner != NULL && cl_page::cp_req == NULL
+	 */
+	CPS_OWNED,
+	/**
+	 * Page is being written out, as a part of a transfer. This state is
+	 * entered when req-formation logic decided that it wants this page to
+	 * be sent through the wire _now_. Specifically, it means that once
+	 * this state is achieved, transfer completion handler (with either
+	 * success or failure indication) is guaranteed to be executed against
+	 * this page independently of any locks and any scheduling decisions
+	 * made by the hosting environment (that effectively means that the
+	 * page is never put into cl_page_state::CPS_PAGEOUT state "in
+	 * advance". This property is mentioned, because it is important when
+	 * reasoning about possible dead-locks in the system). The page can
+	 * enter this state as a result of
+	 *
+	 *     - [cl_page_state::CPS_OWNED] an io requesting an immediate
+	 *     write-out of this page, or
+	 *
+	 *     - [cl_page_state::CPS_CACHED] req-forming engine deciding
+	 *     that it has enough dirty pages cached to issue a "good"
+	 *     transfer.
+	 *
+	 * The page leaves cl_page_state::CPS_PAGEOUT state when the transfer
+	 * is completed---it is moved into cl_page_state::CPS_CACHED state.
+	 *
+	 * Underlying VM page is locked for the duration of transfer.
+	 *
+	 * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL
+	 */
+	CPS_PAGEOUT,
+	/**
+	 * Page is being read in, as a part of a transfer. This is quite
+	 * similar to the cl_page_state::CPS_PAGEOUT state, except that
+	 * read-in is always "immediate"---there is no such thing a sudden
+	 * construction of read cl_req from cached, presumably not up to date,
+	 * pages.
+	 *
+	 * Underlying VM page is locked for the duration of transfer.
+	 *
+	 * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL
+	 */
+	CPS_PAGEIN,
+	/**
+	 * Page is being destroyed. This state is entered when client decides
+	 * that page has to be deleted from its host object, as, e.g., a part
+	 * of truncate.
+	 *
+	 * Once this state is reached, there is no way to escape it.
+	 *
+	 * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req == NULL
+	 */
+	CPS_FREEING,
+	CPS_NR
+};
+
+enum cl_page_type {
+	/** Host page, the page is from the host inode which the cl_page
+	 * belongs to. */
+	CPT_CACHEABLE = 1,
+
+	/** Transient page, the transient cl_page is used to bind a cl_page
+	 *  to vmpage which is not belonging to the same object of cl_page.
+	 *  it is used in DirectIO, lockless IO and liblustre. */
+	CPT_TRANSIENT,
+};
+
+/**
+ * Flags maintained for every cl_page.
+ */
+enum cl_page_flags {
+	/**
+	 * Set when pagein completes. Used for debugging (read completes at
+	 * most once for a page).
+	 */
+	CPF_READ_COMPLETED = 1 << 0
+};
+
+/**
+ * Fields are protected by the lock on struct page, except for atomics and
+ * immutables.
+ *
+ * \invariant Data type invariants are in cl_page_invariant(). Basically:
+ * cl_page::cp_parent and cl_page::cp_child are a well-formed double-linked
+ * list, consistent with the parent/child pointers in the cl_page::cp_obj and
+ * cl_page::cp_owner (when set).
+ */
+struct cl_page {
+	/** Reference counter. */
+	atomic_t	     cp_ref;
+	/** An object this page is a part of. Immutable after creation. */
+	struct cl_object	*cp_obj;
+	/** Logical page index within the object. Immutable after creation. */
+	pgoff_t		  cp_index;
+	/** List of slices. Immutable after creation. */
+	struct list_head	       cp_layers;
+	/** Parent page, NULL for top-level page. Immutable after creation. */
+	struct cl_page	  *cp_parent;
+	/** Lower-layer page. NULL for bottommost page. Immutable after
+	 * creation. */
+	struct cl_page	  *cp_child;
+	/**
+	 * Page state. This field is const to avoid accidental update, it is
+	 * modified only internally within cl_page.c. Protected by a VM lock.
+	 */
+	const enum cl_page_state cp_state;
+	/** Linkage of pages within group. Protected by cl_page::cp_mutex. */
+	struct list_head		cp_batch;
+	/** Mutex serializing membership of a page in a batch. */
+	struct mutex		cp_mutex;
+	/** Linkage of pages within cl_req. */
+	struct list_head	       cp_flight;
+	/** Transfer error. */
+	int		      cp_error;
+
+	/**
+	 * Page type. Only CPT_TRANSIENT is used so far. Immutable after
+	 * creation.
+	 */
+	enum cl_page_type	cp_type;
+
+	/**
+	 * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
+	 * by sub-io. Protected by a VM lock.
+	 */
+	struct cl_io	    *cp_owner;
+	/**
+	 * Debug information, the task is owning the page.
+	 */
+	task_t	      *cp_task;
+	/**
+	 * Owning IO request in cl_page_state::CPS_PAGEOUT and
+	 * cl_page_state::CPS_PAGEIN states. This field is maintained only in
+	 * the top-level pages. Protected by a VM lock.
+	 */
+	struct cl_req	   *cp_req;
+	/** List of references to this page, for debugging. */
+	struct lu_ref	    cp_reference;
+	/** Link to an object, for debugging. */
+	struct lu_ref_link      *cp_obj_ref;
+	/** Link to a queue, for debugging. */
+	struct lu_ref_link      *cp_queue_ref;
+	/** Per-page flags from enum cl_page_flags. Protected by a VM lock. */
+	unsigned		 cp_flags;
+	/** Assigned if doing a sync_io */
+	struct cl_sync_io       *cp_sync_io;
+};
+
+/**
+ * Per-layer part of cl_page.
+ *
+ * \see ccc_page, lov_page, osc_page
+ */
+struct cl_page_slice {
+	struct cl_page		  *cpl_page;
+	/**
+	 * Object slice corresponding to this page slice. Immutable after
+	 * creation.
+	 */
+	struct cl_object		*cpl_obj;
+	const struct cl_page_operations *cpl_ops;
+	/** Linkage into cl_page::cp_layers. Immutable after creation. */
+	struct list_head		       cpl_linkage;
+};
+
+/**
+ * Lock mode. For the client extent locks.
+ *
+ * \warning: cl_lock_mode_match() assumes particular ordering here.
+ * \ingroup cl_lock
+ */
+enum cl_lock_mode {
+	/**
+	 * Mode of a lock that protects no data, and exists only as a
+	 * placeholder. This is used for `glimpse' requests. A phantom lock
+	 * might get promoted to real lock at some point.
+	 */
+	CLM_PHANTOM,
+	CLM_READ,
+	CLM_WRITE,
+	CLM_GROUP
+};
+
+/**
+ * Requested transfer type.
+ * \ingroup cl_req
+ */
+enum cl_req_type {
+	CRT_READ,
+	CRT_WRITE,
+	CRT_NR
+};
+
+/**
+ * Per-layer page operations.
+ *
+ * Methods taking an \a io argument are for the activity happening in the
+ * context of given \a io. Page is assumed to be owned by that io, except for
+ * the obvious cases (like cl_page_operations::cpo_own()).
+ *
+ * \see vvp_page_ops, lov_page_ops, osc_page_ops
+ */
+struct cl_page_operations {
+	/**
+	 * cl_page<->struct page methods. Only one layer in the stack has to
+	 * implement these. Current code assumes that this functionality is
+	 * provided by the topmost layer, see cl_page_disown0() as an example.
+	 */
+
+	/**
+	 * \return the underlying VM page. Optional.
+	 */
+	struct page *(*cpo_vmpage)(const struct lu_env *env,
+				  const struct cl_page_slice *slice);
+	/**
+	 * Called when \a io acquires this page into the exclusive
+	 * ownership. When this method returns, it is guaranteed that the is
+	 * not owned by other io, and no transfer is going on against
+	 * it. Optional.
+	 *
+	 * \see cl_page_own()
+	 * \see vvp_page_own(), lov_page_own()
+	 */
+	int  (*cpo_own)(const struct lu_env *env,
+			const struct cl_page_slice *slice,
+			struct cl_io *io, int nonblock);
+	/** Called when ownership it yielded. Optional.
+	 *
+	 * \see cl_page_disown()
+	 * \see vvp_page_disown()
+	 */
+	void (*cpo_disown)(const struct lu_env *env,
+			   const struct cl_page_slice *slice, struct cl_io *io);
+	/**
+	 * Called for a page that is already "owned" by \a io from VM point of
+	 * view. Optional.
+	 *
+	 * \see cl_page_assume()
+	 * \see vvp_page_assume(), lov_page_assume()
+	 */
+	void (*cpo_assume)(const struct lu_env *env,
+			   const struct cl_page_slice *slice, struct cl_io *io);
+	/** Dual to cl_page_operations::cpo_assume(). Optional. Called
+	 * bottom-to-top when IO releases a page without actually unlocking
+	 * it.
+	 *
+	 * \see cl_page_unassume()
+	 * \see vvp_page_unassume()
+	 */
+	void (*cpo_unassume)(const struct lu_env *env,
+			     const struct cl_page_slice *slice,
+			     struct cl_io *io);
+	/**
+	 * Announces whether the page contains valid data or not by \a uptodate.
+	 *
+	 * \see cl_page_export()
+	 * \see vvp_page_export()
+	 */
+	void  (*cpo_export)(const struct lu_env *env,
+			    const struct cl_page_slice *slice, int uptodate);
+	/**
+	 * Unmaps page from the user space (if it is mapped).
+	 *
+	 * \see cl_page_unmap()
+	 * \see vvp_page_unmap()
+	 */
+	int (*cpo_unmap)(const struct lu_env *env,
+			 const struct cl_page_slice *slice, struct cl_io *io);
+	/**
+	 * Checks whether underlying VM page is locked (in the suitable
+	 * sense). Used for assertions.
+	 *
+	 * \retval    -EBUSY: page is protected by a lock of a given mode;
+	 * \retval  -ENODATA: page is not protected by a lock;
+	 * \retval	 0: this layer cannot decide. (Should never happen.)
+	 */
+	int (*cpo_is_vmlocked)(const struct lu_env *env,
+			       const struct cl_page_slice *slice);
+	/**
+	 * Page destruction.
+	 */
+
+	/**
+	 * Called when page is truncated from the object. Optional.
+	 *
+	 * \see cl_page_discard()
+	 * \see vvp_page_discard(), osc_page_discard()
+	 */
+	void (*cpo_discard)(const struct lu_env *env,
+			    const struct cl_page_slice *slice,
+			    struct cl_io *io);
+	/**
+	 * Called when page is removed from the cache, and is about to being
+	 * destroyed. Optional.
+	 *
+	 * \see cl_page_delete()
+	 * \see vvp_page_delete(), osc_page_delete()
+	 */
+	void (*cpo_delete)(const struct lu_env *env,
+			   const struct cl_page_slice *slice);
+	/** Destructor. Frees resources and slice itself. */
+	void (*cpo_fini)(const struct lu_env *env,
+			 struct cl_page_slice *slice);
+
+	/**
+	 * Checks whether the page is protected by a cl_lock. This is a
+	 * per-layer method, because certain layers have ways to check for the
+	 * lock much more efficiently than through the generic locks scan, or
+	 * implement locking mechanisms separate from cl_lock, e.g.,
+	 * LL_FILE_GROUP_LOCKED in vvp. If \a pending is true, check for locks
+	 * being canceled, or scheduled for cancellation as soon as the last
+	 * user goes away, too.
+	 *
+	 * \retval    -EBUSY: page is protected by a lock of a given mode;
+	 * \retval  -ENODATA: page is not protected by a lock;
+	 * \retval	 0: this layer cannot decide.
+	 *
+	 * \see cl_page_is_under_lock()
+	 */
+	int (*cpo_is_under_lock)(const struct lu_env *env,
+				 const struct cl_page_slice *slice,
+				 struct cl_io *io);
+
+	/**
+	 * Optional debugging helper. Prints given page slice.
+	 *
+	 * \see cl_page_print()
+	 */
+	int (*cpo_print)(const struct lu_env *env,
+			 const struct cl_page_slice *slice,
+			 void *cookie, lu_printer_t p);
+	/**
+	 * \name transfer
+	 *
+	 * Transfer methods. See comment on cl_req for a description of
+	 * transfer formation and life-cycle.
+	 *
+	 * @{
+	 */
+	/**
+	 * Request type dependent vector of operations.
+	 *
+	 * Transfer operations depend on transfer mode (cl_req_type). To avoid
+	 * passing transfer mode to each and every of these methods, and to
+	 * avoid branching on request type inside of the methods, separate
+	 * methods for cl_req_type:CRT_READ and cl_req_type:CRT_WRITE are
+	 * provided. That is, method invocation usually looks like
+	 *
+	 *	 slice->cp_ops.io[req->crq_type].cpo_method(env, slice, ...);
+	 */
+	struct {
+		/**
+		 * Called when a page is submitted for a transfer as a part of
+		 * cl_page_list.
+		 *
+		 * \return    0	 : page is eligible for submission;
+		 * \return    -EALREADY : skip this page;
+		 * \return    -ve       : error.
+		 *
+		 * \see cl_page_prep()
+		 */
+		int  (*cpo_prep)(const struct lu_env *env,
+				 const struct cl_page_slice *slice,
+				 struct cl_io *io);
+		/**
+		 * Completion handler. This is guaranteed to be eventually
+		 * fired after cl_page_operations::cpo_prep() or
+		 * cl_page_operations::cpo_make_ready() call.
+		 *
+		 * This method can be called in a non-blocking context. It is
+		 * guaranteed however, that the page involved and its object
+		 * are pinned in memory (and, hence, calling cl_page_put() is
+		 * safe).
+		 *
+		 * \see cl_page_completion()
+		 */
+		void (*cpo_completion)(const struct lu_env *env,
+				       const struct cl_page_slice *slice,
+				       int ioret);
+		/**
+		 * Called when cached page is about to be added to the
+		 * cl_req as a part of req formation.
+		 *
+		 * \return    0       : proceed with this page;
+		 * \return    -EAGAIN : skip this page;
+		 * \return    -ve     : error.
+		 *
+		 * \see cl_page_make_ready()
+		 */
+		int  (*cpo_make_ready)(const struct lu_env *env,
+				       const struct cl_page_slice *slice);
+		/**
+		 * Announce that this page is to be written out
+		 * opportunistically, that is, page is dirty, it is not
+		 * necessary to start write-out transfer right now, but
+		 * eventually page has to be written out.
+		 *
+		 * Main caller of this is the write path (see
+		 * vvp_io_commit_write()), using this method to build a
+		 * "transfer cache" from which large transfers are then
+		 * constructed by the req-formation engine.
+		 *
+		 * \todo XXX it would make sense to add page-age tracking
+		 * semantics here, and to oblige the req-formation engine to
+		 * send the page out not later than it is too old.
+		 *
+		 * \see cl_page_cache_add()
+		 */
+		int  (*cpo_cache_add)(const struct lu_env *env,
+				      const struct cl_page_slice *slice,
+				      struct cl_io *io);
+	} io[CRT_NR];
+	/**
+	 * Tell transfer engine that only [to, from] part of a page should be
+	 * transmitted.
+	 *
+	 * This is used for immediate transfers.
+	 *
+	 * \todo XXX this is not very good interface. It would be much better
+	 * if all transfer parameters were supplied as arguments to
+	 * cl_io_operations::cio_submit() call, but it is not clear how to do
+	 * this for page queues.
+	 *
+	 * \see cl_page_clip()
+	 */
+	void (*cpo_clip)(const struct lu_env *env,
+			 const struct cl_page_slice *slice,
+			 int from, int to);
+	/**
+	 * \pre  the page was queued for transferring.
+	 * \post page is removed from client's pending list, or -EBUSY
+	 *       is returned if it has already been in transferring.
+	 *
+	 * This is one of seldom page operation which is:
+	 * 0. called from top level;
+	 * 1. don't have vmpage locked;
+	 * 2. every layer should synchronize execution of its ->cpo_cancel()
+	 *    with completion handlers. Osc uses client obd lock for this
+	 *    purpose. Based on there is no vvp_page_cancel and
+	 *    lov_page_cancel(), cpo_cancel is defacto protected by client lock.
+	 *
+	 * \see osc_page_cancel().
+	 */
+	int (*cpo_cancel)(const struct lu_env *env,
+			  const struct cl_page_slice *slice);
+	/**
+	 * Write out a page by kernel. This is only called by ll_writepage
+	 * right now.
+	 *
+	 * \see cl_page_flush()
+	 */
+	int (*cpo_flush)(const struct lu_env *env,
+			 const struct cl_page_slice *slice,
+			 struct cl_io *io);
+	/** @} transfer */
+};
+
+/**
+ * Helper macro, dumping detailed information about \a page into a log.
+ */
+#define CL_PAGE_DEBUG(mask, env, page, format, ...)		     \
+do {								    \
+	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
+									\
+	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		   \
+		cl_page_print(env, &msgdata, lu_cdebug_printer, page);  \
+		CDEBUG(mask, format , ## __VA_ARGS__);		  \
+	}							       \
+} while (0)
+
+/**
+ * Helper macro, dumping shorter information about \a page into a log.
+ */
+#define CL_PAGE_HEADER(mask, env, page, format, ...)			  \
+do {									  \
+	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		      \
+									      \
+	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {			 \
+		cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \
+		CDEBUG(mask, format , ## __VA_ARGS__);			\
+	}								     \
+} while (0)
+
+static inline int __page_in_use(const struct cl_page *page, int refc)
+{
+	if (page->cp_type == CPT_CACHEABLE)
+		++refc;
+	LASSERT(atomic_read(&page->cp_ref) > 0);
+	return (atomic_read(&page->cp_ref) > refc);
+}
+#define cl_page_in_use(pg)       __page_in_use(pg, 1)
+#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+
+/** @} cl_page */
+
+/** \addtogroup cl_lock cl_lock
+ * @{ */
+/** \struct cl_lock
+ *
+ * Extent locking on the client.
+ *
+ * LAYERING
+ *
+ * The locking model of the new client code is built around
+ *
+ *	struct cl_lock
+ *
+ * data-type representing an extent lock on a regular file. cl_lock is a
+ * layered object (much like cl_object and cl_page), it consists of a header
+ * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
+ * cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
+ *
+ * All locks for a given object are linked into cl_object_header::coh_locks
+ * list (protected by cl_object_header::coh_lock_guard spin-lock) through
+ * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can
+ * sort it in starting lock offset, or use altogether different data structure
+ * like a tree.
+ *
+ * Typical cl_lock consists of the two layers:
+ *
+ *     - vvp_lock (vvp specific data), and
+ *     - lov_lock (lov specific data).
+ *
+ * lov_lock contains an array of sub-locks. Each of these sub-locks is a
+ * normal cl_lock: it has a header (struct cl_lock) and a list of layers:
+ *
+ *     - lovsub_lock, and
+ *     - osc_lock
+ *
+ * Each sub-lock is associated with a cl_object (representing stripe
+ * sub-object or the file to which top-level cl_lock is associated to), and is
+ * linked into that cl_object::coh_locks. In this respect cl_lock is similar to
+ * cl_object (that at lov layer also fans out into multiple sub-objects), and
+ * is different from cl_page, that doesn't fan out (there is usually exactly
+ * one osc_page for every vvp_page). We shall call vvp-lov portion of the lock
+ * a "top-lock" and its lovsub-osc portion a "sub-lock".
+ *
+ * LIFE CYCLE
+ *
+ * cl_lock is reference counted. When reference counter drops to 0, lock is
+ * placed in the cache, except when lock is in CLS_FREEING state. CLS_FREEING
+ * lock is destroyed when last reference is released. Referencing between
+ * top-lock and its sub-locks is described in the lov documentation module.
+ *
+ * STATE MACHINE
+ *
+ * Also, cl_lock is a state machine. This requires some clarification. One of
+ * the goals of client IO re-write was to make IO path non-blocking, or at
+ * least to make it easier to make it non-blocking in the future. Here
+ * `non-blocking' means that when a system call (read, write, truncate)
+ * reaches a situation where it has to wait for a communication with the
+ * server, it should --instead of waiting-- remember its current state and
+ * switch to some other work.  E.g,. instead of waiting for a lock enqueue,
+ * client should proceed doing IO on the next stripe, etc. Obviously this is
+ * rather radical redesign, and it is not planned to be fully implemented at
+ * this time, instead we are putting some infrastructure in place, that would
+ * make it easier to do asynchronous non-blocking IO easier in the
+ * future. Specifically, where old locking code goes to sleep (waiting for
+ * enqueue, for example), new code returns cl_lock_transition::CLO_WAIT. When
+ * enqueue reply comes, its completion handler signals that lock state-machine
+ * is ready to transit to the next state. There is some generic code in
+ * cl_lock.c that sleeps, waiting for these signals. As a result, for users of
+ * this cl_lock.c code, it looks like locking is done in normal blocking
+ * fashion, and it the same time it is possible to switch to the non-blocking
+ * locking (simply by returning cl_lock_transition::CLO_WAIT from cl_lock.c
+ * functions).
+ *
+ * For a description of state machine states and transitions see enum
+ * cl_lock_state.
+ *
+ * There are two ways to restrict a set of states which lock might move to:
+ *
+ *     - placing a "hold" on a lock guarantees that lock will not be moved
+ *       into cl_lock_state::CLS_FREEING state until hold is released. Hold
+ *       can be only acquired on a lock that is not in
+ *       cl_lock_state::CLS_FREEING. All holds on a lock are counted in
+ *       cl_lock::cll_holds. Hold protects lock from cancellation and
+ *       destruction. Requests to cancel and destroy a lock on hold will be
+ *       recorded, but only honored when last hold on a lock is released;
+ *
+ *     - placing a "user" on a lock guarantees that lock will not leave
+ *       cl_lock_state::CLS_NEW, cl_lock_state::CLS_QUEUING,
+ *       cl_lock_state::CLS_ENQUEUED and cl_lock_state::CLS_HELD set of
+ *       states, once it enters this set. That is, if a user is added onto a
+ *       lock in a state not from this set, it doesn't immediately enforce
+ *       lock to move to this set, but once lock enters this set it will
+ *       remain there until all users are removed. Lock users are counted in
+ *       cl_lock::cll_users.
+ *
+ *       User is used to assure that lock is not canceled or destroyed while
+ *       it is being enqueued, or actively used by some IO.
+ *
+ *       Currently, a user always comes with a hold (cl_lock_invariant()
+ *       checks that a number of holds is not less than a number of users).
+ *
+ * CONCURRENCY
+ *
+ * This is how lock state-machine operates. struct cl_lock contains a mutex
+ * cl_lock::cll_guard that protects struct fields.
+ *
+ *     - mutex is taken, and cl_lock::cll_state is examined.
+ *
+ *     - for every state there are possible target states where lock can move
+ *       into. They are tried in order. Attempts to move into next state are
+ *       done by _try() functions in cl_lock.c:cl_{enqueue,unlock,wait}_try().
+ *
+ *     - if the transition can be performed immediately, state is changed,
+ *       and mutex is released.
+ *
+ *     - if the transition requires blocking, _try() function returns
+ *       cl_lock_transition::CLO_WAIT. Caller unlocks mutex and goes to
+ *       sleep, waiting for possibility of lock state change. It is woken
+ *       up when some event occurs, that makes lock state change possible
+ *       (e.g., the reception of the reply from the server), and repeats
+ *       the loop.
+ *
+ * Top-lock and sub-lock has separate mutexes and the latter has to be taken
+ * first to avoid dead-lock.
+ *
+ * To see an example of interaction of all these issues, take a look at the
+ * lov_cl.c:lov_lock_enqueue() function. It is called as a part of
+ * cl_enqueue_try(), and tries to advance top-lock to ENQUEUED state, by
+ * advancing state-machines of its sub-locks (lov_lock_enqueue_one()). Note
+ * also, that it uses trylock to grab sub-lock mutex to avoid dead-lock. It
+ * also has to handle CEF_ASYNC enqueue, when sub-locks enqueues have to be
+ * done in parallel, rather than one after another (this is used for glimpse
+ * locks, that cannot dead-lock).
+ *
+ * INTERFACE AND USAGE
+ *
+ * struct cl_lock_operations provide a number of call-backs that are invoked
+ * when events of interest occurs. Layers can intercept and handle glimpse,
+ * blocking, cancel ASTs and a reception of the reply from the server.
+ *
+ * One important difference with the old client locking model is that new
+ * client has a representation for the top-lock, whereas in the old code only
+ * sub-locks existed as real data structures and file-level locks are
+ * represented by "request sets" that are created and destroyed on each and
+ * every lock creation.
+ *
+ * Top-locks are cached, and can be found in the cache by the system calls. It
+ * is possible that top-lock is in cache, but some of its sub-locks were
+ * canceled and destroyed. In that case top-lock has to be enqueued again
+ * before it can be used.
+ *
+ * Overall process of the locking during IO operation is as following:
+ *
+ *     - once parameters for IO are setup in cl_io, cl_io_operations::cio_lock()
+ *       is called on each layer. Responsibility of this method is to add locks,
+ *       needed by a given layer into cl_io.ci_lockset.
+ *
+ *     - once locks for all layers were collected, they are sorted to avoid
+ *       dead-locks (cl_io_locks_sort()), and enqueued.
+ *
+ *     - when all locks are acquired, IO is performed;
+ *
+ *     - locks are released into cache.
+ *
+ * Striping introduces major additional complexity into locking. The
+ * fundamental problem is that it is generally unsafe to actively use (hold)
+ * two locks on the different OST servers at the same time, as this introduces
+ * inter-server dependency and can lead to cascading evictions.
+ *
+ * Basic solution is to sub-divide large read/write IOs into smaller pieces so
+ * that no multi-stripe locks are taken (note that this design abandons POSIX
+ * read/write semantics). Such pieces ideally can be executed concurrently. At
+ * the same time, certain types of IO cannot be sub-divived, without
+ * sacrificing correctness. This includes:
+ *
+ *  - O_APPEND write, where [0, EOF] lock has to be taken, to guarantee
+ *  atomicity;
+ *
+ *  - ftruncate(fd, offset), where [offset, EOF] lock has to be taken.
+ *
+ * Also, in the case of read(fd, buf, count) or write(fd, buf, count), where
+ * buf is a part of memory mapped Lustre file, a lock or locks protecting buf
+ * has to be held together with the usual lock on [offset, offset + count].
+ *
+ * As multi-stripe locks have to be allowed, it makes sense to cache them, so
+ * that, for example, a sequence of O_APPEND writes can proceed quickly
+ * without going down to the individual stripes to do lock matching. On the
+ * other hand, multi-stripe locks shouldn't be used by normal read/write
+ * calls. To achieve this, every layer can implement ->clo_fits_into() method,
+ * that is called by lock matching code (cl_lock_lookup()), and that can be
+ * used to selectively disable matching of certain locks for certain IOs. For
+ * exmaple, lov layer implements lov_lock_fits_into() that allow multi-stripe
+ * locks to be matched only for truncates and O_APPEND writes.
+ *
+ * Interaction with DLM
+ *
+ * In the expected setup, cl_lock is ultimately backed up by a collection of
+ * DLM locks (struct ldlm_lock). Association between cl_lock and DLM lock is
+ * implemented in osc layer, that also matches DLM events (ASTs, cancellation,
+ * etc.) into cl_lock_operation calls. See struct osc_lock for a more detailed
+ * description of interaction with DLM.
+ */
+
+/**
+ * Lock description.
+ */
+struct cl_lock_descr {
+	/** Object this lock is granted for. */
+	struct cl_object *cld_obj;
+	/** Index of the first page protected by this lock. */
+	pgoff_t	   cld_start;
+	/** Index of the last page (inclusive) protected by this lock. */
+	pgoff_t	   cld_end;
+	/** Group ID, for group lock */
+	__u64	     cld_gid;
+	/** Lock mode. */
+	enum cl_lock_mode cld_mode;
+	/**
+	 * flags to enqueue lock. A combination of bit-flags from
+	 * enum cl_enq_flags.
+	 */
+	__u32	     cld_enq_flags;
+};
+
+#define DDESCR "%s(%d):[%lu, %lu]"
+#define PDESCR(descr)						   \
+	cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode,	\
+	(descr)->cld_start, (descr)->cld_end
+
+const char *cl_lock_mode_name(const enum cl_lock_mode mode);
+
+/**
+ * Lock state-machine states.
+ *
+ * \htmlonly
+ * <pre>
+ *
+ * Possible state transitions:
+ *
+ *	      +------------------>NEW
+ *	      |		    |
+ *	      |		    | cl_enqueue_try()
+ *	      |		    |
+ *	      |    cl_unuse_try()  V
+ *	      |  +--------------QUEUING (*)
+ *	      |  |		 |
+ *	      |  |		 | cl_enqueue_try()
+ *	      |  |		 |
+ *	      |  | cl_unuse_try()  V
+ *    sub-lock  |  +-------------ENQUEUED (*)
+ *    canceled  |  |		 |
+ *	      |  |		 | cl_wait_try()
+ *	      |  |		 |
+ *	      |  |		(R)
+ *	      |  |		 |
+ *	      |  |		 V
+ *	      |  |		HELD<---------+
+ *	      |  |		 |	    |
+ *	      |  |		 |	    | cl_use_try()
+ *	      |  |  cl_unuse_try() |	    |
+ *	      |  |		 |	    |
+ *	      |  |		 V	 ---+
+ *	      |  +------------>INTRANSIT (D) <--+
+ *	      |		    |	    |
+ *	      |     cl_unuse_try() |	    | cached lock found
+ *	      |		    |	    | cl_use_try()
+ *	      |		    |	    |
+ *	      |		    V	    |
+ *	      +------------------CACHED---------+
+ *				   |
+ *				  (C)
+ *				   |
+ *				   V
+ *				FREEING
+ *
+ * Legend:
+ *
+ *	 In states marked with (*) transition to the same state (i.e., a loop
+ *	 in the diagram) is possible.
+ *
+ *	 (R) is the point where Receive call-back is invoked: it allows layers
+ *	 to handle arrival of lock reply.
+ *
+ *	 (C) is the point where Cancellation call-back is invoked.
+ *
+ *	 (D) is the transit state which means the lock is changing.
+ *
+ *	 Transition to FREEING state is possible from any other state in the
+ *	 diagram in case of unrecoverable error.
+ * </pre>
+ * \endhtmlonly
+ *
+ * These states are for individual cl_lock object. Top-lock and its sub-locks
+ * can be in the different states. Another way to say this is that we have
+ * nested state-machines.
+ *
+ * Separate QUEUING and ENQUEUED states are needed to support non-blocking
+ * operation for locks with multiple sub-locks. Imagine lock on a file F, that
+ * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send
+ * enqueue to S0, wait for its completion, then send enqueue for S1, wait for
+ * its completion and at last enqueue lock for S2, and wait for its
+ * completion. In that case, top-lock is in QUEUING state while S0, S1 are
+ * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note
+ * that in this case, sub-locks move from state to state, and top-lock remains
+ * in the same state).
+ */
+enum cl_lock_state {
+	/**
+	 * Lock that wasn't yet enqueued
+	 */
+	CLS_NEW,
+	/**
+	 * Enqueue is in progress, blocking for some intermediate interaction
+	 * with the other side.
+	 */
+	CLS_QUEUING,
+	/**
+	 * Lock is fully enqueued, waiting for server to reply when it is
+	 * granted.
+	 */
+	CLS_ENQUEUED,
+	/**
+	 * Lock granted, actively used by some IO.
+	 */
+	CLS_HELD,
+	/**
+	 * This state is used to mark the lock is being used, or unused.
+	 * We need this state because the lock may have several sublocks,
+	 * so it's impossible to have an atomic way to bring all sublocks
+	 * into CLS_HELD state at use case, or all sublocks to CLS_CACHED
+	 * at unuse case.
+	 * If a thread is referring to a lock, and it sees the lock is in this
+	 * state, it must wait for the lock.
+	 * See state diagram for details.
+	 */
+	CLS_INTRANSIT,
+	/**
+	 * Lock granted, not used.
+	 */
+	CLS_CACHED,
+	/**
+	 * Lock is being destroyed.
+	 */
+	CLS_FREEING,
+	CLS_NR
+};
+
+enum cl_lock_flags {
+	/**
+	 * lock has been cancelled. This flag is never cleared once set (by
+	 * cl_lock_cancel0()).
+	 */
+	CLF_CANCELLED  = 1 << 0,
+	/** cancellation is pending for this lock. */
+	CLF_CANCELPEND = 1 << 1,
+	/** destruction is pending for this lock. */
+	CLF_DOOMED     = 1 << 2,
+	/** from enqueue RPC reply upcall. */
+	CLF_FROM_UPCALL= 1 << 3,
+};
+
+/**
+ * Lock closure.
+ *
+ * Lock closure is a collection of locks (both top-locks and sub-locks) that
+ * might be updated in a result of an operation on a certain lock (which lock
+ * this is a closure of).
+ *
+ * Closures are needed to guarantee dead-lock freedom in the presence of
+ *
+ *     - nested state-machines (top-lock state-machine composed of sub-lock
+ *       state-machines), and
+ *
+ *     - shared sub-locks.
+ *
+ * Specifically, many operations, such as lock enqueue, wait, unlock,
+ * etc. start from a top-lock, and then operate on a sub-locks of this
+ * top-lock, holding a top-lock mutex. When sub-lock state changes as a result
+ * of such operation, this change has to be propagated to all top-locks that
+ * share this sub-lock. Obviously, no natural lock ordering (e.g.,
+ * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has
+ * to be used. Lock closure systematizes this try-and-repeat logic.
+ */
+struct cl_lock_closure {
+	/**
+	 * Lock that is mutexed when closure construction is started. When
+	 * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on
+	 * origin is released before waiting.
+	 */
+	struct cl_lock   *clc_origin;
+	/**
+	 * List of enclosed locks, so far. Locks are linked here through
+	 * cl_lock::cll_inclosure.
+	 */
+	struct list_head	clc_list;
+	/**
+	 * True iff closure is in a `wait' mode. This determines what
+	 * cl_lock_enclosure() does when a lock L to be added to the closure
+	 * is currently mutexed by some other thread.
+	 *
+	 * If cl_lock_closure::clc_wait is not set, then closure construction
+	 * fails with CLO_REPEAT immediately.
+	 *
+	 * In wait mode, cl_lock_enclosure() waits until next attempt to build
+	 * a closure might succeed. To this end it releases an origin mutex
+	 * (cl_lock_closure::clc_origin), that has to be the only lock mutex
+	 * owned by the current thread, and then waits on L mutex (by grabbing
+	 * it and immediately releasing), before returning CLO_REPEAT to the
+	 * caller.
+	 */
+	int	       clc_wait;
+	/** Number of locks in the closure. */
+	int	       clc_nr;
+};
+
+/**
+ * Layered client lock.
+ */
+struct cl_lock {
+	/** Reference counter. */
+	atomic_t	  cll_ref;
+	/** List of slices. Immutable after creation. */
+	struct list_head	    cll_layers;
+	/**
+	 * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected
+	 * by cl_lock::cll_descr::cld_obj::coh_lock_guard.
+	 */
+	struct list_head	    cll_linkage;
+	/**
+	 * Parameters of this lock. Protected by
+	 * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within
+	 * cl_lock::cll_guard. Modified only on lock creation and in
+	 * cl_lock_modify().
+	 */
+	struct cl_lock_descr  cll_descr;
+	/** Protected by cl_lock::cll_guard. */
+	enum cl_lock_state    cll_state;
+	/** signals state changes. */
+	wait_queue_head_t	   cll_wq;
+	/**
+	 * Recursive lock, most fields in cl_lock{} are protected by this.
+	 *
+	 * Locking rules: this mutex is never held across network
+	 * communication, except when lock is being canceled.
+	 *
+	 * Lock ordering: a mutex of a sub-lock is taken first, then a mutex
+	 * on a top-lock. Other direction is implemented through a
+	 * try-lock-repeat loop. Mutices of unrelated locks can be taken only
+	 * by try-locking.
+	 *
+	 * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait().
+	 */
+	struct mutex		cll_guard;
+	task_t	   *cll_guarder;
+	int		   cll_depth;
+
+	/**
+	 * the owner for INTRANSIT state
+	 */
+	task_t	   *cll_intransit_owner;
+	int		   cll_error;
+	/**
+	 * Number of holds on a lock. A hold prevents a lock from being
+	 * canceled and destroyed. Protected by cl_lock::cll_guard.
+	 *
+	 * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release()
+	 */
+	int		   cll_holds;
+	 /**
+	  * Number of lock users. Valid in cl_lock_state::CLS_HELD state
+	  * only. Lock user pins lock in CLS_HELD state. Protected by
+	  * cl_lock::cll_guard.
+	  *
+	  * \see cl_wait(), cl_unuse().
+	  */
+	int		   cll_users;
+	/**
+	 * Flag bit-mask. Values from enum cl_lock_flags. Updates are
+	 * protected by cl_lock::cll_guard.
+	 */
+	unsigned long	 cll_flags;
+	/**
+	 * A linkage into a list of locks in a closure.
+	 *
+	 * \see cl_lock_closure
+	 */
+	struct list_head	    cll_inclosure;
+	/**
+	 * Confict lock at queuing time.
+	 */
+	struct cl_lock       *cll_conflict;
+	/**
+	 * A list of references to this lock, for debugging.
+	 */
+	struct lu_ref	 cll_reference;
+	/**
+	 * A list of holds on this lock, for debugging.
+	 */
+	struct lu_ref	 cll_holders;
+	/**
+	 * A reference for cl_lock::cll_descr::cld_obj. For debugging.
+	 */
+	struct lu_ref_link   *cll_obj_ref;
+#ifdef CONFIG_LOCKDEP
+	/* "dep_map" name is assumed by lockdep.h macros. */
+	struct lockdep_map    dep_map;
+#endif
+};
+
+/**
+ * Per-layer part of cl_lock
+ *
+ * \see ccc_lock, lov_lock, lovsub_lock, osc_lock
+ */
+struct cl_lock_slice {
+	struct cl_lock		  *cls_lock;
+	/** Object slice corresponding to this lock slice. Immutable after
+	 * creation. */
+	struct cl_object		*cls_obj;
+	const struct cl_lock_operations *cls_ops;
+	/** Linkage into cl_lock::cll_layers. Immutable after creation. */
+	struct list_head		       cls_linkage;
+};
+
+/**
+ * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}().
+ *
+ * NOTE: lov_subresult() depends on ordering here.
+ */
+enum cl_lock_transition {
+	/** operation cannot be completed immediately. Wait for state change. */
+	CLO_WAIT	= 1,
+	/** operation had to release lock mutex, restart. */
+	CLO_REPEAT      = 2,
+	/** lower layer re-enqueued. */
+	CLO_REENQUEUED  = 3,
+};
+
+/**
+ *
+ * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
+ */
+struct cl_lock_operations {
+	/**
+	 * \name statemachine
+	 *
+	 * State machine transitions. These 3 methods are called to transfer
+	 * lock from one state to another, as described in the commentary
+	 * above enum #cl_lock_state.
+	 *
+	 * \retval 0	  this layer has nothing more to do to before
+	 *		       transition to the target state happens;
+	 *
+	 * \retval CLO_REPEAT method had to release and re-acquire cl_lock
+	 *		    mutex, repeat invocation of transition method
+	 *		    across all layers;
+	 *
+	 * \retval CLO_WAIT   this layer cannot move to the target state
+	 *		    immediately, as it has to wait for certain event
+	 *		    (e.g., the communication with the server). It
+	 *		    is guaranteed, that when the state transfer
+	 *		    becomes possible, cl_lock::cll_wq wait-queue
+	 *		    is signaled. Caller can wait for this event by
+	 *		    calling cl_lock_state_wait();
+	 *
+	 * \retval -ve	failure, abort state transition, move the lock
+	 *		    into cl_lock_state::CLS_FREEING state, and set
+	 *		    cl_lock::cll_error.
+	 *
+	 * Once all layers voted to agree to transition (by returning 0), lock
+	 * is moved into corresponding target state. All state transition
+	 * methods are optional.
+	 */
+	/** @{ */
+	/**
+	 * Attempts to enqueue the lock. Called top-to-bottom.
+	 *
+	 * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
+	 * \see osc_lock_enqueue()
+	 */
+	int  (*clo_enqueue)(const struct lu_env *env,
+			    const struct cl_lock_slice *slice,
+			    struct cl_io *io, __u32 enqflags);
+	/**
+	 * Attempts to wait for enqueue result. Called top-to-bottom.
+	 *
+	 * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait()
+	 */
+	int  (*clo_wait)(const struct lu_env *env,
+			 const struct cl_lock_slice *slice);
+	/**
+	 * Attempts to unlock the lock. Called bottom-to-top. In addition to
+	 * usual return values of lock state-machine methods, this can return
+	 * -ESTALE to indicate that lock cannot be returned to the cache, and
+	 * has to be re-initialized.
+	 * unuse is a one-shot operation, so it must NOT return CLO_WAIT.
+	 *
+	 * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse()
+	 */
+	int  (*clo_unuse)(const struct lu_env *env,
+			  const struct cl_lock_slice *slice);
+	/**
+	 * Notifies layer that cached lock is started being used.
+	 *
+	 * \pre lock->cll_state == CLS_CACHED
+	 *
+	 * \see lov_lock_use(), osc_lock_use()
+	 */
+	int  (*clo_use)(const struct lu_env *env,
+			const struct cl_lock_slice *slice);
+	/** @} statemachine */
+	/**
+	 * A method invoked when lock state is changed (as a result of state
+	 * transition). This is used, for example, to track when the state of
+	 * a sub-lock changes, to propagate this change to the corresponding
+	 * top-lock. Optional
+	 *
+	 * \see lovsub_lock_state()
+	 */
+	void (*clo_state)(const struct lu_env *env,
+			  const struct cl_lock_slice *slice,
+			  enum cl_lock_state st);
+	/**
+	 * Returns true, iff given lock is suitable for the given io, idea
+	 * being, that there are certain "unsafe" locks, e.g., ones acquired
+	 * for O_APPEND writes, that we don't want to re-use for a normal
+	 * write, to avoid the danger of cascading evictions. Optional. Runs
+	 * under cl_object_header::coh_lock_guard.
+	 *
+	 * XXX this should take more information about lock needed by
+	 * io. Probably lock description or something similar.
+	 *
+	 * \see lov_fits_into()
+	 */
+	int (*clo_fits_into)(const struct lu_env *env,
+			     const struct cl_lock_slice *slice,
+			     const struct cl_lock_descr *need,
+			     const struct cl_io *io);
+	/**
+	 * \name ast
+	 * Asynchronous System Traps. All of then are optional, all are
+	 * executed bottom-to-top.
+	 */
+	/** @{ */
+
+	/**
+	 * Cancellation callback. Cancel a lock voluntarily, or under
+	 * the request of server.
+	 */
+	void (*clo_cancel)(const struct lu_env *env,
+			   const struct cl_lock_slice *slice);
+	/**
+	 * Lock weighting ast. Executed to estimate how precious this lock
+	 * is. The sum of results across all layers is used to determine
+	 * whether lock worth keeping in cache given present memory usage.
+	 *
+	 * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh().
+	 */
+	unsigned long (*clo_weigh)(const struct lu_env *env,
+				   const struct cl_lock_slice *slice);
+	/** @} ast */
+
+	/**
+	 * \see lovsub_lock_closure()
+	 */
+	int (*clo_closure)(const struct lu_env *env,
+			   const struct cl_lock_slice *slice,
+			   struct cl_lock_closure *closure);
+	/**
+	 * Executed bottom-to-top when lock description changes (e.g., as a
+	 * result of server granting more generous lock than was requested).
+	 *
+	 * \see lovsub_lock_modify()
+	 */
+	int (*clo_modify)(const struct lu_env *env,
+			  const struct cl_lock_slice *slice,
+			  const struct cl_lock_descr *updated);
+	/**
+	 * Notifies layers (bottom-to-top) that lock is going to be
+	 * destroyed. Responsibility of layers is to prevent new references on
+	 * this lock from being acquired once this method returns.
+	 *
+	 * This can be called multiple times due to the races.
+	 *
+	 * \see cl_lock_delete()
+	 * \see osc_lock_delete(), lovsub_lock_delete()
+	 */
+	void (*clo_delete)(const struct lu_env *env,
+			   const struct cl_lock_slice *slice);
+	/**
+	 * Destructor. Frees resources and the slice.
+	 *
+	 * \see ccc_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
+	 * \see osc_lock_fini()
+	 */
+	void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice);
+	/**
+	 * Optional debugging helper. Prints given lock slice.
+	 */
+	int (*clo_print)(const struct lu_env *env,
+			 void *cookie, lu_printer_t p,
+			 const struct cl_lock_slice *slice);
+};
+
+#define CL_LOCK_DEBUG(mask, env, lock, format, ...)		     \
+do {								    \
+	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
+									\
+	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		   \
+		cl_lock_print(env, &msgdata, lu_cdebug_printer, lock);  \
+		CDEBUG(mask, format , ## __VA_ARGS__);		  \
+	}							       \
+} while (0)
+
+#define CL_LOCK_ASSERT(expr, env, lock) do {			    \
+	if (likely(expr))					       \
+		break;						  \
+									\
+	CL_LOCK_DEBUG(D_ERROR, env, lock, "failed at %s.\n", #expr);    \
+	LBUG();							 \
+} while (0)
+
+/** @} cl_lock */
+
+/** \addtogroup cl_page_list cl_page_list
+ * Page list used to perform collective operations on a group of pages.
+ *
+ * Pages are added to the list one by one. cl_page_list acquires a reference
+ * for every page in it. Page list is used to perform collective operations on
+ * pages:
+ *
+ *     - submit pages for an immediate transfer,
+ *
+ *     - own pages on behalf of certain io (waiting for each page in turn),
+ *
+ *     - discard pages.
+ *
+ * When list is finalized, it releases references on all pages it still has.
+ *
+ * \todo XXX concurrency control.
+ *
+ * @{
+ */
+struct cl_page_list {
+	unsigned	     pl_nr;
+	struct list_head	   pl_pages;
+	task_t	  *pl_owner;
+};
+
+/**
+ * A 2-queue of pages. A convenience data-type for common use case, 2-queue
+ * contains an incoming page list and an outgoing page list.
+ */
+struct cl_2queue {
+	struct cl_page_list c2_qin;
+	struct cl_page_list c2_qout;
+};
+
+/** @} cl_page_list */
+
+/** \addtogroup cl_io cl_io
+ * @{ */
+/** \struct cl_io
+ * I/O
+ *
+ * cl_io represents a high level I/O activity like
+ * read(2)/write(2)/truncate(2) system call, or cancellation of an extent
+ * lock.
+ *
+ * cl_io is a layered object, much like cl_{object,page,lock} but with one
+ * important distinction. We want to minimize number of calls to the allocator
+ * in the fast path, e.g., in the case of read(2) when everything is cached:
+ * client already owns the lock over region being read, and data are cached
+ * due to read-ahead. To avoid allocation of cl_io layers in such situations,
+ * per-layer io state is stored in the session, associated with the io, see
+ * struct {vvp,lov,osc}_io for example. Sessions allocation is amortized
+ * by using free-lists, see cl_env_get().
+ *
+ * There is a small predefined number of possible io types, enumerated in enum
+ * cl_io_type.
+ *
+ * cl_io is a state machine, that can be advanced concurrently by the multiple
+ * threads. It is up to these threads to control the concurrency and,
+ * specifically, to detect when io is done, and its state can be safely
+ * released.
+ *
+ * For read/write io overall execution plan is as following:
+ *
+ *     (0) initialize io state through all layers;
+ *
+ *     (1) loop: prepare chunk of work to do
+ *
+ *     (2) call all layers to collect locks they need to process current chunk
+ *
+ *     (3) sort all locks to avoid dead-locks, and acquire them
+ *
+ *     (4) process the chunk: call per-page methods
+ *	 (cl_io_operations::cio_read_page() for read,
+ *	 cl_io_operations::cio_prepare_write(),
+ *	 cl_io_operations::cio_commit_write() for write)
+ *
+ *     (5) release locks
+ *
+ *     (6) repeat loop.
+ *
+ * To implement the "parallel IO mode", lov layer creates sub-io's (lazily to
+ * address allocation efficiency issues mentioned above), and returns with the
+ * special error condition from per-page method when current sub-io has to
+ * block. This causes io loop to be repeated, and lov switches to the next
+ * sub-io in its cl_io_operations::cio_iter_init() implementation.
+ */
+
+/** IO types */
+enum cl_io_type {
+	/** read system call */
+	CIT_READ,
+	/** write system call */
+	CIT_WRITE,
+	/** truncate, utime system calls */
+	CIT_SETATTR,
+	/**
+	 * page fault handling
+	 */
+	CIT_FAULT,
+	/**
+	 * fsync system call handling
+	 * To write out a range of file
+	 */
+	CIT_FSYNC,
+	/**
+	 * Miscellaneous io. This is used for occasional io activity that
+	 * doesn't fit into other types. Currently this is used for:
+	 *
+	 *     - cancellation of an extent lock. This io exists as a context
+	 *     to write dirty pages from under the lock being canceled back
+	 *     to the server;
+	 *
+	 *     - VM induced page write-out. An io context for writing page out
+	 *     for memory cleansing;
+	 *
+	 *     - glimpse. An io context to acquire glimpse lock.
+	 *
+	 *     - grouplock. An io context to acquire group lock.
+	 *
+	 * CIT_MISC io is used simply as a context in which locks and pages
+	 * are manipulated. Such io has no internal "process", that is,
+	 * cl_io_loop() is never called for it.
+	 */
+	CIT_MISC,
+	CIT_OP_NR
+};
+
+/**
+ * States of cl_io state machine
+ */
+enum cl_io_state {
+	/** Not initialized. */
+	CIS_ZERO,
+	/** Initialized. */
+	CIS_INIT,
+	/** IO iteration started. */
+	CIS_IT_STARTED,
+	/** Locks taken. */
+	CIS_LOCKED,
+	/** Actual IO is in progress. */
+	CIS_IO_GOING,
+	/** IO for the current iteration finished. */
+	CIS_IO_FINISHED,
+	/** Locks released. */
+	CIS_UNLOCKED,
+	/** Iteration completed. */
+	CIS_IT_ENDED,
+	/** cl_io finalized. */
+	CIS_FINI
+};
+
+/**
+ * IO state private for a layer.
+ *
+ * This is usually embedded into layer session data, rather than allocated
+ * dynamically.
+ *
+ * \see vvp_io, lov_io, osc_io, ccc_io
+ */
+struct cl_io_slice {
+	struct cl_io		  *cis_io;
+	/** corresponding object slice. Immutable after creation. */
+	struct cl_object	      *cis_obj;
+	/** io operations. Immutable after creation. */
+	const struct cl_io_operations *cis_iop;
+	/**
+	 * linkage into a list of all slices for a given cl_io, hanging off
+	 * cl_io::ci_layers. Immutable after creation.
+	 */
+	struct list_head		     cis_linkage;
+};
+
+
+/**
+ * Per-layer io operations.
+ * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
+ */
+struct cl_io_operations {
+	/**
+	 * Vector of io state transition methods for every io type.
+	 *
+	 * \see cl_page_operations::io
+	 */
+	struct {
+		/**
+		 * Prepare io iteration at a given layer.
+		 *
+		 * Called top-to-bottom at the beginning of each iteration of
+		 * "io loop" (if it makes sense for this type of io). Here
+		 * layer selects what work it will do during this iteration.
+		 *
+		 * \see cl_io_operations::cio_iter_fini()
+		 */
+		int (*cio_iter_init) (const struct lu_env *env,
+				      const struct cl_io_slice *slice);
+		/**
+		 * Finalize io iteration.
+		 *
+		 * Called bottom-to-top at the end of each iteration of "io
+		 * loop". Here layers can decide whether IO has to be
+		 * continued.
+		 *
+		 * \see cl_io_operations::cio_iter_init()
+		 */
+		void (*cio_iter_fini) (const struct lu_env *env,
+				       const struct cl_io_slice *slice);
+		/**
+		 * Collect locks for the current iteration of io.
+		 *
+		 * Called top-to-bottom to collect all locks necessary for
+		 * this iteration. This methods shouldn't actually enqueue
+		 * anything, instead it should post a lock through
+		 * cl_io_lock_add(). Once all locks are collected, they are
+		 * sorted and enqueued in the proper order.
+		 */
+		int  (*cio_lock) (const struct lu_env *env,
+				  const struct cl_io_slice *slice);
+		/**
+		 * Finalize unlocking.
+		 *
+		 * Called bottom-to-top to finish layer specific unlocking
+		 * functionality, after generic code released all locks
+		 * acquired by cl_io_operations::cio_lock().
+		 */
+		void  (*cio_unlock)(const struct lu_env *env,
+				    const struct cl_io_slice *slice);
+		/**
+		 * Start io iteration.
+		 *
+		 * Once all locks are acquired, called top-to-bottom to
+		 * commence actual IO. In the current implementation,
+		 * top-level vvp_io_{read,write}_start() does all the work
+		 * synchronously by calling generic_file_*(), so other layers
+		 * are called when everything is done.
+		 */
+		int  (*cio_start)(const struct lu_env *env,
+				  const struct cl_io_slice *slice);
+		/**
+		 * Called top-to-bottom at the end of io loop. Here layer
+		 * might wait for an unfinished asynchronous io.
+		 */
+		void (*cio_end)  (const struct lu_env *env,
+				  const struct cl_io_slice *slice);
+		/**
+		 * Called bottom-to-top to notify layers that read/write IO
+		 * iteration finished, with \a nob bytes transferred.
+		 */
+		void (*cio_advance)(const struct lu_env *env,
+				    const struct cl_io_slice *slice,
+				    size_t nob);
+		/**
+		 * Called once per io, bottom-to-top to release io resources.
+		 */
+		void (*cio_fini) (const struct lu_env *env,
+				  const struct cl_io_slice *slice);
+	} op[CIT_OP_NR];
+	struct {
+		/**
+		 * Submit pages from \a queue->c2_qin for IO, and move
+		 * successfully submitted pages into \a queue->c2_qout. Return
+		 * non-zero if failed to submit even the single page. If
+		 * submission failed after some pages were moved into \a
+		 * queue->c2_qout, completion callback with non-zero ioret is
+		 * executed on them.
+		 */
+		int  (*cio_submit)(const struct lu_env *env,
+				   const struct cl_io_slice *slice,
+				   enum cl_req_type crt,
+				   struct cl_2queue *queue);
+	} req_op[CRT_NR];
+	/**
+	 * Read missing page.
+	 *
+	 * Called by a top-level cl_io_operations::op[CIT_READ]::cio_start()
+	 * method, when it hits not-up-to-date page in the range. Optional.
+	 *
+	 * \pre io->ci_type == CIT_READ
+	 */
+	int (*cio_read_page)(const struct lu_env *env,
+			     const struct cl_io_slice *slice,
+			     const struct cl_page_slice *page);
+	/**
+	 * Prepare write of a \a page. Called bottom-to-top by a top-level
+	 * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for
+	 * get data from user-level buffer.
+	 *
+	 * \pre io->ci_type == CIT_WRITE
+	 *
+	 * \see vvp_io_prepare_write(), lov_io_prepare_write(),
+	 * osc_io_prepare_write().
+	 */
+	int (*cio_prepare_write)(const struct lu_env *env,
+				 const struct cl_io_slice *slice,
+				 const struct cl_page_slice *page,
+				 unsigned from, unsigned to);
+	/**
+	 *
+	 * \pre io->ci_type == CIT_WRITE
+	 *
+	 * \see vvp_io_commit_write(), lov_io_commit_write(),
+	 * osc_io_commit_write().
+	 */
+	int (*cio_commit_write)(const struct lu_env *env,
+				const struct cl_io_slice *slice,
+				const struct cl_page_slice *page,
+				unsigned from, unsigned to);
+	/**
+	 * Optional debugging helper. Print given io slice.
+	 */
+	int (*cio_print)(const struct lu_env *env, void *cookie,
+			 lu_printer_t p, const struct cl_io_slice *slice);
+};
+
+/**
+ * Flags to lock enqueue procedure.
+ * \ingroup cl_lock
+ */
+enum cl_enq_flags {
+	/**
+	 * instruct server to not block, if conflicting lock is found. Instead
+	 * -EWOULDBLOCK is returned immediately.
+	 */
+	CEF_NONBLOCK     = 0x00000001,
+	/**
+	 * take lock asynchronously (out of order), as it cannot
+	 * deadlock. This is for LDLM_FL_HAS_INTENT locks used for glimpsing.
+	 */
+	CEF_ASYNC	= 0x00000002,
+	/**
+	 * tell the server to instruct (though a flag in the blocking ast) an
+	 * owner of the conflicting lock, that it can drop dirty pages
+	 * protected by this lock, without sending them to the server.
+	 */
+	CEF_DISCARD_DATA = 0x00000004,
+	/**
+	 * tell the sub layers that it must be a `real' lock. This is used for
+	 * mmapped-buffer locks and glimpse locks that must be never converted
+	 * into lockless mode.
+	 *
+	 * \see vvp_mmap_locks(), cl_glimpse_lock().
+	 */
+	CEF_MUST	 = 0x00000008,
+	/**
+	 * tell the sub layers that never request a `real' lock. This flag is
+	 * not used currently.
+	 *
+	 * cl_io::ci_lockreq and CEF_{MUST,NEVER} flags specify lockless
+	 * conversion policy: ci_lockreq describes generic information of lock
+	 * requirement for this IO, especially for locks which belong to the
+	 * object doing IO; however, lock itself may have precise requirements
+	 * that are described by the enqueue flags.
+	 */
+	CEF_NEVER	= 0x00000010,
+	/**
+	 * for async glimpse lock.
+	 */
+	CEF_AGL	  = 0x00000020,
+	/**
+	 * mask of enq_flags.
+	 */
+	CEF_MASK	 = 0x0000003f,
+};
+
+/**
+ * Link between lock and io. Intermediate structure is needed, because the
+ * same lock can be part of multiple io's simultaneously.
+ */
+struct cl_io_lock_link {
+	/** linkage into one of cl_lockset lists. */
+	struct list_head	   cill_linkage;
+	struct cl_lock_descr cill_descr;
+	struct cl_lock      *cill_lock;
+	/** optional destructor */
+	void	       (*cill_fini)(const struct lu_env *env,
+					struct cl_io_lock_link *link);
+};
+
+/**
+ * Lock-set represents a collection of locks, that io needs at a
+ * time. Generally speaking, client tries to avoid holding multiple locks when
+ * possible, because
+ *
+ *      - holding extent locks over multiple ost's introduces the danger of
+ *	"cascading timeouts";
+ *
+ *      - holding multiple locks over the same ost is still dead-lock prone,
+ *	see comment in osc_lock_enqueue(),
+ *
+ * but there are certain situations where this is unavoidable:
+ *
+ *      - O_APPEND writes have to take [0, EOF] lock for correctness;
+ *
+ *      - truncate has to take [new-size, EOF] lock for correctness;
+ *
+ *      - SNS has to take locks across full stripe for correctness;
+ *
+ *      - in the case when user level buffer, supplied to {read,write}(file0),
+ *	is a part of a memory mapped lustre file, client has to take a dlm
+ *	locks on file0, and all files that back up the buffer (or a part of
+ *	the buffer, that is being processed in the current chunk, in any
+ *	case, there are situations where at least 2 locks are necessary).
+ *
+ * In such cases we at least try to take locks in the same consistent
+ * order. To this end, all locks are first collected, then sorted, and then
+ * enqueued.
+ */
+struct cl_lockset {
+	/** locks to be acquired. */
+	struct list_head  cls_todo;
+	/** locks currently being processed. */
+	struct list_head  cls_curr;
+	/** locks acquired. */
+	struct list_head  cls_done;
+};
+
+/**
+ * Lock requirements(demand) for IO. It should be cl_io_lock_req,
+ * but 'req' is always to be thought as 'request' :-)
+ */
+enum cl_io_lock_dmd {
+	/** Always lock data (e.g., O_APPEND). */
+	CILR_MANDATORY = 0,
+	/** Layers are free to decide between local and global locking. */
+	CILR_MAYBE,
+	/** Never lock: there is no cache (e.g., liblustre). */
+	CILR_NEVER
+};
+
+enum cl_fsync_mode {
+	/** start writeback, do not wait for them to finish */
+	CL_FSYNC_NONE  = 0,
+	/** start writeback and wait for them to finish */
+	CL_FSYNC_LOCAL = 1,
+	/** discard all of dirty pages in a specific file range */
+	CL_FSYNC_DISCARD = 2,
+	/** start writeback and make sure they have reached storage before
+	 * return. OST_SYNC RPC must be issued and finished */
+	CL_FSYNC_ALL   = 3
+};
+
+struct cl_io_rw_common {
+	loff_t      crw_pos;
+	size_t      crw_count;
+	int	 crw_nonblock;
+};
+
+
+/**
+ * State for io.
+ *
+ * cl_io is shared by all threads participating in this IO (in current
+ * implementation only one thread advances IO, but parallel IO design and
+ * concurrent copy_*_user() require multiple threads acting on the same IO. It
+ * is up to these threads to serialize their activities, including updates to
+ * mutable cl_io fields.
+ */
+struct cl_io {
+	/** type of this IO. Immutable after creation. */
+	enum cl_io_type		ci_type;
+	/** current state of cl_io state machine. */
+	enum cl_io_state	       ci_state;
+	/** main object this io is against. Immutable after creation. */
+	struct cl_object	      *ci_obj;
+	/**
+	 * Upper layer io, of which this io is a part of. Immutable after
+	 * creation.
+	 */
+	struct cl_io		  *ci_parent;
+	/** List of slices. Immutable after creation. */
+	struct list_head		     ci_layers;
+	/** list of locks (to be) acquired by this io. */
+	struct cl_lockset	      ci_lockset;
+	/** lock requirements, this is just a help info for sublayers. */
+	enum cl_io_lock_dmd	    ci_lockreq;
+	union {
+		struct cl_rd_io {
+			struct cl_io_rw_common rd;
+		} ci_rd;
+		struct cl_wr_io {
+			struct cl_io_rw_common wr;
+			int		    wr_append;
+			int		    wr_sync;
+		} ci_wr;
+		struct cl_io_rw_common ci_rw;
+		struct cl_setattr_io {
+			struct ost_lvb   sa_attr;
+			unsigned int     sa_valid;
+			struct obd_capa *sa_capa;
+		} ci_setattr;
+		struct cl_fault_io {
+			/** page index within file. */
+			pgoff_t	 ft_index;
+			/** bytes valid byte on a faulted page. */
+			int	     ft_nob;
+			/** writable page? for nopage() only */
+			int	     ft_writable;
+			/** page of an executable? */
+			int	     ft_executable;
+			/** page_mkwrite() */
+			int	     ft_mkwrite;
+			/** resulting page */
+			struct cl_page *ft_page;
+		} ci_fault;
+		struct cl_fsync_io {
+			loff_t	     fi_start;
+			loff_t	     fi_end;
+			struct obd_capa   *fi_capa;
+			/** file system level fid */
+			struct lu_fid     *fi_fid;
+			enum cl_fsync_mode fi_mode;
+			/* how many pages were written/discarded */
+			unsigned int       fi_nr_written;
+		} ci_fsync;
+	} u;
+	struct cl_2queue     ci_queue;
+	size_t	       ci_nob;
+	int		  ci_result;
+	unsigned int	 ci_continue:1,
+	/**
+	 * This io has held grouplock, to inform sublayers that
+	 * don't do lockless i/o.
+	 */
+			     ci_no_srvlock:1,
+	/**
+	 * The whole IO need to be restarted because layout has been changed
+	 */
+			     ci_need_restart:1,
+	/**
+	 * to not refresh layout - the IO issuer knows that the layout won't
+	 * change(page operations, layout change causes all page to be
+	 * discarded), or it doesn't matter if it changes(sync).
+	 */
+			     ci_ignore_layout:1,
+	/**
+	 * Check if layout changed after the IO finishes. Mainly for HSM
+	 * requirement. If IO occurs to openning files, it doesn't need to
+	 * verify layout because HSM won't release openning files.
+	 * Right now, only two opertaions need to verify layout: glimpse
+	 * and setattr.
+	 */
+			     ci_verify_layout:1;
+	/**
+	 * Number of pages owned by this IO. For invariant checking.
+	 */
+	unsigned	     ci_owned_nr;
+};
+
+/** @} cl_io */
+
+/** \addtogroup cl_req cl_req
+ * @{ */
+/** \struct cl_req
+ * Transfer.
+ *
+ * There are two possible modes of transfer initiation on the client:
+ *
+ *     - immediate transfer: this is started when a high level io wants a page
+ *       or a collection of pages to be transferred right away. Examples:
+ *       read-ahead, synchronous read in the case of non-page aligned write,
+ *       page write-out as a part of extent lock cancellation, page write-out
+ *       as a part of memory cleansing. Immediate transfer can be both
+ *       cl_req_type::CRT_READ and cl_req_type::CRT_WRITE;
+ *
+ *     - opportunistic transfer (cl_req_type::CRT_WRITE only), that happens
+ *       when io wants to transfer a page to the server some time later, when
+ *       it can be done efficiently. Example: pages dirtied by the write(2)
+ *       path.
+ *
+ * In any case, transfer takes place in the form of a cl_req, which is a
+ * representation for a network RPC.
+ *
+ * Pages queued for an opportunistic transfer are cached until it is decided
+ * that efficient RPC can be composed of them. This decision is made by "a
+ * req-formation engine", currently implemented as a part of osc
+ * layer. Req-formation depends on many factors: the size of the resulting
+ * RPC, whether or not multi-object RPCs are supported by the server,
+ * max-rpc-in-flight limitations, size of the dirty cache, etc.
+ *
+ * For the immediate transfer io submits a cl_page_list, that req-formation
+ * engine slices into cl_req's, possibly adding cached pages to some of
+ * the resulting req's.
+ *
+ * Whenever a page from cl_page_list is added to a newly constructed req, its
+ * cl_page_operations::cpo_prep() layer methods are called. At that moment,
+ * page state is atomically changed from cl_page_state::CPS_OWNED to
+ * cl_page_state::CPS_PAGEOUT or cl_page_state::CPS_PAGEIN, cl_page::cp_owner
+ * is zeroed, and cl_page::cp_req is set to the
+ * req. cl_page_operations::cpo_prep() method at the particular layer might
+ * return -EALREADY to indicate that it does not need to submit this page
+ * at all. This is possible, for example, if page, submitted for read,
+ * became up-to-date in the meantime; and for write, the page don't have
+ * dirty bit marked. \see cl_io_submit_rw()
+ *
+ * Whenever a cached page is added to a newly constructed req, its
+ * cl_page_operations::cpo_make_ready() layer methods are called. At that
+ * moment, page state is atomically changed from cl_page_state::CPS_CACHED to
+ * cl_page_state::CPS_PAGEOUT, and cl_page::cp_req is set to
+ * req. cl_page_operations::cpo_make_ready() method at the particular layer
+ * might return -EAGAIN to indicate that this page is not eligible for the
+ * transfer right now.
+ *
+ * FUTURE
+ *
+ * Plan is to divide transfers into "priority bands" (indicated when
+ * submitting cl_page_list, and queuing a page for the opportunistic transfer)
+ * and allow glueing of cached pages to immediate transfers only within single
+ * band. This would make high priority transfers (like lock cancellation or
+ * memory pressure induced write-out) really high priority.
+ *
+ */
+
+/**
+ * Per-transfer attributes.
+ */
+struct cl_req_attr {
+	/** Generic attributes for the server consumption. */
+	struct obdo	*cra_oa;
+	/** Capability. */
+	struct obd_capa	*cra_capa;
+	/** Jobid */
+	char		 cra_jobid[JOBSTATS_JOBID_SIZE];
+};
+
+/**
+ * Transfer request operations definable at every layer.
+ *
+ * Concurrency: transfer formation engine synchronizes calls to all transfer
+ * methods.
+ */
+struct cl_req_operations {
+	/**
+	 * Invoked top-to-bottom by cl_req_prep() when transfer formation is
+	 * complete (all pages are added).
+	 *
+	 * \see osc_req_prep()
+	 */
+	int  (*cro_prep)(const struct lu_env *env,
+			 const struct cl_req_slice *slice);
+	/**
+	 * Called top-to-bottom to fill in \a oa fields. This is called twice
+	 * with different flags, see bug 10150 and osc_build_req().
+	 *
+	 * \param obj an object from cl_req which attributes are to be set in
+	 *	    \a oa.
+	 *
+	 * \param oa struct obdo where attributes are placed
+	 *
+	 * \param flags \a oa fields to be filled.
+	 */
+	void (*cro_attr_set)(const struct lu_env *env,
+			     const struct cl_req_slice *slice,
+			     const struct cl_object *obj,
+			     struct cl_req_attr *attr, obd_valid flags);
+	/**
+	 * Called top-to-bottom from cl_req_completion() to notify layers that
+	 * transfer completed. Has to free all state allocated by
+	 * cl_device_operations::cdo_req_init().
+	 */
+	void (*cro_completion)(const struct lu_env *env,
+			       const struct cl_req_slice *slice, int ioret);
+};
+
+/**
+ * A per-object state that (potentially multi-object) transfer request keeps.
+ */
+struct cl_req_obj {
+	/** object itself */
+	struct cl_object   *ro_obj;
+	/** reference to cl_req_obj::ro_obj. For debugging. */
+	struct lu_ref_link *ro_obj_ref;
+	/* something else? Number of pages for a given object? */
+};
+
+/**
+ * Transfer request.
+ *
+ * Transfer requests are not reference counted, because IO sub-system owns
+ * them exclusively and knows when to free them.
+ *
+ * Life cycle.
+ *
+ * cl_req is created by cl_req_alloc() that calls
+ * cl_device_operations::cdo_req_init() device methods to allocate per-req
+ * state in every layer.
+ *
+ * Then pages are added (cl_req_page_add()), req keeps track of all objects it
+ * contains pages for.
+ *
+ * Once all pages were collected, cl_page_operations::cpo_prep() method is
+ * called top-to-bottom. At that point layers can modify req, let it pass, or
+ * deny it completely. This is to support things like SNS that have transfer
+ * ordering requirements invisible to the individual req-formation engine.
+ *
+ * On transfer completion (or transfer timeout, or failure to initiate the
+ * transfer of an allocated req), cl_req_operations::cro_completion() method
+ * is called, after execution of cl_page_operations::cpo_completion() of all
+ * req's pages.
+ */
+struct cl_req {
+	enum cl_req_type      crq_type;
+	/** A list of pages being transfered */
+	struct list_head	    crq_pages;
+	/** Number of pages in cl_req::crq_pages */
+	unsigned	      crq_nrpages;
+	/** An array of objects which pages are in ->crq_pages */
+	struct cl_req_obj    *crq_o;
+	/** Number of elements in cl_req::crq_objs[] */
+	unsigned	      crq_nrobjs;
+	struct list_head	    crq_layers;
+};
+
+/**
+ * Per-layer state for request.
+ */
+struct cl_req_slice {
+	struct cl_req    *crs_req;
+	struct cl_device *crs_dev;
+	struct list_head	crs_linkage;
+	const struct cl_req_operations *crs_ops;
+};
+
+/* @} cl_req */
+
+enum cache_stats_item {
+	/** how many cache lookups were performed */
+	CS_lookup = 0,
+	/** how many times cache lookup resulted in a hit */
+	CS_hit,
+	/** how many entities are in the cache right now */
+	CS_total,
+	/** how many entities in the cache are actively used (and cannot be
+	 * evicted) right now */
+	CS_busy,
+	/** how many entities were created at all */
+	CS_create,
+	CS_NR
+};
+
+#define CS_NAMES { "lookup", "hit", "total", "busy", "create" }
+
+/**
+ * Stats for a generic cache (similar to inode, lu_object, etc. caches).
+ */
+struct cache_stats {
+	const char    *cs_name;
+	atomic_t   cs_stats[CS_NR];
+};
+
+/** These are not exported so far */
+void cache_stats_init (struct cache_stats *cs, const char *name);
+
+/**
+ * Client-side site. This represents particular client stack. "Global"
+ * variables should (directly or indirectly) be added here to allow multiple
+ * clients to co-exist in the single address space.
+ */
+struct cl_site {
+	struct lu_site	cs_lu;
+	/**
+	 * Statistical counters. Atomics do not scale, something better like
+	 * per-cpu counters is needed.
+	 *
+	 * These are exported as /proc/fs/lustre/llite/.../site
+	 *
+	 * When interpreting keep in mind that both sub-locks (and sub-pages)
+	 * and top-locks (and top-pages) are accounted here.
+	 */
+	struct cache_stats    cs_pages;
+	struct cache_stats    cs_locks;
+	atomic_t	  cs_pages_state[CPS_NR];
+	atomic_t	  cs_locks_state[CLS_NR];
+};
+
+int  cl_site_init (struct cl_site *s, struct cl_device *top);
+void cl_site_fini (struct cl_site *s);
+void cl_stack_fini(const struct lu_env *env, struct cl_device *cl);
+
+/**
+ * Output client site statistical counters into a buffer. Suitable for
+ * ll_rd_*()-style functions.
+ */
+int cl_site_stats_print(const struct cl_site *site, struct seq_file *m);
+
+/**
+ * \name helpers
+ *
+ * Type conversion and accessory functions.
+ */
+/** @{ */
+
+static inline struct cl_site *lu2cl_site(const struct lu_site *site)
+{
+	return container_of(site, struct cl_site, cs_lu);
+}
+
+static inline int lu_device_is_cl(const struct lu_device *d)
+{
+	return d->ld_type->ldt_tags & LU_DEVICE_CL;
+}
+
+static inline struct cl_device *lu2cl_dev(const struct lu_device *d)
+{
+	LASSERT(d == NULL || IS_ERR(d) || lu_device_is_cl(d));
+	return container_of0(d, struct cl_device, cd_lu_dev);
+}
+
+static inline struct lu_device *cl2lu_dev(struct cl_device *d)
+{
+	return &d->cd_lu_dev;
+}
+
+static inline struct cl_object *lu2cl(const struct lu_object *o)
+{
+	LASSERT(o == NULL || IS_ERR(o) || lu_device_is_cl(o->lo_dev));
+	return container_of0(o, struct cl_object, co_lu);
+}
+
+static inline const struct cl_object_conf *
+lu2cl_conf(const struct lu_object_conf *conf)
+{
+	return container_of0(conf, struct cl_object_conf, coc_lu);
+}
+
+static inline struct cl_object *cl_object_next(const struct cl_object *obj)
+{
+	return obj ? lu2cl(lu_object_next(&obj->co_lu)) : NULL;
+}
+
+static inline struct cl_device *cl_object_device(const struct cl_object *o)
+{
+	LASSERT(o == NULL || IS_ERR(o) || lu_device_is_cl(o->co_lu.lo_dev));
+	return container_of0(o->co_lu.lo_dev, struct cl_device, cd_lu_dev);
+}
+
+static inline struct cl_object_header *luh2coh(const struct lu_object_header *h)
+{
+	return container_of0(h, struct cl_object_header, coh_lu);
+}
+
+static inline struct cl_site *cl_object_site(const struct cl_object *obj)
+{
+	return lu2cl_site(obj->co_lu.lo_dev->ld_site);
+}
+
+static inline
+struct cl_object_header *cl_object_header(const struct cl_object *obj)
+{
+	return luh2coh(obj->co_lu.lo_header);
+}
+
+static inline int cl_device_init(struct cl_device *d, struct lu_device_type *t)
+{
+	return lu_device_init(&d->cd_lu_dev, t);
+}
+
+static inline void cl_device_fini(struct cl_device *d)
+{
+	lu_device_fini(&d->cd_lu_dev);
+}
+
+void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
+		       struct cl_object *obj,
+		       const struct cl_page_operations *ops);
+void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
+		       struct cl_object *obj,
+		       const struct cl_lock_operations *ops);
+void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice,
+		     struct cl_object *obj, const struct cl_io_operations *ops);
+void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice,
+		      struct cl_device *dev,
+		      const struct cl_req_operations *ops);
+/** @} helpers */
+
+/** \defgroup cl_object cl_object
+ * @{ */
+struct cl_object *cl_object_top (struct cl_object *o);
+struct cl_object *cl_object_find(const struct lu_env *env, struct cl_device *cd,
+				 const struct lu_fid *fid,
+				 const struct cl_object_conf *c);
+
+int  cl_object_header_init(struct cl_object_header *h);
+void cl_object_header_fini(struct cl_object_header *h);
+void cl_object_put	(const struct lu_env *env, struct cl_object *o);
+void cl_object_get	(struct cl_object *o);
+void cl_object_attr_lock  (struct cl_object *o);
+void cl_object_attr_unlock(struct cl_object *o);
+int  cl_object_attr_get   (const struct lu_env *env, struct cl_object *obj,
+			   struct cl_attr *attr);
+int  cl_object_attr_set   (const struct lu_env *env, struct cl_object *obj,
+			   const struct cl_attr *attr, unsigned valid);
+int  cl_object_glimpse    (const struct lu_env *env, struct cl_object *obj,
+			   struct ost_lvb *lvb);
+int  cl_conf_set	  (const struct lu_env *env, struct cl_object *obj,
+			   const struct cl_object_conf *conf);
+void cl_object_prune      (const struct lu_env *env, struct cl_object *obj);
+void cl_object_kill       (const struct lu_env *env, struct cl_object *obj);
+int  cl_object_has_locks  (struct cl_object *obj);
+
+/**
+ * Returns true, iff \a o0 and \a o1 are slices of the same object.
+ */
+static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1)
+{
+	return cl_object_header(o0) == cl_object_header(o1);
+}
+
+static inline void cl_object_page_init(struct cl_object *clob, int size)
+{
+	clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
+	cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8);
+}
+
+static inline void *cl_object_page_slice(struct cl_object *clob,
+					 struct cl_page *page)
+{
+	return (void *)((char *)page + clob->co_slice_off);
+}
+
+/** @} cl_object */
+
+/** \defgroup cl_page cl_page
+ * @{ */
+enum {
+	CLP_GANG_OKAY = 0,
+	CLP_GANG_RESCHED,
+	CLP_GANG_AGAIN,
+	CLP_GANG_ABORT
+};
+
+/* callback of cl_page_gang_lookup() */
+typedef int   (*cl_page_gang_cb_t)  (const struct lu_env *, struct cl_io *,
+				     struct cl_page *, void *);
+int	     cl_page_gang_lookup (const struct lu_env *env,
+				     struct cl_object *obj,
+				     struct cl_io *io,
+				     pgoff_t start, pgoff_t end,
+				     cl_page_gang_cb_t cb, void *cbdata);
+struct cl_page *cl_page_lookup      (struct cl_object_header *hdr,
+				     pgoff_t index);
+struct cl_page *cl_page_find	(const struct lu_env *env,
+				     struct cl_object *obj,
+				     pgoff_t idx, struct page *vmpage,
+				     enum cl_page_type type);
+struct cl_page *cl_page_find_sub    (const struct lu_env *env,
+				     struct cl_object *obj,
+				     pgoff_t idx, struct page *vmpage,
+				     struct cl_page *parent);
+void	    cl_page_get	 (struct cl_page *page);
+void	    cl_page_put	 (const struct lu_env *env,
+				     struct cl_page *page);
+void	    cl_page_print       (const struct lu_env *env, void *cookie,
+				     lu_printer_t printer,
+				     const struct cl_page *pg);
+void	    cl_page_header_print(const struct lu_env *env, void *cookie,
+				     lu_printer_t printer,
+				     const struct cl_page *pg);
+struct page     *cl_page_vmpage      (const struct lu_env *env,
+				     struct cl_page *page);
+struct cl_page *cl_vmpage_page      (struct page *vmpage, struct cl_object *obj);
+struct cl_page *cl_page_top	 (struct cl_page *page);
+
+const struct cl_page_slice *cl_page_at(const struct cl_page *page,
+				       const struct lu_device_type *dtype);
+
+/**
+ * \name ownership
+ *
+ * Functions dealing with the ownership of page by io.
+ */
+/** @{ */
+
+int  cl_page_own	(const struct lu_env *env,
+			 struct cl_io *io, struct cl_page *page);
+int  cl_page_own_try    (const struct lu_env *env,
+			 struct cl_io *io, struct cl_page *page);
+void cl_page_assume     (const struct lu_env *env,
+			 struct cl_io *io, struct cl_page *page);
+void cl_page_unassume   (const struct lu_env *env,
+			 struct cl_io *io, struct cl_page *pg);
+void cl_page_disown     (const struct lu_env *env,
+			 struct cl_io *io, struct cl_page *page);
+int  cl_page_is_owned   (const struct cl_page *pg, const struct cl_io *io);
+
+/** @} ownership */
+
+/**
+ * \name transfer
+ *
+ * Functions dealing with the preparation of a page for a transfer, and
+ * tracking transfer state.
+ */
+/** @{ */
+int  cl_page_prep       (const struct lu_env *env, struct cl_io *io,
+			 struct cl_page *pg, enum cl_req_type crt);
+void cl_page_completion (const struct lu_env *env,
+			 struct cl_page *pg, enum cl_req_type crt, int ioret);
+int  cl_page_make_ready (const struct lu_env *env, struct cl_page *pg,
+			 enum cl_req_type crt);
+int  cl_page_cache_add  (const struct lu_env *env, struct cl_io *io,
+			 struct cl_page *pg, enum cl_req_type crt);
+void cl_page_clip       (const struct lu_env *env, struct cl_page *pg,
+			 int from, int to);
+int  cl_page_cancel     (const struct lu_env *env, struct cl_page *page);
+int  cl_page_flush      (const struct lu_env *env, struct cl_io *io,
+			 struct cl_page *pg);
+
+/** @} transfer */
+
+
+/**
+ * \name helper routines
+ * Functions to discard, delete and export a cl_page.
+ */
+/** @{ */
+void    cl_page_discard      (const struct lu_env *env, struct cl_io *io,
+			      struct cl_page *pg);
+void    cl_page_delete       (const struct lu_env *env, struct cl_page *pg);
+int     cl_page_unmap	(const struct lu_env *env, struct cl_io *io,
+			      struct cl_page *pg);
+int     cl_page_is_vmlocked  (const struct lu_env *env,
+			      const struct cl_page *pg);
+void    cl_page_export       (const struct lu_env *env,
+			      struct cl_page *pg, int uptodate);
+int     cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
+			      struct cl_page *page);
+loff_t  cl_offset	    (const struct cl_object *obj, pgoff_t idx);
+pgoff_t cl_index	     (const struct cl_object *obj, loff_t offset);
+int     cl_page_size	 (const struct cl_object *obj);
+int     cl_pages_prune       (const struct lu_env *env, struct cl_object *obj);
+
+void cl_lock_print      (const struct lu_env *env, void *cookie,
+			 lu_printer_t printer, const struct cl_lock *lock);
+void cl_lock_descr_print(const struct lu_env *env, void *cookie,
+			 lu_printer_t printer,
+			 const struct cl_lock_descr *descr);
+/* @} helper */
+
+/** @} cl_page */
+
+/** \defgroup cl_lock cl_lock
+ * @{ */
+
+struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
+			     const struct cl_lock_descr *need,
+			     const char *scope, const void *source);
+struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
+			     const struct cl_lock_descr *need,
+			     const char *scope, const void *source);
+struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
+				const struct cl_lock_descr *need,
+				const char *scope, const void *source);
+struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
+				 struct cl_object *obj, pgoff_t index,
+				 struct cl_lock *except, int pending,
+				 int canceld);
+static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env,
+					      struct cl_object *obj,
+					      struct cl_page *page,
+					      struct cl_lock *except,
+					      int pending, int canceld)
+{
+	LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj));
+	return cl_lock_at_pgoff(env, obj, page->cp_index, except,
+				pending, canceld);
+}
+
+const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
+				       const struct lu_device_type *dtype);
+
+void  cl_lock_get       (struct cl_lock *lock);
+void  cl_lock_get_trust (struct cl_lock *lock);
+void  cl_lock_put       (const struct lu_env *env, struct cl_lock *lock);
+void  cl_lock_hold_add  (const struct lu_env *env, struct cl_lock *lock,
+			 const char *scope, const void *source);
+void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
+			  const char *scope, const void *source);
+void  cl_lock_unhold    (const struct lu_env *env, struct cl_lock *lock,
+			 const char *scope, const void *source);
+void  cl_lock_release   (const struct lu_env *env, struct cl_lock *lock,
+			 const char *scope, const void *source);
+void  cl_lock_user_add  (const struct lu_env *env, struct cl_lock *lock);
+void  cl_lock_user_del  (const struct lu_env *env, struct cl_lock *lock);
+
+enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
+				     struct cl_lock *lock);
+void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
+		       enum cl_lock_state state);
+int cl_lock_is_intransit(struct cl_lock *lock);
+
+int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock,
+			 int keep_mutex);
+
+/** \name statemachine statemachine
+ * Interface to lock state machine consists of 3 parts:
+ *
+ *     - "try" functions that attempt to effect a state transition. If state
+ *     transition is not possible right now (e.g., if it has to wait for some
+ *     asynchronous event to occur), these functions return
+ *     cl_lock_transition::CLO_WAIT.
+ *
+ *     - "non-try" functions that implement synchronous blocking interface on
+ *     top of non-blocking "try" functions. These functions repeatedly call
+ *     corresponding "try" versions, and if state transition is not possible
+ *     immediately, wait for lock state change.
+ *
+ *     - methods from cl_lock_operations, called by "try" functions. Lock can
+ *     be advanced to the target state only when all layers voted that they
+ *     are ready for this transition. "Try" functions call methods under lock
+ *     mutex. If a layer had to release a mutex, it re-acquires it and returns
+ *     cl_lock_transition::CLO_REPEAT, causing "try" function to call all
+ *     layers again.
+ *
+ * TRY	      NON-TRY      METHOD			    FINAL STATE
+ *
+ * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED
+ *
+ * cl_wait_try()    cl_wait()    cl_lock_operations::clo_wait()    CLS_HELD
+ *
+ * cl_unuse_try()   cl_unuse()   cl_lock_operations::clo_unuse()   CLS_CACHED
+ *
+ * cl_use_try()     NONE	 cl_lock_operations::clo_use()     CLS_HELD
+ *
+ * @{ */
+
+int   cl_enqueue    (const struct lu_env *env, struct cl_lock *lock,
+		     struct cl_io *io, __u32 flags);
+int   cl_wait       (const struct lu_env *env, struct cl_lock *lock);
+void  cl_unuse      (const struct lu_env *env, struct cl_lock *lock);
+int   cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
+		     struct cl_io *io, __u32 flags);
+int   cl_unuse_try  (const struct lu_env *env, struct cl_lock *lock);
+int   cl_wait_try   (const struct lu_env *env, struct cl_lock *lock);
+int   cl_use_try    (const struct lu_env *env, struct cl_lock *lock, int atomic);
+
+/** @} statemachine */
+
+void cl_lock_signal      (const struct lu_env *env, struct cl_lock *lock);
+int  cl_lock_state_wait  (const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_state_set   (const struct lu_env *env, struct cl_lock *lock,
+			  enum cl_lock_state state);
+int  cl_queue_match      (const struct list_head *queue,
+			  const struct cl_lock_descr *need);
+
+void cl_lock_mutex_get  (const struct lu_env *env, struct cl_lock *lock);
+int  cl_lock_mutex_try  (const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_mutex_put  (const struct lu_env *env, struct cl_lock *lock);
+int  cl_lock_is_mutexed (struct cl_lock *lock);
+int  cl_lock_nr_mutexed (const struct lu_env *env);
+int  cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
+int  cl_lock_ext_match  (const struct cl_lock_descr *has,
+			 const struct cl_lock_descr *need);
+int  cl_lock_descr_match(const struct cl_lock_descr *has,
+			 const struct cl_lock_descr *need);
+int  cl_lock_mode_match (enum cl_lock_mode has, enum cl_lock_mode need);
+int  cl_lock_modify     (const struct lu_env *env, struct cl_lock *lock,
+			 const struct cl_lock_descr *desc);
+
+void cl_lock_closure_init (const struct lu_env *env,
+			   struct cl_lock_closure *closure,
+			   struct cl_lock *origin, int wait);
+void cl_lock_closure_fini (struct cl_lock_closure *closure);
+int  cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
+			   struct cl_lock_closure *closure);
+void cl_lock_disclosure   (const struct lu_env *env,
+			   struct cl_lock_closure *closure);
+int  cl_lock_enclosure    (const struct lu_env *env, struct cl_lock *lock,
+			   struct cl_lock_closure *closure);
+
+void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock);
+void cl_lock_error (const struct lu_env *env, struct cl_lock *lock, int error);
+void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait);
+
+unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock);
+
+/** @} cl_lock */
+
+/** \defgroup cl_io cl_io
+ * @{ */
+
+int   cl_io_init	 (const struct lu_env *env, struct cl_io *io,
+			  enum cl_io_type iot, struct cl_object *obj);
+int   cl_io_sub_init     (const struct lu_env *env, struct cl_io *io,
+			  enum cl_io_type iot, struct cl_object *obj);
+int   cl_io_rw_init      (const struct lu_env *env, struct cl_io *io,
+			  enum cl_io_type iot, loff_t pos, size_t count);
+int   cl_io_loop	 (const struct lu_env *env, struct cl_io *io);
+
+void  cl_io_fini	 (const struct lu_env *env, struct cl_io *io);
+int   cl_io_iter_init    (const struct lu_env *env, struct cl_io *io);
+void  cl_io_iter_fini    (const struct lu_env *env, struct cl_io *io);
+int   cl_io_lock	 (const struct lu_env *env, struct cl_io *io);
+void  cl_io_unlock       (const struct lu_env *env, struct cl_io *io);
+int   cl_io_start	(const struct lu_env *env, struct cl_io *io);
+void  cl_io_end	  (const struct lu_env *env, struct cl_io *io);
+int   cl_io_lock_add     (const struct lu_env *env, struct cl_io *io,
+			  struct cl_io_lock_link *link);
+int   cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
+			   struct cl_lock_descr *descr);
+int   cl_io_read_page    (const struct lu_env *env, struct cl_io *io,
+			  struct cl_page *page);
+int   cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
+			  struct cl_page *page, unsigned from, unsigned to);
+int   cl_io_commit_write (const struct lu_env *env, struct cl_io *io,
+			  struct cl_page *page, unsigned from, unsigned to);
+int   cl_io_submit_rw    (const struct lu_env *env, struct cl_io *io,
+			  enum cl_req_type iot, struct cl_2queue *queue);
+int   cl_io_submit_sync  (const struct lu_env *env, struct cl_io *io,
+			  enum cl_req_type iot, struct cl_2queue *queue,
+			  long timeout);
+void  cl_io_rw_advance   (const struct lu_env *env, struct cl_io *io,
+			  size_t nob);
+int   cl_io_cancel       (const struct lu_env *env, struct cl_io *io,
+			  struct cl_page_list *queue);
+int   cl_io_is_going     (const struct lu_env *env);
+
+/**
+ * True, iff \a io is an O_APPEND write(2).
+ */
+static inline int cl_io_is_append(const struct cl_io *io)
+{
+	return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append;
+}
+
+static inline int cl_io_is_sync_write(const struct cl_io *io)
+{
+	return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_sync;
+}
+
+static inline int cl_io_is_mkwrite(const struct cl_io *io)
+{
+	return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite;
+}
+
+/**
+ * True, iff \a io is a truncate(2).
+ */
+static inline int cl_io_is_trunc(const struct cl_io *io)
+{
+	return io->ci_type == CIT_SETATTR &&
+		(io->u.ci_setattr.sa_valid & ATTR_SIZE);
+}
+
+struct cl_io *cl_io_top(struct cl_io *io);
+
+void cl_io_print(const struct lu_env *env, void *cookie,
+		 lu_printer_t printer, const struct cl_io *io);
+
+#define CL_IO_SLICE_CLEAN(foo_io, base)				 \
+do {								    \
+	typeof(foo_io) __foo_io = (foo_io);			     \
+									\
+	CLASSERT(offsetof(typeof(*__foo_io), base) == 0);	       \
+	memset(&__foo_io->base + 1, 0,				  \
+	       (sizeof *__foo_io) - sizeof __foo_io->base);	     \
+} while (0)
+
+/** @} cl_io */
+
+/** \defgroup cl_page_list cl_page_list
+ * @{ */
+
+/**
+ * Last page in the page list.
+ */
+static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist)
+{
+	LASSERT(plist->pl_nr > 0);
+	return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch);
+}
+
+/**
+ * Iterate over pages in a page list.
+ */
+#define cl_page_list_for_each(page, list)			       \
+	list_for_each_entry((page), &(list)->pl_pages, cp_batch)
+
+/**
+ * Iterate over pages in a page list, taking possible removals into account.
+ */
+#define cl_page_list_for_each_safe(page, temp, list)		    \
+	list_for_each_entry_safe((page), (temp), &(list)->pl_pages, cp_batch)
+
+void cl_page_list_init   (struct cl_page_list *plist);
+void cl_page_list_add    (struct cl_page_list *plist, struct cl_page *page);
+void cl_page_list_move   (struct cl_page_list *dst, struct cl_page_list *src,
+			  struct cl_page *page);
+void cl_page_list_splice (struct cl_page_list *list,
+			  struct cl_page_list *head);
+void cl_page_list_del    (const struct lu_env *env,
+			  struct cl_page_list *plist, struct cl_page *page);
+void cl_page_list_disown (const struct lu_env *env,
+			  struct cl_io *io, struct cl_page_list *plist);
+int  cl_page_list_own    (const struct lu_env *env,
+			  struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_assume (const struct lu_env *env,
+			  struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_discard(const struct lu_env *env,
+			  struct cl_io *io, struct cl_page_list *plist);
+int  cl_page_list_unmap  (const struct lu_env *env,
+			  struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_fini   (const struct lu_env *env, struct cl_page_list *plist);
+
+void cl_2queue_init     (struct cl_2queue *queue);
+void cl_2queue_add      (struct cl_2queue *queue, struct cl_page *page);
+void cl_2queue_disown   (const struct lu_env *env,
+			 struct cl_io *io, struct cl_2queue *queue);
+void cl_2queue_assume   (const struct lu_env *env,
+			 struct cl_io *io, struct cl_2queue *queue);
+void cl_2queue_discard  (const struct lu_env *env,
+			 struct cl_io *io, struct cl_2queue *queue);
+void cl_2queue_fini     (const struct lu_env *env, struct cl_2queue *queue);
+void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page);
+
+/** @} cl_page_list */
+
+/** \defgroup cl_req cl_req
+ * @{ */
+struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
+			    enum cl_req_type crt, int nr_objects);
+
+void cl_req_page_add  (const struct lu_env *env, struct cl_req *req,
+		       struct cl_page *page);
+void cl_req_page_done (const struct lu_env *env, struct cl_page *page);
+int  cl_req_prep      (const struct lu_env *env, struct cl_req *req);
+void cl_req_attr_set  (const struct lu_env *env, struct cl_req *req,
+		       struct cl_req_attr *attr, obd_valid flags);
+void cl_req_completion(const struct lu_env *env, struct cl_req *req, int ioret);
+
+/** \defgroup cl_sync_io cl_sync_io
+ * @{ */
+
+/**
+ * Anchor for synchronous transfer. This is allocated on a stack by thread
+ * doing synchronous transfer, and a pointer to this structure is set up in
+ * every page submitted for transfer. Transfer completion routine updates
+ * anchor and wakes up waiting thread when transfer is complete.
+ */
+struct cl_sync_io {
+	/** number of pages yet to be transferred. */
+	atomic_t		csi_sync_nr;
+	/** error code. */
+	int			csi_sync_rc;
+	/** barrier of destroy this structure */
+	atomic_t		csi_barrier;
+	/** completion to be signaled when transfer is complete. */
+	wait_queue_head_t		csi_waitq;
+};
+
+void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages);
+int  cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
+		     struct cl_page_list *queue, struct cl_sync_io *anchor,
+		     long timeout);
+void cl_sync_io_note(struct cl_sync_io *anchor, int ioret);
+
+/** @} cl_sync_io */
+
+/** @} cl_req */
+
+/** \defgroup cl_env cl_env
+ *
+ * lu_env handling for a client.
+ *
+ * lu_env is an environment within which lustre code executes. Its major part
+ * is lu_context---a fast memory allocation mechanism that is used to conserve
+ * precious kernel stack space. Originally lu_env was designed for a server,
+ * where
+ *
+ *     - there is a (mostly) fixed number of threads, and
+ *
+ *     - call chains have no non-lustre portions inserted between lustre code.
+ *
+ * On a client both these assumtpion fails, because every user thread can
+ * potentially execute lustre code as part of a system call, and lustre calls
+ * into VFS or MM that call back into lustre.
+ *
+ * To deal with that, cl_env wrapper functions implement the following
+ * optimizations:
+ *
+ *     - allocation and destruction of environment is amortized by caching no
+ *     longer used environments instead of destroying them;
+ *
+ *     - there is a notion of "current" environment, attached to the kernel
+ *     data structure representing current thread Top-level lustre code
+ *     allocates an environment and makes it current, then calls into
+ *     non-lustre code, that in turn calls lustre back. Low-level lustre
+ *     code thus called can fetch environment created by the top-level code
+ *     and reuse it, avoiding additional environment allocation.
+ *       Right now, three interfaces can attach the cl_env to running thread:
+ *       - cl_env_get
+ *       - cl_env_implant
+ *       - cl_env_reexit(cl_env_reenter had to be called priorly)
+ *
+ * \see lu_env, lu_context, lu_context_key
+ * @{ */
+
+struct cl_env_nest {
+	int   cen_refcheck;
+	void *cen_cookie;
+};
+
+struct lu_env *cl_env_peek       (int *refcheck);
+struct lu_env *cl_env_get	(int *refcheck);
+struct lu_env *cl_env_alloc      (int *refcheck, __u32 tags);
+struct lu_env *cl_env_nested_get (struct cl_env_nest *nest);
+void	   cl_env_put	(struct lu_env *env, int *refcheck);
+void	   cl_env_nested_put (struct cl_env_nest *nest, struct lu_env *env);
+void	  *cl_env_reenter    (void);
+void	   cl_env_reexit     (void *cookie);
+void	   cl_env_implant    (struct lu_env *env, int *refcheck);
+void	   cl_env_unplant    (struct lu_env *env, int *refcheck);
+
+/** @} cl_env */
+
+/*
+ * Misc
+ */
+void cl_attr2lvb(struct ost_lvb *lvb, const struct cl_attr *attr);
+void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb);
+
+struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site,
+				struct lu_device_type *ldt,
+				struct lu_device *next);
+/** @} clio */
+
+int cl_global_init(void);
+void cl_global_fini(void);
+
+#endif /* _LINUX_CL_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/dt_object.h b/drivers/staging/lustre/lustre/include/dt_object.h
new file mode 100644
index 000000000000..e116bb21b529
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/dt_object.h
@@ -0,0 +1,1498 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LUSTRE_DT_OBJECT_H
+#define __LUSTRE_DT_OBJECT_H
+
+/** \defgroup dt dt
+ * Sub-class of lu_object with methods common for "data" objects in OST stack.
+ *
+ * Data objects behave like regular files: you can read/write them, get and
+ * set their attributes. Implementation of dt interface is supposed to
+ * implement some form of garbage collection, normally reference counting
+ * (nlink) based one.
+ *
+ * Examples: osd (lustre/osd) is an implementation of dt interface.
+ * @{
+ */
+
+
+/*
+ * super-class definitions.
+ */
+#include <lu_object.h>
+
+#include <linux/libcfs/libcfs.h>
+
+struct seq_file;
+struct proc_dir_entry;
+struct lustre_cfg;
+
+struct thandle;
+struct dt_device;
+struct dt_object;
+struct dt_index_features;
+struct niobuf_local;
+struct niobuf_remote;
+struct ldlm_enqueue_info;
+
+typedef enum {
+	MNTOPT_USERXATTR	= 0x00000001,
+	MNTOPT_ACL	      = 0x00000002,
+} mntopt_t;
+
+struct dt_device_param {
+	unsigned	   ddp_max_name_len;
+	unsigned	   ddp_max_nlink;
+	unsigned	   ddp_block_shift;
+	mntopt_t	   ddp_mntopts;
+	unsigned	   ddp_max_ea_size;
+	void	      *ddp_mnt; /* XXX: old code can retrieve mnt -bzzz */
+	int		ddp_mount_type;
+	unsigned long long ddp_maxbytes;
+	/* percentage of available space to reserve for grant error margin */
+	int		ddp_grant_reserved;
+	/* per-inode space consumption */
+	short	      ddp_inodespace;
+	/* per-fragment grant overhead to be used by client for grant
+	 * calculation */
+	int		ddp_grant_frag;
+};
+
+/**
+ * Per-transaction commit callback function
+ */
+struct dt_txn_commit_cb;
+typedef void (*dt_cb_t)(struct lu_env *env, struct thandle *th,
+			struct dt_txn_commit_cb *cb, int err);
+/**
+ * Special per-transaction callback for cases when just commit callback
+ * is needed and per-device callback are not convenient to use
+ */
+#define TRANS_COMMIT_CB_MAGIC	0xa0a00a0a
+#define MAX_COMMIT_CB_STR_LEN	32
+
+struct dt_txn_commit_cb {
+	struct list_head	dcb_linkage;
+	dt_cb_t		dcb_func;
+	__u32		dcb_magic;
+	char		dcb_name[MAX_COMMIT_CB_STR_LEN];
+};
+
+/**
+ * Operations on dt device.
+ */
+struct dt_device_operations {
+	/**
+	 * Return device-wide statistics.
+	 */
+	int   (*dt_statfs)(const struct lu_env *env,
+			   struct dt_device *dev, struct obd_statfs *osfs);
+	/**
+	 * Create transaction, described by \a param.
+	 */
+	struct thandle *(*dt_trans_create)(const struct lu_env *env,
+					   struct dt_device *dev);
+	/**
+	 * Start transaction, described by \a param.
+	 */
+	int   (*dt_trans_start)(const struct lu_env *env,
+				struct dt_device *dev, struct thandle *th);
+	/**
+	 * Finish previously started transaction.
+	 */
+	int   (*dt_trans_stop)(const struct lu_env *env,
+			       struct thandle *th);
+	/**
+	 * Add commit callback to the transaction.
+	 */
+	int   (*dt_trans_cb_add)(struct thandle *th,
+				 struct dt_txn_commit_cb *dcb);
+	/**
+	 * Return fid of root index object.
+	 */
+	int   (*dt_root_get)(const struct lu_env *env,
+			     struct dt_device *dev, struct lu_fid *f);
+	/**
+	 * Return device configuration data.
+	 */
+	void  (*dt_conf_get)(const struct lu_env *env,
+			     const struct dt_device *dev,
+			     struct dt_device_param *param);
+	/**
+	 *  handling device state, mostly for tests
+	 */
+	int   (*dt_sync)(const struct lu_env *env, struct dt_device *dev);
+	int   (*dt_ro)(const struct lu_env *env, struct dt_device *dev);
+	/**
+	  * Start a transaction commit asynchronously
+	  *
+	  * \param env environment
+	  * \param dev dt_device to start commit on
+	  *
+	  * \return 0 success, negative value if error
+	  */
+	 int   (*dt_commit_async)(const struct lu_env *env,
+				  struct dt_device *dev);
+	/**
+	 * Initialize capability context.
+	 */
+	int   (*dt_init_capa_ctxt)(const struct lu_env *env,
+				   struct dt_device *dev,
+				   int mode, unsigned long timeout,
+				   __u32 alg, struct lustre_capa_key *keys);
+};
+
+struct dt_index_features {
+	/** required feature flags from enum dt_index_flags */
+	__u32 dif_flags;
+	/** minimal required key size */
+	size_t dif_keysize_min;
+	/** maximal required key size, 0 if no limit */
+	size_t dif_keysize_max;
+	/** minimal required record size */
+	size_t dif_recsize_min;
+	/** maximal required record size, 0 if no limit */
+	size_t dif_recsize_max;
+	/** pointer size for record */
+	size_t dif_ptrsize;
+};
+
+enum dt_index_flags {
+	/** index supports variable sized keys */
+	DT_IND_VARKEY = 1 << 0,
+	/** index supports variable sized records */
+	DT_IND_VARREC = 1 << 1,
+	/** index can be modified */
+	DT_IND_UPDATE = 1 << 2,
+	/** index supports records with non-unique (duplicate) keys */
+	DT_IND_NONUNQ = 1 << 3,
+	/**
+	 * index support fixed-size keys sorted with natural numerical way
+	 * and is able to return left-side value if no exact value found
+	 */
+	DT_IND_RANGE = 1 << 4,
+};
+
+/**
+ * Features, required from index to support file system directories (mapping
+ * names to fids).
+ */
+extern const struct dt_index_features dt_directory_features;
+extern const struct dt_index_features dt_otable_features;
+extern const struct dt_index_features dt_lfsck_features;
+
+/* index features supported by the accounting objects */
+extern const struct dt_index_features dt_acct_features;
+
+/* index features supported by the quota global indexes */
+extern const struct dt_index_features dt_quota_glb_features;
+
+/* index features supported by the quota slave indexes */
+extern const struct dt_index_features dt_quota_slv_features;
+
+/**
+ * This is a general purpose dt allocation hint.
+ * It now contains the parent object.
+ * It can contain any allocation hint in the future.
+ */
+struct dt_allocation_hint {
+	struct dt_object	   *dah_parent;
+	__u32		       dah_mode;
+};
+
+/**
+ * object type specifier.
+ */
+
+enum dt_format_type {
+	DFT_REGULAR,
+	DFT_DIR,
+	/** for mknod */
+	DFT_NODE,
+	/** for special index */
+	DFT_INDEX,
+	/** for symbolic link */
+	DFT_SYM,
+};
+
+/**
+ * object format specifier.
+ */
+struct dt_object_format {
+	/** type for dt object */
+	enum dt_format_type dof_type;
+	union {
+		struct dof_regular {
+			int striped;
+		} dof_reg;
+		struct dof_dir {
+		} dof_dir;
+		struct dof_node {
+		} dof_node;
+		/**
+		 * special index need feature as parameter to create
+		 * special idx
+		 */
+		struct dof_index {
+			const struct dt_index_features *di_feat;
+		} dof_idx;
+	} u;
+};
+
+enum dt_format_type dt_mode_to_dft(__u32 mode);
+
+typedef __u64 dt_obj_version_t;
+
+/**
+ * Per-dt-object operations.
+ */
+struct dt_object_operations {
+	void  (*do_read_lock)(const struct lu_env *env,
+			      struct dt_object *dt, unsigned role);
+	void  (*do_write_lock)(const struct lu_env *env,
+			       struct dt_object *dt, unsigned role);
+	void  (*do_read_unlock)(const struct lu_env *env,
+				struct dt_object *dt);
+	void  (*do_write_unlock)(const struct lu_env *env,
+				 struct dt_object *dt);
+	int  (*do_write_locked)(const struct lu_env *env,
+				struct dt_object *dt);
+	/**
+	 * Note: following ->do_{x,}attr_{set,get}() operations are very
+	 * similar to ->moo_{x,}attr_{set,get}() operations in struct
+	 * md_object_operations (see md_object.h). These operations are not in
+	 * lu_object_operations, because ->do_{x,}attr_set() versions take
+	 * transaction handle as an argument (this transaction is started by
+	 * caller). We might factor ->do_{x,}attr_get() into
+	 * lu_object_operations, but that would break existing symmetry.
+	 */
+
+	/**
+	 * Return standard attributes.
+	 *
+	 * precondition: lu_object_exists(&dt->do_lu);
+	 */
+	int   (*do_attr_get)(const struct lu_env *env,
+			     struct dt_object *dt, struct lu_attr *attr,
+			     struct lustre_capa *capa);
+	/**
+	 * Set standard attributes.
+	 *
+	 * precondition: dt_object_exists(dt);
+	 */
+	int   (*do_declare_attr_set)(const struct lu_env *env,
+				     struct dt_object *dt,
+				     const struct lu_attr *attr,
+				     struct thandle *handle);
+	int   (*do_attr_set)(const struct lu_env *env,
+			     struct dt_object *dt,
+			     const struct lu_attr *attr,
+			     struct thandle *handle,
+			     struct lustre_capa *capa);
+	/**
+	 * Return a value of an extended attribute.
+	 *
+	 * precondition: dt_object_exists(dt);
+	 */
+	int   (*do_xattr_get)(const struct lu_env *env, struct dt_object *dt,
+			      struct lu_buf *buf, const char *name,
+			      struct lustre_capa *capa);
+	/**
+	 * Set value of an extended attribute.
+	 *
+	 * \a fl - flags from enum lu_xattr_flags
+	 *
+	 * precondition: dt_object_exists(dt);
+	 */
+	int   (*do_declare_xattr_set)(const struct lu_env *env,
+				      struct dt_object *dt,
+				      const struct lu_buf *buf,
+				      const char *name, int fl,
+				      struct thandle *handle);
+	int   (*do_xattr_set)(const struct lu_env *env,
+			      struct dt_object *dt, const struct lu_buf *buf,
+			      const char *name, int fl, struct thandle *handle,
+			      struct lustre_capa *capa);
+	/**
+	 * Delete existing extended attribute.
+	 *
+	 * precondition: dt_object_exists(dt);
+	 */
+	int   (*do_declare_xattr_del)(const struct lu_env *env,
+				      struct dt_object *dt,
+				      const char *name, struct thandle *handle);
+	int   (*do_xattr_del)(const struct lu_env *env,
+			      struct dt_object *dt,
+			      const char *name, struct thandle *handle,
+			      struct lustre_capa *capa);
+	/**
+	 * Place list of existing extended attributes into \a buf (which has
+	 * length len).
+	 *
+	 * precondition: dt_object_exists(dt);
+	 */
+	int   (*do_xattr_list)(const struct lu_env *env,
+			       struct dt_object *dt, struct lu_buf *buf,
+			       struct lustre_capa *capa);
+	/**
+	 * Init allocation hint using parent object and child mode.
+	 * (1) The \a parent might be NULL if this is a partial creation for
+	 *     remote object.
+	 * (2) The type of child is in \a child_mode.
+	 * (3) The result hint is stored in \a ah;
+	 */
+	void  (*do_ah_init)(const struct lu_env *env,
+			    struct dt_allocation_hint *ah,
+			    struct dt_object *parent,
+			    struct dt_object *child,
+			    umode_t child_mode);
+	/**
+	 * Create new object on this device.
+	 *
+	 * precondition: !dt_object_exists(dt);
+	 * postcondition: ergo(result == 0, dt_object_exists(dt));
+	 */
+	int   (*do_declare_create)(const struct lu_env *env,
+				   struct dt_object *dt,
+				   struct lu_attr *attr,
+				   struct dt_allocation_hint *hint,
+				   struct dt_object_format *dof,
+				   struct thandle *th);
+	int   (*do_create)(const struct lu_env *env, struct dt_object *dt,
+			   struct lu_attr *attr,
+			   struct dt_allocation_hint *hint,
+			   struct dt_object_format *dof,
+			   struct thandle *th);
+
+	/**
+	  Destroy object on this device
+	 * precondition: !dt_object_exists(dt);
+	 * postcondition: ergo(result == 0, dt_object_exists(dt));
+	 */
+	int   (*do_declare_destroy)(const struct lu_env *env,
+				    struct dt_object *dt,
+				    struct thandle *th);
+	int   (*do_destroy)(const struct lu_env *env, struct dt_object *dt,
+			    struct thandle *th);
+
+	/**
+	 * Announce that this object is going to be used as an index. This
+	 * operation check that object supports indexing operations and
+	 * installs appropriate dt_index_operations vector on success.
+	 *
+	 * Also probes for features. Operation is successful if all required
+	 * features are supported.
+	 */
+	int   (*do_index_try)(const struct lu_env *env,
+			      struct dt_object *dt,
+			      const struct dt_index_features *feat);
+	/**
+	 * Add nlink of the object
+	 * precondition: dt_object_exists(dt);
+	 */
+	int   (*do_declare_ref_add)(const struct lu_env *env,
+				    struct dt_object *dt, struct thandle *th);
+	int   (*do_ref_add)(const struct lu_env *env,
+			    struct dt_object *dt, struct thandle *th);
+	/**
+	 * Del nlink of the object
+	 * precondition: dt_object_exists(dt);
+	 */
+	int   (*do_declare_ref_del)(const struct lu_env *env,
+				    struct dt_object *dt, struct thandle *th);
+	int   (*do_ref_del)(const struct lu_env *env,
+			    struct dt_object *dt, struct thandle *th);
+
+	struct obd_capa *(*do_capa_get)(const struct lu_env *env,
+					struct dt_object *dt,
+					struct lustre_capa *old,
+					__u64 opc);
+	int (*do_object_sync)(const struct lu_env *, struct dt_object *);
+	/**
+	 * Get object info of next level. Currently, only get inode from osd.
+	 * This is only used by quota b=16542
+	 * precondition: dt_object_exists(dt);
+	 */
+	int (*do_data_get)(const struct lu_env *env, struct dt_object *dt,
+			   void **data);
+
+	/**
+	 * Lock object.
+	 */
+	int (*do_object_lock)(const struct lu_env *env, struct dt_object *dt,
+			      struct lustre_handle *lh,
+			      struct ldlm_enqueue_info *einfo,
+			      void *policy);
+};
+
+/**
+ * Per-dt-object operations on "file body".
+ */
+struct dt_body_operations {
+	/**
+	 * precondition: dt_object_exists(dt);
+	 */
+	ssize_t (*dbo_read)(const struct lu_env *env, struct dt_object *dt,
+			    struct lu_buf *buf, loff_t *pos,
+			    struct lustre_capa *capa);
+	/**
+	 * precondition: dt_object_exists(dt);
+	 */
+	ssize_t (*dbo_declare_write)(const struct lu_env *env,
+				     struct dt_object *dt,
+				     const loff_t size, loff_t pos,
+				     struct thandle *handle);
+	ssize_t (*dbo_write)(const struct lu_env *env, struct dt_object *dt,
+			     const struct lu_buf *buf, loff_t *pos,
+			     struct thandle *handle, struct lustre_capa *capa,
+			     int ignore_quota);
+	/*
+	 * methods for zero-copy IO
+	 */
+
+	/*
+	 * precondition: dt_object_exists(dt);
+	 * returns:
+	 * < 0 - error code
+	 * = 0 - illegal
+	 * > 0 - number of local buffers prepared
+	 */
+	int (*dbo_bufs_get)(const struct lu_env *env, struct dt_object *dt,
+			    loff_t pos, ssize_t len, struct niobuf_local *lb,
+			    int rw, struct lustre_capa *capa);
+	/*
+	 * precondition: dt_object_exists(dt);
+	 */
+	int (*dbo_bufs_put)(const struct lu_env *env, struct dt_object *dt,
+			    struct niobuf_local *lb, int nr);
+	/*
+	 * precondition: dt_object_exists(dt);
+	 */
+	int (*dbo_write_prep)(const struct lu_env *env, struct dt_object *dt,
+			      struct niobuf_local *lb, int nr);
+	/*
+	 * precondition: dt_object_exists(dt);
+	 */
+	int (*dbo_declare_write_commit)(const struct lu_env *env,
+					struct dt_object *dt,
+					struct niobuf_local *,
+					int, struct thandle *);
+	/*
+	 * precondition: dt_object_exists(dt);
+	 */
+	int (*dbo_write_commit)(const struct lu_env *env, struct dt_object *dt,
+				struct niobuf_local *, int, struct thandle *);
+	/*
+	 * precondition: dt_object_exists(dt);
+	 */
+	int (*dbo_read_prep)(const struct lu_env *env, struct dt_object *dt,
+			     struct niobuf_local *lnb, int nr);
+	int (*dbo_fiemap_get)(const struct lu_env *env, struct dt_object *dt,
+			      struct ll_user_fiemap *fm);
+	/**
+	 * Punch object's content
+	 * precondition: regular object, not index
+	 */
+	int   (*dbo_declare_punch)(const struct lu_env *, struct dt_object *,
+				  __u64, __u64, struct thandle *th);
+	int   (*dbo_punch)(const struct lu_env *env, struct dt_object *dt,
+			  __u64 start, __u64 end, struct thandle *th,
+			  struct lustre_capa *capa);
+};
+
+/**
+ * Incomplete type of index record.
+ */
+struct dt_rec;
+
+/**
+ * Incomplete type of index key.
+ */
+struct dt_key;
+
+/**
+ * Incomplete type of dt iterator.
+ */
+struct dt_it;
+
+/**
+ * Per-dt-object operations on object as index.
+ */
+struct dt_index_operations {
+	/**
+	 * precondition: dt_object_exists(dt);
+	 */
+	int (*dio_lookup)(const struct lu_env *env, struct dt_object *dt,
+			  struct dt_rec *rec, const struct dt_key *key,
+			  struct lustre_capa *capa);
+	/**
+	 * precondition: dt_object_exists(dt);
+	 */
+	int (*dio_declare_insert)(const struct lu_env *env,
+				  struct dt_object *dt,
+				  const struct dt_rec *rec,
+				  const struct dt_key *key,
+				  struct thandle *handle);
+	int (*dio_insert)(const struct lu_env *env, struct dt_object *dt,
+			  const struct dt_rec *rec, const struct dt_key *key,
+			  struct thandle *handle, struct lustre_capa *capa,
+			  int ignore_quota);
+	/**
+	 * precondition: dt_object_exists(dt);
+	 */
+	int (*dio_declare_delete)(const struct lu_env *env,
+				  struct dt_object *dt,
+				  const struct dt_key *key,
+				  struct thandle *handle);
+	int (*dio_delete)(const struct lu_env *env, struct dt_object *dt,
+			  const struct dt_key *key, struct thandle *handle,
+			  struct lustre_capa *capa);
+	/**
+	 * Iterator interface
+	 */
+	struct dt_it_ops {
+		/**
+		 * Allocate and initialize new iterator.
+		 *
+		 * precondition: dt_object_exists(dt);
+		 */
+		struct dt_it *(*init)(const struct lu_env *env,
+				      struct dt_object *dt,
+				      __u32 attr,
+				      struct lustre_capa *capa);
+		void	  (*fini)(const struct lu_env *env,
+				      struct dt_it *di);
+		int	    (*get)(const struct lu_env *env,
+				      struct dt_it *di,
+				      const struct dt_key *key);
+		void	   (*put)(const struct lu_env *env,
+				      struct dt_it *di);
+		int	   (*next)(const struct lu_env *env,
+				      struct dt_it *di);
+		struct dt_key *(*key)(const struct lu_env *env,
+				      const struct dt_it *di);
+		int       (*key_size)(const struct lu_env *env,
+				      const struct dt_it *di);
+		int	    (*rec)(const struct lu_env *env,
+				      const struct dt_it *di,
+				      struct dt_rec *rec,
+				      __u32 attr);
+		__u64	(*store)(const struct lu_env *env,
+				      const struct dt_it *di);
+		int	   (*load)(const struct lu_env *env,
+				      const struct dt_it *di, __u64 hash);
+		int	(*key_rec)(const struct lu_env *env,
+				      const struct dt_it *di, void* key_rec);
+	} dio_it;
+};
+
+enum dt_otable_it_valid {
+	DOIV_ERROR_HANDLE	= 0x0001,
+};
+
+enum dt_otable_it_flags {
+	/* Exit when fail. */
+	DOIF_FAILOUT	= 0x0001,
+
+	/* Reset iteration position to the device beginning. */
+	DOIF_RESET	= 0x0002,
+
+	/* There is up layer component uses the iteration. */
+	DOIF_OUTUSED	= 0x0004,
+};
+
+/* otable based iteration needs to use the common DT interation APIs.
+ * To initialize the iteration, it needs call dio_it::init() firstly.
+ * Here is how the otable based iteration should prepare arguments to
+ * call dt_it_ops::init().
+ *
+ * For otable based iteration, the 32-bits 'attr' for dt_it_ops::init()
+ * is composed of two parts:
+ * low 16-bits is for valid bits, high 16-bits is for flags bits. */
+#define DT_OTABLE_IT_FLAGS_SHIFT	16
+#define DT_OTABLE_IT_FLAGS_MASK 	0xffff0000
+
+struct dt_device {
+	struct lu_device		   dd_lu_dev;
+	const struct dt_device_operations *dd_ops;
+
+	/**
+	 * List of dt_txn_callback (see below). This is not protected in any
+	 * way, because callbacks are supposed to be added/deleted only during
+	 * single-threaded start-up shut-down procedures.
+	 */
+	struct list_head			 dd_txn_callbacks;
+};
+
+int  dt_device_init(struct dt_device *dev, struct lu_device_type *t);
+void dt_device_fini(struct dt_device *dev);
+
+static inline int lu_device_is_dt(const struct lu_device *d)
+{
+	return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_DT);
+}
+
+static inline struct dt_device * lu2dt_dev(struct lu_device *l)
+{
+	LASSERT(lu_device_is_dt(l));
+	return container_of0(l, struct dt_device, dd_lu_dev);
+}
+
+struct dt_object {
+	struct lu_object		   do_lu;
+	const struct dt_object_operations *do_ops;
+	const struct dt_body_operations   *do_body_ops;
+	const struct dt_index_operations  *do_index_ops;
+};
+
+/*
+ * In-core representation of per-device local object OID storage
+ */
+struct local_oid_storage {
+	/* all initialized llog systems on this node linked by this */
+	struct list_head	  los_list;
+
+	/* how many handle's reference this los has */
+	atomic_t	  los_refcount;
+	struct dt_device *los_dev;
+	struct dt_object *los_obj;
+
+	/* data used to generate new fids */
+	struct mutex	 los_id_lock;
+	__u64		  los_seq;
+	__u32		  los_last_oid;
+};
+
+static inline struct dt_object *lu2dt(struct lu_object *l)
+{
+	LASSERT(l == NULL || IS_ERR(l) || lu_device_is_dt(l->lo_dev));
+	return container_of0(l, struct dt_object, do_lu);
+}
+
+int  dt_object_init(struct dt_object *obj,
+		    struct lu_object_header *h, struct lu_device *d);
+
+void dt_object_fini(struct dt_object *obj);
+
+static inline int dt_object_exists(const struct dt_object *dt)
+{
+	return lu_object_exists(&dt->do_lu);
+}
+
+static inline int dt_object_remote(const struct dt_object *dt)
+{
+	return lu_object_remote(&dt->do_lu);
+}
+
+static inline struct dt_object *lu2dt_obj(struct lu_object *o)
+{
+	LASSERT(ergo(o != NULL, lu_device_is_dt(o->lo_dev)));
+	return container_of0(o, struct dt_object, do_lu);
+}
+
+/**
+ * This is the general purpose transaction handle.
+ * 1. Transaction Life Cycle
+ *      This transaction handle is allocated upon starting a new transaction,
+ *      and deallocated after this transaction is committed.
+ * 2. Transaction Nesting
+ *      We do _NOT_ support nested transaction. So, every thread should only
+ *      have one active transaction, and a transaction only belongs to one
+ *      thread. Due to this, transaction handle need no reference count.
+ * 3. Transaction & dt_object locking
+ *      dt_object locks should be taken inside transaction.
+ * 4. Transaction & RPC
+ *      No RPC request should be issued inside transaction.
+ */
+struct thandle {
+	/** the dt device on which the transactions are executed */
+	struct dt_device *th_dev;
+
+	/** context for this transaction, tag is LCT_TX_HANDLE */
+	struct lu_context th_ctx;
+
+	/** additional tags (layers can add in declare) */
+	__u32	     th_tags;
+
+	/** the last operation result in this transaction.
+	 * this value is used in recovery */
+	__s32	     th_result;
+
+	/** whether we need sync commit */
+	unsigned int		th_sync:1;
+
+	/* local transation, no need to inform other layers */
+	unsigned int		th_local:1;
+
+	/* In DNE, one transaction can be disassemblied into
+	 * updates on several different MDTs, and these updates
+	 * will be attached to th_remote_update_list per target.
+	 * Only single thread will access the list, no need lock
+	 */
+	struct list_head		th_remote_update_list;
+	struct update_request	*th_current_request;
+};
+
+/**
+ * Transaction call-backs.
+ *
+ * These are invoked by osd (or underlying transaction engine) when
+ * transaction changes state.
+ *
+ * Call-backs are used by upper layers to modify transaction parameters and to
+ * perform some actions on for each transaction state transition. Typical
+ * example is mdt registering call-back to write into last-received file
+ * before each transaction commit.
+ */
+struct dt_txn_callback {
+	int (*dtc_txn_start)(const struct lu_env *env,
+			     struct thandle *txn, void *cookie);
+	int (*dtc_txn_stop)(const struct lu_env *env,
+			    struct thandle *txn, void *cookie);
+	void (*dtc_txn_commit)(struct thandle *txn, void *cookie);
+	void		*dtc_cookie;
+	__u32		dtc_tag;
+	struct list_head	   dtc_linkage;
+};
+
+void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb);
+void dt_txn_callback_del(struct dt_device *dev, struct dt_txn_callback *cb);
+
+int dt_txn_hook_start(const struct lu_env *env,
+		      struct dt_device *dev, struct thandle *txn);
+int dt_txn_hook_stop(const struct lu_env *env, struct thandle *txn);
+void dt_txn_hook_commit(struct thandle *txn);
+
+int dt_try_as_dir(const struct lu_env *env, struct dt_object *obj);
+
+/**
+ * Callback function used for parsing path.
+ * \see llo_store_resolve
+ */
+typedef int (*dt_entry_func_t)(const struct lu_env *env,
+			    const char *name,
+			    void *pvt);
+
+#define DT_MAX_PATH 1024
+
+int dt_path_parser(const struct lu_env *env,
+		   char *local, dt_entry_func_t entry_func,
+		   void *data);
+
+struct dt_object *
+dt_store_resolve(const struct lu_env *env, struct dt_device *dt,
+		 const char *path, struct lu_fid *fid);
+
+struct dt_object *dt_store_open(const struct lu_env *env,
+				struct dt_device *dt,
+				const char *dirname,
+				const char *filename,
+				struct lu_fid *fid);
+
+struct dt_object *dt_find_or_create(const struct lu_env *env,
+				    struct dt_device *dt,
+				    const struct lu_fid *fid,
+				    struct dt_object_format *dof,
+				    struct lu_attr *attr);
+
+struct dt_object *dt_locate_at(const struct lu_env *env,
+			       struct dt_device *dev,
+			       const struct lu_fid *fid,
+			       struct lu_device *top_dev);
+static inline struct dt_object *
+dt_locate(const struct lu_env *env, struct dt_device *dev,
+	  const struct lu_fid *fid)
+{
+	return dt_locate_at(env, dev, fid, dev->dd_lu_dev.ld_site->ls_top_dev);
+}
+
+
+int local_oid_storage_init(const struct lu_env *env, struct dt_device *dev,
+			   const struct lu_fid *first_fid,
+			   struct local_oid_storage **los);
+void local_oid_storage_fini(const struct lu_env *env,
+			    struct local_oid_storage *los);
+int local_object_fid_generate(const struct lu_env *env,
+			      struct local_oid_storage *los,
+			      struct lu_fid *fid);
+int local_object_declare_create(const struct lu_env *env,
+				struct local_oid_storage *los,
+				struct dt_object *o,
+				struct lu_attr *attr,
+				struct dt_object_format *dof,
+				struct thandle *th);
+int local_object_create(const struct lu_env *env,
+			struct local_oid_storage *los,
+			struct dt_object *o,
+			struct lu_attr *attr, struct dt_object_format *dof,
+			struct thandle *th);
+struct dt_object *local_file_find_or_create(const struct lu_env *env,
+					    struct local_oid_storage *los,
+					    struct dt_object *parent,
+					    const char *name, __u32 mode);
+struct dt_object *local_file_find_or_create_with_fid(const struct lu_env *env,
+						     struct dt_device *dt,
+						     const struct lu_fid *fid,
+						     struct dt_object *parent,
+						     const char *name,
+						     __u32 mode);
+struct dt_object *
+local_index_find_or_create(const struct lu_env *env,
+			   struct local_oid_storage *los,
+			   struct dt_object *parent,
+			   const char *name, __u32 mode,
+			   const struct dt_index_features *ft);
+struct dt_object *
+local_index_find_or_create_with_fid(const struct lu_env *env,
+				    struct dt_device *dt,
+				    const struct lu_fid *fid,
+				    struct dt_object *parent,
+				    const char *name, __u32 mode,
+				    const struct dt_index_features *ft);
+int local_object_unlink(const struct lu_env *env, struct dt_device *dt,
+			struct dt_object *parent, const char *name);
+
+static inline int dt_object_lock(const struct lu_env *env,
+				 struct dt_object *o, struct lustre_handle *lh,
+				 struct ldlm_enqueue_info *einfo,
+				 void *policy)
+{
+	LASSERT(o);
+	LASSERT(o->do_ops);
+	LASSERT(o->do_ops->do_object_lock);
+	return o->do_ops->do_object_lock(env, o, lh, einfo, policy);
+}
+
+int dt_lookup_dir(const struct lu_env *env, struct dt_object *dir,
+		  const char *name, struct lu_fid *fid);
+
+static inline int dt_object_sync(const struct lu_env *env,
+				 struct dt_object *o)
+{
+	LASSERT(o);
+	LASSERT(o->do_ops);
+	LASSERT(o->do_ops->do_object_sync);
+	return o->do_ops->do_object_sync(env, o);
+}
+
+int dt_declare_version_set(const struct lu_env *env, struct dt_object *o,
+			   struct thandle *th);
+void dt_version_set(const struct lu_env *env, struct dt_object *o,
+		    dt_obj_version_t version, struct thandle *th);
+dt_obj_version_t dt_version_get(const struct lu_env *env, struct dt_object *o);
+
+
+int dt_read(const struct lu_env *env, struct dt_object *dt,
+	    struct lu_buf *buf, loff_t *pos);
+int dt_record_read(const struct lu_env *env, struct dt_object *dt,
+		   struct lu_buf *buf, loff_t *pos);
+int dt_record_write(const struct lu_env *env, struct dt_object *dt,
+		    const struct lu_buf *buf, loff_t *pos, struct thandle *th);
+typedef int (*dt_index_page_build_t)(const struct lu_env *env,
+				     union lu_page *lp, int nob,
+				     const struct dt_it_ops *iops,
+				     struct dt_it *it, __u32 attr, void *arg);
+int dt_index_walk(const struct lu_env *env, struct dt_object *obj,
+		  const struct lu_rdpg *rdpg, dt_index_page_build_t filler,
+		  void *arg);
+int dt_index_read(const struct lu_env *env, struct dt_device *dev,
+		  struct idx_info *ii, const struct lu_rdpg *rdpg);
+
+static inline struct thandle *dt_trans_create(const struct lu_env *env,
+					      struct dt_device *d)
+{
+	LASSERT(d->dd_ops->dt_trans_create);
+	return d->dd_ops->dt_trans_create(env, d);
+}
+
+static inline int dt_trans_start(const struct lu_env *env,
+				 struct dt_device *d, struct thandle *th)
+{
+	LASSERT(d->dd_ops->dt_trans_start);
+	return d->dd_ops->dt_trans_start(env, d, th);
+}
+
+/* for this transaction hooks shouldn't be called */
+static inline int dt_trans_start_local(const struct lu_env *env,
+				       struct dt_device *d, struct thandle *th)
+{
+	LASSERT(d->dd_ops->dt_trans_start);
+	th->th_local = 1;
+	return d->dd_ops->dt_trans_start(env, d, th);
+}
+
+static inline int dt_trans_stop(const struct lu_env *env,
+				struct dt_device *d, struct thandle *th)
+{
+	LASSERT(d->dd_ops->dt_trans_stop);
+	return d->dd_ops->dt_trans_stop(env, th);
+}
+
+static inline int dt_trans_cb_add(struct thandle *th,
+				  struct dt_txn_commit_cb *dcb)
+{
+	LASSERT(th->th_dev->dd_ops->dt_trans_cb_add);
+	dcb->dcb_magic = TRANS_COMMIT_CB_MAGIC;
+	return th->th_dev->dd_ops->dt_trans_cb_add(th, dcb);
+}
+/** @} dt */
+
+
+static inline int dt_declare_record_write(const struct lu_env *env,
+					  struct dt_object *dt,
+					  int size, loff_t pos,
+					  struct thandle *th)
+{
+	int rc;
+
+	LASSERTF(dt != NULL, "dt is NULL when we want to write record\n");
+	LASSERT(th != NULL);
+	LASSERT(dt->do_body_ops);
+	LASSERT(dt->do_body_ops->dbo_declare_write);
+	rc = dt->do_body_ops->dbo_declare_write(env, dt, size, pos, th);
+	return rc;
+}
+
+static inline int dt_declare_create(const struct lu_env *env,
+				    struct dt_object *dt,
+				    struct lu_attr *attr,
+				    struct dt_allocation_hint *hint,
+				    struct dt_object_format *dof,
+				    struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_declare_create);
+	return dt->do_ops->do_declare_create(env, dt, attr, hint, dof, th);
+}
+
+static inline int dt_create(const struct lu_env *env,
+				    struct dt_object *dt,
+				    struct lu_attr *attr,
+				    struct dt_allocation_hint *hint,
+				    struct dt_object_format *dof,
+				    struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_create);
+	return dt->do_ops->do_create(env, dt, attr, hint, dof, th);
+}
+
+static inline int dt_declare_destroy(const struct lu_env *env,
+				     struct dt_object *dt,
+				     struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_declare_destroy);
+	return dt->do_ops->do_declare_destroy(env, dt, th);
+}
+
+static inline int dt_destroy(const struct lu_env *env,
+			     struct dt_object *dt,
+			     struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_destroy);
+	return dt->do_ops->do_destroy(env, dt, th);
+}
+
+static inline void dt_read_lock(const struct lu_env *env,
+				struct dt_object *dt,
+				unsigned role)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_read_lock);
+	dt->do_ops->do_read_lock(env, dt, role);
+}
+
+static inline void dt_write_lock(const struct lu_env *env,
+				struct dt_object *dt,
+				unsigned role)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_write_lock);
+	dt->do_ops->do_write_lock(env, dt, role);
+}
+
+static inline void dt_read_unlock(const struct lu_env *env,
+				struct dt_object *dt)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_read_unlock);
+	dt->do_ops->do_read_unlock(env, dt);
+}
+
+static inline void dt_write_unlock(const struct lu_env *env,
+				struct dt_object *dt)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_write_unlock);
+	dt->do_ops->do_write_unlock(env, dt);
+}
+
+static inline int dt_write_locked(const struct lu_env *env,
+				  struct dt_object *dt)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_write_locked);
+	return dt->do_ops->do_write_locked(env, dt);
+}
+
+static inline int dt_attr_get(const struct lu_env *env, struct dt_object *dt,
+			      struct lu_attr *la, void *arg)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_attr_get);
+	return dt->do_ops->do_attr_get(env, dt, la, arg);
+}
+
+static inline int dt_declare_attr_set(const struct lu_env *env,
+				      struct dt_object *dt,
+				      const struct lu_attr *la,
+				      struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_declare_attr_set);
+	return dt->do_ops->do_declare_attr_set(env, dt, la, th);
+}
+
+static inline int dt_attr_set(const struct lu_env *env, struct dt_object *dt,
+			      const struct lu_attr *la, struct thandle *th,
+			      struct lustre_capa *capa)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_attr_set);
+	return dt->do_ops->do_attr_set(env, dt, la, th, capa);
+}
+
+static inline int dt_declare_ref_add(const struct lu_env *env,
+				     struct dt_object *dt, struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_declare_ref_add);
+	return dt->do_ops->do_declare_ref_add(env, dt, th);
+}
+
+static inline int dt_ref_add(const struct lu_env *env,
+			     struct dt_object *dt, struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_ref_add);
+	return dt->do_ops->do_ref_add(env, dt, th);
+}
+
+static inline int dt_declare_ref_del(const struct lu_env *env,
+				     struct dt_object *dt, struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_declare_ref_del);
+	return dt->do_ops->do_declare_ref_del(env, dt, th);
+}
+
+static inline int dt_ref_del(const struct lu_env *env,
+			     struct dt_object *dt, struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_ref_del);
+	return dt->do_ops->do_ref_del(env, dt, th);
+}
+
+static inline struct obd_capa *dt_capa_get(const struct lu_env *env,
+					   struct dt_object *dt,
+					   struct lustre_capa *old, __u64 opc)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_ref_del);
+	return dt->do_ops->do_capa_get(env, dt, old, opc);
+}
+
+static inline int dt_bufs_get(const struct lu_env *env, struct dt_object *d,
+			      struct niobuf_remote *rnb,
+			      struct niobuf_local *lnb, int rw,
+			      struct lustre_capa *capa)
+{
+	LASSERT(d);
+	LASSERT(d->do_body_ops);
+	LASSERT(d->do_body_ops->dbo_bufs_get);
+	return d->do_body_ops->dbo_bufs_get(env, d, rnb->offset,
+					    rnb->len, lnb, rw, capa);
+}
+
+static inline int dt_bufs_put(const struct lu_env *env, struct dt_object *d,
+			      struct niobuf_local *lnb, int n)
+{
+	LASSERT(d);
+	LASSERT(d->do_body_ops);
+	LASSERT(d->do_body_ops->dbo_bufs_put);
+	return d->do_body_ops->dbo_bufs_put(env, d, lnb, n);
+}
+
+static inline int dt_write_prep(const struct lu_env *env, struct dt_object *d,
+				struct niobuf_local *lnb, int n)
+{
+	LASSERT(d);
+	LASSERT(d->do_body_ops);
+	LASSERT(d->do_body_ops->dbo_write_prep);
+	return d->do_body_ops->dbo_write_prep(env, d, lnb, n);
+}
+
+static inline int dt_declare_write_commit(const struct lu_env *env,
+					  struct dt_object *d,
+					  struct niobuf_local *lnb,
+					  int n, struct thandle *th)
+{
+	LASSERTF(d != NULL, "dt is NULL when we want to declare write\n");
+	LASSERT(th != NULL);
+	return d->do_body_ops->dbo_declare_write_commit(env, d, lnb, n, th);
+}
+
+
+static inline int dt_write_commit(const struct lu_env *env,
+				  struct dt_object *d, struct niobuf_local *lnb,
+				  int n, struct thandle *th)
+{
+	LASSERT(d);
+	LASSERT(d->do_body_ops);
+	LASSERT(d->do_body_ops->dbo_write_commit);
+	return d->do_body_ops->dbo_write_commit(env, d, lnb, n, th);
+}
+
+static inline int dt_read_prep(const struct lu_env *env, struct dt_object *d,
+			       struct niobuf_local *lnb, int n)
+{
+	LASSERT(d);
+	LASSERT(d->do_body_ops);
+	LASSERT(d->do_body_ops->dbo_read_prep);
+	return d->do_body_ops->dbo_read_prep(env, d, lnb, n);
+}
+
+static inline int dt_declare_punch(const struct lu_env *env,
+				   struct dt_object *dt, __u64 start,
+				   __u64 end, struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_body_ops);
+	LASSERT(dt->do_body_ops->dbo_declare_punch);
+	return dt->do_body_ops->dbo_declare_punch(env, dt, start, end, th);
+}
+
+static inline int dt_punch(const struct lu_env *env, struct dt_object *dt,
+			   __u64 start, __u64 end, struct thandle *th,
+			   struct lustre_capa *capa)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_body_ops);
+	LASSERT(dt->do_body_ops->dbo_punch);
+	return dt->do_body_ops->dbo_punch(env, dt, start, end, th, capa);
+}
+
+static inline int dt_fiemap_get(const struct lu_env *env, struct dt_object *d,
+				struct ll_user_fiemap *fm)
+{
+	LASSERT(d);
+	if (d->do_body_ops == NULL)
+		return -EPROTO;
+	if (d->do_body_ops->dbo_fiemap_get == NULL)
+		return -EOPNOTSUPP;
+	return d->do_body_ops->dbo_fiemap_get(env, d, fm);
+}
+
+static inline int dt_statfs(const struct lu_env *env, struct dt_device *dev,
+			    struct obd_statfs *osfs)
+{
+	LASSERT(dev);
+	LASSERT(dev->dd_ops);
+	LASSERT(dev->dd_ops->dt_statfs);
+	return dev->dd_ops->dt_statfs(env, dev, osfs);
+}
+
+static inline int dt_root_get(const struct lu_env *env, struct dt_device *dev,
+			      struct lu_fid *f)
+{
+	LASSERT(dev);
+	LASSERT(dev->dd_ops);
+	LASSERT(dev->dd_ops->dt_root_get);
+	return dev->dd_ops->dt_root_get(env, dev, f);
+}
+
+static inline void dt_conf_get(const struct lu_env *env,
+			       const struct dt_device *dev,
+			       struct dt_device_param *param)
+{
+	LASSERT(dev);
+	LASSERT(dev->dd_ops);
+	LASSERT(dev->dd_ops->dt_conf_get);
+	return dev->dd_ops->dt_conf_get(env, dev, param);
+}
+
+static inline int dt_sync(const struct lu_env *env, struct dt_device *dev)
+{
+	LASSERT(dev);
+	LASSERT(dev->dd_ops);
+	LASSERT(dev->dd_ops->dt_sync);
+	return dev->dd_ops->dt_sync(env, dev);
+}
+
+static inline int dt_ro(const struct lu_env *env, struct dt_device *dev)
+{
+	LASSERT(dev);
+	LASSERT(dev->dd_ops);
+	LASSERT(dev->dd_ops->dt_ro);
+	return dev->dd_ops->dt_ro(env, dev);
+}
+
+static inline int dt_declare_insert(const struct lu_env *env,
+				    struct dt_object *dt,
+				    const struct dt_rec *rec,
+				    const struct dt_key *key,
+				    struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_index_ops);
+	LASSERT(dt->do_index_ops->dio_declare_insert);
+	return dt->do_index_ops->dio_declare_insert(env, dt, rec, key, th);
+}
+
+static inline int dt_insert(const struct lu_env *env,
+				    struct dt_object *dt,
+				    const struct dt_rec *rec,
+				    const struct dt_key *key,
+				    struct thandle *th,
+				    struct lustre_capa *capa,
+				    int noquota)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_index_ops);
+	LASSERT(dt->do_index_ops->dio_insert);
+	return dt->do_index_ops->dio_insert(env, dt, rec, key, th,
+					    capa, noquota);
+}
+
+static inline int dt_declare_xattr_del(const struct lu_env *env,
+				       struct dt_object *dt,
+				       const char *name,
+				       struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_declare_xattr_del);
+	return dt->do_ops->do_declare_xattr_del(env, dt, name, th);
+}
+
+static inline int dt_xattr_del(const struct lu_env *env,
+			       struct dt_object *dt, const char *name,
+			       struct thandle *th,
+			       struct lustre_capa *capa)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_xattr_del);
+	return dt->do_ops->do_xattr_del(env, dt, name, th, capa);
+}
+
+static inline int dt_declare_xattr_set(const struct lu_env *env,
+				      struct dt_object *dt,
+				      const struct lu_buf *buf,
+				      const char *name, int fl,
+				      struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_declare_xattr_set);
+	return dt->do_ops->do_declare_xattr_set(env, dt, buf, name, fl, th);
+}
+
+static inline int dt_xattr_set(const struct lu_env *env,
+			      struct dt_object *dt, const struct lu_buf *buf,
+			      const char *name, int fl, struct thandle *th,
+			      struct lustre_capa *capa)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_xattr_set);
+	return dt->do_ops->do_xattr_set(env, dt, buf, name, fl, th, capa);
+}
+
+static inline int dt_xattr_get(const struct lu_env *env,
+			      struct dt_object *dt, struct lu_buf *buf,
+			      const char *name, struct lustre_capa *capa)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_xattr_get);
+	return dt->do_ops->do_xattr_get(env, dt, buf, name, capa);
+}
+
+static inline int dt_xattr_list(const struct lu_env *env,
+			       struct dt_object *dt, struct lu_buf *buf,
+			       struct lustre_capa *capa)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_xattr_list);
+	return dt->do_ops->do_xattr_list(env, dt, buf, capa);
+}
+
+static inline int dt_declare_delete(const struct lu_env *env,
+				    struct dt_object *dt,
+				    const struct dt_key *key,
+				    struct thandle *th)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_index_ops);
+	LASSERT(dt->do_index_ops->dio_declare_delete);
+	return dt->do_index_ops->dio_declare_delete(env, dt, key, th);
+}
+
+static inline int dt_delete(const struct lu_env *env,
+			    struct dt_object *dt,
+			    const struct dt_key *key,
+			    struct thandle *th,
+			    struct lustre_capa *capa)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_index_ops);
+	LASSERT(dt->do_index_ops->dio_delete);
+	return dt->do_index_ops->dio_delete(env, dt, key, th, capa);
+}
+
+static inline int dt_commit_async(const struct lu_env *env,
+				  struct dt_device *dev)
+{
+	LASSERT(dev);
+	LASSERT(dev->dd_ops);
+	LASSERT(dev->dd_ops->dt_commit_async);
+	return dev->dd_ops->dt_commit_async(env, dev);
+}
+
+static inline int dt_init_capa_ctxt(const struct lu_env *env,
+				    struct dt_device *dev,
+				    int mode, unsigned long timeout,
+				    __u32 alg, struct lustre_capa_key *keys)
+{
+	LASSERT(dev);
+	LASSERT(dev->dd_ops);
+	LASSERT(dev->dd_ops->dt_init_capa_ctxt);
+	return dev->dd_ops->dt_init_capa_ctxt(env, dev, mode,
+					      timeout, alg, keys);
+}
+
+static inline int dt_lookup(const struct lu_env *env,
+			    struct dt_object *dt,
+			    struct dt_rec *rec,
+			    const struct dt_key *key,
+			    struct lustre_capa *capa)
+{
+	int ret;
+
+	LASSERT(dt);
+	LASSERT(dt->do_index_ops);
+	LASSERT(dt->do_index_ops->dio_lookup);
+
+	ret = dt->do_index_ops->dio_lookup(env, dt, rec, key, capa);
+	if (ret > 0)
+		ret = 0;
+	else if (ret == 0)
+		ret = -ENOENT;
+	return ret;
+}
+
+#define LU221_BAD_TIME (0x80000000U + 24 * 3600)
+
+struct dt_find_hint {
+	struct lu_fid	*dfh_fid;
+	struct dt_device     *dfh_dt;
+	struct dt_object     *dfh_o;
+};
+
+struct dt_thread_info {
+	char		     dti_buf[DT_MAX_PATH];
+	struct dt_find_hint      dti_dfh;
+	struct lu_attr	   dti_attr;
+	struct lu_fid	    dti_fid;
+	struct dt_object_format  dti_dof;
+	struct lustre_mdt_attrs  dti_lma;
+	struct lu_buf	    dti_lb;
+	loff_t		   dti_off;
+};
+
+extern struct lu_context_key dt_key;
+
+static inline struct dt_thread_info *dt_info(const struct lu_env *env)
+{
+	struct dt_thread_info *dti;
+
+	dti = lu_context_key_get(&env->le_ctx, &dt_key);
+	LASSERT(dti);
+	return dti;
+}
+
+int dt_global_init(void);
+void dt_global_fini(void);
+
+# ifdef LPROCFS
+int lprocfs_dt_rd_blksize(char *page, char **start, off_t off,
+			  int count, int *eof, void *data);
+int lprocfs_dt_rd_kbytestotal(char *page, char **start, off_t off,
+			      int count, int *eof, void *data);
+int lprocfs_dt_rd_kbytesfree(char *page, char **start, off_t off,
+			     int count, int *eof, void *data);
+int lprocfs_dt_rd_kbytesavail(char *page, char **start, off_t off,
+			      int count, int *eof, void *data);
+int lprocfs_dt_rd_filestotal(char *page, char **start, off_t off,
+			     int count, int *eof, void *data);
+int lprocfs_dt_rd_filesfree(char *page, char **start, off_t off,
+			    int count, int *eof, void *data);
+# endif /* LPROCFS */
+
+#endif /* __LUSTRE_DT_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h
new file mode 100644
index 000000000000..dfdb8aa4e035
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/interval_tree.h
@@ -0,0 +1,124 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/interval_tree.h
+ *
+ * Author: Huang Wei <huangwei@clusterfs.com>
+ * Author: Jay Xiong <jinshan.xiong@sun.com>
+ */
+
+#ifndef _INTERVAL_H__
+#define _INTERVAL_H__
+
+#include <linux/libcfs/libcfs.h>   /* LASSERT. */
+
+struct interval_node {
+	struct interval_node   *in_left;
+	struct interval_node   *in_right;
+	struct interval_node   *in_parent;
+	unsigned		in_color:1,
+				in_intree:1, /** set if the node is in tree */
+				in_res1:30;
+	__u8		    in_res2[4];  /** tags, 8-bytes aligned */
+	__u64		   in_max_high;
+	struct interval_node_extent {
+		__u64 start;
+		__u64 end;
+	} in_extent;
+};
+
+enum interval_iter {
+	INTERVAL_ITER_CONT = 1,
+	INTERVAL_ITER_STOP = 2
+};
+
+static inline int interval_is_intree(struct interval_node *node)
+{
+	return node->in_intree == 1;
+}
+
+static inline __u64 interval_low(struct interval_node *node)
+{
+	return node->in_extent.start;
+}
+
+static inline __u64 interval_high(struct interval_node *node)
+{
+	return node->in_extent.end;
+}
+
+static inline void interval_set(struct interval_node *node,
+				__u64 start, __u64 end)
+{
+	LASSERT(start <= end);
+	node->in_extent.start = start;
+	node->in_extent.end = end;
+	node->in_max_high = end;
+}
+
+/* Rules to write an interval callback.
+ *  - the callback returns INTERVAL_ITER_STOP when it thinks the iteration
+ *    should be stopped. It will then cause the iteration function to return
+ *    immediately with return value INTERVAL_ITER_STOP.
+ *  - callbacks for interval_iterate and interval_iterate_reverse: Every
+ *    nodes in the tree will be set to @node before the callback being called
+ *  - callback for interval_search: Only overlapped node will be set to @node
+ *    before the callback being called.
+ */
+typedef enum interval_iter (*interval_callback_t)(struct interval_node *node,
+						  void *args);
+
+struct interval_node *interval_insert(struct interval_node *node,
+				      struct interval_node **root);
+void interval_erase(struct interval_node *node, struct interval_node **root);
+
+/* Search the extents in the tree and call @func for each overlapped
+ * extents. */
+enum interval_iter interval_search(struct interval_node *root,
+				   struct interval_node_extent *ex,
+				   interval_callback_t func, void *data);
+
+/* Iterate every node in the tree - by reverse order or regular order. */
+enum interval_iter interval_iterate(struct interval_node *root,
+				    interval_callback_t func, void *data);
+enum interval_iter interval_iterate_reverse(struct interval_node *root,
+				    interval_callback_t func,void *data);
+
+void interval_expand(struct interval_node *root,
+		     struct interval_node_extent *ext,
+		     struct interval_node_extent *limiter);
+int interval_is_overlapped(struct interval_node *root,
+			   struct interval_node_extent *ex);
+struct interval_node *interval_find(struct interval_node *root,
+				    struct interval_node_extent *ex);
+#endif
diff --git a/drivers/staging/lustre/lustre/include/ioctl.h b/drivers/staging/lustre/lustre/include/ioctl.h
new file mode 100644
index 000000000000..227c261b2ae9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/ioctl.h
@@ -0,0 +1,106 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _IOWR
+
+/* On i386 and x86_64, _ASM_I386_IOCTL_H is defined by the kernel's ioctl.h,
+ * and on newer kernels this header is shared as _ASM_GENERIC_IOCTL_H.
+ *
+ * We can avoid any problems with the kernel header being included again by
+ * defining _ASM_I386_IOCTL_H here so that a later occurence of <asm/ioctl.h>
+ * does not include the kernel's ioctl.h after this one. b=14746 */
+#define _ASM_I386_IOCTL_H
+#define _ASM_GENERIC_IOCTL_H
+
+/* ioctl command encoding: 32 bits total, command in lower 16 bits,
+ * size of the parameter structure in the lower 14 bits of the
+ * upper 16 bits.
+ * Encoding the size of the parameter structure in the ioctl request
+ * The highest 2 bits are reserved for indicating the ``access mode''.
+ * NOTE: This limits the max parameter size to 16kB -1 !
+ */
+
+/*
+ * The following is for compatibility across the various Linux
+ * platforms.  The i386 ioctl numbering scheme doesn't really enforce
+ * a type field.  De facto, however, the top 8 bits of the lower 16
+ * bits are indeed used as a type field, so we might just as well make
+ * this explicit here.  Please be sure to use the decoding macros
+ * below from now on.
+ */
+#define _IOC_NRBITS     8
+#define _IOC_TYPEBITS   8
+#define _IOC_SIZEBITS   14
+#define _IOC_DIRBITS    2
+
+#define _IOC_NRMASK     ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK   ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK   ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK    ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT    0
+#define _IOC_TYPESHIFT  (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT  (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT   (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE       0U
+#define _IOC_WRITE      1U
+#define _IOC_READ       2U
+
+#define _IOC(dir,type,nr,size) (((dir)  << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | ((nr)   << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT))
+
+/* used to create numbers */
+#define _IO(type,nr)	    _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size)      _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size)      _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size)     _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+
+/* used to decode ioctl numbers.. */
+#define _IOC_DIR(nr)	    (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr)	   (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr)	     (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr)	   (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
+
+/* ...and for the drivers/sound files... */
+
+#define IOC_IN	  (_IOC_WRITE << _IOC_DIRSHIFT)
+#define IOC_OUT	 (_IOC_READ << _IOC_DIRSHIFT)
+#define IOC_INOUT       ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
+#define IOCSIZE_MASK    (_IOC_SIZEMASK << _IOC_SIZESHIFT)
+#define IOCSIZE_SHIFT   (_IOC_SIZESHIFT)
+
+#endif /* _IOWR */
diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
new file mode 100644
index 000000000000..9d4011f2908b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lclient.h
@@ -0,0 +1,437 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Definitions shared between vvp and liblustre, and other clients in the
+ * future.
+ *
+ *   Author: Oleg Drokin <oleg.drokin@sun.com>
+ *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#ifndef LCLIENT_H
+#define LCLIENT_H
+
+blkcnt_t dirty_cnt(struct inode *inode);
+
+int cl_glimpse_size0(struct inode *inode, int agl);
+int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
+		    struct inode *inode, struct cl_object *clob, int agl);
+
+static inline int cl_glimpse_size(struct inode *inode)
+{
+	return cl_glimpse_size0(inode, 0);
+}
+
+static inline int cl_agl(struct inode *inode)
+{
+	return cl_glimpse_size0(inode, 1);
+}
+
+/**
+ * Locking policy for setattr.
+ */
+enum ccc_setattr_lock_type {
+	/** Locking is done by server */
+	SETATTR_NOLOCK,
+	/** Extent lock is enqueued */
+	SETATTR_EXTENT_LOCK,
+	/** Existing local extent lock is used */
+	SETATTR_MATCH_LOCK
+};
+
+
+/**
+ * IO state private to vvp or slp layers.
+ */
+struct ccc_io {
+	/** super class */
+	struct cl_io_slice     cui_cl;
+	struct cl_io_lock_link cui_link;
+	/**
+	 * I/O vector information to or from which read/write is going.
+	 */
+	struct iovec *cui_iov;
+	unsigned long cui_nrsegs;
+	/**
+	 * Total iov count for left IO.
+	 */
+	unsigned long cui_tot_nrsegs;
+	/**
+	 * Old length for iov that was truncated partially.
+	 */
+	size_t cui_iov_olen;
+	/**
+	 * Total size for the left IO.
+	 */
+	size_t cui_tot_count;
+
+	union {
+		struct {
+			enum ccc_setattr_lock_type cui_local_lock;
+		} setattr;
+	} u;
+	/**
+	 * True iff io is processing glimpse right now.
+	 */
+	int		  cui_glimpse;
+	/**
+	 * Layout version when this IO is initialized
+	 */
+	__u32		cui_layout_gen;
+	/**
+	 * File descriptor against which IO is done.
+	 */
+	struct ll_file_data *cui_fd;
+	struct kiocb *cui_iocb;
+};
+
+/**
+ * True, if \a io is a normal io, False for other (sendfile, splice*).
+ * must be impementated in arch specific code.
+ */
+int cl_is_normalio(const struct lu_env *env, const struct cl_io *io);
+
+extern struct lu_context_key ccc_key;
+extern struct lu_context_key ccc_session_key;
+
+struct ccc_thread_info {
+	struct cl_lock_descr cti_descr;
+	struct cl_io	 cti_io;
+	struct cl_attr       cti_attr;
+};
+
+static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env)
+{
+	struct ccc_thread_info      *info;
+
+	info = lu_context_key_get(&env->le_ctx, &ccc_key);
+	LASSERT(info != NULL);
+	return info;
+}
+
+static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env)
+{
+	struct cl_attr *attr = &ccc_env_info(env)->cti_attr;
+	memset(attr, 0, sizeof(*attr));
+	return attr;
+}
+
+static inline struct cl_io *ccc_env_thread_io(const struct lu_env *env)
+{
+	struct cl_io *io = &ccc_env_info(env)->cti_io;
+	memset(io, 0, sizeof(*io));
+	return io;
+}
+
+struct ccc_session {
+	struct ccc_io cs_ios;
+};
+
+static inline struct ccc_session *ccc_env_session(const struct lu_env *env)
+{
+	struct ccc_session *ses;
+
+	ses = lu_context_key_get(env->le_ses, &ccc_session_key);
+	LASSERT(ses != NULL);
+	return ses;
+}
+
+static inline struct ccc_io *ccc_env_io(const struct lu_env *env)
+{
+	return &ccc_env_session(env)->cs_ios;
+}
+
+/**
+ * ccc-private object state.
+ */
+struct ccc_object {
+	struct cl_object_header cob_header;
+	struct cl_object	cob_cl;
+	struct inode	   *cob_inode;
+
+	/**
+	 * A list of dirty pages pending IO in the cache. Used by
+	 * SOM. Protected by ll_inode_info::lli_lock.
+	 *
+	 * \see ccc_page::cpg_pending_linkage
+	 */
+	struct list_head	     cob_pending_list;
+
+	/**
+	 * Access this counter is protected by inode->i_sem. Now that
+	 * the lifetime of transient pages must be covered by inode sem,
+	 * we don't need to hold any lock..
+	 */
+	int		     cob_transient_pages;
+	/**
+	 * Number of outstanding mmaps on this file.
+	 *
+	 * \see ll_vm_open(), ll_vm_close().
+	 */
+	atomic_t	    cob_mmap_cnt;
+
+	/**
+	 * various flags
+	 * cob_discard_page_warned
+	 *     if pages belonging to this object are discarded when a client
+	 * is evicted, some debug info will be printed, this flag will be set
+	 * during processing the first discarded page, then avoid flooding
+	 * debug message for lots of discarded pages.
+	 *
+	 * \see ll_dirty_page_discard_warn.
+	 */
+	unsigned int		cob_discard_page_warned:1;
+};
+
+/**
+ * ccc-private page state.
+ */
+struct ccc_page {
+	struct cl_page_slice cpg_cl;
+	int		  cpg_defer_uptodate;
+	int		  cpg_ra_used;
+	int		  cpg_write_queued;
+	/**
+	 * Non-empty iff this page is already counted in
+	 * ccc_object::cob_pending_list. Protected by
+	 * ccc_object::cob_pending_guard. This list is only used as a flag,
+	 * that is, never iterated through, only checked for list_empty(), but
+	 * having a list is useful for debugging.
+	 */
+	struct list_head	   cpg_pending_linkage;
+	/** VM page */
+	struct page	  *cpg_page;
+};
+
+static inline struct ccc_page *cl2ccc_page(const struct cl_page_slice *slice)
+{
+	return container_of(slice, struct ccc_page, cpg_cl);
+}
+
+struct cl_page    *ccc_vmpage_page_transient(struct page *vmpage);
+
+struct ccc_device {
+	struct cl_device    cdv_cl;
+	struct super_block *cdv_sb;
+	struct cl_device   *cdv_next;
+};
+
+struct ccc_lock {
+	struct cl_lock_slice clk_cl;
+};
+
+struct ccc_req {
+	struct cl_req_slice  crq_cl;
+};
+
+void *ccc_key_init	(const struct lu_context *ctx,
+			   struct lu_context_key *key);
+void  ccc_key_fini	(const struct lu_context *ctx,
+			   struct lu_context_key *key, void *data);
+void *ccc_session_key_init(const struct lu_context *ctx,
+			   struct lu_context_key *key);
+void  ccc_session_key_fini(const struct lu_context *ctx,
+			   struct lu_context_key *key, void *data);
+
+int	      ccc_device_init  (const struct lu_env *env,
+				   struct lu_device *d,
+				   const char *name, struct lu_device *next);
+struct lu_device *ccc_device_fini (const struct lu_env *env,
+				   struct lu_device *d);
+struct lu_device *ccc_device_alloc(const struct lu_env *env,
+				   struct lu_device_type *t,
+				   struct lustre_cfg *cfg,
+				   const struct lu_device_operations *luops,
+				   const struct cl_device_operations *clops);
+struct lu_device *ccc_device_free (const struct lu_env *env,
+				   struct lu_device *d);
+struct lu_object *ccc_object_alloc(const struct lu_env *env,
+				   const struct lu_object_header *hdr,
+				   struct lu_device *dev,
+				   const struct cl_object_operations *clops,
+				   const struct lu_object_operations *luops);
+
+int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
+		 struct cl_req *req);
+void ccc_umount(const struct lu_env *env, struct cl_device *dev);
+int ccc_global_init(struct lu_device_type *device_type);
+void ccc_global_fini(struct lu_device_type *device_type);
+int ccc_object_init0(const struct lu_env *env,struct ccc_object *vob,
+		     const struct cl_object_conf *conf);
+int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
+		    const struct lu_object_conf *conf);
+void ccc_object_free(const struct lu_env *env, struct lu_object *obj);
+int ccc_lock_init(const struct lu_env *env, struct cl_object *obj,
+		  struct cl_lock *lock, const struct cl_io *io,
+		  const struct cl_lock_operations *lkops);
+int ccc_attr_set(const struct lu_env *env, struct cl_object *obj,
+		 const struct cl_attr *attr, unsigned valid);
+int ccc_object_glimpse(const struct lu_env *env,
+		       const struct cl_object *obj, struct ost_lvb *lvb);
+int ccc_conf_set(const struct lu_env *env, struct cl_object *obj,
+		 const struct cl_object_conf *conf);
+struct page *ccc_page_vmpage(const struct lu_env *env,
+			    const struct cl_page_slice *slice);
+int ccc_page_is_under_lock(const struct lu_env *env,
+			   const struct cl_page_slice *slice, struct cl_io *io);
+int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice);
+void ccc_transient_page_verify(const struct cl_page *page);
+int  ccc_transient_page_own(const struct lu_env *env,
+			    const struct cl_page_slice *slice,
+			    struct cl_io *io, int nonblock);
+void ccc_transient_page_assume(const struct lu_env *env,
+			       const struct cl_page_slice *slice,
+			       struct cl_io *io);
+void ccc_transient_page_unassume(const struct lu_env *env,
+				 const struct cl_page_slice *slice,
+				 struct cl_io *io);
+void ccc_transient_page_disown(const struct lu_env *env,
+			       const struct cl_page_slice *slice,
+			       struct cl_io *io);
+void ccc_transient_page_discard(const struct lu_env *env,
+				const struct cl_page_slice *slice,
+				struct cl_io *io);
+int ccc_transient_page_prep(const struct lu_env *env,
+			    const struct cl_page_slice *slice,
+			    struct cl_io *io);
+void ccc_lock_delete(const struct lu_env *env,
+		     const struct cl_lock_slice *slice);
+void ccc_lock_fini(const struct lu_env *env,struct cl_lock_slice *slice);
+int ccc_lock_enqueue(const struct lu_env *env,const struct cl_lock_slice *slice,
+		     struct cl_io *io, __u32 enqflags);
+int ccc_lock_unuse(const struct lu_env *env,const struct cl_lock_slice *slice);
+int ccc_lock_wait(const struct lu_env *env,const struct cl_lock_slice *slice);
+int ccc_lock_fits_into(const struct lu_env *env,
+		       const struct cl_lock_slice *slice,
+		       const struct cl_lock_descr *need,
+		       const struct cl_io *io);
+void ccc_lock_state(const struct lu_env *env,
+		    const struct cl_lock_slice *slice,
+		    enum cl_lock_state state);
+
+void ccc_io_fini(const struct lu_env *env, const struct cl_io_slice *ios);
+int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
+			  __u32 enqflags, enum cl_lock_mode mode,
+			  pgoff_t start, pgoff_t end);
+int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
+		    __u32 enqflags, enum cl_lock_mode mode,
+		    loff_t start, loff_t end);
+void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios);
+void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios,
+		    size_t nob);
+void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio,
+		       struct cl_io *io);
+int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
+		  struct cl_io *io, loff_t start, size_t count, int *exceed);
+void ccc_req_completion(const struct lu_env *env,
+			const struct cl_req_slice *slice, int ioret);
+void ccc_req_attr_set(const struct lu_env *env,const struct cl_req_slice *slice,
+		      const struct cl_object *obj,
+		      struct cl_req_attr *oa, obd_valid flags);
+
+struct lu_device   *ccc2lu_dev      (struct ccc_device *vdv);
+struct lu_object   *ccc2lu	  (struct ccc_object *vob);
+struct ccc_device  *lu2ccc_dev      (const struct lu_device *d);
+struct ccc_device  *cl2ccc_dev      (const struct cl_device *d);
+struct ccc_object  *lu2ccc	  (const struct lu_object *obj);
+struct ccc_object  *cl2ccc	  (const struct cl_object *obj);
+struct ccc_lock    *cl2ccc_lock     (const struct cl_lock_slice *slice);
+struct ccc_io      *cl2ccc_io       (const struct lu_env *env,
+				     const struct cl_io_slice *slice);
+struct ccc_req     *cl2ccc_req      (const struct cl_req_slice *slice);
+struct page	 *cl2vm_page      (const struct cl_page_slice *slice);
+struct inode       *ccc_object_inode(const struct cl_object *obj);
+struct ccc_object  *cl_inode2ccc    (struct inode *inode);
+
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr,
+		   struct obd_capa *capa);
+
+struct cl_page *ccc_vmpage_page_transient(struct page *vmpage);
+int ccc_object_invariant(const struct cl_object *obj);
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
+void cl_inode_fini(struct inode *inode);
+int cl_local_size(struct inode *inode);
+
+__u16 ll_dirent_type_get(struct lu_dirent *ent);
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
+__u32 cl_fid_build_gen(const struct lu_fid *fid);
+
+# define CLOBINVRNT(env, clob, expr)				    \
+	((void)sizeof(env), (void)sizeof(clob), (void)sizeof !!(expr))
+
+int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
+int cl_ocd_update(struct obd_device *host,
+		  struct obd_device *watched,
+		  enum obd_notify_event ev, void *owner, void *data);
+
+struct ccc_grouplock {
+	struct lu_env   *cg_env;
+	struct cl_io    *cg_io;
+	struct cl_lock  *cg_lock;
+	unsigned long    cg_gid;
+};
+
+int  cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
+		      struct ccc_grouplock *cg);
+void cl_put_grouplock(struct ccc_grouplock *cg);
+
+/**
+ * New interfaces to get and put lov_stripe_md from lov layer. This violates
+ * layering because lov_stripe_md is supposed to be a private data in lov.
+ *
+ * NB: If you find you have to use these interfaces for your new code, please
+ * think about it again. These interfaces may be removed in the future for
+ * better layering. */
+struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
+void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
+int lov_read_and_clear_async_rc(struct cl_object *clob);
+
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
+void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
+
+/**
+ * Data structure managing a client's cached clean pages. An LRU of
+ * pages is maintained, along with other statistics.
+ */
+struct cl_client_cache {
+	atomic_t	ccc_users;    /* # of users (OSCs) of this data */
+	struct list_head	ccc_lru;      /* LRU list of cached clean pages */
+	spinlock_t	ccc_lru_lock; /* lock for list */
+	atomic_t	ccc_lru_left; /* # of LRU entries available */
+	unsigned long	ccc_lru_max;  /* Max # of LRU entries possible */
+	unsigned int	ccc_lru_shrinkers; /* # of threads reclaiming */
+};
+
+#endif /*LCLIENT_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lprocfs_status.h b/drivers/staging/lustre/lustre/include/linux/lprocfs_status.h
new file mode 100644
index 000000000000..586692272d78
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lprocfs_status.h
@@ -0,0 +1,58 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lprocfs_status.h
+ *
+ * Top level header file for LProc SNMP
+ *
+ * Author: Hariharan Thantry thantry@users.sourceforge.net
+ */
+#ifndef _LINUX_LPROCFS_SNMP_H
+#define _LINUX_LPROCFS_SNMP_H
+
+#ifndef _LPROCFS_SNMP_H
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#include <linux/smp.h>
+#include <linux/rwsem.h>
+#include <linux/libcfs/libcfs.h>
+#include <linux/statfs.h>
+
+
+#endif /* LPROCFS_SNMP_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_acl.h b/drivers/staging/lustre/lustre/include/linux/lustre_acl.h
new file mode 100644
index 000000000000..ff4fc4ff2894
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_acl.h
@@ -0,0 +1,66 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lustre/include/lustre_acl.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_LINUX_ACL_H
+#define _LUSTRE_LINUX_ACL_H
+
+#ifndef	_LUSTRE_ACL_H
+#error	Shoud not include direectly. use #include <lustre_acl.h> instead
+#endif
+
+# include <linux/fs.h>
+# include <linux/dcache.h>
+# ifdef CONFIG_FS_POSIX_ACL
+#  include <linux/posix_acl_xattr.h>
+#  define LUSTRE_POSIX_ACL_MAX_ENTRIES	32
+#  define LUSTRE_POSIX_ACL_MAX_SIZE					\
+	(sizeof(posix_acl_xattr_header) +				\
+	 LUSTRE_POSIX_ACL_MAX_ENTRIES * sizeof(posix_acl_xattr_entry))
+# endif /* CONFIG_FS_POSIX_ACL */
+# include <linux/lustre_intent.h>
+# include <linux/xattr.h> /* XATTR_{REPLACE,CREATE} */
+
+#ifndef LUSTRE_POSIX_ACL_MAX_SIZE
+# define LUSTRE_POSIX_ACL_MAX_SIZE   0
+#endif
+
+#endif /* _LUSTRE_LINUX_ACL_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_common.h b/drivers/staging/lustre/lustre/include/linux/lustre_common.h
new file mode 100644
index 000000000000..d1783a33d8ca
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_common.h
@@ -0,0 +1,22 @@
+#ifndef LUSTRE_COMMON_H
+#define LUSTRE_COMMON_H
+
+#include <linux/sched.h>
+
+static inline int cfs_cleanup_group_info(void)
+{
+	struct group_info *ginfo;
+
+	ginfo = groups_alloc(0);
+	if (!ginfo)
+		return -ENOMEM;
+
+	set_current_groups(ginfo);
+	put_group_info(ginfo);
+
+	return 0;
+}
+
+#define ll_inode_blksize(a)		(1<<(a)->i_blkbits)
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
new file mode 100644
index 000000000000..dff04688945b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -0,0 +1,349 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_COMPAT25_H
+#define _LINUX_COMPAT25_H
+
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/libcfs/linux/portals_compat25.h>
+
+#include <linux/lustre_patchless_compat.h>
+
+# define LOCK_FS_STRUCT(fs)	spin_lock(&(fs)->lock)
+# define UNLOCK_FS_STRUCT(fs)	spin_unlock(&(fs)->lock)
+
+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
+				 struct dentry *dentry)
+{
+	struct path path;
+	struct path old_pwd;
+
+	path.mnt = mnt;
+	path.dentry = dentry;
+	LOCK_FS_STRUCT(fs);
+	old_pwd = fs->pwd;
+	path_get(&path);
+	fs->pwd = path;
+	UNLOCK_FS_STRUCT(fs);
+
+	if (old_pwd.dentry)
+		path_put(&old_pwd);
+}
+
+
+/*
+ * set ATTR_BLOCKS to a high value to avoid any risk of collision with other
+ * ATTR_* attributes (see bug 13828)
+ */
+#define ATTR_BLOCKS    (1 << 27)
+
+#define current_ngroups current_cred()->group_info->ngroups
+#define current_groups current_cred()->group_info->small_block
+
+/*
+ * OBD need working random driver, thus all our
+ * initialization routines must be called after device
+ * driver initialization
+ */
+#ifndef MODULE
+#undef module_init
+#define module_init(a)     late_initcall(a)
+#endif
+
+
+#define LTIME_S(time)		   (time.tv_sec)
+
+#define ll_permission(inode,mask,nd)    inode_permission(inode,mask)
+
+# define ll_generic_permission(inode, mask, flags, check_acl) \
+	 generic_permission(inode, mask)
+
+#define ll_blkdev_put(a, b) blkdev_put(a, b)
+
+#define ll_dentry_open(a,b,c)	dentry_open(a,b,c)
+
+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
+		       vfs_symlink(dir, dentry, path)
+
+
+#define ll_generic_file_llseek_size(file, offset, origin, maxbytes, eof) \
+		generic_file_llseek_size(file, offset, origin, maxbytes, eof);
+
+/* inode_dio_wait(i) use as-is for write lock */
+# define inode_dio_write_done(i)	do {} while (0) /* for write unlock */
+# define inode_dio_read(i)		atomic_inc(&(i)->i_dio_count)
+/* inode_dio_done(i) use as-is for read unlock */
+
+#define TREE_READ_LOCK_IRQ(mapping)	spin_lock_irq(&(mapping)->tree_lock)
+#define TREE_READ_UNLOCK_IRQ(mapping)	spin_unlock_irq(&(mapping)->tree_lock)
+
+static inline
+int ll_unregister_blkdev(unsigned int dev, const char *name)
+{
+	unregister_blkdev(dev, name);
+	return 0;
+}
+
+#define ll_invalidate_bdev(a,b)	 invalidate_bdev((a))
+
+#ifndef FS_HAS_FIEMAP
+#define FS_HAS_FIEMAP			(0)
+#endif
+
+
+
+/* add a lustre compatible layer for crypto API */
+#include <linux/crypto.h>
+#define ll_crypto_hash	  crypto_hash
+#define ll_crypto_cipher	crypto_blkcipher
+#define ll_crypto_alloc_hash(name, type, mask)  crypto_alloc_hash(name, type, mask)
+#define ll_crypto_hash_setkey(tfm, key, keylen) crypto_hash_setkey(tfm, key, keylen)
+#define ll_crypto_hash_init(desc)	       crypto_hash_init(desc)
+#define ll_crypto_hash_update(desc, sl, bytes)  crypto_hash_update(desc, sl, bytes)
+#define ll_crypto_hash_final(desc, out)	 crypto_hash_final(desc, out)
+#define ll_crypto_blkcipher_setkey(tfm, key, keylen) \
+		crypto_blkcipher_setkey(tfm, key, keylen)
+#define ll_crypto_blkcipher_set_iv(tfm, src, len) \
+		crypto_blkcipher_set_iv(tfm, src, len)
+#define ll_crypto_blkcipher_get_iv(tfm, dst, len) \
+		crypto_blkcipher_get_iv(tfm, dst, len)
+#define ll_crypto_blkcipher_encrypt(desc, dst, src, bytes) \
+		crypto_blkcipher_encrypt(desc, dst, src, bytes)
+#define ll_crypto_blkcipher_decrypt(desc, dst, src, bytes) \
+		crypto_blkcipher_decrypt(desc, dst, src, bytes)
+#define ll_crypto_blkcipher_encrypt_iv(desc, dst, src, bytes) \
+		crypto_blkcipher_encrypt_iv(desc, dst, src, bytes)
+#define ll_crypto_blkcipher_decrypt_iv(desc, dst, src, bytes) \
+		crypto_blkcipher_decrypt_iv(desc, dst, src, bytes)
+
+static inline
+struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char *name,
+						   u32 type, u32 mask)
+{
+	struct ll_crypto_cipher *rtn = crypto_alloc_blkcipher(name, type, mask);
+
+	return (rtn == NULL ? ERR_PTR(-ENOMEM) : rtn);
+}
+
+static inline int ll_crypto_hmac(struct ll_crypto_hash *tfm,
+				 u8 *key, unsigned int *keylen,
+				 struct scatterlist *sg,
+				 unsigned int size, u8 *result)
+{
+	struct hash_desc desc;
+	int	      rv;
+	desc.tfm   = tfm;
+	desc.flags = 0;
+	rv = crypto_hash_setkey(desc.tfm, key, *keylen);
+	if (rv) {
+		CERROR("failed to hash setkey: %d\n", rv);
+		return rv;
+	}
+	return crypto_hash_digest(&desc, sg, size, result);
+}
+static inline
+unsigned int ll_crypto_tfm_alg_max_keysize(struct crypto_blkcipher *tfm)
+{
+	return crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher.max_keysize;
+}
+static inline
+unsigned int ll_crypto_tfm_alg_min_keysize(struct crypto_blkcipher *tfm)
+{
+	return crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher.min_keysize;
+}
+
+#define ll_crypto_hash_blocksize(tfm)       crypto_hash_blocksize(tfm)
+#define ll_crypto_hash_digestsize(tfm)      crypto_hash_digestsize(tfm)
+#define ll_crypto_blkcipher_ivsize(tfm)     crypto_blkcipher_ivsize(tfm)
+#define ll_crypto_blkcipher_blocksize(tfm)  crypto_blkcipher_blocksize(tfm)
+#define ll_crypto_free_hash(tfm)	    crypto_free_hash(tfm)
+#define ll_crypto_free_blkcipher(tfm)       crypto_free_blkcipher(tfm)
+
+#define ll_vfs_rmdir(dir,entry,mnt)	     vfs_rmdir(dir,entry)
+#define ll_vfs_mkdir(inode,dir,mnt,mode)	vfs_mkdir(inode,dir,mode)
+#define ll_vfs_link(old,mnt,dir,new,mnt1)       vfs_link(old,dir,new)
+#define ll_vfs_unlink(inode,entry,mnt)	  vfs_unlink(inode,entry)
+#define ll_vfs_mknod(dir,entry,mnt,mode,dev)    vfs_mknod(dir,entry,mode,dev)
+#define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
+		vfs_rename(old,old_dir,new,new_dir)
+
+#ifdef for_each_possible_cpu
+#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
+#elif defined(for_each_cpu)
+#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu)
+#endif
+
+#define cfs_bio_io_error(a,b)   bio_io_error((a))
+#define cfs_bio_endio(a,b,c)    bio_endio((a),(c))
+
+#define cfs_fs_pwd(fs)       ((fs)->pwd.dentry)
+#define cfs_fs_mnt(fs)       ((fs)->pwd.mnt)
+#define cfs_path_put(nd)     path_put(&(nd)->path)
+
+
+#ifndef SLAB_DESTROY_BY_RCU
+#define SLAB_DESTROY_BY_RCU 0
+#endif
+
+
+
+static inline int
+ll_quota_on(struct super_block *sb, int off, int ver, char *name, int remount)
+{
+	int rc;
+
+	if (sb->s_qcop->quota_on) {
+		struct path path;
+
+		rc = kern_path(name, LOOKUP_FOLLOW, &path);
+		if (!rc)
+			return rc;
+		rc = sb->s_qcop->quota_on(sb, off, ver
+					    , &path
+					   );
+		path_put(&path);
+		return rc;
+	}
+	else
+		return -ENOSYS;
+}
+
+static inline int ll_quota_off(struct super_block *sb, int off, int remount)
+{
+	if (sb->s_qcop->quota_off) {
+		return sb->s_qcop->quota_off(sb, off
+					    );
+	}
+	else
+		return -ENOSYS;
+}
+
+
+# define ll_vfs_dq_init	     dquot_initialize
+# define ll_vfs_dq_drop	     dquot_drop
+# define ll_vfs_dq_transfer	 dquot_transfer
+# define ll_vfs_dq_off(sb, remount) dquot_suspend(sb, -1)
+
+
+
+
+
+#define queue_max_phys_segments(rq)       queue_max_segments(rq)
+#define queue_max_hw_segments(rq)	 queue_max_segments(rq)
+
+#define ll_kmap_atomic(a, b)	kmap_atomic(a)
+#define ll_kunmap_atomic(a, b)	kunmap_atomic(a)
+
+
+#define ll_d_hlist_node hlist_node
+#define ll_d_hlist_empty(list) hlist_empty(list)
+#define ll_d_hlist_entry(ptr, type, name) hlist_entry(ptr.first, type, name)
+#define ll_d_hlist_for_each(tmp, i_dentry) hlist_for_each(tmp, i_dentry)
+#define ll_d_hlist_for_each_entry(dentry, p, i_dentry, alias) \
+	p = NULL; hlist_for_each_entry(dentry, i_dentry, alias)
+
+
+#define bio_hw_segments(q, bio) 0
+
+
+#define ll_pagevec_init(pv, cold)       do {} while (0)
+#define ll_pagevec_add(pv, pg)	  (0)
+#define ll_pagevec_lru_add_file(pv)     do {} while (0)
+
+
+#ifndef QUOTA_OK
+# define QUOTA_OK 0
+#endif
+#ifndef NO_QUOTA
+# define NO_QUOTA (-EDQUOT)
+#endif
+
+#ifndef SEEK_DATA
+#define SEEK_DATA      3       /* seek to the next data */
+#endif
+#ifndef SEEK_HOLE
+#define SEEK_HOLE      4       /* seek to the next hole */
+#endif
+
+#ifndef FMODE_UNSIGNED_OFFSET
+#define FMODE_UNSIGNED_OFFSET	((__force fmode_t)0x2000)
+#endif
+
+#if !defined(_ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_) && !defined(ext2_set_bit)
+# define ext2_set_bit	     __test_and_set_bit_le
+# define ext2_clear_bit	   __test_and_clear_bit_le
+# define ext2_test_bit	    test_bit_le
+# define ext2_find_first_zero_bit find_first_zero_bit_le
+# define ext2_find_next_zero_bit  find_next_zero_bit_le
+#endif
+
+#ifdef ATTR_TIMES_SET
+# define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+#else
+# define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET)
+#endif
+
+
+
+/*
+ * After 3.1, kernel's nameidata.intent.open.flags is different
+ * with lustre's lookup_intent.it_flags, as lustre's it_flags'
+ * lower bits equal to FMODE_xxx while kernel doesn't transliterate
+ * lower bits of nameidata.intent.open.flags to FMODE_xxx.
+ * */
+#include <linux/version.h>
+static inline int ll_namei_to_lookup_intent_flag(int flag)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	flag = (flag & ~O_ACCMODE) | OPEN_FMODE(flag);
+#endif
+	return flag;
+}
+
+# define ll_mrf_ret void
+# define LL_MRF_RETURN(rc)
+
+#include <linux/fs.h>
+
+# define ll_umode_t	umode_t
+
+#include <linux/dcache.h>
+
+# define ll_dirty_inode(inode, flag)	(inode)->i_sb->s_op->dirty_inode((inode), flag)
+
+#endif /* _COMPAT25_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_debug.h b/drivers/staging/lustre/lustre/include/linux/lustre_debug.h
new file mode 100644
index 000000000000..11deac7248ae
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_debug.h
@@ -0,0 +1,47 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LUSTRE_DEBUG_H
+#define _LINUX_LUSTRE_DEBUG_H
+
+#ifndef _LUSTRE_DEBUG_H
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
+
+#define LL_CDEBUG_PAGE(mask, page, fmt, arg...)			       \
+	CDEBUG(mask, "page %p map %p index %lu flags %lx count %u priv %0lx: "\
+	       fmt, page, page->mapping, page->index, (long)page->flags,      \
+	       page_count(page), page_private(page), ## arg)
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_dlm.h b/drivers/staging/lustre/lustre/include/linux/lustre_dlm.h
new file mode 100644
index 000000000000..207df03f6149
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_dlm.h
@@ -0,0 +1,46 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LUSTRE_DLM_H__
+#define _LINUX_LUSTRE_DLM_H__
+
+#ifndef _LUSTRE_DLM_H__
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
+
+# include <linux/proc_fs.h>
+#  include <asm/processor.h>
+#  include <linux/bit_spinlock.h>
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_fsfilt.h b/drivers/staging/lustre/lustre/include/linux/lustre_fsfilt.h
new file mode 100644
index 000000000000..6c7260957383
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_fsfilt.h
@@ -0,0 +1,181 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lustre_fsfilt.h
+ *
+ * Filesystem interface helper.
+ */
+
+#ifndef _LINUX_LUSTRE_FSFILT_H
+#define _LINUX_LUSTRE_FSFILT_H
+
+#ifndef _LUSTRE_FSFILT_H
+#error Do not #include this file directly. #include <lustre_fsfilt.h> instead
+#endif
+
+
+#include <obd.h>
+#include <obd_class.h>
+
+typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd,
+			    void *data, int error);
+
+struct fsfilt_operations {
+	struct list_head fs_list;
+	module_t *fs_owner;
+	char   *fs_type;
+	char   *(* fs_getlabel)(struct super_block *sb);
+	void   *(* fs_start)(struct inode *inode, int op, void *desc_private,
+			     int logs);
+	int     (* fs_commit)(struct inode *inode, void *handle,int force_sync);
+	int     (* fs_map_inode_pages)(struct inode *inode, struct page **page,
+				       int pages, unsigned long *blocks,
+				       int create, struct mutex *sem);
+	int     (* fs_write_record)(struct file *, void *, int size, loff_t *,
+				    int force_sync);
+	int     (* fs_read_record)(struct file *, void *, int size, loff_t *);
+	int     (* fs_setup)(struct super_block *sb);
+};
+
+extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
+extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops);
+extern struct fsfilt_operations *fsfilt_get_ops(const char *type);
+extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
+
+static inline char *fsfilt_get_label(struct obd_device *obd,
+				     struct super_block *sb)
+{
+	if (obd->obd_fsops->fs_getlabel == NULL)
+		return NULL;
+	if (obd->obd_fsops->fs_getlabel(sb)[0] == '\0')
+		return NULL;
+
+	return obd->obd_fsops->fs_getlabel(sb);
+}
+
+#define FSFILT_OP_UNLINK		1
+#define FSFILT_OP_CANCEL_UNLINK	 10
+
+#define __fsfilt_check_slow(obd, start, msg)			      \
+do {								      \
+	if (cfs_time_before(jiffies, start + 15 * HZ))		\
+		break;						    \
+	else if (cfs_time_before(jiffies, start + 30 * HZ))	   \
+		CDEBUG(D_VFSTRACE, "%s: slow %s %lus\n", obd->obd_name,   \
+		       msg, (jiffies-start) / HZ);		    \
+	else if (cfs_time_before(jiffies, start + DISK_TIMEOUT * HZ)) \
+		CWARN("%s: slow %s %lus\n", obd->obd_name, msg,	   \
+		      (jiffies - start) / HZ);			\
+	else							      \
+		CERROR("%s: slow %s %lus\n", obd->obd_name, msg,	  \
+		       (jiffies - start) / HZ);		       \
+} while (0)
+
+#define fsfilt_check_slow(obd, start, msg)	      \
+do {						    \
+	__fsfilt_check_slow(obd, start, msg);	   \
+	start = jiffies;				\
+} while (0)
+
+static inline void *fsfilt_start_log(struct obd_device *obd,
+				     struct inode *inode, int op,
+				     struct obd_trans_info *oti, int logs)
+{
+	unsigned long now = jiffies;
+	void *parent_handle = oti ? oti->oti_handle : NULL;
+	void *handle;
+
+	handle = obd->obd_fsops->fs_start(inode, op, parent_handle, logs);
+	CDEBUG(D_INFO, "started handle %p (%p)\n", handle, parent_handle);
+
+	if (oti != NULL) {
+		if (parent_handle == NULL) {
+			oti->oti_handle = handle;
+		} else if (handle != parent_handle) {
+			CERROR("mismatch: parent %p, handle %p, oti %p\n",
+			       parent_handle, handle, oti);
+			LBUG();
+		}
+	}
+	fsfilt_check_slow(obd, now, "journal start");
+	return handle;
+}
+
+static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode,
+				void *handle, int force_sync)
+{
+	unsigned long now = jiffies;
+	int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
+	CDEBUG(D_INFO, "committing handle %p\n", handle);
+
+	fsfilt_check_slow(obd, now, "journal start");
+
+	return rc;
+}
+
+static inline int fsfilt_map_inode_pages(struct obd_device *obd,
+					 struct inode *inode,
+					 struct page **page, int pages,
+					 unsigned long *blocks,
+					 int create, struct mutex *mutex)
+{
+	return obd->obd_fsops->fs_map_inode_pages(inode, page, pages, blocks,
+						  create, mutex);
+}
+
+static inline int fsfilt_read_record(struct obd_device *obd, struct file *file,
+				     void *buf, loff_t size, loff_t *offs)
+{
+	return obd->obd_fsops->fs_read_record(file, buf, size, offs);
+}
+
+static inline int fsfilt_write_record(struct obd_device *obd, struct file *file,
+				      void *buf, loff_t size, loff_t *offs,
+				      int force_sync)
+{
+	return obd->obd_fsops->fs_write_record(file, buf, size,offs,force_sync);
+}
+
+static inline int fsfilt_setup(struct obd_device *obd, struct super_block *fs)
+{
+	if (obd->obd_fsops->fs_setup)
+		return obd->obd_fsops->fs_setup(fs);
+	return 0;
+}
+
+
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_handles.h b/drivers/staging/lustre/lustre/include/linux/lustre_handles.h
new file mode 100644
index 000000000000..ecf184051252
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_handles.h
@@ -0,0 +1,53 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_LUSTRE_HANDLES_H_
+#define __LINUX_LUSTRE_HANDLES_H_
+
+#ifndef __LUSTRE_HANDLES_H_
+#error Do not #include this file directly. #include <lustre_handles.h> instead
+#endif
+
+#include <asm/types.h>
+#include <asm/atomic.h>
+#include <linux/list.h>
+#include <linux/version.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <linux/rcupdate.h> /* for rcu_head{} */
+typedef struct rcu_head cfs_rcu_head_t;
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_intent.h b/drivers/staging/lustre/lustre/include/linux/lustre_intent.h
new file mode 100644
index 000000000000..b10ddfa7df29
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_intent.h
@@ -0,0 +1,62 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef LUSTRE_INTENT_H
+#define LUSTRE_INTENT_H
+
+/* intent IT_XXX are defined in lustre/include/obd.h */
+struct lustre_intent_data {
+	int		it_disposition;
+	int		it_status;
+	__u64		it_lock_handle;
+	__u64		it_lock_bits;
+	int		it_lock_mode;
+	int		it_remote_lock_mode;
+	__u64	   it_remote_lock_handle;
+	void	   *it_data;
+	unsigned int    it_lock_set:1;
+};
+
+struct lookup_intent {
+	int     it_op;
+	int     it_flags;
+	int     it_create_mode;
+	union {
+		struct lustre_intent_data lustre;
+	} d;
+};
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_lib.h b/drivers/staging/lustre/lustre/include/linux/lustre_lib.h
new file mode 100644
index 000000000000..b2f755acadf6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_lib.h
@@ -0,0 +1,87 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lustre_lib.h
+ *
+ * Basic Lustre library routines.
+ */
+
+#ifndef _LINUX_LUSTRE_LIB_H
+#define _LINUX_LUSTRE_LIB_H
+
+#ifndef _LUSTRE_LIB_H
+#error Do not #include this file directly. #include <lustre_lib.h> instead
+#endif
+
+# include <linux/rwsem.h>
+# include <linux/sched.h>
+# include <linux/signal.h>
+# include <linux/types.h>
+# include <linux/lustre_compat25.h>
+# include <linux/lustre_common.h>
+
+#ifndef LP_POISON
+#if BITS_PER_LONG > 32
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+#else
+# define LI_POISON ((int)0x5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a)
+#endif
+#endif
+
+/* This macro is only for compatibility reasons with older Linux Lustre user
+ * tools. New ioctls should NOT use this macro as the ioctl "size". Instead
+ * the ioctl should get a "size" argument which is the actual data type used
+ * by the ioctl, to ensure the ioctl interface is versioned correctly. */
+#define OBD_IOC_DATA_TYPE	       long
+
+#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) |		\
+			   sigmask(SIGTERM) | sigmask(SIGQUIT) |	       \
+			   sigmask(SIGALRM))
+
+/* initialize ost_lvb according to inode */
+static inline void inode_init_lvb(struct inode *inode, struct ost_lvb *lvb)
+{
+	lvb->lvb_size = i_size_read(inode);
+	lvb->lvb_blocks = inode->i_blocks;
+	lvb->lvb_mtime = LTIME_S(inode->i_mtime);
+	lvb->lvb_atime = LTIME_S(inode->i_atime);
+	lvb->lvb_ctime = LTIME_S(inode->i_ctime);
+}
+
+#endif /* _LUSTRE_LIB_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
new file mode 100644
index 000000000000..c95dff900b58
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
@@ -0,0 +1,100 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LL_H
+#define _LINUX_LL_H
+
+#ifndef _LL_H
+#error Do not #include this file directly. #include <lustre_lite.h> instead
+#endif
+
+
+#include <linux/version.h>
+
+#include <asm/statfs.h>
+
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/proc_fs.h>
+
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_ha.h>
+
+#include <linux/rbtree.h>
+#include <linux/lustre_compat25.h>
+#include <linux/lustre_common.h>
+#include <linux/pagemap.h>
+
+/* lprocfs.c */
+enum {
+	 LPROC_LL_DIRTY_HITS = 0,
+	 LPROC_LL_DIRTY_MISSES,
+	 LPROC_LL_READ_BYTES,
+	 LPROC_LL_WRITE_BYTES,
+	 LPROC_LL_BRW_READ,
+	 LPROC_LL_BRW_WRITE,
+	 LPROC_LL_OSC_READ,
+	 LPROC_LL_OSC_WRITE,
+	 LPROC_LL_IOCTL,
+	 LPROC_LL_OPEN,
+	 LPROC_LL_RELEASE,
+	 LPROC_LL_MAP,
+	 LPROC_LL_LLSEEK,
+	 LPROC_LL_FSYNC,
+	 LPROC_LL_READDIR,
+	 LPROC_LL_SETATTR,
+	 LPROC_LL_TRUNC,
+	 LPROC_LL_FLOCK,
+	 LPROC_LL_GETATTR,
+	 LPROC_LL_CREATE,
+	 LPROC_LL_LINK,
+	 LPROC_LL_UNLINK,
+	 LPROC_LL_SYMLINK,
+	 LPROC_LL_MKDIR,
+	 LPROC_LL_RMDIR,
+	 LPROC_LL_MKNOD,
+	 LPROC_LL_RENAME,
+	 LPROC_LL_STAFS,
+	 LPROC_LL_ALLOC_INODE,
+	 LPROC_LL_SETXATTR,
+	 LPROC_LL_GETXATTR,
+	 LPROC_LL_LISTXATTR,
+	 LPROC_LL_REMOVEXATTR,
+	 LPROC_LL_INODE_PERM,
+	 LPROC_LL_FILE_OPCODES
+};
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_log.h b/drivers/staging/lustre/lustre/include/linux/lustre_log.h
new file mode 100644
index 000000000000..e9c8e56737d2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_log.h
@@ -0,0 +1,57 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lustre_log.h
+ *
+ * Generic infrastructure for managing a collection of logs.
+ * These logs are used for:
+ *  - orphan recovery: OST adds record on create
+ *  - mtime/size consistency: the OST adds a record on first write
+ *  - open/unlinked objects: OST adds a record on destroy
+ *
+ *  - mds unlink log: the MDS adds an entry upon delete
+ *
+ *  - raid1 replication log between OST's
+ *  - MDS replication logs
+ */
+
+#ifndef _LINUX_LUSTRE_LOG_H
+#define _LINUX_LUSTRE_LOG_H
+
+#ifndef _LUSTRE_LOG_H
+#error Do not #include this file directly. #include <lustre_log.h> instead
+#endif
+
+#define LUSTRE_LOG_SERVER
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_net.h b/drivers/staging/lustre/lustre/include/linux/lustre_net.h
new file mode 100644
index 000000000000..2d7c425d7012
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_net.h
@@ -0,0 +1,50 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LUSTRE_NET_H
+#define _LINUX_LUSTRE_NET_H
+
+#ifndef _LUSTRE_NET_H
+#error Do not #include this file directly. #include <lustre_net.h> instead
+#endif
+
+#include <linux/version.h>
+#include <linux/workqueue.h>
+
+/* XXX Liang: should be moved to other header instead of here */
+#ifndef WITH_GROUP_INFO
+#define WITH_GROUP_INFO
+#endif
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
new file mode 100644
index 000000000000..a8e9c0c8ffd2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
@@ -0,0 +1,83 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef LUSTRE_PATCHLESS_COMPAT_H
+#define LUSTRE_PATCHLESS_COMPAT_H
+
+#include <linux/fs.h>
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/hash.h>
+
+
+#define ll_delete_from_page_cache(page) delete_from_page_cache(page)
+
+static inline void
+truncate_complete_page(struct address_space *mapping, struct page *page)
+{
+	if (page->mapping != mapping)
+		return;
+
+	if (PagePrivate(page))
+		page->mapping->a_ops->invalidatepage(page, 0);
+
+	cancel_dirty_page(page, PAGE_SIZE);
+	ClearPageMappedToDisk(page);
+	ll_delete_from_page_cache(page);
+}
+
+#ifdef ATTR_OPEN
+# define ATTR_FROM_OPEN ATTR_OPEN
+#else
+# ifndef ATTR_FROM_OPEN
+#  define ATTR_FROM_OPEN 0
+# endif
+#endif /* ATTR_OPEN */
+
+#ifndef ATTR_RAW
+#define ATTR_RAW 0
+#endif
+
+#ifndef ATTR_CTIME_SET
+/*
+ * set ATTR_CTIME_SET to a high value to avoid any risk of collision with other
+ * ATTR_* attributes (see bug 13828)
+ */
+#define ATTR_CTIME_SET (1 << 28)
+#endif
+
+#endif /* LUSTRE_PATCHLESS_COMPAT_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_quota.h b/drivers/staging/lustre/lustre/include/linux/lustre_quota.h
new file mode 100644
index 000000000000..421866b004cf
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_quota.h
@@ -0,0 +1,47 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_LUSTRE_QUOTA_H
+#define _LINUX_LUSTRE_QUOTA_H
+
+#ifndef _LUSTRE_QUOTA_H
+#error Do not #include this file directly. #include <lustre_quota.h> instead
+#endif
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/quotaops.h>
+
+#endif /* _LUSTRE_QUOTA_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_user.h b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
new file mode 100644
index 000000000000..ebaf92977f7f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
@@ -0,0 +1,67 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lustre_user.h
+ *
+ * Lustre public user-space interface definitions.
+ */
+
+#ifndef _LINUX_LUSTRE_USER_H
+#define _LINUX_LUSTRE_USER_H
+
+# include <linux/version.h>
+# include <linux/quota.h>
+
+/*
+ * asm-x86_64/processor.h on some SLES 9 distros seems to use
+ * kernel-only typedefs.  fortunately skipping it altogether is ok
+ * (for now).
+ */
+#define __ASM_X86_64_PROCESSOR_H
+
+#include <linux/string.h>
+
+#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \
+    defined(__craynv) || defined (__mips64__) || defined(__powerpc64__)
+typedef struct stat     lstat_t;
+#define lstat_f	 lstat
+#define HAVE_LOV_USER_MDS_DATA
+#else
+typedef struct stat64   lstat_t;
+#define lstat_f	 lstat64
+#define HAVE_LOV_USER_MDS_DATA
+#endif
+
+#endif /* _LUSTRE_USER_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lvfs.h b/drivers/staging/lustre/lustre/include/linux/lvfs.h
new file mode 100644
index 000000000000..eb59ac7d5946
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lvfs.h
@@ -0,0 +1,134 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/linux/lvfs.h
+ *
+ * lustre VFS/process permission interface
+ */
+
+#ifndef __LINUX_LVFS_H__
+#define __LINUX_LVFS_H__
+
+#ifndef __LVFS_H__
+#error Do not #include this file directly. #include <lvfs.h> instead
+#endif
+
+#include <linux/lustre_compat25.h>
+#include <linux/lustre_common.h>
+#include <linux/lvfs_linux.h>
+
+#define LLOG_LVFS
+
+/* simple.c */
+
+struct lvfs_ucred {
+	__u32		   luc_uid;
+	__u32		   luc_gid;
+	__u32		   luc_fsuid;
+	__u32		   luc_fsgid;
+	kernel_cap_t	luc_cap;
+	__u32		   luc_umask;
+	struct group_info      *luc_ginfo;
+	struct md_identity     *luc_identity;
+};
+
+struct lvfs_callback_ops {
+	struct dentry *(*l_fid2dentry)(__u64 id_ino, __u32 gen, __u64 gr, void *data);
+};
+
+#define OBD_RUN_CTXT_MAGIC      0xC0FFEEAA
+#define OBD_CTXT_DEBUG	  /* development-only debugging */
+struct lvfs_run_ctxt {
+	struct vfsmount	 *pwdmnt;
+	struct dentry	   *pwd;
+	mm_segment_t	     fs;
+	struct lvfs_ucred	luc;
+	int		      ngroups;
+	struct lvfs_callback_ops cb_ops;
+	struct group_info       *group_info;
+	struct dt_device	*dt;
+#ifdef OBD_CTXT_DEBUG
+	__u32		    magic;
+#endif
+};
+
+#ifdef OBD_CTXT_DEBUG
+#define OBD_SET_CTXT_MAGIC(ctxt) (ctxt)->magic = OBD_RUN_CTXT_MAGIC
+#else
+#define OBD_SET_CTXT_MAGIC(ctxt) do {} while(0)
+#endif
+
+
+int lustre_rename(struct dentry *dir, struct vfsmount *mnt, char *oldname,
+		  char *newname);
+
+static inline void l_dput(struct dentry *de)
+{
+	if (!de || IS_ERR(de))
+		return;
+	//shrink_dcache_parent(de);
+	LASSERT(d_count(de) > 0);
+	dput(de);
+}
+
+/* We need to hold the inode semaphore over the dcache lookup itself, or we
+ * run the risk of entering the filesystem lookup path concurrently on SMP
+ * systems, and instantiating two inodes for the same entry.  We still
+ * protect against concurrent addition/removal races with the DLM locking.
+ */
+static inline struct dentry *ll_lookup_one_len(const char *fid_name,
+					       struct dentry *dparent,
+					       int fid_namelen)
+{
+	struct dentry *dchild;
+
+	mutex_lock(&dparent->d_inode->i_mutex);
+	dchild = lookup_one_len(fid_name, dparent, fid_namelen);
+	mutex_unlock(&dparent->d_inode->i_mutex);
+
+	if (IS_ERR(dchild) || dchild->d_inode == NULL)
+		return dchild;
+
+	if (is_bad_inode(dchild->d_inode)) {
+		CERROR("bad inode returned %lu/%u\n",
+		       dchild->d_inode->i_ino, dchild->d_inode->i_generation);
+		dput(dchild);
+		dchild = ERR_PTR(-ENOENT);
+	}
+	return dchild;
+}
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lvfs_linux.h b/drivers/staging/lustre/lustre/include/linux/lvfs_linux.h
new file mode 100644
index 000000000000..140a60f1f0c9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/lvfs_linux.h
@@ -0,0 +1,66 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LVFS_LINUX_H__
+#define __LVFS_LINUX_H__
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+
+#include <lvfs.h>
+
+#define l_file file
+#define l_dentry dentry
+
+#define l_filp_open filp_open
+
+struct lvfs_run_ctxt;
+struct l_file *l_dentry_open(struct lvfs_run_ctxt *, struct l_dentry *,
+			     int flags);
+
+struct l_linux_dirent {
+	struct list_head      lld_list;
+	ino_t	   lld_ino;
+	unsigned long   lld_off;
+	char	    lld_name[LL_FID_NAMELEN];
+};
+struct l_readdir_callback {
+	struct l_linux_dirent *lrc_dirent;
+	struct list_head	    *lrc_list;
+};
+
+#endif /*  __LVFS_LINUX_H__ */
diff --git a/drivers/staging/lustre/lustre/include/linux/obd.h b/drivers/staging/lustre/lustre/include/linux/obd.h
new file mode 100644
index 000000000000..2c36c0d19d06
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/obd.h
@@ -0,0 +1,128 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_OBD_H
+#define __LINUX_OBD_H
+
+#ifndef __OBD_H
+#error Do not #include this file directly. #include <obd.h> instead
+#endif
+
+#include <obd_support.h>
+
+# include <linux/fs.h>
+# include <linux/list.h>
+# include <linux/sched.h>  /* for struct task_struct, for current.h */
+# include <linux/proc_fs.h>
+# include <linux/mount.h>
+# include <linux/lustre_intent.h>
+
+struct ll_iattr {
+	struct iattr	iattr;
+	unsigned int	ia_attr_flags;
+};
+
+#define CLIENT_OBD_LIST_LOCK_DEBUG 1
+
+typedef struct {
+	spinlock_t		lock;
+
+	unsigned long       time;
+	struct task_struct *task;
+	const char	 *func;
+	int		 line;
+} client_obd_lock_t;
+
+static inline void __client_obd_list_lock(client_obd_lock_t *lock,
+					  const char *func, int line)
+{
+	unsigned long cur = jiffies;
+	while (1) {
+		if (spin_trylock(&lock->lock)) {
+			LASSERT(lock->task == NULL);
+			lock->task = current;
+			lock->func = func;
+			lock->line = line;
+			lock->time = jiffies;
+			break;
+		}
+
+		if ((jiffies - cur > 5 * HZ) &&
+		    (jiffies - lock->time > 5 * HZ)) {
+			struct task_struct *task = lock->task;
+
+			if (task == NULL)
+				continue;
+
+			LCONSOLE_WARN("%s:%d: lock %p was acquired"
+				      " by <%s:%d:%s:%d> for %lu seconds.\n",
+				      current->comm, current->pid,
+				      lock, task->comm, task->pid,
+				      lock->func, lock->line,
+				      (jiffies - lock->time) / HZ);
+			LCONSOLE_WARN("====== for process holding the "
+				      "lock =====\n");
+			libcfs_debug_dumpstack(task);
+			LCONSOLE_WARN("====== for current process =====\n");
+			libcfs_debug_dumpstack(NULL);
+			LCONSOLE_WARN("====== end =======\n");
+			cfs_pause(1000 * HZ);
+		}
+		cpu_relax();
+	}
+}
+
+#define client_obd_list_lock(lock) \
+	__client_obd_list_lock(lock, __FUNCTION__, __LINE__)
+
+static inline void client_obd_list_unlock(client_obd_lock_t *lock)
+{
+	LASSERT(lock->task != NULL);
+	lock->task = NULL;
+	lock->time = jiffies;
+	spin_unlock(&lock->lock);
+}
+
+
+static inline void client_obd_list_lock_init(client_obd_lock_t *lock)
+{
+	spin_lock_init(&lock->lock);
+}
+
+static inline void client_obd_list_lock_done(client_obd_lock_t *lock)
+{}
+
+#endif /* __LINUX_OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/obd_class.h b/drivers/staging/lustre/lustre/include/linux/obd_class.h
new file mode 100644
index 000000000000..021ead6639fc
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/obd_class.h
@@ -0,0 +1,58 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_CLASS_OBD_H
+#define __LINUX_CLASS_OBD_H
+
+#ifndef __CLASS_OBD_H
+#error Do not #include this file directly. #include <obd_class.h> instead
+#endif
+
+#include <asm/uaccess.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+
+/* obdo.c */
+void obdo_from_la(struct obdo *dst, struct lu_attr *la, __u64 valid);
+void la_from_obdo(struct lu_attr *la, struct obdo *dst, obd_flag valid);
+void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+#define ll_inode_flags(inode)	 (inode->i_flags)
+
+
+#endif /* __LINUX_OBD_CLASS_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/obd_support.h b/drivers/staging/lustre/lustre/include/linux/obd_support.h
new file mode 100644
index 000000000000..9166503408aa
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/linux/obd_support.h
@@ -0,0 +1,63 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LINUX_OBD_SUPPORT
+#define _LINUX_OBD_SUPPORT
+
+#ifndef _OBD_SUPPORT
+#error Do not #include this file directly. #include <obd_support.h> instead
+#endif
+
+#ifdef CONFIG_X86
+#include <asm/cpufeature.h>
+#endif
+#include <asm/processor.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
+#include <linux/lustre_compat25.h>
+#include <linux/lustre_common.h>
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+
+
+# include <linux/types.h>
+# include <linux/blkdev.h>
+# include <lvfs.h>
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
new file mode 100644
index 000000000000..55f182205d78
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h
@@ -0,0 +1,1043 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lprocfs_status.h
+ *
+ * Top level header file for LProc SNMP
+ *
+ * Author: Hariharan Thantry thantry@users.sourceforge.net
+ */
+#ifndef _LPROCFS_SNMP_H
+#define _LPROCFS_SNMP_H
+
+#include <linux/lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+#include <linux/libcfs/params_tree.h>
+
+struct lprocfs_vars {
+	const char		*name;
+	struct file_operations	*fops;
+	void			*data;
+	/**
+	 * /proc file mode.
+	 */
+	umode_t			proc_mode;
+};
+
+struct lprocfs_static_vars {
+	struct lprocfs_vars *module_vars;
+	struct lprocfs_vars *obd_vars;
+};
+
+/* if we find more consumers this could be generalized */
+#define OBD_HIST_MAX 32
+struct obd_histogram {
+	spinlock_t	oh_lock;
+	unsigned long	oh_buckets[OBD_HIST_MAX];
+};
+
+enum {
+	BRW_R_PAGES = 0,
+	BRW_W_PAGES,
+	BRW_R_RPC_HIST,
+	BRW_W_RPC_HIST,
+	BRW_R_IO_TIME,
+	BRW_W_IO_TIME,
+	BRW_R_DISCONT_PAGES,
+	BRW_W_DISCONT_PAGES,
+	BRW_R_DISCONT_BLOCKS,
+	BRW_W_DISCONT_BLOCKS,
+	BRW_R_DISK_IOSIZE,
+	BRW_W_DISK_IOSIZE,
+	BRW_R_DIO_FRAGS,
+	BRW_W_DIO_FRAGS,
+	BRW_LAST,
+};
+
+struct brw_stats {
+	struct obd_histogram hist[BRW_LAST];
+};
+
+enum {
+	RENAME_SAMEDIR_SIZE = 0,
+	RENAME_CROSSDIR_SRC_SIZE,
+	RENAME_CROSSDIR_TGT_SIZE,
+	RENAME_LAST,
+};
+
+struct rename_stats {
+	struct obd_histogram hist[RENAME_LAST];
+};
+
+/* An lprocfs counter can be configured using the enum bit masks below.
+ *
+ * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
+ * protects this counter from concurrent updates. If not specified,
+ * lprocfs an internal per-counter lock variable. External locks are
+ * not used to protect counter increments, but are used to protect
+ * counter readout and resets.
+ *
+ * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter samples,
+ * (i.e. counter can be incremented by more than "1"). When specified,
+ * the counter maintains min, max and sum in addition to a simple
+ * invocation count. This allows averages to be be computed.
+ * If not specified, the counter is an increment-by-1 counter.
+ * min, max, sum, etc. are not maintained.
+ *
+ * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of
+ * squares (for multi-valued counter samples only). This allows
+ * external computation of standard deviation, but involves a 64-bit
+ * multiply per counter increment.
+ */
+
+enum {
+	LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
+	LPROCFS_CNTR_AVGMINMAX    = 0x0002,
+	LPROCFS_CNTR_STDDEV       = 0x0004,
+
+	/* counter data type */
+	LPROCFS_TYPE_REGS	 = 0x0100,
+	LPROCFS_TYPE_BYTES	= 0x0200,
+	LPROCFS_TYPE_PAGES	= 0x0400,
+	LPROCFS_TYPE_CYCLE	= 0x0800,
+};
+
+#define LC_MIN_INIT ((~(__u64)0) >> 1)
+
+struct lprocfs_counter_header {
+	unsigned int		lc_config;
+	const char		*lc_name;   /* must be static */
+	const char		*lc_units;  /* must be static */
+};
+
+struct lprocfs_counter {
+	__s64	lc_count;
+	__s64	lc_min;
+	__s64	lc_max;
+	__s64	lc_sumsquare;
+	/*
+	 * Every counter has lc_array_sum[0], while lc_array_sum[1] is only
+	 * for irq context counter, i.e. stats with
+	 * LPROCFS_STATS_FLAG_IRQ_SAFE flag, its counter need
+	 * lc_array_sum[1]
+	 */
+	__s64	lc_array_sum[1];
+};
+#define lc_sum		lc_array_sum[0]
+#define lc_sum_irq	lc_array_sum[1]
+
+struct lprocfs_percpu {
+#ifndef __GNUC__
+	__s64			pad;
+#endif
+	struct lprocfs_counter lp_cntr[0];
+};
+
+#define LPROCFS_GET_NUM_CPU 0x0001
+#define LPROCFS_GET_SMP_ID  0x0002
+
+enum lprocfs_stats_flags {
+	LPROCFS_STATS_FLAG_NONE     = 0x0000, /* per cpu counter */
+	LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu
+					       * area and need locking */
+	LPROCFS_STATS_FLAG_IRQ_SAFE = 0x0002, /* alloc need irq safe */
+};
+
+enum lprocfs_fields_flags {
+	LPROCFS_FIELDS_FLAGS_CONFIG     = 0x0001,
+	LPROCFS_FIELDS_FLAGS_SUM	= 0x0002,
+	LPROCFS_FIELDS_FLAGS_MIN	= 0x0003,
+	LPROCFS_FIELDS_FLAGS_MAX	= 0x0004,
+	LPROCFS_FIELDS_FLAGS_AVG	= 0x0005,
+	LPROCFS_FIELDS_FLAGS_SUMSQUARE  = 0x0006,
+	LPROCFS_FIELDS_FLAGS_COUNT      = 0x0007,
+};
+
+struct lprocfs_stats {
+	/* # of counters */
+	unsigned short			ls_num;
+	/* 1 + the biggest cpu # whose ls_percpu slot has been allocated */
+	unsigned short			ls_biggest_alloc_num;
+	enum lprocfs_stats_flags	ls_flags;
+	/* Lock used when there are no percpu stats areas; For percpu stats,
+	 * it is used to protect ls_biggest_alloc_num change */
+	spinlock_t			ls_lock;
+
+	/* has ls_num of counter headers */
+	struct lprocfs_counter_header	*ls_cnt_header;
+	struct lprocfs_percpu		*ls_percpu[0];
+};
+
+#define OPC_RANGE(seg) (seg ## _LAST_OPC - seg ## _FIRST_OPC)
+
+/* Pack all opcodes down into a single monotonically increasing index */
+static inline int opcode_offset(__u32 opc) {
+	if (opc < OST_LAST_OPC) {
+		 /* OST opcode */
+		return (opc - OST_FIRST_OPC);
+	} else if (opc < MDS_LAST_OPC) {
+		/* MDS opcode */
+		return (opc - MDS_FIRST_OPC +
+			OPC_RANGE(OST));
+	} else if (opc < LDLM_LAST_OPC) {
+		/* LDLM Opcode */
+		return (opc - LDLM_FIRST_OPC +
+			OPC_RANGE(MDS) +
+			OPC_RANGE(OST));
+	} else if (opc < MGS_LAST_OPC) {
+		/* MGS Opcode */
+		return (opc - MGS_FIRST_OPC +
+			OPC_RANGE(LDLM) +
+			OPC_RANGE(MDS) +
+			OPC_RANGE(OST));
+	} else if (opc < OBD_LAST_OPC) {
+		/* OBD Ping */
+		return (opc - OBD_FIRST_OPC +
+			OPC_RANGE(MGS) +
+			OPC_RANGE(LDLM) +
+			OPC_RANGE(MDS) +
+			OPC_RANGE(OST));
+	} else if (opc < LLOG_LAST_OPC) {
+		/* LLOG Opcode */
+		return (opc - LLOG_FIRST_OPC +
+			OPC_RANGE(OBD) +
+			OPC_RANGE(MGS) +
+			OPC_RANGE(LDLM) +
+			OPC_RANGE(MDS) +
+			OPC_RANGE(OST));
+	} else if (opc < QUOTA_LAST_OPC) {
+		/* LQUOTA Opcode */
+		return (opc - QUOTA_FIRST_OPC +
+			OPC_RANGE(LLOG) +
+			OPC_RANGE(OBD) +
+			OPC_RANGE(MGS) +
+			OPC_RANGE(LDLM) +
+			OPC_RANGE(MDS) +
+			OPC_RANGE(OST));
+	} else if (opc < SEQ_LAST_OPC) {
+		/* SEQ opcode */
+		return (opc - SEQ_FIRST_OPC +
+			OPC_RANGE(QUOTA) +
+			OPC_RANGE(LLOG) +
+			OPC_RANGE(OBD) +
+			OPC_RANGE(MGS) +
+			OPC_RANGE(LDLM) +
+			OPC_RANGE(MDS) +
+			OPC_RANGE(OST));
+	} else if (opc < SEC_LAST_OPC) {
+		/* SEC opcode */
+		return (opc - SEC_FIRST_OPC +
+			OPC_RANGE(SEQ) +
+			OPC_RANGE(QUOTA) +
+			OPC_RANGE(LLOG) +
+			OPC_RANGE(OBD) +
+			OPC_RANGE(MGS) +
+			OPC_RANGE(LDLM) +
+			OPC_RANGE(MDS) +
+			OPC_RANGE(OST));
+	} else if (opc < FLD_LAST_OPC) {
+		/* FLD opcode */
+		 return (opc - FLD_FIRST_OPC +
+			OPC_RANGE(SEC) +
+			OPC_RANGE(SEQ) +
+			OPC_RANGE(QUOTA) +
+			OPC_RANGE(LLOG) +
+			OPC_RANGE(OBD) +
+			OPC_RANGE(MGS) +
+			OPC_RANGE(LDLM) +
+			OPC_RANGE(MDS) +
+			OPC_RANGE(OST));
+	} else if (opc < UPDATE_LAST_OPC) {
+		/* update opcode */
+		return (opc - UPDATE_FIRST_OPC +
+			OPC_RANGE(FLD) +
+			OPC_RANGE(SEC) +
+			OPC_RANGE(SEQ) +
+			OPC_RANGE(QUOTA) +
+			OPC_RANGE(LLOG) +
+			OPC_RANGE(OBD) +
+			OPC_RANGE(MGS) +
+			OPC_RANGE(LDLM) +
+			OPC_RANGE(MDS) +
+			OPC_RANGE(OST));
+	} else {
+		/* Unknown Opcode */
+		return -1;
+	}
+}
+
+
+#define LUSTRE_MAX_OPCODES (OPC_RANGE(OST)  + \
+			    OPC_RANGE(MDS)  + \
+			    OPC_RANGE(LDLM) + \
+			    OPC_RANGE(MGS)  + \
+			    OPC_RANGE(OBD)  + \
+			    OPC_RANGE(LLOG) + \
+			    OPC_RANGE(SEC)  + \
+			    OPC_RANGE(SEQ)  + \
+			    OPC_RANGE(SEC)  + \
+			    OPC_RANGE(FLD)  + \
+			    OPC_RANGE(UPDATE))
+
+#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR)  + \
+			    OPC_RANGE(EXTRA))
+
+enum {
+	PTLRPC_REQWAIT_CNTR = 0,
+	PTLRPC_REQQDEPTH_CNTR,
+	PTLRPC_REQACTIVE_CNTR,
+	PTLRPC_TIMEOUT,
+	PTLRPC_REQBUF_AVAIL_CNTR,
+	PTLRPC_LAST_CNTR
+};
+
+#define PTLRPC_FIRST_CNTR PTLRPC_REQWAIT_CNTR
+
+enum {
+	LDLM_GLIMPSE_ENQUEUE = 0,
+	LDLM_PLAIN_ENQUEUE,
+	LDLM_EXTENT_ENQUEUE,
+	LDLM_FLOCK_ENQUEUE,
+	LDLM_IBITS_ENQUEUE,
+	MDS_REINT_SETATTR,
+	MDS_REINT_CREATE,
+	MDS_REINT_LINK,
+	MDS_REINT_UNLINK,
+	MDS_REINT_RENAME,
+	MDS_REINT_OPEN,
+	MDS_REINT_SETXATTR,
+	BRW_READ_BYTES,
+	BRW_WRITE_BYTES,
+	EXTRA_LAST_OPC
+};
+
+#define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE
+/* class_obd.c */
+extern proc_dir_entry_t *proc_lustre_root;
+
+struct obd_device;
+struct obd_histogram;
+
+/* Days / hours / mins / seconds format */
+struct dhms {
+	int d,h,m,s;
+};
+static inline void s2dhms(struct dhms *ts, time_t secs)
+{
+	ts->d = secs / 86400;
+	secs = secs % 86400;
+	ts->h = secs / 3600;
+	secs = secs % 3600;
+	ts->m = secs / 60;
+	ts->s = secs % 60;
+}
+#define DHMS_FMT "%dd%dh%02dm%02ds"
+#define DHMS_VARS(x) (x)->d, (x)->h, (x)->m, (x)->s
+
+#define JOBSTATS_JOBID_VAR_MAX_LEN	20
+#define JOBSTATS_DISABLE		"disable"
+#define JOBSTATS_PROCNAME_UID		"procname_uid"
+
+typedef void (*cntr_init_callback)(struct lprocfs_stats *stats);
+
+struct obd_job_stats {
+	cfs_hash_t	*ojs_hash;
+	struct list_head	 ojs_list;
+	rwlock_t       ojs_lock; /* protect the obj_list */
+	cntr_init_callback ojs_cntr_init_fn;
+	int		ojs_cntr_num;
+	int		ojs_cleanup_interval;
+	time_t		   ojs_last_cleanup;
+};
+
+#ifdef LPROCFS
+
+extern int lprocfs_stats_alloc_one(struct lprocfs_stats *stats,
+				   unsigned int cpuid);
+/*
+ * \return value
+ *      < 0     : on error (only possible for opc as LPROCFS_GET_SMP_ID)
+ */
+static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int opc,
+				     unsigned long *flags)
+{
+	int		rc = 0;
+
+	switch (opc) {
+	default:
+		LBUG();
+
+	case LPROCFS_GET_SMP_ID:
+		if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+			if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+				spin_lock_irqsave(&stats->ls_lock, *flags);
+			else
+				spin_lock(&stats->ls_lock);
+			return 0;
+		} else {
+			unsigned int cpuid = get_cpu();
+
+			if (unlikely(stats->ls_percpu[cpuid] == NULL)) {
+				rc = lprocfs_stats_alloc_one(stats, cpuid);
+				if (rc < 0) {
+					put_cpu();
+					return rc;
+				}
+			}
+			return cpuid;
+		}
+
+	case LPROCFS_GET_NUM_CPU:
+		if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+			if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+				spin_lock_irqsave(&stats->ls_lock, *flags);
+			else
+				spin_lock(&stats->ls_lock);
+			return 1;
+		} else {
+			return stats->ls_biggest_alloc_num;
+		}
+	}
+}
+
+static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats, int opc,
+					unsigned long *flags)
+{
+	switch (opc) {
+	default:
+		LBUG();
+
+	case LPROCFS_GET_SMP_ID:
+		if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+			if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) {
+				spin_unlock_irqrestore(&stats->ls_lock,
+							   *flags);
+			} else {
+				spin_unlock(&stats->ls_lock);
+			}
+		} else {
+			put_cpu();
+		}
+		return;
+
+	case LPROCFS_GET_NUM_CPU:
+		if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+			if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) {
+				spin_unlock_irqrestore(&stats->ls_lock,
+							   *flags);
+			} else {
+				spin_unlock(&stats->ls_lock);
+			}
+		}
+		return;
+	}
+}
+
+static inline unsigned int
+lprocfs_stats_counter_size(struct lprocfs_stats *stats)
+{
+	unsigned int percpusize;
+
+	percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]);
+
+	/* irq safe stats need lc_array_sum[1] */
+	if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+		percpusize += stats->ls_num * sizeof(__s64);
+
+	if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0)
+		percpusize = L1_CACHE_ALIGN(percpusize);
+
+	return percpusize;
+}
+
+static inline struct lprocfs_counter *
+lprocfs_stats_counter_get(struct lprocfs_stats *stats, unsigned int cpuid,
+			  int index)
+{
+	struct lprocfs_counter *cntr;
+
+	cntr = &stats->ls_percpu[cpuid]->lp_cntr[index];
+
+	if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+		cntr = (void *)cntr + index * sizeof(__s64);
+
+	return cntr;
+}
+
+/* Two optimized LPROCFS counter increment functions are provided:
+ *     lprocfs_counter_incr(cntr, value) - optimized for by-one counters
+ *     lprocfs_counter_add(cntr) - use for multi-valued counters
+ * Counter data layout allows config flag, counter lock and the
+ * count itself to reside within a single cache line.
+ */
+
+extern void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
+				long amount);
+extern void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx,
+				long amount);
+
+#define lprocfs_counter_incr(stats, idx) \
+	lprocfs_counter_add(stats, idx, 1)
+#define lprocfs_counter_decr(stats, idx) \
+	lprocfs_counter_sub(stats, idx, 1)
+
+extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
+				 struct lprocfs_counter_header *header,
+				 enum lprocfs_stats_flags flags,
+				 enum lprocfs_fields_flags field);
+static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
+					    int idx,
+					    enum lprocfs_fields_flags field)
+{
+	int	      i;
+	unsigned int  num_cpu;
+	unsigned long flags	= 0;
+	__u64	      ret	= 0;
+
+	LASSERT(stats != NULL);
+
+	num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
+	for (i = 0; i < num_cpu; i++) {
+		if (stats->ls_percpu[i] == NULL)
+			continue;
+		ret += lprocfs_read_helper(
+				lprocfs_stats_counter_get(stats, i, idx),
+				&stats->ls_cnt_header[idx], stats->ls_flags,
+				field);
+	}
+	lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
+	return ret;
+}
+
+extern struct lprocfs_stats *
+lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags);
+extern void lprocfs_clear_stats(struct lprocfs_stats *stats);
+extern void lprocfs_free_stats(struct lprocfs_stats **stats);
+extern void lprocfs_init_ops_stats(int num_private_stats,
+				   struct lprocfs_stats *stats);
+extern void lprocfs_init_mps_stats(int num_private_stats,
+				   struct lprocfs_stats *stats);
+extern void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats);
+extern int lprocfs_alloc_obd_stats(struct obd_device *obddev,
+				   unsigned int num_private_stats);
+extern int lprocfs_alloc_md_stats(struct obd_device *obddev,
+				  unsigned int num_private_stats);
+extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
+				 unsigned conf, const char *name,
+				 const char *units);
+extern void lprocfs_free_obd_stats(struct obd_device *obddev);
+extern void lprocfs_free_md_stats(struct obd_device *obddev);
+struct obd_export;
+struct nid_stat;
+extern int lprocfs_add_clear_entry(struct obd_device * obd,
+				   proc_dir_entry_t *entry);
+extern int lprocfs_exp_setup(struct obd_export *exp,
+			     lnet_nid_t *peer_nid, int *newnid);
+extern int lprocfs_exp_cleanup(struct obd_export *exp);
+extern proc_dir_entry_t *lprocfs_add_simple(struct proc_dir_entry *root,
+						char *name,
+						void *data,
+						struct file_operations *fops);
+extern struct proc_dir_entry *
+lprocfs_add_symlink(const char *name, struct proc_dir_entry *parent,
+		    const char *format, ...);
+extern void lprocfs_free_per_client_stats(struct obd_device *obd);
+extern int
+lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
+			      unsigned long count, void *data);
+extern int lprocfs_nid_stats_clear_read(struct seq_file *m, void *data);
+
+extern int lprocfs_register_stats(proc_dir_entry_t *root, const char *name,
+				  struct lprocfs_stats *stats);
+
+/* lprocfs_status.c */
+extern int lprocfs_add_vars(proc_dir_entry_t *root,
+			    struct lprocfs_vars *var,
+			    void *data);
+
+extern proc_dir_entry_t *lprocfs_register(const char *name,
+					      proc_dir_entry_t *parent,
+					      struct lprocfs_vars *list,
+					      void *data);
+
+extern void lprocfs_remove(proc_dir_entry_t **root);
+extern void lprocfs_remove_proc_entry(const char *name,
+				      struct proc_dir_entry *parent);
+
+extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
+extern int lprocfs_obd_cleanup(struct obd_device *obd);
+
+extern int lprocfs_seq_create(proc_dir_entry_t *parent, const char *name,
+			      umode_t mode,
+			      const struct file_operations *seq_fops,
+			      void *data);
+extern int lprocfs_obd_seq_create(struct obd_device *dev, const char *name,
+				  umode_t mode,
+				  const struct file_operations *seq_fops,
+				  void *data);
+
+/* Generic callbacks */
+
+extern int lprocfs_rd_u64(struct seq_file *m, void *data);
+extern int lprocfs_rd_atomic(struct seq_file *m, void *data);
+extern int lprocfs_wr_atomic(struct file *file, const char *buffer,
+			     unsigned long count, void *data);
+extern int lprocfs_rd_uint(struct seq_file *m, void *data);
+extern int lprocfs_wr_uint(struct file *file, const char *buffer,
+			   unsigned long count, void *data);
+extern int lprocfs_rd_uuid(struct seq_file *m, void *data);
+extern int lprocfs_rd_name(struct seq_file *m, void *data);
+extern int lprocfs_rd_server_uuid(struct seq_file *m, void *data);
+extern int lprocfs_rd_conn_uuid(struct seq_file *m, void *data);
+extern int lprocfs_rd_import(struct seq_file *m, void *data);
+extern int lprocfs_rd_state(struct seq_file *m, void *data);
+extern int lprocfs_rd_connect_flags(struct seq_file *m, void *data);
+extern int lprocfs_rd_num_exports(struct seq_file *m, void *data);
+extern int lprocfs_rd_numrefs(struct seq_file *m, void *data);
+
+struct adaptive_timeout;
+extern int lprocfs_at_hist_helper(struct seq_file *m,
+				  struct adaptive_timeout *at);
+extern int lprocfs_rd_timeouts(struct seq_file *m, void *data);
+extern int lprocfs_wr_timeouts(struct file *file, const char *buffer,
+			       unsigned long count, void *data);
+extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
+			    size_t count, loff_t *off);
+extern int lprocfs_wr_ping(struct file *file, const char *buffer,
+			   size_t count, loff_t *off);
+extern int lprocfs_wr_import(struct file *file, const char *buffer,
+		      size_t count, loff_t *off);
+extern int lprocfs_rd_pinger_recov(struct seq_file *m, void *n);
+extern int lprocfs_wr_pinger_recov(struct file *file, const char *buffer,
+				   size_t count, loff_t *off);
+
+/* Statfs helpers */
+extern int lprocfs_rd_blksize(struct seq_file *m, void *data);
+extern int lprocfs_rd_kbytestotal(struct seq_file *m, void *data);
+extern int lprocfs_rd_kbytesfree(struct seq_file *m, void *data);
+extern int lprocfs_rd_kbytesavail(struct seq_file *m, void *data);
+extern int lprocfs_rd_filestotal(struct seq_file *m, void *data);
+extern int lprocfs_rd_filesfree(struct seq_file *m, void *data);
+
+extern int lprocfs_write_helper(const char *buffer, unsigned long count,
+				int *val);
+extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
+				     int *val, int mult);
+extern int lprocfs_seq_read_frac_helper(struct seq_file *m, long val, int mult);
+extern int lprocfs_read_frac_helper(char *buffer, unsigned long count,
+				    long val, int mult);
+extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count,
+				    __u64 *val);
+extern int lprocfs_write_frac_u64_helper(const char *buffer,
+					 unsigned long count,
+					 __u64 *val, int mult);
+char *lprocfs_find_named_value(const char *buffer, const char *name,
+				unsigned long *count);
+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value);
+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value);
+void lprocfs_oh_clear(struct obd_histogram *oh);
+unsigned long lprocfs_oh_sum(struct obd_histogram *oh);
+
+void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
+			   struct lprocfs_counter *cnt);
+
+extern int lprocfs_single_release(cfs_inode_t *, struct file *);
+extern int lprocfs_seq_release(cfs_inode_t *, struct file *);
+
+/* You must use these macros when you want to refer to
+ * the import in a client obd_device for a lprocfs entry */
+#define LPROCFS_CLIMP_CHECK(obd) do {	   \
+	typecheck(struct obd_device *, obd);    \
+	down_read(&(obd)->u.cli.cl_sem);    \
+	if ((obd)->u.cli.cl_import == NULL) {   \
+	     up_read(&(obd)->u.cli.cl_sem); \
+	     return -ENODEV;		    \
+	}				       \
+} while(0)
+#define LPROCFS_CLIMP_EXIT(obd)		 \
+	up_read(&(obd)->u.cli.cl_sem);
+
+
+/* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only
+  proc entries; otherwise, you will define name##_seq_write function also for
+  a read-write proc entry, and then call LPROC_SEQ_SEQ instead. Finally,
+  call lprocfs_obd_seq_create(obd, filename, 0444, &name#_fops, data); */
+#define __LPROC_SEQ_FOPS(name, custom_seq_write)			\
+static int name##_single_open(cfs_inode_t *inode, struct file *file)	\
+{									\
+	return single_open(file, name##_seq_show, PDE_DATA(inode));	\
+}									\
+struct file_operations name##_fops = {				     \
+	.owner   = THIS_MODULE,					    \
+	.open    = name##_single_open,				     \
+	.read    = seq_read,					       \
+	.write   = custom_seq_write,				       \
+	.llseek  = seq_lseek,					      \
+	.release = lprocfs_single_release,				 \
+}
+
+#define LPROC_SEQ_FOPS_RO(name)	 __LPROC_SEQ_FOPS(name, NULL)
+#define LPROC_SEQ_FOPS(name)	    __LPROC_SEQ_FOPS(name, name##_seq_write)
+
+#define LPROC_SEQ_FOPS_RO_TYPE(name, type)				\
+	static int name##_##type##_seq_show(struct seq_file *m, void *v)\
+	{								\
+		return lprocfs_rd_##type(m, m->private);		\
+	}								\
+	LPROC_SEQ_FOPS_RO(name##_##type)
+
+#define LPROC_SEQ_FOPS_RW_TYPE(name, type)				\
+	static int name##_##type##_seq_show(struct seq_file *m, void *v)\
+	{								\
+		return lprocfs_rd_##type(m, m->private);		\
+	}								\
+	static ssize_t name##_##type##_seq_write(struct file *file,	\
+			const char *buffer, size_t count, loff_t *off)	\
+	{								\
+		struct seq_file *seq = file->private_data;		\
+		return lprocfs_wr_##type(file, buffer,			\
+					 count, seq->private);		\
+	}								\
+	LPROC_SEQ_FOPS(name##_##type);
+
+#define LPROC_SEQ_FOPS_WR_ONLY(name, type)				\
+	static ssize_t name##_##type##_write(struct file *file,		\
+			const char *buffer, size_t count, loff_t *off)	\
+	{								\
+		return lprocfs_wr_##type(file, buffer, count, off);	\
+	}								\
+	static int name##_##type##_open(cfs_inode_t *inode, struct file *file) \
+	{								\
+		return single_open(file, NULL, PDE_DATA(inode));	\
+	}								\
+	struct file_operations name##_##type##_fops = {			\
+		.open	= name##_##type##_open,				\
+		.write	= name##_##type##_write,			\
+		.release = lprocfs_single_release,			\
+	};
+
+/* lprocfs_jobstats.c */
+int lprocfs_job_stats_log(struct obd_device *obd, char *jobid,
+			  int event, long amount);
+void lprocfs_job_stats_fini(struct obd_device *obd);
+int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
+			   cntr_init_callback fn);
+int lprocfs_rd_job_interval(struct seq_file *m, void *data);
+int lprocfs_wr_job_interval(struct file *file, const char *buffer,
+			    unsigned long count, void *data);
+
+/* lproc_ptlrpc.c */
+struct ptlrpc_request;
+extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
+
+/* lproc_status.c */
+int lprocfs_obd_rd_max_pages_per_rpc(struct seq_file *m, void *data);
+int lprocfs_obd_wr_max_pages_per_rpc(struct file *file, const char *buffer,
+				     size_t count, loff_t *off);
+
+/* all quota proc functions */
+extern int lprocfs_quota_rd_bunit(char *page, char **start,
+				  loff_t off, int count,
+				  int *eof, void *data);
+extern int lprocfs_quota_wr_bunit(struct file *file, const char *buffer,
+				  unsigned long count, void *data);
+extern int lprocfs_quota_rd_btune(char *page, char **start,
+				  loff_t off, int count,
+				  int *eof, void *data);
+extern int lprocfs_quota_wr_btune(struct file *file, const char *buffer,
+				  unsigned long count, void *data);
+extern int lprocfs_quota_rd_iunit(char *page, char **start,
+				  loff_t off, int count,
+				  int *eof, void *data);
+extern int lprocfs_quota_wr_iunit(struct file *file, const char *buffer,
+				  unsigned long count, void *data);
+extern int lprocfs_quota_rd_itune(char *page, char **start,
+				  loff_t off, int count,
+				  int *eof, void *data);
+extern int lprocfs_quota_wr_itune(struct file *file, const char *buffer,
+				  unsigned long count, void *data);
+extern int lprocfs_quota_rd_type(char *page, char **start, loff_t off, int count,
+				 int *eof, void *data);
+extern int lprocfs_quota_wr_type(struct file *file, const char *buffer,
+				 unsigned long count, void *data);
+extern int lprocfs_quota_rd_switch_seconds(char *page, char **start, loff_t off,
+					   int count, int *eof, void *data);
+extern int lprocfs_quota_wr_switch_seconds(struct file *file,
+					   const char *buffer,
+					   unsigned long count, void *data);
+extern int lprocfs_quota_rd_sync_blk(char *page, char **start, loff_t off,
+				     int count, int *eof, void *data);
+extern int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer,
+				     unsigned long count, void *data);
+extern int lprocfs_quota_rd_switch_qs(char *page, char **start, loff_t off,
+				      int count, int *eof, void *data);
+extern int lprocfs_quota_wr_switch_qs(struct file *file,
+				      const char *buffer,
+				      unsigned long count, void *data);
+extern int lprocfs_quota_rd_boundary_factor(char *page, char **start, loff_t off,
+					    int count, int *eof, void *data);
+extern int lprocfs_quota_wr_boundary_factor(struct file *file,
+					    const char *buffer,
+					    unsigned long count, void *data);
+extern int lprocfs_quota_rd_least_bunit(char *page, char **start, loff_t off,
+					int count, int *eof, void *data);
+extern int lprocfs_quota_wr_least_bunit(struct file *file,
+					const char *buffer,
+					unsigned long count, void *data);
+extern int lprocfs_quota_rd_least_iunit(char *page, char **start, loff_t off,
+					int count, int *eof, void *data);
+extern int lprocfs_quota_wr_least_iunit(struct file *file,
+					const char *buffer,
+					unsigned long count, void *data);
+extern int lprocfs_quota_rd_qs_factor(char *page, char **start, loff_t off,
+				      int count, int *eof, void *data);
+extern int lprocfs_quota_wr_qs_factor(struct file *file,
+				      const char *buffer,
+				      unsigned long count, void *data);
+
+
+
+#else
+/* LPROCFS is not defined */
+
+#define proc_lustre_root NULL
+
+static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
+				       int index, long amount)
+{ return; }
+static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
+					int index)
+{ return; }
+static inline void lprocfs_counter_sub(struct lprocfs_stats *stats,
+				       int index, long amount)
+{ return; }
+static inline void lprocfs_counter_decr(struct lprocfs_stats *stats,
+					int index)
+{ return; }
+static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
+					int index, unsigned conf,
+					const char *name, const char *units)
+{ return; }
+
+static inline __u64 lc_read_helper(struct lprocfs_counter *lc,
+				   enum lprocfs_fields_flags field)
+{ return 0; }
+
+/* NB: we return !NULL to satisfy error checker */
+static inline struct lprocfs_stats *
+lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags)
+{ return (struct lprocfs_stats *)1; }
+static inline void lprocfs_clear_stats(struct lprocfs_stats *stats)
+{ return; }
+static inline void lprocfs_free_stats(struct lprocfs_stats **stats)
+{ return; }
+static inline int lprocfs_register_stats(proc_dir_entry_t *root,
+					 const char *name,
+					 struct lprocfs_stats *stats)
+{ return 0; }
+static inline void lprocfs_init_ops_stats(int num_private_stats,
+					  struct lprocfs_stats *stats)
+{ return; }
+static inline void lprocfs_init_mps_stats(int num_private_stats,
+					  struct lprocfs_stats *stats)
+{ return; }
+static inline void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
+{ return; }
+static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
+					  unsigned int num_private_stats)
+{ return 0; }
+static inline int lprocfs_alloc_md_stats(struct obd_device *obddev,
+					 unsigned int num_private_stats)
+{ return 0; }
+static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
+{ return; }
+static inline void lprocfs_free_md_stats(struct obd_device *obddev)
+{ return; }
+
+struct obd_export;
+static inline int lprocfs_add_clear_entry(struct obd_export *exp)
+{ return 0; }
+static inline int lprocfs_exp_setup(struct obd_export *exp,lnet_nid_t *peer_nid,
+				    int *newnid)
+{ return 0; }
+static inline int lprocfs_exp_cleanup(struct obd_export *exp)
+{ return 0; }
+static inline proc_dir_entry_t *
+lprocfs_add_simple(struct proc_dir_entry *root, char *name,
+		   void *data, struct file_operations *fops)
+{return 0; }
+static inline struct proc_dir_entry *
+lprocfs_add_symlink(const char *name, struct proc_dir_entry *parent,
+		    const char *format, ...)
+{return NULL; }
+static inline void lprocfs_free_per_client_stats(struct obd_device *obd)
+{ return; }
+static inline
+int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
+				  unsigned long count, void *data)
+{return count;}
+static inline
+int lprocfs_nid_stats_clear_read(struct seq_file *m, void *data)
+{ return 0; }
+
+static inline proc_dir_entry_t *
+lprocfs_register(const char *name, proc_dir_entry_t *parent,
+		 struct lprocfs_vars *list, void *data)
+{ return NULL; }
+static inline int lprocfs_add_vars(proc_dir_entry_t *root,
+				   struct lprocfs_vars *var,
+				   void *data)
+{ return 0; }
+static inline void lprocfs_remove(proc_dir_entry_t **root)
+{ return; }
+static inline void lprocfs_remove_proc_entry(const char *name,
+					     struct proc_dir_entry *parent)
+{ return; }
+static inline int lprocfs_obd_setup(struct obd_device *dev,
+				    struct lprocfs_vars *list)
+{ return 0; }
+static inline int lprocfs_obd_cleanup(struct obd_device *dev)
+{ return 0; }
+static inline int lprocfs_rd_u64(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_uuid(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_name(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_server_uuid(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_conn_uuid(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_import(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_pinger_recov(struct seq_file *m, void *n)
+{ return 0; }
+static inline int lprocfs_rd_state(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_connect_flags(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_rd_num_exports(struct seq_file *m, void *data)
+{ return 0; }
+extern inline int lprocfs_rd_numrefs(struct seq_file *m, void *data)
+{ return 0; }
+struct adaptive_timeout;
+static inline int lprocfs_at_hist_helper(struct seq_file *m,
+					 struct adaptive_timeout *at)
+{ return 0; }
+static inline int lprocfs_rd_timeouts(struct seq_file *m, void *data)
+{ return 0; }
+static inline int lprocfs_wr_timeouts(struct file *file,
+				      const char *buffer,
+				      unsigned long count, void *data)
+{ return 0; }
+static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer,
+				    size_t count, loff_t *off)
+{ return 0; }
+static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
+			   size_t count, loff_t *off)
+{ return 0; }
+static inline int lprocfs_wr_import(struct file *file, const char *buffer,
+			      size_t count, loff_t *off)
+{ return 0; }
+static inline int lprocfs_wr_pinger_recov(struct file *file, const char *buffer,
+					size_t count, loff_t *off)
+{ return 0; }
+
+/* Statfs helpers */
+static inline
+int lprocfs_rd_blksize(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_kbytestotal(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_kbytesfree(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_kbytesavail(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_filestotal(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+int lprocfs_rd_filesfree(struct seq_file *m, void *data)
+{ return 0; }
+static inline
+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value)
+{ return; }
+static inline
+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
+{ return; }
+static inline
+void lprocfs_oh_clear(struct obd_histogram *oh)
+{ return; }
+static inline
+unsigned long lprocfs_oh_sum(struct obd_histogram *oh)
+{ return 0; }
+static inline
+void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
+			   struct lprocfs_counter *cnt)
+{ return; }
+static inline
+__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
+			       enum lprocfs_fields_flags field)
+{ return (__u64)0; }
+
+#define LPROC_SEQ_FOPS_RO(name)
+#define LPROC_SEQ_FOPS(name)
+#define LPROC_SEQ_FOPS_RO_TYPE(name, type)
+#define LPROC_SEQ_FOPS_RW_TYPE(name, type)
+#define LPROC_SEQ_FOPS_WR_ONLY(name, type)
+
+/* lprocfs_jobstats.c */
+static inline
+int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, int event,
+			  long amount)
+{ return 0; }
+static inline
+void lprocfs_job_stats_fini(struct obd_device *obd)
+{ return; }
+static inline
+int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
+			   cntr_init_callback fn)
+{ return 0; }
+
+
+/* lproc_ptlrpc.c */
+#define target_print_req NULL
+
+#endif /* LPROCFS */
+
+#endif /* LPROCFS_SNMP_H */
diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
new file mode 100644
index 000000000000..d40ad81b4eb2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -0,0 +1,1346 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LUSTRE_LU_OBJECT_H
+#define __LUSTRE_LU_OBJECT_H
+
+#include <stdarg.h>
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+#include <lu_ref.h>
+
+struct seq_file;
+struct proc_dir_entry;
+struct lustre_cfg;
+struct lprocfs_stats;
+
+/** \defgroup lu lu
+ * lu_* data-types represent server-side entities shared by data and meta-data
+ * stacks.
+ *
+ * Design goals:
+ *
+ * -# support for layering.
+ *
+ *     Server side object is split into layers, one per device in the
+ *     corresponding device stack. Individual layer is represented by struct
+ *     lu_object. Compound layered object --- by struct lu_object_header. Most
+ *     interface functions take lu_object as an argument and operate on the
+ *     whole compound object. This decision was made due to the following
+ *     reasons:
+ *
+ *	- it's envisaged that lu_object will be used much more often than
+ *	lu_object_header;
+ *
+ *	- we want lower (non-top) layers to be able to initiate operations
+ *	on the whole object.
+ *
+ *     Generic code supports layering more complex than simple stacking, e.g.,
+ *     it is possible that at some layer object "spawns" multiple sub-objects
+ *     on the lower layer.
+ *
+ * -# fid-based identification.
+ *
+ *     Compound object is uniquely identified by its fid. Objects are indexed
+ *     by their fids (hash table is used for index).
+ *
+ * -# caching and life-cycle management.
+ *
+ *     Object's life-time is controlled by reference counting. When reference
+ *     count drops to 0, object is returned to cache. Cached objects still
+ *     retain their identity (i.e., fid), and can be recovered from cache.
+ *
+ *     Objects are kept in the global LRU list, and lu_site_purge() function
+ *     can be used to reclaim given number of unused objects from the tail of
+ *     the LRU.
+ *
+ * -# avoiding recursion.
+ *
+ *     Generic code tries to replace recursion through layers by iterations
+ *     where possible. Additionally to the end of reducing stack consumption,
+ *     data, when practically possible, are allocated through lu_context_key
+ *     interface rather than on stack.
+ * @{
+ */
+
+struct lu_site;
+struct lu_object;
+struct lu_device;
+struct lu_object_header;
+struct lu_context;
+struct lu_env;
+
+/**
+ * Operations common for data and meta-data devices.
+ */
+struct lu_device_operations {
+	/**
+	 * Allocate object for the given device (without lower-layer
+	 * parts). This is called by lu_object_operations::loo_object_init()
+	 * from the parent layer, and should setup at least lu_object::lo_dev
+	 * and lu_object::lo_ops fields of resulting lu_object.
+	 *
+	 * Object creation protocol.
+	 *
+	 * Due to design goal of avoiding recursion, object creation (see
+	 * lu_object_alloc()) is somewhat involved:
+	 *
+	 *  - first, lu_device_operations::ldo_object_alloc() method of the
+	 *  top-level device in the stack is called. It should allocate top
+	 *  level object (including lu_object_header), but without any
+	 *  lower-layer sub-object(s).
+	 *
+	 *  - then lu_object_alloc() sets fid in the header of newly created
+	 *  object.
+	 *
+	 *  - then lu_object_operations::loo_object_init() is called. It has
+	 *  to allocate lower-layer object(s). To do this,
+	 *  lu_object_operations::loo_object_init() calls ldo_object_alloc()
+	 *  of the lower-layer device(s).
+	 *
+	 *  - for all new objects allocated by
+	 *  lu_object_operations::loo_object_init() (and inserted into object
+	 *  stack), lu_object_operations::loo_object_init() is called again
+	 *  repeatedly, until no new objects are created.
+	 *
+	 * \post ergo(!IS_ERR(result), result->lo_dev == d &&
+	 *			     result->lo_ops != NULL);
+	 */
+	struct lu_object *(*ldo_object_alloc)(const struct lu_env *env,
+					      const struct lu_object_header *h,
+					      struct lu_device *d);
+	/**
+	 * process config specific for device.
+	 */
+	int (*ldo_process_config)(const struct lu_env *env,
+				  struct lu_device *, struct lustre_cfg *);
+	int (*ldo_recovery_complete)(const struct lu_env *,
+				     struct lu_device *);
+
+	/**
+	 * initialize local objects for device. this method called after layer has
+	 * been initialized (after LCFG_SETUP stage) and before it starts serving
+	 * user requests.
+	 */
+
+	int (*ldo_prepare)(const struct lu_env *,
+			   struct lu_device *parent,
+			   struct lu_device *dev);
+
+};
+
+/**
+ * For lu_object_conf flags
+ */
+typedef enum {
+	/* This is a new object to be allocated, or the file
+	 * corresponding to the object does not exists. */
+	LOC_F_NEW	= 0x00000001,
+} loc_flags_t;
+
+/**
+ * Object configuration, describing particulars of object being created. On
+ * server this is not used, as server objects are full identified by fid. On
+ * client configuration contains struct lustre_md.
+ */
+struct lu_object_conf {
+	/**
+	 * Some hints for obj find and alloc.
+	 */
+	loc_flags_t     loc_flags;
+};
+
+/**
+ * Type of "printer" function used by lu_object_operations::loo_object_print()
+ * method.
+ *
+ * Printer function is needed to provide some flexibility in (semi-)debugging
+ * output: possible implementations: printk, CDEBUG, sysfs/seq_file
+ */
+typedef int (*lu_printer_t)(const struct lu_env *env,
+			    void *cookie, const char *format, ...)
+	__attribute__ ((format (printf, 3, 4)));
+
+/**
+ * Operations specific for particular lu_object.
+ */
+struct lu_object_operations {
+
+	/**
+	 * Allocate lower-layer parts of the object by calling
+	 * lu_device_operations::ldo_object_alloc() of the corresponding
+	 * underlying device.
+	 *
+	 * This method is called once for each object inserted into object
+	 * stack. It's responsibility of this method to insert lower-layer
+	 * object(s) it create into appropriate places of object stack.
+	 */
+	int (*loo_object_init)(const struct lu_env *env,
+			       struct lu_object *o,
+			       const struct lu_object_conf *conf);
+	/**
+	 * Called (in top-to-bottom order) during object allocation after all
+	 * layers were allocated and initialized. Can be used to perform
+	 * initialization depending on lower layers.
+	 */
+	int (*loo_object_start)(const struct lu_env *env,
+				struct lu_object *o);
+	/**
+	 * Called before lu_object_operations::loo_object_free() to signal
+	 * that object is being destroyed. Dual to
+	 * lu_object_operations::loo_object_init().
+	 */
+	void (*loo_object_delete)(const struct lu_env *env,
+				  struct lu_object *o);
+	/**
+	 * Dual to lu_device_operations::ldo_object_alloc(). Called when
+	 * object is removed from memory.
+	 */
+	void (*loo_object_free)(const struct lu_env *env,
+				struct lu_object *o);
+	/**
+	 * Called when last active reference to the object is released (and
+	 * object returns to the cache). This method is optional.
+	 */
+	void (*loo_object_release)(const struct lu_env *env,
+				   struct lu_object *o);
+	/**
+	 * Optional debugging helper. Print given object.
+	 */
+	int (*loo_object_print)(const struct lu_env *env, void *cookie,
+				lu_printer_t p, const struct lu_object *o);
+	/**
+	 * Optional debugging method. Returns true iff method is internally
+	 * consistent.
+	 */
+	int (*loo_object_invariant)(const struct lu_object *o);
+};
+
+/**
+ * Type of lu_device.
+ */
+struct lu_device_type;
+
+/**
+ * Device: a layer in the server side abstraction stacking.
+ */
+struct lu_device {
+	/**
+	 * reference count. This is incremented, in particular, on each object
+	 * created at this layer.
+	 *
+	 * \todo XXX which means that atomic_t is probably too small.
+	 */
+	atomic_t		       ld_ref;
+	/**
+	 * Pointer to device type. Never modified once set.
+	 */
+	struct lu_device_type       *ld_type;
+	/**
+	 * Operation vector for this device.
+	 */
+	const struct lu_device_operations *ld_ops;
+	/**
+	 * Stack this device belongs to.
+	 */
+	struct lu_site		    *ld_site;
+	struct proc_dir_entry	     *ld_proc_entry;
+
+	/** \todo XXX: temporary back pointer into obd. */
+	struct obd_device		 *ld_obd;
+	/**
+	 * A list of references to this object, for debugging.
+	 */
+	struct lu_ref		      ld_reference;
+	/**
+	 * Link the device to the site.
+	 **/
+	struct list_head			 ld_linkage;
+};
+
+struct lu_device_type_operations;
+
+/**
+ * Tag bits for device type. They are used to distinguish certain groups of
+ * device types.
+ */
+enum lu_device_tag {
+	/** this is meta-data device */
+	LU_DEVICE_MD = (1 << 0),
+	/** this is data device */
+	LU_DEVICE_DT = (1 << 1),
+	/** data device in the client stack */
+	LU_DEVICE_CL = (1 << 2)
+};
+
+/**
+ * Type of device.
+ */
+struct lu_device_type {
+	/**
+	 * Tag bits. Taken from enum lu_device_tag. Never modified once set.
+	 */
+	__u32				   ldt_tags;
+	/**
+	 * Name of this class. Unique system-wide. Never modified once set.
+	 */
+	char				   *ldt_name;
+	/**
+	 * Operations for this type.
+	 */
+	const struct lu_device_type_operations *ldt_ops;
+	/**
+	 * \todo XXX: temporary pointer to associated obd_type.
+	 */
+	struct obd_type			*ldt_obd_type;
+	/**
+	 * \todo XXX: temporary: context tags used by obd_*() calls.
+	 */
+	__u32				   ldt_ctx_tags;
+	/**
+	 * Number of existing device type instances.
+	 */
+	unsigned				ldt_device_nr;
+	/**
+	 * Linkage into a global list of all device types.
+	 *
+	 * \see lu_device_types.
+	 */
+	struct list_head			      ldt_linkage;
+};
+
+/**
+ * Operations on a device type.
+ */
+struct lu_device_type_operations {
+	/**
+	 * Allocate new device.
+	 */
+	struct lu_device *(*ldto_device_alloc)(const struct lu_env *env,
+					       struct lu_device_type *t,
+					       struct lustre_cfg *lcfg);
+	/**
+	 * Free device. Dual to
+	 * lu_device_type_operations::ldto_device_alloc(). Returns pointer to
+	 * the next device in the stack.
+	 */
+	struct lu_device *(*ldto_device_free)(const struct lu_env *,
+					      struct lu_device *);
+
+	/**
+	 * Initialize the devices after allocation
+	 */
+	int  (*ldto_device_init)(const struct lu_env *env,
+				 struct lu_device *, const char *,
+				 struct lu_device *);
+	/**
+	 * Finalize device. Dual to
+	 * lu_device_type_operations::ldto_device_init(). Returns pointer to
+	 * the next device in the stack.
+	 */
+	struct lu_device *(*ldto_device_fini)(const struct lu_env *env,
+					      struct lu_device *);
+	/**
+	 * Initialize device type. This is called on module load.
+	 */
+	int  (*ldto_init)(struct lu_device_type *t);
+	/**
+	 * Finalize device type. Dual to
+	 * lu_device_type_operations::ldto_init(). Called on module unload.
+	 */
+	void (*ldto_fini)(struct lu_device_type *t);
+	/**
+	 * Called when the first device is created.
+	 */
+	void (*ldto_start)(struct lu_device_type *t);
+	/**
+	 * Called when number of devices drops to 0.
+	 */
+	void (*ldto_stop)(struct lu_device_type *t);
+};
+
+static inline int lu_device_is_md(const struct lu_device *d)
+{
+	return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_MD);
+}
+
+/**
+ * Flags for the object layers.
+ */
+enum lu_object_flags {
+	/**
+	 * this flags is set if lu_object_operations::loo_object_init() has
+	 * been called for this layer. Used by lu_object_alloc().
+	 */
+	LU_OBJECT_ALLOCATED = (1 << 0)
+};
+
+/**
+ * Common object attributes.
+ */
+struct lu_attr {
+	/** size in bytes */
+	__u64	  la_size;
+	/** modification time in seconds since Epoch */
+	obd_time       la_mtime;
+	/** access time in seconds since Epoch */
+	obd_time       la_atime;
+	/** change time in seconds since Epoch */
+	obd_time       la_ctime;
+	/** 512-byte blocks allocated to object */
+	__u64	  la_blocks;
+	/** permission bits and file type */
+	__u32	  la_mode;
+	/** owner id */
+	__u32	  la_uid;
+	/** group id */
+	__u32	  la_gid;
+	/** object flags */
+	__u32	  la_flags;
+	/** number of persistent references to this object */
+	__u32	  la_nlink;
+	/** blk bits of the object*/
+	__u32	  la_blkbits;
+	/** blk size of the object*/
+	__u32	  la_blksize;
+	/** real device */
+	__u32	  la_rdev;
+	/**
+	 * valid bits
+	 *
+	 * \see enum la_valid
+	 */
+	__u64	  la_valid;
+};
+
+/** Bit-mask of valid attributes */
+enum la_valid {
+	LA_ATIME = 1 << 0,
+	LA_MTIME = 1 << 1,
+	LA_CTIME = 1 << 2,
+	LA_SIZE  = 1 << 3,
+	LA_MODE  = 1 << 4,
+	LA_UID   = 1 << 5,
+	LA_GID   = 1 << 6,
+	LA_BLOCKS = 1 << 7,
+	LA_TYPE   = 1 << 8,
+	LA_FLAGS  = 1 << 9,
+	LA_NLINK  = 1 << 10,
+	LA_RDEV   = 1 << 11,
+	LA_BLKSIZE = 1 << 12,
+	LA_KILL_SUID = 1 << 13,
+	LA_KILL_SGID = 1 << 14,
+};
+
+/**
+ * Layer in the layered object.
+ */
+struct lu_object {
+	/**
+	 * Header for this object.
+	 */
+	struct lu_object_header	   *lo_header;
+	/**
+	 * Device for this layer.
+	 */
+	struct lu_device		  *lo_dev;
+	/**
+	 * Operations for this object.
+	 */
+	const struct lu_object_operations *lo_ops;
+	/**
+	 * Linkage into list of all layers.
+	 */
+	struct list_head			 lo_linkage;
+	/**
+	 * Depth. Top level layer depth is 0.
+	 */
+	int				lo_depth;
+	/**
+	 * Flags from enum lu_object_flags.
+	 */
+	__u32					lo_flags;
+	/**
+	 * Link to the device, for debugging.
+	 */
+	struct lu_ref_link		*lo_dev_ref;
+};
+
+enum lu_object_header_flags {
+	/**
+	 * Don't keep this object in cache. Object will be destroyed as soon
+	 * as last reference to it is released. This flag cannot be cleared
+	 * once set.
+	 */
+	LU_OBJECT_HEARD_BANSHEE = 0,
+	/**
+	 * Mark this object has already been taken out of cache.
+	 */
+	LU_OBJECT_UNHASHED = 1
+};
+
+enum lu_object_header_attr {
+	LOHA_EXISTS   = 1 << 0,
+	LOHA_REMOTE   = 1 << 1,
+	/**
+	 * UNIX file type is stored in S_IFMT bits.
+	 */
+	LOHA_FT_START = 001 << 12, /**< S_IFIFO */
+	LOHA_FT_END   = 017 << 12, /**< S_IFMT */
+};
+
+/**
+ * "Compound" object, consisting of multiple layers.
+ *
+ * Compound object with given fid is unique with given lu_site.
+ *
+ * Note, that object does *not* necessary correspond to the real object in the
+ * persistent storage: object is an anchor for locking and method calling, so
+ * it is created for things like not-yet-existing child created by mkdir or
+ * create calls. lu_object_operations::loo_exists() can be used to check
+ * whether object is backed by persistent storage entity.
+ */
+struct lu_object_header {
+	/**
+	 * Object flags from enum lu_object_header_flags. Set and checked
+	 * atomically.
+	 */
+	unsigned long	  loh_flags;
+	/**
+	 * Object reference count. Protected by lu_site::ls_guard.
+	 */
+	atomic_t	   loh_ref;
+	/**
+	 * Fid, uniquely identifying this object.
+	 */
+	struct lu_fid	  loh_fid;
+	/**
+	 * Common object attributes, cached for efficiency. From enum
+	 * lu_object_header_attr.
+	 */
+	__u32		  loh_attr;
+	/**
+	 * Linkage into per-site hash table. Protected by lu_site::ls_guard.
+	 */
+	struct hlist_node       loh_hash;
+	/**
+	 * Linkage into per-site LRU list. Protected by lu_site::ls_guard.
+	 */
+	struct list_head	     loh_lru;
+	/**
+	 * Linkage into list of layers. Never modified once set (except lately
+	 * during object destruction). No locking is necessary.
+	 */
+	struct list_head	     loh_layers;
+	/**
+	 * A list of references to this object, for debugging.
+	 */
+	struct lu_ref	  loh_reference;
+};
+
+struct fld;
+
+struct lu_site_bkt_data {
+	/**
+	 * number of busy object on this bucket
+	 */
+	long		      lsb_busy;
+	/**
+	 * LRU list, updated on each access to object. Protected by
+	 * bucket lock of lu_site::ls_obj_hash.
+	 *
+	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are
+	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
+	 * of list_for_each_entry_safe_reverse()).
+	 */
+	struct list_head		lsb_lru;
+	/**
+	 * Wait-queue signaled when an object in this site is ultimately
+	 * destroyed (lu_object_free()). It is used by lu_object_find() to
+	 * wait before re-trying when object in the process of destruction is
+	 * found in the hash table.
+	 *
+	 * \see htable_lookup().
+	 */
+	wait_queue_head_t	       lsb_marche_funebre;
+};
+
+enum {
+	LU_SS_CREATED	 = 0,
+	LU_SS_CACHE_HIT,
+	LU_SS_CACHE_MISS,
+	LU_SS_CACHE_RACE,
+	LU_SS_CACHE_DEATH_RACE,
+	LU_SS_LRU_PURGED,
+	LU_SS_LAST_STAT
+};
+
+/**
+ * lu_site is a "compartment" within which objects are unique, and LRU
+ * discipline is maintained.
+ *
+ * lu_site exists so that multiple layered stacks can co-exist in the same
+ * address space.
+ *
+ * lu_site has the same relation to lu_device as lu_object_header to
+ * lu_object.
+ */
+struct lu_site {
+	/**
+	 * objects hash table
+	 */
+	cfs_hash_t	       *ls_obj_hash;
+	/**
+	 * index of bucket on hash table while purging
+	 */
+	int		       ls_purge_start;
+	/**
+	 * Top-level device for this stack.
+	 */
+	struct lu_device	 *ls_top_dev;
+	/**
+	 * Bottom-level device for this stack
+	 */
+	struct lu_device	*ls_bottom_dev;
+	/**
+	 * Linkage into global list of sites.
+	 */
+	struct list_head		ls_linkage;
+	/**
+	 * List for lu device for this site, protected
+	 * by ls_ld_lock.
+	 **/
+	struct list_head		ls_ld_linkage;
+	spinlock_t		ls_ld_lock;
+
+	/**
+	 * lu_site stats
+	 */
+	struct lprocfs_stats	*ls_stats;
+	/**
+	 * XXX: a hack! fld has to find md_site via site, remove when possible
+	 */
+	struct seq_server_site	*ld_seq_site;
+};
+
+static inline struct lu_site_bkt_data *
+lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
+{
+	cfs_hash_bd_t bd;
+
+	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
+	return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
+}
+
+/** \name ctors
+ * Constructors/destructors.
+ * @{
+ */
+
+int  lu_site_init	 (struct lu_site *s, struct lu_device *d);
+void lu_site_fini	 (struct lu_site *s);
+int  lu_site_init_finish  (struct lu_site *s);
+void lu_stack_fini	(const struct lu_env *env, struct lu_device *top);
+void lu_device_get	(struct lu_device *d);
+void lu_device_put	(struct lu_device *d);
+int  lu_device_init       (struct lu_device *d, struct lu_device_type *t);
+void lu_device_fini       (struct lu_device *d);
+int  lu_object_header_init(struct lu_object_header *h);
+void lu_object_header_fini(struct lu_object_header *h);
+int  lu_object_init       (struct lu_object *o,
+			   struct lu_object_header *h, struct lu_device *d);
+void lu_object_fini       (struct lu_object *o);
+void lu_object_add_top    (struct lu_object_header *h, struct lu_object *o);
+void lu_object_add	(struct lu_object *before, struct lu_object *o);
+
+void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d);
+void lu_dev_del_linkage(struct lu_site *s, struct lu_device *d);
+
+/**
+ * Helpers to initialize and finalize device types.
+ */
+
+int  lu_device_type_init(struct lu_device_type *ldt);
+void lu_device_type_fini(struct lu_device_type *ldt);
+void lu_types_stop(void);
+
+/** @} ctors */
+
+/** \name caching
+ * Caching and reference counting.
+ * @{
+ */
+
+/**
+ * Acquire additional reference to the given object. This function is used to
+ * attain additional reference. To acquire initial reference use
+ * lu_object_find().
+ */
+static inline void lu_object_get(struct lu_object *o)
+{
+	LASSERT(atomic_read(&o->lo_header->loh_ref) > 0);
+	atomic_inc(&o->lo_header->loh_ref);
+}
+
+/**
+ * Return true of object will not be cached after last reference to it is
+ * released.
+ */
+static inline int lu_object_is_dying(const struct lu_object_header *h)
+{
+	return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags);
+}
+
+void lu_object_put(const struct lu_env *env, struct lu_object *o);
+void lu_object_put_nocache(const struct lu_env *env, struct lu_object *o);
+void lu_object_unhash(const struct lu_env *env, struct lu_object *o);
+
+int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr);
+
+void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
+		   lu_printer_t printer);
+struct lu_object *lu_object_find(const struct lu_env *env,
+				 struct lu_device *dev, const struct lu_fid *f,
+				 const struct lu_object_conf *conf);
+struct lu_object *lu_object_find_at(const struct lu_env *env,
+				    struct lu_device *dev,
+				    const struct lu_fid *f,
+				    const struct lu_object_conf *conf);
+struct lu_object *lu_object_find_slice(const struct lu_env *env,
+				       struct lu_device *dev,
+				       const struct lu_fid *f,
+				       const struct lu_object_conf *conf);
+/** @} caching */
+
+/** \name helpers
+ * Helpers.
+ * @{
+ */
+
+/**
+ * First (topmost) sub-object of given compound object
+ */
+static inline struct lu_object *lu_object_top(struct lu_object_header *h)
+{
+	LASSERT(!list_empty(&h->loh_layers));
+	return container_of0(h->loh_layers.next, struct lu_object, lo_linkage);
+}
+
+/**
+ * Next sub-object in the layering
+ */
+static inline struct lu_object *lu_object_next(const struct lu_object *o)
+{
+	return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage);
+}
+
+/**
+ * Pointer to the fid of this object.
+ */
+static inline const struct lu_fid *lu_object_fid(const struct lu_object *o)
+{
+	return &o->lo_header->loh_fid;
+}
+
+/**
+ * return device operations vector for this object
+ */
+static const inline struct lu_device_operations *
+lu_object_ops(const struct lu_object *o)
+{
+	return o->lo_dev->ld_ops;
+}
+
+/**
+ * Given a compound object, find its slice, corresponding to the device type
+ * \a dtype.
+ */
+struct lu_object *lu_object_locate(struct lu_object_header *h,
+				   const struct lu_device_type *dtype);
+
+/**
+ * Printer function emitting messages through libcfs_debug_msg().
+ */
+int lu_cdebug_printer(const struct lu_env *env,
+		      void *cookie, const char *format, ...);
+
+/**
+ * Print object description followed by a user-supplied message.
+ */
+#define LU_OBJECT_DEBUG(mask, env, object, format, ...)		   \
+do {								      \
+	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		  \
+									  \
+	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		     \
+		lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
+		CDEBUG(mask, format , ## __VA_ARGS__);		    \
+	}								 \
+} while (0)
+
+/**
+ * Print short object description followed by a user-supplied message.
+ */
+#define LU_OBJECT_HEADER(mask, env, object, format, ...)		\
+do {								    \
+	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
+									\
+	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		   \
+		lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
+				       (object)->lo_header);	    \
+		lu_cdebug_printer(env, &msgdata, "\n");		 \
+		CDEBUG(mask, format , ## __VA_ARGS__);		  \
+	}							       \
+} while (0)
+
+void lu_object_print       (const struct lu_env *env, void *cookie,
+			    lu_printer_t printer, const struct lu_object *o);
+void lu_object_header_print(const struct lu_env *env, void *cookie,
+			    lu_printer_t printer,
+			    const struct lu_object_header *hdr);
+
+/**
+ * Check object consistency.
+ */
+int lu_object_invariant(const struct lu_object *o);
+
+
+/**
+ * Check whether object exists, no matter on local or remote storage.
+ * Note: LOHA_EXISTS will be set once some one created the object,
+ * and it does not needs to be committed to storage.
+ */
+#define lu_object_exists(o) ((o)->lo_header->loh_attr & LOHA_EXISTS)
+
+/**
+ * Check whether object on the remote storage.
+ */
+#define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE)
+
+static inline int lu_object_assert_exists(const struct lu_object *o)
+{
+	return lu_object_exists(o);
+}
+
+static inline int lu_object_assert_not_exists(const struct lu_object *o)
+{
+	return !lu_object_exists(o);
+}
+
+/**
+ * Attr of this object.
+ */
+static inline __u32 lu_object_attr(const struct lu_object *o)
+{
+	LASSERT(lu_object_exists(o) != 0);
+	return o->lo_header->loh_attr;
+}
+
+static inline struct lu_ref_link *lu_object_ref_add(struct lu_object *o,
+						    const char *scope,
+						    const void *source)
+{
+	return lu_ref_add(&o->lo_header->loh_reference, scope, source);
+}
+
+static inline void lu_object_ref_del(struct lu_object *o,
+				     const char *scope, const void *source)
+{
+	lu_ref_del(&o->lo_header->loh_reference, scope, source);
+}
+
+static inline void lu_object_ref_del_at(struct lu_object *o,
+					struct lu_ref_link *link,
+					const char *scope, const void *source)
+{
+	lu_ref_del_at(&o->lo_header->loh_reference, link, scope, source);
+}
+
+/** input params, should be filled out by mdt */
+struct lu_rdpg {
+	/** hash */
+	__u64		   rp_hash;
+	/** count in bytes */
+	unsigned int	    rp_count;
+	/** number of pages */
+	unsigned int	    rp_npages;
+	/** requested attr */
+	__u32		   rp_attrs;
+	/** pointers to pages */
+	struct page	   **rp_pages;
+};
+
+enum lu_xattr_flags {
+	LU_XATTR_REPLACE = (1 << 0),
+	LU_XATTR_CREATE  = (1 << 1)
+};
+
+/** @} helpers */
+
+/** \name lu_context
+ * @{ */
+
+/** For lu_context health-checks */
+enum lu_context_state {
+	LCS_INITIALIZED = 1,
+	LCS_ENTERED,
+	LCS_LEFT,
+	LCS_FINALIZED
+};
+
+/**
+ * lu_context. Execution context for lu_object methods. Currently associated
+ * with thread.
+ *
+ * All lu_object methods, except device and device type methods (called during
+ * system initialization and shutdown) are executed "within" some
+ * lu_context. This means, that pointer to some "current" lu_context is passed
+ * as an argument to all methods.
+ *
+ * All service ptlrpc threads create lu_context as part of their
+ * initialization. It is possible to create "stand-alone" context for other
+ * execution environments (like system calls).
+ *
+ * lu_object methods mainly use lu_context through lu_context_key interface
+ * that allows each layer to associate arbitrary pieces of data with each
+ * context (see pthread_key_create(3) for similar interface).
+ *
+ * On a client, lu_context is bound to a thread, see cl_env_get().
+ *
+ * \see lu_context_key
+ */
+struct lu_context {
+	/**
+	 * lu_context is used on the client side too. Yet we don't want to
+	 * allocate values of server-side keys for the client contexts and
+	 * vice versa.
+	 *
+	 * To achieve this, set of tags in introduced. Contexts and keys are
+	 * marked with tags. Key value are created only for context whose set
+	 * of tags has non-empty intersection with one for key. Tags are taken
+	 * from enum lu_context_tag.
+	 */
+	__u32		  lc_tags;
+	enum lu_context_state  lc_state;
+	/**
+	 * Pointer to the home service thread. NULL for other execution
+	 * contexts.
+	 */
+	struct ptlrpc_thread  *lc_thread;
+	/**
+	 * Pointer to an array with key values. Internal implementation
+	 * detail.
+	 */
+	void		 **lc_value;
+	/**
+	 * Linkage into a list of all remembered contexts. Only
+	 * `non-transient' contexts, i.e., ones created for service threads
+	 * are placed here.
+	 */
+	struct list_head	     lc_remember;
+	/**
+	 * Version counter used to skip calls to lu_context_refill() when no
+	 * keys were registered.
+	 */
+	unsigned	       lc_version;
+	/**
+	 * Debugging cookie.
+	 */
+	unsigned	       lc_cookie;
+};
+
+/**
+ * lu_context_key interface. Similar to pthread_key.
+ */
+
+enum lu_context_tag {
+	/**
+	 * Thread on md server
+	 */
+	LCT_MD_THREAD = 1 << 0,
+	/**
+	 * Thread on dt server
+	 */
+	LCT_DT_THREAD = 1 << 1,
+	/**
+	 * Context for transaction handle
+	 */
+	LCT_TX_HANDLE = 1 << 2,
+	/**
+	 * Thread on client
+	 */
+	LCT_CL_THREAD = 1 << 3,
+	/**
+	 * A per-request session on a server, and a per-system-call session on
+	 * a client.
+	 */
+	LCT_SESSION   = 1 << 4,
+	/**
+	 * A per-request data on OSP device
+	 */
+	LCT_OSP_THREAD = 1 << 5,
+	/**
+	 * MGS device thread
+	 */
+	LCT_MG_THREAD = 1 << 6,
+	/**
+	 * Context for local operations
+	 */
+	LCT_LOCAL = 1 << 7,
+	/**
+	 * Set when at least one of keys, having values in this context has
+	 * non-NULL lu_context_key::lct_exit() method. This is used to
+	 * optimize lu_context_exit() call.
+	 */
+	LCT_HAS_EXIT  = 1 << 28,
+	/**
+	 * Don't add references for modules creating key values in that context.
+	 * This is only for contexts used internally by lu_object framework.
+	 */
+	LCT_NOREF     = 1 << 29,
+	/**
+	 * Key is being prepared for retiring, don't create new values for it.
+	 */
+	LCT_QUIESCENT = 1 << 30,
+	/**
+	 * Context should be remembered.
+	 */
+	LCT_REMEMBER  = 1 << 31,
+	/**
+	 * Contexts usable in cache shrinker thread.
+	 */
+	LCT_SHRINKER  = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF
+};
+
+/**
+ * Key. Represents per-context value slot.
+ *
+ * Keys are usually registered when module owning the key is initialized, and
+ * de-registered when module is unloaded. Once key is registered, all new
+ * contexts with matching tags, will get key value. "Old" contexts, already
+ * initialized at the time of key registration, can be forced to get key value
+ * by calling lu_context_refill().
+ *
+ * Every key value is counted in lu_context_key::lct_used and acquires a
+ * reference on an owning module. This means, that all key values have to be
+ * destroyed before module can be unloaded. This is usually achieved by
+ * stopping threads started by the module, that created contexts in their
+ * entry functions. Situation is complicated by the threads shared by multiple
+ * modules, like ptlrpcd daemon on a client. To work around this problem,
+ * contexts, created in such threads, are `remembered' (see
+ * LCT_REMEMBER)---i.e., added into a global list. When module is preparing
+ * for unloading it does the following:
+ *
+ *     - marks its keys as `quiescent' (lu_context_tag::LCT_QUIESCENT)
+ *       preventing new key values from being allocated in the new contexts,
+ *       and
+ *
+ *     - scans a list of remembered contexts, destroying values of module
+ *       keys, thus releasing references to the module.
+ *
+ * This is done by lu_context_key_quiesce(). If module is re-activated
+ * before key has been de-registered, lu_context_key_revive() call clears
+ * `quiescent' marker.
+ *
+ * lu_context code doesn't provide any internal synchronization for these
+ * activities---it's assumed that startup (including threads start-up) and
+ * shutdown are serialized by some external means.
+ *
+ * \see lu_context
+ */
+struct lu_context_key {
+	/**
+	 * Set of tags for which values of this key are to be instantiated.
+	 */
+	__u32 lct_tags;
+	/**
+	 * Value constructor. This is called when new value is created for a
+	 * context. Returns pointer to new value of error pointer.
+	 */
+	void  *(*lct_init)(const struct lu_context *ctx,
+			   struct lu_context_key *key);
+	/**
+	 * Value destructor. Called when context with previously allocated
+	 * value of this slot is destroyed. \a data is a value that was returned
+	 * by a matching call to lu_context_key::lct_init().
+	 */
+	void   (*lct_fini)(const struct lu_context *ctx,
+			   struct lu_context_key *key, void *data);
+	/**
+	 * Optional method called on lu_context_exit() for all allocated
+	 * keys. Can be used by debugging code checking that locks are
+	 * released, etc.
+	 */
+	void   (*lct_exit)(const struct lu_context *ctx,
+			   struct lu_context_key *key, void *data);
+	/**
+	 * Internal implementation detail: index within lu_context::lc_value[]
+	 * reserved for this key.
+	 */
+	int      lct_index;
+	/**
+	 * Internal implementation detail: number of values created for this
+	 * key.
+	 */
+	atomic_t lct_used;
+	/**
+	 * Internal implementation detail: module for this key.
+	 */
+	module_t *lct_owner;
+	/**
+	 * References to this key. For debugging.
+	 */
+	struct lu_ref  lct_reference;
+};
+
+#define LU_KEY_INIT(mod, type)				    \
+	static void* mod##_key_init(const struct lu_context *ctx, \
+				    struct lu_context_key *key)   \
+	{							 \
+		type *value;				      \
+								  \
+		CLASSERT(PAGE_CACHE_SIZE >= sizeof (*value));       \
+								  \
+		OBD_ALLOC_PTR(value);			     \
+		if (value == NULL)				\
+			value = ERR_PTR(-ENOMEM);		 \
+								  \
+		return value;				     \
+	}							 \
+	struct __##mod##__dummy_init {;} /* semicolon catcher */
+
+#define LU_KEY_FINI(mod, type)					      \
+	static void mod##_key_fini(const struct lu_context *ctx,	    \
+				    struct lu_context_key *key, void* data) \
+	{								   \
+		type *info = data;					  \
+									    \
+		OBD_FREE_PTR(info);					 \
+	}								   \
+	struct __##mod##__dummy_fini {;} /* semicolon catcher */
+
+#define LU_KEY_INIT_FINI(mod, type)   \
+	LU_KEY_INIT(mod,type);	\
+	LU_KEY_FINI(mod,type)
+
+#define LU_CONTEXT_KEY_DEFINE(mod, tags)		\
+	struct lu_context_key mod##_thread_key = {      \
+		.lct_tags = tags,		       \
+		.lct_init = mod##_key_init,	     \
+		.lct_fini = mod##_key_fini	      \
+	}
+
+#define LU_CONTEXT_KEY_INIT(key)			\
+do {						    \
+	(key)->lct_owner = THIS_MODULE;		 \
+} while (0)
+
+int   lu_context_key_register(struct lu_context_key *key);
+void  lu_context_key_degister(struct lu_context_key *key);
+void *lu_context_key_get     (const struct lu_context *ctx,
+			       const struct lu_context_key *key);
+void  lu_context_key_quiesce (struct lu_context_key *key);
+void  lu_context_key_revive  (struct lu_context_key *key);
+
+
+/*
+ * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an
+ * owning module.
+ */
+
+#define LU_KEY_INIT_GENERIC(mod)					\
+	static void mod##_key_init_generic(struct lu_context_key *k, ...) \
+	{							       \
+		struct lu_context_key *key = k;			 \
+		va_list args;					   \
+									\
+		va_start(args, k);				      \
+		do {						    \
+			LU_CONTEXT_KEY_INIT(key);		       \
+			key = va_arg(args, struct lu_context_key *);    \
+		} while (key != NULL);				  \
+		va_end(args);					   \
+	}
+
+#define LU_TYPE_INIT(mod, ...)					  \
+	LU_KEY_INIT_GENERIC(mod)					\
+	static int mod##_type_init(struct lu_device_type *t)	    \
+	{							       \
+		mod##_key_init_generic(__VA_ARGS__, NULL);	      \
+		return lu_context_key_register_many(__VA_ARGS__, NULL); \
+	}							       \
+	struct __##mod##_dummy_type_init {;}
+
+#define LU_TYPE_FINI(mod, ...)					  \
+	static void mod##_type_fini(struct lu_device_type *t)	   \
+	{							       \
+		lu_context_key_degister_many(__VA_ARGS__, NULL);	\
+	}							       \
+	struct __##mod##_dummy_type_fini {;}
+
+#define LU_TYPE_START(mod, ...)				 \
+	static void mod##_type_start(struct lu_device_type *t)  \
+	{						       \
+		lu_context_key_revive_many(__VA_ARGS__, NULL);  \
+	}						       \
+	struct __##mod##_dummy_type_start {;}
+
+#define LU_TYPE_STOP(mod, ...)				  \
+	static void mod##_type_stop(struct lu_device_type *t)   \
+	{						       \
+		lu_context_key_quiesce_many(__VA_ARGS__, NULL); \
+	}						       \
+	struct __##mod##_dummy_type_stop {;}
+
+
+
+#define LU_TYPE_INIT_FINI(mod, ...)	     \
+	LU_TYPE_INIT(mod, __VA_ARGS__);	 \
+	LU_TYPE_FINI(mod, __VA_ARGS__);	 \
+	LU_TYPE_START(mod, __VA_ARGS__);	\
+	LU_TYPE_STOP(mod, __VA_ARGS__)
+
+int   lu_context_init  (struct lu_context *ctx, __u32 tags);
+void  lu_context_fini  (struct lu_context *ctx);
+void  lu_context_enter (struct lu_context *ctx);
+void  lu_context_exit  (struct lu_context *ctx);
+int   lu_context_refill(struct lu_context *ctx);
+
+/*
+ * Helper functions to operate on multiple keys. These are used by the default
+ * device type operations, defined by LU_TYPE_INIT_FINI().
+ */
+
+int  lu_context_key_register_many(struct lu_context_key *k, ...);
+void lu_context_key_degister_many(struct lu_context_key *k, ...);
+void lu_context_key_revive_many  (struct lu_context_key *k, ...);
+void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
+
+/*
+ * update/clear ctx/ses tags.
+ */
+void lu_context_tags_update(__u32 tags);
+void lu_context_tags_clear(__u32 tags);
+void lu_session_tags_update(__u32 tags);
+void lu_session_tags_clear(__u32 tags);
+
+/**
+ * Environment.
+ */
+struct lu_env {
+	/**
+	 * "Local" context, used to store data instead of stack.
+	 */
+	struct lu_context  le_ctx;
+	/**
+	 * "Session" context for per-request data.
+	 */
+	struct lu_context *le_ses;
+};
+
+int  lu_env_init  (struct lu_env *env, __u32 tags);
+void lu_env_fini  (struct lu_env *env);
+int  lu_env_refill(struct lu_env *env);
+int  lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, __u32 stags);
+
+/** @} lu_context */
+
+/**
+ * Output site statistical counters into a buffer. Suitable for
+ * ll_rd_*()-style functions.
+ */
+int lu_site_stats_print(const struct lu_site *s, struct seq_file *m);
+
+/**
+ * Common name structure to be passed around for various name related methods.
+ */
+struct lu_name {
+	const char    *ln_name;
+	int	    ln_namelen;
+};
+
+/**
+ * Common buffer structure to be passed around for various xattr_{s,g}et()
+ * methods.
+ */
+struct lu_buf {
+	void   *lb_buf;
+	ssize_t lb_len;
+};
+
+#define DLUBUF "(%p %zu)"
+#define PLUBUF(buf) (buf)->lb_buf, (buf)->lb_len
+/**
+ * One-time initializers, called at obdclass module initialization, not
+ * exported.
+ */
+
+/**
+ * Initialization of global lu_* data.
+ */
+int lu_global_init(void);
+
+/**
+ * Dual to lu_global_init().
+ */
+void lu_global_fini(void);
+
+struct lu_kmem_descr {
+	struct kmem_cache **ckd_cache;
+	const char       *ckd_name;
+	const size_t      ckd_size;
+};
+
+int  lu_kmem_init(struct lu_kmem_descr *caches);
+void lu_kmem_fini(struct lu_kmem_descr *caches);
+
+void lu_object_assign_fid(const struct lu_env *env, struct lu_object *o,
+			  const struct lu_fid *fid);
+struct lu_object *lu_object_anon(const struct lu_env *env,
+				 struct lu_device *dev,
+				 const struct lu_object_conf *conf);
+
+/** null buffer */
+extern struct lu_buf LU_BUF_NULL;
+
+void lu_buf_free(struct lu_buf *buf);
+void lu_buf_alloc(struct lu_buf *buf, int size);
+void lu_buf_realloc(struct lu_buf *buf, int size);
+
+int lu_buf_check_and_grow(struct lu_buf *buf, int len);
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, int len);
+
+/** @} lu */
+#endif /* __LUSTRE_LU_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/lu_ref.h b/drivers/staging/lustre/lustre/include/lu_ref.h
new file mode 100644
index 000000000000..624c19be1524
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lu_ref.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LUSTRE_LU_REF_H
+#define __LUSTRE_LU_REF_H
+
+#include <linux/list.h>
+
+/** \defgroup lu_ref lu_ref
+ *
+ * An interface to track references between objects. Mostly for debugging.
+ *
+ * Suppose there is a reference counted data-structure struct foo. To track
+ * who acquired references to instance of struct foo, add lu_ref field to it:
+ *
+ * \code
+ *	 struct foo {
+ *		 atomic_t      foo_refcount;
+ *		 struct lu_ref foo_reference;
+ *		 ...
+ *	 };
+ * \endcode
+ *
+ * foo::foo_reference has to be initialized by calling
+ * lu_ref_init(). Typically there will be functions or macros to increment and
+ * decrement foo::foo_refcount, let's say they are foo_get(struct foo *foo)
+ * and foo_put(struct foo *foo), respectively.
+ *
+ * Whenever foo_get() is called to acquire a reference on a foo, lu_ref_add()
+ * has to be called to insert into foo::foo_reference a record, describing
+ * acquired reference. Dually, lu_ref_del() removes matching record. Typical
+ * usages are:
+ *
+ * \code
+ *	struct bar *bar;
+ *
+ *	// bar owns a reference to foo.
+ *	bar->bar_foo = foo_get(foo);
+ *	lu_ref_add(&foo->foo_reference, "bar", bar);
+ *
+ *	...
+ *
+ *	// reference from bar to foo is released.
+ *	lu_ref_del(&foo->foo_reference, "bar", bar);
+ *	foo_put(bar->bar_foo);
+ *
+ *
+ *	// current thread acquired a temporary reference to foo.
+ *	foo_get(foo);
+ *	lu_ref_add(&foo->reference, __FUNCTION__, current);
+ *
+ *	...
+ *
+ *	// temporary reference is released.
+ *	lu_ref_del(&foo->reference, __FUNCTION__, current);
+ *	foo_put(foo);
+ * \endcode
+ *
+ * \e Et \e cetera. Often it makes sense to include lu_ref_add() and
+ * lu_ref_del() calls into foo_get() and foo_put(). When an instance of struct
+ * foo is destroyed, lu_ref_fini() has to be called that checks that no
+ * pending references remain. lu_ref_print() can be used to dump a list of
+ * pending references, while hunting down a leak.
+ *
+ * For objects to which a large number of references can be acquired,
+ * lu_ref_del() can become cpu consuming, as it has to scan the list of
+ * references. To work around this, remember result of lu_ref_add() (usually
+ * in the same place where pointer to struct foo is stored), and use
+ * lu_ref_del_at():
+ *
+ * \code
+ *	// There is a large number of bar's for a single foo.
+ *	bar->bar_foo     = foo_get(foo);
+ *	bar->bar_foo_ref = lu_ref_add(&foo->foo_reference, "bar", bar);
+ *
+ *	...
+ *
+ *	// reference from bar to foo is released.
+ *	lu_ref_del_at(&foo->foo_reference, bar->bar_foo_ref, "bar", bar);
+ *	foo_put(bar->bar_foo);
+ * \endcode
+ *
+ * lu_ref interface degrades gracefully in case of memory shortages.
+ *
+ * @{
+ */
+
+
+struct lu_ref  {};
+
+static inline void lu_ref_init(struct lu_ref *ref)
+{
+}
+
+static inline void lu_ref_fini(struct lu_ref *ref)
+{
+}
+
+static inline struct lu_ref_link *lu_ref_add(struct lu_ref *ref,
+					     const char *scope,
+					     const void *source)
+{
+	return NULL;
+}
+
+static inline struct lu_ref_link *lu_ref_add_atomic(struct lu_ref *ref,
+						    const char *scope,
+						    const void *source)
+{
+	return NULL;
+}
+
+static inline void lu_ref_del(struct lu_ref *ref, const char *scope,
+			      const void *source)
+{
+}
+
+static inline void lu_ref_set_at(struct lu_ref *ref, struct lu_ref_link *link,
+				 const char *scope, const void *source0,
+				 const void *source1)
+{
+}
+
+static inline void lu_ref_del_at(struct lu_ref *ref, struct lu_ref_link *link,
+				 const char *scope, const void *source)
+{
+}
+
+static inline int lu_ref_global_init(void)
+{
+	return 0;
+}
+
+static inline void lu_ref_global_fini(void)
+{
+}
+
+static inline void lu_ref_print(const struct lu_ref *ref)
+{
+}
+
+static inline void lu_ref_print_all(void)
+{
+}
+
+/** @} lu */
+
+#endif /* __LUSTRE_LU_REF_H */
diff --git a/drivers/staging/lustre/lustre/include/lu_target.h b/drivers/staging/lustre/lustre/include/lu_target.h
new file mode 100644
index 000000000000..8d48cf4e27ee
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lu_target.h
@@ -0,0 +1,91 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_LU_TARGET_H
+#define _LUSTRE_LU_TARGET_H
+
+#include <dt_object.h>
+#include <lustre_disk.h>
+
+struct lu_target {
+	struct obd_device       *lut_obd;
+	struct dt_device	*lut_bottom;
+	/** last_rcvd file */
+	struct dt_object	*lut_last_rcvd;
+	/* transaction callbacks */
+	struct dt_txn_callback   lut_txn_cb;
+	/** server data in last_rcvd file */
+	struct lr_server_data    lut_lsd;
+	/** Server last transaction number */
+	__u64		    lut_last_transno;
+	/** Lock protecting last transaction number */
+	spinlock_t		 lut_translock;
+	/** Lock protecting client bitmap */
+	spinlock_t		 lut_client_bitmap_lock;
+	/** Bitmap of known clients */
+	unsigned long	   *lut_client_bitmap;
+};
+
+typedef void (*tgt_cb_t)(struct lu_target *lut, __u64 transno,
+			 void *data, int err);
+struct tgt_commit_cb {
+	tgt_cb_t  tgt_cb_func;
+	void     *tgt_cb_data;
+};
+
+void tgt_boot_epoch_update(struct lu_target *lut);
+int tgt_last_commit_cb_add(struct thandle *th, struct lu_target *lut,
+			   struct obd_export *exp, __u64 transno);
+int tgt_new_client_cb_add(struct thandle *th, struct obd_export *exp);
+int tgt_init(const struct lu_env *env, struct lu_target *lut,
+	     struct obd_device *obd, struct dt_device *dt);
+void tgt_fini(const struct lu_env *env, struct lu_target *lut);
+int tgt_client_alloc(struct obd_export *exp);
+void tgt_client_free(struct obd_export *exp);
+int tgt_client_del(const struct lu_env *env, struct obd_export *exp);
+int tgt_client_add(const struct lu_env *env, struct obd_export *exp, int);
+int tgt_client_new(const struct lu_env *env, struct obd_export *exp);
+int tgt_client_data_read(const struct lu_env *env, struct lu_target *tg,
+			 struct lsd_client_data *lcd, loff_t *off, int index);
+int tgt_client_data_write(const struct lu_env *env, struct lu_target *tg,
+			  struct lsd_client_data *lcd, loff_t *off, struct thandle *th);
+int tgt_server_data_read(const struct lu_env *env, struct lu_target *tg);
+int tgt_server_data_write(const struct lu_env *env, struct lu_target *tg,
+			  struct thandle *th);
+int tgt_server_data_update(const struct lu_env *env, struct lu_target *tg, int sync);
+int tgt_truncate_last_rcvd(const struct lu_env *env, struct lu_target *tg, loff_t off);
+
+#endif /* __LUSTRE_LU_TARGET_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/libiam.h b/drivers/staging/lustre/lustre/include/lustre/libiam.h
new file mode 100644
index 000000000000..e8e0b084a6bc
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/libiam.h
@@ -0,0 +1,145 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre/libiam.h
+ *
+ * iam user level library
+ *
+ * Author: Wang Di <wangdi@clusterfs.com>
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ * Author: Fan Yong <fanyong@clusterfs.com>
+ */
+
+/*
+ *  lustre/libiam.h
+ */
+
+#ifndef __IAM_ULIB_H__
+#define __IAM_ULIB_H__
+
+/** \defgroup libiam libiam
+ *
+ * @{
+ */
+
+
+#define DX_FMT_NAME_LEN 16
+
+enum iam_fmt_t {
+	FMT_LFIX,
+	FMT_LVAR
+};
+
+struct iam_uapi_info {
+	__u16 iui_keysize;
+	__u16 iui_recsize;
+	__u16 iui_ptrsize;
+	__u16 iui_height;
+	char  iui_fmt_name[DX_FMT_NAME_LEN];
+};
+
+/*
+ * Creat an iam file, but do NOT open it.
+ * Return 0 if success, else -1.
+ */
+int iam_creat(char *filename, enum iam_fmt_t fmt,
+	      int blocksize, int keysize, int recsize, int ptrsize);
+
+/*
+ * Open an iam file, but do NOT creat it if the file doesn't exist.
+ * Please use iam_creat for creating the file before use iam_open.
+ * Return file id (fd) if success, else -1.
+ */
+int iam_open(char *filename, struct iam_uapi_info *ua);
+
+/*
+ * Close file opened by iam_open.
+ */
+int iam_close(int fd);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_insert(int fd, struct iam_uapi_info *ua,
+	       int key_need_convert, char *keybuf,
+	       int rec_need_convert, char *recbuf);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_lookup(int fd, struct iam_uapi_info *ua,
+	       int key_need_convert, char *key_buf,
+	       int *keysize, char *save_key,
+	       int rec_need_convert, char *rec_buf,
+	       int *recsize, char *save_rec);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_delete(int fd, struct iam_uapi_info *ua,
+	       int key_need_convert, char *keybuf,
+	       int rec_need_convert, char *recbuf);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_it_start(int fd, struct iam_uapi_info *ua,
+		 int key_need_convert, char *key_buf,
+		 int *keysize, char *save_key,
+		 int rec_need_convert, char *rec_buf,
+		 int *recsize, char *save_rec);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_it_next(int fd, struct iam_uapi_info *ua,
+		int key_need_convert, char *key_buf,
+		int *keysize, char *save_key,
+		int rec_need_convert, char *rec_buf,
+		int *recsize, char *save_rec);
+
+/*
+ * Please use iam_open before use this function.
+ */
+int iam_it_stop(int fd, struct iam_uapi_info *ua,
+		int key_need_convert, char *keybuf,
+		int rec_need_convert, char *recbuf);
+
+/*
+ * Change iam file mode.
+ */
+int iam_polymorph(char *filename, unsigned long mode);
+
+/** @} libiam */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre/liblustreapi.h b/drivers/staging/lustre/lustre/include/lustre/liblustreapi.h
new file mode 100644
index 000000000000..707eb74fdf68
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/liblustreapi.h
@@ -0,0 +1,43 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/*
+ * NOTE: This file is DEPRECATED!  Please include lustreapi.h directly
+ * instead of this file.  This file will be removed from a future version
+ * of lustre!
+ */
+
+#ifndef _LIBLUSTREAPI_H_
+#define _LIBLUSTREAPI_H_
+
+#include <lustre/lustreapi.h>
+#warning "Including liblustreapi.h is deprecated. Include lustreapi.h directly."
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
new file mode 100644
index 000000000000..ad253c6deadd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
@@ -0,0 +1,121 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre/ll_fiemap.h
+ *
+ * FIEMAP data structures and flags. This header file will be used until
+ * fiemap.h is available in the upstream kernel.
+ *
+ * Author: Kalpak Shah <kalpak.shah@sun.com>
+ * Author: Andreas Dilger <adilger@sun.com>
+ */
+
+#ifndef _LUSTRE_FIEMAP_H
+#define _LUSTRE_FIEMAP_H
+
+
+
+struct ll_fiemap_extent {
+	__u64 fe_logical;  /* logical offset in bytes for the start of
+			    * the extent from the beginning of the file */
+	__u64 fe_physical; /* physical offset in bytes for the start
+			    * of the extent from the beginning of the disk */
+	__u64 fe_length;   /* length in bytes for this extent */
+	__u64 fe_reserved64[2];
+	__u32 fe_flags;    /* FIEMAP_EXTENT_* flags for this extent */
+	__u32 fe_device;   /* device number for this extent */
+	__u32 fe_reserved[2];
+};
+
+struct ll_user_fiemap {
+	__u64 fm_start;  /* logical offset (inclusive) at
+			  * which to start mapping (in) */
+	__u64 fm_length; /* logical length of mapping which
+			  * userspace wants (in) */
+	__u32 fm_flags;  /* FIEMAP_FLAG_* flags for request (in/out) */
+	__u32 fm_mapped_extents;/* number of extents that were mapped (out) */
+	__u32 fm_extent_count;  /* size of fm_extents array (in) */
+	__u32 fm_reserved;
+	struct ll_fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
+};
+
+#define FIEMAP_MAX_OFFSET      (~0ULL)
+
+#define FIEMAP_FLAG_SYNC	 0x00000001 /* sync file data before map */
+#define FIEMAP_FLAG_XATTR	0x00000002 /* map extended attribute tree */
+
+#define FIEMAP_EXTENT_LAST	      0x00000001 /* Last extent in file. */
+#define FIEMAP_EXTENT_UNKNOWN	   0x00000002 /* Data location unknown. */
+#define FIEMAP_EXTENT_DELALLOC	  0x00000004 /* Location still pending.
+						    * Sets EXTENT_UNKNOWN. */
+#define FIEMAP_EXTENT_ENCODED	   0x00000008 /* Data can not be read
+						    * while fs is unmounted */
+#define FIEMAP_EXTENT_DATA_ENCRYPTED    0x00000080 /* Data is encrypted by fs.
+						    * Sets EXTENT_NO_DIRECT. */
+#define FIEMAP_EXTENT_NOT_ALIGNED       0x00000100 /* Extent offsets may not be
+						    * block aligned. */
+#define FIEMAP_EXTENT_DATA_INLINE       0x00000200 /* Data mixed with metadata.
+						    * Sets EXTENT_NOT_ALIGNED.*/
+#define FIEMAP_EXTENT_DATA_TAIL	 0x00000400 /* Multiple files in block.
+						    * Sets EXTENT_NOT_ALIGNED.*/
+#define FIEMAP_EXTENT_UNWRITTEN	 0x00000800 /* Space allocated, but
+						    * no data (i.e. zero). */
+#define FIEMAP_EXTENT_MERGED	    0x00001000 /* File does not natively
+						    * support extents. Result
+						    * merged for efficiency. */
+
+
+static inline size_t fiemap_count_to_size(size_t extent_count)
+{
+	return (sizeof(struct ll_user_fiemap) + extent_count *
+					       sizeof(struct ll_fiemap_extent));
+}
+
+static inline unsigned fiemap_size_to_count(size_t array_size)
+{
+	return ((array_size - sizeof(struct ll_user_fiemap)) /
+					       sizeof(struct ll_fiemap_extent));
+}
+
+#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
+
+#ifdef FIEMAP_FLAGS_COMPAT
+#undef FIEMAP_FLAGS_COMPAT
+#endif
+
+/* Lustre specific flags - use a high bit, don't conflict with upstream flag */
+#define FIEMAP_EXTENT_NO_DIRECT	 0x40000000 /* Data mapping undefined */
+#define FIEMAP_EXTENT_NET	       0x80000000 /* Data stored remotely.
+						    * Sets NO_DIRECT flag */
+
+#endif /* _LUSTRE_FIEMAP_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_build_version.h b/drivers/staging/lustre/lustre/include/lustre/lustre_build_version.h
new file mode 100644
index 000000000000..93a3d7db3010
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_build_version.h
@@ -0,0 +1,2 @@
+#define BUILD_VERSION "v2_3_64_0-g6e62c21-CHANGED-3.9.0"
+#define LUSTRE_RELEASE 3.9.0_g6e62c21
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
new file mode 100644
index 000000000000..8825460f12ac
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -0,0 +1,3653 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre/lustre_idl.h
+ *
+ * Lustre wire protocol definitions.
+ */
+
+/** \defgroup lustreidl lustreidl
+ *
+ * Lustre wire protocol definitions.
+ *
+ * ALL structs passing over the wire should be declared here.  Structs
+ * that are used in interfaces with userspace should go in lustre_user.h.
+ *
+ * All structs being declared here should be built from simple fixed-size
+ * types (__u8, __u16, __u32, __u64) or be built from other types or
+ * structs also declared in this file.  Similarly, all flags and magic
+ * values in those structs should also be declared here.  This ensures
+ * that the Lustre wire protocol is not influenced by external dependencies.
+ *
+ * The only other acceptable items in this file are VERY SIMPLE accessor
+ * functions to avoid callers grubbing inside the structures, and the
+ * prototypes of the swabber functions for each struct.  Nothing that
+ * depends on external functions or definitions should be in here.
+ *
+ * Structs must be properly aligned to put 64-bit values on an 8-byte
+ * boundary.  Any structs being added here must also be added to
+ * utils/wirecheck.c and "make newwiretest" run to regenerate the
+ * utils/wiretest.c sources.  This allows us to verify that wire structs
+ * have the proper alignment/size on all architectures.
+ *
+ * DO NOT CHANGE any of the structs, flags, values declared here and used
+ * in released Lustre versions.  Some structs may have padding fields that
+ * can be used.  Some structs might allow addition at the end (verify this
+ * in the code to ensure that new/old clients that see this larger struct
+ * do not fail, otherwise you need to implement protocol compatibility).
+ *
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format.  The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed() below).
+ *
+ * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines,
+ * implemented either here, inline (trivial implementations) or in
+ * ptlrpc/pack_generic.c.  These 'swabbers' convert the type from "other"
+ * endian, in-place in the message buffer.
+ *
+ * A swabber takes a single pointer argument.  The caller must already have
+ * verified that the length of the message buffer >= sizeof (type).
+ *
+ * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
+ * may be defined that swabs just the variable part, after the caller has
+ * verified that the message buffer is large enough.
+ *
+ * @{
+ */
+
+#ifndef _LUSTRE_IDL_H_
+#define _LUSTRE_IDL_H_
+
+#if !defined(LASSERT) && !defined(LPU64)
+#include <linux/libcfs/libcfs.h> /* for LASSERT, LPUX64, etc */
+#endif
+
+/* Defn's shared with user-space. */
+#include <lustre/lustre_user.h>
+
+/*
+ *  GENERAL STUFF
+ */
+/* FOO_REQUEST_PORTAL is for incoming requests on the FOO
+ * FOO_REPLY_PORTAL   is for incoming replies on the FOO
+ * FOO_BULK_PORTAL    is for incoming bulk on the FOO
+ */
+
+#define CONNMGR_REQUEST_PORTAL	  1
+#define CONNMGR_REPLY_PORTAL	    2
+//#define OSC_REQUEST_PORTAL	    3
+#define OSC_REPLY_PORTAL		4
+//#define OSC_BULK_PORTAL	       5
+#define OST_IO_PORTAL		   6
+#define OST_CREATE_PORTAL	       7
+#define OST_BULK_PORTAL		 8
+//#define MDC_REQUEST_PORTAL	    9
+#define MDC_REPLY_PORTAL	       10
+//#define MDC_BULK_PORTAL	      11
+#define MDS_REQUEST_PORTAL	     12
+//#define MDS_REPLY_PORTAL	     13
+#define MDS_BULK_PORTAL		14
+#define LDLM_CB_REQUEST_PORTAL	 15
+#define LDLM_CB_REPLY_PORTAL	   16
+#define LDLM_CANCEL_REQUEST_PORTAL     17
+#define LDLM_CANCEL_REPLY_PORTAL       18
+//#define PTLBD_REQUEST_PORTAL	   19
+//#define PTLBD_REPLY_PORTAL	     20
+//#define PTLBD_BULK_PORTAL	      21
+#define MDS_SETATTR_PORTAL	     22
+#define MDS_READPAGE_PORTAL	    23
+#define MDS_MDS_PORTAL		 24
+
+#define MGC_REPLY_PORTAL	       25
+#define MGS_REQUEST_PORTAL	     26
+#define MGS_REPLY_PORTAL	       27
+#define OST_REQUEST_PORTAL	     28
+#define FLD_REQUEST_PORTAL	     29
+#define SEQ_METADATA_PORTAL	    30
+#define SEQ_DATA_PORTAL		31
+#define SEQ_CONTROLLER_PORTAL	  32
+#define MGS_BULK_PORTAL		33
+
+/* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com, n8851@cray.com */
+
+/* packet types */
+#define PTL_RPC_MSG_REQUEST 4711
+#define PTL_RPC_MSG_ERR     4712
+#define PTL_RPC_MSG_REPLY   4713
+
+/* DON'T use swabbed values of MAGIC as magic! */
+#define LUSTRE_MSG_MAGIC_V1 0x0BD00BD0
+#define LUSTRE_MSG_MAGIC_V2 0x0BD00BD3
+
+#define LUSTRE_MSG_MAGIC_V1_SWABBED 0xD00BD00B
+#define LUSTRE_MSG_MAGIC_V2_SWABBED 0xD30BD00B
+
+#define LUSTRE_MSG_MAGIC LUSTRE_MSG_MAGIC_V2
+
+#define PTLRPC_MSG_VERSION  0x00000003
+#define LUSTRE_VERSION_MASK 0xffff0000
+#define LUSTRE_OBD_VERSION  0x00010000
+#define LUSTRE_MDS_VERSION  0x00020000
+#define LUSTRE_OST_VERSION  0x00030000
+#define LUSTRE_DLM_VERSION  0x00040000
+#define LUSTRE_LOG_VERSION  0x00050000
+#define LUSTRE_MGS_VERSION  0x00060000
+
+typedef __u32 mdsno_t;
+typedef __u64 seqno_t;
+typedef __u64 obd_id;
+typedef __u64 obd_seq;
+typedef __s64 obd_time;
+typedef __u64 obd_size;
+typedef __u64 obd_off;
+typedef __u64 obd_blocks;
+typedef __u64 obd_valid;
+typedef __u32 obd_blksize;
+typedef __u32 obd_mode;
+typedef __u32 obd_uid;
+typedef __u32 obd_gid;
+typedef __u32 obd_flag;
+typedef __u32 obd_count;
+
+/**
+ * Describes a range of sequence, lsr_start is included but lsr_end is
+ * not in the range.
+ * Same structure is used in fld module where lsr_index field holds mdt id
+ * of the home mdt.
+ */
+struct lu_seq_range {
+	__u64 lsr_start;
+	__u64 lsr_end;
+	__u32 lsr_index;
+	__u32 lsr_flags;
+};
+
+#define LU_SEQ_RANGE_MDT	0x0
+#define LU_SEQ_RANGE_OST	0x1
+#define LU_SEQ_RANGE_ANY	0x3
+
+#define LU_SEQ_RANGE_MASK	0x3
+
+static inline unsigned fld_range_type(const struct lu_seq_range *range)
+{
+	return range->lsr_flags & LU_SEQ_RANGE_MASK;
+}
+
+static inline int fld_range_is_ost(const struct lu_seq_range *range)
+{
+	return fld_range_type(range) == LU_SEQ_RANGE_OST;
+}
+
+static inline int fld_range_is_mdt(const struct lu_seq_range *range)
+{
+	return fld_range_type(range) == LU_SEQ_RANGE_MDT;
+}
+
+/**
+ * This all range is only being used when fld client sends fld query request,
+ * but it does not know whether the seq is MDT or OST, so it will send req
+ * with ALL type, which means either seq type gotten from lookup can be
+ * expected.
+ */
+static inline unsigned fld_range_is_any(const struct lu_seq_range *range)
+{
+	return fld_range_type(range) == LU_SEQ_RANGE_ANY;
+}
+
+static inline void fld_range_set_type(struct lu_seq_range *range,
+				      unsigned flags)
+{
+	LASSERT(!(flags & ~LU_SEQ_RANGE_MASK));
+	range->lsr_flags |= flags;
+}
+
+static inline void fld_range_set_mdt(struct lu_seq_range *range)
+{
+	fld_range_set_type(range, LU_SEQ_RANGE_MDT);
+}
+
+static inline void fld_range_set_ost(struct lu_seq_range *range)
+{
+	fld_range_set_type(range, LU_SEQ_RANGE_OST);
+}
+
+static inline void fld_range_set_any(struct lu_seq_range *range)
+{
+	fld_range_set_type(range, LU_SEQ_RANGE_ANY);
+}
+
+/**
+ * returns  width of given range \a r
+ */
+
+static inline __u64 range_space(const struct lu_seq_range *range)
+{
+	return range->lsr_end - range->lsr_start;
+}
+
+/**
+ * initialize range to zero
+ */
+
+static inline void range_init(struct lu_seq_range *range)
+{
+	range->lsr_start = range->lsr_end = range->lsr_index = 0;
+}
+
+/**
+ * check if given seq id \a s is within given range \a r
+ */
+
+static inline int range_within(const struct lu_seq_range *range,
+			       __u64 s)
+{
+	return s >= range->lsr_start && s < range->lsr_end;
+}
+
+static inline int range_is_sane(const struct lu_seq_range *range)
+{
+	return (range->lsr_end >= range->lsr_start);
+}
+
+static inline int range_is_zero(const struct lu_seq_range *range)
+{
+	return (range->lsr_start == 0 && range->lsr_end == 0);
+}
+
+static inline int range_is_exhausted(const struct lu_seq_range *range)
+
+{
+	return range_space(range) == 0;
+}
+
+/* return 0 if two range have the same location */
+static inline int range_compare_loc(const struct lu_seq_range *r1,
+				    const struct lu_seq_range *r2)
+{
+	return r1->lsr_index != r2->lsr_index ||
+	       r1->lsr_flags != r2->lsr_flags;
+}
+
+#define DRANGE "[%#16.16"LPF64"x-%#16.16"LPF64"x):%x:%s"
+
+#define PRANGE(range)		\
+	(range)->lsr_start,	\
+	(range)->lsr_end,	\
+	(range)->lsr_index,	\
+	fld_range_is_mdt(range) ? "mdt" : "ost"
+
+
+/** \defgroup lu_fid lu_fid
+ * @{ */
+
+/**
+ * Flags for lustre_mdt_attrs::lma_compat and lustre_mdt_attrs::lma_incompat.
+ * Deprecated since HSM and SOM attributes are now stored in separate on-disk
+ * xattr.
+ */
+enum lma_compat {
+	LMAC_HSM = 0x00000001,
+	LMAC_SOM = 0x00000002,
+};
+
+/**
+ * Masks for all features that should be supported by a Lustre version to
+ * access a specific file.
+ * This information is stored in lustre_mdt_attrs::lma_incompat.
+ */
+enum lma_incompat {
+	LMAI_RELEASED = 0x0000001, /* file is released */
+	LMAI_AGENT = 0x00000002, /* agent inode */
+	LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object
+					    is on the remote MDT */
+};
+#define LMA_INCOMPAT_SUPP	(LMAI_AGENT | LMAI_REMOTE_PARENT)
+
+extern void lustre_lma_swab(struct lustre_mdt_attrs *lma);
+extern void lustre_lma_init(struct lustre_mdt_attrs *lma,
+			    const struct lu_fid *fid, __u32 incompat);
+/**
+ * SOM on-disk attributes stored in a separate xattr.
+ */
+struct som_attrs {
+	/** Bitfield for supported data in this structure. For future use. */
+	__u32	som_compat;
+
+	/** Incompat feature list. The supported feature mask is availabe in
+	 * SOM_INCOMPAT_SUPP */
+	__u32	som_incompat;
+
+	/** IO Epoch SOM attributes belongs to */
+	__u64	som_ioepoch;
+	/** total file size in objects */
+	__u64	som_size;
+	/** total fs blocks in objects */
+	__u64	som_blocks;
+	/** mds mount id the size is valid for */
+	__u64	som_mountid;
+};
+extern void lustre_som_swab(struct som_attrs *attrs);
+
+#define SOM_INCOMPAT_SUPP 0x0
+
+/**
+ * HSM on-disk attributes stored in a separate xattr.
+ */
+struct hsm_attrs {
+	/** Bitfield for supported data in this structure. For future use. */
+	__u32	hsm_compat;
+
+	/** HSM flags, see hsm_flags enum below */
+	__u32	hsm_flags;
+	/** backend archive id associated with the file */
+	__u64	hsm_arch_id;
+	/** version associated with the last archiving, if any */
+	__u64	hsm_arch_ver;
+};
+extern void lustre_hsm_swab(struct hsm_attrs *attrs);
+
+/**
+ * fid constants
+ */
+enum {
+	/** LASTID file has zero OID */
+	LUSTRE_FID_LASTID_OID = 0UL,
+	/** initial fid id value */
+	LUSTRE_FID_INIT_OID  = 1UL
+};
+
+/** returns fid object sequence */
+static inline __u64 fid_seq(const struct lu_fid *fid)
+{
+	return fid->f_seq;
+}
+
+/** returns fid object id */
+static inline __u32 fid_oid(const struct lu_fid *fid)
+{
+	return fid->f_oid;
+}
+
+/** returns fid object version */
+static inline __u32 fid_ver(const struct lu_fid *fid)
+{
+	return fid->f_ver;
+}
+
+static inline void fid_zero(struct lu_fid *fid)
+{
+	memset(fid, 0, sizeof(*fid));
+}
+
+static inline obd_id fid_ver_oid(const struct lu_fid *fid)
+{
+	return ((__u64)fid_ver(fid) << 32 | fid_oid(fid));
+}
+
+/**
+ * Note that reserved SEQ numbers below 12 will conflict with ldiskfs
+ * inodes in the IGIF namespace, so these reserved SEQ numbers can be
+ * used for other purposes and not risk collisions with existing inodes.
+ *
+ * Different FID Format
+ * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0
+ */
+enum fid_seq {
+	FID_SEQ_OST_MDT0	= 0,
+	FID_SEQ_LLOG		= 1, /* unnamed llogs */
+	FID_SEQ_ECHO		= 2,
+	FID_SEQ_OST_MDT1	= 3,
+	FID_SEQ_OST_MAX		= 9, /* Max MDT count before OST_on_FID */
+	FID_SEQ_LLOG_NAME	= 10, /* named llogs */
+	FID_SEQ_RSVD		= 11,
+	FID_SEQ_IGIF		= 12,
+	FID_SEQ_IGIF_MAX	= 0x0ffffffffULL,
+	FID_SEQ_IDIF		= 0x100000000ULL,
+	FID_SEQ_IDIF_MAX	= 0x1ffffffffULL,
+	/* Normal FID sequence starts from this value, i.e. 1<<33 */
+	FID_SEQ_START		= 0x200000000ULL,
+	/* sequence for local pre-defined FIDs listed in local_oid */
+	FID_SEQ_LOCAL_FILE	= 0x200000001ULL,
+	FID_SEQ_DOT_LUSTRE	= 0x200000002ULL,
+	/* sequence is used for local named objects FIDs generated
+	 * by local_object_storage library */
+	FID_SEQ_LOCAL_NAME	= 0x200000003ULL,
+	/* Because current FLD will only cache the fid sequence, instead
+	 * of oid on the client side, if the FID needs to be exposed to
+	 * clients sides, it needs to make sure all of fids under one
+	 * sequence will be located in one MDT. */
+	FID_SEQ_SPECIAL		= 0x200000004ULL,
+	FID_SEQ_QUOTA		= 0x200000005ULL,
+	FID_SEQ_QUOTA_GLB	= 0x200000006ULL,
+	FID_SEQ_ROOT		= 0x200000007ULL,  /* Located on MDT0 */
+	FID_SEQ_NORMAL		= 0x200000400ULL,
+	FID_SEQ_LOV_DEFAULT	= 0xffffffffffffffffULL
+};
+
+#define OBIF_OID_MAX_BITS	   32
+#define OBIF_MAX_OID		(1ULL << OBIF_OID_MAX_BITS)
+#define OBIF_OID_MASK	       ((1ULL << OBIF_OID_MAX_BITS) - 1)
+#define IDIF_OID_MAX_BITS	   48
+#define IDIF_MAX_OID		(1ULL << IDIF_OID_MAX_BITS)
+#define IDIF_OID_MASK	       ((1ULL << IDIF_OID_MAX_BITS) - 1)
+
+/** OID for FID_SEQ_SPECIAL */
+enum special_oid {
+	/* Big Filesystem Lock to serialize rename operations */
+	FID_OID_SPECIAL_BFL     = 1UL,
+};
+
+/** OID for FID_SEQ_DOT_LUSTRE */
+enum dot_lustre_oid {
+	FID_OID_DOT_LUSTRE  = 1UL,
+	FID_OID_DOT_LUSTRE_OBF = 2UL,
+};
+
+static inline int fid_seq_is_mdt0(obd_seq seq)
+{
+	return (seq == FID_SEQ_OST_MDT0);
+}
+
+static inline int fid_seq_is_mdt(const __u64 seq)
+{
+	return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL;
+};
+
+static inline int fid_seq_is_echo(obd_seq seq)
+{
+	return (seq == FID_SEQ_ECHO);
+}
+
+static inline int fid_is_echo(const struct lu_fid *fid)
+{
+	return fid_seq_is_echo(fid_seq(fid));
+}
+
+static inline int fid_seq_is_llog(obd_seq seq)
+{
+	return (seq == FID_SEQ_LLOG);
+}
+
+static inline int fid_is_llog(const struct lu_fid *fid)
+{
+	/* file with OID == 0 is not llog but contains last oid */
+	return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0;
+}
+
+static inline int fid_seq_is_rsvd(const __u64 seq)
+{
+	return (seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD);
+};
+
+static inline int fid_seq_is_special(const __u64 seq)
+{
+	return seq == FID_SEQ_SPECIAL;
+};
+
+static inline int fid_seq_is_local_file(const __u64 seq)
+{
+	return seq == FID_SEQ_LOCAL_FILE ||
+	       seq == FID_SEQ_LOCAL_NAME;
+};
+
+static inline int fid_seq_is_root(const __u64 seq)
+{
+	return seq == FID_SEQ_ROOT;
+}
+
+static inline int fid_seq_is_dot(const __u64 seq)
+{
+	return seq == FID_SEQ_DOT_LUSTRE;
+}
+
+static inline int fid_seq_is_default(const __u64 seq)
+{
+	return seq == FID_SEQ_LOV_DEFAULT;
+}
+
+static inline int fid_is_mdt0(const struct lu_fid *fid)
+{
+	return fid_seq_is_mdt0(fid_seq(fid));
+}
+
+static inline void lu_root_fid(struct lu_fid *fid)
+{
+	fid->f_seq = FID_SEQ_ROOT;
+	fid->f_oid = 1;
+	fid->f_ver = 0;
+}
+
+/**
+ * Check if a fid is igif or not.
+ * \param fid the fid to be tested.
+ * \return true if the fid is a igif; otherwise false.
+ */
+static inline int fid_seq_is_igif(const __u64 seq)
+{
+	return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
+}
+
+static inline int fid_is_igif(const struct lu_fid *fid)
+{
+	return fid_seq_is_igif(fid_seq(fid));
+}
+
+/**
+ * Check if a fid is idif or not.
+ * \param fid the fid to be tested.
+ * \return true if the fid is a idif; otherwise false.
+ */
+static inline int fid_seq_is_idif(const __u64 seq)
+{
+	return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
+}
+
+static inline int fid_is_idif(const struct lu_fid *fid)
+{
+	return fid_seq_is_idif(fid_seq(fid));
+}
+
+static inline int fid_is_local_file(const struct lu_fid *fid)
+{
+	return fid_seq_is_local_file(fid_seq(fid));
+}
+
+static inline int fid_seq_is_norm(const __u64 seq)
+{
+	return (seq >= FID_SEQ_NORMAL);
+}
+
+static inline int fid_is_norm(const struct lu_fid *fid)
+{
+	return fid_seq_is_norm(fid_seq(fid));
+}
+
+/* convert an OST objid into an IDIF FID SEQ number */
+static inline obd_seq fid_idif_seq(obd_id id, __u32 ost_idx)
+{
+	return FID_SEQ_IDIF | (ost_idx << 16) | ((id >> 32) & 0xffff);
+}
+
+/* convert a packed IDIF FID into an OST objid */
+static inline obd_id fid_idif_id(obd_seq seq, __u32 oid, __u32 ver)
+{
+	return ((__u64)ver << 48) | ((seq & 0xffff) << 32) | oid;
+}
+
+/* extract ost index from IDIF FID */
+static inline __u32 fid_idif_ost_idx(const struct lu_fid *fid)
+{
+	LASSERT(fid_is_idif(fid));
+	return (fid_seq(fid) >> 16) & 0xffff;
+}
+
+/* extract OST sequence (group) from a wire ost_id (id/seq) pair */
+static inline obd_seq ostid_seq(const struct ost_id *ostid)
+{
+	if (fid_seq_is_mdt0(ostid->oi.oi_seq))
+		return FID_SEQ_OST_MDT0;
+
+	if (fid_seq_is_default(ostid->oi.oi_seq))
+		return FID_SEQ_LOV_DEFAULT;
+
+	if (fid_is_idif(&ostid->oi_fid))
+		return FID_SEQ_OST_MDT0;
+
+	return fid_seq(&ostid->oi_fid);
+}
+
+/* extract OST objid from a wire ost_id (id/seq) pair */
+static inline obd_id ostid_id(const struct ost_id *ostid)
+{
+	if (fid_seq_is_mdt0(ostid_seq(ostid)))
+		return ostid->oi.oi_id & IDIF_OID_MASK;
+
+	if (fid_is_idif(&ostid->oi_fid))
+		return fid_idif_id(fid_seq(&ostid->oi_fid),
+				   fid_oid(&ostid->oi_fid), 0);
+
+	return fid_oid(&ostid->oi_fid);
+}
+
+static inline void ostid_set_seq(struct ost_id *oi, __u64 seq)
+{
+	if (fid_seq_is_mdt0(seq) || fid_seq_is_default(seq)) {
+		oi->oi.oi_seq = seq;
+	} else {
+		oi->oi_fid.f_seq = seq;
+		/* Note: if f_oid + f_ver is zero, we need init it
+		 * to be 1, otherwise, ostid_seq will treat this
+		 * as old ostid (oi_seq == 0) */
+		if (oi->oi_fid.f_oid == 0 && oi->oi_fid.f_ver == 0)
+			oi->oi_fid.f_oid = LUSTRE_FID_INIT_OID;
+	}
+}
+
+static inline void ostid_set_seq_mdt0(struct ost_id *oi)
+{
+	ostid_set_seq(oi, FID_SEQ_OST_MDT0);
+}
+
+static inline void ostid_set_seq_echo(struct ost_id *oi)
+{
+	ostid_set_seq(oi, FID_SEQ_ECHO);
+}
+
+static inline void ostid_set_seq_llog(struct ost_id *oi)
+{
+	ostid_set_seq(oi, FID_SEQ_LLOG);
+}
+
+/**
+ * Note: we need check oi_seq to decide where to set oi_id,
+ * so oi_seq should always be set ahead of oi_id.
+ */
+static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
+{
+	if (fid_seq_is_mdt0(ostid_seq(oi))) {
+		if (oid >= IDIF_MAX_OID) {
+			CERROR("Bad "LPU64" to set "DOSTID"\n",
+				oid, POSTID(oi));
+			return;
+		}
+		oi->oi.oi_id = oid;
+	} else {
+		if (oid > OBIF_MAX_OID) {
+			CERROR("Bad "LPU64" to set "DOSTID"\n",
+				oid, POSTID(oi));
+			return;
+		}
+		oi->oi_fid.f_oid = oid;
+	}
+}
+
+static inline void ostid_inc_id(struct ost_id *oi)
+{
+	if (fid_seq_is_mdt0(ostid_seq(oi))) {
+		if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) {
+			CERROR("Bad inc "DOSTID"\n", POSTID(oi));
+			return;
+		}
+		oi->oi.oi_id++;
+	} else {
+		oi->oi_fid.f_oid++;
+	}
+}
+
+static inline void ostid_dec_id(struct ost_id *oi)
+{
+	if (fid_seq_is_mdt0(ostid_seq(oi)))
+		oi->oi.oi_id--;
+	else
+		oi->oi_fid.f_oid--;
+}
+
+/**
+ * Unpack an OST object id/seq (group) into a FID.  This is needed for
+ * converting all obdo, lmm, lsm, etc. 64-bit id/seq pairs into proper
+ * FIDs.  Note that if an id/seq is already in FID/IDIF format it will
+ * be passed through unchanged.  Only legacy OST objects in "group 0"
+ * will be mapped into the IDIF namespace so that they can fit into the
+ * struct lu_fid fields without loss.  For reference see:
+ * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs
+ */
+static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid,
+			       __u32 ost_idx)
+{
+	if (ost_idx > 0xffff) {
+		CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid),
+		       ost_idx);
+		return -EBADF;
+	}
+
+	if (fid_seq_is_mdt0(ostid_seq(ostid))) {
+		/* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
+		 * that we map into the IDIF namespace.  It allows up to 2^48
+		 * objects per OST, as this is the object namespace that has
+		 * been in production for years.  This can handle create rates
+		 * of 1M objects/s/OST for 9 years, or combinations thereof. */
+		if (ostid_id(ostid) >= IDIF_MAX_OID) {
+			 CERROR("bad MDT0 id, "DOSTID" ost_idx:%u\n",
+				POSTID(ostid), ost_idx);
+			 return -EBADF;
+		}
+		fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx);
+		/* truncate to 32 bits by assignment */
+		fid->f_oid = ostid_id(ostid);
+		/* in theory, not currently used */
+		fid->f_ver = ostid_id(ostid) >> 48;
+	} else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ {
+	       /* This is either an IDIF object, which identifies objects across
+		* all OSTs, or a regular FID.  The IDIF namespace maps legacy
+		* OST objects into the FID namespace.  In both cases, we just
+		* pass the FID through, no conversion needed. */
+		if (ostid->oi_fid.f_ver != 0) {
+			CERROR("bad MDT0 id, "DOSTID" ost_idx:%u\n",
+				POSTID(ostid), ost_idx);
+			return -EBADF;
+		}
+		*fid = ostid->oi_fid;
+	}
+
+	return 0;
+}
+
+/* pack any OST FID into an ostid (id/seq) for the wire/disk */
+static inline int fid_to_ostid(const struct lu_fid *fid, struct ost_id *ostid)
+{
+	if (unlikely(fid_seq_is_igif(fid->f_seq))) {
+		CERROR("bad IGIF, "DFID"\n", PFID(fid));
+		return -EBADF;
+	}
+
+	if (fid_is_idif(fid)) {
+		ostid_set_seq_mdt0(ostid);
+		ostid_set_id(ostid, fid_idif_id(fid_seq(fid), fid_oid(fid),
+						fid_ver(fid)));
+	} else {
+		ostid->oi_fid = *fid;
+	}
+
+	return 0;
+}
+
+/* Check whether the fid is for LAST_ID */
+static inline int fid_is_last_id(const struct lu_fid *fid)
+{
+	return (fid_oid(fid) == 0);
+}
+
+/**
+ * Get inode number from a igif.
+ * \param fid a igif to get inode number from.
+ * \return inode number for the igif.
+ */
+static inline ino_t lu_igif_ino(const struct lu_fid *fid)
+{
+	return fid_seq(fid);
+}
+
+extern void lustre_swab_ost_id(struct ost_id *oid);
+
+/**
+ * Get inode generation from a igif.
+ * \param fid a igif to get inode generation from.
+ * \return inode generation for the igif.
+ */
+static inline __u32 lu_igif_gen(const struct lu_fid *fid)
+{
+	return fid_oid(fid);
+}
+
+/**
+ * Build igif from the inode number/generation.
+ */
+static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen)
+{
+	fid->f_seq = ino;
+	fid->f_oid = gen;
+	fid->f_ver = 0;
+}
+
+/*
+ * Fids are transmitted across network (in the sender byte-ordering),
+ * and stored on disk in big-endian order.
+ */
+static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src)
+{
+	/* check that all fields are converted */
+	CLASSERT(sizeof *src ==
+		 sizeof fid_seq(src) +
+		 sizeof fid_oid(src) + sizeof fid_ver(src));
+	dst->f_seq = cpu_to_le64(fid_seq(src));
+	dst->f_oid = cpu_to_le32(fid_oid(src));
+	dst->f_ver = cpu_to_le32(fid_ver(src));
+}
+
+static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
+{
+	/* check that all fields are converted */
+	CLASSERT(sizeof *src ==
+		 sizeof fid_seq(src) +
+		 sizeof fid_oid(src) + sizeof fid_ver(src));
+	dst->f_seq = le64_to_cpu(fid_seq(src));
+	dst->f_oid = le32_to_cpu(fid_oid(src));
+	dst->f_ver = le32_to_cpu(fid_ver(src));
+}
+
+static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src)
+{
+	/* check that all fields are converted */
+	CLASSERT(sizeof *src ==
+		 sizeof fid_seq(src) +
+		 sizeof fid_oid(src) + sizeof fid_ver(src));
+	dst->f_seq = cpu_to_be64(fid_seq(src));
+	dst->f_oid = cpu_to_be32(fid_oid(src));
+	dst->f_ver = cpu_to_be32(fid_ver(src));
+}
+
+static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
+{
+	/* check that all fields are converted */
+	CLASSERT(sizeof *src ==
+		 sizeof fid_seq(src) +
+		 sizeof fid_oid(src) + sizeof fid_ver(src));
+	dst->f_seq = be64_to_cpu(fid_seq(src));
+	dst->f_oid = be32_to_cpu(fid_oid(src));
+	dst->f_ver = be32_to_cpu(fid_ver(src));
+}
+
+static inline int fid_is_sane(const struct lu_fid *fid)
+{
+	return fid != NULL &&
+	       ((fid_seq(fid) >= FID_SEQ_START && fid_ver(fid) == 0) ||
+		fid_is_igif(fid) || fid_is_idif(fid) ||
+		fid_seq_is_rsvd(fid_seq(fid)));
+}
+
+static inline int fid_is_zero(const struct lu_fid *fid)
+{
+	return fid_seq(fid) == 0 && fid_oid(fid) == 0;
+}
+
+extern void lustre_swab_lu_fid(struct lu_fid *fid);
+extern void lustre_swab_lu_seq_range(struct lu_seq_range *range);
+
+static inline int lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
+{
+	/* Check that there is no alignment padding. */
+	CLASSERT(sizeof *f0 ==
+		 sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver);
+	return memcmp(f0, f1, sizeof *f0) == 0;
+}
+
+#define __diff_normalize(val0, val1)			    \
+({							      \
+	typeof(val0) __val0 = (val0);			   \
+	typeof(val1) __val1 = (val1);			   \
+								\
+	(__val0 == __val1 ? 0 : __val0 > __val1 ? +1 : -1);     \
+})
+
+static inline int lu_fid_cmp(const struct lu_fid *f0,
+			     const struct lu_fid *f1)
+{
+	return
+		__diff_normalize(fid_seq(f0), fid_seq(f1)) ?:
+		__diff_normalize(fid_oid(f0), fid_oid(f1)) ?:
+		__diff_normalize(fid_ver(f0), fid_ver(f1));
+}
+
+static inline void ostid_cpu_to_le(struct ost_id *src_oi,
+				   struct ost_id *dst_oi)
+{
+	if (fid_seq_is_mdt0(ostid_seq(src_oi))) {
+		dst_oi->oi.oi_id = cpu_to_le64(src_oi->oi.oi_id);
+		dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
+	} else {
+		fid_cpu_to_le(&dst_oi->oi_fid, &src_oi->oi_fid);
+	}
+}
+
+static inline void ostid_le_to_cpu(struct ost_id *src_oi,
+				   struct ost_id *dst_oi)
+{
+	if (fid_seq_is_mdt0(ostid_seq(src_oi))) {
+		dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
+		dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
+	} else {
+		fid_le_to_cpu(&dst_oi->oi_fid, &src_oi->oi_fid);
+	}
+}
+
+/** @} lu_fid */
+
+/** \defgroup lu_dir lu_dir
+ * @{ */
+
+/**
+ * Enumeration of possible directory entry attributes.
+ *
+ * Attributes follow directory entry header in the order they appear in this
+ * enumeration.
+ */
+enum lu_dirent_attrs {
+	LUDA_FID		= 0x0001,
+	LUDA_TYPE		= 0x0002,
+	LUDA_64BITHASH		= 0x0004,
+
+	/* The following attrs are used for MDT interanl only,
+	 * not visible to client */
+
+	/* Verify the dirent consistency */
+	LUDA_VERIFY		= 0x8000,
+	/* Only check but not repair the dirent inconsistency */
+	LUDA_VERIFY_DRYRUN	= 0x4000,
+	/* The dirent has been repaired, or to be repaired (dryrun). */
+	LUDA_REPAIR		= 0x2000,
+	/* The system is upgraded, has beed or to be repaired (dryrun). */
+	LUDA_UPGRADE		= 0x1000,
+	/* Ignore this record, go to next directly. */
+	LUDA_IGNORE		= 0x0800,
+};
+
+#define LU_DIRENT_ATTRS_MASK	0xf800
+
+/**
+ * Layout of readdir pages, as transmitted on wire.
+ */
+struct lu_dirent {
+	/** valid if LUDA_FID is set. */
+	struct lu_fid lde_fid;
+	/** a unique entry identifier: a hash or an offset. */
+	__u64	 lde_hash;
+	/** total record length, including all attributes. */
+	__u16	 lde_reclen;
+	/** name length */
+	__u16	 lde_namelen;
+	/** optional variable size attributes following this entry.
+	 *  taken from enum lu_dirent_attrs.
+	 */
+	__u32	 lde_attrs;
+	/** name is followed by the attributes indicated in ->ldp_attrs, in
+	 *  their natural order. After the last attribute, padding bytes are
+	 *  added to make ->lde_reclen a multiple of 8.
+	 */
+	char	  lde_name[0];
+};
+
+/*
+ * Definitions of optional directory entry attributes formats.
+ *
+ * Individual attributes do not have their length encoded in a generic way. It
+ * is assumed that consumer of an attribute knows its format. This means that
+ * it is impossible to skip over an unknown attribute, except by skipping over all
+ * remaining attributes (by using ->lde_reclen), which is not too
+ * constraining, because new server versions will append new attributes at
+ * the end of an entry.
+ */
+
+/**
+ * Fid directory attribute: a fid of an object referenced by the entry. This
+ * will be almost always requested by the client and supplied by the server.
+ *
+ * Aligned to 8 bytes.
+ */
+/* To have compatibility with 1.8, lets have fid in lu_dirent struct. */
+
+/**
+ * File type.
+ *
+ * Aligned to 2 bytes.
+ */
+struct luda_type {
+	__u16 lt_type;
+};
+
+struct lu_dirpage {
+	__u64	    ldp_hash_start;
+	__u64	    ldp_hash_end;
+	__u32	    ldp_flags;
+	__u32	    ldp_pad0;
+	struct lu_dirent ldp_entries[0];
+};
+
+enum lu_dirpage_flags {
+	/**
+	 * dirpage contains no entry.
+	 */
+	LDF_EMPTY   = 1 << 0,
+	/**
+	 * last entry's lde_hash equals ldp_hash_end.
+	 */
+	LDF_COLLIDE = 1 << 1
+};
+
+static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp)
+{
+	if (le32_to_cpu(dp->ldp_flags) & LDF_EMPTY)
+		return NULL;
+	else
+		return dp->ldp_entries;
+}
+
+static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
+{
+	struct lu_dirent *next;
+
+	if (le16_to_cpu(ent->lde_reclen) != 0)
+		next = ((void *)ent) + le16_to_cpu(ent->lde_reclen);
+	else
+		next = NULL;
+
+	return next;
+}
+
+static inline int lu_dirent_calc_size(int namelen, __u16 attr)
+{
+	int size;
+
+	if (attr & LUDA_TYPE) {
+		const unsigned align = sizeof(struct luda_type) - 1;
+		size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
+		size += sizeof(struct luda_type);
+	} else
+		size = sizeof(struct lu_dirent) + namelen;
+
+	return (size + 7) & ~7;
+}
+
+static inline int lu_dirent_size(struct lu_dirent *ent)
+{
+	if (le16_to_cpu(ent->lde_reclen) == 0) {
+		return lu_dirent_calc_size(le16_to_cpu(ent->lde_namelen),
+					   le32_to_cpu(ent->lde_attrs));
+	}
+	return le16_to_cpu(ent->lde_reclen);
+}
+
+#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
+
+/**
+ * MDS_READPAGE page size
+ *
+ * This is the directory page size packed in MDS_READPAGE RPC.
+ * It's different than PAGE_CACHE_SIZE because the client needs to
+ * access the struct lu_dirpage header packed at the beginning of
+ * the "page" and without this there isn't any way to know find the
+ * lu_dirpage header is if client and server PAGE_CACHE_SIZE differ.
+ */
+#define LU_PAGE_SHIFT 12
+#define LU_PAGE_SIZE  (1UL << LU_PAGE_SHIFT)
+#define LU_PAGE_MASK  (~(LU_PAGE_SIZE - 1))
+
+#define LU_PAGE_COUNT (1 << (PAGE_CACHE_SHIFT - LU_PAGE_SHIFT))
+
+/** @} lu_dir */
+
+struct lustre_handle {
+	__u64 cookie;
+};
+#define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL
+
+static inline int lustre_handle_is_used(struct lustre_handle *lh)
+{
+	return lh->cookie != 0ull;
+}
+
+static inline int lustre_handle_equal(const struct lustre_handle *lh1,
+				      const struct lustre_handle *lh2)
+{
+	return lh1->cookie == lh2->cookie;
+}
+
+static inline void lustre_handle_copy(struct lustre_handle *tgt,
+				      struct lustre_handle *src)
+{
+	tgt->cookie = src->cookie;
+}
+
+/* flags for lm_flags */
+#define MSGHDR_AT_SUPPORT	       0x1
+#define MSGHDR_CKSUM_INCOMPAT18	 0x2
+
+#define lustre_msg lustre_msg_v2
+/* we depend on this structure to be 8-byte aligned */
+/* this type is only endian-adjusted in lustre_unpack_msg() */
+struct lustre_msg_v2 {
+	__u32 lm_bufcount;
+	__u32 lm_secflvr;
+	__u32 lm_magic;
+	__u32 lm_repsize;
+	__u32 lm_cksum;
+	__u32 lm_flags;
+	__u32 lm_padding_2;
+	__u32 lm_padding_3;
+	__u32 lm_buflens[0];
+};
+
+/* without gss, ptlrpc_body is put at the first buffer. */
+#define PTLRPC_NUM_VERSIONS     4
+#define JOBSTATS_JOBID_SIZE     32  /* 32 bytes string */
+struct ptlrpc_body_v3 {
+	struct lustre_handle pb_handle;
+	__u32 pb_type;
+	__u32 pb_version;
+	__u32 pb_opc;
+	__u32 pb_status;
+	__u64 pb_last_xid;
+	__u64 pb_last_seen;
+	__u64 pb_last_committed;
+	__u64 pb_transno;
+	__u32 pb_flags;
+	__u32 pb_op_flags;
+	__u32 pb_conn_cnt;
+	__u32 pb_timeout;  /* for req, the deadline, for rep, the service est */
+	__u32 pb_service_time; /* for rep, actual service time */
+	__u32 pb_limit;
+	__u64 pb_slv;
+	/* VBR: pre-versions */
+	__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+	/* padding for future needs */
+	__u64 pb_padding[4];
+	char  pb_jobid[JOBSTATS_JOBID_SIZE];
+};
+#define ptlrpc_body     ptlrpc_body_v3
+
+struct ptlrpc_body_v2 {
+	struct lustre_handle pb_handle;
+	__u32 pb_type;
+	__u32 pb_version;
+	__u32 pb_opc;
+	__u32 pb_status;
+	__u64 pb_last_xid;
+	__u64 pb_last_seen;
+	__u64 pb_last_committed;
+	__u64 pb_transno;
+	__u32 pb_flags;
+	__u32 pb_op_flags;
+	__u32 pb_conn_cnt;
+	__u32 pb_timeout;  /* for req, the deadline, for rep, the service est */
+	__u32 pb_service_time; /* for rep, actual service time, also used for
+				  net_latency of req */
+	__u32 pb_limit;
+	__u64 pb_slv;
+	/* VBR: pre-versions */
+	__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+	/* padding for future needs */
+	__u64 pb_padding[4];
+};
+
+extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
+
+/* message body offset for lustre_msg_v2 */
+/* ptlrpc body offset in all request/reply messages */
+#define MSG_PTLRPC_BODY_OFF	     0
+
+/* normal request/reply message record offset */
+#define REQ_REC_OFF		     1
+#define REPLY_REC_OFF		   1
+
+/* ldlm request message body offset */
+#define DLM_LOCKREQ_OFF		 1 /* lockreq offset */
+#define DLM_REQ_REC_OFF		 2 /* normal dlm request record offset */
+
+/* ldlm intent lock message body offset */
+#define DLM_INTENT_IT_OFF	       2 /* intent lock it offset */
+#define DLM_INTENT_REC_OFF	      3 /* intent lock record offset */
+
+/* ldlm reply message body offset */
+#define DLM_LOCKREPLY_OFF	       1 /* lockrep offset */
+#define DLM_REPLY_REC_OFF	       2 /* reply record offset */
+
+/** only use in req->rq_{req,rep}_swab_mask */
+#define MSG_PTLRPC_HEADER_OFF	   31
+
+/* Flags that are operation-specific go in the top 16 bits. */
+#define MSG_OP_FLAG_MASK   0xffff0000
+#define MSG_OP_FLAG_SHIFT  16
+
+/* Flags that apply to all requests are in the bottom 16 bits */
+#define MSG_GEN_FLAG_MASK     0x0000ffff
+#define MSG_LAST_REPLAY	   0x0001
+#define MSG_RESENT		0x0002
+#define MSG_REPLAY		0x0004
+/* #define MSG_AT_SUPPORT	 0x0008
+ * This was used in early prototypes of adaptive timeouts, and while there
+ * shouldn't be any users of that code there also isn't a need for using this
+ * bits. Defer usage until at least 1.10 to avoid potential conflict. */
+#define MSG_DELAY_REPLAY	  0x0010
+#define MSG_VERSION_REPLAY	0x0020
+#define MSG_REQ_REPLAY_DONE       0x0040
+#define MSG_LOCK_REPLAY_DONE      0x0080
+
+/*
+ * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
+ */
+
+#define MSG_CONNECT_RECOVERING  0x00000001
+#define MSG_CONNECT_RECONNECT   0x00000002
+#define MSG_CONNECT_REPLAYABLE  0x00000004
+//#define MSG_CONNECT_PEER	0x8
+#define MSG_CONNECT_LIBCLIENT   0x00000010
+#define MSG_CONNECT_INITIAL     0x00000020
+#define MSG_CONNECT_ASYNC       0x00000040
+#define MSG_CONNECT_NEXT_VER    0x00000080 /* use next version of lustre_msg */
+#define MSG_CONNECT_TRANSNO     0x00000100 /* report transno */
+
+/* Connect flags */
+#define OBD_CONNECT_RDONLY		0x1ULL /*client has read-only access*/
+#define OBD_CONNECT_INDEX		 0x2ULL /*connect specific LOV idx */
+#define OBD_CONNECT_MDS		   0x4ULL /*connect from MDT to OST */
+#define OBD_CONNECT_GRANT		 0x8ULL /*OSC gets grant at connect */
+#define OBD_CONNECT_SRVLOCK	      0x10ULL /*server takes locks for cli */
+#define OBD_CONNECT_VERSION	      0x20ULL /*Lustre versions in ocd */
+#define OBD_CONNECT_REQPORTAL	    0x40ULL /*Separate non-IO req portal */
+#define OBD_CONNECT_ACL		  0x80ULL /*access control lists */
+#define OBD_CONNECT_XATTR	       0x100ULL /*client use extended attr */
+#define OBD_CONNECT_CROW		0x200ULL /*MDS+OST create obj on write*/
+#define OBD_CONNECT_TRUNCLOCK	   0x400ULL /*locks on server for punch */
+#define OBD_CONNECT_TRANSNO	     0x800ULL /*replay sends init transno */
+#define OBD_CONNECT_IBITS	      0x1000ULL /*support for inodebits locks*/
+#define OBD_CONNECT_JOIN	       0x2000ULL /*files can be concatenated.
+						  *We do not support JOIN FILE
+						  *anymore, reserve this flags
+						  *just for preventing such bit
+						  *to be reused.*/
+#define OBD_CONNECT_ATTRFID	    0x4000ULL /*Server can GetAttr By Fid*/
+#define OBD_CONNECT_NODEVOH	    0x8000ULL /*No open hndl on specl nodes*/
+#define OBD_CONNECT_RMT_CLIENT	0x10000ULL /*Remote client */
+#define OBD_CONNECT_RMT_CLIENT_FORCE  0x20000ULL /*Remote client by force */
+#define OBD_CONNECT_BRW_SIZE	  0x40000ULL /*Max bytes per rpc */
+#define OBD_CONNECT_QUOTA64	   0x80000ULL /*Not used since 2.4 */
+#define OBD_CONNECT_MDS_CAPA	 0x100000ULL /*MDS capability */
+#define OBD_CONNECT_OSS_CAPA	 0x200000ULL /*OSS capability */
+#define OBD_CONNECT_CANCELSET	0x400000ULL /*Early batched cancels. */
+#define OBD_CONNECT_SOM	      0x800000ULL /*Size on MDS */
+#define OBD_CONNECT_AT	      0x1000000ULL /*client uses AT */
+#define OBD_CONNECT_LRU_RESIZE      0x2000000ULL /*LRU resize feature. */
+#define OBD_CONNECT_MDS_MDS	 0x4000000ULL /*MDS-MDS connection */
+#define OBD_CONNECT_REAL	    0x8000000ULL /*real connection */
+#define OBD_CONNECT_CHANGE_QS      0x10000000ULL /*Not used since 2.4 */
+#define OBD_CONNECT_CKSUM	  0x20000000ULL /*support several cksum algos*/
+#define OBD_CONNECT_FID	    0x40000000ULL /*FID is supported by server */
+#define OBD_CONNECT_VBR	    0x80000000ULL /*version based recovery */
+#define OBD_CONNECT_LOV_V3	0x100000000ULL /*client supports LOV v3 EA */
+#define OBD_CONNECT_GRANT_SHRINK  0x200000000ULL /* support grant shrink */
+#define OBD_CONNECT_SKIP_ORPHAN   0x400000000ULL /* don't reuse orphan objids */
+#define OBD_CONNECT_MAX_EASIZE    0x800000000ULL /* preserved for large EA */
+#define OBD_CONNECT_FULL20       0x1000000000ULL /* it is 2.0 client */
+#define OBD_CONNECT_LAYOUTLOCK   0x2000000000ULL /* client uses layout lock */
+#define OBD_CONNECT_64BITHASH    0x4000000000ULL /* client supports 64-bits
+						  * directory hash */
+#define OBD_CONNECT_MAXBYTES     0x8000000000ULL /* max stripe size */
+#define OBD_CONNECT_IMP_RECOV   0x10000000000ULL /* imp recovery support */
+#define OBD_CONNECT_JOBSTATS    0x20000000000ULL /* jobid in ptlrpc_body */
+#define OBD_CONNECT_UMASK       0x40000000000ULL /* create uses client umask */
+#define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS
+						  * RPC error properly */
+#define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for
+						  * finer space reservation */
+#define OBD_CONNECT_FLOCK_OWNER 0x200000000000ULL /* for the fixed 1.8
+						   * policy and 2.x server */
+#define OBD_CONNECT_LVB_TYPE	0x400000000000ULL /* variable type of LVB */
+#define OBD_CONNECT_NANOSEC_TIME 0x800000000000ULL /* nanosecond timestamps */
+#define OBD_CONNECT_LIGHTWEIGHT 0x1000000000000ULL/* lightweight connection */
+#define OBD_CONNECT_SHORTIO     0x2000000000000ULL/* short io */
+#define OBD_CONNECT_PINGLESS	0x4000000000000ULL/* pings not required */
+/* XXX README XXX:
+ * Please DO NOT add flag values here before first ensuring that this same
+ * flag value is not in use on some other branch.  Please clear any such
+ * changes with senior engineers before starting to use a new flag.  Then,
+ * submit a small patch against EVERY branch that ONLY adds the new flag,
+ * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the
+ * flag to check_obd_connect_data(), and updates wiretests accordingly, so it
+ * can be approved and landed easily to reserve the flag for future use. */
+
+/* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS
+ * connection.  It is a temporary bug fix for Imperative Recovery interop
+ * between 2.2 and 2.3 x86/ppc nodes, and can be removed when interop for
+ * 2.2 clients/servers is no longer needed.  LU-1252/LU-1644. */
+#define OBD_CONNECT_MNE_SWAB		 OBD_CONNECT_MDS_MDS
+
+#define OCD_HAS_FLAG(ocd, flg)  \
+	(!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
+
+
+#define LRU_RESIZE_CONNECT_FLAG OBD_CONNECT_LRU_RESIZE
+
+#define MDT_CONNECT_SUPPORTED  (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
+				OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
+				OBD_CONNECT_IBITS | \
+				OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID | \
+				OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \
+				OBD_CONNECT_RMT_CLIENT | \
+				OBD_CONNECT_RMT_CLIENT_FORCE | \
+				OBD_CONNECT_BRW_SIZE | OBD_CONNECT_MDS_CAPA | \
+				OBD_CONNECT_OSS_CAPA | OBD_CONNECT_MDS_MDS | \
+				OBD_CONNECT_FID | LRU_RESIZE_CONNECT_FLAG | \
+				OBD_CONNECT_VBR | OBD_CONNECT_LOV_V3 | \
+				OBD_CONNECT_SOM | OBD_CONNECT_FULL20 | \
+				OBD_CONNECT_64BITHASH | OBD_CONNECT_JOBSTATS | \
+				OBD_CONNECT_EINPROGRESS | \
+				OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_UMASK | \
+				OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK |\
+				OBD_CONNECT_PINGLESS)
+#define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
+				OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
+				OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
+				OBD_CONNECT_BRW_SIZE | OBD_CONNECT_OSS_CAPA | \
+				OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \
+				LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_CKSUM | \
+				OBD_CONNECT_RMT_CLIENT | \
+				OBD_CONNECT_RMT_CLIENT_FORCE | OBD_CONNECT_VBR | \
+				OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \
+				OBD_CONNECT_GRANT_SHRINK | OBD_CONNECT_FULL20 | \
+				OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | \
+				OBD_CONNECT_MAX_EASIZE | \
+				OBD_CONNECT_EINPROGRESS | \
+				OBD_CONNECT_JOBSTATS | \
+				OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_LVB_TYPE|\
+				OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_FID | \
+				OBD_CONNECT_PINGLESS)
+#define ECHO_CONNECT_SUPPORTED (0)
+#define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
+				OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | \
+				OBD_CONNECT_MNE_SWAB | OBD_CONNECT_PINGLESS)
+
+/* Features required for this version of the client to work with server */
+#define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
+				 OBD_CONNECT_FULL20)
+
+#define OBD_OCD_VERSION(major,minor,patch,fix) (((major)<<24) + ((minor)<<16) +\
+						((patch)<<8) + (fix))
+#define OBD_OCD_VERSION_MAJOR(version) ((int)((version)>>24)&255)
+#define OBD_OCD_VERSION_MINOR(version) ((int)((version)>>16)&255)
+#define OBD_OCD_VERSION_PATCH(version) ((int)((version)>>8)&255)
+#define OBD_OCD_VERSION_FIX(version)   ((int)(version)&255)
+
+/* This structure is used for both request and reply.
+ *
+ * If we eventually have separate connect data for different types, which we
+ * almost certainly will, then perhaps we stick a union in here. */
+struct obd_connect_data_v1 {
+	__u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
+	__u32 ocd_version;	 /* lustre release version number */
+	__u32 ocd_grant;	 /* initial cache grant amount (bytes) */
+	__u32 ocd_index;	 /* LOV index to connect to */
+	__u32 ocd_brw_size;	 /* Maximum BRW size in bytes, must be 2^n */
+	__u64 ocd_ibits_known;   /* inode bits this client understands */
+	__u8  ocd_blocksize;     /* log2 of the backend filesystem blocksize */
+	__u8  ocd_inodespace;    /* log2 of the per-inode space consumption */
+	__u16 ocd_grant_extent;  /* per-extent grant overhead, in 1K blocks */
+	__u32 ocd_unused;	/* also fix lustre_swab_connect */
+	__u64 ocd_transno;       /* first transno from client to be replayed */
+	__u32 ocd_group;	 /* MDS group on OST */
+	__u32 ocd_cksum_types;   /* supported checksum algorithms */
+	__u32 ocd_max_easize;    /* How big LOV EA can be on MDS */
+	__u32 ocd_instance;      /* also fix lustre_swab_connect */
+	__u64 ocd_maxbytes;      /* Maximum stripe size in bytes */
+};
+
+struct obd_connect_data {
+	__u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
+	__u32 ocd_version;	 /* lustre release version number */
+	__u32 ocd_grant;	 /* initial cache grant amount (bytes) */
+	__u32 ocd_index;	 /* LOV index to connect to */
+	__u32 ocd_brw_size;	 /* Maximum BRW size in bytes */
+	__u64 ocd_ibits_known;   /* inode bits this client understands */
+	__u8  ocd_blocksize;     /* log2 of the backend filesystem blocksize */
+	__u8  ocd_inodespace;    /* log2 of the per-inode space consumption */
+	__u16 ocd_grant_extent;  /* per-extent grant overhead, in 1K blocks */
+	__u32 ocd_unused;	/* also fix lustre_swab_connect */
+	__u64 ocd_transno;       /* first transno from client to be replayed */
+	__u32 ocd_group;	 /* MDS group on OST */
+	__u32 ocd_cksum_types;   /* supported checksum algorithms */
+	__u32 ocd_max_easize;    /* How big LOV EA can be on MDS */
+	__u32 ocd_instance;      /* instance # of this target */
+	__u64 ocd_maxbytes;      /* Maximum stripe size in bytes */
+	/* Fields after ocd_maxbytes are only accessible by the receiver
+	 * if the corresponding flag in ocd_connect_flags is set. Accessing
+	 * any field after ocd_maxbytes on the receiver without a valid flag
+	 * may result in out-of-bound memory access and kernel oops. */
+	__u64 padding1;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 padding2;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 padding3;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 padding4;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 padding5;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 padding6;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 padding7;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 padding8;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 padding9;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 paddingA;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 paddingB;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 paddingC;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 paddingD;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 paddingE;	  /* added 2.1.0. also fix lustre_swab_connect */
+	__u64 paddingF;	  /* added 2.1.0. also fix lustre_swab_connect */
+};
+/* XXX README XXX:
+ * Please DO NOT use any fields here before first ensuring that this same
+ * field is not in use on some other branch.  Please clear any such changes
+ * with senior engineers before starting to use a new field.  Then, submit
+ * a small patch against EVERY branch that ONLY adds the new field along with
+ * the matching OBD_CONNECT flag, so that can be approved and landed easily to
+ * reserve the flag for future use. */
+
+
+extern void lustre_swab_connect(struct obd_connect_data *ocd);
+
+/*
+ * Supported checksum algorithms. Up to 32 checksum types are supported.
+ * (32-bit mask stored in obd_connect_data::ocd_cksum_types)
+ * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new
+ * algorithm and also the OBD_FL_CKSUM* flags.
+ */
+typedef enum {
+	OBD_CKSUM_CRC32 = 0x00000001,
+	OBD_CKSUM_ADLER = 0x00000002,
+	OBD_CKSUM_CRC32C= 0x00000004,
+} cksum_type_t;
+
+/*
+ *   OST requests: OBDO & OBD request records
+ */
+
+/* opcodes */
+typedef enum {
+	OST_REPLY      =  0,       /* reply ? */
+	OST_GETATTR    =  1,
+	OST_SETATTR    =  2,
+	OST_READ       =  3,
+	OST_WRITE      =  4,
+	OST_CREATE     =  5,
+	OST_DESTROY    =  6,
+	OST_GET_INFO   =  7,
+	OST_CONNECT    =  8,
+	OST_DISCONNECT =  9,
+	OST_PUNCH      = 10,
+	OST_OPEN       = 11,
+	OST_CLOSE      = 12,
+	OST_STATFS     = 13,
+	OST_SYNC       = 16,
+	OST_SET_INFO   = 17,
+	OST_QUOTACHECK = 18,
+	OST_QUOTACTL   = 19,
+	OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
+	OST_LAST_OPC
+} ost_cmd_t;
+#define OST_FIRST_OPC  OST_REPLY
+
+enum obdo_flags {
+	OBD_FL_INLINEDATA   = 0x00000001,
+	OBD_FL_OBDMDEXISTS  = 0x00000002,
+	OBD_FL_DELORPHAN    = 0x00000004, /* if set in o_flags delete orphans */
+	OBD_FL_NORPC	= 0x00000008, /* set in o_flags do in OSC not OST */
+	OBD_FL_IDONLY       = 0x00000010, /* set in o_flags only adjust obj id*/
+	OBD_FL_RECREATE_OBJS= 0x00000020, /* recreate missing obj */
+	OBD_FL_DEBUG_CHECK  = 0x00000040, /* echo client/server debug check */
+	OBD_FL_NO_USRQUOTA  = 0x00000100, /* the object's owner is over quota */
+	OBD_FL_NO_GRPQUOTA  = 0x00000200, /* the object's group is over quota */
+	OBD_FL_CREATE_CROW  = 0x00000400, /* object should be create on write */
+	OBD_FL_SRVLOCK      = 0x00000800, /* delegate DLM locking to server */
+	OBD_FL_CKSUM_CRC32  = 0x00001000, /* CRC32 checksum type */
+	OBD_FL_CKSUM_ADLER  = 0x00002000, /* ADLER checksum type */
+	OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */
+	OBD_FL_CKSUM_RSVD2  = 0x00008000, /* for future cksum types */
+	OBD_FL_CKSUM_RSVD3  = 0x00010000, /* for future cksum types */
+	OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */
+	OBD_FL_MMAP	 = 0x00040000, /* object is mmapped on the client.
+					   * XXX: obsoleted - reserved for old
+					   * clients prior than 2.2 */
+	OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
+	OBD_FL_NOSPC_BLK    = 0x00100000, /* no more block space on OST */
+
+	/* Note that while these checksum values are currently separate bits,
+	 * in 2.x we can actually allow all values from 1-31 if we wanted. */
+	OBD_FL_CKSUM_ALL    = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER |
+			      OBD_FL_CKSUM_CRC32C,
+
+	/* mask for local-only flag, which won't be sent over network */
+	OBD_FL_LOCAL_MASK   = 0xF0000000,
+};
+
+#define LOV_MAGIC_V1      0x0BD10BD0
+#define LOV_MAGIC	 LOV_MAGIC_V1
+#define LOV_MAGIC_JOIN_V1 0x0BD20BD0
+#define LOV_MAGIC_V3      0x0BD30BD0
+
+/*
+ * magic for fully defined striping
+ * the idea is that we should have different magics for striping "hints"
+ * (struct lov_user_md_v[13]) and defined ready-to-use striping (struct
+ * lov_mds_md_v[13]). at the moment the magics are used in wire protocol,
+ * we can't just change it w/o long way preparation, but we still need a
+ * mechanism to allow LOD to differentiate hint versus ready striping.
+ * so, at the moment we do a trick: MDT knows what to expect from request
+ * depending on the case (replay uses ready striping, non-replay req uses
+ * hints), so MDT replaces magic with appropriate one and now LOD can
+ * easily understand what's inside -bzzz
+ */
+#define LOV_MAGIC_V1_DEF  0x0CD10BD0
+#define LOV_MAGIC_V3_DEF  0x0CD30BD0
+
+#define LOV_PATTERN_RAID0 0x001   /* stripes are used round-robin */
+#define LOV_PATTERN_RAID1 0x002   /* stripes are mirrors of each other */
+#define LOV_PATTERN_FIRST 0x100   /* first stripe is not in round-robin */
+#define LOV_PATTERN_CMOBD 0x200
+
+#define lov_ost_data lov_ost_data_v1
+struct lov_ost_data_v1 {	  /* per-stripe data structure (little-endian)*/
+	struct ost_id l_ost_oi;	  /* OST object ID */
+	__u32 l_ost_gen;	  /* generation of this l_ost_idx */
+	__u32 l_ost_idx;	  /* OST index in LOV (lov_tgt_desc->tgts) */
+};
+
+#define lov_mds_md lov_mds_md_v1
+struct lov_mds_md_v1 {	    /* LOV EA mds/wire data (little-endian) */
+	__u32 lmm_magic;	  /* magic number = LOV_MAGIC_V1 */
+	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+	struct ost_id	lmm_oi;	  /* LOV object ID */
+	__u32 lmm_stripe_size;    /* size of stripe in bytes */
+	/* lmm_stripe_count used to be __u32 */
+	__u16 lmm_stripe_count;   /* num stripes in use for this object */
+	__u16 lmm_layout_gen;     /* layout generation number */
+	struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+};
+
+/**
+ * Sigh, because pre-2.4 uses
+ * struct lov_mds_md_v1 {
+ *	........
+ *	__u64 lmm_object_id;
+ *	__u64 lmm_object_seq;
+ *      ......
+ *      }
+ * to identify the LOV(MDT) object, and lmm_object_seq will
+ * be normal_fid, which make it hard to combine these conversion
+ * to ostid_to FID. so we will do lmm_oi/fid conversion separately
+ *
+ * We can tell the lmm_oi by this way,
+ * 1.8: lmm_object_id = {inode}, lmm_object_gr = 0
+ * 2.1: lmm_object_id = {oid < 128k}, lmm_object_seq = FID_SEQ_NORMAL
+ * 2.4: lmm_oi.f_seq = FID_SEQ_NORMAL, lmm_oi.f_oid = {oid < 128k},
+ *      lmm_oi.f_ver = 0
+ *
+ * But currently lmm_oi/lsm_oi does not have any "real" usages,
+ * except for printing some information, and the user can always
+ * get the real FID from LMA, besides this multiple case check might
+ * make swab more complicate. So we will keep using id/seq for lmm_oi.
+ */
+
+static inline void fid_to_lmm_oi(const struct lu_fid *fid,
+				 struct ost_id *oi)
+{
+	oi->oi.oi_id = fid_oid(fid);
+	oi->oi.oi_seq = fid_seq(fid);
+}
+
+static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
+{
+	oi->oi.oi_seq = seq;
+}
+
+static inline __u64 lmm_oi_id(struct ost_id *oi)
+{
+	return oi->oi.oi_id;
+}
+
+static inline __u64 lmm_oi_seq(struct ost_id *oi)
+{
+	return oi->oi.oi_seq;
+}
+
+static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi,
+				    struct ost_id *src_oi)
+{
+	dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
+	dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
+}
+
+static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
+				    struct ost_id *src_oi)
+{
+	dst_oi->oi.oi_id = cpu_to_le64(src_oi->oi.oi_id);
+	dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
+}
+
+/* extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); */
+
+#define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
+#define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
+
+#define XATTR_NAME_ACL_ACCESS   "system.posix_acl_access"
+#define XATTR_NAME_ACL_DEFAULT  "system.posix_acl_default"
+#define XATTR_USER_PREFIX       "user."
+#define XATTR_TRUSTED_PREFIX    "trusted."
+#define XATTR_SECURITY_PREFIX   "security."
+#define XATTR_LUSTRE_PREFIX     "lustre."
+
+#define XATTR_NAME_LOV	  "trusted.lov"
+#define XATTR_NAME_LMA	  "trusted.lma"
+#define XATTR_NAME_LMV	  "trusted.lmv"
+#define XATTR_NAME_LINK	 "trusted.link"
+#define XATTR_NAME_FID	  "trusted.fid"
+#define XATTR_NAME_VERSION      "trusted.version"
+#define XATTR_NAME_SOM		"trusted.som"
+#define XATTR_NAME_HSM		"trusted.hsm"
+#define XATTR_NAME_LFSCK_NAMESPACE "trusted.lfsck_namespace"
+
+struct lov_mds_md_v3 {	    /* LOV EA mds/wire data (little-endian) */
+	__u32 lmm_magic;	  /* magic number = LOV_MAGIC_V3 */
+	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+	struct ost_id	lmm_oi;	  /* LOV object ID */
+	__u32 lmm_stripe_size;    /* size of stripe in bytes */
+	/* lmm_stripe_count used to be __u32 */
+	__u16 lmm_stripe_count;   /* num stripes in use for this object */
+	__u16 lmm_layout_gen;     /* layout generation number */
+	char  lmm_pool_name[LOV_MAXPOOLNAME]; /* must be 32bit aligned */
+	struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+};
+
+#define OBD_MD_FLID	(0x00000001ULL) /* object ID */
+#define OBD_MD_FLATIME     (0x00000002ULL) /* access time */
+#define OBD_MD_FLMTIME     (0x00000004ULL) /* data modification time */
+#define OBD_MD_FLCTIME     (0x00000008ULL) /* change time */
+#define OBD_MD_FLSIZE      (0x00000010ULL) /* size */
+#define OBD_MD_FLBLOCKS    (0x00000020ULL) /* allocated blocks count */
+#define OBD_MD_FLBLKSZ     (0x00000040ULL) /* block size */
+#define OBD_MD_FLMODE      (0x00000080ULL) /* access bits (mode & ~S_IFMT) */
+#define OBD_MD_FLTYPE      (0x00000100ULL) /* object type (mode & S_IFMT) */
+#define OBD_MD_FLUID       (0x00000200ULL) /* user ID */
+#define OBD_MD_FLGID       (0x00000400ULL) /* group ID */
+#define OBD_MD_FLFLAGS     (0x00000800ULL) /* flags word */
+#define OBD_MD_FLNLINK     (0x00002000ULL) /* link count */
+#define OBD_MD_FLGENER     (0x00004000ULL) /* generation number */
+/*#define OBD_MD_FLINLINE    (0x00008000ULL)  inline data. used until 1.6.5 */
+#define OBD_MD_FLRDEV      (0x00010000ULL) /* device number */
+#define OBD_MD_FLEASIZE    (0x00020000ULL) /* extended attribute data */
+#define OBD_MD_LINKNAME    (0x00040000ULL) /* symbolic link target */
+#define OBD_MD_FLHANDLE    (0x00080000ULL) /* file/lock handle */
+#define OBD_MD_FLCKSUM     (0x00100000ULL) /* bulk data checksum */
+#define OBD_MD_FLQOS       (0x00200000ULL) /* quality of service stats */
+/*#define OBD_MD_FLOSCOPQ    (0x00400000ULL) osc opaque data, never used */
+#define OBD_MD_FLCOOKIE    (0x00800000ULL) /* log cancellation cookie */
+#define OBD_MD_FLGROUP     (0x01000000ULL) /* group */
+#define OBD_MD_FLFID       (0x02000000ULL) /* ->ost write inline fid */
+#define OBD_MD_FLEPOCH     (0x04000000ULL) /* ->ost write with ioepoch */
+					   /* ->mds if epoch opens or closes */
+#define OBD_MD_FLGRANT     (0x08000000ULL) /* ost preallocation space grant */
+#define OBD_MD_FLDIREA     (0x10000000ULL) /* dir's extended attribute data */
+#define OBD_MD_FLUSRQUOTA  (0x20000000ULL) /* over quota flags sent from ost */
+#define OBD_MD_FLGRPQUOTA  (0x40000000ULL) /* over quota flags sent from ost */
+#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */
+
+#define OBD_MD_MDS	 (0x0000000100000000ULL) /* where an inode lives on */
+#define OBD_MD_REINT       (0x0000000200000000ULL) /* reintegrate oa */
+#define OBD_MD_MEA	 (0x0000000400000000ULL) /* CMD split EA  */
+
+/* OBD_MD_MDTIDX is used to get MDT index, but it is never been used overwire,
+ * and it is already obsolete since 2.3 */
+/* #define OBD_MD_MDTIDX      (0x0000000800000000ULL) */
+
+#define OBD_MD_FLXATTR       (0x0000001000000000ULL) /* xattr */
+#define OBD_MD_FLXATTRLS     (0x0000002000000000ULL) /* xattr list */
+#define OBD_MD_FLXATTRRM     (0x0000004000000000ULL) /* xattr remove */
+#define OBD_MD_FLACL	 (0x0000008000000000ULL) /* ACL */
+#define OBD_MD_FLRMTPERM     (0x0000010000000000ULL) /* remote permission */
+#define OBD_MD_FLMDSCAPA     (0x0000020000000000ULL) /* MDS capability */
+#define OBD_MD_FLOSSCAPA     (0x0000040000000000ULL) /* OSS capability */
+#define OBD_MD_FLCKSPLIT     (0x0000080000000000ULL) /* Check split on server */
+#define OBD_MD_FLCROSSREF    (0x0000100000000000ULL) /* Cross-ref case */
+#define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes
+						      * under lock */
+#define OBD_MD_FLOBJCOUNT    (0x0000400000000000ULL) /* for multiple destroy */
+
+#define OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) /* lfs lsetfacl case */
+#define OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) /* lfs lgetfacl case */
+#define OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) /* lfs rsetfacl case */
+#define OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) /* lfs rgetfacl case */
+
+#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
+
+#define OBD_MD_FLGETATTR (OBD_MD_FLID    | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
+			  OBD_MD_FLCTIME | OBD_MD_FLSIZE  | OBD_MD_FLBLKSZ | \
+			  OBD_MD_FLMODE  | OBD_MD_FLTYPE  | OBD_MD_FLUID   | \
+			  OBD_MD_FLGID   | OBD_MD_FLFLAGS | OBD_MD_FLNLINK | \
+			  OBD_MD_FLGENER | OBD_MD_FLRDEV  | OBD_MD_FLGROUP)
+
+/* don't forget obdo_fid which is way down at the bottom so it can
+ * come after the definition of llog_cookie */
+
+enum hss_valid {
+	HSS_SETMASK	= 0x01,
+	HSS_CLEARMASK	= 0x02,
+	HSS_ARCHIVE_ID	= 0x04,
+};
+
+struct hsm_state_set {
+	__u32	hss_valid;
+	__u32	hss_archive_id;
+	__u64	hss_setmask;
+	__u64	hss_clearmask;
+};
+
+extern void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+extern void lustre_swab_hsm_state_set(struct hsm_state_set *hss);
+
+extern void lustre_swab_obd_statfs (struct obd_statfs *os);
+
+/* ost_body.data values for OST_BRW */
+
+#define OBD_BRW_READ	    0x01
+#define OBD_BRW_WRITE	   0x02
+#define OBD_BRW_RWMASK	  (OBD_BRW_READ | OBD_BRW_WRITE)
+#define OBD_BRW_SYNC	    0x08 /* this page is a part of synchronous
+				      * transfer and is not accounted in
+				      * the grant. */
+#define OBD_BRW_CHECK	   0x10
+#define OBD_BRW_FROM_GRANT      0x20 /* the osc manages this under llite */
+#define OBD_BRW_GRANTED	 0x40 /* the ost manages this */
+#define OBD_BRW_NOCACHE	 0x80 /* this page is a part of non-cached IO */
+#define OBD_BRW_NOQUOTA	0x100
+#define OBD_BRW_SRVLOCK	0x200 /* Client holds no lock over this page */
+#define OBD_BRW_ASYNC	  0x400 /* Server may delay commit to disk */
+#define OBD_BRW_MEMALLOC       0x800 /* Client runs in the "kswapd" context */
+#define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */
+#define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */
+
+#define OBD_OBJECT_EOF 0xffffffffffffffffULL
+
+#define OST_MIN_PRECREATE 32
+#define OST_MAX_PRECREATE 20000
+
+struct obd_ioobj {
+	struct ost_id	ioo_oid;	/* object ID, if multi-obj BRW */
+	__u32		ioo_max_brw;	/* low 16 bits were o_mode before 2.4,
+					 * now (PTLRPC_BULK_OPS_COUNT - 1) in
+					 * high 16 bits in 2.4 and later */
+	__u32		ioo_bufcnt;	/* number of niobufs for this object */
+};
+
+#define IOOBJ_MAX_BRW_BITS	16
+#define IOOBJ_TYPE_MASK		((1U << IOOBJ_MAX_BRW_BITS) - 1)
+#define ioobj_max_brw_get(ioo)	(((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1)
+#define ioobj_max_brw_set(ioo, num)					\
+do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0)
+
+extern void lustre_swab_obd_ioobj (struct obd_ioobj *ioo);
+
+/* multiple of 8 bytes => can array */
+struct niobuf_remote {
+	__u64 offset;
+	__u32 len;
+	__u32 flags;
+};
+
+extern void lustre_swab_niobuf_remote (struct niobuf_remote *nbr);
+
+/* lock value block communicated between the filter and llite */
+
+/* OST_LVB_ERR_INIT is needed because the return code in rc is
+ * negative, i.e. because ((MASK + rc) & MASK) != MASK. */
+#define OST_LVB_ERR_INIT 0xffbadbad80000000ULL
+#define OST_LVB_ERR_MASK 0xffbadbad00000000ULL
+#define OST_LVB_IS_ERR(blocks)					  \
+	((blocks & OST_LVB_ERR_MASK) == OST_LVB_ERR_MASK)
+#define OST_LVB_SET_ERR(blocks, rc)				     \
+	do { blocks = OST_LVB_ERR_INIT + rc; } while (0)
+#define OST_LVB_GET_ERR(blocks)    (int)(blocks - OST_LVB_ERR_INIT)
+
+struct ost_lvb_v1 {
+	__u64		lvb_size;
+	obd_time	lvb_mtime;
+	obd_time	lvb_atime;
+	obd_time	lvb_ctime;
+	__u64		lvb_blocks;
+};
+
+extern void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb);
+
+struct ost_lvb {
+	__u64		lvb_size;
+	obd_time	lvb_mtime;
+	obd_time	lvb_atime;
+	obd_time	lvb_ctime;
+	__u64		lvb_blocks;
+	__u32		lvb_mtime_ns;
+	__u32		lvb_atime_ns;
+	__u32		lvb_ctime_ns;
+	__u32		lvb_padding;
+};
+
+extern void lustre_swab_ost_lvb(struct ost_lvb *lvb);
+
+/*
+ *   lquota data structures
+ */
+
+#ifndef QUOTABLOCK_BITS
+#define QUOTABLOCK_BITS 10
+#endif
+
+#ifndef QUOTABLOCK_SIZE
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+#endif
+
+#ifndef toqb
+#define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS)
+#endif
+
+/* The lquota_id structure is an union of all the possible identifier types that
+ * can be used with quota, this includes:
+ * - 64-bit user ID
+ * - 64-bit group ID
+ * - a FID which can be used for per-directory quota in the future */
+union lquota_id {
+	struct lu_fid	qid_fid; /* FID for per-directory quota */
+	__u64		qid_uid; /* user identifier */
+	__u64		qid_gid; /* group identifier */
+};
+
+/* quotactl management */
+struct obd_quotactl {
+	__u32			qc_cmd;
+	__u32			qc_type; /* see Q_* flag below */
+	__u32			qc_id;
+	__u32			qc_stat;
+	struct obd_dqinfo	qc_dqinfo;
+	struct obd_dqblk	qc_dqblk;
+};
+
+extern void lustre_swab_obd_quotactl(struct obd_quotactl *q);
+
+#define Q_QUOTACHECK	0x800100 /* deprecated as of 2.4 */
+#define Q_INITQUOTA	0x800101 /* deprecated as of 2.4  */
+#define Q_GETOINFO	0x800102 /* get obd quota info */
+#define Q_GETOQUOTA	0x800103 /* get obd quotas */
+#define Q_FINVALIDATE	0x800104 /* deprecated as of 2.4 */
+
+#define Q_COPY(out, in, member) (out)->member = (in)->member
+
+#define QCTL_COPY(out, in)		\
+do {					\
+	Q_COPY(out, in, qc_cmd);	\
+	Q_COPY(out, in, qc_type);	\
+	Q_COPY(out, in, qc_id);		\
+	Q_COPY(out, in, qc_stat);	\
+	Q_COPY(out, in, qc_dqinfo);	\
+	Q_COPY(out, in, qc_dqblk);	\
+} while (0)
+
+/* Body of quota request used for quota acquire/release RPCs between quota
+ * master (aka QMT) and slaves (ak QSD). */
+struct quota_body {
+	struct lu_fid	qb_fid;     /* FID of global index packing the pool ID
+				      * and type (data or metadata) as well as
+				      * the quota type (user or group). */
+	union lquota_id	qb_id;      /* uid or gid or directory FID */
+	__u32		qb_flags;   /* see below */
+	__u32		qb_padding;
+	__u64		qb_count;   /* acquire/release count (kbytes/inodes) */
+	__u64		qb_usage;   /* current slave usage (kbytes/inodes) */
+	__u64		qb_slv_ver; /* slave index file version */
+	struct lustre_handle	qb_lockh;     /* per-ID lock handle */
+	struct lustre_handle	qb_glb_lockh; /* global lock handle */
+	__u64		qb_padding1[4];
+};
+
+/* When the quota_body is used in the reply of quota global intent
+ * lock (IT_QUOTA_CONN) reply, qb_fid contains slave index file FID. */
+#define qb_slv_fid	qb_fid
+/* qb_usage is the current qunit (in kbytes/inodes) when quota_body is used in
+ * quota reply */
+#define qb_qunit	qb_usage
+
+#define QUOTA_DQACQ_FL_ACQ	0x1  /* acquire quota */
+#define QUOTA_DQACQ_FL_PREACQ	0x2  /* pre-acquire */
+#define QUOTA_DQACQ_FL_REL	0x4  /* release quota */
+#define QUOTA_DQACQ_FL_REPORT	0x8  /* report usage */
+
+extern void lustre_swab_quota_body(struct quota_body *b);
+
+/* Quota types currently supported */
+enum {
+	LQUOTA_TYPE_USR	= 0x00, /* maps to USRQUOTA */
+	LQUOTA_TYPE_GRP	= 0x01, /* maps to GRPQUOTA */
+	LQUOTA_TYPE_MAX
+};
+
+/* There are 2 different resource types on which a quota limit can be enforced:
+ * - inodes on the MDTs
+ * - blocks on the OSTs */
+enum {
+	LQUOTA_RES_MD		= 0x01, /* skip 0 to avoid null oid in FID */
+	LQUOTA_RES_DT		= 0x02,
+	LQUOTA_LAST_RES,
+	LQUOTA_FIRST_RES	= LQUOTA_RES_MD
+};
+#define LQUOTA_NR_RES (LQUOTA_LAST_RES - LQUOTA_FIRST_RES + 1)
+
+/*
+ * Space accounting support
+ * Format of an accounting record, providing disk usage information for a given
+ * user or group
+ */
+struct lquota_acct_rec { /* 16 bytes */
+	__u64 bspace;  /* current space in use */
+	__u64 ispace;  /* current # inodes in use */
+};
+
+/*
+ * Global quota index support
+ * Format of a global record, providing global quota settings for a given quota
+ * identifier
+ */
+struct lquota_glb_rec { /* 32 bytes */
+	__u64 qbr_hardlimit; /* quota hard limit, in #inodes or kbytes */
+	__u64 qbr_softlimit; /* quota soft limit, in #inodes or kbytes */
+	__u64 qbr_time;      /* grace time, in seconds */
+	__u64 qbr_granted;   /* how much is granted to slaves, in #inodes or
+			      * kbytes */
+};
+
+/*
+ * Slave index support
+ * Format of a slave record, recording how much space is granted to a given
+ * slave
+ */
+struct lquota_slv_rec { /* 8 bytes */
+	__u64 qsr_granted; /* space granted to the slave for the key=ID,
+			    * in #inodes or kbytes */
+};
+
+/* Data structures associated with the quota locks */
+
+/* Glimpse descriptor used for the index & per-ID quota locks */
+struct ldlm_gl_lquota_desc {
+	union lquota_id	gl_id;    /* quota ID subject to the glimpse */
+	__u64		gl_flags; /* see LQUOTA_FL* below */
+	__u64		gl_ver;   /* new index version */
+	__u64		gl_hardlimit; /* new hardlimit or qunit value */
+	__u64		gl_softlimit; /* new softlimit */
+	__u64		gl_time;
+	__u64		gl_pad2;
+};
+#define gl_qunit	gl_hardlimit /* current qunit value used when
+				      * glimpsing per-ID quota locks */
+
+/* quota glimpse flags */
+#define LQUOTA_FL_EDQUOT 0x1 /* user/group out of quota space on QMT */
+
+/* LVB used with quota (global and per-ID) locks */
+struct lquota_lvb {
+	__u64	lvb_flags;	/* see LQUOTA_FL* above */
+	__u64	lvb_id_may_rel; /* space that might be released later */
+	__u64	lvb_id_rel;     /* space released by the slave for this ID */
+	__u64	lvb_id_qunit;   /* current qunit value */
+	__u64	lvb_pad1;
+};
+
+extern void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
+
+/* LVB used with global quota lock */
+#define lvb_glb_ver  lvb_id_may_rel /* current version of the global index */
+
+/* op codes */
+typedef enum {
+	QUOTA_DQACQ	= 601,
+	QUOTA_DQREL	= 602,
+	QUOTA_LAST_OPC
+} quota_cmd_t;
+#define QUOTA_FIRST_OPC	QUOTA_DQACQ
+
+/*
+ *   MDS REQ RECORDS
+ */
+
+/* opcodes */
+typedef enum {
+	MDS_GETATTR		= 33,
+	MDS_GETATTR_NAME	= 34,
+	MDS_CLOSE		= 35,
+	MDS_REINT		= 36,
+	MDS_READPAGE		= 37,
+	MDS_CONNECT		= 38,
+	MDS_DISCONNECT		= 39,
+	MDS_GETSTATUS		= 40,
+	MDS_STATFS		= 41,
+	MDS_PIN			= 42,
+	MDS_UNPIN		= 43,
+	MDS_SYNC		= 44,
+	MDS_DONE_WRITING	= 45,
+	MDS_SET_INFO		= 46,
+	MDS_QUOTACHECK		= 47,
+	MDS_QUOTACTL		= 48,
+	MDS_GETXATTR		= 49,
+	MDS_SETXATTR		= 50, /* obsolete, now it's MDS_REINT op */
+	MDS_WRITEPAGE		= 51,
+	MDS_IS_SUBDIR		= 52,
+	MDS_GET_INFO		= 53,
+	MDS_HSM_STATE_GET	= 54,
+	MDS_HSM_STATE_SET	= 55,
+	MDS_HSM_ACTION		= 56,
+	MDS_HSM_PROGRESS	= 57,
+	MDS_HSM_REQUEST		= 58,
+	MDS_HSM_CT_REGISTER	= 59,
+	MDS_HSM_CT_UNREGISTER	= 60,
+	MDS_SWAP_LAYOUTS	= 61,
+	MDS_LAST_OPC
+} mds_cmd_t;
+
+#define MDS_FIRST_OPC    MDS_GETATTR
+
+
+/* opcodes for object update */
+typedef enum {
+	UPDATE_OBJ	= 1000,
+	UPDATE_LAST_OPC
+} update_cmd_t;
+
+#define UPDATE_FIRST_OPC    UPDATE_OBJ
+
+/*
+ * Do not exceed 63
+ */
+
+typedef enum {
+	REINT_SETATTR  = 1,
+	REINT_CREATE   = 2,
+	REINT_LINK     = 3,
+	REINT_UNLINK   = 4,
+	REINT_RENAME   = 5,
+	REINT_OPEN     = 6,
+	REINT_SETXATTR = 7,
+	REINT_RMENTRY  = 8,
+//      REINT_WRITE    = 9,
+	REINT_MAX
+} mds_reint_t, mdt_reint_t;
+
+extern void lustre_swab_generic_32s (__u32 *val);
+
+/* the disposition of the intent outlines what was executed */
+#define DISP_IT_EXECD	0x00000001
+#define DISP_LOOKUP_EXECD    0x00000002
+#define DISP_LOOKUP_NEG      0x00000004
+#define DISP_LOOKUP_POS      0x00000008
+#define DISP_OPEN_CREATE     0x00000010
+#define DISP_OPEN_OPEN       0x00000020
+#define DISP_ENQ_COMPLETE    0x00400000
+#define DISP_ENQ_OPEN_REF    0x00800000
+#define DISP_ENQ_CREATE_REF  0x01000000
+#define DISP_OPEN_LOCK       0x02000000
+
+/* INODE LOCK PARTS */
+#define MDS_INODELOCK_LOOKUP 0x000001       /* dentry, mode, owner, group */
+#define MDS_INODELOCK_UPDATE 0x000002       /* size, links, timestamps */
+#define MDS_INODELOCK_OPEN   0x000004       /* For opened files */
+#define MDS_INODELOCK_LAYOUT 0x000008       /* for layout */
+#define MDS_INODELOCK_PERM   0x000010       /* for permission */
+
+#define MDS_INODELOCK_MAXSHIFT 4
+/* This FULL lock is useful to take on unlink sort of operations */
+#define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1)
+
+extern void lustre_swab_ll_fid (struct ll_fid *fid);
+
+/* NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
+ * but was moved into name[1] along with the OID to avoid consuming the
+ * name[2,3] fields that need to be used for the quota id (also a FID). */
+enum {
+	LUSTRE_RES_ID_SEQ_OFF = 0,
+	LUSTRE_RES_ID_VER_OID_OFF = 1,
+	LUSTRE_RES_ID_WAS_VER_OFF = 2, /* see note above */
+	LUSTRE_RES_ID_QUOTA_SEQ_OFF = 2,
+	LUSTRE_RES_ID_QUOTA_VER_OID_OFF = 3,
+	LUSTRE_RES_ID_HSH_OFF = 3
+};
+
+#define MDS_STATUS_CONN 1
+#define MDS_STATUS_LOV 2
+
+/* mdt_thread_info.mti_flags. */
+enum md_op_flags {
+	/* The flag indicates Size-on-MDS attributes are changed. */
+	MF_SOM_CHANGE	   = (1 << 0),
+	/* Flags indicates an epoch opens or closes. */
+	MF_EPOCH_OPEN	   = (1 << 1),
+	MF_EPOCH_CLOSE	  = (1 << 2),
+	MF_MDC_CANCEL_FID1      = (1 << 3),
+	MF_MDC_CANCEL_FID2      = (1 << 4),
+	MF_MDC_CANCEL_FID3      = (1 << 5),
+	MF_MDC_CANCEL_FID4      = (1 << 6),
+	/* There is a pending attribute update. */
+	MF_SOM_AU	       = (1 << 7),
+	/* Cancel OST locks while getattr OST attributes. */
+	MF_GETATTR_LOCK	 = (1 << 8),
+	MF_GET_MDT_IDX	  = (1 << 9),
+};
+
+#define MF_SOM_LOCAL_FLAGS (MF_SOM_CHANGE | MF_EPOCH_OPEN | MF_EPOCH_CLOSE)
+
+#define LUSTRE_BFLAG_UNCOMMITTED_WRITES   0x1
+
+/* these should be identical to their EXT4_*_FL counterparts, they are
+ * redefined here only to avoid dragging in fs/ext4/ext4.h */
+#define LUSTRE_SYNC_FL	 0x00000008 /* Synchronous updates */
+#define LUSTRE_IMMUTABLE_FL    0x00000010 /* Immutable file */
+#define LUSTRE_APPEND_FL       0x00000020 /* writes to file may only append */
+#define LUSTRE_NOATIME_FL      0x00000080 /* do not update atime */
+#define LUSTRE_DIRSYNC_FL      0x00010000 /* dirsync behaviour (dir only) */
+
+/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
+ * for the client inode i_flags.  The LUSTRE_*_FL are the Lustre wire
+ * protocol equivalents of LDISKFS_*_FL values stored on disk, while
+ * the S_* flags are kernel-internal values that change between kernel
+ * versions.  These flags are set/cleared via FSFILT_IOC_{GET,SET}_FLAGS.
+ * See b=16526 for a full history. */
+static inline int ll_ext_to_inode_flags(int flags)
+{
+	return (((flags & LUSTRE_SYNC_FL)      ? S_SYNC      : 0) |
+		((flags & LUSTRE_NOATIME_FL)   ? S_NOATIME   : 0) |
+		((flags & LUSTRE_APPEND_FL)    ? S_APPEND    : 0) |
+#if defined(S_DIRSYNC)
+		((flags & LUSTRE_DIRSYNC_FL)   ? S_DIRSYNC   : 0) |
+#endif
+		((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0));
+}
+
+static inline int ll_inode_to_ext_flags(int iflags)
+{
+	return (((iflags & S_SYNC)      ? LUSTRE_SYNC_FL      : 0) |
+		((iflags & S_NOATIME)   ? LUSTRE_NOATIME_FL   : 0) |
+		((iflags & S_APPEND)    ? LUSTRE_APPEND_FL    : 0) |
+#if defined(S_DIRSYNC)
+		((iflags & S_DIRSYNC)   ? LUSTRE_DIRSYNC_FL   : 0) |
+#endif
+		((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0));
+}
+
+struct mdt_body {
+	struct lu_fid  fid1;
+	struct lu_fid  fid2;
+	struct lustre_handle handle;
+	__u64	  valid;
+	__u64	  size;   /* Offset, in the case of MDS_READPAGE */
+       obd_time	mtime;
+       obd_time	atime;
+       obd_time	ctime;
+	__u64	  blocks; /* XID, in the case of MDS_READPAGE */
+	__u64	  ioepoch;
+	__u64	       unused1; /* was "ino" until 2.4.0 */
+	__u32	  fsuid;
+	__u32	  fsgid;
+	__u32	  capability;
+	__u32	  mode;
+	__u32	  uid;
+	__u32	  gid;
+	__u32	  flags; /* from vfs for pin/unpin, LUSTRE_BFLAG close */
+	__u32	  rdev;
+	__u32	  nlink; /* #bytes to read in the case of MDS_READPAGE */
+	__u32	       unused2; /* was "generation" until 2.4.0 */
+	__u32	  suppgid;
+	__u32	  eadatasize;
+	__u32	  aclsize;
+	__u32	  max_mdsize;
+	__u32	  max_cookiesize;
+	__u32	  uid_h; /* high 32-bits of uid, for FUID */
+	__u32	  gid_h; /* high 32-bits of gid, for FUID */
+	__u32	  padding_5; /* also fix lustre_swab_mdt_body */
+	__u64	  padding_6;
+	__u64	  padding_7;
+	__u64	  padding_8;
+	__u64	  padding_9;
+	__u64	  padding_10;
+}; /* 216 */
+
+extern void lustre_swab_mdt_body (struct mdt_body *b);
+
+struct mdt_ioepoch {
+	struct lustre_handle handle;
+	__u64  ioepoch;
+	__u32  flags;
+	__u32  padding;
+};
+
+extern void lustre_swab_mdt_ioepoch (struct mdt_ioepoch *b);
+
+/* permissions for md_perm.mp_perm */
+enum {
+	CFS_SETUID_PERM = 0x01,
+	CFS_SETGID_PERM = 0x02,
+	CFS_SETGRP_PERM = 0x04,
+	CFS_RMTACL_PERM = 0x08,
+	CFS_RMTOWN_PERM = 0x10
+};
+
+/* inode access permission for remote user, the inode info are omitted,
+ * for client knows them. */
+struct mdt_remote_perm {
+	__u32	   rp_uid;
+	__u32	   rp_gid;
+	__u32	   rp_fsuid;
+	__u32	   rp_fsuid_h;
+	__u32	   rp_fsgid;
+	__u32	   rp_fsgid_h;
+	__u32	   rp_access_perm; /* MAY_READ/WRITE/EXEC */
+	__u32	   rp_padding;
+};
+
+extern void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p);
+
+struct mdt_rec_setattr {
+	__u32	   sa_opcode;
+	__u32	   sa_cap;
+	__u32	   sa_fsuid;
+	__u32	   sa_fsuid_h;
+	__u32	   sa_fsgid;
+	__u32	   sa_fsgid_h;
+	__u32	   sa_suppgid;
+	__u32	   sa_suppgid_h;
+	__u32	   sa_padding_1;
+	__u32	   sa_padding_1_h;
+	struct lu_fid   sa_fid;
+	__u64	   sa_valid;
+	__u32	   sa_uid;
+	__u32	   sa_gid;
+	__u64	   sa_size;
+	__u64	   sa_blocks;
+	obd_time	sa_mtime;
+	obd_time	sa_atime;
+	obd_time	sa_ctime;
+	__u32	   sa_attr_flags;
+	__u32	   sa_mode;
+	__u32	   sa_bias;      /* some operation flags */
+	__u32	   sa_padding_3;
+	__u32	   sa_padding_4;
+	__u32	   sa_padding_5;
+};
+
+extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa);
+
+/*
+ * Attribute flags used in mdt_rec_setattr::sa_valid.
+ * The kernel's #defines for ATTR_* should not be used over the network
+ * since the client and MDS may run different kernels (see bug 13828)
+ * Therefore, we should only use MDS_ATTR_* attributes for sa_valid.
+ */
+#define MDS_ATTR_MODE	  0x1ULL /* = 1 */
+#define MDS_ATTR_UID	   0x2ULL /* = 2 */
+#define MDS_ATTR_GID	   0x4ULL /* = 4 */
+#define MDS_ATTR_SIZE	  0x8ULL /* = 8 */
+#define MDS_ATTR_ATIME	0x10ULL /* = 16 */
+#define MDS_ATTR_MTIME	0x20ULL /* = 32 */
+#define MDS_ATTR_CTIME	0x40ULL /* = 64 */
+#define MDS_ATTR_ATIME_SET    0x80ULL /* = 128 */
+#define MDS_ATTR_MTIME_SET   0x100ULL /* = 256 */
+#define MDS_ATTR_FORCE       0x200ULL /* = 512, Not a change, but a change it */
+#define MDS_ATTR_ATTR_FLAG   0x400ULL /* = 1024 */
+#define MDS_ATTR_KILL_SUID   0x800ULL /* = 2048 */
+#define MDS_ATTR_KILL_SGID  0x1000ULL /* = 4096 */
+#define MDS_ATTR_CTIME_SET  0x2000ULL /* = 8192 */
+#define MDS_ATTR_FROM_OPEN  0x4000ULL /* = 16384, called from open path, ie O_TRUNC */
+#define MDS_ATTR_BLOCKS     0x8000ULL /* = 32768 */
+
+#ifndef FMODE_READ
+#define FMODE_READ	       00000001
+#define FMODE_WRITE	      00000002
+#endif
+
+#define MDS_FMODE_CLOSED	 00000000
+#define MDS_FMODE_EXEC	   00000004
+/* IO Epoch is opened on a closed file. */
+#define MDS_FMODE_EPOCH	  01000000
+/* IO Epoch is opened on a file truncate. */
+#define MDS_FMODE_TRUNC	  02000000
+/* Size-on-MDS Attribute Update is pending. */
+#define MDS_FMODE_SOM	    04000000
+
+#define MDS_OPEN_CREATED	 00000010
+#define MDS_OPEN_CROSS	   00000020
+
+#define MDS_OPEN_CREAT	   00000100
+#define MDS_OPEN_EXCL	    00000200
+#define MDS_OPEN_TRUNC	   00001000
+#define MDS_OPEN_APPEND	  00002000
+#define MDS_OPEN_SYNC	    00010000
+#define MDS_OPEN_DIRECTORY       00200000
+
+#define MDS_OPEN_BY_FID		040000000 /* open_by_fid for known object */
+#define MDS_OPEN_DELAY_CREATE  0100000000 /* delay initial object create */
+#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
+#define MDS_OPEN_JOIN_FILE     0400000000 /* open for join file.
+					   * We do not support JOIN FILE
+					   * anymore, reserve this flags
+					   * just for preventing such bit
+					   * to be reused. */
+
+#define MDS_OPEN_LOCK	 04000000000 /* This open requires open lock */
+#define MDS_OPEN_HAS_EA      010000000000 /* specify object create pattern */
+#define MDS_OPEN_HAS_OBJS    020000000000 /* Just set the EA the obj exist */
+#define MDS_OPEN_NORESTORE  0100000000000ULL /* Do not restore file at open */
+#define MDS_OPEN_NEWSTRIPE  0200000000000ULL /* New stripe needed (restripe or
+					      * hsm restore) */
+#define MDS_OPEN_VOLATILE   0400000000000ULL /* File is volatile = created
+						unlinked */
+
+/* permission for create non-directory file */
+#define MAY_CREATE      (1 << 7)
+/* permission for create directory file */
+#define MAY_LINK	(1 << 8)
+/* permission for delete from the directory */
+#define MAY_UNLINK      (1 << 9)
+/* source's permission for rename */
+#define MAY_RENAME_SRC  (1 << 10)
+/* target's permission for rename */
+#define MAY_RENAME_TAR  (1 << 11)
+/* part (parent's) VTX permission check */
+#define MAY_VTX_PART    (1 << 12)
+/* full VTX permission check */
+#define MAY_VTX_FULL    (1 << 13)
+/* lfs rgetfacl permission check */
+#define MAY_RGETFACL    (1 << 14)
+
+enum {
+	MDS_CHECK_SPLIT		= 1 << 0,
+	MDS_CROSS_REF		= 1 << 1,
+	MDS_VTX_BYPASS		= 1 << 2,
+	MDS_PERM_BYPASS		= 1 << 3,
+	MDS_SOM			= 1 << 4,
+	MDS_QUOTA_IGNORE	= 1 << 5,
+	MDS_CLOSE_CLEANUP	= 1 << 6,
+	MDS_KEEP_ORPHAN		= 1 << 7,
+	MDS_RECOV_OPEN		= 1 << 8,
+	MDS_DATA_MODIFIED	= 1 << 9,
+	MDS_CREATE_VOLATILE	= 1 << 10,
+	MDS_OWNEROVERRIDE	= 1 << 11,
+};
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_create {
+	__u32	   cr_opcode;
+	__u32	   cr_cap;
+	__u32	   cr_fsuid;
+	__u32	   cr_fsuid_h;
+	__u32	   cr_fsgid;
+	__u32	   cr_fsgid_h;
+	__u32	   cr_suppgid1;
+	__u32	   cr_suppgid1_h;
+	__u32	   cr_suppgid2;
+	__u32	   cr_suppgid2_h;
+	struct lu_fid   cr_fid1;
+	struct lu_fid   cr_fid2;
+	struct lustre_handle cr_old_handle; /* handle in case of open replay */
+	obd_time	cr_time;
+	__u64	   cr_rdev;
+	__u64	   cr_ioepoch;
+	__u64	   cr_padding_1;   /* rr_blocks */
+	__u32	   cr_mode;
+	__u32	   cr_bias;
+	/* use of helpers set/get_mrc_cr_flags() is needed to access
+	 * 64 bits cr_flags [cr_flags_l, cr_flags_h], this is done to
+	 * extend cr_flags size without breaking 1.8 compat */
+	__u32	   cr_flags_l;     /* for use with open, low  32 bits  */
+	__u32	   cr_flags_h;     /* for use with open, high 32 bits */
+	__u32	   cr_umask;       /* umask for create */
+	__u32	   cr_padding_4;   /* rr_padding_4 */
+};
+
+static inline void set_mrc_cr_flags(struct mdt_rec_create *mrc, __u64 flags)
+{
+	mrc->cr_flags_l = (__u32)(flags & 0xFFFFFFFFUll);
+	mrc->cr_flags_h = (__u32)(flags >> 32);
+}
+
+static inline __u64 get_mrc_cr_flags(struct mdt_rec_create *mrc)
+{
+	return ((__u64)(mrc->cr_flags_l) | ((__u64)mrc->cr_flags_h << 32));
+}
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_link {
+	__u32	   lk_opcode;
+	__u32	   lk_cap;
+	__u32	   lk_fsuid;
+	__u32	   lk_fsuid_h;
+	__u32	   lk_fsgid;
+	__u32	   lk_fsgid_h;
+	__u32	   lk_suppgid1;
+	__u32	   lk_suppgid1_h;
+	__u32	   lk_suppgid2;
+	__u32	   lk_suppgid2_h;
+	struct lu_fid   lk_fid1;
+	struct lu_fid   lk_fid2;
+	obd_time	lk_time;
+	__u64	   lk_padding_1;   /* rr_atime */
+	__u64	   lk_padding_2;   /* rr_ctime */
+	__u64	   lk_padding_3;   /* rr_size */
+	__u64	   lk_padding_4;   /* rr_blocks */
+	__u32	   lk_bias;
+	__u32	   lk_padding_5;   /* rr_mode */
+	__u32	   lk_padding_6;   /* rr_flags */
+	__u32	   lk_padding_7;   /* rr_padding_2 */
+	__u32	   lk_padding_8;   /* rr_padding_3 */
+	__u32	   lk_padding_9;   /* rr_padding_4 */
+};
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_unlink {
+	__u32	   ul_opcode;
+	__u32	   ul_cap;
+	__u32	   ul_fsuid;
+	__u32	   ul_fsuid_h;
+	__u32	   ul_fsgid;
+	__u32	   ul_fsgid_h;
+	__u32	   ul_suppgid1;
+	__u32	   ul_suppgid1_h;
+	__u32	   ul_suppgid2;
+	__u32	   ul_suppgid2_h;
+	struct lu_fid   ul_fid1;
+	struct lu_fid   ul_fid2;
+	obd_time	ul_time;
+	__u64	   ul_padding_2;   /* rr_atime */
+	__u64	   ul_padding_3;   /* rr_ctime */
+	__u64	   ul_padding_4;   /* rr_size */
+	__u64	   ul_padding_5;   /* rr_blocks */
+	__u32	   ul_bias;
+	__u32	   ul_mode;
+	__u32	   ul_padding_6;   /* rr_flags */
+	__u32	   ul_padding_7;   /* rr_padding_2 */
+	__u32	   ul_padding_8;   /* rr_padding_3 */
+	__u32	   ul_padding_9;   /* rr_padding_4 */
+};
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_rename {
+	__u32	   rn_opcode;
+	__u32	   rn_cap;
+	__u32	   rn_fsuid;
+	__u32	   rn_fsuid_h;
+	__u32	   rn_fsgid;
+	__u32	   rn_fsgid_h;
+	__u32	   rn_suppgid1;
+	__u32	   rn_suppgid1_h;
+	__u32	   rn_suppgid2;
+	__u32	   rn_suppgid2_h;
+	struct lu_fid   rn_fid1;
+	struct lu_fid   rn_fid2;
+	obd_time	rn_time;
+	__u64	   rn_padding_1;   /* rr_atime */
+	__u64	   rn_padding_2;   /* rr_ctime */
+	__u64	   rn_padding_3;   /* rr_size */
+	__u64	   rn_padding_4;   /* rr_blocks */
+	__u32	   rn_bias;	/* some operation flags */
+	__u32	   rn_mode;	/* cross-ref rename has mode */
+	__u32	   rn_padding_5;   /* rr_flags */
+	__u32	   rn_padding_6;   /* rr_padding_2 */
+	__u32	   rn_padding_7;   /* rr_padding_3 */
+	__u32	   rn_padding_8;   /* rr_padding_4 */
+};
+
+/* instance of mdt_reint_rec */
+struct mdt_rec_setxattr {
+	__u32	   sx_opcode;
+	__u32	   sx_cap;
+	__u32	   sx_fsuid;
+	__u32	   sx_fsuid_h;
+	__u32	   sx_fsgid;
+	__u32	   sx_fsgid_h;
+	__u32	   sx_suppgid1;
+	__u32	   sx_suppgid1_h;
+	__u32	   sx_suppgid2;
+	__u32	   sx_suppgid2_h;
+	struct lu_fid   sx_fid;
+	__u64	   sx_padding_1;   /* These three are rr_fid2 */
+	__u32	   sx_padding_2;
+	__u32	   sx_padding_3;
+	__u64	   sx_valid;
+	obd_time	sx_time;
+	__u64	   sx_padding_5;   /* rr_ctime */
+	__u64	   sx_padding_6;   /* rr_size */
+	__u64	   sx_padding_7;   /* rr_blocks */
+	__u32	   sx_size;
+	__u32	   sx_flags;
+	__u32	   sx_padding_8;   /* rr_flags */
+	__u32	   sx_padding_9;   /* rr_padding_2 */
+	__u32	   sx_padding_10;  /* rr_padding_3 */
+	__u32	   sx_padding_11;  /* rr_padding_4 */
+};
+
+/*
+ * mdt_rec_reint is the template for all mdt_reint_xxx structures.
+ * Do NOT change the size of various members, otherwise the value
+ * will be broken in lustre_swab_mdt_rec_reint().
+ *
+ * If you add new members in other mdt_reint_xxx structres and need to use the
+ * rr_padding_x fields, then update lustre_swab_mdt_rec_reint() also.
+ */
+struct mdt_rec_reint {
+	__u32	   rr_opcode;
+	__u32	   rr_cap;
+	__u32	   rr_fsuid;
+	__u32	   rr_fsuid_h;
+	__u32	   rr_fsgid;
+	__u32	   rr_fsgid_h;
+	__u32	   rr_suppgid1;
+	__u32	   rr_suppgid1_h;
+	__u32	   rr_suppgid2;
+	__u32	   rr_suppgid2_h;
+	struct lu_fid   rr_fid1;
+	struct lu_fid   rr_fid2;
+	obd_time	rr_mtime;
+	obd_time	rr_atime;
+	obd_time	rr_ctime;
+	__u64	   rr_size;
+	__u64	   rr_blocks;
+	__u32	   rr_bias;
+	__u32	   rr_mode;
+	__u32	   rr_flags;
+	__u32	   rr_flags_h;
+	__u32	   rr_umask;
+	__u32	   rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */
+};
+
+extern void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
+
+struct lmv_desc {
+	__u32 ld_tgt_count;		/* how many MDS's */
+	__u32 ld_active_tgt_count;	 /* how many active */
+	__u32 ld_default_stripe_count;     /* how many objects are used */
+	__u32 ld_pattern;		  /* default MEA_MAGIC_* */
+	__u64 ld_default_hash_size;
+	__u64 ld_padding_1;		/* also fix lustre_swab_lmv_desc */
+	__u32 ld_padding_2;		/* also fix lustre_swab_lmv_desc */
+	__u32 ld_qos_maxage;	       /* in second */
+	__u32 ld_padding_3;		/* also fix lustre_swab_lmv_desc */
+	__u32 ld_padding_4;		/* also fix lustre_swab_lmv_desc */
+	struct obd_uuid ld_uuid;
+};
+
+extern void lustre_swab_lmv_desc (struct lmv_desc *ld);
+
+/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */
+struct lmv_stripe_md {
+	__u32	 mea_magic;
+	__u32	 mea_count;
+	__u32	 mea_master;
+	__u32	 mea_padding;
+	char	  mea_pool_name[LOV_MAXPOOLNAME];
+	struct lu_fid mea_ids[0];
+};
+
+extern void lustre_swab_lmv_stripe_md(struct lmv_stripe_md *mea);
+
+/* lmv structures */
+#define MEA_MAGIC_LAST_CHAR      0xb2221ca1
+#define MEA_MAGIC_ALL_CHARS      0xb222a11c
+#define MEA_MAGIC_HASH_SEGMENT   0xb222a11b
+
+#define MAX_HASH_SIZE_32	 0x7fffffffUL
+#define MAX_HASH_SIZE	    0x7fffffffffffffffULL
+#define MAX_HASH_HIGHEST_BIT     0x1000000000000000ULL
+
+enum fld_rpc_opc {
+	FLD_QUERY		       = 900,
+	FLD_LAST_OPC,
+	FLD_FIRST_OPC		   = FLD_QUERY
+};
+
+enum seq_rpc_opc {
+	SEQ_QUERY		       = 700,
+	SEQ_LAST_OPC,
+	SEQ_FIRST_OPC		   = SEQ_QUERY
+};
+
+enum seq_op {
+	SEQ_ALLOC_SUPER = 0,
+	SEQ_ALLOC_META = 1
+};
+
+/*
+ *  LOV data structures
+ */
+
+#define LOV_MAX_UUID_BUFFER_SIZE  8192
+/* The size of the buffer the lov/mdc reserves for the
+ * array of UUIDs returned by the MDS.  With the current
+ * protocol, this will limit the max number of OSTs per LOV */
+
+#define LOV_DESC_MAGIC 0xB0CCDE5C
+
+/* LOV settings descriptor (should only contain static info) */
+struct lov_desc {
+	__u32 ld_tgt_count;		/* how many OBD's */
+	__u32 ld_active_tgt_count;	 /* how many active */
+	__u32 ld_default_stripe_count;     /* how many objects are used */
+	__u32 ld_pattern;		  /* default PATTERN_RAID0 */
+	__u64 ld_default_stripe_size;      /* in bytes */
+	__u64 ld_default_stripe_offset;    /* in bytes */
+	__u32 ld_padding_0;		/* unused */
+	__u32 ld_qos_maxage;	       /* in second */
+	__u32 ld_padding_1;		/* also fix lustre_swab_lov_desc */
+	__u32 ld_padding_2;		/* also fix lustre_swab_lov_desc */
+	struct obd_uuid ld_uuid;
+};
+
+#define ld_magic ld_active_tgt_count       /* for swabbing from llogs */
+
+extern void lustre_swab_lov_desc (struct lov_desc *ld);
+
+/*
+ *   LDLM requests:
+ */
+/* opcodes -- MUST be distinct from OST/MDS opcodes */
+typedef enum {
+	LDLM_ENQUEUE     = 101,
+	LDLM_CONVERT     = 102,
+	LDLM_CANCEL      = 103,
+	LDLM_BL_CALLBACK = 104,
+	LDLM_CP_CALLBACK = 105,
+	LDLM_GL_CALLBACK = 106,
+	LDLM_SET_INFO    = 107,
+	LDLM_LAST_OPC
+} ldlm_cmd_t;
+#define LDLM_FIRST_OPC LDLM_ENQUEUE
+
+#define RES_NAME_SIZE 4
+struct ldlm_res_id {
+	__u64 name[RES_NAME_SIZE];
+};
+
+extern void lustre_swab_ldlm_res_id (struct ldlm_res_id *id);
+
+static inline int ldlm_res_eq(const struct ldlm_res_id *res0,
+			      const struct ldlm_res_id *res1)
+{
+	return !memcmp(res0, res1, sizeof(*res0));
+}
+
+/* lock types */
+typedef enum {
+	LCK_MINMODE = 0,
+	LCK_EX      = 1,
+	LCK_PW      = 2,
+	LCK_PR      = 4,
+	LCK_CW      = 8,
+	LCK_CR      = 16,
+	LCK_NL      = 32,
+	LCK_GROUP   = 64,
+	LCK_COS     = 128,
+	LCK_MAXMODE
+} ldlm_mode_t;
+
+#define LCK_MODE_NUM    8
+
+typedef enum {
+	LDLM_PLAIN     = 10,
+	LDLM_EXTENT    = 11,
+	LDLM_FLOCK     = 12,
+	LDLM_IBITS     = 13,
+	LDLM_MAX_TYPE
+} ldlm_type_t;
+
+#define LDLM_MIN_TYPE LDLM_PLAIN
+
+struct ldlm_extent {
+	__u64 start;
+	__u64 end;
+	__u64 gid;
+};
+
+static inline int ldlm_extent_overlap(struct ldlm_extent *ex1,
+				      struct ldlm_extent *ex2)
+{
+	return (ex1->start <= ex2->end) && (ex2->start <= ex1->end);
+}
+
+/* check if @ex1 contains @ex2 */
+static inline int ldlm_extent_contain(struct ldlm_extent *ex1,
+				      struct ldlm_extent *ex2)
+{
+	return (ex1->start <= ex2->start) && (ex1->end >= ex2->end);
+}
+
+struct ldlm_inodebits {
+	__u64 bits;
+};
+
+struct ldlm_flock_wire {
+	__u64 lfw_start;
+	__u64 lfw_end;
+	__u64 lfw_owner;
+	__u32 lfw_padding;
+	__u32 lfw_pid;
+};
+
+/* it's important that the fields of the ldlm_extent structure match
+ * the first fields of the ldlm_flock structure because there is only
+ * one ldlm_swab routine to process the ldlm_policy_data_t union. if
+ * this ever changes we will need to swab the union differently based
+ * on the resource type. */
+
+typedef union {
+	struct ldlm_extent l_extent;
+	struct ldlm_flock_wire l_flock;
+	struct ldlm_inodebits l_inodebits;
+} ldlm_wire_policy_data_t;
+
+extern void lustre_swab_ldlm_policy_data (ldlm_wire_policy_data_t *d);
+
+union ldlm_gl_desc {
+	struct ldlm_gl_lquota_desc	lquota_desc;
+};
+
+extern void lustre_swab_gl_desc(union ldlm_gl_desc *);
+
+struct ldlm_intent {
+	__u64 opc;
+};
+
+extern void lustre_swab_ldlm_intent (struct ldlm_intent *i);
+
+struct ldlm_resource_desc {
+	ldlm_type_t lr_type;
+	__u32 lr_padding;       /* also fix lustre_swab_ldlm_resource_desc */
+	struct ldlm_res_id lr_name;
+};
+
+extern void lustre_swab_ldlm_resource_desc (struct ldlm_resource_desc *r);
+
+struct ldlm_lock_desc {
+	struct ldlm_resource_desc l_resource;
+	ldlm_mode_t l_req_mode;
+	ldlm_mode_t l_granted_mode;
+	ldlm_wire_policy_data_t l_policy_data;
+};
+
+extern void lustre_swab_ldlm_lock_desc (struct ldlm_lock_desc *l);
+
+#define LDLM_LOCKREQ_HANDLES 2
+#define LDLM_ENQUEUE_CANCEL_OFF 1
+
+struct ldlm_request {
+	__u32 lock_flags;
+	__u32 lock_count;
+	struct ldlm_lock_desc lock_desc;
+	struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
+};
+
+extern void lustre_swab_ldlm_request (struct ldlm_request *rq);
+
+/* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available.
+ * Otherwise, 2 are available. */
+#define ldlm_request_bufsize(count,type)				\
+({								      \
+	int _avail = LDLM_LOCKREQ_HANDLES;			      \
+	_avail -= (type == LDLM_ENQUEUE ? LDLM_ENQUEUE_CANCEL_OFF : 0); \
+	sizeof(struct ldlm_request) +				   \
+	(count > _avail ? count - _avail : 0) *			 \
+	sizeof(struct lustre_handle);				   \
+})
+
+struct ldlm_reply {
+	__u32 lock_flags;
+	__u32 lock_padding;     /* also fix lustre_swab_ldlm_reply */
+	struct ldlm_lock_desc lock_desc;
+	struct lustre_handle lock_handle;
+	__u64  lock_policy_res1;
+	__u64  lock_policy_res2;
+};
+
+extern void lustre_swab_ldlm_reply (struct ldlm_reply *r);
+
+#define ldlm_flags_to_wire(flags)    ((__u32)(flags))
+#define ldlm_flags_from_wire(flags)  ((__u64)(flags))
+
+/*
+ * Opcodes for mountconf (mgs and mgc)
+ */
+typedef enum {
+	MGS_CONNECT = 250,
+	MGS_DISCONNECT,
+	MGS_EXCEPTION,	 /* node died, etc. */
+	MGS_TARGET_REG,	/* whenever target starts up */
+	MGS_TARGET_DEL,
+	MGS_SET_INFO,
+	MGS_CONFIG_READ,
+	MGS_LAST_OPC
+} mgs_cmd_t;
+#define MGS_FIRST_OPC MGS_CONNECT
+
+#define MGS_PARAM_MAXLEN 1024
+#define KEY_SET_INFO "set_info"
+
+struct mgs_send_param {
+	char	     mgs_param[MGS_PARAM_MAXLEN];
+};
+
+/* We pass this info to the MGS so it can write config logs */
+#define MTI_NAME_MAXLEN  64
+#define MTI_PARAM_MAXLEN 4096
+#define MTI_NIDS_MAX     32
+struct mgs_target_info {
+	__u32	    mti_lustre_ver;
+	__u32	    mti_stripe_index;
+	__u32	    mti_config_ver;
+	__u32	    mti_flags;
+	__u32	    mti_nid_count;
+	__u32	    mti_instance; /* Running instance of target */
+	char	     mti_fsname[MTI_NAME_MAXLEN];
+	char	     mti_svname[MTI_NAME_MAXLEN];
+	char	     mti_uuid[sizeof(struct obd_uuid)];
+	__u64	    mti_nids[MTI_NIDS_MAX];     /* host nids (lnet_nid_t)*/
+	char	     mti_params[MTI_PARAM_MAXLEN];
+};
+extern void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
+
+struct mgs_nidtbl_entry {
+	__u64	   mne_version;    /* table version of this entry */
+	__u32	   mne_instance;   /* target instance # */
+	__u32	   mne_index;      /* target index */
+	__u32	   mne_length;     /* length of this entry - by bytes */
+	__u8	    mne_type;       /* target type LDD_F_SV_TYPE_OST/MDT */
+	__u8	    mne_nid_type;   /* type of nid(mbz). for ipv6. */
+	__u8	    mne_nid_size;   /* size of each NID, by bytes */
+	__u8	    mne_nid_count;  /* # of NIDs in buffer */
+	union {
+		lnet_nid_t nids[0];     /* variable size buffer for NIDs. */
+	} u;
+};
+extern void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
+
+struct mgs_config_body {
+	char     mcb_name[MTI_NAME_MAXLEN]; /* logname */
+	__u64    mcb_offset;    /* next index of config log to request */
+	__u16    mcb_type;      /* type of log: CONFIG_T_[CONFIG|RECOVER] */
+	__u8     mcb_reserved;
+	__u8     mcb_bits;      /* bits unit size of config log */
+	__u32    mcb_units;     /* # of units for bulk transfer */
+};
+extern void lustre_swab_mgs_config_body(struct mgs_config_body *body);
+
+struct mgs_config_res {
+	__u64    mcr_offset;    /* index of last config log */
+	__u64    mcr_size;      /* size of the log */
+};
+extern void lustre_swab_mgs_config_res(struct mgs_config_res *body);
+
+/* Config marker flags (in config log) */
+#define CM_START       0x01
+#define CM_END	 0x02
+#define CM_SKIP	0x04
+#define CM_UPGRADE146  0x08
+#define CM_EXCLUDE     0x10
+#define CM_START_SKIP (CM_START | CM_SKIP)
+
+struct cfg_marker {
+	__u32	     cm_step;       /* aka config version */
+	__u32	     cm_flags;
+	__u32	     cm_vers;       /* lustre release version number */
+	__u32	     cm_padding;    /* 64 bit align */
+	obd_time	  cm_createtime; /*when this record was first created */
+	obd_time	  cm_canceltime; /*when this record is no longer valid*/
+	char	      cm_tgtname[MTI_NAME_MAXLEN];
+	char	      cm_comment[MTI_NAME_MAXLEN];
+};
+
+extern void lustre_swab_cfg_marker(struct cfg_marker *marker,
+				   int swab, int size);
+
+/*
+ * Opcodes for multiple servers.
+ */
+
+typedef enum {
+	OBD_PING = 400,
+	OBD_LOG_CANCEL,
+	OBD_QC_CALLBACK,
+	OBD_IDX_READ,
+	OBD_LAST_OPC
+} obd_cmd_t;
+#define OBD_FIRST_OPC OBD_PING
+
+/* catalog of log objects */
+
+/** Identifier for a single log object */
+struct llog_logid {
+	struct ost_id		lgl_oi;
+	__u32		   lgl_ogen;
+} __attribute__((packed));
+
+/** Records written to the CATALOGS list */
+#define CATLIST "CATALOGS"
+struct llog_catid {
+	struct llog_logid       lci_logid;
+	__u32		   lci_padding1;
+	__u32		   lci_padding2;
+	__u32		   lci_padding3;
+} __attribute__((packed));
+
+/* Log data record types - there is no specific reason that these need to
+ * be related to the RPC opcodes, but no reason not to (may be handy later?)
+ */
+#define LLOG_OP_MAGIC 0x10600000
+#define LLOG_OP_MASK  0xfff00000
+
+typedef enum {
+	LLOG_PAD_MAGIC		= LLOG_OP_MAGIC | 0x00000,
+	OST_SZ_REC		= LLOG_OP_MAGIC | 0x00f00,
+	/* OST_RAID1_REC	= LLOG_OP_MAGIC | 0x01000, never used */
+	MDS_UNLINK_REC		= LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) |
+				  REINT_UNLINK, /* obsolete after 2.5.0 */
+	MDS_UNLINK64_REC	= LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
+				  REINT_UNLINK,
+	/* MDS_SETATTR_REC	= LLOG_OP_MAGIC | 0x12401, obsolete 1.8.0 */
+	MDS_SETATTR64_REC	= LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) |
+				  REINT_SETATTR,
+	OBD_CFG_REC		= LLOG_OP_MAGIC | 0x20000,
+	/* PTL_CFG_REC		= LLOG_OP_MAGIC | 0x30000, obsolete 1.4.0 */
+	LLOG_GEN_REC		= LLOG_OP_MAGIC | 0x40000,
+	/* LLOG_JOIN_REC	= LLOG_OP_MAGIC | 0x50000, obsolete  1.8.0 */
+	CHANGELOG_REC		= LLOG_OP_MAGIC | 0x60000,
+	CHANGELOG_USER_REC	= LLOG_OP_MAGIC | 0x70000,
+	LLOG_HDR_MAGIC		= LLOG_OP_MAGIC | 0x45539,
+	LLOG_LOGID_MAGIC	= LLOG_OP_MAGIC | 0x4553b,
+} llog_op_type;
+
+#define LLOG_REC_HDR_NEEDS_SWABBING(r) \
+	(((r)->lrh_type & __swab32(LLOG_OP_MASK)) == __swab32(LLOG_OP_MAGIC))
+
+/** Log record header - stored in little endian order.
+ * Each record must start with this struct, end with a llog_rec_tail,
+ * and be a multiple of 256 bits in size.
+ */
+struct llog_rec_hdr {
+	__u32	lrh_len;
+	__u32	lrh_index;
+	__u32	lrh_type;
+	__u32	lrh_id;
+};
+
+struct llog_rec_tail {
+	__u32	lrt_len;
+	__u32	lrt_index;
+};
+
+/* Where data follow just after header */
+#define REC_DATA(ptr)						\
+	((void *)((char *)ptr + sizeof(struct llog_rec_hdr)))
+
+#define REC_DATA_LEN(rec)					\
+	(rec->lrh_len - sizeof(struct llog_rec_hdr) -		\
+	 sizeof(struct llog_rec_tail))
+
+struct llog_logid_rec {
+	struct llog_rec_hdr	lid_hdr;
+	struct llog_logid	lid_id;
+	__u32			lid_padding1;
+	__u64			lid_padding2;
+	__u64			lid_padding3;
+	struct llog_rec_tail	lid_tail;
+} __attribute__((packed));
+
+struct llog_unlink_rec {
+	struct llog_rec_hdr	lur_hdr;
+	obd_id			lur_oid;
+	obd_count		lur_oseq;
+	obd_count		lur_count;
+	struct llog_rec_tail	lur_tail;
+} __attribute__((packed));
+
+struct llog_unlink64_rec {
+	struct llog_rec_hdr	lur_hdr;
+	struct lu_fid		lur_fid;
+	obd_count		lur_count; /* to destroy the lost precreated */
+	__u32			lur_padding1;
+	__u64			lur_padding2;
+	__u64			lur_padding3;
+	struct llog_rec_tail    lur_tail;
+} __attribute__((packed));
+
+struct llog_setattr64_rec {
+	struct llog_rec_hdr	lsr_hdr;
+	struct ost_id		lsr_oi;
+	__u32			lsr_uid;
+	__u32			lsr_uid_h;
+	__u32			lsr_gid;
+	__u32			lsr_gid_h;
+	__u64			lsr_padding;
+	struct llog_rec_tail    lsr_tail;
+} __attribute__((packed));
+
+struct llog_size_change_rec {
+	struct llog_rec_hdr	lsc_hdr;
+	struct ll_fid		lsc_fid;
+	__u32			lsc_ioepoch;
+	__u32			lsc_padding1;
+	__u64			lsc_padding2;
+	__u64			lsc_padding3;
+	struct llog_rec_tail	lsc_tail;
+} __attribute__((packed));
+
+#define CHANGELOG_MAGIC 0xca103000
+
+/** \a changelog_rec_type's that can't be masked */
+#define CHANGELOG_MINMASK (1 << CL_MARK)
+/** bits covering all \a changelog_rec_type's */
+#define CHANGELOG_ALLMASK 0XFFFFFFFF
+/** default \a changelog_rec_type mask */
+#define CHANGELOG_DEFMASK CHANGELOG_ALLMASK & ~(1 << CL_ATIME | 1 << CL_CLOSE)
+
+/* changelog llog name, needed by client replicators */
+#define CHANGELOG_CATALOG "changelog_catalog"
+
+struct changelog_setinfo {
+	__u64 cs_recno;
+	__u32 cs_id;
+} __attribute__((packed));
+
+/** changelog record */
+struct llog_changelog_rec {
+	struct llog_rec_hdr  cr_hdr;
+	struct changelog_rec cr;
+	struct llog_rec_tail cr_tail; /**< for_sizezof_only */
+} __attribute__((packed));
+
+struct llog_changelog_ext_rec {
+	struct llog_rec_hdr      cr_hdr;
+	struct changelog_ext_rec cr;
+	struct llog_rec_tail     cr_tail; /**< for_sizezof_only */
+} __attribute__((packed));
+
+#define CHANGELOG_USER_PREFIX "cl"
+
+struct llog_changelog_user_rec {
+	struct llog_rec_hdr   cur_hdr;
+	__u32		 cur_id;
+	__u32		 cur_padding;
+	__u64		 cur_endrec;
+	struct llog_rec_tail  cur_tail;
+} __attribute__((packed));
+
+/* Old llog gen for compatibility */
+struct llog_gen {
+	__u64 mnt_cnt;
+	__u64 conn_cnt;
+} __attribute__((packed));
+
+struct llog_gen_rec {
+	struct llog_rec_hdr	lgr_hdr;
+	struct llog_gen		lgr_gen;
+	__u64			padding1;
+	__u64			padding2;
+	__u64			padding3;
+	struct llog_rec_tail	lgr_tail;
+};
+
+/* On-disk header structure of each log object, stored in little endian order */
+#define LLOG_CHUNK_SIZE	 8192
+#define LLOG_HEADER_SIZE	(96)
+#define LLOG_BITMAP_BYTES       (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE)
+
+#define LLOG_MIN_REC_SIZE       (24) /* round(llog_rec_hdr + llog_rec_tail) */
+
+/* flags for the logs */
+enum llog_flag {
+	LLOG_F_ZAP_WHEN_EMPTY	= 0x1,
+	LLOG_F_IS_CAT		= 0x2,
+	LLOG_F_IS_PLAIN		= 0x4,
+};
+
+struct llog_log_hdr {
+	struct llog_rec_hdr     llh_hdr;
+	obd_time		llh_timestamp;
+	__u32		   llh_count;
+	__u32		   llh_bitmap_offset;
+	__u32		   llh_size;
+	__u32		   llh_flags;
+	__u32		   llh_cat_idx;
+	/* for a catalog the first plain slot is next to it */
+	struct obd_uuid	 llh_tgtuuid;
+	__u32		   llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32) - 23];
+	__u32		   llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)];
+	struct llog_rec_tail    llh_tail;
+} __attribute__((packed));
+
+#define LLOG_BITMAP_SIZE(llh)  (__u32)((llh->llh_hdr.lrh_len -		\
+					llh->llh_bitmap_offset -	\
+					sizeof(llh->llh_tail)) * 8)
+
+/** log cookies are used to reference a specific log file and a record therein */
+struct llog_cookie {
+	struct llog_logid       lgc_lgl;
+	__u32		   lgc_subsys;
+	__u32		   lgc_index;
+	__u32		   lgc_padding;
+} __attribute__((packed));
+
+/** llog protocol */
+enum llogd_rpc_ops {
+	LLOG_ORIGIN_HANDLE_CREATE       = 501,
+	LLOG_ORIGIN_HANDLE_NEXT_BLOCK   = 502,
+	LLOG_ORIGIN_HANDLE_READ_HEADER  = 503,
+	LLOG_ORIGIN_HANDLE_WRITE_REC    = 504,
+	LLOG_ORIGIN_HANDLE_CLOSE	= 505,
+	LLOG_ORIGIN_CONNECT	     = 506,
+	LLOG_CATINFO			= 507,  /* deprecated */
+	LLOG_ORIGIN_HANDLE_PREV_BLOCK   = 508,
+	LLOG_ORIGIN_HANDLE_DESTROY      = 509,  /* for destroy llog object*/
+	LLOG_LAST_OPC,
+	LLOG_FIRST_OPC		  = LLOG_ORIGIN_HANDLE_CREATE
+};
+
+struct llogd_body {
+	struct llog_logid  lgd_logid;
+	__u32 lgd_ctxt_idx;
+	__u32 lgd_llh_flags;
+	__u32 lgd_index;
+	__u32 lgd_saved_index;
+	__u32 lgd_len;
+	__u64 lgd_cur_offset;
+} __attribute__((packed));
+
+struct llogd_conn_body {
+	struct llog_gen	 lgdc_gen;
+	struct llog_logid       lgdc_logid;
+	__u32		   lgdc_ctxt_idx;
+} __attribute__((packed));
+
+/* Note: 64-bit types are 64-bit aligned in structure */
+struct obdo {
+	obd_valid	       o_valid;	/* hot fields in this obdo */
+	struct ost_id	   o_oi;
+	obd_id		  o_parent_seq;
+	obd_size		o_size;	 /* o_size-o_blocks == ost_lvb */
+	obd_time		o_mtime;
+	obd_time		o_atime;
+	obd_time		o_ctime;
+	obd_blocks	      o_blocks;       /* brw: cli sent cached bytes */
+	obd_size		o_grant;
+
+	/* 32-bit fields start here: keep an even number of them via padding */
+	obd_blksize	     o_blksize;      /* optimal IO blocksize */
+	obd_mode		o_mode;	 /* brw: cli sent cache remain */
+	obd_uid		 o_uid;
+	obd_gid		 o_gid;
+	obd_flag		o_flags;
+	obd_count	       o_nlink;	/* brw: checksum */
+	obd_count	       o_parent_oid;
+	obd_count		o_misc;		/* brw: o_dropped */
+
+	__u64		   o_ioepoch;      /* epoch in ost writes */
+	__u32		   o_stripe_idx;   /* holds stripe idx */
+	__u32		   o_parent_ver;
+	struct lustre_handle    o_handle;       /* brw: lock handle to prolong
+						 * locks */
+	struct llog_cookie      o_lcookie;      /* destroy: unlink cookie from
+						 * MDS */
+	__u32			o_uid_h;
+	__u32			o_gid_h;
+
+	__u64			o_data_version; /* getattr: sum of iversion for
+						 * each stripe.
+						 * brw: grant space consumed on
+						 * the client for the write */
+	__u64			o_padding_4;
+	__u64			o_padding_5;
+	__u64			o_padding_6;
+};
+
+#define o_dirty   o_blocks
+#define o_undirty o_mode
+#define o_dropped o_misc
+#define o_cksum   o_nlink
+#define o_grant_used o_data_version
+
+static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd,
+					struct obdo *wobdo, struct obdo *lobdo)
+{
+	memcpy(wobdo, lobdo, sizeof(*lobdo));
+	wobdo->o_flags &= ~OBD_FL_LOCAL_MASK;
+	if (ocd == NULL)
+		return;
+
+	if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) &&
+	    fid_seq_is_echo(ostid_seq(&lobdo->o_oi))) {
+		/* Currently OBD_FL_OSTID will only be used when 2.4 echo
+		 * client communicate with pre-2.4 server */
+		wobdo->o_oi.oi.oi_id = fid_oid(&lobdo->o_oi.oi_fid);
+		wobdo->o_oi.oi.oi_seq = fid_seq(&lobdo->o_oi.oi_fid);
+	}
+}
+
+static inline void lustre_get_wire_obdo(struct obd_connect_data *ocd,
+					struct obdo *lobdo, struct obdo *wobdo)
+{
+	obd_flag local_flags = 0;
+
+	if (lobdo->o_valid & OBD_MD_FLFLAGS)
+		 local_flags = lobdo->o_flags & OBD_FL_LOCAL_MASK;
+
+	LASSERT(!(wobdo->o_flags & OBD_FL_LOCAL_MASK));
+
+	memcpy(lobdo, wobdo, sizeof(*lobdo));
+	if (local_flags != 0) {
+		lobdo->o_valid |= OBD_MD_FLFLAGS;
+		lobdo->o_flags &= ~OBD_FL_LOCAL_MASK;
+		lobdo->o_flags |= local_flags;
+	}
+	if (ocd == NULL)
+		return;
+
+	if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) &&
+	    fid_seq_is_echo(wobdo->o_oi.oi.oi_seq)) {
+		/* see above */
+		lobdo->o_oi.oi_fid.f_seq = wobdo->o_oi.oi.oi_seq;
+		lobdo->o_oi.oi_fid.f_oid = wobdo->o_oi.oi.oi_id;
+		lobdo->o_oi.oi_fid.f_ver = 0;
+	}
+}
+
+extern void lustre_swab_obdo (struct obdo *o);
+
+/* request structure for OST's */
+struct ost_body {
+	struct  obdo oa;
+};
+
+/* Key for FIEMAP to be used in get_info calls */
+struct ll_fiemap_info_key {
+	char    name[8];
+	struct  obdo oa;
+	struct  ll_user_fiemap fiemap;
+};
+
+extern void lustre_swab_ost_body (struct ost_body *b);
+extern void lustre_swab_ost_last_id(obd_id *id);
+extern void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
+
+extern void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
+extern void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
+extern void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+					    int stripe_count);
+extern void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
+
+/* llog_swab.c */
+extern void lustre_swab_llogd_body (struct llogd_body *d);
+extern void lustre_swab_llog_hdr (struct llog_log_hdr *h);
+extern void lustre_swab_llogd_conn_body (struct llogd_conn_body *d);
+extern void lustre_swab_llog_rec(struct llog_rec_hdr *rec);
+extern void lustre_swab_llog_id(struct llog_logid *lid);
+
+struct lustre_cfg;
+extern void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
+
+/* Functions for dumping PTLRPC fields */
+void dump_rniobuf(struct niobuf_remote *rnb);
+void dump_ioo(struct obd_ioobj *nb);
+void dump_obdo(struct obdo *oa);
+void dump_ost_body(struct ost_body *ob);
+void dump_rcs(__u32 *rc);
+
+#define IDX_INFO_MAGIC 0x3D37CC37
+
+/* Index file transfer through the network. The server serializes the index into
+ * a byte stream which is sent to the client via a bulk transfer */
+struct idx_info {
+	__u32		ii_magic;
+
+	/* reply: see idx_info_flags below */
+	__u32		ii_flags;
+
+	/* request & reply: number of lu_idxpage (to be) transferred */
+	__u16		ii_count;
+	__u16		ii_pad0;
+
+	/* request: requested attributes passed down to the iterator API */
+	__u32		ii_attrs;
+
+	/* request & reply: index file identifier (FID) */
+	struct lu_fid	ii_fid;
+
+	/* reply: version of the index file before starting to walk the index.
+	 * Please note that the version can be modified at any time during the
+	 * transfer */
+	__u64		ii_version;
+
+	/* request: hash to start with:
+	 * reply: hash of the first entry of the first lu_idxpage and hash
+	 *	of the entry to read next if any */
+	__u64		ii_hash_start;
+	__u64		ii_hash_end;
+
+	/* reply: size of keys in lu_idxpages, minimal one if II_FL_VARKEY is
+	 * set */
+	__u16		ii_keysize;
+
+	/* reply: size of records in lu_idxpages, minimal one if II_FL_VARREC
+	 * is set */
+	__u16		ii_recsize;
+
+	__u32		ii_pad1;
+	__u64		ii_pad2;
+	__u64		ii_pad3;
+};
+extern void lustre_swab_idx_info(struct idx_info *ii);
+
+#define II_END_OFF	MDS_DIR_END_OFF /* all entries have been read */
+
+/* List of flags used in idx_info::ii_flags */
+enum idx_info_flags {
+	II_FL_NOHASH	= 1 << 0, /* client doesn't care about hash value */
+	II_FL_VARKEY	= 1 << 1, /* keys can be of variable size */
+	II_FL_VARREC	= 1 << 2, /* records can be of variable size */
+	II_FL_NONUNQ	= 1 << 3, /* index supports non-unique keys */
+};
+
+#define LIP_MAGIC 0x8A6D6B6C
+
+/* 4KB (= LU_PAGE_SIZE) container gathering key/record pairs */
+struct lu_idxpage {
+	/* 16-byte header */
+	__u32	lip_magic;
+	__u16	lip_flags;
+	__u16	lip_nr;   /* number of entries in the container */
+	__u64	lip_pad0; /* additional padding for future use */
+
+	/* key/record pairs are stored in the remaining 4080 bytes.
+	 * depending upon the flags in idx_info::ii_flags, each key/record
+	 * pair might be preceded by:
+	 * - a hash value
+	 * - the key size (II_FL_VARKEY is set)
+	 * - the record size (II_FL_VARREC is set)
+	 *
+	 * For the time being, we only support fixed-size key & record. */
+	char	lip_entries[0];
+};
+extern void lustre_swab_lip_header(struct lu_idxpage *lip);
+
+#define LIP_HDR_SIZE (offsetof(struct lu_idxpage, lip_entries))
+
+/* Gather all possible type associated with a 4KB container */
+union lu_page {
+	struct lu_dirpage	lp_dir; /* for MDS_READPAGE */
+	struct lu_idxpage	lp_idx; /* for OBD_IDX_READ */
+	char			lp_array[LU_PAGE_SIZE];
+};
+
+/* security opcodes */
+typedef enum {
+	SEC_CTX_INIT	    = 801,
+	SEC_CTX_INIT_CONT       = 802,
+	SEC_CTX_FINI	    = 803,
+	SEC_LAST_OPC,
+	SEC_FIRST_OPC	   = SEC_CTX_INIT
+} sec_cmd_t;
+
+/*
+ * capa related definitions
+ */
+#define CAPA_HMAC_MAX_LEN       64
+#define CAPA_HMAC_KEY_MAX_LEN   56
+
+/* NB take care when changing the sequence of elements this struct,
+ * because the offset info is used in find_capa() */
+struct lustre_capa {
+	struct lu_fid   lc_fid;	 /** fid */
+	__u64	   lc_opc;	 /** operations allowed */
+	__u64	   lc_uid;	 /** file owner */
+	__u64	   lc_gid;	 /** file group */
+	__u32	   lc_flags;       /** HMAC algorithm & flags */
+	__u32	   lc_keyid;       /** key# used for the capability */
+	__u32	   lc_timeout;     /** capa timeout value (sec) */
+	__u32	   lc_expiry;      /** expiry time (sec) */
+	__u8	    lc_hmac[CAPA_HMAC_MAX_LEN];   /** HMAC */
+} __attribute__((packed));
+
+extern void lustre_swab_lustre_capa(struct lustre_capa *c);
+
+/** lustre_capa::lc_opc */
+enum {
+	CAPA_OPC_BODY_WRITE   = 1<<0,  /**< write object data */
+	CAPA_OPC_BODY_READ    = 1<<1,  /**< read object data */
+	CAPA_OPC_INDEX_LOOKUP = 1<<2,  /**< lookup object fid */
+	CAPA_OPC_INDEX_INSERT = 1<<3,  /**< insert object fid */
+	CAPA_OPC_INDEX_DELETE = 1<<4,  /**< delete object fid */
+	CAPA_OPC_OSS_WRITE    = 1<<5,  /**< write oss object data */
+	CAPA_OPC_OSS_READ     = 1<<6,  /**< read oss object data */
+	CAPA_OPC_OSS_TRUNC    = 1<<7,  /**< truncate oss object */
+	CAPA_OPC_OSS_DESTROY  = 1<<8,  /**< destroy oss object */
+	CAPA_OPC_META_WRITE   = 1<<9,  /**< write object meta data */
+	CAPA_OPC_META_READ    = 1<<10, /**< read object meta data */
+};
+
+#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)
+#define CAPA_OPC_MDS_ONLY						   \
+	(CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \
+	 CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE)
+#define CAPA_OPC_OSS_ONLY						   \
+	(CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC |      \
+	 CAPA_OPC_OSS_DESTROY)
+#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY
+#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY)
+
+/* MDS capability covers object capability for operations of body r/w
+ * (dir readpage/sendpage), index lookup/insert/delete and meta data r/w,
+ * while OSS capability only covers object capability for operations of
+ * oss data(file content) r/w/truncate.
+ */
+static inline int capa_for_mds(struct lustre_capa *c)
+{
+	return (c->lc_opc & CAPA_OPC_INDEX_LOOKUP) != 0;
+}
+
+static inline int capa_for_oss(struct lustre_capa *c)
+{
+	return (c->lc_opc & CAPA_OPC_INDEX_LOOKUP) == 0;
+}
+
+/* lustre_capa::lc_hmac_alg */
+enum {
+	CAPA_HMAC_ALG_SHA1 = 1, /**< sha1 algorithm */
+	CAPA_HMAC_ALG_MAX,
+};
+
+#define CAPA_FL_MASK	    0x00ffffff
+#define CAPA_HMAC_ALG_MASK      0xff000000
+
+struct lustre_capa_key {
+	__u64   lk_seq;       /**< mds# */
+	__u32   lk_keyid;     /**< key# */
+	__u32   lk_padding;
+	__u8    lk_key[CAPA_HMAC_KEY_MAX_LEN];    /**< key */
+} __attribute__((packed));
+
+extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
+
+/** The link ea holds 1 \a link_ea_entry for each hardlink */
+#define LINK_EA_MAGIC 0x11EAF1DFUL
+struct link_ea_header {
+	__u32 leh_magic;
+	__u32 leh_reccount;
+	__u64 leh_len;      /* total size */
+	/* future use */
+	__u32 padding1;
+	__u32 padding2;
+};
+
+/** Hardlink data is name and parent fid.
+ * Stored in this crazy struct for maximum packing and endian-neutrality
+ */
+struct link_ea_entry {
+	/** __u16 stored big-endian, unaligned */
+	unsigned char      lee_reclen[2];
+	unsigned char      lee_parent_fid[sizeof(struct lu_fid)];
+	char	       lee_name[0];
+}__attribute__((packed));
+
+/** fid2path request/reply structure */
+struct getinfo_fid2path {
+	struct lu_fid   gf_fid;
+	__u64	   gf_recno;
+	__u32	   gf_linkno;
+	__u32	   gf_pathlen;
+	char	    gf_path[0];
+} __attribute__((packed));
+
+void lustre_swab_fid2path (struct getinfo_fid2path *gf);
+
+enum {
+	LAYOUT_INTENT_ACCESS    = 0,
+	LAYOUT_INTENT_READ      = 1,
+	LAYOUT_INTENT_WRITE     = 2,
+	LAYOUT_INTENT_GLIMPSE   = 3,
+	LAYOUT_INTENT_TRUNC     = 4,
+	LAYOUT_INTENT_RELEASE   = 5,
+	LAYOUT_INTENT_RESTORE   = 6
+};
+
+/* enqueue layout lock with intent */
+struct layout_intent {
+	__u32 li_opc; /* intent operation for enqueue, read, write etc */
+	__u32 li_flags;
+	__u64 li_start;
+	__u64 li_end;
+};
+
+void lustre_swab_layout_intent(struct layout_intent *li);
+
+/**
+ * On the wire version of hsm_progress structure.
+ *
+ * Contains the userspace hsm_progress and some internal fields.
+ */
+struct hsm_progress_kernel {
+	/* Field taken from struct hsm_progress */
+	lustre_fid		hpk_fid;
+	__u64			hpk_cookie;
+	struct hsm_extent	hpk_extent;
+	__u16			hpk_flags;
+	__u16			hpk_errval; /* positive val */
+	__u32			hpk_padding1;
+	/* Additional fields */
+	__u64			hpk_data_version;
+	__u64			hpk_padding2;
+} __attribute__((packed));
+
+extern void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+extern void lustre_swab_hsm_current_action(struct hsm_current_action *action);
+extern void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk);
+extern void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+extern void lustre_swab_hsm_user_item(struct hsm_user_item *hui);
+extern void lustre_swab_hsm_request(struct hsm_request *hr);
+
+/**
+ * These are object update opcode under UPDATE_OBJ, which is currently
+ * being used by cross-ref operations between MDT.
+ *
+ * During the cross-ref operation, the Master MDT, which the client send the
+ * request to, will disassembly the operation into object updates, then OSP
+ * will send these updates to the remote MDT to be executed.
+ *
+ *   Update request format
+ *   magic:  UPDATE_BUFFER_MAGIC_V1
+ *   Count:  How many updates in the req.
+ *   bufs[0] : following are packets of object.
+ *   update[0]:
+ *		type: object_update_op, the op code of update
+ *		fid: The object fid of the update.
+ *		lens/bufs: other parameters of the update.
+ *   update[1]:
+ *		type: object_update_op, the op code of update
+ *		fid: The object fid of the update.
+ *		lens/bufs: other parameters of the update.
+ *   ..........
+ *   update[7]:	type: object_update_op, the op code of update
+ *		fid: The object fid of the update.
+ *		lens/bufs: other parameters of the update.
+ *   Current 8 maxim updates per object update request.
+ *
+ *******************************************************************
+ *   update reply format:
+ *
+ *   ur_version: UPDATE_REPLY_V1
+ *   ur_count:   The count of the reply, which is usually equal
+ *		 to the number of updates in the request.
+ *   ur_lens:    The reply lengths of each object update.
+ *
+ *   replies:    1st update reply  [4bytes_ret: other body]
+ *		 2nd update reply  [4bytes_ret: other body]
+ *		 .....
+ *		 nth update reply  [4bytes_ret: other body]
+ *
+ *   For each reply of the update, the format would be
+ *	 result(4 bytes):Other stuff
+ */
+
+#define UPDATE_MAX_OPS		10
+#define UPDATE_BUFFER_MAGIC_V1	0xBDDE0001
+#define UPDATE_BUFFER_MAGIC	UPDATE_BUFFER_MAGIC_V1
+#define UPDATE_BUF_COUNT	8
+enum object_update_op {
+	OBJ_CREATE		= 1,
+	OBJ_DESTROY		= 2,
+	OBJ_REF_ADD		= 3,
+	OBJ_REF_DEL		= 4,
+	OBJ_ATTR_SET		= 5,
+	OBJ_ATTR_GET		= 6,
+	OBJ_XATTR_SET		= 7,
+	OBJ_XATTR_GET		= 8,
+	OBJ_INDEX_LOOKUP	= 9,
+	OBJ_INDEX_INSERT	= 10,
+	OBJ_INDEX_DELETE	= 11,
+	OBJ_LAST
+};
+
+struct update {
+	__u32		u_type;
+	__u32		u_batchid;
+	struct lu_fid	u_fid;
+	__u32		u_lens[UPDATE_BUF_COUNT];
+	__u32		u_bufs[0];
+};
+
+struct update_buf {
+	__u32	ub_magic;
+	__u32	ub_count;
+	__u32	ub_bufs[0];
+};
+
+#define UPDATE_REPLY_V1		0x00BD0001
+struct update_reply {
+	__u32	ur_version;
+	__u32	ur_count;
+	__u32	ur_lens[0];
+};
+
+void lustre_swab_update_buf(struct update_buf *ub);
+void lustre_swab_update_reply_buf(struct update_reply *ur);
+
+/** layout swap request structure
+ * fid1 and fid2 are in mdt_body
+ */
+struct mdc_swap_layouts {
+	__u64	   msl_flags;
+} __packed;
+
+void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl);
+
+#endif
+/** @} lustreidl */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_lfsck_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_lfsck_user.h
new file mode 100644
index 000000000000..1c87a61a7fc1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_lfsck_user.h
@@ -0,0 +1,95 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * lustre/include/lustre/lustre_lfsck_user.h
+ *
+ * Lustre LFSCK userspace interfaces.
+ *
+ * Author: Fan Yong <yong.fan@whamcloud.com>
+ */
+
+#ifndef _LUSTRE_LFSCK_USER_H
+# define _LUSTRE_LFSCK_USER_H
+
+enum lfsck_param_flags {
+	/* Reset LFSCK iterator position to the device beginning. */
+	LPF_RESET       = 0x0001,
+
+	/* Exit when fail. */
+	LPF_FAILOUT     = 0x0002,
+
+	/* Dryrun mode, only check without modification */
+	LPF_DRYRUN      = 0x0004,
+};
+
+enum lfsck_type {
+	/* For MDT-OST consistency check/repair. */
+	LT_LAYOUT	= 0x0001,
+
+	/* For MDT-MDT consistency check/repair. */
+	LT_DNE		= 0x0002,
+
+	/* For FID-in-dirent and linkEA consistency check/repair. */
+	LT_NAMESPACE	= 0x0004,
+};
+
+#define LFSCK_VERSION_V1	1
+#define LFSCK_VERSION_V2	2
+
+#define LFSCK_TYPES_ALL		((__u16)(~0))
+#define LFSCK_TYPES_DEF		((__u16)0)
+#define LFSCK_TYPES_SUPPORTED	LT_NAMESPACE
+
+#define LFSCK_SPEED_NO_LIMIT	0
+#define LFSCK_SPEED_LIMIT_DEF	LFSCK_SPEED_NO_LIMIT
+
+enum lfsck_start_valid {
+	LSV_SPEED_LIMIT		= 0x00000001,
+	LSV_ERROR_HANDLE	= 0x00000002,
+	LSV_DRYRUN		= 0x00000004,
+};
+
+/* Arguments for starting lfsck. */
+struct lfsck_start {
+	/* Which arguments are valid, see 'enum lfsck_start_valid'. */
+	__u32   ls_valid;
+
+	/* How many items can be scanned at most per second. */
+	__u32   ls_speed_limit;
+
+	/* For compatibility between user space tools and kernel service. */
+	__u16   ls_version;
+
+	/* Which LFSCK components to be (have been) started. */
+	__u16   ls_active;
+
+	/* Flags for the LFSCK, see 'enum lfsck_param_flags'. */
+	__u16   ls_flags;
+
+	/* For 64-bits aligned. */
+	__u16   ls_padding;
+};
+
+#endif /* _LUSTRE_LFSCK_USER_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
new file mode 100644
index 000000000000..7e9f57507f04
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -0,0 +1,1145 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre/lustre_user.h
+ *
+ * Lustre public user-space interface definitions.
+ */
+
+#ifndef _LUSTRE_USER_H
+#define _LUSTRE_USER_H
+
+/** \defgroup lustreuser lustreuser
+ *
+ * @{
+ */
+
+#include <lustre/ll_fiemap.h>
+#include <linux/lustre_user.h>
+
+/* for statfs() */
+#define LL_SUPER_MAGIC 0x0BD00BD0
+
+#ifndef FSFILT_IOC_GETFLAGS
+#define FSFILT_IOC_GETFLAGS	       _IOR('f', 1, long)
+#define FSFILT_IOC_SETFLAGS	       _IOW('f', 2, long)
+#define FSFILT_IOC_GETVERSION	     _IOR('f', 3, long)
+#define FSFILT_IOC_SETVERSION	     _IOW('f', 4, long)
+#define FSFILT_IOC_GETVERSION_OLD	 _IOR('v', 1, long)
+#define FSFILT_IOC_SETVERSION_OLD	 _IOW('v', 2, long)
+#define FSFILT_IOC_FIEMAP		 _IOWR('f', 11, struct ll_user_fiemap)
+#endif
+
+/* FIEMAP flags supported by Lustre */
+#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
+
+enum obd_statfs_state {
+	OS_STATE_DEGRADED       = 0x00000001, /**< RAID degraded/rebuilding */
+	OS_STATE_READONLY       = 0x00000002, /**< filesystem is read-only */
+	OS_STATE_RDONLY_1       = 0x00000004, /**< obsolete 1.6, was EROFS=30 */
+	OS_STATE_RDONLY_2       = 0x00000008, /**< obsolete 1.6, was EROFS=30 */
+	OS_STATE_RDONLY_3       = 0x00000010, /**< obsolete 1.6, was EROFS=30 */
+};
+
+struct obd_statfs {
+	__u64	   os_type;
+	__u64	   os_blocks;
+	__u64	   os_bfree;
+	__u64	   os_bavail;
+	__u64	   os_files;
+	__u64	   os_ffree;
+	__u8	    os_fsid[40];
+	__u32	   os_bsize;
+	__u32	   os_namelen;
+	__u64	   os_maxbytes;
+	__u32	   os_state;       /**< obd_statfs_state OS_STATE_* flag */
+	__u32	   os_fprecreated;	/* objs available now to the caller */
+					/* used in QoS code to find preferred
+					 * OSTs */
+	__u32	   os_spare2;
+	__u32	   os_spare3;
+	__u32	   os_spare4;
+	__u32	   os_spare5;
+	__u32	   os_spare6;
+	__u32	   os_spare7;
+	__u32	   os_spare8;
+	__u32	   os_spare9;
+};
+
+/**
+ * File IDentifier.
+ *
+ * FID is a cluster-wide unique identifier of a file or an object (stripe).
+ * FIDs are never reused.
+ **/
+struct lu_fid {
+       /**
+	* FID sequence. Sequence is a unit of migration: all files (objects)
+	* with FIDs from a given sequence are stored on the same server.
+	* Lustre should support 2^64 objects, so even if each sequence
+	* has only a single object we can still enumerate 2^64 objects.
+	**/
+	__u64 f_seq;
+	/* FID number within sequence. */
+	__u32 f_oid;
+	/**
+	 * FID version, used to distinguish different versions (in the sense
+	 * of snapshots, etc.) of the same file system object. Not currently
+	 * used.
+	 **/
+	__u32 f_ver;
+};
+
+struct filter_fid {
+	struct lu_fid	ff_parent;  /* ff_parent.f_ver == file stripe number */
+};
+
+/* keep this one for compatibility */
+struct filter_fid_old {
+	struct lu_fid	ff_parent;
+	__u64		ff_objid;
+	__u64		ff_seq;
+};
+
+/* Userspace should treat lu_fid as opaque, and only use the following methods
+ * to print or parse them.  Other functions (e.g. compare, swab) could be moved
+ * here from lustre_idl.h if needed. */
+typedef struct lu_fid lustre_fid;
+
+/**
+ * Following struct for object attributes, that will be kept inode's EA.
+ * Introduced in 2.0 release (please see b15993, for details)
+ * Added to all objects since Lustre 2.4 as contains self FID
+ */
+struct lustre_mdt_attrs {
+	/**
+	 * Bitfield for supported data in this structure. From enum lma_compat.
+	 * lma_self_fid and lma_flags are always available.
+	 */
+	__u32   lma_compat;
+	/**
+	 * Per-file incompat feature list. Lustre version should support all
+	 * flags set in this field. The supported feature mask is available in
+	 * LMA_INCOMPAT_SUPP.
+	 */
+	__u32   lma_incompat;
+	/** FID of this inode */
+	struct lu_fid  lma_self_fid;
+};
+
+/**
+ * Prior to 2.4, the LMA structure also included SOM attributes which has since
+ * been moved to a dedicated xattr
+ * lma_flags was also removed because of lma_compat/incompat fields.
+ */
+#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
+
+/**
+ * OST object IDentifier.
+ */
+struct ost_id {
+	union {
+		struct ostid {
+			__u64	oi_id;
+			__u64	oi_seq;
+		} oi;
+		struct lu_fid oi_fid;
+	};
+};
+
+#define DOSTID LPX64":"LPU64
+#define POSTID(oi) ostid_seq(oi), ostid_id(oi)
+
+/*
+ * The ioctl naming rules:
+ * LL_*     - works on the currently opened filehandle instead of parent dir
+ * *_OBD_*  - gets data for both OSC or MDC (LOV, LMV indirectly)
+ * *_MDC_*  - gets/sets data related to MDC
+ * *_LOV_*  - gets/sets data related to OSC/LOV
+ * *FILE*   - called on parent dir and passes in a filename
+ * *STRIPE* - set/get lov_user_md
+ * *INFO    - set/get lov_user_mds_data
+ */
+/* see <lustre_lib.h> for ioctl numberss 101-150 */
+#define LL_IOC_GETFLAGS		 _IOR ('f', 151, long)
+#define LL_IOC_SETFLAGS		 _IOW ('f', 152, long)
+#define LL_IOC_CLRFLAGS		 _IOW ('f', 153, long)
+/* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */
+#define LL_IOC_LOV_SETSTRIPE	    _IOW ('f', 154, long)
+/* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */
+#define LL_IOC_LOV_GETSTRIPE	    _IOW ('f', 155, long)
+/* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */
+#define LL_IOC_LOV_SETEA		_IOW ('f', 156, long)
+#define LL_IOC_RECREATE_OBJ	     _IOW ('f', 157, long)
+#define LL_IOC_RECREATE_FID	     _IOW ('f', 157, struct lu_fid)
+#define LL_IOC_GROUP_LOCK	       _IOW ('f', 158, long)
+#define LL_IOC_GROUP_UNLOCK	     _IOW ('f', 159, long)
+/* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */
+#define LL_IOC_QUOTACHECK	       _IOW ('f', 160, int)
+/* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */
+#define LL_IOC_POLL_QUOTACHECK	  _IOR ('f', 161, struct if_quotacheck *)
+/* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */
+#define LL_IOC_QUOTACTL		 _IOWR('f', 162, struct if_quotactl)
+#define IOC_OBD_STATFS		  _IOWR('f', 164, struct obd_statfs *)
+#define IOC_LOV_GETINFO		 _IOWR('f', 165, struct lov_user_mds_data *)
+#define LL_IOC_FLUSHCTX		 _IOW ('f', 166, long)
+#define LL_IOC_RMTACL		   _IOW ('f', 167, long)
+#define LL_IOC_GETOBDCOUNT	      _IOR ('f', 168, long)
+#define LL_IOC_LLOOP_ATTACH	     _IOWR('f', 169, long)
+#define LL_IOC_LLOOP_DETACH	     _IOWR('f', 170, long)
+#define LL_IOC_LLOOP_INFO	       _IOWR('f', 171, struct lu_fid)
+#define LL_IOC_LLOOP_DETACH_BYDEV       _IOWR('f', 172, long)
+#define LL_IOC_PATH2FID		 _IOR ('f', 173, long)
+#define LL_IOC_GET_CONNECT_FLAGS	_IOWR('f', 174, __u64 *)
+#define LL_IOC_GET_MDTIDX	       _IOR ('f', 175, int)
+
+/* see <lustre_lib.h> for ioctl numbers 177-210 */
+
+#define LL_IOC_HSM_STATE_GET		_IOR('f', 211, struct hsm_user_state)
+#define LL_IOC_HSM_STATE_SET		_IOW('f', 212, struct hsm_state_set)
+#define LL_IOC_HSM_CT_START		_IOW('f', 213, struct lustre_kernelcomm)
+#define LL_IOC_HSM_COPY_START		_IOW('f', 214, struct hsm_copy *)
+#define LL_IOC_HSM_COPY_END		_IOW('f', 215, struct hsm_copy *)
+#define LL_IOC_HSM_PROGRESS		_IOW('f', 216, struct hsm_user_request)
+#define LL_IOC_HSM_REQUEST		_IOW('f', 217, struct hsm_user_request)
+#define LL_IOC_DATA_VERSION		_IOR('f', 218, struct ioc_data_version)
+#define LL_IOC_LOV_SWAP_LAYOUTS		_IOW('f', 219, \
+						struct lustre_swap_layouts)
+#define LL_IOC_HSM_ACTION		_IOR('f', 220, \
+						struct hsm_current_action)
+/* see <lustre_lib.h> for ioctl numbers 221-232 */
+
+#define LL_IOC_LMV_SETSTRIPE	    _IOWR('f', 240, struct lmv_user_md)
+#define LL_IOC_LMV_GETSTRIPE	    _IOWR('f', 241, struct lmv_user_md)
+#define LL_IOC_REMOVE_ENTRY	    _IOWR('f', 242, __u64)
+
+#define LL_STATFS_LMV	   1
+#define LL_STATFS_LOV	   2
+#define LL_STATFS_NODELAY	4
+
+#define IOC_MDC_TYPE	    'i'
+#define IOC_MDC_LOOKUP	  _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_GETFILESTRIPE   _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
+#define IOC_MDC_GETFILEINFO     _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
+#define LL_IOC_MDC_GETINFO      _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
+
+/* Keep these for backward compartability. */
+#define LL_IOC_OBD_STATFS       IOC_OBD_STATFS
+#define IOC_MDC_GETSTRIPE       IOC_MDC_GETFILESTRIPE
+
+
+#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
+
+/* Hopefully O_LOV_DELAY_CREATE does not conflict with standard O_xxx flags.
+ * Previously it was defined as 0100000000 and conflicts with FMODE_NONOTIFY
+ * which was added since kernel 2.6.36, so we redefine it as 020000000.
+ * To be compatible with old version's statically linked binary, finally we
+ * define it as (020000000 | 0100000000).
+ * */
+#define O_LOV_DELAY_CREATE      0120000000
+
+#define LL_FILE_IGNORE_LOCK     0x00000001
+#define LL_FILE_GROUP_LOCKED    0x00000002
+#define LL_FILE_READAHEA	0x00000004
+#define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */
+#define LL_FILE_LOCKLESS_IO     0x00000010 /* server-side locks with cio */
+#define LL_FILE_RMTACL	  0x00000020
+
+#define LOV_USER_MAGIC_V1 0x0BD10BD0
+#define LOV_USER_MAGIC    LOV_USER_MAGIC_V1
+#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
+#define LOV_USER_MAGIC_V3 0x0BD30BD0
+
+#define LMV_MAGIC_V1      0x0CD10CD0    /*normal stripe lmv magic */
+#define LMV_USER_MAGIC    0x0CD20CD0    /*default lmv magic*/
+
+#define LOV_PATTERN_RAID0 0x001
+#define LOV_PATTERN_RAID1 0x002
+#define LOV_PATTERN_FIRST 0x100
+
+#define LOV_MAXPOOLNAME 16
+#define LOV_POOLNAMEF "%.16s"
+
+#define LOV_MIN_STRIPE_BITS 16   /* maximum PAGE_SIZE (ia64), power of 2 */
+#define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
+#define LOV_MAX_STRIPE_COUNT_OLD 160
+/* This calculation is crafted so that input of 4096 will result in 160
+ * which in turn is equal to old maximal stripe count.
+ * XXX: In fact this is too simpified for now, what it also need is to get
+ * ea_type argument to clearly know how much space each stripe consumes.
+ *
+ * The limit of 12 pages is somewhat arbitrary, but is a reasonably large
+ * allocation that is sufficient for the current generation of systems.
+ *
+ * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) */
+#define LOV_MAX_STRIPE_COUNT 2000  /* ((12 * 4096 - 256) / 24) */
+#define LOV_ALL_STRIPES       0xffff /* only valid for directories */
+#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
+
+#define lov_user_ost_data lov_user_ost_data_v1
+struct lov_user_ost_data_v1 {     /* per-stripe data structure */
+	struct ost_id l_ost_oi;	  /* OST object ID */
+	__u32 l_ost_gen;	  /* generation of this OST index */
+	__u32 l_ost_idx;	  /* OST index in LOV */
+} __attribute__((packed));
+
+#define lov_user_md lov_user_md_v1
+struct lov_user_md_v1 {	   /* LOV EA user data (host-endian) */
+	__u32 lmm_magic;	  /* magic number = LOV_USER_MAGIC_V1 */
+	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+	struct ost_id lmm_oi;	  /* LOV object ID */
+	__u32 lmm_stripe_size;    /* size of stripe in bytes */
+	__u16 lmm_stripe_count;   /* num stripes in use for this object */
+	union {
+		__u16 lmm_stripe_offset;  /* starting stripe offset in
+					   * lmm_objects, use when writing */
+		__u16 lmm_layout_gen;     /* layout generation number
+					   * used when reading */
+	};
+	struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+} __attribute__((packed,  __may_alias__));
+
+struct lov_user_md_v3 {	   /* LOV EA user data (host-endian) */
+	__u32 lmm_magic;	  /* magic number = LOV_USER_MAGIC_V3 */
+	__u32 lmm_pattern;	/* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+	struct ost_id lmm_oi;	  /* LOV object ID */
+	__u32 lmm_stripe_size;    /* size of stripe in bytes */
+	__u16 lmm_stripe_count;   /* num stripes in use for this object */
+	union {
+		__u16 lmm_stripe_offset;  /* starting stripe offset in
+					   * lmm_objects, use when writing */
+		__u16 lmm_layout_gen;     /* layout generation number
+					   * used when reading */
+	};
+	char  lmm_pool_name[LOV_MAXPOOLNAME]; /* pool name */
+	struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
+} __attribute__((packed));
+
+/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
+ * use this.  It is unsafe to #define those values in this header as it
+ * is possible the application has already #included <sys/stat.h>. */
+#ifdef HAVE_LOV_USER_MDS_DATA
+#define lov_user_mds_data lov_user_mds_data_v1
+struct lov_user_mds_data_v1 {
+	lstat_t lmd_st;		 /* MDS stat struct */
+	struct lov_user_md_v1 lmd_lmm;  /* LOV EA V1 user data */
+} __attribute__((packed));
+
+struct lov_user_mds_data_v3 {
+	lstat_t lmd_st;		 /* MDS stat struct */
+	struct lov_user_md_v3 lmd_lmm;  /* LOV EA V3 user data */
+} __attribute__((packed));
+#endif
+
+/* keep this to be the same size as lov_user_ost_data_v1 */
+struct lmv_user_mds_data {
+	struct lu_fid	lum_fid;
+	__u32		lum_padding;
+	__u32		lum_mds;
+};
+
+/* lum_type */
+enum {
+	LMV_STRIPE_TYPE = 0,
+	LMV_DEFAULT_TYPE = 1,
+};
+
+#define lmv_user_md lmv_user_md_v1
+struct lmv_user_md_v1 {
+	__u32	lum_magic;	 /* must be the first field */
+	__u32	lum_stripe_count;  /* dirstripe count */
+	__u32	lum_stripe_offset; /* MDT idx for default dirstripe */
+	__u32	lum_hash_type;     /* Dir stripe policy */
+	__u32	lum_type;	  /* LMV type: default or normal */
+	__u32	lum_padding1;
+	__u32	lum_padding2;
+	__u32	lum_padding3;
+	char	lum_pool_name[LOV_MAXPOOLNAME];
+	struct	lmv_user_mds_data  lum_objects[0];
+};
+
+static inline int lmv_user_md_size(int stripes, int lmm_magic)
+{
+	return sizeof(struct lmv_user_md) +
+		      stripes * sizeof(struct lmv_user_mds_data);
+}
+
+extern void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
+
+struct ll_recreate_obj {
+	__u64 lrc_id;
+	__u32 lrc_ost_idx;
+};
+
+struct ll_fid {
+	__u64 id;	 /* holds object id */
+	__u32 generation; /* holds object generation */
+	__u32 f_type;     /* holds object type or stripe idx when passing it to
+			   * OST for saving into EA. */
+};
+
+#define UUID_MAX	40
+struct obd_uuid {
+	char uuid[UUID_MAX];
+};
+
+static inline int obd_uuid_equals(const struct obd_uuid *u1,
+				  const struct obd_uuid *u2)
+{
+	return strcmp((char *)u1->uuid, (char *)u2->uuid) == 0;
+}
+
+static inline int obd_uuid_empty(struct obd_uuid *uuid)
+{
+	return uuid->uuid[0] == '\0';
+}
+
+static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
+{
+	strncpy((char *)uuid->uuid, tmp, sizeof(*uuid));
+	uuid->uuid[sizeof(*uuid) - 1] = '\0';
+}
+
+/* For printf's only, make sure uuid is terminated */
+static inline char *obd_uuid2str(struct obd_uuid *uuid)
+{
+	if (uuid->uuid[sizeof(*uuid) - 1] != '\0') {
+		/* Obviously not safe, but for printfs, no real harm done...
+		   we're always null-terminated, even in a race. */
+		static char temp[sizeof(*uuid)];
+		memcpy(temp, uuid->uuid, sizeof(*uuid) - 1);
+		temp[sizeof(*uuid) - 1] = '\0';
+		return temp;
+	}
+	return (char *)(uuid->uuid);
+}
+
+/* Extract fsname from uuid (or target name) of a target
+   e.g. (myfs-OST0007_UUID -> myfs)
+   see also deuuidify. */
+static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
+{
+	char *p;
+
+	strncpy(buf, uuid, buflen - 1);
+	buf[buflen - 1] = '\0';
+	p = strrchr(buf, '-');
+	if (p)
+	   *p = '\0';
+}
+
+/* printf display format
+   e.g. printf("file FID is "DFID"\n", PFID(fid)); */
+#define DFID_NOBRACE LPX64":0x%x:0x%x"
+#define DFID "["DFID_NOBRACE"]"
+#define PFID(fid)     \
+	(fid)->f_seq, \
+	(fid)->f_oid, \
+	(fid)->f_ver
+
+/* scanf input parse format -- strip '[' first.
+   e.g. sscanf(fidstr, SFID, RFID(&fid)); */
+/* #define SFID "0x"LPX64i":0x"LPSZX":0x"LPSZX""
+liblustreapi.c:2893: warning: format '%lx' expects type 'long unsigned int *', but argument 4 has type 'unsigned int *'
+liblustreapi.c:2893: warning: format '%lx' expects type 'long unsigned int *', but argument 5 has type 'unsigned int *'
+*/
+#define SFID "0x"LPX64i":0x%x:0x%x"
+#define RFID(fid)     \
+	&((fid)->f_seq), \
+	&((fid)->f_oid), \
+	&((fid)->f_ver)
+
+
+/********* Quotas **********/
+
+/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
+#define LUSTRE_Q_QUOTAON    0x800002     /* turn quotas on */
+#define LUSTRE_Q_QUOTAOFF   0x800003     /* turn quotas off */
+#define LUSTRE_Q_GETINFO    0x800005     /* get information about quota files */
+#define LUSTRE_Q_SETINFO    0x800006     /* set information about quota files */
+#define LUSTRE_Q_GETQUOTA   0x800007     /* get user quota structure */
+#define LUSTRE_Q_SETQUOTA   0x800008     /* set user quota structure */
+/* lustre-specific control commands */
+#define LUSTRE_Q_INVALIDATE  0x80000b     /* invalidate quota data */
+#define LUSTRE_Q_FINVALIDATE 0x80000c     /* invalidate filter quota data */
+
+#define UGQUOTA 2       /* set both USRQUOTA and GRPQUOTA */
+
+struct if_quotacheck {
+	char		    obd_type[16];
+	struct obd_uuid	 obd_uuid;
+};
+
+#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
+
+/* permission */
+#define N_PERMS_MAX      64
+
+struct perm_downcall_data {
+	__u64 pdd_nid;
+	__u32 pdd_perm;
+	__u32 pdd_padding;
+};
+
+struct identity_downcall_data {
+	__u32			    idd_magic;
+	__u32			    idd_err;
+	__u32			    idd_uid;
+	__u32			    idd_gid;
+	__u32			    idd_nperms;
+	__u32			    idd_ngroups;
+	struct perm_downcall_data idd_perms[N_PERMS_MAX];
+	__u32			    idd_groups[0];
+};
+
+/* for non-mapped uid/gid */
+#define NOBODY_UID      99
+#define NOBODY_GID      99
+
+#define INVALID_ID      (-1)
+
+enum {
+	RMT_LSETFACL    = 1,
+	RMT_LGETFACL    = 2,
+	RMT_RSETFACL    = 3,
+	RMT_RGETFACL    = 4
+};
+
+#ifdef NEED_QUOTA_DEFS
+#ifndef QIF_BLIMITS
+#define QIF_BLIMITS     1
+#define QIF_SPACE       2
+#define QIF_ILIMITS     4
+#define QIF_INODES      8
+#define QIF_BTIME       16
+#define QIF_ITIME       32
+#define QIF_LIMITS      (QIF_BLIMITS | QIF_ILIMITS)
+#define QIF_USAGE       (QIF_SPACE | QIF_INODES)
+#define QIF_TIMES       (QIF_BTIME | QIF_ITIME)
+#define QIF_ALL	 (QIF_LIMITS | QIF_USAGE | QIF_TIMES)
+#endif
+
+#endif /* !__KERNEL__ */
+
+/* lustre volatile file support
+ * file name header: .^L^S^T^R:volatile"
+ */
+#define LUSTRE_VOLATILE_HDR	".\x0c\x13\x14\x12:VOLATILE"
+#define LUSTRE_VOLATILE_HDR_LEN	14
+/* hdr + MDT index */
+#define LUSTRE_VOLATILE_IDX	LUSTRE_VOLATILE_HDR":%.4X:"
+
+typedef enum lustre_quota_version {
+	LUSTRE_QUOTA_V2 = 1
+} lustre_quota_version_t;
+
+/* XXX: same as if_dqinfo struct in kernel */
+struct obd_dqinfo {
+	__u64 dqi_bgrace;
+	__u64 dqi_igrace;
+	__u32 dqi_flags;
+	__u32 dqi_valid;
+};
+
+/* XXX: same as if_dqblk struct in kernel, plus one padding */
+struct obd_dqblk {
+	__u64 dqb_bhardlimit;
+	__u64 dqb_bsoftlimit;
+	__u64 dqb_curspace;
+	__u64 dqb_ihardlimit;
+	__u64 dqb_isoftlimit;
+	__u64 dqb_curinodes;
+	__u64 dqb_btime;
+	__u64 dqb_itime;
+	__u32 dqb_valid;
+	__u32 dqb_padding;
+};
+
+enum {
+	QC_GENERAL      = 0,
+	QC_MDTIDX       = 1,
+	QC_OSTIDX       = 2,
+	QC_UUID	 = 3
+};
+
+struct if_quotactl {
+	__u32		   qc_cmd;
+	__u32		   qc_type;
+	__u32		   qc_id;
+	__u32		   qc_stat;
+	__u32		   qc_valid;
+	__u32		   qc_idx;
+	struct obd_dqinfo       qc_dqinfo;
+	struct obd_dqblk	qc_dqblk;
+	char		    obd_type[16];
+	struct obd_uuid	 obd_uuid;
+};
+
+/* swap layout flags */
+#define	SWAP_LAYOUTS_CHECK_DV1		(1 << 0)
+#define	SWAP_LAYOUTS_CHECK_DV2		(1 << 1)
+#define	SWAP_LAYOUTS_KEEP_MTIME		(1 << 2)
+#define	SWAP_LAYOUTS_KEEP_ATIME		(1 << 3)
+struct lustre_swap_layouts {
+	__u64	sl_flags;
+	__u32	sl_fd;
+	__u32	sl_gid;
+	__u64	sl_dv1;
+	__u64	sl_dv2;
+};
+
+
+/********* Changelogs **********/
+/** Changelog record types */
+enum changelog_rec_type {
+	CL_MARK     = 0,
+	CL_CREATE   = 1,  /* namespace */
+	CL_MKDIR    = 2,  /* namespace */
+	CL_HARDLINK = 3,  /* namespace */
+	CL_SOFTLINK = 4,  /* namespace */
+	CL_MKNOD    = 5,  /* namespace */
+	CL_UNLINK   = 6,  /* namespace */
+	CL_RMDIR    = 7,  /* namespace */
+	CL_RENAME   = 8,  /* namespace */
+	CL_EXT      = 9,  /* namespace extended record (2nd half of rename) */
+	CL_OPEN     = 10, /* not currently used */
+	CL_CLOSE    = 11, /* may be written to log only with mtime change */
+	CL_LAYOUT   = 12, /* file layout/striping modified */
+	CL_TRUNC    = 13,
+	CL_SETATTR  = 14,
+	CL_XATTR    = 15,
+	CL_HSM      = 16, /* HSM specific events, see flags */
+	CL_MTIME    = 17, /* Precedence: setattr > mtime > ctime > atime */
+	CL_CTIME    = 18,
+	CL_ATIME    = 19,
+	CL_LAST
+};
+
+static inline const char *changelog_type2str(int type) {
+	static const char *changelog_str[] = {
+		"MARK",  "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
+		"RMDIR", "RENME", "RNMTO", "OPEN",  "CLOSE", "LYOUT", "TRUNC",
+		"SATTR", "XATTR", "HSM",   "MTIME", "CTIME", "ATIME",
+	};
+
+	if (type >= 0 && type < CL_LAST)
+		return changelog_str[type];
+	return NULL;
+}
+
+/* per-record flags */
+#define CLF_VERSION     0x1000
+#define CLF_EXT_VERSION 0x2000
+#define CLF_FLAGSHIFT   12
+#define CLF_FLAGMASK    ((1U << CLF_FLAGSHIFT) - 1)
+#define CLF_VERMASK     (~CLF_FLAGMASK)
+/* Anything under the flagmask may be per-type (if desired) */
+/* Flags for unlink */
+#define CLF_UNLINK_LAST       0x0001 /* Unlink of last hardlink */
+#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
+				     /* HSM cleaning needed */
+/* Flags for rename */
+#define CLF_RENAME_LAST       0x0001 /* rename unlink last hardlink of target */
+
+/* Flags for HSM */
+/* 12b used (from high weight to low weight):
+ * 2b for flags
+ * 3b for event
+ * 7b for error code
+ */
+#define CLF_HSM_ERR_L	0 /* HSM return code, 7 bits */
+#define CLF_HSM_ERR_H	6
+#define CLF_HSM_EVENT_L      7 /* HSM event, 3 bits, see enum hsm_event */
+#define CLF_HSM_EVENT_H      9
+#define CLF_HSM_FLAG_L      10 /* HSM flags, 2 bits, 1 used, 1 spare */
+#define CLF_HSM_FLAG_H      11
+#define CLF_HSM_SPARE_L     12 /* 4 spare bits */
+#define CLF_HSM_SPARE_H     15
+#define CLF_HSM_LAST	15
+
+/* Remove bits higher than _h, then extract the value
+ * between _h and _l by shifting lower weigth to bit 0. */
+#define CLF_GET_BITS(_b, _h, _l) (((_b << (CLF_HSM_LAST - _h)) & 0xFFFF) \
+				   >> (CLF_HSM_LAST - _h + _l))
+
+#define CLF_HSM_SUCCESS      0x00
+#define CLF_HSM_MAXERROR     0x7E
+#define CLF_HSM_ERROVERFLOW  0x7F
+
+#define CLF_HSM_DIRTY	1 /* file is dirty after HSM request end */
+
+/* 3 bits field => 8 values allowed */
+enum hsm_event {
+	HE_ARCHIVE      = 0,
+	HE_RESTORE      = 1,
+	HE_CANCEL       = 2,
+	HE_RELEASE      = 3,
+	HE_REMOVE       = 4,
+	HE_STATE	= 5,
+	HE_SPARE1       = 6,
+	HE_SPARE2       = 7,
+};
+
+static inline enum hsm_event hsm_get_cl_event(__u16 flags)
+{
+	return CLF_GET_BITS(flags, CLF_HSM_EVENT_H, CLF_HSM_EVENT_L);
+}
+
+static inline void hsm_set_cl_event(int *flags, enum hsm_event he)
+{
+	*flags |= (he << CLF_HSM_EVENT_L);
+}
+
+static inline __u16 hsm_get_cl_flags(int flags)
+{
+	return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
+}
+
+static inline void hsm_set_cl_flags(int *flags, int bits)
+{
+	*flags |= (bits << CLF_HSM_FLAG_L);
+}
+
+static inline int hsm_get_cl_error(int flags)
+{
+	return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
+}
+
+static inline void hsm_set_cl_error(int *flags, int error)
+{
+	*flags |= (error << CLF_HSM_ERR_L);
+}
+
+#define CR_MAXSIZE cfs_size_round(2*NAME_MAX + 1 + sizeof(struct changelog_rec))
+
+struct changelog_rec {
+	__u16		 cr_namelen;
+	__u16		 cr_flags; /**< (flags&CLF_FLAGMASK)|CLF_VERSION */
+	__u32		 cr_type;  /**< \a changelog_rec_type */
+	__u64		 cr_index; /**< changelog record number */
+	__u64		 cr_prev;  /**< last index for this target fid */
+	__u64		 cr_time;
+	union {
+		lustre_fid    cr_tfid;	/**< target fid */
+		__u32	 cr_markerflags; /**< CL_MARK flags */
+	};
+	lustre_fid	    cr_pfid;	/**< parent fid */
+	char		  cr_name[0];     /**< last element */
+} __attribute__((packed));
+
+/* changelog_ext_rec is 2*sizeof(lu_fid) bigger than changelog_rec, to save
+ * space, only rename uses changelog_ext_rec, while others use changelog_rec to
+ * store records.
+ */
+struct changelog_ext_rec {
+	__u16			cr_namelen;
+	__u16			cr_flags; /**< (flags & CLF_FLAGMASK) |
+						CLF_EXT_VERSION */
+	__u32			cr_type;  /**< \a changelog_rec_type */
+	__u64			cr_index; /**< changelog record number */
+	__u64			cr_prev;  /**< last index for this target fid */
+	__u64			cr_time;
+	union {
+		lustre_fid	cr_tfid;	/**< target fid */
+		__u32		cr_markerflags; /**< CL_MARK flags */
+	};
+	lustre_fid		cr_pfid;	/**< target parent fid */
+	lustre_fid		cr_sfid;	/**< source fid, or zero */
+	lustre_fid		cr_spfid;       /**< source parent fid, or zero */
+	char			cr_name[0];     /**< last element */
+} __attribute__((packed));
+
+#define CHANGELOG_REC_EXTENDED(rec) \
+	(((rec)->cr_flags & CLF_VERMASK) == CLF_EXT_VERSION)
+
+static inline int changelog_rec_size(struct changelog_rec *rec)
+{
+	return CHANGELOG_REC_EXTENDED(rec) ? sizeof(struct changelog_ext_rec):
+					     sizeof(*rec);
+}
+
+static inline char *changelog_rec_name(struct changelog_rec *rec)
+{
+	return CHANGELOG_REC_EXTENDED(rec) ?
+		((struct changelog_ext_rec *)rec)->cr_name: rec->cr_name;
+}
+
+static inline int changelog_rec_snamelen(struct changelog_ext_rec *rec)
+{
+	return rec->cr_namelen - strlen(rec->cr_name) - 1;
+}
+
+static inline char *changelog_rec_sname(struct changelog_ext_rec *rec)
+{
+	return rec->cr_name + strlen(rec->cr_name) + 1;
+}
+
+struct ioc_changelog {
+	__u64 icc_recno;
+	__u32 icc_mdtindex;
+	__u32 icc_id;
+	__u32 icc_flags;
+};
+
+enum changelog_message_type {
+	CL_RECORD = 10, /* message is a changelog_rec */
+	CL_EOF    = 11, /* at end of current changelog */
+};
+
+/********* Misc **********/
+
+struct ioc_data_version {
+	__u64 idv_version;
+	__u64 idv_flags;     /* See LL_DV_xxx */
+};
+#define LL_DV_NOFLUSH 0x01   /* Do not take READ EXTENT LOCK before sampling
+				version. Dirty caches are left unchanged. */
+
+#ifndef offsetof
+# define offsetof(typ,memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
+#endif
+
+#define dot_lustre_name ".lustre"
+
+
+/********* HSM **********/
+
+/** HSM per-file state
+ * See HSM_FLAGS below.
+ */
+enum hsm_states {
+	HS_EXISTS	= 0x00000001,
+	HS_DIRTY	= 0x00000002,
+	HS_RELEASED	= 0x00000004,
+	HS_ARCHIVED	= 0x00000008,
+	HS_NORELEASE	= 0x00000010,
+	HS_NOARCHIVE	= 0x00000020,
+	HS_LOST		= 0x00000040,
+};
+
+/* HSM user-setable flags. */
+#define HSM_USER_MASK   (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY)
+
+/* Other HSM flags. */
+#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED)
+
+/*
+ * All HSM-related possible flags that could be applied to a file.
+ * This should be kept in sync with hsm_states.
+ */
+#define HSM_FLAGS_MASK  (HSM_USER_MASK | HSM_STATUS_MASK)
+
+/**
+ * HSM request progress state
+ */
+enum hsm_progress_states {
+	HPS_WAITING	= 1,
+	HPS_RUNNING	= 2,
+	HPS_DONE	= 3,
+};
+#define HPS_NONE	0
+
+static inline char *hsm_progress_state2name(enum hsm_progress_states s)
+{
+	switch  (s) {
+	case HPS_WAITING:	return "waiting";
+	case HPS_RUNNING:	return "running";
+	case HPS_DONE:		return "done";
+	default:		return "unknown";
+	}
+}
+
+struct hsm_extent {
+	__u64 offset;
+	__u64 length;
+} __attribute__((packed));
+
+/**
+ * Current HSM states of a Lustre file.
+ *
+ * This structure purpose is to be sent to user-space mainly. It describes the
+ * current HSM flags and in-progress action.
+ */
+struct hsm_user_state {
+	/** Current HSM states, from enum hsm_states. */
+	__u32			hus_states;
+	__u32			hus_archive_id;
+	/**  The current undergoing action, if there is one */
+	__u32			hus_in_progress_state;
+	__u32			hus_in_progress_action;
+	struct hsm_extent	hus_in_progress_location;
+	char			hus_extended_info[];
+};
+
+struct hsm_state_set_ioc {
+	struct lu_fid	hssi_fid;
+	__u64		hssi_setmask;
+	__u64		hssi_clearmask;
+};
+
+/*
+ * This structure describes the current in-progress action for a file.
+ * it is retuned to user space and send over the wire
+ */
+struct hsm_current_action {
+	/**  The current undergoing action, if there is one */
+	/* state is one of hsm_progress_states */
+	__u32			hca_state;
+	/* action is one of hsm_user_action */
+	__u32			hca_action;
+	struct hsm_extent	hca_location;
+};
+
+/***** HSM user requests ******/
+/* User-generated (lfs/ioctl) request types */
+enum hsm_user_action {
+	HUA_NONE    =  1, /* no action (noop) */
+	HUA_ARCHIVE = 10, /* copy to hsm */
+	HUA_RESTORE = 11, /* prestage */
+	HUA_RELEASE = 12, /* drop ost objects */
+	HUA_REMOVE  = 13, /* remove from archive */
+	HUA_CANCEL  = 14  /* cancel a request */
+};
+
+static inline char *hsm_user_action2name(enum hsm_user_action  a)
+{
+	switch  (a) {
+	case HUA_NONE:    return "NOOP";
+	case HUA_ARCHIVE: return "ARCHIVE";
+	case HUA_RESTORE: return "RESTORE";
+	case HUA_RELEASE: return "RELEASE";
+	case HUA_REMOVE:  return "REMOVE";
+	case HUA_CANCEL:  return "CANCEL";
+	default:	  return "UNKNOWN";
+	}
+}
+
+/*
+ * List of hr_flags (bit field)
+ */
+#define HSM_FORCE_ACTION 0x0001
+/* used by CT, connot be set by user */
+#define HSM_GHOST_COPY   0x0002
+
+/**
+ * Contains all the fixed part of struct hsm_user_request.
+ *
+ */
+struct hsm_request {
+	__u32 hr_action;	/* enum hsm_user_action */
+	__u32 hr_archive_id;	/* archive id, used only with HUA_ARCHIVE */
+	__u64 hr_flags;		/* request flags */
+	__u32 hr_itemcount;	/* item count in hur_user_item vector */
+	__u32 hr_data_len;
+};
+
+struct hsm_user_item {
+       lustre_fid	hui_fid;
+       struct hsm_extent hui_extent;
+} __attribute__((packed));
+
+struct hsm_user_request {
+	struct hsm_request	hur_request;
+	struct hsm_user_item	hur_user_item[0];
+	/* extra data blob at end of struct (after all
+	 * hur_user_items), only use helpers to access it
+	 */
+} __attribute__((packed));
+
+/** Return pointer to data field in a hsm user request */
+static inline void *hur_data(struct hsm_user_request *hur)
+{
+	return &(hur->hur_user_item[hur->hur_request.hr_itemcount]);
+}
+
+/** Compute the current length of the provided hsm_user_request. */
+static inline int hur_len(struct hsm_user_request *hur)
+{
+	return offsetof(struct hsm_user_request,
+			hur_user_item[hur->hur_request.hr_itemcount]) +
+		hur->hur_request.hr_data_len;
+}
+
+/****** HSM RPCs to copytool *****/
+/* Message types the copytool may receive */
+enum hsm_message_type {
+	HMT_ACTION_LIST = 100, /* message is a hsm_action_list */
+};
+
+/* Actions the copytool may be instructed to take for a given action_item */
+enum hsm_copytool_action {
+	HSMA_NONE    = 10, /* no action */
+	HSMA_ARCHIVE = 20, /* arbitrary offset */
+	HSMA_RESTORE = 21,
+	HSMA_REMOVE  = 22,
+	HSMA_CANCEL  = 23
+};
+
+static inline char *hsm_copytool_action2name(enum hsm_copytool_action  a)
+{
+	switch  (a) {
+	case HSMA_NONE:    return "NOOP";
+	case HSMA_ARCHIVE: return "ARCHIVE";
+	case HSMA_RESTORE: return "RESTORE";
+	case HSMA_REMOVE:  return "REMOVE";
+	case HSMA_CANCEL:  return "CANCEL";
+	default:	   return "UNKNOWN";
+	}
+}
+
+/* Copytool item action description */
+struct hsm_action_item {
+	__u32      hai_len;     /* valid size of this struct */
+	__u32      hai_action;  /* hsm_copytool_action, but use known size */
+	lustre_fid hai_fid;     /* Lustre FID to operated on */
+	lustre_fid hai_dfid;    /* fid used for data access */
+	struct hsm_extent hai_extent;  /* byte range to operate on */
+	__u64      hai_cookie;  /* action cookie from coordinator */
+	__u64      hai_gid;     /* grouplock id */
+	char       hai_data[0]; /* variable length */
+} __attribute__((packed));
+
+/*
+ * helper function which print in hexa the first bytes of
+ * hai opaque field
+ * \param hai [IN] record to print
+ * \param buffer [OUT] output buffer
+ * \param len [IN] max buffer len
+ * \retval buffer
+ */
+static inline char *hai_dump_data_field(struct hsm_action_item *hai,
+					char *buffer, int len)
+{
+	int i, sz, data_len;
+	char *ptr;
+
+	ptr = buffer;
+	sz = len;
+	data_len = hai->hai_len - sizeof(*hai);
+	for (i = 0 ; (i < data_len) && (sz > 0) ; i++)
+	{
+		int cnt;
+
+		cnt = snprintf(ptr, sz, "%.2X",
+			       (unsigned char)hai->hai_data[i]);
+		ptr += cnt;
+		sz -= cnt;
+	}
+	*ptr = '\0';
+	return buffer;
+}
+
+/* Copytool action list */
+#define HAL_VERSION 1
+#define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */
+struct hsm_action_list {
+	__u32 hal_version;
+	__u32 hal_count;       /* number of hai's to follow */
+	__u64 hal_compound_id; /* returned by coordinator */
+	__u64 hal_flags;
+	__u32 hal_archive_id; /* which archive backend */
+	__u32 padding1;
+	char  hal_fsname[0];   /* null-terminated */
+	/* struct hsm_action_item[hal_count] follows, aligned on 8-byte
+	   boundaries. See hai_zero */
+} __attribute__((packed));
+
+#ifndef HAVE_CFS_SIZE_ROUND
+static inline int cfs_size_round (int val)
+{
+	return (val + 7) & (~0x7);
+}
+#define HAVE_CFS_SIZE_ROUND
+#endif
+
+/* Return pointer to first hai in action list */
+static inline struct hsm_action_item * hai_zero(struct hsm_action_list *hal)
+{
+	return (struct hsm_action_item *)(hal->hal_fsname +
+					  cfs_size_round(strlen(hal-> \
+								hal_fsname)));
+}
+/* Return pointer to next hai */
+static inline struct hsm_action_item * hai_next(struct hsm_action_item *hai)
+{
+	return (struct hsm_action_item *)((char *)hai +
+					  cfs_size_round(hai->hai_len));
+}
+
+/* Return size of an hsm_action_list */
+static inline int hal_size(struct hsm_action_list *hal)
+{
+	int i, sz;
+	struct hsm_action_item *hai;
+
+	sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname));
+	hai = hai_zero(hal);
+	for (i = 0 ; i < hal->hal_count ; i++) {
+		sz += cfs_size_round(hai->hai_len);
+		hai = hai_next(hai);
+	}
+	return(sz);
+}
+
+/* Copytool progress reporting */
+#define HP_FLAG_COMPLETED 0x01
+#define HP_FLAG_RETRY     0x02
+
+struct hsm_progress {
+	lustre_fid		hp_fid;
+	__u64			hp_cookie;
+	struct hsm_extent	hp_extent;
+	__u16			hp_flags;
+	__u16			hp_errval; /* positive val */
+	__u32			padding;
+};
+
+/**
+ * Use by copytool during any hsm request they handled.
+ * This structure is initialized by llapi_hsm_copy_start()
+ * which is an helper over the ioctl() interface
+ * Store Lustre, internal use only, data.
+ */
+struct hsm_copy {
+	__u64			hc_data_version;
+	__u16			hc_flags;
+	__u16			hc_errval; /* positive val */
+	__u32			padding;
+	struct hsm_action_item	hc_hai;
+};
+
+/** @} lustreuser */
+
+#endif /* _LUSTRE_USER_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustreapi.h b/drivers/staging/lustre/lustre/include/lustre/lustreapi.h
new file mode 100644
index 000000000000..63da66506639
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustreapi.h
@@ -0,0 +1,310 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTREAPI_H_
+#define _LUSTREAPI_H_
+
+/** \defgroup llapi llapi
+ *
+ * @{
+ */
+
+#include <lustre/lustre_user.h>
+
+typedef void (*llapi_cb_t)(char *obd_type_name, char *obd_name, char *obd_uuid, void *args);
+
+/* lustreapi message severity level */
+enum llapi_message_level {
+	LLAPI_MSG_OFF    = 0,
+	LLAPI_MSG_FATAL  = 1,
+	LLAPI_MSG_ERROR  = 2,
+	LLAPI_MSG_WARN   = 3,
+	LLAPI_MSG_NORMAL = 4,
+	LLAPI_MSG_INFO   = 5,
+	LLAPI_MSG_DEBUG  = 6,
+	LLAPI_MSG_MAX
+};
+
+/* the bottom three bits reserved for llapi_message_level */
+#define LLAPI_MSG_MASK	  0x00000007
+#define LLAPI_MSG_NO_ERRNO      0x00000010
+
+extern void llapi_msg_set_level(int level);
+extern void llapi_error(int level, int rc, char *fmt, ...);
+#define llapi_err_noerrno(level, fmt, a...)			     \
+	llapi_error((level) | LLAPI_MSG_NO_ERRNO, 0, fmt, ## a)
+extern void llapi_printf(int level, char *fmt, ...);
+extern int llapi_file_create(const char *name, unsigned long long stripe_size,
+			     int stripe_offset, int stripe_count,
+			     int stripe_pattern);
+extern int llapi_file_open(const char *name, int flags, int mode,
+			   unsigned long long stripe_size, int stripe_offset,
+			   int stripe_count, int stripe_pattern);
+extern int llapi_file_create_pool(const char *name,
+				  unsigned long long stripe_size,
+				  int stripe_offset, int stripe_count,
+				  int stripe_pattern, char *pool_name);
+extern int llapi_file_open_pool(const char *name, int flags, int mode,
+				unsigned long long stripe_size,
+				int stripe_offset, int stripe_count,
+				int stripe_pattern, char *pool_name);
+extern int llapi_poollist(const char *name);
+extern int llapi_get_poollist(const char *name, char **poollist, int list_size,
+			      char *buffer, int buffer_size);
+extern int llapi_get_poolmembers(const char *poolname, char **members,
+				 int list_size, char *buffer, int buffer_size);
+extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum);
+#define HAVE_LLAPI_FILE_LOOKUP
+extern int llapi_file_lookup(int dirfd, const char *name);
+
+#define VERBOSE_COUNT      0x1
+#define VERBOSE_SIZE       0x2
+#define VERBOSE_OFFSET     0x4
+#define VERBOSE_POOL       0x8
+#define VERBOSE_DETAIL     0x10
+#define VERBOSE_OBJID      0x20
+#define VERBOSE_GENERATION 0x40
+#define VERBOSE_MDTINDEX   0x80
+#define VERBOSE_ALL	(VERBOSE_COUNT | VERBOSE_SIZE | VERBOSE_OFFSET | \
+			    VERBOSE_POOL | VERBOSE_OBJID | VERBOSE_GENERATION)
+
+struct find_param {
+	unsigned int maxdepth;
+	time_t  atime;
+	time_t  mtime;
+	time_t  ctime;
+	int     asign;  /* cannot be bitfields due to using pointers to */
+	int     csign;  /* access them during argument parsing. */
+	int     msign;
+	int     type;
+	int	     size_sign:2,	/* these need to be signed values */
+			stripesize_sign:2,
+			stripecount_sign:2;
+	unsigned long long size;
+	unsigned long long size_units;
+	uid_t uid;
+	gid_t gid;
+
+	unsigned long   zeroend:1,
+			recursive:1,
+			exclude_pattern:1,
+			exclude_type:1,
+			exclude_obd:1,
+			exclude_mdt:1,
+			exclude_gid:1,
+			exclude_uid:1,
+			check_gid:1,	    /* group ID */
+			check_uid:1,	    /* user ID */
+			check_pool:1,	   /* LOV pool name */
+			check_size:1,	   /* file size */
+			exclude_pool:1,
+			exclude_size:1,
+			exclude_atime:1,
+			exclude_mtime:1,
+			exclude_ctime:1,
+			get_lmv:1,	      /* get MDT list from LMV */
+			raw:1,		  /* do not fill in defaults */
+			check_stripesize:1,     /* LOV stripe size */
+			exclude_stripesize:1,
+			check_stripecount:1,    /* LOV stripe count */
+			exclude_stripecount:1;
+
+	int     verbose;
+	int     quiet;
+
+	/* regular expression */
+	char   *pattern;
+
+	char   *print_fmt;
+
+	struct  obd_uuid       *obduuid;
+	int		     num_obds;
+	int		     num_alloc_obds;
+	int		     obdindex;
+	int		    *obdindexes;
+
+	struct  obd_uuid       *mdtuuid;
+	int		     num_mdts;
+	int		     num_alloc_mdts;
+	int		     mdtindex;
+	int		    *mdtindexes;
+	int		     file_mdtindex;
+
+	int	lumlen;
+	struct  lov_user_mds_data *lmd;
+
+	char poolname[LOV_MAXPOOLNAME + 1];
+
+	int			fp_lmv_count;
+	struct lmv_user_md	*fp_lmv_md;
+
+	unsigned long long stripesize;
+	unsigned long long stripesize_units;
+	unsigned long long stripecount;
+
+	/* In-process parameters. */
+	unsigned long   got_uuids:1,
+			obds_printed:1,
+			have_fileinfo:1;	/* file attrs and LOV xattr */
+	unsigned int    depth;
+	dev_t	   st_dev;
+};
+
+extern int llapi_ostlist(char *path, struct find_param *param);
+extern int llapi_uuid_match(char *real_uuid, char *search_uuid);
+extern int llapi_getstripe(char *path, struct find_param *param);
+extern int llapi_find(char *path, struct find_param *param);
+
+extern int llapi_file_fget_mdtidx(int fd, int *mdtidx);
+extern int llapi_dir_create_pool(const char *name, int flags, int stripe_offset,
+				 int stripe_count, int stripe_pattern,
+				 char *poolname);
+int llapi_direntry_remove(char *dname);
+extern int llapi_obd_statfs(char *path, __u32 type, __u32 index,
+		     struct obd_statfs *stat_buf,
+		     struct obd_uuid *uuid_buf);
+extern int llapi_ping(char *obd_type, char *obd_name);
+extern int llapi_target_check(int num_types, char **obd_types, char *dir);
+extern int llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid);
+extern int llapi_file_get_lmv_uuid(const char *path, struct obd_uuid *lmv_uuid);
+extern int llapi_file_fget_lov_uuid(int fd, struct obd_uuid *lov_uuid);
+extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count);
+extern int llapi_lmv_get_uuids(int fd, struct obd_uuid *uuidp, int *mdt_count);
+extern int llapi_is_lustre_mnttype(const char *type);
+extern int llapi_search_ost(char *fsname, char *poolname, char *ostname);
+extern int llapi_get_obd_count(char *mnt, int *count, int is_mdt);
+extern int parse_size(char *optarg, unsigned long long *size,
+		      unsigned long long *size_units, int bytes_spec);
+extern int llapi_search_mounts(const char *pathname, int index,
+			       char *mntdir, char *fsname);
+extern int llapi_search_fsname(const char *pathname, char *fsname);
+extern int llapi_getname(const char *path, char *buf, size_t size);
+
+extern void llapi_ping_target(char *obd_type, char *obd_name,
+			      char *obd_uuid, void *args);
+
+extern int llapi_search_rootpath(char *pathname, const char *fsname);
+
+struct mntent;
+#define HAVE_LLAPI_IS_LUSTRE_MNT
+extern int llapi_is_lustre_mnt(struct mntent *mnt);
+extern int llapi_quotachown(char *path, int flag);
+extern int llapi_quotacheck(char *mnt, int check_type);
+extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk);
+extern int llapi_quotactl(char *mnt, struct if_quotactl *qctl);
+extern int llapi_target_iterate(int type_num, char **obd_type, void *args,
+				llapi_cb_t cb);
+extern int llapi_get_connect_flags(const char *mnt, __u64 *flags);
+extern int llapi_lsetfacl(int argc, char *argv[]);
+extern int llapi_lgetfacl(int argc, char *argv[]);
+extern int llapi_rsetfacl(int argc, char *argv[]);
+extern int llapi_rgetfacl(int argc, char *argv[]);
+extern int llapi_cp(int argc, char *argv[]);
+extern int llapi_ls(int argc, char *argv[]);
+extern int llapi_fid2path(const char *device, const char *fidstr, char *path,
+			  int pathlen, long long *recno, int *linkno);
+extern int llapi_path2fid(const char *path, lustre_fid *fid);
+extern int llapi_fd2fid(const int fd, lustre_fid *fid);
+
+extern int llapi_get_version(char *buffer, int buffer_size, char **version);
+extern int llapi_get_data_version(int fd, __u64 *data_version, __u64 flags);
+extern int llapi_hsm_state_get(const char *path, struct hsm_user_state *hus);
+extern int llapi_hsm_state_set(const char *path, __u64 setmask, __u64 clearmask,
+			       __u32 archive_id);
+
+extern int llapi_create_volatile_idx(char *directory, int idx, int mode);
+static inline int llapi_create_volatile(char *directory, int mode)
+{
+	return llapi_create_volatile_idx(directory, -1, mode);
+}
+
+
+extern int llapi_fswap_layouts(const int fd1, const int fd2,
+			       __u64 dv1, __u64 dv2, __u64 flags);
+extern int llapi_swap_layouts(const char *path1, const char *path2,
+			      __u64 dv1, __u64 dv2, __u64 flags);
+
+/* Changelog interface.  priv is private state, managed internally
+   by these functions */
+#define CHANGELOG_FLAG_FOLLOW 0x01   /* Not yet implemented */
+#define CHANGELOG_FLAG_BLOCK  0x02   /* Blocking IO makes sense in case of
+   slow user parsing of the records, but it also prevents us from cleaning
+   up if the records are not consumed. */
+
+/* Records received are in extentded format now, though most of them are still
+ * written in disk in changelog_rec format (to save space and time), it's
+ * converted to extented format in the lustre api to ease changelog analysis. */
+#define HAVE_CHANGELOG_EXTEND_REC 1
+
+extern int llapi_changelog_start(void **priv, int flags, const char *mdtname,
+				 long long startrec);
+extern int llapi_changelog_fini(void **priv);
+extern int llapi_changelog_recv(void *priv, struct changelog_ext_rec **rech);
+extern int llapi_changelog_free(struct changelog_ext_rec **rech);
+/* Allow records up to endrec to be destroyed; requires registered id. */
+extern int llapi_changelog_clear(const char *mdtname, const char *idstr,
+				 long long endrec);
+
+/* HSM copytool interface.
+ * priv is private state, managed internally by these functions
+ */
+struct hsm_copytool_private;
+extern int llapi_hsm_copytool_start(struct hsm_copytool_private **priv,
+				    char *fsname, int flags,
+				    int archive_count, int *archives);
+extern int llapi_hsm_copytool_fini(struct hsm_copytool_private **priv);
+extern int llapi_hsm_copytool_recv(struct hsm_copytool_private *priv,
+				   struct hsm_action_list **hal, int *msgsize);
+extern int llapi_hsm_copytool_free(struct hsm_action_list **hal);
+extern int llapi_hsm_copy_start(char *mnt, struct hsm_copy *copy,
+				const struct hsm_action_item *hai);
+extern int llapi_hsm_copy_end(char *mnt, struct hsm_copy *copy,
+			      const struct hsm_progress *hp);
+extern int llapi_hsm_progress(char *mnt, struct hsm_progress *hp);
+extern int llapi_hsm_import(const char *dst, int archive, struct stat *st,
+			    unsigned long long stripe_size, int stripe_offset,
+			    int stripe_count, int stripe_pattern,
+			    char *pool_name, lustre_fid *newfid);
+
+/* HSM user interface */
+extern struct hsm_user_request *llapi_hsm_user_request_alloc(int itemcount,
+							     int data_len);
+extern int llapi_hsm_request(char *mnt, struct hsm_user_request *request);
+extern int llapi_hsm_current_action(const char *path,
+				    struct hsm_current_action *hca);
+/** @} llapi */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h b/drivers/staging/lustre/lustre/include/lustre_acl.h
new file mode 100644
index 000000000000..5cfb87b180c3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_acl.h
@@ -0,0 +1,42 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_acl.h
+ */
+
+#ifndef _LUSTRE_ACL_H
+#define _LUSTRE_ACL_H
+
+#include <linux/lustre_acl.h>
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_capa.h b/drivers/staging/lustre/lustre/include/lustre_capa.h
new file mode 100644
index 000000000000..d77bffc0b59d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_capa.h
@@ -0,0 +1,305 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_capa.h
+ *
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ */
+
+#ifndef __LINUX_CAPA_H_
+#define __LINUX_CAPA_H_
+
+/** \defgroup capa capa
+ *
+ * @{
+ */
+
+/*
+ * capability
+ */
+#include <linux/crypto.h>
+#include <lustre/lustre_idl.h>
+
+#define CAPA_TIMEOUT 1800		/* sec, == 30 min */
+#define CAPA_KEY_TIMEOUT (24 * 60 * 60)  /* sec, == 1 days */
+
+struct capa_hmac_alg {
+	const char     *ha_name;
+	int	     ha_len;
+	int	     ha_keylen;
+};
+
+#define DEF_CAPA_HMAC_ALG(name, type, len, keylen)      \
+[CAPA_HMAC_ALG_ ## type] = {			    \
+	.ha_name	 = name,			\
+	.ha_len	  = len,			 \
+	.ha_keylen       = keylen,		      \
+}
+
+struct client_capa {
+	struct inode	     *inode;
+	struct list_head		lli_list;     /* link to lli_oss_capas */
+};
+
+struct target_capa {
+	struct hlist_node	  c_hash;       /* link to capa hash */
+};
+
+struct obd_capa {
+	struct list_head		c_list;       /* link to capa_list */
+
+	struct lustre_capa	c_capa;       /* capa */
+	atomic_t	      c_refc;       /* ref count */
+	cfs_time_t		c_expiry;     /* jiffies */
+	spinlock_t		c_lock;	/* protect capa content */
+	int			c_site;
+
+	union {
+		struct client_capa	cli;
+		struct target_capa	tgt;
+	} u;
+};
+
+enum {
+	CAPA_SITE_CLIENT = 0,
+	CAPA_SITE_SERVER,
+	CAPA_SITE_MAX
+};
+
+static inline struct lu_fid *capa_fid(struct lustre_capa *capa)
+{
+	return &capa->lc_fid;
+}
+
+static inline __u64 capa_opc(struct lustre_capa *capa)
+{
+	return capa->lc_opc;
+}
+
+static inline __u64 capa_uid(struct lustre_capa *capa)
+{
+	return capa->lc_uid;
+}
+
+static inline __u64 capa_gid(struct lustre_capa *capa)
+{
+	return capa->lc_gid;
+}
+
+static inline __u32 capa_flags(struct lustre_capa *capa)
+{
+	return capa->lc_flags & 0xffffff;
+}
+
+static inline __u32 capa_alg(struct lustre_capa *capa)
+{
+	return (capa->lc_flags >> 24);
+}
+
+static inline __u32 capa_keyid(struct lustre_capa *capa)
+{
+	return capa->lc_keyid;
+}
+
+static inline __u64 capa_key_seq(struct lustre_capa_key *key)
+{
+	return key->lk_seq;
+}
+
+static inline __u32 capa_key_keyid(struct lustre_capa_key *key)
+{
+	return key->lk_keyid;
+}
+
+static inline __u32 capa_timeout(struct lustre_capa *capa)
+{
+	return capa->lc_timeout;
+}
+
+static inline __u32 capa_expiry(struct lustre_capa *capa)
+{
+	return capa->lc_expiry;
+}
+
+void _debug_capa(struct lustre_capa *, struct libcfs_debug_msg_data *,
+		 const char *fmt, ... );
+#define DEBUG_CAPA(level, capa, fmt, args...)				  \
+do {									   \
+	if (((level) & D_CANTMASK) != 0 ||				     \
+	    ((libcfs_debug & (level)) != 0 &&				  \
+	     (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) {	       \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, NULL);	      \
+		_debug_capa((capa), &msgdata, fmt, ##args);		    \
+	}								      \
+} while (0)
+
+#define DEBUG_CAPA_KEY(level, k, fmt, args...)				 \
+do {									   \
+CDEBUG(level, fmt " capability key@%p seq "LPU64" keyid %u\n",		 \
+       ##args, k, capa_key_seq(k), capa_key_keyid(k));			 \
+} while (0)
+
+typedef int (* renew_capa_cb_t)(struct obd_capa *, struct lustre_capa *);
+
+/* obdclass/capa.c */
+extern struct list_head capa_list[];
+extern spinlock_t capa_lock;
+extern int capa_count[];
+extern struct kmem_cache *capa_cachep;
+
+struct hlist_head *init_capa_hash(void);
+void cleanup_capa_hash(struct hlist_head *hash);
+
+struct obd_capa *capa_add(struct hlist_head *hash,
+			  struct lustre_capa *capa);
+struct obd_capa *capa_lookup(struct hlist_head *hash,
+			     struct lustre_capa *capa, int alive);
+
+int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key);
+int capa_encrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen);
+int capa_decrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen);
+void capa_cpy(void *dst, struct obd_capa *ocapa);
+static inline struct obd_capa *alloc_capa(int site)
+{
+	struct obd_capa *ocapa;
+
+	if (unlikely(site != CAPA_SITE_CLIENT && site != CAPA_SITE_SERVER))
+		return ERR_PTR(-EINVAL);
+
+	OBD_SLAB_ALLOC_PTR(ocapa, capa_cachep);
+	if (unlikely(!ocapa))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&ocapa->c_list);
+	atomic_set(&ocapa->c_refc, 1);
+	spin_lock_init(&ocapa->c_lock);
+	ocapa->c_site = site;
+	if (ocapa->c_site == CAPA_SITE_CLIENT)
+		INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
+	else
+		INIT_HLIST_NODE(&ocapa->u.tgt.c_hash);
+
+	return ocapa;
+}
+
+static inline struct obd_capa *capa_get(struct obd_capa *ocapa)
+{
+	if (!ocapa)
+		return NULL;
+
+	atomic_inc(&ocapa->c_refc);
+	return ocapa;
+}
+
+static inline void capa_put(struct obd_capa *ocapa)
+{
+	if (!ocapa)
+		return;
+
+	if (atomic_read(&ocapa->c_refc) == 0) {
+		DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc is 0 for");
+		LBUG();
+	}
+
+	if (atomic_dec_and_test(&ocapa->c_refc)) {
+		LASSERT(list_empty(&ocapa->c_list));
+		if (ocapa->c_site == CAPA_SITE_CLIENT) {
+			LASSERT(list_empty(&ocapa->u.cli.lli_list));
+		} else {
+			struct hlist_node *hnode;
+
+			hnode = &ocapa->u.tgt.c_hash;
+			LASSERT(!hnode->next && !hnode->pprev);
+		}
+		OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa));
+	}
+}
+
+static inline int open_flags_to_accmode(int flags)
+{
+	int mode = flags;
+
+	if ((mode + 1) & O_ACCMODE)
+		mode++;
+	if (mode & O_TRUNC)
+		mode |= 2;
+
+	return mode;
+}
+
+static inline __u64 capa_open_opc(int mode)
+{
+	return mode & FMODE_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_READ;
+}
+
+static inline void set_capa_expiry(struct obd_capa *ocapa)
+{
+	cfs_time_t expiry = cfs_time_sub((cfs_time_t)ocapa->c_capa.lc_expiry,
+					 cfs_time_current_sec());
+	ocapa->c_expiry = cfs_time_add(cfs_time_current(),
+				       cfs_time_seconds(expiry));
+}
+
+static inline int capa_is_expired_sec(struct lustre_capa *capa)
+{
+	return (capa->lc_expiry - cfs_time_current_sec() <= 0);
+}
+
+static inline int capa_is_expired(struct obd_capa *ocapa)
+{
+	return cfs_time_beforeq(ocapa->c_expiry, cfs_time_current());
+}
+
+static inline int capa_opc_supported(struct lustre_capa *capa, __u64 opc)
+{
+	return (capa_opc(capa) & opc) == opc;
+}
+
+struct filter_capa_key {
+	struct list_head	      k_list;
+	struct lustre_capa_key  k_key;
+};
+
+enum {
+	LC_ID_NONE      = 0,
+	LC_ID_PLAIN     = 1,
+	LC_ID_CONVERT   = 2
+};
+
+#define BYPASS_CAPA (struct lustre_capa *)ERR_PTR(-ENOENT)
+
+/** @} capa */
+
+#endif /* __LINUX_CAPA_H_ */
diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h
new file mode 100644
index 000000000000..f12429f38215
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h
@@ -0,0 +1,299 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_CFG_H
+#define _LUSTRE_CFG_H
+
+/** \defgroup cfg cfg
+ *
+ * @{
+ */
+
+/*
+ * 1cf6
+ * lcfG
+ */
+#define LUSTRE_CFG_VERSION 0x1cf60001
+#define LUSTRE_CFG_MAX_BUFCOUNT 8
+
+#define LCFG_HDR_SIZE(count) \
+    cfs_size_round(offsetof (struct lustre_cfg, lcfg_buflens[(count)]))
+
+/** If the LCFG_REQUIRED bit is set in a configuration command,
+ * then the client is required to understand this parameter
+ * in order to mount the filesystem. If it does not understand
+ * a REQUIRED command the client mount will fail. */
+#define LCFG_REQUIRED	 0x0001000
+
+enum lcfg_command_type {
+	LCFG_ATTACH	     = 0x00cf001, /**< create a new obd instance */
+	LCFG_DETACH	     = 0x00cf002, /**< destroy obd instance */
+	LCFG_SETUP	      = 0x00cf003, /**< call type-specific setup */
+	LCFG_CLEANUP	    = 0x00cf004, /**< call type-specific cleanup */
+	LCFG_ADD_UUID	   = 0x00cf005, /**< add a nid to a niduuid */
+	LCFG_DEL_UUID	   = 0x00cf006, /**< remove a nid from a niduuid */
+	LCFG_MOUNTOPT	   = 0x00cf007, /**< create a profile (mdc, osc) */
+	LCFG_DEL_MOUNTOPT       = 0x00cf008, /**< destroy a profile */
+	LCFG_SET_TIMEOUT	= 0x00cf009, /**< set obd_timeout */
+	LCFG_SET_UPCALL	 = 0x00cf00a, /**< deprecated */
+	LCFG_ADD_CONN	   = 0x00cf00b, /**< add a failover niduuid to an obd */
+	LCFG_DEL_CONN	   = 0x00cf00c, /**< remove a failover niduuid */
+	LCFG_LOV_ADD_OBD	= 0x00cf00d, /**< add an osc to a lov */
+	LCFG_LOV_DEL_OBD	= 0x00cf00e, /**< remove an osc from a lov */
+	LCFG_PARAM	      = 0x00cf00f, /**< set a proc parameter */
+	LCFG_MARKER	     = 0x00cf010, /**< metadata about next cfg rec */
+	LCFG_LOG_START	  = 0x00ce011, /**< mgc only, process a cfg log */
+	LCFG_LOG_END	    = 0x00ce012, /**< stop processing updates */
+	LCFG_LOV_ADD_INA	= 0x00ce013, /**< like LOV_ADD_OBD, inactive */
+	LCFG_ADD_MDC	    = 0x00cf014, /**< add an mdc to a lmv */
+	LCFG_DEL_MDC	    = 0x00cf015, /**< remove an mdc from a lmv */
+	LCFG_SPTLRPC_CONF       = 0x00ce016, /**< security */
+	LCFG_POOL_NEW	   = 0x00ce020, /**< create an ost pool name */
+	LCFG_POOL_ADD	   = 0x00ce021, /**< add an ost to a pool */
+	LCFG_POOL_REM	   = 0x00ce022, /**< remove an ost from a pool */
+	LCFG_POOL_DEL	   = 0x00ce023, /**< destroy an ost pool name */
+	LCFG_SET_LDLM_TIMEOUT   = 0x00ce030, /**< set ldlm_timeout */
+	LCFG_PRE_CLEANUP	= 0x00cf031, /**< call type-specific pre
+					      * cleanup cleanup */
+};
+
+struct lustre_cfg_bufs {
+	void    *lcfg_buf[LUSTRE_CFG_MAX_BUFCOUNT];
+	__u32    lcfg_buflen[LUSTRE_CFG_MAX_BUFCOUNT];
+	__u32    lcfg_bufcount;
+};
+
+struct lustre_cfg {
+	__u32 lcfg_version;
+	__u32 lcfg_command;
+
+	__u32 lcfg_num;
+	__u32 lcfg_flags;
+	__u64 lcfg_nid;
+	__u32 lcfg_nal;		/* not used any more */
+
+	__u32 lcfg_bufcount;
+	__u32 lcfg_buflens[0];
+};
+
+enum cfg_record_type {
+	PORTALS_CFG_TYPE = 1,
+	LUSTRE_CFG_TYPE = 123,
+};
+
+#define LUSTRE_CFG_BUFLEN(lcfg, idx)	    \
+	((lcfg)->lcfg_bufcount <= (idx)	 \
+	 ? 0				    \
+	 : (lcfg)->lcfg_buflens[(idx)])
+
+static inline void lustre_cfg_bufs_set(struct lustre_cfg_bufs *bufs,
+				       __u32		   index,
+				       void		   *buf,
+				       __u32		   buflen)
+{
+	if (index >= LUSTRE_CFG_MAX_BUFCOUNT)
+		return;
+	if (bufs == NULL)
+		return;
+
+	if (bufs->lcfg_bufcount <= index)
+		bufs->lcfg_bufcount = index + 1;
+
+	bufs->lcfg_buf[index]    = buf;
+	bufs->lcfg_buflen[index] = buflen;
+}
+
+static inline void lustre_cfg_bufs_set_string(struct lustre_cfg_bufs *bufs,
+					      __u32 index,
+					      char *str)
+{
+	lustre_cfg_bufs_set(bufs, index, str, str ? strlen(str) + 1 : 0);
+}
+
+static inline void lustre_cfg_bufs_reset(struct lustre_cfg_bufs *bufs, char *name)
+{
+	memset((bufs), 0, sizeof(*bufs));
+	if (name)
+		lustre_cfg_bufs_set_string(bufs, 0, name);
+}
+
+static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
+{
+	int i;
+	int offset;
+	int bufcount;
+	LASSERT (lcfg != NULL);
+	LASSERT (index >= 0);
+
+	bufcount = lcfg->lcfg_bufcount;
+	if (index >= bufcount)
+		return NULL;
+
+	offset = LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
+	for (i = 0; i < index; i++)
+		offset += cfs_size_round(lcfg->lcfg_buflens[i]);
+	return (char *)lcfg + offset;
+}
+
+static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs,
+					struct lustre_cfg *lcfg)
+{
+	int i;
+	bufs->lcfg_bufcount = lcfg->lcfg_bufcount;
+	for (i = 0; i < bufs->lcfg_bufcount; i++) {
+		bufs->lcfg_buflen[i] = lcfg->lcfg_buflens[i];
+		bufs->lcfg_buf[i] = lustre_cfg_buf(lcfg, i);
+	}
+}
+
+static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, int index)
+{
+	char *s;
+
+	if (lcfg->lcfg_buflens[index] == 0)
+		return NULL;
+
+	s = lustre_cfg_buf(lcfg, index);
+	if (s == NULL)
+		return NULL;
+
+	/*
+	 * make sure it's NULL terminated, even if this kills a char
+	 * of data.  Try to use the padding first though.
+	 */
+	if (s[lcfg->lcfg_buflens[index] - 1] != '\0') {
+		int last = min((int)lcfg->lcfg_buflens[index],
+			       cfs_size_round(lcfg->lcfg_buflens[index]) - 1);
+		char lost = s[last];
+		s[last] = '\0';
+		if (lost != '\0') {
+			CWARN("Truncated buf %d to '%s' (lost '%c'...)\n",
+			      index, s, lost);
+		}
+	}
+	return s;
+}
+
+static inline int lustre_cfg_len(__u32 bufcount, __u32 *buflens)
+{
+	int i;
+	int len;
+	ENTRY;
+
+	len = LCFG_HDR_SIZE(bufcount);
+	for (i = 0; i < bufcount; i++)
+		len += cfs_size_round(buflens[i]);
+
+	RETURN(cfs_size_round(len));
+}
+
+
+#include <obd_support.h>
+
+static inline struct lustre_cfg *lustre_cfg_new(int cmd,
+						struct lustre_cfg_bufs *bufs)
+{
+	struct lustre_cfg *lcfg;
+	char *ptr;
+	int i;
+
+	ENTRY;
+
+	OBD_ALLOC(lcfg, lustre_cfg_len(bufs->lcfg_bufcount,
+				       bufs->lcfg_buflen));
+	if (!lcfg)
+		RETURN(ERR_PTR(-ENOMEM));
+
+	lcfg->lcfg_version = LUSTRE_CFG_VERSION;
+	lcfg->lcfg_command = cmd;
+	lcfg->lcfg_bufcount = bufs->lcfg_bufcount;
+
+	ptr = (char *)lcfg + LCFG_HDR_SIZE(lcfg->lcfg_bufcount);
+	for (i = 0; i < lcfg->lcfg_bufcount; i++) {
+		lcfg->lcfg_buflens[i] = bufs->lcfg_buflen[i];
+		LOGL((char *)bufs->lcfg_buf[i], bufs->lcfg_buflen[i], ptr);
+	}
+	RETURN(lcfg);
+}
+
+static inline void lustre_cfg_free(struct lustre_cfg *lcfg)
+{
+	int len;
+
+	len = lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens);
+
+	OBD_FREE(lcfg, len);
+	EXIT;
+	return;
+}
+
+static inline int lustre_cfg_sanity_check(void *buf, int len)
+{
+	struct lustre_cfg *lcfg = (struct lustre_cfg *)buf;
+	ENTRY;
+	if (!lcfg)
+		RETURN(-EINVAL);
+
+	/* check that the first bits of the struct are valid */
+	if (len < LCFG_HDR_SIZE(0))
+		RETURN(-EINVAL);
+
+	if (lcfg->lcfg_version != LUSTRE_CFG_VERSION)
+		RETURN(-EINVAL);
+
+	if (lcfg->lcfg_bufcount >= LUSTRE_CFG_MAX_BUFCOUNT)
+		RETURN(-EINVAL);
+
+	/* check that the buflens are valid */
+	if (len < LCFG_HDR_SIZE(lcfg->lcfg_bufcount))
+		RETURN(-EINVAL);
+
+	/* make sure all the pointers point inside the data */
+	if (len < lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens))
+		RETURN(-EINVAL);
+
+	RETURN(0);
+}
+
+#include <lustre/lustre_user.h>
+
+#ifndef INVALID_UID
+#define INVALID_UID     (-1)
+#endif
+
+/** @} cfg */
+
+#endif // _LUSTRE_CFG_H
diff --git a/drivers/staging/lustre/lustre/include/lustre_debug.h b/drivers/staging/lustre/lustre/include/lustre_debug.h
new file mode 100644
index 000000000000..3d9e4462af43
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_debug.h
@@ -0,0 +1,76 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_DEBUG_H
+#define _LUSTRE_DEBUG_H
+
+/** \defgroup debug debug
+ *
+ * @{
+ */
+
+#include <lustre_net.h>
+#include <obd.h>
+
+#include <linux/lustre_debug.h>
+
+#define ASSERT_MAX_SIZE_MB 60000ULL
+#define ASSERT_PAGE_INDEX(index, OP)				    \
+do { if (index > ASSERT_MAX_SIZE_MB << (20 - PAGE_CACHE_SHIFT)) {	 \
+	CERROR("bad page index %lu > %llu\n", index,		    \
+	       ASSERT_MAX_SIZE_MB << (20 - PAGE_CACHE_SHIFT));	    \
+	libcfs_debug = ~0UL;					    \
+	OP;							     \
+}} while(0)
+
+#define ASSERT_FILE_OFFSET(offset, OP)				  \
+do { if (offset > ASSERT_MAX_SIZE_MB << 20) {			   \
+	CERROR("bad file offset %llu > %llu\n", offset,		 \
+	       ASSERT_MAX_SIZE_MB << 20);			       \
+	libcfs_debug = ~0UL;					    \
+	OP;							     \
+}} while(0)
+
+/* lib/debug.c */
+void dump_lniobuf(struct niobuf_local *lnb);
+int dump_req(struct ptlrpc_request *req);
+void dump_lsm(int level, struct lov_stripe_md *lsm);
+int block_debug_setup(void *addr, int len, __u64 off, __u64 id);
+int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id);
+
+/** @} debug */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h
new file mode 100644
index 000000000000..8db6086ea4ea
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_disk.h
@@ -0,0 +1,543 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_disk.h
+ *
+ * Lustre disk format definitions.
+ *
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ */
+
+#ifndef _LUSTRE_DISK_H
+#define _LUSTRE_DISK_H
+
+/** \defgroup disk disk
+ *
+ * @{
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lnet/types.h>
+
+/****************** on-disk files *********************/
+
+#define MDT_LOGS_DIR      "LOGS"  /* COMPAT_146 */
+#define MOUNT_CONFIGS_DIR "CONFIGS"
+#define CONFIGS_FILE      "mountdata"
+/** Persistent mount data are stored on the disk in this file. */
+#define MOUNT_DATA_FILE    MOUNT_CONFIGS_DIR"/"CONFIGS_FILE
+#define LAST_RCVD	 "last_rcvd"
+#define LOV_OBJID	 "lov_objid"
+#define LOV_OBJSEQ		"lov_objseq"
+#define HEALTH_CHECK      "health_check"
+#define CAPA_KEYS	 "capa_keys"
+#define CHANGELOG_USERS   "changelog_users"
+#define MGS_NIDTBL_DIR    "NIDTBL_VERSIONS"
+#define QMT_DIR	   "quota_master"
+#define QSD_DIR	   "quota_slave"
+
+/****************** persistent mount data *********************/
+
+#define LDD_F_SV_TYPE_MDT   0x0001
+#define LDD_F_SV_TYPE_OST   0x0002
+#define LDD_F_SV_TYPE_MGS   0x0004
+#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT  | \
+			    LDD_F_SV_TYPE_OST  | \
+			    LDD_F_SV_TYPE_MGS)
+#define LDD_F_SV_ALL	0x0008
+/** need an index assignment */
+#define LDD_F_NEED_INDEX    0x0010
+/** never registered */
+#define LDD_F_VIRGIN	0x0020
+/** update the config logs for this server */
+#define LDD_F_UPDATE	0x0040
+/** rewrite the LDD */
+#define LDD_F_REWRITE_LDD   0x0080
+/** regenerate config logs for this fs or server */
+#define LDD_F_WRITECONF     0x0100
+/** COMPAT_14 */
+#define LDD_F_UPGRADE14     0x0200
+/** process as lctl conf_param */
+#define LDD_F_PARAM	 0x0400
+/** all nodes are specified as service nodes */
+#define LDD_F_NO_PRIMNODE   0x1000
+/** IR enable flag */
+#define LDD_F_IR_CAPABLE    0x2000
+/** the MGS refused to register the target. */
+#define LDD_F_ERROR	 0x4000
+
+/* opc for target register */
+#define LDD_F_OPC_REG   0x10000000
+#define LDD_F_OPC_UNREG 0x20000000
+#define LDD_F_OPC_READY 0x40000000
+#define LDD_F_OPC_MASK  0xf0000000
+
+#define LDD_F_ONDISK_MASK  (LDD_F_SV_TYPE_MASK)
+
+#define LDD_F_MASK	  0xFFFF
+
+enum ldd_mount_type {
+	LDD_MT_EXT3 = 0,
+	LDD_MT_LDISKFS,
+	LDD_MT_SMFS,
+	LDD_MT_REISERFS,
+	LDD_MT_LDISKFS2,
+	LDD_MT_ZFS,
+	LDD_MT_LAST
+};
+
+static inline char *mt_str(enum ldd_mount_type mt)
+{
+	static char *mount_type_string[] = {
+		"ext3",
+		"ldiskfs",
+		"smfs",
+		"reiserfs",
+		"ldiskfs2",
+		"zfs",
+	};
+	return mount_type_string[mt];
+}
+
+static inline char *mt_type(enum ldd_mount_type mt)
+{
+	static char *mount_type_string[] = {
+		"osd-ldiskfs",
+		"osd-ldiskfs",
+		"osd-smfs",
+		"osd-reiserfs",
+		"osd-ldiskfs",
+		"osd-zfs",
+	};
+	return mount_type_string[mt];
+}
+
+#define LDD_INCOMPAT_SUPP 0
+#define LDD_ROCOMPAT_SUPP 0
+
+#define LDD_MAGIC 0x1dd00001
+
+/* On-disk configuration file. In host-endian order. */
+struct lustre_disk_data {
+	__u32      ldd_magic;
+	__u32      ldd_feature_compat;  /* compatible feature flags */
+	__u32      ldd_feature_rocompat;/* read-only compatible feature flags */
+	__u32      ldd_feature_incompat;/* incompatible feature flags */
+
+	__u32      ldd_config_ver;      /* config rewrite count - not used */
+	__u32      ldd_flags;	   /* LDD_SV_TYPE */
+	__u32      ldd_svindex;	 /* server index (0001), must match
+					   svname */
+	__u32      ldd_mount_type;      /* target fs type LDD_MT_* */
+	char       ldd_fsname[64];      /* filesystem this server is part of,
+					   MTI_NAME_MAXLEN */
+	char       ldd_svname[64];      /* this server's name (lustre-mdt0001)*/
+	__u8       ldd_uuid[40];	/* server UUID (COMPAT_146) */
+
+/*200*/ char       ldd_userdata[1024 - 200]; /* arbitrary user string */
+/*1024*/__u8       ldd_padding[4096 - 1024];
+/*4096*/char       ldd_mount_opts[4096]; /* target fs mount opts */
+/*8192*/char       ldd_params[4096];     /* key=value pairs */
+};
+
+
+#define IS_MDT(data)    ((data)->lsi_flags & LDD_F_SV_TYPE_MDT)
+#define IS_OST(data)    ((data)->lsi_flags & LDD_F_SV_TYPE_OST)
+#define IS_MGS(data)    ((data)->lsi_flags & LDD_F_SV_TYPE_MGS)
+#define IS_SERVER(data) ((data)->lsi_flags & (LDD_F_SV_TYPE_MGS | \
+			 LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST))
+#define MT_STR(data)    mt_str((data)->ldd_mount_type)
+
+/* Make the mdt/ost server obd name based on the filesystem name */
+static inline int server_make_name(__u32 flags, __u16 index, char *fs,
+				   char *name)
+{
+	if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) {
+		if (!(flags & LDD_F_SV_ALL))
+			sprintf(name, "%.8s%c%s%04x", fs,
+				(flags & LDD_F_VIRGIN) ? ':' :
+					((flags & LDD_F_WRITECONF) ? '=' : '-'),
+				(flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
+				index);
+	} else if (flags & LDD_F_SV_TYPE_MGS) {
+		sprintf(name, "MGS");
+	} else {
+		CERROR("unknown server type %#x\n", flags);
+		return 1;
+	}
+	return 0;
+}
+
+/****************** mount command *********************/
+
+/* The lmd is only used internally by Lustre; mount simply passes
+   everything as string options */
+
+#define LMD_MAGIC    0xbdacbd03
+
+/* gleaned from the mount command - no persistent info here */
+struct lustre_mount_data {
+	__u32      lmd_magic;
+	__u32      lmd_flags;	 /* lustre mount flags */
+	int	lmd_mgs_failnodes; /* mgs failover node count */
+	int	lmd_exclude_count;
+	int	lmd_recovery_time_soft;
+	int	lmd_recovery_time_hard;
+	char      *lmd_dev;	   /* device name */
+	char      *lmd_profile;       /* client only */
+	char      *lmd_mgssec;	/* sptlrpc flavor to mgs */
+	char      *lmd_opts;	  /* lustre mount options (as opposed to
+					 _device_ mount options) */
+	char      *lmd_params;	/* lustre params */
+	__u32     *lmd_exclude;       /* array of OSTs to ignore */
+	char	*lmd_mgs;	   /* MGS nid */
+	char	*lmd_osd_type;      /* OSD type */
+};
+
+#define LMD_FLG_SERVER       0x0001  /* Mounting a server */
+#define LMD_FLG_CLIENT       0x0002  /* Mounting a client */
+#define LMD_FLG_ABORT_RECOV  0x0008  /* Abort recovery */
+#define LMD_FLG_NOSVC	0x0010  /* Only start MGS/MGC for servers,
+					no other services */
+#define LMD_FLG_NOMGS	0x0020  /* Only start target for servers, reusing
+					existing MGS services */
+#define LMD_FLG_WRITECONF    0x0040  /* Rewrite config log */
+#define LMD_FLG_NOIR	 0x0080  /* NO imperative recovery */
+#define LMD_FLG_NOSCRUB	     0x0100  /* Do not trigger scrub automatically */
+#define LMD_FLG_MGS	     0x0200  /* Also start MGS along with server */
+#define LMD_FLG_IAM	     0x0400  /* IAM dir */
+#define LMD_FLG_NO_PRIMNODE  0x0800  /* all nodes are service nodes */
+#define LMD_FLG_VIRGIN	     0x1000  /* the service registers first time */
+#define LMD_FLG_UPDATE	     0x2000  /* update parameters */
+
+#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
+
+
+/****************** last_rcvd file *********************/
+
+/** version recovery epoch */
+#define LR_EPOCH_BITS   32
+#define lr_epoch(a) ((a) >> LR_EPOCH_BITS)
+#define LR_EXPIRE_INTERVALS 16 /**< number of intervals to track transno */
+#define ENOENT_VERSION 1 /** 'virtual' version of non-existent object */
+
+#define LR_SERVER_SIZE   512
+#define LR_CLIENT_START 8192
+#define LR_CLIENT_SIZE   128
+#if LR_CLIENT_START < LR_SERVER_SIZE
+#error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
+#endif
+
+/*
+ * This limit is arbitrary (131072 clients on x86), but it is convenient to use
+ * 2^n * PAGE_CACHE_SIZE * 8 for the number of bits that fit an order-n allocation.
+ * If we need more than 131072 clients (order-2 allocation on x86) then this
+ * should become an array of single-page pointers that are allocated on demand.
+ */
+#if (128 * 1024UL) > (PAGE_CACHE_SIZE * 8)
+#define LR_MAX_CLIENTS (128 * 1024UL)
+#else
+#define LR_MAX_CLIENTS (PAGE_CACHE_SIZE * 8)
+#endif
+
+/** COMPAT_146: this is an OST (temporary) */
+#define OBD_COMPAT_OST	  0x00000002
+/** COMPAT_146: this is an MDT (temporary) */
+#define OBD_COMPAT_MDT	  0x00000004
+/** 2.0 server, interop flag to show server version is changed */
+#define OBD_COMPAT_20	   0x00000008
+
+/** MDS handles LOV_OBJID file */
+#define OBD_ROCOMPAT_LOVOBJID   0x00000001
+
+/** OST handles group subdirs */
+#define OBD_INCOMPAT_GROUPS     0x00000001
+/** this is an OST */
+#define OBD_INCOMPAT_OST	0x00000002
+/** this is an MDT */
+#define OBD_INCOMPAT_MDT	0x00000004
+/** common last_rvcd format */
+#define OBD_INCOMPAT_COMMON_LR  0x00000008
+/** FID is enabled */
+#define OBD_INCOMPAT_FID	0x00000010
+/** Size-on-MDS is enabled */
+#define OBD_INCOMPAT_SOM	0x00000020
+/** filesystem using iam format to store directory entries */
+#define OBD_INCOMPAT_IAM_DIR    0x00000040
+/** LMA attribute contains per-inode incompatible flags */
+#define OBD_INCOMPAT_LMA	0x00000080
+/** lmm_stripe_count has been shrunk from __u32 to __u16 and the remaining 16
+ * bits are now used to store a generation. Once we start changing the layout
+ * and bumping the generation, old versions expecting a 32-bit lmm_stripe_count
+ * will be confused by interpreting stripe_count | gen << 16 as the actual
+ * stripe count */
+#define OBD_INCOMPAT_LMM_VER    0x00000100
+/** multiple OI files for MDT */
+#define OBD_INCOMPAT_MULTI_OI   0x00000200
+
+/* Data stored per server at the head of the last_rcvd file.  In le32 order.
+   This should be common to filter_internal.h, lustre_mds.h */
+struct lr_server_data {
+	__u8  lsd_uuid[40];	/* server UUID */
+	__u64 lsd_last_transno;    /* last completed transaction ID */
+	__u64 lsd_compat14;	/* reserved - compat with old last_rcvd */
+	__u64 lsd_mount_count;     /* incarnation number */
+	__u32 lsd_feature_compat;  /* compatible feature flags */
+	__u32 lsd_feature_rocompat;/* read-only compatible feature flags */
+	__u32 lsd_feature_incompat;/* incompatible feature flags */
+	__u32 lsd_server_size;     /* size of server data area */
+	__u32 lsd_client_start;    /* start of per-client data area */
+	__u16 lsd_client_size;     /* size of per-client data area */
+	__u16 lsd_subdir_count;    /* number of subdirectories for objects */
+	__u64 lsd_catalog_oid;     /* recovery catalog object id */
+	__u32 lsd_catalog_ogen;    /* recovery catalog inode generation */
+	__u8  lsd_peeruuid[40];    /* UUID of MDS associated with this OST */
+	__u32 lsd_osd_index;       /* index number of OST in LOV */
+	__u32 lsd_padding1;	/* was lsd_mdt_index, unused in 2.4.0 */
+	__u32 lsd_start_epoch;     /* VBR: start epoch from last boot */
+	/** transaction values since lsd_trans_table_time */
+	__u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
+	/** start point of transno table below */
+	__u32 lsd_trans_table_time; /* time of first slot in table above */
+	__u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
+	__u8  lsd_padding[LR_SERVER_SIZE - 288];
+};
+
+/* Data stored per client in the last_rcvd file.  In le32 order. */
+struct lsd_client_data {
+	__u8  lcd_uuid[40];      /* client UUID */
+	__u64 lcd_last_transno; /* last completed transaction ID */
+	__u64 lcd_last_xid;     /* xid for the last transaction */
+	__u32 lcd_last_result;  /* result from last RPC */
+	__u32 lcd_last_data;    /* per-op data (disposition for open &c.) */
+	/* for MDS_CLOSE requests */
+	__u64 lcd_last_close_transno; /* last completed transaction ID */
+	__u64 lcd_last_close_xid;     /* xid for the last transaction */
+	__u32 lcd_last_close_result;  /* result from last RPC */
+	__u32 lcd_last_close_data;    /* per-op data */
+	/* VBR: last versions */
+	__u64 lcd_pre_versions[4];
+	__u32 lcd_last_epoch;
+	/** orphans handling for delayed export rely on that */
+	__u32 lcd_first_epoch;
+	__u8  lcd_padding[LR_CLIENT_SIZE - 128];
+};
+
+/* bug20354: the lcd_uuid for export of clients may be wrong */
+static inline void check_lcd(char *obd_name, int index,
+			     struct lsd_client_data *lcd)
+{
+	int length = sizeof(lcd->lcd_uuid);
+	if (strnlen((char*)lcd->lcd_uuid, length) == length) {
+		lcd->lcd_uuid[length - 1] = '\0';
+
+		LCONSOLE_ERROR("the client UUID (%s) on %s for exports"
+			       "stored in last_rcvd(index = %d) is bad!\n",
+			       lcd->lcd_uuid, obd_name, index);
+	}
+}
+
+/* last_rcvd handling */
+static inline void lsd_le_to_cpu(struct lr_server_data *buf,
+				 struct lr_server_data *lsd)
+{
+	int i;
+	memcpy(lsd->lsd_uuid, buf->lsd_uuid, sizeof(lsd->lsd_uuid));
+	lsd->lsd_last_transno     = le64_to_cpu(buf->lsd_last_transno);
+	lsd->lsd_compat14	 = le64_to_cpu(buf->lsd_compat14);
+	lsd->lsd_mount_count      = le64_to_cpu(buf->lsd_mount_count);
+	lsd->lsd_feature_compat   = le32_to_cpu(buf->lsd_feature_compat);
+	lsd->lsd_feature_rocompat = le32_to_cpu(buf->lsd_feature_rocompat);
+	lsd->lsd_feature_incompat = le32_to_cpu(buf->lsd_feature_incompat);
+	lsd->lsd_server_size      = le32_to_cpu(buf->lsd_server_size);
+	lsd->lsd_client_start     = le32_to_cpu(buf->lsd_client_start);
+	lsd->lsd_client_size      = le16_to_cpu(buf->lsd_client_size);
+	lsd->lsd_subdir_count     = le16_to_cpu(buf->lsd_subdir_count);
+	lsd->lsd_catalog_oid      = le64_to_cpu(buf->lsd_catalog_oid);
+	lsd->lsd_catalog_ogen     = le32_to_cpu(buf->lsd_catalog_ogen);
+	memcpy(lsd->lsd_peeruuid, buf->lsd_peeruuid, sizeof(lsd->lsd_peeruuid));
+	lsd->lsd_osd_index	= le32_to_cpu(buf->lsd_osd_index);
+	lsd->lsd_padding1	= le32_to_cpu(buf->lsd_padding1);
+	lsd->lsd_start_epoch      = le32_to_cpu(buf->lsd_start_epoch);
+	for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
+		lsd->lsd_trans_table[i] = le64_to_cpu(buf->lsd_trans_table[i]);
+	lsd->lsd_trans_table_time = le32_to_cpu(buf->lsd_trans_table_time);
+	lsd->lsd_expire_intervals = le32_to_cpu(buf->lsd_expire_intervals);
+}
+
+static inline void lsd_cpu_to_le(struct lr_server_data *lsd,
+				 struct lr_server_data *buf)
+{
+	int i;
+	memcpy(buf->lsd_uuid, lsd->lsd_uuid, sizeof(buf->lsd_uuid));
+	buf->lsd_last_transno     = cpu_to_le64(lsd->lsd_last_transno);
+	buf->lsd_compat14	 = cpu_to_le64(lsd->lsd_compat14);
+	buf->lsd_mount_count      = cpu_to_le64(lsd->lsd_mount_count);
+	buf->lsd_feature_compat   = cpu_to_le32(lsd->lsd_feature_compat);
+	buf->lsd_feature_rocompat = cpu_to_le32(lsd->lsd_feature_rocompat);
+	buf->lsd_feature_incompat = cpu_to_le32(lsd->lsd_feature_incompat);
+	buf->lsd_server_size      = cpu_to_le32(lsd->lsd_server_size);
+	buf->lsd_client_start     = cpu_to_le32(lsd->lsd_client_start);
+	buf->lsd_client_size      = cpu_to_le16(lsd->lsd_client_size);
+	buf->lsd_subdir_count     = cpu_to_le16(lsd->lsd_subdir_count);
+	buf->lsd_catalog_oid      = cpu_to_le64(lsd->lsd_catalog_oid);
+	buf->lsd_catalog_ogen     = cpu_to_le32(lsd->lsd_catalog_ogen);
+	memcpy(buf->lsd_peeruuid, lsd->lsd_peeruuid, sizeof(buf->lsd_peeruuid));
+	buf->lsd_osd_index	  = cpu_to_le32(lsd->lsd_osd_index);
+	buf->lsd_padding1	  = cpu_to_le32(lsd->lsd_padding1);
+	buf->lsd_start_epoch      = cpu_to_le32(lsd->lsd_start_epoch);
+	for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
+		buf->lsd_trans_table[i] = cpu_to_le64(lsd->lsd_trans_table[i]);
+	buf->lsd_trans_table_time = cpu_to_le32(lsd->lsd_trans_table_time);
+	buf->lsd_expire_intervals = cpu_to_le32(lsd->lsd_expire_intervals);
+}
+
+static inline void lcd_le_to_cpu(struct lsd_client_data *buf,
+				 struct lsd_client_data *lcd)
+{
+	memcpy(lcd->lcd_uuid, buf->lcd_uuid, sizeof (lcd->lcd_uuid));
+	lcd->lcd_last_transno       = le64_to_cpu(buf->lcd_last_transno);
+	lcd->lcd_last_xid	   = le64_to_cpu(buf->lcd_last_xid);
+	lcd->lcd_last_result	= le32_to_cpu(buf->lcd_last_result);
+	lcd->lcd_last_data	  = le32_to_cpu(buf->lcd_last_data);
+	lcd->lcd_last_close_transno = le64_to_cpu(buf->lcd_last_close_transno);
+	lcd->lcd_last_close_xid     = le64_to_cpu(buf->lcd_last_close_xid);
+	lcd->lcd_last_close_result  = le32_to_cpu(buf->lcd_last_close_result);
+	lcd->lcd_last_close_data    = le32_to_cpu(buf->lcd_last_close_data);
+	lcd->lcd_pre_versions[0]    = le64_to_cpu(buf->lcd_pre_versions[0]);
+	lcd->lcd_pre_versions[1]    = le64_to_cpu(buf->lcd_pre_versions[1]);
+	lcd->lcd_pre_versions[2]    = le64_to_cpu(buf->lcd_pre_versions[2]);
+	lcd->lcd_pre_versions[3]    = le64_to_cpu(buf->lcd_pre_versions[3]);
+	lcd->lcd_last_epoch	 = le32_to_cpu(buf->lcd_last_epoch);
+	lcd->lcd_first_epoch	= le32_to_cpu(buf->lcd_first_epoch);
+}
+
+static inline void lcd_cpu_to_le(struct lsd_client_data *lcd,
+				 struct lsd_client_data *buf)
+{
+	memcpy(buf->lcd_uuid, lcd->lcd_uuid, sizeof (lcd->lcd_uuid));
+	buf->lcd_last_transno       = cpu_to_le64(lcd->lcd_last_transno);
+	buf->lcd_last_xid	   = cpu_to_le64(lcd->lcd_last_xid);
+	buf->lcd_last_result	= cpu_to_le32(lcd->lcd_last_result);
+	buf->lcd_last_data	  = cpu_to_le32(lcd->lcd_last_data);
+	buf->lcd_last_close_transno = cpu_to_le64(lcd->lcd_last_close_transno);
+	buf->lcd_last_close_xid     = cpu_to_le64(lcd->lcd_last_close_xid);
+	buf->lcd_last_close_result  = cpu_to_le32(lcd->lcd_last_close_result);
+	buf->lcd_last_close_data    = cpu_to_le32(lcd->lcd_last_close_data);
+	buf->lcd_pre_versions[0]    = cpu_to_le64(lcd->lcd_pre_versions[0]);
+	buf->lcd_pre_versions[1]    = cpu_to_le64(lcd->lcd_pre_versions[1]);
+	buf->lcd_pre_versions[2]    = cpu_to_le64(lcd->lcd_pre_versions[2]);
+	buf->lcd_pre_versions[3]    = cpu_to_le64(lcd->lcd_pre_versions[3]);
+	buf->lcd_last_epoch	 = cpu_to_le32(lcd->lcd_last_epoch);
+	buf->lcd_first_epoch	= cpu_to_le32(lcd->lcd_first_epoch);
+}
+
+static inline __u64 lcd_last_transno(struct lsd_client_data *lcd)
+{
+	return (lcd->lcd_last_transno > lcd->lcd_last_close_transno ?
+		lcd->lcd_last_transno : lcd->lcd_last_close_transno);
+}
+
+static inline __u64 lcd_last_xid(struct lsd_client_data *lcd)
+{
+	return (lcd->lcd_last_xid > lcd->lcd_last_close_xid ?
+		lcd->lcd_last_xid : lcd->lcd_last_close_xid);
+}
+
+/****************** superblock additional info *********************/
+
+struct ll_sb_info;
+
+struct lustre_sb_info {
+	int		       lsi_flags;
+	struct obd_device	*lsi_mgc;     /* mgc obd */
+	struct lustre_mount_data *lsi_lmd;     /* mount command info */
+	struct ll_sb_info	*lsi_llsbi;   /* add'l client sbi info */
+	struct dt_device	 *lsi_dt_dev;  /* dt device to access disk fs*/
+	struct vfsmount	  *lsi_srv_mnt; /* the one server mount */
+	atomic_t	      lsi_mounts;  /* references to the srv_mnt */
+	char			  lsi_svname[MTI_NAME_MAXLEN];
+	char			  lsi_osd_obdname[64];
+	char			  lsi_osd_uuid[64];
+	struct obd_export	 *lsi_osd_exp;
+	char			  lsi_osd_type[16];
+	char			  lsi_fstype[16];
+	struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
+						  own backing_dev_info */
+};
+
+#define LSI_UMOUNT_FAILOVER	      0x00200000
+#define LSI_BDI_INITIALIZED	      0x00400000
+
+#define     s2lsi(sb)	((struct lustre_sb_info *)((sb)->s_fs_info))
+#define     s2lsi_nocast(sb) ((sb)->s_fs_info)
+
+#define     get_profile_name(sb)   (s2lsi(sb)->lsi_lmd->lmd_profile)
+#define	    get_mount_flags(sb)	   (s2lsi(sb)->lsi_lmd->lmd_flags)
+#define	    get_mntdev_name(sb)	   (s2lsi(sb)->lsi_lmd->lmd_dev)
+
+
+/****************** mount lookup info *********************/
+
+struct lustre_mount_info {
+	char		 *lmi_name;
+	struct super_block   *lmi_sb;
+	struct vfsmount      *lmi_mnt;
+	struct list_head	    lmi_list_chain;
+};
+
+/****************** prototypes *********************/
+
+/* obd_mount.c */
+int server_name2fsname(const char *svname, char *fsname, const char **endptr);
+int server_name2index(const char *svname, __u32 *idx, const char **endptr);
+int server_name2svname(const char *label, char *svname, const char **endptr,
+		       size_t svsize);
+
+int lustre_put_lsi(struct super_block *sb);
+int lustre_start_simple(char *obdname, char *type, char *uuid,
+			char *s1, char *s2, char *s3, char *s4);
+int lustre_start_mgc(struct super_block *sb);
+void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
+						  struct vfsmount *mnt));
+void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));
+int lustre_common_put_super(struct super_block *sb);
+
+
+int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
+
+/** @} disk */
+
+#endif // _LUSTRE_DISK_H
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
new file mode 100644
index 000000000000..317f928fc151
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -0,0 +1,1671 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/** \defgroup LDLM Lustre Distributed Lock Manager
+ *
+ * Lustre DLM is based on VAX DLM.
+ * Its two main roles are:
+ *   - To provide locking assuring consistency of data on all Lustre nodes.
+ *   - To allow clients to cache state protected by a lock by holding the
+ *     lock until a conflicting lock is requested or it is expired by the LRU.
+ *
+ * @{
+ */
+
+#ifndef _LUSTRE_DLM_H__
+#define _LUSTRE_DLM_H__
+
+#include <linux/lustre_dlm.h>
+
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_handles.h>
+#include <interval_tree.h> /* for interval_node{}, ldlm_extent */
+#include <lu_ref.h>
+
+struct obd_ops;
+struct obd_device;
+
+#define OBD_LDLM_DEVICENAME  "ldlm"
+
+#define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus())
+#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(36000))
+#define LDLM_CTIME_AGE_LIMIT (10)
+#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
+
+/**
+ * LDLM non-error return states
+ */
+typedef enum {
+	ELDLM_OK = 0,
+
+	ELDLM_LOCK_CHANGED = 300,
+	ELDLM_LOCK_ABORTED = 301,
+	ELDLM_LOCK_REPLACED = 302,
+	ELDLM_NO_LOCK_DATA = 303,
+	ELDLM_LOCK_WOULDBLOCK = 304,
+
+	ELDLM_NAMESPACE_EXISTS = 400,
+	ELDLM_BAD_NAMESPACE    = 401
+} ldlm_error_t;
+
+/**
+ * LDLM namespace type.
+ * The "client" type is actually an indication that this is a narrow local view
+ * into complete namespace on the server. Such namespaces cannot make any
+ * decisions about lack of conflicts or do any autonomous lock granting without
+ * first speaking to a server.
+ */
+typedef enum {
+	LDLM_NAMESPACE_SERVER = 1 << 0,
+	LDLM_NAMESPACE_CLIENT = 1 << 1
+} ldlm_side_t;
+
+/**
+ * Declaration of flags sent through the wire.
+ **/
+#define LDLM_FL_LOCK_CHANGED   0x000001 /* extent, mode, or resource changed */
+
+/**
+ * If the server returns one of these flags, then the lock was put on that list.
+ * If the client sends one of these flags (during recovery ONLY!), it wants the
+ * lock added to the specified list, no questions asked.
+ */
+#define LDLM_FL_BLOCK_GRANTED  0x000002
+#define LDLM_FL_BLOCK_CONV     0x000004
+#define LDLM_FL_BLOCK_WAIT     0x000008
+
+/* Used to be LDLM_FL_CBPENDING 0x000010 moved to non-wire flags */
+
+#define LDLM_FL_AST_SENT       0x000020 /* blocking or cancel packet was
+					 * queued for sending. */
+/* Used to be LDLM_FL_WAIT_NOREPROC 0x000040   moved to non-wire flags */
+/* Used to be LDLM_FL_CANCEL	0x000080   moved to non-wire flags */
+
+/**
+ * Lock is being replayed.  This could probably be implied by the fact that one
+ * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous.
+ */
+#define LDLM_FL_REPLAY	 0x000100
+
+#define LDLM_FL_INTENT_ONLY    0x000200 /* Don't grant lock, just do intent. */
+
+/* Used to be LDLM_FL_LOCAL_ONLY 0x000400  moved to non-wire flags */
+/* Used to be LDLM_FL_FAILED     0x000800  moved to non-wire flags */
+
+#define LDLM_FL_HAS_INTENT     0x001000 /* lock request has intent */
+
+/* Used to be LDLM_FL_CANCELING  0x002000  moved to non-wire flags */
+/* Used to be LDLM_FL_LOCAL      0x004000  moved to non-wire flags */
+
+#define LDLM_FL_DISCARD_DATA   0x010000 /* discard (no writeback) on cancel */
+
+#define LDLM_FL_NO_TIMEOUT     0x020000 /* Blocked by group lock - wait
+					 * indefinitely */
+
+/** file & record locking */
+#define LDLM_FL_BLOCK_NOWAIT   0x040000 /* Server told not to wait if blocked.
+					 * For AGL, OST will not send glimpse
+					 * callback. */
+#define LDLM_FL_TEST_LOCK      0x080000 // return blocking lock
+
+/* Used to be LDLM_FL_LVB_READY  0x100000 moved to non-wire flags */
+/* Used to be LDLM_FL_KMS_IGNORE 0x200000 moved to non-wire flags */
+/* Used to be LDLM_FL_NO_LRU     0x400000 moved to non-wire flags */
+
+/* Immediatelly cancel such locks when they block some other locks. Send
+ * cancel notification to original lock holder, but expect no reply. This is
+ * for clients (like liblustre) that cannot be expected to reliably response
+ * to blocking AST. */
+#define LDLM_FL_CANCEL_ON_BLOCK 0x800000
+
+/* Flags flags inherited from parent lock when doing intents. */
+#define LDLM_INHERIT_FLAGS     (LDLM_FL_CANCEL_ON_BLOCK)
+
+/* Used to be LDLM_FL_CP_REQD	0x1000000 moved to non-wire flags */
+/* Used to be LDLM_FL_CLEANED	0x2000000 moved to non-wire flags */
+/* Used to be LDLM_FL_ATOMIC_CB      0x4000000 moved to non-wire flags */
+/* Used to be LDLM_FL_BL_AST	 0x10000000 moved to non-wire flags */
+/* Used to be LDLM_FL_BL_DONE	0x20000000 moved to non-wire flags */
+
+/* measure lock contention and return -EUSERS if locking contention is high */
+#define LDLM_FL_DENY_ON_CONTENTION 0x40000000
+
+/* These are flags that are mapped into the flags and ASTs of blocking locks */
+#define LDLM_AST_DISCARD_DATA  0x80000000 /* Add FL_DISCARD to blocking ASTs */
+
+/* Flags sent in AST lock_flags to be mapped into the receiving lock. */
+#define LDLM_AST_FLAGS	 (LDLM_FL_DISCARD_DATA)
+
+/*
+ * --------------------------------------------------------------------------
+ * NOTE! Starting from this point, that is, LDLM_FL_* flags with values above
+ * 0x80000000 will not be sent over the wire.
+ * --------------------------------------------------------------------------
+ */
+
+/**
+ * Declaration of flags not sent through the wire.
+ **/
+
+/**
+ * Used for marking lock as a target for -EINTR while cp_ast sleep
+ * emulation + race with upcoming bl_ast.
+ */
+#define LDLM_FL_FAIL_LOC       0x100000000ULL
+
+/**
+ * Used while processing the unused list to know that we have already
+ * handled this lock and decided to skip it.
+ */
+#define LDLM_FL_SKIPPED	0x200000000ULL
+/* this lock is being destroyed */
+#define LDLM_FL_CBPENDING      0x400000000ULL
+/* not a real flag, not saved in lock */
+#define LDLM_FL_WAIT_NOREPROC  0x800000000ULL
+/* cancellation callback already run */
+#define LDLM_FL_CANCEL	 0x1000000000ULL
+#define LDLM_FL_LOCAL_ONLY     0x2000000000ULL
+/* don't run the cancel callback under ldlm_cli_cancel_unused */
+#define LDLM_FL_FAILED	 0x4000000000ULL
+/* lock cancel has already been sent */
+#define LDLM_FL_CANCELING      0x8000000000ULL
+/* local lock (ie, no srv/cli split) */
+#define LDLM_FL_LOCAL	  0x10000000000ULL
+/* XXX FIXME: This is being added to b_size as a low-risk fix to the fact that
+ * the LVB filling happens _after_ the lock has been granted, so another thread
+ * can match it before the LVB has been updated.  As a dirty hack, we set
+ * LDLM_FL_LVB_READY only after we've done the LVB poop.
+ * this is only needed on LOV/OSC now, where LVB is actually used and callers
+ * must set it in input flags.
+ *
+ * The proper fix is to do the granting inside of the completion AST, which can
+ * be replaced with a LVB-aware wrapping function for OSC locks.  That change is
+ * pretty high-risk, though, and would need a lot more testing. */
+#define LDLM_FL_LVB_READY      0x20000000000ULL
+/* A lock contributes to the known minimum size (KMS) calculation until it has
+ * finished the part of its cancelation that performs write back on its dirty
+ * pages.  It can remain on the granted list during this whole time.  Threads
+ * racing to update the KMS after performing their writeback need to know to
+ * exclude each other's locks from the calculation as they walk the granted
+ * list. */
+#define LDLM_FL_KMS_IGNORE     0x40000000000ULL
+/* completion AST to be executed */
+#define LDLM_FL_CP_REQD	0x80000000000ULL
+/* cleanup_resource has already handled the lock */
+#define LDLM_FL_CLEANED	0x100000000000ULL
+/* optimization hint: LDLM can run blocking callback from current context
+ * w/o involving separate thread. in order to decrease cs rate */
+#define LDLM_FL_ATOMIC_CB      0x200000000000ULL
+
+/* It may happen that a client initiates two operations, e.g. unlink and
+ * mkdir, such that the server sends a blocking AST for conflicting
+ * locks to this client for the first operation, whereas the second
+ * operation has canceled this lock and is waiting for rpc_lock which is
+ * taken by the first operation. LDLM_FL_BL_AST is set by
+ * ldlm_callback_handler() in the lock to prevent the Early Lock Cancel
+ * (ELC) code from cancelling it.
+ *
+ * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock
+ * cache is dropped to let ldlm_callback_handler() return EINVAL to the
+ * server. It is used when ELC RPC is already prepared and is waiting
+ * for rpc_lock, too late to send a separate CANCEL RPC. */
+#define LDLM_FL_BL_AST	  0x400000000000ULL
+#define LDLM_FL_BL_DONE	 0x800000000000ULL
+/* Don't put lock into the LRU list, so that it is not canceled due to aging.
+ * Used by MGC locks, they are cancelled only at unmount or by callback. */
+#define LDLM_FL_NO_LRU		0x1000000000000ULL
+
+/**
+ * The blocking callback is overloaded to perform two functions.  These flags
+ * indicate which operation should be performed.
+ */
+#define LDLM_CB_BLOCKING    1
+#define LDLM_CB_CANCELING   2
+
+/**
+ * \name Lock Compatibility Matrix.
+ *
+ * A lock has both a type (extent, flock, inode bits, or plain) and a mode.
+ * Lock types are described in their respective implementation files:
+ * ldlm_{extent,flock,inodebits,plain}.c.
+ *
+ * There are six lock modes along with a compatibility matrix to indicate if
+ * two locks are compatible.
+ *
+ * - EX: Exclusive mode. Before a new file is created, MDS requests EX lock
+ *   on the parent.
+ * - PW: Protective Write (normal write) mode. When a client requests a write
+ *   lock from an OST, a lock with PW mode will be issued.
+ * - PR: Protective Read (normal read) mode. When a client requests a read from
+ *   an OST, a lock with PR mode will be issued. Also, if the client opens a
+ *   file for execution, it is granted a lock with PR mode.
+ * - CW: Concurrent Write mode. The type of lock that the MDS grants if a client
+ *   requests a write lock during a file open operation.
+ * - CR Concurrent Read mode. When a client performs a path lookup, MDS grants
+ *   an inodebit lock with the CR mode on the intermediate path component.
+ * - NL Null mode.
+ *
+ * <PRE>
+ *       NL  CR  CW  PR  PW  EX
+ *  NL    1   1   1   1   1   1
+ *  CR    1   1   1   1   1   0
+ *  CW    1   1   1   0   0   0
+ *  PR    1   1   0   1   0   0
+ *  PW    1   1   0   0   0   0
+ *  EX    1   0   0   0   0   0
+ * </PRE>
+ */
+/** @{ */
+#define LCK_COMPAT_EX  LCK_NL
+#define LCK_COMPAT_PW  (LCK_COMPAT_EX | LCK_CR)
+#define LCK_COMPAT_PR  (LCK_COMPAT_PW | LCK_PR)
+#define LCK_COMPAT_CW  (LCK_COMPAT_PW | LCK_CW)
+#define LCK_COMPAT_CR  (LCK_COMPAT_CW | LCK_PR | LCK_PW)
+#define LCK_COMPAT_NL  (LCK_COMPAT_CR | LCK_EX | LCK_GROUP)
+#define LCK_COMPAT_GROUP  (LCK_GROUP | LCK_NL)
+#define LCK_COMPAT_COS (LCK_COS)
+/** @} Lock Compatibility Matrix */
+
+extern ldlm_mode_t lck_compat_array[];
+
+static inline void lockmode_verify(ldlm_mode_t mode)
+{
+       LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE);
+}
+
+static inline int lockmode_compat(ldlm_mode_t exist_mode, ldlm_mode_t new_mode)
+{
+       return (lck_compat_array[exist_mode] & new_mode);
+}
+
+/*
+ *
+ * cluster name spaces
+ *
+ */
+
+#define DLM_OST_NAMESPACE 1
+#define DLM_MDS_NAMESPACE 2
+
+/* XXX
+   - do we just separate this by security domains and use a prefix for
+     multiple namespaces in the same domain?
+   -
+*/
+
+/**
+ * Locking rules for LDLM:
+ *
+ * lr_lock
+ *
+ * lr_lock
+ *     waiting_locks_spinlock
+ *
+ * lr_lock
+ *     led_lock
+ *
+ * lr_lock
+ *     ns_lock
+ *
+ * lr_lvb_mutex
+ *     lr_lock
+ *
+ */
+
+struct ldlm_pool;
+struct ldlm_lock;
+struct ldlm_resource;
+struct ldlm_namespace;
+
+/**
+ * Operations on LDLM pools.
+ * LDLM pool is a pool of locks in the namespace without any implicitly
+ * specified limits.
+ * Locks in the pool are organized in LRU.
+ * Local memory pressure or server instructions (e.g. mempressure on server)
+ * can trigger freeing of locks from the pool
+ */
+struct ldlm_pool_ops {
+	/** Recalculate pool \a pl usage */
+	int (*po_recalc)(struct ldlm_pool *pl);
+	/** Cancel at least \a nr locks from pool \a pl */
+	int (*po_shrink)(struct ldlm_pool *pl, int nr,
+			 unsigned int gfp_mask);
+	int (*po_setup)(struct ldlm_pool *pl, int limit);
+};
+
+/** One second for pools thread check interval. Each pool has own period. */
+#define LDLM_POOLS_THREAD_PERIOD (1)
+
+/** ~6% margin for modest pools. See ldlm_pool.c for details. */
+#define LDLM_POOLS_MODEST_MARGIN_SHIFT (4)
+
+/** Default recalc period for server side pools in sec. */
+#define LDLM_POOL_SRV_DEF_RECALC_PERIOD (1)
+
+/** Default recalc period for client side pools in sec. */
+#define LDLM_POOL_CLI_DEF_RECALC_PERIOD (10)
+
+/**
+ * LDLM pool structure to track granted locks.
+ * For purposes of determining when to release locks on e.g. memory pressure.
+ * This feature is commonly referred to as lru_resize.
+ */
+struct ldlm_pool {
+	/** Pool proc directory. */
+	proc_dir_entry_t	*pl_proc_dir;
+	/** Pool name, must be long enough to hold compound proc entry name. */
+	char			pl_name[100];
+	/** Lock for protecting SLV/CLV updates. */
+	spinlock_t		pl_lock;
+	/** Number of allowed locks in in pool, both, client and server side. */
+	atomic_t		pl_limit;
+	/** Number of granted locks in */
+	atomic_t		pl_granted;
+	/** Grant rate per T. */
+	atomic_t		pl_grant_rate;
+	/** Cancel rate per T. */
+	atomic_t		pl_cancel_rate;
+	/** Server lock volume (SLV). Protected by pl_lock. */
+	__u64			pl_server_lock_volume;
+	/** Current biggest client lock volume. Protected by pl_lock. */
+	__u64			pl_client_lock_volume;
+	/** Lock volume factor. SLV on client is calculated as following:
+	 *  server_slv * lock_volume_factor. */
+	atomic_t		pl_lock_volume_factor;
+	/** Time when last SLV from server was obtained. */
+	time_t			pl_recalc_time;
+	/** Recalculation period for pool. */
+	time_t			pl_recalc_period;
+	/** Recalculation and shrink operations. */
+	struct ldlm_pool_ops	*pl_ops;
+	/** Number of planned locks for next period. */
+	int			pl_grant_plan;
+	/** Pool statistics. */
+	struct lprocfs_stats	*pl_stats;
+};
+
+typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **,
+			       void *req_cookie, ldlm_mode_t mode, __u64 flags,
+			       void *data);
+
+typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock);
+
+/**
+ * LVB operations.
+ * LVB is Lock Value Block. This is a special opaque (to LDLM) value that could
+ * be associated with an LDLM lock and transferred from client to server and
+ * back.
+ *
+ * Currently LVBs are used by:
+ *  - OSC-OST code to maintain current object size/times
+ *  - layout lock code to return the layout when the layout lock is granted
+ */
+struct ldlm_valblock_ops {
+	int (*lvbo_init)(struct ldlm_resource *res);
+	int (*lvbo_update)(struct ldlm_resource *res,
+			   struct ptlrpc_request *r,
+			   int increase);
+	int (*lvbo_free)(struct ldlm_resource *res);
+	/* Return size of lvb data appropriate RPC size can be reserved */
+	int (*lvbo_size)(struct ldlm_lock *lock);
+	/* Called to fill in lvb data to RPC buffer @buf */
+	int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int buflen);
+};
+
+/**
+ * LDLM pools related, type of lock pool in the namespace.
+ * Greedy means release cached locks aggressively
+ */
+typedef enum {
+	LDLM_NAMESPACE_GREEDY = 1 << 0,
+	LDLM_NAMESPACE_MODEST = 1 << 1
+} ldlm_appetite_t;
+
+/**
+ * Default values for the "max_nolock_size", "contention_time" and
+ * "contended_locks" namespace tunables.
+ */
+#define NS_DEFAULT_MAX_NOLOCK_BYTES 0
+#define NS_DEFAULT_CONTENTION_SECONDS 2
+#define NS_DEFAULT_CONTENDED_LOCKS 32
+
+struct ldlm_ns_bucket {
+	/** back pointer to namespace */
+	struct ldlm_namespace      *nsb_namespace;
+	/**
+	 * Estimated lock callback time.  Used by adaptive timeout code to
+	 * avoid spurious client evictions due to unresponsiveness when in
+	 * fact the network or overall system load is at fault
+	 */
+	struct adaptive_timeout     nsb_at_estimate;
+};
+
+enum {
+	/** LDLM namespace lock stats */
+	LDLM_NSS_LOCKS	  = 0,
+	LDLM_NSS_LAST
+};
+
+typedef enum {
+	/** invalide type */
+	LDLM_NS_TYPE_UNKNOWN    = 0,
+	/** mdc namespace */
+	LDLM_NS_TYPE_MDC,
+	/** mds namespace */
+	LDLM_NS_TYPE_MDT,
+	/** osc namespace */
+	LDLM_NS_TYPE_OSC,
+	/** ost namespace */
+	LDLM_NS_TYPE_OST,
+	/** mgc namespace */
+	LDLM_NS_TYPE_MGC,
+	/** mgs namespace */
+	LDLM_NS_TYPE_MGT,
+} ldlm_ns_type_t;
+
+/**
+ * LDLM Namespace.
+ *
+ * Namespace serves to contain locks related to a particular service.
+ * There are two kinds of namespaces:
+ * - Server namespace has knowledge of all locks and is therefore authoritative
+ *   to make decisions like what locks could be granted and what conflicts
+ *   exist during new lock enqueue.
+ * - Client namespace only has limited knowledge about locks in the namespace,
+ *   only seeing locks held by the client.
+ *
+ * Every Lustre service has one server namespace present on the server serving
+ * that service. Every client connected to the service has a client namespace
+ * for it.
+ * Every lock obtained by client in that namespace is actually represented by
+ * two in-memory locks. One on the server and one on the client. The locks are
+ * linked by a special cookie by which one node can tell to the other which lock
+ * it actually means during communications. Such locks are called remote locks.
+ * The locks held by server only without any reference to a client are called
+ * local locks.
+ */
+struct ldlm_namespace {
+	/** Backward link to OBD, required for LDLM pool to store new SLV. */
+	struct obd_device	*ns_obd;
+
+	/** Flag indicating if namespace is on client instead of server */
+	ldlm_side_t		ns_client;
+
+	/** Resource hash table for namespace. */
+	cfs_hash_t		*ns_rs_hash;
+
+	/** serialize */
+	spinlock_t		ns_lock;
+
+	/** big refcount (by bucket) */
+	atomic_t		ns_bref;
+
+	/**
+	 * Namespace connect flags supported by server (may be changed via
+	 * /proc, LRU resize may be disabled/enabled).
+	 */
+	__u64			ns_connect_flags;
+
+	/** Client side original connect flags supported by server. */
+	__u64			ns_orig_connect_flags;
+
+	/* namespace proc dir entry */
+	struct proc_dir_entry	*ns_proc_dir_entry;
+
+	/**
+	 * Position in global namespace list linking all namespaces on
+	 * the node.
+	 */
+	struct list_head		ns_list_chain;
+
+	/**
+	 * List of unused locks for this namespace. This list is also called
+	 * LRU lock list.
+	 * Unused locks are locks with zero reader/writer reference counts.
+	 * This list is only used on clients for lock caching purposes.
+	 * When we want to release some locks voluntarily or if server wants
+	 * us to release some locks due to e.g. memory pressure, we take locks
+	 * to release from the head of this list.
+	 * Locks are linked via l_lru field in \see struct ldlm_lock.
+	 */
+	struct list_head		ns_unused_list;
+	/** Number of locks in the LRU list above */
+	int			ns_nr_unused;
+
+	/**
+	 * Maximum number of locks permitted in the LRU. If 0, means locks
+	 * are managed by pools and there is no preset limit, rather it is all
+	 * controlled by available memory on this client and on server.
+	 */
+	unsigned int		ns_max_unused;
+	/** Maximum allowed age (last used time) for locks in the LRU */
+	unsigned int		ns_max_age;
+	/**
+	 * Server only: number of times we evicted clients due to lack of reply
+	 * to ASTs.
+	 */
+	unsigned int		ns_timeouts;
+	/**
+	 * Number of seconds since the file change time after which the
+	 * MDT will return an UPDATE lock along with a LOOKUP lock.
+	 * This allows the client to start caching negative dentries
+	 * for a directory and may save an RPC for a later stat.
+	 */
+	unsigned int		ns_ctime_age_limit;
+
+	/**
+	 * Used to rate-limit ldlm_namespace_dump calls.
+	 * \see ldlm_namespace_dump. Increased by 10 seconds every time
+	 * it is called.
+	 */
+	cfs_time_t		ns_next_dump;
+
+	/** "policy" function that does actual lock conflict determination */
+	ldlm_res_policy		ns_policy;
+
+	/**
+	 * LVB operations for this namespace.
+	 * \see struct ldlm_valblock_ops
+	 */
+	struct ldlm_valblock_ops *ns_lvbo;
+
+	/**
+	 * Used by filter code to store pointer to OBD of the service.
+	 * Should be dropped in favor of \a ns_obd
+	 */
+	void			*ns_lvbp;
+
+	/**
+	 * Wait queue used by __ldlm_namespace_free. Gets woken up every time
+	 * a resource is removed.
+	 */
+	wait_queue_head_t		ns_waitq;
+	/** LDLM pool structure for this namespace */
+	struct ldlm_pool	ns_pool;
+	/** Definition of how eagerly unused locks will be released from LRU */
+	ldlm_appetite_t		ns_appetite;
+
+	/**
+	 * If more than \a ns_contended_locks are found, the resource is
+	 * considered to be contended. Lock enqueues might specify that no
+	 * contended locks should be granted
+	 */
+	unsigned		ns_contended_locks;
+
+	/**
+	 * The resources in this namespace remember contended state during
+	 * \a ns_contention_time, in seconds.
+	 */
+	unsigned		ns_contention_time;
+
+	/**
+	 * Limit size of contended extent locks, in bytes.
+	 * If extended lock is requested for more then this many bytes and
+	 * caller instructs us not to grant contended locks, we would disregard
+	 * such a request.
+	 */
+	unsigned		ns_max_nolock_size;
+
+	/** Limit of parallel AST RPC count. */
+	unsigned		ns_max_parallel_ast;
+
+	/** Callback to cancel locks before replaying it during recovery. */
+	ldlm_cancel_for_recovery ns_cancel_for_recovery;
+
+	/** LDLM lock stats */
+	struct lprocfs_stats	*ns_stats;
+
+	/**
+	 * Flag to indicate namespace is being freed. Used to determine if
+	 * recalculation of LDLM pool statistics should be skipped.
+	 */
+	unsigned		ns_stopping:1;
+};
+
+/**
+ * Returns 1 if namespace \a ns is a client namespace.
+ */
+static inline int ns_is_client(struct ldlm_namespace *ns)
+{
+	LASSERT(ns != NULL);
+	LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT |
+				    LDLM_NAMESPACE_SERVER)));
+	LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT ||
+		ns->ns_client == LDLM_NAMESPACE_SERVER);
+	return ns->ns_client == LDLM_NAMESPACE_CLIENT;
+}
+
+/**
+ * Returns 1 if namespace \a ns is a server namespace.
+ */
+static inline int ns_is_server(struct ldlm_namespace *ns)
+{
+	LASSERT(ns != NULL);
+	LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT |
+				    LDLM_NAMESPACE_SERVER)));
+	LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT ||
+		ns->ns_client == LDLM_NAMESPACE_SERVER);
+	return ns->ns_client == LDLM_NAMESPACE_SERVER;
+}
+
+/**
+ * Returns 1 if namespace \a ns supports early lock cancel (ELC).
+ */
+static inline int ns_connect_cancelset(struct ldlm_namespace *ns)
+{
+	LASSERT(ns != NULL);
+	return !!(ns->ns_connect_flags & OBD_CONNECT_CANCELSET);
+}
+
+/**
+ * Returns 1 if this namespace supports lru_resize.
+ */
+static inline int ns_connect_lru_resize(struct ldlm_namespace *ns)
+{
+	LASSERT(ns != NULL);
+	return !!(ns->ns_connect_flags & OBD_CONNECT_LRU_RESIZE);
+}
+
+static inline void ns_register_cancel(struct ldlm_namespace *ns,
+				      ldlm_cancel_for_recovery arg)
+{
+	LASSERT(ns != NULL);
+	ns->ns_cancel_for_recovery = arg;
+}
+
+struct ldlm_lock;
+
+/** Type for blocking callback function of a lock. */
+typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock,
+				      struct ldlm_lock_desc *new, void *data,
+				      int flag);
+/** Type for completion callback function of a lock. */
+typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, __u64 flags,
+					void *data);
+/** Type for glimpse callback function of a lock. */
+typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data);
+/** Type for weight callback function of a lock. */
+typedef unsigned long (*ldlm_weigh_callback)(struct ldlm_lock *lock);
+
+/** Work list for sending GL ASTs to multiple locks. */
+struct ldlm_glimpse_work {
+	struct ldlm_lock	*gl_lock; /* lock to glimpse */
+	struct list_head		 gl_list; /* linkage to other gl work structs */
+	__u32			 gl_flags;/* see LDLM_GL_WORK_* below */
+	union ldlm_gl_desc	*gl_desc; /* glimpse descriptor to be packed in
+					   * glimpse callback request */
+};
+
+/** The ldlm_glimpse_work is allocated on the stack and should not be freed. */
+#define LDLM_GL_WORK_NOFREE 0x1
+
+/** Interval node data for each LDLM_EXTENT lock. */
+struct ldlm_interval {
+	struct interval_node	li_node;  /* node for tree management */
+	struct list_head		li_group; /* the locks which have the same
+					   * policy - group of the policy */
+};
+#define to_ldlm_interval(n) container_of(n, struct ldlm_interval, li_node)
+
+/**
+ * Interval tree for extent locks.
+ * The interval tree must be accessed under the resource lock.
+ * Interval trees are used for granted extent locks to speed up conflicts
+ * lookup. See ldlm/interval_tree.c for more details.
+ */
+struct ldlm_interval_tree {
+	/** Tree size. */
+	int			lit_size;
+	ldlm_mode_t		lit_mode;  /* lock mode */
+	struct interval_node	*lit_root; /* actual ldlm_interval */
+};
+
+/** Whether to track references to exports by LDLM locks. */
+#define LUSTRE_TRACKS_LOCK_EXP_REFS (0)
+
+/** Cancel flags. */
+typedef enum {
+	LCF_ASYNC      = 0x1, /* Cancel locks asynchronously. */
+	LCF_LOCAL      = 0x2, /* Cancel locks locally, not notifing server */
+	LCF_BL_AST     = 0x4, /* Cancel locks marked as LDLM_FL_BL_AST
+			       * in the same RPC */
+} ldlm_cancel_flags_t;
+
+struct ldlm_flock {
+	__u64 start;
+	__u64 end;
+	__u64 owner;
+	__u64 blocking_owner;
+	struct obd_export *blocking_export;
+	/* Protected by the hash lock */
+	__u32 blocking_refs;
+	__u32 pid;
+};
+
+typedef union {
+	struct ldlm_extent l_extent;
+	struct ldlm_flock l_flock;
+	struct ldlm_inodebits l_inodebits;
+} ldlm_policy_data_t;
+
+void ldlm_convert_policy_to_wire(ldlm_type_t type,
+				 const ldlm_policy_data_t *lpolicy,
+				 ldlm_wire_policy_data_t *wpolicy);
+void ldlm_convert_policy_to_local(struct obd_export *exp, ldlm_type_t type,
+				  const ldlm_wire_policy_data_t *wpolicy,
+				  ldlm_policy_data_t *lpolicy);
+
+enum lvb_type {
+	LVB_T_NONE	= 0,
+	LVB_T_OST	= 1,
+	LVB_T_LQUOTA	= 2,
+	LVB_T_LAYOUT	= 3,
+};
+
+/**
+ * LDLM lock structure
+ *
+ * Represents a single LDLM lock and its state in memory. Each lock is
+ * associated with a single ldlm_resource, the object which is being
+ * locked. There may be multiple ldlm_locks on a single resource,
+ * depending on the lock type and whether the locks are conflicting or
+ * not.
+ */
+struct ldlm_lock {
+	/**
+	 * Local lock handle.
+	 * When remote side wants to tell us about a lock, they address
+	 * it by this opaque handle.  The handle does not hold a
+	 * reference on the ldlm_lock, so it can be safely passed to
+	 * other threads or nodes. When the lock needs to be accessed
+	 * from the handle, it is looked up again in the lock table, and
+	 * may no longer exist.
+	 *
+	 * Must be first in the structure.
+	 */
+	struct portals_handle	l_handle;
+	/**
+	 * Lock reference count.
+	 * This is how many users have pointers to actual structure, so that
+	 * we do not accidentally free lock structure that is in use.
+	 */
+	atomic_t		l_refc;
+	/**
+	 * Internal spinlock protects l_resource.  We should hold this lock
+	 * first before taking res_lock.
+	 */
+	spinlock_t		l_lock;
+	/**
+	 * Pointer to actual resource this lock is in.
+	 * ldlm_lock_change_resource() can change this.
+	 */
+	struct ldlm_resource	*l_resource;
+	/**
+	 * List item for client side LRU list.
+	 * Protected by ns_lock in struct ldlm_namespace.
+	 */
+	struct list_head		l_lru;
+	/**
+	 * Linkage to resource's lock queues according to current lock state.
+	 * (could be granted, waiting or converting)
+	 * Protected by lr_lock in struct ldlm_resource.
+	 */
+	struct list_head		l_res_link;
+	/**
+	 * Tree node for ldlm_extent.
+	 */
+	struct ldlm_interval	*l_tree_node;
+	/**
+	 * Per export hash of locks.
+	 * Protected by per-bucket exp->exp_lock_hash locks.
+	 */
+	struct hlist_node	l_exp_hash;
+	/**
+	 * Per export hash of flock locks.
+	 * Protected by per-bucket exp->exp_flock_hash locks.
+	 */
+	struct hlist_node	l_exp_flock_hash;
+	/**
+	 * Requested mode.
+	 * Protected by lr_lock.
+	 */
+	ldlm_mode_t		l_req_mode;
+	/**
+	 * Granted mode, also protected by lr_lock.
+	 */
+	ldlm_mode_t		l_granted_mode;
+	/** Lock completion handler pointer. Called when lock is granted. */
+	ldlm_completion_callback l_completion_ast;
+	/**
+	 * Lock blocking AST handler pointer.
+	 * It plays two roles:
+	 * - as a notification of an attempt to queue a conflicting lock (once)
+	 * - as a notification when the lock is being cancelled.
+	 *
+	 * As such it's typically called twice: once for the initial conflict
+	 * and then once more when the last user went away and the lock is
+	 * cancelled (could happen recursively).
+	 */
+	ldlm_blocking_callback	l_blocking_ast;
+	/**
+	 * Lock glimpse handler.
+	 * Glimpse handler is used to obtain LVB updates from a client by
+	 * server
+	 */
+	ldlm_glimpse_callback	l_glimpse_ast;
+
+	/** XXX apparently unused "weight" handler. To be removed? */
+	ldlm_weigh_callback	l_weigh_ast;
+
+	/**
+	 * Lock export.
+	 * This is a pointer to actual client export for locks that were granted
+	 * to clients. Used server-side.
+	 */
+	struct obd_export	*l_export;
+	/**
+	 * Lock connection export.
+	 * Pointer to server export on a client.
+	 */
+	struct obd_export	*l_conn_export;
+
+	/**
+	 * Remote lock handle.
+	 * If the lock is remote, this is the handle of the other side lock
+	 * (l_handle)
+	 */
+	struct lustre_handle	l_remote_handle;
+
+	/**
+	 * Representation of private data specific for a lock type.
+	 * Examples are: extent range for extent lock or bitmask for ibits locks
+	 */
+	ldlm_policy_data_t	l_policy_data;
+
+	/**
+	 * Lock state flags.
+	 * Like whenever we receive any blocking requests for this lock, etc.
+	 * Protected by lr_lock.
+	 */
+	__u64			l_flags;
+	/**
+	 * Lock r/w usage counters.
+	 * Protected by lr_lock.
+	 */
+	__u32			l_readers;
+	__u32			l_writers;
+	/**
+	 * If the lock is granted, a process sleeps on this waitq to learn when
+	 * it's no longer in use.  If the lock is not granted, a process sleeps
+	 * on this waitq to learn when it becomes granted.
+	 */
+	wait_queue_head_t		l_waitq;
+
+	/**
+	 * Seconds. It will be updated if there is any activity related to
+	 * the lock, e.g. enqueue the lock or send blocking AST.
+	 */
+	cfs_time_t		l_last_activity;
+
+	/**
+	 * Time last used by e.g. being matched by lock match.
+	 * Jiffies. Should be converted to time if needed.
+	 */
+	cfs_time_t		l_last_used;
+
+	/** Originally requested extent for the extent lock. */
+	struct ldlm_extent	l_req_extent;
+
+	unsigned int		l_failed:1,
+	/**
+	 * Set for locks that were removed from class hash table and will be
+	 * destroyed when last reference to them is released. Set by
+	 * ldlm_lock_destroy_internal().
+	 *
+	 * Protected by lock and resource locks.
+	 */
+				l_destroyed:1,
+	/*
+	 * it's set in lock_res_and_lock() and unset in unlock_res_and_lock().
+	 *
+	 * NB: compared with check_res_locked(), checking this bit is cheaper.
+	 * Also, spin_is_locked() is deprecated for kernel code; one reason is
+	 * because it works only for SMP so user needs to add extra macros like
+	 * LASSERT_SPIN_LOCKED for uniprocessor kernels.
+	 */
+				l_res_locked:1,
+	/*
+	 * It's set once we call ldlm_add_waiting_lock_res_locked()
+	 * to start the lock-timeout timer and it will never be reset.
+	 *
+	 * Protected by lock_res_and_lock().
+	 */
+				l_waited:1,
+	/** Flag whether this is a server namespace lock. */
+				l_ns_srv:1;
+
+	/*
+	 * Client-side-only members.
+	 */
+
+	enum lvb_type	      l_lvb_type;
+
+	/**
+	 * Temporary storage for a LVB received during an enqueue operation.
+	 */
+	__u32			l_lvb_len;
+	void			*l_lvb_data;
+
+	/** Private storage for lock user. Opaque to LDLM. */
+	void			*l_ast_data;
+
+	/*
+	 * Server-side-only members.
+	 */
+
+	/**
+	 * Connection cookie for the client originating the operation.
+	 * Used by Commit on Share (COS) code. Currently only used for
+	 * inodebits locks on MDS.
+	 */
+	__u64			l_client_cookie;
+
+	/**
+	 * List item for locks waiting for cancellation from clients.
+	 * The lists this could be linked into are:
+	 * waiting_locks_list (protected by waiting_locks_spinlock),
+	 * then if the lock timed out, it is moved to
+	 * expired_lock_thread.elt_expired_locks for further processing.
+	 * Protected by elt_lock.
+	 */
+	struct list_head		l_pending_chain;
+
+	/**
+	 * Set when lock is sent a blocking AST. Time in seconds when timeout
+	 * is reached and client holding this lock could be evicted.
+	 * This timeout could be further extended by e.g. certain IO activity
+	 * under this lock.
+	 * \see ost_rw_prolong_locks
+	 */
+	cfs_time_t		l_callback_timeout;
+
+	/** Local PID of process which created this lock. */
+	__u32			l_pid;
+
+	/**
+	 * Number of times blocking AST was sent for this lock.
+	 * This is for debugging. Valid values are 0 and 1, if there is an
+	 * attempt to send blocking AST more than once, an assertion would be
+	 * hit. \see ldlm_work_bl_ast_lock
+	 */
+	int			l_bl_ast_run;
+	/** List item ldlm_add_ast_work_item() for case of blocking ASTs. */
+	struct list_head		l_bl_ast;
+	/** List item ldlm_add_ast_work_item() for case of completion ASTs. */
+	struct list_head		l_cp_ast;
+	/** For ldlm_add_ast_work_item() for "revoke" AST used in COS. */
+	struct list_head		l_rk_ast;
+
+	/**
+	 * Pointer to a conflicting lock that caused blocking AST to be sent
+	 * for this lock
+	 */
+	struct ldlm_lock	*l_blocking_lock;
+
+	/**
+	 * Protected by lr_lock, linkages to "skip lists".
+	 * For more explanations of skip lists see ldlm/ldlm_inodebits.c
+	 */
+	struct list_head		l_sl_mode;
+	struct list_head		l_sl_policy;
+
+	/** Reference tracking structure to debug leaked locks. */
+	struct lu_ref		l_reference;
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+	/* Debugging stuff for bug 20498, for tracking export references. */
+	/** number of export references taken */
+	int			l_exp_refs_nr;
+	/** link all locks referencing one export */
+	struct list_head		l_exp_refs_link;
+	/** referenced export object */
+	struct obd_export	*l_exp_refs_target;
+#endif
+	/**
+	 * export blocking dlm lock list, protected by
+	 * l_export->exp_bl_list_lock.
+	 * Lock order of waiting_lists_spinlock, exp_bl_list_lock and res lock
+	 * is: res lock -> exp_bl_list_lock -> wanting_lists_spinlock.
+	 */
+	struct list_head		l_exp_list;
+};
+
+/**
+ * LDLM resource description.
+ * Basically, resource is a representation for a single object.
+ * Object has a name which is currently 4 64-bit integers. LDLM user is
+ * responsible for creation of a mapping between objects it wants to be
+ * protected and resource names.
+ *
+ * A resource can only hold locks of a single lock type, though there may be
+ * multiple ldlm_locks on a single resource, depending on the lock type and
+ * whether the locks are conflicting or not.
+ */
+struct ldlm_resource {
+	struct ldlm_ns_bucket	*lr_ns_bucket;
+
+	/**
+	 * List item for list in namespace hash.
+	 * protected by ns_lock
+	 */
+	struct hlist_node	lr_hash;
+
+	/** Spinlock to protect locks under this resource. */
+	spinlock_t		lr_lock;
+
+	/**
+	 * protected by lr_lock
+	 * @{ */
+	/** List of locks in granted state */
+	struct list_head		lr_granted;
+	/** List of locks waiting to change their granted mode (converted) */
+	struct list_head		lr_converting;
+	/**
+	 * List of locks that could not be granted due to conflicts and
+	 * that are waiting for conflicts to go away */
+	struct list_head		lr_waiting;
+	/** @} */
+
+	/* XXX No longer needed? Remove ASAP */
+	ldlm_mode_t		lr_most_restr;
+
+	/** Type of locks this resource can hold. Only one type per resource. */
+	ldlm_type_t		lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */
+
+	/** Resource name */
+	struct ldlm_res_id	lr_name;
+	/** Reference count for this resource */
+	atomic_t		lr_refcount;
+
+	/**
+	 * Interval trees (only for extent locks) for all modes of this resource
+	 */
+	struct ldlm_interval_tree lr_itree[LCK_MODE_NUM];
+
+	/**
+	 * Server-side-only lock value block elements.
+	 * To serialize lvbo_init.
+	 */
+	struct mutex		lr_lvb_mutex;
+	int			lr_lvb_len;
+	/** protected by lr_lock */
+	void			*lr_lvb_data;
+
+	/** When the resource was considered as contended. */
+	cfs_time_t		lr_contention_time;
+	/** List of references to this resource. For debugging. */
+	struct lu_ref		lr_reference;
+
+	struct inode		*lr_lvb_inode;
+};
+
+static inline bool ldlm_has_layout(struct ldlm_lock *lock)
+{
+	return lock->l_resource->lr_type == LDLM_IBITS &&
+		lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_LAYOUT;
+}
+
+static inline char *
+ldlm_ns_name(struct ldlm_namespace *ns)
+{
+	return ns->ns_rs_hash->hs_name;
+}
+
+static inline struct ldlm_namespace *
+ldlm_res_to_ns(struct ldlm_resource *res)
+{
+	return res->lr_ns_bucket->nsb_namespace;
+}
+
+static inline struct ldlm_namespace *
+ldlm_lock_to_ns(struct ldlm_lock *lock)
+{
+	return ldlm_res_to_ns(lock->l_resource);
+}
+
+static inline char *
+ldlm_lock_to_ns_name(struct ldlm_lock *lock)
+{
+	return ldlm_ns_name(ldlm_lock_to_ns(lock));
+}
+
+static inline struct adaptive_timeout *
+ldlm_lock_to_ns_at(struct ldlm_lock *lock)
+{
+	return &lock->l_resource->lr_ns_bucket->nsb_at_estimate;
+}
+
+static inline int ldlm_lvbo_init(struct ldlm_resource *res)
+{
+	struct ldlm_namespace *ns = ldlm_res_to_ns(res);
+
+	if (ns->ns_lvbo != NULL && ns->ns_lvbo->lvbo_init != NULL)
+		return ns->ns_lvbo->lvbo_init(res);
+
+	return 0;
+}
+
+static inline int ldlm_lvbo_size(struct ldlm_lock *lock)
+{
+	struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+
+	if (ns->ns_lvbo != NULL && ns->ns_lvbo->lvbo_size != NULL)
+		return ns->ns_lvbo->lvbo_size(lock);
+
+	return 0;
+}
+
+static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int len)
+{
+	struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+
+	if (ns->ns_lvbo != NULL) {
+		LASSERT(ns->ns_lvbo->lvbo_fill != NULL);
+		return ns->ns_lvbo->lvbo_fill(lock, buf, len);
+	}
+	return 0;
+}
+
+struct ldlm_ast_work {
+	struct ldlm_lock      *w_lock;
+	int		    w_blocking;
+	struct ldlm_lock_desc  w_desc;
+	struct list_head	     w_list;
+	int		    w_flags;
+	void		  *w_data;
+	int		    w_datalen;
+};
+
+/**
+ * Common ldlm_enqueue parameters
+ */
+struct ldlm_enqueue_info {
+	__u32 ei_type;   /** Type of the lock being enqueued. */
+	__u32 ei_mode;   /** Mode of the lock being enqueued. */
+	void *ei_cb_bl;  /** blocking lock callback */
+	void *ei_cb_cp;  /** lock completion callback */
+	void *ei_cb_gl;  /** lock glimpse callback */
+	void *ei_cb_wg;  /** lock weigh callback */
+	void *ei_cbdata; /** Data to be passed into callbacks. */
+};
+
+extern struct obd_ops ldlm_obd_ops;
+
+extern char *ldlm_lockname[];
+extern char *ldlm_typename[];
+extern char *ldlm_it2str(int it);
+
+/**
+ * Just a fancy CDEBUG call with log level preset to LDLM_DEBUG.
+ * For the cases where we do not have actual lock to print along
+ * with a debugging message that is ldlm-related
+ */
+#define LDLM_DEBUG_NOLOCK(format, a...)			\
+	CDEBUG(D_DLMTRACE, "### " format "\n" , ##a)
+
+/**
+ * Support function for lock information printing into debug logs.
+ * \see LDLM_DEBUG
+ */
+#define ldlm_lock_debug(msgdata, mask, cdls, lock, fmt, a...) do {      \
+	CFS_CHECK_STACK(msgdata, mask, cdls);			   \
+									\
+	if (((mask) & D_CANTMASK) != 0 ||			       \
+	    ((libcfs_debug & (mask)) != 0 &&			    \
+	     (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0))	  \
+		_ldlm_lock_debug(lock, msgdata, fmt, ##a);	      \
+} while(0)
+
+void _ldlm_lock_debug(struct ldlm_lock *lock,
+		      struct libcfs_debug_msg_data *data,
+		      const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+
+/**
+ * Rate-limited version of lock printing function.
+ */
+#define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do {			 \
+	static cfs_debug_limit_state_t _ldlm_cdls;			   \
+	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, &_ldlm_cdls);	      \
+	ldlm_lock_debug(&msgdata, mask, &_ldlm_cdls, lock, "### " fmt , ##a);\
+} while (0)
+
+#define LDLM_ERROR(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_ERROR, lock, fmt, ## a)
+#define LDLM_WARN(lock, fmt, a...)  LDLM_DEBUG_LIMIT(D_WARNING, lock, fmt, ## a)
+
+/** Non-rate-limited lock printing function for debugging purposes. */
+#define LDLM_DEBUG(lock, fmt, a...)   do {				  \
+	if (likely(lock != NULL)) {					    \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_DLMTRACE, NULL);      \
+		ldlm_lock_debug(&msgdata, D_DLMTRACE, NULL, lock,	    \
+				"### " fmt , ##a);			    \
+	} else {							    \
+		LDLM_DEBUG_NOLOCK("no dlm lock: " fmt, ##a);		    \
+	}								    \
+} while (0)
+
+typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, __u64 *flags,
+				      int first_enq, ldlm_error_t *err,
+				      struct list_head *work_list);
+
+/**
+ * Return values for lock iterators.
+ * Also used during deciding of lock grants and cancellations.
+ */
+#define LDLM_ITER_CONTINUE 1 /* keep iterating */
+#define LDLM_ITER_STOP     2 /* stop iterating */
+
+typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *);
+typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *);
+
+/** \defgroup ldlm_iterator Lock iterators
+ *
+ * LDLM provides for a way to iterate through every lock on a resource or
+ * namespace or every resource in a namespace.
+ * @{ */
+int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter,
+			  void *closure);
+void ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter,
+			    void *closure);
+int ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *,
+			  ldlm_iterator_t iter, void *data);
+/** @} ldlm_iterator */
+
+int ldlm_replay_locks(struct obd_import *imp);
+
+/* ldlm_flock.c */
+int ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
+
+/* ldlm_extent.c */
+__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms);
+
+struct ldlm_callback_suite {
+	ldlm_completion_callback lcs_completion;
+	ldlm_blocking_callback   lcs_blocking;
+	ldlm_glimpse_callback    lcs_glimpse;
+	ldlm_weigh_callback      lcs_weigh;
+};
+
+/* ldlm_lockd.c */
+int ldlm_del_waiting_lock(struct ldlm_lock *lock);
+int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, int timeout);
+int ldlm_get_ref(void);
+void ldlm_put_ref(void);
+int ldlm_init_export(struct obd_export *exp);
+void ldlm_destroy_export(struct obd_export *exp);
+struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req);
+
+/* ldlm_lock.c */
+void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg);
+void ldlm_lock2handle(const struct ldlm_lock *lock,
+		      struct lustre_handle *lockh);
+struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, __u64 flags);
+void ldlm_cancel_callback(struct ldlm_lock *);
+int ldlm_lock_remove_from_lru(struct ldlm_lock *);
+int ldlm_lock_set_data(struct lustre_handle *, void *);
+
+/**
+ * Obtain a lock reference by its handle.
+ */
+static inline struct ldlm_lock *ldlm_handle2lock(const struct lustre_handle *h)
+{
+	return __ldlm_handle2lock(h, 0);
+}
+
+#define LDLM_LOCK_REF_DEL(lock) \
+	lu_ref_del(&lock->l_reference, "handle", current)
+
+static inline struct ldlm_lock *
+ldlm_handle2lock_long(const struct lustre_handle *h, __u64 flags)
+{
+	struct ldlm_lock *lock;
+
+	lock = __ldlm_handle2lock(h, flags);
+	if (lock != NULL)
+		LDLM_LOCK_REF_DEL(lock);
+	return lock;
+}
+
+/**
+ * Update Lock Value Block Operations (LVBO) on a resource taking into account
+ * data from reqest \a r
+ */
+static inline int ldlm_res_lvbo_update(struct ldlm_resource *res,
+				       struct ptlrpc_request *r, int increase)
+{
+	if (ldlm_res_to_ns(res)->ns_lvbo &&
+	    ldlm_res_to_ns(res)->ns_lvbo->lvbo_update) {
+		return ldlm_res_to_ns(res)->ns_lvbo->lvbo_update(res, r,
+								 increase);
+	}
+	return 0;
+}
+
+int ldlm_error2errno(ldlm_error_t error);
+ldlm_error_t ldlm_errno2error(int err_no); /* don't call it `errno': this
+					    * confuses user-space. */
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+void ldlm_dump_export_locks(struct obd_export *exp);
+#endif
+
+/**
+ * Release a temporary lock reference obtained by ldlm_handle2lock() or
+ * __ldlm_handle2lock().
+ */
+#define LDLM_LOCK_PUT(lock)		     \
+do {					    \
+	LDLM_LOCK_REF_DEL(lock);		\
+	/*LDLM_DEBUG((lock), "put");*/	  \
+	ldlm_lock_put(lock);		    \
+} while (0)
+
+/**
+ * Release a lock reference obtained by some other means (see
+ * LDLM_LOCK_PUT()).
+ */
+#define LDLM_LOCK_RELEASE(lock)		 \
+do {					    \
+	/*LDLM_DEBUG((lock), "put");*/	  \
+	ldlm_lock_put(lock);		    \
+} while (0)
+
+#define LDLM_LOCK_GET(lock)		     \
+({					      \
+	ldlm_lock_get(lock);		    \
+	/*LDLM_DEBUG((lock), "get");*/	  \
+	lock;				   \
+})
+
+#define ldlm_lock_list_put(head, member, count)		     \
+({								  \
+	struct ldlm_lock *_lock, *_next;			    \
+	int c = count;					      \
+	list_for_each_entry_safe(_lock, _next, head, member) {  \
+		if (c-- == 0)				       \
+			break;				      \
+		list_del_init(&_lock->member);		  \
+		LDLM_LOCK_RELEASE(_lock);			   \
+	}							   \
+	LASSERT(c <= 0);					    \
+})
+
+struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
+void ldlm_lock_put(struct ldlm_lock *lock);
+void ldlm_lock_destroy(struct ldlm_lock *lock);
+void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
+void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode);
+int  ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_fail_match_locked(struct ldlm_lock *lock);
+void ldlm_lock_fail_match(struct ldlm_lock *lock);
+void ldlm_lock_allow_match(struct ldlm_lock *lock);
+void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
+ldlm_mode_t ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
+			    const struct ldlm_res_id *, ldlm_type_t type,
+			    ldlm_policy_data_t *, ldlm_mode_t mode,
+			    struct lustre_handle *, int unref);
+ldlm_mode_t ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
+					__u64 *bits);
+struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
+					__u32 *flags);
+void ldlm_lock_downgrade(struct ldlm_lock *lock, int new_mode);
+void ldlm_lock_cancel(struct ldlm_lock *lock);
+void ldlm_reprocess_all(struct ldlm_resource *res);
+void ldlm_reprocess_all_ns(struct ldlm_namespace *ns);
+void ldlm_lock_dump_handle(int level, struct lustre_handle *);
+void ldlm_unlink_lock_skiplist(struct ldlm_lock *req);
+
+/* resource.c */
+struct ldlm_namespace *
+ldlm_namespace_new(struct obd_device *obd, char *name,
+		   ldlm_side_t client, ldlm_appetite_t apt,
+		   ldlm_ns_type_t ns_type);
+int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags);
+void ldlm_namespace_free(struct ldlm_namespace *ns,
+			 struct obd_import *imp, int force);
+void ldlm_namespace_register(struct ldlm_namespace *ns, ldlm_side_t client);
+void ldlm_namespace_unregister(struct ldlm_namespace *ns, ldlm_side_t client);
+void ldlm_namespace_move_locked(struct ldlm_namespace *ns, ldlm_side_t client);
+struct ldlm_namespace *ldlm_namespace_first_locked(ldlm_side_t client);
+void ldlm_namespace_get(struct ldlm_namespace *ns);
+void ldlm_namespace_put(struct ldlm_namespace *ns);
+int ldlm_proc_setup(void);
+#ifdef LPROCFS
+void ldlm_proc_cleanup(void);
+#else
+static inline void ldlm_proc_cleanup(void) {}
+#endif
+
+/* resource.c - internal */
+struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
+					struct ldlm_resource *parent,
+					const struct ldlm_res_id *,
+					ldlm_type_t type, int create);
+struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res);
+int ldlm_resource_putref(struct ldlm_resource *res);
+void ldlm_resource_add_lock(struct ldlm_resource *res,
+			    struct list_head *head,
+			    struct ldlm_lock *lock);
+void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
+void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
+void ldlm_dump_all_namespaces(ldlm_side_t client, int level);
+void ldlm_namespace_dump(int level, struct ldlm_namespace *);
+void ldlm_resource_dump(int level, struct ldlm_resource *);
+int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
+			      const struct ldlm_res_id *);
+
+#define LDLM_RESOURCE_ADDREF(res) do {				  \
+	lu_ref_add_atomic(&(res)->lr_reference, __FUNCTION__, current);  \
+} while (0)
+
+#define LDLM_RESOURCE_DELREF(res) do {				  \
+	lu_ref_del(&(res)->lr_reference, __FUNCTION__, current);  \
+} while (0)
+
+/* ldlm_request.c */
+int ldlm_expired_completion_wait(void *data);
+/** \defgroup ldlm_local_ast Default AST handlers for local locks
+ * These AST handlers are typically used for server-side local locks and are
+ * also used by client-side lock handlers to perform minimum level base
+ * processing.
+ * @{ */
+int ldlm_blocking_ast_nocheck(struct ldlm_lock *lock);
+int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+		      void *data, int flag);
+int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp);
+int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data);
+int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
+/** @} ldlm_local_ast */
+
+/** \defgroup ldlm_cli_api API to operate on locks from actual LDLM users.
+ * These are typically used by client and server (*_local versions)
+ * to obtain and release locks.
+ * @{ */
+int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
+		     struct ldlm_enqueue_info *einfo,
+		     const struct ldlm_res_id *res_id,
+		     ldlm_policy_data_t const *policy, __u64 *flags,
+		     void *lvb, __u32 lvb_len, enum lvb_type lvb_type,
+		     struct lustre_handle *lockh, int async);
+int ldlm_prep_enqueue_req(struct obd_export *exp,
+			  struct ptlrpc_request *req,
+			  struct list_head *cancels,
+			  int count);
+int ldlm_prep_elc_req(struct obd_export *exp,
+		      struct ptlrpc_request *req,
+		      int version, int opc, int canceloff,
+		      struct list_head *cancels, int count);
+
+struct ptlrpc_request *ldlm_enqueue_pack(struct obd_export *exp, int lvb_len);
+int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req,
+			 const struct ldlm_request *dlm_req,
+			 const struct ldlm_callback_suite *cbs);
+int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
+			  ldlm_type_t type, __u8 with_policy, ldlm_mode_t mode,
+			  __u64 *flags, void *lvb, __u32 lvb_len,
+			  struct lustre_handle *lockh, int rc);
+int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
+			   const struct ldlm_res_id *res_id,
+			   ldlm_type_t type, ldlm_policy_data_t *policy,
+			   ldlm_mode_t mode, __u64 *flags,
+			   ldlm_blocking_callback blocking,
+			   ldlm_completion_callback completion,
+			   ldlm_glimpse_callback glimpse,
+			   void *data, __u32 lvb_len, enum lvb_type lvb_type,
+			   const __u64 *client_cookie,
+			   struct lustre_handle *lockh);
+int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new,
+		    void *data, __u32 data_len);
+int ldlm_cli_convert(struct lustre_handle *, int new_mode, __u32 *flags);
+int ldlm_cli_update_pool(struct ptlrpc_request *req);
+int ldlm_cli_cancel(struct lustre_handle *lockh,
+		    ldlm_cancel_flags_t cancel_flags);
+int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *,
+			   ldlm_cancel_flags_t flags, void *opaque);
+int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
+				    const struct ldlm_res_id *res_id,
+				    ldlm_policy_data_t *policy,
+				    ldlm_mode_t mode,
+				    ldlm_cancel_flags_t flags,
+				    void *opaque);
+int ldlm_cli_cancel_req(struct obd_export *exp, struct list_head *head,
+			int count, ldlm_cancel_flags_t flags);
+int ldlm_cancel_resource_local(struct ldlm_resource *res,
+			       struct list_head *cancels,
+			       ldlm_policy_data_t *policy,
+			       ldlm_mode_t mode, int lock_flags,
+			       ldlm_cancel_flags_t cancel_flags, void *opaque);
+int ldlm_cli_cancel_list_local(struct list_head *cancels, int count,
+			       ldlm_cancel_flags_t flags);
+int ldlm_cli_cancel_list(struct list_head *head, int count,
+			 struct ptlrpc_request *req, ldlm_cancel_flags_t flags);
+/** @} ldlm_cli_api */
+
+/* mds/handler.c */
+/* This has to be here because recursive inclusion sucks. */
+int intent_disposition(struct ldlm_reply *rep, int flag);
+void intent_set_disposition(struct ldlm_reply *rep, int flag);
+
+
+/* ioctls for trying requests */
+#define IOC_LDLM_TYPE		   'f'
+#define IOC_LDLM_MIN_NR		 40
+
+#define IOC_LDLM_TEST		   _IOWR('f', 40, long)
+#define IOC_LDLM_DUMP		   _IOWR('f', 41, long)
+#define IOC_LDLM_REGRESS_START	  _IOWR('f', 42, long)
+#define IOC_LDLM_REGRESS_STOP	   _IOWR('f', 43, long)
+#define IOC_LDLM_MAX_NR		 43
+
+/**
+ * "Modes" of acquiring lock_res, necessary to tell lockdep that taking more
+ * than one lock_res is dead-lock safe.
+ */
+enum lock_res_type {
+	LRT_NORMAL,
+	LRT_NEW
+};
+
+/** Lock resource. */
+static inline void lock_res(struct ldlm_resource *res)
+{
+	spin_lock(&res->lr_lock);
+}
+
+/** Lock resource with a way to instruct lockdep code about nestedness-safe. */
+static inline void lock_res_nested(struct ldlm_resource *res,
+				   enum lock_res_type mode)
+{
+	spin_lock_nested(&res->lr_lock, mode);
+}
+
+/** Unlock resource. */
+static inline void unlock_res(struct ldlm_resource *res)
+{
+	spin_unlock(&res->lr_lock);
+}
+
+/** Check if resource is already locked, assert if not. */
+static inline void check_res_locked(struct ldlm_resource *res)
+{
+	LASSERT(spin_is_locked(&res->lr_lock));
+}
+
+struct ldlm_resource * lock_res_and_lock(struct ldlm_lock *lock);
+void unlock_res_and_lock(struct ldlm_lock *lock);
+
+/* ldlm_pool.c */
+/** \defgroup ldlm_pools Various LDLM pool related functions
+ * There are not used outside of ldlm.
+ * @{
+ */
+void ldlm_pools_recalc(ldlm_side_t client);
+int ldlm_pools_init(void);
+void ldlm_pools_fini(void);
+
+int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
+		   int idx, ldlm_side_t client);
+int ldlm_pool_shrink(struct ldlm_pool *pl, int nr,
+		     unsigned int gfp_mask);
+void ldlm_pool_fini(struct ldlm_pool *pl);
+int ldlm_pool_setup(struct ldlm_pool *pl, int limit);
+int ldlm_pool_recalc(struct ldlm_pool *pl);
+__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl);
+__u64 ldlm_pool_get_slv(struct ldlm_pool *pl);
+__u64 ldlm_pool_get_clv(struct ldlm_pool *pl);
+__u32 ldlm_pool_get_limit(struct ldlm_pool *pl);
+void ldlm_pool_set_slv(struct ldlm_pool *pl, __u64 slv);
+void ldlm_pool_set_clv(struct ldlm_pool *pl, __u64 clv);
+void ldlm_pool_set_limit(struct ldlm_pool *pl, __u32 limit);
+void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock);
+void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock);
+/** @} */
+
+#endif
+/** @} LDLM */
diff --git a/drivers/staging/lustre/lustre/include/lustre_eacl.h b/drivers/staging/lustre/lustre/include/lustre_eacl.h
new file mode 100644
index 000000000000..b94f76a3301b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_eacl.h
@@ -0,0 +1,95 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lustre/include/lustre_idmap.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_EACL_H
+#define _LUSTRE_EACL_H
+
+/** \defgroup eacl eacl
+ *
+ * @{
+ */
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+#include <linux/posix_acl_xattr.h>
+
+typedef struct {
+	__u16		   e_tag;
+	__u16		   e_perm;
+	__u32		   e_id;
+	__u32		   e_stat;
+} ext_acl_xattr_entry;
+
+typedef struct {
+	__u32		   a_count;
+	ext_acl_xattr_entry     a_entries[0];
+} ext_acl_xattr_header;
+
+#define CFS_ACL_XATTR_SIZE(count, prefix) \
+	(sizeof(prefix ## _header) + (count) * sizeof(prefix ## _entry))
+
+#define CFS_ACL_XATTR_COUNT(size, prefix) \
+	(((size) - sizeof(prefix ## _header)) / sizeof(prefix ## _entry))
+
+
+extern ext_acl_xattr_header *
+lustre_posix_acl_xattr_2ext(posix_acl_xattr_header *header, int size);
+extern int
+lustre_posix_acl_xattr_filter(posix_acl_xattr_header *header, int size,
+			      posix_acl_xattr_header **out);
+extern void
+lustre_posix_acl_xattr_free(posix_acl_xattr_header *header, int size);
+extern void
+lustre_ext_acl_xattr_free(ext_acl_xattr_header *header);
+extern int
+lustre_acl_xattr_merge2posix(posix_acl_xattr_header *posix_header, int size,
+			     ext_acl_xattr_header *ext_header,
+			     posix_acl_xattr_header **out);
+extern ext_acl_xattr_header *
+lustre_acl_xattr_merge2ext(posix_acl_xattr_header *posix_header, int size,
+			   ext_acl_xattr_header *ext_header);
+
+#endif /* CONFIG_FS_POSIX_ACL */
+
+/** @} eacl */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_export.h b/drivers/staging/lustre/lustre/include/lustre_export.h
new file mode 100644
index 000000000000..d61c020a4643
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_export.h
@@ -0,0 +1,389 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+/** \defgroup obd_export PortalRPC export definitions
+ *
+ * @{
+ */
+
+#ifndef __EXPORT_H
+#define __EXPORT_H
+
+/** \defgroup export export
+ *
+ * @{
+ */
+
+#include <lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+
+struct mds_client_data;
+struct mdt_client_data;
+struct mds_idmap_table;
+struct mdt_idmap_table;
+
+/**
+ * Target-specific export data
+ */
+struct tg_export_data {
+	/** Protects led_lcd below */
+	struct mutex		ted_lcd_lock;
+	/** Per-client data for each export */
+	struct lsd_client_data	*ted_lcd;
+	/** Offset of record in last_rcvd file */
+	loff_t			ted_lr_off;
+	/** Client index in last_rcvd file */
+	int			ted_lr_idx;
+};
+
+/**
+ * MDT-specific export data
+ */
+struct mdt_export_data {
+	struct tg_export_data	med_ted;
+	/** List of all files opened by client on this MDT */
+	struct list_head		med_open_head;
+	spinlock_t		med_open_lock; /* med_open_head, mfd_list */
+	/** Bitmask of all ibit locks this MDT understands */
+	__u64			med_ibits_known;
+	struct mutex		med_idmap_mutex;
+	struct lustre_idmap_table *med_idmap;
+};
+
+struct ec_export_data { /* echo client */
+	struct list_head eced_locks;
+};
+
+/* In-memory access to client data from OST struct */
+/** Filter (oss-side) specific import data */
+struct filter_export_data {
+	struct tg_export_data	fed_ted;
+	spinlock_t		fed_lock;	/**< protects fed_mod_list */
+	long		       fed_dirty;    /* in bytes */
+	long		       fed_grant;    /* in bytes */
+	struct list_head		 fed_mod_list; /* files being modified */
+	int			fed_mod_count;/* items in fed_writing list */
+	long		       fed_pending;  /* bytes just being written */
+	__u32		      fed_group;
+	__u8		       fed_pagesize; /* log2 of client page size */
+};
+
+struct mgs_export_data {
+	struct list_head		med_clients;	/* mgc fs client via this exp */
+	spinlock_t		med_lock;	/* protect med_clients */
+};
+
+/**
+ * per-NID statistics structure.
+ * It tracks access patterns to this export on a per-client-NID basis
+ */
+struct nid_stat {
+	lnet_nid_t	       nid;
+	struct hlist_node	 nid_hash;
+	struct list_head	       nid_list;
+	struct obd_device       *nid_obd;
+	struct proc_dir_entry   *nid_proc;
+	struct lprocfs_stats    *nid_stats;
+	struct lprocfs_stats    *nid_ldlm_stats;
+	atomic_t	     nid_exp_ref_count; /* for obd_nid_stats_hash
+							   exp_nid_stats */
+};
+
+#define nidstat_getref(nidstat)						\
+do {									   \
+	atomic_inc(&(nidstat)->nid_exp_ref_count);			 \
+} while(0)
+
+#define nidstat_putref(nidstat)						\
+do {									   \
+	atomic_dec(&(nidstat)->nid_exp_ref_count);			 \
+	LASSERTF(atomic_read(&(nidstat)->nid_exp_ref_count) >= 0,	  \
+		 "stat %p nid_exp_ref_count < 0\n", nidstat);		  \
+} while(0)
+
+enum obd_option {
+	OBD_OPT_FORCE =	 0x0001,
+	OBD_OPT_FAILOVER =      0x0002,
+	OBD_OPT_ABORT_RECOV =   0x0004,
+};
+
+/**
+ * Export structure. Represents target-side of connection in portals.
+ * Also used in Lustre to connect between layers on the same node when
+ * there is no network-connection in-between.
+ * For every connected client there is an export structure on the server
+ * attached to the same obd device.
+ */
+struct obd_export {
+	/**
+	 * Export handle, it's id is provided to client on connect
+	 * Subsequent client RPCs contain this handle id to identify
+	 * what export they are talking to.
+	 */
+	struct portals_handle     exp_handle;
+	atomic_t	      exp_refcount;
+	/**
+	 * Set of counters below is to track where export references are
+	 * kept. The exp_rpc_count is used for reconnect handling also,
+	 * the cb_count and locks_count are for debug purposes only for now.
+	 * The sum of them should be less than exp_refcount by 3
+	 */
+	atomic_t	      exp_rpc_count; /* RPC references */
+	atomic_t	      exp_cb_count; /* Commit callback references */
+	/** Number of queued replay requests to be processes */
+	atomic_t		  exp_replay_count;
+	atomic_t	      exp_locks_count; /** Lock references */
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+	struct list_head		exp_locks_list;
+	spinlock_t		  exp_locks_list_guard;
+#endif
+	/** UUID of client connected to this export */
+	struct obd_uuid	   exp_client_uuid;
+	/** To link all exports on an obd device */
+	struct list_head		exp_obd_chain;
+	struct hlist_node	  exp_uuid_hash; /** uuid-export hash*/
+	struct hlist_node	  exp_nid_hash; /** nid-export hash */
+	/**
+	 * All exports eligible for ping evictor are linked into a list
+	 * through this field in "most time since last request on this export"
+	 * order
+	 * protected by obd_dev_lock
+	 */
+	struct list_head		exp_obd_chain_timed;
+	/** Obd device of this export */
+	struct obd_device	*exp_obd;
+	/**
+	 * "reverse" import to send requests (e.g. from ldlm) back to client
+	 * exp_lock protect its change
+	 */
+	struct obd_import	*exp_imp_reverse;
+	struct nid_stat	  *exp_nid_stats;
+	struct lprocfs_stats     *exp_md_stats;
+	/** Active connetion */
+	struct ptlrpc_connection *exp_connection;
+	/** Connection count value from last succesful reconnect rpc */
+	__u32		     exp_conn_cnt;
+	/** Hash list of all ldlm locks granted on this export */
+	cfs_hash_t	       *exp_lock_hash;
+	/**
+	 * Hash list for Posix lock deadlock detection, added with
+	 * ldlm_lock::l_exp_flock_hash.
+	 */
+	cfs_hash_t	       *exp_flock_hash;
+	struct list_head		exp_outstanding_replies;
+	struct list_head		exp_uncommitted_replies;
+	spinlock_t		  exp_uncommitted_replies_lock;
+	/** Last committed transno for this export */
+	__u64		     exp_last_committed;
+	/** When was last request received */
+	cfs_time_t		exp_last_request_time;
+	/** On replay all requests waiting for replay are linked here */
+	struct list_head		exp_req_replay_queue;
+	/**
+	 * protects exp_flags, exp_outstanding_replies and the change
+	 * of exp_imp_reverse
+	 */
+	spinlock_t		  exp_lock;
+	/** Compatibility flags for this export are embedded into
+	 *  exp_connect_data */
+	struct obd_connect_data   exp_connect_data;
+	enum obd_option	   exp_flags;
+	unsigned long	     exp_failed:1,
+				  exp_in_recovery:1,
+				  exp_disconnected:1,
+				  exp_connecting:1,
+				  /** VBR: export missed recovery */
+				  exp_delayed:1,
+				  /** VBR: failed version checking */
+				  exp_vbr_failed:1,
+				  exp_req_replay_needed:1,
+				  exp_lock_replay_needed:1,
+				  exp_need_sync:1,
+				  exp_flvr_changed:1,
+				  exp_flvr_adapt:1,
+				  exp_libclient:1, /* liblustre client? */
+				  /* client timed out and tried to reconnect,
+				   * but couldn't because of active rpcs */
+				  exp_abort_active_req:1,
+				  /* if to swap nidtbl entries for 2.2 clients.
+				   * Only used by the MGS to fix LU-1644. */
+				  exp_need_mne_swab:1;
+	/* also protected by exp_lock */
+	enum lustre_sec_part      exp_sp_peer;
+	struct sptlrpc_flavor     exp_flvr;	     /* current */
+	struct sptlrpc_flavor     exp_flvr_old[2];      /* about-to-expire */
+	cfs_time_t		exp_flvr_expire[2];   /* seconds */
+
+	/** protects exp_hp_rpcs */
+	spinlock_t		  exp_rpc_lock;
+	struct list_head		  exp_hp_rpcs;	/* (potential) HP RPCs */
+
+	/** blocking dlm lock list, protected by exp_bl_list_lock */
+	struct list_head		exp_bl_list;
+	spinlock_t		  exp_bl_list_lock;
+
+	/** Target specific data */
+	union {
+		struct tg_export_data     eu_target_data;
+		struct mdt_export_data    eu_mdt_data;
+		struct filter_export_data eu_filter_data;
+		struct ec_export_data     eu_ec_data;
+		struct mgs_export_data    eu_mgs_data;
+	} u;
+};
+
+#define exp_target_data u.eu_target_data
+#define exp_mdt_data    u.eu_mdt_data
+#define exp_filter_data u.eu_filter_data
+#define exp_ec_data     u.eu_ec_data
+
+static inline __u64 *exp_connect_flags_ptr(struct obd_export *exp)
+{
+	return &exp->exp_connect_data.ocd_connect_flags;
+}
+
+static inline __u64 exp_connect_flags(struct obd_export *exp)
+{
+	return *exp_connect_flags_ptr(exp);
+}
+
+static inline int exp_max_brw_size(struct obd_export *exp)
+{
+	LASSERT(exp != NULL);
+	if (exp_connect_flags(exp) & OBD_CONNECT_BRW_SIZE)
+		return exp->exp_connect_data.ocd_brw_size;
+
+	return ONE_MB_BRW_SIZE;
+}
+
+static inline int exp_connect_multibulk(struct obd_export *exp)
+{
+	return exp_max_brw_size(exp) > ONE_MB_BRW_SIZE;
+}
+
+static inline int exp_expired(struct obd_export *exp, cfs_duration_t age)
+{
+	LASSERT(exp->exp_delayed);
+	return cfs_time_before(cfs_time_add(exp->exp_last_request_time, age),
+			       cfs_time_current_sec());
+}
+
+static inline int exp_connect_cancelset(struct obd_export *exp)
+{
+	LASSERT(exp != NULL);
+	return !!(exp_connect_flags(exp) & OBD_CONNECT_CANCELSET);
+}
+
+static inline int exp_connect_lru_resize(struct obd_export *exp)
+{
+	LASSERT(exp != NULL);
+	return !!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE);
+}
+
+static inline int exp_connect_rmtclient(struct obd_export *exp)
+{
+	LASSERT(exp != NULL);
+	return !!(exp_connect_flags(exp) & OBD_CONNECT_RMT_CLIENT);
+}
+
+static inline int client_is_remote(struct obd_export *exp)
+{
+	struct obd_import *imp = class_exp2cliimp(exp);
+
+	return !!(imp->imp_connect_data.ocd_connect_flags &
+		  OBD_CONNECT_RMT_CLIENT);
+}
+
+static inline int exp_connect_vbr(struct obd_export *exp)
+{
+	LASSERT(exp != NULL);
+	LASSERT(exp->exp_connection);
+	return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR);
+}
+
+static inline int exp_connect_som(struct obd_export *exp)
+{
+	LASSERT(exp != NULL);
+	return !!(exp_connect_flags(exp) & OBD_CONNECT_SOM);
+}
+
+static inline int exp_connect_umask(struct obd_export *exp)
+{
+	return !!(exp_connect_flags(exp) & OBD_CONNECT_UMASK);
+}
+
+static inline int imp_connect_lru_resize(struct obd_import *imp)
+{
+	struct obd_connect_data *ocd;
+
+	LASSERT(imp != NULL);
+	ocd = &imp->imp_connect_data;
+	return !!(ocd->ocd_connect_flags & OBD_CONNECT_LRU_RESIZE);
+}
+
+static inline int exp_connect_layout(struct obd_export *exp)
+{
+	return !!(exp_connect_flags(exp) & OBD_CONNECT_LAYOUTLOCK);
+}
+
+static inline bool exp_connect_lvb_type(struct obd_export *exp)
+{
+	LASSERT(exp != NULL);
+	if (exp_connect_flags(exp) & OBD_CONNECT_LVB_TYPE)
+		return true;
+	else
+		return false;
+}
+
+static inline bool imp_connect_lvb_type(struct obd_import *imp)
+{
+	struct obd_connect_data *ocd;
+
+	LASSERT(imp != NULL);
+	ocd = &imp->imp_connect_data;
+	if (ocd->ocd_connect_flags & OBD_CONNECT_LVB_TYPE)
+		return true;
+	else
+		return false;
+}
+
+extern struct obd_export *class_conn2export(struct lustre_handle *conn);
+extern struct obd_device *class_conn2obd(struct lustre_handle *conn);
+
+/** @} export */
+
+#endif /* __EXPORT_H */
+/** @} obd_export */
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
new file mode 100644
index 000000000000..7d20cba07287
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -0,0 +1,762 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_fid.h
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#ifndef __LINUX_FID_H
+#define __LINUX_FID_H
+
+/** \defgroup fid fid
+ *
+ * @{
+ *
+ * http://wiki.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
+ * describes the FID namespace and interoperability requirements for FIDs.
+ * The important parts of that document are included here for reference.
+ *
+ * FID
+ *   File IDentifier generated by client from range allocated by the SEQuence
+ *   service and stored in struct lu_fid. The FID is composed of three parts:
+ *   SEQuence, ObjectID, and VERsion.  The SEQ component is a filesystem
+ *   unique 64-bit integer, and only one client is ever assigned any SEQ value.
+ *   The first 0x400 FID_SEQ_NORMAL [2^33, 2^33 + 0x400] values are reserved
+ *   for system use.  The OID component is a 32-bit value generated by the
+ *   client on a per-SEQ basis to allow creating many unique FIDs without
+ *   communication with the server.  The VER component is a 32-bit value that
+ *   distinguishes between different FID instantiations, such as snapshots or
+ *   separate subtrees within the filesystem.  FIDs with the same VER field
+ *   are considered part of the same namespace.
+ *
+ * OLD filesystems are those upgraded from Lustre 1.x that predate FIDs, and
+ *   MDTs use 32-bit ldiskfs internal inode/generation numbers (IGIFs), while
+ *   OSTs use 64-bit Lustre object IDs and generation numbers.
+ *
+ * NEW filesystems are those formatted since the introduction of FIDs.
+ *
+ * IGIF
+ *   Inode and Generation In FID, a surrogate FID used to globally identify
+ *   an existing object on OLD formatted MDT file system. This would only be
+ *   used on MDT0 in a DNE filesystem, because there cannot be more than one
+ *   MDT in an OLD formatted filesystem. Belongs to sequence in [12, 2^32 - 1]
+ *   range, where inode number is stored in SEQ, and inode generation is in OID.
+ *   NOTE: This assumes no more than 2^32-1 inodes exist in the MDT filesystem,
+ *   which is the maximum possible for an ldiskfs backend.  It also assumes
+ *   that the reserved ext3/ext4/ldiskfs inode numbers [0-11] are never visible
+ *   to clients, which has always been true.
+ *
+ * IDIF
+ *   object ID In FID, a surrogate FID used to globally identify an existing
+ *   OST object on OLD formatted OST file system. Belongs to a sequence in
+ *   [2^32, 2^33 - 1]. Sequence number is calculated as:
+ *
+ *      1 << 32 | (ost_index << 16) | ((objid >> 32) & 0xffff)
+ *
+ *   that is, SEQ consists of 16-bit OST index, and higher 16 bits of object
+ *   ID. The generation of unique SEQ values per OST allows the IDIF FIDs to
+ *   be identified in the FLD correctly. The OID field is calculated as:
+ *
+ *      objid & 0xffffffff
+ *
+ *   that is, it consists of lower 32 bits of object ID.  For objects within
+ *   the IDIF range, object ID extraction will be:
+ *
+ *      o_id = (fid->f_seq & 0x7fff) << 16 | fid->f_oid;
+ *      o_seq = 0;  // formerly group number
+ *
+ *   NOTE: This assumes that no more than 2^48-1 objects have ever been created
+ *   on any OST, and that no more than 65535 OSTs are in use.  Both are very
+ *   reasonable assumptions, i.e. an IDIF can uniquely map all objects assuming
+ *   a maximum creation rate of 1M objects per second for a maximum of 9 years,
+ *   or combinations thereof.
+ *
+ * OST_MDT0
+ *   Surrogate FID used to identify an existing object on OLD formatted OST
+ *   filesystem. Belongs to the reserved SEQuence 0, and is used prior to
+ *   the introduction of FID-on-OST, at which point IDIF will be used to
+ *   identify objects as residing on a specific OST.
+ *
+ * LLOG
+ *   For Lustre Log objects the object sequence 1 is used. This is compatible
+ *   with both OLD and NEW namespaces, as this SEQ number is in the
+ *   ext3/ldiskfs reserved inode range and does not conflict with IGIF
+ *   sequence numbers.
+ *
+ * ECHO
+ *   For testing OST IO performance the object sequence 2 is used. This is
+ *   compatible with both OLD and NEW namespaces, as this SEQ number is in
+ *   the ext3/ldiskfs reserved inode range and does not conflict with IGIF
+ *   sequence numbers.
+ *
+ * OST_MDT1 .. OST_MAX
+ *   For testing with multiple MDTs the object sequence 3 through 9 is used,
+ *   allowing direct mapping of MDTs 1 through 7 respectively, for a total
+ *   of 8 MDTs including OST_MDT0. This matches the legacy CMD project "group"
+ *   mappings. However, this SEQ range is only for testing prior to any
+ *   production DNE release, as the objects in this range conflict across all
+ *   OSTs, as the OST index is not part of the FID.  For production DNE usage,
+ *   OST objects created by MDT1+ will use FID_SEQ_NORMAL FIDs.
+ *
+ * DLM OST objid to IDIF mapping
+ *   For compatibility with existing OLD OST network protocol structures, the
+ *   FID must map onto the o_id and o_seq in a manner that ensures existing
+ *   objects are identified consistently for IO, as well as onto the LDLM
+ *   namespace to ensure IDIFs there is only a single resource name for any
+ *   object in the DLM.  The OLD OST object DLM resource mapping is:
+ *
+ *      resource[] = {o_id, o_seq, 0, 0}; // o_seq == 0 for production releases
+ *
+ *   The NEW OST object DLM resource mapping is the same for both MDT and OST:
+ *
+ *      resource[] = {SEQ, OID, VER, HASH};
+ *
+ *  NOTE: for mapping IDIF values to DLM resource names the o_id may be
+ *  larger than the 2^33 reserved sequence numbers for IDIF, so it is possible
+ *  for the o_id numbers to overlap FID SEQ numbers in the resource. However,
+ *  in all production releases the OLD o_seq field is always zero, and all
+ *  valid FID OID values are non-zero, so the lock resources will not collide.
+ *  Even so, the MDT and OST resources are also in different LDLM namespaces.
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_req_layout.h>
+#include <lustre_mdt.h>
+#include <obd.h>
+
+
+struct lu_site;
+struct lu_context;
+
+/* Whole sequences space range and zero range definitions */
+extern const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE;
+extern const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE;
+extern const struct lu_fid LUSTRE_BFL_FID;
+extern const struct lu_fid LU_OBF_FID;
+extern const struct lu_fid LU_DOT_LUSTRE_FID;
+
+enum {
+	/*
+	 * This is how may metadata FIDs may be allocated in one sequence(128k)
+	 */
+	LUSTRE_METADATA_SEQ_MAX_WIDTH = 0x0000000000020000ULL,
+
+	/*
+	 * This is how many data FIDs could be allocated in one sequence(4B - 1)
+	 */
+	LUSTRE_DATA_SEQ_MAX_WIDTH = 0x00000000FFFFFFFFULL,
+
+	/*
+	 * How many sequences to allocate to a client at once.
+	 */
+	LUSTRE_SEQ_META_WIDTH = 0x0000000000000001ULL,
+
+	/*
+	 * seq allocation pool size.
+	 */
+	LUSTRE_SEQ_BATCH_WIDTH = LUSTRE_SEQ_META_WIDTH * 1000,
+
+	/*
+	 * This is how many sequences may be in one super-sequence allocated to
+	 * MDTs.
+	 */
+	LUSTRE_SEQ_SUPER_WIDTH = ((1ULL << 30ULL) * LUSTRE_SEQ_META_WIDTH)
+};
+
+enum {
+	/** 2^6 FIDs for OI containers */
+	OSD_OI_FID_OID_BITS     = 6,
+	/** reserve enough FIDs in case we want more in the future */
+	OSD_OI_FID_OID_BITS_MAX = 10,
+};
+
+/** special OID for local objects */
+enum local_oid {
+	/** \see fld_mod_init */
+	FLD_INDEX_OID		= 3UL,
+	/** \see fid_mod_init */
+	FID_SEQ_CTL_OID		= 4UL,
+	FID_SEQ_SRV_OID		= 5UL,
+	/** \see mdd_mod_init */
+	MDD_ROOT_INDEX_OID	= 6UL, /* deprecated in 2.4 */
+	MDD_ORPHAN_OID		= 7UL, /* deprecated in 2.4 */
+	MDD_LOV_OBJ_OID		= 8UL,
+	MDD_CAPA_KEYS_OID	= 9UL,
+	/** \see mdt_mod_init */
+	LAST_RECV_OID		= 11UL,
+	OSD_FS_ROOT_OID		= 13UL,
+	ACCT_USER_OID		= 15UL,
+	ACCT_GROUP_OID		= 16UL,
+	LFSCK_BOOKMARK_OID	= 17UL,
+	OTABLE_IT_OID		= 18UL,
+	/* These two definitions are obsolete
+	 * OFD_GROUP0_LAST_OID     = 20UL,
+	 * OFD_GROUP4K_LAST_OID    = 20UL+4096,
+	 */
+	OFD_LAST_GROUP_OID	= 4117UL,
+	LLOG_CATALOGS_OID	= 4118UL,
+	MGS_CONFIGS_OID		= 4119UL,
+	OFD_HEALTH_CHECK_OID	= 4120UL,
+	MDD_LOV_OBJ_OSEQ	= 4121UL,
+	LFSCK_NAMESPACE_OID     = 4122UL,
+	REMOTE_PARENT_DIR_OID	= 4123UL,
+};
+
+static inline void lu_local_obj_fid(struct lu_fid *fid, __u32 oid)
+{
+	fid->f_seq = FID_SEQ_LOCAL_FILE;
+	fid->f_oid = oid;
+	fid->f_ver = 0;
+}
+
+static inline void lu_local_name_obj_fid(struct lu_fid *fid, __u32 oid)
+{
+	fid->f_seq = FID_SEQ_LOCAL_NAME;
+	fid->f_oid = oid;
+	fid->f_ver = 0;
+}
+
+/* For new FS (>= 2.4), the root FID will be changed to
+ * [FID_SEQ_ROOT:1:0], for existing FS, (upgraded to 2.4),
+ * the root FID will still be IGIF */
+static inline int fid_is_root(const struct lu_fid *fid)
+{
+	return unlikely((fid_seq(fid) == FID_SEQ_ROOT &&
+			 fid_oid(fid) == 1));
+}
+
+static inline int fid_is_dot_lustre(const struct lu_fid *fid)
+{
+	return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE &&
+			fid_oid(fid) == FID_OID_DOT_LUSTRE);
+}
+
+static inline int fid_is_obf(const struct lu_fid *fid)
+{
+	return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE &&
+			fid_oid(fid) == FID_OID_DOT_LUSTRE_OBF);
+}
+
+static inline int fid_is_otable_it(const struct lu_fid *fid)
+{
+	return unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
+			fid_oid(fid) == OTABLE_IT_OID);
+}
+
+static inline int fid_is_acct(const struct lu_fid *fid)
+{
+	return fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
+	       (fid_oid(fid) == ACCT_USER_OID ||
+		fid_oid(fid) == ACCT_GROUP_OID);
+}
+
+static inline int fid_is_quota(const struct lu_fid *fid)
+{
+	return fid_seq(fid) == FID_SEQ_QUOTA ||
+	       fid_seq(fid) == FID_SEQ_QUOTA_GLB;
+}
+
+static inline int fid_is_namespace_visible(const struct lu_fid *fid)
+{
+	const __u64 seq = fid_seq(fid);
+
+	/* Here, we cannot distinguish whether the normal FID is for OST
+	 * object or not. It is caller's duty to check more if needed. */
+	return (!fid_is_last_id(fid) &&
+		(fid_seq_is_norm(seq) || fid_seq_is_igif(seq))) ||
+	       fid_is_root(fid) || fid_is_dot_lustre(fid);
+}
+
+static inline int fid_seq_in_fldb(__u64 seq)
+{
+	return fid_seq_is_igif(seq) || fid_seq_is_norm(seq) ||
+	       fid_seq_is_root(seq) || fid_seq_is_dot(seq);
+}
+
+static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq)
+{
+	if (fid_seq_is_mdt0(seq)) {
+		fid->f_seq = fid_idif_seq(0, 0);
+	} else {
+		LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) ||
+			 fid_seq_is_idif(seq), LPX64"\n", seq);
+		fid->f_seq = seq;
+	}
+	fid->f_oid = 0;
+	fid->f_ver = 0;
+}
+
+enum lu_mgr_type {
+	LUSTRE_SEQ_SERVER,
+	LUSTRE_SEQ_CONTROLLER
+};
+
+struct lu_server_seq;
+
+/* Client sequence manager interface. */
+struct lu_client_seq {
+	/* Sequence-controller export. */
+	struct obd_export      *lcs_exp;
+	struct mutex		lcs_mutex;
+
+	/*
+	 * Range of allowed for allocation sequeces. When using lu_client_seq on
+	 * clients, this contains meta-sequence range. And for servers this
+	 * contains super-sequence range.
+	 */
+	struct lu_seq_range	 lcs_space;
+
+	/* Seq related proc */
+	proc_dir_entry_t   *lcs_proc_dir;
+
+	/* This holds last allocated fid in last obtained seq */
+	struct lu_fid	   lcs_fid;
+
+	/* LUSTRE_SEQ_METADATA or LUSTRE_SEQ_DATA */
+	enum lu_cli_type	lcs_type;
+
+	/*
+	 * Service uuid, passed from MDT + seq name to form unique seq name to
+	 * use it with procfs.
+	 */
+	char		    lcs_name[80];
+
+	/*
+	 * Sequence width, that is how many objects may be allocated in one
+	 * sequence. Default value for it is LUSTRE_SEQ_MAX_WIDTH.
+	 */
+	__u64		   lcs_width;
+
+	/* Seq-server for direct talking */
+	struct lu_server_seq   *lcs_srv;
+
+	/* wait queue for fid allocation and update indicator */
+	wait_queue_head_t	     lcs_waitq;
+	int		     lcs_update;
+};
+
+/* server sequence manager interface */
+struct lu_server_seq {
+	/* Available sequences space */
+	struct lu_seq_range	 lss_space;
+
+	/* keeps highwater in lsr_end for seq allocation algorithm */
+	struct lu_seq_range	 lss_lowater_set;
+	struct lu_seq_range	 lss_hiwater_set;
+
+	/*
+	 * Device for server side seq manager needs (saving sequences to backing
+	 * store).
+	 */
+	struct dt_device       *lss_dev;
+
+	/* /seq file object device */
+	struct dt_object       *lss_obj;
+
+	/* Seq related proc */
+	proc_dir_entry_t   *lss_proc_dir;
+
+	/* LUSTRE_SEQ_SERVER or LUSTRE_SEQ_CONTROLLER */
+	enum lu_mgr_type       lss_type;
+
+	/* Client interafce to request controller */
+	struct lu_client_seq   *lss_cli;
+
+	/* Mutex for protecting allocation */
+	struct mutex		lss_mutex;
+
+	/*
+	 * Service uuid, passed from MDT + seq name to form unique seq name to
+	 * use it with procfs.
+	 */
+	char		    lss_name[80];
+
+	/*
+	 * Allocation chunks for super and meta sequences. Default values are
+	 * LUSTRE_SEQ_SUPER_WIDTH and LUSTRE_SEQ_META_WIDTH.
+	 */
+	__u64		   lss_width;
+
+	/*
+	 * minimum lss_alloc_set size that should be allocated from
+	 * lss_space
+	 */
+	__u64		   lss_set_width;
+
+	/* sync is needed for update operation */
+	__u32		   lss_need_sync;
+
+	/**
+	 * Pointer to site object, required to access site fld.
+	 */
+	struct seq_server_site  *lss_site;
+};
+
+int seq_query(struct com_thread_info *info);
+int seq_handle(struct ptlrpc_request *req);
+
+/* Server methods */
+int seq_server_init(struct lu_server_seq *seq,
+		    struct dt_device *dev,
+		    const char *prefix,
+		    enum lu_mgr_type type,
+		    struct seq_server_site *ss,
+		    const struct lu_env *env);
+
+void seq_server_fini(struct lu_server_seq *seq,
+		     const struct lu_env *env);
+
+int seq_server_alloc_super(struct lu_server_seq *seq,
+			   struct lu_seq_range *out,
+			   const struct lu_env *env);
+
+int seq_server_alloc_meta(struct lu_server_seq *seq,
+			  struct lu_seq_range *out,
+			  const struct lu_env *env);
+
+int seq_server_set_cli(struct lu_server_seq *seq,
+		       struct lu_client_seq *cli,
+		       const struct lu_env *env);
+
+/* Client methods */
+int seq_client_init(struct lu_client_seq *seq,
+		    struct obd_export *exp,
+		    enum lu_cli_type type,
+		    const char *prefix,
+		    struct lu_server_seq *srv);
+
+void seq_client_fini(struct lu_client_seq *seq);
+
+void seq_client_flush(struct lu_client_seq *seq);
+
+int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq,
+			 struct lu_fid *fid);
+int seq_client_get_seq(const struct lu_env *env, struct lu_client_seq *seq,
+		       seqno_t *seqnr);
+int seq_site_fini(const struct lu_env *env, struct seq_server_site *ss);
+/* Fids common stuff */
+int fid_is_local(const struct lu_env *env,
+		 struct lu_site *site, const struct lu_fid *fid);
+
+int client_fid_init(struct obd_device *obd, struct obd_export *exp,
+		    enum lu_cli_type type);
+int client_fid_fini(struct obd_device *obd);
+
+/* fid locking */
+
+struct ldlm_namespace;
+
+/*
+ * Build (DLM) resource name from FID.
+ *
+ * NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
+ * but was moved into name[1] along with the OID to avoid consuming the
+ * renaming name[2,3] fields that need to be used for the quota identifier.
+ */
+static inline struct ldlm_res_id *
+fid_build_reg_res_name(const struct lu_fid *f,
+		       struct ldlm_res_id *name)
+{
+	memset(name, 0, sizeof *name);
+	name->name[LUSTRE_RES_ID_SEQ_OFF] = fid_seq(f);
+	name->name[LUSTRE_RES_ID_VER_OID_OFF] = fid_ver_oid(f);
+	return name;
+}
+
+/*
+ * Build (DLM) resource identifier from global quota FID and quota ID.
+ */
+static inline struct ldlm_res_id *
+fid_build_quota_resid(const struct lu_fid *glb_fid, union lquota_id *qid,
+		      struct ldlm_res_id *res)
+{
+	fid_build_reg_res_name(glb_fid, res);
+	res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] = fid_seq(&qid->qid_fid);
+	res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] = fid_ver_oid(&qid->qid_fid);
+	return res;
+}
+
+/*
+ * Extract global FID and quota ID from resource name
+ */
+static inline void fid_extract_quota_resid(struct ldlm_res_id *res,
+					   struct lu_fid *glb_fid,
+					   union lquota_id *qid)
+{
+	glb_fid->f_seq = res->name[LUSTRE_RES_ID_SEQ_OFF];
+	glb_fid->f_oid = (__u32)res->name[LUSTRE_RES_ID_VER_OID_OFF];
+	glb_fid->f_ver = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32);
+
+	qid->qid_fid.f_seq = res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF];
+	qid->qid_fid.f_oid = (__u32)res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF];
+	qid->qid_fid.f_ver =
+		(__u32)(res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] >> 32);
+}
+
+/*
+ * Return true if resource is for object identified by fid.
+ */
+static inline int fid_res_name_eq(const struct lu_fid *f,
+				  const struct ldlm_res_id *name)
+{
+	return name->name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(f) &&
+	       name->name[LUSTRE_RES_ID_VER_OID_OFF] == fid_ver_oid(f);
+}
+
+/* reverse function of fid_build_reg_res_name() */
+static inline void fid_build_from_res_name(struct lu_fid *f,
+					   const struct ldlm_res_id *name)
+{
+	fid_zero(f);
+	f->f_seq = name->name[LUSTRE_RES_ID_SEQ_OFF];
+	f->f_oid = name->name[LUSTRE_RES_ID_VER_OID_OFF] & 0xffffffff;
+	f->f_ver = name->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32;
+	LASSERT(fid_res_name_eq(f, name));
+}
+
+static inline struct ldlm_res_id *
+fid_build_pdo_res_name(const struct lu_fid *f,
+		       unsigned int hash,
+		       struct ldlm_res_id *name)
+{
+	fid_build_reg_res_name(f, name);
+	name->name[LUSTRE_RES_ID_HSH_OFF] = hash;
+	return name;
+}
+
+/**
+ * Build DLM resource name from object id & seq, which will be removed
+ * finally, when we replace ost_id with FID in data stack.
+ *
+ * Currently, resid from the old client, whose res[0] = object_id,
+ * res[1] = object_seq, is just oposite with Metatdata
+ * resid, where, res[0] = fid->f_seq, res[1] = fid->f_oid.
+ * To unifiy the resid identification, we will reverse the data
+ * resid to keep it same with Metadata resid, i.e.
+ *
+ * For resid from the old client,
+ *    res[0] = objid,  res[1] = 0, still keep the original order,
+ *    for compatiblity.
+ *
+ * For new resid
+ *    res will be built from normal FID directly, i.e. res[0] = f_seq,
+ *    res[1] = f_oid + f_ver.
+ */
+static inline void ostid_build_res_name(struct ost_id *oi,
+					struct ldlm_res_id *name)
+{
+	memset(name, 0, sizeof *name);
+	if (fid_seq_is_mdt0(ostid_seq(oi))) {
+		name->name[LUSTRE_RES_ID_SEQ_OFF] = ostid_id(oi);
+		name->name[LUSTRE_RES_ID_VER_OID_OFF] = ostid_seq(oi);
+	} else {
+		fid_build_reg_res_name((struct lu_fid *)oi, name);
+	}
+}
+
+static inline void ostid_res_name_to_id(struct ost_id *oi,
+					struct ldlm_res_id *name)
+{
+	if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_SEQ_OFF])) {
+		/* old resid */
+		ostid_set_seq(oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
+		ostid_set_id(oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
+	} else {
+		/* new resid */
+		fid_build_from_res_name((struct lu_fid *)oi, name);
+	}
+}
+
+/**
+ * Return true if the resource is for the object identified by this id & group.
+ */
+static inline int ostid_res_name_eq(struct ost_id *oi,
+				    struct ldlm_res_id *name)
+{
+	/* Note: it is just a trick here to save some effort, probably the
+	 * correct way would be turn them into the FID and compare */
+	if (fid_seq_is_mdt0(ostid_seq(oi))) {
+		return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_id(oi) &&
+		       name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_seq(oi);
+	} else {
+		return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_seq(oi) &&
+		       name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_id(oi);
+	}
+}
+
+/* The same as osc_build_res_name() */
+static inline void ost_fid_build_resid(const struct lu_fid *fid,
+				       struct ldlm_res_id *resname)
+{
+	if (fid_is_mdt0(fid) || fid_is_idif(fid)) {
+		struct ost_id oi;
+		oi.oi.oi_id = 0; /* gcc 4.7.2 complains otherwise */
+		if (fid_to_ostid(fid, &oi) != 0)
+			return;
+		ostid_build_res_name(&oi, resname);
+	} else {
+		fid_build_reg_res_name(fid, resname);
+	}
+}
+
+static inline void ost_fid_from_resid(struct lu_fid *fid,
+				      const struct ldlm_res_id *name)
+{
+	if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) {
+		/* old resid */
+		struct ost_id oi;
+		ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
+		ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
+		ostid_to_fid(fid, &oi, 0);
+	} else {
+		/* new resid */
+		fid_build_from_res_name(fid, name);
+	}
+}
+
+/**
+ * Flatten 128-bit FID values into a 64-bit value for use as an inode number.
+ * For non-IGIF FIDs this starts just over 2^32, and continues without
+ * conflict until 2^64, at which point we wrap the high 24 bits of the SEQ
+ * into the range where there may not be many OID values in use, to minimize
+ * the risk of conflict.
+ *
+ * Suppose LUSTRE_SEQ_MAX_WIDTH less than (1 << 24) which is currently true,
+ * the time between re-used inode numbers is very long - 2^40 SEQ numbers,
+ * or about 2^40 client mounts, if clients create less than 2^24 files/mount.
+ */
+static inline __u64 fid_flatten(const struct lu_fid *fid)
+{
+	__u64 ino;
+	__u64 seq;
+
+	if (fid_is_igif(fid)) {
+		ino = lu_igif_ino(fid);
+		RETURN(ino);
+	}
+
+	seq = fid_seq(fid);
+
+	ino = (seq << 24) + ((seq >> 24) & 0xffffff0000ULL) + fid_oid(fid);
+
+	RETURN(ino ? ino : fid_oid(fid));
+}
+
+static inline __u32 fid_hash(const struct lu_fid *f, int bits)
+{
+	/* all objects with same id and different versions will belong to same
+	 * collisions list. */
+	return cfs_hash_long(fid_flatten(f), bits);
+}
+
+/**
+ * map fid to 32 bit value for ino on 32bit systems. */
+static inline __u32 fid_flatten32(const struct lu_fid *fid)
+{
+	__u32 ino;
+	__u64 seq;
+
+	if (fid_is_igif(fid)) {
+		ino = lu_igif_ino(fid);
+		RETURN(ino);
+	}
+
+	seq = fid_seq(fid) - FID_SEQ_START;
+
+	/* Map the high bits of the OID into higher bits of the inode number so
+	 * that inodes generated at about the same time have a reduced chance
+	 * of collisions. This will give a period of 2^12 = 1024 unique clients
+	 * (from SEQ) and up to min(LUSTRE_SEQ_MAX_WIDTH, 2^20) = 128k objects
+	 * (from OID), or up to 128M inodes without collisions for new files. */
+	ino = ((seq & 0x000fffffULL) << 12) + ((seq >> 8) & 0xfffff000) +
+	       (seq >> (64 - (40-8)) & 0xffffff00) +
+	       (fid_oid(fid) & 0xff000fff) + ((fid_oid(fid) & 0x00fff000) << 8);
+
+	RETURN(ino ? ino : fid_oid(fid));
+}
+
+static inline int lu_fid_diff(struct lu_fid *fid1, struct lu_fid *fid2)
+{
+	LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID", fid2:"DFID"\n",
+		 PFID(fid1), PFID(fid2));
+
+	if (fid_is_idif(fid1) && fid_is_idif(fid2))
+		return fid_idif_id(fid1->f_seq, fid1->f_oid, fid1->f_ver) -
+		       fid_idif_id(fid2->f_seq, fid2->f_oid, fid2->f_ver);
+
+	return fid_oid(fid1) - fid_oid(fid2);
+}
+
+#define LUSTRE_SEQ_SRV_NAME "seq_srv"
+#define LUSTRE_SEQ_CTL_NAME "seq_ctl"
+
+/* Range common stuff */
+static inline void range_cpu_to_le(struct lu_seq_range *dst, const struct lu_seq_range *src)
+{
+	dst->lsr_start = cpu_to_le64(src->lsr_start);
+	dst->lsr_end = cpu_to_le64(src->lsr_end);
+	dst->lsr_index = cpu_to_le32(src->lsr_index);
+	dst->lsr_flags = cpu_to_le32(src->lsr_flags);
+}
+
+static inline void range_le_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src)
+{
+	dst->lsr_start = le64_to_cpu(src->lsr_start);
+	dst->lsr_end = le64_to_cpu(src->lsr_end);
+	dst->lsr_index = le32_to_cpu(src->lsr_index);
+	dst->lsr_flags = le32_to_cpu(src->lsr_flags);
+}
+
+static inline void range_cpu_to_be(struct lu_seq_range *dst, const struct lu_seq_range *src)
+{
+	dst->lsr_start = cpu_to_be64(src->lsr_start);
+	dst->lsr_end = cpu_to_be64(src->lsr_end);
+	dst->lsr_index = cpu_to_be32(src->lsr_index);
+	dst->lsr_flags = cpu_to_be32(src->lsr_flags);
+}
+
+static inline void range_be_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src)
+{
+	dst->lsr_start = be64_to_cpu(src->lsr_start);
+	dst->lsr_end = be64_to_cpu(src->lsr_end);
+	dst->lsr_index = be32_to_cpu(src->lsr_index);
+	dst->lsr_flags = be32_to_cpu(src->lsr_flags);
+}
+
+/** @} fid */
+
+#endif /* __LINUX_FID_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h
new file mode 100644
index 000000000000..11e034a65b17
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_fld.h
@@ -0,0 +1,202 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_FLD_H
+#define __LINUX_FLD_H
+
+/** \defgroup fld fld
+ *
+ * @{
+ */
+
+#include <lustre/lustre_idl.h>
+#include <lustre_mdt.h>
+#include <dt_object.h>
+
+#include <linux/libcfs/libcfs.h>
+
+struct lu_client_fld;
+struct lu_server_fld;
+struct lu_fld_hash;
+struct fld_cache;
+
+extern const struct dt_index_features fld_index_features;
+extern const char fld_index_name[];
+
+/*
+ * FLD (Fid Location Database) interface.
+ */
+enum {
+	LUSTRE_CLI_FLD_HASH_DHT = 0,
+	LUSTRE_CLI_FLD_HASH_RRB
+};
+
+
+struct lu_fld_target {
+	struct list_head	       ft_chain;
+	struct obd_export       *ft_exp;
+	struct lu_server_fld    *ft_srv;
+	__u64		    ft_idx;
+};
+
+struct lu_server_fld {
+	/**
+	 * Fld dir proc entry. */
+	proc_dir_entry_t    *lsf_proc_dir;
+
+	/**
+	 * /fld file object device */
+	struct dt_object	*lsf_obj;
+
+	/**
+	 * super sequence controller export, needed to forward fld
+	 * lookup  request. */
+	struct obd_export       *lsf_control_exp;
+
+	/**
+	 * Client FLD cache. */
+	struct fld_cache	*lsf_cache;
+
+	/**
+	 * Protect index modifications */
+	struct mutex		lsf_lock;
+
+	/**
+	 * Fld service name in form "fld-srv-lustre-MDTXXX" */
+	char		     lsf_name[80];
+
+};
+
+struct lu_client_fld {
+	/**
+	 * Client side proc entry. */
+	proc_dir_entry_t    *lcf_proc_dir;
+
+	/**
+	 * List of exports client FLD knows about. */
+	struct list_head	       lcf_targets;
+
+	/**
+	 * Current hash to be used to chose an export. */
+	struct lu_fld_hash      *lcf_hash;
+
+	/**
+	 * Exports count. */
+	int		      lcf_count;
+
+	/**
+	 * Lock protecting exports list and fld_hash. */
+	spinlock_t		 lcf_lock;
+
+	/**
+	 * Client FLD cache. */
+	struct fld_cache	*lcf_cache;
+
+	/**
+	 * Client fld proc entry name. */
+	char		     lcf_name[80];
+
+	const struct lu_context *lcf_ctx;
+
+	int		      lcf_flags;
+};
+
+/**
+ * number of blocks to reserve for particular operations. Should be function of
+ * ... something. Stub for now.
+ */
+enum {
+	/* one insert operation can involve two delete and one insert */
+	FLD_TXN_INDEX_INSERT_CREDITS  = 60,
+	FLD_TXN_INDEX_DELETE_CREDITS  = 20,
+};
+
+int fld_query(struct com_thread_info *info);
+
+/* Server methods */
+int fld_server_init(const struct lu_env *env, struct lu_server_fld *fld,
+		    struct dt_device *dt, const char *prefix, int mds_node_id,
+		    int type);
+
+void fld_server_fini(const struct lu_env *env, struct lu_server_fld *fld);
+
+int fld_declare_server_create(const struct lu_env *env,
+			      struct lu_server_fld *fld,
+			      struct lu_seq_range *new,
+			      struct thandle *th);
+
+int fld_server_create(const struct lu_env *env,
+		      struct lu_server_fld *fld,
+		      struct lu_seq_range *add_range,
+		      struct thandle *th);
+
+int fld_insert_entry(const struct lu_env *env,
+		     struct lu_server_fld *fld,
+		     const struct lu_seq_range *range);
+
+int fld_server_lookup(const struct lu_env *env, struct lu_server_fld *fld,
+		      seqno_t seq, struct lu_seq_range *range);
+
+/* Client methods */
+int fld_client_init(struct lu_client_fld *fld,
+		    const char *prefix, int hash);
+
+void fld_client_fini(struct lu_client_fld *fld);
+
+void fld_client_flush(struct lu_client_fld *fld);
+
+int fld_client_lookup(struct lu_client_fld *fld, seqno_t seq, mdsno_t *mds,
+		      __u32 flags, const struct lu_env *env);
+
+int fld_client_create(struct lu_client_fld *fld,
+		      struct lu_seq_range *range,
+		      const struct lu_env *env);
+
+int fld_client_delete(struct lu_client_fld *fld,
+		      seqno_t seq,
+		      const struct lu_env *env);
+
+int fld_client_add_target(struct lu_client_fld *fld,
+			  struct lu_fld_target *tar);
+
+int fld_client_del_target(struct lu_client_fld *fld,
+			  __u64 idx);
+
+void fld_client_proc_fini(struct lu_client_fld *fld);
+
+/** @} fld */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_fsfilt.h b/drivers/staging/lustre/lustre/include/lustre_fsfilt.h
new file mode 100644
index 000000000000..9dcc332cb2f3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_fsfilt.h
@@ -0,0 +1,48 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_fsfilt.h
+ *
+ * Filesystem interface helper.
+ */
+
+#ifndef _LUSTRE_FSFILT_H
+#define _LUSTRE_FSFILT_H
+
+#include <linux/lustre_fsfilt.h>
+
+#define LU221_BAD_TIME (0x80000000U + 24 * 3600)
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h
new file mode 100644
index 000000000000..105f6d61eef0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_ha.h
@@ -0,0 +1,67 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_HA_H
+#define _LUSTRE_HA_H
+
+/** \defgroup ha ha
+ *
+ * @{
+ */
+
+struct obd_import;
+struct obd_export;
+struct obd_device;
+struct ptlrpc_request;
+
+
+int ptlrpc_replay(struct obd_import *imp);
+int ptlrpc_resend(struct obd_import *imp);
+void ptlrpc_free_committed(struct obd_import *imp);
+void ptlrpc_wake_delayed(struct obd_import *imp);
+int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async);
+int ptlrpc_set_import_active(struct obd_import *imp, int active);
+void ptlrpc_activate_import(struct obd_import *imp);
+void ptlrpc_deactivate_import(struct obd_import *imp);
+void ptlrpc_invalidate_import(struct obd_import *imp);
+void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt);
+int ptlrpc_check_suspend(void);
+void ptlrpc_activate_timeouts(struct obd_import *imp);
+void ptlrpc_deactivate_timeouts(struct obd_import *imp);
+
+/** @} ha */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h
new file mode 100644
index 000000000000..fcd40f33426a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_handles.h
@@ -0,0 +1,93 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LUSTRE_HANDLES_H_
+#define __LUSTRE_HANDLES_H_
+
+/** \defgroup handles handles
+ *
+ * @{
+ */
+
+#include <linux/lustre_handles.h>
+
+#include <linux/libcfs/libcfs.h>
+
+
+struct portals_handle_ops {
+	void (*hop_addref)(void *object);
+	void (*hop_free)(void *object, int size);
+};
+
+/* These handles are most easily used by having them appear at the very top of
+ * whatever object that you want to make handles for.  ie:
+ *
+ * struct ldlm_lock {
+ *	 struct portals_handle handle;
+ *	 ...
+ * };
+ *
+ * Now you're able to assign the results of cookie2handle directly to an
+ * ldlm_lock.  If it's not at the top, you'll want to use container_of()
+ * to compute the start of the structure based on the handle field. */
+struct portals_handle {
+	struct list_head			h_link;
+	__u64				h_cookie;
+	struct portals_handle_ops	*h_ops;
+
+	/* newly added fields to handle the RCU issue. -jxiong */
+	cfs_rcu_head_t			h_rcu;
+	spinlock_t			h_lock;
+	unsigned int			h_size:31;
+	unsigned int			h_in:1;
+};
+#define RCU2HANDLE(rcu)    container_of(rcu, struct portals_handle, h_rcu)
+
+/* handles.c */
+
+/* Add a handle to the hash table */
+void class_handle_hash(struct portals_handle *,
+		       struct portals_handle_ops *ops);
+void class_handle_unhash(struct portals_handle *);
+void class_handle_hash_back(struct portals_handle *);
+void *class_handle2object(__u64 cookie);
+void class_handle_free_cb(cfs_rcu_head_t *);
+int class_handle_init(void);
+void class_handle_cleanup(void);
+
+/** @} handles */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_idmap.h b/drivers/staging/lustre/lustre/include/lustre_idmap.h
new file mode 100644
index 000000000000..084bdd6ab4db
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_idmap.h
@@ -0,0 +1,104 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lustre/include/lustre_idmap.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_IDMAP_H
+#define _LUSTRE_IDMAP_H
+
+/** \defgroup idmap idmap
+ *
+ * @{
+ */
+
+#include <linux/libcfs/libcfs.h>
+
+#define CFS_NGROUPS_PER_BLOCK   ((int)(PAGE_CACHE_SIZE / sizeof(gid_t)))
+
+#define CFS_GROUP_AT(gi, i) \
+	((gi)->blocks[(i) / CFS_NGROUPS_PER_BLOCK][(i) % CFS_NGROUPS_PER_BLOCK])
+
+enum {
+	CFS_IC_NOTHING     = 0,    /* convert nothing */
+	CFS_IC_ALL	 = 1,    /* convert all items */
+	CFS_IC_MAPPED      = 2,    /* convert mapped uid/gid */
+	CFS_IC_UNMAPPED    = 3     /* convert unmapped uid/gid */
+};
+
+#define  CFS_IDMAP_NOTFOUND     (-1)
+
+#define CFS_IDMAP_HASHSIZE      32
+
+enum lustre_idmap_idx {
+	RMT_UIDMAP_IDX,
+	LCL_UIDMAP_IDX,
+	RMT_GIDMAP_IDX,
+	LCL_GIDMAP_IDX,
+	CFS_IDMAP_N_HASHES
+};
+
+struct lustre_idmap_table {
+	spinlock_t	lit_lock;
+	struct list_head	lit_idmaps[CFS_IDMAP_N_HASHES][CFS_IDMAP_HASHSIZE];
+};
+
+struct lu_ucred;
+
+extern void lustre_groups_from_list(group_info_t *ginfo, gid_t *glist);
+extern void lustre_groups_sort(group_info_t *group_info);
+extern int lustre_in_group_p(struct lu_ucred *mu, gid_t grp);
+
+extern int lustre_idmap_add(struct lustre_idmap_table *t,
+			    uid_t ruid, uid_t luid,
+			    gid_t rgid, gid_t lgid);
+extern int lustre_idmap_del(struct lustre_idmap_table *t,
+			    uid_t ruid, uid_t luid,
+			    gid_t rgid, gid_t lgid);
+extern int lustre_idmap_lookup_uid(struct lu_ucred *mu,
+				   struct lustre_idmap_table *t,
+				   int reverse, uid_t uid);
+extern int lustre_idmap_lookup_gid(struct lu_ucred *mu,
+				   struct lustre_idmap_table *t,
+				   int reverse, gid_t gid);
+extern struct lustre_idmap_table *lustre_idmap_init(void);
+extern void lustre_idmap_fini(struct lustre_idmap_table *t);
+
+/** @} idmap */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
new file mode 100644
index 000000000000..3a5dd6a94c08
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -0,0 +1,367 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+/** \defgroup obd_import PtlRPC import definitions
+ * Imports are client-side representation of remote obd target.
+ *
+ * @{
+ */
+
+#ifndef __IMPORT_H
+#define __IMPORT_H
+
+/** \defgroup export export
+ *
+ * @{
+ */
+
+#include <lustre_handles.h>
+#include <lustre/lustre_idl.h>
+
+
+/**
+ * Adaptive Timeout stuff
+ *
+ * @{
+ */
+#define D_ADAPTTO D_OTHER
+#define AT_BINS 4		  /* "bin" means "N seconds of history" */
+#define AT_FLG_NOHIST 0x1	  /* use last reported value only */
+
+struct adaptive_timeout {
+	time_t		at_binstart;	 /* bin start time */
+	unsigned int	at_hist[AT_BINS];    /* timeout history bins */
+	unsigned int	at_flags;
+	unsigned int	at_current;	  /* current timeout value */
+	unsigned int	at_worst_ever;       /* worst-ever timeout value */
+	time_t		at_worst_time;       /* worst-ever timeout timestamp */
+	spinlock_t	at_lock;
+};
+
+struct ptlrpc_at_array {
+	struct list_head       *paa_reqs_array; /** array to hold requests */
+	__u32	     paa_size;       /** the size of array */
+	__u32	     paa_count;      /** the total count of reqs */
+	time_t	    paa_deadline;   /** the earliest deadline of reqs */
+	__u32	    *paa_reqs_count; /** the count of reqs in each entry */
+};
+
+#define IMP_AT_MAX_PORTALS 8
+struct imp_at {
+	int		     iat_portal[IMP_AT_MAX_PORTALS];
+	struct adaptive_timeout iat_net_latency;
+	struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
+};
+
+
+/** @} */
+
+/** Possible import states */
+enum lustre_imp_state {
+	LUSTRE_IMP_CLOSED     = 1,
+	LUSTRE_IMP_NEW	= 2,
+	LUSTRE_IMP_DISCON     = 3,
+	LUSTRE_IMP_CONNECTING = 4,
+	LUSTRE_IMP_REPLAY     = 5,
+	LUSTRE_IMP_REPLAY_LOCKS = 6,
+	LUSTRE_IMP_REPLAY_WAIT  = 7,
+	LUSTRE_IMP_RECOVER    = 8,
+	LUSTRE_IMP_FULL       = 9,
+	LUSTRE_IMP_EVICTED    = 10,
+};
+
+/** Returns test string representation of numeric import state \a state */
+static inline char * ptlrpc_import_state_name(enum lustre_imp_state state)
+{
+	static char* import_state_names[] = {
+		"<UNKNOWN>", "CLOSED",  "NEW", "DISCONN",
+		"CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
+		"RECOVER", "FULL", "EVICTED",
+	};
+
+	LASSERT (state <= LUSTRE_IMP_EVICTED);
+	return import_state_names[state];
+}
+
+/**
+ * List of import event types
+ */
+enum obd_import_event {
+	IMP_EVENT_DISCON     = 0x808001,
+	IMP_EVENT_INACTIVE   = 0x808002,
+	IMP_EVENT_INVALIDATE = 0x808003,
+	IMP_EVENT_ACTIVE     = 0x808004,
+	IMP_EVENT_OCD	= 0x808005,
+	IMP_EVENT_DEACTIVATE = 0x808006,
+	IMP_EVENT_ACTIVATE   = 0x808007,
+};
+
+/**
+ * Definition of import connection structure
+ */
+struct obd_import_conn {
+	/** Item for linking connections together */
+	struct list_head		oic_item;
+	/** Pointer to actual PortalRPC connection */
+	struct ptlrpc_connection *oic_conn;
+	/** uuid of remote side */
+	struct obd_uuid	   oic_uuid;
+	/**
+	 * Time (64 bit jiffies) of last connection attempt on this connection
+	 */
+	__u64		     oic_last_attempt;
+};
+
+/* state history */
+#define IMP_STATE_HIST_LEN 16
+struct import_state_hist {
+	enum lustre_imp_state ish_state;
+	time_t		ish_time;
+};
+
+/**
+ * Defintion of PortalRPC import structure.
+ * Imports are representing client-side view to remote target.
+ */
+struct obd_import {
+	/** Local handle (== id) for this import. */
+	struct portals_handle     imp_handle;
+	/** Reference counter */
+	atomic_t	      imp_refcount;
+	struct lustre_handle      imp_dlm_handle; /* client's ldlm export */
+	/** Currently active connection */
+	struct ptlrpc_connection *imp_connection;
+	/** PortalRPC client structure for this import */
+	struct ptlrpc_client     *imp_client;
+	/** List element for linking into pinger chain */
+	struct list_head		imp_pinger_chain;
+	/** List element for linking into chain for destruction */
+	struct list_head		imp_zombie_chain;
+
+	/**
+	 * Lists of requests that are retained for replay, waiting for a reply,
+	 * or waiting for recovery to complete, respectively.
+	 * @{
+	 */
+	struct list_head		imp_replay_list;
+	struct list_head		imp_sending_list;
+	struct list_head		imp_delayed_list;
+	/** @} */
+
+	/** obd device for this import */
+	struct obd_device	*imp_obd;
+
+	/**
+	 * some seciruty-related fields
+	 * @{
+	 */
+	struct ptlrpc_sec	*imp_sec;
+	struct mutex		  imp_sec_mutex;
+	cfs_time_t		imp_sec_expire;
+	/** @} */
+
+	/** Wait queue for those who need to wait for recovery completion */
+	wait_queue_head_t	       imp_recovery_waitq;
+
+	/** Number of requests currently in-flight */
+	atomic_t	      imp_inflight;
+	/** Number of requests currently unregistering */
+	atomic_t	      imp_unregistering;
+	/** Number of replay requests inflight */
+	atomic_t	      imp_replay_inflight;
+	/** Number of currently happening import invalidations */
+	atomic_t	      imp_inval_count;
+	/** Numbner of request timeouts */
+	atomic_t	      imp_timeouts;
+	/** Current import state */
+	enum lustre_imp_state     imp_state;
+	/** History of import states */
+	struct import_state_hist  imp_state_hist[IMP_STATE_HIST_LEN];
+	int		       imp_state_hist_idx;
+	/** Current import generation. Incremented on every reconnect */
+	int		       imp_generation;
+	/** Incremented every time we send reconnection request */
+	__u32		     imp_conn_cnt;
+       /**
+	* \see ptlrpc_free_committed remembers imp_generation value here
+	* after a check to save on unnecessary replay list iterations
+	*/
+	int		       imp_last_generation_checked;
+	/** Last tranno we replayed */
+	__u64		     imp_last_replay_transno;
+	/** Last transno committed on remote side */
+	__u64		     imp_peer_committed_transno;
+	/**
+	 * \see ptlrpc_free_committed remembers last_transno since its last
+	 * check here and if last_transno did not change since last run of
+	 * ptlrpc_free_committed and import generation is the same, we can
+	 * skip looking for requests to remove from replay list as optimisation
+	 */
+	__u64		     imp_last_transno_checked;
+	/**
+	 * Remote export handle. This is how remote side knows what export
+	 * we are talking to. Filled from response to connect request
+	 */
+	struct lustre_handle      imp_remote_handle;
+	/** When to perform next ping. time in jiffies. */
+	cfs_time_t		imp_next_ping;
+	/** When we last succesfully connected. time in 64bit jiffies */
+	__u64		     imp_last_success_conn;
+
+	/** List of all possible connection for import. */
+	struct list_head		imp_conn_list;
+	/**
+	 * Current connection. \a imp_connection is imp_conn_current->oic_conn
+	 */
+	struct obd_import_conn   *imp_conn_current;
+
+	/** Protects flags, level, generation, conn_cnt, *_list */
+	spinlock_t		  imp_lock;
+
+	/* flags */
+	unsigned long	     imp_no_timeout:1, /* timeouts are disabled */
+				  imp_invalid:1,    /* evicted */
+				  /* administratively disabled */
+				  imp_deactive:1,
+				  /* try to recover the import */
+				  imp_replayable:1,
+				  /* don't run recovery (timeout instead) */
+				  imp_dlm_fake:1,
+				  /* use 1/2 timeout on MDS' OSCs */
+				  imp_server_timeout:1,
+				  /* VBR: imp in delayed recovery */
+				  imp_delayed_recovery:1,
+				  /* VBR: if gap was found then no lock replays
+				   */
+				  imp_no_lock_replay:1,
+				  /* recovery by versions was failed */
+				  imp_vbr_failed:1,
+				  /* force an immidiate ping */
+				  imp_force_verify:1,
+				  /* force a scheduled ping */
+				  imp_force_next_verify:1,
+				  /* pingable */
+				  imp_pingable:1,
+				  /* resend for replay */
+				  imp_resend_replay:1,
+				  /* disable normal recovery, for test only. */
+				  imp_no_pinger_recover:1,
+				  /* need IR MNE swab */
+				  imp_need_mne_swab:1,
+				  /* import must be reconnected instead of
+				   * chouse new connection */
+				  imp_force_reconnect:1,
+				  /* import has tried to connect with server */
+				  imp_connect_tried:1;
+	__u32		     imp_connect_op;
+	struct obd_connect_data   imp_connect_data;
+	__u64		     imp_connect_flags_orig;
+	int		       imp_connect_error;
+
+	__u32		     imp_msg_magic;
+	__u32		     imp_msghdr_flags;       /* adjusted based on server capability */
+
+	struct ptlrpc_request_pool *imp_rq_pool;	  /* emergency request pool */
+
+	struct imp_at	     imp_at;		 /* adaptive timeout data */
+	time_t		    imp_last_reply_time;    /* for health check */
+};
+
+typedef void (*obd_import_callback)(struct obd_import *imp, void *closure,
+				    int event, void *event_arg, void *cb_data);
+
+/**
+ * Structure for import observer.
+ * It is possible to register "observer" on an import and every time
+ * something happens to an import (like connect/evict/disconnect)
+ * obderver will get its callback called with event type
+ */
+struct obd_import_observer {
+	struct list_head	   oio_chain;
+	obd_import_callback  oio_cb;
+	void		*oio_cb_data;
+};
+
+void class_observe_import(struct obd_import *imp, obd_import_callback cb,
+			  void *cb_data);
+void class_unobserve_import(struct obd_import *imp, obd_import_callback cb,
+			    void *cb_data);
+void class_notify_import_observers(struct obd_import *imp, int event,
+				   void *event_arg);
+
+/* import.c */
+static inline unsigned int at_est2timeout(unsigned int val)
+{
+	/* add an arbitrary minimum: 125% +5 sec */
+	return (val + (val >> 2) + 5);
+}
+
+static inline unsigned int at_timeout2est(unsigned int val)
+{
+	/* restore estimate value from timeout: e=4/5(t-5) */
+	LASSERT(val);
+	return (max((val << 2) / 5, 5U) - 4);
+}
+
+static inline void at_reset(struct adaptive_timeout *at, int val) {
+	at->at_current = val;
+	at->at_worst_ever = val;
+	at->at_worst_time = cfs_time_current_sec();
+}
+static inline void at_init(struct adaptive_timeout *at, int val, int flags) {
+	memset(at, 0, sizeof(*at));
+	spin_lock_init(&at->at_lock);
+	at->at_flags = flags;
+	at_reset(at, val);
+}
+extern unsigned int at_min;
+static inline int at_get(struct adaptive_timeout *at) {
+	return (at->at_current > at_min) ? at->at_current : at_min;
+}
+int at_measured(struct adaptive_timeout *at, unsigned int val);
+int import_at_get_index(struct obd_import *imp, int portal);
+extern unsigned int at_max;
+#define AT_OFF (at_max == 0)
+
+/* genops.c */
+struct obd_export;
+extern struct obd_import *class_exp2cliimp(struct obd_export *);
+extern struct obd_import *class_conn2cliimp(struct lustre_handle *);
+
+/** @} import */
+
+#endif /* __IMPORT_H */
+
+/** @} obd_import */
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
new file mode 100644
index 000000000000..bdfc5391c6d2
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -0,0 +1,667 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_lib.h
+ *
+ * Basic Lustre library routines.
+ */
+
+#ifndef _LUSTRE_LIB_H
+#define _LUSTRE_LIB_H
+
+/** \defgroup lib lib
+ *
+ * @{
+ */
+
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_ver.h>
+#include <lustre_cfg.h>
+#include <linux/lustre_lib.h>
+
+/* target.c */
+struct ptlrpc_request;
+struct obd_export;
+struct lu_target;
+struct l_wait_info;
+#include <lustre_ha.h>
+#include <lustre_net.h>
+#include <lvfs.h>
+
+
+int target_pack_pool_reply(struct ptlrpc_request *req);
+int do_set_info_async(struct obd_import *imp,
+		      int opcode, int version,
+		      obd_count keylen, void *key,
+		      obd_count vallen, void *val,
+		      struct ptlrpc_request_set *set);
+
+#define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */
+#define OBD_MAX_IOCTL_BUFFER CONFIG_LUSTRE_OBD_MAX_IOCTL_BUFFER
+
+void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
+
+/* client.c */
+
+int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg);
+struct client_obd *client_conn2cli(struct lustre_handle *conn);
+
+struct md_open_data;
+struct obd_client_handle {
+	struct lustre_handle  och_fh;
+	struct lu_fid	 och_fid;
+	struct md_open_data  *och_mod;
+	__u32 och_magic;
+	int och_flags;
+};
+#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
+
+/* statfs_pack.c */
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
+
+/* l_lock.c */
+struct lustre_lock {
+	int			l_depth;
+	task_t		*l_owner;
+	struct semaphore	l_sem;
+	spinlock_t		l_spin;
+};
+
+void l_lock_init(struct lustre_lock *);
+void l_lock(struct lustre_lock *);
+void l_unlock(struct lustre_lock *);
+int l_has_lock(struct lustre_lock *);
+
+/*
+ * For md echo client
+ */
+enum md_echo_cmd {
+	ECHO_MD_CREATE       = 1, /* Open/Create file on MDT */
+	ECHO_MD_MKDIR	= 2, /* Mkdir on MDT */
+	ECHO_MD_DESTROY      = 3, /* Unlink file on MDT */
+	ECHO_MD_RMDIR	= 4, /* Rmdir on MDT */
+	ECHO_MD_LOOKUP       = 5, /* Lookup on MDT */
+	ECHO_MD_GETATTR      = 6, /* Getattr on MDT */
+	ECHO_MD_SETATTR      = 7, /* Setattr on MDT */
+	ECHO_MD_ALLOC_FID    = 8, /* Get FIDs from MDT */
+};
+
+/*
+ *   OBD IOCTLS
+ */
+#define OBD_IOCTL_VERSION 0x00010004
+
+struct obd_ioctl_data {
+	__u32 ioc_len;
+	__u32 ioc_version;
+
+	union {
+		__u64 ioc_cookie;
+		__u64 ioc_u64_1;
+	};
+	union {
+		__u32 ioc_conn1;
+		__u32 ioc_u32_1;
+	};
+	union {
+		__u32 ioc_conn2;
+		__u32 ioc_u32_2;
+	};
+
+	struct obdo ioc_obdo1;
+	struct obdo ioc_obdo2;
+
+	obd_size ioc_count;
+	obd_off  ioc_offset;
+	__u32    ioc_dev;
+	__u32    ioc_command;
+
+	__u64 ioc_nid;
+	__u32 ioc_nal;
+	__u32 ioc_type;
+
+	/* buffers the kernel will treat as user pointers */
+	__u32  ioc_plen1;
+	char  *ioc_pbuf1;
+	__u32  ioc_plen2;
+	char  *ioc_pbuf2;
+
+	/* inline buffers for various arguments */
+	__u32  ioc_inllen1;
+	char  *ioc_inlbuf1;
+	__u32  ioc_inllen2;
+	char  *ioc_inlbuf2;
+	__u32  ioc_inllen3;
+	char  *ioc_inlbuf3;
+	__u32  ioc_inllen4;
+	char  *ioc_inlbuf4;
+
+	char    ioc_bulk[0];
+};
+
+struct obd_ioctl_hdr {
+	__u32 ioc_len;
+	__u32 ioc_version;
+};
+
+static inline int obd_ioctl_packlen(struct obd_ioctl_data *data)
+{
+	int len = cfs_size_round(sizeof(struct obd_ioctl_data));
+	len += cfs_size_round(data->ioc_inllen1);
+	len += cfs_size_round(data->ioc_inllen2);
+	len += cfs_size_round(data->ioc_inllen3);
+	len += cfs_size_round(data->ioc_inllen4);
+	return len;
+}
+
+
+static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
+{
+	if (data->ioc_len > (1<<30)) {
+		CERROR("OBD ioctl: ioc_len larger than 1<<30\n");
+		return 1;
+	}
+	if (data->ioc_inllen1 > (1<<30)) {
+		CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n");
+		return 1;
+	}
+	if (data->ioc_inllen2 > (1<<30)) {
+		CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
+		return 1;
+	}
+	if (data->ioc_inllen3 > (1<<30)) {
+		CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
+		return 1;
+	}
+	if (data->ioc_inllen4 > (1<<30)) {
+		CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
+		return 1;
+	}
+	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+		CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+		CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
+		CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
+		CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_pbuf1 && !data->ioc_plen1) {
+		CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_pbuf2 && !data->ioc_plen2) {
+		CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
+		return 1;
+	}
+	if (data->ioc_plen1 && !data->ioc_pbuf1) {
+		CERROR("OBD ioctl: plen1 set but NULL pointer\n");
+		return 1;
+	}
+	if (data->ioc_plen2 && !data->ioc_pbuf2) {
+		CERROR("OBD ioctl: plen2 set but NULL pointer\n");
+		return 1;
+	}
+	if (obd_ioctl_packlen(data) > data->ioc_len) {
+		CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n",
+		       obd_ioctl_packlen(data), data->ioc_len);
+		return 1;
+	}
+	return 0;
+}
+
+
+#include <obd_support.h>
+
+/* function defined in lustre/obdclass/<platform>/<platform>-module.c */
+int obd_ioctl_getdata(char **buf, int *len, void *arg);
+int obd_ioctl_popdata(void *arg, void *data, int len);
+
+static inline void obd_ioctl_freedata(char *buf, int len)
+{
+	ENTRY;
+
+	OBD_FREE_LARGE(buf, len);
+	EXIT;
+	return;
+}
+
+/*
+ * BSD ioctl description:
+ * #define IOC_V1       _IOR(g, n1, long)
+ * #define IOC_V2       _IOW(g, n2, long)
+ *
+ * ioctl(f, IOC_V1, arg);
+ * arg will be treated as a long value,
+ *
+ * ioctl(f, IOC_V2, arg)
+ * arg will be treated as a pointer, bsd will call
+ * copyin(buf, arg, sizeof(long))
+ *
+ * To make BSD ioctl handles argument correctly and simplely,
+ * we change _IOR to _IOWR so BSD will copyin obd_ioctl_data
+ * for us. Does this change affect Linux?  (XXX Liang)
+ */
+#define OBD_IOC_CREATE		 _IOWR('f', 101, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DESTROY		_IOW ('f', 104, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PREALLOCATE	    _IOWR('f', 105, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_SETATTR		_IOW ('f', 107, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETATTR		_IOWR ('f', 108, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_READ		   _IOWR('f', 109, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_WRITE		  _IOWR('f', 110, OBD_IOC_DATA_TYPE)
+
+
+#define OBD_IOC_STATFS		 _IOWR('f', 113, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SYNC		   _IOW ('f', 114, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_READ2		  _IOWR('f', 115, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_FORMAT		 _IOWR('f', 116, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARTITION	      _IOWR('f', 117, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_COPY		   _IOWR('f', 120, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_MIGR		   _IOWR('f', 121, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PUNCH		  _IOWR('f', 122, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_MODULE_DEBUG	   _IOWR('f', 124, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_BRW_READ	       _IOWR('f', 125, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_BRW_WRITE	      _IOWR('f', 126, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_NAME2DEV	       _IOWR('f', 127, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_UUID2DEV	       _IOWR('f', 130, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_GETNAME		_IOWR('f', 131, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETMDNAME	      _IOR('f', 131, char[MAX_OBD_NAME])
+#define OBD_IOC_GETDTNAME	       OBD_IOC_GETNAME
+
+#define OBD_IOC_LOV_GET_CONFIG	 _IOWR('f', 132, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLIENT_RECOVER	 _IOW ('f', 133, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PING_TARGET	    _IOW ('f', 136, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_DEC_FS_USE_COUNT       _IO  ('f', 139      )
+#define OBD_IOC_NO_TRANSNO	     _IOW ('f', 140, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SET_READONLY	   _IOW ('f', 141, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ABORT_RECOVERY	 _IOR ('f', 142, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_ROOT_SQUASH	    _IOWR('f', 143, OBD_IOC_DATA_TYPE)
+
+#define OBD_GET_VERSION		_IOWR ('f', 144, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_GSS_SUPPORT	    _IOWR('f', 145, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CLOSE_UUID	     _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CHANGELOG_SEND	 _IOW ('f', 148, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETDEVICE	      _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_FID2PATH	       _IOWR ('f', 150, OBD_IOC_DATA_TYPE)
+/* see also <lustre/lustre_user.h> for ioctls 151-153 */
+/* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */
+#define OBD_IOC_LOV_SETSTRIPE	  _IOW ('f', 154, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */
+#define OBD_IOC_LOV_GETSTRIPE	  _IOW ('f', 155, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */
+#define OBD_IOC_LOV_SETEA	      _IOW ('f', 156, OBD_IOC_DATA_TYPE)
+/* see <lustre/lustre_user.h> for ioctls 157-159 */
+/* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */
+#define OBD_IOC_QUOTACHECK	     _IOW ('f', 160, int)
+/* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */
+#define OBD_IOC_POLL_QUOTACHECK	_IOR ('f', 161, struct if_quotacheck *)
+/* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */
+#define OBD_IOC_QUOTACTL	       _IOWR('f', 162, struct if_quotactl)
+/* see  also <lustre/lustre_user.h> for ioctls 163-176 */
+#define OBD_IOC_CHANGELOG_REG	  _IOW ('f', 177, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_DEREG	_IOW ('f', 178, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_CLEAR	_IOW ('f', 179, struct obd_ioctl_data)
+#define OBD_IOC_RECORD		 _IOWR('f', 180, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ENDRECORD	      _IOWR('f', 181, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARSE		  _IOWR('f', 182, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DORECORD	       _IOWR('f', 183, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PROCESS_CFG	    _IOWR('f', 184, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DUMP_LOG	       _IOWR('f', 185, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLEAR_LOG	      _IOWR('f', 186, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARAM		  _IOW ('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_POOL		   _IOWR('f', 188, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_REPLACE_NIDS	   _IOWR('f', 189, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CATLOGLIST	     _IOWR('f', 190, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_INFO	      _IOWR('f', 191, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_PRINT	     _IOWR('f', 192, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CANCEL	    _IOWR('f', 193, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_REMOVE	    _IOWR('f', 194, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CHECK	     _IOWR('f', 195, OBD_IOC_DATA_TYPE)
+/* OBD_IOC_LLOG_CATINFO is deprecated */
+#define OBD_IOC_LLOG_CATINFO	   _IOWR('f', 196, OBD_IOC_DATA_TYPE)
+
+#define ECHO_IOC_GET_STRIPE	    _IOWR('f', 200, OBD_IOC_DATA_TYPE)
+#define ECHO_IOC_SET_STRIPE	    _IOWR('f', 201, OBD_IOC_DATA_TYPE)
+#define ECHO_IOC_ENQUEUE	       _IOWR('f', 202, OBD_IOC_DATA_TYPE)
+#define ECHO_IOC_CANCEL		_IOWR('f', 203, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_GET_OBJ_VERSION	_IOR('f', 210, OBD_IOC_DATA_TYPE)
+
+/* <lustre/lustre_user.h> defines ioctl number 218-219 */
+#define OBD_IOC_GET_MNTOPT	     _IOW('f', 220, mntopt_t)
+
+#define OBD_IOC_ECHO_MD		_IOR('f', 221, struct obd_ioctl_data)
+#define OBD_IOC_ECHO_ALLOC_SEQ	 _IOWR('f', 222, struct obd_ioctl_data)
+
+#define OBD_IOC_START_LFSCK	       _IOWR('f', 230, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_STOP_LFSCK	       _IOW('f', 231, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PAUSE_LFSCK	       _IOW('f', 232, OBD_IOC_DATA_TYPE)
+
+/* XXX _IOWR('f', 250, long) has been defined in
+ * libcfs/include/libcfs/libcfs_private.h for debug, don't use it
+ */
+
+/* Until such time as we get_info the per-stripe maximum from the OST,
+ * we define this to be 2T - 4k, which is the ext3 maxbytes. */
+#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
+
+/* Special values for remove LOV EA from disk */
+#define LOVEA_DELETE_VALUES(size, count, offset) (size == 0 && count == 0 && \
+						 offset == (typeof(offset))(-1))
+
+/* #define POISON_BULK 0 */
+
+/*
+ * l_wait_event is a flexible sleeping function, permitting simple caller
+ * configuration of interrupt and timeout sensitivity along with actions to
+ * be performed in the event of either exception.
+ *
+ * The first form of usage looks like this:
+ *
+ * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler,
+ *					   intr_handler, callback_data);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ *
+ * l_wait_event() makes the current process wait on 'waitq' until 'condition'
+ * is TRUE or a "killable" signal (SIGTERM, SIKGILL, SIGINT) is pending.  It
+ * returns 0 to signify 'condition' is TRUE, but if a signal wakes it before
+ * 'condition' becomes true, it optionally calls the specified 'intr_handler'
+ * if not NULL, and returns -EINTR.
+ *
+ * If a non-zero timeout is specified, signals are ignored until the timeout
+ * has expired.  At this time, if 'timeout_handler' is not NULL it is called.
+ * If it returns FALSE l_wait_event() continues to wait as described above with
+ * signals enabled.  Otherwise it returns -ETIMEDOUT.
+ *
+ * LWI_INTR(intr_handler, callback_data) is shorthand for
+ * LWI_TIMEOUT_INTR(0, NULL, intr_handler, callback_data)
+ *
+ * The second form of usage looks like this:
+ *
+ * struct l_wait_info lwi = LWI_TIMEOUT(timeout, timeout_handler);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ *
+ * This form is the same as the first except that it COMPLETELY IGNORES
+ * SIGNALS.  The caller must therefore beware that if 'timeout' is zero, or if
+ * 'timeout_handler' is not NULL and returns FALSE, then the ONLY thing that
+ * can unblock the current process is 'condition' becoming TRUE.
+ *
+ * Another form of usage is:
+ * struct l_wait_info lwi = LWI_TIMEOUT_INTERVAL(timeout, interval,
+ *					       timeout_handler);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ * This is the same as previous case, but condition is checked once every
+ * 'interval' jiffies (if non-zero).
+ *
+ * Subtle synchronization point: this macro does *not* necessary takes
+ * wait-queue spin-lock before returning, and, hence, following idiom is safe
+ * ONLY when caller provides some external locking:
+ *
+ *	     Thread1			    Thread2
+ *
+ *   l_wait_event(&obj->wq, ....);				       (1)
+ *
+ *				    wake_up(&obj->wq):		 (2)
+ *					 spin_lock(&q->lock);	  (2.1)
+ *					 __wake_up_common(q, ...);     (2.2)
+ *					 spin_unlock(&q->lock, flags); (2.3)
+ *
+ *   OBD_FREE_PTR(obj);						  (3)
+ *
+ * As l_wait_event() may "short-cut" execution and return without taking
+ * wait-queue spin-lock, some additional synchronization is necessary to
+ * guarantee that step (3) can begin only after (2.3) finishes.
+ *
+ * XXX nikita: some ptlrpc daemon threads have races of that sort.
+ *
+ */
+static inline int back_to_sleep(void *arg)
+{
+	return 0;
+}
+
+#define LWI_ON_SIGNAL_NOOP ((void (*)(void *))(-1))
+
+struct l_wait_info {
+	cfs_duration_t lwi_timeout;
+	cfs_duration_t lwi_interval;
+	int	    lwi_allow_intr;
+	int  (*lwi_on_timeout)(void *);
+	void (*lwi_on_signal)(void *);
+	void  *lwi_cb_data;
+};
+
+/* NB: LWI_TIMEOUT ignores signals completely */
+#define LWI_TIMEOUT(time, cb, data)	     \
+((struct l_wait_info) {			 \
+	.lwi_timeout    = time,		 \
+	.lwi_on_timeout = cb,		   \
+	.lwi_cb_data    = data,		 \
+	.lwi_interval   = 0,		    \
+	.lwi_allow_intr = 0		     \
+})
+
+#define LWI_TIMEOUT_INTERVAL(time, interval, cb, data)  \
+((struct l_wait_info) {				 \
+	.lwi_timeout    = time,			 \
+	.lwi_on_timeout = cb,			   \
+	.lwi_cb_data    = data,			 \
+	.lwi_interval   = interval,		     \
+	.lwi_allow_intr = 0			     \
+})
+
+#define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data)   \
+((struct l_wait_info) {				 \
+	.lwi_timeout    = time,			 \
+	.lwi_on_timeout = time_cb,		      \
+	.lwi_on_signal  = sig_cb,		       \
+	.lwi_cb_data    = data,			 \
+	.lwi_interval   = 0,			    \
+	.lwi_allow_intr = 0			     \
+})
+
+#define LWI_TIMEOUT_INTR_ALL(time, time_cb, sig_cb, data)       \
+((struct l_wait_info) {					 \
+	.lwi_timeout    = time,				 \
+	.lwi_on_timeout = time_cb,			      \
+	.lwi_on_signal  = sig_cb,			       \
+	.lwi_cb_data    = data,				 \
+	.lwi_interval   = 0,				    \
+	.lwi_allow_intr = 1				     \
+})
+
+#define LWI_INTR(cb, data)  LWI_TIMEOUT_INTR(0, NULL, cb, data)
+
+
+/*
+ * wait for @condition to become true, but no longer than timeout, specified
+ * by @info.
+ */
+#define __l_wait_event(wq, condition, info, ret, l_add_wait)		   \
+do {									   \
+	wait_queue_t __wait;						 \
+	cfs_duration_t __timeout = info->lwi_timeout;			  \
+	sigset_t   __blocked;					      \
+	int   __allow_intr = info->lwi_allow_intr;			     \
+									       \
+	ret = 0;							       \
+	if (condition)							 \
+		break;							 \
+									       \
+	init_waitqueue_entry_current(&__wait);					    \
+	l_add_wait(&wq, &__wait);					      \
+									       \
+	/* Block all signals (just the non-fatal ones if no timeout). */       \
+	if (info->lwi_on_signal != NULL && (__timeout == 0 || __allow_intr))   \
+		__blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS);	      \
+	else								   \
+		__blocked = cfs_block_sigsinv(0);			      \
+									       \
+	for (;;) {							     \
+		unsigned       __wstate;				       \
+									       \
+		__wstate = info->lwi_on_signal != NULL &&		      \
+			   (__timeout == 0 || __allow_intr) ?		  \
+			TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;	       \
+									       \
+		set_current_state(TASK_INTERRUPTIBLE);		 \
+									       \
+		if (condition)						 \
+			break;						 \
+									       \
+		if (__timeout == 0) {					  \
+			waitq_wait(&__wait, __wstate);		     \
+		} else {						       \
+			cfs_duration_t interval = info->lwi_interval?	  \
+					     min_t(cfs_duration_t,	     \
+						 info->lwi_interval,__timeout):\
+					     __timeout;			\
+			cfs_duration_t remaining = waitq_timedwait(&__wait,\
+						   __wstate,		   \
+						   interval);		  \
+			__timeout = cfs_time_sub(__timeout,		    \
+					    cfs_time_sub(interval, remaining));\
+			if (__timeout == 0) {				  \
+				if (info->lwi_on_timeout == NULL ||	    \
+				    info->lwi_on_timeout(info->lwi_cb_data)) { \
+					ret = -ETIMEDOUT;		      \
+					break;				 \
+				}					      \
+				/* Take signals after the timeout expires. */  \
+				if (info->lwi_on_signal != NULL)	       \
+				    (void)cfs_block_sigsinv(LUSTRE_FATAL_SIGS);\
+			}						      \
+		}							      \
+									       \
+		if (condition)						 \
+			break;						 \
+		if (cfs_signal_pending()) {				    \
+			if (info->lwi_on_signal != NULL &&		     \
+			    (__timeout == 0 || __allow_intr)) {		\
+				if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \
+					info->lwi_on_signal(info->lwi_cb_data);\
+				ret = -EINTR;				  \
+				break;					 \
+			}						      \
+			/* We have to do this here because some signals */     \
+			/* are not blockable - ie from strace(1).       */     \
+			/* In these cases we want to schedule_timeout() */     \
+			/* again, because we don't want that to return  */     \
+			/* -EINTR when the RPC actually succeeded.      */     \
+			/* the recalc_sigpending() below will deliver the */     \
+			/* signal properly.			     */     \
+			cfs_clear_sigpending();				\
+		}							      \
+	}								      \
+									       \
+	cfs_restore_sigs(__blocked);					   \
+									       \
+	set_current_state(TASK_RUNNING);			       \
+	remove_wait_queue(&wq, &__wait);					   \
+} while (0)
+
+
+
+#define l_wait_event(wq, condition, info)		       \
+({							      \
+	int		 __ret;			      \
+	struct l_wait_info *__info = (info);		    \
+								\
+	__l_wait_event(wq, condition, __info,		   \
+		       __ret, add_wait_queue);		   \
+	__ret;						  \
+})
+
+#define l_wait_event_exclusive(wq, condition, info)	     \
+({							      \
+	int		 __ret;			      \
+	struct l_wait_info *__info = (info);		    \
+								\
+	__l_wait_event(wq, condition, __info,		   \
+		       __ret, add_wait_queue_exclusive);	 \
+	__ret;						  \
+})
+
+#define l_wait_event_exclusive_head(wq, condition, info)	\
+({							      \
+	int		 __ret;			      \
+	struct l_wait_info *__info = (info);		    \
+								\
+	__l_wait_event(wq, condition, __info,		   \
+		       __ret, add_wait_queue_exclusive_head);    \
+	__ret;						  \
+})
+
+#define l_wait_condition(wq, condition)			 \
+({							      \
+	struct l_wait_info lwi = { 0 };			 \
+	l_wait_event(wq, condition, &lwi);		      \
+})
+
+#define l_wait_condition_exclusive(wq, condition)	       \
+({							      \
+	struct l_wait_info lwi = { 0 };			 \
+	l_wait_event_exclusive(wq, condition, &lwi);	    \
+})
+
+#define l_wait_condition_exclusive_head(wq, condition)	  \
+({							      \
+	struct l_wait_info lwi = { 0 };			 \
+	l_wait_event_exclusive_head(wq, condition, &lwi);       \
+})
+
+#define LIBLUSTRE_CLIENT (0)
+
+/** @} lib */
+
+#endif /* _LUSTRE_LIB_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_linkea.h b/drivers/staging/lustre/lustre/include/lustre_linkea.h
new file mode 100644
index 000000000000..5790be913bf6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_linkea.h
@@ -0,0 +1,57 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: di wang <di.wang@intel.com>
+ */
+
+struct linkea_data {
+	/**
+	 * Buffer to keep link EA body.
+	 */
+	struct lu_buf		*ld_buf;
+	/**
+	 * The matched header, entry and its lenght in the EA
+	 */
+	struct link_ea_header	*ld_leh;
+	struct link_ea_entry	*ld_lee;
+	int			ld_reclen;
+};
+
+int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf);
+int linkea_init(struct linkea_data *ldata);
+void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen,
+			 struct lu_name *lname, struct lu_fid *pfid);
+int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
+		   const struct lu_fid *pfid);
+void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname);
+int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
+		      const struct lu_fid  *pfid);
+
+#define LINKEA_NEXT_ENTRY(ldata)	\
+	(struct link_ea_entry *)((char *)ldata.ld_lee + ldata.ld_reclen)
+
+#define LINKEA_FIRST_ENTRY(ldata)	\
+	(struct link_ea_entry *)(ldata.ld_leh + 1)
diff --git a/drivers/staging/lustre/lustre/include/lustre_lite.h b/drivers/staging/lustre/lustre/include/lustre_lite.h
new file mode 100644
index 000000000000..25f8bfaccef3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_lite.h
@@ -0,0 +1,147 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LL_H
+#define _LL_H
+
+/** \defgroup lite lite
+ *
+ * @{
+ */
+
+#include <linux/lustre_lite.h>
+
+#include <obd_class.h>
+#include <obd_ost.h>
+#include <lustre_net.h>
+#include <lustre_mds.h>
+#include <lustre_ha.h>
+
+/* 4UL * 1024 * 1024 */
+#define LL_MAX_BLKSIZE_BITS     (22)
+#define LL_MAX_BLKSIZE	  (1UL<<LL_MAX_BLKSIZE_BITS)
+
+#include <lustre/lustre_user.h>
+
+
+struct lustre_rw_params {
+	int		lrp_lock_mode;
+	ldlm_policy_data_t lrp_policy;
+	obd_flag	   lrp_brw_flags;
+	int		lrp_ast_flags;
+};
+
+/*
+ * XXX nikita: this function lives in the header because it is used by both
+ * llite kernel module and liblustre library, and there is no (?) better place
+ * to put it in.
+ */
+static inline void lustre_build_lock_params(int cmd, unsigned long open_flags,
+					    __u64 connect_flags,
+					    loff_t pos, ssize_t len,
+					    struct lustre_rw_params *params)
+{
+	params->lrp_lock_mode = (cmd == OBD_BRW_READ) ? LCK_PR : LCK_PW;
+	params->lrp_brw_flags = 0;
+
+	params->lrp_policy.l_extent.start = pos;
+	params->lrp_policy.l_extent.end = pos + len - 1;
+	/*
+	 * for now O_APPEND always takes local locks.
+	 */
+	if (cmd == OBD_BRW_WRITE && (open_flags & O_APPEND)) {
+		params->lrp_policy.l_extent.start = 0;
+		params->lrp_policy.l_extent.end   = OBD_OBJECT_EOF;
+	} else if (LIBLUSTRE_CLIENT && (connect_flags & OBD_CONNECT_SRVLOCK)) {
+		/*
+		 * liblustre: OST-side locking for all non-O_APPEND
+		 * reads/writes.
+		 */
+		params->lrp_lock_mode = LCK_NL;
+		params->lrp_brw_flags = OBD_BRW_SRVLOCK;
+	} else {
+		/*
+		 * nothing special for the kernel. In the future llite may use
+		 * OST-side locks for small writes into highly contended
+		 * files.
+		 */
+	}
+	params->lrp_ast_flags = (open_flags & O_NONBLOCK) ?
+		LDLM_FL_BLOCK_NOWAIT : 0;
+}
+
+/*
+ * This is embedded into liblustre and llite super-blocks to keep track of
+ * connect flags (capabilities) supported by all imports given mount is
+ * connected to.
+ */
+struct lustre_client_ocd {
+	/*
+	 * This is conjunction of connect_flags across all imports (LOVs) this
+	 * mount is connected to. This field is updated by cl_ocd_update()
+	 * under ->lco_lock.
+	 */
+	__u64	      lco_flags;
+	struct mutex	   lco_lock;
+	struct obd_export *lco_md_exp;
+	struct obd_export *lco_dt_exp;
+};
+
+/*
+ * Chain of hash overflow pages.
+ */
+struct ll_dir_chain {
+	/* XXX something. Later */
+};
+
+static inline void ll_dir_chain_init(struct ll_dir_chain *chain)
+{
+}
+
+static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
+{
+}
+
+static inline unsigned long hash_x_index(__u64 hash, int hash64)
+{
+	if (BITS_PER_LONG == 32 && hash64)
+		hash >>= 32;
+	return ~0UL - hash;
+}
+
+/** @} lite */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
new file mode 100644
index 000000000000..714ab378e431
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_log.h
@@ -0,0 +1,576 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_log.h
+ *
+ * Generic infrastructure for managing a collection of logs.
+ * These logs are used for:
+ *
+ * - orphan recovery: OST adds record on create
+ * - mtime/size consistency: the OST adds a record on first write
+ * - open/unlinked objects: OST adds a record on destroy
+ *
+ * - mds unlink log: the MDS adds an entry upon delete
+ *
+ * - raid1 replication log between OST's
+ * - MDS replication logs
+ */
+
+#ifndef _LUSTRE_LOG_H
+#define _LUSTRE_LOG_H
+
+/** \defgroup log log
+ *
+ * @{
+ */
+
+#include <linux/lustre_log.h>
+
+#include <obd_class.h>
+#include <obd_ost.h>
+#include <lustre/lustre_idl.h>
+#include <dt_object.h>
+
+#define LOG_NAME_LIMIT(logname, name)		   \
+	snprintf(logname, sizeof(logname), "LOGS/%s", name)
+#define LLOG_EEMPTY 4711
+
+enum llog_open_param {
+	LLOG_OPEN_EXISTS	= 0x0000,
+	LLOG_OPEN_NEW		= 0x0001,
+};
+
+struct plain_handle_data {
+	struct list_head	  phd_entry;
+	struct llog_handle *phd_cat_handle;
+	struct llog_cookie  phd_cookie; /* cookie of this log in its cat */
+};
+
+struct cat_handle_data {
+	struct list_head	      chd_head;
+	struct llog_handle     *chd_current_log; /* currently open log */
+	struct llog_handle	*chd_next_log; /* llog to be used next */
+};
+
+static inline void logid_to_fid(struct llog_logid *id, struct lu_fid *fid)
+{
+	/* For compatibility purposes we identify pre-OSD (~< 2.3.51 MDS)
+	 * logid's by non-zero ogen (inode generation) and convert them
+	 * into IGIF */
+	if (id->lgl_ogen == 0) {
+		fid->f_seq = id->lgl_oi.oi.oi_seq;
+		fid->f_oid = id->lgl_oi.oi.oi_id;
+		fid->f_ver = 0;
+	} else {
+		lu_igif_build(fid, id->lgl_oi.oi.oi_id, id->lgl_ogen);
+	}
+}
+
+static inline void fid_to_logid(struct lu_fid *fid, struct llog_logid *id)
+{
+	id->lgl_oi.oi.oi_seq = fid->f_seq;
+	id->lgl_oi.oi.oi_id = fid->f_oid;
+	id->lgl_ogen = 0;
+}
+
+static inline void logid_set_id(struct llog_logid *log_id, __u64 id)
+{
+	log_id->lgl_oi.oi.oi_id = id;
+}
+
+static inline __u64 logid_id(struct llog_logid *log_id)
+{
+	return log_id->lgl_oi.oi.oi_id;
+}
+
+struct llog_handle;
+
+/* llog.c  -  general API */
+int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
+		     int flags, struct obd_uuid *uuid);
+int llog_copy_handler(const struct lu_env *env, struct llog_handle *llh,
+		      struct llog_rec_hdr *rec, void *data);
+int llog_process(const struct lu_env *env, struct llog_handle *loghandle,
+		 llog_cb_t cb, void *data, void *catdata);
+int llog_process_or_fork(const struct lu_env *env,
+			 struct llog_handle *loghandle,
+			 llog_cb_t cb, void *data, void *catdata, bool fork);
+int llog_reverse_process(const struct lu_env *env,
+			 struct llog_handle *loghandle, llog_cb_t cb,
+			 void *data, void *catdata);
+int llog_cancel_rec(const struct lu_env *env, struct llog_handle *loghandle,
+		    int index);
+int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt,
+	      struct llog_handle **lgh, struct llog_logid *logid,
+	      char *name, enum llog_open_param open_param);
+int llog_close(const struct lu_env *env, struct llog_handle *cathandle);
+int llog_get_size(struct llog_handle *loghandle);
+
+/* llog_process flags */
+#define LLOG_FLAG_NODEAMON 0x0001
+
+/* llog_cat.c - catalog api */
+struct llog_process_data {
+	/**
+	 * Any useful data needed while processing catalog. This is
+	 * passed later to process callback.
+	 */
+	void		*lpd_data;
+	/**
+	 * Catalog process callback function, called for each record
+	 * in catalog.
+	 */
+	llog_cb_t	    lpd_cb;
+	/**
+	 * Start processing the catalog from startcat/startidx
+	 */
+	int		  lpd_startcat;
+	int		  lpd_startidx;
+};
+
+struct llog_process_cat_data {
+	/**
+	 * Temporary stored first_idx while scanning log.
+	 */
+	int		  lpcd_first_idx;
+	/**
+	 * Temporary stored last_idx while scanning log.
+	 */
+	int		  lpcd_last_idx;
+};
+
+int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle);
+int llog_cat_add_rec(const struct lu_env *env, struct llog_handle *cathandle,
+		     struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
+		     void *buf, struct thandle *th);
+int llog_cat_declare_add_rec(const struct lu_env *env,
+			     struct llog_handle *cathandle,
+			     struct llog_rec_hdr *rec, struct thandle *th);
+int llog_cat_add(const struct lu_env *env, struct llog_handle *cathandle,
+		 struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
+		 void *buf);
+int llog_cat_cancel_records(const struct lu_env *env,
+			    struct llog_handle *cathandle, int count,
+			    struct llog_cookie *cookies);
+int llog_cat_process_or_fork(const struct lu_env *env,
+			     struct llog_handle *cat_llh, llog_cb_t cb,
+			     void *data, int startcat, int startidx, bool fork);
+int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh,
+		     llog_cb_t cb, void *data, int startcat, int startidx);
+int llog_cat_reverse_process(const struct lu_env *env,
+			     struct llog_handle *cat_llh, llog_cb_t cb,
+			     void *data);
+int llog_cat_init_and_process(const struct lu_env *env,
+			      struct llog_handle *llh);
+
+/* llog_obd.c */
+int llog_setup(const struct lu_env *env, struct obd_device *obd,
+	       struct obd_llog_group *olg, int index,
+	       struct obd_device *disk_obd, struct llog_operations *op);
+int __llog_ctxt_put(const struct lu_env *env, struct llog_ctxt *ctxt);
+int llog_cleanup(const struct lu_env *env, struct llog_ctxt *);
+int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp, int flags);
+int llog_obd_add(const struct lu_env *env, struct llog_ctxt *ctxt,
+		 struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
+		 struct llog_cookie *logcookies, int numcookies);
+int llog_cancel(const struct lu_env *env, struct llog_ctxt *ctxt,
+		struct lov_stripe_md *lsm, int count,
+		struct llog_cookie *cookies, int flags);
+
+int obd_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
+		  struct obd_device *disk_obd, int *idx);
+
+int obd_llog_finish(struct obd_device *obd, int count);
+
+/* llog_ioctl.c */
+int llog_ioctl(const struct lu_env *env, struct llog_ctxt *ctxt, int cmd,
+	       struct obd_ioctl_data *data);
+
+/* llog_net.c */
+int llog_initiator_connect(struct llog_ctxt *ctxt);
+
+struct llog_operations {
+	int (*lop_destroy)(const struct lu_env *env,
+			   struct llog_handle *handle);
+	int (*lop_next_block)(const struct lu_env *env, struct llog_handle *h,
+			      int *curr_idx, int next_idx, __u64 *offset,
+			      void *buf, int len);
+	int (*lop_prev_block)(const struct lu_env *env, struct llog_handle *h,
+			      int prev_idx, void *buf, int len);
+	int (*lop_read_header)(const struct lu_env *env,
+			       struct llog_handle *handle);
+	int (*lop_setup)(const struct lu_env *env, struct obd_device *obd,
+			 struct obd_llog_group *olg, int ctxt_idx,
+			 struct obd_device *disk_obd);
+	int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp,
+			int flags);
+	int (*lop_cleanup)(const struct lu_env *env, struct llog_ctxt *ctxt);
+	int (*lop_cancel)(const struct lu_env *env, struct llog_ctxt *ctxt,
+			  struct lov_stripe_md *lsm, int count,
+			  struct llog_cookie *cookies, int flags);
+	int (*lop_connect)(struct llog_ctxt *ctxt, struct llog_logid *logid,
+			   struct llog_gen *gen, struct obd_uuid *uuid);
+	/**
+	 * Any llog file must be opened first using llog_open().  Llog can be
+	 * opened by name, logid or without both, in last case the new logid
+	 * will be generated.
+	 */
+	int (*lop_open)(const struct lu_env *env, struct llog_handle *lgh,
+			struct llog_logid *logid, char *name,
+			enum llog_open_param);
+	/**
+	 * Opened llog may not exist and this must be checked where needed using
+	 * the llog_exist() call.
+	 */
+	int (*lop_exist)(struct llog_handle *lgh);
+	/**
+	 * Close llog file and calls llog_free_handle() implicitly.
+	 * Any opened llog must be closed by llog_close() call.
+	 */
+	int (*lop_close)(const struct lu_env *env, struct llog_handle *handle);
+	/**
+	 * Create new llog file. The llog must be opened.
+	 * Must be used only for local llog operations.
+	 */
+	int (*lop_declare_create)(const struct lu_env *env,
+				  struct llog_handle *handle,
+				  struct thandle *th);
+	int (*lop_create)(const struct lu_env *env, struct llog_handle *handle,
+			  struct thandle *th);
+	/**
+	 * write new record in llog. It appends records usually but can edit
+	 * existing records too.
+	 */
+	int (*lop_declare_write_rec)(const struct lu_env *env,
+				     struct llog_handle *lgh,
+				     struct llog_rec_hdr *rec,
+				     int idx, struct thandle *th);
+	int (*lop_write_rec)(const struct lu_env *env,
+			     struct llog_handle *loghandle,
+			     struct llog_rec_hdr *rec,
+			     struct llog_cookie *cookie, int cookiecount,
+			     void *buf, int idx, struct thandle *th);
+	/**
+	 * Add new record in llog catalog. Does the same as llog_write_rec()
+	 * but using llog catalog.
+	 */
+	int (*lop_declare_add)(const struct lu_env *env,
+			       struct llog_handle *lgh,
+			       struct llog_rec_hdr *rec, struct thandle *th);
+	int (*lop_add)(const struct lu_env *env, struct llog_handle *lgh,
+		       struct llog_rec_hdr *rec, struct llog_cookie *cookie,
+		       void *buf, struct thandle *th);
+	/* Old llog_add version, used in MDS-LOV-OSC now and will gone with
+	 * LOD/OSP replacement */
+	int (*lop_obd_add)(const struct lu_env *env, struct llog_ctxt *ctxt,
+			   struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
+			   struct llog_cookie *logcookies, int numcookies);
+};
+
+/* In-memory descriptor for a log object or log catalog */
+struct llog_handle {
+	struct rw_semaphore	 lgh_lock;
+	spinlock_t		 lgh_hdr_lock; /* protect lgh_hdr data */
+	struct llog_logid	 lgh_id; /* id of this log */
+	struct llog_log_hdr	*lgh_hdr;
+	struct file		*lgh_file;
+	struct dt_object	*lgh_obj;
+	int			 lgh_last_idx;
+	int			 lgh_cur_idx; /* used during llog_process */
+	__u64			 lgh_cur_offset; /* used during llog_process */
+	struct llog_ctxt	*lgh_ctxt;
+	union {
+		struct plain_handle_data	 phd;
+		struct cat_handle_data		 chd;
+	} u;
+	char			*lgh_name;
+	void			*private_data;
+	struct llog_operations	*lgh_logops;
+	atomic_t		 lgh_refcount;
+};
+
+/* llog_lvfs.c */
+extern struct llog_operations llog_lvfs_ops;
+
+/* llog_osd.c */
+extern struct llog_operations llog_osd_ops;
+int llog_osd_get_cat_list(const struct lu_env *env, struct dt_device *d,
+			  int idx, int count,
+			  struct llog_catid *idarray);
+int llog_osd_put_cat_list(const struct lu_env *env, struct dt_device *d,
+			  int idx, int count,
+			  struct llog_catid *idarray);
+
+#define LLOG_CTXT_FLAG_UNINITIALIZED     0x00000001
+#define LLOG_CTXT_FLAG_STOP		 0x00000002
+
+struct llog_ctxt {
+	int		      loc_idx; /* my index the obd array of ctxt's */
+	struct obd_device       *loc_obd; /* points back to the containing obd*/
+	struct obd_llog_group   *loc_olg; /* group containing that ctxt */
+	struct obd_export       *loc_exp; /* parent "disk" export (e.g. MDS) */
+	struct obd_import       *loc_imp; /* to use in RPC's: can be backward
+					     pointing import */
+	struct llog_operations  *loc_logops;
+	struct llog_handle      *loc_handle;
+	struct mutex		 loc_mutex; /* protect loc_imp */
+	atomic_t	     loc_refcount;
+	long		     loc_flags; /* flags, see above defines */
+	struct dt_object	*loc_dir;
+};
+
+#define LLOG_PROC_BREAK 0x0001
+#define LLOG_DEL_RECORD 0x0002
+
+static inline int llog_obd2ops(struct llog_ctxt *ctxt,
+			       struct llog_operations **lop)
+{
+	if (ctxt == NULL)
+		return -ENOTCONN;
+
+	*lop = ctxt->loc_logops;
+	if (*lop == NULL)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static inline int llog_handle2ops(struct llog_handle *loghandle,
+				  struct llog_operations **lop)
+{
+	if (loghandle == NULL || loghandle->lgh_logops == NULL)
+		return -EINVAL;
+
+	*lop = loghandle->lgh_logops;
+	return 0;
+}
+
+static inline int llog_data_len(int len)
+{
+	return cfs_size_round(len);
+}
+
+static inline struct llog_ctxt *llog_ctxt_get(struct llog_ctxt *ctxt)
+{
+	atomic_inc(&ctxt->loc_refcount);
+	CDEBUG(D_INFO, "GETting ctxt %p : new refcount %d\n", ctxt,
+	       atomic_read(&ctxt->loc_refcount));
+	return ctxt;
+}
+
+static inline void llog_ctxt_put(struct llog_ctxt *ctxt)
+{
+	if (ctxt == NULL)
+		return;
+	LASSERT_ATOMIC_GT_LT(&ctxt->loc_refcount, 0, LI_POISON);
+	CDEBUG(D_INFO, "PUTting ctxt %p : new refcount %d\n", ctxt,
+	       atomic_read(&ctxt->loc_refcount) - 1);
+	__llog_ctxt_put(NULL, ctxt);
+}
+
+static inline void llog_group_init(struct obd_llog_group *olg, int group)
+{
+	init_waitqueue_head(&olg->olg_waitq);
+	spin_lock_init(&olg->olg_lock);
+	mutex_init(&olg->olg_cat_processing);
+	olg->olg_seq = group;
+}
+
+static inline int llog_group_set_ctxt(struct obd_llog_group *olg,
+				      struct llog_ctxt *ctxt, int index)
+{
+	LASSERT(index >= 0 && index < LLOG_MAX_CTXTS);
+
+	spin_lock(&olg->olg_lock);
+	if (olg->olg_ctxts[index] != NULL) {
+		spin_unlock(&olg->olg_lock);
+		return -EEXIST;
+	}
+	olg->olg_ctxts[index] = ctxt;
+	spin_unlock(&olg->olg_lock);
+	return 0;
+}
+
+static inline struct llog_ctxt *llog_group_get_ctxt(struct obd_llog_group *olg,
+						    int index)
+{
+	struct llog_ctxt *ctxt;
+
+	LASSERT(index >= 0 && index < LLOG_MAX_CTXTS);
+
+	spin_lock(&olg->olg_lock);
+	if (olg->olg_ctxts[index] == NULL)
+		ctxt = NULL;
+	else
+		ctxt = llog_ctxt_get(olg->olg_ctxts[index]);
+	spin_unlock(&olg->olg_lock);
+	return ctxt;
+}
+
+static inline void llog_group_clear_ctxt(struct obd_llog_group *olg, int index)
+{
+	LASSERT(index >= 0 && index < LLOG_MAX_CTXTS);
+	spin_lock(&olg->olg_lock);
+	olg->olg_ctxts[index] = NULL;
+	spin_unlock(&olg->olg_lock);
+}
+
+static inline struct llog_ctxt *llog_get_context(struct obd_device *obd,
+						 int index)
+{
+	return llog_group_get_ctxt(&obd->obd_olg, index);
+}
+
+static inline int llog_group_ctxt_null(struct obd_llog_group *olg, int index)
+{
+	return (olg->olg_ctxts[index] == NULL);
+}
+
+static inline int llog_ctxt_null(struct obd_device *obd, int index)
+{
+	return (llog_group_ctxt_null(&obd->obd_olg, index));
+}
+
+static inline int llog_destroy(const struct lu_env *env,
+			       struct llog_handle *handle)
+{
+	struct llog_operations *lop;
+	int rc;
+
+	ENTRY;
+
+	rc = llog_handle2ops(handle, &lop);
+	if (rc)
+		RETURN(rc);
+	if (lop->lop_destroy == NULL)
+		RETURN(-EOPNOTSUPP);
+
+	rc = lop->lop_destroy(env, handle);
+	RETURN(rc);
+}
+
+static inline int llog_next_block(const struct lu_env *env,
+				  struct llog_handle *loghandle, int *cur_idx,
+				  int next_idx, __u64 *cur_offset, void *buf,
+				  int len)
+{
+	struct llog_operations *lop;
+	int rc;
+
+	ENTRY;
+
+	rc = llog_handle2ops(loghandle, &lop);
+	if (rc)
+		RETURN(rc);
+	if (lop->lop_next_block == NULL)
+		RETURN(-EOPNOTSUPP);
+
+	rc = lop->lop_next_block(env, loghandle, cur_idx, next_idx,
+				 cur_offset, buf, len);
+	RETURN(rc);
+}
+
+static inline int llog_prev_block(const struct lu_env *env,
+				  struct llog_handle *loghandle,
+				  int prev_idx, void *buf, int len)
+{
+	struct llog_operations *lop;
+	int rc;
+
+	ENTRY;
+
+	rc = llog_handle2ops(loghandle, &lop);
+	if (rc)
+		RETURN(rc);
+	if (lop->lop_prev_block == NULL)
+		RETURN(-EOPNOTSUPP);
+
+	rc = lop->lop_prev_block(env, loghandle, prev_idx, buf, len);
+	RETURN(rc);
+}
+
+static inline int llog_connect(struct llog_ctxt *ctxt,
+			       struct llog_logid *logid, struct llog_gen *gen,
+			       struct obd_uuid *uuid)
+{
+	struct llog_operations	*lop;
+	int			 rc;
+
+	ENTRY;
+
+	rc = llog_obd2ops(ctxt, &lop);
+	if (rc)
+		RETURN(rc);
+	if (lop->lop_connect == NULL)
+		RETURN(-EOPNOTSUPP);
+
+	rc = lop->lop_connect(ctxt, logid, gen, uuid);
+	RETURN(rc);
+}
+
+/* llog.c */
+int llog_exist(struct llog_handle *loghandle);
+int llog_declare_create(const struct lu_env *env,
+			struct llog_handle *loghandle, struct thandle *th);
+int llog_create(const struct lu_env *env, struct llog_handle *handle,
+		struct thandle *th);
+int llog_declare_write_rec(const struct lu_env *env,
+			   struct llog_handle *handle,
+			   struct llog_rec_hdr *rec, int idx,
+			   struct thandle *th);
+int llog_write_rec(const struct lu_env *env, struct llog_handle *handle,
+		   struct llog_rec_hdr *rec, struct llog_cookie *logcookies,
+		   int numcookies, void *buf, int idx, struct thandle *th);
+int llog_add(const struct lu_env *env, struct llog_handle *lgh,
+	     struct llog_rec_hdr *rec, struct llog_cookie *logcookies,
+	     void *buf, struct thandle *th);
+int llog_declare_add(const struct lu_env *env, struct llog_handle *lgh,
+		     struct llog_rec_hdr *rec, struct thandle *th);
+int lustre_process_log(struct super_block *sb, char *logname,
+		       struct config_llog_instance *cfg);
+int lustre_end_log(struct super_block *sb, char *logname,
+		   struct config_llog_instance *cfg);
+int llog_open_create(const struct lu_env *env, struct llog_ctxt *ctxt,
+		     struct llog_handle **res, struct llog_logid *logid,
+		     char *name);
+int llog_erase(const struct lu_env *env, struct llog_ctxt *ctxt,
+	       struct llog_logid *logid, char *name);
+int llog_write(const struct lu_env *env, struct llog_handle *loghandle,
+	       struct llog_rec_hdr *rec, struct llog_cookie *reccookie,
+	       int cookiecount, void *buf, int idx);
+
+/** @} log */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
new file mode 100644
index 000000000000..fb1561a809b9
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -0,0 +1,176 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_mdc.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_MDC_H
+#define _LUSTRE_MDC_H
+
+/** \defgroup mdc mdc
+ *
+ * @{
+ */
+
+# include <linux/fs.h>
+# include <linux/dcache.h>
+# ifdef CONFIG_FS_POSIX_ACL
+#  include <linux/posix_acl_xattr.h>
+# endif /* CONFIG_FS_POSIX_ACL */
+# include <linux/lustre_intent.h>
+#include <lustre_handles.h>
+#include <linux/libcfs/libcfs.h>
+#include <obd_class.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_export.h>
+
+struct ptlrpc_client;
+struct obd_export;
+struct ptlrpc_request;
+struct obd_device;
+
+struct mdc_rpc_lock {
+	struct mutex		rpcl_mutex;
+	struct lookup_intent	*rpcl_it;
+	int			rpcl_fakes;
+};
+
+#define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL)
+
+static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
+{
+	mutex_init(&lck->rpcl_mutex);
+	lck->rpcl_it = NULL;
+}
+
+static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
+				    struct lookup_intent *it)
+{
+	ENTRY;
+
+	if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP))
+		return;
+
+	/* This would normally block until the existing request finishes.
+	 * If fail_loc is set it will block until the regular request is
+	 * done, then set rpcl_it to MDC_FAKE_RPCL_IT.  Once that is set
+	 * it will only be cleared when all fake requests are finished.
+	 * Only when all fake requests are finished can normal requests
+	 * be sent, to ensure they are recoverable again. */
+ again:
+	mutex_lock(&lck->rpcl_mutex);
+
+	if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) {
+		lck->rpcl_it = MDC_FAKE_RPCL_IT;
+		lck->rpcl_fakes++;
+		mutex_unlock(&lck->rpcl_mutex);
+		return;
+	}
+
+	/* This will only happen when the CFS_FAIL_CHECK() was
+	 * just turned off but there are still requests in progress.
+	 * Wait until they finish.  It doesn't need to be efficient
+	 * in this extremely rare case, just have low overhead in
+	 * the common case when it isn't true. */
+	while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) {
+		mutex_unlock(&lck->rpcl_mutex);
+		schedule_timeout(cfs_time_seconds(1) / 4);
+		goto again;
+	}
+
+	LASSERT(lck->rpcl_it == NULL);
+	lck->rpcl_it = it;
+}
+
+static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
+				    struct lookup_intent *it)
+{
+	if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP))
+		goto out;
+
+	if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */
+		mutex_lock(&lck->rpcl_mutex);
+
+		LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes);
+		lck->rpcl_fakes--;
+
+		if (lck->rpcl_fakes == 0)
+			lck->rpcl_it = NULL;
+
+	} else {
+		LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it);
+		lck->rpcl_it = NULL;
+	}
+
+	mutex_unlock(&lck->rpcl_mutex);
+ out:
+	EXIT;
+}
+
+static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
+					       struct mdt_body *body)
+{
+	if (body->valid & OBD_MD_FLMODEASIZE) {
+		if (exp->exp_obd->u.cli.cl_max_mds_easize < body->max_mdsize)
+			exp->exp_obd->u.cli.cl_max_mds_easize =
+						body->max_mdsize;
+		if (exp->exp_obd->u.cli.cl_max_mds_cookiesize <
+						body->max_cookiesize)
+			exp->exp_obd->u.cli.cl_max_mds_cookiesize =
+						body->max_cookiesize;
+	}
+}
+
+
+struct mdc_cache_waiter {
+	struct list_head	      mcw_entry;
+	wait_queue_head_t	     mcw_waitq;
+};
+
+/* mdc/mdc_locks.c */
+int it_disposition(struct lookup_intent *it, int flag);
+void it_clear_disposition(struct lookup_intent *it, int flag);
+void it_set_disposition(struct lookup_intent *it, int flag);
+int it_open_error(int phase, struct lookup_intent *it);
+
+/** @} mdc */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h
new file mode 100644
index 000000000000..b386f87471e3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_mds.h
@@ -0,0 +1,81 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_mds.h
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_MDS_H
+#define _LUSTRE_MDS_H
+
+/** \defgroup mds mds
+ *
+ * @{
+ */
+
+#include <lustre_handles.h>
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_export.h>
+
+struct mds_group_info {
+	struct obd_uuid *uuid;
+	int group;
+};
+
+struct mds_capa_info {
+	struct obd_uuid	*uuid;
+	struct lustre_capa_key *capa;
+};
+
+#define MDD_OBD_NAME     "mdd_obd"
+#define MDD_OBD_UUID     "mdd_obd_uuid"
+
+static inline int md_should_create(__u64 flags)
+{
+       return !(flags & MDS_OPEN_DELAY_CREATE ||
+	       !(flags & FMODE_WRITE));
+}
+
+/* these are local flags, used only on the client, private */
+#define M_CHECK_STALE	   0200000000
+
+/** @} mds */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdt.h b/drivers/staging/lustre/lustre/include/lustre_mdt.h
new file mode 100644
index 000000000000..dba26a6cfa38
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_mdt.h
@@ -0,0 +1,84 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __LINUX_MDT_H
+#define __LINUX_MDT_H
+
+/** \defgroup mdt mdt
+ *
+ * @{
+ */
+
+#include <lustre/lustre_idl.h>
+#include <lustre_req_layout.h>
+#include <md_object.h>
+#include <dt_object.h>
+#include <linux/libcfs/libcfs.h>
+
+/*
+ * Common thread info for mdt, seq and fld
+ */
+struct com_thread_info {
+	/*
+	 * for req-layout interface.
+	 */
+	struct req_capsule *cti_pill;
+};
+
+enum {
+	ESERIOUS = 0x0001000
+};
+
+static inline int err_serious(int rc)
+{
+	LASSERT(rc < 0);
+	LASSERT(-rc < ESERIOUS);
+	return -(-rc | ESERIOUS);
+}
+
+static inline int clear_serious(int rc)
+{
+	if (rc < 0)
+		rc = -(-rc & ~ESERIOUS);
+	return rc;
+}
+
+static inline int is_serious(int rc)
+{
+	return (rc < 0 && -rc & ESERIOUS);
+}
+
+/** @} mdt */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
new file mode 100644
index 000000000000..293dd90e5b6c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -0,0 +1,3451 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+/** \defgroup PtlRPC Portal RPC and networking module.
+ *
+ * PortalRPC is the layer used by rest of lustre code to achieve network
+ * communications: establish connections with corresponding export and import
+ * states, listen for a service, send and receive RPCs.
+ * PortalRPC also includes base recovery framework: packet resending and
+ * replaying, reconnections, pinger.
+ *
+ * PortalRPC utilizes LNet as its transport layer.
+ *
+ * @{
+ */
+
+
+#ifndef _LUSTRE_NET_H
+#define _LUSTRE_NET_H
+
+/** \defgroup net net
+ *
+ * @{
+ */
+
+#include <linux/lustre_net.h>
+
+#include <linux/libcfs/libcfs.h>
+// #include <obd.h>
+#include <linux/lnet/lnet.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_ha.h>
+#include <lustre_sec.h>
+#include <lustre_import.h>
+#include <lprocfs_status.h>
+#include <lu_object.h>
+#include <lustre_req_layout.h>
+
+#include <obd_support.h>
+#include <lustre_ver.h>
+
+/* MD flags we _always_ use */
+#define PTLRPC_MD_OPTIONS  0
+
+/**
+ * Max # of bulk operations in one request.
+ * In order for the client and server to properly negotiate the maximum
+ * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two
+ * value.  The client is free to limit the actual RPC size for any bulk
+ * transfer via cl_max_pages_per_rpc to some non-power-of-two value. */
+#define PTLRPC_BULK_OPS_BITS	2
+#define PTLRPC_BULK_OPS_COUNT	(1U << PTLRPC_BULK_OPS_BITS)
+/**
+ * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and
+ * should not be used on the server at all.  Otherwise, it imposes a
+ * protocol limitation on the maximum RPC size that can be used by any
+ * RPC sent to that server in the future.  Instead, the server should
+ * use the negotiated per-client ocd_brw_size to determine the bulk
+ * RPC count. */
+#define PTLRPC_BULK_OPS_MASK	(~((__u64)PTLRPC_BULK_OPS_COUNT - 1))
+
+/**
+ * Define maxima for bulk I/O.
+ *
+ * A single PTLRPC BRW request is sent via up to PTLRPC_BULK_OPS_COUNT
+ * of LNET_MTU sized RDMA transfers.  Clients and servers negotiate the
+ * currently supported maximum between peers at connect via ocd_brw_size.
+ */
+#define PTLRPC_MAX_BRW_BITS	(LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS)
+#define PTLRPC_MAX_BRW_SIZE	(1 << PTLRPC_MAX_BRW_BITS)
+#define PTLRPC_MAX_BRW_PAGES	(PTLRPC_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+
+#define ONE_MB_BRW_SIZE		(1 << LNET_MTU_BITS)
+#define MD_MAX_BRW_SIZE		(1 << LNET_MTU_BITS)
+#define MD_MAX_BRW_PAGES	(MD_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define DT_MAX_BRW_SIZE		PTLRPC_MAX_BRW_SIZE
+#define DT_MAX_BRW_PAGES	(DT_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define OFD_MAX_BRW_SIZE	(1 << LNET_MTU_BITS)
+
+/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */
+# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
+#  error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
+# endif
+# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE))
+#  error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE"
+# endif
+# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT)
+#  error "PTLRPC_MAX_BRW_SIZE too big"
+# endif
+# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV * PTLRPC_BULK_OPS_COUNT)
+#  error "PTLRPC_MAX_BRW_PAGES too big"
+# endif
+
+#define PTLRPC_NTHRS_INIT	2
+
+/**
+ * Buffer Constants
+ *
+ * Constants determine how memory is used to buffer incoming service requests.
+ *
+ * ?_NBUFS	      # buffers to allocate when growing the pool
+ * ?_BUFSIZE	    # bytes in a single request buffer
+ * ?_MAXREQSIZE	 # maximum request service will receive
+ *
+ * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk
+ * of ?_NBUFS is added to the pool.
+ *
+ * Messages larger than ?_MAXREQSIZE are dropped.  Request buffers are
+ * considered full when less than ?_MAXREQSIZE is left in them.
+ */
+/**
+ * Thread Constants
+ *
+ * Constants determine how threads are created for ptlrpc service.
+ *
+ * ?_NTHRS_INIT		# threads to create for each service partition on
+ *			  initializing. If it's non-affinity service and
+ *			  there is only one partition, it's the overall #
+ *			  threads for the service while initializing.
+ * ?_NTHRS_BASE		# threads should be created at least for each
+ *			  ptlrpc partition to keep the service healthy.
+ *			  It's the low-water mark of threads upper-limit
+ *			  for each partition.
+ * ?_THR_FACTOR	 # threads can be added on threads upper-limit for
+ *			  each CPU core. This factor is only for reference,
+ *			  we might decrease value of factor if number of cores
+ *			  per CPT is above a limit.
+ * ?_NTHRS_MAX		# overall threads can be created for a service,
+ *			  it's a soft limit because if service is running
+ *			  on machine with hundreds of cores and tens of
+ *			  CPU partitions, we need to guarantee each partition
+ *			  has ?_NTHRS_BASE threads, which means total threads
+ *			  will be ?_NTHRS_BASE * number_of_cpts which can
+ *			  exceed ?_NTHRS_MAX.
+ *
+ * Examples
+ *
+ * #define MDS_NTHRS_INIT	2
+ * #define MDS_NTHRS_BASE	64
+ * #define MDS_NTHRS_FACTOR	8
+ * #define MDS_NTHRS_MAX	1024
+ *
+ * Example 1):
+ * ---------------------------------------------------------------------
+ * Server(A) has 16 cores, user configured it to 4 partitions so each
+ * partition has 4 cores, then actual number of service threads on each
+ * partition is:
+ *     MDS_NTHRS_BASE(64) + cores(4) * MDS_NTHRS_FACTOR(8) = 96
+ *
+ * Total number of threads for the service is:
+ *     96 * partitions(4) = 384
+ *
+ * Example 2):
+ * ---------------------------------------------------------------------
+ * Server(B) has 32 cores, user configured it to 4 partitions so each
+ * partition has 8 cores, then actual number of service threads on each
+ * partition is:
+ *     MDS_NTHRS_BASE(64) + cores(8) * MDS_NTHRS_FACTOR(8) = 128
+ *
+ * Total number of threads for the service is:
+ *     128 * partitions(4) = 512
+ *
+ * Example 3):
+ * ---------------------------------------------------------------------
+ * Server(B) has 96 cores, user configured it to 8 partitions so each
+ * partition has 12 cores, then actual number of service threads on each
+ * partition is:
+ *     MDS_NTHRS_BASE(64) + cores(12) * MDS_NTHRS_FACTOR(8) = 160
+ *
+ * Total number of threads for the service is:
+ *     160 * partitions(8) = 1280
+ *
+ * However, it's above the soft limit MDS_NTHRS_MAX, so we choose this number
+ * as upper limit of threads number for each partition:
+ *     MDS_NTHRS_MAX(1024) / partitions(8) = 128
+ *
+ * Example 4):
+ * ---------------------------------------------------------------------
+ * Server(C) have a thousand of cores and user configured it to 32 partitions
+ *     MDS_NTHRS_BASE(64) * 32 = 2048
+ *
+ * which is already above soft limit MDS_NTHRS_MAX(1024), but we still need
+ * to guarantee that each partition has at least MDS_NTHRS_BASE(64) threads
+ * to keep service healthy, so total number of threads will just be 2048.
+ *
+ * NB: we don't suggest to choose server with that many cores because backend
+ *     filesystem itself, buffer cache, or underlying network stack might
+ *     have some SMP scalability issues at that large scale.
+ *
+ *     If user already has a fat machine with hundreds or thousands of cores,
+ *     there are two choices for configuration:
+ *     a) create CPU table from subset of all CPUs and run Lustre on
+ *	top of this subset
+ *     b) bind service threads on a few partitions, see modparameters of
+ *	MDS and OSS for details
+*
+ * NB: these calculations (and examples below) are simplified to help
+ *     understanding, the real implementation is a little more complex,
+ *     please see ptlrpc_server_nthreads_check() for details.
+ *
+ */
+
+ /*
+  * LDLM threads constants:
+  *
+  * Given 8 as factor and 24 as base threads number
+  *
+  * example 1)
+  * On 4-core machine we will have 24 + 8 * 4 = 56 threads.
+  *
+  * example 2)
+  * On 8-core machine with 2 partitions we will have 24 + 4 * 8 = 56
+  * threads for each partition and total threads number will be 112.
+  *
+  * example 3)
+  * On 64-core machine with 8 partitions we will need LDLM_NTHRS_BASE(24)
+  * threads for each partition to keep service healthy, so total threads
+  * number should be 24 * 8 = 192.
+  *
+  * So with these constants, threads number will be at the similar level
+  * of old versions, unless target machine has over a hundred cores
+  */
+#define LDLM_THR_FACTOR		8
+#define LDLM_NTHRS_INIT		PTLRPC_NTHRS_INIT
+#define LDLM_NTHRS_BASE		24
+#define LDLM_NTHRS_MAX		(num_online_cpus() == 1 ? 64 : 128)
+
+#define LDLM_BL_THREADS   LDLM_NTHRS_AUTO_INIT
+#define LDLM_CLIENT_NBUFS 1
+#define LDLM_SERVER_NBUFS 64
+#define LDLM_BUFSIZE      (8 * 1024)
+#define LDLM_MAXREQSIZE   (5 * 1024)
+#define LDLM_MAXREPSIZE   (1024)
+
+ /*
+  * MDS threads constants:
+  *
+  * Please see examples in "Thread Constants", MDS threads number will be at
+  * the comparable level of old versions, unless the server has many cores.
+  */
+#ifndef MDS_MAX_THREADS
+#define MDS_MAX_THREADS		1024
+#define MDS_MAX_OTHR_THREADS	256
+
+#else /* MDS_MAX_THREADS */
+#if MDS_MAX_THREADS < PTLRPC_NTHRS_INIT
+#undef MDS_MAX_THREADS
+#define MDS_MAX_THREADS	PTLRPC_NTHRS_INIT
+#endif
+#define MDS_MAX_OTHR_THREADS	max(PTLRPC_NTHRS_INIT, MDS_MAX_THREADS / 2)
+#endif
+
+/* default service */
+#define MDS_THR_FACTOR		8
+#define MDS_NTHRS_INIT		PTLRPC_NTHRS_INIT
+#define MDS_NTHRS_MAX		MDS_MAX_THREADS
+#define MDS_NTHRS_BASE		min(64, MDS_NTHRS_MAX)
+
+/* read-page service */
+#define MDS_RDPG_THR_FACTOR	4
+#define MDS_RDPG_NTHRS_INIT	PTLRPC_NTHRS_INIT
+#define MDS_RDPG_NTHRS_MAX	MDS_MAX_OTHR_THREADS
+#define MDS_RDPG_NTHRS_BASE	min(48, MDS_RDPG_NTHRS_MAX)
+
+/* these should be removed when we remove setattr service in the future */
+#define MDS_SETA_THR_FACTOR	4
+#define MDS_SETA_NTHRS_INIT	PTLRPC_NTHRS_INIT
+#define MDS_SETA_NTHRS_MAX	MDS_MAX_OTHR_THREADS
+#define MDS_SETA_NTHRS_BASE	min(48, MDS_SETA_NTHRS_MAX)
+
+/* non-affinity threads */
+#define MDS_OTHR_NTHRS_INIT	PTLRPC_NTHRS_INIT
+#define MDS_OTHR_NTHRS_MAX	MDS_MAX_OTHR_THREADS
+
+#define MDS_NBUFS		64
+
+/**
+ * Assume file name length = FNAME_MAX = 256 (true for ext3).
+ *	  path name length = PATH_MAX = 4096
+ *	  LOV MD size max  = EA_MAX = 24 * 2000
+ *		(NB: 24 is size of lov_ost_data)
+ *	  LOV LOGCOOKIE size max = 32 * 2000
+ *		(NB: 32 is size of llog_cookie)
+ * symlink:  FNAME_MAX + PATH_MAX  <- largest
+ * link:     FNAME_MAX + PATH_MAX  (mds_rec_link < mds_rec_create)
+ * rename:   FNAME_MAX + FNAME_MAX
+ * open:     FNAME_MAX + EA_MAX
+ *
+ * MDS_MAXREQSIZE ~= 4736 bytes =
+ * lustre_msg + ldlm_request + mdt_body + mds_rec_create + FNAME_MAX + PATH_MAX
+ * MDS_MAXREPSIZE ~= 8300 bytes = lustre_msg + llog_header
+ *
+ * Realistic size is about 512 bytes (20 character name + 128 char symlink),
+ * except in the open case where there are a large number of OSTs in a LOV.
+ */
+#define MDS_MAXREQSIZE		(5 * 1024)	/* >= 4736 */
+#define MDS_MAXREPSIZE		(9 * 1024)	/* >= 8300 */
+
+/**
+ * MDS incoming request with LOV EA
+ * 24 = sizeof(struct lov_ost_data), i.e: replay of opencreate
+ */
+#define MDS_LOV_MAXREQSIZE	max(MDS_MAXREQSIZE, \
+				    362 + LOV_MAX_STRIPE_COUNT * 24)
+/**
+ * MDS outgoing reply with LOV EA
+ *
+ * NB: max reply size Lustre 2.4+ client can get from old MDS is:
+ * LOV_MAX_STRIPE_COUNT * (llog_cookie + lov_ost_data) + extra bytes
+ *
+ * but 2.4 or later MDS will never send reply with llog_cookie to any
+ * version client. This macro is defined for server side reply buffer size.
+ */
+#define MDS_LOV_MAXREPSIZE	MDS_LOV_MAXREQSIZE
+
+/**
+ * This is the size of a maximum REINT_SETXATTR request:
+ *
+ *   lustre_msg		 56 (32 + 4 x 5 + 4)
+ *   ptlrpc_body	184
+ *   mdt_rec_setxattr	136
+ *   lustre_capa	120
+ *   name		256 (XATTR_NAME_MAX)
+ *   value	      65536 (XATTR_SIZE_MAX)
+ */
+#define MDS_EA_MAXREQSIZE	66288
+
+/**
+ * These are the maximum request and reply sizes (rounded up to 1 KB
+ * boundaries) for the "regular" MDS_REQUEST_PORTAL and MDS_REPLY_PORTAL.
+ */
+#define MDS_REG_MAXREQSIZE	(((max(MDS_EA_MAXREQSIZE, \
+				       MDS_LOV_MAXREQSIZE) + 1023) >> 10) << 10)
+#define MDS_REG_MAXREPSIZE	MDS_REG_MAXREQSIZE
+
+/**
+ * The update request includes all of updates from the create, which might
+ * include linkea (4K maxim), together with other updates, we set it to 9K:
+ * lustre_msg + ptlrpc_body + UPDATE_BUF_SIZE (8K)
+ */
+#define MDS_OUT_MAXREQSIZE	(9 * 1024)
+#define MDS_OUT_MAXREPSIZE	MDS_MAXREPSIZE
+
+/** MDS_BUFSIZE = max_reqsize (w/o LOV EA) + max sptlrpc payload size */
+#define MDS_BUFSIZE		max(MDS_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \
+				    8 * 1024)
+
+/**
+ * MDS_REG_BUFSIZE should at least be MDS_REG_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD.
+ * However, we need to allocate a much larger buffer for it because LNet
+ * requires each MD(rqbd) has at least MDS_REQ_MAXREQSIZE bytes left to avoid
+ * dropping of maximum-sized incoming request.  So if MDS_REG_BUFSIZE is only a
+ * little larger than MDS_REG_MAXREQSIZE, then it can only fit in one request
+ * even there are about MDS_REG_MAX_REQSIZE bytes left in a rqbd, and memory
+ * utilization is very low.
+ *
+ * In the meanwhile, size of rqbd can't be too large, because rqbd can't be
+ * reused until all requests fit in it have been processed and released,
+ * which means one long blocked request can prevent the rqbd be reused.
+ * Now we set request buffer size to 160 KB, so even each rqbd is unlinked
+ * from LNet with unused 65 KB, buffer utilization will be about 59%.
+ * Please check LU-2432 for details.
+ */
+#define MDS_REG_BUFSIZE		max(MDS_REG_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \
+				    160 * 1024)
+
+/**
+ * MDS_OUT_BUFSIZE = max_out_reqsize + max sptlrpc payload (~1K) which is
+ * about 10K, for the same reason as MDS_REG_BUFSIZE, we also give some
+ * extra bytes to each request buffer to improve buffer utilization rate.
+  */
+#define MDS_OUT_BUFSIZE		max(MDS_OUT_MAXREQSIZE + SPTLRPC_MAX_PAYLOAD, \
+				    24 * 1024)
+
+/** FLD_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc */
+#define FLD_MAXREQSIZE  (160)
+
+/** FLD_MAXREPSIZE == lustre_msg + ptlrpc_body */
+#define FLD_MAXREPSIZE  (152)
+#define FLD_BUFSIZE	(1 << 12)
+
+/**
+ * SEQ_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + lu_range +
+ * __u32 padding */
+#define SEQ_MAXREQSIZE  (160)
+
+/** SEQ_MAXREPSIZE == lustre_msg + ptlrpc_body + lu_range */
+#define SEQ_MAXREPSIZE  (152)
+#define SEQ_BUFSIZE	(1 << 12)
+
+/** MGS threads must be >= 3, see bug 22458 comment #28 */
+#define MGS_NTHRS_INIT	(PTLRPC_NTHRS_INIT + 1)
+#define MGS_NTHRS_MAX	32
+
+#define MGS_NBUFS       64
+#define MGS_BUFSIZE     (8 * 1024)
+#define MGS_MAXREQSIZE  (7 * 1024)
+#define MGS_MAXREPSIZE  (9 * 1024)
+
+ /*
+  * OSS threads constants:
+  *
+  * Given 8 as factor and 64 as base threads number
+  *
+  * example 1):
+  * On 8-core server configured to 2 partitions, we will have
+  * 64 + 8 * 4 = 96 threads for each partition, 192 total threads.
+  *
+  * example 2):
+  * On 32-core machine configured to 4 partitions, we will have
+  * 64 + 8 * 8 = 112 threads for each partition, so total threads number
+  * will be 112 * 4 = 448.
+  *
+  * example 3):
+  * On 64-core machine configured to 4 partitions, we will have
+  * 64 + 16 * 8 = 192 threads for each partition, so total threads number
+  * will be 192 * 4 = 768 which is above limit OSS_NTHRS_MAX(512), so we
+  * cut off the value to OSS_NTHRS_MAX(512) / 4 which is 128 threads
+  * for each partition.
+  *
+  * So we can see that with these constants, threads number wil be at the
+  * similar level of old versions, unless the server has many cores.
+  */
+ /* depress threads factor for VM with small memory size */
+#define OSS_THR_FACTOR		min_t(int, 8, \
+				NUM_CACHEPAGES >> (28 - PAGE_CACHE_SHIFT))
+#define OSS_NTHRS_INIT		(PTLRPC_NTHRS_INIT + 1)
+#define OSS_NTHRS_BASE		64
+#define OSS_NTHRS_MAX		512
+
+/* threads for handling "create" request */
+#define OSS_CR_THR_FACTOR	1
+#define OSS_CR_NTHRS_INIT	PTLRPC_NTHRS_INIT
+#define OSS_CR_NTHRS_BASE	8
+#define OSS_CR_NTHRS_MAX	64
+
+/**
+ * OST_IO_MAXREQSIZE ~=
+ *	lustre_msg + ptlrpc_body + obdo + obd_ioobj +
+ *	DT_MAX_BRW_PAGES * niobuf_remote
+ *
+ * - single object with 16 pages is 512 bytes
+ * - OST_IO_MAXREQSIZE must be at least 1 page of cookies plus some spillover
+ * - Must be a multiple of 1024
+ * - actual size is about 18K
+ */
+#define _OST_MAXREQSIZE_SUM (sizeof(struct lustre_msg) + \
+			     sizeof(struct ptlrpc_body) + \
+			     sizeof(struct obdo) + \
+			     sizeof(struct obd_ioobj) + \
+			     sizeof(struct niobuf_remote) * DT_MAX_BRW_PAGES)
+/**
+ * FIEMAP request can be 4K+ for now
+ */
+#define OST_MAXREQSIZE		(5 * 1024)
+#define OST_IO_MAXREQSIZE	max_t(int, OST_MAXREQSIZE, \
+				(((_OST_MAXREQSIZE_SUM - 1) | (1024 - 1)) + 1))
+
+#define OST_MAXREPSIZE		(9 * 1024)
+#define OST_IO_MAXREPSIZE	OST_MAXREPSIZE
+
+#define OST_NBUFS		64
+/** OST_BUFSIZE = max_reqsize + max sptlrpc payload size */
+#define OST_BUFSIZE		max_t(int, OST_MAXREQSIZE + 1024, 16 * 1024)
+/**
+ * OST_IO_MAXREQSIZE is 18K, giving extra 46K can increase buffer utilization
+ * rate of request buffer, please check comment of MDS_LOV_BUFSIZE for details.
+ */
+#define OST_IO_BUFSIZE		max_t(int, OST_IO_MAXREQSIZE + 1024, 64 * 1024)
+
+/* Macro to hide a typecast. */
+#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
+
+/**
+ * Structure to single define portal connection.
+ */
+struct ptlrpc_connection {
+	/** linkage for connections hash table */
+	struct hlist_node	c_hash;
+	/** Our own lnet nid for this connection */
+	lnet_nid_t	      c_self;
+	/** Remote side nid for this connection */
+	lnet_process_id_t       c_peer;
+	/** UUID of the other side */
+	struct obd_uuid	 c_remote_uuid;
+	/** reference counter for this connection */
+	atomic_t	    c_refcount;
+};
+
+/** Client definition for PortalRPC */
+struct ptlrpc_client {
+	/** What lnet portal does this client send messages to by default */
+	__u32		   cli_request_portal;
+	/** What portal do we expect replies on */
+	__u32		   cli_reply_portal;
+	/** Name of the client */
+	char		   *cli_name;
+};
+
+/** state flags of requests */
+/* XXX only ones left are those used by the bulk descs as well! */
+#define PTL_RPC_FL_INTR      (1 << 0)  /* reply wait was interrupted by user */
+#define PTL_RPC_FL_TIMEOUT   (1 << 7)  /* request timed out waiting for reply */
+
+#define REQ_MAX_ACK_LOCKS 8
+
+union ptlrpc_async_args {
+	/**
+	 * Scratchpad for passing args to completion interpreter. Users
+	 * cast to the struct of their choosing, and CLASSERT that this is
+	 * big enough.  For _tons_ of context, OBD_ALLOC a struct and store
+	 * a pointer to it here.  The pointer_arg ensures this struct is at
+	 * least big enough for that.
+	 */
+	void      *pointer_arg[11];
+	__u64      space[7];
+};
+
+struct ptlrpc_request_set;
+typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
+typedef int (*set_producer_func)(struct ptlrpc_request_set *, void *);
+
+/**
+ * Definition of request set structure.
+ * Request set is a list of requests (not necessary to the same target) that
+ * once populated with RPCs could be sent in parallel.
+ * There are two kinds of request sets. General purpose and with dedicated
+ * serving thread. Example of the latter is ptlrpcd set.
+ * For general purpose sets once request set started sending it is impossible
+ * to add new requests to such set.
+ * Provides a way to call "completion callbacks" when all requests in the set
+ * returned.
+ */
+struct ptlrpc_request_set {
+	atomic_t	  set_refcount;
+	/** number of in queue requests */
+	atomic_t	  set_new_count;
+	/** number of uncompleted requests */
+	atomic_t	  set_remaining;
+	/** wait queue to wait on for request events */
+	wait_queue_head_t	   set_waitq;
+	wait_queue_head_t	  *set_wakeup_ptr;
+	/** List of requests in the set */
+	struct list_head	    set_requests;
+	/**
+	 * List of completion callbacks to be called when the set is completed
+	 * This is only used if \a set_interpret is NULL.
+	 * Links struct ptlrpc_set_cbdata.
+	 */
+	struct list_head	    set_cblist;
+	/** Completion callback, if only one. */
+	set_interpreter_func  set_interpret;
+	/** opaq argument passed to completion \a set_interpret callback. */
+	void		 *set_arg;
+	/**
+	 * Lock for \a set_new_requests manipulations
+	 * locked so that any old caller can communicate requests to
+	 * the set holder who can then fold them into the lock-free set
+	 */
+	spinlock_t		set_new_req_lock;
+	/** List of new yet unsent requests. Only used with ptlrpcd now. */
+	struct list_head	    set_new_requests;
+
+	/** rq_status of requests that have been freed already */
+	int		   set_rc;
+	/** Additional fields used by the flow control extension */
+	/** Maximum number of RPCs in flight */
+	int		   set_max_inflight;
+	/** Callback function used to generate RPCs */
+	set_producer_func     set_producer;
+	/** opaq argument passed to the producer callback */
+	void		 *set_producer_arg;
+};
+
+/**
+ * Description of a single ptrlrpc_set callback
+ */
+struct ptlrpc_set_cbdata {
+	/** List linkage item */
+	struct list_head	      psc_item;
+	/** Pointer to interpreting function */
+	set_interpreter_func    psc_interpret;
+	/** Opaq argument to pass to the callback */
+	void		   *psc_data;
+};
+
+struct ptlrpc_bulk_desc;
+struct ptlrpc_service_part;
+struct ptlrpc_service;
+
+/**
+ * ptlrpc callback & work item stuff
+ */
+struct ptlrpc_cb_id {
+	void   (*cbid_fn)(lnet_event_t *ev);     /* specific callback fn */
+	void    *cbid_arg;		      /* additional arg */
+};
+
+/** Maximum number of locks to fit into reply state */
+#define RS_MAX_LOCKS 8
+#define RS_DEBUG     0
+
+/**
+ * Structure to define reply state on the server
+ * Reply state holds various reply message information. Also for "difficult"
+ * replies (rep-ack case) we store the state after sending reply and wait
+ * for the client to acknowledge the reception. In these cases locks could be
+ * added to the state for replay/failover consistency guarantees.
+ */
+struct ptlrpc_reply_state {
+	/** Callback description */
+	struct ptlrpc_cb_id    rs_cb_id;
+	/** Linkage for list of all reply states in a system */
+	struct list_head	     rs_list;
+	/** Linkage for list of all reply states on same export */
+	struct list_head	     rs_exp_list;
+	/** Linkage for list of all reply states for same obd */
+	struct list_head	     rs_obd_list;
+#if RS_DEBUG
+	struct list_head	     rs_debug_list;
+#endif
+	/** A spinlock to protect the reply state flags */
+	spinlock_t		rs_lock;
+	/** Reply state flags */
+	unsigned long	  rs_difficult:1;     /* ACK/commit stuff */
+	unsigned long	  rs_no_ack:1;    /* no ACK, even for
+						  difficult requests */
+	unsigned long	  rs_scheduled:1;     /* being handled? */
+	unsigned long	  rs_scheduled_ever:1;/* any schedule attempts? */
+	unsigned long	  rs_handled:1;  /* been handled yet? */
+	unsigned long	  rs_on_net:1;   /* reply_out_callback pending? */
+	unsigned long	  rs_prealloc:1; /* rs from prealloc list */
+	unsigned long	  rs_committed:1;/* the transaction was committed
+						 and the rs was dispatched
+						 by ptlrpc_commit_replies */
+	/** Size of the state */
+	int		    rs_size;
+	/** opcode */
+	__u32		  rs_opc;
+	/** Transaction number */
+	__u64		  rs_transno;
+	/** xid */
+	__u64		  rs_xid;
+	struct obd_export     *rs_export;
+	struct ptlrpc_service_part *rs_svcpt;
+	/** Lnet metadata handle for the reply */
+	lnet_handle_md_t       rs_md_h;
+	atomic_t	   rs_refcount;
+
+	/** Context for the sevice thread */
+	struct ptlrpc_svc_ctx *rs_svc_ctx;
+	/** Reply buffer (actually sent to the client), encoded if needed */
+	struct lustre_msg     *rs_repbuf;       /* wrapper */
+	/** Size of the reply buffer */
+	int		    rs_repbuf_len;   /* wrapper buf length */
+	/** Size of the reply message */
+	int		    rs_repdata_len;  /* wrapper msg length */
+	/**
+	 * Actual reply message. Its content is encrupted (if needed) to
+	 * produce reply buffer for actual sending. In simple case
+	 * of no network encryption we jus set \a rs_repbuf to \a rs_msg
+	 */
+	struct lustre_msg     *rs_msg;	  /* reply message */
+
+	/** Number of locks awaiting client ACK */
+	int		    rs_nlocks;
+	/** Handles of locks awaiting client reply ACK */
+	struct lustre_handle   rs_locks[RS_MAX_LOCKS];
+	/** Lock modes of locks in \a rs_locks */
+	ldlm_mode_t	    rs_modes[RS_MAX_LOCKS];
+};
+
+struct ptlrpc_thread;
+
+/** RPC stages */
+enum rq_phase {
+	RQ_PHASE_NEW	    = 0xebc0de00,
+	RQ_PHASE_RPC	    = 0xebc0de01,
+	RQ_PHASE_BULK	   = 0xebc0de02,
+	RQ_PHASE_INTERPRET      = 0xebc0de03,
+	RQ_PHASE_COMPLETE       = 0xebc0de04,
+	RQ_PHASE_UNREGISTERING  = 0xebc0de05,
+	RQ_PHASE_UNDEFINED      = 0xebc0de06
+};
+
+/** Type of request interpreter call-back */
+typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env,
+				    struct ptlrpc_request *req,
+				    void *arg, int rc);
+
+/**
+ * Definition of request pool structure.
+ * The pool is used to store empty preallocated requests for the case
+ * when we would actually need to send something without performing
+ * any allocations (to avoid e.g. OOM).
+ */
+struct ptlrpc_request_pool {
+	/** Locks the list */
+	spinlock_t prp_lock;
+	/** list of ptlrpc_request structs */
+	struct list_head prp_req_list;
+	/** Maximum message size that would fit into a rquest from this pool */
+	int prp_rq_size;
+	/** Function to allocate more requests for this pool */
+	void (*prp_populate)(struct ptlrpc_request_pool *, int);
+};
+
+struct lu_context;
+struct lu_env;
+
+struct ldlm_lock;
+
+/**
+ * \defgroup nrs Network Request Scheduler
+ * @{
+ */
+struct ptlrpc_nrs_policy;
+struct ptlrpc_nrs_resource;
+struct ptlrpc_nrs_request;
+
+/**
+ * NRS control operations.
+ *
+ * These are common for all policies.
+ */
+enum ptlrpc_nrs_ctl {
+	/**
+	 * Not a valid opcode.
+	 */
+	PTLRPC_NRS_CTL_INVALID,
+	/**
+	 * Activate the policy.
+	 */
+	PTLRPC_NRS_CTL_START,
+	/**
+	 * Reserved for multiple primary policies, which may be a possibility
+	 * in the future.
+	 */
+	PTLRPC_NRS_CTL_STOP,
+	/**
+	 * Policies can start using opcodes from this value and onwards for
+	 * their own purposes; the assigned value itself is arbitrary.
+	 */
+	PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
+};
+
+/**
+ * ORR policy operations
+ */
+enum nrs_ctl_orr {
+	NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
+	NRS_CTL_ORR_WR_QUANTUM,
+	NRS_CTL_ORR_RD_OFF_TYPE,
+	NRS_CTL_ORR_WR_OFF_TYPE,
+	NRS_CTL_ORR_RD_SUPP_REQ,
+	NRS_CTL_ORR_WR_SUPP_REQ,
+};
+
+/**
+ * NRS policy operations.
+ *
+ * These determine the behaviour of a policy, and are called in response to
+ * NRS core events.
+ */
+struct ptlrpc_nrs_pol_ops {
+	/**
+	 * Called during policy registration; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy being initialized
+	 */
+	int	(*op_policy_init) (struct ptlrpc_nrs_policy *policy);
+	/**
+	 * Called during policy unregistration; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy being unregistered/finalized
+	 */
+	void	(*op_policy_fini) (struct ptlrpc_nrs_policy *policy);
+	/**
+	 * Called when activating a policy via lprocfs; policies allocate and
+	 * initialize their resources here; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy being started
+	 *
+	 * \see nrs_policy_start_locked()
+	 */
+	int	(*op_policy_start) (struct ptlrpc_nrs_policy *policy);
+	/**
+	 * Called when deactivating a policy via lprocfs; policies deallocate
+	 * their resources here; this operation is optional
+	 *
+	 * \param[in,out] policy The policy being stopped
+	 *
+	 * \see nrs_policy_stop0()
+	 */
+	void	(*op_policy_stop) (struct ptlrpc_nrs_policy *policy);
+	/**
+	 * Used for policy-specific operations; i.e. not generic ones like
+	 * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
+	 * to an ioctl; this operation is optional.
+	 *
+	 * \param[in,out]	 policy The policy carrying out operation \a opc
+	 * \param[in]	  opc	 The command operation being carried out
+	 * \param[in,out] arg	 An generic buffer for communication between the
+	 *			 user and the control operation
+	 *
+	 * \retval -ve error
+	 * \retval   0 success
+	 *
+	 * \see ptlrpc_nrs_policy_control()
+	 */
+	int	(*op_policy_ctl) (struct ptlrpc_nrs_policy *policy,
+				  enum ptlrpc_nrs_ctl opc, void *arg);
+
+	/**
+	 * Called when obtaining references to the resources of the resource
+	 * hierarchy for a request that has arrived for handling at the PTLRPC
+	 * service. Policies should return -ve for requests they do not wish
+	 * to handle. This operation is mandatory.
+	 *
+	 * \param[in,out] policy  The policy we're getting resources for.
+	 * \param[in,out] nrq	  The request we are getting resources for.
+	 * \param[in]	  parent  The parent resource of the resource being
+	 *			  requested; set to NULL if none.
+	 * \param[out]	  resp	  The resource is to be returned here; the
+	 *			  fallback policy in an NRS head should
+	 *			  \e always return a non-NULL pointer value.
+	 * \param[in]  moving_req When set, signifies that this is an attempt
+	 *			  to obtain resources for a request being moved
+	 *			  to the high-priority NRS head by
+	 *			  ldlm_lock_reorder_req().
+	 *			  This implies two things:
+	 *			  1. We are under obd_export::exp_rpc_lock and
+	 *			  so should not sleep.
+	 *			  2. We should not perform non-idempotent or can
+	 *			  skip performing idempotent operations that
+	 *			  were carried out when resources were first
+	 *			  taken for the request when it was initialized
+	 *			  in ptlrpc_nrs_req_initialize().
+	 *
+	 * \retval 0, +ve The level of the returned resource in the resource
+	 *		  hierarchy; currently only 0 (for a non-leaf resource)
+	 *		  and 1 (for a leaf resource) are supported by the
+	 *		  framework.
+	 * \retval -ve	  error
+	 *
+	 * \see ptlrpc_nrs_req_initialize()
+	 * \see ptlrpc_nrs_hpreq_add_nolock()
+	 * \see ptlrpc_nrs_req_hp_move()
+	 */
+	int	(*op_res_get) (struct ptlrpc_nrs_policy *policy,
+			       struct ptlrpc_nrs_request *nrq,
+			       const struct ptlrpc_nrs_resource *parent,
+			       struct ptlrpc_nrs_resource **resp,
+			       bool moving_req);
+	/**
+	 * Called when releasing references taken for resources in the resource
+	 * hierarchy for the request; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy the resource belongs to
+	 * \param[in] res	 The resource to be freed
+	 *
+	 * \see ptlrpc_nrs_req_finalize()
+	 * \see ptlrpc_nrs_hpreq_add_nolock()
+	 * \see ptlrpc_nrs_req_hp_move()
+	 */
+	void	(*op_res_put) (struct ptlrpc_nrs_policy *policy,
+			       const struct ptlrpc_nrs_resource *res);
+
+	/**
+	 * Obtains a request for handling from the policy, and optionally
+	 * removes the request from the policy; this operation is mandatory.
+	 *
+	 * \param[in,out] policy The policy to poll
+	 * \param[in]	  peek	 When set, signifies that we just want to
+	 *			 examine the request, and not handle it, so the
+	 *			 request is not removed from the policy.
+	 * \param[in]	  force	 When set, it will force a policy to return a
+	 *			 request if it has one queued.
+	 *
+	 * \retval NULL No request available for handling
+	 * \retval valid-pointer The request polled for handling
+	 *
+	 * \see ptlrpc_nrs_req_get_nolock()
+	 */
+	struct ptlrpc_nrs_request *
+		(*op_req_get) (struct ptlrpc_nrs_policy *policy, bool peek,
+			       bool force);
+	/**
+	 * Called when attempting to add a request to a policy for later
+	 * handling; this operation is mandatory.
+	 *
+	 * \param[in,out] policy  The policy on which to enqueue \a nrq
+	 * \param[in,out] nrq The request to enqueue
+	 *
+	 * \retval 0	success
+	 * \retval != 0	error
+	 *
+	 * \see ptlrpc_nrs_req_add_nolock()
+	 */
+	int	(*op_req_enqueue) (struct ptlrpc_nrs_policy *policy,
+				   struct ptlrpc_nrs_request *nrq);
+	/**
+	 * Removes a request from the policy's set of pending requests. Normally
+	 * called after a request has been polled successfully from the policy
+	 * for handling; this operation is mandatory.
+	 *
+	 * \param[in,out] policy The policy the request \a nrq belongs to
+	 * \param[in,out] nrq    The request to dequeue
+	 *
+	 * \see ptlrpc_nrs_req_del_nolock()
+	 */
+	void	(*op_req_dequeue) (struct ptlrpc_nrs_policy *policy,
+				   struct ptlrpc_nrs_request *nrq);
+	/**
+	 * Called after the request being carried out. Could be used for
+	 * job/resource control; this operation is optional.
+	 *
+	 * \param[in,out] policy The policy which is stopping to handle request
+	 *			 \a nrq
+	 * \param[in,out] nrq	 The request
+	 *
+	 * \pre spin_is_locked(&svcpt->scp_req_lock)
+	 *
+	 * \see ptlrpc_nrs_req_stop_nolock()
+	 */
+	void	(*op_req_stop) (struct ptlrpc_nrs_policy *policy,
+				struct ptlrpc_nrs_request *nrq);
+	/**
+	 * Registers the policy's lprocfs interface with a PTLRPC service.
+	 *
+	 * \param[in] svc The service
+	 *
+	 * \retval 0	success
+	 * \retval != 0	error
+	 */
+	int	(*op_lprocfs_init) (struct ptlrpc_service *svc);
+	/**
+	 * Unegisters the policy's lprocfs interface with a PTLRPC service.
+	 *
+	 * In cases of failed policy registration in
+	 * \e ptlrpc_nrs_policy_register(), this function may be called for a
+	 * service which has not registered the policy successfully, so
+	 * implementations of this method should make sure their operations are
+	 * safe in such cases.
+	 *
+	 * \param[in] svc The service
+	 */
+	void	(*op_lprocfs_fini) (struct ptlrpc_service *svc);
+};
+
+/**
+ * Policy flags
+ */
+enum nrs_policy_flags {
+	/**
+	 * Fallback policy, use this flag only on a single supported policy per
+	 * service. The flag cannot be used on policies that use
+	 * \e PTLRPC_NRS_FL_REG_EXTERN
+	 */
+	PTLRPC_NRS_FL_FALLBACK		= (1 << 0),
+	/**
+	 * Start policy immediately after registering.
+	 */
+	PTLRPC_NRS_FL_REG_START		= (1 << 1),
+	/**
+	 * This is a policy registering from a module different to the one NRS
+	 * core ships in (currently ptlrpc).
+	 */
+	PTLRPC_NRS_FL_REG_EXTERN	= (1 << 2),
+};
+
+/**
+ * NRS queue type.
+ *
+ * Denotes whether an NRS instance is for handling normal or high-priority
+ * RPCs, or whether an operation pertains to one or both of the NRS instances
+ * in a service.
+ */
+enum ptlrpc_nrs_queue_type {
+	PTLRPC_NRS_QUEUE_REG	= (1 << 0),
+	PTLRPC_NRS_QUEUE_HP	= (1 << 1),
+	PTLRPC_NRS_QUEUE_BOTH	= (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
+};
+
+/**
+ * NRS head
+ *
+ * A PTLRPC service has at least one NRS head instance for handling normal
+ * priority RPCs, and may optionally have a second NRS head instance for
+ * handling high-priority RPCs. Each NRS head maintains a list of available
+ * policies, of which one and only one policy is acting as the fallback policy,
+ * and optionally a different policy may be acting as the primary policy. For
+ * all RPCs handled by this NRS head instance, NRS core will first attempt to
+ * enqueue the RPC using the primary policy (if any). The fallback policy is
+ * used in the following cases:
+ * - when there was no primary policy in the
+ *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
+ *   was initialized.
+ * - when the primary policy that was at the
+ *   ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
+ *   RPC was initialized, denoted it did not wish, or for some other reason was
+ *   not able to handle the request, by returning a non-valid NRS resource
+ *   reference.
+ * - when the primary policy that was at the
+ *   ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
+ *   RPC was initialized, fails later during the request enqueueing stage.
+ *
+ * \see nrs_resource_get_safe()
+ * \see nrs_request_enqueue()
+ */
+struct ptlrpc_nrs {
+	spinlock_t			nrs_lock;
+	/** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
+	/**
+	 * List of registered policies
+	 */
+	struct list_head			nrs_policy_list;
+	/**
+	 * List of policies with queued requests. Policies that have any
+	 * outstanding requests are queued here, and this list is queried
+	 * in a round-robin manner from NRS core when obtaining a request
+	 * for handling. This ensures that requests from policies that at some
+	 * point transition away from the
+	 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
+	 */
+	struct list_head			nrs_policy_queued;
+	/**
+	 * Service partition for this NRS head
+	 */
+	struct ptlrpc_service_part     *nrs_svcpt;
+	/**
+	 * Primary policy, which is the preferred policy for handling RPCs
+	 */
+	struct ptlrpc_nrs_policy       *nrs_policy_primary;
+	/**
+	 * Fallback policy, which is the backup policy for handling RPCs
+	 */
+	struct ptlrpc_nrs_policy       *nrs_policy_fallback;
+	/**
+	 * This NRS head handles either HP or regular requests
+	 */
+	enum ptlrpc_nrs_queue_type	nrs_queue_type;
+	/**
+	 * # queued requests from all policies in this NRS head
+	 */
+	unsigned long			nrs_req_queued;
+	/**
+	 * # scheduled requests from all policies in this NRS head
+	 */
+	unsigned long			nrs_req_started;
+	/**
+	 * # policies on this NRS
+	 */
+	unsigned			nrs_num_pols;
+	/**
+	 * This NRS head is in progress of starting a policy
+	 */
+	unsigned			nrs_policy_starting:1;
+	/**
+	 * In progress of shutting down the whole NRS head; used during
+	 * unregistration
+	 */
+	unsigned			nrs_stopping:1;
+};
+
+#define NRS_POL_NAME_MAX		16
+
+struct ptlrpc_nrs_pol_desc;
+
+/**
+ * Service compatibility predicate; this determines whether a policy is adequate
+ * for handling RPCs of a particular PTLRPC service.
+ *
+ * XXX:This should give the same result during policy registration and
+ * unregistration, and for all partitions of a service; so the result should not
+ * depend on temporal service or other properties, that may influence the
+ * result.
+ */
+typedef bool (*nrs_pol_desc_compat_t) (const struct ptlrpc_service *svc,
+				       const struct ptlrpc_nrs_pol_desc *desc);
+
+struct ptlrpc_nrs_pol_conf {
+	/**
+	 * Human-readable policy name
+	 */
+	char				   nc_name[NRS_POL_NAME_MAX];
+	/**
+	 * NRS operations for this policy
+	 */
+	const struct ptlrpc_nrs_pol_ops	  *nc_ops;
+	/**
+	 * Service compatibility predicate
+	 */
+	nrs_pol_desc_compat_t		   nc_compat;
+	/**
+	 * Set for policies that support a single ptlrpc service, i.e. ones that
+	 * have \a pd_compat set to nrs_policy_compat_one(). The variable value
+	 * depicts the name of the single service that such policies are
+	 * compatible with.
+	 */
+	const char			  *nc_compat_svc_name;
+	/**
+	 * Owner module for this policy descriptor; policies registering from a
+	 * different module to the one the NRS framework is held within
+	 * (currently ptlrpc), should set this field to THIS_MODULE.
+	 */
+	module_t			  *nc_owner;
+	/**
+	 * Policy registration flags; a bitmast of \e nrs_policy_flags
+	 */
+	unsigned			   nc_flags;
+};
+
+/**
+ * NRS policy registering descriptor
+ *
+ * Is used to hold a description of a policy that can be passed to NRS core in
+ * order to register the policy with NRS heads in different PTLRPC services.
+ */
+struct ptlrpc_nrs_pol_desc {
+	/**
+	 * Human-readable policy name
+	 */
+	char					pd_name[NRS_POL_NAME_MAX];
+	/**
+	 * Link into nrs_core::nrs_policies
+	 */
+	struct list_head				pd_list;
+	/**
+	 * NRS operations for this policy
+	 */
+	const struct ptlrpc_nrs_pol_ops	       *pd_ops;
+	/**
+	 * Service compatibility predicate
+	 */
+	nrs_pol_desc_compat_t			pd_compat;
+	/**
+	 * Set for policies that are compatible with only one PTLRPC service.
+	 *
+	 * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
+	 */
+	const char			       *pd_compat_svc_name;
+	/**
+	 * Owner module for this policy descriptor.
+	 *
+	 * We need to hold a reference to the module whenever we might make use
+	 * of any of the module's contents, i.e.
+	 * - If one or more instances of the policy are at a state where they
+	 *   might be handling a request, i.e.
+	 *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
+	 *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
+	 *   call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
+	 *   is taken on the module when
+	 *   \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
+	 *   becomes 0, so that we hold only one reference to the module maximum
+	 *   at any time.
+	 *
+	 *   We do not need to hold a reference to the module, even though we
+	 *   might use code and data from the module, in the following cases:
+	 * - During external policy registration, because this should happen in
+	 *   the module's init() function, in which case the module is safe from
+	 *   removal because a reference is being held on the module by the
+	 *   kernel, and iirc kmod (and I guess module-init-tools also) will
+	 *   serialize any racing processes properly anyway.
+	 * - During external policy unregistration, because this should happen
+	 *   in a module's exit() function, and any attempts to start a policy
+	 *   instance would need to take a reference on the module, and this is
+	 *   not possible once we have reached the point where the exit()
+	 *   handler is called.
+	 * - During service registration and unregistration, as service setup
+	 *   and cleanup, and policy registration, unregistration and policy
+	 *   instance starting, are serialized by \e nrs_core::nrs_mutex, so
+	 *   as long as users adhere to the convention of registering policies
+	 *   in init() and unregistering them in module exit() functions, there
+	 *   should not be a race between these operations.
+	 * - During any policy-specific lprocfs operations, because a reference
+	 *   is held by the kernel on a proc entry that has been entered by a
+	 *   syscall, so as long as proc entries are removed during unregistration time,
+	 *   then unregistration and lprocfs operations will be properly
+	 *   serialized.
+	 */
+	module_t			       *pd_owner;
+	/**
+	 * Bitmask of \e nrs_policy_flags
+	 */
+	unsigned				pd_flags;
+	/**
+	 * # of references on this descriptor
+	 */
+	atomic_t				pd_refs;
+};
+
+/**
+ * NRS policy state
+ *
+ * Policies transition from one state to the other during their lifetime
+ */
+enum ptlrpc_nrs_pol_state {
+	/**
+	 * Not a valid policy state.
+	 */
+	NRS_POL_STATE_INVALID,
+	/**
+	 * Policies are at this state either at the start of their life, or
+	 * transition here when the user selects a different policy to act
+	 * as the primary one.
+	 */
+	NRS_POL_STATE_STOPPED,
+	/**
+	 * Policy is progress of stopping
+	 */
+	NRS_POL_STATE_STOPPING,
+	/**
+	 * Policy is in progress of starting
+	 */
+	NRS_POL_STATE_STARTING,
+	/**
+	 * A policy is in this state in two cases:
+	 * - it is the fallback policy, which is always in this state.
+	 * - it has been activated by the user; i.e. it is the primary policy,
+	 */
+	NRS_POL_STATE_STARTED,
+};
+
+/**
+ * NRS policy information
+ *
+ * Used for obtaining information for the status of a policy via lprocfs
+ */
+struct ptlrpc_nrs_pol_info {
+	/**
+	 * Policy name
+	 */
+	char				pi_name[NRS_POL_NAME_MAX];
+	/**
+	 * Current policy state
+	 */
+	enum ptlrpc_nrs_pol_state	pi_state;
+	/**
+	 * # RPCs enqueued for later dispatching by the policy
+	 */
+	long				pi_req_queued;
+	/**
+	 * # RPCs started for dispatch by the policy
+	 */
+	long				pi_req_started;
+	/**
+	 * Is this a fallback policy?
+	 */
+	unsigned			pi_fallback:1;
+};
+
+/**
+ * NRS policy
+ *
+ * There is one instance of this for each policy in each NRS head of each
+ * PTLRPC service partition.
+ */
+struct ptlrpc_nrs_policy {
+	/**
+	 * Linkage into the NRS head's list of policies,
+	 * ptlrpc_nrs:nrs_policy_list
+	 */
+	struct list_head			pol_list;
+	/**
+	 * Linkage into the NRS head's list of policies with enqueued
+	 * requests ptlrpc_nrs:nrs_policy_queued
+	 */
+	struct list_head			pol_list_queued;
+	/**
+	 * Current state of this policy
+	 */
+	enum ptlrpc_nrs_pol_state	pol_state;
+	/**
+	 * Bitmask of nrs_policy_flags
+	 */
+	unsigned			pol_flags;
+	/**
+	 * # RPCs enqueued for later dispatching by the policy
+	 */
+	long				pol_req_queued;
+	/**
+	 * # RPCs started for dispatch by the policy
+	 */
+	long				pol_req_started;
+	/**
+	 * Usage Reference count taken on the policy instance
+	 */
+	long				pol_ref;
+	/**
+	 * The NRS head this policy has been created at
+	 */
+	struct ptlrpc_nrs	       *pol_nrs;
+	/**
+	 * Private policy data; varies by policy type
+	 */
+	void			       *pol_private;
+	/**
+	 * Policy descriptor for this policy instance.
+	 */
+	struct ptlrpc_nrs_pol_desc     *pol_desc;
+};
+
+/**
+ * NRS resource
+ *
+ * Resources are embedded into two types of NRS entities:
+ * - Inside NRS policies, in the policy's private data in
+ *   ptlrpc_nrs_policy::pol_private
+ * - In objects that act as prime-level scheduling entities in different NRS
+ *   policies; e.g. on a policy that performs round robin or similar order
+ *   scheduling across client NIDs, there would be one NRS resource per unique
+ *   client NID. On a policy which performs round robin scheduling across
+ *   backend filesystem objects, there would be one resource associated with
+ *   each of the backend filesystem objects partaking in the scheduling
+ *   performed by the policy.
+ *
+ * NRS resources share a parent-child relationship, in which resources embedded
+ * in policy instances are the parent entities, with all scheduling entities
+ * a policy schedules across being the children, thus forming a simple resource
+ * hierarchy. This hierarchy may be extended with one or more levels in the
+ * future if the ability to have more than one primary policy is added.
+ *
+ * Upon request initialization, references to the then active NRS policies are
+ * taken and used to later handle the dispatching of the request with one of
+ * these policies.
+ *
+ * \see nrs_resource_get_safe()
+ * \see ptlrpc_nrs_req_add()
+ */
+struct ptlrpc_nrs_resource {
+	/**
+	 * This NRS resource's parent; is NULL for resources embedded in NRS
+	 * policy instances; i.e. those are top-level ones.
+	 */
+	struct ptlrpc_nrs_resource     *res_parent;
+	/**
+	 * The policy associated with this resource.
+	 */
+	struct ptlrpc_nrs_policy       *res_policy;
+};
+
+enum {
+	NRS_RES_FALLBACK,
+	NRS_RES_PRIMARY,
+	NRS_RES_MAX
+};
+
+/* \name fifo
+ *
+ * FIFO policy
+ *
+ * This policy is a logical wrapper around previous, non-NRS functionality.
+ * It dispatches RPCs in the same order as they arrive from the network. This
+ * policy is currently used as the fallback policy, and the only enabled policy
+ * on all NRS heads of all PTLRPC service partitions.
+ * @{
+ */
+
+/**
+ * Private data structure for the FIFO policy
+ */
+struct nrs_fifo_head {
+	/**
+	 * Resource object for policy instance.
+	 */
+	struct ptlrpc_nrs_resource	fh_res;
+	/**
+	 * List of queued requests.
+	 */
+	struct list_head			fh_list;
+	/**
+	 * For debugging purposes.
+	 */
+	__u64				fh_sequence;
+};
+
+struct nrs_fifo_req {
+	struct list_head		fr_list;
+	__u64			fr_sequence;
+};
+
+/** @} fifo */
+
+/**
+ * \name CRR-N
+ *
+ * CRR-N, Client Round Robin over NIDs
+ * @{
+ */
+
+/**
+ * private data structure for CRR-N NRS
+ */
+struct nrs_crrn_net {
+	struct ptlrpc_nrs_resource	cn_res;
+	cfs_binheap_t		       *cn_binheap;
+	cfs_hash_t		       *cn_cli_hash;
+	/**
+	 * Used when a new scheduling round commences, in order to synchronize
+	 * all clients with the new round number.
+	 */
+	__u64				cn_round;
+	/**
+	 * Determines the relevant ordering amongst request batches within a
+	 * scheduling round.
+	 */
+	__u64				cn_sequence;
+	/**
+	 * Round Robin quantum; the maximum number of RPCs that each request
+	 * batch for each client can have in a scheduling round.
+	 */
+	__u16				cn_quantum;
+};
+
+/**
+ * Object representing a client in CRR-N, as identified by its NID
+ */
+struct nrs_crrn_client {
+	struct ptlrpc_nrs_resource	cc_res;
+	struct hlist_node		cc_hnode;
+	lnet_nid_t			cc_nid;
+	/**
+	 * The round number against which this client is currently scheduling
+	 * requests.
+	 */
+	__u64				cc_round;
+	/**
+	 * The sequence number used for requests scheduled by this client during
+	 * the current round number.
+	 */
+	__u64				cc_sequence;
+	atomic_t			cc_ref;
+	/**
+	 * Round Robin quantum; the maximum number of RPCs the client is allowed
+	 * to schedule in a single batch of each round.
+	 */
+	__u16				cc_quantum;
+	/**
+	 * # of pending requests for this client, on all existing rounds
+	 */
+	__u16				cc_active;
+};
+
+/**
+ * CRR-N NRS request definition
+ */
+struct nrs_crrn_req {
+	/**
+	 * Round number for this request; shared with all other requests in the
+	 * same batch.
+	 */
+	__u64			cr_round;
+	/**
+	 * Sequence number for this request; shared with all other requests in
+	 * the same batch.
+	 */
+	__u64			cr_sequence;
+};
+
+/**
+ * CRR-N policy operations.
+ */
+enum nrs_ctl_crr {
+	/**
+	 * Read the RR quantum size of a CRR-N policy.
+	 */
+	NRS_CTL_CRRN_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
+	/**
+	 * Write the RR quantum size of a CRR-N policy.
+	 */
+	NRS_CTL_CRRN_WR_QUANTUM,
+};
+
+/** @} CRR-N */
+
+/**
+ * \name ORR/TRR
+ *
+ * ORR/TRR (Object-based Round Robin/Target-based Round Robin) NRS policies
+ * @{
+ */
+
+/**
+ * Lower and upper byte offsets of a brw RPC
+ */
+struct nrs_orr_req_range {
+	__u64		or_start;
+	__u64		or_end;
+};
+
+/**
+ * RPC types supported by the ORR/TRR policies
+ */
+enum nrs_orr_supp {
+	NOS_OST_READ  = (1 << 0),
+	NOS_OST_WRITE = (1 << 1),
+	NOS_OST_RW    = (NOS_OST_READ | NOS_OST_WRITE),
+	/**
+	 * Default value for policies.
+	 */
+	NOS_DFLT      = NOS_OST_READ
+};
+
+/**
+ * As unique keys for grouping RPCs together, we use the object's OST FID for
+ * the ORR policy, and the OST index for the TRR policy.
+ *
+ * XXX: We waste some space for TRR policy instances by using a union, but it
+ *	allows to consolidate some of the code between ORR and TRR, and these
+ *	policies will probably eventually merge into one anyway.
+ */
+struct nrs_orr_key {
+	union {
+		/** object FID for ORR */
+		struct lu_fid	ok_fid;
+		/** OST index for TRR */
+		__u32		ok_idx;
+	};
+};
+
+/**
+ * The largest base string for unique hash/slab object names is
+ * "nrs_orr_reg_", so 13 characters. We add 3 to this to be used for the CPT
+ * id number, so this _should_ be more than enough for the maximum number of
+ * CPTs on any system. If it does happen that this statement is incorrect,
+ * nrs_orr_genobjname() will inevitably yield a non-unique name and cause
+ * kmem_cache_create() to complain (on Linux), so the erroneous situation
+ * will hopefully not go unnoticed.
+ */
+#define NRS_ORR_OBJ_NAME_MAX	(sizeof("nrs_orr_reg_") + 3)
+
+/**
+ * private data structure for ORR and TRR NRS
+ */
+struct nrs_orr_data {
+	struct ptlrpc_nrs_resource	od_res;
+	cfs_binheap_t		       *od_binheap;
+	cfs_hash_t		       *od_obj_hash;
+	struct kmem_cache		       *od_cache;
+	/**
+	 * Used when a new scheduling round commences, in order to synchronize
+	 * all object or OST batches with the new round number.
+	 */
+	__u64				od_round;
+	/**
+	 * Determines the relevant ordering amongst request batches within a
+	 * scheduling round.
+	 */
+	__u64				od_sequence;
+	/**
+	 * RPC types that are currently supported.
+	 */
+	enum nrs_orr_supp		od_supp;
+	/**
+	 * Round Robin quantum; the maxium number of RPCs that each request
+	 * batch for each object or OST can have in a scheduling round.
+	 */
+	__u16				od_quantum;
+	/**
+	 * Whether to use physical disk offsets or logical file offsets.
+	 */
+	bool				od_physical;
+	/**
+	 * XXX: We need to provide a persistently allocated string to hold
+	 * unique object names for this policy, since in currently supported
+	 * versions of Linux by Lustre, kmem_cache_create() just sets a pointer
+	 * to the name string provided. kstrdup() is used in the version of
+	 * kmeme_cache_create() in current Linux mainline, so we may be able to
+	 * remove this in the future.
+	 */
+	char				od_objname[NRS_ORR_OBJ_NAME_MAX];
+};
+
+/**
+ * Represents a backend-fs object or OST in the ORR and TRR policies
+ * respectively
+ */
+struct nrs_orr_object {
+	struct ptlrpc_nrs_resource	oo_res;
+	struct hlist_node		oo_hnode;
+	/**
+	 * The round number against which requests are being scheduled for this
+	 * object or OST
+	 */
+	__u64				oo_round;
+	/**
+	 * The sequence number used for requests scheduled for this object or
+	 * OST during the current round number.
+	 */
+	__u64				oo_sequence;
+	/**
+	 * The key of the object or OST for which this structure instance is
+	 * scheduling RPCs
+	 */
+	struct nrs_orr_key		oo_key;
+	atomic_t			oo_ref;
+	/**
+	 * Round Robin quantum; the maximum number of RPCs that are allowed to
+	 * be scheduled for the object or OST in a single batch of each round.
+	 */
+	__u16				oo_quantum;
+	/**
+	 * # of pending requests for this object or OST, on all existing rounds
+	 */
+	__u16				oo_active;
+};
+
+/**
+ * ORR/TRR NRS request definition
+ */
+struct nrs_orr_req {
+	/**
+	 * The offset range this request covers
+	 */
+	struct nrs_orr_req_range	or_range;
+	/**
+	 * Round number for this request; shared with all other requests in the
+	 * same batch.
+	 */
+	__u64				or_round;
+	/**
+	 * Sequence number for this request; shared with all other requests in
+	 * the same batch.
+	 */
+	__u64				or_sequence;
+	/**
+	 * For debugging purposes.
+	 */
+	struct nrs_orr_key		or_key;
+	/**
+	 * An ORR policy instance has filled in request information while
+	 * enqueueing the request on the service partition's regular NRS head.
+	 */
+	unsigned int			or_orr_set:1;
+	/**
+	 * A TRR policy instance has filled in request information while
+	 * enqueueing the request on the service partition's regular NRS head.
+	 */
+	unsigned int			or_trr_set:1;
+	/**
+	 * Request offset ranges have been filled in with logical offset
+	 * values.
+	 */
+	unsigned int			or_logical_set:1;
+	/**
+	 * Request offset ranges have been filled in with physical offset
+	 * values.
+	 */
+	unsigned int			or_physical_set:1;
+};
+
+/** @} ORR/TRR */
+
+/**
+ * NRS request
+ *
+ * Instances of this object exist embedded within ptlrpc_request; the main
+ * purpose of this object is to hold references to the request's resources
+ * for the lifetime of the request, and to hold properties that policies use
+ * use for determining the request's scheduling priority.
+ * */
+struct ptlrpc_nrs_request {
+	/**
+	 * The request's resource hierarchy.
+	 */
+	struct ptlrpc_nrs_resource     *nr_res_ptrs[NRS_RES_MAX];
+	/**
+	 * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
+	 * policy that was used to enqueue the request.
+	 *
+	 * \see nrs_request_enqueue()
+	 */
+	unsigned			nr_res_idx;
+	unsigned			nr_initialized:1;
+	unsigned			nr_enqueued:1;
+	unsigned			nr_started:1;
+	unsigned			nr_finalized:1;
+	cfs_binheap_node_t		nr_node;
+
+	/**
+	 * Policy-specific fields, used for determining a request's scheduling
+	 * priority, and other supporting functionality.
+	 */
+	union {
+		/**
+		 * Fields for the FIFO policy
+		 */
+		struct nrs_fifo_req	fifo;
+		/**
+		 * CRR-N request defintion
+		 */
+		struct nrs_crrn_req	crr;
+		/** ORR and TRR share the same request definition */
+		struct nrs_orr_req	orr;
+	} nr_u;
+	/**
+	 * Externally-registering policies may want to use this to allocate
+	 * their own request properties.
+	 */
+	void			       *ext;
+};
+
+/** @} nrs */
+
+/**
+ * Basic request prioritization operations structure.
+ * The whole idea is centered around locks and RPCs that might affect locks.
+ * When a lock is contended we try to give priority to RPCs that might lead
+ * to fastest release of that lock.
+ * Currently only implemented for OSTs only in a way that makes all
+ * IO and truncate RPCs that are coming from a locked region where a lock is
+ * contended a priority over other requests.
+ */
+struct ptlrpc_hpreq_ops {
+	/**
+	 * Check if the lock handle of the given lock is the same as
+	 * taken from the request.
+	 */
+	int  (*hpreq_lock_match)(struct ptlrpc_request *, struct ldlm_lock *);
+	/**
+	 * Check if the request is a high priority one.
+	 */
+	int  (*hpreq_check)(struct ptlrpc_request *);
+	/**
+	 * Called after the request has been handled.
+	 */
+	void (*hpreq_fini)(struct ptlrpc_request *);
+};
+
+/**
+ * Represents remote procedure call.
+ *
+ * This is a staple structure used by everybody wanting to send a request
+ * in Lustre.
+ */
+struct ptlrpc_request {
+	/* Request type: one of PTL_RPC_MSG_* */
+	int rq_type;
+	/** Result of request processing */
+	int rq_status;
+	/**
+	 * Linkage item through which this request is included into
+	 * sending/delayed lists on client and into rqbd list on server
+	 */
+	struct list_head rq_list;
+	/**
+	 * Server side list of incoming unserved requests sorted by arrival
+	 * time.  Traversed from time to time to notice about to expire
+	 * requests and sent back "early replies" to clients to let them
+	 * know server is alive and well, just very busy to service their
+	 * requests in time
+	 */
+	struct list_head rq_timed_list;
+	/** server-side history, used for debuging purposes. */
+	struct list_head rq_history_list;
+	/** server-side per-export list */
+	struct list_head rq_exp_list;
+	/** server-side hp handlers */
+	struct ptlrpc_hpreq_ops *rq_ops;
+
+	/** initial thread servicing this request */
+	struct ptlrpc_thread *rq_svc_thread;
+
+	/** history sequence # */
+	__u64 rq_history_seq;
+	/** \addtogroup  nrs
+	 * @{
+	 */
+	/** stub for NRS request */
+	struct ptlrpc_nrs_request rq_nrq;
+	/** @} nrs */
+	/** the index of service's srv_at_array into which request is linked */
+	time_t rq_at_index;
+	/** Lock to protect request flags and some other important bits, like
+	 * rq_list
+	 */
+	spinlock_t rq_lock;
+	/** client-side flags are serialized by rq_lock */
+	unsigned int rq_intr:1, rq_replied:1, rq_err:1,
+		rq_timedout:1, rq_resend:1, rq_restart:1,
+		/**
+		 * when ->rq_replay is set, request is kept by the client even
+		 * after server commits corresponding transaction. This is
+		 * used for operations that require sequence of multiple
+		 * requests to be replayed. The only example currently is file
+		 * open/close. When last request in such a sequence is
+		 * committed, ->rq_replay is cleared on all requests in the
+		 * sequence.
+		 */
+		rq_replay:1,
+		rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
+		rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
+		rq_early:1, rq_must_unlink:1,
+		rq_memalloc:1,      /* req originated from "kswapd" */
+		/* server-side flags */
+		rq_packed_final:1,  /* packed final reply */
+		rq_hp:1,	    /* high priority RPC */
+		rq_at_linked:1,     /* link into service's srv_at_array */
+		rq_reply_truncate:1,
+		rq_committed:1,
+		/* whether the "rq_set" is a valid one */
+		rq_invalid_rqset:1,
+		rq_generation_set:1,
+		/* do not resend request on -EINPROGRESS */
+		rq_no_retry_einprogress:1,
+		/* allow the req to be sent if the import is in recovery
+		 * status */
+		rq_allow_replay:1;
+
+	unsigned int rq_nr_resend;
+
+	enum rq_phase rq_phase; /* one of RQ_PHASE_* */
+	enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */
+	atomic_t rq_refcount;/* client-side refcount for SENT race,
+				    server-side refcounf for multiple replies */
+
+	/** Portal to which this request would be sent */
+	short rq_request_portal;  /* XXX FIXME bug 249 */
+	/** Portal where to wait for reply and where reply would be sent */
+	short rq_reply_portal;    /* XXX FIXME bug 249 */
+
+	/**
+	 * client-side:
+	 * !rq_truncate : # reply bytes actually received,
+	 *  rq_truncate : required repbuf_len for resend
+	 */
+	int rq_nob_received;
+	/** Request length */
+	int rq_reqlen;
+	/** Reply length */
+	int rq_replen;
+	/** Request message - what client sent */
+	struct lustre_msg *rq_reqmsg;
+	/** Reply message - server response */
+	struct lustre_msg *rq_repmsg;
+	/** Transaction number */
+	__u64 rq_transno;
+	/** xid */
+	__u64 rq_xid;
+	/**
+	 * List item to for replay list. Not yet commited requests get linked
+	 * there.
+	 * Also see \a rq_replay comment above.
+	 */
+	struct list_head rq_replay_list;
+
+	/**
+	 * security and encryption data
+	 * @{ */
+	struct ptlrpc_cli_ctx   *rq_cli_ctx;     /**< client's half ctx */
+	struct ptlrpc_svc_ctx   *rq_svc_ctx;     /**< server's half ctx */
+	struct list_head	       rq_ctx_chain;   /**< link to waited ctx */
+
+	struct sptlrpc_flavor    rq_flvr;	/**< for client & server */
+	enum lustre_sec_part     rq_sp_from;
+
+	/* client/server security flags */
+	unsigned int
+				 rq_ctx_init:1,      /* context initiation */
+				 rq_ctx_fini:1,      /* context destroy */
+				 rq_bulk_read:1,     /* request bulk read */
+				 rq_bulk_write:1,    /* request bulk write */
+				 /* server authentication flags */
+				 rq_auth_gss:1,      /* authenticated by gss */
+				 rq_auth_remote:1,   /* authed as remote user */
+				 rq_auth_usr_root:1, /* authed as root */
+				 rq_auth_usr_mdt:1,  /* authed as mdt */
+				 rq_auth_usr_ost:1,  /* authed as ost */
+				 /* security tfm flags */
+				 rq_pack_udesc:1,
+				 rq_pack_bulk:1,
+				 /* doesn't expect reply FIXME */
+				 rq_no_reply:1,
+				 rq_pill_init:1;     /* pill initialized */
+
+	uid_t		    rq_auth_uid;	/* authed uid */
+	uid_t		    rq_auth_mapped_uid; /* authed uid mapped to */
+
+	/* (server side), pointed directly into req buffer */
+	struct ptlrpc_user_desc *rq_user_desc;
+
+	/* various buffer pointers */
+	struct lustre_msg       *rq_reqbuf;      /* req wrapper */
+	char		    *rq_repbuf;      /* rep buffer */
+	struct lustre_msg       *rq_repdata;     /* rep wrapper msg */
+	struct lustre_msg       *rq_clrbuf;      /* only in priv mode */
+	int		      rq_reqbuf_len;  /* req wrapper buf len */
+	int		      rq_reqdata_len; /* req wrapper msg len */
+	int		      rq_repbuf_len;  /* rep buffer len */
+	int		      rq_repdata_len; /* rep wrapper msg len */
+	int		      rq_clrbuf_len;  /* only in priv mode */
+	int		      rq_clrdata_len; /* only in priv mode */
+
+	/** early replies go to offset 0, regular replies go after that */
+	unsigned int	     rq_reply_off;
+
+	/** @} */
+
+	/** Fields that help to see if request and reply were swabbed or not */
+	__u32 rq_req_swab_mask;
+	__u32 rq_rep_swab_mask;
+
+	/** What was import generation when this request was sent */
+	int rq_import_generation;
+	enum lustre_imp_state rq_send_state;
+
+	/** how many early replies (for stats) */
+	int rq_early_count;
+
+	/** client+server request */
+	lnet_handle_md_t     rq_req_md_h;
+	struct ptlrpc_cb_id  rq_req_cbid;
+	/** optional time limit for send attempts */
+	cfs_duration_t       rq_delay_limit;
+	/** time request was first queued */
+	cfs_time_t	   rq_queued_time;
+
+	/* server-side... */
+	/** request arrival time */
+	struct timeval       rq_arrival_time;
+	/** separated reply state */
+	struct ptlrpc_reply_state *rq_reply_state;
+	/** incoming request buffer */
+	struct ptlrpc_request_buffer_desc *rq_rqbd;
+
+	/** client-only incoming reply */
+	lnet_handle_md_t     rq_reply_md_h;
+	wait_queue_head_t	  rq_reply_waitq;
+	struct ptlrpc_cb_id  rq_reply_cbid;
+
+	/** our LNet NID */
+	lnet_nid_t	   rq_self;
+	/** Peer description (the other side) */
+	lnet_process_id_t    rq_peer;
+	/** Server-side, export on which request was received */
+	struct obd_export   *rq_export;
+	/** Client side, import where request is being sent */
+	struct obd_import   *rq_import;
+
+	/** Replay callback, called after request is replayed at recovery */
+	void (*rq_replay_cb)(struct ptlrpc_request *);
+	/**
+	 * Commit callback, called when request is committed and about to be
+	 * freed.
+	 */
+	void (*rq_commit_cb)(struct ptlrpc_request *);
+	/** Opaq data for replay and commit callbacks. */
+	void  *rq_cb_data;
+
+	/** For bulk requests on client only: bulk descriptor */
+	struct ptlrpc_bulk_desc *rq_bulk;
+
+	/** client outgoing req */
+	/**
+	 * when request/reply sent (secs), or time when request should be sent
+	 */
+	time_t rq_sent;
+	/** time for request really sent out */
+	time_t rq_real_sent;
+
+	/** when request must finish. volatile
+	 * so that servers' early reply updates to the deadline aren't
+	 * kept in per-cpu cache */
+	volatile time_t rq_deadline;
+	/** when req reply unlink must finish. */
+	time_t rq_reply_deadline;
+	/** when req bulk unlink must finish. */
+	time_t rq_bulk_deadline;
+	/**
+	 * service time estimate (secs)
+	 * If the requestsis not served by this time, it is marked as timed out.
+	 */
+	int    rq_timeout;
+
+	/** Multi-rpc bits */
+	/** Per-request waitq introduced by bug 21938 for recovery waiting */
+	wait_queue_head_t rq_set_waitq;
+	/** Link item for request set lists */
+	struct list_head  rq_set_chain;
+	/** Link back to the request set */
+	struct ptlrpc_request_set *rq_set;
+	/** Async completion handler, called when reply is received */
+	ptlrpc_interpterer_t rq_interpret_reply;
+	/** Async completion context */
+	union ptlrpc_async_args rq_async_args;
+
+	/** Pool if request is from preallocated list */
+	struct ptlrpc_request_pool *rq_pool;
+
+	struct lu_context	   rq_session;
+	struct lu_context	   rq_recov_session;
+
+	/** request format description */
+	struct req_capsule	  rq_pill;
+};
+
+/**
+ * Call completion handler for rpc if any, return it's status or original
+ * rc if there was no handler defined for this request.
+ */
+static inline int ptlrpc_req_interpret(const struct lu_env *env,
+				       struct ptlrpc_request *req, int rc)
+{
+	if (req->rq_interpret_reply != NULL) {
+		req->rq_status = req->rq_interpret_reply(env, req,
+							 &req->rq_async_args,
+							 rc);
+		return req->rq_status;
+	}
+	return rc;
+}
+
+/** \addtogroup  nrs
+ * @{
+ */
+int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf);
+int ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf *conf);
+void ptlrpc_nrs_req_hp_move(struct ptlrpc_request *req);
+void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy,
+				struct ptlrpc_nrs_pol_info *info);
+
+/*
+ * Can the request be moved from the regular NRS head to the high-priority NRS
+ * head (of the same PTLRPC service partition), if any?
+ *
+ * For a reliable result, this should be checked under svcpt->scp_req lock.
+ */
+static inline bool ptlrpc_nrs_req_can_move(struct ptlrpc_request *req)
+{
+	struct ptlrpc_nrs_request *nrq = &req->rq_nrq;
+
+	/**
+	 * LU-898: Check ptlrpc_nrs_request::nr_enqueued to make sure the
+	 * request has been enqueued first, and ptlrpc_nrs_request::nr_started
+	 * to make sure it has not been scheduled yet (analogous to previous
+	 * (non-NRS) checking of !list_empty(&ptlrpc_request::rq_list).
+	 */
+	return nrq->nr_enqueued && !nrq->nr_started && !req->rq_hp;
+}
+/** @} nrs */
+
+/**
+ * Returns 1 if request buffer at offset \a index was already swabbed
+ */
+static inline int lustre_req_swabbed(struct ptlrpc_request *req, int index)
+{
+	LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
+	return req->rq_req_swab_mask & (1 << index);
+}
+
+/**
+ * Returns 1 if request reply buffer at offset \a index was already swabbed
+ */
+static inline int lustre_rep_swabbed(struct ptlrpc_request *req, int index)
+{
+	LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
+	return req->rq_rep_swab_mask & (1 << index);
+}
+
+/**
+ * Returns 1 if request needs to be swabbed into local cpu byteorder
+ */
+static inline int ptlrpc_req_need_swab(struct ptlrpc_request *req)
+{
+	return lustre_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+}
+
+/**
+ * Returns 1 if request reply needs to be swabbed into local cpu byteorder
+ */
+static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req)
+{
+	return lustre_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+}
+
+/**
+ * Mark request buffer at offset \a index that it was already swabbed
+ */
+static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
+{
+	LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
+	LASSERT((req->rq_req_swab_mask & (1 << index)) == 0);
+	req->rq_req_swab_mask |= 1 << index;
+}
+
+/**
+ * Mark request reply buffer at offset \a index that it was already swabbed
+ */
+static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, int index)
+{
+	LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
+	LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0);
+	req->rq_rep_swab_mask |= 1 << index;
+}
+
+/**
+ * Convert numerical request phase value \a phase into text string description
+ */
+static inline const char *
+ptlrpc_phase2str(enum rq_phase phase)
+{
+	switch (phase) {
+	case RQ_PHASE_NEW:
+		return "New";
+	case RQ_PHASE_RPC:
+		return "Rpc";
+	case RQ_PHASE_BULK:
+		return "Bulk";
+	case RQ_PHASE_INTERPRET:
+		return "Interpret";
+	case RQ_PHASE_COMPLETE:
+		return "Complete";
+	case RQ_PHASE_UNREGISTERING:
+		return "Unregistering";
+	default:
+		return "?Phase?";
+	}
+}
+
+/**
+ * Convert numerical request phase of the request \a req into text stringi
+ * description
+ */
+static inline const char *
+ptlrpc_rqphase2str(struct ptlrpc_request *req)
+{
+	return ptlrpc_phase2str(req->rq_phase);
+}
+
+/**
+ * Debugging functions and helpers to print request structure into debug log
+ * @{
+ */
+/* Spare the preprocessor, spoil the bugs. */
+#define FLAG(field, str) (field ? str : "")
+
+/** Convert bit flags into a string */
+#define DEBUG_REQ_FLAGS(req)						    \
+	ptlrpc_rqphase2str(req),						\
+	FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"),		    \
+	FLAG(req->rq_err, "E"),						 \
+	FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"),   \
+	FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"),		  \
+	FLAG(req->rq_no_resend, "N"),					   \
+	FLAG(req->rq_waiting, "W"),					     \
+	FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"),		     \
+	FLAG(req->rq_committed, "M")
+
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s"
+
+void _debug_req(struct ptlrpc_request *req,
+		struct libcfs_debug_msg_data *data, const char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
+
+/**
+ * Helper that decides if we need to print request accordig to current debug
+ * level settings
+ */
+#define debug_req(msgdata, mask, cdls, req, fmt, a...)			\
+do {									  \
+	CFS_CHECK_STACK(msgdata, mask, cdls);				 \
+									      \
+	if (((mask) & D_CANTMASK) != 0 ||				     \
+	    ((libcfs_debug & (mask)) != 0 &&				  \
+	     (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0))		\
+		_debug_req((req), msgdata, fmt, ##a);			 \
+} while(0)
+
+/**
+ * This is the debug print function you need to use to print request sturucture
+ * content into lustre debug log.
+ * for most callers (level is a constant) this is resolved at compile time */
+#define DEBUG_REQ(level, req, fmt, args...)				   \
+do {									  \
+	if ((level) & (D_ERROR | D_WARNING)) {				\
+		static cfs_debug_limit_state_t cdls;			  \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls);	    \
+		debug_req(&msgdata, level, &cdls, req, "@@@ "fmt" ", ## args);\
+	} else {							      \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, NULL);	     \
+		debug_req(&msgdata, level, NULL, req, "@@@ "fmt" ", ## args); \
+	}								     \
+} while (0)
+/** @} */
+
+/**
+ * Structure that defines a single page of a bulk transfer
+ */
+struct ptlrpc_bulk_page {
+	/** Linkage to list of pages in a bulk */
+	struct list_head       bp_link;
+	/**
+	 * Number of bytes in a page to transfer starting from \a bp_pageoffset
+	 */
+	int	      bp_buflen;
+	/** offset within a page */
+	int	      bp_pageoffset;
+	/** The page itself */
+	struct page     *bp_page;
+};
+
+#define BULK_GET_SOURCE   0
+#define BULK_PUT_SINK     1
+#define BULK_GET_SINK     2
+#define BULK_PUT_SOURCE   3
+
+/**
+ * Definition of bulk descriptor.
+ * Bulks are special "Two phase" RPCs where initial request message
+ * is sent first and it is followed bt a transfer (o receiving) of a large
+ * amount of data to be settled into pages referenced from the bulk descriptors.
+ * Bulks transfers (the actual data following the small requests) are done
+ * on separate LNet portals.
+ * In lustre we use bulk transfers for READ and WRITE transfers from/to OSTs.
+ *  Another user is readpage for MDT.
+ */
+struct ptlrpc_bulk_desc {
+	/** completed with failure */
+	unsigned long bd_failure:1;
+	/** {put,get}{source,sink} */
+	unsigned long bd_type:2;
+	/** client side */
+	unsigned long bd_registered:1;
+	/** For serialization with callback */
+	spinlock_t bd_lock;
+	/** Import generation when request for this bulk was sent */
+	int bd_import_generation;
+	/** LNet portal for this bulk */
+	__u32 bd_portal;
+	/** Server side - export this bulk created for */
+	struct obd_export *bd_export;
+	/** Client side - import this bulk was sent on */
+	struct obd_import *bd_import;
+	/** Back pointer to the request */
+	struct ptlrpc_request *bd_req;
+	wait_queue_head_t	    bd_waitq;	/* server side only WQ */
+	int		    bd_iov_count;    /* # entries in bd_iov */
+	int		    bd_max_iov;      /* allocated size of bd_iov */
+	int		    bd_nob;	  /* # bytes covered */
+	int		    bd_nob_transferred; /* # bytes GOT/PUT */
+
+	__u64		  bd_last_xid;
+
+	struct ptlrpc_cb_id    bd_cbid;	 /* network callback info */
+	lnet_nid_t	     bd_sender;       /* stash event::sender */
+	int			bd_md_count;	/* # valid entries in bd_mds */
+	int			bd_md_max_brw;	/* max entries in bd_mds */
+	/** array of associated MDs */
+	lnet_handle_md_t	bd_mds[PTLRPC_BULK_OPS_COUNT];
+
+	/*
+	 * encrypt iov, size is either 0 or bd_iov_count.
+	 */
+	lnet_kiov_t	   *bd_enc_iov;
+
+	lnet_kiov_t	    bd_iov[0];
+};
+
+enum {
+	SVC_STOPPED     = 1 << 0,
+	SVC_STOPPING    = 1 << 1,
+	SVC_STARTING    = 1 << 2,
+	SVC_RUNNING     = 1 << 3,
+	SVC_EVENT       = 1 << 4,
+	SVC_SIGNAL      = 1 << 5,
+};
+
+#define PTLRPC_THR_NAME_LEN		32
+/**
+ * Definition of server service thread structure
+ */
+struct ptlrpc_thread {
+	/**
+	 * List of active threads in svc->srv_threads
+	 */
+	struct list_head t_link;
+	/**
+	 * thread-private data (preallocated memory)
+	 */
+	void *t_data;
+	__u32 t_flags;
+	/**
+	 * service thread index, from ptlrpc_start_threads
+	 */
+	unsigned int t_id;
+	/**
+	 * service thread pid
+	 */
+	pid_t t_pid;
+	/**
+	 * put watchdog in the structure per thread b=14840
+	 */
+	struct lc_watchdog *t_watchdog;
+	/**
+	 * the svc this thread belonged to b=18582
+	 */
+	struct ptlrpc_service_part	*t_svcpt;
+	wait_queue_head_t			t_ctl_waitq;
+	struct lu_env			*t_env;
+	char				t_name[PTLRPC_THR_NAME_LEN];
+};
+
+static inline int thread_is_init(struct ptlrpc_thread *thread)
+{
+	return thread->t_flags == 0;
+}
+
+static inline int thread_is_stopped(struct ptlrpc_thread *thread)
+{
+	return !!(thread->t_flags & SVC_STOPPED);
+}
+
+static inline int thread_is_stopping(struct ptlrpc_thread *thread)
+{
+	return !!(thread->t_flags & SVC_STOPPING);
+}
+
+static inline int thread_is_starting(struct ptlrpc_thread *thread)
+{
+	return !!(thread->t_flags & SVC_STARTING);
+}
+
+static inline int thread_is_running(struct ptlrpc_thread *thread)
+{
+	return !!(thread->t_flags & SVC_RUNNING);
+}
+
+static inline int thread_is_event(struct ptlrpc_thread *thread)
+{
+	return !!(thread->t_flags & SVC_EVENT);
+}
+
+static inline int thread_is_signal(struct ptlrpc_thread *thread)
+{
+	return !!(thread->t_flags & SVC_SIGNAL);
+}
+
+static inline void thread_clear_flags(struct ptlrpc_thread *thread, __u32 flags)
+{
+	thread->t_flags &= ~flags;
+}
+
+static inline void thread_set_flags(struct ptlrpc_thread *thread, __u32 flags)
+{
+	thread->t_flags = flags;
+}
+
+static inline void thread_add_flags(struct ptlrpc_thread *thread, __u32 flags)
+{
+	thread->t_flags |= flags;
+}
+
+static inline int thread_test_and_clear_flags(struct ptlrpc_thread *thread,
+					      __u32 flags)
+{
+	if (thread->t_flags & flags) {
+		thread->t_flags &= ~flags;
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * Request buffer descriptor structure.
+ * This is a structure that contains one posted request buffer for service.
+ * Once data land into a buffer, event callback creates actual request and
+ * notifies wakes one of the service threads to process new incoming request.
+ * More than one request can fit into the buffer.
+ */
+struct ptlrpc_request_buffer_desc {
+	/** Link item for rqbds on a service */
+	struct list_head	     rqbd_list;
+	/** History of requests for this buffer */
+	struct list_head	     rqbd_reqs;
+	/** Back pointer to service for which this buffer is registered */
+	struct ptlrpc_service_part *rqbd_svcpt;
+	/** LNet descriptor */
+	lnet_handle_md_t       rqbd_md_h;
+	int		    rqbd_refcount;
+	/** The buffer itself */
+	char		  *rqbd_buffer;
+	struct ptlrpc_cb_id    rqbd_cbid;
+	/**
+	 * This "embedded" request structure is only used for the
+	 * last request to fit into the buffer
+	 */
+	struct ptlrpc_request  rqbd_req;
+};
+
+typedef int  (*svc_handler_t)(struct ptlrpc_request *req);
+
+struct ptlrpc_service_ops {
+	/**
+	 * if non-NULL called during thread creation (ptlrpc_start_thread())
+	 * to initialize service specific per-thread state.
+	 */
+	int		(*so_thr_init)(struct ptlrpc_thread *thr);
+	/**
+	 * if non-NULL called during thread shutdown (ptlrpc_main()) to
+	 * destruct state created by ->srv_init().
+	 */
+	void		(*so_thr_done)(struct ptlrpc_thread *thr);
+	/**
+	 * Handler function for incoming requests for this service
+	 */
+	int		(*so_req_handler)(struct ptlrpc_request *req);
+	/**
+	 * function to determine priority of the request, it's called
+	 * on every new request
+	 */
+	int		(*so_hpreq_handler)(struct ptlrpc_request *);
+	/**
+	 * service-specific print fn
+	 */
+	void		(*so_req_printer)(void *, struct ptlrpc_request *);
+};
+
+#ifndef __cfs_cacheline_aligned
+/* NB: put it here for reducing patche dependence */
+# define __cfs_cacheline_aligned
+#endif
+
+/**
+ * How many high priority requests to serve before serving one normal
+ * priority request
+ */
+#define PTLRPC_SVC_HP_RATIO 10
+
+/**
+ * Definition of PortalRPC service.
+ * The service is listening on a particular portal (like tcp port)
+ * and perform actions for a specific server like IO service for OST
+ * or general metadata service for MDS.
+ */
+struct ptlrpc_service {
+	/** serialize /proc operations */
+	spinlock_t			srv_lock;
+	/** most often accessed fields */
+	/** chain thru all services */
+	struct list_head		      srv_list;
+	/** service operations table */
+	struct ptlrpc_service_ops	srv_ops;
+	/** only statically allocated strings here; we don't clean them */
+	char			   *srv_name;
+	/** only statically allocated strings here; we don't clean them */
+	char			   *srv_thread_name;
+	/** service thread list */
+	struct list_head		      srv_threads;
+	/** threads # should be created for each partition on initializing */
+	int				srv_nthrs_cpt_init;
+	/** limit of threads number for each partition */
+	int				srv_nthrs_cpt_limit;
+	/** Root of /proc dir tree for this service */
+	proc_dir_entry_t	   *srv_procroot;
+	/** Pointer to statistic data for this service */
+	struct lprocfs_stats	   *srv_stats;
+	/** # hp per lp reqs to handle */
+	int			     srv_hpreq_ratio;
+	/** biggest request to receive */
+	int			     srv_max_req_size;
+	/** biggest reply to send */
+	int			     srv_max_reply_size;
+	/** size of individual buffers */
+	int			     srv_buf_size;
+	/** # buffers to allocate in 1 group */
+	int			     srv_nbuf_per_group;
+	/** Local portal on which to receive requests */
+	__u32			   srv_req_portal;
+	/** Portal on the client to send replies to */
+	__u32			   srv_rep_portal;
+	/**
+	 * Tags for lu_context associated with this thread, see struct
+	 * lu_context.
+	 */
+	__u32			   srv_ctx_tags;
+	/** soft watchdog timeout multiplier */
+	int			     srv_watchdog_factor;
+	/** under unregister_service */
+	unsigned			srv_is_stopping:1;
+
+	/** max # request buffers in history per partition */
+	int				srv_hist_nrqbds_cpt_max;
+	/** number of CPTs this service bound on */
+	int				srv_ncpts;
+	/** CPTs array this service bound on */
+	__u32				*srv_cpts;
+	/** 2^srv_cptab_bits >= cfs_cpt_numbert(srv_cptable) */
+	int				srv_cpt_bits;
+	/** CPT table this service is running over */
+	struct cfs_cpt_table		*srv_cptable;
+	/**
+	 * partition data for ptlrpc service
+	 */
+	struct ptlrpc_service_part	*srv_parts[0];
+};
+
+/**
+ * Definition of PortalRPC service partition data.
+ * Although a service only has one instance of it right now, but we
+ * will have multiple instances very soon (instance per CPT).
+ *
+ * it has four locks:
+ * \a scp_lock
+ *    serialize operations on rqbd and requests waiting for preprocess
+ * \a scp_req_lock
+ *    serialize operations active requests sent to this portal
+ * \a scp_at_lock
+ *    serialize adaptive timeout stuff
+ * \a scp_rep_lock
+ *    serialize operations on RS list (reply states)
+ *
+ * We don't have any use-case to take two or more locks at the same time
+ * for now, so there is no lock order issue.
+ */
+struct ptlrpc_service_part {
+	/** back reference to owner */
+	struct ptlrpc_service		*scp_service __cfs_cacheline_aligned;
+	/* CPT id, reserved */
+	int				scp_cpt;
+	/** always increasing number */
+	int				scp_thr_nextid;
+	/** # of starting threads */
+	int				scp_nthrs_starting;
+	/** # of stopping threads, reserved for shrinking threads */
+	int				scp_nthrs_stopping;
+	/** # running threads */
+	int				scp_nthrs_running;
+	/** service threads list */
+	struct list_head			scp_threads;
+
+	/**
+	 * serialize the following fields, used for protecting
+	 * rqbd list and incoming requests waiting for preprocess,
+	 * threads starting & stopping are also protected by this lock.
+	 */
+	spinlock_t			scp_lock  __cfs_cacheline_aligned;
+	/** total # req buffer descs allocated */
+	int				scp_nrqbds_total;
+	/** # posted request buffers for receiving */
+	int				scp_nrqbds_posted;
+	/** in progress of allocating rqbd */
+	int				scp_rqbd_allocating;
+	/** # incoming reqs */
+	int				scp_nreqs_incoming;
+	/** request buffers to be reposted */
+	struct list_head			scp_rqbd_idle;
+	/** req buffers receiving */
+	struct list_head			scp_rqbd_posted;
+	/** incoming reqs */
+	struct list_head			scp_req_incoming;
+	/** timeout before re-posting reqs, in tick */
+	cfs_duration_t			scp_rqbd_timeout;
+	/**
+	 * all threads sleep on this. This wait-queue is signalled when new
+	 * incoming request arrives and when difficult reply has to be handled.
+	 */
+	wait_queue_head_t			scp_waitq;
+
+	/** request history */
+	struct list_head			scp_hist_reqs;
+	/** request buffer history */
+	struct list_head			scp_hist_rqbds;
+	/** # request buffers in history */
+	int				scp_hist_nrqbds;
+	/** sequence number for request */
+	__u64				scp_hist_seq;
+	/** highest seq culled from history */
+	__u64				scp_hist_seq_culled;
+
+	/**
+	 * serialize the following fields, used for processing requests
+	 * sent to this portal
+	 */
+	spinlock_t			scp_req_lock __cfs_cacheline_aligned;
+	/** # reqs in either of the NRS heads below */
+	/** # reqs being served */
+	int				scp_nreqs_active;
+	/** # HPreqs being served */
+	int				scp_nhreqs_active;
+	/** # hp requests handled */
+	int				scp_hreq_count;
+
+	/** NRS head for regular requests */
+	struct ptlrpc_nrs		scp_nrs_reg;
+	/** NRS head for HP requests; this is only valid for services that can
+	 *  handle HP requests */
+	struct ptlrpc_nrs	       *scp_nrs_hp;
+
+	/** AT stuff */
+	/** @{ */
+	/**
+	 * serialize the following fields, used for changes on
+	 * adaptive timeout
+	 */
+	spinlock_t			scp_at_lock __cfs_cacheline_aligned;
+	/** estimated rpc service time */
+	struct adaptive_timeout		scp_at_estimate;
+	/** reqs waiting for replies */
+	struct ptlrpc_at_array		scp_at_array;
+	/** early reply timer */
+	timer_list_t			scp_at_timer;
+	/** debug */
+	cfs_time_t			scp_at_checktime;
+	/** check early replies */
+	unsigned			scp_at_check;
+	/** @} */
+
+	/**
+	 * serialize the following fields, used for processing
+	 * replies for this portal
+	 */
+	spinlock_t			scp_rep_lock __cfs_cacheline_aligned;
+	/** all the active replies */
+	struct list_head			scp_rep_active;
+	/** List of free reply_states */
+	struct list_head			scp_rep_idle;
+	/** waitq to run, when adding stuff to srv_free_rs_list */
+	wait_queue_head_t			scp_rep_waitq;
+	/** # 'difficult' replies */
+	atomic_t			scp_nreps_difficult;
+};
+
+#define ptlrpc_service_for_each_part(part, i, svc)			\
+	for (i = 0;							\
+	     i < (svc)->srv_ncpts &&					\
+	     (svc)->srv_parts != NULL &&				\
+	     ((part) = (svc)->srv_parts[i]) != NULL; i++)
+
+/**
+ * Declaration of ptlrpcd control structure
+ */
+struct ptlrpcd_ctl {
+	/**
+	 * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE)
+	 */
+	unsigned long			pc_flags;
+	/**
+	 * Thread lock protecting structure fields.
+	 */
+	spinlock_t			pc_lock;
+	/**
+	 * Start completion.
+	 */
+	struct completion		pc_starting;
+	/**
+	 * Stop completion.
+	 */
+	struct completion		pc_finishing;
+	/**
+	 * Thread requests set.
+	 */
+	struct ptlrpc_request_set  *pc_set;
+	/**
+	 * Thread name used in cfs_daemonize()
+	 */
+	char			pc_name[16];
+	/**
+	 * Environment for request interpreters to run in.
+	 */
+	struct lu_env	       pc_env;
+	/**
+	 * Index of ptlrpcd thread in the array.
+	 */
+	int			 pc_index;
+	/**
+	 * Number of the ptlrpcd's partners.
+	 */
+	int			 pc_npartners;
+	/**
+	 * Pointer to the array of partners' ptlrpcd_ctl structure.
+	 */
+	struct ptlrpcd_ctl	**pc_partners;
+	/**
+	 * Record the partner index to be processed next.
+	 */
+	int			 pc_cursor;
+};
+
+/* Bits for pc_flags */
+enum ptlrpcd_ctl_flags {
+	/**
+	 * Ptlrpc thread start flag.
+	 */
+	LIOD_START       = 1 << 0,
+	/**
+	 * Ptlrpc thread stop flag.
+	 */
+	LIOD_STOP	= 1 << 1,
+	/**
+	 * Ptlrpc thread force flag (only stop force so far).
+	 * This will cause aborting any inflight rpcs handled
+	 * by thread if LIOD_STOP is specified.
+	 */
+	LIOD_FORCE       = 1 << 2,
+	/**
+	 * This is a recovery ptlrpc thread.
+	 */
+	LIOD_RECOVERY    = 1 << 3,
+	/**
+	 * The ptlrpcd is bound to some CPU core.
+	 */
+	LIOD_BIND	= 1 << 4,
+};
+
+/**
+ * \addtogroup nrs
+ * @{
+ *
+ * Service compatibility function; the policy is compatible with all services.
+ *
+ * \param[in] svc  The service the policy is attempting to register with.
+ * \param[in] desc The policy descriptor
+ *
+ * \retval true The policy is compatible with the service
+ *
+ * \see ptlrpc_nrs_pol_desc::pd_compat()
+ */
+static inline bool nrs_policy_compat_all(const struct ptlrpc_service *svc,
+					 const struct ptlrpc_nrs_pol_desc *desc)
+{
+	return true;
+}
+
+/**
+ * Service compatibility function; the policy is compatible with only a specific
+ * service which is identified by its human-readable name at
+ * ptlrpc_service::srv_name.
+ *
+ * \param[in] svc  The service the policy is attempting to register with.
+ * \param[in] desc The policy descriptor
+ *
+ * \retval false The policy is not compatible with the service
+ * \retval true	 The policy is compatible with the service
+ *
+ * \see ptlrpc_nrs_pol_desc::pd_compat()
+ */
+static inline bool nrs_policy_compat_one(const struct ptlrpc_service *svc,
+					 const struct ptlrpc_nrs_pol_desc *desc)
+{
+	LASSERT(desc->pd_compat_svc_name != NULL);
+	return strcmp(svc->srv_name, desc->pd_compat_svc_name) == 0;
+}
+
+/** @} nrs */
+
+/* ptlrpc/events.c */
+extern lnet_handle_eq_t ptlrpc_eq_h;
+extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
+			       lnet_process_id_t *peer, lnet_nid_t *self);
+/**
+ * These callbacks are invoked by LNet when something happened to
+ * underlying buffer
+ * @{
+ */
+extern void request_out_callback(lnet_event_t *ev);
+extern void reply_in_callback(lnet_event_t *ev);
+extern void client_bulk_callback(lnet_event_t *ev);
+extern void request_in_callback(lnet_event_t *ev);
+extern void reply_out_callback(lnet_event_t *ev);
+/** @} */
+
+/* ptlrpc/connection.c */
+struct ptlrpc_connection *ptlrpc_connection_get(lnet_process_id_t peer,
+						lnet_nid_t self,
+						struct obd_uuid *uuid);
+int ptlrpc_connection_put(struct ptlrpc_connection *c);
+struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
+int ptlrpc_connection_init(void);
+void ptlrpc_connection_fini(void);
+extern lnet_pid_t ptl_get_pid(void);
+
+/* ptlrpc/niobuf.c */
+/**
+ * Actual interfacing with LNet to put/get/register/unregister stuff
+ * @{
+ */
+
+int ptlrpc_register_bulk(struct ptlrpc_request *req);
+int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async);
+
+static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
+{
+	struct ptlrpc_bulk_desc *desc;
+	int		      rc;
+
+	LASSERT(req != NULL);
+	desc = req->rq_bulk;
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
+	    req->rq_bulk_deadline > cfs_time_current_sec())
+		return 1;
+
+	if (!desc)
+		return 0;
+
+	spin_lock(&desc->bd_lock);
+	rc = desc->bd_md_count;
+	spin_unlock(&desc->bd_lock);
+	return rc;
+}
+
+#define PTLRPC_REPLY_MAYBE_DIFFICULT 0x01
+#define PTLRPC_REPLY_EARLY	   0x02
+int ptlrpc_send_reply(struct ptlrpc_request *req, int flags);
+int ptlrpc_reply(struct ptlrpc_request *req);
+int ptlrpc_send_error(struct ptlrpc_request *req, int difficult);
+int ptlrpc_error(struct ptlrpc_request *req);
+void ptlrpc_resend_req(struct ptlrpc_request *request);
+int ptlrpc_at_get_net_latency(struct ptlrpc_request *req);
+int ptl_send_rpc(struct ptlrpc_request *request, int noreply);
+int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd);
+/** @} */
+
+/* ptlrpc/client.c */
+/**
+ * Client-side portals API. Everything to send requests, receive replies,
+ * request queues, request management, etc.
+ * @{
+ */
+void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
+			struct ptlrpc_client *);
+void ptlrpc_cleanup_client(struct obd_import *imp);
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
+
+int ptlrpc_queue_wait(struct ptlrpc_request *req);
+int ptlrpc_replay_req(struct ptlrpc_request *req);
+int ptlrpc_unregister_reply(struct ptlrpc_request *req, int async);
+void ptlrpc_restart_req(struct ptlrpc_request *req);
+void ptlrpc_abort_inflight(struct obd_import *imp);
+void ptlrpc_cleanup_imp(struct obd_import *imp);
+void ptlrpc_abort_set(struct ptlrpc_request_set *set);
+
+struct ptlrpc_request_set *ptlrpc_prep_set(void);
+struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
+					     void *arg);
+int ptlrpc_set_add_cb(struct ptlrpc_request_set *set,
+		      set_interpreter_func fn, void *data);
+int ptlrpc_set_next_timeout(struct ptlrpc_request_set *);
+int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set);
+int ptlrpc_set_wait(struct ptlrpc_request_set *);
+int ptlrpc_expired_set(void *data);
+void ptlrpc_interrupted_set(void *data);
+void ptlrpc_mark_interrupted(struct ptlrpc_request *req);
+void ptlrpc_set_destroy(struct ptlrpc_request_set *);
+void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *);
+void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
+			    struct ptlrpc_request *req);
+
+void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
+void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
+
+struct ptlrpc_request_pool *
+ptlrpc_init_rq_pool(int, int,
+		    void (*populate_pool)(struct ptlrpc_request_pool *, int));
+
+void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req);
+struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
+					    const struct req_format *format);
+struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp,
+					    struct ptlrpc_request_pool *,
+					    const struct req_format *format);
+void ptlrpc_request_free(struct ptlrpc_request *request);
+int ptlrpc_request_pack(struct ptlrpc_request *request,
+			__u32 version, int opcode);
+struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp,
+						const struct req_format *format,
+						__u32 version, int opcode);
+int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+			     __u32 version, int opcode, char **bufs,
+			     struct ptlrpc_cli_ctx *ctx);
+struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version,
+				       int opcode, int count, __u32 *lengths,
+				       char **bufs);
+struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp,
+					     __u32 version, int opcode,
+					    int count, __u32 *lengths, char **bufs,
+					    struct ptlrpc_request_pool *pool);
+void ptlrpc_req_finished(struct ptlrpc_request *request);
+void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request);
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
+struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
+					      unsigned npages, unsigned max_brw,
+					      unsigned type, unsigned portal);
+void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin);
+static inline void ptlrpc_free_bulk_pin(struct ptlrpc_bulk_desc *bulk)
+{
+	__ptlrpc_free_bulk(bulk, 1);
+}
+static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk)
+{
+	__ptlrpc_free_bulk(bulk, 0);
+}
+void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+			     struct page *page, int pageoffset, int len, int);
+static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
+					     struct page *page, int pageoffset,
+					     int len)
+{
+	__ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1);
+}
+
+static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
+					       struct page *page, int pageoffset,
+					       int len)
+{
+	__ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
+}
+
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+				      struct obd_import *imp);
+__u64 ptlrpc_next_xid(void);
+__u64 ptlrpc_sample_next_xid(void);
+__u64 ptlrpc_req_xid(struct ptlrpc_request *request);
+
+/* Set of routines to run a function in ptlrpcd context */
+void *ptlrpcd_alloc_work(struct obd_import *imp,
+			 int (*cb)(const struct lu_env *, void *), void *data);
+void ptlrpcd_destroy_work(void *handler);
+int ptlrpcd_queue_work(void *handler);
+
+/** @} */
+struct ptlrpc_service_buf_conf {
+	/* nbufs is buffers # to allocate when growing the pool */
+	unsigned int			bc_nbufs;
+	/* buffer size to post */
+	unsigned int			bc_buf_size;
+	/* portal to listed for requests on */
+	unsigned int			bc_req_portal;
+	/* portal of where to send replies to */
+	unsigned int			bc_rep_portal;
+	/* maximum request size to be accepted for this service */
+	unsigned int			bc_req_max_size;
+	/* maximum reply size this service can ever send */
+	unsigned int			bc_rep_max_size;
+};
+
+struct ptlrpc_service_thr_conf {
+	/* threadname should be 8 characters or less - 6 will be added on */
+	char				*tc_thr_name;
+	/* threads increasing factor for each CPU */
+	unsigned int			tc_thr_factor;
+	/* service threads # to start on each partition while initializing */
+	unsigned int			tc_nthrs_init;
+	/*
+	 * low water of threads # upper-limit on each partition while running,
+	 * service availability may be impacted if threads number is lower
+	 * than this value. It can be ZERO if the service doesn't require
+	 * CPU affinity or there is only one partition.
+	 */
+	unsigned int			tc_nthrs_base;
+	/* "soft" limit for total threads number */
+	unsigned int			tc_nthrs_max;
+	/* user specified threads number, it will be validated due to
+	 * other members of this structure. */
+	unsigned int			tc_nthrs_user;
+	/* set NUMA node affinity for service threads */
+	unsigned int			tc_cpu_affinity;
+	/* Tags for lu_context associated with service thread */
+	__u32				tc_ctx_tags;
+};
+
+struct ptlrpc_service_cpt_conf {
+	struct cfs_cpt_table		*cc_cptable;
+	/* string pattern to describe CPTs for a service */
+	char				*cc_pattern;
+};
+
+struct ptlrpc_service_conf {
+	/* service name */
+	char				*psc_name;
+	/* soft watchdog timeout multiplifier to print stuck service traces */
+	unsigned int			psc_watchdog_factor;
+	/* buffer information */
+	struct ptlrpc_service_buf_conf	psc_buf;
+	/* thread information */
+	struct ptlrpc_service_thr_conf	psc_thr;
+	/* CPU partition information */
+	struct ptlrpc_service_cpt_conf	psc_cpt;
+	/* function table */
+	struct ptlrpc_service_ops	psc_ops;
+};
+
+/* ptlrpc/service.c */
+/**
+ * Server-side services API. Register/unregister service, request state
+ * management, service thread management
+ *
+ * @{
+ */
+void ptlrpc_save_lock(struct ptlrpc_request *req,
+		      struct lustre_handle *lock, int mode, int no_ack);
+void ptlrpc_commit_replies(struct obd_export *exp);
+void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs);
+void ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs);
+int ptlrpc_hpreq_handler(struct ptlrpc_request *req);
+struct ptlrpc_service *ptlrpc_register_service(
+				struct ptlrpc_service_conf *conf,
+				struct proc_dir_entry *proc_entry);
+void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
+
+int ptlrpc_start_threads(struct ptlrpc_service *svc);
+int ptlrpc_unregister_service(struct ptlrpc_service *service);
+int liblustre_check_services(void *arg);
+void ptlrpc_daemonize(char *name);
+int ptlrpc_service_health_check(struct ptlrpc_service *);
+void ptlrpc_server_drop_request(struct ptlrpc_request *req);
+void ptlrpc_request_change_export(struct ptlrpc_request *req,
+				  struct obd_export *export);
+
+int ptlrpc_hr_init(void);
+void ptlrpc_hr_fini(void);
+
+/** @} */
+
+/* ptlrpc/import.c */
+/**
+ * Import API
+ * @{
+ */
+int ptlrpc_connect_import(struct obd_import *imp);
+int ptlrpc_init_import(struct obd_import *imp);
+int ptlrpc_disconnect_import(struct obd_import *imp, int noclose);
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
+void deuuidify(char *uuid, const char *prefix, char **uuid_start,
+	       int *uuid_len);
+
+/* ptlrpc/pack_generic.c */
+int ptlrpc_reconnect_import(struct obd_import *imp);
+/** @} */
+
+/**
+ * ptlrpc msg buffer and swab interface
+ *
+ * @{
+ */
+int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
+			 int index);
+void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
+				int index);
+int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len);
+int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len);
+
+int lustre_msg_check_version(struct lustre_msg *msg, __u32 version);
+void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens,
+			char **bufs);
+int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count,
+			__u32 *lens, char **bufs);
+int lustre_pack_reply(struct ptlrpc_request *, int count, __u32 *lens,
+		      char **bufs);
+int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
+			 __u32 *lens, char **bufs, int flags);
+#define LPRFL_EARLY_REPLY 1
+int lustre_pack_reply_flags(struct ptlrpc_request *, int count, __u32 *lens,
+			    char **bufs, int flags);
+int lustre_shrink_msg(struct lustre_msg *msg, int segment,
+		      unsigned int newlen, int move_data);
+void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
+int __lustre_unpack_msg(struct lustre_msg *m, int len);
+int lustre_msg_hdr_size(__u32 magic, int count);
+int lustre_msg_size(__u32 magic, int count, __u32 *lengths);
+int lustre_msg_size_v2(int count, __u32 *lengths);
+int lustre_packed_msg_size(struct lustre_msg *msg);
+int lustre_msg_early_size(void);
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size);
+void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen);
+int lustre_msg_buflen(struct lustre_msg *m, int n);
+void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len);
+int lustre_msg_bufcount(struct lustre_msg *m);
+char *lustre_msg_string(struct lustre_msg *m, int n, int max_len);
+__u32 lustre_msghdr_get_flags(struct lustre_msg *msg);
+void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags);
+__u32 lustre_msg_get_flags(struct lustre_msg *msg);
+void lustre_msg_add_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_set_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_clear_flags(struct lustre_msg *msg, int flags);
+__u32 lustre_msg_get_op_flags(struct lustre_msg *msg);
+void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags);
+struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg);
+__u32 lustre_msg_get_type(struct lustre_msg *msg);
+__u32 lustre_msg_get_version(struct lustre_msg *msg);
+void lustre_msg_add_version(struct lustre_msg *msg, int version);
+__u32 lustre_msg_get_opc(struct lustre_msg *msg);
+__u64 lustre_msg_get_last_xid(struct lustre_msg *msg);
+__u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
+__u64 *lustre_msg_get_versions(struct lustre_msg *msg);
+__u64 lustre_msg_get_transno(struct lustre_msg *msg);
+__u64 lustre_msg_get_slv(struct lustre_msg *msg);
+__u32 lustre_msg_get_limit(struct lustre_msg *msg);
+void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv);
+void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit);
+int lustre_msg_get_status(struct lustre_msg *msg);
+__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg);
+int lustre_msg_is_v1(struct lustre_msg *msg);
+__u32 lustre_msg_get_magic(struct lustre_msg *msg);
+__u32 lustre_msg_get_timeout(struct lustre_msg *msg);
+__u32 lustre_msg_get_service_time(struct lustre_msg *msg);
+char *lustre_msg_get_jobid(struct lustre_msg *msg);
+__u32 lustre_msg_get_cksum(struct lustre_msg *msg);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
+__u32 lustre_msg_calc_cksum(struct lustre_msg *msg, int compat18);
+#else
+# warning "remove checksum compatibility support for b1_8"
+__u32 lustre_msg_calc_cksum(struct lustre_msg *msg);
+#endif
+void lustre_msg_set_handle(struct lustre_msg *msg,struct lustre_handle *handle);
+void lustre_msg_set_type(struct lustre_msg *msg, __u32 type);
+void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc);
+void lustre_msg_set_last_xid(struct lustre_msg *msg, __u64 last_xid);
+void lustre_msg_set_last_committed(struct lustre_msg *msg,__u64 last_committed);
+void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions);
+void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno);
+void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
+void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt);
+void ptlrpc_req_set_repsize(struct ptlrpc_request *req, int count, __u32 *sizes);
+void ptlrpc_request_set_replen(struct ptlrpc_request *req);
+void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout);
+void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time);
+void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid);
+void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum);
+
+static inline void
+lustre_shrink_reply(struct ptlrpc_request *req, int segment,
+		    unsigned int newlen, int move_data)
+{
+	LASSERT(req->rq_reply_state);
+	LASSERT(req->rq_repmsg);
+	req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment,
+					   newlen, move_data);
+}
+/** @} */
+
+/** Change request phase of \a req to \a new_phase */
+static inline void
+ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
+{
+	if (req->rq_phase == new_phase)
+		return;
+
+	if (new_phase == RQ_PHASE_UNREGISTERING) {
+		req->rq_next_phase = req->rq_phase;
+		if (req->rq_import)
+			atomic_inc(&req->rq_import->imp_unregistering);
+	}
+
+	if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+		if (req->rq_import)
+			atomic_dec(&req->rq_import->imp_unregistering);
+	}
+
+	DEBUG_REQ(D_INFO, req, "move req \"%s\" -> \"%s\"",
+		  ptlrpc_rqphase2str(req), ptlrpc_phase2str(new_phase));
+
+	req->rq_phase = new_phase;
+}
+
+/**
+ * Returns true if request \a req got early reply and hard deadline is not met
+ */
+static inline int
+ptlrpc_client_early(struct ptlrpc_request *req)
+{
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+	    req->rq_reply_deadline > cfs_time_current_sec())
+		return 0;
+	return req->rq_early;
+}
+
+/**
+ * Returns true if we got real reply from server for this request
+ */
+static inline int
+ptlrpc_client_replied(struct ptlrpc_request *req)
+{
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+	    req->rq_reply_deadline > cfs_time_current_sec())
+		return 0;
+	return req->rq_replied;
+}
+
+/** Returns true if request \a req is in process of receiving server reply */
+static inline int
+ptlrpc_client_recv(struct ptlrpc_request *req)
+{
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+	    req->rq_reply_deadline > cfs_time_current_sec())
+		return 1;
+	return req->rq_receiving_reply;
+}
+
+static inline int
+ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
+{
+	int rc;
+
+	spin_lock(&req->rq_lock);
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+	    req->rq_reply_deadline > cfs_time_current_sec()) {
+		spin_unlock(&req->rq_lock);
+		return 1;
+	}
+	rc = req->rq_receiving_reply || req->rq_must_unlink;
+	spin_unlock(&req->rq_lock);
+	return rc;
+}
+
+static inline void
+ptlrpc_client_wake_req(struct ptlrpc_request *req)
+{
+	if (req->rq_set == NULL)
+		wake_up(&req->rq_reply_waitq);
+	else
+		wake_up(&req->rq_set->set_waitq);
+}
+
+static inline void
+ptlrpc_rs_addref(struct ptlrpc_reply_state *rs)
+{
+	LASSERT(atomic_read(&rs->rs_refcount) > 0);
+	atomic_inc(&rs->rs_refcount);
+}
+
+static inline void
+ptlrpc_rs_decref(struct ptlrpc_reply_state *rs)
+{
+	LASSERT(atomic_read(&rs->rs_refcount) > 0);
+	if (atomic_dec_and_test(&rs->rs_refcount))
+		lustre_free_reply_state(rs);
+}
+
+/* Should only be called once per req */
+static inline void ptlrpc_req_drop_rs(struct ptlrpc_request *req)
+{
+	if (req->rq_reply_state == NULL)
+		return; /* shouldn't occur */
+	ptlrpc_rs_decref(req->rq_reply_state);
+	req->rq_reply_state = NULL;
+	req->rq_repmsg = NULL;
+}
+
+static inline __u32 lustre_request_magic(struct ptlrpc_request *req)
+{
+	return lustre_msg_get_magic(req->rq_reqmsg);
+}
+
+static inline int ptlrpc_req_get_repsize(struct ptlrpc_request *req)
+{
+	switch (req->rq_reqmsg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return req->rq_reqmsg->lm_repsize;
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n",
+			 req->rq_reqmsg->lm_magic);
+		return -EFAULT;
+	}
+}
+
+static inline int ptlrpc_send_limit_expired(struct ptlrpc_request *req)
+{
+	if (req->rq_delay_limit != 0 &&
+	    cfs_time_before(cfs_time_add(req->rq_queued_time,
+					 cfs_time_seconds(req->rq_delay_limit)),
+			    cfs_time_current())) {
+		return 1;
+	}
+	return 0;
+}
+
+static inline int ptlrpc_no_resend(struct ptlrpc_request *req)
+{
+	if (!req->rq_no_resend && ptlrpc_send_limit_expired(req)) {
+		spin_lock(&req->rq_lock);
+		req->rq_no_resend = 1;
+		spin_unlock(&req->rq_lock);
+	}
+	return req->rq_no_resend;
+}
+
+static inline int
+ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt)
+{
+	int at = AT_OFF ? 0 : at_get(&svcpt->scp_at_estimate);
+
+	return svcpt->scp_service->srv_watchdog_factor *
+	       max_t(int, at, obd_timeout);
+}
+
+static inline struct ptlrpc_service *
+ptlrpc_req2svc(struct ptlrpc_request *req)
+{
+	LASSERT(req->rq_rqbd != NULL);
+	return req->rq_rqbd->rqbd_svcpt->scp_service;
+}
+
+/* ldlm/ldlm_lib.c */
+/**
+ * Target client logic
+ * @{
+ */
+int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg);
+int client_obd_cleanup(struct obd_device *obddev);
+int client_connect_import(const struct lu_env *env,
+			  struct obd_export **exp, struct obd_device *obd,
+			  struct obd_uuid *cluuid, struct obd_connect_data *,
+			  void *localdata);
+int client_disconnect_export(struct obd_export *exp);
+int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
+			   int priority);
+int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
+int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer,
+			    struct obd_uuid *uuid);
+int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
+void client_destroy_import(struct obd_import *imp);
+/** @} */
+
+
+/* ptlrpc/pinger.c */
+/**
+ * Pinger API (client side only)
+ * @{
+ */
+enum timeout_event {
+	TIMEOUT_GRANT = 1
+};
+struct timeout_item;
+typedef int (*timeout_cb_t)(struct timeout_item *, void *);
+int ptlrpc_pinger_add_import(struct obd_import *imp);
+int ptlrpc_pinger_del_import(struct obd_import *imp);
+int ptlrpc_add_timeout_client(int time, enum timeout_event event,
+			      timeout_cb_t cb, void *data,
+			      struct list_head *obd_list);
+int ptlrpc_del_timeout_client(struct list_head *obd_list,
+			      enum timeout_event event);
+struct ptlrpc_request * ptlrpc_prep_ping(struct obd_import *imp);
+int ptlrpc_obd_ping(struct obd_device *obd);
+cfs_time_t ptlrpc_suspend_wakeup_time(void);
+void ping_evictor_start(void);
+void ping_evictor_stop(void);
+int ptlrpc_check_and_wait_suspend(struct ptlrpc_request *req);
+void ptlrpc_pinger_ir_up(void);
+void ptlrpc_pinger_ir_down(void);
+/** @} */
+int ptlrpc_pinger_suppress_pings(void);
+
+/* ptlrpc daemon bind policy */
+typedef enum {
+	/* all ptlrpcd threads are free mode */
+	PDB_POLICY_NONE	  = 1,
+	/* all ptlrpcd threads are bound mode */
+	PDB_POLICY_FULL	  = 2,
+	/* <free1 bound1> <free2 bound2> ... <freeN boundN> */
+	PDB_POLICY_PAIR	  = 3,
+	/* <free1 bound1> <bound1 free2> ... <freeN boundN> <boundN free1>,
+	 * means each ptlrpcd[X] has two partners: thread[X-1] and thread[X+1].
+	 * If kernel supports NUMA, pthrpcd threads are binded and
+	 * grouped by NUMA node */
+	PDB_POLICY_NEIGHBOR      = 4,
+} pdb_policy_t;
+
+/* ptlrpc daemon load policy
+ * It is caller's duty to specify how to push the async RPC into some ptlrpcd
+ * queue, but it is not enforced, affected by "ptlrpcd_bind_policy". If it is
+ * "PDB_POLICY_FULL", then the RPC will be processed by the selected ptlrpcd,
+ * Otherwise, the RPC may be processed by the selected ptlrpcd or its partner,
+ * depends on which is scheduled firstly, to accelerate the RPC processing. */
+typedef enum {
+	/* on the same CPU core as the caller */
+	PDL_POLICY_SAME	 = 1,
+	/* within the same CPU partition, but not the same core as the caller */
+	PDL_POLICY_LOCAL	= 2,
+	/* round-robin on all CPU cores, but not the same core as the caller */
+	PDL_POLICY_ROUND	= 3,
+	/* the specified CPU core is preferred, but not enforced */
+	PDL_POLICY_PREFERRED    = 4,
+} pdl_policy_t;
+
+/* ptlrpc/ptlrpcd.c */
+void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force);
+void ptlrpcd_free(struct ptlrpcd_ctl *pc);
+void ptlrpcd_wake(struct ptlrpc_request *req);
+void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx);
+void ptlrpcd_add_rqset(struct ptlrpc_request_set *set);
+int ptlrpcd_addref(void);
+void ptlrpcd_decref(void);
+
+/* ptlrpc/lproc_ptlrpc.c */
+/**
+ * procfs output related functions
+ * @{
+ */
+const char* ll_opcode2str(__u32 opcode);
+#ifdef LPROCFS
+void ptlrpc_lprocfs_register_obd(struct obd_device *obd);
+void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd);
+void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes);
+#else
+static inline void ptlrpc_lprocfs_register_obd(struct obd_device *obd) {}
+static inline void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) {}
+static inline void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes) {}
+#endif
+/** @} */
+
+/* ptlrpc/llog_server.c */
+int llog_origin_handle_open(struct ptlrpc_request *req);
+int llog_origin_handle_destroy(struct ptlrpc_request *req);
+int llog_origin_handle_prev_block(struct ptlrpc_request *req);
+int llog_origin_handle_next_block(struct ptlrpc_request *req);
+int llog_origin_handle_read_header(struct ptlrpc_request *req);
+int llog_origin_handle_close(struct ptlrpc_request *req);
+int llog_origin_handle_cancel(struct ptlrpc_request *req);
+
+/* ptlrpc/llog_client.c */
+extern struct llog_operations llog_client_ops;
+
+/** @} net */
+
+#endif
+/** @} PtlRPC */
diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h
new file mode 100644
index 000000000000..ed654684cb64
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_param.h
@@ -0,0 +1,121 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_param.h
+ *
+ * User-settable parameter keys
+ *
+ * Author: Nathan Rutman <nathan@clusterfs.com>
+ */
+
+#ifndef _LUSTRE_PARAM_H
+#define _LUSTRE_PARAM_H
+
+/** \defgroup param param
+ *
+ * @{
+ */
+
+/* For interoperability */
+struct cfg_interop_param {
+	char *old_param;
+	char *new_param;
+};
+
+/* obd_config.c */
+int class_find_param(char *buf, char *key, char **valp);
+struct cfg_interop_param *class_find_old_param(const char *param,
+					       struct cfg_interop_param *ptr);
+int class_get_next_param(char **params, char *copy);
+int class_match_param(char *buf, char *key, char **valp);
+int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh);
+int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh);
+int class_parse_net(char *buf, __u32 *net, char **endh);
+int class_match_nid(char *buf, char *key, lnet_nid_t nid);
+int class_match_net(char *buf, char *key, __u32 net);
+/* obd_mount.c */
+int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
+	    char *s1, char *s2, char *s3, char *s4);
+
+
+
+/****************** User-settable parameter keys *********************/
+/* e.g.
+	tunefs.lustre --param="failover.node=192.168.0.13@tcp0" /dev/sda
+	lctl conf_param testfs-OST0000 failover.node=3@elan,192.168.0.3@tcp0
+		    ... testfs-MDT0000.lov.stripesize=4M
+		    ... testfs-OST0000.ost.client_cache_seconds=15
+		    ... testfs.sys.timeout=<secs>
+		    ... testfs.llite.max_read_ahead_mb=16
+*/
+
+/* System global or special params not handled in obd's proc
+ * See mgs_write_log_sys()
+ */
+#define PARAM_TIMEOUT	      "timeout="	  /* global */
+#define PARAM_LDLM_TIMEOUT	 "ldlm_timeout="     /* global */
+#define PARAM_AT_MIN	       "at_min="	   /* global */
+#define PARAM_AT_MAX	       "at_max="	   /* global */
+#define PARAM_AT_EXTRA	     "at_extra="	 /* global */
+#define PARAM_AT_EARLY_MARGIN      "at_early_margin="  /* global */
+#define PARAM_AT_HISTORY	   "at_history="       /* global */
+#define PARAM_JOBID_VAR		   "jobid_var="	       /* global */
+#define PARAM_MGSNODE	      "mgsnode="	  /* only at mounttime */
+#define PARAM_FAILNODE	     "failover.node="    /* add failover nid */
+#define PARAM_FAILMODE	     "failover.mode="    /* initial mount only */
+#define PARAM_ACTIVE	       "active="	   /* activate/deactivate */
+#define PARAM_NETWORK	      "network="	  /* bind on nid */
+#define PARAM_ID_UPCALL		"identity_upcall="  /* identity upcall */
+
+/* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */
+#define PARAM_OST		  "ost."
+#define PARAM_OSC		  "osc."
+#define PARAM_MDT		  "mdt."
+#define PARAM_MDD		  "mdd."
+#define PARAM_MDC		  "mdc."
+#define PARAM_LLITE		"llite."
+#define PARAM_LOV		  "lov."
+#define PARAM_LOD		"lod."
+#define PARAM_OSP		"osp."
+#define PARAM_SYS		  "sys."	      /* global */
+#define PARAM_SRPC		 "srpc."
+#define PARAM_SRPC_FLVR	    "srpc.flavor."
+#define PARAM_SRPC_UDESC	   "srpc.udesc.cli2mdt"
+#define PARAM_SEC		  "security."
+#define PARAM_QUOTA		"quota."	    /* global */
+
+/** @} param */
+
+#endif /* _LUSTRE_PARAM_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_quota.h b/drivers/staging/lustre/lustre/include/lustre_quota.h
new file mode 100644
index 000000000000..1c3041f50049
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_quota.h
@@ -0,0 +1,239 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LUSTRE_QUOTA_H
+#define _LUSTRE_QUOTA_H
+
+/** \defgroup quota quota
+ *
+ */
+
+#include <linux/lustre_quota.h>
+
+#include <dt_object.h>
+#include <lustre_fid.h>
+#include <lustre_dlm.h>
+
+#ifndef MAX_IQ_TIME
+#define MAX_IQ_TIME  604800     /* (7*24*60*60) 1 week */
+#endif
+
+#ifndef MAX_DQ_TIME
+#define MAX_DQ_TIME  604800     /* (7*24*60*60) 1 week */
+#endif
+
+struct lquota_id_info;
+struct lquota_trans;
+
+/* Gather all quota record type in an union that can be used to read any records
+ * from disk. All fields of these records must be 64-bit aligned, otherwise the
+ * OSD layer may swab them incorrectly. */
+union lquota_rec {
+	struct lquota_glb_rec	lqr_glb_rec;
+	struct lquota_slv_rec	lqr_slv_rec;
+	struct lquota_acct_rec	lqr_acct_rec;
+};
+
+/* Index features supported by the global index objects
+ * Only used for migration purpose and should be removed once on-disk migration
+ * is no longer needed */
+extern struct dt_index_features dt_quota_iusr_features;
+extern struct dt_index_features dt_quota_busr_features;
+extern struct dt_index_features dt_quota_igrp_features;
+extern struct dt_index_features dt_quota_bgrp_features;
+
+/* Name used in the configuration logs to identify the default metadata pool
+ * (composed of all the MDTs, with pool ID 0) and the default data pool (all
+ * the OSTs, with pool ID 0 too). */
+#define QUOTA_METAPOOL_NAME   "mdt="
+#define QUOTA_DATAPOOL_NAME   "ost="
+
+/*
+ * Quota Master Target support
+ */
+
+/* Request handlers for quota master operations.
+ * This is used by the MDT to pass quota/lock requests to the quota master
+ * target. This won't be needed any more once the QMT is a real target and
+ * does not rely any more on the MDT service threads and namespace. */
+struct qmt_handlers {
+	/* Handle quotactl request from client. */
+	int (*qmth_quotactl)(const struct lu_env *, struct lu_device *,
+			     struct obd_quotactl *);
+
+	/* Handle dqacq/dqrel request from slave. */
+	int (*qmth_dqacq)(const struct lu_env *, struct lu_device *,
+			  struct ptlrpc_request *);
+
+	/* LDLM intent policy associated with quota locks */
+	int (*qmth_intent_policy)(const struct lu_env *, struct lu_device *,
+				  struct ptlrpc_request *, struct ldlm_lock **,
+				  int);
+
+	/* Initialize LVB of ldlm resource associated with quota objects */
+	int (*qmth_lvbo_init)(struct lu_device *, struct ldlm_resource *);
+
+	/* Update LVB of ldlm resource associated with quota objects */
+	int (*qmth_lvbo_update)(struct lu_device *, struct ldlm_resource *,
+				struct ptlrpc_request *, int);
+
+	/* Return size of LVB to be packed in ldlm message */
+	int (*qmth_lvbo_size)(struct lu_device *, struct ldlm_lock *);
+
+	/* Fill request buffer with lvb */
+	int (*qmth_lvbo_fill)(struct lu_device *, struct ldlm_lock *, void *,
+			      int);
+
+	/* Free lvb associated with ldlm resource */
+	int (*qmth_lvbo_free)(struct lu_device *, struct ldlm_resource *);
+};
+
+/* actual handlers are defined in lustre/quota/qmt_handler.c */
+extern struct qmt_handlers qmt_hdls;
+
+/*
+ * Quota enforcement support on slaves
+ */
+
+struct qsd_instance;
+
+/* The quota slave feature is implemented under the form of a library.
+ * The API is the following:
+ *
+ * - qsd_init(): the user (mostly the OSD layer) should first allocate a qsd
+ *	       instance via qsd_init(). This creates all required structures
+ *	       to manage quota enforcement for this target and performs all
+ *	       low-level initialization which does not involve any lustre
+ *	       object. qsd_init() should typically be called when the OSD
+ *	       is being set up.
+ *
+ * - qsd_prepare(): This sets up on-disk objects associated with the quota slave
+ *		  feature and initiates the quota reintegration procedure if
+ *		  needed. qsd_prepare() should typically be called when
+ *		  ->ldo_prepare is invoked.
+ *
+ * - qsd_start(): a qsd instance should be started once recovery is completed
+ *		(i.e. when ->ldo_recovery_complete is called). This is used
+ *		to notify the qsd layer that quota should now be enforced
+ *		again via the qsd_op_begin/end functions. The last step of the
+ *		reintegration prodecure (namely usage reconciliation) will be
+ *		completed during start.
+ *
+ * - qsd_fini(): is used to release a qsd_instance structure allocated with
+ *	       qsd_init(). This releases all quota slave objects and frees the
+ *	       structures associated with the qsd_instance.
+ *
+ * - qsd_op_begin(): is used to enforce quota, it must be called in the
+ *		   declaration of each operation. qsd_op_end() should then be
+ *		   invoked later once all operations have been completed in
+ *		   order to release/adjust the quota space.
+ *		   Running qsd_op_begin() before qsd_start() isn't fatal and
+ *		   will return success.
+ *		   Once qsd_start() has been run, qsd_op_begin() will block
+ *		   until the reintegration procedure is completed.
+ *
+ * - qsd_op_end(): performs the post operation quota processing. This must be
+ *		 called after the operation transaction stopped.
+ *		 While qsd_op_begin() must be invoked each time a new
+ *		 operation is declared, qsd_op_end() should be called only
+ *		 once for the whole transaction.
+ *
+ * - qsd_op_adjust(): triggers pre-acquire/release if necessary.
+ *
+ * Below are the function prototypes to be used by OSD layer to manage quota
+ * enforcement. Arguments are documented where each function is defined.  */
+
+struct qsd_instance *qsd_init(const struct lu_env *, char *, struct dt_device *,
+			      proc_dir_entry_t *);
+int qsd_prepare(const struct lu_env *, struct qsd_instance *);
+int qsd_start(const struct lu_env *, struct qsd_instance *);
+void qsd_fini(const struct lu_env *, struct qsd_instance *);
+int qsd_op_begin(const struct lu_env *, struct qsd_instance *,
+		 struct lquota_trans *, struct lquota_id_info *, int *);
+void qsd_op_end(const struct lu_env *, struct qsd_instance *,
+		struct lquota_trans *);
+void qsd_op_adjust(const struct lu_env *, struct qsd_instance *,
+		   union lquota_id *, int);
+/* This is exported for the ldiskfs quota migration only,
+ * see convert_quota_file() */
+int lquota_disk_write_glb(const struct lu_env *, struct dt_object *,
+			  __u64, struct lquota_glb_rec *);
+
+/*
+ * Quota information attached to a transaction
+ */
+
+struct lquota_entry;
+
+struct lquota_id_info {
+	/* quota identifier */
+	union lquota_id		 lqi_id;
+
+	/* USRQUOTA or GRPQUOTA for now, could be expanded for
+	 * directory quota or other types later.  */
+	int			 lqi_type;
+
+	/* inodes or kbytes to be consumed or released, it could
+	 * be negative when releasing space.  */
+	long long		 lqi_space;
+
+	/* quota slave entry structure associated with this ID */
+	struct lquota_entry	*lqi_qentry;
+
+	/* whether we are reporting blocks or inodes */
+	bool			 lqi_is_blk;
+};
+
+/* Since we enforce only inode quota in meta pool (MDTs), and block quota in
+ * data pool (OSTs), there are at most 4 quota ids being enforced in a single
+ * transaction, which is chown transaction:
+ * original uid and gid, new uid and gid.
+ *
+ * This value might need to be revised when directory quota is added.  */
+#define QUOTA_MAX_TRANSIDS    4
+
+/* all qids involved in a single transaction */
+struct lquota_trans {
+	unsigned short		lqt_id_cnt;
+	struct lquota_id_info	lqt_ids[QUOTA_MAX_TRANSIDS];
+};
+
+/* flags for quota local enforcement */
+#define QUOTA_FL_OVER_USRQUOTA  0x01
+#define QUOTA_FL_OVER_GRPQUOTA  0x02
+#define QUOTA_FL_SYNC	   0x04
+
+#define IS_LQUOTA_RES(res)						\
+	(res->lr_name.name[LUSTRE_RES_ID_SEQ_OFF] == FID_SEQ_QUOTA ||	\
+	 res->lr_name.name[LUSTRE_RES_ID_SEQ_OFF] == FID_SEQ_QUOTA_GLB)
+
+/* helper function used by MDT & OFD to retrieve quota accounting information
+ * on slave */
+int lquotactl_slv(const struct lu_env *, struct dt_device *,
+		  struct obd_quotactl *);
+/** @} quota */
+#endif /* _LUSTRE_QUOTA_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
new file mode 100644
index 000000000000..f4d3820865f1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -0,0 +1,334 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lustre_req_layout.h
+ *
+ * Lustre Metadata Target (mdt) request handler
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+
+#ifndef _LUSTRE_REQ_LAYOUT_H__
+#define _LUSTRE_REQ_LAYOUT_H__
+
+/** \defgroup req_layout req_layout
+ *
+ * @{
+ */
+
+struct req_msg_field;
+struct req_format;
+struct req_capsule;
+
+struct ptlrpc_request;
+
+enum req_location {
+	RCL_CLIENT,
+	RCL_SERVER,
+	RCL_NR
+};
+
+/* Maximal number of fields (buffers) in a request message. */
+#define REQ_MAX_FIELD_NR  9
+
+struct req_capsule {
+	struct ptlrpc_request   *rc_req;
+	const struct req_format *rc_fmt;
+	enum req_location	rc_loc;
+	__u32		    rc_area[RCL_NR][REQ_MAX_FIELD_NR];
+};
+
+#if !defined(__REQ_LAYOUT_USER__)
+
+/* struct ptlrpc_request, lustre_msg* */
+#include <lustre_net.h>
+
+void req_capsule_init(struct req_capsule *pill, struct ptlrpc_request *req,
+		      enum req_location location);
+void req_capsule_fini(struct req_capsule *pill);
+
+void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt);
+void req_capsule_client_dump(struct req_capsule *pill);
+void req_capsule_server_dump(struct req_capsule *pill);
+void req_capsule_init_area(struct req_capsule *pill);
+int req_capsule_filled_sizes(struct req_capsule *pill, enum req_location loc);
+int  req_capsule_server_pack(struct req_capsule *pill);
+
+void *req_capsule_client_get(struct req_capsule *pill,
+			     const struct req_msg_field *field);
+void *req_capsule_client_swab_get(struct req_capsule *pill,
+				  const struct req_msg_field *field,
+				  void *swabber);
+void *req_capsule_client_sized_get(struct req_capsule *pill,
+				   const struct req_msg_field *field,
+				   int len);
+void *req_capsule_server_get(struct req_capsule *pill,
+			     const struct req_msg_field *field);
+void *req_capsule_server_sized_get(struct req_capsule *pill,
+				   const struct req_msg_field *field,
+				   int len);
+void *req_capsule_server_swab_get(struct req_capsule *pill,
+				  const struct req_msg_field *field,
+				  void *swabber);
+void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
+					const struct req_msg_field *field,
+					int len, void *swabber);
+const void *req_capsule_other_get(struct req_capsule *pill,
+				  const struct req_msg_field *field);
+
+void req_capsule_set_size(struct req_capsule *pill,
+			  const struct req_msg_field *field,
+			  enum req_location loc, int size);
+int req_capsule_get_size(const struct req_capsule *pill,
+			  const struct req_msg_field *field,
+			  enum req_location loc);
+int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc);
+int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
+			 enum req_location loc);
+void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt);
+
+int req_capsule_has_field(const struct req_capsule *pill,
+			  const struct req_msg_field *field,
+			  enum req_location loc);
+int req_capsule_field_present(const struct req_capsule *pill,
+			      const struct req_msg_field *field,
+			      enum req_location loc);
+void req_capsule_shrink(struct req_capsule *pill,
+			const struct req_msg_field *field,
+			unsigned int newlen,
+			enum req_location loc);
+int req_capsule_server_grow(struct req_capsule *pill,
+			    const struct req_msg_field *field,
+			    unsigned int newlen);
+int  req_layout_init(void);
+void req_layout_fini(void);
+
+/* __REQ_LAYOUT_USER__ */
+#endif
+
+extern struct req_format RQF_OBD_PING;
+extern struct req_format RQF_OBD_SET_INFO;
+extern struct req_format RQF_SEC_CTX;
+extern struct req_format RQF_OBD_IDX_READ;
+/* MGS req_format */
+extern struct req_format RQF_MGS_TARGET_REG;
+extern struct req_format RQF_MGS_SET_INFO;
+extern struct req_format RQF_MGS_CONFIG_READ;
+/* fid/fld req_format */
+extern struct req_format RQF_SEQ_QUERY;
+extern struct req_format RQF_FLD_QUERY;
+/* MDS req_format */
+extern struct req_format RQF_MDS_CONNECT;
+extern struct req_format RQF_MDS_DISCONNECT;
+extern struct req_format RQF_MDS_STATFS;
+extern struct req_format RQF_MDS_GETSTATUS;
+extern struct req_format RQF_MDS_SYNC;
+extern struct req_format RQF_MDS_GETXATTR;
+extern struct req_format RQF_MDS_GETATTR;
+extern struct req_format RQF_UPDATE_OBJ;
+
+/*
+ * This is format of direct (non-intent) MDS_GETATTR_NAME request.
+ */
+extern struct req_format RQF_MDS_GETATTR_NAME;
+extern struct req_format RQF_MDS_CLOSE;
+extern struct req_format RQF_MDS_PIN;
+extern struct req_format RQF_MDS_UNPIN;
+extern struct req_format RQF_MDS_CONNECT;
+extern struct req_format RQF_MDS_DISCONNECT;
+extern struct req_format RQF_MDS_GET_INFO;
+extern struct req_format RQF_MDS_READPAGE;
+extern struct req_format RQF_MDS_WRITEPAGE;
+extern struct req_format RQF_MDS_IS_SUBDIR;
+extern struct req_format RQF_MDS_DONE_WRITING;
+extern struct req_format RQF_MDS_REINT;
+extern struct req_format RQF_MDS_REINT_CREATE;
+extern struct req_format RQF_MDS_REINT_CREATE_RMT_ACL;
+extern struct req_format RQF_MDS_REINT_CREATE_SLAVE;
+extern struct req_format RQF_MDS_REINT_CREATE_SYM;
+extern struct req_format RQF_MDS_REINT_OPEN;
+extern struct req_format RQF_MDS_REINT_UNLINK;
+extern struct req_format RQF_MDS_REINT_LINK;
+extern struct req_format RQF_MDS_REINT_RENAME;
+extern struct req_format RQF_MDS_REINT_SETATTR;
+extern struct req_format RQF_MDS_REINT_SETXATTR;
+extern struct req_format RQF_MDS_QUOTACHECK;
+extern struct req_format RQF_MDS_QUOTACTL;
+extern struct req_format RQF_QC_CALLBACK;
+extern struct req_format RQF_QUOTA_DQACQ;
+extern struct req_format RQF_MDS_SWAP_LAYOUTS;
+/* MDS hsm formats */
+extern struct req_format RQF_MDS_HSM_STATE_GET;
+extern struct req_format RQF_MDS_HSM_STATE_SET;
+extern struct req_format RQF_MDS_HSM_ACTION;
+extern struct req_format RQF_MDS_HSM_PROGRESS;
+extern struct req_format RQF_MDS_HSM_CT_REGISTER;
+extern struct req_format RQF_MDS_HSM_CT_UNREGISTER;
+extern struct req_format RQF_MDS_HSM_REQUEST;
+/* OST req_format */
+extern struct req_format RQF_OST_CONNECT;
+extern struct req_format RQF_OST_DISCONNECT;
+extern struct req_format RQF_OST_QUOTACHECK;
+extern struct req_format RQF_OST_QUOTACTL;
+extern struct req_format RQF_OST_GETATTR;
+extern struct req_format RQF_OST_SETATTR;
+extern struct req_format RQF_OST_CREATE;
+extern struct req_format RQF_OST_PUNCH;
+extern struct req_format RQF_OST_SYNC;
+extern struct req_format RQF_OST_DESTROY;
+extern struct req_format RQF_OST_BRW_READ;
+extern struct req_format RQF_OST_BRW_WRITE;
+extern struct req_format RQF_OST_STATFS;
+extern struct req_format RQF_OST_SET_GRANT_INFO;
+extern struct req_format RQF_OST_GET_INFO_GENERIC;
+extern struct req_format RQF_OST_GET_INFO_LAST_ID;
+extern struct req_format RQF_OST_GET_INFO_LAST_FID;
+extern struct req_format RQF_OST_SET_INFO_LAST_FID;
+extern struct req_format RQF_OST_GET_INFO_FIEMAP;
+
+/* LDLM req_format */
+extern struct req_format RQF_LDLM_ENQUEUE;
+extern struct req_format RQF_LDLM_ENQUEUE_LVB;
+extern struct req_format RQF_LDLM_CONVERT;
+extern struct req_format RQF_LDLM_INTENT;
+extern struct req_format RQF_LDLM_INTENT_BASIC;
+extern struct req_format RQF_LDLM_INTENT_LAYOUT;
+extern struct req_format RQF_LDLM_INTENT_GETATTR;
+extern struct req_format RQF_LDLM_INTENT_OPEN;
+extern struct req_format RQF_LDLM_INTENT_CREATE;
+extern struct req_format RQF_LDLM_INTENT_UNLINK;
+extern struct req_format RQF_LDLM_INTENT_QUOTA;
+extern struct req_format RQF_LDLM_CANCEL;
+extern struct req_format RQF_LDLM_CALLBACK;
+extern struct req_format RQF_LDLM_CP_CALLBACK;
+extern struct req_format RQF_LDLM_BL_CALLBACK;
+extern struct req_format RQF_LDLM_GL_CALLBACK;
+extern struct req_format RQF_LDLM_GL_DESC_CALLBACK;
+/* LOG req_format */
+extern struct req_format RQF_LOG_CANCEL;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK;
+extern struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER;
+extern struct req_format RQF_LLOG_ORIGIN_CONNECT;
+
+extern struct req_msg_field RMF_GENERIC_DATA;
+extern struct req_msg_field RMF_PTLRPC_BODY;
+extern struct req_msg_field RMF_MDT_BODY;
+extern struct req_msg_field RMF_MDT_EPOCH;
+extern struct req_msg_field RMF_OBD_STATFS;
+extern struct req_msg_field RMF_NAME;
+extern struct req_msg_field RMF_SYMTGT;
+extern struct req_msg_field RMF_TGTUUID;
+extern struct req_msg_field RMF_CLUUID;
+extern struct req_msg_field RMF_SETINFO_VAL;
+extern struct req_msg_field RMF_SETINFO_KEY;
+extern struct req_msg_field RMF_GETINFO_VAL;
+extern struct req_msg_field RMF_GETINFO_VALLEN;
+extern struct req_msg_field RMF_GETINFO_KEY;
+extern struct req_msg_field RMF_IDX_INFO;
+
+/*
+ * connection handle received in MDS_CONNECT request.
+ */
+extern struct req_msg_field RMF_CONN;
+extern struct req_msg_field RMF_CONNECT_DATA;
+extern struct req_msg_field RMF_DLM_REQ;
+extern struct req_msg_field RMF_DLM_REP;
+extern struct req_msg_field RMF_DLM_LVB;
+extern struct req_msg_field RMF_DLM_GL_DESC;
+extern struct req_msg_field RMF_LDLM_INTENT;
+extern struct req_msg_field RMF_LAYOUT_INTENT;
+extern struct req_msg_field RMF_MDT_MD;
+extern struct req_msg_field RMF_REC_REINT;
+extern struct req_msg_field RMF_EADATA;
+extern struct req_msg_field RMF_ACL;
+extern struct req_msg_field RMF_LOGCOOKIES;
+extern struct req_msg_field RMF_CAPA1;
+extern struct req_msg_field RMF_CAPA2;
+extern struct req_msg_field RMF_OBD_QUOTACHECK;
+extern struct req_msg_field RMF_OBD_QUOTACTL;
+extern struct req_msg_field RMF_QUOTA_BODY;
+extern struct req_msg_field RMF_STRING;
+extern struct req_msg_field RMF_SWAP_LAYOUTS;
+extern struct req_msg_field RMF_MDS_HSM_PROGRESS;
+extern struct req_msg_field RMF_MDS_HSM_REQUEST;
+extern struct req_msg_field RMF_MDS_HSM_USER_ITEM;
+extern struct req_msg_field RMF_MDS_HSM_ARCHIVE;
+extern struct req_msg_field RMF_HSM_USER_STATE;
+extern struct req_msg_field RMF_HSM_STATE_SET;
+extern struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION;
+extern struct req_msg_field RMF_MDS_HSM_REQUEST;
+
+/* seq-mgr fields */
+extern struct req_msg_field RMF_SEQ_OPC;
+extern struct req_msg_field RMF_SEQ_RANGE;
+extern struct req_msg_field RMF_FID_SPACE;
+
+/* FLD fields */
+extern struct req_msg_field RMF_FLD_OPC;
+extern struct req_msg_field RMF_FLD_MDFLD;
+
+extern struct req_msg_field RMF_LLOGD_BODY;
+extern struct req_msg_field RMF_LLOG_LOG_HDR;
+extern struct req_msg_field RMF_LLOGD_CONN_BODY;
+
+extern struct req_msg_field RMF_MGS_TARGET_INFO;
+extern struct req_msg_field RMF_MGS_SEND_PARAM;
+
+extern struct req_msg_field RMF_OST_BODY;
+extern struct req_msg_field RMF_OBD_IOOBJ;
+extern struct req_msg_field RMF_OBD_ID;
+extern struct req_msg_field RMF_FID;
+extern struct req_msg_field RMF_NIOBUF_REMOTE;
+extern struct req_msg_field RMF_RCS;
+extern struct req_msg_field RMF_FIEMAP_KEY;
+extern struct req_msg_field RMF_FIEMAP_VAL;
+extern struct req_msg_field RMF_OST_ID;
+
+/* MGS config read message format */
+extern struct req_msg_field RMF_MGS_CONFIG_BODY;
+extern struct req_msg_field RMF_MGS_CONFIG_RES;
+
+/* generic uint32 */
+extern struct req_msg_field RMF_U32;
+
+/* OBJ update format */
+extern struct req_msg_field RMF_UPDATE;
+extern struct req_msg_field RMF_UPDATE_REPLY;
+/** @} req_layout */
+
+#endif /* _LUSTRE_REQ_LAYOUT_H__ */
diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h
new file mode 100644
index 000000000000..9e0908e1c4d6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_sec.h
@@ -0,0 +1,1145 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _LUSTRE_SEC_H_
+#define _LUSTRE_SEC_H_
+
+/** \defgroup sptlrpc sptlrpc
+ *
+ * @{
+ */
+
+/*
+ * to avoid include
+ */
+struct obd_import;
+struct obd_export;
+struct ptlrpc_request;
+struct ptlrpc_reply_state;
+struct ptlrpc_bulk_desc;
+struct brw_page;
+/* Linux specific */
+struct key;
+struct seq_file;
+
+/*
+ * forward declaration
+ */
+struct ptlrpc_sec_policy;
+struct ptlrpc_sec_cops;
+struct ptlrpc_sec_sops;
+struct ptlrpc_sec;
+struct ptlrpc_svc_ctx;
+struct ptlrpc_cli_ctx;
+struct ptlrpc_ctx_ops;
+
+/**
+ * \addtogroup flavor flavor
+ *
+ * RPC flavor is represented by a 32 bits integer. Currently the high 12 bits
+ * are unused, must be set to 0 for future expansion.
+ * <pre>
+ * ------------------------------------------------------------------------
+ * | 4b (bulk svc) | 4b (bulk type) | 4b (svc) | 4b (mech)  | 4b (policy) |
+ * ------------------------------------------------------------------------
+ * </pre>
+ *
+ * @{
+ */
+
+/*
+ * flavor constants
+ */
+enum sptlrpc_policy {
+	SPTLRPC_POLICY_NULL	     = 0,
+	SPTLRPC_POLICY_PLAIN	    = 1,
+	SPTLRPC_POLICY_GSS	      = 2,
+	SPTLRPC_POLICY_MAX,
+};
+
+enum sptlrpc_mech_null {
+	SPTLRPC_MECH_NULL	       = 0,
+	SPTLRPC_MECH_NULL_MAX,
+};
+
+enum sptlrpc_mech_plain {
+	SPTLRPC_MECH_PLAIN	      = 0,
+	SPTLRPC_MECH_PLAIN_MAX,
+};
+
+enum sptlrpc_mech_gss {
+	SPTLRPC_MECH_GSS_NULL	   = 0,
+	SPTLRPC_MECH_GSS_KRB5	   = 1,
+	SPTLRPC_MECH_GSS_MAX,
+};
+
+enum sptlrpc_service_type {
+	SPTLRPC_SVC_NULL		= 0,    /**< no security */
+	SPTLRPC_SVC_AUTH		= 1,    /**< authentication only */
+	SPTLRPC_SVC_INTG		= 2,    /**< integrity */
+	SPTLRPC_SVC_PRIV		= 3,    /**< privacy */
+	SPTLRPC_SVC_MAX,
+};
+
+enum sptlrpc_bulk_type {
+	SPTLRPC_BULK_DEFAULT	    = 0,    /**< follow rpc flavor */
+	SPTLRPC_BULK_HASH	       = 1,    /**< hash integrity */
+	SPTLRPC_BULK_MAX,
+};
+
+enum sptlrpc_bulk_service {
+	SPTLRPC_BULK_SVC_NULL	   = 0,    /**< no security */
+	SPTLRPC_BULK_SVC_AUTH	   = 1,    /**< authentication only */
+	SPTLRPC_BULK_SVC_INTG	   = 2,    /**< integrity */
+	SPTLRPC_BULK_SVC_PRIV	   = 3,    /**< privacy */
+	SPTLRPC_BULK_SVC_MAX,
+};
+
+/*
+ * compose/extract macros
+ */
+#define FLVR_POLICY_OFFSET	      (0)
+#define FLVR_MECH_OFFSET		(4)
+#define FLVR_SVC_OFFSET		 (8)
+#define FLVR_BULK_TYPE_OFFSET	   (12)
+#define FLVR_BULK_SVC_OFFSET	    (16)
+
+#define MAKE_FLVR(policy, mech, svc, btype, bsvc)		       \
+	(((__u32)(policy) << FLVR_POLICY_OFFSET) |		      \
+	 ((__u32)(mech) << FLVR_MECH_OFFSET) |			  \
+	 ((__u32)(svc) << FLVR_SVC_OFFSET) |			    \
+	 ((__u32)(btype) << FLVR_BULK_TYPE_OFFSET) |		    \
+	 ((__u32)(bsvc) << FLVR_BULK_SVC_OFFSET))
+
+/*
+ * extraction
+ */
+#define SPTLRPC_FLVR_POLICY(flavor)				     \
+	((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_MECH(flavor)				       \
+	((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_SVC(flavor)					\
+	((((__u32)(flavor)) >> FLVR_SVC_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_BULK_TYPE(flavor)				  \
+	((((__u32)(flavor)) >> FLVR_BULK_TYPE_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_BULK_SVC(flavor)				   \
+	((((__u32)(flavor)) >> FLVR_BULK_SVC_OFFSET) & 0xF)
+
+#define SPTLRPC_FLVR_BASE(flavor)				       \
+	((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xFFF)
+#define SPTLRPC_FLVR_BASE_SUB(flavor)				   \
+	((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xFF)
+
+/*
+ * gss subflavors
+ */
+#define MAKE_BASE_SUBFLVR(mech, svc)				    \
+	((__u32)(mech) |						\
+	 ((__u32)(svc) << (FLVR_SVC_OFFSET - FLVR_MECH_OFFSET)))
+
+#define SPTLRPC_SUBFLVR_KRB5N					   \
+	MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_NULL)
+#define SPTLRPC_SUBFLVR_KRB5A					   \
+	MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_AUTH)
+#define SPTLRPC_SUBFLVR_KRB5I					   \
+	MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_INTG)
+#define SPTLRPC_SUBFLVR_KRB5P					   \
+	MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_PRIV)
+
+/*
+ * "end user" flavors
+ */
+#define SPTLRPC_FLVR_NULL			       \
+	MAKE_FLVR(SPTLRPC_POLICY_NULL,		  \
+		  SPTLRPC_MECH_NULL,		    \
+		  SPTLRPC_SVC_NULL,		     \
+		  SPTLRPC_BULK_DEFAULT,		 \
+		  SPTLRPC_BULK_SVC_NULL)
+#define SPTLRPC_FLVR_PLAIN			      \
+	MAKE_FLVR(SPTLRPC_POLICY_PLAIN,		 \
+		  SPTLRPC_MECH_PLAIN,		   \
+		  SPTLRPC_SVC_NULL,		     \
+		  SPTLRPC_BULK_HASH,		    \
+		  SPTLRPC_BULK_SVC_INTG)
+#define SPTLRPC_FLVR_KRB5N			      \
+	MAKE_FLVR(SPTLRPC_POLICY_GSS,		   \
+		  SPTLRPC_MECH_GSS_KRB5,		\
+		  SPTLRPC_SVC_NULL,		     \
+		  SPTLRPC_BULK_DEFAULT,		 \
+		  SPTLRPC_BULK_SVC_NULL)
+#define SPTLRPC_FLVR_KRB5A			      \
+	MAKE_FLVR(SPTLRPC_POLICY_GSS,		   \
+		  SPTLRPC_MECH_GSS_KRB5,		\
+		  SPTLRPC_SVC_AUTH,		     \
+		  SPTLRPC_BULK_DEFAULT,		 \
+		  SPTLRPC_BULK_SVC_NULL)
+#define SPTLRPC_FLVR_KRB5I			      \
+	MAKE_FLVR(SPTLRPC_POLICY_GSS,		   \
+		  SPTLRPC_MECH_GSS_KRB5,		\
+		  SPTLRPC_SVC_INTG,		     \
+		  SPTLRPC_BULK_DEFAULT,		 \
+		  SPTLRPC_BULK_SVC_INTG)
+#define SPTLRPC_FLVR_KRB5P			      \
+	MAKE_FLVR(SPTLRPC_POLICY_GSS,		   \
+		  SPTLRPC_MECH_GSS_KRB5,		\
+		  SPTLRPC_SVC_PRIV,		     \
+		  SPTLRPC_BULK_DEFAULT,		 \
+		  SPTLRPC_BULK_SVC_PRIV)
+
+#define SPTLRPC_FLVR_DEFAULT	    SPTLRPC_FLVR_NULL
+
+#define SPTLRPC_FLVR_INVALID	    ((__u32) 0xFFFFFFFF)
+#define SPTLRPC_FLVR_ANY		((__u32) 0xFFF00000)
+
+/**
+ * extract the useful part from wire flavor
+ */
+#define WIRE_FLVR(wflvr)		(((__u32) (wflvr)) & 0x000FFFFF)
+
+/** @} flavor */
+
+static inline void flvr_set_svc(__u32 *flvr, __u32 svc)
+{
+	LASSERT(svc < SPTLRPC_SVC_MAX);
+	*flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr),
+			  SPTLRPC_FLVR_MECH(*flvr),
+			  svc,
+			  SPTLRPC_FLVR_BULK_TYPE(*flvr),
+			  SPTLRPC_FLVR_BULK_SVC(*flvr));
+}
+
+static inline void flvr_set_bulk_svc(__u32 *flvr, __u32 svc)
+{
+	LASSERT(svc < SPTLRPC_BULK_SVC_MAX);
+	*flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr),
+			  SPTLRPC_FLVR_MECH(*flvr),
+			  SPTLRPC_FLVR_SVC(*flvr),
+			  SPTLRPC_FLVR_BULK_TYPE(*flvr),
+			  svc);
+}
+
+struct bulk_spec_hash {
+	__u8    hash_alg;
+};
+
+/**
+ * Full description of flavors being used on a ptlrpc connection, include
+ * both regular RPC and bulk transfer parts.
+ */
+struct sptlrpc_flavor {
+	/**
+	 * wire flavor, should be renamed to sf_wire.
+	 */
+	__u32   sf_rpc;
+	/**
+	 * general flags of PTLRPC_SEC_FL_*
+	 */
+	__u32   sf_flags;
+	/**
+	 * rpc flavor specification
+	 */
+	union {
+		/* nothing for now */
+	} u_rpc;
+	/**
+	 * bulk flavor specification
+	 */
+	union {
+		struct bulk_spec_hash hash;
+	} u_bulk;
+};
+
+/**
+ * identify the RPC is generated from what part of Lustre. It's encoded into
+ * RPC requests and to be checked by ptlrpc service.
+ */
+enum lustre_sec_part {
+	LUSTRE_SP_CLI	   = 0,
+	LUSTRE_SP_MDT,
+	LUSTRE_SP_OST,
+	LUSTRE_SP_MGC,
+	LUSTRE_SP_MGS,
+	LUSTRE_SP_ANY	   = 0xFF
+};
+
+const char *sptlrpc_part2name(enum lustre_sec_part sp);
+enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd);
+
+/**
+ * A rule specifies a flavor to be used by a ptlrpc connection between
+ * two Lustre parts.
+ */
+struct sptlrpc_rule {
+	__u32		   sr_netid;   /* LNET network ID */
+	__u8		    sr_from;    /* sec_part */
+	__u8		    sr_to;      /* sec_part */
+	__u16		   sr_padding;
+	struct sptlrpc_flavor   sr_flvr;
+};
+
+/**
+ * A set of rules in memory.
+ *
+ * Rules are generated and stored on MGS, and propagated to MDT, OST,
+ * and client when needed.
+ */
+struct sptlrpc_rule_set {
+	int		     srs_nslot;
+	int		     srs_nrule;
+	struct sptlrpc_rule    *srs_rules;
+};
+
+int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr);
+int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr);
+
+static inline void sptlrpc_rule_set_init(struct sptlrpc_rule_set *set)
+{
+	memset(set, 0, sizeof(*set));
+}
+
+void sptlrpc_rule_set_free(struct sptlrpc_rule_set *set);
+int  sptlrpc_rule_set_expand(struct sptlrpc_rule_set *set);
+int  sptlrpc_rule_set_merge(struct sptlrpc_rule_set *set,
+			    struct sptlrpc_rule *rule);
+int sptlrpc_rule_set_choose(struct sptlrpc_rule_set *rset,
+			    enum lustre_sec_part from,
+			    enum lustre_sec_part to,
+			    lnet_nid_t nid,
+			    struct sptlrpc_flavor *sf);
+void sptlrpc_rule_set_dump(struct sptlrpc_rule_set *set);
+
+int  sptlrpc_process_config(struct lustre_cfg *lcfg);
+void sptlrpc_conf_log_start(const char *logname);
+void sptlrpc_conf_log_stop(const char *logname);
+void sptlrpc_conf_log_update_begin(const char *logname);
+void sptlrpc_conf_log_update_end(const char *logname);
+void sptlrpc_conf_client_adapt(struct obd_device *obd);
+int  sptlrpc_conf_target_get_rules(struct obd_device *obd,
+				   struct sptlrpc_rule_set *rset,
+				   int initial);
+void sptlrpc_target_choose_flavor(struct sptlrpc_rule_set *rset,
+				  enum lustre_sec_part from,
+				  lnet_nid_t nid,
+				  struct sptlrpc_flavor *flavor);
+
+/* The maximum length of security payload. 1024 is enough for Kerberos 5,
+ * and should be enough for other future mechanisms but not sure.
+ * Only used by pre-allocated request/reply pool.
+ */
+#define SPTLRPC_MAX_PAYLOAD     (1024)
+
+
+struct vfs_cred {
+	uint32_t	vc_uid;
+	uint32_t	vc_gid;
+};
+
+struct ptlrpc_ctx_ops {
+	/**
+	 * To determine whether it's suitable to use the \a ctx for \a vcred.
+	 */
+	int     (*match)       (struct ptlrpc_cli_ctx *ctx,
+				struct vfs_cred *vcred);
+
+	/**
+	 * To bring the \a ctx uptodate.
+	 */
+	int     (*refresh)     (struct ptlrpc_cli_ctx *ctx);
+
+	/**
+	 * Validate the \a ctx.
+	 */
+	int     (*validate)    (struct ptlrpc_cli_ctx *ctx);
+
+	/**
+	 * Force the \a ctx to die.
+	 */
+	void    (*die)	 (struct ptlrpc_cli_ctx *ctx,
+				int grace);
+	int     (*display)     (struct ptlrpc_cli_ctx *ctx,
+				char *buf, int bufsize);
+
+	/**
+	 * Sign the request message using \a ctx.
+	 *
+	 * \pre req->rq_reqmsg point to request message.
+	 * \pre req->rq_reqlen is the request message length.
+	 * \post req->rq_reqbuf point to request message with signature.
+	 * \post req->rq_reqdata_len is set to the final request message size.
+	 *
+	 * \see null_ctx_sign(), plain_ctx_sign(), gss_cli_ctx_sign().
+	 */
+	int     (*sign)	(struct ptlrpc_cli_ctx *ctx,
+				struct ptlrpc_request *req);
+
+	/**
+	 * Verify the reply message using \a ctx.
+	 *
+	 * \pre req->rq_repdata point to reply message with signature.
+	 * \pre req->rq_repdata_len is the total reply message length.
+	 * \post req->rq_repmsg point to reply message without signature.
+	 * \post req->rq_replen is the reply message length.
+	 *
+	 * \see null_ctx_verify(), plain_ctx_verify(), gss_cli_ctx_verify().
+	 */
+	int     (*verify)      (struct ptlrpc_cli_ctx *ctx,
+				struct ptlrpc_request *req);
+
+	/**
+	 * Encrypt the request message using \a ctx.
+	 *
+	 * \pre req->rq_reqmsg point to request message in clear text.
+	 * \pre req->rq_reqlen is the request message length.
+	 * \post req->rq_reqbuf point to request message.
+	 * \post req->rq_reqdata_len is set to the final request message size.
+	 *
+	 * \see gss_cli_ctx_seal().
+	 */
+	int     (*seal)	(struct ptlrpc_cli_ctx *ctx,
+				struct ptlrpc_request *req);
+
+	/**
+	 * Decrypt the reply message using \a ctx.
+	 *
+	 * \pre req->rq_repdata point to encrypted reply message.
+	 * \pre req->rq_repdata_len is the total cipher text length.
+	 * \post req->rq_repmsg point to reply message in clear text.
+	 * \post req->rq_replen is the reply message length in clear text.
+	 *
+	 * \see gss_cli_ctx_unseal().
+	 */
+	int     (*unseal)      (struct ptlrpc_cli_ctx *ctx,
+				struct ptlrpc_request *req);
+
+	/**
+	 * Wrap bulk request data. This is called before wrapping RPC
+	 * request message.
+	 *
+	 * \pre bulk buffer is descripted by desc->bd_iov and
+	 * desc->bd_iov_count. note for read it's just buffer, no data
+	 * need to be sent;  for write it contains data in clear text.
+	 * \post when necessary, ptlrpc_bulk_sec_desc was properly prepared
+	 * (usually inside of RPC request message).
+	 * - encryption: cipher text bulk buffer is descripted by
+	 *   desc->bd_enc_iov and desc->bd_iov_count (currently assume iov
+	 *   count remains the same).
+	 * - otherwise: bulk buffer is still desc->bd_iov and
+	 *   desc->bd_iov_count.
+	 *
+	 * \return 0: success.
+	 * \return -ev: error code.
+	 *
+	 * \see plain_cli_wrap_bulk(), gss_cli_ctx_wrap_bulk().
+	 */
+	int     (*wrap_bulk)   (struct ptlrpc_cli_ctx *ctx,
+				struct ptlrpc_request *req,
+				struct ptlrpc_bulk_desc *desc);
+
+	/**
+	 * Unwrap bulk reply data. This is called after wrapping RPC
+	 * reply message.
+	 *
+	 * \pre bulk buffer is descripted by desc->bd_iov/desc->bd_enc_iov and
+	 * desc->bd_iov_count, according to wrap_bulk().
+	 * \post final bulk data in clear text is placed in buffer described
+	 * by desc->bd_iov and desc->bd_iov_count.
+	 * \return +ve nob of actual bulk data in clear text.
+	 * \return -ve error code.
+	 *
+	 * \see plain_cli_unwrap_bulk(), gss_cli_ctx_unwrap_bulk().
+	 */
+	int     (*unwrap_bulk) (struct ptlrpc_cli_ctx *ctx,
+				struct ptlrpc_request *req,
+				struct ptlrpc_bulk_desc *desc);
+};
+
+#define PTLRPC_CTX_NEW_BIT	     (0)  /* newly created */
+#define PTLRPC_CTX_UPTODATE_BIT	(1)  /* uptodate */
+#define PTLRPC_CTX_DEAD_BIT	    (2)  /* mark expired gracefully */
+#define PTLRPC_CTX_ERROR_BIT	   (3)  /* fatal error (refresh, etc.) */
+#define PTLRPC_CTX_CACHED_BIT	  (8)  /* in ctx cache (hash etc.) */
+#define PTLRPC_CTX_ETERNAL_BIT	 (9)  /* always valid */
+
+#define PTLRPC_CTX_NEW		 (1 << PTLRPC_CTX_NEW_BIT)
+#define PTLRPC_CTX_UPTODATE	    (1 << PTLRPC_CTX_UPTODATE_BIT)
+#define PTLRPC_CTX_DEAD		(1 << PTLRPC_CTX_DEAD_BIT)
+#define PTLRPC_CTX_ERROR	       (1 << PTLRPC_CTX_ERROR_BIT)
+#define PTLRPC_CTX_CACHED	      (1 << PTLRPC_CTX_CACHED_BIT)
+#define PTLRPC_CTX_ETERNAL	     (1 << PTLRPC_CTX_ETERNAL_BIT)
+
+#define PTLRPC_CTX_STATUS_MASK	 (PTLRPC_CTX_NEW_BIT    |       \
+					PTLRPC_CTX_UPTODATE   |       \
+					PTLRPC_CTX_DEAD       |       \
+					PTLRPC_CTX_ERROR)
+
+struct ptlrpc_cli_ctx {
+	struct hlist_node	cc_cache;      /* linked into ctx cache */
+	atomic_t	    cc_refcount;
+	struct ptlrpc_sec      *cc_sec;
+	struct ptlrpc_ctx_ops  *cc_ops;
+	cfs_time_t	      cc_expire;     /* in seconds */
+	unsigned int	    cc_early_expire:1;
+	unsigned long	   cc_flags;
+	struct vfs_cred	 cc_vcred;
+	spinlock_t		cc_lock;
+	struct list_head	      cc_req_list;   /* waiting reqs linked here */
+	struct list_head	      cc_gc_chain;   /* linked to gc chain */
+};
+
+/**
+ * client side policy operation vector.
+ */
+struct ptlrpc_sec_cops {
+	/**
+	 * Given an \a imp, create and initialize a ptlrpc_sec structure.
+	 * \param ctx service context:
+	 * - regular import: \a ctx should be NULL;
+	 * - reverse import: \a ctx is obtained from incoming request.
+	 * \param flavor specify what flavor to use.
+	 *
+	 * When necessary, policy module is responsible for taking reference
+	 * on the import.
+	 *
+	 * \see null_create_sec(), plain_create_sec(), gss_sec_create_kr().
+	 */
+	struct ptlrpc_sec *     (*create_sec)  (struct obd_import *imp,
+						struct ptlrpc_svc_ctx *ctx,
+						struct sptlrpc_flavor *flavor);
+
+	/**
+	 * Destructor of ptlrpc_sec. When called, refcount has been dropped
+	 * to 0 and all contexts has been destroyed.
+	 *
+	 * \see null_destroy_sec(), plain_destroy_sec(), gss_sec_destroy_kr().
+	 */
+	void		    (*destroy_sec) (struct ptlrpc_sec *sec);
+
+	/**
+	 * Notify that this ptlrpc_sec is going to die. Optionally, policy
+	 * module is supposed to set sec->ps_dying and whatever necessary
+	 * actions.
+	 *
+	 * \see plain_kill_sec(), gss_sec_kill().
+	 */
+	void		    (*kill_sec)    (struct ptlrpc_sec *sec);
+
+	/**
+	 * Given \a vcred, lookup and/or create its context. The policy module
+	 * is supposed to maintain its own context cache.
+	 * XXX currently \a create and \a remove_dead is always 1, perhaps
+	 * should be removed completely.
+	 *
+	 * \see null_lookup_ctx(), plain_lookup_ctx(), gss_sec_lookup_ctx_kr().
+	 */
+	struct ptlrpc_cli_ctx * (*lookup_ctx)  (struct ptlrpc_sec *sec,
+						struct vfs_cred *vcred,
+						int create,
+						int remove_dead);
+
+	/**
+	 * Called then the reference of \a ctx dropped to 0. The policy module
+	 * is supposed to destroy this context or whatever else according to
+	 * its cache maintainance mechamism.
+	 *
+	 * \param sync if zero, we shouldn't wait for the context being
+	 * destroyed completely.
+	 *
+	 * \see plain_release_ctx(), gss_sec_release_ctx_kr().
+	 */
+	void		    (*release_ctx) (struct ptlrpc_sec *sec,
+						struct ptlrpc_cli_ctx *ctx,
+						int sync);
+
+	/**
+	 * Flush the context cache.
+	 *
+	 * \param uid context of which user, -1 means all contexts.
+	 * \param grace if zero, the PTLRPC_CTX_UPTODATE_BIT of affected
+	 * contexts should be cleared immediately.
+	 * \param force if zero, only idle contexts will be flushed.
+	 *
+	 * \see plain_flush_ctx_cache(), gss_sec_flush_ctx_cache_kr().
+	 */
+	int		     (*flush_ctx_cache)
+					       (struct ptlrpc_sec *sec,
+						uid_t uid,
+						int grace,
+						int force);
+
+	/**
+	 * Called periodically by garbage collector to remove dead contexts
+	 * from cache.
+	 *
+	 * \see gss_sec_gc_ctx_kr().
+	 */
+	void		    (*gc_ctx)      (struct ptlrpc_sec *sec);
+
+	/**
+	 * Given an context \a ctx, install a corresponding reverse service
+	 * context on client side.
+	 * XXX currently it's only used by GSS module, maybe we should remove
+	 * this from general API.
+	 */
+	int		     (*install_rctx)(struct obd_import *imp,
+						struct ptlrpc_sec *sec,
+						struct ptlrpc_cli_ctx *ctx);
+
+	/**
+	 * To allocate request buffer for \a req.
+	 *
+	 * \pre req->rq_reqmsg == NULL.
+	 * \pre req->rq_reqbuf == NULL, otherwise it must be pre-allocated,
+	 * we are not supposed to free it.
+	 * \post if success, req->rq_reqmsg point to a buffer with size
+	 * at least \a lustre_msg_size.
+	 *
+	 * \see null_alloc_reqbuf(), plain_alloc_reqbuf(), gss_alloc_reqbuf().
+	 */
+	int		     (*alloc_reqbuf)(struct ptlrpc_sec *sec,
+						struct ptlrpc_request *req,
+						int lustre_msg_size);
+
+	/**
+	 * To free request buffer for \a req.
+	 *
+	 * \pre req->rq_reqbuf != NULL.
+	 *
+	 * \see null_free_reqbuf(), plain_free_reqbuf(), gss_free_reqbuf().
+	 */
+	void		    (*free_reqbuf) (struct ptlrpc_sec *sec,
+						struct ptlrpc_request *req);
+
+	/**
+	 * To allocate reply buffer for \a req.
+	 *
+	 * \pre req->rq_repbuf == NULL.
+	 * \post if success, req->rq_repbuf point to a buffer with size
+	 * req->rq_repbuf_len, the size should be large enough to receive
+	 * reply which be transformed from \a lustre_msg_size of clear text.
+	 *
+	 * \see null_alloc_repbuf(), plain_alloc_repbuf(), gss_alloc_repbuf().
+	 */
+	int		     (*alloc_repbuf)(struct ptlrpc_sec *sec,
+						struct ptlrpc_request *req,
+						int lustre_msg_size);
+
+	/**
+	 * To free reply buffer for \a req.
+	 *
+	 * \pre req->rq_repbuf != NULL.
+	 * \post req->rq_repbuf == NULL.
+	 * \post req->rq_repbuf_len == 0.
+	 *
+	 * \see null_free_repbuf(), plain_free_repbuf(), gss_free_repbuf().
+	 */
+	void		    (*free_repbuf) (struct ptlrpc_sec *sec,
+						struct ptlrpc_request *req);
+
+	/**
+	 * To expand the request buffer of \a req, thus the \a segment in
+	 * the request message pointed by req->rq_reqmsg can accommodate
+	 * at least \a newsize of data.
+	 *
+	 * \pre req->rq_reqmsg->lm_buflens[segment] < newsize.
+	 *
+	 * \see null_enlarge_reqbuf(), plain_enlarge_reqbuf(),
+	 * gss_enlarge_reqbuf().
+	 */
+	int		     (*enlarge_reqbuf)
+					       (struct ptlrpc_sec *sec,
+						struct ptlrpc_request *req,
+						int segment, int newsize);
+	/*
+	 * misc
+	 */
+	int		     (*display)     (struct ptlrpc_sec *sec,
+						struct seq_file *seq);
+};
+
+/**
+ * server side policy operation vector.
+ */
+struct ptlrpc_sec_sops {
+	/**
+	 * verify an incoming request.
+	 *
+	 * \pre request message is pointed by req->rq_reqbuf, size is
+	 * req->rq_reqdata_len; and the message has been unpacked to
+	 * host byte order.
+	 *
+	 * \retval SECSVC_OK success, req->rq_reqmsg point to request message
+	 * in clear text, size is req->rq_reqlen; req->rq_svc_ctx is set;
+	 * req->rq_sp_from is decoded from request.
+	 * \retval SECSVC_COMPLETE success, the request has been fully
+	 * processed, and reply message has been prepared; req->rq_sp_from is
+	 * decoded from request.
+	 * \retval SECSVC_DROP failed, this request should be dropped.
+	 *
+	 * \see null_accept(), plain_accept(), gss_svc_accept_kr().
+	 */
+	int		     (*accept)      (struct ptlrpc_request *req);
+
+	/**
+	 * Perform security transformation upon reply message.
+	 *
+	 * \pre reply message is pointed by req->rq_reply_state->rs_msg, size
+	 * is req->rq_replen.
+	 * \post req->rs_repdata_len is the final message size.
+	 * \post req->rq_reply_off is set.
+	 *
+	 * \see null_authorize(), plain_authorize(), gss_svc_authorize().
+	 */
+	int		     (*authorize)   (struct ptlrpc_request *req);
+
+	/**
+	 * Invalidate server context \a ctx.
+	 *
+	 * \see gss_svc_invalidate_ctx().
+	 */
+	void		    (*invalidate_ctx)
+					       (struct ptlrpc_svc_ctx *ctx);
+
+	/**
+	 * Allocate a ptlrpc_reply_state.
+	 *
+	 * \param msgsize size of the reply message in clear text.
+	 * \pre if req->rq_reply_state != NULL, then it's pre-allocated, we
+	 * should simply use it; otherwise we'll responsible for allocating
+	 * a new one.
+	 * \post req->rq_reply_state != NULL;
+	 * \post req->rq_reply_state->rs_msg != NULL;
+	 *
+	 * \see null_alloc_rs(), plain_alloc_rs(), gss_svc_alloc_rs().
+	 */
+	int		     (*alloc_rs)    (struct ptlrpc_request *req,
+						int msgsize);
+
+	/**
+	 * Free a ptlrpc_reply_state.
+	 */
+	void		    (*free_rs)     (struct ptlrpc_reply_state *rs);
+
+	/**
+	 * Release the server context \a ctx.
+	 *
+	 * \see gss_svc_free_ctx().
+	 */
+	void		    (*free_ctx)    (struct ptlrpc_svc_ctx *ctx);
+
+	/**
+	 * Install a reverse context based on the server context \a ctx.
+	 *
+	 * \see gss_svc_install_rctx_kr().
+	 */
+	int		     (*install_rctx)(struct obd_import *imp,
+						struct ptlrpc_svc_ctx *ctx);
+
+	/**
+	 * Prepare buffer for incoming bulk write.
+	 *
+	 * \pre desc->bd_iov and desc->bd_iov_count describes the buffer
+	 * intended to receive the write.
+	 *
+	 * \see gss_svc_prep_bulk().
+	 */
+	int		     (*prep_bulk)   (struct ptlrpc_request *req,
+						struct ptlrpc_bulk_desc *desc);
+
+	/**
+	 * Unwrap the bulk write data.
+	 *
+	 * \see plain_svc_unwrap_bulk(), gss_svc_unwrap_bulk().
+	 */
+	int		     (*unwrap_bulk) (struct ptlrpc_request *req,
+						struct ptlrpc_bulk_desc *desc);
+
+	/**
+	 * Wrap the bulk read data.
+	 *
+	 * \see plain_svc_wrap_bulk(), gss_svc_wrap_bulk().
+	 */
+	int		     (*wrap_bulk)   (struct ptlrpc_request *req,
+						struct ptlrpc_bulk_desc *desc);
+};
+
+struct ptlrpc_sec_policy {
+	module_t		   *sp_owner;
+	char			   *sp_name;
+	__u16			   sp_policy; /* policy number */
+	struct ptlrpc_sec_cops	 *sp_cops;   /* client ops */
+	struct ptlrpc_sec_sops	 *sp_sops;   /* server ops */
+};
+
+#define PTLRPC_SEC_FL_REVERSE	   0x0001 /* reverse sec */
+#define PTLRPC_SEC_FL_ROOTONLY	  0x0002 /* treat everyone as root */
+#define PTLRPC_SEC_FL_UDESC	     0x0004 /* ship udesc */
+#define PTLRPC_SEC_FL_BULK	      0x0008 /* intensive bulk i/o expected */
+#define PTLRPC_SEC_FL_PAG	       0x0010 /* PAG mode */
+
+/**
+ * The ptlrpc_sec represents the client side ptlrpc security facilities,
+ * each obd_import (both regular and reverse import) must associate with
+ * a ptlrpc_sec.
+ *
+ * \see sptlrpc_import_sec_adapt().
+ */
+struct ptlrpc_sec {
+	struct ptlrpc_sec_policy       *ps_policy;
+	atomic_t		    ps_refcount;
+	/** statistic only */
+	atomic_t		    ps_nctx;
+	/** unique identifier */
+	int			     ps_id;
+	struct sptlrpc_flavor	   ps_flvr;
+	enum lustre_sec_part	    ps_part;
+	/** after set, no more new context will be created */
+	unsigned int		    ps_dying:1;
+	/** owning import */
+	struct obd_import	      *ps_import;
+	spinlock_t			ps_lock;
+
+	/*
+	 * garbage collection
+	 */
+	struct list_head		      ps_gc_list;
+	cfs_time_t		      ps_gc_interval; /* in seconds */
+	cfs_time_t		      ps_gc_next;     /* in seconds */
+};
+
+static inline int sec_is_reverse(struct ptlrpc_sec *sec)
+{
+	return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE);
+}
+
+static inline int sec_is_rootonly(struct ptlrpc_sec *sec)
+{
+	return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_ROOTONLY);
+}
+
+
+struct ptlrpc_svc_ctx {
+	atomic_t		    sc_refcount;
+	struct ptlrpc_sec_policy       *sc_policy;
+};
+
+/*
+ * user identity descriptor
+ */
+#define LUSTRE_MAX_GROUPS	       (128)
+
+struct ptlrpc_user_desc {
+	__u32	   pud_uid;
+	__u32	   pud_gid;
+	__u32	   pud_fsuid;
+	__u32	   pud_fsgid;
+	__u32	   pud_cap;
+	__u32	   pud_ngroups;
+	__u32	   pud_groups[0];
+};
+
+/*
+ * bulk flavors
+ */
+enum sptlrpc_bulk_hash_alg {
+	BULK_HASH_ALG_NULL      = 0,
+	BULK_HASH_ALG_ADLER32,
+	BULK_HASH_ALG_CRC32,
+	BULK_HASH_ALG_MD5,
+	BULK_HASH_ALG_SHA1,
+	BULK_HASH_ALG_SHA256,
+	BULK_HASH_ALG_SHA384,
+	BULK_HASH_ALG_SHA512,
+	BULK_HASH_ALG_MAX
+};
+
+const char * sptlrpc_get_hash_name(__u8 hash_alg);
+__u8 sptlrpc_get_hash_alg(const char *algname);
+
+enum {
+	BSD_FL_ERR      = 1,
+};
+
+struct ptlrpc_bulk_sec_desc {
+	__u8	    bsd_version;    /* 0 */
+	__u8	    bsd_type;       /* SPTLRPC_BULK_XXX */
+	__u8	    bsd_svc;	/* SPTLRPC_BULK_SVC_XXXX */
+	__u8	    bsd_flags;      /* flags */
+	__u32	   bsd_nob;	/* nob of bulk data */
+	__u8	    bsd_data[0];    /* policy-specific token */
+};
+
+
+/*
+ * lprocfs
+ */
+struct proc_dir_entry;
+extern struct proc_dir_entry *sptlrpc_proc_root;
+
+/*
+ * round size up to next power of 2, for slab allocation.
+ * @size must be sane (can't overflow after round up)
+ */
+static inline int size_roundup_power2(int size)
+{
+	size--;
+	size |= size >> 1;
+	size |= size >> 2;
+	size |= size >> 4;
+	size |= size >> 8;
+	size |= size >> 16;
+	size++;
+	return size;
+}
+
+/*
+ * internal support libraries
+ */
+void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
+				  int segment, int newsize);
+
+/*
+ * security policies
+ */
+int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy);
+int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy);
+
+__u32 sptlrpc_name2flavor_base(const char *name);
+const char *sptlrpc_flavor2name_base(__u32 flvr);
+char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
+			       char *buf, int bufsize);
+char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize);
+char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize);
+
+static inline
+struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy)
+{
+	__module_get(policy->sp_owner);
+	return policy;
+}
+
+static inline
+void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy)
+{
+	module_put(policy->sp_owner);
+}
+
+/*
+ * client credential
+ */
+static inline
+unsigned long cli_ctx_status(struct ptlrpc_cli_ctx *ctx)
+{
+	return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK);
+}
+
+static inline
+int cli_ctx_is_ready(struct ptlrpc_cli_ctx *ctx)
+{
+	return (cli_ctx_status(ctx) == PTLRPC_CTX_UPTODATE);
+}
+
+static inline
+int cli_ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx)
+{
+	return (cli_ctx_status(ctx) != 0);
+}
+
+static inline
+int cli_ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx)
+{
+	return ((ctx->cc_flags & PTLRPC_CTX_UPTODATE) != 0);
+}
+
+static inline
+int cli_ctx_is_error(struct ptlrpc_cli_ctx *ctx)
+{
+	return ((ctx->cc_flags & PTLRPC_CTX_ERROR) != 0);
+}
+
+static inline
+int cli_ctx_is_dead(struct ptlrpc_cli_ctx *ctx)
+{
+	return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0);
+}
+
+static inline
+int cli_ctx_is_eternal(struct ptlrpc_cli_ctx *ctx)
+{
+	return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0);
+}
+
+/*
+ * sec get/put
+ */
+struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec);
+void sptlrpc_sec_put(struct ptlrpc_sec *sec);
+
+/*
+ * internal apis which only used by policy impelentation
+ */
+int  sptlrpc_get_next_secid(void);
+void sptlrpc_sec_destroy(struct ptlrpc_sec *sec);
+
+/*
+ * exported client context api
+ */
+struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync);
+void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx);
+int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize);
+
+/*
+ * exported client context wrap/buffers
+ */
+int sptlrpc_cli_wrap_request(struct ptlrpc_request *req);
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req);
+int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize);
+void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req);
+int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize);
+void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req);
+int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
+			       int segment, int newsize);
+int  sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
+				    struct ptlrpc_request **req_ret);
+void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req);
+
+void sptlrpc_request_out_callback(struct ptlrpc_request *req);
+
+/*
+ * exported higher interface of import & request
+ */
+int sptlrpc_import_sec_adapt(struct obd_import *imp,
+			     struct ptlrpc_svc_ctx *ctx,
+			     struct sptlrpc_flavor *flvr);
+struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp);
+void sptlrpc_import_sec_put(struct obd_import *imp);
+
+int  sptlrpc_import_check_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_all_ctx(struct obd_import *imp);
+int  sptlrpc_req_get_ctx(struct ptlrpc_request *req);
+void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync);
+int  sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout);
+int  sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req);
+void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode);
+
+int sptlrpc_parse_rule(char *param, struct sptlrpc_rule *rule);
+
+/* gc */
+void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec);
+void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec);
+void sptlrpc_gc_add_ctx(struct ptlrpc_cli_ctx *ctx);
+
+/* misc */
+const char * sec2target_str(struct ptlrpc_sec *sec);
+int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev);
+
+/*
+ * server side
+ */
+enum secsvc_accept_res {
+	SECSVC_OK       = 0,
+	SECSVC_COMPLETE,
+	SECSVC_DROP,
+};
+
+int  sptlrpc_svc_unwrap_request(struct ptlrpc_request *req);
+int  sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen);
+int  sptlrpc_svc_wrap_reply(struct ptlrpc_request *req);
+void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs);
+void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req);
+void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req);
+void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req);
+
+int  sptlrpc_target_export_check(struct obd_export *exp,
+				 struct ptlrpc_request *req);
+void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
+				      struct sptlrpc_rule_set *rset);
+
+/*
+ * reverse context
+ */
+int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
+				struct ptlrpc_svc_ctx *ctx);
+int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
+				struct ptlrpc_cli_ctx *ctx);
+
+/* bulk security api */
+int sptlrpc_enc_pool_add_user(void);
+int sptlrpc_enc_pool_del_user(void);
+int  sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc);
+void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc);
+
+int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
+			  struct ptlrpc_bulk_desc *desc);
+int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
+				 struct ptlrpc_bulk_desc *desc,
+				 int nob);
+int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
+				  struct ptlrpc_bulk_desc *desc);
+
+/* bulk helpers (internal use only by policies) */
+int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
+			      void *buf, int buflen);
+
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed);
+
+/* user descriptor helpers */
+static inline int sptlrpc_user_desc_size(int ngroups)
+{
+	return sizeof(struct ptlrpc_user_desc) + ngroups * sizeof(__u32);
+}
+
+int sptlrpc_current_user_desc_size(void);
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset);
+int sptlrpc_unpack_user_desc(struct lustre_msg *req, int offset, int swabbed);
+
+
+#define CFS_CAP_CHOWN_MASK (1 << CFS_CAP_CHOWN)
+#define CFS_CAP_SYS_RESOURCE_MASK (1 << CFS_CAP_SYS_RESOURCE)
+
+enum {
+	LUSTRE_SEC_NONE	 = 0,
+	LUSTRE_SEC_REMOTE       = 1,
+	LUSTRE_SEC_SPECIFY      = 2,
+	LUSTRE_SEC_ALL	  = 3
+};
+
+/** @} sptlrpc */
+
+#endif /* _LUSTRE_SEC_H_ */
diff --git a/drivers/staging/lustre/lustre/include/lustre_update.h b/drivers/staging/lustre/lustre/include/lustre_update.h
new file mode 100644
index 000000000000..84defce0f623
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_update.h
@@ -0,0 +1,189 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.htm
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Intel Corporation.
+ */
+/*
+ * lustre/include/lustre_update.h
+ *
+ * Author: Di Wang <di.wang@intel.com>
+ */
+
+#ifndef _LUSTRE_UPDATE_H
+#define _LUSTRE_UPDATE_H
+
+#define UPDATE_BUFFER_SIZE	8192
+struct update_request {
+	struct dt_device	*ur_dt;
+	struct list_head		ur_list;    /* attached itself to thandle */
+	int			ur_flags;
+	int			ur_rc;	    /* request result */
+	int			ur_batchid; /* Current batch(trans) id */
+	struct update_buf	*ur_buf;   /* Holding the update req */
+};
+
+static inline unsigned long update_size(struct update *update)
+{
+	unsigned long size;
+	int	   i;
+
+	size = cfs_size_round(offsetof(struct update, u_bufs[0]));
+	for (i = 0; i < UPDATE_BUF_COUNT; i++)
+		size += cfs_size_round(update->u_lens[i]);
+
+	return size;
+}
+
+static inline void *update_param_buf(struct update *update, int index,
+				     int *size)
+{
+	int	i;
+	void	*ptr;
+
+	if (index >= UPDATE_BUF_COUNT)
+		return NULL;
+
+	ptr = (char *)update + cfs_size_round(offsetof(struct update,
+						       u_bufs[0]));
+	for (i = 0; i < index; i++) {
+		LASSERT(update->u_lens[i] > 0);
+		ptr += cfs_size_round(update->u_lens[i]);
+	}
+
+	if (size != NULL)
+		*size = update->u_lens[index];
+
+	return ptr;
+}
+
+static inline unsigned long update_buf_size(struct update_buf *buf)
+{
+	unsigned long size;
+	int	   i = 0;
+
+	size = cfs_size_round(offsetof(struct update_buf, ub_bufs[0]));
+	for (i = 0; i < buf->ub_count; i++) {
+		struct update *update;
+
+		update = (struct update *)((char *)buf + size);
+		size += update_size(update);
+	}
+	LASSERT(size <= UPDATE_BUFFER_SIZE);
+	return size;
+}
+
+static inline void *update_buf_get(struct update_buf *buf, int index, int *size)
+{
+	int	count = buf->ub_count;
+	void	*ptr;
+	int	i = 0;
+
+	if (index >= count)
+		return NULL;
+
+	ptr = (char *)buf + cfs_size_round(offsetof(struct update_buf,
+						    ub_bufs[0]));
+	for (i = 0; i < index; i++)
+		ptr += update_size((struct update *)ptr);
+
+	if (size != NULL)
+		*size = update_size((struct update *)ptr);
+
+	return ptr;
+}
+
+static inline void update_init_reply_buf(struct update_reply *reply, int count)
+{
+	reply->ur_version = UPDATE_REPLY_V1;
+	reply->ur_count = count;
+}
+
+static inline void *update_get_buf_internal(struct update_reply *reply,
+					    int index, int *size)
+{
+	char *ptr;
+	int count = reply->ur_count;
+	int i;
+
+	if (index >= count)
+		return NULL;
+
+	ptr = (char *)reply + cfs_size_round(offsetof(struct update_reply,
+					     ur_lens[count]));
+	for (i = 0; i < index; i++) {
+		LASSERT(reply->ur_lens[i] > 0);
+		ptr += cfs_size_round(reply->ur_lens[i]);
+	}
+
+	if (size != NULL)
+		*size = reply->ur_lens[index];
+
+	return ptr;
+}
+
+static inline void update_insert_reply(struct update_reply *reply, void *data,
+				       int data_len, int index, int rc)
+{
+	char *ptr;
+
+	ptr = update_get_buf_internal(reply, index, NULL);
+	LASSERT(ptr != NULL);
+
+	*(int *)ptr = cpu_to_le32(rc);
+	ptr += sizeof(int);
+	if (data_len > 0) {
+		LASSERT(data != NULL);
+		memcpy(ptr, data, data_len);
+	}
+	reply->ur_lens[index] = data_len + sizeof(int);
+}
+
+static inline int update_get_reply_buf(struct update_reply *reply, void **buf,
+				       int index)
+{
+	char *ptr;
+	int  size = 0;
+	int  result;
+
+	ptr = update_get_buf_internal(reply, index, &size);
+	result = *(int *)ptr;
+
+	if (result < 0)
+		return result;
+
+	LASSERT((ptr != NULL && size >= sizeof(int)));
+	*buf = ptr + sizeof(int);
+	return size - sizeof(int);
+}
+
+static inline int update_get_reply_result(struct update_reply *reply,
+					  void **buf, int index)
+{
+	void *ptr;
+	int  size;
+
+	ptr = update_get_buf_internal(reply, index, &size);
+	LASSERT(ptr != NULL && size > sizeof(int));
+	return *(int *)ptr;
+}
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_ver.h b/drivers/staging/lustre/lustre/include/lustre_ver.h
new file mode 100644
index 000000000000..dc187b8f741f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_ver.h
@@ -0,0 +1,24 @@
+#ifndef _LUSTRE_VER_H_
+#define _LUSTRE_VER_H_
+/* This file automatically generated from lustre/include/lustre_ver.h.in,
+ * based on parameters in lustre/autoconf/lustre-version.ac.
+ * Changes made directly to this file will be lost. */
+
+#define LUSTRE_MAJOR 2
+#define LUSTRE_MINOR 3
+#define LUSTRE_PATCH 64
+#define LUSTRE_FIX 0
+#define LUSTRE_VERSION_STRING "2.3.64"
+
+#define LUSTRE_VERSION_CODE OBD_OCD_VERSION(LUSTRE_MAJOR,LUSTRE_MINOR,LUSTRE_PATCH,LUSTRE_FIX)
+
+/* liblustre clients are only allowed to connect if their LUSTRE_FIX mismatches
+ * by this amount (set in lustre/autoconf/lustre-version.ac). */
+#define LUSTRE_VERSION_ALLOWED_OFFSET OBD_OCD_VERSION(0, 0, 1, 32)
+
+/* If lustre version of client and servers it connects to differs by more
+ * than this amount, client would issue a warning.
+ * (set in lustre/autoconf/lustre-version.ac) */
+#define LUSTRE_VERSION_OFFSET_WARN OBD_OCD_VERSION(0, 4, 0, 0)
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lvfs.h b/drivers/staging/lustre/lustre/include/lvfs.h
new file mode 100644
index 000000000000..28f1a6b76f73
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lvfs.h
@@ -0,0 +1,57 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/lvfs.h
+ *
+ * lustre VFS/process permission interface
+ */
+
+#ifndef __LVFS_H__
+#define __LVFS_H__
+
+#define LL_FID_NAMELEN (16 + 1 + 8 + 1)
+
+#include <linux/libcfs/libcfs.h>
+#include <linux/lvfs.h>
+
+#include <linux/libcfs/lucache.h>
+
+
+/* lvfs_common.c */
+struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *, __u64, __u32, __u64 ,void *data);
+
+void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
+	       struct lvfs_ucred *cred);
+void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
+	      struct lvfs_ucred *cred);
+#endif
diff --git a/drivers/staging/lustre/lustre/include/md_object.h b/drivers/staging/lustre/lustre/include/md_object.h
new file mode 100644
index 000000000000..92d6420b21da
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/md_object.h
@@ -0,0 +1,908 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/md_object.h
+ *
+ * Extention of lu_object.h for metadata objects
+ */
+
+#ifndef _LUSTRE_MD_OBJECT_H
+#define _LUSTRE_MD_OBJECT_H
+
+/** \defgroup md md
+ * Sub-class of lu_object with methods common for "meta-data" objects in MDT
+ * stack.
+ *
+ * Meta-data objects implement namespace operations: you can link, unlink
+ * them, and treat them as directories.
+ *
+ * Examples: mdt, cmm, and mdt are implementations of md interface.
+ * @{
+ */
+
+
+/*
+ * super-class definitions.
+ */
+#include <dt_object.h>
+
+struct md_device;
+struct md_device_operations;
+struct md_object;
+struct obd_export;
+
+enum {
+	UCRED_INVALID   = -1,
+	UCRED_INIT      = 0,
+	UCRED_OLD       = 1,
+	UCRED_NEW       = 2
+};
+
+enum {
+	MD_CAPAINFO_MAX = 5
+};
+
+/** there are at most 5 fids in one operation, see rename, NOTE the last one
+ * is a temporary one used for is_subdir() */
+struct md_capainfo {
+	__u32		   mc_auth;
+	__u32		   mc_padding;
+	struct lu_fid	   mc_fid[MD_CAPAINFO_MAX];
+	struct lustre_capa     *mc_capa[MD_CAPAINFO_MAX];
+};
+
+struct md_quota {
+	struct obd_export       *mq_exp;
+};
+
+/**
+ * Implemented in mdd/mdd_handler.c.
+ *
+ * XXX should be moved into separate .h/.c together with all md security
+ * related definitions.
+ */
+struct md_capainfo *md_capainfo(const struct lu_env *env);
+struct md_quota *md_quota(const struct lu_env *env);
+
+/** metadata attributes */
+enum ma_valid {
+	MA_INODE     = (1 << 0),
+	MA_LOV       = (1 << 1),
+	MA_COOKIE    = (1 << 2),
+	MA_FLAGS     = (1 << 3),
+	MA_LMV       = (1 << 4),
+	MA_ACL_DEF   = (1 << 5),
+	MA_LOV_DEF   = (1 << 6),
+	MA_LAY_GEN   = (1 << 7),
+	MA_HSM       = (1 << 8),
+	MA_SOM       = (1 << 9),
+	MA_PFID      = (1 << 10)
+};
+
+typedef enum {
+	MDL_MINMODE  = 0,
+	MDL_EX       = 1,
+	MDL_PW       = 2,
+	MDL_PR       = 4,
+	MDL_CW       = 8,
+	MDL_CR       = 16,
+	MDL_NL       = 32,
+	MDL_GROUP    = 64,
+	MDL_MAXMODE
+} mdl_mode_t;
+
+typedef enum {
+	MDT_NUL_LOCK = 0,
+	MDT_REG_LOCK = (1 << 0),
+	MDT_PDO_LOCK = (1 << 1)
+} mdl_type_t;
+
+/* memory structure for hsm attributes
+ * for fields description see the on disk structure hsm_attrs
+ * which is defined in lustre_idl.h
+ */
+struct md_hsm {
+	__u32	mh_compat;
+	__u32	mh_flags;
+	__u64	mh_arch_id;
+	__u64	mh_arch_ver;
+};
+
+#define IOEPOCH_INVAL 0
+
+/* memory structure for som attributes
+ * for fields description see the on disk structure som_attrs
+ * which is defined in lustre_idl.h
+ */
+struct md_som_data {
+	__u32	msd_compat;
+	__u32	msd_incompat;
+	__u64	msd_ioepoch;
+	__u64	msd_size;
+	__u64	msd_blocks;
+	__u64	msd_mountid;
+};
+
+struct md_attr {
+	__u64		   ma_valid;
+	__u64		   ma_need;
+	__u64		   ma_attr_flags;
+	struct lu_attr	  ma_attr;
+	struct lu_fid	   ma_pfid;
+	struct md_hsm	   ma_hsm;
+	struct lov_mds_md      *ma_lmm;
+	struct lmv_stripe_md   *ma_lmv;
+	void		   *ma_acl;
+	struct llog_cookie     *ma_cookie;
+	struct lustre_capa     *ma_capa;
+	struct md_som_data     *ma_som;
+	int		     ma_lmm_size;
+	int		     ma_lmv_size;
+	int		     ma_acl_size;
+	int		     ma_cookie_size;
+	__u16		   ma_layout_gen;
+};
+
+/** Additional parameters for create */
+struct md_op_spec {
+	union {
+		/** symlink target */
+		const char	       *sp_symname;
+		/** parent FID for cross-ref mkdir */
+		const struct lu_fid      *sp_pfid;
+		/** eadata for regular files */
+		struct md_spec_reg {
+			/** lov objs exist already */
+			const struct lu_fid   *fid;
+			const void *eadata;
+			int  eadatalen;
+		} sp_ea;
+	} u;
+
+	/** Create flag from client: such as MDS_OPEN_CREAT, and others. */
+	__u64      sp_cr_flags;
+
+	/** don't create lov objects or llog cookie - this replay */
+	unsigned int no_create:1,
+		     sp_cr_lookup:1, /* do lookup sanity check or not. */
+		     sp_rm_entry:1;  /* only remove name entry */
+
+	/** Current lock mode for parent dir where create is performing. */
+	mdl_mode_t sp_cr_mode;
+
+	/** to create directory */
+	const struct dt_index_features *sp_feat;
+};
+
+/**
+ * Operations implemented for each md object (both directory and leaf).
+ */
+struct md_object_operations {
+	int (*moo_permission)(const struct lu_env *env,
+			      struct md_object *pobj, struct md_object *cobj,
+			      struct md_attr *attr, int mask);
+
+	int (*moo_attr_get)(const struct lu_env *env, struct md_object *obj,
+			    struct md_attr *attr);
+
+	int (*moo_attr_set)(const struct lu_env *env, struct md_object *obj,
+			    const struct md_attr *attr);
+
+	int (*moo_xattr_get)(const struct lu_env *env, struct md_object *obj,
+			     struct lu_buf *buf, const char *name);
+
+	int (*moo_xattr_list)(const struct lu_env *env, struct md_object *obj,
+			      struct lu_buf *buf);
+
+	int (*moo_xattr_set)(const struct lu_env *env, struct md_object *obj,
+			     const struct lu_buf *buf, const char *name,
+			     int fl);
+
+	int (*moo_xattr_del)(const struct lu_env *env, struct md_object *obj,
+			     const char *name);
+
+	/** This method is used to swap the layouts between 2 objects */
+	int (*moo_swap_layouts)(const struct lu_env *env,
+			       struct md_object *obj1, struct md_object *obj2,
+			       __u64 flags);
+
+	/** \retval number of bytes actually read upon success */
+	int (*moo_readpage)(const struct lu_env *env, struct md_object *obj,
+			    const struct lu_rdpg *rdpg);
+
+	int (*moo_readlink)(const struct lu_env *env, struct md_object *obj,
+			    struct lu_buf *buf);
+	int (*moo_changelog)(const struct lu_env *env,
+			     enum changelog_rec_type type, int flags,
+			     struct md_object *obj);
+	/** part of cross-ref operation */
+	int (*moo_object_create)(const struct lu_env *env,
+				 struct md_object *obj,
+				 const struct md_op_spec *spec,
+				 struct md_attr *ma);
+
+	int (*moo_ref_add)(const struct lu_env *env,
+			   struct md_object *obj,
+			   const struct md_attr *ma);
+
+	int (*moo_ref_del)(const struct lu_env *env,
+			   struct md_object *obj,
+			   struct md_attr *ma);
+
+	int (*moo_open)(const struct lu_env *env,
+			struct md_object *obj, int flag);
+
+	int (*moo_close)(const struct lu_env *env, struct md_object *obj,
+			 struct md_attr *ma, int mode);
+
+	int (*moo_capa_get)(const struct lu_env *, struct md_object *,
+			    struct lustre_capa *, int renewal);
+
+	int (*moo_object_sync)(const struct lu_env *, struct md_object *);
+
+	int (*moo_file_lock)(const struct lu_env *env, struct md_object *obj,
+			     struct lov_mds_md *lmm, struct ldlm_extent *extent,
+			     struct lustre_handle *lockh);
+	int (*moo_file_unlock)(const struct lu_env *env, struct md_object *obj,
+			       struct lov_mds_md *lmm,
+			       struct lustre_handle *lockh);
+	int (*moo_object_lock)(const struct lu_env *env, struct md_object *obj,
+			       struct lustre_handle *lh,
+			       struct ldlm_enqueue_info *einfo,
+			       void *policy);
+};
+
+/**
+ * Operations implemented for each directory object.
+ */
+struct md_dir_operations {
+	int (*mdo_is_subdir) (const struct lu_env *env, struct md_object *obj,
+			      const struct lu_fid *fid, struct lu_fid *sfid);
+
+	int (*mdo_lookup)(const struct lu_env *env, struct md_object *obj,
+			  const struct lu_name *lname, struct lu_fid *fid,
+			  struct md_op_spec *spec);
+
+	mdl_mode_t (*mdo_lock_mode)(const struct lu_env *env,
+				    struct md_object *obj,
+				    mdl_mode_t mode);
+
+	int (*mdo_create)(const struct lu_env *env, struct md_object *pobj,
+			  const struct lu_name *lname, struct md_object *child,
+			  struct md_op_spec *spec,
+			  struct md_attr *ma);
+
+	/** This method is used for creating data object for this meta object*/
+	int (*mdo_create_data)(const struct lu_env *env, struct md_object *p,
+			       struct md_object *o,
+			       const struct md_op_spec *spec,
+			       struct md_attr *ma);
+
+	int (*mdo_rename)(const struct lu_env *env, struct md_object *spobj,
+			  struct md_object *tpobj, const struct lu_fid *lf,
+			  const struct lu_name *lsname, struct md_object *tobj,
+			  const struct lu_name *ltname, struct md_attr *ma);
+
+	int (*mdo_link)(const struct lu_env *env, struct md_object *tgt_obj,
+			struct md_object *src_obj, const struct lu_name *lname,
+			struct md_attr *ma);
+
+	int (*mdo_unlink)(const struct lu_env *env, struct md_object *pobj,
+			  struct md_object *cobj, const struct lu_name *lname,
+			  struct md_attr *ma, int no_name);
+
+	/** This method is used to compare a requested layout to an existing
+	 * layout (struct lov_mds_md_v1/3 vs struct lov_mds_md_v1/3) */
+	int (*mdo_lum_lmm_cmp)(const struct lu_env *env,
+			       struct md_object *cobj,
+			       const struct md_op_spec *spec,
+			       struct md_attr *ma);
+
+	/** partial ops for cross-ref case */
+	int (*mdo_name_insert)(const struct lu_env *env,
+			       struct md_object *obj,
+			       const struct lu_name *lname,
+			       const struct lu_fid *fid,
+			       const struct md_attr *ma);
+
+	int (*mdo_name_remove)(const struct lu_env *env,
+			       struct md_object *obj,
+			       const struct lu_name *lname,
+			       const struct md_attr *ma);
+
+	int (*mdo_rename_tgt)(const struct lu_env *env, struct md_object *pobj,
+			      struct md_object *tobj, const struct lu_fid *fid,
+			      const struct lu_name *lname, struct md_attr *ma);
+};
+
+struct md_device_operations {
+	/** meta-data device related handlers. */
+	int (*mdo_root_get)(const struct lu_env *env, struct md_device *m,
+			    struct lu_fid *f);
+
+	int (*mdo_maxsize_get)(const struct lu_env *env, struct md_device *m,
+			       int *md_size, int *cookie_size);
+
+	int (*mdo_statfs)(const struct lu_env *env, struct md_device *m,
+			  struct obd_statfs *sfs);
+
+	int (*mdo_init_capa_ctxt)(const struct lu_env *env, struct md_device *m,
+				  int mode, unsigned long timeout, __u32 alg,
+				  struct lustre_capa_key *keys);
+
+	int (*mdo_update_capa_key)(const struct lu_env *env,
+				   struct md_device *m,
+				   struct lustre_capa_key *key);
+
+	int (*mdo_llog_ctxt_get)(const struct lu_env *env,
+				 struct md_device *m, int idx, void **h);
+
+	int (*mdo_iocontrol)(const struct lu_env *env, struct md_device *m,
+			     unsigned int cmd, int len, void *data);
+};
+
+enum md_upcall_event {
+	/** Sync the md layer*/
+	MD_LOV_SYNC = (1 << 0),
+	/** Just for split, no need trans, for replay */
+	MD_NO_TRANS = (1 << 1),
+	MD_LOV_CONFIG = (1 << 2),
+	/** Trigger quota recovery */
+	MD_LOV_QUOTA = (1 << 3)
+};
+
+struct md_upcall {
+	/** this lock protects upcall using against its removal
+	 * read lock is for usage the upcall, write - for init/fini */
+	struct rw_semaphore	mu_upcall_sem;
+	/** device to call, upper layer normally */
+	struct md_device       *mu_upcall_dev;
+	/** upcall function */
+	int (*mu_upcall)(const struct lu_env *env, struct md_device *md,
+			 enum md_upcall_event ev, void *data);
+};
+
+struct md_device {
+	struct lu_device		   md_lu_dev;
+	const struct md_device_operations *md_ops;
+	struct md_upcall		   md_upcall;
+};
+
+static inline void md_upcall_init(struct md_device *m, void *upcl)
+{
+	init_rwsem(&m->md_upcall.mu_upcall_sem);
+	m->md_upcall.mu_upcall_dev = NULL;
+	m->md_upcall.mu_upcall = upcl;
+}
+
+static inline void md_upcall_dev_set(struct md_device *m, struct md_device *up)
+{
+	down_write(&m->md_upcall.mu_upcall_sem);
+	m->md_upcall.mu_upcall_dev = up;
+	up_write(&m->md_upcall.mu_upcall_sem);
+}
+
+static inline void md_upcall_fini(struct md_device *m)
+{
+	down_write(&m->md_upcall.mu_upcall_sem);
+	m->md_upcall.mu_upcall_dev = NULL;
+	m->md_upcall.mu_upcall = NULL;
+	up_write(&m->md_upcall.mu_upcall_sem);
+}
+
+static inline int md_do_upcall(const struct lu_env *env, struct md_device *m,
+				enum md_upcall_event ev, void *data)
+{
+	int rc = 0;
+	down_read(&m->md_upcall.mu_upcall_sem);
+	if (m->md_upcall.mu_upcall_dev != NULL &&
+	    m->md_upcall.mu_upcall_dev->md_upcall.mu_upcall != NULL) {
+		rc = m->md_upcall.mu_upcall_dev->md_upcall.mu_upcall(env,
+					      m->md_upcall.mu_upcall_dev,
+					      ev, data);
+	}
+	up_read(&m->md_upcall.mu_upcall_sem);
+	return rc;
+}
+
+struct md_object {
+	struct lu_object		   mo_lu;
+	const struct md_object_operations *mo_ops;
+	const struct md_dir_operations    *mo_dir_ops;
+};
+
+/**
+ * seq-server site.
+ */
+struct seq_server_site {
+	struct lu_site	     *ss_lu;
+	/**
+	 * mds number of this site.
+	 */
+	mdsno_t	       ss_node_id;
+	/**
+	 * Fid location database
+	 */
+	struct lu_server_fld *ss_server_fld;
+	struct lu_client_fld *ss_client_fld;
+
+	/**
+	 * Server Seq Manager
+	 */
+	struct lu_server_seq *ss_server_seq;
+
+	/**
+	 * Controller Seq Manager
+	 */
+	struct lu_server_seq *ss_control_seq;
+	struct obd_export    *ss_control_exp;
+
+	/**
+	 * Client Seq Manager
+	 */
+	struct lu_client_seq *ss_client_seq;
+};
+
+static inline struct md_device *lu2md_dev(const struct lu_device *d)
+{
+	LASSERT(IS_ERR(d) || lu_device_is_md(d));
+	return container_of0(d, struct md_device, md_lu_dev);
+}
+
+static inline struct lu_device *md2lu_dev(struct md_device *d)
+{
+	return &d->md_lu_dev;
+}
+
+static inline struct md_object *lu2md(const struct lu_object *o)
+{
+	LASSERT(o == NULL || IS_ERR(o) || lu_device_is_md(o->lo_dev));
+	return container_of0(o, struct md_object, mo_lu);
+}
+
+static inline struct md_object *md_object_next(const struct md_object *obj)
+{
+	return (obj ? lu2md(lu_object_next(&obj->mo_lu)) : NULL);
+}
+
+static inline struct md_device *md_obj2dev(const struct md_object *o)
+{
+	LASSERT(o == NULL || IS_ERR(o) || lu_device_is_md(o->mo_lu.lo_dev));
+	return container_of0(o->mo_lu.lo_dev, struct md_device, md_lu_dev);
+}
+
+static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
+{
+	return s->ld_seq_site;
+}
+
+static inline int md_device_init(struct md_device *md, struct lu_device_type *t)
+{
+	return lu_device_init(&md->md_lu_dev, t);
+}
+
+static inline void md_device_fini(struct md_device *md)
+{
+	lu_device_fini(&md->md_lu_dev);
+}
+
+static inline struct md_object *md_object_find_slice(const struct lu_env *env,
+						     struct md_device *md,
+						     const struct lu_fid *f)
+{
+	return lu2md(lu_object_find_slice(env, md2lu_dev(md), f, NULL));
+}
+
+
+/** md operations */
+static inline int mo_permission(const struct lu_env *env,
+				struct md_object *p,
+				struct md_object *c,
+				struct md_attr *at,
+				int mask)
+{
+	LASSERT(c->mo_ops->moo_permission);
+	return c->mo_ops->moo_permission(env, p, c, at, mask);
+}
+
+static inline int mo_attr_get(const struct lu_env *env,
+			      struct md_object *m,
+			      struct md_attr *at)
+{
+	LASSERT(m->mo_ops->moo_attr_get);
+	return m->mo_ops->moo_attr_get(env, m, at);
+}
+
+static inline int mo_readlink(const struct lu_env *env,
+			      struct md_object *m,
+			      struct lu_buf *buf)
+{
+	LASSERT(m->mo_ops->moo_readlink);
+	return m->mo_ops->moo_readlink(env, m, buf);
+}
+
+static inline int mo_changelog(const struct lu_env *env,
+			       enum changelog_rec_type type,
+			       int flags, struct md_object *m)
+{
+	LASSERT(m->mo_ops->moo_changelog);
+	return m->mo_ops->moo_changelog(env, type, flags, m);
+}
+
+static inline int mo_attr_set(const struct lu_env *env,
+			      struct md_object *m,
+			      const struct md_attr *at)
+{
+	LASSERT(m->mo_ops->moo_attr_set);
+	return m->mo_ops->moo_attr_set(env, m, at);
+}
+
+static inline int mo_xattr_get(const struct lu_env *env,
+			       struct md_object *m,
+			       struct lu_buf *buf,
+			       const char *name)
+{
+	LASSERT(m->mo_ops->moo_xattr_get);
+	return m->mo_ops->moo_xattr_get(env, m, buf, name);
+}
+
+static inline int mo_xattr_del(const struct lu_env *env,
+			       struct md_object *m,
+			       const char *name)
+{
+	LASSERT(m->mo_ops->moo_xattr_del);
+	return m->mo_ops->moo_xattr_del(env, m, name);
+}
+
+static inline int mo_xattr_set(const struct lu_env *env,
+			       struct md_object *m,
+			       const struct lu_buf *buf,
+			       const char *name,
+			       int flags)
+{
+	LASSERT(m->mo_ops->moo_xattr_set);
+	return m->mo_ops->moo_xattr_set(env, m, buf, name, flags);
+}
+
+static inline int mo_xattr_list(const struct lu_env *env,
+				struct md_object *m,
+				struct lu_buf *buf)
+{
+	LASSERT(m->mo_ops->moo_xattr_list);
+	return m->mo_ops->moo_xattr_list(env, m, buf);
+}
+
+static inline int mo_swap_layouts(const struct lu_env *env,
+				  struct md_object *o1,
+				  struct md_object *o2, __u64 flags)
+{
+	LASSERT(o1->mo_ops->moo_swap_layouts);
+	LASSERT(o2->mo_ops->moo_swap_layouts);
+	if (o1->mo_ops->moo_swap_layouts != o2->mo_ops->moo_swap_layouts)
+		return -EPERM;
+	return o1->mo_ops->moo_swap_layouts(env, o1, o2, flags);
+}
+
+static inline int mo_open(const struct lu_env *env,
+			  struct md_object *m,
+			  int flags)
+{
+	LASSERT(m->mo_ops->moo_open);
+	return m->mo_ops->moo_open(env, m, flags);
+}
+
+static inline int mo_close(const struct lu_env *env,
+			   struct md_object *m,
+			   struct md_attr *ma,
+			   int mode)
+{
+	LASSERT(m->mo_ops->moo_close);
+	return m->mo_ops->moo_close(env, m, ma, mode);
+}
+
+static inline int mo_readpage(const struct lu_env *env,
+			      struct md_object *m,
+			      const struct lu_rdpg *rdpg)
+{
+	LASSERT(m->mo_ops->moo_readpage);
+	return m->mo_ops->moo_readpage(env, m, rdpg);
+}
+
+static inline int mo_object_create(const struct lu_env *env,
+				   struct md_object *m,
+				   const struct md_op_spec *spc,
+				   struct md_attr *at)
+{
+	LASSERT(m->mo_ops->moo_object_create);
+	return m->mo_ops->moo_object_create(env, m, spc, at);
+}
+
+static inline int mo_ref_add(const struct lu_env *env,
+			     struct md_object *m,
+			     const struct md_attr *ma)
+{
+	LASSERT(m->mo_ops->moo_ref_add);
+	return m->mo_ops->moo_ref_add(env, m, ma);
+}
+
+static inline int mo_ref_del(const struct lu_env *env,
+			     struct md_object *m,
+			     struct md_attr *ma)
+{
+	LASSERT(m->mo_ops->moo_ref_del);
+	return m->mo_ops->moo_ref_del(env, m, ma);
+}
+
+static inline int mo_capa_get(const struct lu_env *env,
+			      struct md_object *m,
+			      struct lustre_capa *c,
+			      int renewal)
+{
+	LASSERT(m->mo_ops->moo_capa_get);
+	return m->mo_ops->moo_capa_get(env, m, c, renewal);
+}
+
+static inline int mo_object_sync(const struct lu_env *env, struct md_object *m)
+{
+	LASSERT(m->mo_ops->moo_object_sync);
+	return m->mo_ops->moo_object_sync(env, m);
+}
+
+static inline int mo_file_lock(const struct lu_env *env, struct md_object *m,
+			       struct lov_mds_md *lmm,
+			       struct ldlm_extent *extent,
+			       struct lustre_handle *lockh)
+{
+	LASSERT(m->mo_ops->moo_file_lock);
+	return m->mo_ops->moo_file_lock(env, m, lmm, extent, lockh);
+}
+
+static inline int mo_file_unlock(const struct lu_env *env, struct md_object *m,
+				 struct lov_mds_md *lmm,
+				 struct lustre_handle *lockh)
+{
+	LASSERT(m->mo_ops->moo_file_unlock);
+	return m->mo_ops->moo_file_unlock(env, m, lmm, lockh);
+}
+
+static inline int mo_object_lock(const struct lu_env *env,
+				 struct md_object *m,
+				 struct lustre_handle *lh,
+				 struct ldlm_enqueue_info *einfo,
+				 void *policy)
+{
+	LASSERT(m->mo_ops->moo_object_lock);
+	return m->mo_ops->moo_object_lock(env, m, lh, einfo, policy);
+}
+
+static inline int mdo_lookup(const struct lu_env *env,
+			     struct md_object *p,
+			     const struct lu_name *lname,
+			     struct lu_fid *f,
+			     struct md_op_spec *spec)
+{
+	LASSERT(p->mo_dir_ops->mdo_lookup);
+	return p->mo_dir_ops->mdo_lookup(env, p, lname, f, spec);
+}
+
+static inline mdl_mode_t mdo_lock_mode(const struct lu_env *env,
+				       struct md_object *mo,
+				       mdl_mode_t lm)
+{
+	if (mo->mo_dir_ops->mdo_lock_mode == NULL)
+		return MDL_MINMODE;
+	return mo->mo_dir_ops->mdo_lock_mode(env, mo, lm);
+}
+
+static inline int mdo_create(const struct lu_env *env,
+			     struct md_object *p,
+			     const struct lu_name *lchild_name,
+			     struct md_object *c,
+			     struct md_op_spec *spc,
+			     struct md_attr *at)
+{
+	LASSERT(p->mo_dir_ops->mdo_create);
+	return p->mo_dir_ops->mdo_create(env, p, lchild_name, c, spc, at);
+}
+
+static inline int mdo_create_data(const struct lu_env *env,
+				  struct md_object *p,
+				  struct md_object *c,
+				  const struct md_op_spec *spec,
+				  struct md_attr *ma)
+{
+	LASSERT(c->mo_dir_ops->mdo_create_data);
+	return c->mo_dir_ops->mdo_create_data(env, p, c, spec, ma);
+}
+
+static inline int mdo_rename(const struct lu_env *env,
+			     struct md_object *sp,
+			     struct md_object *tp,
+			     const struct lu_fid *lf,
+			     const struct lu_name *lsname,
+			     struct md_object *t,
+			     const struct lu_name *ltname,
+			     struct md_attr *ma)
+{
+	LASSERT(tp->mo_dir_ops->mdo_rename);
+	return tp->mo_dir_ops->mdo_rename(env, sp, tp, lf, lsname, t, ltname,
+					  ma);
+}
+
+static inline int mdo_is_subdir(const struct lu_env *env,
+				struct md_object *mo,
+				const struct lu_fid *fid,
+				struct lu_fid *sfid)
+{
+	LASSERT(mo->mo_dir_ops->mdo_is_subdir);
+	return mo->mo_dir_ops->mdo_is_subdir(env, mo, fid, sfid);
+}
+
+static inline int mdo_link(const struct lu_env *env,
+			   struct md_object *p,
+			   struct md_object *s,
+			   const struct lu_name *lname,
+			   struct md_attr *ma)
+{
+	LASSERT(s->mo_dir_ops->mdo_link);
+	return s->mo_dir_ops->mdo_link(env, p, s, lname, ma);
+}
+
+static inline int mdo_unlink(const struct lu_env *env,
+			     struct md_object *p,
+			     struct md_object *c,
+			     const struct lu_name *lname,
+			     struct md_attr *ma, int no_name)
+{
+	LASSERT(p->mo_dir_ops->mdo_unlink);
+	return p->mo_dir_ops->mdo_unlink(env, p, c, lname, ma, no_name);
+}
+
+static inline int mdo_lum_lmm_cmp(const struct lu_env *env,
+				  struct md_object *c,
+				  const struct md_op_spec *spec,
+				  struct md_attr *ma)
+{
+	LASSERT(c->mo_dir_ops->mdo_lum_lmm_cmp);
+	return c->mo_dir_ops->mdo_lum_lmm_cmp(env, c, spec, ma);
+}
+
+static inline int mdo_name_insert(const struct lu_env *env,
+				  struct md_object *p,
+				  const struct lu_name *lname,
+				  const struct lu_fid *f,
+				  const struct md_attr *ma)
+{
+	LASSERT(p->mo_dir_ops->mdo_name_insert);
+	return p->mo_dir_ops->mdo_name_insert(env, p, lname, f, ma);
+}
+
+static inline int mdo_name_remove(const struct lu_env *env,
+				  struct md_object *p,
+				  const struct lu_name *lname,
+				  const struct md_attr *ma)
+{
+	LASSERT(p->mo_dir_ops->mdo_name_remove);
+	return p->mo_dir_ops->mdo_name_remove(env, p, lname, ma);
+}
+
+static inline int mdo_rename_tgt(const struct lu_env *env,
+				 struct md_object *p,
+				 struct md_object *t,
+				 const struct lu_fid *lf,
+				 const struct lu_name *lname,
+				 struct md_attr *ma)
+{
+	if (t) {
+		LASSERT(t->mo_dir_ops->mdo_rename_tgt);
+		return t->mo_dir_ops->mdo_rename_tgt(env, p, t, lf, lname, ma);
+	} else {
+		LASSERT(p->mo_dir_ops->mdo_rename_tgt);
+		return p->mo_dir_ops->mdo_rename_tgt(env, p, t, lf, lname, ma);
+	}
+}
+
+/**
+ * Used in MDD/OUT layer for object lock rule
+ **/
+enum mdd_object_role {
+	MOR_SRC_PARENT,
+	MOR_SRC_CHILD,
+	MOR_TGT_PARENT,
+	MOR_TGT_CHILD,
+	MOR_TGT_ORPHAN
+};
+
+struct dt_device;
+/**
+ * Structure to hold object information. This is used to create object
+ * \pre llod_dir exist
+ */
+struct lu_local_obj_desc {
+	const char		      *llod_dir;
+	const char		      *llod_name;
+	__u32			    llod_oid;
+	int			      llod_is_index;
+	const struct dt_index_features  *llod_feat;
+	struct list_head		       llod_linkage;
+};
+
+int lustre_buf2som(void *buf, int rc, struct md_som_data *msd);
+int lustre_buf2hsm(void *buf, int rc, struct md_hsm *mh);
+void lustre_hsm2buf(void *buf, struct md_hsm *mh);
+
+struct lu_ucred {
+	__u32	       uc_valid;
+	__u32	       uc_o_uid;
+	__u32	       uc_o_gid;
+	__u32	       uc_o_fsuid;
+	__u32	       uc_o_fsgid;
+	__u32	       uc_uid;
+	__u32	       uc_gid;
+	__u32	       uc_fsuid;
+	__u32	       uc_fsgid;
+	__u32	       uc_suppgids[2];
+	cfs_cap_t	   uc_cap;
+	__u32	       uc_umask;
+	group_info_t   *uc_ginfo;
+	struct md_identity *uc_identity;
+};
+
+struct lu_ucred *lu_ucred(const struct lu_env *env);
+
+struct lu_ucred *lu_ucred_check(const struct lu_env *env);
+
+struct lu_ucred *lu_ucred_assert(const struct lu_env *env);
+
+int lu_ucred_global_init(void);
+
+void lu_ucred_global_fini(void);
+
+#define md_cap_t(x) (x)
+
+#define MD_CAP_TO_MASK(x) (1 << (x))
+
+#define md_cap_raised(c, flag) (md_cap_t(c) & MD_CAP_TO_MASK(flag))
+
+/* capable() is copied from linux kernel! */
+static inline int md_capable(struct lu_ucred *uc, cfs_cap_t cap)
+{
+	if (md_cap_raised(uc->uc_cap, cap))
+		return 1;
+	return 0;
+}
+
+/** @} md */
+#endif /* _LINUX_MD_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
new file mode 100644
index 000000000000..0a251fdfe167
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -0,0 +1,1677 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __OBD_H
+#define __OBD_H
+
+#include <linux/obd.h>
+
+#define IOC_OSC_TYPE	 'h'
+#define IOC_OSC_MIN_NR       20
+#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
+#define IOC_OSC_MAX_NR       50
+
+#define IOC_MDC_TYPE	 'i'
+#define IOC_MDC_MIN_NR       20
+#define IOC_MDC_MAX_NR       50
+
+#include <lustre/lustre_idl.h>
+#include <lu_ref.h>
+#include <lustre_lib.h>
+#include <lustre_export.h>
+#include <lustre_fld.h>
+#include <lustre_capa.h>
+
+#include <linux/libcfs/bitmap.h>
+
+
+#define MAX_OBD_DEVICES 8192
+
+struct osc_async_rc {
+	int     ar_rc;
+	int     ar_force_sync;
+	__u64   ar_min_xid;
+};
+
+struct lov_oinfo {		 /* per-stripe data structure */
+	struct ost_id   loi_oi;    /* object ID/Sequence on the target OST */
+	int loi_ost_idx;	   /* OST stripe index in lov_tgt_desc->tgts */
+	int loi_ost_gen;	   /* generation of this loi_ost_idx */
+
+	unsigned long loi_kms_valid:1;
+	__u64 loi_kms;	     /* known minimum size */
+	struct ost_lvb loi_lvb;
+	struct osc_async_rc     loi_ar;
+};
+
+static inline void loi_kms_set(struct lov_oinfo *oinfo, __u64 kms)
+{
+	oinfo->loi_kms = kms;
+	oinfo->loi_kms_valid = 1;
+}
+
+static inline void loi_init(struct lov_oinfo *loi)
+{
+}
+
+struct lov_stripe_md {
+	atomic_t     lsm_refc;
+	spinlock_t	lsm_lock;
+	pid_t	    lsm_lock_owner; /* debugging */
+
+	/* maximum possible file size, might change as OSTs status changes,
+	 * e.g. disconnected, deactivated */
+	__u64	    lsm_maxbytes;
+	struct {
+		/* Public members. */
+		struct ost_id lw_object_oi; /* lov object id/seq */
+
+		/* LOV-private members start here -- only for use in lov/. */
+		__u32 lw_magic;
+		__u32 lw_stripe_size;      /* size of the stripe */
+		__u32 lw_pattern;	  /* striping pattern (RAID0, RAID1) */
+		__u16 lw_stripe_count;  /* number of objects being striped over */
+		__u16 lw_layout_gen;       /* generation of the layout */
+		char  lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */
+	} lsm_wire;
+
+	struct lov_oinfo *lsm_oinfo[0];
+};
+
+#define lsm_oi		 lsm_wire.lw_object_oi
+#define lsm_magic	lsm_wire.lw_magic
+#define lsm_layout_gen   lsm_wire.lw_layout_gen
+#define lsm_stripe_size  lsm_wire.lw_stripe_size
+#define lsm_pattern      lsm_wire.lw_pattern
+#define lsm_stripe_count lsm_wire.lw_stripe_count
+#define lsm_pool_name    lsm_wire.lw_pool_name
+
+struct obd_info;
+
+typedef int (*obd_enqueue_update_f)(void *cookie, int rc);
+
+/* obd info for a particular level (lov, osc). */
+struct obd_info {
+	/* Lock policy. It keeps an extent which is specific for a particular
+	 * OSC. (e.g. lov_prep_enqueue_set initialises extent of the policy,
+	 * and osc_enqueue passes it into ldlm_lock_match & ldlm_cli_enqueue. */
+	ldlm_policy_data_t      oi_policy;
+	/* Flags used for set request specific flags:
+	   - while lock handling, the flags obtained on the enqueue
+	   request are set here.
+	   - while stats, the flags used for control delay/resend.
+	   - while setattr, the flags used for distinguish punch operation
+	 */
+	__u64		   oi_flags;
+	/* Lock handle specific for every OSC lock. */
+	struct lustre_handle   *oi_lockh;
+	/* lsm data specific for every OSC. */
+	struct lov_stripe_md   *oi_md;
+	/* obdo data specific for every OSC, if needed at all. */
+	struct obdo	    *oi_oa;
+	/* statfs data specific for every OSC, if needed at all. */
+	struct obd_statfs      *oi_osfs;
+	/* An update callback which is called to update some data on upper
+	 * level. E.g. it is used for update lsm->lsm_oinfo at every recieved
+	 * request in osc level for enqueue requests. It is also possible to
+	 * update some caller data from LOV layer if needed. */
+	obd_enqueue_update_f    oi_cb_up;
+	/* oss capability, its type is obd_capa in client to avoid copy.
+	 * in contrary its type is lustre_capa in OSS. */
+	void		   *oi_capa;
+	/* transfer jobid from ost_sync() to filter_sync()... */
+	char		   *oi_jobid;
+};
+
+/* compare all relevant fields. */
+static inline int lov_stripe_md_cmp(struct lov_stripe_md *m1,
+				    struct lov_stripe_md *m2)
+{
+	/*
+	 * ->lsm_wire contains padding, but it should be zeroed out during
+	 * allocation.
+	 */
+	return memcmp(&m1->lsm_wire, &m2->lsm_wire, sizeof m1->lsm_wire);
+}
+
+static inline int lov_lum_lsm_cmp(struct lov_user_md *lum,
+				  struct lov_stripe_md  *lsm)
+{
+	if (lsm->lsm_magic != lum->lmm_magic)
+		return 1;
+	if ((lsm->lsm_stripe_count != 0) && (lum->lmm_stripe_count != 0) &&
+	    (lsm->lsm_stripe_count != lum->lmm_stripe_count))
+		return 2;
+	if ((lsm->lsm_stripe_size != 0) && (lum->lmm_stripe_size != 0) &&
+	    (lsm->lsm_stripe_size != lum->lmm_stripe_size))
+		return 3;
+	if ((lsm->lsm_pattern != 0) && (lum->lmm_pattern != 0) &&
+	    (lsm->lsm_pattern != lum->lmm_pattern))
+		return 4;
+	if ((lsm->lsm_magic == LOV_MAGIC_V3) &&
+	    (strncmp(lsm->lsm_pool_name,
+		     ((struct lov_user_md_v3 *)lum)->lmm_pool_name,
+		     LOV_MAXPOOLNAME) != 0))
+		return 5;
+	return 0;
+}
+
+static inline int lov_lum_swab_if_needed(struct lov_user_md_v3 *lumv3,
+					 int *lmm_magic,
+					 struct lov_user_md *lum)
+{
+	if (lum && copy_from_user(lumv3, lum,sizeof(struct lov_user_md_v1)))
+		return -EFAULT;
+
+	*lmm_magic = lumv3->lmm_magic;
+
+	if (*lmm_magic == __swab32(LOV_USER_MAGIC_V1)) {
+		lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lumv3);
+		*lmm_magic = LOV_USER_MAGIC_V1;
+	} else if (*lmm_magic == LOV_USER_MAGIC_V3) {
+		if (lum && copy_from_user(lumv3, lum, sizeof(*lumv3)))
+			return -EFAULT;
+	} else if (*lmm_magic == __swab32(LOV_USER_MAGIC_V3)) {
+		if (lum && copy_from_user(lumv3, lum, sizeof(*lumv3)))
+			return -EFAULT;
+		lustre_swab_lov_user_md_v3(lumv3);
+		*lmm_magic = LOV_USER_MAGIC_V3;
+	} else if (*lmm_magic != LOV_USER_MAGIC_V1) {
+		CDEBUG(D_IOCTL,
+		       "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
+		       *lmm_magic, LOV_USER_MAGIC_V1, LOV_USER_MAGIC_V3);
+		       return -EINVAL;
+	}
+	return 0;
+}
+
+void lov_stripe_lock(struct lov_stripe_md *md);
+void lov_stripe_unlock(struct lov_stripe_md *md);
+
+struct obd_type {
+	struct list_head typ_chain;
+	struct obd_ops *typ_dt_ops;
+	struct md_ops *typ_md_ops;
+	proc_dir_entry_t *typ_procroot;
+	char *typ_name;
+	int  typ_refcnt;
+	struct lu_device_type *typ_lu;
+	spinlock_t obd_type_lock;
+};
+
+struct brw_page {
+	obd_off  off;
+	struct page *pg;
+	int count;
+	obd_flag flag;
+};
+
+/* Individual type definitions */
+
+struct ost_server_data;
+
+struct osd_properties {
+	size_t osd_max_ea_size;
+};
+
+#define OBT_MAGIC       0xBDDECEAE
+/* hold common fields for "target" device */
+struct obd_device_target {
+	__u32		     obt_magic;
+	__u32		     obt_instance;
+	struct super_block       *obt_sb;
+	/** last_rcvd file */
+	struct file	      *obt_rcvd_filp;
+	__u64		     obt_mount_count;
+	struct rw_semaphore	  obt_rwsem;
+	struct vfsmount	  *obt_vfsmnt;
+	struct file	      *obt_health_check_filp;
+	struct osd_properties     obt_osd_properties;
+	struct obd_job_stats      obt_jobstats;
+};
+
+/* llog contexts */
+enum llog_ctxt_id {
+	LLOG_CONFIG_ORIG_CTXT  =  0,
+	LLOG_CONFIG_REPL_CTXT,
+	LLOG_MDS_OST_ORIG_CTXT,
+	LLOG_MDS_OST_REPL_CTXT,
+	LLOG_SIZE_ORIG_CTXT,
+	LLOG_SIZE_REPL_CTXT,
+	LLOG_RD1_ORIG_CTXT,
+	LLOG_RD1_REPL_CTXT,
+	LLOG_TEST_ORIG_CTXT,
+	LLOG_TEST_REPL_CTXT,
+	LLOG_LOVEA_ORIG_CTXT,
+	LLOG_LOVEA_REPL_CTXT,
+	LLOG_CHANGELOG_ORIG_CTXT,      /**< changelog generation on mdd */
+	LLOG_CHANGELOG_REPL_CTXT,      /**< changelog access on clients */
+	LLOG_CHANGELOG_USER_ORIG_CTXT, /**< for multiple changelog consumers */
+	LLOG_MAX_CTXTS
+};
+
+#define FILTER_SUBDIR_COUNT      32	    /* set to zero for no subdirs */
+
+struct filter_subdirs {
+	struct dentry *dentry[FILTER_SUBDIR_COUNT];
+};
+
+
+struct filter_ext {
+	__u64		fe_start;
+	__u64		fe_end;
+};
+
+struct filter_obd {
+	/* NB this field MUST be first */
+	struct obd_device_target fo_obt;
+	const char		*fo_fstype;
+
+	int			fo_group_count;
+	struct dentry		*fo_dentry_O;
+	struct dentry		**fo_dentry_O_groups;
+	struct filter_subdirs	*fo_dentry_O_sub;
+	struct mutex		fo_init_lock;	/* group initialization lock*/
+	int			fo_committed_group;
+
+	spinlock_t		fo_objidlock;	/* protect fo_lastobjid */
+
+	unsigned long		fo_destroys_in_progress;
+	struct mutex		fo_create_locks[FILTER_SUBDIR_COUNT];
+
+	struct list_head fo_export_list;
+	int		  fo_subdir_count;
+
+	obd_size	     fo_tot_dirty;      /* protected by obd_osfs_lock */
+	obd_size	     fo_tot_granted;    /* all values in bytes */
+	obd_size	     fo_tot_pending;
+	int		  fo_tot_granted_clients;
+
+	obd_size	     fo_readcache_max_filesize;
+	spinlock_t		fo_flags_lock;
+	unsigned int	 fo_read_cache:1,   /**< enable read-only cache */
+			     fo_writethrough_cache:1,/**< read cache writes */
+			     fo_mds_ost_sync:1, /**< MDS-OST orphan recovery*/
+			     fo_raid_degraded:1;/**< RAID device degraded */
+
+	struct obd_import   *fo_mdc_imp;
+	struct obd_uuid      fo_mdc_uuid;
+	struct lustre_handle fo_mdc_conn;
+	struct file	**fo_last_objid_files;
+	__u64	       *fo_last_objids; /* last created objid for groups,
+					      * protected by fo_objidlock */
+
+	struct mutex		fo_alloc_lock;
+
+	atomic_t	 fo_r_in_flight;
+	atomic_t	 fo_w_in_flight;
+
+	/*
+	 * per-filter pool of kiobuf's allocated by filter_common_setup() and
+	 * torn down by filter_cleanup().
+	 *
+	 * This pool contains kiobuf used by
+	 * filter_{prep,commit}rw_{read,write}() and is shared by all OST
+	 * threads.
+	 *
+	 * Locking: protected by internal lock of cfs_hash, pool can be
+	 * found from this hash table by t_id of ptlrpc_thread.
+	 */
+	struct cfs_hash		*fo_iobuf_hash;
+
+	struct brw_stats	 fo_filter_stats;
+
+	int		      fo_fmd_max_num; /* per exp filter_mod_data */
+	int		      fo_fmd_max_age; /* jiffies to fmd expiry */
+	unsigned long	    fo_syncjournal:1, /* sync journal on writes */
+				 fo_sync_lock_cancel:2;/* sync on lock cancel */
+
+
+	/* sptlrpc stuff */
+	rwlock_t		fo_sptlrpc_lock;
+	struct sptlrpc_rule_set  fo_sptlrpc_rset;
+
+	/* capability related */
+	unsigned int	     fo_fl_oss_capa;
+	struct list_head	       fo_capa_keys;
+	struct hlist_head	*fo_capa_hash;
+	int		      fo_sec_level;
+};
+
+struct timeout_item {
+	enum timeout_event ti_event;
+	cfs_time_t	 ti_timeout;
+	timeout_cb_t       ti_cb;
+	void	      *ti_cb_data;
+	struct list_head	 ti_obd_list;
+	struct list_head	 ti_chain;
+};
+
+#define OSC_MAX_RIF_DEFAULT       8
+#define MDS_OSC_MAX_RIF_DEFAULT   50
+#define OSC_MAX_RIF_MAX	 256
+#define OSC_MAX_DIRTY_DEFAULT  (OSC_MAX_RIF_DEFAULT * 4)
+#define OSC_MAX_DIRTY_MB_MAX   2048     /* arbitrary, but < MAX_LONG bytes */
+#define OSC_DEFAULT_RESENDS      10
+
+/* possible values for fo_sync_lock_cancel */
+enum {
+	NEVER_SYNC_ON_CANCEL = 0,
+	BLOCKING_SYNC_ON_CANCEL = 1,
+	ALWAYS_SYNC_ON_CANCEL = 2,
+	NUM_SYNC_ON_CANCEL_STATES
+};
+
+#define MDC_MAX_RIF_DEFAULT       8
+#define MDC_MAX_RIF_MAX	 512
+
+struct mdc_rpc_lock;
+struct obd_import;
+struct client_obd {
+	struct rw_semaphore  cl_sem;
+	struct obd_uuid	  cl_target_uuid;
+	struct obd_import       *cl_import; /* ptlrpc connection state */
+	int		      cl_conn_count;
+	/* max_mds_easize is purely a performance thing so we don't have to
+	 * call obd_size_diskmd() all the time. */
+	int		      cl_default_mds_easize;
+	int		      cl_max_mds_easize;
+	int		      cl_max_mds_cookiesize;
+
+	enum lustre_sec_part     cl_sp_me;
+	enum lustre_sec_part     cl_sp_to;
+	struct sptlrpc_flavor    cl_flvr_mgc;   /* fixed flavor of mgc->mgs */
+
+	/* the grant values are protected by loi_list_lock below */
+	long		     cl_dirty;	 /* all _dirty_ in bytes */
+	long		     cl_dirty_max;     /* allowed w/o rpc */
+	long		     cl_dirty_transit; /* dirty synchronous */
+	long		     cl_avail_grant;   /* bytes of credit for ost */
+	long		     cl_lost_grant;    /* lost credits (trunc) */
+
+	/* since we allocate grant by blocks, we don't know how many grant will
+	 * be used to add a page into cache. As a solution, we reserve maximum
+	 * grant before trying to dirty a page and unreserve the rest.
+	 * See osc_{reserve|unreserve}_grant for details. */
+	long		 cl_reserved_grant;
+	struct list_head	   cl_cache_waiters; /* waiting for cache/grant */
+	cfs_time_t	   cl_next_shrink_grant;   /* jiffies */
+	struct list_head	   cl_grant_shrink_list;  /* Timeout event list */
+	int		  cl_grant_shrink_interval; /* seconds */
+
+	/* A chunk is an optimal size used by osc_extent to determine
+	 * the extent size. A chunk is max(PAGE_CACHE_SIZE, OST block size) */
+	int		  cl_chunkbits;
+	int		  cl_chunk;
+	int		  cl_extent_tax; /* extent overhead, by bytes */
+
+	/* keep track of objects that have lois that contain pages which
+	 * have been queued for async brw.  this lock also protects the
+	 * lists of osc_client_pages that hang off of the loi */
+	/*
+	 * ->cl_loi_list_lock protects consistency of
+	 * ->cl_loi_{ready,read,write}_list. ->ap_make_ready() and
+	 * ->ap_completion() call-backs are executed under this lock. As we
+	 * cannot guarantee that these call-backs never block on all platforms
+	 * (as a matter of fact they do block on Mac OS X), type of
+	 * ->cl_loi_list_lock is platform dependent: it's a spin-lock on Linux
+	 * and blocking mutex on Mac OS X. (Alternative is to make this lock
+	 * blocking everywhere, but we don't want to slow down fast-path of
+	 * our main platform.)
+	 *
+	 * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together
+	 * with client_obd_list_{un,}lock() and
+	 * client_obd_list_lock_{init,done}() functions.
+	 *
+	 * NB by Jinshan: though field names are still _loi_, but actually
+	 * osc_object{}s are in the list.
+	 */
+	client_obd_lock_t	cl_loi_list_lock;
+	struct list_head	       cl_loi_ready_list;
+	struct list_head	       cl_loi_hp_ready_list;
+	struct list_head	       cl_loi_write_list;
+	struct list_head	       cl_loi_read_list;
+	int		      cl_r_in_flight;
+	int		      cl_w_in_flight;
+	/* just a sum of the loi/lop pending numbers to be exported by /proc */
+	atomic_t	     cl_pending_w_pages;
+	atomic_t	     cl_pending_r_pages;
+	__u32			 cl_max_pages_per_rpc;
+	int		      cl_max_rpcs_in_flight;
+	struct obd_histogram     cl_read_rpc_hist;
+	struct obd_histogram     cl_write_rpc_hist;
+	struct obd_histogram     cl_read_page_hist;
+	struct obd_histogram     cl_write_page_hist;
+	struct obd_histogram     cl_read_offset_hist;
+	struct obd_histogram     cl_write_offset_hist;
+
+	/* lru for osc caching pages */
+	struct cl_client_cache	*cl_cache;
+	struct list_head		 cl_lru_osc; /* member of cl_cache->ccc_lru */
+	atomic_t		*cl_lru_left;
+	atomic_t		 cl_lru_busy;
+	atomic_t		 cl_lru_shrinkers;
+	atomic_t		 cl_lru_in_list;
+	struct list_head		 cl_lru_list; /* lru page list */
+	client_obd_lock_t	 cl_lru_list_lock; /* page list protector */
+
+	/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
+	atomic_t	     cl_destroy_in_flight;
+	wait_queue_head_t	      cl_destroy_waitq;
+
+	struct mdc_rpc_lock     *cl_rpc_lock;
+	struct mdc_rpc_lock     *cl_close_lock;
+
+	/* mgc datastruct */
+	struct semaphore	 cl_mgc_sem;
+	struct vfsmount	 *cl_mgc_vfsmnt;
+	struct dentry	   *cl_mgc_configs_dir;
+	atomic_t	     cl_mgc_refcount;
+	struct obd_export       *cl_mgc_mgsexp;
+
+	/* checksumming for data sent over the network */
+	unsigned int	     cl_checksum:1; /* 0 = disabled, 1 = enabled */
+	/* supported checksum types that are worked out at connect time */
+	__u32		    cl_supp_cksum_types;
+	/* checksum algorithm to be used */
+	cksum_type_t	     cl_cksum_type;
+
+	/* also protected by the poorly named _loi_list_lock lock above */
+	struct osc_async_rc      cl_ar;
+
+	/* used by quotacheck when the servers are older than 2.4 */
+	int		      cl_qchk_stat; /* quotacheck stat of the peer */
+#define CL_NOT_QUOTACHECKED 1   /* client->cl_qchk_stat init value */
+#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 7, 50, 0)
+#warning "please consider removing quotacheck compatibility code"
+#endif
+
+	/* sequence manager */
+	struct lu_client_seq    *cl_seq;
+
+	atomic_t	     cl_resends; /* resend count */
+
+	/* ptlrpc work for writeback in ptlrpcd context */
+	void		    *cl_writeback_work;
+	/* hash tables for osc_quota_info */
+	cfs_hash_t	      *cl_quota_hash[MAXQUOTAS];
+};
+#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
+
+struct obd_id_info {
+	__u32   idx;
+	obd_id  *data;
+};
+
+/* */
+
+struct echo_obd {
+	struct obd_device_target eo_obt;
+	struct obdo		eo_oa;
+	spinlock_t		 eo_lock;
+	__u64			 eo_lastino;
+	struct lustre_handle	eo_nl_lock;
+	atomic_t		eo_prep;
+};
+
+struct ost_obd {
+	struct ptlrpc_service	*ost_service;
+	struct ptlrpc_service	*ost_create_service;
+	struct ptlrpc_service	*ost_io_service;
+	struct ptlrpc_service	*ost_seq_service;
+	struct mutex		ost_health_mutex;
+};
+
+struct echo_client_obd {
+	struct obd_export	*ec_exp;   /* the local connection to osc/lov */
+	spinlock_t		ec_lock;
+	struct list_head	   ec_objects;
+	struct list_head	   ec_locks;
+	int		  ec_nstripes;
+	__u64		ec_unique;
+};
+
+struct lov_qos_oss {
+	struct obd_uuid     lqo_uuid;       /* ptlrpc's c_remote_uuid */
+	struct list_head	  lqo_oss_list;   /* link to lov_qos */
+	__u64	       lqo_bavail;     /* total bytes avail on OSS */
+	__u64	       lqo_penalty;    /* current penalty */
+	__u64	       lqo_penalty_per_obj;/* penalty decrease every obj*/
+	time_t	      lqo_used;       /* last used time, seconds */
+	__u32	       lqo_ost_count;  /* number of osts on this oss */
+};
+
+struct ltd_qos {
+	struct lov_qos_oss *ltq_oss;	 /* oss info */
+	__u64	       ltq_penalty;     /* current penalty */
+	__u64	       ltq_penalty_per_obj; /* penalty decrease every obj*/
+	__u64	       ltq_weight;      /* net weighting */
+	time_t	      ltq_used;	/* last used time, seconds */
+	unsigned int	ltq_usable:1;    /* usable for striping */
+};
+
+/* Generic subset of OSTs */
+struct ost_pool {
+	__u32	      *op_array;      /* array of index of
+						   lov_obd->lov_tgts */
+	unsigned int	op_count;      /* number of OSTs in the array */
+	unsigned int	op_size;       /* allocated size of lp_array */
+	struct rw_semaphore op_rw_sem;     /* to protect ost_pool use */
+};
+
+/* Round-robin allocator data */
+struct lov_qos_rr {
+	__u32	       lqr_start_idx;   /* start index of new inode */
+	__u32	       lqr_offset_idx;  /* aliasing for start_idx  */
+	int		 lqr_start_count; /* reseed counter */
+	struct ost_pool     lqr_pool;	/* round-robin optimized list */
+	unsigned long       lqr_dirty:1;     /* recalc round-robin list */
+};
+
+/* allow statfs data caching for 1 second */
+#define OBD_STATFS_CACHE_SECONDS 1
+
+struct lov_statfs_data {
+	struct obd_info   lsd_oi;
+	struct obd_statfs lsd_statfs;
+};
+/* Stripe placement optimization */
+struct lov_qos {
+	struct list_head	  lq_oss_list; /* list of OSSs that targets use */
+	struct rw_semaphore lq_rw_sem;
+	__u32	       lq_active_oss_count;
+	unsigned int	lq_prio_free;   /* priority for free space */
+	unsigned int	lq_threshold_rr;/* priority for rr */
+	struct lov_qos_rr   lq_rr;	  /* round robin qos data */
+	unsigned long       lq_dirty:1,     /* recalc qos data */
+			    lq_same_space:1,/* the ost's all have approx.
+					       the same space avail */
+			    lq_reset:1,     /* zero current penalties */
+			    lq_statfs_in_progress:1; /* statfs op in
+							progress */
+	/* qos statfs data */
+	struct lov_statfs_data *lq_statfs_data;
+	wait_queue_head_t	 lq_statfs_waitq; /* waitqueue to notify statfs
+					      * requests completion */
+};
+
+struct lov_tgt_desc {
+	struct list_head	  ltd_kill;
+	struct obd_uuid     ltd_uuid;
+	struct obd_device  *ltd_obd;
+	struct obd_export  *ltd_exp;
+	struct ltd_qos      ltd_qos;     /* qos info per target */
+	__u32	       ltd_gen;
+	__u32	       ltd_index;   /* index in lov_obd->tgts */
+	unsigned long       ltd_active:1,/* is this target up for requests */
+			    ltd_activate:1,/* should  target be activated */
+			    ltd_reap:1;  /* should this target be deleted */
+};
+
+/* Pool metadata */
+#define pool_tgt_size(_p)   _p->pool_obds.op_size
+#define pool_tgt_count(_p)  _p->pool_obds.op_count
+#define pool_tgt_array(_p)  _p->pool_obds.op_array
+#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem
+
+struct pool_desc {
+	char		  pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */
+	struct ost_pool       pool_obds;	      /* pool members */
+	atomic_t	  pool_refcount;	  /* pool ref. counter */
+	struct lov_qos_rr     pool_rr;		/* round robin qos */
+	struct hlist_node      pool_hash;	      /* access by poolname */
+	struct list_head	    pool_list;	      /* serial access */
+	proc_dir_entry_t *pool_proc_entry;	/* file in /proc */
+	struct obd_device    *pool_lobd;	      /* obd of the lov/lod to which
+						       * this pool belongs */
+};
+
+struct lov_obd {
+	struct lov_desc	 desc;
+	struct lov_tgt_desc   **lov_tgts;	      /* sparse array */
+	struct ost_pool	 lov_packed;	    /* all OSTs in a packed
+							  array */
+	struct mutex		lov_lock;
+	struct obd_connect_data lov_ocd;
+	atomic_t	    lov_refcount;
+	__u32		   lov_tgt_count;	 /* how many OBD's */
+	__u32		   lov_active_tgt_count;  /* how many active */
+	__u32		   lov_death_row;/* tgts scheduled to be deleted */
+	__u32		   lov_tgt_size;   /* size of tgts array */
+	int		     lov_connects;
+	int		     lov_pool_count;
+	cfs_hash_t	     *lov_pools_hash_body; /* used for key access */
+	struct list_head	      lov_pool_list; /* used for sequential access */
+	proc_dir_entry_t   *lov_pool_proc_entry;
+	enum lustre_sec_part    lov_sp_me;
+
+	/* Cached LRU pages from upper layer */
+	void		       *lov_cache;
+
+	struct rw_semaphore     lov_notify_lock;
+};
+
+struct lmv_tgt_desc {
+	struct obd_uuid		ltd_uuid;
+	struct obd_export	*ltd_exp;
+	int			ltd_idx;
+	struct mutex		ltd_fid_mutex;
+	unsigned long		ltd_active:1; /* target up for requests */
+};
+
+enum placement_policy {
+	PLACEMENT_CHAR_POLICY   = 0,
+	PLACEMENT_NID_POLICY    = 1,
+	PLACEMENT_INVAL_POLICY  = 2,
+	PLACEMENT_MAX_POLICY
+};
+
+typedef enum placement_policy placement_policy_t;
+
+struct lmv_obd {
+	int			refcount;
+	struct lu_client_fld	lmv_fld;
+	spinlock_t		lmv_lock;
+	placement_policy_t	lmv_placement;
+	struct lmv_desc		desc;
+	struct obd_uuid		cluuid;
+	struct obd_export	*exp;
+
+	struct mutex		init_mutex;
+	int			connected;
+	int			max_easize;
+	int			max_def_easize;
+	int			max_cookiesize;
+	int			server_timeout;
+
+	int			tgts_size; /* size of tgts array */
+	struct lmv_tgt_desc	**tgts;
+
+	struct obd_connect_data	conn_data;
+};
+
+struct niobuf_local {
+	__u64		lnb_file_offset;
+	__u32		lnb_page_offset;
+	__u32		len;
+	__u32		flags;
+	struct page	*page;
+	struct dentry	*dentry;
+	int		lnb_grant_used;
+	int		rc;
+};
+
+#define LUSTRE_FLD_NAME	 "fld"
+#define LUSTRE_SEQ_NAME	 "seq"
+
+#define LUSTRE_MDD_NAME	 "mdd"
+#define LUSTRE_OSD_LDISKFS_NAME	"osd-ldiskfs"
+#define LUSTRE_OSD_ZFS_NAME     "osd-zfs"
+#define LUSTRE_VVP_NAME	 "vvp"
+#define LUSTRE_LMV_NAME	 "lmv"
+#define LUSTRE_SLP_NAME	 "slp"
+#define LUSTRE_LOD_NAME		"lod"
+#define LUSTRE_OSP_NAME		"osp"
+#define LUSTRE_LWP_NAME		"lwp"
+
+/* obd device type names */
+ /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */
+#define LUSTRE_MDS_NAME	 "mds"
+#define LUSTRE_MDT_NAME	 "mdt"
+#define LUSTRE_MDC_NAME	 "mdc"
+#define LUSTRE_OSS_NAME	 "ost"       /* FIXME change name to oss */
+#define LUSTRE_OST_NAME	 "obdfilter" /* FIXME change name to ost */
+#define LUSTRE_OSC_NAME	 "osc"
+#define LUSTRE_LOV_NAME	 "lov"
+#define LUSTRE_MGS_NAME	 "mgs"
+#define LUSTRE_MGC_NAME	 "mgc"
+
+#define LUSTRE_ECHO_NAME	"obdecho"
+#define LUSTRE_ECHO_CLIENT_NAME "echo_client"
+#define LUSTRE_QMT_NAME	 "qmt"
+
+/* Constant obd names (post-rename) */
+#define LUSTRE_MDS_OBDNAME "MDS"
+#define LUSTRE_OSS_OBDNAME "OSS"
+#define LUSTRE_MGS_OBDNAME "MGS"
+#define LUSTRE_MGC_OBDNAME "MGC"
+
+static inline int is_osp_on_mdt(char *name)
+{
+	char   *ptr;
+
+	ptr = strrchr(name, '-');
+	if (ptr == NULL) {
+		CERROR("%s is not a obdname\n", name);
+		return 0;
+	}
+
+	/* 1.8 OSC/OSP name on MDT is fsname-OSTxxxx-osc */
+	if (strncmp(ptr + 1, "osc", 3) == 0)
+		return 1;
+
+	if (strncmp(ptr + 1, "MDT", 3) != 0)
+		return 0;
+
+	while (*(--ptr) != '-' && ptr != name);
+
+	if (ptr == name)
+		return 0;
+
+	if (strncmp(ptr + 1, LUSTRE_OSP_NAME, strlen(LUSTRE_OSP_NAME)) != 0 &&
+	    strncmp(ptr + 1, LUSTRE_OSC_NAME, strlen(LUSTRE_OSC_NAME)) != 0)
+		return 0;
+
+	return 1;
+}
+
+/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
+#define N_LOCAL_TEMP_PAGE 0x10000000
+
+struct obd_trans_info {
+	__u64		    oti_transno;
+	__u64		    oti_xid;
+	/* Only used on the server side for tracking acks. */
+	struct oti_req_ack_lock {
+		struct lustre_handle lock;
+		__u32		mode;
+	}			oti_ack_locks[4];
+	void		    *oti_handle;
+	struct llog_cookie       oti_onecookie;
+	struct llog_cookie      *oti_logcookies;
+	int		      oti_numcookies;
+	/** synchronous write is needed */
+	unsigned long		 oti_sync_write:1;
+
+	/* initial thread handling transaction */
+	struct ptlrpc_thread *   oti_thread;
+	__u32		    oti_conn_cnt;
+	/** VBR: versions */
+	__u64		    oti_pre_version;
+	/** JobID */
+	char		    *oti_jobid;
+
+	struct obd_uuid	 *oti_ost_uuid;
+};
+
+static inline void oti_init(struct obd_trans_info *oti,
+			    struct ptlrpc_request *req)
+{
+	if (oti == NULL)
+		return;
+	memset(oti, 0, sizeof(*oti));
+
+	if (req == NULL)
+		return;
+
+	oti->oti_xid = req->rq_xid;
+	/** VBR: take versions from request */
+	if (req->rq_reqmsg != NULL &&
+	    lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) {
+		__u64 *pre_version = lustre_msg_get_versions(req->rq_reqmsg);
+		oti->oti_pre_version = pre_version ? pre_version[0] : 0;
+		oti->oti_transno = lustre_msg_get_transno(req->rq_reqmsg);
+	}
+
+	/** called from mds_create_objects */
+	if (req->rq_repmsg != NULL)
+		oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg);
+	oti->oti_thread = req->rq_svc_thread;
+	if (req->rq_reqmsg != NULL)
+		oti->oti_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg);
+}
+
+static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies)
+{
+	if (!oti)
+		return;
+
+	if (num_cookies == 1)
+		oti->oti_logcookies = &oti->oti_onecookie;
+	else
+		OBD_ALLOC_LARGE(oti->oti_logcookies,
+				num_cookies * sizeof(oti->oti_onecookie));
+
+	oti->oti_numcookies = num_cookies;
+}
+
+static inline void oti_free_cookies(struct obd_trans_info *oti)
+{
+	if (!oti || !oti->oti_logcookies)
+		return;
+
+	if (oti->oti_logcookies == &oti->oti_onecookie)
+		LASSERT(oti->oti_numcookies == 1);
+	else
+		OBD_FREE_LARGE(oti->oti_logcookies,
+			       oti->oti_numcookies*sizeof(oti->oti_onecookie));
+	oti->oti_logcookies = NULL;
+	oti->oti_numcookies = 0;
+}
+
+/*
+ * Events signalled through obd_notify() upcall-chain.
+ */
+enum obd_notify_event {
+	/* target added */
+	OBD_NOTIFY_CREATE,
+	/* Device connect start */
+	OBD_NOTIFY_CONNECT,
+	/* Device activated */
+	OBD_NOTIFY_ACTIVE,
+	/* Device deactivated */
+	OBD_NOTIFY_INACTIVE,
+	/* Device disconnected */
+	OBD_NOTIFY_DISCON,
+	/* Connect data for import were changed */
+	OBD_NOTIFY_OCD,
+	/* Sync request */
+	OBD_NOTIFY_SYNC_NONBLOCK,
+	OBD_NOTIFY_SYNC,
+	/* Configuration event */
+	OBD_NOTIFY_CONFIG,
+	/* Administratively deactivate/activate event */
+	OBD_NOTIFY_DEACTIVATE,
+	OBD_NOTIFY_ACTIVATE
+};
+
+/*
+ * Data structure used to pass obd_notify()-event to non-obd listeners (llite
+ * and liblustre being main examples).
+ */
+struct obd_notify_upcall {
+	int (*onu_upcall)(struct obd_device *host, struct obd_device *watched,
+			  enum obd_notify_event ev, void *owner, void *data);
+	/* Opaque datum supplied by upper layer listener */
+	void *onu_owner;
+};
+
+struct target_recovery_data {
+	svc_handler_t		trd_recovery_handler;
+	pid_t			trd_processing_task;
+	struct completion	trd_starting;
+	struct completion	trd_finishing;
+};
+
+struct obd_llog_group {
+	int		olg_seq;
+	struct llog_ctxt  *olg_ctxts[LLOG_MAX_CTXTS];
+	wait_queue_head_t	olg_waitq;
+	spinlock_t	   olg_lock;
+	struct mutex	   olg_cat_processing;
+};
+
+/* corresponds to one of the obd's */
+#define OBD_DEVICE_MAGIC	0XAB5CD6EF
+#define OBD_DEV_BY_DEVNAME      0xffffd0de
+
+struct obd_device {
+	struct obd_type	*obd_type;
+	__u32		   obd_magic;
+
+	/* common and UUID name of this device */
+	char		    obd_name[MAX_OBD_NAME];
+	struct obd_uuid	 obd_uuid;
+
+	struct lu_device       *obd_lu_dev;
+
+	int		     obd_minor;
+	/* bitfield modification is protected by obd_dev_lock */
+	unsigned long obd_attached:1,      /* finished attach */
+		      obd_set_up:1,	/* finished setup */
+		      obd_recovering:1,    /* there are recoverable clients */
+		      obd_abort_recovery:1,/* recovery expired */
+		      obd_version_recov:1, /* obd uses version checking */
+		      obd_replayable:1,    /* recovery is enabled; inform clients */
+		      obd_no_transno:1,    /* no committed-transno notification */
+		      obd_no_recov:1,      /* fail instead of retry messages */
+		      obd_stopping:1,      /* started cleanup */
+		      obd_starting:1,      /* started setup */
+		      obd_force:1,	 /* cleanup with > 0 obd refcount */
+		      obd_fail:1,	  /* cleanup with failover */
+		      obd_async_recov:1,   /* allow asynchronous orphan cleanup */
+		      obd_no_conn:1,       /* deny new connections */
+		      obd_inactive:1,      /* device active/inactive
+					   * (for /proc/status only!!) */
+		      obd_no_ir:1,	 /* no imperative recovery. */
+		      obd_process_conf:1;  /* device is processing mgs config */
+	/* use separate field as it is set in interrupt to don't mess with
+	 * protection of other bits using _bh lock */
+	unsigned long obd_recovery_expired:1;
+	/* uuid-export hash body */
+	cfs_hash_t	     *obd_uuid_hash;
+	/* nid-export hash body */
+	cfs_hash_t	     *obd_nid_hash;
+	/* nid stats body */
+	cfs_hash_t	     *obd_nid_stats_hash;
+	struct list_head	      obd_nid_stats;
+	atomic_t	    obd_refcount;
+	wait_queue_head_t	     obd_refcount_waitq;
+	struct list_head	      obd_exports;
+	struct list_head	      obd_unlinked_exports;
+	struct list_head	      obd_delayed_exports;
+	int		     obd_num_exports;
+	spinlock_t		obd_nid_lock;
+	struct ldlm_namespace  *obd_namespace;
+	struct ptlrpc_client	obd_ldlm_client; /* XXX OST/MDS only */
+	/* a spinlock is OK for what we do now, may need a semaphore later */
+	spinlock_t		obd_dev_lock; /* protect OBD bitfield above */
+	struct mutex		obd_dev_mutex;
+	__u64			obd_last_committed;
+	struct fsfilt_operations *obd_fsops;
+	spinlock_t		obd_osfs_lock;
+	struct obd_statfs	obd_osfs;       /* locked by obd_osfs_lock */
+	__u64			obd_osfs_age;
+	struct lvfs_run_ctxt	obd_lvfs_ctxt;
+	struct obd_llog_group	obd_olg;	/* default llog group */
+	struct obd_device	*obd_observer;
+	struct rw_semaphore	obd_observer_link_sem;
+	struct obd_notify_upcall obd_upcall;
+	struct obd_export       *obd_self_export;
+	/* list of exports in LRU order, for ping evictor, with obd_dev_lock */
+	struct list_head	      obd_exports_timed;
+	time_t		  obd_eviction_timer; /* for ping evictor */
+
+	int			      obd_max_recoverable_clients;
+	atomic_t		     obd_connected_clients;
+	int			      obd_stale_clients;
+	int			      obd_delayed_clients;
+	/* this lock protects all recovery list_heads, timer and
+	 * obd_next_recovery_transno value */
+	spinlock_t			 obd_recovery_task_lock;
+	__u64			    obd_next_recovery_transno;
+	int			      obd_replayed_requests;
+	int			      obd_requests_queued_for_recovery;
+	wait_queue_head_t		      obd_next_transno_waitq;
+	/* protected by obd_recovery_task_lock */
+	timer_list_t		      obd_recovery_timer;
+	time_t			   obd_recovery_start; /* seconds */
+	time_t			   obd_recovery_end; /* seconds, for lprocfs_status */
+	int			      obd_recovery_time_hard;
+	int			      obd_recovery_timeout;
+	int			      obd_recovery_ir_factor;
+
+	/* new recovery stuff from CMD2 */
+	struct target_recovery_data      obd_recovery_data;
+	int			      obd_replayed_locks;
+	atomic_t		     obd_req_replay_clients;
+	atomic_t		     obd_lock_replay_clients;
+	/* all lists are protected by obd_recovery_task_lock */
+	struct list_head		       obd_req_replay_queue;
+	struct list_head		       obd_lock_replay_queue;
+	struct list_head		       obd_final_req_queue;
+	int			      obd_recovery_stage;
+
+	union {
+		struct obd_device_target obt;
+		struct filter_obd filter;
+		struct client_obd cli;
+		struct ost_obd ost;
+		struct echo_client_obd echo_client;
+		struct echo_obd echo;
+		struct lov_obd lov;
+		struct lmv_obd lmv;
+	} u;
+	/* Fields used by LProcFS */
+	unsigned int	   obd_cntr_base;
+	struct lprocfs_stats  *obd_stats;
+
+	unsigned int	   md_cntr_base;
+	struct lprocfs_stats  *md_stats;
+
+	proc_dir_entry_t  *obd_proc_entry;
+	void		  *obd_proc_private; /* type private PDEs */
+	proc_dir_entry_t  *obd_proc_exports_entry;
+	proc_dir_entry_t  *obd_svc_procroot;
+	struct lprocfs_stats  *obd_svc_stats;
+	atomic_t	   obd_evict_inprogress;
+	wait_queue_head_t	    obd_evict_inprogress_waitq;
+	struct list_head	     obd_evict_list; /* protected with pet_lock */
+
+	/**
+	 * Ldlm pool part. Save last calculated SLV and Limit.
+	 */
+	rwlock_t		obd_pool_lock;
+	int		    obd_pool_limit;
+	__u64		  obd_pool_slv;
+
+	/**
+	 * A list of outstanding class_incref()'s against this obd. For
+	 * debugging.
+	 */
+	struct lu_ref	  obd_reference;
+
+	int		       obd_conn_inprogress;
+};
+
+#define OBD_LLOG_FL_SENDNOW     0x0001
+#define OBD_LLOG_FL_EXIT	0x0002
+
+enum obd_cleanup_stage {
+/* Special case hack for MDS LOVs */
+	OBD_CLEANUP_EARLY,
+/* can be directly mapped to .ldto_device_fini() */
+	OBD_CLEANUP_EXPORTS,
+};
+
+/* get/set_info keys */
+#define KEY_ASYNC	       "async"
+#define KEY_BLOCKSIZE_BITS      "blocksize_bits"
+#define KEY_BLOCKSIZE	   "blocksize"
+#define KEY_CAPA_KEY	    "capa_key"
+#define KEY_CHANGELOG_CLEAR     "changelog_clear"
+#define KEY_FID2PATH	    "fid2path"
+#define KEY_CHECKSUM	    "checksum"
+#define KEY_CLEAR_FS	    "clear_fs"
+#define KEY_CONN_DATA	   "conn_data"
+#define KEY_EVICT_BY_NID	"evict_by_nid"
+#define KEY_FIEMAP	      "fiemap"
+#define KEY_FLUSH_CTX	   "flush_ctx"
+#define KEY_GRANT_SHRINK	"grant_shrink"
+#define KEY_HSM_COPYTOOL_SEND   "hsm_send"
+#define KEY_INIT_RECOV_BACKUP   "init_recov_bk"
+#define KEY_INIT_RECOV	  "initial_recov"
+#define KEY_INTERMDS	    "inter_mds"
+#define KEY_LAST_ID	     "last_id"
+#define KEY_LAST_FID		"last_fid"
+#define KEY_LOCK_TO_STRIPE      "lock_to_stripe"
+#define KEY_LOVDESC	     "lovdesc"
+#define KEY_LOV_IDX	     "lov_idx"
+#define KEY_MAX_EASIZE	  "max_easize"
+#define KEY_MDS_CONN	    "mds_conn"
+#define KEY_MGSSEC	      "mgssec"
+#define KEY_NEXT_ID	     "next_id"
+#define KEY_READ_ONLY	   "read-only"
+#define KEY_REGISTER_TARGET     "register_target"
+#define KEY_SET_FS	      "set_fs"
+#define KEY_TGT_COUNT	   "tgt_count"
+/*      KEY_SET_INFO in lustre_idl.h */
+#define KEY_SPTLRPC_CONF	"sptlrpc_conf"
+#define KEY_CONNECT_FLAG	"connect_flags"
+#define KEY_SYNC_LOCK_CANCEL    "sync_lock_cancel"
+
+#define KEY_CACHE_SET		"cache_set"
+#define KEY_CACHE_LRU_SHRINK	"cache_lru_shrink"
+#define KEY_CHANGELOG_INDEX	"changelog_index"
+
+struct lu_context;
+
+/* /!\ must be coherent with include/linux/namei.h on patched kernel */
+#define IT_OPEN     (1 << 0)
+#define IT_CREAT    (1 << 1)
+#define IT_READDIR  (1 << 2)
+#define IT_GETATTR  (1 << 3)
+#define IT_LOOKUP   (1 << 4)
+#define IT_UNLINK   (1 << 5)
+#define IT_TRUNC    (1 << 6)
+#define IT_GETXATTR (1 << 7)
+#define IT_EXEC     (1 << 8)
+#define IT_PIN      (1 << 9)
+#define IT_LAYOUT   (1 << 10)
+#define IT_QUOTA_DQACQ (1 << 11)
+#define IT_QUOTA_CONN  (1 << 12)
+
+static inline int it_to_lock_mode(struct lookup_intent *it)
+{
+	/* CREAT needs to be tested before open (both could be set) */
+	if (it->it_op & IT_CREAT)
+		return LCK_CW;
+	else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP |
+			      IT_LAYOUT))
+		return LCK_CR;
+
+	LASSERTF(0, "Invalid it_op: %d\n", it->it_op);
+	return -EINVAL;
+}
+
+struct md_op_data {
+	struct lu_fid	   op_fid1; /* operation fid1 (usualy parent) */
+	struct lu_fid	   op_fid2; /* operation fid2 (usualy child) */
+	struct lu_fid	   op_fid3; /* 2 extra fids to find conflicting */
+	struct lu_fid	   op_fid4; /* to the operation locks. */
+	mdsno_t		 op_mds;  /* what mds server open will go to */
+	struct lustre_handle    op_handle;
+	obd_time		op_mod_time;
+	const char	     *op_name;
+	int		     op_namelen;
+	__u32		   op_mode;
+	struct lmv_stripe_md   *op_mea1;
+	struct lmv_stripe_md   *op_mea2;
+	__u32		   op_suppgids[2];
+	__u32		   op_fsuid;
+	__u32		   op_fsgid;
+	cfs_cap_t	       op_cap;
+	void		   *op_data;
+
+	/* iattr fields and blocks. */
+	struct iattr	    op_attr;
+	unsigned int	    op_attr_flags;
+	__u64		   op_valid;
+	loff_t		  op_attr_blocks;
+
+	/* Size-on-MDS epoch and flags. */
+	__u64		   op_ioepoch;
+	__u32		   op_flags;
+
+	/* Capa fields */
+	struct obd_capa	*op_capa1;
+	struct obd_capa	*op_capa2;
+
+	/* Various operation flags. */
+	__u32		   op_bias;
+
+	/* Operation type */
+	__u32		   op_opc;
+
+	/* Used by readdir */
+	__u64		   op_offset;
+
+	/* Used by readdir */
+	__u32		   op_npages;
+
+	/* used to transfer info between the stacks of MD client
+	 * see enum op_cli_flags */
+	__u32			op_cli_flags;
+};
+
+enum op_cli_flags {
+	CLI_SET_MEA	= 1 << 0,
+	CLI_RM_ENTRY	= 1 << 1,
+};
+
+struct md_enqueue_info;
+/* metadata stat-ahead */
+typedef int (* md_enqueue_cb_t)(struct ptlrpc_request *req,
+				struct md_enqueue_info *minfo,
+				int rc);
+
+/* seq client type */
+enum lu_cli_type {
+	LUSTRE_SEQ_METADATA = 1,
+	LUSTRE_SEQ_DATA
+};
+
+struct md_enqueue_info {
+	struct md_op_data       mi_data;
+	struct lookup_intent    mi_it;
+	struct lustre_handle    mi_lockh;
+	struct inode	   *mi_dir;
+	md_enqueue_cb_t	 mi_cb;
+	__u64		   mi_cbdata;
+	unsigned int	    mi_generation;
+};
+
+struct obd_ops {
+	module_t *o_owner;
+	int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
+			   void *karg, void *uarg);
+	int (*o_get_info)(const struct lu_env *env, struct obd_export *,
+			  __u32 keylen, void *key, __u32 *vallen, void *val,
+			  struct lov_stripe_md *lsm);
+	int (*o_set_info_async)(const struct lu_env *, struct obd_export *,
+				__u32 keylen, void *key,
+				__u32 vallen, void *val,
+				struct ptlrpc_request_set *set);
+	int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
+	int (*o_detach)(struct obd_device *dev);
+	int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg);
+	int (*o_precleanup)(struct obd_device *dev,
+			    enum obd_cleanup_stage cleanup_stage);
+	int (*o_cleanup)(struct obd_device *dev);
+	int (*o_process_config)(struct obd_device *dev, obd_count len,
+				void *data);
+	int (*o_postrecov)(struct obd_device *dev);
+	int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid,
+			  int priority);
+	int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid);
+	/* connect to the target device with given connection
+	 * data. @ocd->ocd_connect_flags is modified to reflect flags actually
+	 * granted by the target, which are guaranteed to be a subset of flags
+	 * asked for. If @ocd == NULL, use default parameters. */
+	int (*o_connect)(const struct lu_env *env,
+			 struct obd_export **exp, struct obd_device *src,
+			 struct obd_uuid *cluuid, struct obd_connect_data *ocd,
+			 void *localdata);
+	int (*o_reconnect)(const struct lu_env *env,
+			   struct obd_export *exp, struct obd_device *src,
+			   struct obd_uuid *cluuid,
+			   struct obd_connect_data *ocd,
+			   void *localdata);
+	int (*o_disconnect)(struct obd_export *exp);
+
+	/* Initialize/finalize fids infrastructure. */
+	int (*o_fid_init)(struct obd_device *obd,
+			  struct obd_export *exp, enum lu_cli_type type);
+	int (*o_fid_fini)(struct obd_device *obd);
+
+	/* Allocate new fid according to passed @hint. */
+	int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid,
+			   struct md_op_data *op_data);
+
+	/*
+	 * Object with @fid is getting deleted, we may want to do something
+	 * about this.
+	 */
+	int (*o_statfs)(const struct lu_env *, struct obd_export *exp,
+			struct obd_statfs *osfs, __u64 max_age, __u32 flags);
+	int (*o_statfs_async)(struct obd_export *exp, struct obd_info *oinfo,
+			      __u64 max_age, struct ptlrpc_request_set *set);
+	int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt,
+			struct lov_stripe_md *mem_src);
+	int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt,
+			  struct lov_mds_md *disk_src, int disk_len);
+	int (*o_preallocate)(struct lustre_handle *, obd_count *req,
+			     obd_id *ids);
+	/* FIXME: add fid capability support for create & destroy! */
+	int (*o_precreate)(struct obd_export *exp);
+	int (*o_create)(const struct lu_env *env, struct obd_export *exp,
+			struct obdo *oa, struct lov_stripe_md **ea,
+			struct obd_trans_info *oti);
+	int (*o_create_async)(struct obd_export *exp,  struct obd_info *oinfo,
+			      struct lov_stripe_md **ea,
+			      struct obd_trans_info *oti);
+	int (*o_destroy)(const struct lu_env *env, struct obd_export *exp,
+			 struct obdo *oa, struct lov_stripe_md *ea,
+			 struct obd_trans_info *oti, struct obd_export *md_exp,
+			 void *capa);
+	int (*o_setattr)(const struct lu_env *, struct obd_export *exp,
+			 struct obd_info *oinfo, struct obd_trans_info *oti);
+	int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
+			       struct obd_trans_info *oti,
+			       struct ptlrpc_request_set *rqset);
+	int (*o_getattr)(const struct lu_env *env, struct obd_export *exp,
+			 struct obd_info *oinfo);
+	int (*o_getattr_async)(struct obd_export *exp, struct obd_info *oinfo,
+			       struct ptlrpc_request_set *set);
+	int (*o_brw)(int rw, struct obd_export *exp, struct obd_info *oinfo,
+		     obd_count oa_bufs, struct brw_page *pgarr,
+		     struct obd_trans_info *oti);
+	int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm,
+			   struct ost_lvb *lvb, int kms_only);
+	int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm,
+			    obd_off size, int shrink);
+	int (*o_punch)(const struct lu_env *, struct obd_export *exp,
+		       struct obd_info *oinfo, struct obd_trans_info *oti,
+		       struct ptlrpc_request_set *rqset);
+	int (*o_sync)(const struct lu_env *env, struct obd_export *exp,
+		      struct obd_info *oinfo, obd_size start, obd_size end,
+		      struct ptlrpc_request_set *set);
+	int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
+			 struct lov_stripe_md *src, obd_size start,
+			 obd_size end, struct obd_trans_info *oti);
+	int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst,
+		      struct lustre_handle *srconn, struct lov_stripe_md *src,
+		      obd_size start, obd_size end, struct obd_trans_info *);
+	int (*o_iterate)(struct lustre_handle *conn,
+			 int (*)(obd_id, obd_seq, void *),
+			 obd_id *startid, obd_seq seq, void *data);
+	int (*o_preprw)(const struct lu_env *env, int cmd,
+			struct obd_export *exp, struct obdo *oa, int objcount,
+			struct obd_ioobj *obj, struct niobuf_remote *remote,
+			int *nr_pages, struct niobuf_local *local,
+			struct obd_trans_info *oti, struct lustre_capa *capa);
+	int (*o_commitrw)(const struct lu_env *env, int cmd,
+			  struct obd_export *exp, struct obdo *oa,
+			  int objcount, struct obd_ioobj *obj,
+			  struct niobuf_remote *remote, int pages,
+			  struct niobuf_local *local,
+			  struct obd_trans_info *oti, int rc);
+	int (*o_enqueue)(struct obd_export *, struct obd_info *oinfo,
+			 struct ldlm_enqueue_info *einfo,
+			 struct ptlrpc_request_set *rqset);
+	int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *,
+			       ldlm_iterator_t it, void *data);
+	int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *,
+			     ldlm_iterator_t it, void *data);
+	int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md,
+			__u32 mode, struct lustre_handle *);
+	int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *,
+			       ldlm_cancel_flags_t flags, void *opaque);
+	int (*o_init_export)(struct obd_export *exp);
+	int (*o_destroy_export)(struct obd_export *exp);
+	int (*o_extent_calc)(struct obd_export *, struct lov_stripe_md *,
+			     int cmd, obd_off *);
+
+	/* llog related obd_methods */
+	int (*o_llog_init)(struct obd_device *obd, struct obd_llog_group *grp,
+			   struct obd_device *disk_obd, int *idx);
+	int (*o_llog_finish)(struct obd_device *obd, int count);
+	int (*o_llog_connect)(struct obd_export *, struct llogd_conn_body *);
+
+	/* metadata-only methods */
+	int (*o_pin)(struct obd_export *, const struct lu_fid *fid,
+		     struct obd_capa *, struct obd_client_handle *, int flag);
+	int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int);
+
+	int (*o_import_event)(struct obd_device *, struct obd_import *,
+			      enum obd_import_event);
+
+	int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
+			enum obd_notify_event ev, void *data);
+
+	int (*o_health_check)(const struct lu_env *env, struct obd_device *);
+	struct obd_uuid *(*o_get_uuid) (struct obd_export *exp);
+
+	/* quota methods */
+	int (*o_quotacheck)(struct obd_device *, struct obd_export *,
+			    struct obd_quotactl *);
+	int (*o_quotactl)(struct obd_device *, struct obd_export *,
+			  struct obd_quotactl *);
+
+	int (*o_ping)(const struct lu_env *, struct obd_export *exp);
+
+	/* pools methods */
+	int (*o_pool_new)(struct obd_device *obd, char *poolname);
+	int (*o_pool_del)(struct obd_device *obd, char *poolname);
+	int (*o_pool_add)(struct obd_device *obd, char *poolname,
+			  char *ostname);
+	int (*o_pool_rem)(struct obd_device *obd, char *poolname,
+			  char *ostname);
+	void (*o_getref)(struct obd_device *obd);
+	void (*o_putref)(struct obd_device *obd);
+	/*
+	 * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
+	 * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
+	 * Also, add a wrapper function in include/linux/obd_class.h. */
+};
+
+enum {
+	LUSTRE_OPC_MKDIR    = (1 << 0),
+	LUSTRE_OPC_SYMLINK  = (1 << 1),
+	LUSTRE_OPC_MKNOD    = (1 << 2),
+	LUSTRE_OPC_CREATE   = (1 << 3),
+	LUSTRE_OPC_ANY      = (1 << 4)
+};
+
+/* lmv structures */
+#define MEA_MAGIC_LAST_CHAR      0xb2221ca1
+#define MEA_MAGIC_ALL_CHARS      0xb222a11c
+#define MEA_MAGIC_HASH_SEGMENT   0xb222a11b
+
+#define MAX_HASH_SIZE_32	 0x7fffffffUL
+#define MAX_HASH_SIZE	    0x7fffffffffffffffULL
+#define MAX_HASH_HIGHEST_BIT     0x1000000000000000ULL
+
+struct lustre_md {
+	struct mdt_body	 *body;
+	struct lov_stripe_md    *lsm;
+	struct lmv_stripe_md    *mea;
+#ifdef CONFIG_FS_POSIX_ACL
+	struct posix_acl	*posix_acl;
+#endif
+	struct mdt_remote_perm  *remote_perm;
+	struct obd_capa	 *mds_capa;
+	struct obd_capa	 *oss_capa;
+};
+
+struct md_open_data {
+	struct obd_client_handle *mod_och;
+	struct ptlrpc_request    *mod_open_req;
+	struct ptlrpc_request    *mod_close_req;
+	atomic_t	      mod_refcount;
+};
+
+struct lookup_intent;
+
+struct md_ops {
+	int (*m_getstatus)(struct obd_export *, struct lu_fid *,
+			   struct obd_capa **);
+	int (*m_null_inode)(struct obd_export *, const struct lu_fid *);
+	int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *,
+			     ldlm_iterator_t, void *);
+	int (*m_close)(struct obd_export *, struct md_op_data *,
+		       struct md_open_data *, struct ptlrpc_request **);
+	int (*m_create)(struct obd_export *, struct md_op_data *,
+			const void *, int, int, __u32, __u32, cfs_cap_t,
+			__u64, struct ptlrpc_request **);
+	int (*m_done_writing)(struct obd_export *, struct md_op_data  *,
+			      struct md_open_data *);
+	int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
+			 struct lookup_intent *, struct md_op_data *,
+			 struct lustre_handle *, void *, int,
+			 struct ptlrpc_request **, __u64);
+	int (*m_getattr)(struct obd_export *, struct md_op_data *,
+			 struct ptlrpc_request **);
+	int (*m_getattr_name)(struct obd_export *, struct md_op_data *,
+			      struct ptlrpc_request **);
+	int (*m_intent_lock)(struct obd_export *, struct md_op_data *,
+			     void *, int, struct lookup_intent *, int,
+			     struct ptlrpc_request **,
+			     ldlm_blocking_callback, __u64);
+	int (*m_link)(struct obd_export *, struct md_op_data *,
+		      struct ptlrpc_request **);
+	int (*m_rename)(struct obd_export *, struct md_op_data *,
+			const char *, int, const char *, int,
+			struct ptlrpc_request **);
+	int (*m_is_subdir)(struct obd_export *, const struct lu_fid *,
+			   const struct lu_fid *,
+			   struct ptlrpc_request **);
+	int (*m_setattr)(struct obd_export *, struct md_op_data *, void *,
+			 int , void *, int, struct ptlrpc_request **,
+			 struct md_open_data **mod);
+	int (*m_sync)(struct obd_export *, const struct lu_fid *,
+		      struct obd_capa *, struct ptlrpc_request **);
+	int (*m_readpage)(struct obd_export *, struct md_op_data *,
+			  struct page **, struct ptlrpc_request **);
+
+	int (*m_unlink)(struct obd_export *, struct md_op_data *,
+			struct ptlrpc_request **);
+
+	int (*m_setxattr)(struct obd_export *, const struct lu_fid *,
+			  struct obd_capa *, obd_valid, const char *,
+			  const char *, int, int, int, __u32,
+			  struct ptlrpc_request **);
+
+	int (*m_getxattr)(struct obd_export *, const struct lu_fid *,
+			  struct obd_capa *, obd_valid, const char *,
+			  const char *, int, int, int,
+			  struct ptlrpc_request **);
+
+	int (*m_init_ea_size)(struct obd_export *, int, int, int);
+
+	int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
+			       struct obd_export *, struct obd_export *,
+			       struct lustre_md *);
+
+	int (*m_free_lustre_md)(struct obd_export *, struct lustre_md *);
+
+	int (*m_set_open_replay_data)(struct obd_export *,
+				      struct obd_client_handle *,
+				      struct ptlrpc_request *);
+	int (*m_clear_open_replay_data)(struct obd_export *,
+					struct obd_client_handle *);
+	int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *);
+
+	ldlm_mode_t (*m_lock_match)(struct obd_export *, __u64,
+				    const struct lu_fid *, ldlm_type_t,
+				    ldlm_policy_data_t *, ldlm_mode_t,
+				    struct lustre_handle *);
+
+	int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *,
+			       ldlm_policy_data_t *, ldlm_mode_t,
+			       ldlm_cancel_flags_t flags, void *opaque);
+	int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc,
+			    renew_capa_cb_t cb);
+	int (*m_unpack_capa)(struct obd_export *, struct ptlrpc_request *,
+			     const struct req_msg_field *, struct obd_capa **);
+
+	int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *,
+				 struct obd_capa *, __u32,
+				 struct ptlrpc_request **);
+
+	int (*m_intent_getattr_async)(struct obd_export *,
+				      struct md_enqueue_info *,
+				      struct ldlm_enqueue_info *);
+
+	int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *,
+				 struct lu_fid *, __u64 *bits);
+
+	/*
+	 * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
+	 * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
+	 * wrapper function in include/linux/obd_class.h.
+	 */
+};
+
+struct lsm_operations {
+	void (*lsm_free)(struct lov_stripe_md *);
+	int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa,
+			   struct obd_export *md_exp);
+	void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *,
+				    obd_off *);
+	void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *,
+				     obd_off *);
+	int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes,
+			       __u16 *stripe_count);
+	int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm,
+			     struct lov_mds_md *lmm);
+};
+
+extern const struct lsm_operations lsm_v1_ops;
+extern const struct lsm_operations lsm_v3_ops;
+static inline const struct lsm_operations *lsm_op_find(int magic)
+{
+	switch(magic) {
+	case LOV_MAGIC_V1:
+	       return &lsm_v1_ops;
+	case LOV_MAGIC_V3:
+	       return &lsm_v3_ops;
+	default:
+	       CERROR("Cannot recognize lsm_magic %08x\n", magic);
+	       return NULL;
+	}
+}
+
+/* Requests for obd_extent_calc() */
+#define OBD_CALC_STRIPE_START   1
+#define OBD_CALC_STRIPE_END     2
+
+static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo)
+{
+	return oinfo->oi_capa;
+}
+
+static inline struct md_open_data *obd_mod_alloc(void)
+{
+	struct md_open_data *mod;
+	OBD_ALLOC_PTR(mod);
+	if (mod == NULL)
+		return NULL;
+	atomic_set(&mod->mod_refcount, 1);
+	return mod;
+}
+
+#define obd_mod_get(mod) atomic_inc(&(mod)->mod_refcount)
+#define obd_mod_put(mod)					\
+({							      \
+	if (atomic_dec_and_test(&(mod)->mod_refcount)) {	  \
+		if ((mod)->mod_open_req)			  \
+			ptlrpc_req_finished((mod)->mod_open_req);   \
+		OBD_FREE_PTR(mod);			      \
+	}						       \
+})
+
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
+void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent);
+
+/* return 1 if client should be resend request */
+static inline int client_should_resend(int resend, struct client_obd *cli)
+{
+	return atomic_read(&cli->cl_resends) ?
+	       atomic_read(&cli->cl_resends) > resend : 1;
+}
+
+/**
+ * Return device name for this device
+ *
+ * XXX: lu_device is declared before obd_device, while a pointer pointing
+ * back to obd_device in lu_device, so this helper function defines here
+ * instead of in lu_object.h
+ */
+static inline const char *lu_dev_name(const struct lu_device *lu_dev)
+{
+	return lu_dev->ld_obd->obd_name;
+}
+
+static inline bool filename_is_volatile(const char *name, int namelen, int *idx)
+{
+	const char	*start;
+	char		*end;
+
+	if (strncmp(name, LUSTRE_VOLATILE_HDR, LUSTRE_VOLATILE_HDR_LEN) != 0)
+		return false;
+
+	/* caller does not care of idx */
+	if (idx == NULL)
+		return true;
+
+	/* volatile file, the MDT can be set from name */
+	/* name format is LUSTRE_VOLATILE_HDR:[idx]: */
+	/* if no MDT is specified, use std way */
+	if (namelen < LUSTRE_VOLATILE_HDR_LEN + 2)
+		goto bad_format;
+	/* test for no MDT idx case */
+	if ((*(name + LUSTRE_VOLATILE_HDR_LEN) == ':') &&
+	    (*(name + LUSTRE_VOLATILE_HDR_LEN + 1) == ':')) {
+		*idx = -1;
+		return true;
+	}
+	/* we have an idx, read it */
+	start = name + LUSTRE_VOLATILE_HDR_LEN + 1;
+	*idx = strtoul(start, &end, 0);
+	/* error cases:
+	 * no digit, no trailing :, negative value
+	 */
+	if (((*idx == 0) && (end == start)) ||
+	    (*end != ':') || (*idx < 0))
+		goto bad_format;
+
+	return true;
+bad_format:
+	/* bad format of mdt idx, we cannot return an error
+	 * to caller so we use hash algo */
+	CERROR("Bad volatile file name format: %s\n",
+	       name + LUSTRE_VOLATILE_HDR_LEN);
+	return false;
+}
+
+static inline int cli_brw_size(struct obd_device *obd)
+{
+	LASSERT(obd != NULL);
+	return obd->u.cli.cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+}
+
+#endif /* __OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_cache.h b/drivers/staging/lustre/lustre/include/obd_cache.h
new file mode 100644
index 000000000000..c8249fbb0d72
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_cache.h
@@ -0,0 +1,39 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _OBD_CACHE_H__
+#define _OBD_CACHE_H__
+
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h
new file mode 100644
index 000000000000..5f740f1743ca
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_cksum.h
@@ -0,0 +1,176 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef __OBD_CKSUM
+#define __OBD_CKSUM
+#include <linux/libcfs/libcfs.h>
+#include <lustre/lustre_idl.h>
+
+static inline unsigned char cksum_obd2cfs(cksum_type_t cksum_type)
+{
+	switch (cksum_type) {
+	case OBD_CKSUM_CRC32:
+		return CFS_HASH_ALG_CRC32;
+	case OBD_CKSUM_ADLER:
+		return CFS_HASH_ALG_ADLER32;
+	case OBD_CKSUM_CRC32C:
+		return CFS_HASH_ALG_CRC32C;
+	default:
+		CERROR("Unknown checksum type (%x)!!!\n", cksum_type);
+		LBUG();
+	}
+	return 0;
+}
+
+/* The OBD_FL_CKSUM_* flags is packed into 5 bits of o_flags, since there can
+ * only be a single checksum type per RPC.
+ *
+ * The OBD_CHECKSUM_* type bits passed in ocd_cksum_types are a 32-bit bitmask
+ * since they need to represent the full range of checksum algorithms that
+ * both the client and server can understand.
+ *
+ * In case of an unsupported types/flags we fall back to ADLER
+ * because that is supported by all clients since 1.8
+ *
+ * In case multiple algorithms are supported the best one is used. */
+static inline obd_flag cksum_type_pack(cksum_type_t cksum_type)
+{
+	unsigned int    performance = 0, tmp;
+	obd_flag	flag = OBD_FL_CKSUM_ADLER;
+
+	if (cksum_type & OBD_CKSUM_CRC32) {
+		tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32));
+		if (tmp > performance) {
+			performance = tmp;
+			flag = OBD_FL_CKSUM_CRC32;
+		}
+	}
+	if (cksum_type & OBD_CKSUM_CRC32C) {
+		tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C));
+		if (tmp > performance) {
+			performance = tmp;
+			flag = OBD_FL_CKSUM_CRC32C;
+		}
+	}
+	if (cksum_type & OBD_CKSUM_ADLER) {
+		tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER));
+		if (tmp > performance) {
+			performance = tmp;
+			flag = OBD_FL_CKSUM_ADLER;
+		}
+	}
+	if (unlikely(cksum_type && !(cksum_type & (OBD_CKSUM_CRC32C |
+						   OBD_CKSUM_CRC32 |
+						   OBD_CKSUM_ADLER))))
+		CWARN("unknown cksum type %x\n", cksum_type);
+
+	return flag;
+}
+
+static inline cksum_type_t cksum_type_unpack(obd_flag o_flags)
+{
+	switch (o_flags & OBD_FL_CKSUM_ALL) {
+	case OBD_FL_CKSUM_CRC32C:
+		return OBD_CKSUM_CRC32C;
+	case OBD_FL_CKSUM_CRC32:
+		return OBD_CKSUM_CRC32;
+	default:
+		break;
+	}
+
+	return OBD_CKSUM_ADLER;
+}
+
+/* Return a bitmask of the checksum types supported on this system.
+ * 1.8 supported ADLER it is base and not depend on hw
+ * Client uses all available local algos
+ */
+static inline cksum_type_t cksum_types_supported_client(void)
+{
+	cksum_type_t ret = OBD_CKSUM_ADLER;
+
+	CDEBUG(D_INFO, "Crypto hash speed: crc %d, crc32c %d, adler %d\n",
+	       cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)),
+	       cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)),
+	       cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER)));
+
+	if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)) > 0)
+		ret |= OBD_CKSUM_CRC32C;
+	if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)) > 0)
+		ret |= OBD_CKSUM_CRC32;
+
+	return ret;
+}
+
+/* Server uses algos that perform at 50% or better of the Adler */
+static inline cksum_type_t cksum_types_supported_server(void)
+{
+	int	     base_speed;
+	cksum_type_t    ret = OBD_CKSUM_ADLER;
+
+	CDEBUG(D_INFO, "Crypto hash speed: crc %d, crc32c %d, adler %d\n",
+	       cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)),
+	       cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)),
+	       cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER)));
+
+	base_speed = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER)) / 2;
+
+	if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)) >=
+	    base_speed)
+		ret |= OBD_CKSUM_CRC32C;
+	if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)) >=
+	    base_speed)
+		ret |= OBD_CKSUM_CRC32;
+
+	return ret;
+}
+
+
+/* Select the best checksum algorithm among those supplied in the cksum_types
+ * input.
+ *
+ * Currently, calling cksum_type_pack() with a mask will return the fastest
+ * checksum type due to its benchmarking at libcfs module load.
+ * Caution is advised, however, since what is fastest on a single client may
+ * not be the fastest or most efficient algorithm on the server.  */
+static inline cksum_type_t cksum_type_select(cksum_type_t cksum_types)
+{
+	return cksum_type_unpack(cksum_type_pack(cksum_types));
+}
+
+/* Checksum algorithm names. Must be defined in the same order as the
+ * OBD_CKSUM_* flags. */
+#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler", "crc32c"}
+
+#endif /* __OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
new file mode 100644
index 000000000000..de5c5853647f
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -0,0 +1,2281 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#ifndef __CLASS_OBD_H
+#define __CLASS_OBD_H
+
+
+#include <obd_support.h>
+#include <lustre_import.h>
+#include <lustre_net.h>
+#include <obd.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <lprocfs_status.h>
+
+#include <linux/obd_class.h>
+
+#define OBD_STATFS_NODELAY      0x0001  /* requests should be send without delay
+					 * and resends for avoid deadlocks */
+#define OBD_STATFS_FROM_CACHE   0x0002  /* the statfs callback should not update
+					 * obd_osfs_age */
+#define OBD_STATFS_PTLRPCD      0x0004  /* requests will be sent via ptlrpcd
+					 * instead of a specific set. This
+					 * means that we cannot rely on the set
+					 * interpret routine to be called.
+					 * lov_statfs_fini() must thus be called
+					 * by the request interpret routine */
+#define OBD_STATFS_FOR_MDT0	0x0008	/* The statfs is only for retrieving
+					 * information from MDT0. */
+#define OBD_FL_PUNCH    0x00000001      /* To indicate it is punch operation */
+
+/* OBD Device Declarations */
+extern struct obd_device *obd_devs[MAX_OBD_DEVICES];
+extern rwlock_t obd_dev_lock;
+
+/* OBD Operations Declarations */
+extern struct obd_device *class_conn2obd(struct lustre_handle *);
+extern struct obd_device *class_exp2obd(struct obd_export *);
+extern int class_handle_ioctl(unsigned int cmd, unsigned long arg);
+extern int lustre_get_jobid(char *jobid);
+
+struct lu_device_type;
+
+/* genops.c */
+struct obd_export *class_conn2export(struct lustre_handle *);
+int class_register_type(struct obd_ops *, struct md_ops *,
+			struct lprocfs_vars *, const char *nm,
+			struct lu_device_type *ldt);
+int class_unregister_type(const char *nm);
+
+struct obd_device *class_newdev(const char *type_name, const char *name);
+void class_release_dev(struct obd_device *obd);
+
+int class_name2dev(const char *name);
+struct obd_device *class_name2obd(const char *name);
+int class_uuid2dev(struct obd_uuid *uuid);
+struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
+void class_obd_list(void);
+struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
+					  const char * typ_name,
+					  struct obd_uuid *grp_uuid);
+struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid,
+					   int *next);
+struct obd_device * class_num2obd(int num);
+int get_devices_count(void);
+
+int class_notify_sptlrpc_conf(const char *fsname, int namelen);
+
+char *obd_export_nid2str(struct obd_export *exp);
+
+int obd_export_evict_by_nid(struct obd_device *obd, const char *nid);
+int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid);
+int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep);
+
+int obd_zombie_impexp_init(void);
+void obd_zombie_impexp_stop(void);
+void obd_zombie_impexp_cull(void);
+void obd_zombie_barrier(void);
+void obd_exports_barrier(struct obd_device *obd);
+int kuc_len(int payload_len);
+struct kuc_hdr * kuc_ptr(void *p);
+int kuc_ispayload(void *p);
+void *kuc_alloc(int payload_len, int transport, int type);
+void kuc_free(void *p, int payload_len);
+
+struct llog_handle;
+struct llog_rec_hdr;
+typedef int (*llog_cb_t)(const struct lu_env *, struct llog_handle *,
+			 struct llog_rec_hdr *, void *);
+/* obd_config.c */
+struct lustre_cfg *lustre_cfg_rename(struct lustre_cfg *cfg,
+				     const char *new_name);
+int class_process_config(struct lustre_cfg *lcfg);
+int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
+			     struct lustre_cfg *lcfg, void *data);
+int class_attach(struct lustre_cfg *lcfg);
+int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
+int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg);
+int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg);
+struct obd_device *class_incref(struct obd_device *obd,
+				const char *scope, const void *source);
+void class_decref(struct obd_device *obd,
+		  const char *scope, const void *source);
+void dump_exports(struct obd_device *obd, int locks);
+int class_config_llog_handler(const struct lu_env *env,
+			      struct llog_handle *handle,
+			      struct llog_rec_hdr *rec, void *data);
+int class_add_conn(struct obd_device *obd, struct lustre_cfg *lcfg);
+int class_add_uuid(const char *uuid, __u64 nid);
+
+/*obdecho*/
+#ifdef LPROCFS
+extern void lprocfs_echo_init_vars(struct lprocfs_static_vars *lvars);
+#else
+static inline void lprocfs_echo_init_vars(struct lprocfs_static_vars *lvars)
+{
+	memset(lvars, 0, sizeof(*lvars));
+}
+#endif
+
+#define CFG_F_START     0x01   /* Set when we start updating from a log */
+#define CFG_F_MARKER    0x02   /* We are within a maker */
+#define CFG_F_SKIP      0x04   /* We should ignore this cfg command */
+#define CFG_F_COMPAT146 0x08   /* Allow old-style logs */
+#define CFG_F_EXCLUDE   0x10   /* OST exclusion list */
+
+/* Passed as data param to class_config_parse_llog */
+struct config_llog_instance {
+	char	       *cfg_obdname;
+	void	       *cfg_instance;
+	struct super_block *cfg_sb;
+	struct obd_uuid     cfg_uuid;
+	llog_cb_t	    cfg_callback;
+	int		 cfg_last_idx; /* for partial llog processing */
+	int		 cfg_flags;
+};
+int class_config_parse_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
+			    char *name, struct config_llog_instance *cfg);
+int class_config_dump_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
+			   char *name, struct config_llog_instance *cfg);
+
+enum {
+	CONFIG_T_CONFIG  = 0,
+	CONFIG_T_SPTLRPC = 1,
+	CONFIG_T_RECOVER = 2,
+	CONFIG_T_MAX     = 3
+};
+
+/* list of active configuration logs  */
+struct config_llog_data {
+	struct ldlm_res_id	  cld_resid;
+	struct config_llog_instance cld_cfg;
+	struct list_head		  cld_list_chain;
+	atomic_t		cld_refcount;
+	struct config_llog_data    *cld_sptlrpc;/* depended sptlrpc log */
+	struct config_llog_data    *cld_recover;    /* imperative recover log */
+	struct obd_export	  *cld_mgcexp;
+	struct mutex		    cld_lock;
+	int			 cld_type;
+	unsigned int		cld_stopping:1, /* we were told to stop
+						     * watching */
+				    cld_lostlock:1; /* lock not requeued */
+	char			cld_logname[0];
+};
+
+struct lustre_profile {
+	struct list_head       lp_list;
+	char	    *lp_profile;
+	char	    *lp_dt;
+	char	    *lp_md;
+};
+
+struct lustre_profile *class_get_profile(const char * prof);
+void class_del_profile(const char *prof);
+void class_del_profiles(void);
+
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+
+void __class_export_add_lock_ref(struct obd_export *, struct ldlm_lock *);
+void __class_export_del_lock_ref(struct obd_export *, struct ldlm_lock *);
+extern void (*class_export_dump_hook)(struct obd_export *);
+
+#else
+
+#define __class_export_add_lock_ref(exp, lock)	     do {} while(0)
+#define __class_export_del_lock_ref(exp, lock)	     do {} while(0)
+
+#endif
+
+#define class_export_rpc_inc(exp)				       \
+({								      \
+	atomic_inc(&(exp)->exp_rpc_count);			  \
+	CDEBUG(D_INFO, "RPC GETting export %p : new rpc_count %d\n",    \
+	       (exp), atomic_read(&(exp)->exp_rpc_count));	  \
+})
+
+#define class_export_rpc_dec(exp)				       \
+({								      \
+	LASSERT_ATOMIC_POS(&exp->exp_rpc_count);			\
+	atomic_dec(&(exp)->exp_rpc_count);			  \
+	CDEBUG(D_INFO, "RPC PUTting export %p : new rpc_count %d\n",    \
+	       (exp), atomic_read(&(exp)->exp_rpc_count));	  \
+})
+
+#define class_export_lock_get(exp, lock)				\
+({								      \
+	atomic_inc(&(exp)->exp_locks_count);			\
+	__class_export_add_lock_ref(exp, lock);			 \
+	CDEBUG(D_INFO, "lock GETting export %p : new locks_count %d\n", \
+	       (exp), atomic_read(&(exp)->exp_locks_count));	\
+	class_export_get(exp);					  \
+})
+
+#define class_export_lock_put(exp, lock)				\
+({								      \
+	LASSERT_ATOMIC_POS(&exp->exp_locks_count);		      \
+	atomic_dec(&(exp)->exp_locks_count);			\
+	__class_export_del_lock_ref(exp, lock);			 \
+	CDEBUG(D_INFO, "lock PUTting export %p : new locks_count %d\n", \
+	       (exp), atomic_read(&(exp)->exp_locks_count));	\
+	class_export_put(exp);					  \
+})
+
+#define class_export_cb_get(exp)					\
+({								      \
+	atomic_inc(&(exp)->exp_cb_count);			   \
+	CDEBUG(D_INFO, "callback GETting export %p : new cb_count %d\n",\
+	       (exp), atomic_read(&(exp)->exp_cb_count));	   \
+	class_export_get(exp);					  \
+})
+
+#define class_export_cb_put(exp)					\
+({								      \
+	LASSERT_ATOMIC_POS(&exp->exp_cb_count);			 \
+	atomic_dec(&(exp)->exp_cb_count);			   \
+	CDEBUG(D_INFO, "callback PUTting export %p : new cb_count %d\n",\
+	       (exp), atomic_read(&(exp)->exp_cb_count));	   \
+	class_export_put(exp);					  \
+})
+
+/* genops.c */
+struct obd_export *class_export_get(struct obd_export *exp);
+void class_export_put(struct obd_export *exp);
+struct obd_export *class_new_export(struct obd_device *obddev,
+				    struct obd_uuid *cluuid);
+void class_unlink_export(struct obd_export *exp);
+
+struct obd_import *class_import_get(struct obd_import *);
+void class_import_put(struct obd_import *);
+struct obd_import *class_new_import(struct obd_device *obd);
+void class_destroy_import(struct obd_import *exp);
+
+struct obd_type *class_search_type(const char *name);
+struct obd_type *class_get_type(const char *name);
+void class_put_type(struct obd_type *type);
+int class_connect(struct lustre_handle *conn, struct obd_device *obd,
+		  struct obd_uuid *cluuid);
+int class_disconnect(struct obd_export *exp);
+void class_fail_export(struct obd_export *exp);
+int class_connected_export(struct obd_export *exp);
+void class_disconnect_exports(struct obd_device *obddev);
+int class_manual_cleanup(struct obd_device *obd);
+void class_disconnect_stale_exports(struct obd_device *,
+				    int (*test_export)(struct obd_export *));
+static inline enum obd_option exp_flags_from_obd(struct obd_device *obd)
+{
+	return ((obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
+		(obd->obd_force ? OBD_OPT_FORCE : 0) |
+		(obd->obd_abort_recovery ? OBD_OPT_ABORT_RECOV : 0) |
+		0);
+}
+
+
+void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
+void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr,
+		     unsigned int ia_valid);
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid);
+void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, obd_flag valid);
+void obdo_from_md(struct obdo *oa, struct md_op_data *op_data,
+		  unsigned int valid);
+
+void obdo_cpu_to_le(struct obdo *dobdo, struct obdo *sobdo);
+void obdo_le_to_cpu(struct obdo *dobdo, struct obdo *sobdo);
+
+#define OBT(dev)	(dev)->obd_type
+#define OBP(dev, op)    (dev)->obd_type->typ_dt_ops->o_ ## op
+#define MDP(dev, op)    (dev)->obd_type->typ_md_ops->m_ ## op
+#define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op
+
+/* Ensure obd_setup: used for cleanup which must be called
+   while obd is stopping */
+#define OBD_CHECK_DEV(obd)				      \
+do {							    \
+	if (!(obd)) {					   \
+		CERROR("NULL device\n");			\
+		RETURN(-ENODEV);				\
+	}						       \
+} while (0)
+
+/* ensure obd_setup and !obd_stopping */
+#define OBD_CHECK_DEV_ACTIVE(obd)			       \
+do {							    \
+	OBD_CHECK_DEV(obd);				     \
+	if (!(obd)->obd_set_up || (obd)->obd_stopping) {	\
+		CERROR("Device %d not setup\n",		 \
+		       (obd)->obd_minor);		       \
+		RETURN(-ENODEV);				\
+	}						       \
+} while (0)
+
+
+#ifdef LPROCFS
+#define OBD_COUNTER_OFFSET(op)				  \
+	((offsetof(struct obd_ops, o_ ## op) -		  \
+	  offsetof(struct obd_ops, o_iocontrol))		\
+	 / sizeof(((struct obd_ops *)(0))->o_iocontrol))
+
+#define OBD_COUNTER_INCREMENT(obdx, op)			   \
+	if ((obdx)->obd_stats != NULL) {			  \
+		unsigned int coffset;			     \
+		coffset = (unsigned int)((obdx)->obd_cntr_base) + \
+			OBD_COUNTER_OFFSET(op);		   \
+		LASSERT(coffset < (obdx)->obd_stats->ls_num);     \
+		lprocfs_counter_incr((obdx)->obd_stats, coffset); \
+	}
+
+#define EXP_COUNTER_INCREMENT(export, op)				    \
+	if ((export)->exp_obd->obd_stats != NULL) {			  \
+		unsigned int coffset;					\
+		coffset = (unsigned int)((export)->exp_obd->obd_cntr_base) + \
+			OBD_COUNTER_OFFSET(op);			      \
+		LASSERT(coffset < (export)->exp_obd->obd_stats->ls_num);     \
+		lprocfs_counter_incr((export)->exp_obd->obd_stats, coffset); \
+		if ((export)->exp_nid_stats != NULL &&		       \
+		    (export)->exp_nid_stats->nid_stats != NULL)	      \
+			lprocfs_counter_incr(				\
+				(export)->exp_nid_stats->nid_stats, coffset);\
+	}
+
+#define MD_COUNTER_OFFSET(op)				   \
+	((offsetof(struct md_ops, m_ ## op) -		   \
+	  offsetof(struct md_ops, m_getstatus))		 \
+	 / sizeof(((struct md_ops *)(0))->m_getstatus))
+
+#define MD_COUNTER_INCREMENT(obdx, op)			   \
+	if ((obd)->md_stats != NULL) {			   \
+		unsigned int coffset;			    \
+		coffset = (unsigned int)((obdx)->md_cntr_base) + \
+			MD_COUNTER_OFFSET(op);		   \
+		LASSERT(coffset < (obdx)->md_stats->ls_num);     \
+		lprocfs_counter_incr((obdx)->md_stats, coffset); \
+	}
+
+#define EXP_MD_COUNTER_INCREMENT(export, op)				 \
+	if ((export)->exp_obd->obd_stats != NULL) {			  \
+		unsigned int coffset;					\
+		coffset = (unsigned int)((export)->exp_obd->md_cntr_base) +  \
+			MD_COUNTER_OFFSET(op);			       \
+		LASSERT(coffset < (export)->exp_obd->md_stats->ls_num);      \
+		lprocfs_counter_incr((export)->exp_obd->md_stats, coffset);  \
+		if ((export)->exp_md_stats != NULL)			  \
+			lprocfs_counter_incr(				\
+				(export)->exp_md_stats, coffset);	    \
+	}
+
+#else
+#define OBD_COUNTER_OFFSET(op)
+#define OBD_COUNTER_INCREMENT(obd, op)
+#define EXP_COUNTER_INCREMENT(exp, op)
+#define MD_COUNTER_INCREMENT(obd, op)
+#define EXP_MD_COUNTER_INCREMENT(exp, op)
+#endif
+
+static inline int lprocfs_nid_ldlm_stats_init(struct nid_stat* tmp)
+{
+	/* Always add in ldlm_stats */
+	tmp->nid_ldlm_stats = lprocfs_alloc_stats(LDLM_LAST_OPC - LDLM_FIRST_OPC
+						  ,LPROCFS_STATS_FLAG_NOPERCPU);
+	if (tmp->nid_ldlm_stats == NULL)
+		return -ENOMEM;
+
+	lprocfs_init_ldlm_stats(tmp->nid_ldlm_stats);
+
+	return lprocfs_register_stats(tmp->nid_proc, "ldlm_stats",
+				      tmp->nid_ldlm_stats);
+}
+
+#define OBD_CHECK_MD_OP(obd, op, err)			   \
+do {							    \
+	if (!OBT(obd) || !MDP((obd), op)) {		     \
+		if (err)					\
+			CERROR("md_" #op ": dev %s/%d no operation\n", \
+			       obd->obd_name, obd->obd_minor);  \
+		RETURN(err);				    \
+	}						       \
+} while (0)
+
+#define EXP_CHECK_MD_OP(exp, op)				\
+do {							    \
+	if ((exp) == NULL) {				    \
+		CERROR("obd_" #op ": NULL export\n");	   \
+		RETURN(-ENODEV);				\
+	}						       \
+	if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) {   \
+		CERROR("obd_" #op ": cleaned up obd\n");	\
+		RETURN(-EOPNOTSUPP);			    \
+	}						       \
+	if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \
+		CERROR("obd_" #op ": dev %s/%d no operation\n", \
+		       (exp)->exp_obd->obd_name,		\
+		       (exp)->exp_obd->obd_minor);	      \
+		RETURN(-EOPNOTSUPP);			    \
+	}						       \
+} while (0)
+
+
+#define OBD_CHECK_DT_OP(obd, op, err)			   \
+do {							    \
+	if (!OBT(obd) || !OBP((obd), op)) {		     \
+		if (err)					\
+			CERROR("obd_" #op ": dev %d no operation\n",    \
+			       obd->obd_minor);		 \
+		RETURN(err);				    \
+	}						       \
+} while (0)
+
+#define EXP_CHECK_DT_OP(exp, op)				\
+do {							    \
+	if ((exp) == NULL) {				    \
+		CERROR("obd_" #op ": NULL export\n");	   \
+		RETURN(-ENODEV);				\
+	}						       \
+	if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) {   \
+		CERROR("obd_" #op ": cleaned up obd\n");	\
+		RETURN(-EOPNOTSUPP);			    \
+	}						       \
+	if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \
+		CERROR("obd_" #op ": dev %d no operation\n",    \
+		       (exp)->exp_obd->obd_minor);	      \
+		RETURN(-EOPNOTSUPP);			    \
+	}						       \
+} while (0)
+
+#define CTXT_CHECK_OP(ctxt, op, err)				 \
+do {								 \
+	if (!OBT(ctxt->loc_obd) || !CTXTP((ctxt), op)) {	     \
+		if (err)					     \
+			CERROR("lop_" #op ": dev %d no operation\n", \
+			       ctxt->loc_obd->obd_minor);	    \
+		RETURN(err);					 \
+	}							    \
+} while (0)
+
+static inline int class_devno_max(void)
+{
+	return MAX_OBD_DEVICES;
+}
+
+static inline int obd_get_info(const struct lu_env *env,
+			       struct obd_export *exp, __u32 keylen,
+			       void *key, __u32 *vallen, void *val,
+			       struct lov_stripe_md *lsm)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, get_info);
+	EXP_COUNTER_INCREMENT(exp, get_info);
+
+	rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val,
+					 lsm);
+	RETURN(rc);
+}
+
+static inline int obd_set_info_async(const struct lu_env *env,
+				     struct obd_export *exp, obd_count keylen,
+				     void *key, obd_count vallen, void *val,
+				     struct ptlrpc_request_set *set)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, set_info_async);
+	EXP_COUNTER_INCREMENT(exp, set_info_async);
+
+	rc = OBP(exp->exp_obd, set_info_async)(env, exp, keylen, key, vallen,
+					       val, set);
+	RETURN(rc);
+}
+
+/*
+ * obd-lu integration.
+ *
+ * Functionality is being moved into new lu_device-based layering, but some
+ * pieces of configuration process are still based on obd devices.
+ *
+ * Specifically, lu_device_type_operations::ldto_device_alloc() methods fully
+ * subsume ->o_setup() methods of obd devices they replace. The same for
+ * lu_device_operations::ldo_process_config() and ->o_process_config(). As a
+ * result, obd_setup() and obd_process_config() branch and call one XOR
+ * another.
+ *
+ * Yet neither lu_device_type_operations::ldto_device_fini() nor
+ * lu_device_type_operations::ldto_device_free() fully implement the
+ * functionality of ->o_precleanup() and ->o_cleanup() they override. Hence,
+ * obd_precleanup() and obd_cleanup() call both lu_device and obd operations.
+ */
+
+#define DECLARE_LU_VARS(ldt, d)		 \
+	struct lu_device_type *ldt;       \
+	struct lu_device *d
+
+static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
+{
+	int rc;
+	DECLARE_LU_VARS(ldt, d);
+	ENTRY;
+
+	ldt = obd->obd_type->typ_lu;
+	if (ldt != NULL) {
+		struct lu_context  session_ctx;
+		struct lu_env env;
+		lu_context_init(&session_ctx, LCT_SESSION);
+		session_ctx.lc_thread = NULL;
+		lu_context_enter(&session_ctx);
+
+		rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+		if (rc == 0) {
+			env.le_ses = &session_ctx;
+			d = ldt->ldt_ops->ldto_device_alloc(&env, ldt, cfg);
+			lu_env_fini(&env);
+			if (!IS_ERR(d)) {
+				obd->obd_lu_dev = d;
+				d->ld_obd = obd;
+				rc = 0;
+			} else
+				rc = PTR_ERR(d);
+		}
+		lu_context_exit(&session_ctx);
+		lu_context_fini(&session_ctx);
+
+	} else {
+		OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP);
+		OBD_COUNTER_INCREMENT(obd, setup);
+		rc = OBP(obd, setup)(obd, cfg);
+	}
+	RETURN(rc);
+}
+
+static inline int obd_precleanup(struct obd_device *obd,
+				 enum obd_cleanup_stage cleanup_stage)
+{
+	int rc;
+	DECLARE_LU_VARS(ldt, d);
+	ENTRY;
+
+	OBD_CHECK_DEV(obd);
+	ldt = obd->obd_type->typ_lu;
+	d = obd->obd_lu_dev;
+	if (ldt != NULL && d != NULL) {
+		if (cleanup_stage == OBD_CLEANUP_EXPORTS) {
+			struct lu_env env;
+
+			rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+			if (rc == 0) {
+				ldt->ldt_ops->ldto_device_fini(&env, d);
+				lu_env_fini(&env);
+			}
+		}
+	}
+	OBD_CHECK_DT_OP(obd, precleanup, 0);
+	OBD_COUNTER_INCREMENT(obd, precleanup);
+
+	rc = OBP(obd, precleanup)(obd, cleanup_stage);
+	RETURN(rc);
+}
+
+static inline int obd_cleanup(struct obd_device *obd)
+{
+	int rc;
+	DECLARE_LU_VARS(ldt, d);
+	ENTRY;
+
+	OBD_CHECK_DEV(obd);
+
+	ldt = obd->obd_type->typ_lu;
+	d = obd->obd_lu_dev;
+	if (ldt != NULL && d != NULL) {
+		struct lu_env env;
+
+		rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+		if (rc == 0) {
+			ldt->ldt_ops->ldto_device_free(&env, d);
+			lu_env_fini(&env);
+			obd->obd_lu_dev = NULL;
+		}
+	}
+	OBD_CHECK_DT_OP(obd, cleanup, 0);
+	OBD_COUNTER_INCREMENT(obd, cleanup);
+
+	rc = OBP(obd, cleanup)(obd);
+	RETURN(rc);
+}
+
+static inline void obd_cleanup_client_import(struct obd_device *obd)
+{
+	ENTRY;
+
+	/* If we set up but never connected, the
+	   client import will not have been cleaned. */
+	down_write(&obd->u.cli.cl_sem);
+	if (obd->u.cli.cl_import) {
+		struct obd_import *imp;
+		imp = obd->u.cli.cl_import;
+		CDEBUG(D_CONFIG, "%s: client import never connected\n",
+		       obd->obd_name);
+		ptlrpc_invalidate_import(imp);
+		if (imp->imp_rq_pool) {
+			ptlrpc_free_rq_pool(imp->imp_rq_pool);
+			imp->imp_rq_pool = NULL;
+		}
+		client_destroy_import(imp);
+		obd->u.cli.cl_import = NULL;
+	}
+	up_write(&obd->u.cli.cl_sem);
+
+	EXIT;
+}
+
+static inline int
+obd_process_config(struct obd_device *obd, int datalen, void *data)
+{
+	int rc;
+	DECLARE_LU_VARS(ldt, d);
+	ENTRY;
+
+	OBD_CHECK_DEV(obd);
+
+	obd->obd_process_conf = 1;
+	ldt = obd->obd_type->typ_lu;
+	d = obd->obd_lu_dev;
+	if (ldt != NULL && d != NULL) {
+		struct lu_env env;
+
+		rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+		if (rc == 0) {
+			rc = d->ld_ops->ldo_process_config(&env, d, data);
+			lu_env_fini(&env);
+		}
+	} else {
+		OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP);
+		rc = OBP(obd, process_config)(obd, datalen, data);
+	}
+	OBD_COUNTER_INCREMENT(obd, process_config);
+	obd->obd_process_conf = 0;
+
+	RETURN(rc);
+}
+
+/* Pack an in-memory MD struct for storage on disk.
+ * Returns +ve size of packed MD (0 for free), or -ve error.
+ *
+ * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL).
+ * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed.
+ * If @*disk_tgt == NULL, it will be allocated
+ */
+static inline int obd_packmd(struct obd_export *exp,
+			     struct lov_mds_md **disk_tgt,
+			     struct lov_stripe_md *mem_src)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, packmd);
+	EXP_COUNTER_INCREMENT(exp, packmd);
+
+	rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src);
+	RETURN(rc);
+}
+
+static inline int obd_size_diskmd(struct obd_export *exp,
+				  struct lov_stripe_md *mem_src)
+{
+	return obd_packmd(exp, NULL, mem_src);
+}
+
+/* helper functions */
+static inline int obd_alloc_diskmd(struct obd_export *exp,
+				   struct lov_mds_md **disk_tgt)
+{
+	LASSERT(disk_tgt);
+	LASSERT(*disk_tgt == NULL);
+	return obd_packmd(exp, disk_tgt, NULL);
+}
+
+static inline int obd_free_diskmd(struct obd_export *exp,
+				  struct lov_mds_md **disk_tgt)
+{
+	LASSERT(disk_tgt);
+	LASSERT(*disk_tgt);
+	/*
+	 * LU-2590, for caller's convenience, *disk_tgt could be host
+	 * endianness, it needs swab to LE if necessary, while just
+	 * lov_mds_md header needs it for figuring out how much memory
+	 * needs to be freed.
+	 */
+	if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
+	    (((*disk_tgt)->lmm_magic == LOV_MAGIC_V1) ||
+	     ((*disk_tgt)->lmm_magic == LOV_MAGIC_V3)))
+		lustre_swab_lov_mds_md(*disk_tgt);
+	return obd_packmd(exp, disk_tgt, NULL);
+}
+
+/* Unpack an MD struct from disk to in-memory format.
+ * Returns +ve size of unpacked MD (0 for free), or -ve error.
+ *
+ * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL).
+ * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed.
+ * If @*mem_tgt == NULL, it will be allocated
+ */
+static inline int obd_unpackmd(struct obd_export *exp,
+			       struct lov_stripe_md **mem_tgt,
+			       struct lov_mds_md *disk_src,
+			       int disk_len)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, unpackmd);
+	EXP_COUNTER_INCREMENT(exp, unpackmd);
+
+	rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len);
+	RETURN(rc);
+}
+
+/* helper functions */
+static inline int obd_alloc_memmd(struct obd_export *exp,
+				  struct lov_stripe_md **mem_tgt)
+{
+	LASSERT(mem_tgt);
+	LASSERT(*mem_tgt == NULL);
+	return obd_unpackmd(exp, mem_tgt, NULL, 0);
+}
+
+static inline int obd_free_memmd(struct obd_export *exp,
+				 struct lov_stripe_md **mem_tgt)
+{
+	int rc;
+
+	LASSERT(mem_tgt);
+	LASSERT(*mem_tgt);
+	rc = obd_unpackmd(exp, mem_tgt, NULL, 0);
+	*mem_tgt = NULL;
+	return rc;
+}
+
+static inline int obd_precreate(struct obd_export *exp)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, precreate);
+	OBD_COUNTER_INCREMENT(exp->exp_obd, precreate);
+
+	rc = OBP(exp->exp_obd, precreate)(exp);
+	RETURN(rc);
+}
+
+static inline int obd_create_async(struct obd_export *exp,
+				   struct obd_info *oinfo,
+				   struct lov_stripe_md **ea,
+				   struct obd_trans_info *oti)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, create_async);
+	EXP_COUNTER_INCREMENT(exp, create_async);
+
+	rc = OBP(exp->exp_obd, create_async)(exp, oinfo, ea, oti);
+	RETURN(rc);
+}
+
+static inline int obd_create(const struct lu_env *env, struct obd_export *exp,
+			     struct obdo *obdo, struct lov_stripe_md **ea,
+			     struct obd_trans_info *oti)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, create);
+	EXP_COUNTER_INCREMENT(exp, create);
+
+	rc = OBP(exp->exp_obd, create)(env, exp, obdo, ea, oti);
+	RETURN(rc);
+}
+
+static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp,
+			      struct obdo *obdo, struct lov_stripe_md *ea,
+			      struct obd_trans_info *oti,
+			      struct obd_export *md_exp, void *capa)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, destroy);
+	EXP_COUNTER_INCREMENT(exp, destroy);
+
+	rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, ea, oti, md_exp, capa);
+	RETURN(rc);
+}
+
+static inline int obd_getattr(const struct lu_env *env, struct obd_export *exp,
+			      struct obd_info *oinfo)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, getattr);
+	EXP_COUNTER_INCREMENT(exp, getattr);
+
+	rc = OBP(exp->exp_obd, getattr)(env, exp, oinfo);
+	RETURN(rc);
+}
+
+static inline int obd_getattr_async(struct obd_export *exp,
+				    struct obd_info *oinfo,
+				    struct ptlrpc_request_set *set)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, getattr_async);
+	EXP_COUNTER_INCREMENT(exp, getattr_async);
+
+	rc = OBP(exp->exp_obd, getattr_async)(exp, oinfo, set);
+	RETURN(rc);
+}
+
+static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp,
+			      struct obd_info *oinfo,
+			      struct obd_trans_info *oti)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, setattr);
+	EXP_COUNTER_INCREMENT(exp, setattr);
+
+	rc = OBP(exp->exp_obd, setattr)(env, exp, oinfo, oti);
+	RETURN(rc);
+}
+
+/* This performs all the requests set init/wait/destroy actions. */
+static inline int obd_setattr_rqset(struct obd_export *exp,
+				    struct obd_info *oinfo,
+				    struct obd_trans_info *oti)
+{
+	struct ptlrpc_request_set *set = NULL;
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, setattr_async);
+	EXP_COUNTER_INCREMENT(exp, setattr_async);
+
+	set =  ptlrpc_prep_set();
+	if (set == NULL)
+		RETURN(-ENOMEM);
+
+	rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
+	if (rc == 0)
+		rc = ptlrpc_set_wait(set);
+	ptlrpc_set_destroy(set);
+	RETURN(rc);
+}
+
+/* This adds all the requests into @set if @set != NULL, otherwise
+   all requests are sent asynchronously without waiting for response. */
+static inline int obd_setattr_async(struct obd_export *exp,
+				    struct obd_info *oinfo,
+				    struct obd_trans_info *oti,
+				    struct ptlrpc_request_set *set)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, setattr_async);
+	EXP_COUNTER_INCREMENT(exp, setattr_async);
+
+	rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
+	RETURN(rc);
+}
+
+static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
+			       int priority)
+{
+	struct obd_device *obd = imp->imp_obd;
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DEV_ACTIVE(obd);
+	OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP);
+	OBD_COUNTER_INCREMENT(obd, add_conn);
+
+	rc = OBP(obd, add_conn)(imp, uuid, priority);
+	RETURN(rc);
+}
+
+static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
+{
+	struct obd_device *obd = imp->imp_obd;
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DEV_ACTIVE(obd);
+	OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP);
+	OBD_COUNTER_INCREMENT(obd, del_conn);
+
+	rc = OBP(obd, del_conn)(imp, uuid);
+	RETURN(rc);
+}
+
+static inline struct obd_uuid *obd_get_uuid(struct obd_export *exp)
+{
+	struct obd_uuid *uuid;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(exp->exp_obd, get_uuid, NULL);
+	EXP_COUNTER_INCREMENT(exp, get_uuid);
+
+	uuid = OBP(exp->exp_obd, get_uuid)(exp);
+	RETURN(uuid);
+}
+
+/** Create a new /a exp on device /a obd for the uuid /a cluuid
+ * @param exp New export handle
+ * @param d Connect data, supported flags are set, flags also understood
+ *    by obd are returned.
+ */
+static inline int obd_connect(const struct lu_env *env,
+			      struct obd_export **exp,struct obd_device *obd,
+			      struct obd_uuid *cluuid,
+			      struct obd_connect_data *data,
+			      void *localdata)
+{
+	int rc;
+	__u64 ocf = data ? data->ocd_connect_flags : 0; /* for post-condition
+						   * check */
+	ENTRY;
+
+	OBD_CHECK_DEV_ACTIVE(obd);
+	OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP);
+	OBD_COUNTER_INCREMENT(obd, connect);
+
+	rc = OBP(obd, connect)(env, exp, obd, cluuid, data, localdata);
+	/* check that only subset is granted */
+	LASSERT(ergo(data != NULL, (data->ocd_connect_flags & ocf) ==
+				    data->ocd_connect_flags));
+	RETURN(rc);
+}
+
+static inline int obd_reconnect(const struct lu_env *env,
+				struct obd_export *exp,
+				struct obd_device *obd,
+				struct obd_uuid *cluuid,
+				struct obd_connect_data *d,
+				void *localdata)
+{
+	int rc;
+	__u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition
+						   * check */
+
+	ENTRY;
+
+	OBD_CHECK_DEV_ACTIVE(obd);
+	OBD_CHECK_DT_OP(obd, reconnect, 0);
+	OBD_COUNTER_INCREMENT(obd, reconnect);
+
+	rc = OBP(obd, reconnect)(env, exp, obd, cluuid, d, localdata);
+	/* check that only subset is granted */
+	LASSERT(ergo(d != NULL,
+		     (d->ocd_connect_flags & ocf) == d->ocd_connect_flags));
+	RETURN(rc);
+}
+
+static inline int obd_disconnect(struct obd_export *exp)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, disconnect);
+	EXP_COUNTER_INCREMENT(exp, disconnect);
+
+	rc = OBP(exp->exp_obd, disconnect)(exp);
+	RETURN(rc);
+}
+
+static inline int obd_fid_init(struct obd_device *obd, struct obd_export *exp,
+			       enum lu_cli_type type)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(obd, fid_init, 0);
+	OBD_COUNTER_INCREMENT(obd, fid_init);
+
+	rc = OBP(obd, fid_init)(obd, exp, type);
+	RETURN(rc);
+}
+
+static inline int obd_fid_fini(struct obd_device *obd)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(obd, fid_fini, 0);
+	OBD_COUNTER_INCREMENT(obd, fid_fini);
+
+	rc = OBP(obd, fid_fini)(obd);
+	RETURN(rc);
+}
+
+static inline int obd_fid_alloc(struct obd_export *exp,
+				struct lu_fid *fid,
+				struct md_op_data *op_data)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, fid_alloc);
+	EXP_COUNTER_INCREMENT(exp, fid_alloc);
+
+	rc = OBP(exp->exp_obd, fid_alloc)(exp, fid, op_data);
+	RETURN(rc);
+}
+
+static inline int obd_ping(const struct lu_env *env, struct obd_export *exp)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(exp->exp_obd, ping, 0);
+	EXP_COUNTER_INCREMENT(exp, ping);
+
+	rc = OBP(exp->exp_obd, ping)(env, exp);
+	RETURN(rc);
+}
+
+static inline int obd_pool_new(struct obd_device *obd, char *poolname)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(obd, pool_new, -EOPNOTSUPP);
+	OBD_COUNTER_INCREMENT(obd, pool_new);
+
+	rc = OBP(obd, pool_new)(obd, poolname);
+	RETURN(rc);
+}
+
+static inline int obd_pool_del(struct obd_device *obd, char *poolname)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(obd, pool_del, -EOPNOTSUPP);
+	OBD_COUNTER_INCREMENT(obd, pool_del);
+
+	rc = OBP(obd, pool_del)(obd, poolname);
+	RETURN(rc);
+}
+
+static inline int obd_pool_add(struct obd_device *obd, char *poolname, char *ostname)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(obd, pool_add, -EOPNOTSUPP);
+	OBD_COUNTER_INCREMENT(obd, pool_add);
+
+	rc = OBP(obd, pool_add)(obd, poolname, ostname);
+	RETURN(rc);
+}
+
+static inline int obd_pool_rem(struct obd_device *obd, char *poolname, char *ostname)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(obd, pool_rem, -EOPNOTSUPP);
+	OBD_COUNTER_INCREMENT(obd, pool_rem);
+
+	rc = OBP(obd, pool_rem)(obd, poolname, ostname);
+	RETURN(rc);
+}
+
+static inline void obd_getref(struct obd_device *obd)
+{
+	ENTRY;
+	if (OBT(obd) && OBP(obd, getref)) {
+		OBD_COUNTER_INCREMENT(obd, getref);
+		OBP(obd, getref)(obd);
+	}
+	EXIT;
+}
+
+static inline void obd_putref(struct obd_device *obd)
+{
+	ENTRY;
+	if (OBT(obd) && OBP(obd, putref)) {
+		OBD_COUNTER_INCREMENT(obd, putref);
+		OBP(obd, putref)(obd);
+	}
+	EXIT;
+}
+
+static inline int obd_init_export(struct obd_export *exp)
+{
+	int rc = 0;
+
+	ENTRY;
+	if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) &&
+	    OBP((exp)->exp_obd, init_export))
+		rc = OBP(exp->exp_obd, init_export)(exp);
+	RETURN(rc);
+}
+
+static inline int obd_destroy_export(struct obd_export *exp)
+{
+	ENTRY;
+	if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) &&
+	    OBP((exp)->exp_obd, destroy_export))
+		OBP(exp->exp_obd, destroy_export)(exp);
+	RETURN(0);
+}
+
+static inline int obd_extent_calc(struct obd_export *exp,
+				  struct lov_stripe_md *md,
+				  int cmd, obd_off *offset)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_DT_OP(exp, extent_calc);
+	rc = OBP(exp->exp_obd, extent_calc)(exp, md, cmd, offset);
+	RETURN(rc);
+}
+
+static inline struct dentry *
+obd_lvfs_fid2dentry(struct obd_export *exp, struct ost_id *oi, __u32 gen)
+{
+	struct lvfs_run_ctxt *ctxt = &exp->exp_obd->obd_lvfs_ctxt;
+	LASSERT(exp->exp_obd);
+
+	return ctxt->cb_ops.l_fid2dentry(ostid_id(oi), gen, ostid_seq(oi),
+					 exp->exp_obd);
+}
+
+/* @max_age is the oldest time in jiffies that we accept using a cached data.
+ * If the cache is older than @max_age we will get a new value from the
+ * target.  Use a value of "cfs_time_current() + HZ" to guarantee freshness. */
+static inline int obd_statfs_async(struct obd_export *exp,
+				   struct obd_info *oinfo,
+				   __u64 max_age,
+				   struct ptlrpc_request_set *rqset)
+{
+	int rc = 0;
+	struct obd_device *obd;
+	ENTRY;
+
+	if (exp == NULL || exp->exp_obd == NULL)
+		RETURN(-EINVAL);
+
+	obd = exp->exp_obd;
+	OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP);
+	OBD_COUNTER_INCREMENT(obd, statfs);
+
+	CDEBUG(D_SUPER, "%s: osfs %p age "LPU64", max_age "LPU64"\n",
+	       obd->obd_name, &obd->obd_osfs, obd->obd_osfs_age, max_age);
+	if (cfs_time_before_64(obd->obd_osfs_age, max_age)) {
+		rc = OBP(obd, statfs_async)(exp, oinfo, max_age, rqset);
+	} else {
+		CDEBUG(D_SUPER,"%s: use %p cache blocks "LPU64"/"LPU64
+		       " objects "LPU64"/"LPU64"\n",
+		       obd->obd_name, &obd->obd_osfs,
+		       obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks,
+		       obd->obd_osfs.os_ffree, obd->obd_osfs.os_files);
+		spin_lock(&obd->obd_osfs_lock);
+		memcpy(oinfo->oi_osfs, &obd->obd_osfs, sizeof(*oinfo->oi_osfs));
+		spin_unlock(&obd->obd_osfs_lock);
+		oinfo->oi_flags |= OBD_STATFS_FROM_CACHE;
+		if (oinfo->oi_cb_up)
+			oinfo->oi_cb_up(oinfo, 0);
+	}
+	RETURN(rc);
+}
+
+static inline int obd_statfs_rqset(struct obd_export *exp,
+				   struct obd_statfs *osfs, __u64 max_age,
+				   __u32 flags)
+{
+	struct ptlrpc_request_set *set = NULL;
+	struct obd_info oinfo = { { { 0 } } };
+	int rc = 0;
+	ENTRY;
+
+	set =  ptlrpc_prep_set();
+	if (set == NULL)
+		RETURN(-ENOMEM);
+
+	oinfo.oi_osfs = osfs;
+	oinfo.oi_flags = flags;
+	rc = obd_statfs_async(exp, &oinfo, max_age, set);
+	if (rc == 0)
+		rc = ptlrpc_set_wait(set);
+	ptlrpc_set_destroy(set);
+	RETURN(rc);
+}
+
+/* @max_age is the oldest time in jiffies that we accept using a cached data.
+ * If the cache is older than @max_age we will get a new value from the
+ * target.  Use a value of "cfs_time_current() + HZ" to guarantee freshness. */
+static inline int obd_statfs(const struct lu_env *env, struct obd_export *exp,
+			     struct obd_statfs *osfs, __u64 max_age,
+			     __u32 flags)
+{
+	int rc = 0;
+	struct obd_device *obd = exp->exp_obd;
+	ENTRY;
+
+	if (obd == NULL)
+		RETURN(-EINVAL);
+
+	OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP);
+	OBD_COUNTER_INCREMENT(obd, statfs);
+
+	CDEBUG(D_SUPER, "osfs "LPU64", max_age "LPU64"\n",
+	       obd->obd_osfs_age, max_age);
+	if (cfs_time_before_64(obd->obd_osfs_age, max_age)) {
+		rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags);
+		if (rc == 0) {
+			spin_lock(&obd->obd_osfs_lock);
+			memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
+			obd->obd_osfs_age = cfs_time_current_64();
+			spin_unlock(&obd->obd_osfs_lock);
+		}
+	} else {
+		CDEBUG(D_SUPER, "%s: use %p cache blocks "LPU64"/"LPU64
+		       " objects "LPU64"/"LPU64"\n",
+		       obd->obd_name, &obd->obd_osfs,
+		       obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks,
+		       obd->obd_osfs.os_ffree, obd->obd_osfs.os_files);
+		spin_lock(&obd->obd_osfs_lock);
+		memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
+		spin_unlock(&obd->obd_osfs_lock);
+	}
+	RETURN(rc);
+}
+
+static inline int obd_sync_rqset(struct obd_export *exp, struct obd_info *oinfo,
+				 obd_size start, obd_size end)
+{
+	struct ptlrpc_request_set *set = NULL;
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP);
+	EXP_COUNTER_INCREMENT(exp, sync);
+
+	set =  ptlrpc_prep_set();
+	if (set == NULL)
+		RETURN(-ENOMEM);
+
+	rc = OBP(exp->exp_obd, sync)(NULL, exp, oinfo, start, end, set);
+	if (rc == 0)
+		rc = ptlrpc_set_wait(set);
+	ptlrpc_set_destroy(set);
+	RETURN(rc);
+}
+
+static inline int obd_sync(const struct lu_env *env, struct obd_export *exp,
+			   struct obd_info *oinfo, obd_size start, obd_size end,
+			   struct ptlrpc_request_set *set)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP);
+	EXP_COUNTER_INCREMENT(exp, sync);
+
+	rc = OBP(exp->exp_obd, sync)(env, exp, oinfo, start, end, set);
+	RETURN(rc);
+}
+
+static inline int obd_punch_rqset(struct obd_export *exp,
+				  struct obd_info *oinfo,
+				  struct obd_trans_info *oti)
+{
+	struct ptlrpc_request_set *set = NULL;
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, punch);
+	EXP_COUNTER_INCREMENT(exp, punch);
+
+	set =  ptlrpc_prep_set();
+	if (set == NULL)
+		RETURN(-ENOMEM);
+
+	rc = OBP(exp->exp_obd, punch)(NULL, exp, oinfo, oti, set);
+	if (rc == 0)
+		rc = ptlrpc_set_wait(set);
+	ptlrpc_set_destroy(set);
+	RETURN(rc);
+}
+
+static inline int obd_punch(const struct lu_env *env, struct obd_export *exp,
+			    struct obd_info *oinfo, struct obd_trans_info *oti,
+			    struct ptlrpc_request_set *rqset)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, punch);
+	EXP_COUNTER_INCREMENT(exp, punch);
+
+	rc = OBP(exp->exp_obd, punch)(env, exp, oinfo, oti, rqset);
+	RETURN(rc);
+}
+
+static inline int obd_brw(int cmd, struct obd_export *exp,
+			  struct obd_info *oinfo, obd_count oa_bufs,
+			  struct brw_page *pg, struct obd_trans_info *oti)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, brw);
+	EXP_COUNTER_INCREMENT(exp, brw);
+
+	if (!(cmd & (OBD_BRW_RWMASK | OBD_BRW_CHECK))) {
+		CERROR("obd_brw: cmd must be OBD_BRW_READ, OBD_BRW_WRITE, "
+		       "or OBD_BRW_CHECK\n");
+		LBUG();
+	}
+
+	rc = OBP(exp->exp_obd, brw)(cmd, exp, oinfo, oa_bufs, pg, oti);
+	RETURN(rc);
+}
+
+static inline int obd_preprw(const struct lu_env *env, int cmd,
+			     struct obd_export *exp, struct obdo *oa,
+			     int objcount, struct obd_ioobj *obj,
+			     struct niobuf_remote *remote, int *pages,
+			     struct niobuf_local *local,
+			     struct obd_trans_info *oti,
+			     struct lustre_capa *capa)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, preprw);
+	EXP_COUNTER_INCREMENT(exp, preprw);
+
+	rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote,
+				       pages, local, oti, capa);
+	RETURN(rc);
+}
+
+static inline int obd_commitrw(const struct lu_env *env, int cmd,
+			       struct obd_export *exp, struct obdo *oa,
+			       int objcount, struct obd_ioobj *obj,
+			       struct niobuf_remote *rnb, int pages,
+			       struct niobuf_local *local,
+			       struct obd_trans_info *oti, int rc)
+{
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, commitrw);
+	EXP_COUNTER_INCREMENT(exp, commitrw);
+
+	rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj,
+					 rnb, pages, local, oti, rc);
+	RETURN(rc);
+}
+
+static inline int obd_merge_lvb(struct obd_export *exp,
+				struct lov_stripe_md *lsm,
+				struct ost_lvb *lvb, int kms_only)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, merge_lvb);
+	EXP_COUNTER_INCREMENT(exp, merge_lvb);
+
+	rc = OBP(exp->exp_obd, merge_lvb)(exp, lsm, lvb, kms_only);
+	RETURN(rc);
+}
+
+static inline int obd_adjust_kms(struct obd_export *exp,
+				 struct lov_stripe_md *lsm, obd_off size,
+				 int shrink)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, adjust_kms);
+	EXP_COUNTER_INCREMENT(exp, adjust_kms);
+
+	rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink);
+	RETURN(rc);
+}
+
+static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
+				int len, void *karg, void *uarg)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, iocontrol);
+	EXP_COUNTER_INCREMENT(exp, iocontrol);
+
+	rc = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg);
+	RETURN(rc);
+}
+
+static inline int obd_enqueue_rqset(struct obd_export *exp,
+				    struct obd_info *oinfo,
+				    struct ldlm_enqueue_info *einfo)
+{
+	struct ptlrpc_request_set *set = NULL;
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, enqueue);
+	EXP_COUNTER_INCREMENT(exp, enqueue);
+
+	set =  ptlrpc_prep_set();
+	if (set == NULL)
+		RETURN(-ENOMEM);
+
+	rc = OBP(exp->exp_obd, enqueue)(exp, oinfo, einfo, set);
+	if (rc == 0)
+		rc = ptlrpc_set_wait(set);
+	ptlrpc_set_destroy(set);
+	RETURN(rc);
+}
+
+static inline int obd_enqueue(struct obd_export *exp,
+			      struct obd_info *oinfo,
+			      struct ldlm_enqueue_info *einfo,
+			      struct ptlrpc_request_set *set)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, enqueue);
+	EXP_COUNTER_INCREMENT(exp, enqueue);
+
+	rc = OBP(exp->exp_obd, enqueue)(exp, oinfo, einfo, set);
+	RETURN(rc);
+}
+
+static inline int obd_change_cbdata(struct obd_export *exp,
+				    struct lov_stripe_md *lsm,
+				    ldlm_iterator_t it, void *data)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, change_cbdata);
+	EXP_COUNTER_INCREMENT(exp, change_cbdata);
+
+	rc = OBP(exp->exp_obd, change_cbdata)(exp, lsm, it, data);
+	RETURN(rc);
+}
+
+static inline int obd_find_cbdata(struct obd_export *exp,
+				  struct lov_stripe_md *lsm,
+				  ldlm_iterator_t it, void *data)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, find_cbdata);
+	EXP_COUNTER_INCREMENT(exp, find_cbdata);
+
+	rc = OBP(exp->exp_obd, find_cbdata)(exp, lsm, it, data);
+	RETURN(rc);
+}
+
+static inline int obd_cancel(struct obd_export *exp,
+			     struct lov_stripe_md *ea, __u32 mode,
+			     struct lustre_handle *lockh)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, cancel);
+	EXP_COUNTER_INCREMENT(exp, cancel);
+
+	rc = OBP(exp->exp_obd, cancel)(exp, ea, mode, lockh);
+	RETURN(rc);
+}
+
+static inline int obd_cancel_unused(struct obd_export *exp,
+				    struct lov_stripe_md *ea,
+				    ldlm_cancel_flags_t flags,
+				    void *opaque)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, cancel_unused);
+	EXP_COUNTER_INCREMENT(exp, cancel_unused);
+
+	rc = OBP(exp->exp_obd, cancel_unused)(exp, ea, flags, opaque);
+	RETURN(rc);
+}
+
+static inline int obd_pin(struct obd_export *exp, const struct lu_fid *fid,
+			  struct obd_capa *oc, struct obd_client_handle *handle,
+			  int flag)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, pin);
+	EXP_COUNTER_INCREMENT(exp, pin);
+
+	rc = OBP(exp->exp_obd, pin)(exp, fid, oc, handle, flag);
+	RETURN(rc);
+}
+
+static inline int obd_unpin(struct obd_export *exp,
+			    struct obd_client_handle *handle, int flag)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, unpin);
+	EXP_COUNTER_INCREMENT(exp, unpin);
+
+	rc = OBP(exp->exp_obd, unpin)(exp, handle, flag);
+	RETURN(rc);
+}
+
+
+static inline void obd_import_event(struct obd_device *obd,
+				    struct obd_import *imp,
+				    enum obd_import_event event)
+{
+	ENTRY;
+	if (!obd) {
+		CERROR("NULL device\n");
+		EXIT;
+		return;
+	}
+	if (obd->obd_set_up && OBP(obd, import_event)) {
+		OBD_COUNTER_INCREMENT(obd, import_event);
+		OBP(obd, import_event)(obd, imp, event);
+	}
+	EXIT;
+}
+
+static inline int obd_llog_connect(struct obd_export *exp,
+				   struct llogd_conn_body *body)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(exp->exp_obd, llog_connect, 0);
+	EXP_COUNTER_INCREMENT(exp, llog_connect);
+
+	rc = OBP(exp->exp_obd, llog_connect)(exp, body);
+	RETURN(rc);
+}
+
+
+static inline int obd_notify(struct obd_device *obd,
+			     struct obd_device *watched,
+			     enum obd_notify_event ev,
+			     void *data)
+{
+	int rc;
+	ENTRY;
+	OBD_CHECK_DEV(obd);
+
+	/* the check for async_recov is a complete hack - I'm hereby
+	   overloading the meaning to also mean "this was called from
+	   mds_postsetup".  I know that my mds is able to handle notifies
+	   by this point, and it needs to get them to execute mds_postrecov. */
+	if (!obd->obd_set_up && !obd->obd_async_recov) {
+		CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name);
+		RETURN(-EINVAL);
+	}
+
+	if (!OBP(obd, notify)) {
+		CDEBUG(D_HA, "obd %s has no notify handler\n", obd->obd_name);
+		RETURN(-ENOSYS);
+	}
+
+	OBD_COUNTER_INCREMENT(obd, notify);
+	rc = OBP(obd, notify)(obd, watched, ev, data);
+	RETURN(rc);
+}
+
+static inline int obd_notify_observer(struct obd_device *observer,
+				      struct obd_device *observed,
+				      enum obd_notify_event ev,
+				      void *data)
+{
+	int rc1;
+	int rc2;
+
+	struct obd_notify_upcall *onu;
+
+	if (observer->obd_observer)
+		rc1 = obd_notify(observer->obd_observer, observed, ev, data);
+	else
+		rc1 = 0;
+	/*
+	 * Also, call non-obd listener, if any
+	 */
+	onu = &observer->obd_upcall;
+	if (onu->onu_upcall != NULL)
+		rc2 = onu->onu_upcall(observer, observed, ev,
+				      onu->onu_owner, NULL);
+	else
+		rc2 = 0;
+
+	return rc1 ? rc1 : rc2;
+}
+
+static inline int obd_quotacheck(struct obd_export *exp,
+				 struct obd_quotactl *oqctl)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, quotacheck);
+	EXP_COUNTER_INCREMENT(exp, quotacheck);
+
+	rc = OBP(exp->exp_obd, quotacheck)(exp->exp_obd, exp, oqctl);
+	RETURN(rc);
+}
+
+static inline int obd_quotactl(struct obd_export *exp,
+			       struct obd_quotactl *oqctl)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, quotactl);
+	EXP_COUNTER_INCREMENT(exp, quotactl);
+
+	rc = OBP(exp->exp_obd, quotactl)(exp->exp_obd, exp, oqctl);
+	RETURN(rc);
+}
+
+static inline int obd_health_check(const struct lu_env *env,
+				   struct obd_device *obd)
+{
+	/* returns: 0 on healthy
+	 *	 >0 on unhealthy + reason code/flag
+	 *	    however the only suppored reason == 1 right now
+	 *	    We'll need to define some better reasons
+	 *	    or flags in the future.
+	 *	 <0 on error
+	 */
+	int rc;
+	ENTRY;
+
+	/* don't use EXP_CHECK_DT_OP, because NULL method is normal here */
+	if (obd == NULL || !OBT(obd)) {
+		CERROR("cleaned up obd\n");
+		RETURN(-EOPNOTSUPP);
+	}
+	if (!obd->obd_set_up || obd->obd_stopping)
+		RETURN(0);
+	if (!OBP(obd, health_check))
+		RETURN(0);
+
+	rc = OBP(obd, health_check)(env, obd);
+	RETURN(rc);
+}
+
+static inline int obd_register_observer(struct obd_device *obd,
+					struct obd_device *observer)
+{
+	ENTRY;
+	OBD_CHECK_DEV(obd);
+	down_write(&obd->obd_observer_link_sem);
+	if (obd->obd_observer && observer) {
+		up_write(&obd->obd_observer_link_sem);
+		RETURN(-EALREADY);
+	}
+	obd->obd_observer = observer;
+	up_write(&obd->obd_observer_link_sem);
+	RETURN(0);
+}
+
+static inline int obd_pin_observer(struct obd_device *obd,
+				   struct obd_device **observer)
+{
+	ENTRY;
+	down_read(&obd->obd_observer_link_sem);
+	if (!obd->obd_observer) {
+		*observer = NULL;
+		up_read(&obd->obd_observer_link_sem);
+		RETURN(-ENOENT);
+	}
+	*observer = obd->obd_observer;
+	RETURN(0);
+}
+
+static inline int obd_unpin_observer(struct obd_device *obd)
+{
+	ENTRY;
+	up_read(&obd->obd_observer_link_sem);
+	RETURN(0);
+}
+
+#if 0
+static inline int obd_register_page_removal_cb(struct obd_export *exp,
+					       obd_page_removal_cb_t cb,
+					       obd_pin_extent_cb pin_cb)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(exp->exp_obd, register_page_removal_cb, 0);
+	OBD_COUNTER_INCREMENT(exp->exp_obd, register_page_removal_cb);
+
+	rc = OBP(exp->exp_obd, register_page_removal_cb)(exp, cb, pin_cb);
+	RETURN(rc);
+}
+
+static inline int obd_unregister_page_removal_cb(struct obd_export *exp,
+						 obd_page_removal_cb_t cb)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(exp->exp_obd, unregister_page_removal_cb, 0);
+	OBD_COUNTER_INCREMENT(exp->exp_obd, unregister_page_removal_cb);
+
+	rc = OBP(exp->exp_obd, unregister_page_removal_cb)(exp, cb);
+	RETURN(rc);
+}
+
+static inline int obd_register_lock_cancel_cb(struct obd_export *exp,
+					      obd_lock_cancel_cb cb)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(exp->exp_obd, register_lock_cancel_cb, 0);
+	OBD_COUNTER_INCREMENT(exp->exp_obd, register_lock_cancel_cb);
+
+	rc = OBP(exp->exp_obd, register_lock_cancel_cb)(exp, cb);
+	RETURN(rc);
+}
+
+static inline int obd_unregister_lock_cancel_cb(struct obd_export *exp,
+						 obd_lock_cancel_cb cb)
+{
+	int rc;
+	ENTRY;
+
+	OBD_CHECK_DT_OP(exp->exp_obd, unregister_lock_cancel_cb, 0);
+	OBD_COUNTER_INCREMENT(exp->exp_obd, unregister_lock_cancel_cb);
+
+	rc = OBP(exp->exp_obd, unregister_lock_cancel_cb)(exp, cb);
+	RETURN(rc);
+}
+#endif
+
+/* metadata helpers */
+static inline int md_getstatus(struct obd_export *exp,
+			       struct lu_fid *fid, struct obd_capa **pc)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_MD_OP(exp, getstatus);
+	EXP_MD_COUNTER_INCREMENT(exp, getstatus);
+	rc = MDP(exp->exp_obd, getstatus)(exp, fid, pc);
+	RETURN(rc);
+}
+
+static inline int md_getattr(struct obd_export *exp, struct md_op_data *op_data,
+			     struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, getattr);
+	EXP_MD_COUNTER_INCREMENT(exp, getattr);
+	rc = MDP(exp->exp_obd, getattr)(exp, op_data, request);
+	RETURN(rc);
+}
+
+static inline int md_null_inode(struct obd_export *exp,
+				   const struct lu_fid *fid)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, null_inode);
+	EXP_MD_COUNTER_INCREMENT(exp, null_inode);
+	rc = MDP(exp->exp_obd, null_inode)(exp, fid);
+	RETURN(rc);
+}
+
+static inline int md_find_cbdata(struct obd_export *exp,
+				 const struct lu_fid *fid,
+				 ldlm_iterator_t it, void *data)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, find_cbdata);
+	EXP_MD_COUNTER_INCREMENT(exp, find_cbdata);
+	rc = MDP(exp->exp_obd, find_cbdata)(exp, fid, it, data);
+	RETURN(rc);
+}
+
+static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
+			   struct md_open_data *mod,
+			   struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, close);
+	EXP_MD_COUNTER_INCREMENT(exp, close);
+	rc = MDP(exp->exp_obd, close)(exp, op_data, mod, request);
+	RETURN(rc);
+}
+
+static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
+			    const void *data, int datalen, int mode, __u32 uid,
+			    __u32 gid, cfs_cap_t cap_effective, __u64 rdev,
+			    struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, create);
+	EXP_MD_COUNTER_INCREMENT(exp, create);
+	rc = MDP(exp->exp_obd, create)(exp, op_data, data, datalen, mode,
+				       uid, gid, cap_effective, rdev, request);
+	RETURN(rc);
+}
+
+static inline int md_done_writing(struct obd_export *exp,
+				  struct md_op_data *op_data,
+				  struct md_open_data *mod)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, done_writing);
+	EXP_MD_COUNTER_INCREMENT(exp, done_writing);
+	rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod);
+	RETURN(rc);
+}
+
+static inline int md_enqueue(struct obd_export *exp,
+			     struct ldlm_enqueue_info *einfo,
+			     struct lookup_intent *it,
+			     struct md_op_data *op_data,
+			     struct lustre_handle *lockh,
+			     void *lmm, int lmmsize,
+			     struct ptlrpc_request **req,
+			     int extra_lock_flags)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, enqueue);
+	EXP_MD_COUNTER_INCREMENT(exp, enqueue);
+	rc = MDP(exp->exp_obd, enqueue)(exp, einfo, it, op_data, lockh,
+					lmm, lmmsize, req, extra_lock_flags);
+	RETURN(rc);
+}
+
+static inline int md_getattr_name(struct obd_export *exp,
+				  struct md_op_data *op_data,
+				  struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, getattr_name);
+	EXP_MD_COUNTER_INCREMENT(exp, getattr_name);
+	rc = MDP(exp->exp_obd, getattr_name)(exp, op_data, request);
+	RETURN(rc);
+}
+
+static inline int md_intent_lock(struct obd_export *exp,
+				 struct md_op_data *op_data, void *lmm,
+				 int lmmsize, struct lookup_intent *it,
+				 int lookup_flags, struct ptlrpc_request **reqp,
+				 ldlm_blocking_callback cb_blocking,
+				 __u64 extra_lock_flags)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, intent_lock);
+	EXP_MD_COUNTER_INCREMENT(exp, intent_lock);
+	rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, lmm, lmmsize,
+					    it, lookup_flags, reqp, cb_blocking,
+					    extra_lock_flags);
+	RETURN(rc);
+}
+
+static inline int md_link(struct obd_export *exp, struct md_op_data *op_data,
+			  struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, link);
+	EXP_MD_COUNTER_INCREMENT(exp, link);
+	rc = MDP(exp->exp_obd, link)(exp, op_data, request);
+	RETURN(rc);
+}
+
+static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
+			    const char *old, int oldlen, const char *new,
+			    int newlen, struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, rename);
+	EXP_MD_COUNTER_INCREMENT(exp, rename);
+	rc = MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen, new,
+				       newlen, request);
+	RETURN(rc);
+}
+
+static inline int md_is_subdir(struct obd_export *exp,
+			       const struct lu_fid *pfid,
+			       const struct lu_fid *cfid,
+			       struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, is_subdir);
+	EXP_MD_COUNTER_INCREMENT(exp, is_subdir);
+	rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, request);
+	RETURN(rc);
+}
+
+static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
+			     void *ea, int ealen, void *ea2, int ea2len,
+			     struct ptlrpc_request **request,
+			     struct md_open_data **mod)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, setattr);
+	EXP_MD_COUNTER_INCREMENT(exp, setattr);
+	rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen,
+					ea2, ea2len, request, mod);
+	RETURN(rc);
+}
+
+static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid,
+			  struct obd_capa *oc, struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, sync);
+	EXP_MD_COUNTER_INCREMENT(exp, sync);
+	rc = MDP(exp->exp_obd, sync)(exp, fid, oc, request);
+	RETURN(rc);
+}
+
+static inline int md_readpage(struct obd_export *exp, struct md_op_data *opdata,
+			      struct page **pages,
+			      struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, readpage);
+	EXP_MD_COUNTER_INCREMENT(exp, readpage);
+	rc = MDP(exp->exp_obd, readpage)(exp, opdata, pages, request);
+	RETURN(rc);
+}
+
+static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data,
+			    struct ptlrpc_request **request)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, unlink);
+	EXP_MD_COUNTER_INCREMENT(exp, unlink);
+	rc = MDP(exp->exp_obd, unlink)(exp, op_data, request);
+	RETURN(rc);
+}
+
+static inline int md_get_lustre_md(struct obd_export *exp,
+				   struct ptlrpc_request *req,
+				   struct obd_export *dt_exp,
+				   struct obd_export *md_exp,
+				   struct lustre_md *md)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, get_lustre_md);
+	EXP_MD_COUNTER_INCREMENT(exp, get_lustre_md);
+	RETURN(MDP(exp->exp_obd, get_lustre_md)(exp, req, dt_exp, md_exp, md));
+}
+
+static inline int md_free_lustre_md(struct obd_export *exp,
+				    struct lustre_md *md)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, free_lustre_md);
+	EXP_MD_COUNTER_INCREMENT(exp, free_lustre_md);
+	RETURN(MDP(exp->exp_obd, free_lustre_md)(exp, md));
+}
+
+static inline int md_setxattr(struct obd_export *exp,
+			      const struct lu_fid *fid, struct obd_capa *oc,
+			      obd_valid valid, const char *name,
+			      const char *input, int input_size,
+			      int output_size, int flags, __u32 suppgid,
+			      struct ptlrpc_request **request)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, setxattr);
+	EXP_MD_COUNTER_INCREMENT(exp, setxattr);
+	RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, oc, valid, name, input,
+					   input_size, output_size, flags,
+					   suppgid, request));
+}
+
+static inline int md_getxattr(struct obd_export *exp,
+			      const struct lu_fid *fid, struct obd_capa *oc,
+			      obd_valid valid, const char *name,
+			      const char *input, int input_size,
+			      int output_size, int flags,
+			      struct ptlrpc_request **request)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, getxattr);
+	EXP_MD_COUNTER_INCREMENT(exp, getxattr);
+	RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, oc, valid, name, input,
+					   input_size, output_size, flags,
+					   request));
+}
+
+static inline int md_set_open_replay_data(struct obd_export *exp,
+					  struct obd_client_handle *och,
+					  struct ptlrpc_request *open_req)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, set_open_replay_data);
+	EXP_MD_COUNTER_INCREMENT(exp, set_open_replay_data);
+	RETURN(MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req));
+}
+
+static inline int md_clear_open_replay_data(struct obd_export *exp,
+					    struct obd_client_handle *och)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, clear_open_replay_data);
+	EXP_MD_COUNTER_INCREMENT(exp, clear_open_replay_data);
+	RETURN(MDP(exp->exp_obd, clear_open_replay_data)(exp, och));
+}
+
+static inline int md_set_lock_data(struct obd_export *exp,
+				   __u64 *lockh, void *data, __u64 *bits)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, set_lock_data);
+	EXP_MD_COUNTER_INCREMENT(exp, set_lock_data);
+	RETURN(MDP(exp->exp_obd, set_lock_data)(exp, lockh, data, bits));
+}
+
+static inline int md_cancel_unused(struct obd_export *exp,
+				   const struct lu_fid *fid,
+				   ldlm_policy_data_t *policy,
+				   ldlm_mode_t mode,
+				   ldlm_cancel_flags_t flags,
+				   void *opaque)
+{
+	int rc;
+	ENTRY;
+
+	EXP_CHECK_MD_OP(exp, cancel_unused);
+	EXP_MD_COUNTER_INCREMENT(exp, cancel_unused);
+
+	rc = MDP(exp->exp_obd, cancel_unused)(exp, fid, policy, mode,
+					      flags, opaque);
+	RETURN(rc);
+}
+
+static inline ldlm_mode_t md_lock_match(struct obd_export *exp, __u64 flags,
+					const struct lu_fid *fid,
+					ldlm_type_t type,
+					ldlm_policy_data_t *policy,
+					ldlm_mode_t mode,
+					struct lustre_handle *lockh)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, lock_match);
+	EXP_MD_COUNTER_INCREMENT(exp, lock_match);
+	RETURN(MDP(exp->exp_obd, lock_match)(exp, flags, fid, type,
+					     policy, mode, lockh));
+}
+
+static inline int md_init_ea_size(struct obd_export *exp, int easize,
+				  int def_asize, int cookiesize)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, init_ea_size);
+	EXP_MD_COUNTER_INCREMENT(exp, init_ea_size);
+	RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize,
+					       cookiesize));
+}
+
+static inline int md_get_remote_perm(struct obd_export *exp,
+				     const struct lu_fid *fid,
+				     struct obd_capa *oc, __u32 suppgid,
+				     struct ptlrpc_request **request)
+{
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, get_remote_perm);
+	EXP_MD_COUNTER_INCREMENT(exp, get_remote_perm);
+	RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, oc, suppgid,
+						  request));
+}
+
+static inline int md_renew_capa(struct obd_export *exp, struct obd_capa *ocapa,
+				renew_capa_cb_t cb)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, renew_capa);
+	EXP_MD_COUNTER_INCREMENT(exp, renew_capa);
+	rc = MDP(exp->exp_obd, renew_capa)(exp, ocapa, cb);
+	RETURN(rc);
+}
+
+static inline int md_unpack_capa(struct obd_export *exp,
+				 struct ptlrpc_request *req,
+				 const struct req_msg_field *field,
+				 struct obd_capa **oc)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, unpack_capa);
+	EXP_MD_COUNTER_INCREMENT(exp, unpack_capa);
+	rc = MDP(exp->exp_obd, unpack_capa)(exp, req, field, oc);
+	RETURN(rc);
+}
+
+static inline int md_intent_getattr_async(struct obd_export *exp,
+					  struct md_enqueue_info *minfo,
+					  struct ldlm_enqueue_info *einfo)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, intent_getattr_async);
+	EXP_MD_COUNTER_INCREMENT(exp, intent_getattr_async);
+	rc = MDP(exp->exp_obd, intent_getattr_async)(exp, minfo, einfo);
+	RETURN(rc);
+}
+
+static inline int md_revalidate_lock(struct obd_export *exp,
+				     struct lookup_intent *it,
+				     struct lu_fid *fid, __u64 *bits)
+{
+	int rc;
+	ENTRY;
+	EXP_CHECK_MD_OP(exp, revalidate_lock);
+	EXP_MD_COUNTER_INCREMENT(exp, revalidate_lock);
+	rc = MDP(exp->exp_obd, revalidate_lock)(exp, it, fid, bits);
+	RETURN(rc);
+}
+
+
+/* OBD Metadata Support */
+
+extern int obd_init_caches(void);
+extern void obd_cleanup_caches(void);
+
+/* support routines */
+extern struct kmem_cache *obdo_cachep;
+
+#define OBDO_ALLOC(ptr)						       \
+do {									  \
+	OBD_SLAB_ALLOC_PTR_GFP((ptr), obdo_cachep, __GFP_IO);	     \
+} while(0)
+
+#define OBDO_FREE(ptr)							\
+do {									  \
+	OBD_SLAB_FREE_PTR((ptr), obdo_cachep);				\
+} while(0)
+
+
+static inline void obdo2fid(struct obdo *oa, struct lu_fid *fid)
+{
+	/* something here */
+}
+
+static inline void fid2obdo(struct lu_fid *fid, struct obdo *oa)
+{
+	/* something here */
+}
+
+typedef int (*register_lwp_cb)(void *data);
+
+struct lwp_register_item {
+	struct obd_export **lri_exp;
+	register_lwp_cb	    lri_cb_func;
+	void		   *lri_cb_data;
+	struct list_head	    lri_list;
+	char		    lri_name[MTI_NAME_MAXLEN];
+};
+
+/* I'm as embarrassed about this as you are.
+ *
+ * <shaver> // XXX do not look into _superhack with remaining eye
+ * <shaver> // XXX if this were any uglier, I'd get my own show on MTV */
+extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
+
+/* obd_mount.c */
+
+/* sysctl.c */
+extern void obd_sysctl_init (void);
+extern void obd_sysctl_clean (void);
+
+/* uuid.c  */
+typedef __u8 class_uuid_t[16];
+void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
+
+/* lustre_peer.c    */
+int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index);
+int class_add_uuid(const char *uuid, __u64 nid);
+int class_del_uuid (const char *uuid);
+int class_check_uuid(struct obd_uuid *uuid, __u64 nid);
+void class_init_uuidlist(void);
+void class_exit_uuidlist(void);
+
+/* mea.c */
+int mea_name2idx(struct lmv_stripe_md *mea, const char *name, int namelen);
+int raw_name2idx(int hashtype, int count, const char *name, int namelen);
+
+/* prng.c */
+#define ll_generate_random_uuid(uuid_out) cfs_get_random_bytes(uuid_out, sizeof(class_uuid_t))
+
+#endif /* __LINUX_OBD_CLASS_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_lov.h b/drivers/staging/lustre/lustre/include/obd_lov.h
new file mode 100644
index 000000000000..d82f3341d0a8
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_lov.h
@@ -0,0 +1,126 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _OBD_LOV_H__
+#define _OBD_LOV_H__
+
+#define LOV_DEFAULT_STRIPE_SIZE (1 << LNET_MTU_BITS)
+
+static inline int lov_stripe_md_size(__u16 stripes)
+{
+	return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo*);
+}
+
+static inline __u32 lov_mds_md_size(__u16 stripes, __u32 lmm_magic)
+{
+	if (lmm_magic == LOV_MAGIC_V3)
+		return sizeof(struct lov_mds_md_v3) +
+			stripes * sizeof(struct lov_ost_data_v1);
+	else
+		return sizeof(struct lov_mds_md_v1) +
+			stripes * sizeof(struct lov_ost_data_v1);
+}
+
+struct lov_version_size {
+	__u32   lvs_magic;
+	size_t  lvs_lmm_size;
+	size_t  lvs_lod_size;
+};
+
+static inline __u32 lov_mds_md_stripecnt(int ea_size, __u32 lmm_magic)
+{
+	static const struct lov_version_size lmm_ver_size[] = {
+			{ .lvs_magic = LOV_MAGIC_V3,
+			  .lvs_lmm_size = sizeof(struct lov_mds_md_v3),
+			  .lvs_lod_size = sizeof(struct lov_ost_data_v1) },
+			{ .lvs_magic = LOV_MAGIC_V1,
+			  .lvs_lmm_size = sizeof(struct lov_mds_md_v1),
+			  .lvs_lod_size = sizeof(struct lov_ost_data_v1)} };
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(lmm_ver_size); i++) {
+		if (lmm_magic == lmm_ver_size[i].lvs_magic) {
+			if (ea_size <= lmm_ver_size[i].lvs_lmm_size)
+				return 0;
+			return (ea_size - lmm_ver_size[i].lvs_lmm_size) /
+				lmm_ver_size[i].lvs_lod_size;
+		}
+	}
+
+	/* Invalid LOV magic, so no stripes could fit */
+	return 0;
+}
+
+/* lov_do_div64(a, b) returns a % b, and a = a / b.
+ * The 32-bit code is LOV-specific due to knowing about stripe limits in
+ * order to reduce the divisor to a 32-bit number.  If the divisor is
+ * already a 32-bit value the compiler handles this directly. */
+#if BITS_PER_LONG > 32
+# define lov_do_div64(n,base) ({					\
+	uint64_t __base = (base);					\
+	uint64_t __rem;							\
+	__rem = ((uint64_t)(n)) % __base;				\
+	(n) = ((uint64_t)(n)) / __base;					\
+	__rem;								\
+  })
+#else
+# define lov_do_div64(n,base) ({					\
+	uint64_t __rem;							\
+	if ((sizeof(base) > 4) && (((base) & 0xffffffff00000000ULL) != 0)) {  \
+		int __remainder;					      \
+		LASSERTF(!((base) & (LOV_MIN_STRIPE_SIZE - 1)), "64 bit lov " \
+			 "division %llu / %llu\n", (n), (uint64_t)(base));    \
+		__remainder = (n) & (LOV_MIN_STRIPE_SIZE - 1);		\
+		(n) >>= LOV_MIN_STRIPE_BITS;				\
+		__rem = do_div(n, (base) >> LOV_MIN_STRIPE_BITS);	\
+		__rem <<= LOV_MIN_STRIPE_BITS;				\
+		__rem += __remainder;					\
+	} else {							\
+		__rem = do_div(n, base);				\
+	}								\
+	__rem;								\
+  })
+#endif
+
+#define IOC_LOV_TYPE		   'g'
+#define IOC_LOV_MIN_NR		 50
+#define IOC_LOV_SET_OSC_ACTIVE	 _IOWR('g', 50, long)
+#define IOC_LOV_MAX_NR		 50
+
+#define QOS_DEFAULT_THRESHOLD	   10 /* MB */
+#define QOS_DEFAULT_MAXAGE	      5  /* Seconds */
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/obd_ost.h b/drivers/staging/lustre/lustre/include/obd_ost.h
new file mode 100644
index 000000000000..af89843c312b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_ost.h
@@ -0,0 +1,96 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/include/obd_ost.h
+ *
+ * Data structures for object storage targets and client: OST & OSC's
+ *
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_OST_H
+#define _LUSTRE_OST_H
+
+#include <obd_class.h>
+
+struct osc_brw_async_args {
+	struct obdo       *aa_oa;
+	int		aa_requested_nob;
+	int		aa_nio_count;
+	obd_count	  aa_page_count;
+	int		aa_resends;
+	struct brw_page  **aa_ppga;
+	struct client_obd *aa_cli;
+	struct list_head	 aa_oaps;
+	struct list_head	 aa_exts;
+	struct obd_capa   *aa_ocapa;
+	struct cl_req     *aa_clerq;
+};
+
+#define osc_grant_args osc_brw_async_args
+struct osc_async_args {
+	struct obd_info   *aa_oi;
+};
+
+struct osc_setattr_args {
+	struct obdo	 *sa_oa;
+	obd_enqueue_update_f sa_upcall;
+	void		*sa_cookie;
+};
+
+struct osc_fsync_args {
+	struct obd_info     *fa_oi;
+	obd_enqueue_update_f fa_upcall;
+	void		*fa_cookie;
+};
+
+struct osc_enqueue_args {
+	struct obd_export	*oa_exp;
+	__u64		    *oa_flags;
+	obd_enqueue_update_f      oa_upcall;
+	void		     *oa_cookie;
+	struct ost_lvb	   *oa_lvb;
+	struct lustre_handle     *oa_lockh;
+	struct ldlm_enqueue_info *oa_ei;
+	unsigned int	      oa_agl:1;
+};
+
+#if 0
+int osc_extent_blocking_cb(struct ldlm_lock *lock,
+			   struct ldlm_lock_desc *new, void *data,
+			   int flag);
+#endif
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
new file mode 100644
index 000000000000..b5d40afc3599
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -0,0 +1,851 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#ifndef _OBD_SUPPORT
+#define _OBD_SUPPORT
+
+#include <linux/libcfs/libcfs.h>
+#include <lvfs.h>
+#include <lprocfs_status.h>
+
+#include <linux/obd_support.h>
+
+/* global variables */
+extern struct lprocfs_stats *obd_memory;
+enum {
+	OBD_MEMORY_STAT = 0,
+	OBD_MEMORY_PAGES_STAT = 1,
+	OBD_STATS_NUM,
+};
+
+extern unsigned int obd_debug_peer_on_timeout;
+extern unsigned int obd_dump_on_timeout;
+extern unsigned int obd_dump_on_eviction;
+/* obd_timeout should only be used for recovery, not for
+   networking / disk / timings affected by load (use Adaptive Timeouts) */
+extern unsigned int obd_timeout;	  /* seconds */
+extern unsigned int ldlm_timeout;	 /* seconds */
+extern unsigned int obd_timeout_set;
+extern unsigned int ldlm_timeout_set;
+extern unsigned int at_min;
+extern unsigned int at_max;
+extern unsigned int at_history;
+extern int at_early_margin;
+extern int at_extra;
+extern unsigned int obd_sync_filter;
+extern unsigned int obd_max_dirty_pages;
+extern atomic_t obd_dirty_pages;
+extern atomic_t obd_dirty_transit_pages;
+extern unsigned int obd_alloc_fail_rate;
+extern char obd_jobid_var[];
+
+/* lvfs.c */
+int obd_alloc_fail(const void *ptr, const char *name, const char *type,
+		   size_t size, const char *file, int line);
+
+/* Some hash init argument constants */
+#define HASH_POOLS_BKT_BITS 3
+#define HASH_POOLS_CUR_BITS 3
+#define HASH_POOLS_MAX_BITS 7
+#define HASH_UUID_BKT_BITS 5
+#define HASH_UUID_CUR_BITS 7
+#define HASH_UUID_MAX_BITS 12
+#define HASH_NID_BKT_BITS 5
+#define HASH_NID_CUR_BITS 7
+#define HASH_NID_MAX_BITS 12
+#define HASH_NID_STATS_BKT_BITS 5
+#define HASH_NID_STATS_CUR_BITS 7
+#define HASH_NID_STATS_MAX_BITS 12
+#define HASH_LQE_BKT_BITS 5
+#define HASH_LQE_CUR_BITS 7
+#define HASH_LQE_MAX_BITS 12
+#define HASH_CONN_BKT_BITS 5
+#define HASH_CONN_CUR_BITS 5
+#define HASH_CONN_MAX_BITS 15
+#define HASH_EXP_LOCK_BKT_BITS  5
+#define HASH_EXP_LOCK_CUR_BITS  7
+#define HASH_EXP_LOCK_MAX_BITS  16
+#define HASH_CL_ENV_BKT_BITS    5
+#define HASH_CL_ENV_BITS	10
+#define HASH_JOB_STATS_BKT_BITS 5
+#define HASH_JOB_STATS_CUR_BITS 7
+#define HASH_JOB_STATS_MAX_BITS 12
+
+/* Timeout definitions */
+#define OBD_TIMEOUT_DEFAULT	     100
+#define LDLM_TIMEOUT_DEFAULT	    20
+#define MDS_LDLM_TIMEOUT_DEFAULT	6
+/* Time to wait for all clients to reconnect during recovery (hard limit) */
+#define OBD_RECOVERY_TIME_HARD	  (obd_timeout * 9)
+/* Time to wait for all clients to reconnect during recovery (soft limit) */
+/* Should be very conservative; must catch the first reconnect after reboot */
+#define OBD_RECOVERY_TIME_SOFT	  (obd_timeout * 3)
+/* Change recovery-small 26b time if you change this */
+#define PING_INTERVAL max(obd_timeout / 4, 1U)
+/* a bit more than maximal journal commit time in seconds */
+#define PING_INTERVAL_SHORT min(PING_INTERVAL, 7U)
+/* Client may skip 1 ping; we must wait at least 2.5. But for multiple
+ * failover targets the client only pings one server at a time, and pings
+ * can be lost on a loaded network. Since eviction has serious consequences,
+ * and there's no urgent need to evict a client just because it's idle, we
+ * should be very conservative here. */
+#define PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
+#define DISK_TIMEOUT 50	  /* Beyond this we warn about disk speed */
+#define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */
+ /* Max connect interval for nonresponsive servers; ~50s to avoid building up
+    connect requests in the LND queues, but within obd_timeout so we don't
+    miss the recovery window */
+#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN,obd_timeout))
+#define CONNECTION_SWITCH_INC 5  /* Connection timeout backoff */
+/* In general this should be low to have quick detection of a system
+   running on a backup server. (If it's too low, import_select_connection
+   will increase the timeout anyhow.)  */
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20)
+/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */
+#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \
+			     INITIAL_CONNECT_TIMEOUT)
+/* The min time a target should wait for clients to reconnect in recovery */
+#define OBD_RECOVERY_TIME_MIN    (2*RECONNECT_DELAY_MAX)
+#define OBD_IR_FACTOR_MIN	 1
+#define OBD_IR_FACTOR_MAX	 10
+#define OBD_IR_FACTOR_DEFAULT    (OBD_IR_FACTOR_MAX/2)
+/* default timeout for the MGS to become IR_FULL */
+#define OBD_IR_MGS_TIMEOUT       (4*obd_timeout)
+#define LONG_UNLINK 300	  /* Unlink should happen before now */
+
+/**
+ * Time interval of shrink, if the client is "idle" more than this interval,
+ * then the ll_grant thread will return the requested grant space to filter
+ */
+#define GRANT_SHRINK_INTERVAL	    1200/*20 minutes*/
+
+#define OBD_FAIL_MDS		     0x100
+#define OBD_FAIL_MDS_HANDLE_UNPACK       0x101
+#define OBD_FAIL_MDS_GETATTR_NET	 0x102
+#define OBD_FAIL_MDS_GETATTR_PACK	0x103
+#define OBD_FAIL_MDS_READPAGE_NET	0x104
+#define OBD_FAIL_MDS_READPAGE_PACK       0x105
+#define OBD_FAIL_MDS_SENDPAGE	    0x106
+#define OBD_FAIL_MDS_REINT_NET	   0x107
+#define OBD_FAIL_MDS_REINT_UNPACK	0x108
+#define OBD_FAIL_MDS_REINT_SETATTR       0x109
+#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a
+#define OBD_FAIL_MDS_REINT_CREATE	0x10b
+#define OBD_FAIL_MDS_REINT_CREATE_WRITE  0x10c
+#define OBD_FAIL_MDS_REINT_UNLINK	0x10d
+#define OBD_FAIL_MDS_REINT_UNLINK_WRITE  0x10e
+#define OBD_FAIL_MDS_REINT_LINK	  0x10f
+#define OBD_FAIL_MDS_REINT_LINK_WRITE    0x110
+#define OBD_FAIL_MDS_REINT_RENAME	0x111
+#define OBD_FAIL_MDS_REINT_RENAME_WRITE  0x112
+#define OBD_FAIL_MDS_OPEN_NET	    0x113
+#define OBD_FAIL_MDS_OPEN_PACK	   0x114
+#define OBD_FAIL_MDS_CLOSE_NET	   0x115
+#define OBD_FAIL_MDS_CLOSE_PACK	  0x116
+#define OBD_FAIL_MDS_CONNECT_NET	 0x117
+#define OBD_FAIL_MDS_CONNECT_PACK	0x118
+#define OBD_FAIL_MDS_REINT_NET_REP       0x119
+#define OBD_FAIL_MDS_DISCONNECT_NET      0x11a
+#define OBD_FAIL_MDS_GETSTATUS_NET       0x11b
+#define OBD_FAIL_MDS_GETSTATUS_PACK      0x11c
+#define OBD_FAIL_MDS_STATFS_PACK	 0x11d
+#define OBD_FAIL_MDS_STATFS_NET	  0x11e
+#define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
+#define OBD_FAIL_MDS_PIN_NET	     0x120
+#define OBD_FAIL_MDS_UNPIN_NET	   0x121
+#define OBD_FAIL_MDS_ALL_REPLY_NET       0x122
+#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x123
+#define OBD_FAIL_MDS_SYNC_NET	    0x124
+#define OBD_FAIL_MDS_SYNC_PACK	   0x125
+#define OBD_FAIL_MDS_DONE_WRITING_NET    0x126
+#define OBD_FAIL_MDS_DONE_WRITING_PACK   0x127
+#define OBD_FAIL_MDS_ALLOC_OBDO	  0x128
+#define OBD_FAIL_MDS_PAUSE_OPEN	  0x129
+#define OBD_FAIL_MDS_STATFS_LCW_SLEEP    0x12a
+#define OBD_FAIL_MDS_OPEN_CREATE	 0x12b
+#define OBD_FAIL_MDS_OST_SETATTR	 0x12c
+#define OBD_FAIL_MDS_QUOTACHECK_NET      0x12d
+#define OBD_FAIL_MDS_QUOTACTL_NET	0x12e
+#define OBD_FAIL_MDS_CLIENT_ADD	  0x12f
+#define OBD_FAIL_MDS_GETXATTR_NET	0x130
+#define OBD_FAIL_MDS_GETXATTR_PACK       0x131
+#define OBD_FAIL_MDS_SETXATTR_NET	0x132
+#define OBD_FAIL_MDS_SETXATTR	    0x133
+#define OBD_FAIL_MDS_SETXATTR_WRITE      0x134
+#define OBD_FAIL_MDS_FS_SETUP	    0x135
+#define OBD_FAIL_MDS_RESEND	      0x136
+#define OBD_FAIL_MDS_LLOG_CREATE_FAILED  0x137
+#define OBD_FAIL_MDS_LOV_SYNC_RACE       0x138
+#define OBD_FAIL_MDS_OSC_PRECREATE       0x139
+#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT   0x13a
+#define OBD_FAIL_MDS_CLOSE_NET_REP       0x13b
+#define OBD_FAIL_MDS_BLOCK_QUOTA_REQ     0x13c
+#define OBD_FAIL_MDS_DROP_QUOTA_REQ      0x13d
+#define OBD_FAIL_MDS_REMOVE_COMMON_EA    0x13e
+#define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING   0x13f
+#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD    0x140
+#define OBD_FAIL_MDS_LOV_PREP_CREATE     0x141
+#define OBD_FAIL_MDS_REINT_DELAY	 0x142
+#define OBD_FAIL_MDS_READLINK_EPROTO     0x143
+#define OBD_FAIL_MDS_OPEN_WAIT_CREATE    0x144
+#define OBD_FAIL_MDS_PDO_LOCK	    0x145
+#define OBD_FAIL_MDS_PDO_LOCK2	   0x146
+#define OBD_FAIL_MDS_OSC_CREATE_FAIL     0x147
+#define OBD_FAIL_MDS_NEGATIVE_POSITIVE	 0x148
+#define OBD_FAIL_MDS_HSM_STATE_GET_NET		0x149
+#define OBD_FAIL_MDS_HSM_STATE_SET_NET		0x14a
+#define OBD_FAIL_MDS_HSM_PROGRESS_NET		0x14b
+#define OBD_FAIL_MDS_HSM_REQUEST_NET		0x14c
+#define OBD_FAIL_MDS_HSM_CT_REGISTER_NET	0x14d
+#define OBD_FAIL_MDS_HSM_CT_UNREGISTER_NET	0x14e
+#define OBD_FAIL_MDS_SWAP_LAYOUTS_NET		0x14f
+#define OBD_FAIL_MDS_HSM_ACTION_NET		0x150
+#define OBD_FAIL_MDS_CHANGELOG_INIT		0x151
+
+/* layout lock */
+#define OBD_FAIL_MDS_NO_LL_GETATTR	 0x170
+#define OBD_FAIL_MDS_NO_LL_OPEN		 0x171
+#define OBD_FAIL_MDS_LL_BLOCK		 0x172
+
+/* CMD */
+#define OBD_FAIL_MDS_IS_SUBDIR_NET       0x180
+#define OBD_FAIL_MDS_IS_SUBDIR_PACK      0x181
+#define OBD_FAIL_MDS_SET_INFO_NET	0x182
+#define OBD_FAIL_MDS_WRITEPAGE_NET       0x183
+#define OBD_FAIL_MDS_WRITEPAGE_PACK      0x184
+#define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x185
+#define OBD_FAIL_MDS_GET_INFO_NET	0x186
+#define OBD_FAIL_MDS_DQACQ_NET	   0x187
+
+/* OI scrub */
+#define OBD_FAIL_OSD_SCRUB_DELAY			0x190
+#define OBD_FAIL_OSD_SCRUB_CRASH			0x191
+#define OBD_FAIL_OSD_SCRUB_FATAL			0x192
+#define OBD_FAIL_OSD_FID_MAPPING			0x193
+#define OBD_FAIL_OSD_LMA_INCOMPAT			0x194
+
+#define OBD_FAIL_OST		     0x200
+#define OBD_FAIL_OST_CONNECT_NET	 0x201
+#define OBD_FAIL_OST_DISCONNECT_NET      0x202
+#define OBD_FAIL_OST_GET_INFO_NET	0x203
+#define OBD_FAIL_OST_CREATE_NET	  0x204
+#define OBD_FAIL_OST_DESTROY_NET	 0x205
+#define OBD_FAIL_OST_GETATTR_NET	 0x206
+#define OBD_FAIL_OST_SETATTR_NET	 0x207
+#define OBD_FAIL_OST_OPEN_NET	    0x208
+#define OBD_FAIL_OST_CLOSE_NET	   0x209
+#define OBD_FAIL_OST_BRW_NET	     0x20a
+#define OBD_FAIL_OST_PUNCH_NET	   0x20b
+#define OBD_FAIL_OST_STATFS_NET	  0x20c
+#define OBD_FAIL_OST_HANDLE_UNPACK       0x20d
+#define OBD_FAIL_OST_BRW_WRITE_BULK      0x20e
+#define OBD_FAIL_OST_BRW_READ_BULK       0x20f
+#define OBD_FAIL_OST_SYNC_NET	    0x210
+#define OBD_FAIL_OST_ALL_REPLY_NET       0x211
+#define OBD_FAIL_OST_ALL_REQUEST_NET     0x212
+#define OBD_FAIL_OST_LDLM_REPLY_NET      0x213
+#define OBD_FAIL_OST_BRW_PAUSE_BULK      0x214
+#define OBD_FAIL_OST_ENOSPC	      0x215
+#define OBD_FAIL_OST_EROFS	       0x216
+#define OBD_FAIL_OST_ENOENT	      0x217
+#define OBD_FAIL_OST_QUOTACHECK_NET      0x218
+#define OBD_FAIL_OST_QUOTACTL_NET	0x219
+#define OBD_FAIL_OST_CHECKSUM_RECEIVE    0x21a
+#define OBD_FAIL_OST_CHECKSUM_SEND       0x21b
+#define OBD_FAIL_OST_BRW_SIZE	    0x21c
+#define OBD_FAIL_OST_DROP_REQ	    0x21d
+#define OBD_FAIL_OST_SETATTR_CREDITS     0x21e
+#define OBD_FAIL_OST_HOLD_WRITE_RPC      0x21f
+#define OBD_FAIL_OST_BRW_WRITE_BULK2     0x220
+#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
+#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
+#define OBD_FAIL_OST_PAUSE_CREATE	0x223
+#define OBD_FAIL_OST_BRW_PAUSE_PACK      0x224
+#define OBD_FAIL_OST_CONNECT_NET2	0x225
+#define OBD_FAIL_OST_NOMEM	       0x226
+#define OBD_FAIL_OST_BRW_PAUSE_BULK2     0x227
+#define OBD_FAIL_OST_MAPBLK_ENOSPC       0x228
+#define OBD_FAIL_OST_ENOINO	      0x229
+#define OBD_FAIL_OST_DQACQ_NET	   0x230
+#define OBD_FAIL_OST_STATFS_EINPROGRESS  0x231
+
+#define OBD_FAIL_LDLM		    0x300
+#define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
+#define OBD_FAIL_LDLM_ENQUEUE_NET			0x302
+#define OBD_FAIL_LDLM_CONVERT_NET			0x303
+#define OBD_FAIL_LDLM_CANCEL_NET			0x304
+#define OBD_FAIL_LDLM_BL_CALLBACK_NET			0x305
+#define OBD_FAIL_LDLM_CP_CALLBACK_NET			0x306
+#define OBD_FAIL_LDLM_GL_CALLBACK_NET			0x307
+#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
+#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309
+#define OBD_FAIL_LDLM_CREATE_RESOURCE    0x30a
+#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED    0x30b
+#define OBD_FAIL_LDLM_REPLY	      0x30c
+#define OBD_FAIL_LDLM_RECOV_CLIENTS      0x30d
+#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e
+#define OBD_FAIL_LDLM_GLIMPSE	    0x30f
+#define OBD_FAIL_LDLM_CANCEL_RACE	0x310
+#define OBD_FAIL_LDLM_CANCEL_EVICT_RACE  0x311
+#define OBD_FAIL_LDLM_PAUSE_CANCEL       0x312
+#define OBD_FAIL_LDLM_CLOSE_THREAD       0x313
+#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE  0x314
+#define OBD_FAIL_LDLM_CP_CB_WAIT	 0x315
+#define OBD_FAIL_LDLM_OST_FAIL_RACE      0x316
+#define OBD_FAIL_LDLM_INTR_CP_AST	0x317
+#define OBD_FAIL_LDLM_CP_BL_RACE	 0x318
+#define OBD_FAIL_LDLM_NEW_LOCK	   0x319
+#define OBD_FAIL_LDLM_AGL_DELAY	  0x31a
+#define OBD_FAIL_LDLM_AGL_NOLOCK	 0x31b
+#define OBD_FAIL_LDLM_OST_LVB		 0x31c
+
+/* LOCKLESS IO */
+#define OBD_FAIL_LDLM_SET_CONTENTION     0x385
+
+#define OBD_FAIL_OSC		     0x400
+#define OBD_FAIL_OSC_BRW_READ_BULK       0x401
+#define OBD_FAIL_OSC_BRW_WRITE_BULK      0x402
+#define OBD_FAIL_OSC_LOCK_BL_AST	 0x403
+#define OBD_FAIL_OSC_LOCK_CP_AST	 0x404
+#define OBD_FAIL_OSC_MATCH	       0x405
+#define OBD_FAIL_OSC_BRW_PREP_REQ	0x406
+#define OBD_FAIL_OSC_SHUTDOWN	    0x407
+#define OBD_FAIL_OSC_CHECKSUM_RECEIVE    0x408
+#define OBD_FAIL_OSC_CHECKSUM_SEND       0x409
+#define OBD_FAIL_OSC_BRW_PREP_REQ2       0x40a
+#define OBD_FAIL_OSC_CONNECT_CKSUM       0x40b
+#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY    0x40c
+#define OBD_FAIL_OSC_DIO_PAUSE	   0x40d
+#define OBD_FAIL_OSC_OBJECT_CONTENTION   0x40e
+#define OBD_FAIL_OSC_CP_CANCEL_RACE      0x40f
+#define OBD_FAIL_OSC_CP_ENQ_RACE	 0x410
+#define OBD_FAIL_OSC_NO_GRANT	    0x411
+#define OBD_FAIL_OSC_DELAY_SETTIME	 0x412
+
+#define OBD_FAIL_PTLRPC		  0x500
+#define OBD_FAIL_PTLRPC_ACK	      0x501
+#define OBD_FAIL_PTLRPC_RQBD	     0x502
+#define OBD_FAIL_PTLRPC_BULK_GET_NET     0x503
+#define OBD_FAIL_PTLRPC_BULK_PUT_NET     0x504
+#define OBD_FAIL_PTLRPC_DROP_RPC	 0x505
+#define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
+#define OBD_FAIL_PTLRPC_DELAY_RECOV      0x507
+#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB   0x508
+#define OBD_FAIL_PTLRPC_PAUSE_REQ	0x50a
+#define OBD_FAIL_PTLRPC_PAUSE_REP	0x50c
+#define OBD_FAIL_PTLRPC_IMP_DEACTIVE     0x50d
+#define OBD_FAIL_PTLRPC_DUMP_LOG	 0x50e
+#define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
+#define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510
+#define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT    0x511
+#define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT  0x512
+#define OBD_FAIL_PTLRPC_DROP_REQ_OPC     0x513
+#define OBD_FAIL_PTLRPC_FINISH_REPLAY    0x514
+#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2  0x515
+#define OBD_FAIL_PTLRPC_DELAY_IMP_FULL   0x516
+#define OBD_FAIL_PTLRPC_CANCEL_RESEND    0x517
+
+#define OBD_FAIL_OBD_PING_NET	    0x600
+#define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
+#define OBD_FAIL_OBD_LOGD_NET	    0x602
+#define OBD_FAIL_OBD_QC_CALLBACK_NET     0x603
+#define OBD_FAIL_OBD_DQACQ	       0x604
+#define OBD_FAIL_OBD_LLOG_SETUP	  0x605
+#define OBD_FAIL_OBD_LOG_CANCEL_REP      0x606
+#define OBD_FAIL_OBD_IDX_READ_NET	0x607
+#define OBD_FAIL_OBD_IDX_READ_BREAK	 0x608
+#define OBD_FAIL_OBD_NO_LRU		 0x609
+
+#define OBD_FAIL_TGT_REPLY_NET	   0x700
+#define OBD_FAIL_TGT_CONN_RACE	   0x701
+#define OBD_FAIL_TGT_FORCE_RECONNECT     0x702
+#define OBD_FAIL_TGT_DELAY_CONNECT       0x703
+#define OBD_FAIL_TGT_DELAY_RECONNECT     0x704
+#define OBD_FAIL_TGT_DELAY_PRECREATE     0x705
+#define OBD_FAIL_TGT_TOOMANY_THREADS     0x706
+#define OBD_FAIL_TGT_REPLAY_DROP	 0x707
+#define OBD_FAIL_TGT_FAKE_EXP	    0x708
+#define OBD_FAIL_TGT_REPLAY_DELAY	0x709
+#define OBD_FAIL_TGT_LAST_REPLAY	 0x710
+#define OBD_FAIL_TGT_CLIENT_ADD	  0x711
+#define OBD_FAIL_TGT_RCVG_FLAG	   0x712
+
+#define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
+#define OBD_FAIL_MDC_ENQUEUE_PAUSE       0x801
+#define OBD_FAIL_MDC_OLD_EXT_FLAGS       0x802
+#define OBD_FAIL_MDC_GETATTR_ENQUEUE     0x803
+#define OBD_FAIL_MDC_RPCS_SEM		 0x804
+#define OBD_FAIL_MDC_LIGHTWEIGHT	 0x805
+
+#define OBD_FAIL_MGS		     0x900
+#define OBD_FAIL_MGS_ALL_REQUEST_NET     0x901
+#define OBD_FAIL_MGS_ALL_REPLY_NET       0x902
+#define OBD_FAIL_MGC_PAUSE_PROCESS_LOG   0x903
+#define OBD_FAIL_MGS_PAUSE_REQ	   0x904
+#define OBD_FAIL_MGS_PAUSE_TARGET_REG    0x905
+
+#define OBD_FAIL_QUOTA_DQACQ_NET			0xA01
+#define OBD_FAIL_QUOTA_EDQUOT	    0xA02
+#define OBD_FAIL_QUOTA_DELAY_REINT       0xA03
+#define OBD_FAIL_QUOTA_RECOVERABLE_ERR   0xA04
+
+#define OBD_FAIL_LPROC_REMOVE	    0xB00
+
+#define OBD_FAIL_GENERAL_ALLOC	   0xC00
+
+#define OBD_FAIL_SEQ		     0x1000
+#define OBD_FAIL_SEQ_QUERY_NET	   0x1001
+#define OBD_FAIL_SEQ_EXHAUST		 0x1002
+
+#define OBD_FAIL_FLD		     0x1100
+#define OBD_FAIL_FLD_QUERY_NET	   0x1101
+
+#define OBD_FAIL_SEC_CTX		 0x1200
+#define OBD_FAIL_SEC_CTX_INIT_NET	0x1201
+#define OBD_FAIL_SEC_CTX_INIT_CONT_NET   0x1202
+#define OBD_FAIL_SEC_CTX_FINI_NET	0x1203
+#define OBD_FAIL_SEC_CTX_HDL_PAUSE       0x1204
+
+#define OBD_FAIL_LLOG			       0x1300
+#define OBD_FAIL_LLOG_ORIGIN_CONNECT_NET	    0x1301
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CREATE_NET      0x1302
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_DESTROY_NET     0x1303
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_READ_HEADER_NET 0x1304
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_NEXT_BLOCK_NET  0x1305
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_PREV_BLOCK_NET  0x1306
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_WRITE_REC_NET   0x1307
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CLOSE_NET       0x1308
+#define OBD_FAIL_LLOG_CATINFO_NET		   0x1309
+#define OBD_FAIL_MDS_SYNC_CAPA_SL		   0x1310
+#define OBD_FAIL_SEQ_ALLOC			  0x1311
+
+#define OBD_FAIL_LLITE			      0x1400
+#define OBD_FAIL_LLITE_FAULT_TRUNC_RACE	     0x1401
+#define OBD_FAIL_LOCK_STATE_WAIT_INTR	       0x1402
+#define OBD_FAIL_LOV_INIT			    0x1403
+#define OBD_FAIL_GLIMPSE_DELAY			    0x1404
+
+#define OBD_FAIL_FID_INDIR	0x1501
+#define OBD_FAIL_FID_INLMA	0x1502
+#define OBD_FAIL_FID_IGIF	0x1504
+#define OBD_FAIL_FID_LOOKUP	0x1505
+#define OBD_FAIL_FID_NOLMA	0x1506
+
+/* LFSCK */
+#define OBD_FAIL_LFSCK_DELAY1		0x1600
+#define OBD_FAIL_LFSCK_DELAY2		0x1601
+#define OBD_FAIL_LFSCK_DELAY3		0x1602
+#define OBD_FAIL_LFSCK_LINKEA_CRASH	0x1603
+#define OBD_FAIL_LFSCK_LINKEA_MORE	0x1604
+#define OBD_FAIL_LFSCK_FATAL1		0x1608
+#define OBD_FAIL_LFSCK_FATAL2		0x1609
+#define OBD_FAIL_LFSCK_CRASH		0x160a
+#define OBD_FAIL_LFSCK_NO_AUTO		0x160b
+#define OBD_FAIL_LFSCK_NO_DOUBLESCAN	0x160c
+
+/* UPDATE */
+#define OBD_FAIL_UPDATE_OBJ_NET			0x1700
+#define OBD_FAIL_UPDATE_OBJ_NET_REP		0x1701
+
+
+/* Assign references to moved code to reduce code changes */
+#define OBD_FAIL_PRECHECK(id)		   CFS_FAIL_PRECHECK(id)
+#define OBD_FAIL_CHECK(id)		      CFS_FAIL_CHECK(id)
+#define OBD_FAIL_CHECK_VALUE(id, value)	 CFS_FAIL_CHECK_VALUE(id, value)
+#define OBD_FAIL_CHECK_ORSET(id, value)	 CFS_FAIL_CHECK_ORSET(id, value)
+#define OBD_FAIL_CHECK_RESET(id, value)	 CFS_FAIL_CHECK_RESET(id, value)
+#define OBD_FAIL_RETURN(id, ret)		CFS_FAIL_RETURN(id, ret)
+#define OBD_FAIL_TIMEOUT(id, secs)	      CFS_FAIL_TIMEOUT(id, secs)
+#define OBD_FAIL_TIMEOUT_MS(id, ms)	     CFS_FAIL_TIMEOUT_MS(id, ms)
+#define OBD_FAIL_TIMEOUT_ORSET(id, value, secs) CFS_FAIL_TIMEOUT_ORSET(id, value, secs)
+#define OBD_RACE(id)			    CFS_RACE(id)
+#define OBD_FAIL_ONCE			   CFS_FAIL_ONCE
+#define OBD_FAILED			      CFS_FAILED
+
+extern atomic_t libcfs_kmemory;
+
+#ifdef LPROCFS
+#define obd_memory_add(size)						  \
+	lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size))
+#define obd_memory_sub(size)						  \
+	lprocfs_counter_sub(obd_memory, OBD_MEMORY_STAT, (long)(size))
+#define obd_memory_sum()						      \
+	lprocfs_stats_collector(obd_memory, OBD_MEMORY_STAT,		  \
+				LPROCFS_FIELDS_FLAGS_SUM)
+#define obd_pages_add(order)						  \
+	lprocfs_counter_add(obd_memory, OBD_MEMORY_PAGES_STAT,		\
+			    (long)(1 << (order)))
+#define obd_pages_sub(order)						  \
+	lprocfs_counter_sub(obd_memory, OBD_MEMORY_PAGES_STAT,		\
+			    (long)(1 << (order)))
+#define obd_pages_sum()						       \
+	lprocfs_stats_collector(obd_memory, OBD_MEMORY_PAGES_STAT,	    \
+				LPROCFS_FIELDS_FLAGS_SUM)
+
+extern void obd_update_maxusage(void);
+extern __u64 obd_memory_max(void);
+extern __u64 obd_pages_max(void);
+
+#else
+
+extern __u64 obd_alloc;
+extern __u64 obd_pages;
+
+extern __u64 obd_max_alloc;
+extern __u64 obd_max_pages;
+
+static inline void obd_memory_add(long size)
+{
+	obd_alloc += size;
+	if (obd_alloc > obd_max_alloc)
+		obd_max_alloc = obd_alloc;
+}
+
+static inline void obd_memory_sub(long size)
+{
+	obd_alloc -= size;
+}
+
+static inline void obd_pages_add(int order)
+{
+	obd_pages += 1<< order;
+	if (obd_pages > obd_max_pages)
+		obd_max_pages = obd_pages;
+}
+
+static inline void obd_pages_sub(int order)
+{
+	obd_pages -= 1<< order;
+}
+
+#define obd_memory_sum() (obd_alloc)
+#define obd_pages_sum()  (obd_pages)
+
+#define obd_memory_max() (obd_max_alloc)
+#define obd_pages_max() (obd_max_pages)
+
+#endif
+
+#define OBD_DEBUG_MEMUSAGE (1)
+
+#if OBD_DEBUG_MEMUSAGE
+#define OBD_ALLOC_POST(ptr, size, name)				 \
+		obd_memory_add(size);				   \
+		CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n",       \
+		       (int)(size), ptr)
+
+#define OBD_FREE_PRE(ptr, size, name)				   \
+	LASSERT(ptr);						   \
+	obd_memory_sub(size);					   \
+	CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n",	       \
+	       (int)(size), ptr);				       \
+	POISON(ptr, 0x5a, size)
+
+#else /* !OBD_DEBUG_MEMUSAGE */
+
+#define OBD_ALLOC_POST(ptr, size, name) ((void)0)
+#define OBD_FREE_PRE(ptr, size, name)   ((void)0)
+
+#endif /* !OBD_DEBUG_MEMUSAGE */
+
+#define HAS_FAIL_ALLOC_FLAG OBD_FAIL_CHECK(OBD_FAIL_GENERAL_ALLOC)
+
+#define OBD_ALLOC_FAIL_BITS 24
+#define OBD_ALLOC_FAIL_MASK ((1 << OBD_ALLOC_FAIL_BITS) - 1)
+#define OBD_ALLOC_FAIL_MULT (OBD_ALLOC_FAIL_MASK / 100)
+
+#if defined(LUSTRE_UTILS) /* this version is for utils only */
+#define __OBD_MALLOC_VERBOSE(ptr, cptab, cpt, size, flags)		      \
+do {									      \
+	(ptr) = (cptab) == NULL ?					      \
+		kmalloc(size, flags) :				      \
+		kmalloc_node(size, flags, cfs_cpt_spread_node(cptab, cpt));   \
+	if (unlikely((ptr) == NULL)) {					\
+		CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
+		       (int)(size), __FILE__, __LINE__);		      \
+	} else {							      \
+		memset(ptr, 0, size);					      \
+		CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p\n",	      \
+		       (int)(size), ptr);				      \
+	}								      \
+} while (0)
+
+#else /* this version is for the kernel and liblustre */
+#define OBD_FREE_RTN0(ptr)						    \
+({									    \
+	kfree(ptr);							\
+	(ptr) = NULL;							 \
+	0;								    \
+})
+
+#define __OBD_MALLOC_VERBOSE(ptr, cptab, cpt, size, flags)		      \
+do {									      \
+	(ptr) = (cptab) == NULL ?					      \
+		kmalloc(size, flags | __GFP_ZERO) :			      \
+		kmalloc_node(size, flags | __GFP_ZERO,			      \
+			     cfs_cpt_spread_node(cptab, cpt));		      \
+	if (likely((ptr) != NULL &&					   \
+		   (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 ||       \
+		    !obd_alloc_fail(ptr, #ptr, "km", size,		    \
+				    __FILE__, __LINE__) ||		    \
+		    OBD_FREE_RTN0(ptr)))){				    \
+		OBD_ALLOC_POST(ptr, size, "kmalloced");		       \
+	}								     \
+} while (0)
+#endif
+
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask)				      \
+	__OBD_MALLOC_VERBOSE(ptr, NULL, 0, size, gfp_mask)
+
+#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, __GFP_IO)
+#define OBD_ALLOC_WAIT(ptr, size) OBD_ALLOC_GFP(ptr, size, GFP_IOFS)
+#define OBD_ALLOC_PTR(ptr) OBD_ALLOC(ptr, sizeof *(ptr))
+#define OBD_ALLOC_PTR_WAIT(ptr) OBD_ALLOC_WAIT(ptr, sizeof *(ptr))
+
+#define OBD_CPT_ALLOC_GFP(ptr, cptab, cpt, size, gfp_mask)		      \
+	__OBD_MALLOC_VERBOSE(ptr, cptab, cpt, size, gfp_mask)
+
+#define OBD_CPT_ALLOC(ptr, cptab, cpt, size)				      \
+	OBD_CPT_ALLOC_GFP(ptr, cptab, cpt, size, __GFP_IO)
+
+#define OBD_CPT_ALLOC_PTR(ptr, cptab, cpt)				      \
+	OBD_CPT_ALLOC(ptr, cptab, cpt, sizeof *(ptr))
+
+# define __OBD_VMALLOC_VEROBSE(ptr, cptab, cpt, size)			      \
+do {									      \
+	(ptr) = cptab == NULL ?						      \
+		vzalloc(size) :						      \
+		vzalloc_node(size, cfs_cpt_spread_node(cptab, cpt));	      \
+	if (unlikely((ptr) == NULL)) {					\
+		CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n",	   \
+		       (int)(size));					  \
+		CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \
+		       obd_memory_sum(), atomic_read(&libcfs_kmemory));   \
+	} else {							      \
+		OBD_ALLOC_POST(ptr, size, "vmalloced");		       \
+	}								     \
+} while(0)
+
+# define OBD_VMALLOC(ptr, size)						      \
+	 __OBD_VMALLOC_VEROBSE(ptr, NULL, 0, size)
+# define OBD_CPT_VMALLOC(ptr, cptab, cpt, size)				      \
+	 __OBD_VMALLOC_VEROBSE(ptr, cptab, cpt, size)
+
+
+/* Allocations above this size are considered too big and could not be done
+ * atomically.
+ *
+ * Be very careful when changing this value, especially when decreasing it,
+ * since vmalloc in Linux doesn't perform well on multi-cores system, calling
+ * vmalloc in critical path would hurt peformance badly. See LU-66.
+ */
+#define OBD_ALLOC_BIG (4 * PAGE_CACHE_SIZE)
+
+#define OBD_ALLOC_LARGE(ptr, size)					    \
+do {									  \
+	if (size > OBD_ALLOC_BIG)					     \
+		OBD_VMALLOC(ptr, size);				       \
+	else								  \
+		OBD_ALLOC(ptr, size);					 \
+} while (0)
+
+#define OBD_CPT_ALLOC_LARGE(ptr, cptab, cpt, size)			      \
+do {									      \
+	if (size > OBD_ALLOC_BIG)					      \
+		OBD_CPT_VMALLOC(ptr, cptab, cpt, size);			      \
+	else								      \
+		OBD_CPT_ALLOC(ptr, cptab, cpt, size);			      \
+} while (0)
+
+#define OBD_FREE_LARGE(ptr, size)					     \
+do {									  \
+	if (size > OBD_ALLOC_BIG)					     \
+		OBD_VFREE(ptr, size);					 \
+	else								  \
+		OBD_FREE(ptr, size);					  \
+} while (0)
+
+
+#ifdef CONFIG_DEBUG_SLAB
+#define POISON(ptr, c, s) do {} while (0)
+#define POISON_PTR(ptr)  ((void)0)
+#else
+#define POISON(ptr, c, s) memset(ptr, c, s)
+#define POISON_PTR(ptr)  (ptr) = (void *)0xdeadbeef
+#endif
+
+#ifdef POISON_BULK
+#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_CACHE_SIZE);   \
+				    kunmap(page); } while (0)
+#else
+#define POISON_PAGE(page, val) do { } while (0)
+#endif
+
+#define OBD_FREE(ptr, size)						   \
+do {									  \
+	OBD_FREE_PRE(ptr, size, "kfreed");				    \
+	kfree(ptr);							\
+	POISON_PTR(ptr);						      \
+} while(0)
+
+
+#define OBD_FREE_RCU(ptr, size, handle)					      \
+do {									      \
+	struct portals_handle *__h = (handle);				      \
+									      \
+	LASSERT(handle != NULL);					      \
+	__h->h_cookie = (unsigned long)(ptr);				      \
+	__h->h_size = (size);						      \
+	call_rcu(&__h->h_rcu, class_handle_free_cb);			      \
+	POISON_PTR(ptr);						      \
+} while(0)
+
+
+#define OBD_VFREE(ptr, size)				\
+	do {						\
+		OBD_FREE_PRE(ptr, size, "vfreed");	\
+		vfree(ptr);			\
+		POISON_PTR(ptr);			\
+	} while (0)
+
+/* we memset() the slab object to 0 when allocation succeeds, so DO NOT
+ * HAVE A CTOR THAT DOES ANYTHING.  its work will be cleared here.  we'd
+ * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
+#define OBD_SLAB_FREE_RTN0(ptr, slab)					 \
+({									    \
+	kmem_cache_free((slab), (ptr));				    \
+	(ptr) = NULL;							 \
+	0;								    \
+})
+
+#define __OBD_SLAB_ALLOC_VERBOSE(ptr, slab, cptab, cpt, size, type)	      \
+do {									      \
+	LASSERT(ergo((type) != GFP_ATOMIC, !in_interrupt()));	      \
+	(ptr) = (cptab) == NULL ?					      \
+		kmem_cache_alloc(slab, type | __GFP_ZERO) :		\
+		kmem_cache_alloc_node(slab, type | __GFP_ZERO,		\
+				      cfs_cpt_spread_node(cptab, cpt));	\
+	if (likely((ptr) != NULL &&					   \
+		   (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 ||       \
+		    !obd_alloc_fail(ptr, #ptr, "slab-", size,		 \
+				    __FILE__, __LINE__) ||		    \
+		    OBD_SLAB_FREE_RTN0(ptr, slab)))) {			\
+		OBD_ALLOC_POST(ptr, size, "slab-alloced");		    \
+	}								     \
+} while(0)
+
+#define OBD_SLAB_ALLOC_GFP(ptr, slab, size, flags)			      \
+	__OBD_SLAB_ALLOC_VERBOSE(ptr, slab, NULL, 0, size, flags)
+#define OBD_SLAB_CPT_ALLOC_GFP(ptr, slab, cptab, cpt, size, flags)	      \
+	__OBD_SLAB_ALLOC_VERBOSE(ptr, slab, cptab, cpt, size, flags)
+
+#define OBD_FREE_PTR(ptr) OBD_FREE(ptr, sizeof *(ptr))
+
+#define OBD_SLAB_FREE(ptr, slab, size)					\
+do {									  \
+	OBD_FREE_PRE(ptr, size, "slab-freed");				\
+	kmem_cache_free(slab, ptr);					\
+	POISON_PTR(ptr);						      \
+} while(0)
+
+#define OBD_SLAB_ALLOC(ptr, slab, size)					      \
+	OBD_SLAB_ALLOC_GFP(ptr, slab, size, __GFP_IO)
+
+#define OBD_SLAB_CPT_ALLOC(ptr, slab, cptab, cpt, size)			      \
+	OBD_SLAB_CPT_ALLOC_GFP(ptr, slab, cptab, cpt, size, __GFP_IO)
+
+#define OBD_SLAB_ALLOC_PTR(ptr, slab)					      \
+	OBD_SLAB_ALLOC(ptr, slab, sizeof *(ptr))
+
+#define OBD_SLAB_CPT_ALLOC_PTR(ptr, slab, cptab, cpt)			      \
+	OBD_SLAB_CPT_ALLOC(ptr, slab, cptab, cpt, sizeof *(ptr))
+
+#define OBD_SLAB_ALLOC_PTR_GFP(ptr, slab, flags)			      \
+	OBD_SLAB_ALLOC_GFP(ptr, slab, sizeof *(ptr), flags)
+
+#define OBD_SLAB_CPT_ALLOC_PTR_GFP(ptr, slab, cptab, cpt, flags)		      \
+	OBD_SLAB_CPT_ALLOC_GFP(ptr, slab, cptab, cpt, sizeof *(ptr), flags)
+
+#define OBD_SLAB_FREE_PTR(ptr, slab)					      \
+	OBD_SLAB_FREE((ptr), (slab), sizeof *(ptr))
+
+#define KEY_IS(str) \
+	(keylen >= (sizeof(str)-1) && memcmp(key, str, (sizeof(str)-1)) == 0)
+
+/* Wrapper for contiguous page frame allocation */
+#define __OBD_PAGE_ALLOC_VERBOSE(ptr, cptab, cpt, gfp_mask)		      \
+do {									      \
+	(ptr) = (cptab) == NULL ?					      \
+		alloc_page(gfp_mask) :				      \
+		alloc_pages_node(cfs_cpt_spread_node(cptab, cpt), gfp_mask, 0);\
+	if (unlikely((ptr) == NULL)) {					\
+		CERROR("alloc_pages of '" #ptr "' %d page(s) / "LPU64" bytes "\
+		       "failed\n", (int)1,				    \
+		       (__u64)(1 << PAGE_CACHE_SHIFT));			 \
+		CERROR(LPU64" total bytes and "LPU64" total pages "	   \
+		       "("LPU64" bytes) allocated by Lustre, "		\
+		       "%d total bytes by LNET\n",			    \
+		       obd_memory_sum(),				      \
+		       obd_pages_sum() << PAGE_CACHE_SHIFT,		     \
+		       obd_pages_sum(),				       \
+		       atomic_read(&libcfs_kmemory));		     \
+	} else {							      \
+		obd_pages_add(0);					     \
+		CDEBUG(D_MALLOC, "alloc_pages '" #ptr "': %d page(s) / "      \
+		       LPU64" bytes at %p.\n",				\
+		       (int)1,						\
+		       (__u64)(1 << PAGE_CACHE_SHIFT), ptr);		    \
+	}								     \
+} while (0)
+
+#define OBD_PAGE_ALLOC(ptr, gfp_mask)					      \
+	__OBD_PAGE_ALLOC_VERBOSE(ptr, NULL, 0, gfp_mask)
+#define OBD_PAGE_CPT_ALLOC(ptr, cptab, cpt, gfp_mask)			      \
+	__OBD_PAGE_ALLOC_VERBOSE(ptr, cptab, cpt, gfp_mask)
+
+#define OBD_PAGE_FREE(ptr)						    \
+do {									  \
+	LASSERT(ptr);							 \
+	obd_pages_sub(0);						     \
+	CDEBUG(D_MALLOC, "free_pages '" #ptr "': %d page(s) / "LPU64" bytes " \
+	       "at %p.\n",						    \
+	       (int)1, (__u64)(1 << PAGE_CACHE_SHIFT),			  \
+	       ptr);							  \
+	__free_page(ptr);						   \
+	(ptr) = (void *)0xdeadbeef;					   \
+} while (0)
+
+#endif