Diffstat (limited to 'drivers/staging/lustre/lustre/include')
-rw-r--r--  drivers/staging/lustre/lustre/include/cl_object.h | 978
-rw-r--r--  drivers/staging/lustre/lustre/include/lclient.h | 408
-rw-r--r--  drivers/staging/lustre/lustre/include/linux/obd.h | 125
-rw-r--r--  drivers/staging/lustre/lustre/include/lu_object.h | 75
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/lustre_idl.h | 112
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/lustre_user.h | 54
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_cfg.h | 2
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_disk.h | 2
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_dlm.h | 14
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_dlm_flags.h | 120
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_fid.h | 22
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_import.h | 2
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_lib.h | 60
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_mdc.h | 18
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_net.h | 4
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_param.h | 1
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_req_layout.h | 3
-rw-r--r--  drivers/staging/lustre/lustre/include/obd.h | 77
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_cksum.h | 1
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_class.h | 5
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_support.h | 4
21 files changed, 436 insertions, 1651 deletions
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
index fb971ded5a1b..d4c33dd110ab 100644
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -82,7 +82,6 @@
* - i_mutex
* - PG_locked
* - cl_object_header::coh_page_guard
- * - cl_object_header::coh_lock_guard
* - lu_site::ls_guard
*
* See the top comment in cl_object.c for the description of overall locking and
@@ -98,9 +97,12 @@
* super-class definitions.
*/
#include "lu_object.h"
+#include <linux/atomic.h>
#include "linux/lustre_compat25.h"
#include <linux/mutex.h>
#include <linux/radix-tree.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
struct inode;
@@ -138,7 +140,7 @@ struct cl_device_operations {
* cl_req_slice_add().
*
* \see osc_req_init(), lov_req_init(), lovsub_req_init()
- * \see ccc_req_init()
+ * \see vvp_req_init()
*/
int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev,
struct cl_req *req);
@@ -147,7 +149,7 @@ struct cl_device_operations {
/**
* Device in the client stack.
*
- * \see ccc_device, lov_device, lovsub_device, osc_device
+ * \see vvp_device, lov_device, lovsub_device, osc_device
*/
struct cl_device {
/** Super-class. */
@@ -243,7 +245,7 @@ enum cl_attr_valid {
* be discarded from the memory, all its sub-objects are torn-down and
* destroyed too.
*
- * \see ccc_object, lov_object, lovsub_object, osc_object
+ * \see vvp_object, lov_object, lovsub_object, osc_object
*/
struct cl_object {
/** super class */
@@ -322,7 +324,7 @@ struct cl_object_operations {
* to be used instead of newly created.
*/
int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage);
+ struct cl_page *page, pgoff_t index);
/**
* Initialize lock slice for this layer. Called top-to-bottom through
* every object layer when a new cl_lock is instantiated. Layer
@@ -383,11 +385,17 @@ struct cl_object_operations {
* object. Layers are supposed to fill parts of \a lvb that will be
* shipped to the glimpse originator as a glimpse result.
*
- * \see ccc_object_glimpse(), lovsub_object_glimpse(),
+ * \see vvp_object_glimpse(), lovsub_object_glimpse(),
* \see osc_object_glimpse()
*/
int (*coo_glimpse)(const struct lu_env *env,
const struct cl_object *obj, struct ost_lvb *lvb);
+ /**
+ * Object prune method. Called when the layout of this object is about
+ * to change; each layer must then clean up its cache, mainly pages
+ * and locks.
+ */
+ int (*coo_prune)(const struct lu_env *env, struct cl_object *obj);
};
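The coo_prune() hook added above is the per-layer cleanup point for layout changes. The following is a minimal sketch of how a layer might wire it up; the my_object_prune() name and its body are illustrative assumptions, only the method signature comes from the declaration above.

static int my_object_prune(const struct lu_env *env, struct cl_object *obj)
{
        /*
         * Hypothetical layer implementation: drop whatever pages and locks
         * this layer still caches for the object, since its layout is about
         * to change.  Return 0 on success or a negative errno on failure.
         */
        return 0;
}

static const struct cl_object_operations my_object_ops = {
        .coo_prune = my_object_prune,
};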
/**
@@ -398,22 +406,6 @@ struct cl_object_header {
* here.
*/
struct lu_object_header coh_lu;
- /** \name locks
- * \todo XXX move locks below to the separate cache-lines, they are
- * mostly useless otherwise.
- */
- /** @{ */
- /** Lock protecting page tree. */
- spinlock_t coh_page_guard;
- /** Lock protecting lock list. */
- spinlock_t coh_lock_guard;
- /** @} locks */
- /** Radix tree of cl_page's, cached for this object. */
- struct radix_tree_root coh_tree;
- /** # of pages in radix tree. */
- unsigned long coh_pages;
- /** List of cl_lock's granted for this object. */
- struct list_head coh_locks;
/**
* Parent object. It is assumed that an object has a well-defined
@@ -460,10 +452,6 @@ struct cl_object_header {
co_lu.lo_linkage)
/** @} cl_object */
-#ifndef pgoff_t
-#define pgoff_t unsigned long
-#endif
-
#define CL_PAGE_EOF ((pgoff_t)~0ull)
/** \addtogroup cl_page cl_page
@@ -727,16 +715,10 @@ struct cl_page {
atomic_t cp_ref;
/** An object this page is a part of. Immutable after creation. */
struct cl_object *cp_obj;
- /** Logical page index within the object. Immutable after creation. */
- pgoff_t cp_index;
/** List of slices. Immutable after creation. */
struct list_head cp_layers;
- /** Parent page, NULL for top-level page. Immutable after creation. */
- struct cl_page *cp_parent;
- /** Lower-layer page. NULL for bottommost page. Immutable after
- * creation.
- */
- struct cl_page *cp_child;
+ /** vmpage */
+ struct page *cp_vmpage;
/**
* Page state. This field is const to avoid accidental update, it is
* modified only internally within cl_page.c. Protected by a VM lock.
@@ -787,10 +769,11 @@ struct cl_page {
/**
* Per-layer part of cl_page.
*
- * \see ccc_page, lov_page, osc_page
+ * \see vvp_page, lov_page, osc_page
*/
struct cl_page_slice {
struct cl_page *cpl_page;
+ pgoff_t cpl_index;
/**
* Object slice corresponding to this page slice. Immutable after
* creation.
@@ -804,16 +787,9 @@ struct cl_page_slice {
/**
* Lock mode. For the client extent locks.
*
- * \warning: cl_lock_mode_match() assumes particular ordering here.
* \ingroup cl_lock
*/
enum cl_lock_mode {
- /**
- * Mode of a lock that protects no data, and exists only as a
- * placeholder. This is used for `glimpse' requests. A phantom lock
- * might get promoted to real lock at some point.
- */
- CLM_PHANTOM,
CLM_READ,
CLM_WRITE,
CLM_GROUP
@@ -846,11 +822,6 @@ struct cl_page_operations {
*/
/**
- * \return the underlying VM page. Optional.
- */
- struct page *(*cpo_vmpage)(const struct lu_env *env,
- const struct cl_page_slice *slice);
- /**
* Called when \a io acquires this page into the exclusive
* ownership. When this method returns, it is guaranteed that the page
* is not owned by other io, and no transfer is going on against
@@ -897,14 +868,6 @@ struct cl_page_operations {
void (*cpo_export)(const struct lu_env *env,
const struct cl_page_slice *slice, int uptodate);
/**
- * Unmaps page from the user space (if it is mapped).
- *
- * \see cl_page_unmap()
- * \see vvp_page_unmap()
- */
- int (*cpo_unmap)(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io);
- /**
* Checks whether underlying VM page is locked (in the suitable
* sense). Used for assertions.
*
@@ -957,7 +920,7 @@ struct cl_page_operations {
*/
int (*cpo_is_under_lock)(const struct lu_env *env,
const struct cl_page_slice *slice,
- struct cl_io *io);
+ struct cl_io *io, pgoff_t *max);
/**
* Optional debugging helper. Prints given page slice.
@@ -1027,26 +990,6 @@ struct cl_page_operations {
*/
int (*cpo_make_ready)(const struct lu_env *env,
const struct cl_page_slice *slice);
- /**
- * Announce that this page is to be written out
- * opportunistically, that is, page is dirty, it is not
- * necessary to start write-out transfer right now, but
- * eventually page has to be written out.
- *
- * Main caller of this is the write path (see
- * vvp_io_commit_write()), using this method to build a
- * "transfer cache" from which large transfers are then
- * constructed by the req-formation engine.
- *
- * \todo XXX it would make sense to add page-age tracking
- * semantics here, and to oblige the req-formation engine to
- * send the page out not later than it is too old.
- *
- * \see cl_page_cache_add()
- */
- int (*cpo_cache_add)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io);
} io[CRT_NR];
/**
* Tell the transfer engine that only the [from, to] part of a page should be
@@ -1098,9 +1041,8 @@ struct cl_page_operations {
*/
#define CL_PAGE_DEBUG(mask, env, page, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
cl_page_print(env, &msgdata, lu_cdebug_printer, page); \
CDEBUG(mask, format, ## __VA_ARGS__); \
} \
@@ -1111,9 +1053,8 @@ do { \
*/
#define CL_PAGE_HEADER(mask, env, page, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \
CDEBUG(mask, format, ## __VA_ARGS__); \
} \
@@ -1130,6 +1071,12 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
#define cl_page_in_use(pg) __page_in_use(pg, 1)
#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+static inline struct page *cl_page_vmpage(struct cl_page *page)
+{
+ LASSERT(page->cp_vmpage);
+ return page->cp_vmpage;
+}
+
/** @} cl_page */
/** \addtogroup cl_lock cl_lock
@@ -1150,12 +1097,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
* (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
* cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
*
- * All locks for a given object are linked into cl_object_header::coh_locks
- * list (protected by cl_object_header::coh_lock_guard spin-lock) through
- * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can
- * sort it in starting lock offset, or use altogether different data structure
- * like a tree.
- *
* Typical cl_lock consists of the two layers:
*
* - vvp_lock (vvp specific data), and
@@ -1177,111 +1118,29 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
*
* LIFE CYCLE
*
- * cl_lock is reference counted. When reference counter drops to 0, lock is
- * placed in the cache, except when lock is in CLS_FREEING state. CLS_FREEING
- * lock is destroyed when last reference is released. Referencing between
- * top-lock and its sub-locks is described in the lov documentation module.
- *
- * STATE MACHINE
- *
- * Also, cl_lock is a state machine. This requires some clarification. One of
- * the goals of client IO re-write was to make IO path non-blocking, or at
- * least to make it easier to make it non-blocking in the future. Here
- * `non-blocking' means that when a system call (read, write, truncate)
- * reaches a situation where it has to wait for a communication with the
- * server, it should --instead of waiting-- remember its current state and
- * switch to some other work. E.g,. instead of waiting for a lock enqueue,
- * client should proceed doing IO on the next stripe, etc. Obviously this is
- * rather radical redesign, and it is not planned to be fully implemented at
- * this time, instead we are putting some infrastructure in place, that would
- * make it easier to do asynchronous non-blocking IO easier in the
- * future. Specifically, where old locking code goes to sleep (waiting for
- * enqueue, for example), new code returns cl_lock_transition::CLO_WAIT. When
- * enqueue reply comes, its completion handler signals that lock state-machine
- * is ready to transit to the next state. There is some generic code in
- * cl_lock.c that sleeps, waiting for these signals. As a result, for users of
- * this cl_lock.c code, it looks like locking is done in normal blocking
- * fashion, and it the same time it is possible to switch to the non-blocking
- * locking (simply by returning cl_lock_transition::CLO_WAIT from cl_lock.c
- * functions).
- *
- * For a description of state machine states and transitions see enum
- * cl_lock_state.
- *
- * There are two ways to restrict a set of states which lock might move to:
- *
- * - placing a "hold" on a lock guarantees that lock will not be moved
- * into cl_lock_state::CLS_FREEING state until hold is released. Hold
- * can be only acquired on a lock that is not in
- * cl_lock_state::CLS_FREEING. All holds on a lock are counted in
- * cl_lock::cll_holds. Hold protects lock from cancellation and
- * destruction. Requests to cancel and destroy a lock on hold will be
- * recorded, but only honored when last hold on a lock is released;
- *
- * - placing a "user" on a lock guarantees that lock will not leave
- * cl_lock_state::CLS_NEW, cl_lock_state::CLS_QUEUING,
- * cl_lock_state::CLS_ENQUEUED and cl_lock_state::CLS_HELD set of
- * states, once it enters this set. That is, if a user is added onto a
- * lock in a state not from this set, it doesn't immediately enforce
- * lock to move to this set, but once lock enters this set it will
- * remain there until all users are removed. Lock users are counted in
- * cl_lock::cll_users.
- *
- * User is used to assure that lock is not canceled or destroyed while
- * it is being enqueued, or actively used by some IO.
- *
- * Currently, a user always comes with a hold (cl_lock_invariant()
- * checks that a number of holds is not less than a number of users).
- *
- * CONCURRENCY
- *
- * This is how lock state-machine operates. struct cl_lock contains a mutex
- * cl_lock::cll_guard that protects struct fields.
- *
- * - mutex is taken, and cl_lock::cll_state is examined.
- *
- * - for every state there are possible target states where lock can move
- * into. They are tried in order. Attempts to move into next state are
- * done by _try() functions in cl_lock.c:cl_{enqueue,unlock,wait}_try().
- *
- * - if the transition can be performed immediately, state is changed,
- * and mutex is released.
- *
- * - if the transition requires blocking, _try() function returns
- * cl_lock_transition::CLO_WAIT. Caller unlocks mutex and goes to
- * sleep, waiting for possibility of lock state change. It is woken
- * up when some event occurs, that makes lock state change possible
- * (e.g., the reception of the reply from the server), and repeats
- * the loop.
- *
- * Top-lock and sub-lock has separate mutexes and the latter has to be taken
- * first to avoid dead-lock.
- *
- * To see an example of interaction of all these issues, take a look at the
- * lov_cl.c:lov_lock_enqueue() function. It is called as a part of
- * cl_enqueue_try(), and tries to advance top-lock to ENQUEUED state, by
- * advancing state-machines of its sub-locks (lov_lock_enqueue_one()). Note
- * also, that it uses trylock to grab sub-lock mutex to avoid dead-lock. It
- * also has to handle CEF_ASYNC enqueue, when sub-locks enqueues have to be
- * done in parallel, rather than one after another (this is used for glimpse
- * locks, that cannot dead-lock).
+ * cl_lock is a cacheless data container that carries the lock requirements
+ * needed to complete the IO. A cl_lock is created before I/O starts and
+ * destroyed when the I/O is complete.
+ *
+ * cl_lock depends on an LDLM lock to fulfill lock semantics; the LDLM lock
+ * is attached to the cl_lock at the OSC layer and remains cacheable.
*
* INTERFACE AND USAGE
*
- * struct cl_lock_operations provide a number of call-backs that are invoked
- * when events of interest occurs. Layers can intercept and handle glimpse,
- * blocking, cancel ASTs and a reception of the reply from the server.
+ * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel. A
+ * cl_lock is enqueued by cl_lock_request(), which calls the clo_enqueue()
+ * method of each layer to enqueue the lock. At the LOV layer, if a cl_lock
+ * consists of multiple sub cl_locks, each sub-lock is enqueued
+ * correspondingly. At the OSC layer, the enqueue request will try to reuse
+ * a cached LDLM lock; otherwise a new LDLM lock has to be requested from
+ * the OST side.
*
- * One important difference with the old client locking model is that new
- * client has a representation for the top-lock, whereas in the old code only
- * sub-locks existed as real data structures and file-level locks are
- * represented by "request sets" that are created and destroyed on each and
- * every lock creation.
+ * cl_lock_cancel() must be called to release a cl_lock after use. The
+ * clo_cancel() method is called for each layer to release the resources held
+ * by this lock. At the OSC layer, the reference on the LDLM lock taken at
+ * clo_enqueue time is released.
*
- * Top-locks are cached, and can be found in the cache by the system calls. It
- * is possible that top-lock is in cache, but some of its sub-locks were
- * canceled and destroyed. In that case top-lock has to be enqueued again
- * before it can be used.
+ * An LDLM lock can only be canceled when no cl_lock is using it.
*
* Overall process of the locking during IO operation is as following:
*
@@ -1294,7 +1153,7 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
*
* - when all locks are acquired, IO is performed;
*
- * - locks are released into cache.
+ * - locks are released after IO is complete.
*
* Striping introduces major additional complexity into locking. The
* fundamental problem is that it is generally unsafe to actively use (hold)
@@ -1316,16 +1175,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
* buf is a part of memory mapped Lustre file, a lock or locks protecting buf
* has to be held together with the usual lock on [offset, offset + count].
*
- * As multi-stripe locks have to be allowed, it makes sense to cache them, so
- * that, for example, a sequence of O_APPEND writes can proceed quickly
- * without going down to the individual stripes to do lock matching. On the
- * other hand, multi-stripe locks shouldn't be used by normal read/write
- * calls. To achieve this, every layer can implement ->clo_fits_into() method,
- * that is called by lock matching code (cl_lock_lookup()), and that can be
- * used to selectively disable matching of certain locks for certain IOs. For
- * example, lov layer implements lov_lock_fits_into() that allow multi-stripe
- * locks to be matched only for truncates and O_APPEND writes.
- *
* Interaction with DLM
*
* In the expected setup, cl_lock is ultimately backed up by a collection of
@@ -1356,295 +1205,27 @@ struct cl_lock_descr {
__u32 cld_enq_flags;
};
-#define DDESCR "%s(%d):[%lu, %lu]"
+#define DDESCR "%s(%d):[%lu, %lu]:%x"
#define PDESCR(descr) \
cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \
- (descr)->cld_start, (descr)->cld_end
+ (descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags
const char *cl_lock_mode_name(const enum cl_lock_mode mode);
/**
- * Lock state-machine states.
- *
- * \htmlonly
- * <pre>
- *
- * Possible state transitions:
- *
- * +------------------>NEW
- * | |
- * | | cl_enqueue_try()
- * | |
- * | cl_unuse_try() V
- * | +--------------QUEUING (*)
- * | | |
- * | | | cl_enqueue_try()
- * | | |
- * | | cl_unuse_try() V
- * sub-lock | +-------------ENQUEUED (*)
- * canceled | | |
- * | | | cl_wait_try()
- * | | |
- * | | (R)
- * | | |
- * | | V
- * | | HELD<---------+
- * | | | |
- * | | | | cl_use_try()
- * | | cl_unuse_try() | |
- * | | | |
- * | | V ---+
- * | +------------>INTRANSIT (D) <--+
- * | | |
- * | cl_unuse_try() | | cached lock found
- * | | | cl_use_try()
- * | | |
- * | V |
- * +------------------CACHED---------+
- * |
- * (C)
- * |
- * V
- * FREEING
- *
- * Legend:
- *
- * In states marked with (*) transition to the same state (i.e., a loop
- * in the diagram) is possible.
- *
- * (R) is the point where Receive call-back is invoked: it allows layers
- * to handle arrival of lock reply.
- *
- * (C) is the point where Cancellation call-back is invoked.
- *
- * (D) is the transit state which means the lock is changing.
- *
- * Transition to FREEING state is possible from any other state in the
- * diagram in case of unrecoverable error.
- * </pre>
- * \endhtmlonly
- *
- * These states are for individual cl_lock object. Top-lock and its sub-locks
- * can be in the different states. Another way to say this is that we have
- * nested state-machines.
- *
- * Separate QUEUING and ENQUEUED states are needed to support non-blocking
- * operation for locks with multiple sub-locks. Imagine lock on a file F, that
- * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send
- * enqueue to S0, wait for its completion, then send enqueue for S1, wait for
- * its completion and at last enqueue lock for S2, and wait for its
- * completion. In that case, top-lock is in QUEUING state while S0, S1 are
- * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note
- * that in this case, sub-locks move from state to state, and top-lock remains
- * in the same state).
- */
-enum cl_lock_state {
- /**
- * Lock that wasn't yet enqueued
- */
- CLS_NEW,
- /**
- * Enqueue is in progress, blocking for some intermediate interaction
- * with the other side.
- */
- CLS_QUEUING,
- /**
- * Lock is fully enqueued, waiting for server to reply when it is
- * granted.
- */
- CLS_ENQUEUED,
- /**
- * Lock granted, actively used by some IO.
- */
- CLS_HELD,
- /**
- * This state is used to mark the lock is being used, or unused.
- * We need this state because the lock may have several sublocks,
- * so it's impossible to have an atomic way to bring all sublocks
- * into CLS_HELD state at use case, or all sublocks to CLS_CACHED
- * at unuse case.
- * If a thread is referring to a lock, and it sees the lock is in this
- * state, it must wait for the lock.
- * See state diagram for details.
- */
- CLS_INTRANSIT,
- /**
- * Lock granted, not used.
- */
- CLS_CACHED,
- /**
- * Lock is being destroyed.
- */
- CLS_FREEING,
- CLS_NR
-};
-
-enum cl_lock_flags {
- /**
- * lock has been cancelled. This flag is never cleared once set (by
- * cl_lock_cancel0()).
- */
- CLF_CANCELLED = 1 << 0,
- /** cancellation is pending for this lock. */
- CLF_CANCELPEND = 1 << 1,
- /** destruction is pending for this lock. */
- CLF_DOOMED = 1 << 2,
- /** from enqueue RPC reply upcall. */
- CLF_FROM_UPCALL = 1 << 3,
-};
-
-/**
- * Lock closure.
- *
- * Lock closure is a collection of locks (both top-locks and sub-locks) that
- * might be updated in a result of an operation on a certain lock (which lock
- * this is a closure of).
- *
- * Closures are needed to guarantee dead-lock freedom in the presence of
- *
- * - nested state-machines (top-lock state-machine composed of sub-lock
- * state-machines), and
- *
- * - shared sub-locks.
- *
- * Specifically, many operations, such as lock enqueue, wait, unlock,
- * etc. start from a top-lock, and then operate on a sub-locks of this
- * top-lock, holding a top-lock mutex. When sub-lock state changes as a result
- * of such operation, this change has to be propagated to all top-locks that
- * share this sub-lock. Obviously, no natural lock ordering (e.g.,
- * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has
- * to be used. Lock closure systematizes this try-and-repeat logic.
- */
-struct cl_lock_closure {
- /**
- * Lock that is mutexed when closure construction is started. When
- * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on
- * origin is released before waiting.
- */
- struct cl_lock *clc_origin;
- /**
- * List of enclosed locks, so far. Locks are linked here through
- * cl_lock::cll_inclosure.
- */
- struct list_head clc_list;
- /**
- * True iff closure is in a `wait' mode. This determines what
- * cl_lock_enclosure() does when a lock L to be added to the closure
- * is currently mutexed by some other thread.
- *
- * If cl_lock_closure::clc_wait is not set, then closure construction
- * fails with CLO_REPEAT immediately.
- *
- * In wait mode, cl_lock_enclosure() waits until next attempt to build
- * a closure might succeed. To this end it releases an origin mutex
- * (cl_lock_closure::clc_origin), that has to be the only lock mutex
- * owned by the current thread, and then waits on L mutex (by grabbing
- * it and immediately releasing), before returning CLO_REPEAT to the
- * caller.
- */
- int clc_wait;
- /** Number of locks in the closure. */
- int clc_nr;
-};
-
-/**
* Layered client lock.
*/
struct cl_lock {
- /** Reference counter. */
- atomic_t cll_ref;
/** List of slices. Immutable after creation. */
struct list_head cll_layers;
- /**
- * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected
- * by cl_lock::cll_descr::cld_obj::coh_lock_guard.
- */
- struct list_head cll_linkage;
- /**
- * Parameters of this lock. Protected by
- * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within
- * cl_lock::cll_guard. Modified only on lock creation and in
- * cl_lock_modify().
- */
+ /** lock attribute, extent, cl_object, etc. */
struct cl_lock_descr cll_descr;
- /** Protected by cl_lock::cll_guard. */
- enum cl_lock_state cll_state;
- /** signals state changes. */
- wait_queue_head_t cll_wq;
- /**
- * Recursive lock, most fields in cl_lock{} are protected by this.
- *
- * Locking rules: this mutex is never held across network
- * communication, except when lock is being canceled.
- *
- * Lock ordering: a mutex of a sub-lock is taken first, then a mutex
- * on a top-lock. Other direction is implemented through a
- * try-lock-repeat loop. Mutices of unrelated locks can be taken only
- * by try-locking.
- *
- * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait().
- */
- struct mutex cll_guard;
- struct task_struct *cll_guarder;
- int cll_depth;
-
- /**
- * the owner for INTRANSIT state
- */
- struct task_struct *cll_intransit_owner;
- int cll_error;
- /**
- * Number of holds on a lock. A hold prevents a lock from being
- * canceled and destroyed. Protected by cl_lock::cll_guard.
- *
- * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release()
- */
- int cll_holds;
- /**
- * Number of lock users. Valid in cl_lock_state::CLS_HELD state
- * only. Lock user pins lock in CLS_HELD state. Protected by
- * cl_lock::cll_guard.
- *
- * \see cl_wait(), cl_unuse().
- */
- int cll_users;
- /**
- * Flag bit-mask. Values from enum cl_lock_flags. Updates are
- * protected by cl_lock::cll_guard.
- */
- unsigned long cll_flags;
- /**
- * A linkage into a list of locks in a closure.
- *
- * \see cl_lock_closure
- */
- struct list_head cll_inclosure;
- /**
- * Confict lock at queuing time.
- */
- struct cl_lock *cll_conflict;
- /**
- * A list of references to this lock, for debugging.
- */
- struct lu_ref cll_reference;
- /**
- * A list of holds on this lock, for debugging.
- */
- struct lu_ref cll_holders;
- /**
- * A reference for cl_lock::cll_descr::cld_obj. For debugging.
- */
- struct lu_ref_link cll_obj_ref;
-#ifdef CONFIG_LOCKDEP
- /* "dep_map" name is assumed by lockdep.h macros. */
- struct lockdep_map dep_map;
-#endif
};
/**
* Per-layer part of cl_lock
*
- * \see ccc_lock, lov_lock, lovsub_lock, osc_lock
+ * \see vvp_lock, lov_lock, lovsub_lock, osc_lock
*/
struct cl_lock_slice {
struct cl_lock *cls_lock;
@@ -1658,174 +1239,36 @@ struct cl_lock_slice {
};
/**
- * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}().
- *
- * NOTE: lov_subresult() depends on ordering here.
- */
-enum cl_lock_transition {
- /** operation cannot be completed immediately. Wait for state change. */
- CLO_WAIT = 1,
- /** operation had to release lock mutex, restart. */
- CLO_REPEAT = 2,
- /** lower layer re-enqueued. */
- CLO_REENQUEUED = 3,
-};
-
-/**
*
* \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
*/
struct cl_lock_operations {
- /**
- * \name statemachine
- *
- * State machine transitions. These 3 methods are called to transfer
- * lock from one state to another, as described in the commentary
- * above enum #cl_lock_state.
- *
- * \retval 0 this layer has nothing more to do to before
- * transition to the target state happens;
- *
- * \retval CLO_REPEAT method had to release and re-acquire cl_lock
- * mutex, repeat invocation of transition method
- * across all layers;
- *
- * \retval CLO_WAIT this layer cannot move to the target state
- * immediately, as it has to wait for certain event
- * (e.g., the communication with the server). It
- * is guaranteed, that when the state transfer
- * becomes possible, cl_lock::cll_wq wait-queue
- * is signaled. Caller can wait for this event by
- * calling cl_lock_state_wait();
- *
- * \retval -ve failure, abort state transition, move the lock
- * into cl_lock_state::CLS_FREEING state, and set
- * cl_lock::cll_error.
- *
- * Once all layers voted to agree to transition (by returning 0), lock
- * is moved into corresponding target state. All state transition
- * methods are optional.
- */
/** @{ */
/**
* Attempts to enqueue the lock. Called top-to-bottom.
*
- * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
+ * \retval 0 this layer has enqueued the lock successfully
+ * \retval >0 this layer has enqueued the lock, but needs to wait on
+ * @anchor for resources
+ * \retval -ve failure
+ *
+ * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
* \see osc_lock_enqueue()
*/
int (*clo_enqueue)(const struct lu_env *env,
const struct cl_lock_slice *slice,
- struct cl_io *io, __u32 enqflags);
+ struct cl_io *io, struct cl_sync_io *anchor);
/**
- * Attempts to wait for enqueue result. Called top-to-bottom.
- *
- * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait()
- */
- int (*clo_wait)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
- /**
- * Attempts to unlock the lock. Called bottom-to-top. In addition to
- * usual return values of lock state-machine methods, this can return
- * -ESTALE to indicate that lock cannot be returned to the cache, and
- * has to be re-initialized.
- * unuse is a one-shot operation, so it must NOT return CLO_WAIT.
- *
- * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse()
- */
- int (*clo_unuse)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
- /**
- * Notifies layer that cached lock is started being used.
- *
- * \pre lock->cll_state == CLS_CACHED
- *
- * \see lov_lock_use(), osc_lock_use()
- */
- int (*clo_use)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
- /** @} statemachine */
- /**
- * A method invoked when lock state is changed (as a result of state
- * transition). This is used, for example, to track when the state of
- * a sub-lock changes, to propagate this change to the corresponding
- * top-lock. Optional
- *
- * \see lovsub_lock_state()
- */
- void (*clo_state)(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- enum cl_lock_state st);
- /**
- * Returns true, iff given lock is suitable for the given io, idea
- * being, that there are certain "unsafe" locks, e.g., ones acquired
- * for O_APPEND writes, that we don't want to re-use for a normal
- * write, to avoid the danger of cascading evictions. Optional. Runs
- * under cl_object_header::coh_lock_guard.
- *
- * XXX this should take more information about lock needed by
- * io. Probably lock description or something similar.
- *
- * \see lov_fits_into()
- */
- int (*clo_fits_into)(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *need,
- const struct cl_io *io);
- /**
- * \name ast
- * Asynchronous System Traps. All of then are optional, all are
- * executed bottom-to-top.
- */
- /** @{ */
-
- /**
- * Cancellation callback. Cancel a lock voluntarily, or under
- * the request of server.
+ * Cancel a lock: release its DLM lock ref, but do not cancel the
+ * DLM lock itself.
*/
void (*clo_cancel)(const struct lu_env *env,
const struct cl_lock_slice *slice);
- /**
- * Lock weighting ast. Executed to estimate how precious this lock
- * is. The sum of results across all layers is used to determine
- * whether lock worth keeping in cache given present memory usage.
- *
- * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh().
- */
- unsigned long (*clo_weigh)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
- /** @} ast */
-
- /**
- * \see lovsub_lock_closure()
- */
- int (*clo_closure)(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_lock_closure *closure);
- /**
- * Executed bottom-to-top when lock description changes (e.g., as a
- * result of server granting more generous lock than was requested).
- *
- * \see lovsub_lock_modify()
- */
- int (*clo_modify)(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *updated);
- /**
- * Notifies layers (bottom-to-top) that lock is going to be
- * destroyed. Responsibility of layers is to prevent new references on
- * this lock from being acquired once this method returns.
- *
- * This can be called multiple times due to the races.
- *
- * \see cl_lock_delete()
- * \see osc_lock_delete(), lovsub_lock_delete()
- */
- void (*clo_delete)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
+ /** @} */
/**
* Destructor. Frees resources and the slice.
*
- * \see ccc_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
+ * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
* \see osc_lock_fini()
*/
void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice);
@@ -2016,7 +1459,7 @@ enum cl_io_state {
* This is usually embedded into layer session data, rather than allocated
* dynamically.
*
- * \see vvp_io, lov_io, osc_io, ccc_io
+ * \see vvp_io, lov_io, osc_io
*/
struct cl_io_slice {
struct cl_io *cis_io;
@@ -2031,6 +1474,8 @@ struct cl_io_slice {
struct list_head cis_linkage;
};
+typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
+ struct cl_page *);
/**
* Per-layer io operations.
* \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
@@ -2114,7 +1559,7 @@ struct cl_io_operations {
void (*cio_fini)(const struct lu_env *env,
const struct cl_io_slice *slice);
} op[CIT_OP_NR];
- struct {
+
/**
* Submit pages from \a queue->c2_qin for IO, and move
* successfully submitted pages into \a queue->c2_qout. Return
@@ -2127,7 +1572,15 @@ struct cl_io_operations {
const struct cl_io_slice *slice,
enum cl_req_type crt,
struct cl_2queue *queue);
- } req_op[CRT_NR];
+ /**
+ * Queue a page for asynchronous write.
+ * The difference between cio_submit and cio_commit_async is that
+ * cio_submit is for urgent requests.
+ */
+ int (*cio_commit_async)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ struct cl_page_list *queue, int from, int to,
+ cl_commit_cbt cb);
/**
* Read missing page.
*
@@ -2140,31 +1593,6 @@ struct cl_io_operations {
const struct cl_io_slice *slice,
const struct cl_page_slice *page);
/**
- * Prepare write of a \a page. Called bottom-to-top by a top-level
- * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for
- * get data from user-level buffer.
- *
- * \pre io->ci_type == CIT_WRITE
- *
- * \see vvp_io_prepare_write(), lov_io_prepare_write(),
- * osc_io_prepare_write().
- */
- int (*cio_prepare_write)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- const struct cl_page_slice *page,
- unsigned from, unsigned to);
- /**
- *
- * \pre io->ci_type == CIT_WRITE
- *
- * \see vvp_io_commit_write(), lov_io_commit_write(),
- * osc_io_commit_write().
- */
- int (*cio_commit_write)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- const struct cl_page_slice *page,
- unsigned from, unsigned to);
- /**
* Optional debugging helper. Print given io slice.
*/
int (*cio_print)(const struct lu_env *env, void *cookie,
@@ -2216,9 +1644,13 @@ enum cl_enq_flags {
*/
CEF_AGL = 0x00000020,
/**
+ * enqueue a lock to test DLM lock existence.
+ */
+ CEF_PEEK = 0x00000040,
+ /**
* mask of enq_flags.
*/
- CEF_MASK = 0x0000003f,
+ CEF_MASK = 0x0000007f,
};
/**
@@ -2228,12 +1660,12 @@ enum cl_enq_flags {
struct cl_io_lock_link {
/** linkage into one of cl_lockset lists. */
struct list_head cill_linkage;
- struct cl_lock_descr cill_descr;
- struct cl_lock *cill_lock;
+ struct cl_lock cill_lock;
/** optional destructor */
void (*cill_fini)(const struct lu_env *env,
struct cl_io_lock_link *link);
};
+#define cill_descr cill_lock.cll_descr
/**
* Lock-set represents a collection of locks, that io needs at a
@@ -2267,8 +1699,6 @@ struct cl_io_lock_link {
struct cl_lockset {
/** locks to be acquired. */
struct list_head cls_todo;
- /** locks currently being processed. */
- struct list_head cls_curr;
/** locks acquired. */
struct list_head cls_done;
};
@@ -2632,9 +2062,7 @@ struct cl_site {
* and top-locks (and top-pages) are accounted here.
*/
struct cache_stats cs_pages;
- struct cache_stats cs_locks;
atomic_t cs_pages_state[CPS_NR];
- atomic_t cs_locks_state[CLS_NR];
};
int cl_site_init(struct cl_site *s, struct cl_device *top);
@@ -2725,7 +2153,7 @@ static inline void cl_device_fini(struct cl_device *d)
}
void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
- struct cl_object *obj,
+ struct cl_object *obj, pgoff_t index,
const struct cl_page_operations *ops);
void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
struct cl_object *obj,
@@ -2758,7 +2186,7 @@ int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
struct ost_lvb *lvb);
int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
const struct cl_object_conf *conf);
-void cl_object_prune(const struct lu_env *env, struct cl_object *obj);
+int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
/**
@@ -2772,7 +2200,7 @@ static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1)
static inline void cl_object_page_init(struct cl_object *clob, int size)
{
clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
- cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8);
+ cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size);
}
static inline void *cl_object_page_slice(struct cl_object *clob,
@@ -2781,6 +2209,16 @@ static inline void *cl_object_page_slice(struct cl_object *clob,
return (void *)((char *)page + clob->co_slice_off);
}
+/**
+ * Return refcount of cl_object.
+ */
+static inline int cl_object_refc(struct cl_object *clob)
+{
+ struct lu_object_header *header = clob->co_lu.lo_header;
+
+ return atomic_read(&header->loh_ref);
+}
+
/** @} cl_object */
/** \defgroup cl_page cl_page
@@ -2794,28 +2232,20 @@ enum {
};
/* callback of cl_page_gang_lookup() */
-typedef int (*cl_page_gang_cb_t) (const struct lu_env *, struct cl_io *,
- struct cl_page *, void *);
-int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, pgoff_t start, pgoff_t end,
- cl_page_gang_cb_t cb, void *cbdata);
-struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index);
struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj,
pgoff_t idx, struct page *vmpage,
enum cl_page_type type);
-struct cl_page *cl_page_find_sub(const struct lu_env *env,
- struct cl_object *obj,
- pgoff_t idx, struct page *vmpage,
- struct cl_page *parent);
+struct cl_page *cl_page_alloc(const struct lu_env *env,
+ struct cl_object *o, pgoff_t ind,
+ struct page *vmpage,
+ enum cl_page_type type);
void cl_page_get(struct cl_page *page);
void cl_page_put(const struct lu_env *env, struct cl_page *page);
void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer,
const struct cl_page *pg);
void cl_page_header_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_page *pg);
-struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page);
struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj);
-struct cl_page *cl_page_top(struct cl_page *page);
const struct cl_page_slice *cl_page_at(const struct cl_page *page,
const struct lu_device_type *dtype);
@@ -2872,12 +2302,10 @@ int cl_page_flush(const struct lu_env *env, struct cl_io *io,
void cl_page_discard(const struct lu_env *env, struct cl_io *io,
struct cl_page *pg);
void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
-int cl_page_unmap(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg);
int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg);
void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate);
int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page);
+ struct cl_page *page, pgoff_t *max_index);
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
int cl_page_size(const struct cl_object *obj);
@@ -2890,138 +2318,66 @@ void cl_lock_descr_print(const struct lu_env *env, void *cookie,
const struct cl_lock_descr *descr);
/* @} helper */
+/**
+ * Data structure managing a client's cached pages. It maintains a count
+ * of "unstable" pages and an LRU of clean pages; "unstable" pages are
+ * pages pinned by the ptlrpc layer for recovery purposes.
+ */
+struct cl_client_cache {
+ /**
+ * # of users (OSCs)
+ */
+ atomic_t ccc_users;
+ /**
+ * # of threads currently shrinking the LRU
+ */
+ unsigned int ccc_lru_shrinkers;
+ /**
+ * # of LRU entries available
+ */
+ atomic_t ccc_lru_left;
+ /**
+ * List of entities (OSCs) for this LRU cache
+ */
+ struct list_head ccc_lru;
+ /**
+ * Max # of LRU entries
+ */
+ unsigned long ccc_lru_max;
+ /**
+ * Lock to protect ccc_lru list
+ */
+ spinlock_t ccc_lru_lock;
+ /**
+ * # of unstable pages for this mount point
+ */
+ atomic_t ccc_unstable_nr;
+ /**
+ * Waitq used to wait for the unstable page count to reach zero.
+ * Used at umount time and signaled on BRW commit.
+ */
+ wait_queue_head_t ccc_unstable_waitq;
+
+};
+
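As a rough illustration of how the fields above fit together, a hedged initialization sketch follows; the function name and the idea that the LRU budget starts fully available are assumptions, not part of this patch.

static void my_client_cache_init(struct cl_client_cache *cache,
                                 unsigned long lru_max)
{
        /* Illustrative setup of the fields declared above. */
        atomic_set(&cache->ccc_users, 0);
        cache->ccc_lru_shrinkers = 0;
        cache->ccc_lru_max = lru_max;
        atomic_set(&cache->ccc_lru_left, lru_max);
        INIT_LIST_HEAD(&cache->ccc_lru);
        spin_lock_init(&cache->ccc_lru_lock);
        atomic_set(&cache->ccc_unstable_nr, 0);
        init_waitqueue_head(&cache->ccc_unstable_waitq);
}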
/** @} cl_page */
/** \defgroup cl_lock cl_lock
* @{
*/
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source);
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source);
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source);
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
- struct cl_object *obj, pgoff_t index,
- struct cl_lock *except, int pending,
- int canceld);
-static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env,
- struct cl_object *obj,
- struct cl_page *page,
- struct cl_lock *except,
- int pending, int canceld)
-{
- LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj));
- return cl_lock_at_pgoff(env, obj, page->cp_index, except,
- pending, canceld);
-}
-
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock *lock);
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+ const struct cl_io *io);
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock);
const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
const struct lu_device_type *dtype);
-
-void cl_lock_get(struct cl_lock *lock);
-void cl_lock_get_trust(struct cl_lock *lock);
-void cl_lock_put(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source);
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source);
-void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source);
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source);
-void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock);
-
-int cl_lock_is_intransit(struct cl_lock *lock);
-
-int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock,
- int keep_mutex);
-
-/** \name statemachine statemachine
- * Interface to lock state machine consists of 3 parts:
- *
- * - "try" functions that attempt to effect a state transition. If state
- * transition is not possible right now (e.g., if it has to wait for some
- * asynchronous event to occur), these functions return
- * cl_lock_transition::CLO_WAIT.
- *
- * - "non-try" functions that implement synchronous blocking interface on
- * top of non-blocking "try" functions. These functions repeatedly call
- * corresponding "try" versions, and if state transition is not possible
- * immediately, wait for lock state change.
- *
- * - methods from cl_lock_operations, called by "try" functions. Lock can
- * be advanced to the target state only when all layers voted that they
- * are ready for this transition. "Try" functions call methods under lock
- * mutex. If a layer had to release a mutex, it re-acquires it and returns
- * cl_lock_transition::CLO_REPEAT, causing "try" function to call all
- * layers again.
- *
- * TRY NON-TRY METHOD FINAL STATE
- *
- * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED
- *
- * cl_wait_try() cl_wait() cl_lock_operations::clo_wait() CLS_HELD
- *
- * cl_unuse_try() cl_unuse() cl_lock_operations::clo_unuse() CLS_CACHED
- *
- * cl_use_try() NONE cl_lock_operations::clo_use() CLS_HELD
- *
- * @{
- */
-
-int cl_wait(const struct lu_env *env, struct cl_lock *lock);
-void cl_unuse(const struct lu_env *env, struct cl_lock *lock);
-int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
- struct cl_io *io, __u32 flags);
-int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock);
-int cl_wait_try(const struct lu_env *env, struct cl_lock *lock);
-int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic);
-
-/** @} statemachine */
-
-void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
- enum cl_lock_state state);
-int cl_queue_match(const struct list_head *queue,
- const struct cl_lock_descr *need);
-
-void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_is_mutexed(struct cl_lock *lock);
-int cl_lock_nr_mutexed(const struct lu_env *env);
-int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_ext_match(const struct cl_lock_descr *has,
- const struct cl_lock_descr *need);
-int cl_lock_descr_match(const struct cl_lock_descr *has,
- const struct cl_lock_descr *need);
-int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need);
-int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
- const struct cl_lock_descr *desc);
-
-void cl_lock_closure_init(const struct lu_env *env,
- struct cl_lock_closure *closure,
- struct cl_lock *origin, int wait);
-void cl_lock_closure_fini(struct cl_lock_closure *closure);
-int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
- struct cl_lock_closure *closure);
-void cl_lock_disclosure(const struct lu_env *env,
- struct cl_lock_closure *closure);
-int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
- struct cl_lock_closure *closure);
-
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock);
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock *lock, struct cl_sync_io *anchor);
void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error);
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait);
-
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock);
/** @} cl_lock */
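Putting the reduced cl_lock interface together, here is a hedged usage sketch. It assumes the lock's cl_object and any cl_lock_init()/cl_lock_fini() bookkeeping are handled by the surrounding I/O code, that cl_lock_request() performs the layered enqueue described in the comment above struct cl_lock, and that cl_lock_release() undoes it once the I/O is done; the function name and error handling are illustrative.

static int my_lock_for_io(const struct lu_env *env, struct cl_io *io,
                          struct cl_lock *lock, pgoff_t start, pgoff_t end)
{
        int rc;

        /* Describe the extent this I/O needs locked. */
        lock->cll_descr.cld_mode      = CLM_READ;
        lock->cll_descr.cld_start     = start;
        lock->cll_descr.cld_end       = end;
        lock->cll_descr.cld_enq_flags = 0;

        /* Enqueue through every layer (LOV fans out to sub-locks, OSC
         * attaches a cached LDLM lock or requests a new one). */
        rc = cl_lock_request(env, io, lock);
        if (rc != 0)
                return rc;

        /* ... perform the I/O covered by the lock ... */

        /* Release the lock now that the I/O is complete. */
        cl_lock_release(env, lock);
        return 0;
}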
@@ -3050,15 +2406,14 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
struct cl_lock_descr *descr);
int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
struct cl_page *page);
-int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, unsigned from, unsigned to);
-int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, unsigned from, unsigned to);
int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
enum cl_req_type iot, struct cl_2queue *queue);
int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
enum cl_req_type iot, struct cl_2queue *queue,
long timeout);
+int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue, int from, int to,
+ cl_commit_cbt cb);
int cl_io_is_going(const struct lu_env *env);
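The cl_commit_cbt callback type introduced earlier in this patch pairs with cl_io_commit_async() above. A minimal sketch, assuming the callback runs once per committed page; the names my_commit_cb()/my_commit_pages() and the callback body are illustrative.

static void my_commit_cb(const struct lu_env *env, struct cl_io *io,
                         struct cl_page *page)
{
        /* Hypothetical per-page hook, e.g. drop the caller's reference on
         * the page once it has been queued for write-out. */
}

static int my_commit_pages(const struct lu_env *env, struct cl_io *io,
                           struct cl_page_list *queue, int from, int to)
{
        /* Hand the queue to the layers; my_commit_cb() is invoked as each
         * page is committed. */
        return cl_io_commit_async(env, io, queue, from, to, my_commit_cb);
}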
/**
@@ -3114,6 +2469,12 @@ static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist)
return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch);
}
+static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist)
+{
+ LASSERT(plist->pl_nr > 0);
+ return list_entry(plist->pl_pages.next, struct cl_page, cp_batch);
+}
+
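A small sketch of using the helper above together with cl_page_list_del(), which is declared further down in this header; the drain loop and its name are assumptions made for illustration.

static void my_page_list_drain(const struct lu_env *env,
                               struct cl_page_list *plist)
{
        /* Pop pages front-to-back until the list is empty. */
        while (plist->pl_nr > 0) {
                struct cl_page *page = cl_page_list_first(plist);

                cl_page_list_del(env, plist, page);
        }
}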
/**
* Iterate over pages in a page list.
*/
@@ -3130,9 +2491,14 @@ void cl_page_list_init(struct cl_page_list *plist);
void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page);
void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
struct cl_page *page);
+void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
+ struct cl_page *page);
void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head);
+void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
+ struct cl_page *page);
void cl_page_list_disown(const struct lu_env *env,
struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist);
void cl_2queue_init(struct cl_2queue *queue);
void cl_2queue_disown(const struct lu_env *env,
@@ -3177,13 +2543,18 @@ struct cl_sync_io {
atomic_t csi_barrier;
/** completion to be signaled when transfer is complete. */
wait_queue_head_t csi_waitq;
+ /** callback to invoke when this IO is finished */
+ void (*csi_end_io)(const struct lu_env *,
+ struct cl_sync_io *);
};
-void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages);
-int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct cl_sync_io *anchor,
+void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
+ void (*end)(const struct lu_env *, struct cl_sync_io *));
+int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
long timeout);
-void cl_sync_io_note(struct cl_sync_io *anchor, int ioret);
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+ int ioret);
+void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
/** @} cl_sync_io */
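With the reworked cl_sync_io interface above, a caller initializes an anchor with the number of expected completions and an optional end-io callback, each completion is reported with cl_sync_io_note(), and the waiter blocks in cl_sync_io_wait(). A hedged sketch under those assumptions; the function names and the on-stack anchor are illustrative.

static void my_transfer_end(const struct lu_env *env,
                            struct cl_sync_io *anchor)
{
        /* Hypothetical csi_end_io callback, run once the last expected
         * completion has been noted. */
}

static int my_sync_transfer(const struct lu_env *env, int nr_pages,
                            long timeout)
{
        struct cl_sync_io anchor;

        /* Expect nr_pages completions before the waiter is released. */
        cl_sync_io_init(&anchor, nr_pages, my_transfer_end);

        /* ... submit nr_pages pages; each completion calls
         * cl_sync_io_note(env, &anchor, ioret) ... */

        /* Wait up to @timeout for all completions to be noted. */
        return cl_sync_io_wait(env, &anchor, timeout);
}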
@@ -3241,6 +2612,9 @@ void *cl_env_reenter(void);
void cl_env_reexit(void *cookie);
void cl_env_implant(struct lu_env *env, int *refcheck);
void cl_env_unplant(struct lu_env *env, int *refcheck);
+unsigned int cl_env_cache_purge(unsigned int nr);
+struct lu_env *cl_env_percpu_get(void);
+void cl_env_percpu_put(struct lu_env *env);
/** @} cl_env */
diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
deleted file mode 100644
index 5d839a9f789f..000000000000
--- a/drivers/staging/lustre/lustre/include/lclient.h
+++ /dev/null
@@ -1,408 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Definitions shared between vvp and liblustre, and other clients in the
- * future.
- *
- * Author: Oleg Drokin <oleg.drokin@sun.com>
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#ifndef LCLIENT_H
-#define LCLIENT_H
-
-blkcnt_t dirty_cnt(struct inode *inode);
-
-int cl_glimpse_size0(struct inode *inode, int agl);
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
- struct inode *inode, struct cl_object *clob, int agl);
-
-static inline int cl_glimpse_size(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 0);
-}
-
-static inline int cl_agl(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 1);
-}
-
-/**
- * Locking policy for setattr.
- */
-enum ccc_setattr_lock_type {
- /** Locking is done by server */
- SETATTR_NOLOCK,
- /** Extent lock is enqueued */
- SETATTR_EXTENT_LOCK,
- /** Existing local extent lock is used */
- SETATTR_MATCH_LOCK
-};
-
-/**
- * IO state private to vvp or slp layers.
- */
-struct ccc_io {
- /** super class */
- struct cl_io_slice cui_cl;
- struct cl_io_lock_link cui_link;
- /**
- * I/O vector information to or from which read/write is going.
- */
- struct iov_iter *cui_iter;
- /**
- * Total size for the left IO.
- */
- size_t cui_tot_count;
-
- union {
- struct {
- enum ccc_setattr_lock_type cui_local_lock;
- } setattr;
- } u;
- /**
- * True iff io is processing glimpse right now.
- */
- int cui_glimpse;
- /**
- * Layout version when this IO is initialized
- */
- __u32 cui_layout_gen;
- /**
- * File descriptor against which IO is done.
- */
- struct ll_file_data *cui_fd;
- struct kiocb *cui_iocb;
-};
-
-/**
- * True, if \a io is a normal io, False for splice_{read,write}.
- * must be implemented in arch specific code.
- */
-int cl_is_normalio(const struct lu_env *env, const struct cl_io *io);
-
-extern struct lu_context_key ccc_key;
-extern struct lu_context_key ccc_session_key;
-
-struct ccc_thread_info {
- struct cl_lock_descr cti_descr;
- struct cl_io cti_io;
- struct cl_attr cti_attr;
-};
-
-static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env)
-{
- struct ccc_thread_info *info;
-
- info = lu_context_key_get(&env->le_ctx, &ccc_key);
- LASSERT(info);
- return info;
-}
-
-static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env)
-{
- struct cl_attr *attr = &ccc_env_info(env)->cti_attr;
-
- memset(attr, 0, sizeof(*attr));
- return attr;
-}
-
-static inline struct cl_io *ccc_env_thread_io(const struct lu_env *env)
-{
- struct cl_io *io = &ccc_env_info(env)->cti_io;
-
- memset(io, 0, sizeof(*io));
- return io;
-}
-
-struct ccc_session {
- struct ccc_io cs_ios;
-};
-
-static inline struct ccc_session *ccc_env_session(const struct lu_env *env)
-{
- struct ccc_session *ses;
-
- ses = lu_context_key_get(env->le_ses, &ccc_session_key);
- LASSERT(ses);
- return ses;
-}
-
-static inline struct ccc_io *ccc_env_io(const struct lu_env *env)
-{
- return &ccc_env_session(env)->cs_ios;
-}
-
-/**
- * ccc-private object state.
- */
-struct ccc_object {
- struct cl_object_header cob_header;
- struct cl_object cob_cl;
- struct inode *cob_inode;
-
- /**
- * A list of dirty pages pending IO in the cache. Used by
- * SOM. Protected by ll_inode_info::lli_lock.
- *
- * \see ccc_page::cpg_pending_linkage
- */
- struct list_head cob_pending_list;
-
- /**
- * Access this counter is protected by inode->i_sem. Now that
- * the lifetime of transient pages must be covered by inode sem,
- * we don't need to hold any lock..
- */
- int cob_transient_pages;
- /**
- * Number of outstanding mmaps on this file.
- *
- * \see ll_vm_open(), ll_vm_close().
- */
- atomic_t cob_mmap_cnt;
-
- /**
- * various flags
- * cob_discard_page_warned
- * if pages belonging to this object are discarded when a client
- * is evicted, some debug info will be printed, this flag will be set
- * during processing the first discarded page, then avoid flooding
- * debug message for lots of discarded pages.
- *
- * \see ll_dirty_page_discard_warn.
- */
- unsigned int cob_discard_page_warned:1;
-};
-
-/**
- * ccc-private page state.
- */
-struct ccc_page {
- struct cl_page_slice cpg_cl;
- int cpg_defer_uptodate;
- int cpg_ra_used;
- int cpg_write_queued;
- /**
- * Non-empty iff this page is already counted in
- * ccc_object::cob_pending_list. Protected by
- * ccc_object::cob_pending_guard. This list is only used as a flag,
- * that is, never iterated through, only checked for list_empty(), but
- * having a list is useful for debugging.
- */
- struct list_head cpg_pending_linkage;
- /** VM page */
- struct page *cpg_page;
-};
-
-static inline struct ccc_page *cl2ccc_page(const struct cl_page_slice *slice)
-{
- return container_of(slice, struct ccc_page, cpg_cl);
-}
-
-struct ccc_device {
- struct cl_device cdv_cl;
- struct super_block *cdv_sb;
- struct cl_device *cdv_next;
-};
-
-struct ccc_lock {
- struct cl_lock_slice clk_cl;
-};
-
-struct ccc_req {
- struct cl_req_slice crq_cl;
-};
-
-void *ccc_key_init (const struct lu_context *ctx,
- struct lu_context_key *key);
-void ccc_key_fini (const struct lu_context *ctx,
- struct lu_context_key *key, void *data);
-void *ccc_session_key_init(const struct lu_context *ctx,
- struct lu_context_key *key);
-void ccc_session_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data);
-
-int ccc_device_init (const struct lu_env *env,
- struct lu_device *d,
- const char *name, struct lu_device *next);
-struct lu_device *ccc_device_fini (const struct lu_env *env,
- struct lu_device *d);
-struct lu_device *ccc_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg,
- const struct lu_device_operations *luops,
- const struct cl_device_operations *clops);
-struct lu_device *ccc_device_free (const struct lu_env *env,
- struct lu_device *d);
-struct lu_object *ccc_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *dev,
- const struct cl_object_operations *clops,
- const struct lu_object_operations *luops);
-
-int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req);
-void ccc_umount(const struct lu_env *env, struct cl_device *dev);
-int ccc_global_init(struct lu_device_type *device_type);
-void ccc_global_fini(struct lu_device_type *device_type);
-int ccc_object_init0(const struct lu_env *env, struct ccc_object *vob,
- const struct cl_object_conf *conf);
-int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf);
-void ccc_object_free(const struct lu_env *env, struct lu_object *obj);
-int ccc_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io,
- const struct cl_lock_operations *lkops);
-int ccc_object_glimpse(const struct lu_env *env,
- const struct cl_object *obj, struct ost_lvb *lvb);
-struct page *ccc_page_vmpage(const struct lu_env *env,
- const struct cl_page_slice *slice);
-int ccc_page_is_under_lock(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io);
-int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice);
-int ccc_transient_page_prep(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io);
-void ccc_lock_delete(const struct lu_env *env,
- const struct cl_lock_slice *slice);
-void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice);
-int ccc_lock_enqueue(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_io *io, __u32 enqflags);
-int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_fits_into(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *need,
- const struct cl_io *io);
-void ccc_lock_state(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- enum cl_lock_state state);
-
-int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- pgoff_t start, pgoff_t end);
-int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- loff_t start, loff_t end);
-void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios);
-void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios,
- size_t nob);
-void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio,
- struct cl_io *io);
-int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, loff_t start, size_t count, int *exceed);
-void ccc_req_completion(const struct lu_env *env,
- const struct cl_req_slice *slice, int ioret);
-void ccc_req_attr_set(const struct lu_env *env,
- const struct cl_req_slice *slice,
- const struct cl_object *obj,
- struct cl_req_attr *oa, u64 flags);
-
-struct lu_device *ccc2lu_dev (struct ccc_device *vdv);
-struct lu_object *ccc2lu (struct ccc_object *vob);
-struct ccc_device *lu2ccc_dev (const struct lu_device *d);
-struct ccc_device *cl2ccc_dev (const struct cl_device *d);
-struct ccc_object *lu2ccc (const struct lu_object *obj);
-struct ccc_object *cl2ccc (const struct cl_object *obj);
-struct ccc_lock *cl2ccc_lock (const struct cl_lock_slice *slice);
-struct ccc_io *cl2ccc_io (const struct lu_env *env,
- const struct cl_io_slice *slice);
-struct ccc_req *cl2ccc_req (const struct cl_req_slice *slice);
-struct page *cl2vm_page (const struct cl_page_slice *slice);
-struct inode *ccc_object_inode(const struct cl_object *obj);
-struct ccc_object *cl_inode2ccc (struct inode *inode);
-
-int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
-
-int ccc_object_invariant(const struct cl_object *obj);
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-int cl_local_size(struct inode *inode);
-
-__u16 ll_dirent_type_get(struct lu_dirent *ent);
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
-__u32 cl_fid_build_gen(const struct lu_fid *fid);
-
-# define CLOBINVRNT(env, clob, expr) \
- ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
-
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
-int cl_ocd_update(struct obd_device *host,
- struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data);
-
-struct ccc_grouplock {
- struct lu_env *cg_env;
- struct cl_io *cg_io;
- struct cl_lock *cg_lock;
- unsigned long cg_gid;
-};
-
-int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
- struct ccc_grouplock *cg);
-void cl_put_grouplock(struct ccc_grouplock *cg);
-
-/**
- * New interfaces to get and put lov_stripe_md from lov layer. This violates
- * layering because lov_stripe_md is supposed to be a private data in lov.
- *
- * NB: If you find you have to use these interfaces for your new code, please
- * think about it again. These interfaces may be removed in the future for
- * better layering.
- */
-struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
-void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
-int lov_read_and_clear_async_rc(struct cl_object *clob);
-
-struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
-void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
-
-/**
- * Data structure managing a client's cached clean pages. An LRU of
- * pages is maintained, along with other statistics.
- */
-struct cl_client_cache {
- atomic_t ccc_users; /* # of users (OSCs) of this data */
- struct list_head ccc_lru; /* LRU list of cached clean pages */
- spinlock_t ccc_lru_lock; /* lock for list */
- atomic_t ccc_lru_left; /* # of LRU entries available */
- unsigned long ccc_lru_max; /* Max # of LRU entries possible */
- unsigned int ccc_lru_shrinkers; /* # of threads reclaiming */
-};
-
-#endif /*LCLIENT_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/obd.h b/drivers/staging/lustre/lustre/include/linux/obd.h
deleted file mode 100644
index 3907bf4ce07c..000000000000
--- a/drivers/staging/lustre/lustre/include/linux/obd.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LINUX_OBD_H
-#define __LINUX_OBD_H
-
-#ifndef __OBD_H
-#error Do not #include this file directly. #include <obd.h> instead
-#endif
-
-#include "../obd_support.h"
-
-#include <linux/fs.h>
-#include <linux/list.h>
-#include <linux/sched.h> /* for struct task_struct, for current.h */
-#include <linux/mount.h>
-
-#include "../lustre_intent.h"
-
-struct ll_iattr {
- struct iattr iattr;
- unsigned int ia_attr_flags;
-};
-
-#define CLIENT_OBD_LIST_LOCK_DEBUG 1
-
-struct client_obd_lock {
- spinlock_t lock;
-
- unsigned long time;
- struct task_struct *task;
- const char *func;
- int line;
-};
-
-static inline void __client_obd_list_lock(struct client_obd_lock *lock,
- const char *func, int line)
-{
- unsigned long cur = jiffies;
-
- while (1) {
- if (spin_trylock(&lock->lock)) {
- LASSERT(!lock->task);
- lock->task = current;
- lock->func = func;
- lock->line = line;
- lock->time = jiffies;
- break;
- }
-
- if (time_before(cur + 5 * HZ, jiffies) &&
- time_before(lock->time + 5 * HZ, jiffies)) {
- struct task_struct *task = lock->task;
-
- if (!task)
- continue;
-
- LCONSOLE_WARN("%s:%d: lock %p was acquired by <%s:%d:%s:%d> for %lu seconds.\n",
- current->comm, current->pid,
- lock, task->comm, task->pid,
- lock->func, lock->line,
- (jiffies - lock->time) / HZ);
- LCONSOLE_WARN("====== for current process =====\n");
- dump_stack();
- LCONSOLE_WARN("====== end =======\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(1000 * HZ);
- }
- cpu_relax();
- }
-}
-
-#define client_obd_list_lock(lock) \
- __client_obd_list_lock(lock, __func__, __LINE__)
-
-static inline void client_obd_list_unlock(struct client_obd_lock *lock)
-{
- LASSERT(lock->task);
- lock->task = NULL;
- lock->time = jiffies;
- spin_unlock(&lock->lock);
-}
-
-static inline void client_obd_list_lock_init(struct client_obd_lock *lock)
-{
- spin_lock_init(&lock->lock);
-}
-
-static inline void client_obd_list_lock_done(struct client_obd_lock *lock)
-{}
-
-#endif /* __LINUX_OBD_H */
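
Illustrative sketch (not part of this patch): with the client_obd_lock wrapper
and its busy-wait watchdog deleted above, struct client_obd keeps a plain
spinlock_t (see the obd.h hunk later in this series), so list manipulation is
expected to use ordinary spin_lock/spin_unlock pairs. The function below is a
hypothetical caller, not code from the series.

static void example_requeue_ready(struct client_obd *cli,
				  struct list_head *item)
{
	spin_lock(&cli->cl_loi_list_lock);
	/* manipulate cli->cl_loi_ready_list and friends under the plain lock */
	list_move_tail(item, &cli->cl_loi_ready_list);
	spin_unlock(&cli->cl_loi_list_lock);
}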
diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
index 242bb1ef6245..2816512185af 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -198,7 +198,6 @@ typedef int (*lu_printer_t)(const struct lu_env *env,
* Operations specific for particular lu_object.
*/
struct lu_object_operations {
-
/**
* Allocate lower-layer parts of the object by calling
* lu_device_operations::ldo_object_alloc() of the corresponding
@@ -656,21 +655,21 @@ static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
* @{
*/
-int lu_site_init (struct lu_site *s, struct lu_device *d);
-void lu_site_fini (struct lu_site *s);
-int lu_site_init_finish (struct lu_site *s);
-void lu_stack_fini (const struct lu_env *env, struct lu_device *top);
-void lu_device_get (struct lu_device *d);
-void lu_device_put (struct lu_device *d);
-int lu_device_init (struct lu_device *d, struct lu_device_type *t);
-void lu_device_fini (struct lu_device *d);
-int lu_object_header_init(struct lu_object_header *h);
+int lu_site_init(struct lu_site *s, struct lu_device *d);
+void lu_site_fini(struct lu_site *s);
+int lu_site_init_finish(struct lu_site *s);
+void lu_stack_fini(const struct lu_env *env, struct lu_device *top);
+void lu_device_get(struct lu_device *d);
+void lu_device_put(struct lu_device *d);
+int lu_device_init(struct lu_device *d, struct lu_device_type *t);
+void lu_device_fini(struct lu_device *d);
+int lu_object_header_init(struct lu_object_header *h);
void lu_object_header_fini(struct lu_object_header *h);
-int lu_object_init (struct lu_object *o,
- struct lu_object_header *h, struct lu_device *d);
-void lu_object_fini (struct lu_object *o);
-void lu_object_add_top (struct lu_object_header *h, struct lu_object *o);
-void lu_object_add (struct lu_object *before, struct lu_object *o);
+int lu_object_init(struct lu_object *o,
+ struct lu_object_header *h, struct lu_device *d);
+void lu_object_fini(struct lu_object *o);
+void lu_object_add_top(struct lu_object_header *h, struct lu_object *o);
+void lu_object_add(struct lu_object *before, struct lu_object *o);
/**
* Helpers to initialize and finalize device types.
@@ -781,9 +780,8 @@ int lu_cdebug_printer(const struct lu_env *env,
*/
#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
CDEBUG(mask, format, ## __VA_ARGS__); \
} \
@@ -794,9 +792,8 @@ do { \
*/
#define LU_OBJECT_HEADER(mask, env, object, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
(object)->lo_header); \
lu_cdebug_printer(env, &msgdata, "\n"); \
@@ -1007,6 +1004,10 @@ enum lu_context_tag {
*/
LCT_LOCAL = 1 << 7,
/**
+ * session for server thread
+ */
+ LCT_SERVER_SESSION = BIT(8),
+ /**
* Set when at least one of keys, having values in this context has
* non-NULL lu_context_key::lct_exit() method. This is used to
* optimize lu_context_exit() call.
@@ -1118,7 +1119,7 @@ struct lu_context_key {
{ \
type *value; \
\
- CLASSERT(PAGE_SIZE >= sizeof (*value)); \
+ CLASSERT(PAGE_SIZE >= sizeof(*value)); \
\
value = kzalloc(sizeof(*value), GFP_NOFS); \
if (!value) \
@@ -1154,12 +1155,12 @@ do { \
(key)->lct_owner = THIS_MODULE; \
} while (0)
-int lu_context_key_register(struct lu_context_key *key);
-void lu_context_key_degister(struct lu_context_key *key);
-void *lu_context_key_get (const struct lu_context *ctx,
- const struct lu_context_key *key);
-void lu_context_key_quiesce (struct lu_context_key *key);
-void lu_context_key_revive (struct lu_context_key *key);
+int lu_context_key_register(struct lu_context_key *key);
+void lu_context_key_degister(struct lu_context_key *key);
+void *lu_context_key_get(const struct lu_context *ctx,
+ const struct lu_context_key *key);
+void lu_context_key_quiesce(struct lu_context_key *key);
+void lu_context_key_revive(struct lu_context_key *key);
/*
* LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an
@@ -1216,21 +1217,21 @@ void lu_context_key_revive (struct lu_context_key *key);
LU_TYPE_START(mod, __VA_ARGS__); \
LU_TYPE_STOP(mod, __VA_ARGS__)
-int lu_context_init (struct lu_context *ctx, __u32 tags);
-void lu_context_fini (struct lu_context *ctx);
-void lu_context_enter (struct lu_context *ctx);
-void lu_context_exit (struct lu_context *ctx);
-int lu_context_refill(struct lu_context *ctx);
+int lu_context_init(struct lu_context *ctx, __u32 tags);
+void lu_context_fini(struct lu_context *ctx);
+void lu_context_enter(struct lu_context *ctx);
+void lu_context_exit(struct lu_context *ctx);
+int lu_context_refill(struct lu_context *ctx);
/*
* Helper functions to operate on multiple keys. These are used by the default
* device type operations, defined by LU_TYPE_INIT_FINI().
*/
-int lu_context_key_register_many(struct lu_context_key *k, ...);
+int lu_context_key_register_many(struct lu_context_key *k, ...);
void lu_context_key_degister_many(struct lu_context_key *k, ...);
-void lu_context_key_revive_many (struct lu_context_key *k, ...);
-void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
+void lu_context_key_revive_many(struct lu_context_key *k, ...);
+void lu_context_key_quiesce_many(struct lu_context_key *k, ...);
/**
* Environment.
@@ -1246,9 +1247,9 @@ struct lu_env {
struct lu_context *le_ses;
};
-int lu_env_init (struct lu_env *env, __u32 tags);
-void lu_env_fini (struct lu_env *env);
-int lu_env_refill(struct lu_env *env);
+int lu_env_init(struct lu_env *env, __u32 tags);
+void lu_env_fini(struct lu_env *env);
+int lu_env_refill(struct lu_env *env);
/** @} lu_context */
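
Illustrative sketch (not part of this patch): a minimal lu_env lifecycle as
implied by the prototypes above; example_with_env() and the LCT_LOCAL tag
choice are assumptions made only for the example.

static int example_with_env(void)
{
	struct lu_env env;
	int rc;

	rc = lu_env_init(&env, LCT_LOCAL);
	if (rc)
		return rc;

	/* per-thread data is reached via lu_context_key_get(&env.le_ctx, key) */

	lu_env_fini(&env);
	return 0;
}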
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 5aae1d06a5fa..9c53c1792dc8 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -183,6 +183,12 @@ struct lu_seq_range {
__u32 lsr_flags;
};
+struct lu_seq_range_array {
+ __u32 lsra_count;
+ __u32 lsra_padding;
+ struct lu_seq_range lsra_lsr[0];
+};
+
#define LU_SEQ_RANGE_MDT 0x0
#define LU_SEQ_RANGE_OST 0x1
#define LU_SEQ_RANGE_ANY 0x3
@@ -578,7 +584,7 @@ static inline __u64 ostid_seq(const struct ost_id *ostid)
if (fid_seq_is_mdt0(ostid->oi.oi_seq))
return FID_SEQ_OST_MDT0;
- if (fid_seq_is_default(ostid->oi.oi_seq))
+ if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
return FID_SEQ_LOV_DEFAULT;
if (fid_is_idif(&ostid->oi_fid))
@@ -590,9 +596,12 @@ static inline __u64 ostid_seq(const struct ost_id *ostid)
/* extract OST objid from a wire ost_id (id/seq) pair */
static inline __u64 ostid_id(const struct ost_id *ostid)
{
- if (fid_seq_is_mdt0(ostid_seq(ostid)))
+ if (fid_seq_is_mdt0(ostid->oi.oi_seq))
return ostid->oi.oi_id & IDIF_OID_MASK;
+ if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
+ return ostid->oi.oi_id;
+
if (fid_is_idif(&ostid->oi_fid))
return fid_idif_id(fid_seq(&ostid->oi_fid),
fid_oid(&ostid->oi_fid), 0);
@@ -636,12 +645,22 @@ static inline void ostid_set_seq_llog(struct ost_id *oi)
*/
static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
{
- if (fid_seq_is_mdt0(ostid_seq(oi))) {
+ if (fid_seq_is_mdt0(oi->oi.oi_seq)) {
if (oid >= IDIF_MAX_OID) {
CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
return;
}
oi->oi.oi_id = oid;
+ } else if (fid_is_idif(&oi->oi_fid)) {
+ if (oid >= IDIF_MAX_OID) {
+ CERROR("Bad %llu to set "DOSTID"\n",
+ oid, POSTID(oi));
+ return;
+ }
+ oi->oi_fid.f_seq = fid_idif_seq(oid,
+ fid_idif_ost_idx(&oi->oi_fid));
+ oi->oi_fid.f_oid = oid;
+ oi->oi_fid.f_ver = oid >> 48;
} else {
if (oid > OBIF_MAX_OID) {
CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
@@ -651,25 +670,31 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
}
}
-static inline void ostid_inc_id(struct ost_id *oi)
+static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
{
- if (fid_seq_is_mdt0(ostid_seq(oi))) {
- if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) {
- CERROR("Bad inc "DOSTID"\n", POSTID(oi));
- return;
+ if (unlikely(fid_seq_is_igif(fid->f_seq))) {
+ CERROR("bad IGIF, "DFID"\n", PFID(fid));
+ return -EBADF;
+ }
+
+ if (fid_is_idif(fid)) {
+ if (oid >= IDIF_MAX_OID) {
+ CERROR("Too large OID %#llx to set IDIF "DFID"\n",
+ (unsigned long long)oid, PFID(fid));
+ return -EBADF;
}
- oi->oi.oi_id++;
+ fid->f_seq = fid_idif_seq(oid, fid_idif_ost_idx(fid));
+ fid->f_oid = oid;
+ fid->f_ver = oid >> 48;
} else {
- oi->oi_fid.f_oid++;
+ if (oid > OBIF_MAX_OID) {
+ CERROR("Too large OID %#llx to set REG "DFID"\n",
+ (unsigned long long)oid, PFID(fid));
+ return -EBADF;
+ }
+ fid->f_oid = oid;
}
-}
-
-static inline void ostid_dec_id(struct ost_id *oi)
-{
- if (fid_seq_is_mdt0(ostid_seq(oi)))
- oi->oi.oi_id--;
- else
- oi->oi_fid.f_oid--;
+ return 0;
}
/**
@@ -684,30 +709,34 @@ static inline void ostid_dec_id(struct ost_id *oi)
static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid,
__u32 ost_idx)
{
+ __u64 seq = ostid_seq(ostid);
+
if (ost_idx > 0xffff) {
CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid),
ost_idx);
return -EBADF;
}
- if (fid_seq_is_mdt0(ostid_seq(ostid))) {
+ if (fid_seq_is_mdt0(seq)) {
+ __u64 oid = ostid_id(ostid);
+
/* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
* that we map into the IDIF namespace. It allows up to 2^48
* objects per OST, as this is the object namespace that has
* been in production for years. This can handle create rates
* of 1M objects/s/OST for 9 years, or combinations thereof.
*/
- if (ostid_id(ostid) >= IDIF_MAX_OID) {
+ if (oid >= IDIF_MAX_OID) {
CERROR("bad MDT0 id, " DOSTID " ost_idx:%u\n",
POSTID(ostid), ost_idx);
return -EBADF;
}
- fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx);
+ fid->f_seq = fid_idif_seq(oid, ost_idx);
/* truncate to 32 bits by assignment */
- fid->f_oid = ostid_id(ostid);
+ fid->f_oid = oid;
/* in theory, not currently used */
- fid->f_ver = ostid_id(ostid) >> 48;
- } else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ {
+ fid->f_ver = oid >> 48;
+ } else if (likely(!fid_seq_is_default(seq))) {
/* This is either an IDIF object, which identifies objects across
* all OSTs, or a regular FID. The IDIF namespace maps legacy
* OST objects into the FID namespace. In both cases, we just
@@ -1001,8 +1030,9 @@ static inline int lu_dirent_calc_size(int namelen, __u16 attr)
size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
size += sizeof(struct luda_type);
- } else
+ } else {
size = sizeof(struct lu_dirent) + namelen;
+ }
return (size + 7) & ~7;
}
@@ -1256,6 +1286,9 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
#define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */
#define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* flock deadlock detection */
#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/
+#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack
+ * name in request
+ */
/* XXX README XXX:
* Please DO NOT add flag values here before first ensuring that this same
@@ -1428,6 +1461,8 @@ enum obdo_flags {
*/
OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */
+ OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */
+ OBD_FL_SHORT_IO = 0x00400000, /* short io request */
/* Note that while these checksum values are currently separate bits,
* in 2.x we can actually allow all values from 1-31 if we wanted.
@@ -1525,6 +1560,11 @@ static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
oi->oi.oi_seq = seq;
}
+static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
+{
+ oi->oi.oi_id = oid;
+}
+
static inline __u64 lmm_oi_id(struct ost_id *oi)
{
return oi->oi.oi_id;
@@ -1732,6 +1772,11 @@ void lustre_swab_obd_statfs(struct obd_statfs *os);
#define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */
#define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */
#define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */
+#define OBD_BRW_SOFT_SYNC 0x4000 /* This flag notifies the server
+ * that the client is running low on
+ * space for unstable pages and asks
+ * it to sync them quickly
+ */
#define OBD_OBJECT_EOF 0xffffffffffffffffULL
@@ -2436,6 +2481,7 @@ struct mdt_rec_reint {
void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
+/* lmv structures */
struct lmv_desc {
__u32 ld_tgt_count; /* how many MDS's */
__u32 ld_active_tgt_count; /* how many active */
@@ -2460,7 +2506,6 @@ struct lmv_stripe_md {
struct lu_fid mea_ids[0];
};
-/* lmv structures */
#define MEA_MAGIC_LAST_CHAR 0xb2221ca1
#define MEA_MAGIC_ALL_CHARS 0xb222a11c
#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
@@ -2470,9 +2515,10 @@ struct lmv_stripe_md {
#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
enum fld_rpc_opc {
- FLD_QUERY = 900,
+ FLD_QUERY = 900,
+ FLD_READ = 901,
FLD_LAST_OPC,
- FLD_FIRST_OPC = FLD_QUERY
+ FLD_FIRST_OPC = FLD_QUERY
};
enum seq_rpc_opc {
@@ -2486,6 +2532,12 @@ enum seq_op {
SEQ_ALLOC_META = 1
};
+enum fld_op {
+ FLD_CREATE = 0,
+ FLD_DELETE = 1,
+ FLD_LOOKUP = 2,
+};
+
/*
* LOV data structures
*/
@@ -2582,6 +2634,8 @@ struct ldlm_extent {
__u64 gid;
};
+#define LDLM_GID_ANY ((__u64)-1)
+
static inline int ldlm_extent_overlap(struct ldlm_extent *ex1,
struct ldlm_extent *ex2)
{
@@ -3304,7 +3358,7 @@ struct getinfo_fid2path {
char gf_path[0];
} __packed;
-void lustre_swab_fid2path (struct getinfo_fid2path *gf);
+void lustre_swab_fid2path(struct getinfo_fid2path *gf);
enum {
LAYOUT_INTENT_ACCESS = 0,
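
Illustrative sketch (not part of this patch): the new fid_set_id() validates
the FID before storing an object ID and returns -EBADF for IGIF FIDs or
out-of-range OIDs, so a hypothetical caller is expected to check the result
rather than assume success.

static int example_assign_oid(struct lu_fid *fid, __u64 oid)
{
	int rc;

	rc = fid_set_id(fid, oid);	/* splits oid into f_seq/f_oid/f_ver for IDIF FIDs */
	if (rc)
		return rc;		/* -EBADF: IGIF FID or OID out of range */

	return 0;
}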
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
index 276906e646f5..59ba48ac31a7 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -193,37 +193,37 @@ struct ost_id {
* *INFO - set/get lov_user_mds_data
*/
 /* see <lustre_lib.h> for ioctl numbers 101-150 */
-#define LL_IOC_GETFLAGS _IOR ('f', 151, long)
-#define LL_IOC_SETFLAGS _IOW ('f', 152, long)
-#define LL_IOC_CLRFLAGS _IOW ('f', 153, long)
+#define LL_IOC_GETFLAGS _IOR('f', 151, long)
+#define LL_IOC_SETFLAGS _IOW('f', 152, long)
+#define LL_IOC_CLRFLAGS _IOW('f', 153, long)
/* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */
-#define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long)
+#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long)
/* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */
-#define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long)
+#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long)
/* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */
-#define LL_IOC_LOV_SETEA _IOW ('f', 156, long)
-#define LL_IOC_RECREATE_OBJ _IOW ('f', 157, long)
-#define LL_IOC_RECREATE_FID _IOW ('f', 157, struct lu_fid)
-#define LL_IOC_GROUP_LOCK _IOW ('f', 158, long)
-#define LL_IOC_GROUP_UNLOCK _IOW ('f', 159, long)
+#define LL_IOC_LOV_SETEA _IOW('f', 156, long)
+#define LL_IOC_RECREATE_OBJ _IOW('f', 157, long)
+#define LL_IOC_RECREATE_FID _IOW('f', 157, struct lu_fid)
+#define LL_IOC_GROUP_LOCK _IOW('f', 158, long)
+#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long)
/* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */
-#define LL_IOC_QUOTACHECK _IOW ('f', 160, int)
+#define LL_IOC_QUOTACHECK _IOW('f', 160, int)
/* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */
-#define LL_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *)
+#define LL_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *)
/* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */
#define LL_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
#define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *)
-#define LL_IOC_FLUSHCTX _IOW ('f', 166, long)
-#define LL_IOC_RMTACL _IOW ('f', 167, long)
-#define LL_IOC_GETOBDCOUNT _IOR ('f', 168, long)
+#define LL_IOC_FLUSHCTX _IOW('f', 166, long)
+#define LL_IOC_RMTACL _IOW('f', 167, long)
+#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long)
#define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long)
#define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long)
#define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid)
#define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long)
-#define LL_IOC_PATH2FID _IOR ('f', 173, long)
+#define LL_IOC_PATH2FID _IOR('f', 173, long)
#define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *)
-#define LL_IOC_GET_MDTIDX _IOR ('f', 175, int)
+#define LL_IOC_GET_MDTIDX _IOR('f', 175, int)
/* see <lustre_lib.h> for ioctl numbers 177-210 */
@@ -676,7 +676,12 @@ static inline const char *changelog_type2str(int type)
#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
/* HSM cleaning needed */
/* Flags for rename */
-#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of target */
+#define CLF_RENAME_LAST 0x0001 /* rename unlinked last hardlink of
+ * target
+ */
+#define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlinked last hardlink of a
+ * target that has an archive in backend
+ */
/* Flags for HSM */
/* 12b used (from high weight to low weight):
@@ -833,9 +838,8 @@ struct ioc_data_version {
__u64 idv_flags; /* See LL_DV_xxx */
};
-#define LL_DV_NOFLUSH 0x01 /* Do not take READ EXTENT LOCK before sampling
- * version. Dirty caches are left unchanged.
- */
+#define LL_DV_RD_FLUSH BIT(0) /* Flush dirty pages from clients */
+#define LL_DV_WR_FLUSH BIT(1) /* Flush all caching pages from clients */
#ifndef offsetof
# define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
@@ -1095,12 +1099,12 @@ struct hsm_action_list {
__u32 padding1;
char hal_fsname[0]; /* null-terminated */
/* struct hsm_action_item[hal_count] follows, aligned on 8-byte
- * boundaries. See hai_zero
+ * boundaries. See hai_first
*/
} __packed;
#ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round (int val)
+static inline int cfs_size_round(int val)
{
return (val + 7) & (~0x7);
}
@@ -1109,7 +1113,7 @@ static inline int cfs_size_round (int val)
#endif
/* Return pointer to first hai in action list */
-static inline struct hsm_action_item *hai_zero(struct hsm_action_list *hal)
+static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
{
return (struct hsm_action_item *)(hal->hal_fsname +
cfs_size_round(strlen(hal-> \
@@ -1131,7 +1135,7 @@ static inline int hal_size(struct hsm_action_list *hal)
struct hsm_action_item *hai;
sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1);
- hai = hai_zero(hal);
+ hai = hai_first(hal);
for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
sz += cfs_size_round(hai->hai_len);
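
Illustrative sketch (not part of this patch): iterating an HSM action list
with the renamed helper, mirroring the hal_size() loop above;
example_walk_hal() is a hypothetical caller.

static void example_walk_hal(struct hsm_action_list *hal)
{
	struct hsm_action_item *hai = hai_first(hal);
	int i;

	for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
		/* each hai describes one HSM action on one FID */
	}
}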
diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h
index bb16ae980b98..e229e91f7f56 100644
--- a/drivers/staging/lustre/lustre/include/lustre_cfg.h
+++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h
@@ -161,7 +161,7 @@ static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
int offset;
int bufcount;
- LASSERT (index >= 0);
+ LASSERT(index >= 0);
bufcount = lcfg->lcfg_bufcount;
if (index >= bufcount)
diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h
index 95fd36063f55..b36821ffb252 100644
--- a/drivers/staging/lustre/lustre/include/lustre_disk.h
+++ b/drivers/staging/lustre/lustre/include/lustre_disk.h
@@ -130,7 +130,6 @@ struct lustre_sb_info {
struct lustre_mount_data *lsi_lmd; /* mount command info */
struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */
struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/
- struct vfsmount *lsi_srv_mnt; /* the one server mount */
atomic_t lsi_mounts; /* references to the srv_mnt */
char lsi_svname[MTI_NAME_MAXLEN];
char lsi_osd_obdname[64];
@@ -158,7 +157,6 @@ struct lustre_sb_info {
struct lustre_mount_info {
char *lmi_name;
struct super_block *lmi_sb;
- struct vfsmount *lmi_mnt;
struct list_head lmi_list_chain;
};
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index 8b0364f71129..9cade144faca 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -71,6 +71,7 @@ struct obd_device;
*/
enum ldlm_error {
ELDLM_OK = 0,
+ ELDLM_LOCK_MATCHED = 1,
ELDLM_LOCK_CHANGED = 300,
ELDLM_LOCK_ABORTED = 301,
@@ -269,7 +270,7 @@ struct ldlm_pool {
struct completion pl_kobj_unregister;
};
-typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock);
+typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock);
/**
* LVB operations.
@@ -446,8 +447,11 @@ struct ldlm_namespace {
/** Limit of parallel AST RPC count. */
unsigned ns_max_parallel_ast;
- /** Callback to cancel locks before replaying it during recovery. */
- ldlm_cancel_for_recovery ns_cancel_for_recovery;
+ /**
+ * Callback to check if a lock is good to be canceled by ELC or
+ * during recovery.
+ */
+ ldlm_cancel_cbt ns_cancel;
/** LDLM lock stats */
struct lprocfs_stats *ns_stats;
@@ -479,9 +483,9 @@ static inline int ns_connect_lru_resize(struct ldlm_namespace *ns)
}
static inline void ns_register_cancel(struct ldlm_namespace *ns,
- ldlm_cancel_for_recovery arg)
+ ldlm_cancel_cbt arg)
{
- ns->ns_cancel_for_recovery = arg;
+ ns->ns_cancel = arg;
}
struct ldlm_lock;
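
Illustrative sketch (not part of this patch): registering the renamed
ldlm_cancel_cbt hook; example_cancel_ok() is hypothetical, and the assumption
here is that a non-zero return marks the lock as eligible for ELC or
recovery-time cancellation.

static int example_cancel_ok(struct ldlm_lock *lock)
{
	return 1;	/* this lock may be canceled */
}

/* typically done once at client setup time:
 *	ns_register_cancel(ns, example_cancel_ok);
 */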
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
index 7f2ba2ffe0eb..e7e0c21a9b40 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
@@ -37,17 +37,11 @@
/** l_flags bits marked as "gone" bits */
#define LDLM_FL_GONE_MASK 0x0006004000000000ULL
-/** l_flags bits marked as "hide_lock" bits */
-#define LDLM_FL_HIDE_LOCK_MASK 0x0000206400000000ULL
-
/** l_flags bits marked as "inherit" bits */
#define LDLM_FL_INHERIT_MASK 0x0000000000800000ULL
-/** l_flags bits marked as "local_only" bits */
-#define LDLM_FL_LOCAL_ONLY_MASK 0x00FFFFFF00000000ULL
-
-/** l_flags bits marked as "on_wire" bits */
-#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F932FULL
+/** l_flags bits marked as "off_wire" bits */
+#define LDLM_FL_OFF_WIRE_MASK 0x00FFFFFF00000000ULL
/** extent, mode, or resource changed */
#define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL /* bit 0 */
@@ -204,7 +198,7 @@
#define ldlm_set_cancel(_l) LDLM_SET_FLAG((_l), 1ULL << 36)
#define ldlm_clear_cancel(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 36)
-/** whatever it might mean */
+/** whatever it might mean -- never transmitted? */
#define LDLM_FL_LOCAL_ONLY 0x0000002000000000ULL /* bit 37 */
#define ldlm_is_local_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 37)
#define ldlm_set_local_only(_l) LDLM_SET_FLAG((_l), 1ULL << 37)
@@ -287,18 +281,18 @@
* has canceled this lock and is waiting for rpc_lock which is taken by
* the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in
* the lock to prevent the Early Lock Cancel (ELC) code from cancelling it.
- *
- * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock cache is
- * dropped to let ldlm_callback_handler() return EINVAL to the server. It
- * is used when ELC RPC is already prepared and is waiting for rpc_lock,
- * too late to send a separate CANCEL RPC.
*/
#define LDLM_FL_BL_AST 0x0000400000000000ULL /* bit 46 */
#define ldlm_is_bl_ast(_l) LDLM_TEST_FLAG((_l), 1ULL << 46)
#define ldlm_set_bl_ast(_l) LDLM_SET_FLAG((_l), 1ULL << 46)
#define ldlm_clear_bl_ast(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 46)
-/** whatever it might mean */
+/**
+ * Set by ldlm_cancel_callback() when lock cache is dropped to let
+ * ldlm_callback_handler() return EINVAL to the server. It is used when
+ * ELC RPC is already prepared and is waiting for rpc_lock, too late to
+ * send a separate CANCEL RPC.
+ */
#define LDLM_FL_BL_DONE 0x0000800000000000ULL /* bit 47 */
#define ldlm_is_bl_done(_l) LDLM_TEST_FLAG((_l), 1ULL << 47)
#define ldlm_set_bl_done(_l) LDLM_SET_FLAG((_l), 1ULL << 47)
@@ -381,104 +375,16 @@
/** test for ldlm_lock flag bit set */
#define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0)
+/** multi-bit test: are any of the mask bits set? */
+#define LDLM_HAVE_MASK(_l, _m) ((_l)->l_flags & LDLM_FL_##_m##_MASK)
+
/** set a ldlm_lock flag bit */
#define LDLM_SET_FLAG(_l, _b) ((_l)->l_flags |= (_b))
/** clear a ldlm_lock flag bit */
#define LDLM_CLEAR_FLAG(_l, _b) ((_l)->l_flags &= ~(_b))
-/** Mask of flags inherited from parent lock when doing intents. */
-#define LDLM_INHERIT_FLAGS LDLM_FL_INHERIT_MASK
-
-/** Mask of Flags sent in AST lock_flags to map into the receiving lock. */
-#define LDLM_AST_FLAGS LDLM_FL_AST_MASK
-
/** @} subgroup */
/** @} group */
-#ifdef WIRESHARK_COMPILE
-static int hf_lustre_ldlm_fl_lock_changed = -1;
-static int hf_lustre_ldlm_fl_block_granted = -1;
-static int hf_lustre_ldlm_fl_block_conv = -1;
-static int hf_lustre_ldlm_fl_block_wait = -1;
-static int hf_lustre_ldlm_fl_ast_sent = -1;
-static int hf_lustre_ldlm_fl_replay = -1;
-static int hf_lustre_ldlm_fl_intent_only = -1;
-static int hf_lustre_ldlm_fl_has_intent = -1;
-static int hf_lustre_ldlm_fl_flock_deadlock = -1;
-static int hf_lustre_ldlm_fl_discard_data = -1;
-static int hf_lustre_ldlm_fl_no_timeout = -1;
-static int hf_lustre_ldlm_fl_block_nowait = -1;
-static int hf_lustre_ldlm_fl_test_lock = -1;
-static int hf_lustre_ldlm_fl_cancel_on_block = -1;
-static int hf_lustre_ldlm_fl_deny_on_contention = -1;
-static int hf_lustre_ldlm_fl_ast_discard_data = -1;
-static int hf_lustre_ldlm_fl_fail_loc = -1;
-static int hf_lustre_ldlm_fl_skipped = -1;
-static int hf_lustre_ldlm_fl_cbpending = -1;
-static int hf_lustre_ldlm_fl_wait_noreproc = -1;
-static int hf_lustre_ldlm_fl_cancel = -1;
-static int hf_lustre_ldlm_fl_local_only = -1;
-static int hf_lustre_ldlm_fl_failed = -1;
-static int hf_lustre_ldlm_fl_canceling = -1;
-static int hf_lustre_ldlm_fl_local = -1;
-static int hf_lustre_ldlm_fl_lvb_ready = -1;
-static int hf_lustre_ldlm_fl_kms_ignore = -1;
-static int hf_lustre_ldlm_fl_cp_reqd = -1;
-static int hf_lustre_ldlm_fl_cleaned = -1;
-static int hf_lustre_ldlm_fl_atomic_cb = -1;
-static int hf_lustre_ldlm_fl_bl_ast = -1;
-static int hf_lustre_ldlm_fl_bl_done = -1;
-static int hf_lustre_ldlm_fl_no_lru = -1;
-static int hf_lustre_ldlm_fl_fail_notified = -1;
-static int hf_lustre_ldlm_fl_destroyed = -1;
-static int hf_lustre_ldlm_fl_server_lock = -1;
-static int hf_lustre_ldlm_fl_res_locked = -1;
-static int hf_lustre_ldlm_fl_waited = -1;
-static int hf_lustre_ldlm_fl_ns_srv = -1;
-static int hf_lustre_ldlm_fl_excl = -1;
-
-const value_string lustre_ldlm_flags_vals[] = {
- {LDLM_FL_LOCK_CHANGED, "LDLM_FL_LOCK_CHANGED"},
- {LDLM_FL_BLOCK_GRANTED, "LDLM_FL_BLOCK_GRANTED"},
- {LDLM_FL_BLOCK_CONV, "LDLM_FL_BLOCK_CONV"},
- {LDLM_FL_BLOCK_WAIT, "LDLM_FL_BLOCK_WAIT"},
- {LDLM_FL_AST_SENT, "LDLM_FL_AST_SENT"},
- {LDLM_FL_REPLAY, "LDLM_FL_REPLAY"},
- {LDLM_FL_INTENT_ONLY, "LDLM_FL_INTENT_ONLY"},
- {LDLM_FL_HAS_INTENT, "LDLM_FL_HAS_INTENT"},
- {LDLM_FL_FLOCK_DEADLOCK, "LDLM_FL_FLOCK_DEADLOCK"},
- {LDLM_FL_DISCARD_DATA, "LDLM_FL_DISCARD_DATA"},
- {LDLM_FL_NO_TIMEOUT, "LDLM_FL_NO_TIMEOUT"},
- {LDLM_FL_BLOCK_NOWAIT, "LDLM_FL_BLOCK_NOWAIT"},
- {LDLM_FL_TEST_LOCK, "LDLM_FL_TEST_LOCK"},
- {LDLM_FL_CANCEL_ON_BLOCK, "LDLM_FL_CANCEL_ON_BLOCK"},
- {LDLM_FL_DENY_ON_CONTENTION, "LDLM_FL_DENY_ON_CONTENTION"},
- {LDLM_FL_AST_DISCARD_DATA, "LDLM_FL_AST_DISCARD_DATA"},
- {LDLM_FL_FAIL_LOC, "LDLM_FL_FAIL_LOC"},
- {LDLM_FL_SKIPPED, "LDLM_FL_SKIPPED"},
- {LDLM_FL_CBPENDING, "LDLM_FL_CBPENDING"},
- {LDLM_FL_WAIT_NOREPROC, "LDLM_FL_WAIT_NOREPROC"},
- {LDLM_FL_CANCEL, "LDLM_FL_CANCEL"},
- {LDLM_FL_LOCAL_ONLY, "LDLM_FL_LOCAL_ONLY"},
- {LDLM_FL_FAILED, "LDLM_FL_FAILED"},
- {LDLM_FL_CANCELING, "LDLM_FL_CANCELING"},
- {LDLM_FL_LOCAL, "LDLM_FL_LOCAL"},
- {LDLM_FL_LVB_READY, "LDLM_FL_LVB_READY"},
- {LDLM_FL_KMS_IGNORE, "LDLM_FL_KMS_IGNORE"},
- {LDLM_FL_CP_REQD, "LDLM_FL_CP_REQD"},
- {LDLM_FL_CLEANED, "LDLM_FL_CLEANED"},
- {LDLM_FL_ATOMIC_CB, "LDLM_FL_ATOMIC_CB"},
- {LDLM_FL_BL_AST, "LDLM_FL_BL_AST"},
- {LDLM_FL_BL_DONE, "LDLM_FL_BL_DONE"},
- {LDLM_FL_NO_LRU, "LDLM_FL_NO_LRU"},
- {LDLM_FL_FAIL_NOTIFIED, "LDLM_FL_FAIL_NOTIFIED"},
- {LDLM_FL_DESTROYED, "LDLM_FL_DESTROYED"},
- {LDLM_FL_SERVER_LOCK, "LDLM_FL_SERVER_LOCK"},
- {LDLM_FL_RES_LOCKED, "LDLM_FL_RES_LOCKED"},
- {LDLM_FL_WAITED, "LDLM_FL_WAITED"},
- {LDLM_FL_NS_SRV, "LDLM_FL_NS_SRV"},
- {LDLM_FL_EXCL, "LDLM_FL_EXCL"},
- { 0, NULL }
-};
-#endif /* WIRESHARK_COMPILE */
+
#endif /* LDLM_ALL_FLAGS_MASK */
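
Illustrative sketch (not part of this patch): the new LDLM_HAVE_MASK() helper
tests a whole group of flag bits at once; for a struct ldlm_lock *lock,
LDLM_HAVE_MASK(lock, GONE) expands to (lock->l_flags & LDLM_FL_GONE_MASK).

	if (LDLM_HAVE_MASK(lock, GONE))
		return;		/* one of the "gone" bits is set; skip further work */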
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
index ab4a92390a43..12e8b585c2b4 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -308,10 +308,10 @@ static inline int fid_seq_in_fldb(__u64 seq)
fid_seq_is_root(seq) || fid_seq_is_dot(seq);
}
-static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq)
+static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx)
{
if (fid_seq_is_mdt0(seq)) {
- fid->f_seq = fid_idif_seq(0, 0);
+ fid->f_seq = fid_idif_seq(0, ost_idx);
} else {
LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) ||
fid_seq_is_idif(seq), "%#llx\n", seq);
@@ -498,19 +498,6 @@ static inline void ostid_build_res_name(struct ost_id *oi,
}
}
-static inline void ostid_res_name_to_id(struct ost_id *oi,
- struct ldlm_res_id *name)
-{
- if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_SEQ_OFF])) {
- /* old resid */
- ostid_set_seq(oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
- ostid_set_id(oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
- } else {
- /* new resid */
- fid_extract_from_res_name(&oi->oi_fid, name);
- }
-}
-
/**
* Return true if the resource is for the object identified by this id & group.
*/
@@ -546,7 +533,8 @@ static inline void ost_fid_build_resid(const struct lu_fid *fid,
}
static inline void ost_fid_from_resid(struct lu_fid *fid,
- const struct ldlm_res_id *name)
+ const struct ldlm_res_id *name,
+ int ost_idx)
{
if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) {
/* old resid */
@@ -554,7 +542,7 @@ static inline void ost_fid_from_resid(struct lu_fid *fid,
ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
- ostid_to_fid(fid, &oi, 0);
+ ostid_to_fid(fid, &oi, ost_idx);
} else {
/* new resid */
fid_extract_from_res_name(fid, name);
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
index dac2d84d8266..8325c82b3ebf 100644
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -109,7 +109,7 @@ static inline char *ptlrpc_import_state_name(enum lustre_imp_state state)
"RECOVER", "FULL", "EVICTED",
};
- LASSERT (state <= LUSTRE_IMP_EVICTED);
+ LASSERT(state <= LUSTRE_IMP_EVICTED);
return import_state_names[state];
}
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index f2223d55850a..00b976766aef 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -280,16 +280,16 @@ static inline void obd_ioctl_freedata(char *buf, int len)
#define OBD_IOC_DATA_TYPE long
#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DESTROY _IOW ('f', 104, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE)
#define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SETATTR _IOW ('f', 107, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE)
#define OBD_IOC_GETATTR _IOWR ('f', 108, OBD_IOC_DATA_TYPE)
#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE)
#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE)
#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SYNC _IOW ('f', 114, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE)
#define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE)
#define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE)
#define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE)
@@ -308,13 +308,13 @@ static inline void obd_ioctl_freedata(char *buf, int len)
#define OBD_IOC_GETDTNAME OBD_IOC_GETNAME
#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PING_TARGET _IOW ('f', 136, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE)
#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139)
-#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE)
#define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE)
@@ -324,27 +324,27 @@ static inline void obd_ioctl_freedata(char *buf, int len)
#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CHANGELOG_SEND _IOW ('f', 148, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE)
#define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
#define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE)
/* see also <lustre/lustre_user.h> for ioctls 151-153 */
/* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */
-#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_SETSTRIPE _IOW('f', 154, OBD_IOC_DATA_TYPE)
/* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */
-#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_GETSTRIPE _IOW('f', 155, OBD_IOC_DATA_TYPE)
/* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */
-#define OBD_IOC_LOV_SETEA _IOW ('f', 156, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_SETEA _IOW('f', 156, OBD_IOC_DATA_TYPE)
/* see <lustre/lustre_user.h> for ioctls 157-159 */
/* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */
-#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int)
+#define OBD_IOC_QUOTACHECK _IOW('f', 160, int)
/* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */
-#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *)
+#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *)
/* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */
#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
/* see also <lustre/lustre_user.h> for ioctls 163-176 */
-#define OBD_IOC_CHANGELOG_REG _IOW ('f', 177, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_DEREG _IOW ('f', 178, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_CLEAR _IOW ('f', 179, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data)
#define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE)
#define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE)
#define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE)
@@ -352,7 +352,7 @@ static inline void obd_ioctl_freedata(char *buf, int len)
#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE)
#define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE)
#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE)
#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE)
#define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE)
@@ -522,6 +522,28 @@ struct l_wait_info {
sigmask(SIGTERM) | sigmask(SIGQUIT) | \
sigmask(SIGALRM))
+/**
+ * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * waiting threads, which is not always desirable because all threads are
+ * woken up again and again, even if only a few of them need to be active
+ * most of the time. This is bad for performance because the cache can be
+ * polluted by different threads.
+ *
+ * A LIFO list resolves this problem because we always wake up the most
+ * recently active thread by default.
+ *
+ * NB: please don't mix non-exclusive and exclusive waits on the same
+ * waitq if add_wait_queue_exclusive_head is used.
+ */
+#define add_wait_queue_exclusive_head(waitq, link) \
+{ \
+ unsigned long flags; \
+ \
+ spin_lock_irqsave(&((waitq)->lock), flags); \
+ __add_wait_queue_exclusive(waitq, link); \
+ spin_unlock_irqrestore(&((waitq)->lock), flags); \
+}
+
/*
* wait for @condition to become true, but no longer than timeout, specified
* by @info.
@@ -578,7 +600,7 @@ do { \
\
if (condition) \
break; \
- if (cfs_signal_pending()) { \
+ if (signal_pending(current)) { \
if (info->lwi_on_signal && \
(__timeout == 0 || __allow_intr)) { \
if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \
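
Illustrative sketch (not part of this patch): a hypothetical exclusive waiter
using the new add_wait_queue_exclusive_head() helper together with the stock
wait-queue API of this kernel generation.

static void example_exclusive_wait(wait_queue_head_t *waitq)
{
	wait_queue_t link;

	init_waitqueue_entry(&link, current);
	add_wait_queue_exclusive_head(waitq, &link);

	/* ...set_current_state()/schedule() loop, as in l_wait_event()... */

	remove_wait_queue(waitq, &link);
}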
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index af77eb359c43..f267ff8a6ec8 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -64,9 +64,27 @@ struct obd_export;
struct ptlrpc_request;
struct obd_device;
+/**
+ * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
+ *
+ * This mutex is used to implement execute-once semantics on the MDT.
+ * The MDT stores the last transaction ID and result for every client in
+ * its last_rcvd file. If the client doesn't get a reply, it can safely
+ * resend the request, and the MDT will reconstruct the reply knowing
+ * that the request has already been executed. Without this lock, the
+ * execution status of concurrent in-flight requests would be
+ * overwritten.
+ *
+ * This design limits the extent to which we can keep a full pipeline of
+ * in-flight requests from a single client. This limitation could be
+ * overcome by allowing multiple slots per client in the last_rcvd file.
+ */
struct mdc_rpc_lock {
+ /** Lock protecting in-flight RPC concurrency. */
struct mutex rpcl_mutex;
+ /** Intent associated with currently executing request. */
struct lookup_intent *rpcl_it;
+ /** Used for MDS/RPC load testing purposes. */
int rpcl_fakes;
};
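
Illustrative sketch (not part of this patch): conceptually, one MDT-modifying
request at a time is sent under rpcl_mutex, which is what gives the MDT the
execute-once guarantee described above. Real callers go through the helpers
around this structure rather than taking the mutex directly, and
ptlrpc_queue_wait() here only stands in for "send the request and wait for
the reply"; rpc_lock and req are assumed to be set up by the caller.

	mutex_lock(&rpc_lock->rpcl_mutex);
	rc = ptlrpc_queue_wait(req);	/* serialized with other MDT modifications */
	mutex_unlock(&rpc_lock->rpcl_mutex);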
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index 69586a522eb7..a7973d5de168 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -1327,7 +1327,9 @@ struct ptlrpc_request {
/* allow the req to be sent if the import is in recovery
* status
*/
- rq_allow_replay:1;
+ rq_allow_replay:1,
+ /* bulk request, sent to server, but uncommitted */
+ rq_unstable:1;
unsigned int rq_nr_resend;
diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h
index 383fe6febe4b..a42cf90c1cd8 100644
--- a/drivers/staging/lustre/lustre/include/lustre_param.h
+++ b/drivers/staging/lustre/lustre/include/lustre_param.h
@@ -89,6 +89,7 @@ int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh);
/* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */
#define PARAM_OST "ost."
+#define PARAM_OSD "osd."
#define PARAM_OSC "osc."
#define PARAM_MDT "mdt."
#define PARAM_MDD "mdd."
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
index b2e67fcf9ef1..0aac4391ea16 100644
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -137,6 +137,7 @@ extern struct req_format RQF_MGS_CONFIG_READ;
/* fid/fld req_format */
extern struct req_format RQF_SEQ_QUERY;
extern struct req_format RQF_FLD_QUERY;
+extern struct req_format RQF_FLD_READ;
/* MDS req_format */
extern struct req_format RQF_MDS_CONNECT;
extern struct req_format RQF_MDS_DISCONNECT;
@@ -199,7 +200,7 @@ extern struct req_format RQF_OST_BRW_READ;
extern struct req_format RQF_OST_BRW_WRITE;
extern struct req_format RQF_OST_STATFS;
extern struct req_format RQF_OST_SET_GRANT_INFO;
-extern struct req_format RQF_OST_GET_INFO_GENERIC;
+extern struct req_format RQF_OST_GET_INFO;
extern struct req_format RQF_OST_GET_INFO_LAST_ID;
extern struct req_format RQF_OST_GET_INFO_LAST_FID;
extern struct req_format RQF_OST_SET_INFO_LAST_FID;
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index 4264d97650ec..2d926e0ee647 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -37,7 +37,7 @@
#ifndef __OBD_H
#define __OBD_H
-#include "linux/obd.h"
+#include <linux/spinlock.h>
#define IOC_OSC_TYPE 'h'
#define IOC_OSC_MIN_NR 20
@@ -54,6 +54,7 @@
#include "lustre_export.h"
#include "lustre_fid.h"
#include "lustre_fld.h"
+#include "lustre_intent.h"
#define MAX_OBD_DEVICES 8192
@@ -165,9 +166,6 @@ struct obd_info {
obd_enqueue_update_f oi_cb_up;
};
-void lov_stripe_lock(struct lov_stripe_md *md);
-void lov_stripe_unlock(struct lov_stripe_md *md);
-
struct obd_type {
struct list_head typ_chain;
struct obd_ops *typ_dt_ops;
@@ -293,14 +291,10 @@ struct client_obd {
* blocking everywhere, but we don't want to slow down fast-path of
* our main platform.)
*
- * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together
- * with client_obd_list_{un,}lock() and
- * client_obd_list_lock_{init,done}() functions.
- *
* NB by Jinshan: though field names are still _loi_, but actually
* osc_object{}s are in the list.
*/
- struct client_obd_lock cl_loi_list_lock;
+ spinlock_t cl_loi_list_lock;
struct list_head cl_loi_ready_list;
struct list_head cl_loi_hp_ready_list;
struct list_head cl_loi_write_list;
@@ -327,7 +321,8 @@ struct client_obd {
atomic_t cl_lru_shrinkers;
atomic_t cl_lru_in_list;
struct list_head cl_lru_list; /* lru page list */
- struct client_obd_lock cl_lru_list_lock; /* page list protector */
+ spinlock_t cl_lru_list_lock; /* page list protector */
+ atomic_t cl_unstable_count;
/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
atomic_t cl_destroy_in_flight;
@@ -364,6 +359,7 @@ struct client_obd {
/* ptlrpc work for writeback in ptlrpcd context */
void *cl_writeback_work;
+ void *cl_lru_work;
/* hash tables for osc_quota_info */
struct cfs_hash *cl_quota_hash[MAXQUOTAS];
};
@@ -391,45 +387,9 @@ struct ost_pool {
struct rw_semaphore op_rw_sem; /* to protect ost_pool use */
};
-/* Round-robin allocator data */
-struct lov_qos_rr {
- __u32 lqr_start_idx; /* start index of new inode */
- __u32 lqr_offset_idx; /* aliasing for start_idx */
- int lqr_start_count; /* reseed counter */
- struct ost_pool lqr_pool; /* round-robin optimized list */
- unsigned long lqr_dirty:1; /* recalc round-robin list */
-};
-
/* allow statfs data caching for 1 second */
#define OBD_STATFS_CACHE_SECONDS 1
-struct lov_statfs_data {
- struct obd_info lsd_oi;
- struct obd_statfs lsd_statfs;
-};
-
-/* Stripe placement optimization */
-struct lov_qos {
- struct list_head lq_oss_list; /* list of OSSs that targets use */
- struct rw_semaphore lq_rw_sem;
- __u32 lq_active_oss_count;
- unsigned int lq_prio_free; /* priority for free space */
- unsigned int lq_threshold_rr;/* priority for rr */
- struct lov_qos_rr lq_rr; /* round robin qos data */
- unsigned long lq_dirty:1, /* recalc qos data */
- lq_same_space:1,/* the ost's all have approx.
- * the same space avail
- */
- lq_reset:1, /* zero current penalties */
- lq_statfs_in_progress:1; /* statfs op in
- progress */
- /* qos statfs data */
- struct lov_statfs_data *lq_statfs_data;
- wait_queue_head_t lq_statfs_waitq; /* waitqueue to notify statfs
- * requests completion
- */
-};
-
struct lov_tgt_desc {
struct list_head ltd_kill;
struct obd_uuid ltd_uuid;
@@ -442,25 +402,6 @@ struct lov_tgt_desc {
ltd_reap:1; /* should this target be deleted */
};
-/* Pool metadata */
-#define pool_tgt_size(_p) _p->pool_obds.op_size
-#define pool_tgt_count(_p) _p->pool_obds.op_count
-#define pool_tgt_array(_p) _p->pool_obds.op_array
-#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem
-
-struct pool_desc {
- char pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */
- struct ost_pool pool_obds; /* pool members */
- atomic_t pool_refcount; /* pool ref. counter */
- struct lov_qos_rr pool_rr; /* round robin qos */
- struct hlist_node pool_hash; /* access by poolname */
- struct list_head pool_list; /* serial access */
- struct dentry *pool_debugfs_entry; /* file in debugfs */
- struct obd_device *pool_lobd; /* obd of the lov/lod to which
- * this pool belongs
- */
-};
-
struct lov_obd {
struct lov_desc desc;
struct lov_tgt_desc **lov_tgts; /* sparse array */
@@ -468,8 +409,6 @@ struct lov_obd {
struct mutex lov_lock;
struct obd_connect_data lov_ocd;
atomic_t lov_refcount;
- __u32 lov_tgt_count; /* how many OBD's */
- __u32 lov_active_tgt_count; /* how many active */
__u32 lov_death_row;/* tgts scheduled to be deleted */
__u32 lov_tgt_size; /* size of tgts array */
int lov_connects;
@@ -479,7 +418,7 @@ struct lov_obd {
struct dentry *lov_pool_debugfs_entry;
enum lustre_sec_part lov_sp_me;
- /* Cached LRU pages from upper layer */
+ /* Cached LRU and unstable data from upper layer */
void *lov_cache;
struct rw_semaphore lov_notify_lock;
@@ -511,7 +450,7 @@ struct lmv_obd {
struct obd_uuid cluuid;
struct obd_export *exp;
- struct mutex init_mutex;
+ struct mutex lmv_init_mutex;
int connected;
int max_easize;
int max_def_easize;
diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h
index 637fa22110a4..f6c18df906a8 100644
--- a/drivers/staging/lustre/lustre/include/obd_cksum.h
+++ b/drivers/staging/lustre/lustre/include/obd_cksum.h
@@ -35,6 +35,7 @@
#ifndef __OBD_CKSUM
#define __OBD_CKSUM
#include "../../include/linux/libcfs/libcfs.h"
+#include "../../include/linux/libcfs/libcfs_crypto.h"
#include "lustre/lustre_idl.h"
static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type)
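The new libcfs_crypto.h include supplies the CFS_HASH_ALG_* identifiers that
the OBD wire checksum types are translated to; an illustrative mapping, not
copied from this header, might look like:

/* Illustrative only: translate an OBD wire checksum type to a libcfs
 * crypto hash algorithm.  The real helper lives in this header; the
 * body below is a plausible reconstruction, not the patched code.
 */
static inline unsigned char cksum_obd2cfs_example(enum cksum_type type)
{
	switch (type) {
	case OBD_CKSUM_CRC32:
		return CFS_HASH_ALG_CRC32;
	case OBD_CKSUM_ADLER:
		return CFS_HASH_ALG_ADLER32;
	case OBD_CKSUM_CRC32C:
		return CFS_HASH_ALG_CRC32C;
	default:
		return CFS_HASH_ALG_NULL;
	}
}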
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
index 706869f8c98f..32863bcb30b9 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -477,7 +477,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
struct lu_context session_ctx;
struct lu_env env;
- lu_context_init(&session_ctx, LCT_SESSION);
+ lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION);
session_ctx.lc_thread = NULL;
lu_context_enter(&session_ctx);
@@ -490,8 +490,9 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
obd->obd_lu_dev = d;
d->ld_obd = obd;
rc = 0;
- } else
+ } else {
rc = PTR_ERR(d);
+ }
}
lu_context_exit(&session_ctx);
lu_context_fini(&session_ctx);
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index f8ee3a3254ba..60034d39b00d 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -58,6 +58,7 @@ extern int at_early_margin;
extern int at_extra;
extern unsigned int obd_sync_filter;
extern unsigned int obd_max_dirty_pages;
+extern atomic_t obd_unstable_pages;
extern atomic_t obd_dirty_pages;
extern atomic_t obd_dirty_transit_pages;
extern char obd_jobid_var[];
@@ -289,6 +290,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_OST_ENOINO 0x229
#define OBD_FAIL_OST_DQACQ_NET 0x230
#define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231
+#define OBD_FAIL_OST_SET_INFO_NET 0x232
#define OBD_FAIL_LDLM 0x300
#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
@@ -319,6 +321,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LDLM_AGL_DELAY 0x31a
#define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b
#define OBD_FAIL_LDLM_OST_LVB 0x31c
+#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d
/* LOCKLESS IO */
#define OBD_FAIL_LDLM_SET_CONTENTION 0x385
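The new OBD_FAIL_LDLM_ENQUEUE_HANG code is consumed through the fail_loc
machinery defined elsewhere in this header; a hedged sketch of typical usage,
with the surrounding function invented and the 10-second delay arbitrary:

/* Hypothetical fault-injection site: when the administrator sets
 * fail_loc to 0x31d (e.g. "lctl set_param fail_loc=0x31d"),
 * OBD_FAIL_TIMEOUT() sleeps for the given number of seconds,
 * simulating a hung enqueue; otherwise it is a no-op.
 */
static void ldlm_maybe_hang_enqueue(void)
{
	OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 10);
}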
@@ -426,6 +429,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_FLD 0x1100
#define OBD_FAIL_FLD_QUERY_NET 0x1101
+#define OBD_FAIL_FLD_READ_NET 0x1102
#define OBD_FAIL_SEC_CTX 0x1200
#define OBD_FAIL_SEC_CTX_INIT_NET 0x1201