diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/include')
21 files changed, 436 insertions, 1651 deletions
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h index fb971ded5a1b..d4c33dd110ab 100644 --- a/drivers/staging/lustre/lustre/include/cl_object.h +++ b/drivers/staging/lustre/lustre/include/cl_object.h @@ -82,7 +82,6 @@ * - i_mutex * - PG_locked * - cl_object_header::coh_page_guard - * - cl_object_header::coh_lock_guard * - lu_site::ls_guard * * See the top comment in cl_object.c for the description of overall locking and @@ -98,9 +97,12 @@ * super-class definitions. */ #include "lu_object.h" +#include <linux/atomic.h> #include "linux/lustre_compat25.h" #include <linux/mutex.h> #include <linux/radix-tree.h> +#include <linux/spinlock.h> +#include <linux/wait.h> struct inode; @@ -138,7 +140,7 @@ struct cl_device_operations { * cl_req_slice_add(). * * \see osc_req_init(), lov_req_init(), lovsub_req_init() - * \see ccc_req_init() + * \see vvp_req_init() */ int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev, struct cl_req *req); @@ -147,7 +149,7 @@ struct cl_device_operations { /** * Device in the client stack. * - * \see ccc_device, lov_device, lovsub_device, osc_device + * \see vvp_device, lov_device, lovsub_device, osc_device */ struct cl_device { /** Super-class. */ @@ -243,7 +245,7 @@ enum cl_attr_valid { * be discarded from the memory, all its sub-objects are torn-down and * destroyed too. * - * \see ccc_object, lov_object, lovsub_object, osc_object + * \see vvp_object, lov_object, lovsub_object, osc_object */ struct cl_object { /** super class */ @@ -322,7 +324,7 @@ struct cl_object_operations { * to be used instead of newly created. */ int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); /** * Initialize lock slice for this layer. Called top-to-bottom through * every object layer when a new cl_lock is instantiated. Layer @@ -383,11 +385,17 @@ struct cl_object_operations { * object. Layers are supposed to fill parts of \a lvb that will be * shipped to the glimpse originator as a glimpse result. * - * \see ccc_object_glimpse(), lovsub_object_glimpse(), + * \see vvp_object_glimpse(), lovsub_object_glimpse(), * \see osc_object_glimpse() */ int (*coo_glimpse)(const struct lu_env *env, const struct cl_object *obj, struct ost_lvb *lvb); + /** + * Object prune method. Called when the layout is going to change on + * this object, therefore each layer has to clean up their cache, + * mainly pages and locks. + */ + int (*coo_prune)(const struct lu_env *env, struct cl_object *obj); }; /** @@ -398,22 +406,6 @@ struct cl_object_header { * here. */ struct lu_object_header coh_lu; - /** \name locks - * \todo XXX move locks below to the separate cache-lines, they are - * mostly useless otherwise. - */ - /** @{ */ - /** Lock protecting page tree. */ - spinlock_t coh_page_guard; - /** Lock protecting lock list. */ - spinlock_t coh_lock_guard; - /** @} locks */ - /** Radix tree of cl_page's, cached for this object. */ - struct radix_tree_root coh_tree; - /** # of pages in radix tree. */ - unsigned long coh_pages; - /** List of cl_lock's granted for this object. */ - struct list_head coh_locks; /** * Parent object. It is assumed that an object has a well-defined @@ -460,10 +452,6 @@ struct cl_object_header { co_lu.lo_linkage) /** @} cl_object */ -#ifndef pgoff_t -#define pgoff_t unsigned long -#endif - #define CL_PAGE_EOF ((pgoff_t)~0ull) /** \addtogroup cl_page cl_page @@ -727,16 +715,10 @@ struct cl_page { atomic_t cp_ref; /** An object this page is a part of. Immutable after creation. */ struct cl_object *cp_obj; - /** Logical page index within the object. Immutable after creation. */ - pgoff_t cp_index; /** List of slices. Immutable after creation. */ struct list_head cp_layers; - /** Parent page, NULL for top-level page. Immutable after creation. */ - struct cl_page *cp_parent; - /** Lower-layer page. NULL for bottommost page. Immutable after - * creation. - */ - struct cl_page *cp_child; + /** vmpage */ + struct page *cp_vmpage; /** * Page state. This field is const to avoid accidental update, it is * modified only internally within cl_page.c. Protected by a VM lock. @@ -787,10 +769,11 @@ struct cl_page { /** * Per-layer part of cl_page. * - * \see ccc_page, lov_page, osc_page + * \see vvp_page, lov_page, osc_page */ struct cl_page_slice { struct cl_page *cpl_page; + pgoff_t cpl_index; /** * Object slice corresponding to this page slice. Immutable after * creation. @@ -804,16 +787,9 @@ struct cl_page_slice { /** * Lock mode. For the client extent locks. * - * \warning: cl_lock_mode_match() assumes particular ordering here. * \ingroup cl_lock */ enum cl_lock_mode { - /** - * Mode of a lock that protects no data, and exists only as a - * placeholder. This is used for `glimpse' requests. A phantom lock - * might get promoted to real lock at some point. - */ - CLM_PHANTOM, CLM_READ, CLM_WRITE, CLM_GROUP @@ -846,11 +822,6 @@ struct cl_page_operations { */ /** - * \return the underlying VM page. Optional. - */ - struct page *(*cpo_vmpage)(const struct lu_env *env, - const struct cl_page_slice *slice); - /** * Called when \a io acquires this page into the exclusive * ownership. When this method returns, it is guaranteed that the is * not owned by other io, and no transfer is going on against @@ -897,14 +868,6 @@ struct cl_page_operations { void (*cpo_export)(const struct lu_env *env, const struct cl_page_slice *slice, int uptodate); /** - * Unmaps page from the user space (if it is mapped). - * - * \see cl_page_unmap() - * \see vvp_page_unmap() - */ - int (*cpo_unmap)(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); - /** * Checks whether underlying VM page is locked (in the suitable * sense). Used for assertions. * @@ -957,7 +920,7 @@ struct cl_page_operations { */ int (*cpo_is_under_lock)(const struct lu_env *env, const struct cl_page_slice *slice, - struct cl_io *io); + struct cl_io *io, pgoff_t *max); /** * Optional debugging helper. Prints given page slice. @@ -1027,26 +990,6 @@ struct cl_page_operations { */ int (*cpo_make_ready)(const struct lu_env *env, const struct cl_page_slice *slice); - /** - * Announce that this page is to be written out - * opportunistically, that is, page is dirty, it is not - * necessary to start write-out transfer right now, but - * eventually page has to be written out. - * - * Main caller of this is the write path (see - * vvp_io_commit_write()), using this method to build a - * "transfer cache" from which large transfers are then - * constructed by the req-formation engine. - * - * \todo XXX it would make sense to add page-age tracking - * semantics here, and to oblige the req-formation engine to - * send the page out not later than it is too old. - * - * \see cl_page_cache_add() - */ - int (*cpo_cache_add)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); } io[CRT_NR]; /** * Tell transfer engine that only [to, from] part of a page should be @@ -1098,9 +1041,8 @@ struct cl_page_operations { */ #define CL_PAGE_DEBUG(mask, env, page, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ cl_page_print(env, &msgdata, lu_cdebug_printer, page); \ CDEBUG(mask, format, ## __VA_ARGS__); \ } \ @@ -1111,9 +1053,8 @@ do { \ */ #define CL_PAGE_HEADER(mask, env, page, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \ CDEBUG(mask, format, ## __VA_ARGS__); \ } \ @@ -1130,6 +1071,12 @@ static inline int __page_in_use(const struct cl_page *page, int refc) #define cl_page_in_use(pg) __page_in_use(pg, 1) #define cl_page_in_use_noref(pg) __page_in_use(pg, 0) +static inline struct page *cl_page_vmpage(struct cl_page *page) +{ + LASSERT(page->cp_vmpage); + return page->cp_vmpage; +} + /** @} cl_page */ /** \addtogroup cl_lock cl_lock @@ -1150,12 +1097,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to * cl_lock::cll_layers list through cl_lock_slice::cls_linkage. * - * All locks for a given object are linked into cl_object_header::coh_locks - * list (protected by cl_object_header::coh_lock_guard spin-lock) through - * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can - * sort it in starting lock offset, or use altogether different data structure - * like a tree. - * * Typical cl_lock consists of the two layers: * * - vvp_lock (vvp specific data), and @@ -1177,111 +1118,29 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * * LIFE CYCLE * - * cl_lock is reference counted. When reference counter drops to 0, lock is - * placed in the cache, except when lock is in CLS_FREEING state. CLS_FREEING - * lock is destroyed when last reference is released. Referencing between - * top-lock and its sub-locks is described in the lov documentation module. - * - * STATE MACHINE - * - * Also, cl_lock is a state machine. This requires some clarification. One of - * the goals of client IO re-write was to make IO path non-blocking, or at - * least to make it easier to make it non-blocking in the future. Here - * `non-blocking' means that when a system call (read, write, truncate) - * reaches a situation where it has to wait for a communication with the - * server, it should --instead of waiting-- remember its current state and - * switch to some other work. E.g,. instead of waiting for a lock enqueue, - * client should proceed doing IO on the next stripe, etc. Obviously this is - * rather radical redesign, and it is not planned to be fully implemented at - * this time, instead we are putting some infrastructure in place, that would - * make it easier to do asynchronous non-blocking IO easier in the - * future. Specifically, where old locking code goes to sleep (waiting for - * enqueue, for example), new code returns cl_lock_transition::CLO_WAIT. When - * enqueue reply comes, its completion handler signals that lock state-machine - * is ready to transit to the next state. There is some generic code in - * cl_lock.c that sleeps, waiting for these signals. As a result, for users of - * this cl_lock.c code, it looks like locking is done in normal blocking - * fashion, and it the same time it is possible to switch to the non-blocking - * locking (simply by returning cl_lock_transition::CLO_WAIT from cl_lock.c - * functions). - * - * For a description of state machine states and transitions see enum - * cl_lock_state. - * - * There are two ways to restrict a set of states which lock might move to: - * - * - placing a "hold" on a lock guarantees that lock will not be moved - * into cl_lock_state::CLS_FREEING state until hold is released. Hold - * can be only acquired on a lock that is not in - * cl_lock_state::CLS_FREEING. All holds on a lock are counted in - * cl_lock::cll_holds. Hold protects lock from cancellation and - * destruction. Requests to cancel and destroy a lock on hold will be - * recorded, but only honored when last hold on a lock is released; - * - * - placing a "user" on a lock guarantees that lock will not leave - * cl_lock_state::CLS_NEW, cl_lock_state::CLS_QUEUING, - * cl_lock_state::CLS_ENQUEUED and cl_lock_state::CLS_HELD set of - * states, once it enters this set. That is, if a user is added onto a - * lock in a state not from this set, it doesn't immediately enforce - * lock to move to this set, but once lock enters this set it will - * remain there until all users are removed. Lock users are counted in - * cl_lock::cll_users. - * - * User is used to assure that lock is not canceled or destroyed while - * it is being enqueued, or actively used by some IO. - * - * Currently, a user always comes with a hold (cl_lock_invariant() - * checks that a number of holds is not less than a number of users). - * - * CONCURRENCY - * - * This is how lock state-machine operates. struct cl_lock contains a mutex - * cl_lock::cll_guard that protects struct fields. - * - * - mutex is taken, and cl_lock::cll_state is examined. - * - * - for every state there are possible target states where lock can move - * into. They are tried in order. Attempts to move into next state are - * done by _try() functions in cl_lock.c:cl_{enqueue,unlock,wait}_try(). - * - * - if the transition can be performed immediately, state is changed, - * and mutex is released. - * - * - if the transition requires blocking, _try() function returns - * cl_lock_transition::CLO_WAIT. Caller unlocks mutex and goes to - * sleep, waiting for possibility of lock state change. It is woken - * up when some event occurs, that makes lock state change possible - * (e.g., the reception of the reply from the server), and repeats - * the loop. - * - * Top-lock and sub-lock has separate mutexes and the latter has to be taken - * first to avoid dead-lock. - * - * To see an example of interaction of all these issues, take a look at the - * lov_cl.c:lov_lock_enqueue() function. It is called as a part of - * cl_enqueue_try(), and tries to advance top-lock to ENQUEUED state, by - * advancing state-machines of its sub-locks (lov_lock_enqueue_one()). Note - * also, that it uses trylock to grab sub-lock mutex to avoid dead-lock. It - * also has to handle CEF_ASYNC enqueue, when sub-locks enqueues have to be - * done in parallel, rather than one after another (this is used for glimpse - * locks, that cannot dead-lock). + * cl_lock is a cacheless data container for the requirements of locks to + * complete the IO. cl_lock is created before I/O starts and destroyed when the + * I/O is complete. + * + * cl_lock depends on LDLM lock to fulfill lock semantics. LDLM lock is attached + * to cl_lock at OSC layer. LDLM lock is still cacheable. * * INTERFACE AND USAGE * - * struct cl_lock_operations provide a number of call-backs that are invoked - * when events of interest occurs. Layers can intercept and handle glimpse, - * blocking, cancel ASTs and a reception of the reply from the server. + * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel. A + * cl_lock is enqueued by cl_lock_request(), which will call clo_enqueue() + * methods for each layer to enqueue the lock. At the LOV layer, if a cl_lock + * consists of multiple sub cl_locks, each sub locks will be enqueued + * correspondingly. At OSC layer, the lock enqueue request will tend to reuse + * cached LDLM lock; otherwise a new LDLM lock will have to be requested from + * OST side. * - * One important difference with the old client locking model is that new - * client has a representation for the top-lock, whereas in the old code only - * sub-locks existed as real data structures and file-level locks are - * represented by "request sets" that are created and destroyed on each and - * every lock creation. + * cl_lock_cancel() must be called to release a cl_lock after use. clo_cancel() + * method will be called for each layer to release the resource held by this + * lock. At OSC layer, the reference count of LDLM lock, which is held at + * clo_enqueue time, is released. * - * Top-locks are cached, and can be found in the cache by the system calls. It - * is possible that top-lock is in cache, but some of its sub-locks were - * canceled and destroyed. In that case top-lock has to be enqueued again - * before it can be used. + * LDLM lock can only be canceled if there is no cl_lock using it. * * Overall process of the locking during IO operation is as following: * @@ -1294,7 +1153,7 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * * - when all locks are acquired, IO is performed; * - * - locks are released into cache. + * - locks are released after IO is complete. * * Striping introduces major additional complexity into locking. The * fundamental problem is that it is generally unsafe to actively use (hold) @@ -1316,16 +1175,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * buf is a part of memory mapped Lustre file, a lock or locks protecting buf * has to be held together with the usual lock on [offset, offset + count]. * - * As multi-stripe locks have to be allowed, it makes sense to cache them, so - * that, for example, a sequence of O_APPEND writes can proceed quickly - * without going down to the individual stripes to do lock matching. On the - * other hand, multi-stripe locks shouldn't be used by normal read/write - * calls. To achieve this, every layer can implement ->clo_fits_into() method, - * that is called by lock matching code (cl_lock_lookup()), and that can be - * used to selectively disable matching of certain locks for certain IOs. For - * example, lov layer implements lov_lock_fits_into() that allow multi-stripe - * locks to be matched only for truncates and O_APPEND writes. - * * Interaction with DLM * * In the expected setup, cl_lock is ultimately backed up by a collection of @@ -1356,295 +1205,27 @@ struct cl_lock_descr { __u32 cld_enq_flags; }; -#define DDESCR "%s(%d):[%lu, %lu]" +#define DDESCR "%s(%d):[%lu, %lu]:%x" #define PDESCR(descr) \ cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \ - (descr)->cld_start, (descr)->cld_end + (descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags const char *cl_lock_mode_name(const enum cl_lock_mode mode); /** - * Lock state-machine states. - * - * \htmlonly - * <pre> - * - * Possible state transitions: - * - * +------------------>NEW - * | | - * | | cl_enqueue_try() - * | | - * | cl_unuse_try() V - * | +--------------QUEUING (*) - * | | | - * | | | cl_enqueue_try() - * | | | - * | | cl_unuse_try() V - * sub-lock | +-------------ENQUEUED (*) - * canceled | | | - * | | | cl_wait_try() - * | | | - * | | (R) - * | | | - * | | V - * | | HELD<---------+ - * | | | | - * | | | | cl_use_try() - * | | cl_unuse_try() | | - * | | | | - * | | V ---+ - * | +------------>INTRANSIT (D) <--+ - * | | | - * | cl_unuse_try() | | cached lock found - * | | | cl_use_try() - * | | | - * | V | - * +------------------CACHED---------+ - * | - * (C) - * | - * V - * FREEING - * - * Legend: - * - * In states marked with (*) transition to the same state (i.e., a loop - * in the diagram) is possible. - * - * (R) is the point where Receive call-back is invoked: it allows layers - * to handle arrival of lock reply. - * - * (C) is the point where Cancellation call-back is invoked. - * - * (D) is the transit state which means the lock is changing. - * - * Transition to FREEING state is possible from any other state in the - * diagram in case of unrecoverable error. - * </pre> - * \endhtmlonly - * - * These states are for individual cl_lock object. Top-lock and its sub-locks - * can be in the different states. Another way to say this is that we have - * nested state-machines. - * - * Separate QUEUING and ENQUEUED states are needed to support non-blocking - * operation for locks with multiple sub-locks. Imagine lock on a file F, that - * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send - * enqueue to S0, wait for its completion, then send enqueue for S1, wait for - * its completion and at last enqueue lock for S2, and wait for its - * completion. In that case, top-lock is in QUEUING state while S0, S1 are - * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note - * that in this case, sub-locks move from state to state, and top-lock remains - * in the same state). - */ -enum cl_lock_state { - /** - * Lock that wasn't yet enqueued - */ - CLS_NEW, - /** - * Enqueue is in progress, blocking for some intermediate interaction - * with the other side. - */ - CLS_QUEUING, - /** - * Lock is fully enqueued, waiting for server to reply when it is - * granted. - */ - CLS_ENQUEUED, - /** - * Lock granted, actively used by some IO. - */ - CLS_HELD, - /** - * This state is used to mark the lock is being used, or unused. - * We need this state because the lock may have several sublocks, - * so it's impossible to have an atomic way to bring all sublocks - * into CLS_HELD state at use case, or all sublocks to CLS_CACHED - * at unuse case. - * If a thread is referring to a lock, and it sees the lock is in this - * state, it must wait for the lock. - * See state diagram for details. - */ - CLS_INTRANSIT, - /** - * Lock granted, not used. - */ - CLS_CACHED, - /** - * Lock is being destroyed. - */ - CLS_FREEING, - CLS_NR -}; - -enum cl_lock_flags { - /** - * lock has been cancelled. This flag is never cleared once set (by - * cl_lock_cancel0()). - */ - CLF_CANCELLED = 1 << 0, - /** cancellation is pending for this lock. */ - CLF_CANCELPEND = 1 << 1, - /** destruction is pending for this lock. */ - CLF_DOOMED = 1 << 2, - /** from enqueue RPC reply upcall. */ - CLF_FROM_UPCALL = 1 << 3, -}; - -/** - * Lock closure. - * - * Lock closure is a collection of locks (both top-locks and sub-locks) that - * might be updated in a result of an operation on a certain lock (which lock - * this is a closure of). - * - * Closures are needed to guarantee dead-lock freedom in the presence of - * - * - nested state-machines (top-lock state-machine composed of sub-lock - * state-machines), and - * - * - shared sub-locks. - * - * Specifically, many operations, such as lock enqueue, wait, unlock, - * etc. start from a top-lock, and then operate on a sub-locks of this - * top-lock, holding a top-lock mutex. When sub-lock state changes as a result - * of such operation, this change has to be propagated to all top-locks that - * share this sub-lock. Obviously, no natural lock ordering (e.g., - * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has - * to be used. Lock closure systematizes this try-and-repeat logic. - */ -struct cl_lock_closure { - /** - * Lock that is mutexed when closure construction is started. When - * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on - * origin is released before waiting. - */ - struct cl_lock *clc_origin; - /** - * List of enclosed locks, so far. Locks are linked here through - * cl_lock::cll_inclosure. - */ - struct list_head clc_list; - /** - * True iff closure is in a `wait' mode. This determines what - * cl_lock_enclosure() does when a lock L to be added to the closure - * is currently mutexed by some other thread. - * - * If cl_lock_closure::clc_wait is not set, then closure construction - * fails with CLO_REPEAT immediately. - * - * In wait mode, cl_lock_enclosure() waits until next attempt to build - * a closure might succeed. To this end it releases an origin mutex - * (cl_lock_closure::clc_origin), that has to be the only lock mutex - * owned by the current thread, and then waits on L mutex (by grabbing - * it and immediately releasing), before returning CLO_REPEAT to the - * caller. - */ - int clc_wait; - /** Number of locks in the closure. */ - int clc_nr; -}; - -/** * Layered client lock. */ struct cl_lock { - /** Reference counter. */ - atomic_t cll_ref; /** List of slices. Immutable after creation. */ struct list_head cll_layers; - /** - * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected - * by cl_lock::cll_descr::cld_obj::coh_lock_guard. - */ - struct list_head cll_linkage; - /** - * Parameters of this lock. Protected by - * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within - * cl_lock::cll_guard. Modified only on lock creation and in - * cl_lock_modify(). - */ + /** lock attribute, extent, cl_object, etc. */ struct cl_lock_descr cll_descr; - /** Protected by cl_lock::cll_guard. */ - enum cl_lock_state cll_state; - /** signals state changes. */ - wait_queue_head_t cll_wq; - /** - * Recursive lock, most fields in cl_lock{} are protected by this. - * - * Locking rules: this mutex is never held across network - * communication, except when lock is being canceled. - * - * Lock ordering: a mutex of a sub-lock is taken first, then a mutex - * on a top-lock. Other direction is implemented through a - * try-lock-repeat loop. Mutices of unrelated locks can be taken only - * by try-locking. - * - * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait(). - */ - struct mutex cll_guard; - struct task_struct *cll_guarder; - int cll_depth; - - /** - * the owner for INTRANSIT state - */ - struct task_struct *cll_intransit_owner; - int cll_error; - /** - * Number of holds on a lock. A hold prevents a lock from being - * canceled and destroyed. Protected by cl_lock::cll_guard. - * - * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release() - */ - int cll_holds; - /** - * Number of lock users. Valid in cl_lock_state::CLS_HELD state - * only. Lock user pins lock in CLS_HELD state. Protected by - * cl_lock::cll_guard. - * - * \see cl_wait(), cl_unuse(). - */ - int cll_users; - /** - * Flag bit-mask. Values from enum cl_lock_flags. Updates are - * protected by cl_lock::cll_guard. - */ - unsigned long cll_flags; - /** - * A linkage into a list of locks in a closure. - * - * \see cl_lock_closure - */ - struct list_head cll_inclosure; - /** - * Confict lock at queuing time. - */ - struct cl_lock *cll_conflict; - /** - * A list of references to this lock, for debugging. - */ - struct lu_ref cll_reference; - /** - * A list of holds on this lock, for debugging. - */ - struct lu_ref cll_holders; - /** - * A reference for cl_lock::cll_descr::cld_obj. For debugging. - */ - struct lu_ref_link cll_obj_ref; -#ifdef CONFIG_LOCKDEP - /* "dep_map" name is assumed by lockdep.h macros. */ - struct lockdep_map dep_map; -#endif }; /** * Per-layer part of cl_lock * - * \see ccc_lock, lov_lock, lovsub_lock, osc_lock + * \see vvp_lock, lov_lock, lovsub_lock, osc_lock */ struct cl_lock_slice { struct cl_lock *cls_lock; @@ -1658,174 +1239,36 @@ struct cl_lock_slice { }; /** - * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}(). - * - * NOTE: lov_subresult() depends on ordering here. - */ -enum cl_lock_transition { - /** operation cannot be completed immediately. Wait for state change. */ - CLO_WAIT = 1, - /** operation had to release lock mutex, restart. */ - CLO_REPEAT = 2, - /** lower layer re-enqueued. */ - CLO_REENQUEUED = 3, -}; - -/** * * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops */ struct cl_lock_operations { - /** - * \name statemachine - * - * State machine transitions. These 3 methods are called to transfer - * lock from one state to another, as described in the commentary - * above enum #cl_lock_state. - * - * \retval 0 this layer has nothing more to do to before - * transition to the target state happens; - * - * \retval CLO_REPEAT method had to release and re-acquire cl_lock - * mutex, repeat invocation of transition method - * across all layers; - * - * \retval CLO_WAIT this layer cannot move to the target state - * immediately, as it has to wait for certain event - * (e.g., the communication with the server). It - * is guaranteed, that when the state transfer - * becomes possible, cl_lock::cll_wq wait-queue - * is signaled. Caller can wait for this event by - * calling cl_lock_state_wait(); - * - * \retval -ve failure, abort state transition, move the lock - * into cl_lock_state::CLS_FREEING state, and set - * cl_lock::cll_error. - * - * Once all layers voted to agree to transition (by returning 0), lock - * is moved into corresponding target state. All state transition - * methods are optional. - */ /** @{ */ /** * Attempts to enqueue the lock. Called top-to-bottom. * - * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(), + * \retval 0 this layer has enqueued the lock successfully + * \retval >0 this layer has enqueued the lock, but need to wait on + * @anchor for resources + * \retval -ve failure + * + * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(), * \see osc_lock_enqueue() */ int (*clo_enqueue)(const struct lu_env *env, const struct cl_lock_slice *slice, - struct cl_io *io, __u32 enqflags); + struct cl_io *io, struct cl_sync_io *anchor); /** - * Attempts to wait for enqueue result. Called top-to-bottom. - * - * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait() - */ - int (*clo_wait)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** - * Attempts to unlock the lock. Called bottom-to-top. In addition to - * usual return values of lock state-machine methods, this can return - * -ESTALE to indicate that lock cannot be returned to the cache, and - * has to be re-initialized. - * unuse is a one-shot operation, so it must NOT return CLO_WAIT. - * - * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse() - */ - int (*clo_unuse)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** - * Notifies layer that cached lock is started being used. - * - * \pre lock->cll_state == CLS_CACHED - * - * \see lov_lock_use(), osc_lock_use() - */ - int (*clo_use)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** @} statemachine */ - /** - * A method invoked when lock state is changed (as a result of state - * transition). This is used, for example, to track when the state of - * a sub-lock changes, to propagate this change to the corresponding - * top-lock. Optional - * - * \see lovsub_lock_state() - */ - void (*clo_state)(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state st); - /** - * Returns true, iff given lock is suitable for the given io, idea - * being, that there are certain "unsafe" locks, e.g., ones acquired - * for O_APPEND writes, that we don't want to re-use for a normal - * write, to avoid the danger of cascading evictions. Optional. Runs - * under cl_object_header::coh_lock_guard. - * - * XXX this should take more information about lock needed by - * io. Probably lock description or something similar. - * - * \see lov_fits_into() - */ - int (*clo_fits_into)(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io); - /** - * \name ast - * Asynchronous System Traps. All of then are optional, all are - * executed bottom-to-top. - */ - /** @{ */ - - /** - * Cancellation callback. Cancel a lock voluntarily, or under - * the request of server. + * Cancel a lock, release its DLM lock ref, while does not cancel the + * DLM lock */ void (*clo_cancel)(const struct lu_env *env, const struct cl_lock_slice *slice); - /** - * Lock weighting ast. Executed to estimate how precious this lock - * is. The sum of results across all layers is used to determine - * whether lock worth keeping in cache given present memory usage. - * - * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh(). - */ - unsigned long (*clo_weigh)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** @} ast */ - - /** - * \see lovsub_lock_closure() - */ - int (*clo_closure)(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_lock_closure *closure); - /** - * Executed bottom-to-top when lock description changes (e.g., as a - * result of server granting more generous lock than was requested). - * - * \see lovsub_lock_modify() - */ - int (*clo_modify)(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *updated); - /** - * Notifies layers (bottom-to-top) that lock is going to be - * destroyed. Responsibility of layers is to prevent new references on - * this lock from being acquired once this method returns. - * - * This can be called multiple times due to the races. - * - * \see cl_lock_delete() - * \see osc_lock_delete(), lovsub_lock_delete() - */ - void (*clo_delete)(const struct lu_env *env, - const struct cl_lock_slice *slice); + /** @} */ /** * Destructor. Frees resources and the slice. * - * \see ccc_lock_fini(), lov_lock_fini(), lovsub_lock_fini(), + * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(), * \see osc_lock_fini() */ void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice); @@ -2016,7 +1459,7 @@ enum cl_io_state { * This is usually embedded into layer session data, rather than allocated * dynamically. * - * \see vvp_io, lov_io, osc_io, ccc_io + * \see vvp_io, lov_io, osc_io */ struct cl_io_slice { struct cl_io *cis_io; @@ -2031,6 +1474,8 @@ struct cl_io_slice { struct list_head cis_linkage; }; +typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *, + struct cl_page *); /** * Per-layer io operations. * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops @@ -2114,7 +1559,7 @@ struct cl_io_operations { void (*cio_fini)(const struct lu_env *env, const struct cl_io_slice *slice); } op[CIT_OP_NR]; - struct { + /** * Submit pages from \a queue->c2_qin for IO, and move * successfully submitted pages into \a queue->c2_qout. Return @@ -2127,7 +1572,15 @@ struct cl_io_operations { const struct cl_io_slice *slice, enum cl_req_type crt, struct cl_2queue *queue); - } req_op[CRT_NR]; + /** + * Queue async page for write. + * The difference between cio_submit and cio_queue is that + * cio_submit is for urgent request. + */ + int (*cio_commit_async)(const struct lu_env *env, + const struct cl_io_slice *slice, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb); /** * Read missing page. * @@ -2140,31 +1593,6 @@ struct cl_io_operations { const struct cl_io_slice *slice, const struct cl_page_slice *page); /** - * Prepare write of a \a page. Called bottom-to-top by a top-level - * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for - * get data from user-level buffer. - * - * \pre io->ci_type == CIT_WRITE - * - * \see vvp_io_prepare_write(), lov_io_prepare_write(), - * osc_io_prepare_write(). - */ - int (*cio_prepare_write)(const struct lu_env *env, - const struct cl_io_slice *slice, - const struct cl_page_slice *page, - unsigned from, unsigned to); - /** - * - * \pre io->ci_type == CIT_WRITE - * - * \see vvp_io_commit_write(), lov_io_commit_write(), - * osc_io_commit_write(). - */ - int (*cio_commit_write)(const struct lu_env *env, - const struct cl_io_slice *slice, - const struct cl_page_slice *page, - unsigned from, unsigned to); - /** * Optional debugging helper. Print given io slice. */ int (*cio_print)(const struct lu_env *env, void *cookie, @@ -2216,9 +1644,13 @@ enum cl_enq_flags { */ CEF_AGL = 0x00000020, /** + * enqueue a lock to test DLM lock existence. + */ + CEF_PEEK = 0x00000040, + /** * mask of enq_flags. */ - CEF_MASK = 0x0000003f, + CEF_MASK = 0x0000007f, }; /** @@ -2228,12 +1660,12 @@ enum cl_enq_flags { struct cl_io_lock_link { /** linkage into one of cl_lockset lists. */ struct list_head cill_linkage; - struct cl_lock_descr cill_descr; - struct cl_lock *cill_lock; + struct cl_lock cill_lock; /** optional destructor */ void (*cill_fini)(const struct lu_env *env, struct cl_io_lock_link *link); }; +#define cill_descr cill_lock.cll_descr /** * Lock-set represents a collection of locks, that io needs at a @@ -2267,8 +1699,6 @@ struct cl_io_lock_link { struct cl_lockset { /** locks to be acquired. */ struct list_head cls_todo; - /** locks currently being processed. */ - struct list_head cls_curr; /** locks acquired. */ struct list_head cls_done; }; @@ -2632,9 +2062,7 @@ struct cl_site { * and top-locks (and top-pages) are accounted here. */ struct cache_stats cs_pages; - struct cache_stats cs_locks; atomic_t cs_pages_state[CPS_NR]; - atomic_t cs_locks_state[CLS_NR]; }; int cl_site_init(struct cl_site *s, struct cl_device *top); @@ -2725,7 +2153,7 @@ static inline void cl_device_fini(struct cl_device *d) } void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice, - struct cl_object *obj, + struct cl_object *obj, pgoff_t index, const struct cl_page_operations *ops); void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, struct cl_object *obj, @@ -2758,7 +2186,7 @@ int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj, struct ost_lvb *lvb); int cl_conf_set(const struct lu_env *env, struct cl_object *obj, const struct cl_object_conf *conf); -void cl_object_prune(const struct lu_env *env, struct cl_object *obj); +int cl_object_prune(const struct lu_env *env, struct cl_object *obj); void cl_object_kill(const struct lu_env *env, struct cl_object *obj); /** @@ -2772,7 +2200,7 @@ static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1) static inline void cl_object_page_init(struct cl_object *clob, int size) { clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize; - cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8); + cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size); } static inline void *cl_object_page_slice(struct cl_object *clob, @@ -2781,6 +2209,16 @@ static inline void *cl_object_page_slice(struct cl_object *clob, return (void *)((char *)page + clob->co_slice_off); } +/** + * Return refcount of cl_object. + */ +static inline int cl_object_refc(struct cl_object *clob) +{ + struct lu_object_header *header = clob->co_lu.lo_header; + + return atomic_read(&header->loh_ref); +} + /** @} cl_object */ /** \defgroup cl_page cl_page @@ -2794,28 +2232,20 @@ enum { }; /* callback of cl_page_gang_lookup() */ -typedef int (*cl_page_gang_cb_t) (const struct lu_env *, struct cl_io *, - struct cl_page *, void *); -int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, pgoff_t start, pgoff_t end, - cl_page_gang_cb_t cb, void *cbdata); -struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index); struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj, pgoff_t idx, struct page *vmpage, enum cl_page_type type); -struct cl_page *cl_page_find_sub(const struct lu_env *env, - struct cl_object *obj, - pgoff_t idx, struct page *vmpage, - struct cl_page *parent); +struct cl_page *cl_page_alloc(const struct lu_env *env, + struct cl_object *o, pgoff_t ind, + struct page *vmpage, + enum cl_page_type type); void cl_page_get(struct cl_page *page); void cl_page_put(const struct lu_env *env, struct cl_page *page); void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg); void cl_page_header_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg); -struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page); struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj); -struct cl_page *cl_page_top(struct cl_page *page); const struct cl_page_slice *cl_page_at(const struct cl_page *page, const struct lu_device_type *dtype); @@ -2872,12 +2302,10 @@ int cl_page_flush(const struct lu_env *env, struct cl_io *io, void cl_page_discard(const struct lu_env *env, struct cl_io *io, struct cl_page *pg); void cl_page_delete(const struct lu_env *env, struct cl_page *pg); -int cl_page_unmap(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg); int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg); void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate); int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io, - struct cl_page *page); + struct cl_page *page, pgoff_t *max_index); loff_t cl_offset(const struct cl_object *obj, pgoff_t idx); pgoff_t cl_index(const struct cl_object *obj, loff_t offset); int cl_page_size(const struct cl_object *obj); @@ -2890,138 +2318,66 @@ void cl_lock_descr_print(const struct lu_env *env, void *cookie, const struct cl_lock_descr *descr); /* @} helper */ +/** + * Data structure managing a client's cached pages. A count of + * "unstable" pages is maintained, and an LRU of clean pages is + * maintained. "unstable" pages are pages pinned by the ptlrpc + * layer for recovery purposes. + */ +struct cl_client_cache { + /** + * # of users (OSCs) + */ + atomic_t ccc_users; + /** + * # of threads are doing shrinking + */ + unsigned int ccc_lru_shrinkers; + /** + * # of LRU entries available + */ + atomic_t ccc_lru_left; + /** + * List of entities(OSCs) for this LRU cache + */ + struct list_head ccc_lru; + /** + * Max # of LRU entries + */ + unsigned long ccc_lru_max; + /** + * Lock to protect ccc_lru list + */ + spinlock_t ccc_lru_lock; + /** + * # of unstable pages for this mount point + */ + atomic_t ccc_unstable_nr; + /** + * Waitq for awaiting unstable pages to reach zero. + * Used at umounting time and signaled on BRW commit + */ + wait_queue_head_t ccc_unstable_waitq; + +}; + /** @} cl_page */ /** \defgroup cl_lock cl_lock * @{ */ -struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source); -struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source); -struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source); -struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env, - struct cl_object *obj, pgoff_t index, - struct cl_lock *except, int pending, - int canceld); -static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env, - struct cl_object *obj, - struct cl_page *page, - struct cl_lock *except, - int pending, int canceld) -{ - LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj)); - return cl_lock_at_pgoff(env, obj, page->cp_index, except, - pending, canceld); -} - +int cl_lock_request(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock); +int cl_lock_init(const struct lu_env *env, struct cl_lock *lock, + const struct cl_io *io); +void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock); const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, const struct lu_device_type *dtype); - -void cl_lock_get(struct cl_lock *lock); -void cl_lock_get_trust(struct cl_lock *lock); -void cl_lock_put(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock); - -int cl_lock_is_intransit(struct cl_lock *lock); - -int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock, - int keep_mutex); - -/** \name statemachine statemachine - * Interface to lock state machine consists of 3 parts: - * - * - "try" functions that attempt to effect a state transition. If state - * transition is not possible right now (e.g., if it has to wait for some - * asynchronous event to occur), these functions return - * cl_lock_transition::CLO_WAIT. - * - * - "non-try" functions that implement synchronous blocking interface on - * top of non-blocking "try" functions. These functions repeatedly call - * corresponding "try" versions, and if state transition is not possible - * immediately, wait for lock state change. - * - * - methods from cl_lock_operations, called by "try" functions. Lock can - * be advanced to the target state only when all layers voted that they - * are ready for this transition. "Try" functions call methods under lock - * mutex. If a layer had to release a mutex, it re-acquires it and returns - * cl_lock_transition::CLO_REPEAT, causing "try" function to call all - * layers again. - * - * TRY NON-TRY METHOD FINAL STATE - * - * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED - * - * cl_wait_try() cl_wait() cl_lock_operations::clo_wait() CLS_HELD - * - * cl_unuse_try() cl_unuse() cl_lock_operations::clo_unuse() CLS_CACHED - * - * cl_use_try() NONE cl_lock_operations::clo_use() CLS_HELD - * - * @{ - */ - -int cl_wait(const struct lu_env *env, struct cl_lock *lock); -void cl_unuse(const struct lu_env *env, struct cl_lock *lock); -int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock, - struct cl_io *io, __u32 flags); -int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock); -int cl_wait_try(const struct lu_env *env, struct cl_lock *lock); -int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic); - -/** @} statemachine */ - -void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock, - enum cl_lock_state state); -int cl_queue_match(const struct list_head *queue, - const struct cl_lock_descr *need); - -void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_is_mutexed(struct cl_lock *lock); -int cl_lock_nr_mutexed(const struct lu_env *env); -int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_ext_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need); -int cl_lock_descr_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need); -int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need); -int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock, - const struct cl_lock_descr *desc); - -void cl_lock_closure_init(const struct lu_env *env, - struct cl_lock_closure *closure, - struct cl_lock *origin, int wait); -void cl_lock_closure_fini(struct cl_lock_closure *closure); -int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure); -void cl_lock_disclosure(const struct lu_env *env, - struct cl_lock_closure *closure); -int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure); - +void cl_lock_release(const struct lu_env *env, struct cl_lock *lock); +int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock, struct cl_sync_io *anchor); void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error); -void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait); - -unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock); /** @} cl_lock */ @@ -3050,15 +2406,14 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, struct cl_lock_descr *descr); int cl_io_read_page(const struct lu_env *env, struct cl_io *io, struct cl_page *page); -int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to); -int cl_io_commit_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to); int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, enum cl_req_type iot, struct cl_2queue *queue); int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, enum cl_req_type iot, struct cl_2queue *queue, long timeout); +int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb); int cl_io_is_going(const struct lu_env *env); /** @@ -3114,6 +2469,12 @@ static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist) return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch); } +static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist) +{ + LASSERT(plist->pl_nr > 0); + return list_entry(plist->pl_pages.next, struct cl_page, cp_batch); +} + /** * Iterate over pages in a page list. */ @@ -3130,9 +2491,14 @@ void cl_page_list_init(struct cl_page_list *plist); void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page); void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src, struct cl_page *page); +void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src, + struct cl_page *page); void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head); +void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist, + struct cl_page *page); void cl_page_list_disown(const struct lu_env *env, struct cl_io *io, struct cl_page_list *plist); +void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist); void cl_2queue_init(struct cl_2queue *queue); void cl_2queue_disown(const struct lu_env *env, @@ -3177,13 +2543,18 @@ struct cl_sync_io { atomic_t csi_barrier; /** completion to be signaled when transfer is complete. */ wait_queue_head_t csi_waitq; + /** callback to invoke when this IO is finished */ + void (*csi_end_io)(const struct lu_env *, + struct cl_sync_io *); }; -void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages); -int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, struct cl_sync_io *anchor, +void cl_sync_io_init(struct cl_sync_io *anchor, int nr, + void (*end)(const struct lu_env *, struct cl_sync_io *)); +int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor, long timeout); -void cl_sync_io_note(struct cl_sync_io *anchor, int ioret); +void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor, + int ioret); +void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor); /** @} cl_sync_io */ @@ -3241,6 +2612,9 @@ void *cl_env_reenter(void); void cl_env_reexit(void *cookie); void cl_env_implant(struct lu_env *env, int *refcheck); void cl_env_unplant(struct lu_env *env, int *refcheck); +unsigned int cl_env_cache_purge(unsigned int nr); +struct lu_env *cl_env_percpu_get(void); +void cl_env_percpu_put(struct lu_env *env); /** @} cl_env */ diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h deleted file mode 100644 index 5d839a9f789f..000000000000 --- a/drivers/staging/lustre/lustre/include/lclient.h +++ /dev/null @@ -1,408 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Definitions shared between vvp and liblustre, and other clients in the - * future. - * - * Author: Oleg Drokin <oleg.drokin@sun.com> - * Author: Nikita Danilov <nikita.danilov@sun.com> - */ - -#ifndef LCLIENT_H -#define LCLIENT_H - -blkcnt_t dirty_cnt(struct inode *inode); - -int cl_glimpse_size0(struct inode *inode, int agl); -int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, - struct inode *inode, struct cl_object *clob, int agl); - -static inline int cl_glimpse_size(struct inode *inode) -{ - return cl_glimpse_size0(inode, 0); -} - -static inline int cl_agl(struct inode *inode) -{ - return cl_glimpse_size0(inode, 1); -} - -/** - * Locking policy for setattr. - */ -enum ccc_setattr_lock_type { - /** Locking is done by server */ - SETATTR_NOLOCK, - /** Extent lock is enqueued */ - SETATTR_EXTENT_LOCK, - /** Existing local extent lock is used */ - SETATTR_MATCH_LOCK -}; - -/** - * IO state private to vvp or slp layers. - */ -struct ccc_io { - /** super class */ - struct cl_io_slice cui_cl; - struct cl_io_lock_link cui_link; - /** - * I/O vector information to or from which read/write is going. - */ - struct iov_iter *cui_iter; - /** - * Total size for the left IO. - */ - size_t cui_tot_count; - - union { - struct { - enum ccc_setattr_lock_type cui_local_lock; - } setattr; - } u; - /** - * True iff io is processing glimpse right now. - */ - int cui_glimpse; - /** - * Layout version when this IO is initialized - */ - __u32 cui_layout_gen; - /** - * File descriptor against which IO is done. - */ - struct ll_file_data *cui_fd; - struct kiocb *cui_iocb; -}; - -/** - * True, if \a io is a normal io, False for splice_{read,write}. - * must be implemented in arch specific code. - */ -int cl_is_normalio(const struct lu_env *env, const struct cl_io *io); - -extern struct lu_context_key ccc_key; -extern struct lu_context_key ccc_session_key; - -struct ccc_thread_info { - struct cl_lock_descr cti_descr; - struct cl_io cti_io; - struct cl_attr cti_attr; -}; - -static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env) -{ - struct ccc_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &ccc_key); - LASSERT(info); - return info; -} - -static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env) -{ - struct cl_attr *attr = &ccc_env_info(env)->cti_attr; - - memset(attr, 0, sizeof(*attr)); - return attr; -} - -static inline struct cl_io *ccc_env_thread_io(const struct lu_env *env) -{ - struct cl_io *io = &ccc_env_info(env)->cti_io; - - memset(io, 0, sizeof(*io)); - return io; -} - -struct ccc_session { - struct ccc_io cs_ios; -}; - -static inline struct ccc_session *ccc_env_session(const struct lu_env *env) -{ - struct ccc_session *ses; - - ses = lu_context_key_get(env->le_ses, &ccc_session_key); - LASSERT(ses); - return ses; -} - -static inline struct ccc_io *ccc_env_io(const struct lu_env *env) -{ - return &ccc_env_session(env)->cs_ios; -} - -/** - * ccc-private object state. - */ -struct ccc_object { - struct cl_object_header cob_header; - struct cl_object cob_cl; - struct inode *cob_inode; - - /** - * A list of dirty pages pending IO in the cache. Used by - * SOM. Protected by ll_inode_info::lli_lock. - * - * \see ccc_page::cpg_pending_linkage - */ - struct list_head cob_pending_list; - - /** - * Access this counter is protected by inode->i_sem. Now that - * the lifetime of transient pages must be covered by inode sem, - * we don't need to hold any lock.. - */ - int cob_transient_pages; - /** - * Number of outstanding mmaps on this file. - * - * \see ll_vm_open(), ll_vm_close(). - */ - atomic_t cob_mmap_cnt; - - /** - * various flags - * cob_discard_page_warned - * if pages belonging to this object are discarded when a client - * is evicted, some debug info will be printed, this flag will be set - * during processing the first discarded page, then avoid flooding - * debug message for lots of discarded pages. - * - * \see ll_dirty_page_discard_warn. - */ - unsigned int cob_discard_page_warned:1; -}; - -/** - * ccc-private page state. - */ -struct ccc_page { - struct cl_page_slice cpg_cl; - int cpg_defer_uptodate; - int cpg_ra_used; - int cpg_write_queued; - /** - * Non-empty iff this page is already counted in - * ccc_object::cob_pending_list. Protected by - * ccc_object::cob_pending_guard. This list is only used as a flag, - * that is, never iterated through, only checked for list_empty(), but - * having a list is useful for debugging. - */ - struct list_head cpg_pending_linkage; - /** VM page */ - struct page *cpg_page; -}; - -static inline struct ccc_page *cl2ccc_page(const struct cl_page_slice *slice) -{ - return container_of(slice, struct ccc_page, cpg_cl); -} - -struct ccc_device { - struct cl_device cdv_cl; - struct super_block *cdv_sb; - struct cl_device *cdv_next; -}; - -struct ccc_lock { - struct cl_lock_slice clk_cl; -}; - -struct ccc_req { - struct cl_req_slice crq_cl; -}; - -void *ccc_key_init (const struct lu_context *ctx, - struct lu_context_key *key); -void ccc_key_fini (const struct lu_context *ctx, - struct lu_context_key *key, void *data); -void *ccc_session_key_init(const struct lu_context *ctx, - struct lu_context_key *key); -void ccc_session_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data); - -int ccc_device_init (const struct lu_env *env, - struct lu_device *d, - const char *name, struct lu_device *next); -struct lu_device *ccc_device_fini (const struct lu_env *env, - struct lu_device *d); -struct lu_device *ccc_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg, - const struct lu_device_operations *luops, - const struct cl_device_operations *clops); -struct lu_device *ccc_device_free (const struct lu_env *env, - struct lu_device *d); -struct lu_object *ccc_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *dev, - const struct cl_object_operations *clops, - const struct lu_object_operations *luops); - -int ccc_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req); -void ccc_umount(const struct lu_env *env, struct cl_device *dev); -int ccc_global_init(struct lu_device_type *device_type); -void ccc_global_fini(struct lu_device_type *device_type); -int ccc_object_init0(const struct lu_env *env, struct ccc_object *vob, - const struct cl_object_conf *conf); -int ccc_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf); -void ccc_object_free(const struct lu_env *env, struct lu_object *obj); -int ccc_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io, - const struct cl_lock_operations *lkops); -int ccc_object_glimpse(const struct lu_env *env, - const struct cl_object *obj, struct ost_lvb *lvb); -struct page *ccc_page_vmpage(const struct lu_env *env, - const struct cl_page_slice *slice); -int ccc_page_is_under_lock(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); -int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice); -int ccc_transient_page_prep(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); -void ccc_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice); -void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice); -int ccc_lock_enqueue(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *io, __u32 enqflags); -int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice); -int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice); -int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice); -int ccc_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io); -void ccc_lock_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state); - -int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - pgoff_t start, pgoff_t end); -int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - loff_t start, loff_t end); -void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios); -void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios, - size_t nob); -void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio, - struct cl_io *io); -int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, loff_t start, size_t count, int *exceed); -void ccc_req_completion(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret); -void ccc_req_attr_set(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *oa, u64 flags); - -struct lu_device *ccc2lu_dev (struct ccc_device *vdv); -struct lu_object *ccc2lu (struct ccc_object *vob); -struct ccc_device *lu2ccc_dev (const struct lu_device *d); -struct ccc_device *cl2ccc_dev (const struct cl_device *d); -struct ccc_object *lu2ccc (const struct lu_object *obj); -struct ccc_object *cl2ccc (const struct cl_object *obj); -struct ccc_lock *cl2ccc_lock (const struct cl_lock_slice *slice); -struct ccc_io *cl2ccc_io (const struct lu_env *env, - const struct cl_io_slice *slice); -struct ccc_req *cl2ccc_req (const struct cl_req_slice *slice); -struct page *cl2vm_page (const struct cl_page_slice *slice); -struct inode *ccc_object_inode(const struct cl_object *obj); -struct ccc_object *cl_inode2ccc (struct inode *inode); - -int cl_setattr_ost(struct inode *inode, const struct iattr *attr); - -int ccc_object_invariant(const struct cl_object *obj); -int cl_file_inode_init(struct inode *inode, struct lustre_md *md); -void cl_inode_fini(struct inode *inode); -int cl_local_size(struct inode *inode); - -__u16 ll_dirent_type_get(struct lu_dirent *ent); -__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32); -__u32 cl_fid_build_gen(const struct lu_fid *fid); - -# define CLOBINVRNT(env, clob, expr) \ - ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr))) - -int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp); -int cl_ocd_update(struct obd_device *host, - struct obd_device *watched, - enum obd_notify_event ev, void *owner, void *data); - -struct ccc_grouplock { - struct lu_env *cg_env; - struct cl_io *cg_io; - struct cl_lock *cg_lock; - unsigned long cg_gid; -}; - -int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, - struct ccc_grouplock *cg); -void cl_put_grouplock(struct ccc_grouplock *cg); - -/** - * New interfaces to get and put lov_stripe_md from lov layer. This violates - * layering because lov_stripe_md is supposed to be a private data in lov. - * - * NB: If you find you have to use these interfaces for your new code, please - * think about it again. These interfaces may be removed in the future for - * better layering. - */ -struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj); -void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm); -int lov_read_and_clear_async_rc(struct cl_object *clob); - -struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode); -void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm); - -/** - * Data structure managing a client's cached clean pages. An LRU of - * pages is maintained, along with other statistics. - */ -struct cl_client_cache { - atomic_t ccc_users; /* # of users (OSCs) of this data */ - struct list_head ccc_lru; /* LRU list of cached clean pages */ - spinlock_t ccc_lru_lock; /* lock for list */ - atomic_t ccc_lru_left; /* # of LRU entries available */ - unsigned long ccc_lru_max; /* Max # of LRU entries possible */ - unsigned int ccc_lru_shrinkers; /* # of threads reclaiming */ -}; - -#endif /*LCLIENT_H */ diff --git a/drivers/staging/lustre/lustre/include/linux/obd.h b/drivers/staging/lustre/lustre/include/linux/obd.h deleted file mode 100644 index 3907bf4ce07c..000000000000 --- a/drivers/staging/lustre/lustre/include/linux/obd.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LINUX_OBD_H -#define __LINUX_OBD_H - -#ifndef __OBD_H -#error Do not #include this file directly. #include <obd.h> instead -#endif - -#include "../obd_support.h" - -#include <linux/fs.h> -#include <linux/list.h> -#include <linux/sched.h> /* for struct task_struct, for current.h */ -#include <linux/mount.h> - -#include "../lustre_intent.h" - -struct ll_iattr { - struct iattr iattr; - unsigned int ia_attr_flags; -}; - -#define CLIENT_OBD_LIST_LOCK_DEBUG 1 - -struct client_obd_lock { - spinlock_t lock; - - unsigned long time; - struct task_struct *task; - const char *func; - int line; -}; - -static inline void __client_obd_list_lock(struct client_obd_lock *lock, - const char *func, int line) -{ - unsigned long cur = jiffies; - - while (1) { - if (spin_trylock(&lock->lock)) { - LASSERT(!lock->task); - lock->task = current; - lock->func = func; - lock->line = line; - lock->time = jiffies; - break; - } - - if (time_before(cur + 5 * HZ, jiffies) && - time_before(lock->time + 5 * HZ, jiffies)) { - struct task_struct *task = lock->task; - - if (!task) - continue; - - LCONSOLE_WARN("%s:%d: lock %p was acquired by <%s:%d:%s:%d> for %lu seconds.\n", - current->comm, current->pid, - lock, task->comm, task->pid, - lock->func, lock->line, - (jiffies - lock->time) / HZ); - LCONSOLE_WARN("====== for current process =====\n"); - dump_stack(); - LCONSOLE_WARN("====== end =======\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1000 * HZ); - } - cpu_relax(); - } -} - -#define client_obd_list_lock(lock) \ - __client_obd_list_lock(lock, __func__, __LINE__) - -static inline void client_obd_list_unlock(struct client_obd_lock *lock) -{ - LASSERT(lock->task); - lock->task = NULL; - lock->time = jiffies; - spin_unlock(&lock->lock); -} - -static inline void client_obd_list_lock_init(struct client_obd_lock *lock) -{ - spin_lock_init(&lock->lock); -} - -static inline void client_obd_list_lock_done(struct client_obd_lock *lock) -{} - -#endif /* __LINUX_OBD_H */ diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h index 242bb1ef6245..2816512185af 100644 --- a/drivers/staging/lustre/lustre/include/lu_object.h +++ b/drivers/staging/lustre/lustre/include/lu_object.h @@ -198,7 +198,6 @@ typedef int (*lu_printer_t)(const struct lu_env *env, * Operations specific for particular lu_object. */ struct lu_object_operations { - /** * Allocate lower-layer parts of the object by calling * lu_device_operations::ldo_object_alloc() of the corresponding @@ -656,21 +655,21 @@ static inline struct seq_server_site *lu_site2seq(const struct lu_site *s) * @{ */ -int lu_site_init (struct lu_site *s, struct lu_device *d); -void lu_site_fini (struct lu_site *s); -int lu_site_init_finish (struct lu_site *s); -void lu_stack_fini (const struct lu_env *env, struct lu_device *top); -void lu_device_get (struct lu_device *d); -void lu_device_put (struct lu_device *d); -int lu_device_init (struct lu_device *d, struct lu_device_type *t); -void lu_device_fini (struct lu_device *d); -int lu_object_header_init(struct lu_object_header *h); +int lu_site_init(struct lu_site *s, struct lu_device *d); +void lu_site_fini(struct lu_site *s); +int lu_site_init_finish(struct lu_site *s); +void lu_stack_fini(const struct lu_env *env, struct lu_device *top); +void lu_device_get(struct lu_device *d); +void lu_device_put(struct lu_device *d); +int lu_device_init(struct lu_device *d, struct lu_device_type *t); +void lu_device_fini(struct lu_device *d); +int lu_object_header_init(struct lu_object_header *h); void lu_object_header_fini(struct lu_object_header *h); -int lu_object_init (struct lu_object *o, - struct lu_object_header *h, struct lu_device *d); -void lu_object_fini (struct lu_object *o); -void lu_object_add_top (struct lu_object_header *h, struct lu_object *o); -void lu_object_add (struct lu_object *before, struct lu_object *o); +int lu_object_init(struct lu_object *o, + struct lu_object_header *h, struct lu_device *d); +void lu_object_fini(struct lu_object *o); +void lu_object_add_top(struct lu_object_header *h, struct lu_object *o); +void lu_object_add(struct lu_object *before, struct lu_object *o); /** * Helpers to initialize and finalize device types. @@ -781,9 +780,8 @@ int lu_cdebug_printer(const struct lu_env *env, */ #define LU_OBJECT_DEBUG(mask, env, object, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ lu_object_print(env, &msgdata, lu_cdebug_printer, object);\ CDEBUG(mask, format, ## __VA_ARGS__); \ } \ @@ -794,9 +792,8 @@ do { \ */ #define LU_OBJECT_HEADER(mask, env, object, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ lu_object_header_print(env, &msgdata, lu_cdebug_printer,\ (object)->lo_header); \ lu_cdebug_printer(env, &msgdata, "\n"); \ @@ -1007,6 +1004,10 @@ enum lu_context_tag { */ LCT_LOCAL = 1 << 7, /** + * session for server thread + **/ + LCT_SERVER_SESSION = BIT(8), + /** * Set when at least one of keys, having values in this context has * non-NULL lu_context_key::lct_exit() method. This is used to * optimize lu_context_exit() call. @@ -1118,7 +1119,7 @@ struct lu_context_key { { \ type *value; \ \ - CLASSERT(PAGE_SIZE >= sizeof (*value)); \ + CLASSERT(PAGE_SIZE >= sizeof(*value)); \ \ value = kzalloc(sizeof(*value), GFP_NOFS); \ if (!value) \ @@ -1154,12 +1155,12 @@ do { \ (key)->lct_owner = THIS_MODULE; \ } while (0) -int lu_context_key_register(struct lu_context_key *key); -void lu_context_key_degister(struct lu_context_key *key); -void *lu_context_key_get (const struct lu_context *ctx, - const struct lu_context_key *key); -void lu_context_key_quiesce (struct lu_context_key *key); -void lu_context_key_revive (struct lu_context_key *key); +int lu_context_key_register(struct lu_context_key *key); +void lu_context_key_degister(struct lu_context_key *key); +void *lu_context_key_get(const struct lu_context *ctx, + const struct lu_context_key *key); +void lu_context_key_quiesce(struct lu_context_key *key); +void lu_context_key_revive(struct lu_context_key *key); /* * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an @@ -1216,21 +1217,21 @@ void lu_context_key_revive (struct lu_context_key *key); LU_TYPE_START(mod, __VA_ARGS__); \ LU_TYPE_STOP(mod, __VA_ARGS__) -int lu_context_init (struct lu_context *ctx, __u32 tags); -void lu_context_fini (struct lu_context *ctx); -void lu_context_enter (struct lu_context *ctx); -void lu_context_exit (struct lu_context *ctx); -int lu_context_refill(struct lu_context *ctx); +int lu_context_init(struct lu_context *ctx, __u32 tags); +void lu_context_fini(struct lu_context *ctx); +void lu_context_enter(struct lu_context *ctx); +void lu_context_exit(struct lu_context *ctx); +int lu_context_refill(struct lu_context *ctx); /* * Helper functions to operate on multiple keys. These are used by the default * device type operations, defined by LU_TYPE_INIT_FINI(). */ -int lu_context_key_register_many(struct lu_context_key *k, ...); +int lu_context_key_register_many(struct lu_context_key *k, ...); void lu_context_key_degister_many(struct lu_context_key *k, ...); -void lu_context_key_revive_many (struct lu_context_key *k, ...); -void lu_context_key_quiesce_many (struct lu_context_key *k, ...); +void lu_context_key_revive_many(struct lu_context_key *k, ...); +void lu_context_key_quiesce_many(struct lu_context_key *k, ...); /** * Environment. @@ -1246,9 +1247,9 @@ struct lu_env { struct lu_context *le_ses; }; -int lu_env_init (struct lu_env *env, __u32 tags); -void lu_env_fini (struct lu_env *env); -int lu_env_refill(struct lu_env *env); +int lu_env_init(struct lu_env *env, __u32 tags); +void lu_env_fini(struct lu_env *env); +int lu_env_refill(struct lu_env *env); /** @} lu_context */ diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h index 5aae1d06a5fa..9c53c1792dc8 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h @@ -183,6 +183,12 @@ struct lu_seq_range { __u32 lsr_flags; }; +struct lu_seq_range_array { + __u32 lsra_count; + __u32 lsra_padding; + struct lu_seq_range lsra_lsr[0]; +}; + #define LU_SEQ_RANGE_MDT 0x0 #define LU_SEQ_RANGE_OST 0x1 #define LU_SEQ_RANGE_ANY 0x3 @@ -578,7 +584,7 @@ static inline __u64 ostid_seq(const struct ost_id *ostid) if (fid_seq_is_mdt0(ostid->oi.oi_seq)) return FID_SEQ_OST_MDT0; - if (fid_seq_is_default(ostid->oi.oi_seq)) + if (unlikely(fid_seq_is_default(ostid->oi.oi_seq))) return FID_SEQ_LOV_DEFAULT; if (fid_is_idif(&ostid->oi_fid)) @@ -590,9 +596,12 @@ static inline __u64 ostid_seq(const struct ost_id *ostid) /* extract OST objid from a wire ost_id (id/seq) pair */ static inline __u64 ostid_id(const struct ost_id *ostid) { - if (fid_seq_is_mdt0(ostid_seq(ostid))) + if (fid_seq_is_mdt0(ostid->oi.oi_seq)) return ostid->oi.oi_id & IDIF_OID_MASK; + if (unlikely(fid_seq_is_default(ostid->oi.oi_seq))) + return ostid->oi.oi_id; + if (fid_is_idif(&ostid->oi_fid)) return fid_idif_id(fid_seq(&ostid->oi_fid), fid_oid(&ostid->oi_fid), 0); @@ -636,12 +645,22 @@ static inline void ostid_set_seq_llog(struct ost_id *oi) */ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) { - if (fid_seq_is_mdt0(ostid_seq(oi))) { + if (fid_seq_is_mdt0(oi->oi.oi_seq)) { if (oid >= IDIF_MAX_OID) { CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi)); return; } oi->oi.oi_id = oid; + } else if (fid_is_idif(&oi->oi_fid)) { + if (oid >= IDIF_MAX_OID) { + CERROR("Bad %llu to set "DOSTID"\n", + oid, POSTID(oi)); + return; + } + oi->oi_fid.f_seq = fid_idif_seq(oid, + fid_idif_ost_idx(&oi->oi_fid)); + oi->oi_fid.f_oid = oid; + oi->oi_fid.f_ver = oid >> 48; } else { if (oid > OBIF_MAX_OID) { CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi)); @@ -651,25 +670,31 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) } } -static inline void ostid_inc_id(struct ost_id *oi) +static inline int fid_set_id(struct lu_fid *fid, __u64 oid) { - if (fid_seq_is_mdt0(ostid_seq(oi))) { - if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) { - CERROR("Bad inc "DOSTID"\n", POSTID(oi)); - return; + if (unlikely(fid_seq_is_igif(fid->f_seq))) { + CERROR("bad IGIF, "DFID"\n", PFID(fid)); + return -EBADF; + } + + if (fid_is_idif(fid)) { + if (oid >= IDIF_MAX_OID) { + CERROR("Too large OID %#llx to set IDIF "DFID"\n", + (unsigned long long)oid, PFID(fid)); + return -EBADF; } - oi->oi.oi_id++; + fid->f_seq = fid_idif_seq(oid, fid_idif_ost_idx(fid)); + fid->f_oid = oid; + fid->f_ver = oid >> 48; } else { - oi->oi_fid.f_oid++; + if (oid > OBIF_MAX_OID) { + CERROR("Too large OID %#llx to set REG "DFID"\n", + (unsigned long long)oid, PFID(fid)); + return -EBADF; + } + fid->f_oid = oid; } -} - -static inline void ostid_dec_id(struct ost_id *oi) -{ - if (fid_seq_is_mdt0(ostid_seq(oi))) - oi->oi.oi_id--; - else - oi->oi_fid.f_oid--; + return 0; } /** @@ -684,30 +709,34 @@ static inline void ostid_dec_id(struct ost_id *oi) static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid, __u32 ost_idx) { + __u64 seq = ostid_seq(ostid); + if (ost_idx > 0xffff) { CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid), ost_idx); return -EBADF; } - if (fid_seq_is_mdt0(ostid_seq(ostid))) { + if (fid_seq_is_mdt0(seq)) { + __u64 oid = ostid_id(ostid); + /* This is a "legacy" (old 1.x/2.early) OST object in "group 0" * that we map into the IDIF namespace. It allows up to 2^48 * objects per OST, as this is the object namespace that has * been in production for years. This can handle create rates * of 1M objects/s/OST for 9 years, or combinations thereof. */ - if (ostid_id(ostid) >= IDIF_MAX_OID) { + if (oid >= IDIF_MAX_OID) { CERROR("bad MDT0 id, " DOSTID " ost_idx:%u\n", POSTID(ostid), ost_idx); return -EBADF; } - fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx); + fid->f_seq = fid_idif_seq(oid, ost_idx); /* truncate to 32 bits by assignment */ - fid->f_oid = ostid_id(ostid); + fid->f_oid = oid; /* in theory, not currently used */ - fid->f_ver = ostid_id(ostid) >> 48; - } else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ { + fid->f_ver = oid >> 48; + } else if (likely(!fid_seq_is_default(seq))) { /* This is either an IDIF object, which identifies objects across * all OSTs, or a regular FID. The IDIF namespace maps legacy * OST objects into the FID namespace. In both cases, we just @@ -1001,8 +1030,9 @@ static inline int lu_dirent_calc_size(int namelen, __u16 attr) size = (sizeof(struct lu_dirent) + namelen + align) & ~align; size += sizeof(struct luda_type); - } else + } else { size = sizeof(struct lu_dirent) + namelen; + } return (size + 7) & ~7; } @@ -1256,6 +1286,9 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */ #define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* flock deadlock detection */ #define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/ +#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack + * name in request + */ /* XXX README XXX: * Please DO NOT add flag values here before first ensuring that this same @@ -1428,6 +1461,8 @@ enum obdo_flags { */ OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */ + OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */ + OBD_FL_SHORT_IO = 0x00400000, /* short io request */ /* Note that while these checksum values are currently separate bits, * in 2.x we can actually allow all values from 1-31 if we wanted. @@ -1525,6 +1560,11 @@ static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq) oi->oi.oi_seq = seq; } +static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid) +{ + oi->oi.oi_id = oid; +} + static inline __u64 lmm_oi_id(struct ost_id *oi) { return oi->oi.oi_id; @@ -1732,6 +1772,11 @@ void lustre_swab_obd_statfs(struct obd_statfs *os); #define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */ #define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */ #define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */ +#define OBD_BRW_SOFT_SYNC 0x4000 /* This flag notifies the server + * that the client is running low on + * space for unstable pages; asking + * it to sync quickly + */ #define OBD_OBJECT_EOF 0xffffffffffffffffULL @@ -2436,6 +2481,7 @@ struct mdt_rec_reint { void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); +/* lmv structures */ struct lmv_desc { __u32 ld_tgt_count; /* how many MDS's */ __u32 ld_active_tgt_count; /* how many active */ @@ -2460,7 +2506,6 @@ struct lmv_stripe_md { struct lu_fid mea_ids[0]; }; -/* lmv structures */ #define MEA_MAGIC_LAST_CHAR 0xb2221ca1 #define MEA_MAGIC_ALL_CHARS 0xb222a11c #define MEA_MAGIC_HASH_SEGMENT 0xb222a11b @@ -2470,9 +2515,10 @@ struct lmv_stripe_md { #define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL enum fld_rpc_opc { - FLD_QUERY = 900, + FLD_QUERY = 900, + FLD_READ = 901, FLD_LAST_OPC, - FLD_FIRST_OPC = FLD_QUERY + FLD_FIRST_OPC = FLD_QUERY }; enum seq_rpc_opc { @@ -2486,6 +2532,12 @@ enum seq_op { SEQ_ALLOC_META = 1 }; +enum fld_op { + FLD_CREATE = 0, + FLD_DELETE = 1, + FLD_LOOKUP = 2, +}; + /* * LOV data structures */ @@ -2582,6 +2634,8 @@ struct ldlm_extent { __u64 gid; }; +#define LDLM_GID_ANY ((__u64)-1) + static inline int ldlm_extent_overlap(struct ldlm_extent *ex1, struct ldlm_extent *ex2) { @@ -3304,7 +3358,7 @@ struct getinfo_fid2path { char gf_path[0]; } __packed; -void lustre_swab_fid2path (struct getinfo_fid2path *gf); +void lustre_swab_fid2path(struct getinfo_fid2path *gf); enum { LAYOUT_INTENT_ACCESS = 0, diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h index 276906e646f5..59ba48ac31a7 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h @@ -193,37 +193,37 @@ struct ost_id { * *INFO - set/get lov_user_mds_data */ /* see <lustre_lib.h> for ioctl numberss 101-150 */ -#define LL_IOC_GETFLAGS _IOR ('f', 151, long) -#define LL_IOC_SETFLAGS _IOW ('f', 152, long) -#define LL_IOC_CLRFLAGS _IOW ('f', 153, long) +#define LL_IOC_GETFLAGS _IOR('f', 151, long) +#define LL_IOC_SETFLAGS _IOW('f', 152, long) +#define LL_IOC_CLRFLAGS _IOW('f', 153, long) /* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */ -#define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) +#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long) /* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */ -#define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) +#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long) /* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */ -#define LL_IOC_LOV_SETEA _IOW ('f', 156, long) -#define LL_IOC_RECREATE_OBJ _IOW ('f', 157, long) -#define LL_IOC_RECREATE_FID _IOW ('f', 157, struct lu_fid) -#define LL_IOC_GROUP_LOCK _IOW ('f', 158, long) -#define LL_IOC_GROUP_UNLOCK _IOW ('f', 159, long) +#define LL_IOC_LOV_SETEA _IOW('f', 156, long) +#define LL_IOC_RECREATE_OBJ _IOW('f', 157, long) +#define LL_IOC_RECREATE_FID _IOW('f', 157, struct lu_fid) +#define LL_IOC_GROUP_LOCK _IOW('f', 158, long) +#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long) /* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */ -#define LL_IOC_QUOTACHECK _IOW ('f', 160, int) +#define LL_IOC_QUOTACHECK _IOW('f', 160, int) /* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */ -#define LL_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) +#define LL_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) /* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */ #define LL_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) #define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *) -#define LL_IOC_FLUSHCTX _IOW ('f', 166, long) -#define LL_IOC_RMTACL _IOW ('f', 167, long) -#define LL_IOC_GETOBDCOUNT _IOR ('f', 168, long) +#define LL_IOC_FLUSHCTX _IOW('f', 166, long) +#define LL_IOC_RMTACL _IOW('f', 167, long) +#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long) #define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long) #define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long) #define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid) #define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long) -#define LL_IOC_PATH2FID _IOR ('f', 173, long) +#define LL_IOC_PATH2FID _IOR('f', 173, long) #define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *) -#define LL_IOC_GET_MDTIDX _IOR ('f', 175, int) +#define LL_IOC_GET_MDTIDX _IOR('f', 175, int) /* see <lustre_lib.h> for ioctl numbers 177-210 */ @@ -676,7 +676,12 @@ static inline const char *changelog_type2str(int type) #define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */ /* HSM cleaning needed */ /* Flags for rename */ -#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of target */ +#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of + * target + */ +#define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target + * has an archive in backend + */ /* Flags for HSM */ /* 12b used (from high weight to low weight): @@ -833,9 +838,8 @@ struct ioc_data_version { __u64 idv_flags; /* See LL_DV_xxx */ }; -#define LL_DV_NOFLUSH 0x01 /* Do not take READ EXTENT LOCK before sampling - * version. Dirty caches are left unchanged. - */ +#define LL_DV_RD_FLUSH BIT(0) /* Flush dirty pages from clients */ +#define LL_DV_WR_FLUSH BIT(1) /* Flush all caching pages from clients */ #ifndef offsetof # define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb))) @@ -1095,12 +1099,12 @@ struct hsm_action_list { __u32 padding1; char hal_fsname[0]; /* null-terminated */ /* struct hsm_action_item[hal_count] follows, aligned on 8-byte - * boundaries. See hai_zero + * boundaries. See hai_first */ } __packed; #ifndef HAVE_CFS_SIZE_ROUND -static inline int cfs_size_round (int val) +static inline int cfs_size_round(int val) { return (val + 7) & (~0x7); } @@ -1109,7 +1113,7 @@ static inline int cfs_size_round (int val) #endif /* Return pointer to first hai in action list */ -static inline struct hsm_action_item *hai_zero(struct hsm_action_list *hal) +static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal) { return (struct hsm_action_item *)(hal->hal_fsname + cfs_size_round(strlen(hal-> \ @@ -1131,7 +1135,7 @@ static inline int hal_size(struct hsm_action_list *hal) struct hsm_action_item *hai; sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1); - hai = hai_zero(hal); + hai = hai_first(hal); for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) sz += cfs_size_round(hai->hai_len); diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h index bb16ae980b98..e229e91f7f56 100644 --- a/drivers/staging/lustre/lustre/include/lustre_cfg.h +++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h @@ -161,7 +161,7 @@ static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index) int offset; int bufcount; - LASSERT (index >= 0); + LASSERT(index >= 0); bufcount = lcfg->lcfg_bufcount; if (index >= bufcount) diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h index 95fd36063f55..b36821ffb252 100644 --- a/drivers/staging/lustre/lustre/include/lustre_disk.h +++ b/drivers/staging/lustre/lustre/include/lustre_disk.h @@ -130,7 +130,6 @@ struct lustre_sb_info { struct lustre_mount_data *lsi_lmd; /* mount command info */ struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */ struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/ - struct vfsmount *lsi_srv_mnt; /* the one server mount */ atomic_t lsi_mounts; /* references to the srv_mnt */ char lsi_svname[MTI_NAME_MAXLEN]; char lsi_osd_obdname[64]; @@ -158,7 +157,6 @@ struct lustre_sb_info { struct lustre_mount_info { char *lmi_name; struct super_block *lmi_sb; - struct vfsmount *lmi_mnt; struct list_head lmi_list_chain; }; diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h index 8b0364f71129..9cade144faca 100644 --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h +++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h @@ -71,6 +71,7 @@ struct obd_device; */ enum ldlm_error { ELDLM_OK = 0, + ELDLM_LOCK_MATCHED = 1, ELDLM_LOCK_CHANGED = 300, ELDLM_LOCK_ABORTED = 301, @@ -269,7 +270,7 @@ struct ldlm_pool { struct completion pl_kobj_unregister; }; -typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock); +typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock); /** * LVB operations. @@ -446,8 +447,11 @@ struct ldlm_namespace { /** Limit of parallel AST RPC count. */ unsigned ns_max_parallel_ast; - /** Callback to cancel locks before replaying it during recovery. */ - ldlm_cancel_for_recovery ns_cancel_for_recovery; + /** + * Callback to check if a lock is good to be canceled by ELC or + * during recovery. + */ + ldlm_cancel_cbt ns_cancel; /** LDLM lock stats */ struct lprocfs_stats *ns_stats; @@ -479,9 +483,9 @@ static inline int ns_connect_lru_resize(struct ldlm_namespace *ns) } static inline void ns_register_cancel(struct ldlm_namespace *ns, - ldlm_cancel_for_recovery arg) + ldlm_cancel_cbt arg) { - ns->ns_cancel_for_recovery = arg; + ns->ns_cancel = arg; } struct ldlm_lock; diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h index 7f2ba2ffe0eb..e7e0c21a9b40 100644 --- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h +++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h @@ -37,17 +37,11 @@ /** l_flags bits marked as "gone" bits */ #define LDLM_FL_GONE_MASK 0x0006004000000000ULL -/** l_flags bits marked as "hide_lock" bits */ -#define LDLM_FL_HIDE_LOCK_MASK 0x0000206400000000ULL - /** l_flags bits marked as "inherit" bits */ #define LDLM_FL_INHERIT_MASK 0x0000000000800000ULL -/** l_flags bits marked as "local_only" bits */ -#define LDLM_FL_LOCAL_ONLY_MASK 0x00FFFFFF00000000ULL - -/** l_flags bits marked as "on_wire" bits */ -#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F932FULL +/** l_flags bits marked as "off_wire" bits */ +#define LDLM_FL_OFF_WIRE_MASK 0x00FFFFFF00000000ULL /** extent, mode, or resource changed */ #define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL /* bit 0 */ @@ -204,7 +198,7 @@ #define ldlm_set_cancel(_l) LDLM_SET_FLAG((_l), 1ULL << 36) #define ldlm_clear_cancel(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 36) -/** whatever it might mean */ +/** whatever it might mean -- never transmitted? */ #define LDLM_FL_LOCAL_ONLY 0x0000002000000000ULL /* bit 37 */ #define ldlm_is_local_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 37) #define ldlm_set_local_only(_l) LDLM_SET_FLAG((_l), 1ULL << 37) @@ -287,18 +281,18 @@ * has canceled this lock and is waiting for rpc_lock which is taken by * the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in * the lock to prevent the Early Lock Cancel (ELC) code from cancelling it. - * - * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock cache is - * dropped to let ldlm_callback_handler() return EINVAL to the server. It - * is used when ELC RPC is already prepared and is waiting for rpc_lock, - * too late to send a separate CANCEL RPC. */ #define LDLM_FL_BL_AST 0x0000400000000000ULL /* bit 46 */ #define ldlm_is_bl_ast(_l) LDLM_TEST_FLAG((_l), 1ULL << 46) #define ldlm_set_bl_ast(_l) LDLM_SET_FLAG((_l), 1ULL << 46) #define ldlm_clear_bl_ast(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 46) -/** whatever it might mean */ +/** + * Set by ldlm_cancel_callback() when lock cache is dropped to let + * ldlm_callback_handler() return EINVAL to the server. It is used when + * ELC RPC is already prepared and is waiting for rpc_lock, too late to + * send a separate CANCEL RPC. + */ #define LDLM_FL_BL_DONE 0x0000800000000000ULL /* bit 47 */ #define ldlm_is_bl_done(_l) LDLM_TEST_FLAG((_l), 1ULL << 47) #define ldlm_set_bl_done(_l) LDLM_SET_FLAG((_l), 1ULL << 47) @@ -381,104 +375,16 @@ /** test for ldlm_lock flag bit set */ #define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0) +/** multi-bit test: are any of mask bits set? */ +#define LDLM_HAVE_MASK(_l, _m) ((_l)->l_flags & LDLM_FL_##_m##_MASK) + /** set a ldlm_lock flag bit */ #define LDLM_SET_FLAG(_l, _b) ((_l)->l_flags |= (_b)) /** clear a ldlm_lock flag bit */ #define LDLM_CLEAR_FLAG(_l, _b) ((_l)->l_flags &= ~(_b)) -/** Mask of flags inherited from parent lock when doing intents. */ -#define LDLM_INHERIT_FLAGS LDLM_FL_INHERIT_MASK - -/** Mask of Flags sent in AST lock_flags to map into the receiving lock. */ -#define LDLM_AST_FLAGS LDLM_FL_AST_MASK - /** @} subgroup */ /** @} group */ -#ifdef WIRESHARK_COMPILE -static int hf_lustre_ldlm_fl_lock_changed = -1; -static int hf_lustre_ldlm_fl_block_granted = -1; -static int hf_lustre_ldlm_fl_block_conv = -1; -static int hf_lustre_ldlm_fl_block_wait = -1; -static int hf_lustre_ldlm_fl_ast_sent = -1; -static int hf_lustre_ldlm_fl_replay = -1; -static int hf_lustre_ldlm_fl_intent_only = -1; -static int hf_lustre_ldlm_fl_has_intent = -1; -static int hf_lustre_ldlm_fl_flock_deadlock = -1; -static int hf_lustre_ldlm_fl_discard_data = -1; -static int hf_lustre_ldlm_fl_no_timeout = -1; -static int hf_lustre_ldlm_fl_block_nowait = -1; -static int hf_lustre_ldlm_fl_test_lock = -1; -static int hf_lustre_ldlm_fl_cancel_on_block = -1; -static int hf_lustre_ldlm_fl_deny_on_contention = -1; -static int hf_lustre_ldlm_fl_ast_discard_data = -1; -static int hf_lustre_ldlm_fl_fail_loc = -1; -static int hf_lustre_ldlm_fl_skipped = -1; -static int hf_lustre_ldlm_fl_cbpending = -1; -static int hf_lustre_ldlm_fl_wait_noreproc = -1; -static int hf_lustre_ldlm_fl_cancel = -1; -static int hf_lustre_ldlm_fl_local_only = -1; -static int hf_lustre_ldlm_fl_failed = -1; -static int hf_lustre_ldlm_fl_canceling = -1; -static int hf_lustre_ldlm_fl_local = -1; -static int hf_lustre_ldlm_fl_lvb_ready = -1; -static int hf_lustre_ldlm_fl_kms_ignore = -1; -static int hf_lustre_ldlm_fl_cp_reqd = -1; -static int hf_lustre_ldlm_fl_cleaned = -1; -static int hf_lustre_ldlm_fl_atomic_cb = -1; -static int hf_lustre_ldlm_fl_bl_ast = -1; -static int hf_lustre_ldlm_fl_bl_done = -1; -static int hf_lustre_ldlm_fl_no_lru = -1; -static int hf_lustre_ldlm_fl_fail_notified = -1; -static int hf_lustre_ldlm_fl_destroyed = -1; -static int hf_lustre_ldlm_fl_server_lock = -1; -static int hf_lustre_ldlm_fl_res_locked = -1; -static int hf_lustre_ldlm_fl_waited = -1; -static int hf_lustre_ldlm_fl_ns_srv = -1; -static int hf_lustre_ldlm_fl_excl = -1; - -const value_string lustre_ldlm_flags_vals[] = { - {LDLM_FL_LOCK_CHANGED, "LDLM_FL_LOCK_CHANGED"}, - {LDLM_FL_BLOCK_GRANTED, "LDLM_FL_BLOCK_GRANTED"}, - {LDLM_FL_BLOCK_CONV, "LDLM_FL_BLOCK_CONV"}, - {LDLM_FL_BLOCK_WAIT, "LDLM_FL_BLOCK_WAIT"}, - {LDLM_FL_AST_SENT, "LDLM_FL_AST_SENT"}, - {LDLM_FL_REPLAY, "LDLM_FL_REPLAY"}, - {LDLM_FL_INTENT_ONLY, "LDLM_FL_INTENT_ONLY"}, - {LDLM_FL_HAS_INTENT, "LDLM_FL_HAS_INTENT"}, - {LDLM_FL_FLOCK_DEADLOCK, "LDLM_FL_FLOCK_DEADLOCK"}, - {LDLM_FL_DISCARD_DATA, "LDLM_FL_DISCARD_DATA"}, - {LDLM_FL_NO_TIMEOUT, "LDLM_FL_NO_TIMEOUT"}, - {LDLM_FL_BLOCK_NOWAIT, "LDLM_FL_BLOCK_NOWAIT"}, - {LDLM_FL_TEST_LOCK, "LDLM_FL_TEST_LOCK"}, - {LDLM_FL_CANCEL_ON_BLOCK, "LDLM_FL_CANCEL_ON_BLOCK"}, - {LDLM_FL_DENY_ON_CONTENTION, "LDLM_FL_DENY_ON_CONTENTION"}, - {LDLM_FL_AST_DISCARD_DATA, "LDLM_FL_AST_DISCARD_DATA"}, - {LDLM_FL_FAIL_LOC, "LDLM_FL_FAIL_LOC"}, - {LDLM_FL_SKIPPED, "LDLM_FL_SKIPPED"}, - {LDLM_FL_CBPENDING, "LDLM_FL_CBPENDING"}, - {LDLM_FL_WAIT_NOREPROC, "LDLM_FL_WAIT_NOREPROC"}, - {LDLM_FL_CANCEL, "LDLM_FL_CANCEL"}, - {LDLM_FL_LOCAL_ONLY, "LDLM_FL_LOCAL_ONLY"}, - {LDLM_FL_FAILED, "LDLM_FL_FAILED"}, - {LDLM_FL_CANCELING, "LDLM_FL_CANCELING"}, - {LDLM_FL_LOCAL, "LDLM_FL_LOCAL"}, - {LDLM_FL_LVB_READY, "LDLM_FL_LVB_READY"}, - {LDLM_FL_KMS_IGNORE, "LDLM_FL_KMS_IGNORE"}, - {LDLM_FL_CP_REQD, "LDLM_FL_CP_REQD"}, - {LDLM_FL_CLEANED, "LDLM_FL_CLEANED"}, - {LDLM_FL_ATOMIC_CB, "LDLM_FL_ATOMIC_CB"}, - {LDLM_FL_BL_AST, "LDLM_FL_BL_AST"}, - {LDLM_FL_BL_DONE, "LDLM_FL_BL_DONE"}, - {LDLM_FL_NO_LRU, "LDLM_FL_NO_LRU"}, - {LDLM_FL_FAIL_NOTIFIED, "LDLM_FL_FAIL_NOTIFIED"}, - {LDLM_FL_DESTROYED, "LDLM_FL_DESTROYED"}, - {LDLM_FL_SERVER_LOCK, "LDLM_FL_SERVER_LOCK"}, - {LDLM_FL_RES_LOCKED, "LDLM_FL_RES_LOCKED"}, - {LDLM_FL_WAITED, "LDLM_FL_WAITED"}, - {LDLM_FL_NS_SRV, "LDLM_FL_NS_SRV"}, - {LDLM_FL_EXCL, "LDLM_FL_EXCL"}, - { 0, NULL } -}; -#endif /* WIRESHARK_COMPILE */ + #endif /* LDLM_ALL_FLAGS_MASK */ diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h index ab4a92390a43..12e8b585c2b4 100644 --- a/drivers/staging/lustre/lustre/include/lustre_fid.h +++ b/drivers/staging/lustre/lustre/include/lustre_fid.h @@ -308,10 +308,10 @@ static inline int fid_seq_in_fldb(__u64 seq) fid_seq_is_root(seq) || fid_seq_is_dot(seq); } -static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq) +static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx) { if (fid_seq_is_mdt0(seq)) { - fid->f_seq = fid_idif_seq(0, 0); + fid->f_seq = fid_idif_seq(0, ost_idx); } else { LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) || fid_seq_is_idif(seq), "%#llx\n", seq); @@ -498,19 +498,6 @@ static inline void ostid_build_res_name(struct ost_id *oi, } } -static inline void ostid_res_name_to_id(struct ost_id *oi, - struct ldlm_res_id *name) -{ - if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_SEQ_OFF])) { - /* old resid */ - ostid_set_seq(oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]); - ostid_set_id(oi, name->name[LUSTRE_RES_ID_SEQ_OFF]); - } else { - /* new resid */ - fid_extract_from_res_name(&oi->oi_fid, name); - } -} - /** * Return true if the resource is for the object identified by this id & group. */ @@ -546,7 +533,8 @@ static inline void ost_fid_build_resid(const struct lu_fid *fid, } static inline void ost_fid_from_resid(struct lu_fid *fid, - const struct ldlm_res_id *name) + const struct ldlm_res_id *name, + int ost_idx) { if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) { /* old resid */ @@ -554,7 +542,7 @@ static inline void ost_fid_from_resid(struct lu_fid *fid, ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]); ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]); - ostid_to_fid(fid, &oi, 0); + ostid_to_fid(fid, &oi, ost_idx); } else { /* new resid */ fid_extract_from_res_name(fid, name); diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h index dac2d84d8266..8325c82b3ebf 100644 --- a/drivers/staging/lustre/lustre/include/lustre_import.h +++ b/drivers/staging/lustre/lustre/include/lustre_import.h @@ -109,7 +109,7 @@ static inline char *ptlrpc_import_state_name(enum lustre_imp_state state) "RECOVER", "FULL", "EVICTED", }; - LASSERT (state <= LUSTRE_IMP_EVICTED); + LASSERT(state <= LUSTRE_IMP_EVICTED); return import_state_names[state]; } diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index f2223d55850a..00b976766aef 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -280,16 +280,16 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_DATA_TYPE long #define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DESTROY _IOW ('f', 104, OBD_IOC_DATA_TYPE) +#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE) #define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SETATTR _IOW ('f', 107, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETATTR _IOWR ('f', 108, OBD_IOC_DATA_TYPE) #define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE) #define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE) #define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SYNC _IOW ('f', 114, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE) #define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) #define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) #define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) @@ -308,13 +308,13 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_GETDTNAME OBD_IOC_GETNAME #define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PING_TARGET _IOW ('f', 136, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE) #define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139) -#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE) -#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE) +#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE) +#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE) #define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) @@ -324,27 +324,27 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CHANGELOG_SEND _IOW ('f', 148, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) #define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE) /* see also <lustre/lustre_user.h> for ioctls 151-153 */ /* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */ -#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_SETSTRIPE _IOW('f', 154, OBD_IOC_DATA_TYPE) /* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */ -#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_GETSTRIPE _IOW('f', 155, OBD_IOC_DATA_TYPE) /* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */ -#define OBD_IOC_LOV_SETEA _IOW ('f', 156, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_SETEA _IOW('f', 156, OBD_IOC_DATA_TYPE) /* see <lustre/lustre_user.h> for ioctls 157-159 */ /* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */ -#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int) +#define OBD_IOC_QUOTACHECK _IOW('f', 160, int) /* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */ -#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) +#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) /* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */ #define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) /* see also <lustre/lustre_user.h> for ioctls 163-176 */ -#define OBD_IOC_CHANGELOG_REG _IOW ('f', 177, struct obd_ioctl_data) -#define OBD_IOC_CHANGELOG_DEREG _IOW ('f', 178, struct obd_ioctl_data) -#define OBD_IOC_CHANGELOG_CLEAR _IOW ('f', 179, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data) #define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) #define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) #define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) @@ -352,7 +352,7 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE) #define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) #define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE) #define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE) #define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE) @@ -522,6 +522,28 @@ struct l_wait_info { sigmask(SIGTERM) | sigmask(SIGQUIT) | \ sigmask(SIGALRM)) +/** + * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively + * waiting threads, which is not always desirable because all threads will + * be waken up again and again, even user only needs a few of them to be + * active most time. This is not good for performance because cache can + * be polluted by different threads. + * + * LIFO list can resolve this problem because we always wakeup the most + * recent active thread by default. + * + * NB: please don't call non-exclusive & exclusive wait on the same + * waitq if add_wait_queue_exclusive_head is used. + */ +#define add_wait_queue_exclusive_head(waitq, link) \ +{ \ + unsigned long flags; \ + \ + spin_lock_irqsave(&((waitq)->lock), flags); \ + __add_wait_queue_exclusive(waitq, link); \ + spin_unlock_irqrestore(&((waitq)->lock), flags); \ +} + /* * wait for @condition to become true, but no longer than timeout, specified * by @info. @@ -578,7 +600,7 @@ do { \ \ if (condition) \ break; \ - if (cfs_signal_pending()) { \ + if (signal_pending(current)) { \ if (info->lwi_on_signal && \ (__timeout == 0 || __allow_intr)) { \ if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \ diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h index af77eb359c43..f267ff8a6ec8 100644 --- a/drivers/staging/lustre/lustre/include/lustre_mdc.h +++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h @@ -64,9 +64,27 @@ struct obd_export; struct ptlrpc_request; struct obd_device; +/** + * Serializes in-flight MDT-modifying RPC requests to preserve idempotency. + * + * This mutex is used to implement execute-once semantics on the MDT. + * The MDT stores the last transaction ID and result for every client in + * its last_rcvd file. If the client doesn't get a reply, it can safely + * resend the request and the MDT will reconstruct the reply being aware + * that the request has already been executed. Without this lock, + * execution status of concurrent in-flight requests would be + * overwritten. + * + * This design limits the extent to which we can keep a full pipeline of + * in-flight requests from a single client. This limitation could be + * overcome by allowing multiple slots per client in the last_rcvd file. + */ struct mdc_rpc_lock { + /** Lock protecting in-flight RPC concurrency. */ struct mutex rpcl_mutex; + /** Intent associated with currently executing request. */ struct lookup_intent *rpcl_it; + /** Used for MDS/RPC load testing purposes. */ int rpcl_fakes; }; diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index 69586a522eb7..a7973d5de168 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -1327,7 +1327,9 @@ struct ptlrpc_request { /* allow the req to be sent if the import is in recovery * status */ - rq_allow_replay:1; + rq_allow_replay:1, + /* bulk request, sent to server, but uncommitted */ + rq_unstable:1; unsigned int rq_nr_resend; diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h index 383fe6febe4b..a42cf90c1cd8 100644 --- a/drivers/staging/lustre/lustre/include/lustre_param.h +++ b/drivers/staging/lustre/lustre/include/lustre_param.h @@ -89,6 +89,7 @@ int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh); /* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */ #define PARAM_OST "ost." +#define PARAM_OSD "osd." #define PARAM_OSC "osc." #define PARAM_MDT "mdt." #define PARAM_MDD "mdd." diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h index b2e67fcf9ef1..0aac4391ea16 100644 --- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h +++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h @@ -137,6 +137,7 @@ extern struct req_format RQF_MGS_CONFIG_READ; /* fid/fld req_format */ extern struct req_format RQF_SEQ_QUERY; extern struct req_format RQF_FLD_QUERY; +extern struct req_format RQF_FLD_READ; /* MDS req_format */ extern struct req_format RQF_MDS_CONNECT; extern struct req_format RQF_MDS_DISCONNECT; @@ -199,7 +200,7 @@ extern struct req_format RQF_OST_BRW_READ; extern struct req_format RQF_OST_BRW_WRITE; extern struct req_format RQF_OST_STATFS; extern struct req_format RQF_OST_SET_GRANT_INFO; -extern struct req_format RQF_OST_GET_INFO_GENERIC; +extern struct req_format RQF_OST_GET_INFO; extern struct req_format RQF_OST_GET_INFO_LAST_ID; extern struct req_format RQF_OST_GET_INFO_LAST_FID; extern struct req_format RQF_OST_SET_INFO_LAST_FID; diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h index 4264d97650ec..2d926e0ee647 100644 --- a/drivers/staging/lustre/lustre/include/obd.h +++ b/drivers/staging/lustre/lustre/include/obd.h @@ -37,7 +37,7 @@ #ifndef __OBD_H #define __OBD_H -#include "linux/obd.h" +#include <linux/spinlock.h> #define IOC_OSC_TYPE 'h' #define IOC_OSC_MIN_NR 20 @@ -54,6 +54,7 @@ #include "lustre_export.h" #include "lustre_fid.h" #include "lustre_fld.h" +#include "lustre_intent.h" #define MAX_OBD_DEVICES 8192 @@ -165,9 +166,6 @@ struct obd_info { obd_enqueue_update_f oi_cb_up; }; -void lov_stripe_lock(struct lov_stripe_md *md); -void lov_stripe_unlock(struct lov_stripe_md *md); - struct obd_type { struct list_head typ_chain; struct obd_ops *typ_dt_ops; @@ -293,14 +291,10 @@ struct client_obd { * blocking everywhere, but we don't want to slow down fast-path of * our main platform.) * - * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together - * with client_obd_list_{un,}lock() and - * client_obd_list_lock_{init,done}() functions. - * * NB by Jinshan: though field names are still _loi_, but actually * osc_object{}s are in the list. */ - struct client_obd_lock cl_loi_list_lock; + spinlock_t cl_loi_list_lock; struct list_head cl_loi_ready_list; struct list_head cl_loi_hp_ready_list; struct list_head cl_loi_write_list; @@ -327,7 +321,8 @@ struct client_obd { atomic_t cl_lru_shrinkers; atomic_t cl_lru_in_list; struct list_head cl_lru_list; /* lru page list */ - struct client_obd_lock cl_lru_list_lock; /* page list protector */ + spinlock_t cl_lru_list_lock; /* page list protector */ + atomic_t cl_unstable_count; /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ atomic_t cl_destroy_in_flight; @@ -364,6 +359,7 @@ struct client_obd { /* ptlrpc work for writeback in ptlrpcd context */ void *cl_writeback_work; + void *cl_lru_work; /* hash tables for osc_quota_info */ struct cfs_hash *cl_quota_hash[MAXQUOTAS]; }; @@ -391,45 +387,9 @@ struct ost_pool { struct rw_semaphore op_rw_sem; /* to protect ost_pool use */ }; -/* Round-robin allocator data */ -struct lov_qos_rr { - __u32 lqr_start_idx; /* start index of new inode */ - __u32 lqr_offset_idx; /* aliasing for start_idx */ - int lqr_start_count; /* reseed counter */ - struct ost_pool lqr_pool; /* round-robin optimized list */ - unsigned long lqr_dirty:1; /* recalc round-robin list */ -}; - /* allow statfs data caching for 1 second */ #define OBD_STATFS_CACHE_SECONDS 1 -struct lov_statfs_data { - struct obd_info lsd_oi; - struct obd_statfs lsd_statfs; -}; - -/* Stripe placement optimization */ -struct lov_qos { - struct list_head lq_oss_list; /* list of OSSs that targets use */ - struct rw_semaphore lq_rw_sem; - __u32 lq_active_oss_count; - unsigned int lq_prio_free; /* priority for free space */ - unsigned int lq_threshold_rr;/* priority for rr */ - struct lov_qos_rr lq_rr; /* round robin qos data */ - unsigned long lq_dirty:1, /* recalc qos data */ - lq_same_space:1,/* the ost's all have approx. - * the same space avail - */ - lq_reset:1, /* zero current penalties */ - lq_statfs_in_progress:1; /* statfs op in - progress */ - /* qos statfs data */ - struct lov_statfs_data *lq_statfs_data; - wait_queue_head_t lq_statfs_waitq; /* waitqueue to notify statfs - * requests completion - */ -}; - struct lov_tgt_desc { struct list_head ltd_kill; struct obd_uuid ltd_uuid; @@ -442,25 +402,6 @@ struct lov_tgt_desc { ltd_reap:1; /* should this target be deleted */ }; -/* Pool metadata */ -#define pool_tgt_size(_p) _p->pool_obds.op_size -#define pool_tgt_count(_p) _p->pool_obds.op_count -#define pool_tgt_array(_p) _p->pool_obds.op_array -#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem - -struct pool_desc { - char pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */ - struct ost_pool pool_obds; /* pool members */ - atomic_t pool_refcount; /* pool ref. counter */ - struct lov_qos_rr pool_rr; /* round robin qos */ - struct hlist_node pool_hash; /* access by poolname */ - struct list_head pool_list; /* serial access */ - struct dentry *pool_debugfs_entry; /* file in debugfs */ - struct obd_device *pool_lobd; /* obd of the lov/lod to which - * this pool belongs - */ -}; - struct lov_obd { struct lov_desc desc; struct lov_tgt_desc **lov_tgts; /* sparse array */ @@ -468,8 +409,6 @@ struct lov_obd { struct mutex lov_lock; struct obd_connect_data lov_ocd; atomic_t lov_refcount; - __u32 lov_tgt_count; /* how many OBD's */ - __u32 lov_active_tgt_count; /* how many active */ __u32 lov_death_row;/* tgts scheduled to be deleted */ __u32 lov_tgt_size; /* size of tgts array */ int lov_connects; @@ -479,7 +418,7 @@ struct lov_obd { struct dentry *lov_pool_debugfs_entry; enum lustre_sec_part lov_sp_me; - /* Cached LRU pages from upper layer */ + /* Cached LRU and unstable data from upper layer */ void *lov_cache; struct rw_semaphore lov_notify_lock; @@ -511,7 +450,7 @@ struct lmv_obd { struct obd_uuid cluuid; struct obd_export *exp; - struct mutex init_mutex; + struct mutex lmv_init_mutex; int connected; int max_easize; int max_def_easize; diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h index 637fa22110a4..f6c18df906a8 100644 --- a/drivers/staging/lustre/lustre/include/obd_cksum.h +++ b/drivers/staging/lustre/lustre/include/obd_cksum.h @@ -35,6 +35,7 @@ #ifndef __OBD_CKSUM #define __OBD_CKSUM #include "../../include/linux/libcfs/libcfs.h" +#include "../../include/linux/libcfs/libcfs_crypto.h" #include "lustre/lustre_idl.h" static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type) diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h index 706869f8c98f..32863bcb30b9 100644 --- a/drivers/staging/lustre/lustre/include/obd_class.h +++ b/drivers/staging/lustre/lustre/include/obd_class.h @@ -477,7 +477,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) struct lu_context session_ctx; struct lu_env env; - lu_context_init(&session_ctx, LCT_SESSION); + lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION); session_ctx.lc_thread = NULL; lu_context_enter(&session_ctx); @@ -490,8 +490,9 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) obd->obd_lu_dev = d; d->ld_obd = obd; rc = 0; - } else + } else { rc = PTR_ERR(d); + } } lu_context_exit(&session_ctx); lu_context_fini(&session_ctx); diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index f8ee3a3254ba..60034d39b00d 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -58,6 +58,7 @@ extern int at_early_margin; extern int at_extra; extern unsigned int obd_sync_filter; extern unsigned int obd_max_dirty_pages; +extern atomic_t obd_unstable_pages; extern atomic_t obd_dirty_pages; extern atomic_t obd_dirty_transit_pages; extern char obd_jobid_var[]; @@ -289,6 +290,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_ENOINO 0x229 #define OBD_FAIL_OST_DQACQ_NET 0x230 #define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231 +#define OBD_FAIL_OST_SET_INFO_NET 0x232 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 @@ -319,6 +321,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LDLM_AGL_DELAY 0x31a #define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b #define OBD_FAIL_LDLM_OST_LVB 0x31c +#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 @@ -426,6 +429,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_FLD 0x1100 #define OBD_FAIL_FLD_QUERY_NET 0x1101 +#define OBD_FAIL_FLD_READ_NET 0x1102 #define OBD_FAIL_SEC_CTX 0x1200 #define OBD_FAIL_SEC_CTX_INIT_NET 0x1201 |