Diffstat (limited to 'drivers/staging/lustre/lustre/include')
-rw-r--r--  drivers/staging/lustre/lustre/include/cl_object.h            379
-rw-r--r--  drivers/staging/lustre/lustre/include/llog_swab.h             65
-rw-r--r--  drivers/staging/lustre/lustre/include/lprocfs_status.h         9
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h      75
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/lustre_idl.h    438
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h    4
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/lustre_user.h    44
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_compat.h          2
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_dlm.h            50
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_fid.h             1
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_fld.h             2
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_ha.h              1
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_import.h          9
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_lib.h             2
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_lmv.h            13
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_log.h             6
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_mdc.h            44
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_net.h           898
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_nrs.h           717
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h       70
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_req_layout.h      7
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_sec.h             3
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_swab.h          102
-rw-r--r--  drivers/staging/lustre/lustre/include/obd.h                  291
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_class.h            264
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_support.h           15
-rw-r--r--  drivers/staging/lustre/lustre/include/seq_range.h            199
27 files changed, 1810 insertions(+), 1900 deletions(-)
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
index 89292c93dcd5..dc685610c4c4 100644
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -59,10 +59,6 @@
* read/write system call it is associated with the single user
* thread, that issued the system call).
*
- * - cl_req represents a collection of pages for a transfer. cl_req is
- * constructed by req-forming engine that tries to saturate
- * transport with large and continuous transfers.
- *
* Terminology
*
* - to avoid confusion high-level I/O operation like read or write system
@@ -103,11 +99,8 @@
struct inode;
struct cl_device;
-struct cl_device_operations;
struct cl_object;
-struct cl_object_page_operations;
-struct cl_object_lock_operations;
struct cl_page;
struct cl_page_slice;
@@ -120,27 +113,7 @@ struct cl_page_operations;
struct cl_io;
struct cl_io_slice;
-struct cl_req;
-struct cl_req_slice;
-
-/**
- * Operations for each data device in the client stack.
- *
- * \see vvp_cl_ops, lov_cl_ops, lovsub_cl_ops, osc_cl_ops
- */
-struct cl_device_operations {
- /**
- * Initialize cl_req. This method is called top-to-bottom on all
- * devices in the stack to get them a chance to allocate layer-private
- * data, and to attach them to the cl_req by calling
- * cl_req_slice_add().
- *
- * \see osc_req_init(), lov_req_init(), lovsub_req_init()
- * \see vvp_req_init()
- */
- int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req);
-};
+struct cl_req_attr;
/**
* Device in the client stack.
@@ -150,8 +123,6 @@ struct cl_device_operations {
struct cl_device {
/** Super-class. */
struct lu_device cd_lu_dev;
- /** Per-layer operation vector. */
- const struct cl_device_operations *cd_ops;
};
/** \addtogroup cl_object cl_object
@@ -267,7 +238,7 @@ struct cl_object_conf {
/**
* Object layout. This is consumed by lov.
*/
- struct lustre_md *coc_md;
+ struct lu_buf coc_layout;
/**
* Description of particular stripe location in the
* cluster. This is consumed by osc.
@@ -301,6 +272,26 @@ enum {
OBJECT_CONF_WAIT = 2
};
+enum {
+ CL_LAYOUT_GEN_NONE = (u32)-2, /* layout lock was cancelled */
+ CL_LAYOUT_GEN_EMPTY = (u32)-1, /* for empty layout */
+};
+
+struct cl_layout {
+ /** the buffer to return the layout in lov_mds_md format. */
+ struct lu_buf cl_buf;
+ /** size of layout in lov_mds_md format. */
+ size_t cl_size;
+ /** Layout generation. */
+ u32 cl_layout_gen;
+ /**
+ * True if this is a released file.
+ * Temporarily added for released file truncate in ll_setattr_raw().
+ * It will be removed later. -Jinshan
+ */
+ bool cl_is_released;
+};
+
/**
* Operations implemented for each cl object layer.
*
@@ -400,6 +391,27 @@ struct cl_object_operations {
*/
int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj,
struct lov_user_md __user *lum);
+ /**
+ * Get FIEMAP mapping from the object.
+ */
+ int (*coo_fiemap)(const struct lu_env *env, struct cl_object *obj,
+ struct ll_fiemap_info_key *fmkey,
+ struct fiemap *fiemap, size_t *buflen);
+ /**
+ * Get layout and generation of the object.
+ */
+ int (*coo_layout_get)(const struct lu_env *env, struct cl_object *obj,
+ struct cl_layout *layout);
+ /**
+ * Get maximum size of the object.
+ */
+ loff_t (*coo_maxbytes)(struct cl_object *obj);
+ /**
+ * Set request attributes.
+ */
+ void (*coo_req_attr_set)(const struct lu_env *env,
+ struct cl_object *obj,
+ struct cl_req_attr *attr);
};
/**
@@ -591,7 +603,7 @@ enum cl_page_state {
*
* - [cl_page_state::CPS_PAGEOUT] page is dirty, the
* req-formation engine decides that it wants to include this page
- * into an cl_req being constructed, and yanks it from the cache;
+ * into an RPC being constructed, and yanks it from the cache;
*
* - [cl_page_state::CPS_FREEING] VM callback is executed to
* evict the page form the memory;
@@ -660,7 +672,7 @@ enum cl_page_state {
* Page is being read in, as a part of a transfer. This is quite
* similar to the cl_page_state::CPS_PAGEOUT state, except that
* read-in is always "immediate"---there is no such thing as a sudden
- * construction of read cl_req from cached, presumably not up to date,
+ * construction of read request from cached, presumably not up to date,
* pages.
*
* Underlying VM page is locked for the duration of transfer.
@@ -714,8 +726,6 @@ struct cl_page {
struct list_head cp_batch;
/** List of slices. Immutable after creation. */
struct list_head cp_layers;
- /** Linkage of pages within cl_req. */
- struct list_head cp_flight;
/**
* Page state. This field is const to avoid accidental update, it is
* modified only internally within cl_page.c. Protected by a VM lock.
@@ -732,12 +742,6 @@ struct cl_page {
* by sub-io. Protected by a VM lock.
*/
struct cl_io *cp_owner;
- /**
- * Owning IO request in cl_page_state::CPS_PAGEOUT and
- * cl_page_state::CPS_PAGEIN states. This field is maintained only in
- * the top-level pages. Protected by a VM lock.
- */
- struct cl_req *cp_req;
/** List of references to this page, for debugging. */
struct lu_ref cp_reference;
/** Link to an object, for debugging. */
@@ -779,7 +783,6 @@ enum cl_lock_mode {
/**
* Requested transfer type.
- * \ingroup cl_req
*/
enum cl_req_type {
CRT_READ,
@@ -884,26 +887,6 @@ struct cl_page_operations {
/** Destructor. Frees resources and slice itself. */
void (*cpo_fini)(const struct lu_env *env,
struct cl_page_slice *slice);
-
- /**
- * Checks whether the page is protected by a cl_lock. This is a
- * per-layer method, because certain layers have ways to check for the
- * lock much more efficiently than through the generic locks scan, or
- * implement locking mechanisms separate from cl_lock, e.g.,
- * LL_FILE_GROUP_LOCKED in vvp. If \a pending is true, check for locks
- * being canceled, or scheduled for cancellation as soon as the last
- * user goes away, too.
- *
- * \retval -EBUSY: page is protected by a lock of a given mode;
- * \retval -ENODATA: page is not protected by a lock;
- * \retval 0: this layer cannot decide.
- *
- * \see cl_page_is_under_lock()
- */
- int (*cpo_is_under_lock)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io, pgoff_t *max);
-
/**
* Optional debugging helper. Prints given page slice.
*
@@ -915,8 +898,7 @@ struct cl_page_operations {
/**
* \name transfer
*
- * Transfer methods. See comment on cl_req for a description of
- * transfer formation and life-cycle.
+ * Transfer methods.
*
* @{
*/
@@ -962,7 +944,7 @@ struct cl_page_operations {
int ioret);
/**
* Called when cached page is about to be added to the
- * cl_req as a part of req formation.
+ * ptlrpc request as a part of req formation.
*
* \return 0 : proceed with this page;
* \return -EAGAIN : skip this page;
@@ -1365,7 +1347,6 @@ struct cl_2queue {
* (3) sort all locks to avoid dead-locks, and acquire them
*
* (4) process the chunk: call per-page methods
- * (cl_io_operations::cio_read_page() for read,
* cl_io_operations::cio_prepare_write(),
* cl_io_operations::cio_commit_write() for write)
*
@@ -1388,6 +1369,8 @@ enum cl_io_type {
CIT_WRITE,
/** truncate, utime system calls */
CIT_SETATTR,
+ /** get data version */
+ CIT_DATA_VERSION,
/**
* page fault handling
*/
@@ -1467,6 +1450,31 @@ struct cl_io_slice {
typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
struct cl_page *);
+
+struct cl_read_ahead {
+ /*
+ * Maximum page index at which the readahead window will end.
+ * This is determined by DLM lock coverage, RPC boundaries and
+ * stripe boundaries; cra_end itself is included in the window.
+ */
+ pgoff_t cra_end;
+ /*
+ * Release routine. If readahead holds resources underneath, this
+ * function should be called to release them.
+ */
+ void (*cra_release)(const struct lu_env *env, void *cbdata);
+ /* Callback data for cra_release routine */
+ void *cra_cbdata;
+};
+
+static inline void cl_read_ahead_release(const struct lu_env *env,
+ struct cl_read_ahead *ra)
+{
+ if (ra->cra_release)
+ ra->cra_release(env, ra->cra_cbdata);
+ memset(ra, 0, sizeof(*ra));
+}
+
/**
* Per-layer io operations.
* \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
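
The new read-ahead hook pairs a window with an optional release callback: a layer that pins resources underneath (a DLM lock reference, for instance) sets cra_release/cra_cbdata, and every successful cl_io_read_ahead() call must be balanced by cl_read_ahead_release(). A minimal sketch of the intended calling pattern, assuming an initialized env/io; the helper name and clamping logic are illustrative, not from this patch:

static pgoff_t demo_clamp_ra_window(const struct lu_env *env, struct cl_io *io,
				    pgoff_t start, pgoff_t end)
{
	struct cl_read_ahead ra = { 0 };

	/* ask the layers how far read-ahead may extend from 'start' */
	if (cl_io_read_ahead(env, io, start, &ra) == 0) {
		/* cra_end is inclusive, so it may cap 'end' */
		if (ra.cra_end < end)
			end = ra.cra_end;
		/* drop whatever the layers pinned underneath */
		cl_read_ahead_release(env, &ra);
	}
	return end;
}
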
@@ -1573,16 +1581,13 @@ struct cl_io_operations {
struct cl_page_list *queue, int from, int to,
cl_commit_cbt cb);
/**
- * Read missing page.
- *
- * Called by a top-level cl_io_operations::op[CIT_READ]::cio_start()
- * method, when it hits not-up-to-date page in the range. Optional.
+ * Decide the maximum readahead extent.
*
* \pre io->ci_type == CIT_READ
*/
- int (*cio_read_page)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- const struct cl_page_slice *page);
+ int (*cio_read_ahead)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ pgoff_t start, struct cl_read_ahead *ra);
/**
* Optional debugging helper. Print given io slice.
*/
@@ -1765,10 +1770,15 @@ struct cl_io {
struct cl_io_rw_common ci_rw;
struct cl_setattr_io {
struct ost_lvb sa_attr;
+ unsigned int sa_attr_flags;
unsigned int sa_valid;
int sa_stripe_index;
- struct lu_fid *sa_parent_fid;
+ const struct lu_fid *sa_parent_fid;
} ci_setattr;
+ struct cl_data_version_io {
+ u64 dv_data_version;
+ int dv_flags;
+ } ci_data_version;
struct cl_fault_io {
/** page index within file. */
pgoff_t ft_index;
@@ -1836,179 +1846,20 @@ struct cl_io {
/** @} cl_io */
-/** \addtogroup cl_req cl_req
- * @{
- */
-/** \struct cl_req
- * Transfer.
- *
- * There are two possible modes of transfer initiation on the client:
- *
- * - immediate transfer: this is started when a high level io wants a page
- * or a collection of pages to be transferred right away. Examples:
- * read-ahead, synchronous read in the case of non-page aligned write,
- * page write-out as a part of extent lock cancellation, page write-out
- * as a part of memory cleansing. Immediate transfer can be both
- * cl_req_type::CRT_READ and cl_req_type::CRT_WRITE;
- *
- * - opportunistic transfer (cl_req_type::CRT_WRITE only), that happens
- * when io wants to transfer a page to the server some time later, when
- * it can be done efficiently. Example: pages dirtied by the write(2)
- * path.
- *
- * In any case, transfer takes place in the form of a cl_req, which is a
- * representation for a network RPC.
- *
- * Pages queued for an opportunistic transfer are cached until it is decided
- * that efficient RPC can be composed of them. This decision is made by "a
- * req-formation engine", currently implemented as a part of osc
- * layer. Req-formation depends on many factors: the size of the resulting
- * RPC, whether or not multi-object RPCs are supported by the server,
- * max-rpc-in-flight limitations, size of the dirty cache, etc.
- *
- * For the immediate transfer io submits a cl_page_list, that req-formation
- * engine slices into cl_req's, possibly adding cached pages to some of
- * the resulting req's.
- *
- * Whenever a page from cl_page_list is added to a newly constructed req, its
- * cl_page_operations::cpo_prep() layer methods are called. At that moment,
- * page state is atomically changed from cl_page_state::CPS_OWNED to
- * cl_page_state::CPS_PAGEOUT or cl_page_state::CPS_PAGEIN, cl_page::cp_owner
- * is zeroed, and cl_page::cp_req is set to the
- * req. cl_page_operations::cpo_prep() method at the particular layer might
- * return -EALREADY to indicate that it does not need to submit this page
- * at all. This is possible, for example, if page, submitted for read,
- * became up-to-date in the meantime; and for write, the page don't have
- * dirty bit marked. \see cl_io_submit_rw()
- *
- * Whenever a cached page is added to a newly constructed req, its
- * cl_page_operations::cpo_make_ready() layer methods are called. At that
- * moment, page state is atomically changed from cl_page_state::CPS_CACHED to
- * cl_page_state::CPS_PAGEOUT, and cl_page::cp_req is set to
- * req. cl_page_operations::cpo_make_ready() method at the particular layer
- * might return -EAGAIN to indicate that this page is not eligible for the
- * transfer right now.
- *
- * FUTURE
- *
- * Plan is to divide transfers into "priority bands" (indicated when
- * submitting cl_page_list, and queuing a page for the opportunistic transfer)
- * and allow glueing of cached pages to immediate transfers only within single
- * band. This would make high priority transfers (like lock cancellation or
- * memory pressure induced write-out) really high priority.
- *
- */
-
/**
* Per-transfer attributes.
*/
struct cl_req_attr {
+ enum cl_req_type cra_type;
+ u64 cra_flags;
+ struct cl_page *cra_page;
+
/** Generic attributes for the server consumption. */
struct obdo *cra_oa;
/** Jobid */
char cra_jobid[LUSTRE_JOBID_SIZE];
};
-/**
- * Transfer request operations definable at every layer.
- *
- * Concurrency: transfer formation engine synchronizes calls to all transfer
- * methods.
- */
-struct cl_req_operations {
- /**
- * Invoked top-to-bottom by cl_req_prep() when transfer formation is
- * complete (all pages are added).
- *
- * \see osc_req_prep()
- */
- int (*cro_prep)(const struct lu_env *env,
- const struct cl_req_slice *slice);
- /**
- * Called top-to-bottom to fill in \a oa fields. This is called twice
- * with different flags, see bug 10150 and osc_build_req().
- *
- * \param obj an object from cl_req which attributes are to be set in
- * \a oa.
- *
- * \param oa struct obdo where attributes are placed
- *
- * \param flags \a oa fields to be filled.
- */
- void (*cro_attr_set)(const struct lu_env *env,
- const struct cl_req_slice *slice,
- const struct cl_object *obj,
- struct cl_req_attr *attr, u64 flags);
- /**
- * Called top-to-bottom from cl_req_completion() to notify layers that
- * transfer completed. Has to free all state allocated by
- * cl_device_operations::cdo_req_init().
- */
- void (*cro_completion)(const struct lu_env *env,
- const struct cl_req_slice *slice, int ioret);
-};
-
-/**
- * A per-object state that (potentially multi-object) transfer request keeps.
- */
-struct cl_req_obj {
- /** object itself */
- struct cl_object *ro_obj;
- /** reference to cl_req_obj::ro_obj. For debugging. */
- struct lu_ref_link ro_obj_ref;
- /* something else? Number of pages for a given object? */
-};
-
-/**
- * Transfer request.
- *
- * Transfer requests are not reference counted, because IO sub-system owns
- * them exclusively and knows when to free them.
- *
- * Life cycle.
- *
- * cl_req is created by cl_req_alloc() that calls
- * cl_device_operations::cdo_req_init() device methods to allocate per-req
- * state in every layer.
- *
- * Then pages are added (cl_req_page_add()), req keeps track of all objects it
- * contains pages for.
- *
- * Once all pages were collected, cl_page_operations::cpo_prep() method is
- * called top-to-bottom. At that point layers can modify req, let it pass, or
- * deny it completely. This is to support things like SNS that have transfer
- * ordering requirements invisible to the individual req-formation engine.
- *
- * On transfer completion (or transfer timeout, or failure to initiate the
- * transfer of an allocated req), cl_req_operations::cro_completion() method
- * is called, after execution of cl_page_operations::cpo_completion() of all
- * req's pages.
- */
-struct cl_req {
- enum cl_req_type crq_type;
- /** A list of pages being transferred */
- struct list_head crq_pages;
- /** Number of pages in cl_req::crq_pages */
- unsigned crq_nrpages;
- /** An array of objects which pages are in ->crq_pages */
- struct cl_req_obj *crq_o;
- /** Number of elements in cl_req::crq_objs[] */
- unsigned crq_nrobjs;
- struct list_head crq_layers;
-};
-
-/**
- * Per-layer state for request.
- */
-struct cl_req_slice {
- struct cl_req *crs_req;
- struct cl_device *crs_dev;
- struct list_head crs_linkage;
- const struct cl_req_operations *crs_ops;
-};
-
-/* @} cl_req */
-
enum cache_stats_item {
/** how many cache lookups were performed */
CS_lookup = 0,
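
With cl_req and cro_attr_set() gone, per-RPC attributes are now filled per object through the coo_req_attr_set() method, reached via cl_req_attr_set() (declared later in this patch). A hedged sketch of a caller; everything except the cl_* API names is hypothetical:

static void demo_fill_write_attr(const struct lu_env *env,
				 struct cl_object *obj,
				 struct cl_page *page, struct obdo *oa)
{
	struct cl_req_attr attr = {
		.cra_type = CRT_WRITE,
		.cra_page = page,
		.cra_oa   = oa,
	};

	/* each layer of the top object fills its share of *oa */
	cl_req_attr_set(env, cl_object_top(obj), &attr);
}
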
@@ -2153,9 +2004,6 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
const struct cl_lock_operations *ops);
void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice,
struct cl_object *obj, const struct cl_io_operations *ops);
-void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice,
- struct cl_device *dev,
- const struct cl_req_operations *ops);
/** @} helpers */
/** \defgroup cl_object cl_object
@@ -2183,6 +2031,12 @@ int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
struct lov_user_md __user *lum);
+int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
+ struct ll_fiemap_info_key *fmkey, struct fiemap *fiemap,
+ size_t *buflen);
+int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
+ struct cl_layout *cl);
+loff_t cl_object_maxbytes(struct cl_object *obj);
/**
* Returns true, iff \a o0 and \a o1 are slices of the same object.
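
The accessors above wrap the new coo_layout_get/coo_maxbytes methods. A usage sketch under stated assumptions: the on-stack buffer size and the printing are illustrative only, and lb_buf/lb_len are the usual lu_buf fields:

static int demo_print_layout(const struct lu_env *env, struct cl_object *obj)
{
	char lmm[512];			/* illustrative lov_mds_md buffer */
	struct cl_layout cl = {
		.cl_buf.lb_buf = lmm,
		.cl_buf.lb_len = sizeof(lmm),
	};
	int rc = cl_object_layout_get(env, obj, &cl);

	if (rc == 0)
		pr_info("layout gen %u, %zu bytes, maxbytes %lld\n",
			cl.cl_layout_gen, cl.cl_size,
			(long long)cl_object_maxbytes(obj));
	return rc;
}
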
@@ -2302,8 +2156,6 @@ void cl_page_discard(const struct lu_env *env, struct cl_io *io,
void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg);
void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate);
-int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, pgoff_t *max_index);
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
size_t cl_page_size(const struct cl_object *obj);
@@ -2414,8 +2266,6 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
struct cl_io_lock_link *link);
int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
struct cl_lock_descr *descr);
-int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page);
int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
enum cl_req_type iot, struct cl_2queue *queue);
int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
@@ -2424,6 +2274,8 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue, int from, int to,
cl_commit_cbt cb);
+int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io,
+ pgoff_t start, struct cl_read_ahead *ra);
int cl_io_is_going(const struct lu_env *env);
/**
@@ -2520,19 +2372,8 @@ void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page);
/** @} cl_page_list */
-/** \defgroup cl_req cl_req
- * @{
- */
-struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
- enum cl_req_type crt, int nr_objects);
-
-void cl_req_page_add(const struct lu_env *env, struct cl_req *req,
- struct cl_page *page);
-void cl_req_page_done(const struct lu_env *env, struct cl_page *page);
-int cl_req_prep(const struct lu_env *env, struct cl_req *req);
-void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
- struct cl_req_attr *attr, u64 flags);
-void cl_req_completion(const struct lu_env *env, struct cl_req *req, int ioret);
+void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj,
+ struct cl_req_attr *attr);
/** \defgroup cl_sync_io cl_sync_io
* @{
@@ -2568,8 +2409,6 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
/** @} cl_sync_io */
-/** @} cl_req */
-
/** \defgroup cl_env cl_env
*
* lu_env handling for a client.
@@ -2593,35 +2432,13 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
* - allocation and destruction of environment is amortized by caching no
* longer used environments instead of destroying them;
*
- * - there is a notion of "current" environment, attached to the kernel
- * data structure representing current thread Top-level lustre code
- * allocates an environment and makes it current, then calls into
- * non-lustre code, that in turn calls lustre back. Low-level lustre
- * code thus called can fetch environment created by the top-level code
- * and reuse it, avoiding additional environment allocation.
- * Right now, three interfaces can attach the cl_env to running thread:
- * - cl_env_get
- * - cl_env_implant
- * - cl_env_reexit(cl_env_reenter had to be called priorly)
- *
* \see lu_env, lu_context, lu_context_key
* @{
*/
-struct cl_env_nest {
- int cen_refcheck;
- void *cen_cookie;
-};
-
struct lu_env *cl_env_get(int *refcheck);
struct lu_env *cl_env_alloc(int *refcheck, __u32 tags);
-struct lu_env *cl_env_nested_get(struct cl_env_nest *nest);
void cl_env_put(struct lu_env *env, int *refcheck);
-void cl_env_nested_put(struct cl_env_nest *nest, struct lu_env *env);
-void *cl_env_reenter(void);
-void cl_env_reexit(void *cookie);
-void cl_env_implant(struct lu_env *env, int *refcheck);
-void cl_env_unplant(struct lu_env *env, int *refcheck);
unsigned int cl_env_cache_purge(unsigned int nr);
struct lu_env *cl_env_percpu_get(void);
void cl_env_percpu_put(struct lu_env *env);
diff --git a/drivers/staging/lustre/lustre/include/llog_swab.h b/drivers/staging/lustre/lustre/include/llog_swab.h
new file mode 100644
index 000000000000..fd7ffb154ad1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/llog_swab.h
@@ -0,0 +1,65 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format. The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed()).
+ *
+ * Each type has a corresponding 'lustre_swab_xxxtypexxx()' routine,
+ * implemented in ptlrpc/pack_generic.c. These 'swabbers' convert the
+ * type from "other" endian, in-place in the message buffer.
+ *
+ * A swabber takes a single pointer argument. The caller must already have
+ * verified that the length of the message buffer >= sizeof (type).
+ *
+ * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
+ * may be defined that swabs just the variable part, after the caller has
+ * verified that the message buffer is large enough.
+ */
+
+#ifndef _LLOG_SWAB_H_
+#define _LLOG_SWAB_H_
+
+#include "lustre/lustre_idl.h"
+struct lustre_cfg;
+
+void lustre_swab_lu_fid(struct lu_fid *fid);
+void lustre_swab_ost_id(struct ost_id *oid);
+void lustre_swab_llogd_body(struct llogd_body *d);
+void lustre_swab_llog_hdr(struct llog_log_hdr *h);
+void lustre_swab_llogd_conn_body(struct llogd_conn_body *d);
+void lustre_swab_llog_rec(struct llog_rec_hdr *rec);
+void lustre_swab_lu_seq_range(struct lu_seq_range *range);
+void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
+void lustre_swab_cfg_marker(struct cfg_marker *marker,
+ int swab, int size);
+
+#endif
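
To make the convention above concrete, here is a minimal sketch of the in-place swab pattern built on the kernel's __swab helpers; demo_wire_rec and its swabber are invented for illustration and are not part of this patch:

#include <linux/swab.h>

struct demo_wire_rec {			/* hypothetical fixed-size wire struct */
	__u64	dw_id;
	__u32	dw_flags;
	__u32	dw_padding;
};

/* Convert from "other" endian in place; the caller has already verified
 * that the buffer holds at least sizeof(struct demo_wire_rec) bytes.
 */
static void lustre_swab_demo_wire_rec(struct demo_wire_rec *r)
{
	__swab64s(&r->dw_id);
	__swab32s(&r->dw_flags);
	/* padding carries no data and needs no swabbing */
}
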
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
index cc0713ef8ae5..62753dae0bfa 100644
--- a/drivers/staging/lustre/lustre/include/lprocfs_status.h
+++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h
@@ -43,6 +43,8 @@
#include <linux/spinlock.h>
#include <linux/types.h>
+#include "../../include/linux/libcfs/libcfs.h"
+#include "lustre_cfg.h"
#include "lustre/lustre_idl.h"
struct lprocfs_vars {
@@ -540,7 +542,8 @@ lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags);
void lprocfs_clear_stats(struct lprocfs_stats *stats);
void lprocfs_free_stats(struct lprocfs_stats **stats);
void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
- unsigned conf, const char *name, const char *units);
+ unsigned int conf, const char *name,
+ const char *units);
struct obd_export;
int lprocfs_exp_cleanup(struct obd_export *exp);
struct dentry *ldebugfs_add_simple(struct dentry *root,
@@ -701,9 +704,9 @@ static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store)
extern const struct sysfs_ops lustre_sysfs_ops;
struct root_squash_info;
-int lprocfs_wr_root_squash(const char *buffer, unsigned long count,
+int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count,
struct root_squash_info *squash, char *name);
-int lprocfs_wr_nosquash_nids(const char *buffer, unsigned long count,
+int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count,
struct root_squash_info *squash, char *name);
/* all quota proc functions */
diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
index c2340d643e84..b8ad5559a3b9 100644
--- a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
+++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
@@ -41,79 +41,24 @@
#ifndef _LUSTRE_FIEMAP_H
#define _LUSTRE_FIEMAP_H
-struct ll_fiemap_extent {
- __u64 fe_logical; /* logical offset in bytes for the start of
- * the extent from the beginning of the file
- */
- __u64 fe_physical; /* physical offset in bytes for the start
- * of the extent from the beginning of the disk
- */
- __u64 fe_length; /* length in bytes for this extent */
- __u64 fe_reserved64[2];
- __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
- __u32 fe_device; /* device number for this extent */
- __u32 fe_reserved[2];
-};
-
-struct ll_user_fiemap {
- __u64 fm_start; /* logical offset (inclusive) at
- * which to start mapping (in)
- */
- __u64 fm_length; /* logical length of mapping which
- * userspace wants (in)
- */
- __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
- __u32 fm_mapped_extents;/* number of extents that were mapped (out) */
- __u32 fm_extent_count; /* size of fm_extents array (in) */
- __u32 fm_reserved;
- struct ll_fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
-};
-
-#define FIEMAP_MAX_OFFSET (~0ULL)
+#ifndef __KERNEL__
+#include <stddef.h>
+#include <fiemap.h>
+#endif
-#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before
- * map
- */
-#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute
- * tree
- */
-#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
-#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
-#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
- * Sets EXTENT_UNKNOWN.
- */
-#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read
- * while fs is unmounted
- */
-#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs.
- * Sets EXTENT_NO_DIRECT.
- */
-#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be
- * block aligned.
- */
-#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata.
- * Sets EXTENT_NOT_ALIGNED.*/
-#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block.
- * Sets EXTENT_NOT_ALIGNED.
- */
-#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but
- * no data (i.e. zero).
- */
-#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively
- * support extents. Result
- * merged for efficiency.
- */
+/* XXX: We use fiemap_extent::fe_reserved[0] */
+#define fe_device fe_reserved[0]
static inline size_t fiemap_count_to_size(size_t extent_count)
{
- return (sizeof(struct ll_user_fiemap) + extent_count *
- sizeof(struct ll_fiemap_extent));
+ return sizeof(struct fiemap) + extent_count *
+ sizeof(struct fiemap_extent);
}
static inline unsigned fiemap_size_to_count(size_t array_size)
{
- return ((array_size - sizeof(struct ll_user_fiemap)) /
- sizeof(struct ll_fiemap_extent));
+ return (array_size - sizeof(struct fiemap)) /
+ sizeof(struct fiemap_extent);
}
#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
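
The converters above are plain size arithmetic over the standard kernel fiemap structs (32-byte fiemap header, 56-byte fiemap_extent), and fe_device aliases the otherwise reserved fe_reserved[0]. A sketch of a round trip, assuming linux/slab.h for kzalloc; the helper is illustrative:

static struct fiemap *demo_alloc_fiemap(unsigned int extents)
{
	/* e.g. extents == 8: 32 + 8 * 56 == 480 bytes */
	size_t len = fiemap_count_to_size(extents);
	struct fiemap *fm = kzalloc(len, GFP_KERNEL);

	if (fm)
		fm->fm_extent_count = fiemap_size_to_count(len); /* == extents */
	return fm;
}
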
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 72eaee95c6b8..65ce503ad595 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -48,8 +48,7 @@
* that the Lustre wire protocol is not influenced by external dependencies.
*
* The only other acceptable items in this file are VERY SIMPLE accessor
- * functions to avoid callers grubbing inside the structures, and the
- * prototypes of the swabber functions for each struct. Nothing that
+ * functions to avoid callers grubbing inside the structures. Nothing that
* depends on external functions or definitions should be in here.
*
* Structs must be properly aligned to put 64-bit values on an 8-byte
@@ -64,23 +63,6 @@
* in the code to ensure that new/old clients that see this larger struct
* do not fail, otherwise you need to implement protocol compatibility).
*
- * We assume all nodes are either little-endian or big-endian, and we
- * always send messages in the sender's native format. The receiver
- * detects the message format by checking the 'magic' field of the message
- * (see lustre_msg_swabbed() below).
- *
- * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines,
- * implemented either here, inline (trivial implementations) or in
- * ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other"
- * endian, in-place in the message buffer.
- *
- * A swabber takes a single pointer argument. The caller must already have
- * verified that the length of the message buffer >= sizeof (type).
- *
- * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
- * may be defined that swabs just the variable part, after the caller has
- * verified that the message buffer is large enough.
- *
* @{
*/
@@ -192,113 +174,6 @@ struct lu_seq_range_array {
#define LU_SEQ_RANGE_MASK 0x3
-static inline unsigned fld_range_type(const struct lu_seq_range *range)
-{
- return range->lsr_flags & LU_SEQ_RANGE_MASK;
-}
-
-static inline bool fld_range_is_ost(const struct lu_seq_range *range)
-{
- return fld_range_type(range) == LU_SEQ_RANGE_OST;
-}
-
-static inline bool fld_range_is_mdt(const struct lu_seq_range *range)
-{
- return fld_range_type(range) == LU_SEQ_RANGE_MDT;
-}
-
-/**
- * This all range is only being used when fld client sends fld query request,
- * but it does not know whether the seq is MDT or OST, so it will send req
- * with ALL type, which means either seq type gotten from lookup can be
- * expected.
- */
-static inline unsigned fld_range_is_any(const struct lu_seq_range *range)
-{
- return fld_range_type(range) == LU_SEQ_RANGE_ANY;
-}
-
-static inline void fld_range_set_type(struct lu_seq_range *range,
- unsigned flags)
-{
- range->lsr_flags |= flags;
-}
-
-static inline void fld_range_set_mdt(struct lu_seq_range *range)
-{
- fld_range_set_type(range, LU_SEQ_RANGE_MDT);
-}
-
-static inline void fld_range_set_ost(struct lu_seq_range *range)
-{
- fld_range_set_type(range, LU_SEQ_RANGE_OST);
-}
-
-static inline void fld_range_set_any(struct lu_seq_range *range)
-{
- fld_range_set_type(range, LU_SEQ_RANGE_ANY);
-}
-
-/**
- * returns width of given range \a r
- */
-
-static inline __u64 range_space(const struct lu_seq_range *range)
-{
- return range->lsr_end - range->lsr_start;
-}
-
-/**
- * initialize range to zero
- */
-
-static inline void range_init(struct lu_seq_range *range)
-{
- memset(range, 0, sizeof(*range));
-}
-
-/**
- * check if given seq id \a s is within given range \a r
- */
-
-static inline bool range_within(const struct lu_seq_range *range,
- __u64 s)
-{
- return s >= range->lsr_start && s < range->lsr_end;
-}
-
-static inline bool range_is_sane(const struct lu_seq_range *range)
-{
- return (range->lsr_end >= range->lsr_start);
-}
-
-static inline bool range_is_zero(const struct lu_seq_range *range)
-{
- return (range->lsr_start == 0 && range->lsr_end == 0);
-}
-
-static inline bool range_is_exhausted(const struct lu_seq_range *range)
-
-{
- return range_space(range) == 0;
-}
-
-/* return 0 if two range have the same location */
-static inline int range_compare_loc(const struct lu_seq_range *r1,
- const struct lu_seq_range *r2)
-{
- return r1->lsr_index != r2->lsr_index ||
- r1->lsr_flags != r2->lsr_flags;
-}
-
-#define DRANGE "[%#16.16Lx-%#16.16Lx):%x:%s"
-
-#define PRANGE(range) \
- (range)->lsr_start, \
- (range)->lsr_end, \
- (range)->lsr_index, \
- fld_range_is_mdt(range) ? "mdt" : "ost"
-
/** \defgroup lu_fid lu_fid
* @{
*/
@@ -310,7 +185,7 @@ static inline int range_compare_loc(const struct lu_seq_range *r1,
*/
enum lma_compat {
LMAC_HSM = 0x00000001,
- LMAC_SOM = 0x00000002,
+/* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */
LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */
LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
* under /O/<seq>/d<x>.
@@ -644,13 +519,14 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
{
if (fid_seq_is_mdt0(oi->oi.oi_seq)) {
if (oid >= IDIF_MAX_OID) {
- CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
+ CERROR("Too large OID %#llx to set MDT0 " DOSTID "\n",
+ oid, POSTID(oi));
return;
}
oi->oi.oi_id = oid;
} else if (fid_is_idif(&oi->oi_fid)) {
if (oid >= IDIF_MAX_OID) {
- CERROR("Bad %llu to set "DOSTID"\n",
+ CERROR("Too large OID %#llx to set IDIF " DOSTID "\n",
oid, POSTID(oi));
return;
}
@@ -676,7 +552,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
if (fid_is_idif(fid)) {
if (oid >= IDIF_MAX_OID) {
- CERROR("Too large OID %#llx to set IDIF "DFID"\n",
+ CERROR("Too large OID %#llx to set IDIF " DFID "\n",
(unsigned long long)oid, PFID(fid));
return -EBADF;
}
@@ -685,7 +561,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
fid->f_ver = oid >> 48;
} else {
if (oid >= OBIF_MAX_OID) {
- CERROR("Too large OID %#llx to set REG "DFID"\n",
+ CERROR("Too large OID %#llx to set REG " DFID "\n",
(unsigned long long)oid, PFID(fid));
return -EBADF;
}
@@ -785,8 +661,6 @@ static inline ino_t lu_igif_ino(const struct lu_fid *fid)
return fid_seq(fid);
}
-void lustre_swab_ost_id(struct ost_id *oid);
-
/**
* Get inode generation from a igif.
* \param fid a igif to get inode generation from.
@@ -847,9 +721,6 @@ static inline bool fid_is_sane(const struct lu_fid *fid)
fid_seq_is_rsvd(fid_seq(fid)));
}
-void lustre_swab_lu_fid(struct lu_fid *fid);
-void lustre_swab_lu_seq_range(struct lu_seq_range *range);
-
static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
{
return memcmp(f0, f1, sizeof(*f0)) == 0;
@@ -1099,8 +970,10 @@ struct ptlrpc_body_v3 {
__u32 pb_version;
__u32 pb_opc;
__u32 pb_status;
- __u64 pb_last_xid;
- __u64 pb_last_seen;
+ __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
+ __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */
+ __u16 pb_padding0;
+ __u32 pb_padding1;
__u64 pb_last_committed;
__u64 pb_transno;
__u32 pb_flags;
@@ -1112,8 +985,11 @@ struct ptlrpc_body_v3 {
__u64 pb_slv;
/* VBR: pre-versions */
__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+ __u64 pb_mbits; /**< match bits for bulk request */
/* padding for future needs */
- __u64 pb_padding[4];
+ __u64 pb_padding64_0;
+ __u64 pb_padding64_1;
+ __u64 pb_padding64_2;
char pb_jobid[LUSTRE_JOBID_SIZE];
};
@@ -1125,8 +1001,10 @@ struct ptlrpc_body_v2 {
__u32 pb_version;
__u32 pb_opc;
__u32 pb_status;
- __u64 pb_last_xid;
- __u64 pb_last_seen;
+ __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
+ __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */
+ __u16 pb_padding0;
+ __u32 pb_padding1;
__u64 pb_last_committed;
__u64 pb_transno;
__u32 pb_flags;
@@ -1140,12 +1018,13 @@ struct ptlrpc_body_v2 {
__u64 pb_slv;
/* VBR: pre-versions */
__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+ __u64 pb_mbits; /**< unused in V2 */
/* padding for future needs */
- __u64 pb_padding[4];
+ __u64 pb_padding64_0;
+ __u64 pb_padding64_1;
+ __u64 pb_padding64_2;
};
-void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
-
/* message body offset for lustre_msg_v2 */
/* ptlrpc body offset in all request/reply messages */
#define MSG_PTLRPC_BODY_OFF 0
@@ -1282,7 +1161,16 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
*/
#define OBD_CONNECT_LFSCK 0x40000000000000ULL/* support online LFSCK */
#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */
+#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify
+ * RPCs in parallel
+ */
#define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL/* striped DNE dir */
+#define OBD_CONNECT_SUBTREE 0x800000000000000ULL /* fileset mount */
+#define OBD_CONNECT_LOCK_AHEAD 0x1000000000000000ULL /* lock ahead */
+/** bulk matchbits is sent within ptlrpc_body */
+#define OBD_CONNECT_BULK_MBITS 0x2000000000000000ULL
+#define OBD_CONNECT_OBDOPACK 0x4000000000000000ULL /* compact OUT obdo */
+#define OBD_CONNECT_FLAGS2 0x8000000000000000ULL /* second flags word */
/* XXX README XXX:
* Please DO NOT add flag values here before first ensuring that this same
@@ -1313,25 +1201,6 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
* If we eventually have separate connect data for different types, which we
* almost certainly will, then perhaps we stick a union in here.
*/
-struct obd_connect_data_v1 {
- __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
- __u32 ocd_version; /* lustre release version number */
- __u32 ocd_grant; /* initial cache grant amount (bytes) */
- __u32 ocd_index; /* LOV index to connect to */
- __u32 ocd_brw_size; /* Maximum BRW size in bytes, must be 2^n */
- __u64 ocd_ibits_known; /* inode bits this client understands */
- __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */
- __u8 ocd_inodespace; /* log2 of the per-inode space consumption */
- __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */
- __u32 ocd_unused; /* also fix lustre_swab_connect */
- __u64 ocd_transno; /* first transno from client to be replayed */
- __u32 ocd_group; /* MDS group on OST */
- __u32 ocd_cksum_types; /* supported checksum algorithms */
- __u32 ocd_max_easize; /* How big LOV EA can be on MDS */
- __u32 ocd_instance; /* also fix lustre_swab_connect */
- __u64 ocd_maxbytes; /* Maximum stripe size in bytes */
-};
-
struct obd_connect_data {
__u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
__u32 ocd_version; /* lustre release version number */
@@ -1354,8 +1223,10 @@ struct obd_connect_data {
* any field after ocd_maxbytes on the receiver without a valid flag
* may result in out-of-bound memory access and kernel oops.
*/
- __u64 padding1; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 padding2; /* added 2.1.0. also fix lustre_swab_connect */
+ __u16 ocd_maxmodrpcs; /* Maximum modify RPCs in parallel */
+ __u16 padding0; /* added 2.1.0. also fix lustre_swab_connect */
+ __u32 padding1; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 ocd_connect_flags2;
__u64 padding3; /* added 2.1.0. also fix lustre_swab_connect */
__u64 padding4; /* added 2.1.0. also fix lustre_swab_connect */
__u64 padding5; /* added 2.1.0. also fix lustre_swab_connect */
@@ -1380,8 +1251,6 @@ struct obd_connect_data {
* reserve the flag for future use.
*/
-void lustre_swab_connect(struct obd_connect_data *ocd);
-
/*
* Supported checksum algorithms. Up to 32 checksum types are supported.
* (32-bit mask stored in obd_connect_data::ocd_cksum_types)
@@ -1416,7 +1285,7 @@ enum ost_cmd {
OST_STATFS = 13,
OST_SYNC = 16,
OST_SET_INFO = 17,
- OST_QUOTACHECK = 18,
+ OST_QUOTACHECK = 18, /* not used since 2.4 */
OST_QUOTACTL = 19,
OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
OST_LAST_OPC
@@ -1580,8 +1449,6 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
}
-/* extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); */
-
#define MAX_MD_SIZE \
(sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
#define MIN_MD_SIZE \
@@ -1674,7 +1541,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
#define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */
#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */
/*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */
-#define OBD_MD_FLCOOKIE (0x00800000ULL) /* log cancellation cookie */
+/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */
#define OBD_MD_FLGROUP (0x01000000ULL) /* group */
#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */
#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */
@@ -1713,7 +1580,9 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
/* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
-#define OBD_MD_FLRELEASED (0x0020000000000000ULL) /* file released */
+#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent
+ * executed
+ */
#define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */
@@ -1742,11 +1611,6 @@ struct hsm_state_set {
__u64 hss_clearmask;
};
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_state_set(struct hsm_state_set *hss);
-
-void lustre_swab_obd_statfs(struct obd_statfs *os);
-
/* ost_body.data values for OST_BRW */
#define OBD_BRW_READ 0x01
@@ -1786,14 +1650,16 @@ struct obd_ioobj {
__u32 ioo_bufcnt; /* number of niobufs for this object */
};
+/*
+ * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in
+ * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS.
+ * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits.
+ */
#define IOOBJ_MAX_BRW_BITS 16
-#define IOOBJ_TYPE_MASK ((1U << IOOBJ_MAX_BRW_BITS) - 1)
#define ioobj_max_brw_get(ioo) (((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1)
#define ioobj_max_brw_set(ioo, num) \
do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0)
-void lustre_swab_obd_ioobj(struct obd_ioobj *ioo);
-
/* multiple of 8 bytes => can array */
struct niobuf_remote {
__u64 rnb_offset;
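
Regarding the obd_ioobj hunk above: ioo_max_brw packs the BRW descriptor count into the top 16 bits of the 32-bit field, storing count - 1 so that a zero-filled wire value decodes as 1. A worked sketch (hypothetical helper, not from this patch):

static void demo_ioobj_max_brw(void)
{
	struct obd_ioobj ioo = { };

	/* untouched wire value decodes as (0 >> 16) + 1 == 1 */
	ioobj_max_brw_set(&ioo, 256);	/* stores 255 << 16 == 0x00ff0000 */
	/* ioobj_max_brw_get(&ioo) now yields (255 + 1) == 256 */
}
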
@@ -1801,8 +1667,6 @@ struct niobuf_remote {
__u32 rnb_flags;
};
-void lustre_swab_niobuf_remote(struct niobuf_remote *nbr);
-
/* lock value block communicated between the filter and llite */
/* OST_LVB_ERR_INIT is needed because the return code in rc is
@@ -1824,8 +1688,6 @@ struct ost_lvb_v1 {
__u64 lvb_blocks;
};
-void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb);
-
struct ost_lvb {
__u64 lvb_size;
__s64 lvb_mtime;
@@ -1838,8 +1700,6 @@ struct ost_lvb {
__u32 lvb_padding;
};
-void lustre_swab_ost_lvb(struct ost_lvb *lvb);
-
/*
* lquota data structures
*/
@@ -1866,8 +1726,6 @@ struct obd_quotactl {
struct obd_dqblk qc_dqblk;
};
-void lustre_swab_obd_quotactl(struct obd_quotactl *q);
-
#define Q_COPY(out, in, member) (out)->member = (in)->member
#define QCTL_COPY(out, in) \
@@ -1905,8 +1763,6 @@ struct lquota_lvb {
__u64 lvb_pad1;
};
-void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
-
/* op codes */
enum quota_cmd {
QUOTA_DQACQ = 601,
@@ -1933,9 +1789,9 @@ enum mds_cmd {
MDS_PIN = 42, /* obsolete, never used in a release */
MDS_UNPIN = 43, /* obsolete, never used in a release */
MDS_SYNC = 44,
- MDS_DONE_WRITING = 45,
+ MDS_DONE_WRITING = 45, /* obsolete since 2.8.0 */
MDS_SET_INFO = 46,
- MDS_QUOTACHECK = 47,
+ MDS_QUOTACHECK = 47, /* not used since 2.4 */
MDS_QUOTACTL = 48,
MDS_GETXATTR = 49,
MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */
@@ -1972,8 +1828,6 @@ enum mdt_reint_cmd {
REINT_MAX
};
-void lustre_swab_generic_32s(__u32 *val);
-
/* the disposition of the intent outlines what was executed */
#define DISP_IT_EXECD 0x00000001
#define DISP_LOOKUP_EXECD 0x00000002
@@ -2031,36 +1885,19 @@ enum {
#define MDS_STATUS_CONN 1
#define MDS_STATUS_LOV 2
-/* mdt_thread_info.mti_flags. */
-enum md_op_flags {
- /* The flag indicates Size-on-MDS attributes are changed. */
- MF_SOM_CHANGE = (1 << 0),
- /* Flags indicates an epoch opens or closes. */
- MF_EPOCH_OPEN = (1 << 1),
- MF_EPOCH_CLOSE = (1 << 2),
- MF_MDC_CANCEL_FID1 = (1 << 3),
- MF_MDC_CANCEL_FID2 = (1 << 4),
- MF_MDC_CANCEL_FID3 = (1 << 5),
- MF_MDC_CANCEL_FID4 = (1 << 6),
- /* There is a pending attribute update. */
- MF_SOM_AU = (1 << 7),
- /* Cancel OST locks while getattr OST attributes. */
- MF_GETATTR_LOCK = (1 << 8),
- MF_GET_MDT_IDX = (1 << 9),
-};
-
-#define MF_SOM_LOCAL_FLAGS (MF_SOM_CHANGE | MF_EPOCH_OPEN | MF_EPOCH_CLOSE)
-
-#define LUSTRE_BFLAG_UNCOMMITTED_WRITES 0x1
-
/* these should be identical to their EXT4_*_FL counterparts, they are
* redefined here only to avoid dragging in fs/ext4/ext4.h
*/
#define LUSTRE_SYNC_FL 0x00000008 /* Synchronous updates */
#define LUSTRE_IMMUTABLE_FL 0x00000010 /* Immutable file */
#define LUSTRE_APPEND_FL 0x00000020 /* writes to file may only append */
+#define LUSTRE_NODUMP_FL 0x00000040 /* do not dump file */
#define LUSTRE_NOATIME_FL 0x00000080 /* do not update atime */
+#define LUSTRE_INDEX_FL 0x00001000 /* hash-indexed directory */
#define LUSTRE_DIRSYNC_FL 0x00010000 /* dirsync behaviour (dir only) */
+#define LUSTRE_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define LUSTRE_DIRECTIO_FL 0x00100000 /* Use direct i/o */
+#define LUSTRE_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
* for the client inode i_flags. The LUSTRE_*_FL are the Lustre wire
@@ -2113,7 +1950,7 @@ struct mdt_body {
__u32 mbo_mode;
__u32 mbo_uid;
__u32 mbo_gid;
- __u32 mbo_flags;
+ __u32 mbo_flags; /* LUSTRE_*_FL file attributes */
__u32 mbo_rdev;
__u32 mbo_nlink; /* #bytes to read in the case of MDS_READPAGE */
__u32 mbo_unused2; /* was "generation" until 2.4.0 */
@@ -2121,7 +1958,7 @@ struct mdt_body {
__u32 mbo_eadatasize;
__u32 mbo_aclsize;
__u32 mbo_max_mdsize;
- __u32 mbo_max_cookiesize;
+ __u32 mbo_unused3; /* was max_cookiesize until 2.8 */
__u32 mbo_uid_h; /* high 32-bits of uid, for FUID */
__u32 mbo_gid_h; /* high 32-bits of gid, for FUID */
__u32 mbo_padding_5; /* also fix lustre_swab_mdt_body */
@@ -2132,17 +1969,13 @@ struct mdt_body {
__u64 mbo_padding_10;
}; /* 216 */
-void lustre_swab_mdt_body(struct mdt_body *b);
-
struct mdt_ioepoch {
- struct lustre_handle handle;
- __u64 ioepoch;
- __u32 flags;
- __u32 padding;
+ struct lustre_handle mio_handle;
+ __u64 mio_unused1; /* was ioepoch */
+ __u32 mio_unused2; /* was flags */
+ __u32 mio_padding;
};
-void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b);
-
/* permissions for md_perm.mp_perm */
enum {
CFS_SETUID_PERM = 0x01,
@@ -2178,8 +2011,6 @@ struct mdt_rec_setattr {
__u32 sa_padding_5;
};
-void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
-
/*
* Attribute flags used in mdt_rec_setattr::sa_valid.
* The kernel's #defines for ATTR_* should not be used over the network
@@ -2207,12 +2038,9 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
#define MDS_FMODE_CLOSED 00000000
#define MDS_FMODE_EXEC 00000004
-/* IO Epoch is opened on a closed file. */
-#define MDS_FMODE_EPOCH 01000000
-/* IO Epoch is opened on a file truncate. */
-#define MDS_FMODE_TRUNC 02000000
-/* Size-on-MDS Attribute Update is pending. */
-#define MDS_FMODE_SOM 04000000
+/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */
+/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */
+/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */
#define MDS_OPEN_CREATED 00000010
#define MDS_OPEN_CROSS 00000020
@@ -2258,7 +2086,7 @@ enum mds_op_bias {
MDS_CROSS_REF = 1 << 1,
MDS_VTX_BYPASS = 1 << 2,
MDS_PERM_BYPASS = 1 << 3,
- MDS_SOM = 1 << 4,
+/* MDS_SOM = 1 << 4, obsolete since 2.8.0 */
MDS_QUOTA_IGNORE = 1 << 5,
MDS_CLOSE_CLEANUP = 1 << 6,
MDS_KEEP_ORPHAN = 1 << 7,
@@ -2268,6 +2096,7 @@ enum mds_op_bias {
MDS_OWNEROVERRIDE = 1 << 11,
MDS_HSM_RELEASE = 1 << 12,
MDS_RENAME_MIGRATE = BIT(13),
+ MDS_CLOSE_LAYOUT_SWAP = BIT(14),
};
/* instance of mdt_reint_rec */
@@ -2456,8 +2285,6 @@ struct mdt_rec_reint {
__u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */
};
-void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
-
/* lmv structures */
struct lmv_desc {
__u32 ld_tgt_count; /* how many MDS's */
@@ -2547,8 +2374,6 @@ union lmv_mds_md {
struct lmv_user_md lmv_user_md;
};
-void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
-
static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic)
{
ssize_t len = -EINVAL;
@@ -2652,8 +2477,6 @@ struct lov_desc {
#define ld_magic ld_active_tgt_count /* for swabbing from llogs */
-void lustre_swab_lov_desc(struct lov_desc *ld);
-
/*
* LDLM requests:
*/
@@ -2749,24 +2572,38 @@ struct ldlm_flock_wire {
* on the resource type.
*/
-typedef union {
+union ldlm_wire_policy_data {
struct ldlm_extent l_extent;
struct ldlm_flock_wire l_flock;
struct ldlm_inodebits l_inodebits;
-} ldlm_wire_policy_data_t;
+};
union ldlm_gl_desc {
struct ldlm_gl_lquota_desc lquota_desc;
};
-void lustre_swab_gl_desc(union ldlm_gl_desc *);
+enum ldlm_intent_flags {
+ IT_OPEN = BIT(0),
+ IT_CREAT = BIT(1),
+ IT_OPEN_CREAT = BIT(1) | BIT(0),
+ IT_READDIR = BIT(2),
+ IT_GETATTR = BIT(3),
+ IT_LOOKUP = BIT(4),
+ IT_UNLINK = BIT(5),
+ IT_TRUNC = BIT(6),
+ IT_GETXATTR = BIT(7),
+ IT_EXEC = BIT(8),
+ IT_PIN = BIT(9),
+ IT_LAYOUT = BIT(10),
+ IT_QUOTA_DQACQ = BIT(11),
+ IT_QUOTA_CONN = BIT(12),
+ IT_SETXATTR = BIT(13),
+};
struct ldlm_intent {
__u64 opc;
};
-void lustre_swab_ldlm_intent(struct ldlm_intent *i);
-
struct ldlm_resource_desc {
enum ldlm_type lr_type;
__u32 lr_padding; /* also fix lustre_swab_ldlm_resource_desc */
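
The intent opcodes introduced above are single-bit flags carried in ldlm_intent::opc, so combinations compose by OR; IT_OPEN_CREAT is simply both open bits. A one-line sketch (hypothetical helper):

static void demo_open_create_intent(struct ldlm_intent *it)
{
	it->opc = IT_OPEN_CREAT;	/* == IT_OPEN | IT_CREAT == 0x3 */
}
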
@@ -2777,7 +2614,7 @@ struct ldlm_lock_desc {
struct ldlm_resource_desc l_resource;
enum ldlm_mode l_req_mode;
enum ldlm_mode l_granted_mode;
- ldlm_wire_policy_data_t l_policy_data;
+ union ldlm_wire_policy_data l_policy_data;
};
#define LDLM_LOCKREQ_HANDLES 2
@@ -2790,8 +2627,6 @@ struct ldlm_request {
struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
};
-void lustre_swab_ldlm_request(struct ldlm_request *rq);
-
/* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available.
* Otherwise, 2 are available.
*/
@@ -2813,8 +2648,6 @@ struct ldlm_reply {
__u64 lock_policy_res2;
};
-void lustre_swab_ldlm_reply(struct ldlm_reply *r);
-
#define ldlm_flags_to_wire(flags) ((__u32)(flags))
#define ldlm_flags_from_wire(flags) ((__u64)(flags))
@@ -2858,8 +2691,6 @@ struct mgs_target_info {
char mti_params[MTI_PARAM_MAXLEN];
};
-void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
-
struct mgs_nidtbl_entry {
__u64 mne_version; /* table version of this entry */
__u32 mne_instance; /* target instance # */
@@ -2874,8 +2705,6 @@ struct mgs_nidtbl_entry {
} u;
};
-void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
-
struct mgs_config_body {
char mcb_name[MTI_NAME_MAXLEN]; /* logname */
__u64 mcb_offset; /* next index of config log to request */
@@ -2885,15 +2714,11 @@ struct mgs_config_body {
__u32 mcb_units; /* # of units for bulk transfer */
};
-void lustre_swab_mgs_config_body(struct mgs_config_body *body);
-
struct mgs_config_res {
__u64 mcr_offset; /* index of last config log */
__u64 mcr_size; /* size of the log */
};
-void lustre_swab_mgs_config_res(struct mgs_config_res *body);
-
/* Config marker flags (in config log) */
#define CM_START 0x01
#define CM_END 0x02
@@ -2913,8 +2738,6 @@ struct cfg_marker {
char cm_comment[MTI_NAME_MAXLEN];
};
-void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size);
-
/*
* Opcodes for multiple servers.
*/
@@ -2922,7 +2745,7 @@ void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size);
enum obd_cmd {
OBD_PING = 400,
OBD_LOG_CANCEL,
- OBD_QC_CALLBACK,
+ OBD_QC_CALLBACK, /* not used since 2.4 */
OBD_IDX_READ,
OBD_LAST_OPC
};
@@ -3155,23 +2978,32 @@ struct llog_gen_rec {
struct llog_rec_tail lgr_tail;
};
-/* On-disk header structure of each log object, stored in little endian order */
-#define LLOG_CHUNK_SIZE 8192
-#define LLOG_HEADER_SIZE (96)
-#define LLOG_BITMAP_BYTES (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE)
-
-#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */
-
/* flags for the logs */
enum llog_flag {
LLOG_F_ZAP_WHEN_EMPTY = 0x1,
LLOG_F_IS_CAT = 0x2,
LLOG_F_IS_PLAIN = 0x4,
LLOG_F_EXT_JOBID = BIT(3),
+ LLOG_F_IS_FIXSIZE = BIT(4),
+ /*
+ * Note: Flags covered by LLOG_F_EXT_MASK will be inherited from
+ * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here,
+ * because the catlog record is usually fixed size, but its plain
+ * log record can be variable
+ */
LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID,
};
+/* On-disk header structure of each log object, stored in little endian order */
+#define LLOG_MIN_CHUNK_SIZE 8192
+#define LLOG_HEADER_SIZE (96) /* sizeof (llog_log_hdr) +
+ * sizeof(llh_tail) - sizeof(llh_bitmap)
+ */
+#define LLOG_BITMAP_BYTES (LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE)
+#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */
+
struct llog_log_hdr {
struct llog_rec_hdr llh_hdr;
__s64 llh_timestamp;
@@ -3183,13 +3015,30 @@ struct llog_log_hdr {
/* for a catalog the first plain slot is next to it */
struct obd_uuid llh_tgtuuid;
__u32 llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23];
+ /* These fields must always be at the end of the llog_log_hdr.
+ * Note: llh_bitmap size is variable because llog chunk size could be
+ * bigger than LLOG_MIN_CHUNK_SIZE, i.e. sizeof(llog_log_hdr) > 8192
+ * bytes, and the real size is stored in llh_hdr.lrh_len, which means
+ * llh_tail should only be referred by LLOG_HDR_TAIL().
+ * But this structure is also used by client/server llog interface
+ * (see llog_client.c), it will be kept in its original way to avoid
+ * compatibility issue.
+ */
__u32 llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)];
struct llog_rec_tail llh_tail;
} __packed;
-#define LLOG_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \
- llh->llh_bitmap_offset - \
- sizeof(llh->llh_tail)) * 8)
+#undef LLOG_HEADER_SIZE
+#undef LLOG_BITMAP_BYTES
+
+#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \
+ llh->llh_bitmap_offset - \
+ sizeof(llh->llh_tail)) * 8)
+#define LLOG_HDR_BITMAP(llh) (__u32 *)((char *)(llh) + \
+ (llh)->llh_bitmap_offset)
+#define LLOG_HDR_TAIL(llh) ((struct llog_rec_tail *)((char *)llh + \
+ llh->llh_hdr.lrh_len - \
+ sizeof(llh->llh_tail)))
/** log cookies are used to reference a specific log file and a record
* therein
@@ -3259,7 +3108,8 @@ struct obdo {
__u32 o_parent_ver;
struct lustre_handle o_handle; /* brw: lock handle to prolong locks
*/
- struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS
+ struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS,
+ * obsolete in 2.8, reused in OSP
*/
__u32 o_uid_h;
__u32 o_gid_h;
@@ -3333,30 +3183,11 @@ struct ost_body {
/* Key for FIEMAP to be used in get_info calls */
struct ll_fiemap_info_key {
- char name[8];
- struct obdo oa;
- struct ll_user_fiemap fiemap;
+ char lfik_name[8];
+ struct obdo lfik_oa;
+ struct fiemap lfik_fiemap;
};
-void lustre_swab_ost_body(struct ost_body *b);
-void lustre_swab_ost_last_id(__u64 *id);
-void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
-
-void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
-void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
-void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
- int stripe_count);
-void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
-
-/* llog_swab.c */
-void lustre_swab_llogd_body(struct llogd_body *d);
-void lustre_swab_llog_hdr(struct llog_log_hdr *h);
-void lustre_swab_llogd_conn_body(struct llogd_conn_body *d);
-void lustre_swab_llog_rec(struct llog_rec_hdr *rec);
-
-struct lustre_cfg;
-void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
-
/* Functions for dumping PTLRPC fields */
void dump_rniobuf(struct niobuf_remote *rnb);
void dump_ioo(struct obd_ioobj *nb);
@@ -3394,8 +3225,6 @@ struct lustre_capa {
__u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */
} __packed;
-void lustre_swab_lustre_capa(struct lustre_capa *c);
-
/** lustre_capa::lc_opc */
enum {
CAPA_OPC_BODY_WRITE = 1 << 0, /**< write object data */
@@ -3458,8 +3287,6 @@ struct getinfo_fid2path {
char gf_path[0];
} __packed;
-void lustre_swab_fid2path(struct getinfo_fid2path *gf);
-
/** path2parent request/reply structures */
struct getparent {
struct lu_fid gp_fid; /**< parent FID */
@@ -3486,8 +3313,6 @@ struct layout_intent {
__u64 li_end;
};
-void lustre_swab_layout_intent(struct layout_intent *li);
-
/**
* On the wire version of hsm_progress structure.
*
@@ -3506,13 +3331,6 @@ struct hsm_progress_kernel {
__u64 hpk_padding2;
} __packed;
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_current_action(struct hsm_current_action *action);
-void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk);
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_user_item(struct hsm_user_item *hui);
-void lustre_swab_hsm_request(struct hsm_request *hr);
-
/** layout swap request structure
* fid1 and fid2 are in mdt_body
*/
@@ -3520,8 +3338,6 @@ struct mdc_swap_layouts {
__u64 msl_flags;
} __packed;
-void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl);
-
struct close_data {
struct lustre_handle cd_handle;
struct lu_fid cd_fid;
@@ -3529,7 +3345,5 @@ struct close_data {
__u64 cd_reserved[8];
};
-void lustre_swab_close_data(struct close_data *data);
-
#endif
/** @} lustreidl */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
index f3d7c94c3b50..eb08df33b2db 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
@@ -363,8 +363,8 @@ obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, int max_len)
/* OBD_IOC_LOV_GETSTRIPE 155 LL_IOC_LOV_GETSTRIPE */
/* OBD_IOC_LOV_SETEA 156 LL_IOC_LOV_SETEA */
/* lustre/lustre_user.h 157-159 */
-#define OBD_IOC_QUOTACHECK _IOW('f', 160, int)
-#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *)
+/* OBD_IOC_QUOTACHECK _IOW('f', 160, int) */
+/* OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) */
#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
/* lustre/lustre_user.h 163-176 */
#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data)
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
index 6fc985571cba..3301ad652db1 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -63,9 +63,13 @@
#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
typedef struct stat64 lstat_t;
#define lstat_f lstat64
+#define fstat_f fstat64
+#define fstatat_f fstatat64
#else
typedef struct stat lstat_t;
#define lstat_f lstat
+#define fstat_f fstat
+#define fstatat_f fstatat
#endif
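[Review note: illustration only, userspace, assumes <stdio.h>; show_size() is a hypothetical helper. The *_f aliases let callers spell one name whichever branch of the #if above was taken.]

	static int show_size(const char *path)
	{
		lstat_t st;	/* struct stat64 or struct stat, per above */

		if (lstat_f(path, &st) != 0)
			return -1;
		printf("%s: %lld bytes\n", path, (long long)st.st_size);
		return 0;
	}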
#define HAVE_LOV_USER_MDS_DATA
@@ -82,7 +86,6 @@ typedef struct stat lstat_t;
#define FSFILT_IOC_SETVERSION _IOW('f', 4, long)
#define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long)
#define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long)
-#define FSFILT_IOC_FIEMAP _IOWR('f', 11, struct ll_user_fiemap)
#endif
/* FIEMAP flags supported by Lustre */
@@ -235,7 +238,7 @@ struct ost_id {
/* #define LL_IOC_POLL_QUOTACHECK 161 OBD_IOC_POLL_QUOTACHECK */
/* #define LL_IOC_QUOTACTL 162 OBD_IOC_QUOTACTL */
#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
-#define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *)
+/* IOC_LOV_GETINFO 165 obsolete */
#define LL_IOC_FLUSHCTX _IOW('f', 166, long)
/* LL_IOC_RMTACL 167 obsolete */
#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long)
@@ -343,6 +346,9 @@ enum ll_lease_type {
#define LOV_ALL_STRIPES 0xffff /* only valid for directories */
#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
+#define XATTR_LUSTRE_PREFIX "lustre."
+#define XATTR_LUSTRE_LOV "lustre.lov"
+
#define lov_user_ost_data lov_user_ost_data_v1
struct lov_user_ost_data_v1 { /* per-stripe data structure */
struct ost_id l_ost_oi; /* OST object ID */
@@ -451,8 +457,6 @@ static inline int lmv_user_md_size(int stripes, int lmm_magic)
stripes * sizeof(struct lmv_user_mds_data);
}
-void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
-
struct ll_recreate_obj {
__u64 lrc_id;
__u32 lrc_ost_idx;
@@ -522,25 +526,20 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
}
/* printf display format
- * e.g. printf("file FID is "DFID"\n", PFID(fid));
+ * usage: printf("file FID is "DFID"\n", PFID(fid));
*/
#define FID_NOBRACE_LEN 40
#define FID_LEN (FID_NOBRACE_LEN + 2)
#define DFID_NOBRACE "%#llx:0x%x:0x%x"
#define DFID "["DFID_NOBRACE"]"
-#define PFID(fid) \
- (fid)->f_seq, \
- (fid)->f_oid, \
- (fid)->f_ver
+#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver
-/* scanf input parse format -- strip '[' first.
- * e.g. sscanf(fidstr, SFID, RFID(&fid));
+/* scanf input parse format for fids in DFID_NOBRACE format
+ * Need to strip the '[' from the DFID format first, or use "["SFID"]" in the caller.
+ * usage: sscanf(fidstr, SFID, RFID(&fid));
*/
#define SFID "0x%llx:0x%x:0x%x"
-#define RFID(fid) \
- &((fid)->f_seq), \
- &((fid)->f_oid), \
- &((fid)->f_ver)
+#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver)
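[Review note: a quick sketch of the intended round trip through the macros above; fid_round_trip() is hypothetical, userspace, assumes <stdio.h>.]

	static int fid_round_trip(struct lu_fid *fid)
	{
		char buf[FID_LEN + 1];

		/* print with brackets, then skip the leading '[' to parse */
		snprintf(buf, sizeof(buf), DFID, PFID(fid));
		return sscanf(buf + 1, SFID, RFID(fid)) == 3 ? 0 : -1;
	}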
/********* Quotas **********/
@@ -551,23 +550,18 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
#define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */
/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
-#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */
-#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */
+#define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */
+#define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */
#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */
#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */
#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */
#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */
/* lustre-specific control commands */
-#define LUSTRE_Q_INVALIDATE 0x80000b /* invalidate quota data */
-#define LUSTRE_Q_FINVALIDATE 0x80000c /* invalidate filter quota data */
+#define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */
+#define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */
#define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */
-struct if_quotacheck {
- char obd_type[16];
- struct obd_uuid obd_uuid;
-};
-
#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
/* permission */
@@ -649,6 +643,7 @@ struct if_quotactl {
#define SWAP_LAYOUTS_CHECK_DV2 (1 << 1)
#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2)
#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3)
+#define SWAP_LAYOUTS_CLOSE BIT(4)
/* Swap XATTR_NAME_HSM as well, only on the MDT so far */
#define SWAP_LAYOUTS_MDS_HSM (1 << 31)
@@ -999,6 +994,7 @@ struct ioc_data_version {
* See HSM_FLAGS below.
*/
enum hsm_states {
+ HS_NONE = 0x00000000,
HS_EXISTS = 0x00000001,
HS_DIRTY = 0x00000002,
HS_RELEASED = 0x00000004,
diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h
index 567c438e93cb..300e96fb032a 100644
--- a/drivers/staging/lustre/lustre/include/lustre_compat.h
+++ b/drivers/staging/lustre/lustre/include/lustre_compat.h
@@ -74,4 +74,6 @@
# define ext2_find_next_zero_bit find_next_zero_bit_le
#endif
+#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+
#endif /* _LUSTRE_COMPAT_H */
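[Review note: a sketch of the kind of check this mask exists for; times_set_permission() is a hypothetical helper and the permission rule shown is illustrative, not this driver's exact policy.]

	static int times_set_permission(struct inode *inode, struct iattr *attr)
	{
		/* explicit utimes()-style timestamps (ATTR_*_SET) require
		 * ownership, unlike a plain "set times to now" update
		 */
		if ((attr->ia_valid & TIMES_SET_FLAGS) &&
		    !inode_owner_or_capable(inode))
			return -EPERM;
		return 0;
	}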
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index d03534432624..b7e61d082e55 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -59,7 +59,7 @@ struct obd_device;
#define OBD_LDLM_DEVICENAME "ldlm"
#define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus())
-#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(36000))
+#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(3900)) /* 65 min */
#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
/**
@@ -86,10 +86,10 @@ enum ldlm_error {
* decisions about lack of conflicts or do any autonomous lock granting without
* first speaking to a server.
*/
-typedef enum {
+enum ldlm_side {
LDLM_NAMESPACE_SERVER = 1 << 0,
LDLM_NAMESPACE_CLIENT = 1 << 1
-} ldlm_side_t;
+};
/**
* The blocking callback is overloaded to perform two functions. These flags
@@ -359,7 +359,7 @@ struct ldlm_namespace {
struct obd_device *ns_obd;
/** Flag indicating if namespace is on client instead of server */
- ldlm_side_t ns_client;
+ enum ldlm_side ns_client;
/** Resource hash table for namespace. */
struct cfs_hash *ns_rs_hash;
@@ -550,20 +550,18 @@ struct ldlm_flock {
__u64 owner;
__u64 blocking_owner;
struct obd_export *blocking_export;
- /* Protected by the hash lock */
- __u32 blocking_refs;
__u32 pid;
};
-typedef union {
+union ldlm_policy_data {
struct ldlm_extent l_extent;
struct ldlm_flock l_flock;
struct ldlm_inodebits l_inodebits;
-} ldlm_policy_data_t;
+};
void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type,
- const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
+ const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy);
enum lvb_type {
LVB_T_NONE = 0,
@@ -692,7 +690,7 @@ struct ldlm_lock {
* Representation of private data specific for a lock type.
* Examples are: extent range for extent lock or bitmask for ibits locks
*/
- ldlm_policy_data_t l_policy_data;
+ union ldlm_policy_data l_policy_data;
/**
* Lock state flags. Protected by lr_lock.
@@ -967,8 +965,8 @@ struct ldlm_ast_work {
* Common ldlm_enqueue parameters
*/
struct ldlm_enqueue_info {
- __u32 ei_type; /** Type of the lock being enqueued. */
- __u32 ei_mode; /** Mode of the lock being enqueued. */
+ enum ldlm_type ei_type; /** Type of the lock being enqueued. */
+ enum ldlm_mode ei_mode; /** Mode of the lock being enqueued. */
void *ei_cb_bl; /** blocking lock callback */
void *ei_cb_cp; /** lock completion callback */
void *ei_cb_gl; /** lock glimpse callback */
@@ -979,7 +977,7 @@ struct ldlm_enqueue_info {
extern struct obd_ops ldlm_obd_ops;
extern char *ldlm_lockname[];
-char *ldlm_it2str(int it);
+const char *ldlm_it2str(enum ldlm_intent_flags it);
/**
* Just a fancy CDEBUG call with log level preset to LDLM_DEBUG.
@@ -1168,16 +1166,18 @@ do { \
struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
void ldlm_lock_put(struct ldlm_lock *lock);
void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
-void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode);
-int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode);
+int ldlm_lock_addref_try(const struct lustre_handle *lockh,
+ enum ldlm_mode mode);
+void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode);
+void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh,
+ enum ldlm_mode mode);
void ldlm_lock_fail_match_locked(struct ldlm_lock *lock);
void ldlm_lock_allow_match(struct ldlm_lock *lock);
void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
const struct ldlm_res_id *,
- enum ldlm_type type, ldlm_policy_data_t *,
+ enum ldlm_type type, union ldlm_policy_data *,
enum ldlm_mode mode, struct lustre_handle *,
int unref);
enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
@@ -1189,7 +1189,7 @@ void ldlm_unlink_lock_skiplist(struct ldlm_lock *req);
/* resource.c */
struct ldlm_namespace *
ldlm_namespace_new(struct obd_device *obd, char *name,
- ldlm_side_t client, enum ldlm_appetite apt,
+ enum ldlm_side client, enum ldlm_appetite apt,
enum ldlm_ns_type ns_type);
int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags);
void ldlm_namespace_get(struct ldlm_namespace *ns);
@@ -1208,7 +1208,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res,
struct ldlm_lock *lock);
void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
-void ldlm_dump_all_namespaces(ldlm_side_t client, int level);
+void ldlm_dump_all_namespaces(enum ldlm_side client, int level);
void ldlm_namespace_dump(int level, struct ldlm_namespace *);
void ldlm_resource_dump(int level, struct ldlm_resource *);
int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
@@ -1241,7 +1241,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
struct ldlm_enqueue_info *einfo,
const struct ldlm_res_id *res_id,
- ldlm_policy_data_t const *policy, __u64 *flags,
+ union ldlm_policy_data const *policy, __u64 *flags,
void *lvb, __u32 lvb_len, enum lvb_type lvb_type,
struct lustre_handle *lockh, int async);
int ldlm_prep_enqueue_req(struct obd_export *exp,
@@ -1265,13 +1265,13 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *,
enum ldlm_cancel_flags flags, void *opaque);
int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
const struct ldlm_res_id *res_id,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
enum ldlm_cancel_flags flags,
void *opaque);
int ldlm_cancel_resource_local(struct ldlm_resource *res,
struct list_head *cancels,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode, __u64 lock_flags,
enum ldlm_cancel_flags cancel_flags,
void *opaque);
@@ -1333,7 +1333,7 @@ int ldlm_pools_init(void);
void ldlm_pools_fini(void);
int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
- int idx, ldlm_side_t client);
+ int idx, enum ldlm_side client);
void ldlm_pool_fini(struct ldlm_pool *pl);
void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock);
void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock);
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
index 316780693193..b5a1aadbcb93 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -150,6 +150,7 @@
#include "../../include/linux/libcfs/libcfs.h"
#include "lustre/lustre_idl.h"
+#include "seq_range.h"
struct lu_env;
struct lu_site;
diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h
index 932410d3e3cc..6ef1b03cb986 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fld.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fld.h
@@ -103,8 +103,6 @@ struct lu_client_fld {
/** Client fld debugfs entry name. */
char lcf_name[LUSTRE_MDT_MAXNAMELEN];
-
- int lcf_flags;
};
/* Client methods */
diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h
index cde7ed702c86..dec1e99d594d 100644
--- a/drivers/staging/lustre/lustre/include/lustre_ha.h
+++ b/drivers/staging/lustre/lustre/include/lustre_ha.h
@@ -53,6 +53,7 @@ void ptlrpc_activate_import(struct obd_import *imp);
void ptlrpc_deactivate_import(struct obd_import *imp);
void ptlrpc_invalidate_import(struct obd_import *imp);
void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt);
+void ptlrpc_pinger_force(struct obd_import *imp);
/** @} ha */
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
index 5461ba33d90c..f0c931ce1a67 100644
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -185,6 +185,11 @@ struct obd_import {
struct list_head *imp_replay_cursor;
/** @} */
+ /** List of requests that have not yet been replied to */
+ struct list_head imp_unreplied_list;
+ /** Known maximal replied XID */
+ __u64 imp_known_replied_xid;
+
/** obd device for this import */
struct obd_device *imp_obd;
@@ -294,7 +299,9 @@ struct obd_import {
*/
imp_force_reconnect:1,
/* import has tried to connect with server */
- imp_connect_tried:1;
+ imp_connect_tried:1,
+ /* connected but not FULL yet */
+ imp_connected:1;
__u32 imp_connect_op;
struct obd_connect_data imp_connect_data;
__u64 imp_connect_flags_orig;
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index 6b231913ba2e..27f3148c4344 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -350,8 +350,6 @@ do { \
l_wait_event_exclusive_head(wq, condition, &lwi); \
})
-#define LIBLUSTRE_CLIENT (0)
-
/** @} lib */
#endif /* _LUSTRE_LIB_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h
index d7f7afa8dfa7..5aa3645e64dc 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lmv.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lmv.h
@@ -76,18 +76,7 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
union lmv_mds_md;
-int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
- const union lmv_mds_md *lmm, int stripe_count);
-
-static inline int lmv_alloc_memmd(struct lmv_stripe_md **lsmp, int stripe_count)
-{
- return lmv_unpack_md(NULL, lsmp, NULL, stripe_count);
-}
-
-static inline void lmv_free_memmd(struct lmv_stripe_md *lsm)
-{
- lmv_unpack_md(NULL, &lsm, NULL, 0);
-}
+void lmv_free_memmd(struct lmv_stripe_md *lsm);
static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
const struct lmv_mds_md_v1 *lmv_src)
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
index 995b266932e3..35e37eb1bc2c 100644
--- a/drivers/staging/lustre/lustre/include/lustre_log.h
+++ b/drivers/staging/lustre/lustre/include/lustre_log.h
@@ -214,6 +214,7 @@ struct llog_handle {
spinlock_t lgh_hdr_lock; /* protect lgh_hdr data */
struct llog_logid lgh_id; /* id of this log */
struct llog_log_hdr *lgh_hdr;
+ size_t lgh_hdr_size;
int lgh_last_idx;
int lgh_cur_idx; /* used during llog_process */
__u64 lgh_cur_offset; /* used during llog_process */
@@ -244,6 +245,11 @@ struct llog_ctxt {
struct mutex loc_mutex; /* protect loc_imp */
atomic_t loc_refcount;
long loc_flags; /* flags, see above defines */
+ /*
+ * llog chunk size; an llog record cannot be bigger than
+ * loc_chunk_size
+ */
+ __u32 loc_chunk_size;
};
#define LLOG_PROC_BREAK 0x0001
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index 8fc2d3f2dfd6..198ceb0c66f9 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -156,16 +156,39 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
mutex_unlock(&lck->rpcl_mutex);
}
+static inline void mdc_get_mod_rpc_slot(struct ptlrpc_request *req,
+ struct lookup_intent *it)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ u32 opc;
+ u16 tag;
+
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+ tag = obd_get_mod_rpc_slot(cli, opc, it);
+ lustre_msg_set_tag(req->rq_reqmsg, tag);
+}
+
+static inline void mdc_put_mod_rpc_slot(struct ptlrpc_request *req,
+ struct lookup_intent *it)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ u32 opc;
+ u16 tag;
+
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+ tag = lustre_msg_get_tag(req->rq_reqmsg);
+ obd_put_mod_rpc_slot(cli, opc, it, tag);
+}
+
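[Review note: for context, the intended pairing around a modifying RPC looks like this; mdc_send_modify() is a hypothetical caller, error handling elided.]

	static int mdc_send_modify(struct ptlrpc_request *req,
				   struct lookup_intent *it)
	{
		int rc;

		mdc_get_mod_rpc_slot(req, it);	/* reserve slot, tag request */
		rc = ptlrpc_queue_wait(req);	/* send and await reply */
		mdc_put_mod_rpc_slot(req, it);	/* release slot and tag */

		return rc;
	}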
/**
- * Update the maximum possible easize and cookiesize.
+ * Update the maximum possible easize.
*
- * The values are learned from ptlrpc replies sent by the MDT. The
- * default easize and cookiesize is initialized to the minimum value but
- * allowed to grow up to a single page in size if required to handle the
+ * This value is learned from ptlrpc replies sent by the MDT. The
+ * default easize is initialized to the minimum value but allowed
+ * to grow up to a single page in size if required to handle the
* common case.
*
- * \see client_obd::cl_default_mds_easize and
- * client_obd::cl_default_mds_cookiesize
+ * \see client_obd::cl_default_mds_easize
*
* \param[in] exp export for MDC device
* \param[in] body body of ptlrpc reply from MDT
@@ -176,7 +199,7 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
{
if (body->mbo_valid & OBD_MD_FLMODEASIZE) {
struct client_obd *cli = &exp->exp_obd->u.cli;
- u32 def_cookiesize, def_easize;
+ u32 def_easize;
if (cli->cl_max_mds_easize < body->mbo_max_mdsize)
cli->cl_max_mds_easize = body->mbo_max_mdsize;
@@ -184,13 +207,6 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
def_easize = min_t(__u32, body->mbo_max_mdsize,
OBD_MAX_DEFAULT_EA_SIZE);
cli->cl_default_mds_easize = def_easize;
-
- if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize)
- cli->cl_max_mds_cookiesize = body->mbo_max_cookiesize;
-
- def_cookiesize = min_t(__u32, body->mbo_max_cookiesize,
- OBD_MAX_DEFAULT_COOKIE_SIZE);
- cli->cl_default_mds_cookiesize = def_cookiesize;
}
}
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index e9aba99ee52a..411eb0dc7f38 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -50,6 +50,7 @@
* @{
*/
+#include <linux/uio.h>
#include "../../include/linux/libcfs/libcfs.h"
#include "../../include/linux/lnet/nidstr.h"
#include "../../include/linux/lnet/api.h"
@@ -68,13 +69,17 @@
#define PTLRPC_MD_OPTIONS 0
/**
- * Max # of bulk operations in one request.
+ * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ...
* In order for the client and server to properly negotiate the maximum
* possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two
* value. The client is free to limit the actual RPC size for any bulk
* transfer via cl_max_pages_per_rpc to some non-power-of-two value.
+ * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS.
*/
-#define PTLRPC_BULK_OPS_BITS 2
+#define PTLRPC_BULK_OPS_BITS 4
+#if PTLRPC_BULK_OPS_BITS > 16
+#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS."
+#endif
#define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS)
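[Review note, worked out from the values above: PTLRPC_BULK_OPS_BITS = 4 gives PTLRPC_BULK_OPS_COUNT = 1 << 4 = 16 bulk operations, i.e. 16MB per RPC at the 1MB-per-operation LNet MD size implied by the 2=4MB/RPC example, while the 16-bit ceiling yields the 65536 * 1MB = 64GB figure in the NOTE.]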
/**
* PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and
@@ -437,6 +442,10 @@ struct ptlrpc_reply_state {
unsigned long rs_committed:1;/* the transaction was committed
* and the rs was dispatched
*/
+ atomic_t rs_refcount; /* number of users */
+ /** Number of locks awaiting client ACK */
+ int rs_nlocks;
+
/** Size of the state */
int rs_size;
/** opcode */
@@ -449,7 +458,6 @@ struct ptlrpc_reply_state {
struct ptlrpc_service_part *rs_svcpt;
/** Lnet metadata handle for the reply */
lnet_handle_md_t rs_md_h;
- atomic_t rs_refcount;
/** Context for the service thread */
struct ptlrpc_svc_ctx *rs_svc_ctx;
@@ -466,8 +474,6 @@ struct ptlrpc_reply_state {
*/
struct lustre_msg *rs_msg; /* reply message */
- /** Number of locks awaiting client ACK */
- int rs_nlocks;
/** Handles of locks awaiting client reply ACK */
struct lustre_handle rs_locks[RS_MAX_LOCKS];
/** Lock modes of locks in \a rs_locks */
@@ -515,717 +521,7 @@ struct lu_env;
struct ldlm_lock;
-/**
- * \defgroup nrs Network Request Scheduler
- * @{
- */
-struct ptlrpc_nrs_policy;
-struct ptlrpc_nrs_resource;
-struct ptlrpc_nrs_request;
-
-/**
- * NRS control operations.
- *
- * These are common for all policies.
- */
-enum ptlrpc_nrs_ctl {
- /**
- * Not a valid opcode.
- */
- PTLRPC_NRS_CTL_INVALID,
- /**
- * Activate the policy.
- */
- PTLRPC_NRS_CTL_START,
- /**
- * Reserved for multiple primary policies, which may be a possibility
- * in the future.
- */
- PTLRPC_NRS_CTL_STOP,
- /**
- * Policies can start using opcodes from this value and onwards for
- * their own purposes; the assigned value itself is arbitrary.
- */
- PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
-};
-
-/**
- * ORR policy operations
- */
-enum nrs_ctl_orr {
- NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
- NRS_CTL_ORR_WR_QUANTUM,
- NRS_CTL_ORR_RD_OFF_TYPE,
- NRS_CTL_ORR_WR_OFF_TYPE,
- NRS_CTL_ORR_RD_SUPP_REQ,
- NRS_CTL_ORR_WR_SUPP_REQ,
-};
-
-/**
- * NRS policy operations.
- *
- * These determine the behaviour of a policy, and are called in response to
- * NRS core events.
- */
-struct ptlrpc_nrs_pol_ops {
- /**
- * Called during policy registration; this operation is optional.
- *
- * \param[in,out] policy The policy being initialized
- */
- int (*op_policy_init)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called during policy unregistration; this operation is optional.
- *
- * \param[in,out] policy The policy being unregistered/finalized
- */
- void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called when activating a policy via lprocfs; policies allocate and
- * initialize their resources here; this operation is optional.
- *
- * \param[in,out] policy The policy being started
- *
- * \see nrs_policy_start_locked()
- */
- int (*op_policy_start)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called when deactivating a policy via lprocfs; policies deallocate
- * their resources here; this operation is optional
- *
- * \param[in,out] policy The policy being stopped
- *
- * \see nrs_policy_stop0()
- */
- void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
- /**
- * Used for policy-specific operations; i.e. not generic ones like
- * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
- * to an ioctl; this operation is optional.
- *
- * \param[in,out] policy The policy carrying out operation \a opc
- * \param[in] opc The command operation being carried out
- * \param[in,out] arg An generic buffer for communication between the
- * user and the control operation
- *
- * \retval -ve error
- * \retval 0 success
- *
- * \see ptlrpc_nrs_policy_control()
- */
- int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
- enum ptlrpc_nrs_ctl opc, void *arg);
-
- /**
- * Called when obtaining references to the resources of the resource
- * hierarchy for a request that has arrived for handling at the PTLRPC
- * service. Policies should return -ve for requests they do not wish
- * to handle. This operation is mandatory.
- *
- * \param[in,out] policy The policy we're getting resources for.
- * \param[in,out] nrq The request we are getting resources for.
- * \param[in] parent The parent resource of the resource being
- * requested; set to NULL if none.
- * \param[out] resp The resource is to be returned here; the
- * fallback policy in an NRS head should
- * \e always return a non-NULL pointer value.
- * \param[in] moving_req When set, signifies that this is an attempt
- * to obtain resources for a request being moved
- * to the high-priority NRS head by
- * ldlm_lock_reorder_req().
- * This implies two things:
- * 1. We are under obd_export::exp_rpc_lock and
- * so should not sleep.
- * 2. We should not perform non-idempotent or can
- * skip performing idempotent operations that
- * were carried out when resources were first
- * taken for the request when it was initialized
- * in ptlrpc_nrs_req_initialize().
- *
- * \retval 0, +ve The level of the returned resource in the resource
- * hierarchy; currently only 0 (for a non-leaf resource)
- * and 1 (for a leaf resource) are supported by the
- * framework.
- * \retval -ve error
- *
- * \see ptlrpc_nrs_req_initialize()
- * \see ptlrpc_nrs_hpreq_add_nolock()
- */
- int (*op_res_get)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq,
- const struct ptlrpc_nrs_resource *parent,
- struct ptlrpc_nrs_resource **resp,
- bool moving_req);
- /**
- * Called when releasing references taken for resources in the resource
- * hierarchy for the request; this operation is optional.
- *
- * \param[in,out] policy The policy the resource belongs to
- * \param[in] res The resource to be freed
- *
- * \see ptlrpc_nrs_req_finalize()
- * \see ptlrpc_nrs_hpreq_add_nolock()
- */
- void (*op_res_put)(struct ptlrpc_nrs_policy *policy,
- const struct ptlrpc_nrs_resource *res);
-
- /**
- * Obtains a request for handling from the policy, and optionally
- * removes the request from the policy; this operation is mandatory.
- *
- * \param[in,out] policy The policy to poll
- * \param[in] peek When set, signifies that we just want to
- * examine the request, and not handle it, so the
- * request is not removed from the policy.
- * \param[in] force When set, it will force a policy to return a
- * request if it has one queued.
- *
- * \retval NULL No request available for handling
- * \retval valid-pointer The request polled for handling
- *
- * \see ptlrpc_nrs_req_get_nolock()
- */
- struct ptlrpc_nrs_request *
- (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
- bool force);
- /**
- * Called when attempting to add a request to a policy for later
- * handling; this operation is mandatory.
- *
- * \param[in,out] policy The policy on which to enqueue \a nrq
- * \param[in,out] nrq The request to enqueue
- *
- * \retval 0 success
- * \retval != 0 error
- *
- * \see ptlrpc_nrs_req_add_nolock()
- */
- int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Removes a request from the policy's set of pending requests. Normally
- * called after a request has been polled successfully from the policy
- * for handling; this operation is mandatory.
- *
- * \param[in,out] policy The policy the request \a nrq belongs to
- * \param[in,out] nrq The request to dequeue
- */
- void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Called after the request being carried out. Could be used for
- * job/resource control; this operation is optional.
- *
- * \param[in,out] policy The policy which is stopping to handle request
- * \a nrq
- * \param[in,out] nrq The request
- *
- * \pre assert_spin_locked(&svcpt->scp_req_lock)
- *
- * \see ptlrpc_nrs_req_stop_nolock()
- */
- void (*op_req_stop)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Registers the policy's lprocfs interface with a PTLRPC service.
- *
- * \param[in] svc The service
- *
- * \retval 0 success
- * \retval != 0 error
- */
- int (*op_lprocfs_init)(struct ptlrpc_service *svc);
- /**
- * Unegisters the policy's lprocfs interface with a PTLRPC service.
- *
- * In cases of failed policy registration in
- * \e ptlrpc_nrs_policy_register(), this function may be called for a
- * service which has not registered the policy successfully, so
- * implementations of this method should make sure their operations are
- * safe in such cases.
- *
- * \param[in] svc The service
- */
- void (*op_lprocfs_fini)(struct ptlrpc_service *svc);
-};
-
-/**
- * Policy flags
- */
-enum nrs_policy_flags {
- /**
- * Fallback policy, use this flag only on a single supported policy per
- * service. The flag cannot be used on policies that use
- * \e PTLRPC_NRS_FL_REG_EXTERN
- */
- PTLRPC_NRS_FL_FALLBACK = (1 << 0),
- /**
- * Start policy immediately after registering.
- */
- PTLRPC_NRS_FL_REG_START = (1 << 1),
- /**
- * This is a policy registering from a module different to the one NRS
- * core ships in (currently ptlrpc).
- */
- PTLRPC_NRS_FL_REG_EXTERN = (1 << 2),
-};
-
-/**
- * NRS queue type.
- *
- * Denotes whether an NRS instance is for handling normal or high-priority
- * RPCs, or whether an operation pertains to one or both of the NRS instances
- * in a service.
- */
-enum ptlrpc_nrs_queue_type {
- PTLRPC_NRS_QUEUE_REG = (1 << 0),
- PTLRPC_NRS_QUEUE_HP = (1 << 1),
- PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
-};
-
-/**
- * NRS head
- *
- * A PTLRPC service has at least one NRS head instance for handling normal
- * priority RPCs, and may optionally have a second NRS head instance for
- * handling high-priority RPCs. Each NRS head maintains a list of available
- * policies, of which one and only one policy is acting as the fallback policy,
- * and optionally a different policy may be acting as the primary policy. For
- * all RPCs handled by this NRS head instance, NRS core will first attempt to
- * enqueue the RPC using the primary policy (if any). The fallback policy is
- * used in the following cases:
- * - when there was no primary policy in the
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
- * was initialized.
- * - when the primary policy that was at the
- * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
- * RPC was initialized, denoted it did not wish, or for some other reason was
- * not able to handle the request, by returning a non-valid NRS resource
- * reference.
- * - when the primary policy that was at the
- * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
- * RPC was initialized, fails later during the request enqueueing stage.
- *
- * \see nrs_resource_get_safe()
- * \see nrs_request_enqueue()
- */
-struct ptlrpc_nrs {
- spinlock_t nrs_lock;
- /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
- /**
- * List of registered policies
- */
- struct list_head nrs_policy_list;
- /**
- * List of policies with queued requests. Policies that have any
- * outstanding requests are queued here, and this list is queried
- * in a round-robin manner from NRS core when obtaining a request
- * for handling. This ensures that requests from policies that at some
- * point transition away from the
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
- */
- struct list_head nrs_policy_queued;
- /**
- * Service partition for this NRS head
- */
- struct ptlrpc_service_part *nrs_svcpt;
- /**
- * Primary policy, which is the preferred policy for handling RPCs
- */
- struct ptlrpc_nrs_policy *nrs_policy_primary;
- /**
- * Fallback policy, which is the backup policy for handling RPCs
- */
- struct ptlrpc_nrs_policy *nrs_policy_fallback;
- /**
- * This NRS head handles either HP or regular requests
- */
- enum ptlrpc_nrs_queue_type nrs_queue_type;
- /**
- * # queued requests from all policies in this NRS head
- */
- unsigned long nrs_req_queued;
- /**
- * # scheduled requests from all policies in this NRS head
- */
- unsigned long nrs_req_started;
- /**
- * # policies on this NRS
- */
- unsigned nrs_num_pols;
- /**
- * This NRS head is in progress of starting a policy
- */
- unsigned nrs_policy_starting:1;
- /**
- * In progress of shutting down the whole NRS head; used during
- * unregistration
- */
- unsigned nrs_stopping:1;
-};
-
-#define NRS_POL_NAME_MAX 16
-
-struct ptlrpc_nrs_pol_desc;
-
-/**
- * Service compatibility predicate; this determines whether a policy is adequate
- * for handling RPCs of a particular PTLRPC service.
- *
- * XXX:This should give the same result during policy registration and
- * unregistration, and for all partitions of a service; so the result should not
- * depend on temporal service or other properties, that may influence the
- * result.
- */
-typedef bool (*nrs_pol_desc_compat_t) (const struct ptlrpc_service *svc,
- const struct ptlrpc_nrs_pol_desc *desc);
-
-struct ptlrpc_nrs_pol_conf {
- /**
- * Human-readable policy name
- */
- char nc_name[NRS_POL_NAME_MAX];
- /**
- * NRS operations for this policy
- */
- const struct ptlrpc_nrs_pol_ops *nc_ops;
- /**
- * Service compatibility predicate
- */
- nrs_pol_desc_compat_t nc_compat;
- /**
- * Set for policies that support a single ptlrpc service, i.e. ones that
- * have \a pd_compat set to nrs_policy_compat_one(). The variable value
- * depicts the name of the single service that such policies are
- * compatible with.
- */
- const char *nc_compat_svc_name;
- /**
- * Owner module for this policy descriptor; policies registering from a
- * different module to the one the NRS framework is held within
- * (currently ptlrpc), should set this field to THIS_MODULE.
- */
- struct module *nc_owner;
- /**
- * Policy registration flags; a bitmask of \e nrs_policy_flags
- */
- unsigned nc_flags;
-};
-
-/**
- * NRS policy registering descriptor
- *
- * Is used to hold a description of a policy that can be passed to NRS core in
- * order to register the policy with NRS heads in different PTLRPC services.
- */
-struct ptlrpc_nrs_pol_desc {
- /**
- * Human-readable policy name
- */
- char pd_name[NRS_POL_NAME_MAX];
- /**
- * Link into nrs_core::nrs_policies
- */
- struct list_head pd_list;
- /**
- * NRS operations for this policy
- */
- const struct ptlrpc_nrs_pol_ops *pd_ops;
- /**
- * Service compatibility predicate
- */
- nrs_pol_desc_compat_t pd_compat;
- /**
- * Set for policies that are compatible with only one PTLRPC service.
- *
- * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
- */
- const char *pd_compat_svc_name;
- /**
- * Owner module for this policy descriptor.
- *
- * We need to hold a reference to the module whenever we might make use
- * of any of the module's contents, i.e.
- * - If one or more instances of the policy are at a state where they
- * might be handling a request, i.e.
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
- * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
- * is taken on the module when
- * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
- * becomes 0, so that we hold only one reference to the module maximum
- * at any time.
- *
- * We do not need to hold a reference to the module, even though we
- * might use code and data from the module, in the following cases:
- * - During external policy registration, because this should happen in
- * the module's init() function, in which case the module is safe from
- * removal because a reference is being held on the module by the
- * kernel, and iirc kmod (and I guess module-init-tools also) will
- * serialize any racing processes properly anyway.
- * - During external policy unregistration, because this should happen
- * in a module's exit() function, and any attempts to start a policy
- * instance would need to take a reference on the module, and this is
- * not possible once we have reached the point where the exit()
- * handler is called.
- * - During service registration and unregistration, as service setup
- * and cleanup, and policy registration, unregistration and policy
- * instance starting, are serialized by \e nrs_core::nrs_mutex, so
- * as long as users adhere to the convention of registering policies
- * in init() and unregistering them in module exit() functions, there
- * should not be a race between these operations.
- * - During any policy-specific lprocfs operations, because a reference
- * is held by the kernel on a proc entry that has been entered by a
- * syscall, so as long as proc entries are removed during unregistration time,
- * then unregistration and lprocfs operations will be properly
- * serialized.
- */
- struct module *pd_owner;
- /**
- * Bitmask of \e nrs_policy_flags
- */
- unsigned pd_flags;
- /**
- * # of references on this descriptor
- */
- atomic_t pd_refs;
-};
-
-/**
- * NRS policy state
- *
- * Policies transition from one state to the other during their lifetime
- */
-enum ptlrpc_nrs_pol_state {
- /**
- * Not a valid policy state.
- */
- NRS_POL_STATE_INVALID,
- /**
- * Policies are at this state either at the start of their life, or
- * transition here when the user selects a different policy to act
- * as the primary one.
- */
- NRS_POL_STATE_STOPPED,
- /**
- * Policy is progress of stopping
- */
- NRS_POL_STATE_STOPPING,
- /**
- * Policy is in progress of starting
- */
- NRS_POL_STATE_STARTING,
- /**
- * A policy is in this state in two cases:
- * - it is the fallback policy, which is always in this state.
- * - it has been activated by the user; i.e. it is the primary policy,
- */
- NRS_POL_STATE_STARTED,
-};
-
-/**
- * NRS policy information
- *
- * Used for obtaining information for the status of a policy via lprocfs
- */
-struct ptlrpc_nrs_pol_info {
- /**
- * Policy name
- */
- char pi_name[NRS_POL_NAME_MAX];
- /**
- * Current policy state
- */
- enum ptlrpc_nrs_pol_state pi_state;
- /**
- * # RPCs enqueued for later dispatching by the policy
- */
- long pi_req_queued;
- /**
- * # RPCs started for dispatch by the policy
- */
- long pi_req_started;
- /**
- * Is this a fallback policy?
- */
- unsigned pi_fallback:1;
-};
-
-/**
- * NRS policy
- *
- * There is one instance of this for each policy in each NRS head of each
- * PTLRPC service partition.
- */
-struct ptlrpc_nrs_policy {
- /**
- * Linkage into the NRS head's list of policies,
- * ptlrpc_nrs:nrs_policy_list
- */
- struct list_head pol_list;
- /**
- * Linkage into the NRS head's list of policies with enqueued
- * requests ptlrpc_nrs:nrs_policy_queued
- */
- struct list_head pol_list_queued;
- /**
- * Current state of this policy
- */
- enum ptlrpc_nrs_pol_state pol_state;
- /**
- * Bitmask of nrs_policy_flags
- */
- unsigned pol_flags;
- /**
- * # RPCs enqueued for later dispatching by the policy
- */
- long pol_req_queued;
- /**
- * # RPCs started for dispatch by the policy
- */
- long pol_req_started;
- /**
- * Usage Reference count taken on the policy instance
- */
- long pol_ref;
- /**
- * The NRS head this policy has been created at
- */
- struct ptlrpc_nrs *pol_nrs;
- /**
- * Private policy data; varies by policy type
- */
- void *pol_private;
- /**
- * Policy descriptor for this policy instance.
- */
- struct ptlrpc_nrs_pol_desc *pol_desc;
-};
-
-/**
- * NRS resource
- *
- * Resources are embedded into two types of NRS entities:
- * - Inside NRS policies, in the policy's private data in
- * ptlrpc_nrs_policy::pol_private
- * - In objects that act as prime-level scheduling entities in different NRS
- * policies; e.g. on a policy that performs round robin or similar order
- * scheduling across client NIDs, there would be one NRS resource per unique
- * client NID. On a policy which performs round robin scheduling across
- * backend filesystem objects, there would be one resource associated with
- * each of the backend filesystem objects partaking in the scheduling
- * performed by the policy.
- *
- * NRS resources share a parent-child relationship, in which resources embedded
- * in policy instances are the parent entities, with all scheduling entities
- * a policy schedules across being the children, thus forming a simple resource
- * hierarchy. This hierarchy may be extended with one or more levels in the
- * future if the ability to have more than one primary policy is added.
- *
- * Upon request initialization, references to the then active NRS policies are
- * taken and used to later handle the dispatching of the request with one of
- * these policies.
- *
- * \see nrs_resource_get_safe()
- * \see ptlrpc_nrs_req_add()
- */
-struct ptlrpc_nrs_resource {
- /**
- * This NRS resource's parent; is NULL for resources embedded in NRS
- * policy instances; i.e. those are top-level ones.
- */
- struct ptlrpc_nrs_resource *res_parent;
- /**
- * The policy associated with this resource.
- */
- struct ptlrpc_nrs_policy *res_policy;
-};
-
-enum {
- NRS_RES_FALLBACK,
- NRS_RES_PRIMARY,
- NRS_RES_MAX
-};
-
-/* \name fifo
- *
- * FIFO policy
- *
- * This policy is a logical wrapper around previous, non-NRS functionality.
- * It dispatches RPCs in the same order as they arrive from the network. This
- * policy is currently used as the fallback policy, and the only enabled policy
- * on all NRS heads of all PTLRPC service partitions.
- * @{
- */
-
-/**
- * Private data structure for the FIFO policy
- */
-struct nrs_fifo_head {
- /**
- * Resource object for policy instance.
- */
- struct ptlrpc_nrs_resource fh_res;
- /**
- * List of queued requests.
- */
- struct list_head fh_list;
- /**
- * For debugging purposes.
- */
- __u64 fh_sequence;
-};
-
-struct nrs_fifo_req {
- struct list_head fr_list;
- __u64 fr_sequence;
-};
-
-/** @} fifo */
-
-/**
- * NRS request
- *
- * Instances of this object exist embedded within ptlrpc_request; the main
- * purpose of this object is to hold references to the request's resources
- * for the lifetime of the request, and to hold properties that policies use
- * use for determining the request's scheduling priority.
- */
-struct ptlrpc_nrs_request {
- /**
- * The request's resource hierarchy.
- */
- struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX];
- /**
- * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
- * policy that was used to enqueue the request.
- *
- * \see nrs_request_enqueue()
- */
- unsigned nr_res_idx;
- unsigned nr_initialized:1;
- unsigned nr_enqueued:1;
- unsigned nr_started:1;
- unsigned nr_finalized:1;
-
- /**
- * Policy-specific fields, used for determining a request's scheduling
- * priority, and other supporting functionality.
- */
- union {
- /**
- * Fields for the FIFO policy
- */
- struct nrs_fifo_req fifo;
- } nr_u;
- /**
- * Externally-registering policies may want to use this to allocate
- * their own request properties.
- */
- void *ext;
-};
-
-/** @} nrs */
+#include "lustre_nrs.h"
/**
* Basic request prioritization operations structure.
@@ -1304,6 +600,8 @@ struct ptlrpc_cli_req {
union ptlrpc_async_args cr_async_args;
/** Opaq data for replay and commit callbacks. */
void *cr_cb_data;
+ /** Link to the imp->imp_unreplied_list */
+ struct list_head cr_unreplied_list;
/**
* Commit callback, called when request is committed and about to be
* freed.
@@ -1343,6 +641,7 @@ struct ptlrpc_cli_req {
#define rq_interpret_reply rq_cli.cr_reply_interp
#define rq_async_args rq_cli.cr_async_args
#define rq_cb_data rq_cli.cr_cb_data
+#define rq_unreplied_list rq_cli.cr_unreplied_list
#define rq_commit_cb rq_cli.cr_commit_cb
#define rq_replay_cb rq_cli.cr_replay_cb
@@ -1505,6 +804,8 @@ struct ptlrpc_request {
__u64 rq_transno;
/** xid */
__u64 rq_xid;
+ /** bulk match bits */
+ u64 rq_mbits;
/**
* List item to for replay list. Not yet committed requests get linked
* there.
@@ -1793,10 +1094,93 @@ struct ptlrpc_bulk_page {
struct page *bp_page;
};
-#define BULK_GET_SOURCE 0
-#define BULK_PUT_SINK 1
-#define BULK_GET_SINK 2
-#define BULK_PUT_SOURCE 3
+enum ptlrpc_bulk_op_type {
+ PTLRPC_BULK_OP_ACTIVE = 0x00000001,
+ PTLRPC_BULK_OP_PASSIVE = 0x00000002,
+ PTLRPC_BULK_OP_PUT = 0x00000004,
+ PTLRPC_BULK_OP_GET = 0x00000008,
+ PTLRPC_BULK_BUF_KVEC = 0x00000010,
+ PTLRPC_BULK_BUF_KIOV = 0x00000020,
+ PTLRPC_BULK_GET_SOURCE = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET,
+ PTLRPC_BULK_PUT_SINK = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT,
+ PTLRPC_BULK_GET_SINK = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET,
+ PTLRPC_BULK_PUT_SOURCE = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT,
+};
+
+static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET;
+}
+
+static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE;
+}
+
+static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK;
+}
+
+static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_GET_SINK) == PTLRPC_BULK_GET_SINK;
+}
+
+static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE;
+}
+
+static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
+ == PTLRPC_BULK_BUF_KVEC;
+}
+
+static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
+ == PTLRPC_BULK_BUF_KIOV;
+}
+
+static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_OP_ACTIVE) |
+ (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE;
+}
+
+static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_OP_ACTIVE) |
+ (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE;
+}
+
+struct ptlrpc_bulk_frag_ops {
+ /**
+ * Add a page \a page to the bulk descriptor \a desc.
+ * Data to transfer in the page starts at offset \a pageoffset and
+ * the amount of data to transfer from the page is \a len.
+ */
+ void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc,
+ struct page *page, int pageoffset, int len);
+
+ /**
+ * Add a fragment \a frag to the bulk descriptor \a desc.
+ * Data to transfer in the fragment is pointed to by \a frag and
+ * the size of the fragment is \a len.
+ */
+ int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len);
+
+ /**
+ * Uninitialize and free bulk descriptor \a desc.
+ * Works on bulk descriptors from both the server and client side.
+ */
+ void (*release_frags)(struct ptlrpc_bulk_desc *desc);
+};
+
+extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops;
+extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops;
/**
* Definition of bulk descriptor.
@@ -1811,14 +1195,14 @@ struct ptlrpc_bulk_page {
struct ptlrpc_bulk_desc {
/** completed with failure */
unsigned long bd_failure:1;
- /** {put,get}{source,sink} */
- unsigned long bd_type:2;
/** client side */
unsigned long bd_registered:1;
/** For serialization with callback */
spinlock_t bd_lock;
/** Import generation when request for this bulk was sent */
int bd_import_generation;
+ /** {put,get}{source,sink}{kvec,kiov} */
+ enum ptlrpc_bulk_op_type bd_type;
/** LNet portal for this bulk */
__u32 bd_portal;
/** Server side - export this bulk created for */
@@ -1827,13 +1211,14 @@ struct ptlrpc_bulk_desc {
struct obd_import *bd_import;
/** Back pointer to the request */
struct ptlrpc_request *bd_req;
+ struct ptlrpc_bulk_frag_ops *bd_frag_ops;
wait_queue_head_t bd_waitq; /* server side only WQ */
int bd_iov_count; /* # entries in bd_iov */
int bd_max_iov; /* allocated size of bd_iov */
int bd_nob; /* # bytes covered */
int bd_nob_transferred; /* # bytes GOT/PUT */
- __u64 bd_last_xid;
+ u64 bd_last_mbits;
struct ptlrpc_cb_id bd_cbid; /* network callback info */
lnet_nid_t bd_sender; /* stash event::sender */
@@ -1842,14 +1227,31 @@ struct ptlrpc_bulk_desc {
/** array of associated MDs */
lnet_handle_md_t bd_mds[PTLRPC_BULK_OPS_COUNT];
- /*
- * encrypt iov, size is either 0 or bd_iov_count.
- */
- lnet_kiov_t *bd_enc_iov;
-
- lnet_kiov_t bd_iov[0];
+ union {
+ struct {
+ /*
+ * encrypt iov, size is either 0 or bd_iov_count.
+ */
+ struct bio_vec *bd_enc_vec;
+ struct bio_vec *bd_vec; /* Array of bio_vecs */
+ } bd_kiov;
+
+ struct {
+ struct kvec *bd_enc_kvec;
+ struct kvec *bd_kvec; /* Array of kvecs */
+ } bd_kvec;
+ } bd_u;
};
+#define GET_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_vec)
+#define BD_GET_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_vec[i])
+#define GET_ENC_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_enc_vec)
+#define BD_GET_ENC_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_enc_vec[i])
+#define GET_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_kvec)
+#define BD_GET_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_kvec[i])
+#define GET_ENC_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_enc_kvec)
+#define BD_GET_ENC_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_enc_kvec[i])
+
enum {
SVC_STOPPED = 1 << 0,
SVC_STOPPING = 1 << 1,
@@ -2464,21 +1866,17 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
void ptlrpc_req_finished(struct ptlrpc_request *request);
struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
- unsigned npages, unsigned max_brw,
- unsigned type, unsigned portal);
-void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin);
-static inline void ptlrpc_free_bulk_pin(struct ptlrpc_bulk_desc *bulk)
-{
- __ptlrpc_free_bulk(bulk, 1);
-}
-
-static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk)
-{
- __ptlrpc_free_bulk(bulk, 0);
-}
-
+ unsigned int nfrags,
+ unsigned int max_brw,
+ unsigned int type,
+ unsigned int portal,
+ const struct ptlrpc_bulk_frag_ops *ops);
+
+int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc,
+ void *frag, int len);
void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
- struct page *page, int pageoffset, int len, int);
+ struct page *page, int pageoffset, int len,
+ int pin);
static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
struct page *page, int pageoffset,
int len)
@@ -2493,6 +1891,16 @@ static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
__ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
}
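[Review note: putting the new descriptor API together, a client-side sketch; prep_one_page_read() is a hypothetical caller, OST_BULK_PORTAL and PAGE_SIZE are existing symbols.]

	static int prep_one_page_read(struct ptlrpc_request *req,
				      struct page *page)
	{
		struct ptlrpc_bulk_desc *desc;

		/* one fragment, one RPC; the server PUTs into our sink
		 * pages, which stay pinned for the transfer
		 */
		desc = ptlrpc_prep_bulk_imp(req, 1, 1,
					    PTLRPC_BULK_PUT_SINK |
					    PTLRPC_BULK_BUF_KIOV,
					    OST_BULK_PORTAL,
					    &ptlrpc_bulk_kiov_pin_ops);
		if (!desc)
			return -ENOMEM;

		ptlrpc_prep_bulk_page_pin(desc, page, 0, PAGE_SIZE);
		return 0;
	}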
+void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
+
+static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc)
+{
+ int i;
+
+ for (i = 0; i < desc->bd_iov_count; i++)
+ put_page(BD_GET_KIOV(desc, i).bv_page);
+}
+
void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
struct obd_import *imp);
__u64 ptlrpc_next_xid(void);
@@ -2652,6 +2060,7 @@ struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg);
__u32 lustre_msg_get_type(struct lustre_msg *msg);
void lustre_msg_add_version(struct lustre_msg *msg, u32 version);
__u32 lustre_msg_get_opc(struct lustre_msg *msg);
+__u16 lustre_msg_get_tag(struct lustre_msg *msg);
__u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
__u64 *lustre_msg_get_versions(struct lustre_msg *msg);
__u64 lustre_msg_get_transno(struct lustre_msg *msg);
@@ -2670,6 +2079,8 @@ void lustre_msg_set_handle(struct lustre_msg *msg,
struct lustre_handle *handle);
void lustre_msg_set_type(struct lustre_msg *msg, __u32 type);
void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc);
+void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid);
+void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag);
void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions);
void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno);
void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
@@ -2679,6 +2090,7 @@ void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout);
void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time);
void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid);
void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum);
+void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits);
static inline void
lustre_shrink_reply(struct ptlrpc_request *req, int segment,
diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs.h b/drivers/staging/lustre/lustre/include/lustre_nrs.h
new file mode 100644
index 000000000000..a5028aaa19cd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_nrs.h
@@ -0,0 +1,717 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ *
+ * Network Request Scheduler (NRS)
+ *
+ */
+
+#ifndef _LUSTRE_NRS_H
+#define _LUSTRE_NRS_H
+
+/**
+ * \defgroup nrs Network Request Scheduler
+ * @{
+ */
+struct ptlrpc_nrs_policy;
+struct ptlrpc_nrs_resource;
+struct ptlrpc_nrs_request;
+
+/**
+ * NRS control operations.
+ *
+ * These are common for all policies.
+ */
+enum ptlrpc_nrs_ctl {
+ /**
+ * Not a valid opcode.
+ */
+ PTLRPC_NRS_CTL_INVALID,
+ /**
+ * Activate the policy.
+ */
+ PTLRPC_NRS_CTL_START,
+ /**
+ * Reserved for multiple primary policies, which may be a possibility
+ * in the future.
+ */
+ PTLRPC_NRS_CTL_STOP,
+ /**
+ * Policies can start using opcodes from this value and onwards for
+ * their own purposes; the assigned value itself is arbitrary.
+ */
+ PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
+};
+
+/**
+ * NRS policy operations.
+ *
+ * These determine the behaviour of a policy, and are called in response to
+ * NRS core events.
+ */
+struct ptlrpc_nrs_pol_ops {
+ /**
+ * Called during policy registration; this operation is optional.
+ *
+ * \param[in,out] policy The policy being initialized
+ */
+ int (*op_policy_init)(struct ptlrpc_nrs_policy *policy);
+ /**
+ * Called during policy unregistration; this operation is optional.
+ *
+ * \param[in,out] policy The policy being unregistered/finalized
+ */
+ void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
+ /**
+ * Called when activating a policy via lprocfs; policies allocate and
+ * initialize their resources here; this operation is optional.
+ *
+ * \param[in,out] policy The policy being started
+ *
+ * \see nrs_policy_start_locked()
+ */
+ int (*op_policy_start)(struct ptlrpc_nrs_policy *policy);
+ /**
+ * Called when deactivating a policy via lprocfs; policies deallocate
+ * their resources here; this operation is optional
+ *
+ * \param[in,out] policy The policy being stopped
+ *
+ * \see nrs_policy_stop0()
+ */
+ void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
+ /**
+ * Used for policy-specific operations; i.e. not generic ones like
+	 * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_STOP; analogous
+ * to an ioctl; this operation is optional.
+ *
+ * \param[in,out] policy The policy carrying out operation \a opc
+ * \param[in] opc The command operation being carried out
+	 * \param[in,out] arg A generic buffer for communication between the
+ * user and the control operation
+ *
+ * \retval -ve error
+ * \retval 0 success
+ *
+ * \see ptlrpc_nrs_policy_control()
+ */
+ int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
+ enum ptlrpc_nrs_ctl opc, void *arg);
+
+ /**
+ * Called when obtaining references to the resources of the resource
+ * hierarchy for a request that has arrived for handling at the PTLRPC
+ * service. Policies should return -ve for requests they do not wish
+ * to handle. This operation is mandatory.
+ *
+ * \param[in,out] policy The policy we're getting resources for.
+ * \param[in,out] nrq The request we are getting resources for.
+ * \param[in] parent The parent resource of the resource being
+ * requested; set to NULL if none.
+ * \param[out] resp The resource is to be returned here; the
+ * fallback policy in an NRS head should
+ * \e always return a non-NULL pointer value.
+ * \param[in] moving_req When set, signifies that this is an attempt
+ * to obtain resources for a request being moved
+ * to the high-priority NRS head by
+ * ldlm_lock_reorder_req().
+ * This implies two things:
+ * 1. We are under obd_export::exp_rpc_lock and
+ * so should not sleep.
+	 *				2. We should not perform non-idempotent
+	 *				   operations, and may skip idempotent
+	 *				   operations already carried out when
+	 *				   resources were first taken for the
+	 *				   request in ptlrpc_nrs_req_initialize().
+ *
+ * \retval 0, +ve The level of the returned resource in the resource
+ * hierarchy; currently only 0 (for a non-leaf resource)
+ * and 1 (for a leaf resource) are supported by the
+ * framework.
+ * \retval -ve error
+ *
+ * \see ptlrpc_nrs_req_initialize()
+ * \see ptlrpc_nrs_hpreq_add_nolock()
+ * \see ptlrpc_nrs_req_hp_move()
+ */
+ int (*op_res_get)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq,
+ const struct ptlrpc_nrs_resource *parent,
+ struct ptlrpc_nrs_resource **resp,
+ bool moving_req);
+ /**
+ * Called when releasing references taken for resources in the resource
+ * hierarchy for the request; this operation is optional.
+ *
+ * \param[in,out] policy The policy the resource belongs to
+ * \param[in] res The resource to be freed
+ *
+ * \see ptlrpc_nrs_req_finalize()
+ * \see ptlrpc_nrs_hpreq_add_nolock()
+ * \see ptlrpc_nrs_req_hp_move()
+ */
+ void (*op_res_put)(struct ptlrpc_nrs_policy *policy,
+ const struct ptlrpc_nrs_resource *res);
+
+ /**
+ * Obtains a request for handling from the policy, and optionally
+ * removes the request from the policy; this operation is mandatory.
+ *
+ * \param[in,out] policy The policy to poll
+ * \param[in] peek When set, signifies that we just want to
+ * examine the request, and not handle it, so the
+ * request is not removed from the policy.
+ * \param[in] force When set, it will force a policy to return a
+ * request if it has one queued.
+ *
+ * \retval NULL No request available for handling
+ * \retval valid-pointer The request polled for handling
+ *
+ * \see ptlrpc_nrs_req_get_nolock()
+ */
+ struct ptlrpc_nrs_request *
+ (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
+ bool force);
+ /**
+ * Called when attempting to add a request to a policy for later
+ * handling; this operation is mandatory.
+ *
+ * \param[in,out] policy The policy on which to enqueue \a nrq
+ * \param[in,out] nrq The request to enqueue
+ *
+ * \retval 0 success
+ * \retval != 0 error
+ *
+ * \see ptlrpc_nrs_req_add_nolock()
+ */
+ int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
+ /**
+ * Removes a request from the policy's set of pending requests. Normally
+ * called after a request has been polled successfully from the policy
+ * for handling; this operation is mandatory.
+ *
+ * \param[in,out] policy The policy the request \a nrq belongs to
+ * \param[in,out] nrq The request to dequeue
+ *
+ * \see ptlrpc_nrs_req_del_nolock()
+ */
+ void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
+ /**
+	 * Called after the request has been carried out. Could be used for
+	 * job/resource control; this operation is optional.
+ *
+	 * \param[in,out] policy The policy that has finished handling request
+ * \a nrq
+ * \param[in,out] nrq The request
+ *
+ * \pre assert_spin_locked(&svcpt->scp_req_lock)
+ *
+ * \see ptlrpc_nrs_req_stop_nolock()
+ */
+ void (*op_req_stop)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
+ /**
+ * Registers the policy's lprocfs interface with a PTLRPC service.
+ *
+ * \param[in] svc The service
+ *
+ * \retval 0 success
+ * \retval != 0 error
+ */
+ int (*op_lprocfs_init)(struct ptlrpc_service *svc);
+ /**
+	 * Unregisters the policy's lprocfs interface from a PTLRPC service.
+ *
+ * In cases of failed policy registration in
+ * \e ptlrpc_nrs_policy_register(), this function may be called for a
+ * service which has not registered the policy successfully, so
+ * implementations of this method should make sure their operations are
+ * safe in such cases.
+ *
+ * \param[in] svc The service
+ */
+ void (*op_lprocfs_fini)(struct ptlrpc_service *svc);
+};
+
+/**
+ * Policy flags
+ */
+enum nrs_policy_flags {
+ /**
+	 * Fallback policy; use this flag on only a single supported policy
+	 * per service. The flag cannot be used on policies that use
+ * \e PTLRPC_NRS_FL_REG_EXTERN
+ */
+ PTLRPC_NRS_FL_FALLBACK = BIT(0),
+ /**
+ * Start policy immediately after registering.
+ */
+ PTLRPC_NRS_FL_REG_START = BIT(1),
+ /**
+	 * This is a policy registering from a module different from the one
+	 * NRS core ships in (currently ptlrpc).
+ */
+ PTLRPC_NRS_FL_REG_EXTERN = BIT(2),
+};
+
+/**
+ * NRS queue type.
+ *
+ * Denotes whether an NRS instance is for handling normal or high-priority
+ * RPCs, or whether an operation pertains to one or both of the NRS instances
+ * in a service.
+ */
+enum ptlrpc_nrs_queue_type {
+ PTLRPC_NRS_QUEUE_REG = BIT(0),
+ PTLRPC_NRS_QUEUE_HP = BIT(1),
+ PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
+};
+
+/**
+ * NRS head
+ *
+ * A PTLRPC service has at least one NRS head instance for handling normal
+ * priority RPCs, and may optionally have a second NRS head instance for
+ * handling high-priority RPCs. Each NRS head maintains a list of available
+ * policies, of which one and only one policy is acting as the fallback policy,
+ * and optionally a different policy may be acting as the primary policy. For
+ * all RPCs handled by this NRS head instance, NRS core will first attempt to
+ * enqueue the RPC using the primary policy (if any). The fallback policy is
+ * used in the following cases:
+ * - when there was no primary policy in the
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
+ * was initialized.
+ * - when the primary policy that was at the
+ *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the
+ *   RPC was initialized, signalled that it did not wish, or for some
+ *   other reason was not able, to handle the request, by returning a
+ *   non-valid NRS resource reference.
+ * - when the primary policy that was at the
+ *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the
+ *   RPC was initialized, fails later during the request enqueueing stage.
+ *
+ * \see nrs_resource_get_safe()
+ * \see nrs_request_enqueue()
+ */
+struct ptlrpc_nrs {
+ spinlock_t nrs_lock;
+ /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
+ /**
+ * List of registered policies
+ */
+ struct list_head nrs_policy_list;
+ /**
+ * List of policies with queued requests. Policies that have any
+ * outstanding requests are queued here, and this list is queried
+ * in a round-robin manner from NRS core when obtaining a request
+ * for handling. This ensures that requests from policies that at some
+ * point transition away from the
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
+ */
+ struct list_head nrs_policy_queued;
+ /**
+ * Service partition for this NRS head
+ */
+ struct ptlrpc_service_part *nrs_svcpt;
+ /**
+ * Primary policy, which is the preferred policy for handling RPCs
+ */
+ struct ptlrpc_nrs_policy *nrs_policy_primary;
+ /**
+ * Fallback policy, which is the backup policy for handling RPCs
+ */
+ struct ptlrpc_nrs_policy *nrs_policy_fallback;
+ /**
+ * This NRS head handles either HP or regular requests
+ */
+ enum ptlrpc_nrs_queue_type nrs_queue_type;
+ /**
+ * # queued requests from all policies in this NRS head
+ */
+ unsigned long nrs_req_queued;
+ /**
+ * # scheduled requests from all policies in this NRS head
+ */
+ unsigned long nrs_req_started;
+ /**
+ * # policies on this NRS
+ */
+ unsigned int nrs_num_pols;
+ /**
+	 * This NRS head is in the process of starting a policy
+ */
+ unsigned int nrs_policy_starting:1;
+ /**
+ * In progress of shutting down the whole NRS head; used during
+ * unregistration
+ */
+ unsigned int nrs_stopping:1;
+ /**
+	 * NRS policy is throttling requests
+ */
+ unsigned int nrs_throttling:1;
+};
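
The primary/fallback behaviour described in the comment above reduces to a simple fall-through at enqueue time. A hedged sketch, where example_policy_enqueue() is a hypothetical helper standing in for the actual nrs_resource_get_safe()/nrs_request_enqueue() pair:

/* Hedged sketch of the enqueue fall-through: try the primary policy
 * first and fall back when it declines or fails to enqueue. The
 * fallback policy must always accept the request.
 */
static int example_nrs_enqueue(struct ptlrpc_nrs *nrs,
			       struct ptlrpc_nrs_request *nrq)
{
	struct ptlrpc_nrs_policy *primary = nrs->nrs_policy_primary;
	struct ptlrpc_nrs_policy *fallback = nrs->nrs_policy_fallback;

	if (primary && primary->pol_state == NRS_POL_STATE_STARTED &&
	    example_policy_enqueue(primary, nrq) == 0)	/* hypothetical */
		return 0;

	return example_policy_enqueue(fallback, nrq);	/* hypothetical */
}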
+
+#define NRS_POL_NAME_MAX 16
+#define NRS_POL_ARG_MAX 16
+
+struct ptlrpc_nrs_pol_desc;
+
+/**
+ * Service compatibility predicate; this determines whether a policy is adequate
+ * for handling RPCs of a particular PTLRPC service.
+ *
+ * XXX: This should give the same result during policy registration and
+ * unregistration, and for all partitions of a service; so the result
+ * should not depend on temporal service state or other properties that
+ * may change over time.
+ */
+typedef bool (*nrs_pol_desc_compat_t)(const struct ptlrpc_service *svc,
+ const struct ptlrpc_nrs_pol_desc *desc);
+
+struct ptlrpc_nrs_pol_conf {
+ /**
+ * Human-readable policy name
+ */
+ char nc_name[NRS_POL_NAME_MAX];
+ /**
+ * NRS operations for this policy
+ */
+ const struct ptlrpc_nrs_pol_ops *nc_ops;
+ /**
+ * Service compatibility predicate
+ */
+ nrs_pol_desc_compat_t nc_compat;
+ /**
+ * Set for policies that support a single ptlrpc service, i.e. ones that
+ * have \a pd_compat set to nrs_policy_compat_one(). The variable value
+	 * holds the name of the single service that such policies are
+ * compatible with.
+ */
+ const char *nc_compat_svc_name;
+ /**
+ * Owner module for this policy descriptor; policies registering from a
+	 * different module from the one the NRS framework is held within
+ * (currently ptlrpc), should set this field to THIS_MODULE.
+ */
+ struct module *nc_owner;
+ /**
+ * Policy registration flags; a bitmask of \e nrs_policy_flags
+ */
+ unsigned int nc_flags;
+};
+
+/**
+ * NRS policy registering descriptor
+ *
+ * Is used to hold a description of a policy that can be passed to NRS core in
+ * order to register the policy with NRS heads in different PTLRPC services.
+ */
+struct ptlrpc_nrs_pol_desc {
+ /**
+ * Human-readable policy name
+ */
+ char pd_name[NRS_POL_NAME_MAX];
+ /**
+ * Link into nrs_core::nrs_policies
+ */
+ struct list_head pd_list;
+ /**
+ * NRS operations for this policy
+ */
+ const struct ptlrpc_nrs_pol_ops *pd_ops;
+ /**
+ * Service compatibility predicate
+ */
+ nrs_pol_desc_compat_t pd_compat;
+ /**
+ * Set for policies that are compatible with only one PTLRPC service.
+ *
+ * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
+ */
+ const char *pd_compat_svc_name;
+ /**
+ * Owner module for this policy descriptor.
+ *
+ * We need to hold a reference to the module whenever we might make use
+ * of any of the module's contents, i.e.
+ * - If one or more instances of the policy are at a state where they
+ * might be handling a request, i.e.
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
+ * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
+ * is taken on the module when
+ * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
+ * becomes 0, so that we hold only one reference to the module maximum
+ * at any time.
+ *
+ * We do not need to hold a reference to the module, even though we
+ * might use code and data from the module, in the following cases:
+ * - During external policy registration, because this should happen in
+ * the module's init() function, in which case the module is safe from
+ * removal because a reference is being held on the module by the
+ * kernel, and iirc kmod (and I guess module-init-tools also) will
+ * serialize any racing processes properly anyway.
+ * - During external policy unregistration, because this should happen
+ * in a module's exit() function, and any attempts to start a policy
+ * instance would need to take a reference on the module, and this is
+ * not possible once we have reached the point where the exit()
+ * handler is called.
+ * - During service registration and unregistration, as service setup
+ * and cleanup, and policy registration, unregistration and policy
+ * instance starting, are serialized by \e nrs_core::nrs_mutex, so
+ * as long as users adhere to the convention of registering policies
+ * in init() and unregistering them in module exit() functions, there
+ * should not be a race between these operations.
+ * - During any policy-specific lprocfs operations, because a reference
+ * is held by the kernel on a proc entry that has been entered by a
+ * syscall, so as long as proc entries are removed during
+ * unregistration time, then unregistration and lprocfs operations
+ * will be properly serialized.
+ */
+ struct module *pd_owner;
+ /**
+ * Bitmask of \e nrs_policy_flags
+ */
+ unsigned int pd_flags;
+ /**
+ * # of references on this descriptor
+ */
+ atomic_t pd_refs;
+};
+
+/**
+ * NRS policy state
+ *
+ * Policies transition from one state to the other during their lifetime
+ */
+enum ptlrpc_nrs_pol_state {
+ /**
+ * Not a valid policy state.
+ */
+ NRS_POL_STATE_INVALID,
+ /**
+ * Policies are at this state either at the start of their life, or
+ * transition here when the user selects a different policy to act
+ * as the primary one.
+ */
+ NRS_POL_STATE_STOPPED,
+ /**
+	 * Policy is in the process of stopping
+ */
+ NRS_POL_STATE_STOPPING,
+ /**
+	 * Policy is in the process of starting
+ */
+ NRS_POL_STATE_STARTING,
+ /**
+ * A policy is in this state in two cases:
+ * - it is the fallback policy, which is always in this state.
+	 * - it has been activated by the user; i.e. it is the primary policy.
+ */
+ NRS_POL_STATE_STARTED,
+};
+
+/**
+ * NRS policy information
+ *
+ * Used for obtaining information for the status of a policy via lprocfs
+ */
+struct ptlrpc_nrs_pol_info {
+ /**
+ * Policy name
+ */
+ char pi_name[NRS_POL_NAME_MAX];
+ /**
+ * Policy argument
+ */
+ char pi_arg[NRS_POL_ARG_MAX];
+ /**
+ * Current policy state
+ */
+ enum ptlrpc_nrs_pol_state pi_state;
+ /**
+ * # RPCs enqueued for later dispatching by the policy
+ */
+ long pi_req_queued;
+ /**
+ * # RPCs started for dispatch by the policy
+ */
+ long pi_req_started;
+ /**
+ * Is this a fallback policy?
+ */
+ unsigned pi_fallback:1;
+};
+
+/**
+ * NRS policy
+ *
+ * There is one instance of this for each policy in each NRS head of each
+ * PTLRPC service partition.
+ */
+struct ptlrpc_nrs_policy {
+ /**
+ * Linkage into the NRS head's list of policies,
+ * ptlrpc_nrs:nrs_policy_list
+ */
+ struct list_head pol_list;
+ /**
+ * Linkage into the NRS head's list of policies with enqueued
+ * requests ptlrpc_nrs:nrs_policy_queued
+ */
+ struct list_head pol_list_queued;
+ /**
+ * Current state of this policy
+ */
+ enum ptlrpc_nrs_pol_state pol_state;
+ /**
+ * Bitmask of nrs_policy_flags
+ */
+ unsigned int pol_flags;
+ /**
+ * # RPCs enqueued for later dispatching by the policy
+ */
+ long pol_req_queued;
+ /**
+ * # RPCs started for dispatch by the policy
+ */
+ long pol_req_started;
+ /**
+	 * Usage reference count taken on the policy instance
+ */
+ long pol_ref;
+ /**
+ * Human-readable policy argument
+ */
+ char pol_arg[NRS_POL_ARG_MAX];
+ /**
+ * The NRS head this policy has been created at
+ */
+ struct ptlrpc_nrs *pol_nrs;
+ /**
+ * Private policy data; varies by policy type
+ */
+ void *pol_private;
+ /**
+ * Policy descriptor for this policy instance.
+ */
+ struct ptlrpc_nrs_pol_desc *pol_desc;
+};
+
+/**
+ * NRS resource
+ *
+ * Resources are embedded into two types of NRS entities:
+ * - Inside NRS policies, in the policy's private data in
+ * ptlrpc_nrs_policy::pol_private
+ * - In objects that act as prime-level scheduling entities in different NRS
+ * policies; e.g. on a policy that performs round robin or similar order
+ * scheduling across client NIDs, there would be one NRS resource per unique
+ * client NID. On a policy which performs round robin scheduling across
+ * backend filesystem objects, there would be one resource associated with
+ * each of the backend filesystem objects partaking in the scheduling
+ * performed by the policy.
+ *
+ * NRS resources share a parent-child relationship, in which resources embedded
+ * in policy instances are the parent entities, with all scheduling entities
+ * a policy schedules across being the children, thus forming a simple resource
+ * hierarchy. This hierarchy may be extended with one or more levels in the
+ * future if the ability to have more than one primary policy is added.
+ *
+ * Upon request initialization, references to the then active NRS policies are
+ * taken and used to later handle the dispatching of the request with one of
+ * these policies.
+ *
+ * \see nrs_resource_get_safe()
+ * \see ptlrpc_nrs_req_add()
+ */
+struct ptlrpc_nrs_resource {
+ /**
+ * This NRS resource's parent; is NULL for resources embedded in NRS
+ * policy instances; i.e. those are top-level ones.
+ */
+ struct ptlrpc_nrs_resource *res_parent;
+ /**
+ * The policy associated with this resource.
+ */
+ struct ptlrpc_nrs_policy *res_policy;
+};
+
+enum {
+ NRS_RES_FALLBACK,
+ NRS_RES_PRIMARY,
+ NRS_RES_MAX
+};
+
+#include "lustre_nrs_fifo.h"
+
+/**
+ * NRS request
+ *
+ * Instances of this object exist embedded within ptlrpc_request; the main
+ * purpose of this object is to hold references to the request's resources
+ * for the lifetime of the request, and to hold properties that policies
+ * use for determining the request's scheduling priority.
+ **/
+struct ptlrpc_nrs_request {
+ /**
+ * The request's resource hierarchy.
+ */
+ struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX];
+ /**
+ * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
+ * policy that was used to enqueue the request.
+ *
+ * \see nrs_request_enqueue()
+ */
+ unsigned int nr_res_idx;
+ unsigned int nr_initialized:1;
+ unsigned int nr_enqueued:1;
+ unsigned int nr_started:1;
+ unsigned int nr_finalized:1;
+
+ /**
+ * Policy-specific fields, used for determining a request's scheduling
+ * priority, and other supporting functionality.
+ */
+ union {
+ /**
+ * Fields for the FIFO policy
+ */
+ struct nrs_fifo_req fifo;
+ } nr_u;
+ /**
+ * Externally-registering policies may want to use this to allocate
+ * their own request properties.
+ */
+ void *ext;
+};
+
+/** @} nrs */
+#endif
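
Taken together, an external policy module fills in a ptlrpc_nrs_pol_conf and hands it to ptlrpc_nrs_policy_register(), referenced in the pd_owner comment above. A hedged sketch of such a registration; my_nrs_ops is a hypothetical ptlrpc_nrs_pol_ops table, and nrs_policy_compat_all is an assumed compatibility helper (its _one counterpart is referenced in the nc_compat_svc_name comment):

/* Hedged sketch: registering an external NRS policy from module init.
 * Only op_res_get, op_req_get, op_req_enqueue and op_req_dequeue are
 * mandatory in my_nrs_ops (hypothetical, not shown).
 */
static struct ptlrpc_nrs_pol_conf my_nrs_conf = {
	.nc_name	= "example",
	.nc_ops		= &my_nrs_ops,			/* hypothetical */
	.nc_compat	= nrs_policy_compat_all,	/* assumed helper */
	.nc_owner	= THIS_MODULE,
	.nc_flags	= PTLRPC_NRS_FL_REG_EXTERN,
};

static int __init my_nrs_init(void)
{
	/* Serialized against service setup/cleanup by nrs_core::nrs_mutex. */
	return ptlrpc_nrs_policy_register(&my_nrs_conf);
}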
diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h
new file mode 100644
index 000000000000..3b5418eac6c4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h
@@ -0,0 +1,70 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ *
+ * Network Request Scheduler (NRS) First-in First-out (FIFO) policy
+ *
+ */
+
+#ifndef _LUSTRE_NRS_FIFO_H
+#define _LUSTRE_NRS_FIFO_H
+
+/* \name fifo
+ *
+ * FIFO policy
+ *
+ * This policy is a logical wrapper around previous, non-NRS functionality.
+ * It dispatches RPCs in the same order as they arrive from the network. This
+ * policy is currently used as the fallback policy, and the only enabled policy
+ * on all NRS heads of all PTLRPC service partitions.
+ * @{
+ */
+
+/**
+ * Private data structure for the FIFO policy
+ */
+struct nrs_fifo_head {
+ /**
+ * Resource object for policy instance.
+ */
+ struct ptlrpc_nrs_resource fh_res;
+ /**
+ * List of queued requests.
+ */
+ struct list_head fh_list;
+ /**
+ * For debugging purposes.
+ */
+ __u64 fh_sequence;
+};
+
+struct nrs_fifo_req {
+ struct list_head fr_list;
+ __u64 fr_sequence;
+};
+
+/** @} fifo */
+#endif
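
The FIFO structures above map directly onto the mandatory ptlrpc_nrs_pol_ops hooks. A hedged sketch of the enqueue/get pair, modeled loosely on the in-tree nrs_fifo implementation and assuming the head is stored in pol_private at policy start:

/* Hedged sketch: FIFO enqueue tags each request with a sequence number
 * and appends it to fh_list; get peeks at or pops the list head. These
 * would be wired up as .op_req_enqueue and .op_req_get.
 */
static int fifo_req_enqueue(struct ptlrpc_nrs_policy *policy,
			    struct ptlrpc_nrs_request *nrq)
{
	struct nrs_fifo_head *head = policy->pol_private;

	nrq->nr_u.fifo.fr_sequence = head->fh_sequence++;
	list_add_tail(&nrq->nr_u.fifo.fr_list, &head->fh_list);
	return 0;
}

static struct ptlrpc_nrs_request *
fifo_req_get(struct ptlrpc_nrs_policy *policy, bool peek, bool force)
{
	struct nrs_fifo_head *head = policy->pol_private;
	struct ptlrpc_nrs_request *nrq;

	nrq = list_first_entry_or_null(&head->fh_list,
				       struct ptlrpc_nrs_request,
				       nr_u.fifo.fr_list);
	if (nrq && !peek)
		list_del_init(&nrq->nr_u.fifo.fr_list);
	return nrq;
}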
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
index a13558e53274..fbcd39572cd0 100644
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -148,13 +148,12 @@ extern struct req_format RQF_MDS_GETATTR;
*/
extern struct req_format RQF_MDS_GETATTR_NAME;
extern struct req_format RQF_MDS_CLOSE;
-extern struct req_format RQF_MDS_RELEASE_CLOSE;
+extern struct req_format RQF_MDS_INTENT_CLOSE;
extern struct req_format RQF_MDS_CONNECT;
extern struct req_format RQF_MDS_DISCONNECT;
extern struct req_format RQF_MDS_GET_INFO;
extern struct req_format RQF_MDS_READPAGE;
extern struct req_format RQF_MDS_WRITEPAGE;
-extern struct req_format RQF_MDS_DONE_WRITING;
extern struct req_format RQF_MDS_REINT;
extern struct req_format RQF_MDS_REINT_CREATE;
extern struct req_format RQF_MDS_REINT_CREATE_ACL;
@@ -166,10 +165,9 @@ extern struct req_format RQF_MDS_REINT_LINK;
extern struct req_format RQF_MDS_REINT_RENAME;
extern struct req_format RQF_MDS_REINT_SETATTR;
extern struct req_format RQF_MDS_REINT_SETXATTR;
-extern struct req_format RQF_MDS_QUOTACHECK;
extern struct req_format RQF_MDS_QUOTACTL;
-extern struct req_format RQF_QC_CALLBACK;
extern struct req_format RQF_MDS_SWAP_LAYOUTS;
+extern struct req_format RQF_MDS_REINT_MIGRATE;
/* MDS hsm formats */
extern struct req_format RQF_MDS_HSM_STATE_GET;
extern struct req_format RQF_MDS_HSM_STATE_SET;
@@ -181,7 +179,6 @@ extern struct req_format RQF_MDS_HSM_REQUEST;
/* OST req_format */
extern struct req_format RQF_OST_CONNECT;
extern struct req_format RQF_OST_DISCONNECT;
-extern struct req_format RQF_OST_QUOTACHECK;
extern struct req_format RQF_OST_QUOTACTL;
extern struct req_format RQF_OST_GETATTR;
extern struct req_format RQF_OST_SETATTR;
diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h
index 90c183424802..03a970bcac55 100644
--- a/drivers/staging/lustre/lustre/include/lustre_sec.h
+++ b/drivers/staging/lustre/lustre/include/lustre_sec.h
@@ -50,6 +50,7 @@ struct brw_page;
/* Linux specific */
struct key;
struct seq_file;
+struct lustre_cfg;
/*
* forward declaration
@@ -1029,6 +1030,8 @@ int sptlrpc_target_export_check(struct obd_export *exp,
/* bulk security api */
void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc);
+int get_free_pages_in_pool(void);
+int pool_is_at_full_capacity(void);
int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
diff --git a/drivers/staging/lustre/lustre/include/lustre_swab.h b/drivers/staging/lustre/lustre/include/lustre_swab.h
new file mode 100644
index 000000000000..26d01c2d6633
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_swab.h
@@ -0,0 +1,102 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format. The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed()).
+ *
+ * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines,
+ * implemented in ptlrpc/lustre_swab.c. These 'swabbers' convert the
+ * type from "other" endian, in-place in the message buffer.
+ *
+ * A swabber takes a single pointer argument. The caller must already have
+ * verified that the length of the message buffer >= sizeof (type).
+ *
+ * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
+ * may be defined that swabs just the variable part, after the caller has
+ * verified that the message buffer is large enough.
+ */
+
+#ifndef _LUSTRE_SWAB_H_
+#define _LUSTRE_SWAB_H_
+
+#include "lustre/lustre_idl.h"
+
+void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
+void lustre_swab_connect(struct obd_connect_data *ocd);
+void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+void lustre_swab_hsm_state_set(struct hsm_state_set *hss);
+void lustre_swab_obd_statfs(struct obd_statfs *os);
+void lustre_swab_obd_ioobj(struct obd_ioobj *ioo);
+void lustre_swab_niobuf_remote(struct niobuf_remote *nbr);
+void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb);
+void lustre_swab_ost_lvb(struct ost_lvb *lvb);
+void lustre_swab_obd_quotactl(struct obd_quotactl *q);
+void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
+void lustre_swab_generic_32s(__u32 *val);
+void lustre_swab_mdt_body(struct mdt_body *b);
+void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b);
+void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
+void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
+void lustre_swab_lmv_desc(struct lmv_desc *ld);
+void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
+void lustre_swab_lov_desc(struct lov_desc *ld);
+void lustre_swab_gl_desc(union ldlm_gl_desc *desc);
+void lustre_swab_ldlm_intent(struct ldlm_intent *i);
+void lustre_swab_ldlm_request(struct ldlm_request *rq);
+void lustre_swab_ldlm_reply(struct ldlm_reply *r);
+void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
+void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
+void lustre_swab_mgs_config_body(struct mgs_config_body *body);
+void lustre_swab_mgs_config_res(struct mgs_config_res *body);
+void lustre_swab_ost_body(struct ost_body *b);
+void lustre_swab_ost_last_id(__u64 *id);
+void lustre_swab_fiemap(struct fiemap *fiemap);
+void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
+void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
+void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+ int stripe_count);
+void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
+void lustre_swab_lustre_capa(struct lustre_capa *c);
+void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
+void lustre_swab_fid2path(struct getinfo_fid2path *gf);
+void lustre_swab_layout_intent(struct layout_intent *li);
+void lustre_swab_hsm_current_action(struct hsm_current_action *action);
+void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk);
+void lustre_swab_hsm_user_item(struct hsm_user_item *hui);
+void lustre_swab_hsm_request(struct hsm_request *hr);
+void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl);
+void lustre_swab_close_data(struct close_data *data);
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
+
+#endif
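
As the header comment notes, a swabber converts a wire struct in place, and the caller has already validated the buffer length. A hedged sketch of the typical field-by-field shape, using the kernel's __swab32s/__swab64s helpers; 'struct example_wire' and its fields are illustrative only:

/* Hedged sketch: in-place swab of a simple fixed-size wire struct.
 * Real swabbers in ptlrpc/lustre_swab.c follow the same pattern.
 */
struct example_wire {
	__u32 ew_magic;
	__u32 ew_flags;
	__u64 ew_size;
};

void lustre_swab_example_wire(struct example_wire *ew)
{
	__swab32s(&ew->ew_magic);
	__swab32s(&ew->ew_flags);
	__swab64s(&ew->ew_size);
}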
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index f6fc4dd05bd6..0f48e9c3d9e3 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -73,70 +73,17 @@ static inline void loi_init(struct lov_oinfo *loi)
{
}
-/*
- * If we are unable to get the maximum object size from the OST in
- * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using
- * the old maximum object size from ext3.
- */
-#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL
-
-struct lov_stripe_md {
- atomic_t lsm_refc;
- spinlock_t lsm_lock;
- pid_t lsm_lock_owner; /* debugging */
-
- /* maximum possible file size, might change as OSTs status changes,
- * e.g. disconnected, deactivated
- */
- __u64 lsm_maxbytes;
- struct ost_id lsm_oi;
- __u32 lsm_magic;
- __u32 lsm_stripe_size;
- __u32 lsm_pattern; /* striping pattern (RAID0, RAID1) */
- __u16 lsm_stripe_count;
- __u16 lsm_layout_gen;
- char lsm_pool_name[LOV_MAXPOOLNAME + 1];
- struct lov_oinfo *lsm_oinfo[0];
-};
-
-static inline bool lsm_is_released(struct lov_stripe_md *lsm)
-{
- return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED);
-}
-
-static inline bool lsm_has_objects(struct lov_stripe_md *lsm)
-{
- if (!lsm)
- return false;
- if (lsm_is_released(lsm))
- return false;
- return true;
-}
-
-static inline int lov_stripe_md_size(unsigned int stripe_count)
-{
- struct lov_stripe_md lsm;
-
- return sizeof(lsm) + stripe_count * sizeof(lsm.lsm_oinfo[0]);
-}
-
+struct lov_stripe_md;
struct obd_info;
typedef int (*obd_enqueue_update_f)(void *cookie, int rc);
/* obd info for a particular level (lov, osc). */
struct obd_info {
- /* Flags used for set request specific flags:
- - while lock handling, the flags obtained on the enqueue
- request are set here.
- - while stats, the flags used for control delay/resend.
- - while setattr, the flags used for distinguish punch operation
- */
+ /* OBD_STATFS_* flags */
__u64 oi_flags;
/* lsm data specific for every OSC. */
struct lov_stripe_md *oi_md;
- /* obdo data specific for every OSC, if needed at all. */
- struct obdo *oi_oa;
/* statfs data specific for every OSC, if needed at all. */
struct obd_statfs *oi_osfs;
/* An update callback which is called to update some data on upper
@@ -204,7 +151,6 @@ enum obd_cl_sem_lock_class {
* on the MDS.
*/
#define OBD_MAX_DEFAULT_EA_SIZE 4096
-#define OBD_MAX_DEFAULT_COOKIE_SIZE 4096
struct mdc_rpc_lock;
struct obd_import;
@@ -214,7 +160,7 @@ struct client_obd {
struct obd_import *cl_import; /* ptlrpc connection state */
size_t cl_conn_count;
/*
- * Cache maximum and default values for easize and cookiesize. This is
+ * Cache maximum and default values for easize. This is
* strictly a performance optimization to minimize calls to
* obd_size_diskmd(). The default values are used to calculate the
* initial size of a request buffer. The ptlrpc layer will resize the
@@ -235,18 +181,6 @@ struct client_obd {
* run-time if a larger observed size is advertised by the MDT.
*/
u32 cl_max_mds_easize;
- /* Default cookie size for llog cookies (see struct llog_cookie). It is
- * initialized to zero at mount-time, then it tracks the largest
- * observed cookie size advertised by the MDT, up to a maximum value of
- * OBD_MAX_DEFAULT_COOKIE_SIZE. Note that llog_cookies are not
- * used by clients communicating with MDS versions 2.4.0 and later.
- */
- u32 cl_default_mds_cookiesize;
- /* Maximum possible cookie size computed at mount-time based on
- * the number of OSTs in the filesystem. May be increased at
- * run-time if a larger observed size is advertised by the MDT.
- */
- u32 cl_max_mds_cookiesize;
enum lustre_sec_part cl_sp_me;
enum lustre_sec_part cl_sp_to;
@@ -313,15 +247,42 @@ struct client_obd {
struct obd_histogram cl_read_offset_hist;
struct obd_histogram cl_write_offset_hist;
- /* lru for osc caching pages */
+ /* LRU for osc caching pages */
struct cl_client_cache *cl_cache;
- struct list_head cl_lru_osc; /* member of cl_cache->ccc_lru */
+ /** member of cl_cache->ccc_lru */
+ struct list_head cl_lru_osc;
+ /** # of available LRU slots left in the per-OSC cache.
+ * Available LRU slots are shared by all OSCs of the same file system,
+ * therefore this is a pointer to cl_client_cache::ccc_lru_left.
+ */
atomic_long_t *cl_lru_left;
+ /** # of busy LRU pages. A page is considered busy if it's in writeback
+ * queue, or in transfer. Busy pages can't be discarded so they are not
+ * in LRU cache.
+ */
atomic_long_t cl_lru_busy;
+ /** # of LRU pages in the cache for this client_obd */
atomic_long_t cl_lru_in_list;
+	/** # of threads shrinking the LRU cache. To avoid contention, it's
+	 * not allowed to have multiple threads shrinking the LRU cache.
+ */
atomic_t cl_lru_shrinkers;
- struct list_head cl_lru_list; /* lru page list */
- spinlock_t cl_lru_list_lock; /* page list protector */
+ /** The time when this LRU cache was last used. */
+ time64_t cl_lru_last_used;
+ /** stats: how many reclaims have happened for this client_obd.
+	 * Reclaim vs. shrink: shrink is async, voluntary rebalancing;
+	 * reclaim is sync, initiated by an IO thread when LRU slots run
+	 * short.
+ */
+ u64 cl_lru_reclaim;
+ /** List of LRU pages for this client_obd */
+ struct list_head cl_lru_list;
+ /** Lock for LRU page list */
+ spinlock_t cl_lru_list_lock;
+ /** # of unstable pages in this client_obd.
+	 * An unstable page is a page whose WRITE RPC has finished but whose
+	 * transaction has NOT yet committed.
+ */
atomic_long_t cl_unstable_count;
/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
@@ -329,7 +290,17 @@ struct client_obd {
wait_queue_head_t cl_destroy_waitq;
struct mdc_rpc_lock *cl_rpc_lock;
- struct mdc_rpc_lock *cl_close_lock;
+
+ /* modify rpcs in flight
+ * currently used for metadata only
+ */
+ spinlock_t cl_mod_rpcs_lock;
+ u16 cl_max_mod_rpcs_in_flight;
+ u16 cl_mod_rpcs_in_flight;
+ u16 cl_close_rpcs_in_flight;
+ wait_queue_head_t cl_mod_rpcs_waitq;
+ unsigned long *cl_mod_tag_bitmap;
+ struct obd_histogram cl_mod_rpcs_hist;
/* mgc datastruct */
atomic_t cl_mgc_refcount;
@@ -345,13 +316,6 @@ struct client_obd {
/* also protected by the poorly named _loi_list_lock lock above */
struct osc_async_rc cl_ar;
- /* used by quotacheck when the servers are older than 2.4 */
- int cl_qchk_stat; /* quotacheck stat of the peer */
-#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */
-#if OBD_OCD_VERSION(2, 7, 53, 0) < LUSTRE_VERSION_CODE
-#warning "please consider removing quotacheck compatibility code"
-#endif
-
/* sequence manager */
struct lu_client_seq *cl_seq;
@@ -454,8 +418,6 @@ struct lmv_obd {
int connected;
int max_easize;
int max_def_easize;
- int max_cookiesize;
- int max_def_cookiesize;
u32 tgts_size; /* size of tgts array */
struct lmv_tgt_desc **tgts;
@@ -469,9 +431,9 @@ struct niobuf_local {
__u32 lnb_page_offset;
__u32 lnb_len;
__u32 lnb_flags;
+ int lnb_rc;
struct page *lnb_page;
void *lnb_data;
- int lnb_rc;
};
#define LUSTRE_FLD_NAME "fld"
@@ -512,21 +474,6 @@ struct niobuf_local {
/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
#define N_LOCAL_TEMP_PAGE 0x10000000
-struct obd_trans_info {
- __u64 oti_xid;
- /* Only used on the server side for tracking acks. */
- struct oti_req_ack_lock {
- struct lustre_handle lock;
- __u32 mode;
- } oti_ack_locks[4];
- void *oti_handle;
- struct llog_cookie oti_onecookie;
- struct llog_cookie *oti_logcookies;
-
- /** VBR: versions */
- __u64 oti_pre_version;
-};
-
/*
* Events signalled through obd_notify() upcall-chain.
*/
@@ -587,15 +534,14 @@ struct lvfs_run_ctxt {
struct obd_device {
struct obd_type *obd_type;
- __u32 obd_magic;
+ u32 obd_magic; /* OBD_DEVICE_MAGIC */
+ int obd_minor; /* device number: lctl dl */
+ struct lu_device *obd_lu_dev;
/* common and UUID name of this device */
- char obd_name[MAX_OBD_NAME];
- struct obd_uuid obd_uuid;
-
- struct lu_device *obd_lu_dev;
+ struct obd_uuid obd_uuid;
+ char obd_name[MAX_OBD_NAME];
- int obd_minor;
/* bitfield modification is protected by obd_dev_lock */
unsigned long obd_attached:1, /* finished attach */
obd_set_up:1, /* finished setup */
@@ -619,22 +565,22 @@ struct obd_device {
unsigned long obd_recovery_expired:1;
/* uuid-export hash body */
struct cfs_hash *obd_uuid_hash;
- atomic_t obd_refcount;
wait_queue_head_t obd_refcount_waitq;
struct list_head obd_exports;
struct list_head obd_unlinked_exports;
struct list_head obd_delayed_exports;
+ atomic_t obd_refcount;
int obd_num_exports;
spinlock_t obd_nid_lock;
struct ldlm_namespace *obd_namespace;
struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */
/* a spinlock is OK for what we do now, may need a semaphore later */
spinlock_t obd_dev_lock; /* protect OBD bitfield above */
- struct mutex obd_dev_mutex;
- __u64 obd_last_committed;
spinlock_t obd_osfs_lock;
struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */
__u64 obd_osfs_age;
+ u64 obd_last_committed;
+ struct mutex obd_dev_mutex;
struct lvfs_run_ctxt obd_lvfs_ctxt;
struct obd_llog_group obd_olg; /* default llog group */
struct obd_device *obd_observer;
@@ -648,12 +594,13 @@ struct obd_device {
struct lov_obd lov;
struct lmv_obd lmv;
} u;
+
/* Fields used by LProcFS */
- unsigned int obd_cntr_base;
- struct lprocfs_stats *obd_stats;
+ struct lprocfs_stats *obd_stats;
+ unsigned int obd_cntr_base;
- unsigned int md_cntr_base;
- struct lprocfs_stats *md_stats;
+ struct lprocfs_stats *md_stats;
+ unsigned int md_cntr_base;
struct dentry *obd_debugfs_entry;
struct dentry *obd_svc_debugfs_entry;
@@ -665,9 +612,11 @@ struct obd_device {
/**
* Ldlm pool part. Save last calculated SLV and Limit.
*/
- rwlock_t obd_pool_lock;
- int obd_pool_limit;
- __u64 obd_pool_slv;
+ rwlock_t obd_pool_lock;
+ u64 obd_pool_slv;
+ int obd_pool_limit;
+
+ int obd_conn_inprogress;
/**
* A list of outstanding class_incref()'s against this obd. For
@@ -675,19 +624,10 @@ struct obd_device {
*/
struct lu_ref obd_reference;
- int obd_conn_inprogress;
-
struct kobject obd_kobj; /* sysfs object */
struct completion obd_kobj_unregister;
};
-enum obd_cleanup_stage {
-/* Special case hack for MDS LOVs */
- OBD_CLEANUP_EARLY,
-/* can be directly mapped to .ldto_device_fini() */
- OBD_CLEANUP_EXPORTS,
-};
-
/* get/set_info keys */
#define KEY_ASYNC "async"
#define KEY_CHANGELOG_CLEAR "changelog_clear"
@@ -704,7 +644,6 @@ enum obd_cleanup_stage {
#define KEY_INTERMDS "inter_mds"
#define KEY_LAST_ID "last_id"
#define KEY_LAST_FID "last_fid"
-#define KEY_LOVDESC "lovdesc"
#define KEY_MAX_EASIZE "max_easize"
#define KEY_DEFAULT_EASIZE "default_easize"
#define KEY_MGSSEC "mgssec"
@@ -720,22 +659,6 @@ enum obd_cleanup_stage {
struct lu_context;
-/* /!\ must be coherent with include/linux/namei.h on patched kernel */
-#define IT_OPEN (1 << 0)
-#define IT_CREAT (1 << 1)
-#define IT_READDIR (1 << 2)
-#define IT_GETATTR (1 << 3)
-#define IT_LOOKUP (1 << 4)
-#define IT_UNLINK (1 << 5)
-#define IT_TRUNC (1 << 6)
-#define IT_GETXATTR (1 << 7)
-#define IT_EXEC (1 << 8)
-#define IT_PIN (1 << 9)
-#define IT_LAYOUT (1 << 10)
-#define IT_QUOTA_DQACQ (1 << 11)
-#define IT_QUOTA_CONN (1 << 12)
-#define IT_SETXATTR (1 << 13)
-
static inline int it_to_lock_mode(struct lookup_intent *it)
{
/* CREAT needs to be tested before open (both could be set) */
@@ -755,6 +678,14 @@ static inline int it_to_lock_mode(struct lookup_intent *it)
return -EINVAL;
}
+enum md_op_flags {
+ MF_MDC_CANCEL_FID1 = BIT(0),
+ MF_MDC_CANCEL_FID2 = BIT(1),
+ MF_MDC_CANCEL_FID3 = BIT(2),
+ MF_MDC_CANCEL_FID4 = BIT(3),
+ MF_GET_MDT_IDX = BIT(4),
+};
+
enum md_cli_flags {
CLI_SET_MEA = BIT(0),
CLI_RM_ENTRY = BIT(1),
@@ -789,8 +720,6 @@ struct md_op_data {
__u64 op_valid;
loff_t op_attr_blocks;
- /* Size-on-MDS epoch and flags. */
- __u64 op_ioepoch;
__u32 op_flags;
/* Various operation flags. */
@@ -839,15 +768,13 @@ struct obd_ops {
int (*iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
void *karg, void __user *uarg);
int (*get_info)(const struct lu_env *env, struct obd_export *,
- __u32 keylen, void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *lsm);
+ __u32 keylen, void *key, __u32 *vallen, void *val);
int (*set_info_async)(const struct lu_env *, struct obd_export *,
__u32 keylen, void *key,
__u32 vallen, void *val,
struct ptlrpc_request_set *set);
int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg);
- int (*precleanup)(struct obd_device *dev,
- enum obd_cleanup_stage cleanup_stage);
+ int (*precleanup)(struct obd_device *dev);
int (*cleanup)(struct obd_device *dev);
int (*process_config)(struct obd_device *dev, u32 len, void *data);
int (*postrecov)(struct obd_device *dev);
@@ -887,35 +814,23 @@ struct obd_ops {
struct obd_statfs *osfs, __u64 max_age, __u32 flags);
int (*statfs_async)(struct obd_export *exp, struct obd_info *oinfo,
__u64 max_age, struct ptlrpc_request_set *set);
- int (*packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt,
- struct lov_stripe_md *mem_src);
- int (*unpackmd)(struct obd_export *exp,
- struct lov_stripe_md **mem_tgt,
- struct lov_mds_md *disk_src, int disk_len);
int (*create)(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct obd_trans_info *oti);
+ struct obdo *oa);
int (*destroy)(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct obd_trans_info *oti);
+ struct obdo *oa);
int (*setattr)(const struct lu_env *, struct obd_export *exp,
- struct obd_info *oinfo, struct obd_trans_info *oti);
- int (*setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- struct ptlrpc_request_set *rqset);
+ struct obdo *oa);
int (*getattr)(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo);
- int (*getattr_async)(struct obd_export *exp, struct obd_info *oinfo,
- struct ptlrpc_request_set *set);
+ struct obdo *oa);
int (*preprw)(const struct lu_env *env, int cmd,
struct obd_export *exp, struct obdo *oa, int objcount,
struct obd_ioobj *obj, struct niobuf_remote *remote,
- int *nr_pages, struct niobuf_local *local,
- struct obd_trans_info *oti);
+ int *nr_pages, struct niobuf_local *local);
int (*commitrw)(const struct lu_env *env, int cmd,
struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
struct niobuf_remote *remote, int pages,
- struct niobuf_local *local,
- struct obd_trans_info *oti, int rc);
+ struct niobuf_local *local, int rc);
int (*init_export)(struct obd_export *exp);
int (*destroy_export)(struct obd_export *exp);
@@ -930,8 +845,6 @@ struct obd_ops {
struct obd_uuid *(*get_uuid)(struct obd_export *exp);
/* quota methods */
- int (*quotacheck)(struct obd_device *, struct obd_export *,
- struct obd_quotactl *);
int (*quotactl)(struct obd_device *, struct obd_export *,
struct obd_quotactl *);
@@ -954,7 +867,7 @@ struct obd_ops {
/* lmv structures */
struct lustre_md {
struct mdt_body *body;
- struct lov_stripe_md *lsm;
+ struct lu_buf layout;
struct lmv_stripe_md *lmv;
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *posix_acl;
@@ -992,10 +905,8 @@ struct md_ops {
int (*create)(struct obd_export *, struct md_op_data *,
const void *, size_t, umode_t, uid_t, gid_t,
cfs_cap_t, __u64, struct ptlrpc_request **);
- int (*done_writing)(struct obd_export *, struct md_op_data *,
- struct md_open_data *);
int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
- const ldlm_policy_data_t *,
+ const union ldlm_policy_data *,
struct lookup_intent *, struct md_op_data *,
struct lustre_handle *, __u64);
int (*getattr)(struct obd_export *, struct md_op_data *,
@@ -1012,8 +923,7 @@ struct md_ops {
const char *, size_t, const char *, size_t,
struct ptlrpc_request **);
int (*setattr)(struct obd_export *, struct md_op_data *, void *,
- size_t, void *, size_t, struct ptlrpc_request **,
- struct md_open_data **mod);
+ size_t, struct ptlrpc_request **);
int (*sync)(struct obd_export *, const struct lu_fid *,
struct ptlrpc_request **);
int (*read_page)(struct obd_export *, struct md_op_data *,
@@ -1030,7 +940,7 @@ struct md_ops {
u64, const char *, const char *, int, int, int,
struct ptlrpc_request **);
- int (*init_ea_size)(struct obd_export *, u32, u32, u32, u32);
+ int (*init_ea_size)(struct obd_export *, u32, u32);
int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
struct obd_export *, struct obd_export *,
@@ -1052,11 +962,11 @@ struct md_ops {
enum ldlm_mode (*lock_match)(struct obd_export *, __u64,
const struct lu_fid *, enum ldlm_type,
- ldlm_policy_data_t *, enum ldlm_mode,
+ union ldlm_policy_data *, enum ldlm_mode,
struct lustre_handle *);
int (*cancel_unused)(struct obd_export *, const struct lu_fid *,
- ldlm_policy_data_t *, enum ldlm_mode,
+ union ldlm_policy_data *, enum ldlm_mode,
enum ldlm_cancel_flags flags, void *opaque);
int (*get_fid_from_lsm)(struct obd_export *,
@@ -1071,6 +981,8 @@ struct md_ops {
int (*revalidate_lock)(struct obd_export *, struct lookup_intent *,
struct lu_fid *, __u64 *bits);
+ int (*unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm,
+ const union lmv_mds_md *lmv, size_t lmv_size);
/*
* NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
* lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
@@ -1078,33 +990,6 @@ struct md_ops {
*/
};
-struct lsm_operations {
- void (*lsm_free)(struct lov_stripe_md *);
- void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, u64 *,
- u64 *);
- void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, u64 *,
- u64 *);
- int (*lsm_lmm_verify)(struct lov_mds_md *lmm, int lmm_bytes,
- __u16 *stripe_count);
- int (*lsm_unpackmd)(struct lov_obd *lov, struct lov_stripe_md *lsm,
- struct lov_mds_md *lmm);
-};
-
-extern const struct lsm_operations lsm_v1_ops;
-extern const struct lsm_operations lsm_v3_ops;
-static inline const struct lsm_operations *lsm_op_find(int magic)
-{
- switch (magic) {
- case LOV_MAGIC_V1:
- return &lsm_v1_ops;
- case LOV_MAGIC_V3:
- return &lsm_v3_ops;
- default:
- CERROR("Cannot recognize lsm_magic %08x\n", magic);
- return NULL;
- }
-}
-
static inline struct md_open_data *obd_mod_alloc(void)
{
struct md_open_data *mod;
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
index 16094dbec08b..7ec25202cd22 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -100,6 +100,13 @@ int obd_get_request_slot(struct client_obd *cli);
void obd_put_request_slot(struct client_obd *cli);
__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli);
int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max);
+int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, u16 max);
+int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq);
+
+u16 obd_get_mod_rpc_slot(struct client_obd *cli, u32 opc,
+ struct lookup_intent *it);
+void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc,
+ struct lookup_intent *it, u16 tag);
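
These helpers pair with the new client_obd cl_mod_* fields and the lustre_msg_set_tag() accessor added earlier: a sender takes a slot (bounded by cl_max_mod_rpcs_in_flight), stamps the returned tag into the request, and releases the slot on completion. A hedged usage sketch; mdc_send_and_wait() is a hypothetical stand-in for the actual send path:

/* Hedged sketch: bracketing a modifying MDC RPC with a mod-rpc slot. */
static int example_mod_rpc(struct client_obd *cli, struct ptlrpc_request *req,
			   u32 opc, struct lookup_intent *it)
{
	u16 tag;
	int rc;

	/* May wait on cl_mod_rpcs_waitq until a slot is free. */
	tag = obd_get_mod_rpc_slot(cli, opc, it);
	lustre_msg_set_tag(req->rq_reqmsg, tag);

	rc = mdc_send_and_wait(req);	/* hypothetical */

	obd_put_mod_rpc_slot(cli, opc, it, tag);
	return rc;
}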
struct llog_handle;
struct llog_rec_hdr;
@@ -175,10 +182,13 @@ struct lustre_profile {
char *lp_profile;
char *lp_dt;
char *lp_md;
+ int lp_refs;
+ bool lp_list_deleted;
};
struct lustre_profile *class_get_profile(const char *prof);
void class_del_profile(const char *prof);
+void class_put_profile(struct lustre_profile *lprof);
void class_del_profiles(void);
#if LUSTRE_TRACKS_LOCK_EXP_REFS
@@ -269,10 +279,8 @@ static inline int lprocfs_climp_check(struct obd_device *obd)
struct inode;
struct lu_attr;
struct obdo;
-void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid);
void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj);
-void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid);
#define OBT(dev) (dev)->obd_type
#define OBP(dev, op) (dev)->obd_type->typ_dt_ops->op
@@ -417,16 +425,14 @@ static inline int class_devno_max(void)
static inline int obd_get_info(const struct lu_env *env,
struct obd_export *exp, __u32 keylen,
- void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *lsm)
+ void *key, __u32 *vallen, void *val)
{
int rc;
EXP_CHECK_DT_OP(exp, get_info);
EXP_COUNTER_INCREMENT(exp, get_info);
- rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val,
- lsm);
+ rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val);
return rc;
}
@@ -505,8 +511,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
return rc;
}
-static inline int obd_precleanup(struct obd_device *obd,
- enum obd_cleanup_stage cleanup_stage)
+static inline int obd_precleanup(struct obd_device *obd)
{
int rc;
DECLARE_LU_VARS(ldt, d);
@@ -517,20 +522,18 @@ static inline int obd_precleanup(struct obd_device *obd,
ldt = obd->obd_type->typ_lu;
d = obd->obd_lu_dev;
if (ldt && d) {
- if (cleanup_stage == OBD_CLEANUP_EXPORTS) {
- struct lu_env env;
+ struct lu_env env;
- rc = lu_env_init(&env, ldt->ldt_ctx_tags);
- if (rc == 0) {
- ldt->ldt_ops->ldto_device_fini(&env, d);
- lu_env_fini(&env);
- }
+ rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+ if (!rc) {
+ ldt->ldt_ops->ldto_device_fini(&env, d);
+ lu_env_fini(&env);
}
}
OBD_CHECK_DT_OP(obd, precleanup, 0);
OBD_COUNTER_INCREMENT(obd, precleanup);
- rc = OBP(obd, precleanup)(obd, cleanup_stage);
+ rc = OBP(obd, precleanup)(obd);
return rc;
}
@@ -612,181 +615,51 @@ obd_process_config(struct obd_device *obd, int datalen, void *data)
return rc;
}
-/* Pack an in-memory MD struct for storage on disk.
- * Returns +ve size of packed MD (0 for free), or -ve error.
- *
- * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL).
- * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed.
- * If @*disk_tgt == NULL, it will be allocated
- */
-static inline int obd_packmd(struct obd_export *exp,
- struct lov_mds_md **disk_tgt,
- struct lov_stripe_md *mem_src)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, packmd);
- EXP_COUNTER_INCREMENT(exp, packmd);
-
- rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src);
- return rc;
-}
-
-static inline int obd_size_diskmd(struct obd_export *exp,
- struct lov_stripe_md *mem_src)
-{
- return obd_packmd(exp, NULL, mem_src);
-}
-
-static inline int obd_free_diskmd(struct obd_export *exp,
- struct lov_mds_md **disk_tgt)
-{
- LASSERT(disk_tgt);
- LASSERT(*disk_tgt);
- /*
- * LU-2590, for caller's convenience, *disk_tgt could be host
- * endianness, it needs swab to LE if necessary, while just
- * lov_mds_md header needs it for figuring out how much memory
- * needs to be freed.
- */
- if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
- (((*disk_tgt)->lmm_magic == LOV_MAGIC_V1) ||
- ((*disk_tgt)->lmm_magic == LOV_MAGIC_V3)))
- lustre_swab_lov_mds_md(*disk_tgt);
- return obd_packmd(exp, disk_tgt, NULL);
-}
-
-/* Unpack an MD struct from disk to in-memory format.
- * Returns +ve size of unpacked MD (0 for free), or -ve error.
- *
- * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL).
- * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed.
- * If @*mem_tgt == NULL, it will be allocated
- */
-static inline int obd_unpackmd(struct obd_export *exp,
- struct lov_stripe_md **mem_tgt,
- struct lov_mds_md *disk_src,
- int disk_len)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, unpackmd);
- EXP_COUNTER_INCREMENT(exp, unpackmd);
-
- rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len);
- return rc;
-}
-
-static inline int obd_free_memmd(struct obd_export *exp,
- struct lov_stripe_md **mem_tgt)
-{
- int rc;
-
- LASSERT(mem_tgt);
- LASSERT(*mem_tgt);
- rc = obd_unpackmd(exp, mem_tgt, NULL, 0);
- *mem_tgt = NULL;
- return rc;
-}
-
static inline int obd_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *obdo, struct obd_trans_info *oti)
+ struct obdo *obdo)
{
int rc;
EXP_CHECK_DT_OP(exp, create);
EXP_COUNTER_INCREMENT(exp, create);
- rc = OBP(exp->exp_obd, create)(env, exp, obdo, oti);
+ rc = OBP(exp->exp_obd, create)(env, exp, obdo);
return rc;
}
static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp,
- struct obdo *obdo, struct obd_trans_info *oti)
+ struct obdo *obdo)
{
int rc;
EXP_CHECK_DT_OP(exp, destroy);
EXP_COUNTER_INCREMENT(exp, destroy);
- rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, oti);
+ rc = OBP(exp->exp_obd, destroy)(env, exp, obdo);
return rc;
}
static inline int obd_getattr(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo)
+ struct obdo *oa)
{
int rc;
EXP_CHECK_DT_OP(exp, getattr);
EXP_COUNTER_INCREMENT(exp, getattr);
- rc = OBP(exp->exp_obd, getattr)(env, exp, oinfo);
- return rc;
-}
-
-static inline int obd_getattr_async(struct obd_export *exp,
- struct obd_info *oinfo,
- struct ptlrpc_request_set *set)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, getattr_async);
- EXP_COUNTER_INCREMENT(exp, getattr_async);
-
- rc = OBP(exp->exp_obd, getattr_async)(exp, oinfo, set);
+ rc = OBP(exp->exp_obd, getattr)(env, exp, oa);
return rc;
}
static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo,
- struct obd_trans_info *oti)
+ struct obdo *oa)
{
int rc;
EXP_CHECK_DT_OP(exp, setattr);
EXP_COUNTER_INCREMENT(exp, setattr);
- rc = OBP(exp->exp_obd, setattr)(env, exp, oinfo, oti);
- return rc;
-}
-
-/* This performs all the requests set init/wait/destroy actions. */
-static inline int obd_setattr_rqset(struct obd_export *exp,
- struct obd_info *oinfo,
- struct obd_trans_info *oti)
-{
- struct ptlrpc_request_set *set = NULL;
- int rc;
-
- EXP_CHECK_DT_OP(exp, setattr_async);
- EXP_COUNTER_INCREMENT(exp, setattr_async);
-
- set = ptlrpc_prep_set();
- if (!set)
- return -ENOMEM;
-
- rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
- if (rc == 0)
- rc = ptlrpc_set_wait(set);
- ptlrpc_set_destroy(set);
- return rc;
-}
-
-/* This adds all the requests into @set if @set != NULL, otherwise
- * all requests are sent asynchronously without waiting for response.
- */
-static inline int obd_setattr_async(struct obd_export *exp,
- struct obd_info *oinfo,
- struct obd_trans_info *oti,
- struct ptlrpc_request_set *set)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, setattr_async);
- EXP_COUNTER_INCREMENT(exp, setattr_async);
-
- rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
+ rc = OBP(exp->exp_obd, setattr)(env, exp, oa);
return rc;
}
@@ -1053,15 +926,16 @@ static inline int obd_statfs_rqset(struct obd_export *exp,
__u32 flags)
{
struct ptlrpc_request_set *set = NULL;
- struct obd_info oinfo = { };
+ struct obd_info oinfo = {
+ .oi_osfs = osfs,
+ .oi_flags = flags,
+ };
int rc = 0;
- set = ptlrpc_prep_set();
+ set = ptlrpc_prep_set();
if (!set)
return -ENOMEM;
- oinfo.oi_osfs = osfs;
- oinfo.oi_flags = flags;
rc = obd_statfs_async(exp, &oinfo, max_age, set);
if (rc == 0)
rc = ptlrpc_set_wait(set);
@@ -1112,8 +986,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd,
struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
struct niobuf_remote *remote, int *pages,
- struct niobuf_local *local,
- struct obd_trans_info *oti)
+ struct niobuf_local *local)
{
int rc;
@@ -1121,7 +994,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd,
EXP_COUNTER_INCREMENT(exp, preprw);
rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote,
- pages, local, oti);
+ pages, local);
return rc;
}
@@ -1129,14 +1002,13 @@ static inline int obd_commitrw(const struct lu_env *env, int cmd,
struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
struct niobuf_remote *rnb, int pages,
- struct niobuf_local *local,
- struct obd_trans_info *oti, int rc)
+ struct niobuf_local *local, int rc)
{
EXP_CHECK_DT_OP(exp, commitrw);
EXP_COUNTER_INCREMENT(exp, commitrw);
rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj,
- rnb, pages, local, oti, rc);
+ rnb, pages, local, rc);
return rc;
}
@@ -1219,18 +1091,6 @@ static inline int obd_notify_observer(struct obd_device *observer,
return rc1 ? rc1 : rc2;
}
-static inline int obd_quotacheck(struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, quotacheck);
- EXP_COUNTER_INCREMENT(exp, quotacheck);
-
- rc = OBP(exp->exp_obd, quotacheck)(exp->exp_obd, exp, oqctl);
- return rc;
-}
-
static inline int obd_quotactl(struct obd_export *exp,
struct obd_quotactl *oqctl)
{
@@ -1346,21 +1206,9 @@ static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
-static inline int md_done_writing(struct obd_export *exp,
- struct md_op_data *op_data,
- struct md_open_data *mod)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, done_writing);
- EXP_MD_COUNTER_INCREMENT(exp, done_writing);
- rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod);
- return rc;
-}
-
static inline int md_enqueue(struct obd_export *exp,
struct ldlm_enqueue_info *einfo,
- const ldlm_policy_data_t *policy,
+ const union ldlm_policy_data *policy,
struct lookup_intent *it,
struct md_op_data *op_data,
struct lustre_handle *lockh,
@@ -1428,16 +1276,14 @@ static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
}
static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, size_t ealen, void *ea2, size_t ea2len,
- struct ptlrpc_request **request,
- struct md_open_data **mod)
+ void *ea, size_t ealen,
+ struct ptlrpc_request **request)
{
int rc;
EXP_CHECK_MD_OP(exp, setattr);
EXP_MD_COUNTER_INCREMENT(exp, setattr);
- rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen,
- ea2, ea2len, request, mod);
+ rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, request);
return rc;
}
@@ -1561,7 +1407,7 @@ static inline int md_set_lock_data(struct obd_export *exp,
static inline int md_cancel_unused(struct obd_export *exp,
const struct lu_fid *fid,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
enum ldlm_cancel_flags flags,
void *opaque)
@@ -1579,7 +1425,7 @@ static inline int md_cancel_unused(struct obd_export *exp,
static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags,
const struct lu_fid *fid,
enum ldlm_type type,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
struct lustre_handle *lockh)
{
@@ -1589,14 +1435,12 @@ static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags,
policy, mode, lockh);
}
-static inline int md_init_ea_size(struct obd_export *exp, int easize,
- int def_asize, int cookiesize,
- int def_cookiesize)
+static inline int md_init_ea_size(struct obd_export *exp, u32 easize,
+ u32 def_asize)
{
EXP_CHECK_MD_OP(exp, init_ea_size);
EXP_MD_COUNTER_INCREMENT(exp, init_ea_size);
- return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize,
- cookiesize, def_cookiesize);
+ return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize);
}
static inline int md_intent_getattr_async(struct obd_export *exp,
@@ -1636,6 +1480,24 @@ static inline int md_get_fid_from_lsm(struct obd_export *exp,
return rc;
}
+/* Unpack an MD struct from disk to in-memory format.
+ * Returns +ve size of unpacked MD (0 for free), or -ve error.
+ *
+ * If *plsm != NULL and lmm == NULL then *plsm will be freed.
+ * If *plsm == NULL then it will be allocated.
+ */
+static inline int md_unpackmd(struct obd_export *exp,
+ struct lmv_stripe_md **plsm,
+ const union lmv_mds_md *lmm, size_t lmm_size)
+{
+ int rc;
+
+ EXP_CHECK_MD_OP(exp, unpackmd);
+ EXP_MD_COUNTER_INCREMENT(exp, unpackmd);
+ rc = MDP(exp->exp_obd, unpackmd)(exp, plsm, lmm, lmm_size);
+ return rc;
+}
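+
+/*
+ * Illustrative call sequence, a sketch only: the allocate-on-unpack and
+ * free-on-NULL behaviours follow from the comment above; the local names
+ * are assumptions.
+ *
+ *	struct lmv_stripe_md *lsm = NULL;
+ *	int rc;
+ *
+ *	rc = md_unpackmd(exp, &lsm, lmm, lmm_size);	// allocates *lsm
+ *	if (rc < 0)
+ *		return rc;
+ *	...
+ *	md_unpackmd(exp, &lsm, NULL, 0);		// frees *lsm
+ */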
+
/* OBD Metadata Support */
int obd_init_caches(void);
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index b346a7f10aa4..aaedec7d793c 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -172,14 +172,14 @@ extern char obd_jobid_var[];
#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123
#define OBD_FAIL_MDS_SYNC_NET 0x124
#define OBD_FAIL_MDS_SYNC_PACK 0x125
-#define OBD_FAIL_MDS_DONE_WRITING_NET 0x126
-#define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127
+/* OBD_FAIL_MDS_DONE_WRITING_NET 0x126 obsolete since 2.8.0 */
+/* OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 obsolete since 2.8.0 */
#define OBD_FAIL_MDS_ALLOC_OBDO 0x128
#define OBD_FAIL_MDS_PAUSE_OPEN 0x129
#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a
#define OBD_FAIL_MDS_OPEN_CREATE 0x12b
#define OBD_FAIL_MDS_OST_SETATTR 0x12c
-#define OBD_FAIL_MDS_QUOTACHECK_NET 0x12d
+/* OBD_FAIL_MDS_QUOTACHECK_NET 0x12d obsolete since 2.4 */
#define OBD_FAIL_MDS_QUOTACTL_NET 0x12e
#define OBD_FAIL_MDS_CLIENT_ADD 0x12f
#define OBD_FAIL_MDS_GETXATTR_NET 0x130
@@ -264,7 +264,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_OST_ENOSPC 0x215
#define OBD_FAIL_OST_EROFS 0x216
#define OBD_FAIL_OST_ENOENT 0x217
-#define OBD_FAIL_OST_QUOTACHECK_NET 0x218
+/* OBD_FAIL_OST_QUOTACHECK_NET 0x218 obsolete since 2.4 */
#define OBD_FAIL_OST_QUOTACTL_NET 0x219
#define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a
#define OBD_FAIL_OST_CHECKSUM_SEND 0x21b
@@ -321,6 +321,8 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322
#define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323
+#define OBD_FAIL_LDLM_GRANT_CHECK 0x32a
+
/* LOCKLESS IO */
#define OBD_FAIL_LDLM_SET_CONTENTION 0x385
@@ -343,6 +345,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_OSC_CP_ENQ_RACE 0x410
#define OBD_FAIL_OSC_NO_GRANT 0x411
#define OBD_FAIL_OSC_DELAY_SETTIME 0x412
+#define OBD_FAIL_OSC_DELAY_IO 0x414
#define OBD_FAIL_PTLRPC 0x500
#define OBD_FAIL_PTLRPC_ACK 0x501
@@ -373,7 +376,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_OBD_PING_NET 0x600
#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601
#define OBD_FAIL_OBD_LOGD_NET 0x602
-#define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603
+/* OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 obsolete since 2.4 */
#define OBD_FAIL_OBD_DQACQ 0x604
#define OBD_FAIL_OBD_LLOG_SETUP 0x605
#define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606
@@ -458,6 +461,8 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LOV_INIT 0x1403
#define OBD_FAIL_GLIMPSE_DELAY 0x1404
#define OBD_FAIL_LLITE_XATTR_ENOMEM 0x1405
+#define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406
+#define OBD_FAIL_LLITE_LOST_LAYOUT 0x1407
#define OBD_FAIL_GETATTR_DELAY 0x1409
#define OBD_FAIL_FID_INDIR 0x1501
diff --git a/drivers/staging/lustre/lustre/include/seq_range.h b/drivers/staging/lustre/lustre/include/seq_range.h
new file mode 100644
index 000000000000..30c4dd66d5c4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/seq_range.h
@@ -0,0 +1,199 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * Define lu_seq_range-associated helper functions
+ */
+
+#ifndef _SEQ_RANGE_H_
+#define _SEQ_RANGE_H_
+
+#include "lustre/lustre_idl.h"
+
+/**
+ * Compute the sequence range type of \a range.
+ */
+static inline unsigned int fld_range_type(const struct lu_seq_range *range)
+{
+ return range->lsr_flags & LU_SEQ_RANGE_MASK;
+}
+
+/**
+ * Return true if \a range is an OST sequence range.
+ */
+static inline bool fld_range_is_ost(const struct lu_seq_range *range)
+{
+ return fld_range_type(range) == LU_SEQ_RANGE_OST;
+}
+
+/**
+ * Return true if \a range is an MDT sequence range.
+ */
+static inline bool fld_range_is_mdt(const struct lu_seq_range *range)
+{
+ return fld_range_type(range) == LU_SEQ_RANGE_MDT;
+}
+
+/**
+ * Return true if \a range is of type ANY. The fld client uses ANY when it
+ * sends a query without knowing whether the sequence belongs to an MDT or
+ * an OST, so the lookup may return any sequence type.
+ */
+static inline bool fld_range_is_any(const struct lu_seq_range *range)
+{
+ return fld_range_type(range) == LU_SEQ_RANGE_ANY;
+}
+
+/**
+ * Apply type \a flags to \a range.
+ */
+static inline void fld_range_set_type(struct lu_seq_range *range,
+ unsigned int flags)
+{
+ range->lsr_flags |= flags;
+}
+
+/**
+ * Set the MDT type flag on \a range.
+ */
+static inline void fld_range_set_mdt(struct lu_seq_range *range)
+{
+ fld_range_set_type(range, LU_SEQ_RANGE_MDT);
+}
+
+/**
+ * Set the OST type flag on \a range.
+ */
+static inline void fld_range_set_ost(struct lu_seq_range *range)
+{
+ fld_range_set_type(range, LU_SEQ_RANGE_OST);
+}
+
+/**
+ * Set the ANY type flag on \a range.
+ */
+static inline void fld_range_set_any(struct lu_seq_range *range)
+{
+ fld_range_set_type(range, LU_SEQ_RANGE_ANY);
+}
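+
+/*
+ * A minimal usage sketch: initialize a range (helper defined below), tag
+ * it as MDT, then test it with the predicates above.
+ *
+ *	struct lu_seq_range range;
+ *
+ *	lu_seq_range_init(&range);
+ *	fld_range_set_mdt(&range);
+ *	LASSERT(fld_range_is_mdt(&range));
+ */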
+
+/**
+ * Compute the width (number of sequences) of \a range.
+ */
+static inline u64 lu_seq_range_space(const struct lu_seq_range *range)
+{
+ return range->lsr_end - range->lsr_start;
+}
+
+/**
+ * Initialize \a range to zero.
+ */
+static inline void lu_seq_range_init(struct lu_seq_range *range)
+{
+ memset(range, 0, sizeof(*range));
+}
+
+/**
+ * Check whether sequence \a seq lies within \a range.
+ */
+static inline bool lu_seq_range_within(const struct lu_seq_range *range,
+ u64 seq)
+{
+ return seq >= range->lsr_start && seq < range->lsr_end;
+}
+
+/**
+ * Return true if \a range is sane, i.e. lsr_end is not before lsr_start.
+ */
+static inline bool lu_seq_range_is_sane(const struct lu_seq_range *range)
+{
+ return range->lsr_end >= range->lsr_start;
+}
+
+/**
+ * Return true if \a range is zero (both lsr_start and lsr_end are 0).
+ */
+static inline bool lu_seq_range_is_zero(const struct lu_seq_range *range)
+{
+ return range->lsr_start == 0 && range->lsr_end == 0;
+}
+
+/**
+ * Return true if \a range has run out of sequences.
+ */
+static inline bool lu_seq_range_is_exhausted(const struct lu_seq_range *range)
+{
+ return lu_seq_range_space(range) == 0;
+}
+
+/**
+ * Return 0 if \a r1 and \a r2 have the same location (index and flags),
+ * nonzero if they differ.
+ */
+static inline int lu_seq_range_compare_loc(const struct lu_seq_range *r1,
+ const struct lu_seq_range *r2)
+{
+ return r1->lsr_index != r2->lsr_index ||
+ r1->lsr_flags != r2->lsr_flags;
+}
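+
+/*
+ * Worked example for the helpers above (a sketch; the values are
+ * arbitrary): the half-open range [0x100, 0x200) holds 0x100 sequences,
+ * contains 0x1ff but not 0x200, and is sane.
+ *
+ *	struct lu_seq_range r = {
+ *		.lsr_start = 0x100,
+ *		.lsr_end   = 0x200,
+ *	};
+ *
+ *	LASSERT(lu_seq_range_space(&r) == 0x100);
+ *	LASSERT(lu_seq_range_within(&r, 0x1ff));
+ *	LASSERT(!lu_seq_range_within(&r, 0x200));
+ *	LASSERT(lu_seq_range_is_sane(&r));
+ */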
+
+#if !defined(__REQ_LAYOUT_USER__)
+/**
+ * Byte-swap the fields of \a range.
+ */
+void lustre_swab_lu_seq_range(struct lu_seq_range *range);
+#endif
+/**
+ * printf format string and matching argument list for a sequence range
+ */
+#define DRANGE "[%#16.16llx-%#16.16llx]:%x:%s"
+
+#define PRANGE(range) \
+ (range)->lsr_start, \
+ (range)->lsr_end, \
+ (range)->lsr_index, \
+ fld_range_is_mdt(range) ? "mdt" : "ost"
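+
+/*
+ * DRANGE and PRANGE are meant to be used together, e.g. (sketch):
+ *
+ *	CDEBUG(D_INFO, "range " DRANGE "\n", PRANGE(range));
+ *
+ * where range is a struct lu_seq_range pointer.
+ */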
+
+#endif