diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/include')
27 files changed, 1810 insertions, 1900 deletions
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h index 89292c93dcd5..dc685610c4c4 100644 --- a/drivers/staging/lustre/lustre/include/cl_object.h +++ b/drivers/staging/lustre/lustre/include/cl_object.h @@ -59,10 +59,6 @@ * read/write system call it is associated with the single user * thread, that issued the system call). * - * - cl_req represents a collection of pages for a transfer. cl_req is - * constructed by req-forming engine that tries to saturate - * transport with large and continuous transfers. - * * Terminology * * - to avoid confusion high-level I/O operation like read or write system @@ -103,11 +99,8 @@ struct inode; struct cl_device; -struct cl_device_operations; struct cl_object; -struct cl_object_page_operations; -struct cl_object_lock_operations; struct cl_page; struct cl_page_slice; @@ -120,27 +113,7 @@ struct cl_page_operations; struct cl_io; struct cl_io_slice; -struct cl_req; -struct cl_req_slice; - -/** - * Operations for each data device in the client stack. - * - * \see vvp_cl_ops, lov_cl_ops, lovsub_cl_ops, osc_cl_ops - */ -struct cl_device_operations { - /** - * Initialize cl_req. This method is called top-to-bottom on all - * devices in the stack to get them a chance to allocate layer-private - * data, and to attach them to the cl_req by calling - * cl_req_slice_add(). - * - * \see osc_req_init(), lov_req_init(), lovsub_req_init() - * \see vvp_req_init() - */ - int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req); -}; +struct cl_req_attr; /** * Device in the client stack. @@ -150,8 +123,6 @@ struct cl_device_operations { struct cl_device { /** Super-class. */ struct lu_device cd_lu_dev; - /** Per-layer operation vector. */ - const struct cl_device_operations *cd_ops; }; /** \addtogroup cl_object cl_object @@ -267,7 +238,7 @@ struct cl_object_conf { /** * Object layout. This is consumed by lov. */ - struct lustre_md *coc_md; + struct lu_buf coc_layout; /** * Description of particular stripe location in the * cluster. This is consumed by osc. @@ -301,6 +272,26 @@ enum { OBJECT_CONF_WAIT = 2 }; +enum { + CL_LAYOUT_GEN_NONE = (u32)-2, /* layout lock was cancelled */ + CL_LAYOUT_GEN_EMPTY = (u32)-1, /* for empty layout */ +}; + +struct cl_layout { + /** the buffer to return the layout in lov_mds_md format. */ + struct lu_buf cl_buf; + /** size of layout in lov_mds_md format. */ + size_t cl_size; + /** Layout generation. */ + u32 cl_layout_gen; + /** + * True if this is a released file. + * Temporarily added for released file truncate in ll_setattr_raw(). + * It will be removed later. -Jinshan + */ + bool cl_is_released; +}; + /** * Operations implemented for each cl object layer. * @@ -400,6 +391,27 @@ struct cl_object_operations { */ int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj, struct lov_user_md __user *lum); + /** + * Get FIEMAP mapping from the object. + */ + int (*coo_fiemap)(const struct lu_env *env, struct cl_object *obj, + struct ll_fiemap_info_key *fmkey, + struct fiemap *fiemap, size_t *buflen); + /** + * Get layout and generation of the object. + */ + int (*coo_layout_get)(const struct lu_env *env, struct cl_object *obj, + struct cl_layout *layout); + /** + * Get maximum size of the object. + */ + loff_t (*coo_maxbytes)(struct cl_object *obj); + /** + * Set request attributes. + */ + void (*coo_req_attr_set)(const struct lu_env *env, + struct cl_object *obj, + struct cl_req_attr *attr); }; /** @@ -591,7 +603,7 @@ enum cl_page_state { * * - [cl_page_state::CPS_PAGEOUT] page is dirty, the * req-formation engine decides that it wants to include this page - * into an cl_req being constructed, and yanks it from the cache; + * into an RPC being constructed, and yanks it from the cache; * * - [cl_page_state::CPS_FREEING] VM callback is executed to * evict the page form the memory; @@ -660,7 +672,7 @@ enum cl_page_state { * Page is being read in, as a part of a transfer. This is quite * similar to the cl_page_state::CPS_PAGEOUT state, except that * read-in is always "immediate"---there is no such thing a sudden - * construction of read cl_req from cached, presumably not up to date, + * construction of read request from cached, presumably not up to date, * pages. * * Underlying VM page is locked for the duration of transfer. @@ -714,8 +726,6 @@ struct cl_page { struct list_head cp_batch; /** List of slices. Immutable after creation. */ struct list_head cp_layers; - /** Linkage of pages within cl_req. */ - struct list_head cp_flight; /** * Page state. This field is const to avoid accidental update, it is * modified only internally within cl_page.c. Protected by a VM lock. @@ -732,12 +742,6 @@ struct cl_page { * by sub-io. Protected by a VM lock. */ struct cl_io *cp_owner; - /** - * Owning IO request in cl_page_state::CPS_PAGEOUT and - * cl_page_state::CPS_PAGEIN states. This field is maintained only in - * the top-level pages. Protected by a VM lock. - */ - struct cl_req *cp_req; /** List of references to this page, for debugging. */ struct lu_ref cp_reference; /** Link to an object, for debugging. */ @@ -779,7 +783,6 @@ enum cl_lock_mode { /** * Requested transfer type. - * \ingroup cl_req */ enum cl_req_type { CRT_READ, @@ -884,26 +887,6 @@ struct cl_page_operations { /** Destructor. Frees resources and slice itself. */ void (*cpo_fini)(const struct lu_env *env, struct cl_page_slice *slice); - - /** - * Checks whether the page is protected by a cl_lock. This is a - * per-layer method, because certain layers have ways to check for the - * lock much more efficiently than through the generic locks scan, or - * implement locking mechanisms separate from cl_lock, e.g., - * LL_FILE_GROUP_LOCKED in vvp. If \a pending is true, check for locks - * being canceled, or scheduled for cancellation as soon as the last - * user goes away, too. - * - * \retval -EBUSY: page is protected by a lock of a given mode; - * \retval -ENODATA: page is not protected by a lock; - * \retval 0: this layer cannot decide. - * - * \see cl_page_is_under_lock() - */ - int (*cpo_is_under_lock)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io, pgoff_t *max); - /** * Optional debugging helper. Prints given page slice. * @@ -915,8 +898,7 @@ struct cl_page_operations { /** * \name transfer * - * Transfer methods. See comment on cl_req for a description of - * transfer formation and life-cycle. + * Transfer methods. * * @{ */ @@ -962,7 +944,7 @@ struct cl_page_operations { int ioret); /** * Called when cached page is about to be added to the - * cl_req as a part of req formation. + * ptlrpc request as a part of req formation. * * \return 0 : proceed with this page; * \return -EAGAIN : skip this page; @@ -1365,7 +1347,6 @@ struct cl_2queue { * (3) sort all locks to avoid dead-locks, and acquire them * * (4) process the chunk: call per-page methods - * (cl_io_operations::cio_read_page() for read, * cl_io_operations::cio_prepare_write(), * cl_io_operations::cio_commit_write() for write) * @@ -1388,6 +1369,8 @@ enum cl_io_type { CIT_WRITE, /** truncate, utime system calls */ CIT_SETATTR, + /** get data version */ + CIT_DATA_VERSION, /** * page fault handling */ @@ -1467,6 +1450,31 @@ struct cl_io_slice { typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *, struct cl_page *); + +struct cl_read_ahead { + /* + * Maximum page index the readahead window will end. + * This is determined DLM lock coverage, RPC and stripe boundary. + * cra_end is included. + */ + pgoff_t cra_end; + /* + * Release routine. If readahead holds resources underneath, this + * function should be called to release it. + */ + void (*cra_release)(const struct lu_env *env, void *cbdata); + /* Callback data for cra_release routine */ + void *cra_cbdata; +}; + +static inline void cl_read_ahead_release(const struct lu_env *env, + struct cl_read_ahead *ra) +{ + if (ra->cra_release) + ra->cra_release(env, ra->cra_cbdata); + memset(ra, 0, sizeof(*ra)); +} + /** * Per-layer io operations. * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops @@ -1573,16 +1581,13 @@ struct cl_io_operations { struct cl_page_list *queue, int from, int to, cl_commit_cbt cb); /** - * Read missing page. - * - * Called by a top-level cl_io_operations::op[CIT_READ]::cio_start() - * method, when it hits not-up-to-date page in the range. Optional. + * Decide maximum read ahead extent * * \pre io->ci_type == CIT_READ */ - int (*cio_read_page)(const struct lu_env *env, - const struct cl_io_slice *slice, - const struct cl_page_slice *page); + int (*cio_read_ahead)(const struct lu_env *env, + const struct cl_io_slice *slice, + pgoff_t start, struct cl_read_ahead *ra); /** * Optional debugging helper. Print given io slice. */ @@ -1765,10 +1770,15 @@ struct cl_io { struct cl_io_rw_common ci_rw; struct cl_setattr_io { struct ost_lvb sa_attr; + unsigned int sa_attr_flags; unsigned int sa_valid; int sa_stripe_index; - struct lu_fid *sa_parent_fid; + const struct lu_fid *sa_parent_fid; } ci_setattr; + struct cl_data_version_io { + u64 dv_data_version; + int dv_flags; + } ci_data_version; struct cl_fault_io { /** page index within file. */ pgoff_t ft_index; @@ -1836,179 +1846,20 @@ struct cl_io { /** @} cl_io */ -/** \addtogroup cl_req cl_req - * @{ - */ -/** \struct cl_req - * Transfer. - * - * There are two possible modes of transfer initiation on the client: - * - * - immediate transfer: this is started when a high level io wants a page - * or a collection of pages to be transferred right away. Examples: - * read-ahead, synchronous read in the case of non-page aligned write, - * page write-out as a part of extent lock cancellation, page write-out - * as a part of memory cleansing. Immediate transfer can be both - * cl_req_type::CRT_READ and cl_req_type::CRT_WRITE; - * - * - opportunistic transfer (cl_req_type::CRT_WRITE only), that happens - * when io wants to transfer a page to the server some time later, when - * it can be done efficiently. Example: pages dirtied by the write(2) - * path. - * - * In any case, transfer takes place in the form of a cl_req, which is a - * representation for a network RPC. - * - * Pages queued for an opportunistic transfer are cached until it is decided - * that efficient RPC can be composed of them. This decision is made by "a - * req-formation engine", currently implemented as a part of osc - * layer. Req-formation depends on many factors: the size of the resulting - * RPC, whether or not multi-object RPCs are supported by the server, - * max-rpc-in-flight limitations, size of the dirty cache, etc. - * - * For the immediate transfer io submits a cl_page_list, that req-formation - * engine slices into cl_req's, possibly adding cached pages to some of - * the resulting req's. - * - * Whenever a page from cl_page_list is added to a newly constructed req, its - * cl_page_operations::cpo_prep() layer methods are called. At that moment, - * page state is atomically changed from cl_page_state::CPS_OWNED to - * cl_page_state::CPS_PAGEOUT or cl_page_state::CPS_PAGEIN, cl_page::cp_owner - * is zeroed, and cl_page::cp_req is set to the - * req. cl_page_operations::cpo_prep() method at the particular layer might - * return -EALREADY to indicate that it does not need to submit this page - * at all. This is possible, for example, if page, submitted for read, - * became up-to-date in the meantime; and for write, the page don't have - * dirty bit marked. \see cl_io_submit_rw() - * - * Whenever a cached page is added to a newly constructed req, its - * cl_page_operations::cpo_make_ready() layer methods are called. At that - * moment, page state is atomically changed from cl_page_state::CPS_CACHED to - * cl_page_state::CPS_PAGEOUT, and cl_page::cp_req is set to - * req. cl_page_operations::cpo_make_ready() method at the particular layer - * might return -EAGAIN to indicate that this page is not eligible for the - * transfer right now. - * - * FUTURE - * - * Plan is to divide transfers into "priority bands" (indicated when - * submitting cl_page_list, and queuing a page for the opportunistic transfer) - * and allow glueing of cached pages to immediate transfers only within single - * band. This would make high priority transfers (like lock cancellation or - * memory pressure induced write-out) really high priority. - * - */ - /** * Per-transfer attributes. */ struct cl_req_attr { + enum cl_req_type cra_type; + u64 cra_flags; + struct cl_page *cra_page; + /** Generic attributes for the server consumption. */ struct obdo *cra_oa; /** Jobid */ char cra_jobid[LUSTRE_JOBID_SIZE]; }; -/** - * Transfer request operations definable at every layer. - * - * Concurrency: transfer formation engine synchronizes calls to all transfer - * methods. - */ -struct cl_req_operations { - /** - * Invoked top-to-bottom by cl_req_prep() when transfer formation is - * complete (all pages are added). - * - * \see osc_req_prep() - */ - int (*cro_prep)(const struct lu_env *env, - const struct cl_req_slice *slice); - /** - * Called top-to-bottom to fill in \a oa fields. This is called twice - * with different flags, see bug 10150 and osc_build_req(). - * - * \param obj an object from cl_req which attributes are to be set in - * \a oa. - * - * \param oa struct obdo where attributes are placed - * - * \param flags \a oa fields to be filled. - */ - void (*cro_attr_set)(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *attr, u64 flags); - /** - * Called top-to-bottom from cl_req_completion() to notify layers that - * transfer completed. Has to free all state allocated by - * cl_device_operations::cdo_req_init(). - */ - void (*cro_completion)(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret); -}; - -/** - * A per-object state that (potentially multi-object) transfer request keeps. - */ -struct cl_req_obj { - /** object itself */ - struct cl_object *ro_obj; - /** reference to cl_req_obj::ro_obj. For debugging. */ - struct lu_ref_link ro_obj_ref; - /* something else? Number of pages for a given object? */ -}; - -/** - * Transfer request. - * - * Transfer requests are not reference counted, because IO sub-system owns - * them exclusively and knows when to free them. - * - * Life cycle. - * - * cl_req is created by cl_req_alloc() that calls - * cl_device_operations::cdo_req_init() device methods to allocate per-req - * state in every layer. - * - * Then pages are added (cl_req_page_add()), req keeps track of all objects it - * contains pages for. - * - * Once all pages were collected, cl_page_operations::cpo_prep() method is - * called top-to-bottom. At that point layers can modify req, let it pass, or - * deny it completely. This is to support things like SNS that have transfer - * ordering requirements invisible to the individual req-formation engine. - * - * On transfer completion (or transfer timeout, or failure to initiate the - * transfer of an allocated req), cl_req_operations::cro_completion() method - * is called, after execution of cl_page_operations::cpo_completion() of all - * req's pages. - */ -struct cl_req { - enum cl_req_type crq_type; - /** A list of pages being transferred */ - struct list_head crq_pages; - /** Number of pages in cl_req::crq_pages */ - unsigned crq_nrpages; - /** An array of objects which pages are in ->crq_pages */ - struct cl_req_obj *crq_o; - /** Number of elements in cl_req::crq_objs[] */ - unsigned crq_nrobjs; - struct list_head crq_layers; -}; - -/** - * Per-layer state for request. - */ -struct cl_req_slice { - struct cl_req *crs_req; - struct cl_device *crs_dev; - struct list_head crs_linkage; - const struct cl_req_operations *crs_ops; -}; - -/* @} cl_req */ - enum cache_stats_item { /** how many cache lookups were performed */ CS_lookup = 0, @@ -2153,9 +2004,6 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, const struct cl_lock_operations *ops); void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice, struct cl_object *obj, const struct cl_io_operations *ops); -void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice, - struct cl_device *dev, - const struct cl_req_operations *ops); /** @} helpers */ /** \defgroup cl_object cl_object @@ -2183,6 +2031,12 @@ int cl_object_prune(const struct lu_env *env, struct cl_object *obj); void cl_object_kill(const struct lu_env *env, struct cl_object *obj); int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj, struct lov_user_md __user *lum); +int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj, + struct ll_fiemap_info_key *fmkey, struct fiemap *fiemap, + size_t *buflen); +int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj, + struct cl_layout *cl); +loff_t cl_object_maxbytes(struct cl_object *obj); /** * Returns true, iff \a o0 and \a o1 are slices of the same object. @@ -2302,8 +2156,6 @@ void cl_page_discard(const struct lu_env *env, struct cl_io *io, void cl_page_delete(const struct lu_env *env, struct cl_page *pg); int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg); void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate); -int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, pgoff_t *max_index); loff_t cl_offset(const struct cl_object *obj, pgoff_t idx); pgoff_t cl_index(const struct cl_object *obj, loff_t offset); size_t cl_page_size(const struct cl_object *obj); @@ -2414,8 +2266,6 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io, struct cl_io_lock_link *link); int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, struct cl_lock_descr *descr); -int cl_io_read_page(const struct lu_env *env, struct cl_io *io, - struct cl_page *page); int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, enum cl_req_type iot, struct cl_2queue *queue); int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, @@ -2424,6 +2274,8 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, int from, int to, cl_commit_cbt cb); +int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io, + pgoff_t start, struct cl_read_ahead *ra); int cl_io_is_going(const struct lu_env *env); /** @@ -2520,19 +2372,8 @@ void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page); /** @} cl_page_list */ -/** \defgroup cl_req cl_req - * @{ - */ -struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page, - enum cl_req_type crt, int nr_objects); - -void cl_req_page_add(const struct lu_env *env, struct cl_req *req, - struct cl_page *page); -void cl_req_page_done(const struct lu_env *env, struct cl_page *page); -int cl_req_prep(const struct lu_env *env, struct cl_req *req); -void cl_req_attr_set(const struct lu_env *env, struct cl_req *req, - struct cl_req_attr *attr, u64 flags); -void cl_req_completion(const struct lu_env *env, struct cl_req *req, int ioret); +void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj, + struct cl_req_attr *attr); /** \defgroup cl_sync_io cl_sync_io * @{ @@ -2568,8 +2409,6 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor); /** @} cl_sync_io */ -/** @} cl_req */ - /** \defgroup cl_env cl_env * * lu_env handling for a client. @@ -2593,35 +2432,13 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor); * - allocation and destruction of environment is amortized by caching no * longer used environments instead of destroying them; * - * - there is a notion of "current" environment, attached to the kernel - * data structure representing current thread Top-level lustre code - * allocates an environment and makes it current, then calls into - * non-lustre code, that in turn calls lustre back. Low-level lustre - * code thus called can fetch environment created by the top-level code - * and reuse it, avoiding additional environment allocation. - * Right now, three interfaces can attach the cl_env to running thread: - * - cl_env_get - * - cl_env_implant - * - cl_env_reexit(cl_env_reenter had to be called priorly) - * * \see lu_env, lu_context, lu_context_key * @{ */ -struct cl_env_nest { - int cen_refcheck; - void *cen_cookie; -}; - struct lu_env *cl_env_get(int *refcheck); struct lu_env *cl_env_alloc(int *refcheck, __u32 tags); -struct lu_env *cl_env_nested_get(struct cl_env_nest *nest); void cl_env_put(struct lu_env *env, int *refcheck); -void cl_env_nested_put(struct cl_env_nest *nest, struct lu_env *env); -void *cl_env_reenter(void); -void cl_env_reexit(void *cookie); -void cl_env_implant(struct lu_env *env, int *refcheck); -void cl_env_unplant(struct lu_env *env, int *refcheck); unsigned int cl_env_cache_purge(unsigned int nr); struct lu_env *cl_env_percpu_get(void); void cl_env_percpu_put(struct lu_env *env); diff --git a/drivers/staging/lustre/lustre/include/llog_swab.h b/drivers/staging/lustre/lustre/include/llog_swab.h new file mode 100644 index 000000000000..fd7ffb154ad1 --- /dev/null +++ b/drivers/staging/lustre/lustre/include/llog_swab.h @@ -0,0 +1,65 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + * + * Copyright 2015 Cray Inc, all rights reserved. + * Author: Ben Evans. + * + * We assume all nodes are either little-endian or big-endian, and we + * always send messages in the sender's native format. The receiver + * detects the message format by checking the 'magic' field of the message + * (see lustre_msg_swabbed() below). + * + * Each type has corresponding 'lustre_swab_xxxtypexxx()' routines + * are implemented in ptlrpc/pack_generic.c. These 'swabbers' convert the + * type from "other" endian, in-place in the message buffer. + * + * A swabber takes a single pointer argument. The caller must already have + * verified that the length of the message buffer >= sizeof (type). + * + * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine + * may be defined that swabs just the variable part, after the caller has + * verified that the message buffer is large enough. + */ + +#ifndef _LLOG_SWAB_H_ +#define _LLOG_SWAB_H_ + +#include "lustre/lustre_idl.h" +struct lustre_cfg; + +void lustre_swab_lu_fid(struct lu_fid *fid); +void lustre_swab_ost_id(struct ost_id *oid); +void lustre_swab_llogd_body(struct llogd_body *d); +void lustre_swab_llog_hdr(struct llog_log_hdr *h); +void lustre_swab_llogd_conn_body(struct llogd_conn_body *d); +void lustre_swab_llog_rec(struct llog_rec_hdr *rec); +void lustre_swab_lu_seq_range(struct lu_seq_range *range); +void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); +void lustre_swab_cfg_marker(struct cfg_marker *marker, + int swab, int size); + +#endif diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h index cc0713ef8ae5..62753dae0bfa 100644 --- a/drivers/staging/lustre/lustre/include/lprocfs_status.h +++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h @@ -43,6 +43,8 @@ #include <linux/spinlock.h> #include <linux/types.h> +#include "../../include/linux/libcfs/libcfs.h" +#include "lustre_cfg.h" #include "lustre/lustre_idl.h" struct lprocfs_vars { @@ -540,7 +542,8 @@ lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags); void lprocfs_clear_stats(struct lprocfs_stats *stats); void lprocfs_free_stats(struct lprocfs_stats **stats); void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned conf, const char *name, const char *units); + unsigned int conf, const char *name, + const char *units); struct obd_export; int lprocfs_exp_cleanup(struct obd_export *exp); struct dentry *ldebugfs_add_simple(struct dentry *root, @@ -701,9 +704,9 @@ static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store) extern const struct sysfs_ops lustre_sysfs_ops; struct root_squash_info; -int lprocfs_wr_root_squash(const char *buffer, unsigned long count, +int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count, struct root_squash_info *squash, char *name); -int lprocfs_wr_nosquash_nids(const char *buffer, unsigned long count, +int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count, struct root_squash_info *squash, char *name); /* all quota proc functions */ diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h index c2340d643e84..b8ad5559a3b9 100644 --- a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h +++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h @@ -41,79 +41,24 @@ #ifndef _LUSTRE_FIEMAP_H #define _LUSTRE_FIEMAP_H -struct ll_fiemap_extent { - __u64 fe_logical; /* logical offset in bytes for the start of - * the extent from the beginning of the file - */ - __u64 fe_physical; /* physical offset in bytes for the start - * of the extent from the beginning of the disk - */ - __u64 fe_length; /* length in bytes for this extent */ - __u64 fe_reserved64[2]; - __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ - __u32 fe_device; /* device number for this extent */ - __u32 fe_reserved[2]; -}; - -struct ll_user_fiemap { - __u64 fm_start; /* logical offset (inclusive) at - * which to start mapping (in) - */ - __u64 fm_length; /* logical length of mapping which - * userspace wants (in) - */ - __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ - __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ - __u32 fm_extent_count; /* size of fm_extents array (in) */ - __u32 fm_reserved; - struct ll_fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ -}; - -#define FIEMAP_MAX_OFFSET (~0ULL) +#ifndef __KERNEL__ +#include <stddef.h> +#include <fiemap.h> +#endif -#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before - * map - */ -#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute - * tree - */ -#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */ -#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */ -#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending. - * Sets EXTENT_UNKNOWN. - */ -#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read - * while fs is unmounted - */ -#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs. - * Sets EXTENT_NO_DIRECT. - */ -#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be - * block aligned. - */ -#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata. - * Sets EXTENT_NOT_ALIGNED.*/ -#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block. - * Sets EXTENT_NOT_ALIGNED. - */ -#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but - * no data (i.e. zero). - */ -#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively - * support extents. Result - * merged for efficiency. - */ +/* XXX: We use fiemap_extent::fe_reserved[0] */ +#define fe_device fe_reserved[0] static inline size_t fiemap_count_to_size(size_t extent_count) { - return (sizeof(struct ll_user_fiemap) + extent_count * - sizeof(struct ll_fiemap_extent)); + return sizeof(struct fiemap) + extent_count * + sizeof(struct fiemap_extent); } static inline unsigned fiemap_size_to_count(size_t array_size) { - return ((array_size - sizeof(struct ll_user_fiemap)) / - sizeof(struct ll_fiemap_extent)); + return (array_size - sizeof(struct fiemap)) / + sizeof(struct fiemap_extent); } #define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */ diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h index 72eaee95c6b8..65ce503ad595 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h @@ -48,8 +48,7 @@ * that the Lustre wire protocol is not influenced by external dependencies. * * The only other acceptable items in this file are VERY SIMPLE accessor - * functions to avoid callers grubbing inside the structures, and the - * prototypes of the swabber functions for each struct. Nothing that + * functions to avoid callers grubbing inside the structures. Nothing that * depends on external functions or definitions should be in here. * * Structs must be properly aligned to put 64-bit values on an 8-byte @@ -64,23 +63,6 @@ * in the code to ensure that new/old clients that see this larger struct * do not fail, otherwise you need to implement protocol compatibility). * - * We assume all nodes are either little-endian or big-endian, and we - * always send messages in the sender's native format. The receiver - * detects the message format by checking the 'magic' field of the message - * (see lustre_msg_swabbed() below). - * - * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines, - * implemented either here, inline (trivial implementations) or in - * ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other" - * endian, in-place in the message buffer. - * - * A swabber takes a single pointer argument. The caller must already have - * verified that the length of the message buffer >= sizeof (type). - * - * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine - * may be defined that swabs just the variable part, after the caller has - * verified that the message buffer is large enough. - * * @{ */ @@ -192,113 +174,6 @@ struct lu_seq_range_array { #define LU_SEQ_RANGE_MASK 0x3 -static inline unsigned fld_range_type(const struct lu_seq_range *range) -{ - return range->lsr_flags & LU_SEQ_RANGE_MASK; -} - -static inline bool fld_range_is_ost(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_OST; -} - -static inline bool fld_range_is_mdt(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_MDT; -} - -/** - * This all range is only being used when fld client sends fld query request, - * but it does not know whether the seq is MDT or OST, so it will send req - * with ALL type, which means either seq type gotten from lookup can be - * expected. - */ -static inline unsigned fld_range_is_any(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_ANY; -} - -static inline void fld_range_set_type(struct lu_seq_range *range, - unsigned flags) -{ - range->lsr_flags |= flags; -} - -static inline void fld_range_set_mdt(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_MDT); -} - -static inline void fld_range_set_ost(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_OST); -} - -static inline void fld_range_set_any(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_ANY); -} - -/** - * returns width of given range \a r - */ - -static inline __u64 range_space(const struct lu_seq_range *range) -{ - return range->lsr_end - range->lsr_start; -} - -/** - * initialize range to zero - */ - -static inline void range_init(struct lu_seq_range *range) -{ - memset(range, 0, sizeof(*range)); -} - -/** - * check if given seq id \a s is within given range \a r - */ - -static inline bool range_within(const struct lu_seq_range *range, - __u64 s) -{ - return s >= range->lsr_start && s < range->lsr_end; -} - -static inline bool range_is_sane(const struct lu_seq_range *range) -{ - return (range->lsr_end >= range->lsr_start); -} - -static inline bool range_is_zero(const struct lu_seq_range *range) -{ - return (range->lsr_start == 0 && range->lsr_end == 0); -} - -static inline bool range_is_exhausted(const struct lu_seq_range *range) - -{ - return range_space(range) == 0; -} - -/* return 0 if two range have the same location */ -static inline int range_compare_loc(const struct lu_seq_range *r1, - const struct lu_seq_range *r2) -{ - return r1->lsr_index != r2->lsr_index || - r1->lsr_flags != r2->lsr_flags; -} - -#define DRANGE "[%#16.16Lx-%#16.16Lx):%x:%s" - -#define PRANGE(range) \ - (range)->lsr_start, \ - (range)->lsr_end, \ - (range)->lsr_index, \ - fld_range_is_mdt(range) ? "mdt" : "ost" - /** \defgroup lu_fid lu_fid * @{ */ @@ -310,7 +185,7 @@ static inline int range_compare_loc(const struct lu_seq_range *r1, */ enum lma_compat { LMAC_HSM = 0x00000001, - LMAC_SOM = 0x00000002, +/* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */ LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */ LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is * under /O/<seq>/d<x>. @@ -644,13 +519,14 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) { if (fid_seq_is_mdt0(oi->oi.oi_seq)) { if (oid >= IDIF_MAX_OID) { - CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi)); + CERROR("Too large OID %#llx to set MDT0 " DOSTID "\n", + oid, POSTID(oi)); return; } oi->oi.oi_id = oid; } else if (fid_is_idif(&oi->oi_fid)) { if (oid >= IDIF_MAX_OID) { - CERROR("Bad %llu to set "DOSTID"\n", + CERROR("Too large OID %#llx to set IDIF " DOSTID "\n", oid, POSTID(oi)); return; } @@ -676,7 +552,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid) if (fid_is_idif(fid)) { if (oid >= IDIF_MAX_OID) { - CERROR("Too large OID %#llx to set IDIF "DFID"\n", + CERROR("Too large OID %#llx to set IDIF " DFID "\n", (unsigned long long)oid, PFID(fid)); return -EBADF; } @@ -685,7 +561,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid) fid->f_ver = oid >> 48; } else { if (oid >= OBIF_MAX_OID) { - CERROR("Too large OID %#llx to set REG "DFID"\n", + CERROR("Too large OID %#llx to set REG " DFID "\n", (unsigned long long)oid, PFID(fid)); return -EBADF; } @@ -785,8 +661,6 @@ static inline ino_t lu_igif_ino(const struct lu_fid *fid) return fid_seq(fid); } -void lustre_swab_ost_id(struct ost_id *oid); - /** * Get inode generation from a igif. * \param fid a igif to get inode generation from. @@ -847,9 +721,6 @@ static inline bool fid_is_sane(const struct lu_fid *fid) fid_seq_is_rsvd(fid_seq(fid))); } -void lustre_swab_lu_fid(struct lu_fid *fid); -void lustre_swab_lu_seq_range(struct lu_seq_range *range); - static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1) { return memcmp(f0, f1, sizeof(*f0)) == 0; @@ -1099,8 +970,10 @@ struct ptlrpc_body_v3 { __u32 pb_version; __u32 pb_opc; __u32 pb_status; - __u64 pb_last_xid; - __u64 pb_last_seen; + __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */ + __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */ + __u16 pb_padding0; + __u32 pb_padding1; __u64 pb_last_committed; __u64 pb_transno; __u32 pb_flags; @@ -1112,8 +985,11 @@ struct ptlrpc_body_v3 { __u64 pb_slv; /* VBR: pre-versions */ __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS]; + __u64 pb_mbits; /**< match bits for bulk request */ /* padding for future needs */ - __u64 pb_padding[4]; + __u64 pb_padding64_0; + __u64 pb_padding64_1; + __u64 pb_padding64_2; char pb_jobid[LUSTRE_JOBID_SIZE]; }; @@ -1125,8 +1001,10 @@ struct ptlrpc_body_v2 { __u32 pb_version; __u32 pb_opc; __u32 pb_status; - __u64 pb_last_xid; - __u64 pb_last_seen; + __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */ + __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */ + __u16 pb_padding0; + __u32 pb_padding1; __u64 pb_last_committed; __u64 pb_transno; __u32 pb_flags; @@ -1140,12 +1018,13 @@ struct ptlrpc_body_v2 { __u64 pb_slv; /* VBR: pre-versions */ __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS]; + __u64 pb_mbits; /**< unused in V2 */ /* padding for future needs */ - __u64 pb_padding[4]; + __u64 pb_padding64_0; + __u64 pb_padding64_1; + __u64 pb_padding64_2; }; -void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); - /* message body offset for lustre_msg_v2 */ /* ptlrpc body offset in all request/reply messages */ #define MSG_PTLRPC_BODY_OFF 0 @@ -1282,7 +1161,16 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); */ #define OBD_CONNECT_LFSCK 0x40000000000000ULL/* support online LFSCK */ #define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */ +#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify + * RPCs in parallel + */ #define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL/* striped DNE dir */ +#define OBD_CONNECT_SUBTREE 0x800000000000000ULL /* fileset mount */ +#define OBD_CONNECT_LOCK_AHEAD 0x1000000000000000ULL /* lock ahead */ +/** bulk matchbits is sent within ptlrpc_body */ +#define OBD_CONNECT_BULK_MBITS 0x2000000000000000ULL +#define OBD_CONNECT_OBDOPACK 0x4000000000000000ULL /* compact OUT obdo */ +#define OBD_CONNECT_FLAGS2 0x8000000000000000ULL /* second flags word */ /* XXX README XXX: * Please DO NOT add flag values here before first ensuring that this same @@ -1313,25 +1201,6 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); * If we eventually have separate connect data for different types, which we * almost certainly will, then perhaps we stick a union in here. */ -struct obd_connect_data_v1 { - __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ - __u32 ocd_version; /* lustre release version number */ - __u32 ocd_grant; /* initial cache grant amount (bytes) */ - __u32 ocd_index; /* LOV index to connect to */ - __u32 ocd_brw_size; /* Maximum BRW size in bytes, must be 2^n */ - __u64 ocd_ibits_known; /* inode bits this client understands */ - __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */ - __u8 ocd_inodespace; /* log2 of the per-inode space consumption */ - __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */ - __u32 ocd_unused; /* also fix lustre_swab_connect */ - __u64 ocd_transno; /* first transno from client to be replayed */ - __u32 ocd_group; /* MDS group on OST */ - __u32 ocd_cksum_types; /* supported checksum algorithms */ - __u32 ocd_max_easize; /* How big LOV EA can be on MDS */ - __u32 ocd_instance; /* also fix lustre_swab_connect */ - __u64 ocd_maxbytes; /* Maximum stripe size in bytes */ -}; - struct obd_connect_data { __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ __u32 ocd_version; /* lustre release version number */ @@ -1354,8 +1223,10 @@ struct obd_connect_data { * any field after ocd_maxbytes on the receiver without a valid flag * may result in out-of-bound memory access and kernel oops. */ - __u64 padding1; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 padding2; /* added 2.1.0. also fix lustre_swab_connect */ + __u16 ocd_maxmodrpcs; /* Maximum modify RPCs in parallel */ + __u16 padding0; /* added 2.1.0. also fix lustre_swab_connect */ + __u32 padding1; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 ocd_connect_flags2; __u64 padding3; /* added 2.1.0. also fix lustre_swab_connect */ __u64 padding4; /* added 2.1.0. also fix lustre_swab_connect */ __u64 padding5; /* added 2.1.0. also fix lustre_swab_connect */ @@ -1380,8 +1251,6 @@ struct obd_connect_data { * reserve the flag for future use. */ -void lustre_swab_connect(struct obd_connect_data *ocd); - /* * Supported checksum algorithms. Up to 32 checksum types are supported. * (32-bit mask stored in obd_connect_data::ocd_cksum_types) @@ -1416,7 +1285,7 @@ enum ost_cmd { OST_STATFS = 13, OST_SYNC = 16, OST_SET_INFO = 17, - OST_QUOTACHECK = 18, + OST_QUOTACHECK = 18, /* not used since 2.4 */ OST_QUOTACTL = 19, OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */ OST_LAST_OPC @@ -1580,8 +1449,6 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi, dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq); } -/* extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); */ - #define MAX_MD_SIZE \ (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) #define MIN_MD_SIZE \ @@ -1674,7 +1541,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */ #define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */ /*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */ -#define OBD_MD_FLCOOKIE (0x00800000ULL) /* log cancellation cookie */ +/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */ #define OBD_MD_FLGROUP (0x01000000ULL) /* group */ #define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */ #define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */ @@ -1713,7 +1580,9 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) /* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */ #define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */ -#define OBD_MD_FLRELEASED (0x0020000000000000ULL) /* file released */ +#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent + * executed + */ #define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */ @@ -1742,11 +1611,6 @@ struct hsm_state_set { __u64 hss_clearmask; }; -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_state_set(struct hsm_state_set *hss); - -void lustre_swab_obd_statfs(struct obd_statfs *os); - /* ost_body.data values for OST_BRW */ #define OBD_BRW_READ 0x01 @@ -1786,14 +1650,16 @@ struct obd_ioobj { __u32 ioo_bufcnt; /* number of niobufs for this object */ }; +/* + * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in + * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS. + * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits. + */ #define IOOBJ_MAX_BRW_BITS 16 -#define IOOBJ_TYPE_MASK ((1U << IOOBJ_MAX_BRW_BITS) - 1) #define ioobj_max_brw_get(ioo) (((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1) #define ioobj_max_brw_set(ioo, num) \ do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0) -void lustre_swab_obd_ioobj(struct obd_ioobj *ioo); - /* multiple of 8 bytes => can array */ struct niobuf_remote { __u64 rnb_offset; @@ -1801,8 +1667,6 @@ struct niobuf_remote { __u32 rnb_flags; }; -void lustre_swab_niobuf_remote(struct niobuf_remote *nbr); - /* lock value block communicated between the filter and llite */ /* OST_LVB_ERR_INIT is needed because the return code in rc is @@ -1824,8 +1688,6 @@ struct ost_lvb_v1 { __u64 lvb_blocks; }; -void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb); - struct ost_lvb { __u64 lvb_size; __s64 lvb_mtime; @@ -1838,8 +1700,6 @@ struct ost_lvb { __u32 lvb_padding; }; -void lustre_swab_ost_lvb(struct ost_lvb *lvb); - /* * lquota data structures */ @@ -1866,8 +1726,6 @@ struct obd_quotactl { struct obd_dqblk qc_dqblk; }; -void lustre_swab_obd_quotactl(struct obd_quotactl *q); - #define Q_COPY(out, in, member) (out)->member = (in)->member #define QCTL_COPY(out, in) \ @@ -1905,8 +1763,6 @@ struct lquota_lvb { __u64 lvb_pad1; }; -void lustre_swab_lquota_lvb(struct lquota_lvb *lvb); - /* op codes */ enum quota_cmd { QUOTA_DQACQ = 601, @@ -1933,9 +1789,9 @@ enum mds_cmd { MDS_PIN = 42, /* obsolete, never used in a release */ MDS_UNPIN = 43, /* obsolete, never used in a release */ MDS_SYNC = 44, - MDS_DONE_WRITING = 45, + MDS_DONE_WRITING = 45, /* obsolete since 2.8.0 */ MDS_SET_INFO = 46, - MDS_QUOTACHECK = 47, + MDS_QUOTACHECK = 47, /* not used since 2.4 */ MDS_QUOTACTL = 48, MDS_GETXATTR = 49, MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */ @@ -1972,8 +1828,6 @@ enum mdt_reint_cmd { REINT_MAX }; -void lustre_swab_generic_32s(__u32 *val); - /* the disposition of the intent outlines what was executed */ #define DISP_IT_EXECD 0x00000001 #define DISP_LOOKUP_EXECD 0x00000002 @@ -2031,36 +1885,19 @@ enum { #define MDS_STATUS_CONN 1 #define MDS_STATUS_LOV 2 -/* mdt_thread_info.mti_flags. */ -enum md_op_flags { - /* The flag indicates Size-on-MDS attributes are changed. */ - MF_SOM_CHANGE = (1 << 0), - /* Flags indicates an epoch opens or closes. */ - MF_EPOCH_OPEN = (1 << 1), - MF_EPOCH_CLOSE = (1 << 2), - MF_MDC_CANCEL_FID1 = (1 << 3), - MF_MDC_CANCEL_FID2 = (1 << 4), - MF_MDC_CANCEL_FID3 = (1 << 5), - MF_MDC_CANCEL_FID4 = (1 << 6), - /* There is a pending attribute update. */ - MF_SOM_AU = (1 << 7), - /* Cancel OST locks while getattr OST attributes. */ - MF_GETATTR_LOCK = (1 << 8), - MF_GET_MDT_IDX = (1 << 9), -}; - -#define MF_SOM_LOCAL_FLAGS (MF_SOM_CHANGE | MF_EPOCH_OPEN | MF_EPOCH_CLOSE) - -#define LUSTRE_BFLAG_UNCOMMITTED_WRITES 0x1 - /* these should be identical to their EXT4_*_FL counterparts, they are * redefined here only to avoid dragging in fs/ext4/ext4.h */ #define LUSTRE_SYNC_FL 0x00000008 /* Synchronous updates */ #define LUSTRE_IMMUTABLE_FL 0x00000010 /* Immutable file */ #define LUSTRE_APPEND_FL 0x00000020 /* writes to file may only append */ +#define LUSTRE_NODUMP_FL 0x00000040 /* do not dump file */ #define LUSTRE_NOATIME_FL 0x00000080 /* do not update atime */ +#define LUSTRE_INDEX_FL 0x00001000 /* hash-indexed directory */ #define LUSTRE_DIRSYNC_FL 0x00010000 /* dirsync behaviour (dir only) */ +#define LUSTRE_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define LUSTRE_DIRECTIO_FL 0x00100000 /* Use direct i/o */ +#define LUSTRE_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ /* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values * for the client inode i_flags. The LUSTRE_*_FL are the Lustre wire @@ -2113,7 +1950,7 @@ struct mdt_body { __u32 mbo_mode; __u32 mbo_uid; __u32 mbo_gid; - __u32 mbo_flags; + __u32 mbo_flags; /* LUSTRE_*_FL file attributes */ __u32 mbo_rdev; __u32 mbo_nlink; /* #bytes to read in the case of MDS_READPAGE */ __u32 mbo_unused2; /* was "generation" until 2.4.0 */ @@ -2121,7 +1958,7 @@ struct mdt_body { __u32 mbo_eadatasize; __u32 mbo_aclsize; __u32 mbo_max_mdsize; - __u32 mbo_max_cookiesize; + __u32 mbo_unused3; /* was max_cookiesize until 2.8 */ __u32 mbo_uid_h; /* high 32-bits of uid, for FUID */ __u32 mbo_gid_h; /* high 32-bits of gid, for FUID */ __u32 mbo_padding_5; /* also fix lustre_swab_mdt_body */ @@ -2132,17 +1969,13 @@ struct mdt_body { __u64 mbo_padding_10; }; /* 216 */ -void lustre_swab_mdt_body(struct mdt_body *b); - struct mdt_ioepoch { - struct lustre_handle handle; - __u64 ioepoch; - __u32 flags; - __u32 padding; + struct lustre_handle mio_handle; + __u64 mio_unused1; /* was ioepoch */ + __u32 mio_unused2; /* was flags */ + __u32 mio_padding; }; -void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b); - /* permissions for md_perm.mp_perm */ enum { CFS_SETUID_PERM = 0x01, @@ -2178,8 +2011,6 @@ struct mdt_rec_setattr { __u32 sa_padding_5; }; -void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); - /* * Attribute flags used in mdt_rec_setattr::sa_valid. * The kernel's #defines for ATTR_* should not be used over the network @@ -2207,12 +2038,9 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); #define MDS_FMODE_CLOSED 00000000 #define MDS_FMODE_EXEC 00000004 -/* IO Epoch is opened on a closed file. */ -#define MDS_FMODE_EPOCH 01000000 -/* IO Epoch is opened on a file truncate. */ -#define MDS_FMODE_TRUNC 02000000 -/* Size-on-MDS Attribute Update is pending. */ -#define MDS_FMODE_SOM 04000000 +/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */ +/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */ +/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */ #define MDS_OPEN_CREATED 00000010 #define MDS_OPEN_CROSS 00000020 @@ -2258,7 +2086,7 @@ enum mds_op_bias { MDS_CROSS_REF = 1 << 1, MDS_VTX_BYPASS = 1 << 2, MDS_PERM_BYPASS = 1 << 3, - MDS_SOM = 1 << 4, +/* MDS_SOM = 1 << 4, obsolete since 2.8.0 */ MDS_QUOTA_IGNORE = 1 << 5, MDS_CLOSE_CLEANUP = 1 << 6, MDS_KEEP_ORPHAN = 1 << 7, @@ -2268,6 +2096,7 @@ enum mds_op_bias { MDS_OWNEROVERRIDE = 1 << 11, MDS_HSM_RELEASE = 1 << 12, MDS_RENAME_MIGRATE = BIT(13), + MDS_CLOSE_LAYOUT_SWAP = BIT(14), }; /* instance of mdt_reint_rec */ @@ -2456,8 +2285,6 @@ struct mdt_rec_reint { __u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */ }; -void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); - /* lmv structures */ struct lmv_desc { __u32 ld_tgt_count; /* how many MDS's */ @@ -2547,8 +2374,6 @@ union lmv_mds_md { struct lmv_user_md lmv_user_md; }; -void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm); - static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic) { ssize_t len = -EINVAL; @@ -2652,8 +2477,6 @@ struct lov_desc { #define ld_magic ld_active_tgt_count /* for swabbing from llogs */ -void lustre_swab_lov_desc(struct lov_desc *ld); - /* * LDLM requests: */ @@ -2749,24 +2572,38 @@ struct ldlm_flock_wire { * on the resource type. */ -typedef union { +union ldlm_wire_policy_data { struct ldlm_extent l_extent; struct ldlm_flock_wire l_flock; struct ldlm_inodebits l_inodebits; -} ldlm_wire_policy_data_t; +}; union ldlm_gl_desc { struct ldlm_gl_lquota_desc lquota_desc; }; -void lustre_swab_gl_desc(union ldlm_gl_desc *); +enum ldlm_intent_flags { + IT_OPEN = BIT(0), + IT_CREAT = BIT(1), + IT_OPEN_CREAT = BIT(1) | BIT(0), + IT_READDIR = BIT(2), + IT_GETATTR = BIT(3), + IT_LOOKUP = BIT(4), + IT_UNLINK = BIT(5), + IT_TRUNC = BIT(6), + IT_GETXATTR = BIT(7), + IT_EXEC = BIT(8), + IT_PIN = BIT(9), + IT_LAYOUT = BIT(10), + IT_QUOTA_DQACQ = BIT(11), + IT_QUOTA_CONN = BIT(12), + IT_SETXATTR = BIT(13), +}; struct ldlm_intent { __u64 opc; }; -void lustre_swab_ldlm_intent(struct ldlm_intent *i); - struct ldlm_resource_desc { enum ldlm_type lr_type; __u32 lr_padding; /* also fix lustre_swab_ldlm_resource_desc */ @@ -2777,7 +2614,7 @@ struct ldlm_lock_desc { struct ldlm_resource_desc l_resource; enum ldlm_mode l_req_mode; enum ldlm_mode l_granted_mode; - ldlm_wire_policy_data_t l_policy_data; + union ldlm_wire_policy_data l_policy_data; }; #define LDLM_LOCKREQ_HANDLES 2 @@ -2790,8 +2627,6 @@ struct ldlm_request { struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES]; }; -void lustre_swab_ldlm_request(struct ldlm_request *rq); - /* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available. * Otherwise, 2 are available. */ @@ -2813,8 +2648,6 @@ struct ldlm_reply { __u64 lock_policy_res2; }; -void lustre_swab_ldlm_reply(struct ldlm_reply *r); - #define ldlm_flags_to_wire(flags) ((__u32)(flags)) #define ldlm_flags_from_wire(flags) ((__u64)(flags)) @@ -2858,8 +2691,6 @@ struct mgs_target_info { char mti_params[MTI_PARAM_MAXLEN]; }; -void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo); - struct mgs_nidtbl_entry { __u64 mne_version; /* table version of this entry */ __u32 mne_instance; /* target instance # */ @@ -2874,8 +2705,6 @@ struct mgs_nidtbl_entry { } u; }; -void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo); - struct mgs_config_body { char mcb_name[MTI_NAME_MAXLEN]; /* logname */ __u64 mcb_offset; /* next index of config log to request */ @@ -2885,15 +2714,11 @@ struct mgs_config_body { __u32 mcb_units; /* # of units for bulk transfer */ }; -void lustre_swab_mgs_config_body(struct mgs_config_body *body); - struct mgs_config_res { __u64 mcr_offset; /* index of last config log */ __u64 mcr_size; /* size of the log */ }; -void lustre_swab_mgs_config_res(struct mgs_config_res *body); - /* Config marker flags (in config log) */ #define CM_START 0x01 #define CM_END 0x02 @@ -2913,8 +2738,6 @@ struct cfg_marker { char cm_comment[MTI_NAME_MAXLEN]; }; -void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size); - /* * Opcodes for multiple servers. */ @@ -2922,7 +2745,7 @@ void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size); enum obd_cmd { OBD_PING = 400, OBD_LOG_CANCEL, - OBD_QC_CALLBACK, + OBD_QC_CALLBACK, /* not used since 2.4 */ OBD_IDX_READ, OBD_LAST_OPC }; @@ -3155,23 +2978,32 @@ struct llog_gen_rec { struct llog_rec_tail lgr_tail; }; -/* On-disk header structure of each log object, stored in little endian order */ -#define LLOG_CHUNK_SIZE 8192 -#define LLOG_HEADER_SIZE (96) -#define LLOG_BITMAP_BYTES (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE) - -#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */ - /* flags for the logs */ enum llog_flag { LLOG_F_ZAP_WHEN_EMPTY = 0x1, LLOG_F_IS_CAT = 0x2, LLOG_F_IS_PLAIN = 0x4, LLOG_F_EXT_JOBID = BIT(3), + LLOG_F_IS_FIXSIZE = BIT(4), + /* + * Note: Flags covered by LLOG_F_EXT_MASK will be inherited from + * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here, + * because the catlog record is usually fixed size, but its plain + * log record can be variable + */ LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID, }; +/* On-disk header structure of each log object, stored in little endian order */ +#define LLOG_MIN_CHUNK_SIZE 8192 +#define LLOG_HEADER_SIZE (96) /* sizeof (llog_log_hdr) + + * sizeof(llh_tail) - sizeof(llh_bitmap) + */ +#define LLOG_BITMAP_BYTES (LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE) +#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */ + +/* flags for the logs */ struct llog_log_hdr { struct llog_rec_hdr llh_hdr; __s64 llh_timestamp; @@ -3183,13 +3015,30 @@ struct llog_log_hdr { /* for a catalog the first plain slot is next to it */ struct obd_uuid llh_tgtuuid; __u32 llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23]; + /* These fields must always be at the end of the llog_log_hdr. + * Note: llh_bitmap size is variable because llog chunk size could be + * bigger than LLOG_MIN_CHUNK_SIZE, i.e. sizeof(llog_log_hdr) > 8192 + * bytes, and the real size is stored in llh_hdr.lrh_len, which means + * llh_tail should only be referred by LLOG_HDR_TAIL(). + * But this structure is also used by client/server llog interface + * (see llog_client.c), it will be kept in its original way to avoid + * compatibility issue. + */ __u32 llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)]; struct llog_rec_tail llh_tail; } __packed; -#define LLOG_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \ - llh->llh_bitmap_offset - \ - sizeof(llh->llh_tail)) * 8) +#undef LLOG_HEADER_SIZE +#undef LLOG_BITMAP_BYTES + +#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \ + llh->llh_bitmap_offset - \ + sizeof(llh->llh_tail)) * 8) +#define LLOG_HDR_BITMAP(llh) (__u32 *)((char *)(llh) + \ + (llh)->llh_bitmap_offset) +#define LLOG_HDR_TAIL(llh) ((struct llog_rec_tail *)((char *)llh + \ + llh->llh_hdr.lrh_len - \ + sizeof(llh->llh_tail))) /** log cookies are used to reference a specific log file and a record * therein @@ -3259,7 +3108,8 @@ struct obdo { __u32 o_parent_ver; struct lustre_handle o_handle; /* brw: lock handle to prolong locks */ - struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS + struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS, + * obsolete in 2.8, reused in OSP */ __u32 o_uid_h; __u32 o_gid_h; @@ -3333,30 +3183,11 @@ struct ost_body { /* Key for FIEMAP to be used in get_info calls */ struct ll_fiemap_info_key { - char name[8]; - struct obdo oa; - struct ll_user_fiemap fiemap; + char lfik_name[8]; + struct obdo lfik_oa; + struct fiemap lfik_fiemap; }; -void lustre_swab_ost_body(struct ost_body *b); -void lustre_swab_ost_last_id(__u64 *id); -void lustre_swab_fiemap(struct ll_user_fiemap *fiemap); - -void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum); -void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum); -void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod, - int stripe_count); -void lustre_swab_lov_mds_md(struct lov_mds_md *lmm); - -/* llog_swab.c */ -void lustre_swab_llogd_body(struct llogd_body *d); -void lustre_swab_llog_hdr(struct llog_log_hdr *h); -void lustre_swab_llogd_conn_body(struct llogd_conn_body *d); -void lustre_swab_llog_rec(struct llog_rec_hdr *rec); - -struct lustre_cfg; -void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); - /* Functions for dumping PTLRPC fields */ void dump_rniobuf(struct niobuf_remote *rnb); void dump_ioo(struct obd_ioobj *nb); @@ -3394,8 +3225,6 @@ struct lustre_capa { __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */ } __packed; -void lustre_swab_lustre_capa(struct lustre_capa *c); - /** lustre_capa::lc_opc */ enum { CAPA_OPC_BODY_WRITE = 1 << 0, /**< write object data */ @@ -3458,8 +3287,6 @@ struct getinfo_fid2path { char gf_path[0]; } __packed; -void lustre_swab_fid2path(struct getinfo_fid2path *gf); - /** path2parent request/reply structures */ struct getparent { struct lu_fid gp_fid; /**< parent FID */ @@ -3486,8 +3313,6 @@ struct layout_intent { __u64 li_end; }; -void lustre_swab_layout_intent(struct layout_intent *li); - /** * On the wire version of hsm_progress structure. * @@ -3506,13 +3331,6 @@ struct hsm_progress_kernel { __u64 hpk_padding2; } __packed; -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_current_action(struct hsm_current_action *action); -void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk); -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_user_item(struct hsm_user_item *hui); -void lustre_swab_hsm_request(struct hsm_request *hr); - /** layout swap request structure * fid1 and fid2 are in mdt_body */ @@ -3520,8 +3338,6 @@ struct mdc_swap_layouts { __u64 msl_flags; } __packed; -void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl); - struct close_data { struct lustre_handle cd_handle; struct lu_fid cd_fid; @@ -3529,7 +3345,5 @@ struct close_data { __u64 cd_reserved[8]; }; -void lustre_swab_close_data(struct close_data *data); - #endif /** @} lustreidl */ diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h index f3d7c94c3b50..eb08df33b2db 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h @@ -363,8 +363,8 @@ obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, int max_len) /* OBD_IOC_LOV_GETSTRIPE 155 LL_IOC_LOV_GETSTRIPE */ /* OBD_IOC_LOV_SETEA 156 LL_IOC_LOV_SETEA */ /* lustre/lustre_user.h 157-159 */ -#define OBD_IOC_QUOTACHECK _IOW('f', 160, int) -#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) +/* OBD_IOC_QUOTACHECK _IOW('f', 160, int) */ +/* OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) */ #define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) /* lustre/lustre_user.h 163-176 */ #define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data) diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h index 6fc985571cba..3301ad652db1 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h @@ -63,9 +63,13 @@ #if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64) typedef struct stat64 lstat_t; #define lstat_f lstat64 +#define fstat_f fstat64 +#define fstatat_f fstatat64 #else typedef struct stat lstat_t; #define lstat_f lstat +#define fstat_f fstat +#define fstatat_f fstatat #endif #define HAVE_LOV_USER_MDS_DATA @@ -82,7 +86,6 @@ typedef struct stat lstat_t; #define FSFILT_IOC_SETVERSION _IOW('f', 4, long) #define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long) #define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long) -#define FSFILT_IOC_FIEMAP _IOWR('f', 11, struct ll_user_fiemap) #endif /* FIEMAP flags supported by Lustre */ @@ -235,7 +238,7 @@ struct ost_id { /* #define LL_IOC_POLL_QUOTACHECK 161 OBD_IOC_POLL_QUOTACHECK */ /* #define LL_IOC_QUOTACTL 162 OBD_IOC_QUOTACTL */ #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) -#define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *) +/* IOC_LOV_GETINFO 165 obsolete */ #define LL_IOC_FLUSHCTX _IOW('f', 166, long) /* LL_IOC_RMTACL 167 obsolete */ #define LL_IOC_GETOBDCOUNT _IOR('f', 168, long) @@ -343,6 +346,9 @@ enum ll_lease_type { #define LOV_ALL_STRIPES 0xffff /* only valid for directories */ #define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */ +#define XATTR_LUSTRE_PREFIX "lustre." +#define XATTR_LUSTRE_LOV "lustre.lov" + #define lov_user_ost_data lov_user_ost_data_v1 struct lov_user_ost_data_v1 { /* per-stripe data structure */ struct ost_id l_ost_oi; /* OST object ID */ @@ -451,8 +457,6 @@ static inline int lmv_user_md_size(int stripes, int lmm_magic) stripes * sizeof(struct lmv_user_mds_data); } -void lustre_swab_lmv_user_md(struct lmv_user_md *lum); - struct ll_recreate_obj { __u64 lrc_id; __u32 lrc_ost_idx; @@ -522,25 +526,20 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen) } /* printf display format - * e.g. printf("file FID is "DFID"\n", PFID(fid)); + * * usage: printf("file FID is "DFID"\n", PFID(fid)); */ #define FID_NOBRACE_LEN 40 #define FID_LEN (FID_NOBRACE_LEN + 2) #define DFID_NOBRACE "%#llx:0x%x:0x%x" #define DFID "["DFID_NOBRACE"]" -#define PFID(fid) \ - (fid)->f_seq, \ - (fid)->f_oid, \ - (fid)->f_ver +#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver -/* scanf input parse format -- strip '[' first. - * e.g. sscanf(fidstr, SFID, RFID(&fid)); +/* scanf input parse format for fids in DFID_NOBRACE format + * Need to strip '[' from DFID format first or use "["SFID"]" at caller. + * usage: sscanf(fidstr, SFID, RFID(&fid)); */ #define SFID "0x%llx:0x%x:0x%x" -#define RFID(fid) \ - &((fid)->f_seq), \ - &((fid)->f_oid), \ - &((fid)->f_ver) +#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver) /********* Quotas **********/ @@ -551,23 +550,18 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen) #define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */ /* these must be explicitly translated into linux Q_* in ll_dir_ioctl */ -#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */ -#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */ +#define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */ +#define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */ #define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */ #define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */ #define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */ #define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */ /* lustre-specific control commands */ -#define LUSTRE_Q_INVALIDATE 0x80000b /* invalidate quota data */ -#define LUSTRE_Q_FINVALIDATE 0x80000c /* invalidate filter quota data */ +#define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */ +#define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */ #define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */ -struct if_quotacheck { - char obd_type[16]; - struct obd_uuid obd_uuid; -}; - #define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629 /* permission */ @@ -649,6 +643,7 @@ struct if_quotactl { #define SWAP_LAYOUTS_CHECK_DV2 (1 << 1) #define SWAP_LAYOUTS_KEEP_MTIME (1 << 2) #define SWAP_LAYOUTS_KEEP_ATIME (1 << 3) +#define SWAP_LAYOUTS_CLOSE BIT(4) /* Swap XATTR_NAME_HSM as well, only on the MDT so far */ #define SWAP_LAYOUTS_MDS_HSM (1 << 31) @@ -999,6 +994,7 @@ struct ioc_data_version { * See HSM_FLAGS below. */ enum hsm_states { + HS_NONE = 0x00000000, HS_EXISTS = 0x00000001, HS_DIRTY = 0x00000002, HS_RELEASED = 0x00000004, diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h index 567c438e93cb..300e96fb032a 100644 --- a/drivers/staging/lustre/lustre/include/lustre_compat.h +++ b/drivers/staging/lustre/lustre/include/lustre_compat.h @@ -74,4 +74,6 @@ # define ext2_find_next_zero_bit find_next_zero_bit_le #endif +#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) + #endif /* _LUSTRE_COMPAT_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h index d03534432624..b7e61d082e55 100644 --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h +++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h @@ -59,7 +59,7 @@ struct obd_device; #define OBD_LDLM_DEVICENAME "ldlm" #define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) -#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(36000)) +#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(3900)) /* 65 min */ #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024 /** @@ -86,10 +86,10 @@ enum ldlm_error { * decisions about lack of conflicts or do any autonomous lock granting without * first speaking to a server. */ -typedef enum { +enum ldlm_side { LDLM_NAMESPACE_SERVER = 1 << 0, LDLM_NAMESPACE_CLIENT = 1 << 1 -} ldlm_side_t; +}; /** * The blocking callback is overloaded to perform two functions. These flags @@ -359,7 +359,7 @@ struct ldlm_namespace { struct obd_device *ns_obd; /** Flag indicating if namespace is on client instead of server */ - ldlm_side_t ns_client; + enum ldlm_side ns_client; /** Resource hash table for namespace. */ struct cfs_hash *ns_rs_hash; @@ -550,20 +550,18 @@ struct ldlm_flock { __u64 owner; __u64 blocking_owner; struct obd_export *blocking_export; - /* Protected by the hash lock */ - __u32 blocking_refs; __u32 pid; }; -typedef union { +union ldlm_policy_data { struct ldlm_extent l_extent; struct ldlm_flock l_flock; struct ldlm_inodebits l_inodebits; -} ldlm_policy_data_t; +}; void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type, - const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy); + const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy); enum lvb_type { LVB_T_NONE = 0, @@ -692,7 +690,7 @@ struct ldlm_lock { * Representation of private data specific for a lock type. * Examples are: extent range for extent lock or bitmask for ibits locks */ - ldlm_policy_data_t l_policy_data; + union ldlm_policy_data l_policy_data; /** * Lock state flags. Protected by lr_lock. @@ -967,8 +965,8 @@ struct ldlm_ast_work { * Common ldlm_enqueue parameters */ struct ldlm_enqueue_info { - __u32 ei_type; /** Type of the lock being enqueued. */ - __u32 ei_mode; /** Mode of the lock being enqueued. */ + enum ldlm_type ei_type; /** Type of the lock being enqueued. */ + enum ldlm_mode ei_mode; /** Mode of the lock being enqueued. */ void *ei_cb_bl; /** blocking lock callback */ void *ei_cb_cp; /** lock completion callback */ void *ei_cb_gl; /** lock glimpse callback */ @@ -979,7 +977,7 @@ struct ldlm_enqueue_info { extern struct obd_ops ldlm_obd_ops; extern char *ldlm_lockname[]; -char *ldlm_it2str(int it); +const char *ldlm_it2str(enum ldlm_intent_flags it); /** * Just a fancy CDEBUG call with log level preset to LDLM_DEBUG. @@ -1168,16 +1166,18 @@ do { \ struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); void ldlm_lock_put(struct ldlm_lock *lock); void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); -void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode); -int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode); +void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode); +int ldlm_lock_addref_try(const struct lustre_handle *lockh, + enum ldlm_mode mode); +void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode); +void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, + enum ldlm_mode mode); void ldlm_lock_fail_match_locked(struct ldlm_lock *lock); void ldlm_lock_allow_match(struct ldlm_lock *lock); void ldlm_lock_allow_match_locked(struct ldlm_lock *lock); enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, const struct ldlm_res_id *, - enum ldlm_type type, ldlm_policy_data_t *, + enum ldlm_type type, union ldlm_policy_data *, enum ldlm_mode mode, struct lustre_handle *, int unref); enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, @@ -1189,7 +1189,7 @@ void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); /* resource.c */ struct ldlm_namespace * ldlm_namespace_new(struct obd_device *obd, char *name, - ldlm_side_t client, enum ldlm_appetite apt, + enum ldlm_side client, enum ldlm_appetite apt, enum ldlm_ns_type ns_type); int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags); void ldlm_namespace_get(struct ldlm_namespace *ns); @@ -1208,7 +1208,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct ldlm_lock *lock); void ldlm_resource_unlink_lock(struct ldlm_lock *lock); void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); -void ldlm_dump_all_namespaces(ldlm_side_t client, int level); +void ldlm_dump_all_namespaces(enum ldlm_side client, int level); void ldlm_namespace_dump(int level, struct ldlm_namespace *); void ldlm_resource_dump(int level, struct ldlm_resource *); int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, @@ -1241,7 +1241,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data); int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, struct ldlm_enqueue_info *einfo, const struct ldlm_res_id *res_id, - ldlm_policy_data_t const *policy, __u64 *flags, + union ldlm_policy_data const *policy, __u64 *flags, void *lvb, __u32 lvb_len, enum lvb_type lvb_type, struct lustre_handle *lockh, int async); int ldlm_prep_enqueue_req(struct obd_export *exp, @@ -1265,13 +1265,13 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *, enum ldlm_cancel_flags flags, void *opaque); int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, const struct ldlm_res_id *res_id, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, enum ldlm_cancel_flags flags, void *opaque); int ldlm_cancel_resource_local(struct ldlm_resource *res, struct list_head *cancels, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, __u64 lock_flags, enum ldlm_cancel_flags cancel_flags, void *opaque); @@ -1333,7 +1333,7 @@ int ldlm_pools_init(void); void ldlm_pools_fini(void); int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, - int idx, ldlm_side_t client); + int idx, enum ldlm_side client); void ldlm_pool_fini(struct ldlm_pool *pl); void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock); void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock); diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h index 316780693193..b5a1aadbcb93 100644 --- a/drivers/staging/lustre/lustre/include/lustre_fid.h +++ b/drivers/staging/lustre/lustre/include/lustre_fid.h @@ -150,6 +150,7 @@ #include "../../include/linux/libcfs/libcfs.h" #include "lustre/lustre_idl.h" +#include "seq_range.h" struct lu_env; struct lu_site; diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h index 932410d3e3cc..6ef1b03cb986 100644 --- a/drivers/staging/lustre/lustre/include/lustre_fld.h +++ b/drivers/staging/lustre/lustre/include/lustre_fld.h @@ -103,8 +103,6 @@ struct lu_client_fld { /** Client fld debugfs entry name. */ char lcf_name[LUSTRE_MDT_MAXNAMELEN]; - - int lcf_flags; }; /* Client methods */ diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h index cde7ed702c86..dec1e99d594d 100644 --- a/drivers/staging/lustre/lustre/include/lustre_ha.h +++ b/drivers/staging/lustre/lustre/include/lustre_ha.h @@ -53,6 +53,7 @@ void ptlrpc_activate_import(struct obd_import *imp); void ptlrpc_deactivate_import(struct obd_import *imp); void ptlrpc_invalidate_import(struct obd_import *imp); void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt); +void ptlrpc_pinger_force(struct obd_import *imp); /** @} ha */ diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h index 5461ba33d90c..f0c931ce1a67 100644 --- a/drivers/staging/lustre/lustre/include/lustre_import.h +++ b/drivers/staging/lustre/lustre/include/lustre_import.h @@ -185,6 +185,11 @@ struct obd_import { struct list_head *imp_replay_cursor; /** @} */ + /** List of not replied requests */ + struct list_head imp_unreplied_list; + /** Known maximal replied XID */ + __u64 imp_known_replied_xid; + /** obd device for this import */ struct obd_device *imp_obd; @@ -294,7 +299,9 @@ struct obd_import { */ imp_force_reconnect:1, /* import has tried to connect with server */ - imp_connect_tried:1; + imp_connect_tried:1, + /* connected but not FULL yet */ + imp_connected:1; __u32 imp_connect_op; struct obd_connect_data imp_connect_data; __u64 imp_connect_flags_orig; diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index 6b231913ba2e..27f3148c4344 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -350,8 +350,6 @@ do { \ l_wait_event_exclusive_head(wq, condition, &lwi); \ }) -#define LIBLUSTRE_CLIENT (0) - /** @} lib */ #endif /* _LUSTRE_LIB_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h index d7f7afa8dfa7..5aa3645e64dc 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lmv.h +++ b/drivers/staging/lustre/lustre/include/lustre_lmv.h @@ -76,18 +76,7 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2) union lmv_mds_md; -int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp, - const union lmv_mds_md *lmm, int stripe_count); - -static inline int lmv_alloc_memmd(struct lmv_stripe_md **lsmp, int stripe_count) -{ - return lmv_unpack_md(NULL, lsmp, NULL, stripe_count); -} - -static inline void lmv_free_memmd(struct lmv_stripe_md *lsm) -{ - lmv_unpack_md(NULL, &lsm, NULL, 0); -} +void lmv_free_memmd(struct lmv_stripe_md *lsm); static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst, const struct lmv_mds_md_v1 *lmv_src) diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h index 995b266932e3..35e37eb1bc2c 100644 --- a/drivers/staging/lustre/lustre/include/lustre_log.h +++ b/drivers/staging/lustre/lustre/include/lustre_log.h @@ -214,6 +214,7 @@ struct llog_handle { spinlock_t lgh_hdr_lock; /* protect lgh_hdr data */ struct llog_logid lgh_id; /* id of this log */ struct llog_log_hdr *lgh_hdr; + size_t lgh_hdr_size; int lgh_last_idx; int lgh_cur_idx; /* used during llog_process */ __u64 lgh_cur_offset; /* used during llog_process */ @@ -244,6 +245,11 @@ struct llog_ctxt { struct mutex loc_mutex; /* protect loc_imp */ atomic_t loc_refcount; long loc_flags; /* flags, see above defines */ + /* + * llog chunk size, and llog record size can not be bigger than + * loc_chunk_size + */ + __u32 loc_chunk_size; }; #define LLOG_PROC_BREAK 0x0001 diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h index 8fc2d3f2dfd6..198ceb0c66f9 100644 --- a/drivers/staging/lustre/lustre/include/lustre_mdc.h +++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h @@ -156,16 +156,39 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, mutex_unlock(&lck->rpcl_mutex); } +static inline void mdc_get_mod_rpc_slot(struct ptlrpc_request *req, + struct lookup_intent *it) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + u32 opc; + u16 tag; + + opc = lustre_msg_get_opc(req->rq_reqmsg); + tag = obd_get_mod_rpc_slot(cli, opc, it); + lustre_msg_set_tag(req->rq_reqmsg, tag); +} + +static inline void mdc_put_mod_rpc_slot(struct ptlrpc_request *req, + struct lookup_intent *it) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + u32 opc; + u16 tag; + + opc = lustre_msg_get_opc(req->rq_reqmsg); + tag = lustre_msg_get_tag(req->rq_reqmsg); + obd_put_mod_rpc_slot(cli, opc, it, tag); +} + /** - * Update the maximum possible easize and cookiesize. + * Update the maximum possible easize. * - * The values are learned from ptlrpc replies sent by the MDT. The - * default easize and cookiesize is initialized to the minimum value but - * allowed to grow up to a single page in size if required to handle the + * This value is learned from ptlrpc replies sent by the MDT. The + * default easize is initialized to the minimum value but allowed + * to grow up to a single page in size if required to handle the * common case. * - * \see client_obd::cl_default_mds_easize and - * client_obd::cl_default_mds_cookiesize + * \see client_obd::cl_default_mds_easize * * \param[in] exp export for MDC device * \param[in] body body of ptlrpc reply from MDT @@ -176,7 +199,7 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp, { if (body->mbo_valid & OBD_MD_FLMODEASIZE) { struct client_obd *cli = &exp->exp_obd->u.cli; - u32 def_cookiesize, def_easize; + u32 def_easize; if (cli->cl_max_mds_easize < body->mbo_max_mdsize) cli->cl_max_mds_easize = body->mbo_max_mdsize; @@ -184,13 +207,6 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp, def_easize = min_t(__u32, body->mbo_max_mdsize, OBD_MAX_DEFAULT_EA_SIZE); cli->cl_default_mds_easize = def_easize; - - if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize) - cli->cl_max_mds_cookiesize = body->mbo_max_cookiesize; - - def_cookiesize = min_t(__u32, body->mbo_max_cookiesize, - OBD_MAX_DEFAULT_COOKIE_SIZE); - cli->cl_default_mds_cookiesize = def_cookiesize; } } diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index e9aba99ee52a..411eb0dc7f38 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -50,6 +50,7 @@ * @{ */ +#include <linux/uio.h> #include "../../include/linux/libcfs/libcfs.h" #include "../../include/linux/lnet/nidstr.h" #include "../../include/linux/lnet/api.h" @@ -68,13 +69,17 @@ #define PTLRPC_MD_OPTIONS 0 /** - * Max # of bulk operations in one request. + * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ... * In order for the client and server to properly negotiate the maximum * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two * value. The client is free to limit the actual RPC size for any bulk * transfer via cl_max_pages_per_rpc to some non-power-of-two value. + * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS. */ -#define PTLRPC_BULK_OPS_BITS 2 +#define PTLRPC_BULK_OPS_BITS 4 +#if PTLRPC_BULK_OPS_BITS > 16 +#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS." +#endif #define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS) /** * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and @@ -437,6 +442,10 @@ struct ptlrpc_reply_state { unsigned long rs_committed:1;/* the transaction was committed * and the rs was dispatched */ + atomic_t rs_refcount; /* number of users */ + /** Number of locks awaiting client ACK */ + int rs_nlocks; + /** Size of the state */ int rs_size; /** opcode */ @@ -449,7 +458,6 @@ struct ptlrpc_reply_state { struct ptlrpc_service_part *rs_svcpt; /** Lnet metadata handle for the reply */ lnet_handle_md_t rs_md_h; - atomic_t rs_refcount; /** Context for the service thread */ struct ptlrpc_svc_ctx *rs_svc_ctx; @@ -466,8 +474,6 @@ struct ptlrpc_reply_state { */ struct lustre_msg *rs_msg; /* reply message */ - /** Number of locks awaiting client ACK */ - int rs_nlocks; /** Handles of locks awaiting client reply ACK */ struct lustre_handle rs_locks[RS_MAX_LOCKS]; /** Lock modes of locks in \a rs_locks */ @@ -515,717 +521,7 @@ struct lu_env; struct ldlm_lock; -/** - * \defgroup nrs Network Request Scheduler - * @{ - */ -struct ptlrpc_nrs_policy; -struct ptlrpc_nrs_resource; -struct ptlrpc_nrs_request; - -/** - * NRS control operations. - * - * These are common for all policies. - */ -enum ptlrpc_nrs_ctl { - /** - * Not a valid opcode. - */ - PTLRPC_NRS_CTL_INVALID, - /** - * Activate the policy. - */ - PTLRPC_NRS_CTL_START, - /** - * Reserved for multiple primary policies, which may be a possibility - * in the future. - */ - PTLRPC_NRS_CTL_STOP, - /** - * Policies can start using opcodes from this value and onwards for - * their own purposes; the assigned value itself is arbitrary. - */ - PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20, -}; - -/** - * ORR policy operations - */ -enum nrs_ctl_orr { - NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC, - NRS_CTL_ORR_WR_QUANTUM, - NRS_CTL_ORR_RD_OFF_TYPE, - NRS_CTL_ORR_WR_OFF_TYPE, - NRS_CTL_ORR_RD_SUPP_REQ, - NRS_CTL_ORR_WR_SUPP_REQ, -}; - -/** - * NRS policy operations. - * - * These determine the behaviour of a policy, and are called in response to - * NRS core events. - */ -struct ptlrpc_nrs_pol_ops { - /** - * Called during policy registration; this operation is optional. - * - * \param[in,out] policy The policy being initialized - */ - int (*op_policy_init)(struct ptlrpc_nrs_policy *policy); - /** - * Called during policy unregistration; this operation is optional. - * - * \param[in,out] policy The policy being unregistered/finalized - */ - void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy); - /** - * Called when activating a policy via lprocfs; policies allocate and - * initialize their resources here; this operation is optional. - * - * \param[in,out] policy The policy being started - * - * \see nrs_policy_start_locked() - */ - int (*op_policy_start)(struct ptlrpc_nrs_policy *policy); - /** - * Called when deactivating a policy via lprocfs; policies deallocate - * their resources here; this operation is optional - * - * \param[in,out] policy The policy being stopped - * - * \see nrs_policy_stop0() - */ - void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy); - /** - * Used for policy-specific operations; i.e. not generic ones like - * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous - * to an ioctl; this operation is optional. - * - * \param[in,out] policy The policy carrying out operation \a opc - * \param[in] opc The command operation being carried out - * \param[in,out] arg An generic buffer for communication between the - * user and the control operation - * - * \retval -ve error - * \retval 0 success - * - * \see ptlrpc_nrs_policy_control() - */ - int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy, - enum ptlrpc_nrs_ctl opc, void *arg); - - /** - * Called when obtaining references to the resources of the resource - * hierarchy for a request that has arrived for handling at the PTLRPC - * service. Policies should return -ve for requests they do not wish - * to handle. This operation is mandatory. - * - * \param[in,out] policy The policy we're getting resources for. - * \param[in,out] nrq The request we are getting resources for. - * \param[in] parent The parent resource of the resource being - * requested; set to NULL if none. - * \param[out] resp The resource is to be returned here; the - * fallback policy in an NRS head should - * \e always return a non-NULL pointer value. - * \param[in] moving_req When set, signifies that this is an attempt - * to obtain resources for a request being moved - * to the high-priority NRS head by - * ldlm_lock_reorder_req(). - * This implies two things: - * 1. We are under obd_export::exp_rpc_lock and - * so should not sleep. - * 2. We should not perform non-idempotent or can - * skip performing idempotent operations that - * were carried out when resources were first - * taken for the request when it was initialized - * in ptlrpc_nrs_req_initialize(). - * - * \retval 0, +ve The level of the returned resource in the resource - * hierarchy; currently only 0 (for a non-leaf resource) - * and 1 (for a leaf resource) are supported by the - * framework. - * \retval -ve error - * - * \see ptlrpc_nrs_req_initialize() - * \see ptlrpc_nrs_hpreq_add_nolock() - */ - int (*op_res_get)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq, - const struct ptlrpc_nrs_resource *parent, - struct ptlrpc_nrs_resource **resp, - bool moving_req); - /** - * Called when releasing references taken for resources in the resource - * hierarchy for the request; this operation is optional. - * - * \param[in,out] policy The policy the resource belongs to - * \param[in] res The resource to be freed - * - * \see ptlrpc_nrs_req_finalize() - * \see ptlrpc_nrs_hpreq_add_nolock() - */ - void (*op_res_put)(struct ptlrpc_nrs_policy *policy, - const struct ptlrpc_nrs_resource *res); - - /** - * Obtains a request for handling from the policy, and optionally - * removes the request from the policy; this operation is mandatory. - * - * \param[in,out] policy The policy to poll - * \param[in] peek When set, signifies that we just want to - * examine the request, and not handle it, so the - * request is not removed from the policy. - * \param[in] force When set, it will force a policy to return a - * request if it has one queued. - * - * \retval NULL No request available for handling - * \retval valid-pointer The request polled for handling - * - * \see ptlrpc_nrs_req_get_nolock() - */ - struct ptlrpc_nrs_request * - (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek, - bool force); - /** - * Called when attempting to add a request to a policy for later - * handling; this operation is mandatory. - * - * \param[in,out] policy The policy on which to enqueue \a nrq - * \param[in,out] nrq The request to enqueue - * - * \retval 0 success - * \retval != 0 error - * - * \see ptlrpc_nrs_req_add_nolock() - */ - int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Removes a request from the policy's set of pending requests. Normally - * called after a request has been polled successfully from the policy - * for handling; this operation is mandatory. - * - * \param[in,out] policy The policy the request \a nrq belongs to - * \param[in,out] nrq The request to dequeue - */ - void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Called after the request being carried out. Could be used for - * job/resource control; this operation is optional. - * - * \param[in,out] policy The policy which is stopping to handle request - * \a nrq - * \param[in,out] nrq The request - * - * \pre assert_spin_locked(&svcpt->scp_req_lock) - * - * \see ptlrpc_nrs_req_stop_nolock() - */ - void (*op_req_stop)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Registers the policy's lprocfs interface with a PTLRPC service. - * - * \param[in] svc The service - * - * \retval 0 success - * \retval != 0 error - */ - int (*op_lprocfs_init)(struct ptlrpc_service *svc); - /** - * Unegisters the policy's lprocfs interface with a PTLRPC service. - * - * In cases of failed policy registration in - * \e ptlrpc_nrs_policy_register(), this function may be called for a - * service which has not registered the policy successfully, so - * implementations of this method should make sure their operations are - * safe in such cases. - * - * \param[in] svc The service - */ - void (*op_lprocfs_fini)(struct ptlrpc_service *svc); -}; - -/** - * Policy flags - */ -enum nrs_policy_flags { - /** - * Fallback policy, use this flag only on a single supported policy per - * service. The flag cannot be used on policies that use - * \e PTLRPC_NRS_FL_REG_EXTERN - */ - PTLRPC_NRS_FL_FALLBACK = (1 << 0), - /** - * Start policy immediately after registering. - */ - PTLRPC_NRS_FL_REG_START = (1 << 1), - /** - * This is a policy registering from a module different to the one NRS - * core ships in (currently ptlrpc). - */ - PTLRPC_NRS_FL_REG_EXTERN = (1 << 2), -}; - -/** - * NRS queue type. - * - * Denotes whether an NRS instance is for handling normal or high-priority - * RPCs, or whether an operation pertains to one or both of the NRS instances - * in a service. - */ -enum ptlrpc_nrs_queue_type { - PTLRPC_NRS_QUEUE_REG = (1 << 0), - PTLRPC_NRS_QUEUE_HP = (1 << 1), - PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP) -}; - -/** - * NRS head - * - * A PTLRPC service has at least one NRS head instance for handling normal - * priority RPCs, and may optionally have a second NRS head instance for - * handling high-priority RPCs. Each NRS head maintains a list of available - * policies, of which one and only one policy is acting as the fallback policy, - * and optionally a different policy may be acting as the primary policy. For - * all RPCs handled by this NRS head instance, NRS core will first attempt to - * enqueue the RPC using the primary policy (if any). The fallback policy is - * used in the following cases: - * - when there was no primary policy in the - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request - * was initialized. - * - when the primary policy that was at the - * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the - * RPC was initialized, denoted it did not wish, or for some other reason was - * not able to handle the request, by returning a non-valid NRS resource - * reference. - * - when the primary policy that was at the - * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the - * RPC was initialized, fails later during the request enqueueing stage. - * - * \see nrs_resource_get_safe() - * \see nrs_request_enqueue() - */ -struct ptlrpc_nrs { - spinlock_t nrs_lock; - /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */ - /** - * List of registered policies - */ - struct list_head nrs_policy_list; - /** - * List of policies with queued requests. Policies that have any - * outstanding requests are queued here, and this list is queried - * in a round-robin manner from NRS core when obtaining a request - * for handling. This ensures that requests from policies that at some - * point transition away from the - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained. - */ - struct list_head nrs_policy_queued; - /** - * Service partition for this NRS head - */ - struct ptlrpc_service_part *nrs_svcpt; - /** - * Primary policy, which is the preferred policy for handling RPCs - */ - struct ptlrpc_nrs_policy *nrs_policy_primary; - /** - * Fallback policy, which is the backup policy for handling RPCs - */ - struct ptlrpc_nrs_policy *nrs_policy_fallback; - /** - * This NRS head handles either HP or regular requests - */ - enum ptlrpc_nrs_queue_type nrs_queue_type; - /** - * # queued requests from all policies in this NRS head - */ - unsigned long nrs_req_queued; - /** - * # scheduled requests from all policies in this NRS head - */ - unsigned long nrs_req_started; - /** - * # policies on this NRS - */ - unsigned nrs_num_pols; - /** - * This NRS head is in progress of starting a policy - */ - unsigned nrs_policy_starting:1; - /** - * In progress of shutting down the whole NRS head; used during - * unregistration - */ - unsigned nrs_stopping:1; -}; - -#define NRS_POL_NAME_MAX 16 - -struct ptlrpc_nrs_pol_desc; - -/** - * Service compatibility predicate; this determines whether a policy is adequate - * for handling RPCs of a particular PTLRPC service. - * - * XXX:This should give the same result during policy registration and - * unregistration, and for all partitions of a service; so the result should not - * depend on temporal service or other properties, that may influence the - * result. - */ -typedef bool (*nrs_pol_desc_compat_t) (const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc); - -struct ptlrpc_nrs_pol_conf { - /** - * Human-readable policy name - */ - char nc_name[NRS_POL_NAME_MAX]; - /** - * NRS operations for this policy - */ - const struct ptlrpc_nrs_pol_ops *nc_ops; - /** - * Service compatibility predicate - */ - nrs_pol_desc_compat_t nc_compat; - /** - * Set for policies that support a single ptlrpc service, i.e. ones that - * have \a pd_compat set to nrs_policy_compat_one(). The variable value - * depicts the name of the single service that such policies are - * compatible with. - */ - const char *nc_compat_svc_name; - /** - * Owner module for this policy descriptor; policies registering from a - * different module to the one the NRS framework is held within - * (currently ptlrpc), should set this field to THIS_MODULE. - */ - struct module *nc_owner; - /** - * Policy registration flags; a bitmask of \e nrs_policy_flags - */ - unsigned nc_flags; -}; - -/** - * NRS policy registering descriptor - * - * Is used to hold a description of a policy that can be passed to NRS core in - * order to register the policy with NRS heads in different PTLRPC services. - */ -struct ptlrpc_nrs_pol_desc { - /** - * Human-readable policy name - */ - char pd_name[NRS_POL_NAME_MAX]; - /** - * Link into nrs_core::nrs_policies - */ - struct list_head pd_list; - /** - * NRS operations for this policy - */ - const struct ptlrpc_nrs_pol_ops *pd_ops; - /** - * Service compatibility predicate - */ - nrs_pol_desc_compat_t pd_compat; - /** - * Set for policies that are compatible with only one PTLRPC service. - * - * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name - */ - const char *pd_compat_svc_name; - /** - * Owner module for this policy descriptor. - * - * We need to hold a reference to the module whenever we might make use - * of any of the module's contents, i.e. - * - If one or more instances of the policy are at a state where they - * might be handling a request, i.e. - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to - * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference - * is taken on the module when - * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it - * becomes 0, so that we hold only one reference to the module maximum - * at any time. - * - * We do not need to hold a reference to the module, even though we - * might use code and data from the module, in the following cases: - * - During external policy registration, because this should happen in - * the module's init() function, in which case the module is safe from - * removal because a reference is being held on the module by the - * kernel, and iirc kmod (and I guess module-init-tools also) will - * serialize any racing processes properly anyway. - * - During external policy unregistration, because this should happen - * in a module's exit() function, and any attempts to start a policy - * instance would need to take a reference on the module, and this is - * not possible once we have reached the point where the exit() - * handler is called. - * - During service registration and unregistration, as service setup - * and cleanup, and policy registration, unregistration and policy - * instance starting, are serialized by \e nrs_core::nrs_mutex, so - * as long as users adhere to the convention of registering policies - * in init() and unregistering them in module exit() functions, there - * should not be a race between these operations. - * - During any policy-specific lprocfs operations, because a reference - * is held by the kernel on a proc entry that has been entered by a - * syscall, so as long as proc entries are removed during unregistration time, - * then unregistration and lprocfs operations will be properly - * serialized. - */ - struct module *pd_owner; - /** - * Bitmask of \e nrs_policy_flags - */ - unsigned pd_flags; - /** - * # of references on this descriptor - */ - atomic_t pd_refs; -}; - -/** - * NRS policy state - * - * Policies transition from one state to the other during their lifetime - */ -enum ptlrpc_nrs_pol_state { - /** - * Not a valid policy state. - */ - NRS_POL_STATE_INVALID, - /** - * Policies are at this state either at the start of their life, or - * transition here when the user selects a different policy to act - * as the primary one. - */ - NRS_POL_STATE_STOPPED, - /** - * Policy is progress of stopping - */ - NRS_POL_STATE_STOPPING, - /** - * Policy is in progress of starting - */ - NRS_POL_STATE_STARTING, - /** - * A policy is in this state in two cases: - * - it is the fallback policy, which is always in this state. - * - it has been activated by the user; i.e. it is the primary policy, - */ - NRS_POL_STATE_STARTED, -}; - -/** - * NRS policy information - * - * Used for obtaining information for the status of a policy via lprocfs - */ -struct ptlrpc_nrs_pol_info { - /** - * Policy name - */ - char pi_name[NRS_POL_NAME_MAX]; - /** - * Current policy state - */ - enum ptlrpc_nrs_pol_state pi_state; - /** - * # RPCs enqueued for later dispatching by the policy - */ - long pi_req_queued; - /** - * # RPCs started for dispatch by the policy - */ - long pi_req_started; - /** - * Is this a fallback policy? - */ - unsigned pi_fallback:1; -}; - -/** - * NRS policy - * - * There is one instance of this for each policy in each NRS head of each - * PTLRPC service partition. - */ -struct ptlrpc_nrs_policy { - /** - * Linkage into the NRS head's list of policies, - * ptlrpc_nrs:nrs_policy_list - */ - struct list_head pol_list; - /** - * Linkage into the NRS head's list of policies with enqueued - * requests ptlrpc_nrs:nrs_policy_queued - */ - struct list_head pol_list_queued; - /** - * Current state of this policy - */ - enum ptlrpc_nrs_pol_state pol_state; - /** - * Bitmask of nrs_policy_flags - */ - unsigned pol_flags; - /** - * # RPCs enqueued for later dispatching by the policy - */ - long pol_req_queued; - /** - * # RPCs started for dispatch by the policy - */ - long pol_req_started; - /** - * Usage Reference count taken on the policy instance - */ - long pol_ref; - /** - * The NRS head this policy has been created at - */ - struct ptlrpc_nrs *pol_nrs; - /** - * Private policy data; varies by policy type - */ - void *pol_private; - /** - * Policy descriptor for this policy instance. - */ - struct ptlrpc_nrs_pol_desc *pol_desc; -}; - -/** - * NRS resource - * - * Resources are embedded into two types of NRS entities: - * - Inside NRS policies, in the policy's private data in - * ptlrpc_nrs_policy::pol_private - * - In objects that act as prime-level scheduling entities in different NRS - * policies; e.g. on a policy that performs round robin or similar order - * scheduling across client NIDs, there would be one NRS resource per unique - * client NID. On a policy which performs round robin scheduling across - * backend filesystem objects, there would be one resource associated with - * each of the backend filesystem objects partaking in the scheduling - * performed by the policy. - * - * NRS resources share a parent-child relationship, in which resources embedded - * in policy instances are the parent entities, with all scheduling entities - * a policy schedules across being the children, thus forming a simple resource - * hierarchy. This hierarchy may be extended with one or more levels in the - * future if the ability to have more than one primary policy is added. - * - * Upon request initialization, references to the then active NRS policies are - * taken and used to later handle the dispatching of the request with one of - * these policies. - * - * \see nrs_resource_get_safe() - * \see ptlrpc_nrs_req_add() - */ -struct ptlrpc_nrs_resource { - /** - * This NRS resource's parent; is NULL for resources embedded in NRS - * policy instances; i.e. those are top-level ones. - */ - struct ptlrpc_nrs_resource *res_parent; - /** - * The policy associated with this resource. - */ - struct ptlrpc_nrs_policy *res_policy; -}; - -enum { - NRS_RES_FALLBACK, - NRS_RES_PRIMARY, - NRS_RES_MAX -}; - -/* \name fifo - * - * FIFO policy - * - * This policy is a logical wrapper around previous, non-NRS functionality. - * It dispatches RPCs in the same order as they arrive from the network. This - * policy is currently used as the fallback policy, and the only enabled policy - * on all NRS heads of all PTLRPC service partitions. - * @{ - */ - -/** - * Private data structure for the FIFO policy - */ -struct nrs_fifo_head { - /** - * Resource object for policy instance. - */ - struct ptlrpc_nrs_resource fh_res; - /** - * List of queued requests. - */ - struct list_head fh_list; - /** - * For debugging purposes. - */ - __u64 fh_sequence; -}; - -struct nrs_fifo_req { - struct list_head fr_list; - __u64 fr_sequence; -}; - -/** @} fifo */ - -/** - * NRS request - * - * Instances of this object exist embedded within ptlrpc_request; the main - * purpose of this object is to hold references to the request's resources - * for the lifetime of the request, and to hold properties that policies use - * use for determining the request's scheduling priority. - */ -struct ptlrpc_nrs_request { - /** - * The request's resource hierarchy. - */ - struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX]; - /** - * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the - * policy that was used to enqueue the request. - * - * \see nrs_request_enqueue() - */ - unsigned nr_res_idx; - unsigned nr_initialized:1; - unsigned nr_enqueued:1; - unsigned nr_started:1; - unsigned nr_finalized:1; - - /** - * Policy-specific fields, used for determining a request's scheduling - * priority, and other supporting functionality. - */ - union { - /** - * Fields for the FIFO policy - */ - struct nrs_fifo_req fifo; - } nr_u; - /** - * Externally-registering policies may want to use this to allocate - * their own request properties. - */ - void *ext; -}; - -/** @} nrs */ +#include "lustre_nrs.h" /** * Basic request prioritization operations structure. @@ -1304,6 +600,8 @@ struct ptlrpc_cli_req { union ptlrpc_async_args cr_async_args; /** Opaq data for replay and commit callbacks. */ void *cr_cb_data; + /** Link to the imp->imp_unreplied_list */ + struct list_head cr_unreplied_list; /** * Commit callback, called when request is committed and about to be * freed. @@ -1343,6 +641,7 @@ struct ptlrpc_cli_req { #define rq_interpret_reply rq_cli.cr_reply_interp #define rq_async_args rq_cli.cr_async_args #define rq_cb_data rq_cli.cr_cb_data +#define rq_unreplied_list rq_cli.cr_unreplied_list #define rq_commit_cb rq_cli.cr_commit_cb #define rq_replay_cb rq_cli.cr_replay_cb @@ -1505,6 +804,8 @@ struct ptlrpc_request { __u64 rq_transno; /** xid */ __u64 rq_xid; + /** bulk match bits */ + u64 rq_mbits; /** * List item to for replay list. Not yet committed requests get linked * there. @@ -1793,10 +1094,93 @@ struct ptlrpc_bulk_page { struct page *bp_page; }; -#define BULK_GET_SOURCE 0 -#define BULK_PUT_SINK 1 -#define BULK_GET_SINK 2 -#define BULK_PUT_SOURCE 3 +enum ptlrpc_bulk_op_type { + PTLRPC_BULK_OP_ACTIVE = 0x00000001, + PTLRPC_BULK_OP_PASSIVE = 0x00000002, + PTLRPC_BULK_OP_PUT = 0x00000004, + PTLRPC_BULK_OP_GET = 0x00000008, + PTLRPC_BULK_BUF_KVEC = 0x00000010, + PTLRPC_BULK_BUF_KIOV = 0x00000020, + PTLRPC_BULK_GET_SOURCE = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET, + PTLRPC_BULK_PUT_SINK = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT, + PTLRPC_BULK_GET_SINK = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET, + PTLRPC_BULK_PUT_SOURCE = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT, +}; + +static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET; +} + +static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE; +} + +static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK; +} + +static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_GET_SINK) == PTLRPC_BULK_GET_SINK; +} + +static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE; +} + +static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type) +{ + return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) + == PTLRPC_BULK_BUF_KVEC; +} + +static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type) +{ + return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) + == PTLRPC_BULK_BUF_KIOV; +} + +static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type) +{ + return ((type & PTLRPC_BULK_OP_ACTIVE) | + (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE; +} + +static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type) +{ + return ((type & PTLRPC_BULK_OP_ACTIVE) | + (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE; +} + +struct ptlrpc_bulk_frag_ops { + /** + * Add a page \a page to the bulk descriptor \a desc + * Data to transfer in the page starts at offset \a pageoffset and + * amount of data to transfer from the page is \a len + */ + void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc, + struct page *page, int pageoffset, int len); + + /* + * Add a \a fragment to the bulk descriptor \a desc. + * Data to transfer in the fragment is pointed to by \a frag + * The size of the fragment is \a len + */ + int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len); + + /** + * Uninitialize and free bulk descriptor \a desc. + * Works on bulk descriptors both from server and client side. + */ + void (*release_frags)(struct ptlrpc_bulk_desc *desc); +}; + +extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops; +extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops; /** * Definition of bulk descriptor. @@ -1811,14 +1195,14 @@ struct ptlrpc_bulk_page { struct ptlrpc_bulk_desc { /** completed with failure */ unsigned long bd_failure:1; - /** {put,get}{source,sink} */ - unsigned long bd_type:2; /** client side */ unsigned long bd_registered:1; /** For serialization with callback */ spinlock_t bd_lock; /** Import generation when request for this bulk was sent */ int bd_import_generation; + /** {put,get}{source,sink}{kvec,kiov} */ + enum ptlrpc_bulk_op_type bd_type; /** LNet portal for this bulk */ __u32 bd_portal; /** Server side - export this bulk created for */ @@ -1827,13 +1211,14 @@ struct ptlrpc_bulk_desc { struct obd_import *bd_import; /** Back pointer to the request */ struct ptlrpc_request *bd_req; + struct ptlrpc_bulk_frag_ops *bd_frag_ops; wait_queue_head_t bd_waitq; /* server side only WQ */ int bd_iov_count; /* # entries in bd_iov */ int bd_max_iov; /* allocated size of bd_iov */ int bd_nob; /* # bytes covered */ int bd_nob_transferred; /* # bytes GOT/PUT */ - __u64 bd_last_xid; + u64 bd_last_mbits; struct ptlrpc_cb_id bd_cbid; /* network callback info */ lnet_nid_t bd_sender; /* stash event::sender */ @@ -1842,14 +1227,31 @@ struct ptlrpc_bulk_desc { /** array of associated MDs */ lnet_handle_md_t bd_mds[PTLRPC_BULK_OPS_COUNT]; - /* - * encrypt iov, size is either 0 or bd_iov_count. - */ - lnet_kiov_t *bd_enc_iov; - - lnet_kiov_t bd_iov[0]; + union { + struct { + /* + * encrypt iov, size is either 0 or bd_iov_count. + */ + struct bio_vec *bd_enc_vec; + struct bio_vec *bd_vec; /* Array of bio_vecs */ + } bd_kiov; + + struct { + struct kvec *bd_enc_kvec; + struct kvec *bd_kvec; /* Array of kvecs */ + } bd_kvec; + } bd_u; }; +#define GET_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_vec) +#define BD_GET_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_vec[i]) +#define GET_ENC_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_enc_vec) +#define BD_GET_ENC_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_enc_vec[i]) +#define GET_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_kvec) +#define BD_GET_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_kvec[i]) +#define GET_ENC_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_enc_kvec) +#define BD_GET_ENC_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_enc_kvec[i]) + enum { SVC_STOPPED = 1 << 0, SVC_STOPPING = 1 << 1, @@ -2464,21 +1866,17 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, void ptlrpc_req_finished(struct ptlrpc_request *request); struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req); struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, - unsigned npages, unsigned max_brw, - unsigned type, unsigned portal); -void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin); -static inline void ptlrpc_free_bulk_pin(struct ptlrpc_bulk_desc *bulk) -{ - __ptlrpc_free_bulk(bulk, 1); -} - -static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk) -{ - __ptlrpc_free_bulk(bulk, 0); -} - + unsigned int nfrags, + unsigned int max_brw, + unsigned int type, + unsigned int portal, + const struct ptlrpc_bulk_frag_ops *ops); + +int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc, + void *frag, int len); void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len, int); + struct page *page, int pageoffset, int len, + int pin); static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc, struct page *page, int pageoffset, int len) @@ -2493,6 +1891,16 @@ static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc, __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0); } +void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk); + +static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc) +{ + int i; + + for (i = 0; i < desc->bd_iov_count ; i++) + put_page(BD_GET_KIOV(desc, i).bv_page); +} + void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, struct obd_import *imp); __u64 ptlrpc_next_xid(void); @@ -2652,6 +2060,7 @@ struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg); __u32 lustre_msg_get_type(struct lustre_msg *msg); void lustre_msg_add_version(struct lustre_msg *msg, u32 version); __u32 lustre_msg_get_opc(struct lustre_msg *msg); +__u16 lustre_msg_get_tag(struct lustre_msg *msg); __u64 lustre_msg_get_last_committed(struct lustre_msg *msg); __u64 *lustre_msg_get_versions(struct lustre_msg *msg); __u64 lustre_msg_get_transno(struct lustre_msg *msg); @@ -2670,6 +2079,8 @@ void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle); void lustre_msg_set_type(struct lustre_msg *msg, __u32 type); void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc); +void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid); +void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag); void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions); void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno); void lustre_msg_set_status(struct lustre_msg *msg, __u32 status); @@ -2679,6 +2090,7 @@ void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout); void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time); void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid); void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum); +void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits); static inline void lustre_shrink_reply(struct ptlrpc_request *req, int segment, diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs.h b/drivers/staging/lustre/lustre/include/lustre_nrs.h new file mode 100644 index 000000000000..a5028aaa19cd --- /dev/null +++ b/drivers/staging/lustre/lustre/include/lustre_nrs.h @@ -0,0 +1,717 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2014, Intel Corporation. + * + * Copyright 2012 Xyratex Technology Limited + */ +/* + * + * Network Request Scheduler (NRS) + * + */ + +#ifndef _LUSTRE_NRS_H +#define _LUSTRE_NRS_H + +/** + * \defgroup nrs Network Request Scheduler + * @{ + */ +struct ptlrpc_nrs_policy; +struct ptlrpc_nrs_resource; +struct ptlrpc_nrs_request; + +/** + * NRS control operations. + * + * These are common for all policies. + */ +enum ptlrpc_nrs_ctl { + /** + * Not a valid opcode. + */ + PTLRPC_NRS_CTL_INVALID, + /** + * Activate the policy. + */ + PTLRPC_NRS_CTL_START, + /** + * Reserved for multiple primary policies, which may be a possibility + * in the future. + */ + PTLRPC_NRS_CTL_STOP, + /** + * Policies can start using opcodes from this value and onwards for + * their own purposes; the assigned value itself is arbitrary. + */ + PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20, +}; + +/** + * NRS policy operations. + * + * These determine the behaviour of a policy, and are called in response to + * NRS core events. + */ +struct ptlrpc_nrs_pol_ops { + /** + * Called during policy registration; this operation is optional. + * + * \param[in,out] policy The policy being initialized + */ + int (*op_policy_init)(struct ptlrpc_nrs_policy *policy); + /** + * Called during policy unregistration; this operation is optional. + * + * \param[in,out] policy The policy being unregistered/finalized + */ + void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy); + /** + * Called when activating a policy via lprocfs; policies allocate and + * initialize their resources here; this operation is optional. + * + * \param[in,out] policy The policy being started + * + * \see nrs_policy_start_locked() + */ + int (*op_policy_start)(struct ptlrpc_nrs_policy *policy); + /** + * Called when deactivating a policy via lprocfs; policies deallocate + * their resources here; this operation is optional + * + * \param[in,out] policy The policy being stopped + * + * \see nrs_policy_stop0() + */ + void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy); + /** + * Used for policy-specific operations; i.e. not generic ones like + * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous + * to an ioctl; this operation is optional. + * + * \param[in,out] policy The policy carrying out operation \a opc + * \param[in] opc The command operation being carried out + * \param[in,out] arg An generic buffer for communication between the + * user and the control operation + * + * \retval -ve error + * \retval 0 success + * + * \see ptlrpc_nrs_policy_control() + */ + int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy, + enum ptlrpc_nrs_ctl opc, void *arg); + + /** + * Called when obtaining references to the resources of the resource + * hierarchy for a request that has arrived for handling at the PTLRPC + * service. Policies should return -ve for requests they do not wish + * to handle. This operation is mandatory. + * + * \param[in,out] policy The policy we're getting resources for. + * \param[in,out] nrq The request we are getting resources for. + * \param[in] parent The parent resource of the resource being + * requested; set to NULL if none. + * \param[out] resp The resource is to be returned here; the + * fallback policy in an NRS head should + * \e always return a non-NULL pointer value. + * \param[in] moving_req When set, signifies that this is an attempt + * to obtain resources for a request being moved + * to the high-priority NRS head by + * ldlm_lock_reorder_req(). + * This implies two things: + * 1. We are under obd_export::exp_rpc_lock and + * so should not sleep. + * 2. We should not perform non-idempotent or can + * skip performing idempotent operations that + * were carried out when resources were first + * taken for the request when it was initialized + * in ptlrpc_nrs_req_initialize(). + * + * \retval 0, +ve The level of the returned resource in the resource + * hierarchy; currently only 0 (for a non-leaf resource) + * and 1 (for a leaf resource) are supported by the + * framework. + * \retval -ve error + * + * \see ptlrpc_nrs_req_initialize() + * \see ptlrpc_nrs_hpreq_add_nolock() + * \see ptlrpc_nrs_req_hp_move() + */ + int (*op_res_get)(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq, + const struct ptlrpc_nrs_resource *parent, + struct ptlrpc_nrs_resource **resp, + bool moving_req); + /** + * Called when releasing references taken for resources in the resource + * hierarchy for the request; this operation is optional. + * + * \param[in,out] policy The policy the resource belongs to + * \param[in] res The resource to be freed + * + * \see ptlrpc_nrs_req_finalize() + * \see ptlrpc_nrs_hpreq_add_nolock() + * \see ptlrpc_nrs_req_hp_move() + */ + void (*op_res_put)(struct ptlrpc_nrs_policy *policy, + const struct ptlrpc_nrs_resource *res); + + /** + * Obtains a request for handling from the policy, and optionally + * removes the request from the policy; this operation is mandatory. + * + * \param[in,out] policy The policy to poll + * \param[in] peek When set, signifies that we just want to + * examine the request, and not handle it, so the + * request is not removed from the policy. + * \param[in] force When set, it will force a policy to return a + * request if it has one queued. + * + * \retval NULL No request available for handling + * \retval valid-pointer The request polled for handling + * + * \see ptlrpc_nrs_req_get_nolock() + */ + struct ptlrpc_nrs_request * + (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek, + bool force); + /** + * Called when attempting to add a request to a policy for later + * handling; this operation is mandatory. + * + * \param[in,out] policy The policy on which to enqueue \a nrq + * \param[in,out] nrq The request to enqueue + * + * \retval 0 success + * \retval != 0 error + * + * \see ptlrpc_nrs_req_add_nolock() + */ + int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq); + /** + * Removes a request from the policy's set of pending requests. Normally + * called after a request has been polled successfully from the policy + * for handling; this operation is mandatory. + * + * \param[in,out] policy The policy the request \a nrq belongs to + * \param[in,out] nrq The request to dequeue + * + * \see ptlrpc_nrs_req_del_nolock() + */ + void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq); + /** + * Called after the request being carried out. Could be used for + * job/resource control; this operation is optional. + * + * \param[in,out] policy The policy which is stopping to handle request + * \a nrq + * \param[in,out] nrq The request + * + * \pre assert_spin_locked(&svcpt->scp_req_lock) + * + * \see ptlrpc_nrs_req_stop_nolock() + */ + void (*op_req_stop)(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq); + /** + * Registers the policy's lprocfs interface with a PTLRPC service. + * + * \param[in] svc The service + * + * \retval 0 success + * \retval != 0 error + */ + int (*op_lprocfs_init)(struct ptlrpc_service *svc); + /** + * Unegisters the policy's lprocfs interface with a PTLRPC service. + * + * In cases of failed policy registration in + * \e ptlrpc_nrs_policy_register(), this function may be called for a + * service which has not registered the policy successfully, so + * implementations of this method should make sure their operations are + * safe in such cases. + * + * \param[in] svc The service + */ + void (*op_lprocfs_fini)(struct ptlrpc_service *svc); +}; + +/** + * Policy flags + */ +enum nrs_policy_flags { + /** + * Fallback policy, use this flag only on a single supported policy per + * service. The flag cannot be used on policies that use + * \e PTLRPC_NRS_FL_REG_EXTERN + */ + PTLRPC_NRS_FL_FALLBACK = BIT(0), + /** + * Start policy immediately after registering. + */ + PTLRPC_NRS_FL_REG_START = BIT(1), + /** + * This is a policy registering from a module different to the one NRS + * core ships in (currently ptlrpc). + */ + PTLRPC_NRS_FL_REG_EXTERN = BIT(2), +}; + +/** + * NRS queue type. + * + * Denotes whether an NRS instance is for handling normal or high-priority + * RPCs, or whether an operation pertains to one or both of the NRS instances + * in a service. + */ +enum ptlrpc_nrs_queue_type { + PTLRPC_NRS_QUEUE_REG = BIT(0), + PTLRPC_NRS_QUEUE_HP = BIT(1), + PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP) +}; + +/** + * NRS head + * + * A PTLRPC service has at least one NRS head instance for handling normal + * priority RPCs, and may optionally have a second NRS head instance for + * handling high-priority RPCs. Each NRS head maintains a list of available + * policies, of which one and only one policy is acting as the fallback policy, + * and optionally a different policy may be acting as the primary policy. For + * all RPCs handled by this NRS head instance, NRS core will first attempt to + * enqueue the RPC using the primary policy (if any). The fallback policy is + * used in the following cases: + * - when there was no primary policy in the + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request + * was initialized. + * - when the primary policy that was at the + * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the + * RPC was initialized, denoted it did not wish, or for some other reason was + * not able to handle the request, by returning a non-valid NRS resource + * reference. + * - when the primary policy that was at the + * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the + * RPC was initialized, fails later during the request enqueueing stage. + * + * \see nrs_resource_get_safe() + * \see nrs_request_enqueue() + */ +struct ptlrpc_nrs { + spinlock_t nrs_lock; + /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */ + /** + * List of registered policies + */ + struct list_head nrs_policy_list; + /** + * List of policies with queued requests. Policies that have any + * outstanding requests are queued here, and this list is queried + * in a round-robin manner from NRS core when obtaining a request + * for handling. This ensures that requests from policies that at some + * point transition away from the + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained. + */ + struct list_head nrs_policy_queued; + /** + * Service partition for this NRS head + */ + struct ptlrpc_service_part *nrs_svcpt; + /** + * Primary policy, which is the preferred policy for handling RPCs + */ + struct ptlrpc_nrs_policy *nrs_policy_primary; + /** + * Fallback policy, which is the backup policy for handling RPCs + */ + struct ptlrpc_nrs_policy *nrs_policy_fallback; + /** + * This NRS head handles either HP or regular requests + */ + enum ptlrpc_nrs_queue_type nrs_queue_type; + /** + * # queued requests from all policies in this NRS head + */ + unsigned long nrs_req_queued; + /** + * # scheduled requests from all policies in this NRS head + */ + unsigned long nrs_req_started; + /** + * # policies on this NRS + */ + unsigned int nrs_num_pols; + /** + * This NRS head is in progress of starting a policy + */ + unsigned int nrs_policy_starting:1; + /** + * In progress of shutting down the whole NRS head; used during + * unregistration + */ + unsigned int nrs_stopping:1; + /** + * NRS policy is throttling request + */ + unsigned int nrs_throttling:1; +}; + +#define NRS_POL_NAME_MAX 16 +#define NRS_POL_ARG_MAX 16 + +struct ptlrpc_nrs_pol_desc; + +/** + * Service compatibility predicate; this determines whether a policy is adequate + * for handling RPCs of a particular PTLRPC service. + * + * XXX:This should give the same result during policy registration and + * unregistration, and for all partitions of a service; so the result should not + * depend on temporal service or other properties, that may influence the + * result. + */ +typedef bool (*nrs_pol_desc_compat_t)(const struct ptlrpc_service *svc, + const struct ptlrpc_nrs_pol_desc *desc); + +struct ptlrpc_nrs_pol_conf { + /** + * Human-readable policy name + */ + char nc_name[NRS_POL_NAME_MAX]; + /** + * NRS operations for this policy + */ + const struct ptlrpc_nrs_pol_ops *nc_ops; + /** + * Service compatibility predicate + */ + nrs_pol_desc_compat_t nc_compat; + /** + * Set for policies that support a single ptlrpc service, i.e. ones that + * have \a pd_compat set to nrs_policy_compat_one(). The variable value + * depicts the name of the single service that such policies are + * compatible with. + */ + const char *nc_compat_svc_name; + /** + * Owner module for this policy descriptor; policies registering from a + * different module to the one the NRS framework is held within + * (currently ptlrpc), should set this field to THIS_MODULE. + */ + struct module *nc_owner; + /** + * Policy registration flags; a bitmask of \e nrs_policy_flags + */ + unsigned int nc_flags; +}; + +/** + * NRS policy registering descriptor + * + * Is used to hold a description of a policy that can be passed to NRS core in + * order to register the policy with NRS heads in different PTLRPC services. + */ +struct ptlrpc_nrs_pol_desc { + /** + * Human-readable policy name + */ + char pd_name[NRS_POL_NAME_MAX]; + /** + * Link into nrs_core::nrs_policies + */ + struct list_head pd_list; + /** + * NRS operations for this policy + */ + const struct ptlrpc_nrs_pol_ops *pd_ops; + /** + * Service compatibility predicate + */ + nrs_pol_desc_compat_t pd_compat; + /** + * Set for policies that are compatible with only one PTLRPC service. + * + * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name + */ + const char *pd_compat_svc_name; + /** + * Owner module for this policy descriptor. + * + * We need to hold a reference to the module whenever we might make use + * of any of the module's contents, i.e. + * - If one or more instances of the policy are at a state where they + * might be handling a request, i.e. + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to + * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference + * is taken on the module when + * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it + * becomes 0, so that we hold only one reference to the module maximum + * at any time. + * + * We do not need to hold a reference to the module, even though we + * might use code and data from the module, in the following cases: + * - During external policy registration, because this should happen in + * the module's init() function, in which case the module is safe from + * removal because a reference is being held on the module by the + * kernel, and iirc kmod (and I guess module-init-tools also) will + * serialize any racing processes properly anyway. + * - During external policy unregistration, because this should happen + * in a module's exit() function, and any attempts to start a policy + * instance would need to take a reference on the module, and this is + * not possible once we have reached the point where the exit() + * handler is called. + * - During service registration and unregistration, as service setup + * and cleanup, and policy registration, unregistration and policy + * instance starting, are serialized by \e nrs_core::nrs_mutex, so + * as long as users adhere to the convention of registering policies + * in init() and unregistering them in module exit() functions, there + * should not be a race between these operations. + * - During any policy-specific lprocfs operations, because a reference + * is held by the kernel on a proc entry that has been entered by a + * syscall, so as long as proc entries are removed during + * unregistration time, then unregistration and lprocfs operations + * will be properly serialized. + */ + struct module *pd_owner; + /** + * Bitmask of \e nrs_policy_flags + */ + unsigned int pd_flags; + /** + * # of references on this descriptor + */ + atomic_t pd_refs; +}; + +/** + * NRS policy state + * + * Policies transition from one state to the other during their lifetime + */ +enum ptlrpc_nrs_pol_state { + /** + * Not a valid policy state. + */ + NRS_POL_STATE_INVALID, + /** + * Policies are at this state either at the start of their life, or + * transition here when the user selects a different policy to act + * as the primary one. + */ + NRS_POL_STATE_STOPPED, + /** + * Policy is progress of stopping + */ + NRS_POL_STATE_STOPPING, + /** + * Policy is in progress of starting + */ + NRS_POL_STATE_STARTING, + /** + * A policy is in this state in two cases: + * - it is the fallback policy, which is always in this state. + * - it has been activated by the user; i.e. it is the primary policy, + */ + NRS_POL_STATE_STARTED, +}; + +/** + * NRS policy information + * + * Used for obtaining information for the status of a policy via lprocfs + */ +struct ptlrpc_nrs_pol_info { + /** + * Policy name + */ + char pi_name[NRS_POL_NAME_MAX]; + /** + * Policy argument + */ + char pi_arg[NRS_POL_ARG_MAX]; + /** + * Current policy state + */ + enum ptlrpc_nrs_pol_state pi_state; + /** + * # RPCs enqueued for later dispatching by the policy + */ + long pi_req_queued; + /** + * # RPCs started for dispatch by the policy + */ + long pi_req_started; + /** + * Is this a fallback policy? + */ + unsigned pi_fallback:1; +}; + +/** + * NRS policy + * + * There is one instance of this for each policy in each NRS head of each + * PTLRPC service partition. + */ +struct ptlrpc_nrs_policy { + /** + * Linkage into the NRS head's list of policies, + * ptlrpc_nrs:nrs_policy_list + */ + struct list_head pol_list; + /** + * Linkage into the NRS head's list of policies with enqueued + * requests ptlrpc_nrs:nrs_policy_queued + */ + struct list_head pol_list_queued; + /** + * Current state of this policy + */ + enum ptlrpc_nrs_pol_state pol_state; + /** + * Bitmask of nrs_policy_flags + */ + unsigned int pol_flags; + /** + * # RPCs enqueued for later dispatching by the policy + */ + long pol_req_queued; + /** + * # RPCs started for dispatch by the policy + */ + long pol_req_started; + /** + * Usage Reference count taken on the policy instance + */ + long pol_ref; + /** + * Human-readable policy argument + */ + char pol_arg[NRS_POL_ARG_MAX]; + /** + * The NRS head this policy has been created at + */ + struct ptlrpc_nrs *pol_nrs; + /** + * Private policy data; varies by policy type + */ + void *pol_private; + /** + * Policy descriptor for this policy instance. + */ + struct ptlrpc_nrs_pol_desc *pol_desc; +}; + +/** + * NRS resource + * + * Resources are embedded into two types of NRS entities: + * - Inside NRS policies, in the policy's private data in + * ptlrpc_nrs_policy::pol_private + * - In objects that act as prime-level scheduling entities in different NRS + * policies; e.g. on a policy that performs round robin or similar order + * scheduling across client NIDs, there would be one NRS resource per unique + * client NID. On a policy which performs round robin scheduling across + * backend filesystem objects, there would be one resource associated with + * each of the backend filesystem objects partaking in the scheduling + * performed by the policy. + * + * NRS resources share a parent-child relationship, in which resources embedded + * in policy instances are the parent entities, with all scheduling entities + * a policy schedules across being the children, thus forming a simple resource + * hierarchy. This hierarchy may be extended with one or more levels in the + * future if the ability to have more than one primary policy is added. + * + * Upon request initialization, references to the then active NRS policies are + * taken and used to later handle the dispatching of the request with one of + * these policies. + * + * \see nrs_resource_get_safe() + * \see ptlrpc_nrs_req_add() + */ +struct ptlrpc_nrs_resource { + /** + * This NRS resource's parent; is NULL for resources embedded in NRS + * policy instances; i.e. those are top-level ones. + */ + struct ptlrpc_nrs_resource *res_parent; + /** + * The policy associated with this resource. + */ + struct ptlrpc_nrs_policy *res_policy; +}; + +enum { + NRS_RES_FALLBACK, + NRS_RES_PRIMARY, + NRS_RES_MAX +}; + +#include "lustre_nrs_fifo.h" + +/** + * NRS request + * + * Instances of this object exist embedded within ptlrpc_request; the main + * purpose of this object is to hold references to the request's resources + * for the lifetime of the request, and to hold properties that policies use + * use for determining the request's scheduling priority. + **/ +struct ptlrpc_nrs_request { + /** + * The request's resource hierarchy. + */ + struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX]; + /** + * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the + * policy that was used to enqueue the request. + * + * \see nrs_request_enqueue() + */ + unsigned int nr_res_idx; + unsigned int nr_initialized:1; + unsigned int nr_enqueued:1; + unsigned int nr_started:1; + unsigned int nr_finalized:1; + + /** + * Policy-specific fields, used for determining a request's scheduling + * priority, and other supporting functionality. + */ + union { + /** + * Fields for the FIFO policy + */ + struct nrs_fifo_req fifo; + } nr_u; + /** + * Externally-registering policies may want to use this to allocate + * their own request properties. + */ + void *ext; +}; + +/** @} nrs */ +#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h new file mode 100644 index 000000000000..3b5418eac6c4 --- /dev/null +++ b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h @@ -0,0 +1,70 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2014, Intel Corporation. + * + * Copyright 2012 Xyratex Technology Limited + */ +/* + * + * Network Request Scheduler (NRS) First-in First-out (FIFO) policy + * + */ + +#ifndef _LUSTRE_NRS_FIFO_H +#define _LUSTRE_NRS_FIFO_H + +/* \name fifo + * + * FIFO policy + * + * This policy is a logical wrapper around previous, non-NRS functionality. + * It dispatches RPCs in the same order as they arrive from the network. This + * policy is currently used as the fallback policy, and the only enabled policy + * on all NRS heads of all PTLRPC service partitions. + * @{ + */ + +/** + * Private data structure for the FIFO policy + */ +struct nrs_fifo_head { + /** + * Resource object for policy instance. + */ + struct ptlrpc_nrs_resource fh_res; + /** + * List of queued requests. + */ + struct list_head fh_list; + /** + * For debugging purposes. + */ + __u64 fh_sequence; +}; + +struct nrs_fifo_req { + struct list_head fr_list; + __u64 fr_sequence; +}; + +/** @} fifo */ +#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h index a13558e53274..fbcd39572cd0 100644 --- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h +++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h @@ -148,13 +148,12 @@ extern struct req_format RQF_MDS_GETATTR; */ extern struct req_format RQF_MDS_GETATTR_NAME; extern struct req_format RQF_MDS_CLOSE; -extern struct req_format RQF_MDS_RELEASE_CLOSE; +extern struct req_format RQF_MDS_INTENT_CLOSE; extern struct req_format RQF_MDS_CONNECT; extern struct req_format RQF_MDS_DISCONNECT; extern struct req_format RQF_MDS_GET_INFO; extern struct req_format RQF_MDS_READPAGE; extern struct req_format RQF_MDS_WRITEPAGE; -extern struct req_format RQF_MDS_DONE_WRITING; extern struct req_format RQF_MDS_REINT; extern struct req_format RQF_MDS_REINT_CREATE; extern struct req_format RQF_MDS_REINT_CREATE_ACL; @@ -166,10 +165,9 @@ extern struct req_format RQF_MDS_REINT_LINK; extern struct req_format RQF_MDS_REINT_RENAME; extern struct req_format RQF_MDS_REINT_SETATTR; extern struct req_format RQF_MDS_REINT_SETXATTR; -extern struct req_format RQF_MDS_QUOTACHECK; extern struct req_format RQF_MDS_QUOTACTL; -extern struct req_format RQF_QC_CALLBACK; extern struct req_format RQF_MDS_SWAP_LAYOUTS; +extern struct req_format RQF_MDS_REINT_MIGRATE; /* MDS hsm formats */ extern struct req_format RQF_MDS_HSM_STATE_GET; extern struct req_format RQF_MDS_HSM_STATE_SET; @@ -181,7 +179,6 @@ extern struct req_format RQF_MDS_HSM_REQUEST; /* OST req_format */ extern struct req_format RQF_OST_CONNECT; extern struct req_format RQF_OST_DISCONNECT; -extern struct req_format RQF_OST_QUOTACHECK; extern struct req_format RQF_OST_QUOTACTL; extern struct req_format RQF_OST_GETATTR; extern struct req_format RQF_OST_SETATTR; diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h index 90c183424802..03a970bcac55 100644 --- a/drivers/staging/lustre/lustre/include/lustre_sec.h +++ b/drivers/staging/lustre/lustre/include/lustre_sec.h @@ -50,6 +50,7 @@ struct brw_page; /* Linux specific */ struct key; struct seq_file; +struct lustre_cfg; /* * forward declaration @@ -1029,6 +1030,8 @@ int sptlrpc_target_export_check(struct obd_export *exp, /* bulk security api */ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc); +int get_free_pages_in_pool(void); +int pool_is_at_full_capacity(void); int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, struct ptlrpc_bulk_desc *desc); diff --git a/drivers/staging/lustre/lustre/include/lustre_swab.h b/drivers/staging/lustre/lustre/include/lustre_swab.h new file mode 100644 index 000000000000..26d01c2d6633 --- /dev/null +++ b/drivers/staging/lustre/lustre/include/lustre_swab.h @@ -0,0 +1,102 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + * + * Copyright 2015 Cray Inc, all rights reserved. + * Author: Ben Evans. + * + * We assume all nodes are either little-endian or big-endian, and we + * always send messages in the sender's native format. The receiver + * detects the message format by checking the 'magic' field of the message + * (see lustre_msg_swabbed() below). + * + * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines + * are implemented in ptlrpc/lustre_swab.c. These 'swabbers' convert the + * type from "other" endian, in-place in the message buffer. + * + * A swabber takes a single pointer argument. The caller must already have + * verified that the length of the message buffer >= sizeof (type). + * + * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine + * may be defined that swabs just the variable part, after the caller has + * verified that the message buffer is large enough. + */ + +#ifndef _LUSTRE_SWAB_H_ +#define _LUSTRE_SWAB_H_ + +#include "lustre/lustre_idl.h" + +void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); +void lustre_swab_connect(struct obd_connect_data *ocd); +void lustre_swab_hsm_user_state(struct hsm_user_state *hus); +void lustre_swab_hsm_state_set(struct hsm_state_set *hss); +void lustre_swab_obd_statfs(struct obd_statfs *os); +void lustre_swab_obd_ioobj(struct obd_ioobj *ioo); +void lustre_swab_niobuf_remote(struct niobuf_remote *nbr); +void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb); +void lustre_swab_ost_lvb(struct ost_lvb *lvb); +void lustre_swab_obd_quotactl(struct obd_quotactl *q); +void lustre_swab_lquota_lvb(struct lquota_lvb *lvb); +void lustre_swab_generic_32s(__u32 *val); +void lustre_swab_mdt_body(struct mdt_body *b); +void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b); +void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); +void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); +void lustre_swab_lmv_desc(struct lmv_desc *ld); +void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm); +void lustre_swab_lov_desc(struct lov_desc *ld); +void lustre_swab_gl_desc(union ldlm_gl_desc *desc); +void lustre_swab_ldlm_intent(struct ldlm_intent *i); +void lustre_swab_ldlm_request(struct ldlm_request *rq); +void lustre_swab_ldlm_reply(struct ldlm_reply *r); +void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo); +void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo); +void lustre_swab_mgs_config_body(struct mgs_config_body *body); +void lustre_swab_mgs_config_res(struct mgs_config_res *body); +void lustre_swab_ost_body(struct ost_body *b); +void lustre_swab_ost_last_id(__u64 *id); +void lustre_swab_fiemap(struct fiemap *fiemap); +void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum); +void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum); +void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod, + int stripe_count); +void lustre_swab_lov_mds_md(struct lov_mds_md *lmm); +void lustre_swab_lustre_capa(struct lustre_capa *c); +void lustre_swab_lustre_capa_key(struct lustre_capa_key *k); +void lustre_swab_fid2path(struct getinfo_fid2path *gf); +void lustre_swab_layout_intent(struct layout_intent *li); +void lustre_swab_hsm_user_state(struct hsm_user_state *hus); +void lustre_swab_hsm_current_action(struct hsm_current_action *action); +void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk); +void lustre_swab_hsm_user_state(struct hsm_user_state *hus); +void lustre_swab_hsm_user_item(struct hsm_user_item *hui); +void lustre_swab_hsm_request(struct hsm_request *hr); +void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl); +void lustre_swab_close_data(struct close_data *data); +void lustre_swab_lmv_user_md(struct lmv_user_md *lum); + +#endif diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h index f6fc4dd05bd6..0f48e9c3d9e3 100644 --- a/drivers/staging/lustre/lustre/include/obd.h +++ b/drivers/staging/lustre/lustre/include/obd.h @@ -73,70 +73,17 @@ static inline void loi_init(struct lov_oinfo *loi) { } -/* - * If we are unable to get the maximum object size from the OST in - * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using - * the old maximum object size from ext3. - */ -#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL - -struct lov_stripe_md { - atomic_t lsm_refc; - spinlock_t lsm_lock; - pid_t lsm_lock_owner; /* debugging */ - - /* maximum possible file size, might change as OSTs status changes, - * e.g. disconnected, deactivated - */ - __u64 lsm_maxbytes; - struct ost_id lsm_oi; - __u32 lsm_magic; - __u32 lsm_stripe_size; - __u32 lsm_pattern; /* striping pattern (RAID0, RAID1) */ - __u16 lsm_stripe_count; - __u16 lsm_layout_gen; - char lsm_pool_name[LOV_MAXPOOLNAME + 1]; - struct lov_oinfo *lsm_oinfo[0]; -}; - -static inline bool lsm_is_released(struct lov_stripe_md *lsm) -{ - return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED); -} - -static inline bool lsm_has_objects(struct lov_stripe_md *lsm) -{ - if (!lsm) - return false; - if (lsm_is_released(lsm)) - return false; - return true; -} - -static inline int lov_stripe_md_size(unsigned int stripe_count) -{ - struct lov_stripe_md lsm; - - return sizeof(lsm) + stripe_count * sizeof(lsm.lsm_oinfo[0]); -} - +struct lov_stripe_md; struct obd_info; typedef int (*obd_enqueue_update_f)(void *cookie, int rc); /* obd info for a particular level (lov, osc). */ struct obd_info { - /* Flags used for set request specific flags: - - while lock handling, the flags obtained on the enqueue - request are set here. - - while stats, the flags used for control delay/resend. - - while setattr, the flags used for distinguish punch operation - */ + /* OBD_STATFS_* flags */ __u64 oi_flags; /* lsm data specific for every OSC. */ struct lov_stripe_md *oi_md; - /* obdo data specific for every OSC, if needed at all. */ - struct obdo *oi_oa; /* statfs data specific for every OSC, if needed at all. */ struct obd_statfs *oi_osfs; /* An update callback which is called to update some data on upper @@ -204,7 +151,6 @@ enum obd_cl_sem_lock_class { * on the MDS. */ #define OBD_MAX_DEFAULT_EA_SIZE 4096 -#define OBD_MAX_DEFAULT_COOKIE_SIZE 4096 struct mdc_rpc_lock; struct obd_import; @@ -214,7 +160,7 @@ struct client_obd { struct obd_import *cl_import; /* ptlrpc connection state */ size_t cl_conn_count; /* - * Cache maximum and default values for easize and cookiesize. This is + * Cache maximum and default values for easize. This is * strictly a performance optimization to minimize calls to * obd_size_diskmd(). The default values are used to calculate the * initial size of a request buffer. The ptlrpc layer will resize the @@ -235,18 +181,6 @@ struct client_obd { * run-time if a larger observed size is advertised by the MDT. */ u32 cl_max_mds_easize; - /* Default cookie size for llog cookies (see struct llog_cookie). It is - * initialized to zero at mount-time, then it tracks the largest - * observed cookie size advertised by the MDT, up to a maximum value of - * OBD_MAX_DEFAULT_COOKIE_SIZE. Note that llog_cookies are not - * used by clients communicating with MDS versions 2.4.0 and later. - */ - u32 cl_default_mds_cookiesize; - /* Maximum possible cookie size computed at mount-time based on - * the number of OSTs in the filesystem. May be increased at - * run-time if a larger observed size is advertised by the MDT. - */ - u32 cl_max_mds_cookiesize; enum lustre_sec_part cl_sp_me; enum lustre_sec_part cl_sp_to; @@ -313,15 +247,42 @@ struct client_obd { struct obd_histogram cl_read_offset_hist; struct obd_histogram cl_write_offset_hist; - /* lru for osc caching pages */ + /* LRU for osc caching pages */ struct cl_client_cache *cl_cache; - struct list_head cl_lru_osc; /* member of cl_cache->ccc_lru */ + /** member of cl_cache->ccc_lru */ + struct list_head cl_lru_osc; + /** # of available LRU slots left in the per-OSC cache. + * Available LRU slots are shared by all OSCs of the same file system, + * therefore this is a pointer to cl_client_cache::ccc_lru_left. + */ atomic_long_t *cl_lru_left; + /** # of busy LRU pages. A page is considered busy if it's in writeback + * queue, or in transfer. Busy pages can't be discarded so they are not + * in LRU cache. + */ atomic_long_t cl_lru_busy; + /** # of LRU pages in the cache for this client_obd */ atomic_long_t cl_lru_in_list; + /** # of threads are shrinking LRU cache. To avoid contention, it's not + * allowed to have multiple threads shrinking LRU cache. + */ atomic_t cl_lru_shrinkers; - struct list_head cl_lru_list; /* lru page list */ - spinlock_t cl_lru_list_lock; /* page list protector */ + /** The time when this LRU cache was last used. */ + time64_t cl_lru_last_used; + /** stats: how many reclaims have happened for this client_obd. + * reclaim and shrink - shrink is async, voluntarily rebalancing; + * reclaim is sync, initiated by IO thread when the LRU slots are + * in shortage. + */ + u64 cl_lru_reclaim; + /** List of LRU pages for this client_obd */ + struct list_head cl_lru_list; + /** Lock for LRU page list */ + spinlock_t cl_lru_list_lock; + /** # of unstable pages in this client_obd. + * An unstable page is a page state that WRITE RPC has finished but + * the transaction has NOT yet committed. + */ atomic_long_t cl_unstable_count; /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ @@ -329,7 +290,17 @@ struct client_obd { wait_queue_head_t cl_destroy_waitq; struct mdc_rpc_lock *cl_rpc_lock; - struct mdc_rpc_lock *cl_close_lock; + + /* modify rpcs in flight + * currently used for metadata only + */ + spinlock_t cl_mod_rpcs_lock; + u16 cl_max_mod_rpcs_in_flight; + u16 cl_mod_rpcs_in_flight; + u16 cl_close_rpcs_in_flight; + wait_queue_head_t cl_mod_rpcs_waitq; + unsigned long *cl_mod_tag_bitmap; + struct obd_histogram cl_mod_rpcs_hist; /* mgc datastruct */ atomic_t cl_mgc_refcount; @@ -345,13 +316,6 @@ struct client_obd { /* also protected by the poorly named _loi_list_lock lock above */ struct osc_async_rc cl_ar; - /* used by quotacheck when the servers are older than 2.4 */ - int cl_qchk_stat; /* quotacheck stat of the peer */ -#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */ -#if OBD_OCD_VERSION(2, 7, 53, 0) < LUSTRE_VERSION_CODE -#warning "please consider removing quotacheck compatibility code" -#endif - /* sequence manager */ struct lu_client_seq *cl_seq; @@ -454,8 +418,6 @@ struct lmv_obd { int connected; int max_easize; int max_def_easize; - int max_cookiesize; - int max_def_cookiesize; u32 tgts_size; /* size of tgts array */ struct lmv_tgt_desc **tgts; @@ -469,9 +431,9 @@ struct niobuf_local { __u32 lnb_page_offset; __u32 lnb_len; __u32 lnb_flags; + int lnb_rc; struct page *lnb_page; void *lnb_data; - int lnb_rc; }; #define LUSTRE_FLD_NAME "fld" @@ -512,21 +474,6 @@ struct niobuf_local { /* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ #define N_LOCAL_TEMP_PAGE 0x10000000 -struct obd_trans_info { - __u64 oti_xid; - /* Only used on the server side for tracking acks. */ - struct oti_req_ack_lock { - struct lustre_handle lock; - __u32 mode; - } oti_ack_locks[4]; - void *oti_handle; - struct llog_cookie oti_onecookie; - struct llog_cookie *oti_logcookies; - - /** VBR: versions */ - __u64 oti_pre_version; -}; - /* * Events signalled through obd_notify() upcall-chain. */ @@ -587,15 +534,14 @@ struct lvfs_run_ctxt { struct obd_device { struct obd_type *obd_type; - __u32 obd_magic; + u32 obd_magic; /* OBD_DEVICE_MAGIC */ + int obd_minor; /* device number: lctl dl */ + struct lu_device *obd_lu_dev; /* common and UUID name of this device */ - char obd_name[MAX_OBD_NAME]; - struct obd_uuid obd_uuid; - - struct lu_device *obd_lu_dev; + struct obd_uuid obd_uuid; + char obd_name[MAX_OBD_NAME]; - int obd_minor; /* bitfield modification is protected by obd_dev_lock */ unsigned long obd_attached:1, /* finished attach */ obd_set_up:1, /* finished setup */ @@ -619,22 +565,22 @@ struct obd_device { unsigned long obd_recovery_expired:1; /* uuid-export hash body */ struct cfs_hash *obd_uuid_hash; - atomic_t obd_refcount; wait_queue_head_t obd_refcount_waitq; struct list_head obd_exports; struct list_head obd_unlinked_exports; struct list_head obd_delayed_exports; + atomic_t obd_refcount; int obd_num_exports; spinlock_t obd_nid_lock; struct ldlm_namespace *obd_namespace; struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ /* a spinlock is OK for what we do now, may need a semaphore later */ spinlock_t obd_dev_lock; /* protect OBD bitfield above */ - struct mutex obd_dev_mutex; - __u64 obd_last_committed; spinlock_t obd_osfs_lock; struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ __u64 obd_osfs_age; + u64 obd_last_committed; + struct mutex obd_dev_mutex; struct lvfs_run_ctxt obd_lvfs_ctxt; struct obd_llog_group obd_olg; /* default llog group */ struct obd_device *obd_observer; @@ -648,12 +594,13 @@ struct obd_device { struct lov_obd lov; struct lmv_obd lmv; } u; + /* Fields used by LProcFS */ - unsigned int obd_cntr_base; - struct lprocfs_stats *obd_stats; + struct lprocfs_stats *obd_stats; + unsigned int obd_cntr_base; - unsigned int md_cntr_base; - struct lprocfs_stats *md_stats; + struct lprocfs_stats *md_stats; + unsigned int md_cntr_base; struct dentry *obd_debugfs_entry; struct dentry *obd_svc_debugfs_entry; @@ -665,9 +612,11 @@ struct obd_device { /** * Ldlm pool part. Save last calculated SLV and Limit. */ - rwlock_t obd_pool_lock; - int obd_pool_limit; - __u64 obd_pool_slv; + rwlock_t obd_pool_lock; + u64 obd_pool_slv; + int obd_pool_limit; + + int obd_conn_inprogress; /** * A list of outstanding class_incref()'s against this obd. For @@ -675,19 +624,10 @@ struct obd_device { */ struct lu_ref obd_reference; - int obd_conn_inprogress; - struct kobject obd_kobj; /* sysfs object */ struct completion obd_kobj_unregister; }; -enum obd_cleanup_stage { -/* Special case hack for MDS LOVs */ - OBD_CLEANUP_EARLY, -/* can be directly mapped to .ldto_device_fini() */ - OBD_CLEANUP_EXPORTS, -}; - /* get/set_info keys */ #define KEY_ASYNC "async" #define KEY_CHANGELOG_CLEAR "changelog_clear" @@ -704,7 +644,6 @@ enum obd_cleanup_stage { #define KEY_INTERMDS "inter_mds" #define KEY_LAST_ID "last_id" #define KEY_LAST_FID "last_fid" -#define KEY_LOVDESC "lovdesc" #define KEY_MAX_EASIZE "max_easize" #define KEY_DEFAULT_EASIZE "default_easize" #define KEY_MGSSEC "mgssec" @@ -720,22 +659,6 @@ enum obd_cleanup_stage { struct lu_context; -/* /!\ must be coherent with include/linux/namei.h on patched kernel */ -#define IT_OPEN (1 << 0) -#define IT_CREAT (1 << 1) -#define IT_READDIR (1 << 2) -#define IT_GETATTR (1 << 3) -#define IT_LOOKUP (1 << 4) -#define IT_UNLINK (1 << 5) -#define IT_TRUNC (1 << 6) -#define IT_GETXATTR (1 << 7) -#define IT_EXEC (1 << 8) -#define IT_PIN (1 << 9) -#define IT_LAYOUT (1 << 10) -#define IT_QUOTA_DQACQ (1 << 11) -#define IT_QUOTA_CONN (1 << 12) -#define IT_SETXATTR (1 << 13) - static inline int it_to_lock_mode(struct lookup_intent *it) { /* CREAT needs to be tested before open (both could be set) */ @@ -755,6 +678,14 @@ static inline int it_to_lock_mode(struct lookup_intent *it) return -EINVAL; } +enum md_op_flags { + MF_MDC_CANCEL_FID1 = BIT(0), + MF_MDC_CANCEL_FID2 = BIT(1), + MF_MDC_CANCEL_FID3 = BIT(2), + MF_MDC_CANCEL_FID4 = BIT(3), + MF_GET_MDT_IDX = BIT(4), +}; + enum md_cli_flags { CLI_SET_MEA = BIT(0), CLI_RM_ENTRY = BIT(1), @@ -789,8 +720,6 @@ struct md_op_data { __u64 op_valid; loff_t op_attr_blocks; - /* Size-on-MDS epoch and flags. */ - __u64 op_ioepoch; __u32 op_flags; /* Various operation flags. */ @@ -839,15 +768,13 @@ struct obd_ops { int (*iocontrol)(unsigned int cmd, struct obd_export *exp, int len, void *karg, void __user *uarg); int (*get_info)(const struct lu_env *env, struct obd_export *, - __u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm); + __u32 keylen, void *key, __u32 *vallen, void *val); int (*set_info_async)(const struct lu_env *, struct obd_export *, __u32 keylen, void *key, __u32 vallen, void *val, struct ptlrpc_request_set *set); int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg); - int (*precleanup)(struct obd_device *dev, - enum obd_cleanup_stage cleanup_stage); + int (*precleanup)(struct obd_device *dev); int (*cleanup)(struct obd_device *dev); int (*process_config)(struct obd_device *dev, u32 len, void *data); int (*postrecov)(struct obd_device *dev); @@ -887,35 +814,23 @@ struct obd_ops { struct obd_statfs *osfs, __u64 max_age, __u32 flags); int (*statfs_async)(struct obd_export *exp, struct obd_info *oinfo, __u64 max_age, struct ptlrpc_request_set *set); - int (*packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src); - int (*unpackmd)(struct obd_export *exp, - struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, int disk_len); int (*create)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct obd_trans_info *oti); + struct obdo *oa); int (*destroy)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct obd_trans_info *oti); + struct obdo *oa); int (*setattr)(const struct lu_env *, struct obd_export *exp, - struct obd_info *oinfo, struct obd_trans_info *oti); - int (*setattr_async)(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - struct ptlrpc_request_set *rqset); + struct obdo *oa); int (*getattr)(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo); - int (*getattr_async)(struct obd_export *exp, struct obd_info *oinfo, - struct ptlrpc_request_set *set); + struct obdo *oa); int (*preprw)(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *remote, - int *nr_pages, struct niobuf_local *local, - struct obd_trans_info *oti); + int *nr_pages, struct niobuf_local *local); int (*commitrw)(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *remote, int pages, - struct niobuf_local *local, - struct obd_trans_info *oti, int rc); + struct niobuf_local *local, int rc); int (*init_export)(struct obd_export *exp); int (*destroy_export)(struct obd_export *exp); @@ -930,8 +845,6 @@ struct obd_ops { struct obd_uuid *(*get_uuid)(struct obd_export *exp); /* quota methods */ - int (*quotacheck)(struct obd_device *, struct obd_export *, - struct obd_quotactl *); int (*quotactl)(struct obd_device *, struct obd_export *, struct obd_quotactl *); @@ -954,7 +867,7 @@ struct obd_ops { /* lmv structures */ struct lustre_md { struct mdt_body *body; - struct lov_stripe_md *lsm; + struct lu_buf layout; struct lmv_stripe_md *lmv; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *posix_acl; @@ -992,10 +905,8 @@ struct md_ops { int (*create)(struct obd_export *, struct md_op_data *, const void *, size_t, umode_t, uid_t, gid_t, cfs_cap_t, __u64, struct ptlrpc_request **); - int (*done_writing)(struct obd_export *, struct md_op_data *, - struct md_open_data *); int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *, - const ldlm_policy_data_t *, + const union ldlm_policy_data *, struct lookup_intent *, struct md_op_data *, struct lustre_handle *, __u64); int (*getattr)(struct obd_export *, struct md_op_data *, @@ -1012,8 +923,7 @@ struct md_ops { const char *, size_t, const char *, size_t, struct ptlrpc_request **); int (*setattr)(struct obd_export *, struct md_op_data *, void *, - size_t, void *, size_t, struct ptlrpc_request **, - struct md_open_data **mod); + size_t, struct ptlrpc_request **); int (*sync)(struct obd_export *, const struct lu_fid *, struct ptlrpc_request **); int (*read_page)(struct obd_export *, struct md_op_data *, @@ -1030,7 +940,7 @@ struct md_ops { u64, const char *, const char *, int, int, int, struct ptlrpc_request **); - int (*init_ea_size)(struct obd_export *, u32, u32, u32, u32); + int (*init_ea_size)(struct obd_export *, u32, u32); int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *, struct obd_export *, struct obd_export *, @@ -1052,11 +962,11 @@ struct md_ops { enum ldlm_mode (*lock_match)(struct obd_export *, __u64, const struct lu_fid *, enum ldlm_type, - ldlm_policy_data_t *, enum ldlm_mode, + union ldlm_policy_data *, enum ldlm_mode, struct lustre_handle *); int (*cancel_unused)(struct obd_export *, const struct lu_fid *, - ldlm_policy_data_t *, enum ldlm_mode, + union ldlm_policy_data *, enum ldlm_mode, enum ldlm_cancel_flags flags, void *opaque); int (*get_fid_from_lsm)(struct obd_export *, @@ -1071,6 +981,8 @@ struct md_ops { int (*revalidate_lock)(struct obd_export *, struct lookup_intent *, struct lu_fid *, __u64 *bits); + int (*unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm, + const union lmv_mds_md *lmv, size_t lmv_size); /* * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a @@ -1078,33 +990,6 @@ struct md_ops { */ }; -struct lsm_operations { - void (*lsm_free)(struct lov_stripe_md *); - void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, u64 *, - u64 *); - void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, u64 *, - u64 *); - int (*lsm_lmm_verify)(struct lov_mds_md *lmm, int lmm_bytes, - __u16 *stripe_count); - int (*lsm_unpackmd)(struct lov_obd *lov, struct lov_stripe_md *lsm, - struct lov_mds_md *lmm); -}; - -extern const struct lsm_operations lsm_v1_ops; -extern const struct lsm_operations lsm_v3_ops; -static inline const struct lsm_operations *lsm_op_find(int magic) -{ - switch (magic) { - case LOV_MAGIC_V1: - return &lsm_v1_ops; - case LOV_MAGIC_V3: - return &lsm_v3_ops; - default: - CERROR("Cannot recognize lsm_magic %08x\n", magic); - return NULL; - } -} - static inline struct md_open_data *obd_mod_alloc(void) { struct md_open_data *mod; diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h index 16094dbec08b..7ec25202cd22 100644 --- a/drivers/staging/lustre/lustre/include/obd_class.h +++ b/drivers/staging/lustre/lustre/include/obd_class.h @@ -100,6 +100,13 @@ int obd_get_request_slot(struct client_obd *cli); void obd_put_request_slot(struct client_obd *cli); __u32 obd_get_max_rpcs_in_flight(struct client_obd *cli); int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max); +int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, u16 max); +int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq); + +u16 obd_get_mod_rpc_slot(struct client_obd *cli, u32 opc, + struct lookup_intent *it); +void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc, + struct lookup_intent *it, u16 tag); struct llog_handle; struct llog_rec_hdr; @@ -175,10 +182,13 @@ struct lustre_profile { char *lp_profile; char *lp_dt; char *lp_md; + int lp_refs; + bool lp_list_deleted; }; struct lustre_profile *class_get_profile(const char *prof); void class_del_profile(const char *prof); +void class_put_profile(struct lustre_profile *lprof); void class_del_profiles(void); #if LUSTRE_TRACKS_LOCK_EXP_REFS @@ -269,10 +279,8 @@ static inline int lprocfs_climp_check(struct obd_device *obd) struct inode; struct lu_attr; struct obdo; -void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid); void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj); -void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid); #define OBT(dev) (dev)->obd_type #define OBP(dev, op) (dev)->obd_type->typ_dt_ops->op @@ -417,16 +425,14 @@ static inline int class_devno_max(void) static inline int obd_get_info(const struct lu_env *env, struct obd_export *exp, __u32 keylen, - void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm) + void *key, __u32 *vallen, void *val) { int rc; EXP_CHECK_DT_OP(exp, get_info); EXP_COUNTER_INCREMENT(exp, get_info); - rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val, - lsm); + rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val); return rc; } @@ -505,8 +511,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) return rc; } -static inline int obd_precleanup(struct obd_device *obd, - enum obd_cleanup_stage cleanup_stage) +static inline int obd_precleanup(struct obd_device *obd) { int rc; DECLARE_LU_VARS(ldt, d); @@ -517,20 +522,18 @@ static inline int obd_precleanup(struct obd_device *obd, ldt = obd->obd_type->typ_lu; d = obd->obd_lu_dev; if (ldt && d) { - if (cleanup_stage == OBD_CLEANUP_EXPORTS) { - struct lu_env env; + struct lu_env env; - rc = lu_env_init(&env, ldt->ldt_ctx_tags); - if (rc == 0) { - ldt->ldt_ops->ldto_device_fini(&env, d); - lu_env_fini(&env); - } + rc = lu_env_init(&env, ldt->ldt_ctx_tags); + if (!rc) { + ldt->ldt_ops->ldto_device_fini(&env, d); + lu_env_fini(&env); } } OBD_CHECK_DT_OP(obd, precleanup, 0); OBD_COUNTER_INCREMENT(obd, precleanup); - rc = OBP(obd, precleanup)(obd, cleanup_stage); + rc = OBP(obd, precleanup)(obd); return rc; } @@ -612,181 +615,51 @@ obd_process_config(struct obd_device *obd, int datalen, void *data) return rc; } -/* Pack an in-memory MD struct for storage on disk. - * Returns +ve size of packed MD (0 for free), or -ve error. - * - * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL). - * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed. - * If @*disk_tgt == NULL, it will be allocated - */ -static inline int obd_packmd(struct obd_export *exp, - struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src) -{ - int rc; - - EXP_CHECK_DT_OP(exp, packmd); - EXP_COUNTER_INCREMENT(exp, packmd); - - rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src); - return rc; -} - -static inline int obd_size_diskmd(struct obd_export *exp, - struct lov_stripe_md *mem_src) -{ - return obd_packmd(exp, NULL, mem_src); -} - -static inline int obd_free_diskmd(struct obd_export *exp, - struct lov_mds_md **disk_tgt) -{ - LASSERT(disk_tgt); - LASSERT(*disk_tgt); - /* - * LU-2590, for caller's convenience, *disk_tgt could be host - * endianness, it needs swab to LE if necessary, while just - * lov_mds_md header needs it for figuring out how much memory - * needs to be freed. - */ - if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) && - (((*disk_tgt)->lmm_magic == LOV_MAGIC_V1) || - ((*disk_tgt)->lmm_magic == LOV_MAGIC_V3))) - lustre_swab_lov_mds_md(*disk_tgt); - return obd_packmd(exp, disk_tgt, NULL); -} - -/* Unpack an MD struct from disk to in-memory format. - * Returns +ve size of unpacked MD (0 for free), or -ve error. - * - * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL). - * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed. - * If @*mem_tgt == NULL, it will be allocated - */ -static inline int obd_unpackmd(struct obd_export *exp, - struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, - int disk_len) -{ - int rc; - - EXP_CHECK_DT_OP(exp, unpackmd); - EXP_COUNTER_INCREMENT(exp, unpackmd); - - rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len); - return rc; -} - -static inline int obd_free_memmd(struct obd_export *exp, - struct lov_stripe_md **mem_tgt) -{ - int rc; - - LASSERT(mem_tgt); - LASSERT(*mem_tgt); - rc = obd_unpackmd(exp, mem_tgt, NULL, 0); - *mem_tgt = NULL; - return rc; -} - static inline int obd_create(const struct lu_env *env, struct obd_export *exp, - struct obdo *obdo, struct obd_trans_info *oti) + struct obdo *obdo) { int rc; EXP_CHECK_DT_OP(exp, create); EXP_COUNTER_INCREMENT(exp, create); - rc = OBP(exp->exp_obd, create)(env, exp, obdo, oti); + rc = OBP(exp->exp_obd, create)(env, exp, obdo); return rc; } static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp, - struct obdo *obdo, struct obd_trans_info *oti) + struct obdo *obdo) { int rc; EXP_CHECK_DT_OP(exp, destroy); EXP_COUNTER_INCREMENT(exp, destroy); - rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, oti); + rc = OBP(exp->exp_obd, destroy)(env, exp, obdo); return rc; } static inline int obd_getattr(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo) + struct obdo *oa) { int rc; EXP_CHECK_DT_OP(exp, getattr); EXP_COUNTER_INCREMENT(exp, getattr); - rc = OBP(exp->exp_obd, getattr)(env, exp, oinfo); - return rc; -} - -static inline int obd_getattr_async(struct obd_export *exp, - struct obd_info *oinfo, - struct ptlrpc_request_set *set) -{ - int rc; - - EXP_CHECK_DT_OP(exp, getattr_async); - EXP_COUNTER_INCREMENT(exp, getattr_async); - - rc = OBP(exp->exp_obd, getattr_async)(exp, oinfo, set); + rc = OBP(exp->exp_obd, getattr)(env, exp, oa); return rc; } static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo, - struct obd_trans_info *oti) + struct obdo *oa) { int rc; EXP_CHECK_DT_OP(exp, setattr); EXP_COUNTER_INCREMENT(exp, setattr); - rc = OBP(exp->exp_obd, setattr)(env, exp, oinfo, oti); - return rc; -} - -/* This performs all the requests set init/wait/destroy actions. */ -static inline int obd_setattr_rqset(struct obd_export *exp, - struct obd_info *oinfo, - struct obd_trans_info *oti) -{ - struct ptlrpc_request_set *set = NULL; - int rc; - - EXP_CHECK_DT_OP(exp, setattr_async); - EXP_COUNTER_INCREMENT(exp, setattr_async); - - set = ptlrpc_prep_set(); - if (!set) - return -ENOMEM; - - rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set); - if (rc == 0) - rc = ptlrpc_set_wait(set); - ptlrpc_set_destroy(set); - return rc; -} - -/* This adds all the requests into @set if @set != NULL, otherwise - * all requests are sent asynchronously without waiting for response. - */ -static inline int obd_setattr_async(struct obd_export *exp, - struct obd_info *oinfo, - struct obd_trans_info *oti, - struct ptlrpc_request_set *set) -{ - int rc; - - EXP_CHECK_DT_OP(exp, setattr_async); - EXP_COUNTER_INCREMENT(exp, setattr_async); - - rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set); + rc = OBP(exp->exp_obd, setattr)(env, exp, oa); return rc; } @@ -1053,15 +926,16 @@ static inline int obd_statfs_rqset(struct obd_export *exp, __u32 flags) { struct ptlrpc_request_set *set = NULL; - struct obd_info oinfo = { }; + struct obd_info oinfo = { + .oi_osfs = osfs, + .oi_flags = flags, + }; int rc = 0; - set = ptlrpc_prep_set(); + set = ptlrpc_prep_set(); if (!set) return -ENOMEM; - oinfo.oi_osfs = osfs; - oinfo.oi_flags = flags; rc = obd_statfs_async(exp, &oinfo, max_age, set); if (rc == 0) rc = ptlrpc_set_wait(set); @@ -1112,8 +986,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *remote, int *pages, - struct niobuf_local *local, - struct obd_trans_info *oti) + struct niobuf_local *local) { int rc; @@ -1121,7 +994,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd, EXP_COUNTER_INCREMENT(exp, preprw); rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote, - pages, local, oti); + pages, local); return rc; } @@ -1129,14 +1002,13 @@ static inline int obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int pages, - struct niobuf_local *local, - struct obd_trans_info *oti, int rc) + struct niobuf_local *local, int rc) { EXP_CHECK_DT_OP(exp, commitrw); EXP_COUNTER_INCREMENT(exp, commitrw); rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj, - rnb, pages, local, oti, rc); + rnb, pages, local, rc); return rc; } @@ -1219,18 +1091,6 @@ static inline int obd_notify_observer(struct obd_device *observer, return rc1 ? rc1 : rc2; } -static inline int obd_quotacheck(struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - int rc; - - EXP_CHECK_DT_OP(exp, quotacheck); - EXP_COUNTER_INCREMENT(exp, quotacheck); - - rc = OBP(exp->exp_obd, quotacheck)(exp->exp_obd, exp, oqctl); - return rc; -} - static inline int obd_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl) { @@ -1346,21 +1206,9 @@ static inline int md_create(struct obd_export *exp, struct md_op_data *op_data, return rc; } -static inline int md_done_writing(struct obd_export *exp, - struct md_op_data *op_data, - struct md_open_data *mod) -{ - int rc; - - EXP_CHECK_MD_OP(exp, done_writing); - EXP_MD_COUNTER_INCREMENT(exp, done_writing); - rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod); - return rc; -} - static inline int md_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, - const ldlm_policy_data_t *policy, + const union ldlm_policy_data *policy, struct lookup_intent *it, struct md_op_data *op_data, struct lustre_handle *lockh, @@ -1428,16 +1276,14 @@ static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data, } static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, void *ea2, size_t ea2len, - struct ptlrpc_request **request, - struct md_open_data **mod) + void *ea, size_t ealen, + struct ptlrpc_request **request) { int rc; EXP_CHECK_MD_OP(exp, setattr); EXP_MD_COUNTER_INCREMENT(exp, setattr); - rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, - ea2, ea2len, request, mod); + rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, request); return rc; } @@ -1561,7 +1407,7 @@ static inline int md_set_lock_data(struct obd_export *exp, static inline int md_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, enum ldlm_cancel_flags flags, void *opaque) @@ -1579,7 +1425,7 @@ static inline int md_cancel_unused(struct obd_export *exp, static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags, const struct lu_fid *fid, enum ldlm_type type, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, struct lustre_handle *lockh) { @@ -1589,14 +1435,12 @@ static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags, policy, mode, lockh); } -static inline int md_init_ea_size(struct obd_export *exp, int easize, - int def_asize, int cookiesize, - int def_cookiesize) +static inline int md_init_ea_size(struct obd_export *exp, u32 easize, + u32 def_asize) { EXP_CHECK_MD_OP(exp, init_ea_size); EXP_MD_COUNTER_INCREMENT(exp, init_ea_size); - return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize, - cookiesize, def_cookiesize); + return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize); } static inline int md_intent_getattr_async(struct obd_export *exp, @@ -1636,6 +1480,24 @@ static inline int md_get_fid_from_lsm(struct obd_export *exp, return rc; } +/* Unpack an MD struct from disk to in-memory format. + * Returns +ve size of unpacked MD (0 for free), or -ve error. + * + * If *plsm != NULL and lmm == NULL then *lsm will be freed. + * If *plsm == NULL then it will be allocated. + */ +static inline int md_unpackmd(struct obd_export *exp, + struct lmv_stripe_md **plsm, + const union lmv_mds_md *lmm, size_t lmm_size) +{ + int rc; + + EXP_CHECK_MD_OP(exp, unpackmd); + EXP_MD_COUNTER_INCREMENT(exp, unpackmd); + rc = MDP(exp->exp_obd, unpackmd)(exp, plsm, lmm, lmm_size); + return rc; +} + /* OBD Metadata Support */ int obd_init_caches(void); diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index b346a7f10aa4..aaedec7d793c 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -172,14 +172,14 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123 #define OBD_FAIL_MDS_SYNC_NET 0x124 #define OBD_FAIL_MDS_SYNC_PACK 0x125 -#define OBD_FAIL_MDS_DONE_WRITING_NET 0x126 -#define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 +/* OBD_FAIL_MDS_DONE_WRITING_NET 0x126 obsolete since 2.8.0 */ +/* OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 obsolete since 2.8.0 */ #define OBD_FAIL_MDS_ALLOC_OBDO 0x128 #define OBD_FAIL_MDS_PAUSE_OPEN 0x129 #define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a #define OBD_FAIL_MDS_OPEN_CREATE 0x12b #define OBD_FAIL_MDS_OST_SETATTR 0x12c -#define OBD_FAIL_MDS_QUOTACHECK_NET 0x12d +/* OBD_FAIL_MDS_QUOTACHECK_NET 0x12d obsolete since 2.4 */ #define OBD_FAIL_MDS_QUOTACTL_NET 0x12e #define OBD_FAIL_MDS_CLIENT_ADD 0x12f #define OBD_FAIL_MDS_GETXATTR_NET 0x130 @@ -264,7 +264,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_ENOSPC 0x215 #define OBD_FAIL_OST_EROFS 0x216 #define OBD_FAIL_OST_ENOENT 0x217 -#define OBD_FAIL_OST_QUOTACHECK_NET 0x218 +/* OBD_FAIL_OST_QUOTACHECK_NET 0x218 obsolete since 2.4 */ #define OBD_FAIL_OST_QUOTACTL_NET 0x219 #define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a #define OBD_FAIL_OST_CHECKSUM_SEND 0x21b @@ -321,6 +321,8 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322 #define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323 +#define OBD_FAIL_LDLM_GRANT_CHECK 0x32a + /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 @@ -343,6 +345,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSC_CP_ENQ_RACE 0x410 #define OBD_FAIL_OSC_NO_GRANT 0x411 #define OBD_FAIL_OSC_DELAY_SETTIME 0x412 +#define OBD_FAIL_OSC_DELAY_IO 0x414 #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 @@ -373,7 +376,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 #define OBD_FAIL_OBD_LOGD_NET 0x602 -#define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 +/* OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 obsolete since 2.4 */ #define OBD_FAIL_OBD_DQACQ 0x604 #define OBD_FAIL_OBD_LLOG_SETUP 0x605 #define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606 @@ -458,6 +461,8 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LOV_INIT 0x1403 #define OBD_FAIL_GLIMPSE_DELAY 0x1404 #define OBD_FAIL_LLITE_XATTR_ENOMEM 0x1405 +#define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406 +#define OBD_FAIL_LLITE_LOST_LAYOUT 0x1407 #define OBD_FAIL_GETATTR_DELAY 0x1409 #define OBD_FAIL_FID_INDIR 0x1501 diff --git a/drivers/staging/lustre/lustre/include/seq_range.h b/drivers/staging/lustre/lustre/include/seq_range.h new file mode 100644 index 000000000000..30c4dd66d5c4 --- /dev/null +++ b/drivers/staging/lustre/lustre/include/seq_range.h @@ -0,0 +1,199 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + * + * Copyright 2015 Cray Inc, all rights reserved. + * Author: Ben Evans. + * + * Define lu_seq_range associated functions + */ + +#ifndef _SEQ_RANGE_H_ +#define _SEQ_RANGE_H_ + +#include "lustre/lustre_idl.h" + +/** + * computes the sequence range type \a range + */ + +static inline unsigned int fld_range_type(const struct lu_seq_range *range) +{ + return range->lsr_flags & LU_SEQ_RANGE_MASK; +} + +/** + * Is this sequence range an OST? \a range + */ + +static inline bool fld_range_is_ost(const struct lu_seq_range *range) +{ + return fld_range_type(range) == LU_SEQ_RANGE_OST; +} + +/** + * Is this sequence range an MDT? \a range + */ + +static inline bool fld_range_is_mdt(const struct lu_seq_range *range) +{ + return fld_range_type(range) == LU_SEQ_RANGE_MDT; +} + +/** + * ANY range is only used when the fld client sends a fld query request, + * but it does not know whether the seq is an MDT or OST, so it will send the + * request with ANY type, which means any seq type from the lookup can be + * expected. /a range + */ +static inline unsigned int fld_range_is_any(const struct lu_seq_range *range) +{ + return fld_range_type(range) == LU_SEQ_RANGE_ANY; +} + +/** + * Apply flags to range \a range \a flags + */ + +static inline void fld_range_set_type(struct lu_seq_range *range, + unsigned int flags) +{ + range->lsr_flags |= flags; +} + +/** + * Add MDT to range type \a range + */ + +static inline void fld_range_set_mdt(struct lu_seq_range *range) +{ + fld_range_set_type(range, LU_SEQ_RANGE_MDT); +} + +/** + * Add OST to range type \a range + */ + +static inline void fld_range_set_ost(struct lu_seq_range *range) +{ + fld_range_set_type(range, LU_SEQ_RANGE_OST); +} + +/** + * Add ANY to range type \a range + */ + +static inline void fld_range_set_any(struct lu_seq_range *range) +{ + fld_range_set_type(range, LU_SEQ_RANGE_ANY); +} + +/** + * computes width of given sequence range \a range + */ + +static inline u64 lu_seq_range_space(const struct lu_seq_range *range) +{ + return range->lsr_end - range->lsr_start; +} + +/** + * initialize range to zero \a range + */ + +static inline void lu_seq_range_init(struct lu_seq_range *range) +{ + memset(range, 0, sizeof(*range)); +} + +/** + * check if given seq id \a s is within given range \a range + */ + +static inline bool lu_seq_range_within(const struct lu_seq_range *range, + u64 seq) +{ + return seq >= range->lsr_start && seq < range->lsr_end; +} + +/** + * Is the range sane? Is the end after the beginning? \a range + */ + +static inline bool lu_seq_range_is_sane(const struct lu_seq_range *range) +{ + return range->lsr_end >= range->lsr_start; +} + +/** + * Is the range 0? \a range + */ + +static inline bool lu_seq_range_is_zero(const struct lu_seq_range *range) +{ + return range->lsr_start == 0 && range->lsr_end == 0; +} + +/** + * Is the range out of space? \a range + */ + +static inline bool lu_seq_range_is_exhausted(const struct lu_seq_range *range) +{ + return lu_seq_range_space(range) == 0; +} + +/** + * return 0 if two ranges have the same location, nonzero if they are + * different \a r1 \a r2 + */ + +static inline int lu_seq_range_compare_loc(const struct lu_seq_range *r1, + const struct lu_seq_range *r2) +{ + return r1->lsr_index != r2->lsr_index || + r1->lsr_flags != r2->lsr_flags; +} + +#if !defined(__REQ_LAYOUT_USER__) +/** + * byte swap range structure \a range + */ + +void lustre_swab_lu_seq_range(struct lu_seq_range *range); +#endif +/** + * printf string and argument list for sequence range + */ +#define DRANGE "[%#16.16llx-%#16.16llx]:%x:%s" + +#define PRANGE(range) \ + (range)->lsr_start, \ + (range)->lsr_end, \ + (range)->lsr_index, \ + fld_range_is_mdt(range) ? "mdt" : "ost" + +#endif |