diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/include')
40 files changed, 0 insertions, 18265 deletions
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h deleted file mode 100644 index 341a145c3331..000000000000 --- a/drivers/staging/lustre/lustre/include/cl_object.h +++ /dev/null @@ -1,2463 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#ifndef _LUSTRE_CL_OBJECT_H -#define _LUSTRE_CL_OBJECT_H - -/** \defgroup clio clio - * - * Client objects implement io operations and cache pages. - * - * Examples: lov and osc are implementations of cl interface. - * - * Big Theory Statement. - * - * Layered objects. - * - * Client implementation is based on the following data-types: - * - * - cl_object - * - * - cl_page - * - * - cl_lock represents an extent lock on an object. 
- * - * - cl_io represents high-level i/o activity such as whole read/write - * system call, or write-out of pages from under the lock being - * canceled. cl_io has sub-ios that can be stopped and resumed - * independently, thus achieving high degree of transfer - * parallelism. Single cl_io can be advanced forward by - * the multiple threads (although in the most usual case of - * read/write system call it is associated with the single user - * thread, that issued the system call). - * - * Terminology - * - * - to avoid confusion high-level I/O operation like read or write system - * call is referred to as "an io", whereas low-level I/O operation, like - * RPC, is referred to as "a transfer" - * - * - "generic code" means generic (not file system specific) code in the - * hosting environment. "cl-code" means code (mostly in cl_*.c files) that - * is not layer specific. - * - * Locking. - * - * - i_mutex - * - PG_locked - * - cl_object_header::coh_page_guard - * - lu_site::ls_guard - * - * See the top comment in cl_object.c for the description of overall locking and - * reference-counting design. - * - * See comments below for the description of i/o, page, and dlm-locking - * design. - * - * @{ - */ - -/* - * super-class definitions. - */ -#include <lu_object.h> -#include <lustre_compat.h> -#include <linux/atomic.h> -#include <linux/mutex.h> -#include <linux/radix-tree.h> -#include <linux/spinlock.h> -#include <linux/wait.h> - -struct inode; - -struct cl_device; - -struct cl_object; - -struct cl_page; -struct cl_page_slice; -struct cl_lock; -struct cl_lock_slice; - -struct cl_lock_operations; -struct cl_page_operations; - -struct cl_io; -struct cl_io_slice; - -struct cl_req_attr; - -/** - * Device in the client stack. - * - * \see vvp_device, lov_device, lovsub_device, osc_device - */ -struct cl_device { - /** Super-class. */ - struct lu_device cd_lu_dev; -}; - -/** \addtogroup cl_object cl_object - * @{ - */ -/** - * "Data attributes" of cl_object. 
Data attributes can be updated - * independently for a sub-object, and top-object's attributes are calculated - * from sub-objects' ones. - */ -struct cl_attr { - /** Object size, in bytes */ - loff_t cat_size; - /** - * Known minimal size, in bytes. - * - * This is only valid when at least one DLM lock is held. - */ - loff_t cat_kms; - /** Modification time. Measured in seconds since epoch. */ - time64_t cat_mtime; - /** Access time. Measured in seconds since epoch. */ - time64_t cat_atime; - /** Change time. Measured in seconds since epoch. */ - time64_t cat_ctime; - /** - * Blocks allocated to this cl_object on the server file system. - * - * \todo XXX An interface for block size is needed. - */ - __u64 cat_blocks; - /** - * User identifier for quota purposes. - */ - uid_t cat_uid; - /** - * Group identifier for quota purposes. - */ - gid_t cat_gid; - - /* nlink of the directory */ - __u64 cat_nlink; -}; - -/** - * Fields in cl_attr that are being set. - */ -enum cl_attr_valid { - CAT_SIZE = 1 << 0, - CAT_KMS = 1 << 1, - CAT_MTIME = 1 << 3, - CAT_ATIME = 1 << 4, - CAT_CTIME = 1 << 5, - CAT_BLOCKS = 1 << 6, - CAT_UID = 1 << 7, - CAT_GID = 1 << 8 -}; - -/** - * Sub-class of lu_object with methods common for objects on the client - * stacks. - * - * cl_object: represents a regular file system object, both a file and a - * stripe. cl_object is based on lu_object: it is identified by a fid, - * layered, cached, hashed, and lrued. Important distinction with the server - * side, where md_object and dt_object are used, is that cl_object "fans out" - * at the lov/sns level: depending on the file layout, single file is - * represented as a set of "sub-objects" (stripes). At the implementation - * level, struct lov_object contains an array of cl_objects. Each sub-object - * is a full-fledged cl_object, having its fid, living in the lru and hash - * table. 
- * - * This leads to the next important difference with the server side: on the - * client, it's quite usual to have objects with the different sequence of - * layers. For example, typical top-object is composed of the following - * layers: - * - * - vvp - * - lov - * - * whereas its sub-objects are composed of - * - * - lovsub - * - osc - * - * layers. Here "lovsub" is a mostly dummy layer, whose purpose is to keep - * track of the object-subobject relationship. - * - * Sub-objects are not cached independently: when top-object is about to - * be discarded from the memory, all its sub-objects are torn-down and - * destroyed too. - * - * \see vvp_object, lov_object, lovsub_object, osc_object - */ -struct cl_object { - /** super class */ - struct lu_object co_lu; - /** per-object-layer operations */ - const struct cl_object_operations *co_ops; - /** offset of page slice in cl_page buffer */ - int co_slice_off; -}; - -/** - * Description of the client object configuration. This is used for the - * creation of a new client object that is identified by a more state than - * fid. - */ -struct cl_object_conf { - /** Super-class. */ - struct lu_object_conf coc_lu; - union { - /** - * Object layout. This is consumed by lov. - */ - struct lu_buf coc_layout; - /** - * Description of particular stripe location in the - * cluster. This is consumed by osc. - */ - struct lov_oinfo *coc_oinfo; - } u; - /** - * VFS inode. This is consumed by vvp. - */ - struct inode *coc_inode; - /** - * Layout lock handle. - */ - struct ldlm_lock *coc_lock; - /** - * Operation to handle layout, OBJECT_CONF_XYZ. - */ - int coc_opc; -}; - -enum { - /** configure layout, set up a new stripe, must be called while - * holding layout lock. - */ - OBJECT_CONF_SET = 0, - /** invalidate the current stripe configuration due to losing - * layout lock. - */ - OBJECT_CONF_INVALIDATE = 1, - /** wait for old layout to go away so that new layout can be set up. 
*/ - OBJECT_CONF_WAIT = 2 -}; - -enum { - CL_LAYOUT_GEN_NONE = (u32)-2, /* layout lock was cancelled */ - CL_LAYOUT_GEN_EMPTY = (u32)-1, /* for empty layout */ -}; - -struct cl_layout { - /** the buffer to return the layout in lov_mds_md format. */ - struct lu_buf cl_buf; - /** size of layout in lov_mds_md format. */ - size_t cl_size; - /** Layout generation. */ - u32 cl_layout_gen; -}; - -/** - * Operations implemented for each cl object layer. - * - * \see vvp_ops, lov_ops, lovsub_ops, osc_ops - */ -struct cl_object_operations { - /** - * Initialize page slice for this layer. Called top-to-bottom through - * every object layer when a new cl_page is instantiated. Layer - * keeping private per-page data, or requiring its own page operations - * vector should allocate these data here, and attach them to the page - * by calling cl_page_slice_add(). \a vmpage is locked (in the VM - * sense). Optional. - * - * NOTE(review): this method is declared to return int, so the - * pointer-style return values listed below look stale; presumably 0 - * means success and a negative errno means failure -- confirm - * against the layer implementations. - * - * \retval NULL success. - * - * \retval ERR_PTR(errno) failure code. - * - * \retval valid-pointer pointer to already existing referenced page - * to be used instead of newly created. - */ - int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); - /** - * Initialize lock slice for this layer. Called top-to-bottom through - * every object layer when a new cl_lock is instantiated. Layer - * keeping private per-lock data, or requiring its own lock operations - * vector should allocate these data here, and attach them to the lock - * by calling cl_lock_slice_add(). Mandatory. - */ - int (*coo_lock_init)(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *io); - /** - * Initialize io state for a given layer. - * - * Called top-to-bottom once per io existence to initialize io - * state. If layer wants to keep some state for this type of io, it - * has to embed struct cl_io_slice in lu_env::le_ses, and register - * slice with cl_io_slice_add(). 
It is guaranteed that all threads - * participating in this io share the same session. - */ - int (*coo_io_init)(const struct lu_env *env, - struct cl_object *obj, struct cl_io *io); - /** - * Fill portion of \a attr that this layer controls. This method is - * called top-to-bottom through all object layers. - * - * \pre cl_object_header::coh_attr_guard of the top-object is locked. - * - * \return 0: to continue - * \return +ve: to stop iterating through layers (but 0 is returned - * from enclosing cl_object_attr_get()) - * \return -ve: to signal error - */ - int (*coo_attr_get)(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr); - /** - * Update attributes. - * - * \a valid is a bitmask composed from enum #cl_attr_valid, and - * indicating what attributes are to be set. - * - * \pre cl_object_header::coh_attr_guard of the top-object is locked. - * - * \return the same convention as for - * cl_object_operations::coo_attr_get() is used. - */ - int (*coo_attr_update)(const struct lu_env *env, struct cl_object *obj, - const struct cl_attr *attr, unsigned int valid); - /** - * Update object configuration. Called top-to-bottom to modify object - * configuration. - * - * XXX error conditions and handling. - */ - int (*coo_conf_set)(const struct lu_env *env, struct cl_object *obj, - const struct cl_object_conf *conf); - /** - * Glimpse ast. Executed when glimpse ast arrives for a lock on this - * object. Layers are supposed to fill parts of \a lvb that will be - * shipped to the glimpse originator as a glimpse result. - * - * \see vvp_object_glimpse(), lovsub_object_glimpse(), - * \see osc_object_glimpse() - */ - int (*coo_glimpse)(const struct lu_env *env, - const struct cl_object *obj, struct ost_lvb *lvb); - /** - * Object prune method. Called when the layout is going to change on - * this object, therefore each layer has to clean up their cache, - * mainly pages and locks. 
- */ - int (*coo_prune)(const struct lu_env *env, struct cl_object *obj); - /** - * Object getstripe method. - */ - int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj, - struct lov_user_md __user *lum); - /** - * Get FIEMAP mapping from the object. - */ - int (*coo_fiemap)(const struct lu_env *env, struct cl_object *obj, - struct ll_fiemap_info_key *fmkey, - struct fiemap *fiemap, size_t *buflen); - /** - * Get layout and generation of the object. - */ - int (*coo_layout_get)(const struct lu_env *env, struct cl_object *obj, - struct cl_layout *layout); - /** - * Get maximum size of the object. - */ - loff_t (*coo_maxbytes)(struct cl_object *obj); - /** - * Set request attributes. - */ - void (*coo_req_attr_set)(const struct lu_env *env, - struct cl_object *obj, - struct cl_req_attr *attr); -}; - -/** - * Extended header for client object. - */ -struct cl_object_header { - /** Standard lu_object_header. cl_object::co_lu::lo_header points - * here. - */ - struct lu_object_header coh_lu; - - /** - * Parent object. It is assumed that an object has a well-defined - * parent, but not a well-defined child (there may be multiple - * sub-objects, for the same top-object). cl_object_header::coh_parent - * field allows certain code to be written generically, without - * limiting possible cl_object layouts unduly. - */ - struct cl_object_header *coh_parent; - /** - * Protects consistency between cl_attr of parent object and - * attributes of sub-objects, that the former is calculated ("merged") - * from. - * - * \todo XXX this can be read/write lock if needed. - */ - spinlock_t coh_attr_guard; - /** - * Size of cl_page + page slices - */ - unsigned short coh_page_bufsize; - /** - * Number of objects above this one: 0 for a top-object, 1 for its - * sub-object, etc. - */ - unsigned char coh_nesting; -}; - -/** - * Helper macro: iterate over all layers of the object \a obj, assigning every - * layer top-to-bottom to \a slice. 
- */ -#define cl_object_for_each(slice, obj) \ - list_for_each_entry((slice), \ - &(obj)->co_lu.lo_header->loh_layers, \ - co_lu.lo_linkage) -/** - * Helper macro: iterate over all layers of the object \a obj, assigning every - * layer bottom-to-top to \a slice. - */ -#define cl_object_for_each_reverse(slice, obj) \ - list_for_each_entry_reverse((slice), \ - &(obj)->co_lu.lo_header->loh_layers, \ - co_lu.lo_linkage) -/** @} cl_object */ - -#define CL_PAGE_EOF ((pgoff_t)~0ull) - -/** \addtogroup cl_page cl_page - * @{ - */ - -/** \struct cl_page - * Layered client page. - * - * cl_page: represents a portion of a file, cached in the memory. All pages - * of the given file are of the same size, and are kept in the radix tree - * hanging off the cl_object. cl_page doesn't fan out, but as sub-objects - * of the top-level file object are first class cl_objects, they have their - * own radix trees of pages and hence page is implemented as a sequence of - * struct cl_pages's, linked into double-linked list through - * cl_page::cp_parent and cl_page::cp_child pointers, each residing in the - * corresponding radix tree at the corresponding logical offset. - * - * cl_page is associated with VM page of the hosting environment (struct - * page in Linux kernel, for example), struct page. It is assumed, that this - * association is implemented by one of cl_page layers (top layer in the - * current design) that - * - * - intercepts per-VM-page call-backs made by the environment (e.g., - * memory pressure), - * - * - translates state (page flag bits) and locking between lustre and - * environment. - * - * The association between cl_page and struct page is immutable and - * established when cl_page is created. - * - * cl_page can be "owned" by a particular cl_io (see below), guaranteeing - * this io an exclusive access to this page w.r.t. other io attempts and - * various events changing page state (such as transfer completion, or - * eviction of the page from the memory). 
Note, that in general cl_io - * cannot be identified with a particular thread, and page ownership is not - * exactly equal to the current thread holding a lock on the page. Layer - * implementing association between cl_page and struct page has to implement - * ownership on top of available synchronization mechanisms. - * - * While lustre client maintains the notion of page ownership by io, - * hosting MM/VM usually has its own page concurrency control - * mechanisms. For example, in Linux, page access is synchronized by the - * per-page PG_locked bit-lock, and generic kernel code (generic_file_*()) - * takes care to acquire and release such locks as necessary around the - * calls to the file system methods (->readpage(), ->prepare_write(), - * ->commit_write(), etc.). This leads to the situation when there are two - * different ways to own a page in the client: - * - * - client code explicitly and voluntarily owns the page (cl_page_own()); - * - * - VM locks a page and then calls the client, that has "to assume" - * the ownership from the VM (cl_page_assume()). - * - * Dual methods to release ownership are cl_page_disown() and - * cl_page_unassume(). - * - * cl_page is reference counted (cl_page::cp_ref). When reference counter - * drops to 0, the page is returned to the cache, unless it is in - * cl_page_state::CPS_FREEING state, in which case it is immediately - * destroyed. 
- * - * The general logic guaranteeing the absence of "existential races" for - * pages is the following: - * - * - there are fixed known ways for a thread to obtain a new reference - * to a page: - * - * - by doing a lookup in the cl_object radix tree, protected by the - * spin-lock; - * - * - by starting from VM-locked struct page and following some - * hosting environment method (e.g., following ->private pointer in - * the case of Linux kernel), see cl_vmpage_page(); - * - * - when the page enters cl_page_state::CPS_FREEING state, all these - * ways are severed with the proper synchronization - * (cl_page_delete()); - * - * - entry into cl_page_state::CPS_FREEING is serialized by the VM page - * lock; - * - * - no new references to the page in cl_page_state::CPS_FREEING state - * are allowed (checked in cl_page_get()). - * - * Together this guarantees that when last reference to a - * cl_page_state::CPS_FREEING page is released, it is safe to destroy the - * page, as neither references to it can be acquired at that point, nor - * ones exist. - * - * cl_page is a state machine. States are enumerated in enum - * cl_page_state. Possible state transitions are enumerated in - * cl_page_state_set(). State transition process (i.e., actual changing of - * cl_page::cp_state field) is protected by the lock on the underlying VM - * page. - * - * Linux Kernel implementation. - * - * Binding between cl_page and struct page (which is a typedef for - * struct page) is implemented in the vvp layer. cl_page is attached to the - * ->private pointer of the struct page, together with the setting of - * PG_private bit in page->flags, and acquiring additional reference on the - * struct page (much like struct buffer_head, or any similar file system - * private data structures). - * - * PG_locked lock is used to implement both ownership and transfer - * synchronization, that is, page is VM-locked in CPS_{OWNED,PAGE{IN,OUT}} - * states. 
No additional references are acquired for the duration of the - * transfer. - * - * \warning *THIS IS NOT* the behavior expected by the Linux kernel, where - * write-out is "protected" by the special PG_writeback bit. - */ - -/** - * States of cl_page. cl_page.c assumes particular order here. - * - * The page state machine is rather crude, as it doesn't recognize finer page - * states like "dirty" or "up to date". This is because such states are not - * always well defined for the whole stack (see, for example, the - * implementation of the read-ahead, that hides page up-to-dateness to track - * cache hits accurately). Such sub-states are maintained by the layers that - * are interested in them. - */ -enum cl_page_state { - /** - * Page is in the cache, un-owned. Page leaves cached state in the - * following cases: - * - * - [cl_page_state::CPS_OWNED] io comes across the page and - * owns it; - * - * - [cl_page_state::CPS_PAGEOUT] page is dirty, the - * req-formation engine decides that it wants to include this page - * into an RPC being constructed, and yanks it from the cache; - * - * - [cl_page_state::CPS_FREEING] VM callback is executed to - * evict the page from the memory; - * - * \invariant cl_page::cp_owner == NULL && cl_page::cp_req == NULL - * - * NOTE(review): struct cl_page in this header declares no cp_req - * member, so the cp_req half of this invariant looks stale. - */ - CPS_CACHED, - /** - * Page is exclusively owned by some cl_io. 
Page may end up in this - * state as a result of - * - * - io creating new page and immediately owning it; - * - * - [cl_page_state::CPS_CACHED] io finding existing cached page - * and owning it; - * - * - [cl_page_state::CPS_OWNED] io finding existing owned page - * and waiting for owner to release the page; - * - * Page leaves owned state in the following cases: - * - * - [cl_page_state::CPS_CACHED] io decides to leave the page in - * the cache, doing nothing; - * - * - [cl_page_state::CPS_PAGEIN] io starts read transfer for - * this page; - * - * - [cl_page_state::CPS_PAGEOUT] io starts immediate write - * transfer for this page; - * - * - [cl_page_state::CPS_FREEING] io decides to destroy this - * page (e.g., as part of truncate or extent lock cancellation). - * - * \invariant cl_page::cp_owner != NULL && cl_page::cp_req == NULL - */ - CPS_OWNED, - /** - * Page is being written out, as a part of a transfer. This state is - * entered when req-formation logic decided that it wants this page to - * be sent through the wire _now_. Specifically, it means that once - * this state is achieved, transfer completion handler (with either - * success or failure indication) is guaranteed to be executed against - * this page independently of any locks and any scheduling decisions - * made by the hosting environment (that effectively means that the - * page is never put into cl_page_state::CPS_PAGEOUT state "in - * advance". This property is mentioned, because it is important when - * reasoning about possible dead-locks in the system). The page can - * enter this state as a result of - * - * - [cl_page_state::CPS_OWNED] an io requesting an immediate - * write-out of this page, or - * - * - [cl_page_state::CPS_CACHED] req-forming engine deciding - * that it has enough dirty pages cached to issue a "good" - * transfer. - * - * The page leaves cl_page_state::CPS_PAGEOUT state when the transfer - * is completed---it is moved into cl_page_state::CPS_CACHED state. 
- * - * Underlying VM page is locked for the duration of transfer. - * - * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL - */ - CPS_PAGEOUT, - /** - * Page is being read in, as a part of a transfer. This is quite - * similar to the cl_page_state::CPS_PAGEOUT state, except that - * read-in is always "immediate"---there is no such thing as a sudden - * construction of read request from cached, presumably not up to date, - * pages. - * - * Underlying VM page is locked for the duration of transfer. - * - * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL - */ - CPS_PAGEIN, - /** - * Page is being destroyed. This state is entered when client decides - * that page has to be deleted from its host object, as, e.g., a part - * of truncate. - * - * Once this state is reached, there is no way to escape it. - * - * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req == NULL - */ - CPS_FREEING, - CPS_NR -}; - -enum cl_page_type { - /** Host page, the page is from the host inode which the cl_page - * belongs to. - */ - CPT_CACHEABLE = 1, - - /** Transient page, the transient cl_page is used to bind a cl_page - * to a vmpage that does not belong to the same object as the cl_page. - * It is used in DirectIO and lockless IO. - */ - CPT_TRANSIENT, -}; - -/** - * Fields are protected by the lock on struct page, except for atomics and - * immutables. - * - * \invariant Data type invariants are in cl_page_invariant(). Basically: - * cl_page::cp_parent and cl_page::cp_child are a well-formed double-linked - * list, consistent with the parent/child pointers in the cl_page::cp_obj and - * cl_page::cp_owner (when set). - * - * NOTE(review): struct cl_page below declares no cp_parent or cp_child - * members, so this invariant text looks stale. - */ -struct cl_page { - /** Reference counter. */ - atomic_t cp_ref; - /** An object this page is a part of. Immutable after creation. */ - struct cl_object *cp_obj; - /** VM page associated with this cl_page. */ - struct page *cp_vmpage; - /** Linkage of pages within group. Pages must be owned. */ - struct list_head cp_batch; - /** List of slices. 
Immutable after creation. */ - struct list_head cp_layers; - /** - * Page state. This field is const to avoid accidental update, it is - * modified only internally within cl_page.c. Protected by a VM lock. - */ - const enum cl_page_state cp_state; - /** - * Page type. Only CPT_TRANSIENT is used so far. Immutable after - * creation. - */ - enum cl_page_type cp_type; - - /** - * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned - * by sub-io. Protected by a VM lock. - */ - struct cl_io *cp_owner; - /** List of references to this page, for debugging. */ - struct lu_ref cp_reference; - /** Link to an object, for debugging. */ - struct lu_ref_link cp_obj_ref; - /** Link to a queue, for debugging. */ - struct lu_ref_link cp_queue_ref; - /** Assigned if doing a sync_io */ - struct cl_sync_io *cp_sync_io; -}; - -/** - * Per-layer part of cl_page. - * - * \see vvp_page, lov_page, osc_page - */ -struct cl_page_slice { - struct cl_page *cpl_page; - pgoff_t cpl_index; - /** - * Object slice corresponding to this page slice. Immutable after - * creation. - */ - struct cl_object *cpl_obj; - const struct cl_page_operations *cpl_ops; - /** Linkage into cl_page::cp_layers. Immutable after creation. */ - struct list_head cpl_linkage; -}; - -/** - * Lock mode. For the client extent locks. - * - * \ingroup cl_lock - */ -enum cl_lock_mode { - CLM_READ, - CLM_WRITE, - CLM_GROUP -}; - -/** - * Requested transfer type. - */ -enum cl_req_type { - CRT_READ, - CRT_WRITE, - CRT_NR -}; - -/** - * Per-layer page operations. - * - * Methods taking an \a io argument are for the activity happening in the - * context of given \a io. Page is assumed to be owned by that io, except for - * the obvious cases (like cl_page_operations::cpo_own()). - * - * \see vvp_page_ops, lov_page_ops, osc_page_ops - */ -struct cl_page_operations { - /** - * cl_page<->struct page methods. Only one layer in the stack has to - * implement these. 
Current code assumes that this functionality is - * provided by the topmost layer, see cl_page_disown0() as an example. - */ - - /** - * Called when \a io acquires this page into the exclusive - * ownership. When this method returns, it is guaranteed that the page - * is not owned by other io, and no transfer is going on against - * it. Optional. - * - * \see cl_page_own() - * \see vvp_page_own(), lov_page_own() - */ - int (*cpo_own)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io, int nonblock); - /** Called when ownership is yielded. Optional. - * - * \see cl_page_disown() - * \see vvp_page_disown() - */ - void (*cpo_disown)(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); - /** - * Called for a page that is already "owned" by \a io from the VM point - * of view. Optional. - * - * \see cl_page_assume() - * \see vvp_page_assume(), lov_page_assume() - */ - void (*cpo_assume)(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); - /** Dual to cl_page_operations::cpo_assume(). Optional. Called - * bottom-to-top when IO releases a page without actually unlocking - * it. - * - * \see cl_page_unassume() - * \see vvp_page_unassume() - */ - void (*cpo_unassume)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); - /** - * Announces whether the page contains valid data or not by \a uptodate. - * - * \see cl_page_export() - * \see vvp_page_export() - */ - void (*cpo_export)(const struct lu_env *env, - const struct cl_page_slice *slice, int uptodate); - /** - * Checks whether underlying VM page is locked (in the suitable - * sense). Used for assertions. - * - * \retval -EBUSY: page is protected by a lock of a given mode; - * \retval -ENODATA: page is not protected by a lock; - * \retval 0: this layer cannot decide. (Should never happen.) - */ - int (*cpo_is_vmlocked)(const struct lu_env *env, - const struct cl_page_slice *slice); - /** - * Page destruction. 
- */ - - /** - * Called when page is truncated from the object. Optional. - * - * \see cl_page_discard() - * \see vvp_page_discard(), osc_page_discard() - */ - void (*cpo_discard)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); - /** - * Called when page is removed from the cache, and is about to be - * destroyed. Optional. - * - * \see cl_page_delete() - * \see vvp_page_delete(), osc_page_delete() - */ - void (*cpo_delete)(const struct lu_env *env, - const struct cl_page_slice *slice); - /** Destructor. Frees resources and slice itself. */ - void (*cpo_fini)(const struct lu_env *env, - struct cl_page_slice *slice); - /** - * Optional debugging helper. Prints given page slice. - * - * \see cl_page_print() - */ - int (*cpo_print)(const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t p); - /** - * \name transfer - * - * Transfer methods. - * - * @{ - */ - /** - * Request type dependent vector of operations. - * - * Transfer operations depend on transfer mode (cl_req_type). To avoid - * passing transfer mode to each and every one of these methods, and to - * avoid branching on request type inside of the methods, separate - * methods for cl_req_type::CRT_READ and cl_req_type::CRT_WRITE are - * provided. That is, method invocation usually looks like - * - * slice->cpl_ops->io[crt].cpo_method(env, slice, ...); - * - * where crt is an enum cl_req_type value. - */ - struct { - /** - * Called when a page is submitted for a transfer as a part of - * cl_page_list. - * - * \return 0 : page is eligible for submission; - * \return -EALREADY : skip this page; - * \return -ve : error. - * - * \see cl_page_prep() - */ - int (*cpo_prep)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); - /** - * Completion handler. This is guaranteed to be eventually - * fired after cl_page_operations::cpo_prep() or - * cl_page_operations::cpo_make_ready() call. - * - * This method can be called in a non-blocking context. 
It is - * guaranteed however, that the page involved and its object - * are pinned in memory (and, hence, calling cl_page_put() is - * safe). - * - * \see cl_page_completion() - */ - void (*cpo_completion)(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret); - /** - * Called when cached page is about to be added to the - * ptlrpc request as a part of req formation. - * - * \return 0 : proceed with this page; - * \return -EAGAIN : skip this page; - * \return -ve : error. - * - * \see cl_page_make_ready() - */ - int (*cpo_make_ready)(const struct lu_env *env, - const struct cl_page_slice *slice); - } io[CRT_NR]; - /** - * Tell transfer engine that only [from, to] part of a page should be - * transmitted. - * - * This is used for immediate transfers. - * - * \todo XXX this is not a very good interface. It would be much better - * if all transfer parameters were supplied as arguments to - * cl_io_operations::cio_submit() call, but it is not clear how to do - * this for page queues. - * - * \see cl_page_clip() - */ - void (*cpo_clip)(const struct lu_env *env, - const struct cl_page_slice *slice, - int from, int to); - /** - * \pre the page was queued for transferring. - * \post page is removed from client's pending list, or -EBUSY - * is returned if it is already being transferred. - * - * This is one of the few page operations for which: - * 0. it is called from top level; - * 1. the vmpage is not locked; - * 2. every layer should synchronize execution of its ->cpo_cancel() - * with completion handlers. Osc uses client obd lock for this - * purpose. Since there is no vvp_page_cancel() or - * lov_page_cancel(), cpo_cancel is de facto protected by client lock. - * - * \see osc_page_cancel(). - */ - int (*cpo_cancel)(const struct lu_env *env, - const struct cl_page_slice *slice); - /** - * Write out a page by kernel. This is only called by ll_writepage - * right now. 
- * - * \see cl_page_flush() - */ - int (*cpo_flush)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); - /** @} transfer */ -}; - -/** - * Helper macro, dumping detailed information about \a page into a log. - */ -#define CL_PAGE_DEBUG(mask, env, page, format, ...) \ -do { \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - cl_page_print(env, &msgdata, lu_cdebug_printer, page); \ - CDEBUG(mask, format, ## __VA_ARGS__); \ - } \ -} while (0) - -/** - * Helper macro, dumping shorter information about \a page into a log. - */ -#define CL_PAGE_HEADER(mask, env, page, format, ...) \ -do { \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \ - CDEBUG(mask, format, ## __VA_ARGS__); \ - } \ -} while (0) - -static inline struct page *cl_page_vmpage(struct cl_page *page) -{ - LASSERT(page->cp_vmpage); - return page->cp_vmpage; -} - -/** - * Check if a cl_page is in use. - * - * Client cache holds a refcount, this refcount will be dropped when - * the page is taken out of cache, see vvp_page_delete(). - */ -static inline bool __page_in_use(const struct cl_page *page, int refc) -{ - return (atomic_read(&page->cp_ref) > refc + 1); -} - -/** - * Caller itself holds a refcount of cl_page. - */ -#define cl_page_in_use(pg) __page_in_use(pg, 1) -/** - * Caller doesn't hold a refcount. - */ -#define cl_page_in_use_noref(pg) __page_in_use(pg, 0) - -/** @} cl_page */ - -/** \addtogroup cl_lock cl_lock - * @{ - */ -/** \struct cl_lock - * - * Extent locking on the client. - * - * LAYERING - * - * The locking model of the new client code is built around - * - * struct cl_lock - * - * data-type representing an extent lock on a regular file. 
cl_lock is a - * layered object (much like cl_object and cl_page), it consists of a header - * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to - * cl_lock::cll_layers list through cl_lock_slice::cls_linkage. - * - * Typical cl_lock consists of the two layers: - * - * - vvp_lock (vvp specific data), and - * - lov_lock (lov specific data). - * - * lov_lock contains an array of sub-locks. Each of these sub-locks is a - * normal cl_lock: it has a header (struct cl_lock) and a list of layers: - * - * - lovsub_lock, and - * - osc_lock - * - * Each sub-lock is associated with a cl_object (representing stripe - * sub-object or the file to which top-level cl_lock is associated to), and is - * linked into that cl_object::coh_locks. In this respect cl_lock is similar to - * cl_object (that at lov layer also fans out into multiple sub-objects), and - * is different from cl_page, that doesn't fan out (there is usually exactly - * one osc_page for every vvp_page). We shall call vvp-lov portion of the lock - * a "top-lock" and its lovsub-osc portion a "sub-lock". - * - * LIFE CYCLE - * - * cl_lock is a cacheless data container for the requirements of locks to - * complete the IO. cl_lock is created before I/O starts and destroyed when the - * I/O is complete. - * - * cl_lock depends on LDLM lock to fulfill lock semantics. LDLM lock is attached - * to cl_lock at OSC layer. LDLM lock is still cacheable. - * - * INTERFACE AND USAGE - * - * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel. A - * cl_lock is enqueued by cl_lock_request(), which will call clo_enqueue() - * methods for each layer to enqueue the lock. At the LOV layer, if a cl_lock - * consists of multiple sub cl_locks, each sub locks will be enqueued - * correspondingly. At OSC layer, the lock enqueue request will tend to reuse - * cached LDLM lock; otherwise a new LDLM lock will have to be requested from - * OST side. 
- * - * cl_lock_cancel() must be called to release a cl_lock after use. clo_cancel() - * method will be called for each layer to release the resource held by this - * lock. At OSC layer, the reference count of LDLM lock, which is held at - * clo_enqueue time, is released. - * - * LDLM lock can only be canceled if there is no cl_lock using it. - * - * Overall process of the locking during IO operation is as follows: - * - * - once parameters for IO are set up in cl_io, cl_io_operations::cio_lock() - * is called on each layer. Responsibility of this method is to add locks, - * needed by a given layer into cl_io.ci_lockset. - * - * - once locks for all layers were collected, they are sorted to avoid - * dead-locks (cl_io_locks_sort()), and enqueued. - * - * - when all locks are acquired, IO is performed; - * - * - locks are released after IO is complete. - * - * Striping introduces major additional complexity into locking. The - * fundamental problem is that it is generally unsafe to actively use (hold) - * two locks on the different OST servers at the same time, as this introduces - * inter-server dependency and can lead to cascading evictions. - * - * Basic solution is to sub-divide large read/write IOs into smaller pieces so - * that no multi-stripe locks are taken (note that this design abandons POSIX - * read/write semantics). Such pieces ideally can be executed concurrently. At - * the same time, certain types of IO cannot be sub-divided, without - * sacrificing correctness. This includes: - * - * - O_APPEND write, where [0, EOF] lock has to be taken, to guarantee - * atomicity; - * - * - ftruncate(fd, offset), where [offset, EOF] lock has to be taken. - * - * Also, in the case of read(fd, buf, count) or write(fd, buf, count), where - * buf is a part of memory mapped Lustre file, a lock or locks protecting buf - * has to be held together with the usual lock on [offset, offset + count].
- * - * Interaction with DLM - * - * In the expected setup, cl_lock is ultimately backed up by a collection of - * DLM locks (struct ldlm_lock). Association between cl_lock and DLM lock is - * implemented in osc layer, that also matches DLM events (ASTs, cancellation, - * etc.) into cl_lock_operation calls. See struct osc_lock for a more detailed - * description of interaction with DLM. - */ - -/** - * Lock description. - */ -struct cl_lock_descr { - /** Object this lock is granted for. */ - struct cl_object *cld_obj; - /** Index of the first page protected by this lock. */ - pgoff_t cld_start; - /** Index of the last page (inclusive) protected by this lock. */ - pgoff_t cld_end; - /** Group ID, for group lock */ - __u64 cld_gid; - /** Lock mode. */ - enum cl_lock_mode cld_mode; - /** - * flags to enqueue lock. A combination of bit-flags from - * enum cl_enq_flags. - */ - __u32 cld_enq_flags; -}; - -#define DDESCR "%s(%d):[%lu, %lu]:%x" -#define PDESCR(descr) \ - cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \ - (descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags - -const char *cl_lock_mode_name(const enum cl_lock_mode mode); - -/** - * Layered client lock. - */ -struct cl_lock { - /** List of slices. Immutable after creation. */ - struct list_head cll_layers; - /** lock attribute, extent, cl_object, etc. */ - struct cl_lock_descr cll_descr; -}; - -/** - * Per-layer part of cl_lock - * - * \see vvp_lock, lov_lock, lovsub_lock, osc_lock - */ -struct cl_lock_slice { - struct cl_lock *cls_lock; - /** Object slice corresponding to this lock slice. Immutable after - * creation. - */ - struct cl_object *cls_obj; - const struct cl_lock_operations *cls_ops; - /** Linkage into cl_lock::cll_layers. Immutable after creation. */ - struct list_head cls_linkage; -}; - -/** - * - * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops - */ -struct cl_lock_operations { - /** @{ */ - /** - * Attempts to enqueue the lock. Called top-to-bottom. 
- * - * \retval 0 this layer has enqueued the lock successfully - * \retval >0 this layer has enqueued the lock, but need to wait on - * @anchor for resources - * \retval -ve failure - * - * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(), - * \see osc_lock_enqueue() - */ - int (*clo_enqueue)(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *io, struct cl_sync_io *anchor); - /** - * Cancel a lock, release its DLM lock ref, while does not cancel the - * DLM lock - */ - void (*clo_cancel)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** @} */ - /** - * Destructor. Frees resources and the slice. - * - * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(), - * \see osc_lock_fini() - */ - void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice); - /** - * Optional debugging helper. Prints given lock slice. - */ - int (*clo_print)(const struct lu_env *env, - void *cookie, lu_printer_t p, - const struct cl_lock_slice *slice); -}; - -#define CL_LOCK_DEBUG(mask, env, lock, format, ...) \ -do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - cl_lock_print(env, &msgdata, lu_cdebug_printer, lock); \ - CDEBUG(mask, format, ## __VA_ARGS__); \ - } \ -} while (0) - -#define CL_LOCK_ASSERT(expr, env, lock) do { \ - if (likely(expr)) \ - break; \ - \ - CL_LOCK_DEBUG(D_ERROR, env, lock, "failed at %s.\n", #expr); \ - LBUG(); \ -} while (0) - -/** @} cl_lock */ - -/** \addtogroup cl_page_list cl_page_list - * Page list used to perform collective operations on a group of pages. - * - * Pages are added to the list one by one. cl_page_list acquires a reference - * for every page in it. Page list is used to perform collective operations on - * pages: - * - * - submit pages for an immediate transfer, - * - * - own pages on behalf of certain io (waiting for each page in turn), - * - * - discard pages. 
- * - * When list is finalized, it releases references on all pages it still has. - * - * \todo XXX concurrency control. - * - * @{ - */ -struct cl_page_list { - unsigned int pl_nr; - struct list_head pl_pages; - struct task_struct *pl_owner; -}; - -/** - * A 2-queue of pages. A convenience data-type for common use case, 2-queue - * contains an incoming page list and an outgoing page list. - */ -struct cl_2queue { - struct cl_page_list c2_qin; - struct cl_page_list c2_qout; -}; - -/** @} cl_page_list */ - -/** \addtogroup cl_io cl_io - * @{ - */ -/** \struct cl_io - * I/O - * - * cl_io represents a high level I/O activity like - * read(2)/write(2)/truncate(2) system call, or cancellation of an extent - * lock. - * - * cl_io is a layered object, much like cl_{object,page,lock} but with one - * important distinction. We want to minimize number of calls to the allocator - * in the fast path, e.g., in the case of read(2) when everything is cached: - * client already owns the lock over region being read, and data are cached - * due to read-ahead. To avoid allocation of cl_io layers in such situations, - * per-layer io state is stored in the session, associated with the io, see - * struct {vvp,lov,osc}_io for example. Sessions allocation is amortized - * by using free-lists, see cl_env_get(). - * - * There is a small predefined number of possible io types, enumerated in enum - * cl_io_type. - * - * cl_io is a state machine, that can be advanced concurrently by the multiple - * threads. It is up to these threads to control the concurrency and, - * specifically, to detect when io is done, and its state can be safely - * released. 
- * - * For read/write io overall execution plan is as follows: - * - * (0) initialize io state through all layers; - * - * (1) loop: prepare chunk of work to do - * - * (2) call all layers to collect locks they need to process current chunk - * - * (3) sort all locks to avoid dead-locks, and acquire them - * - * (4) process the chunk: call per-page methods - * (cl_io_operations::cio_prepare_write(), - * cl_io_operations::cio_commit_write() for write) - * - * (5) release locks - * - * (6) repeat loop. - * - * To implement the "parallel IO mode", lov layer creates sub-io's (lazily to - * address allocation efficiency issues mentioned above), and returns with the - * special error condition from per-page method when current sub-io has to - * block. This causes io loop to be repeated, and lov switches to the next - * sub-io in its cl_io_operations::cio_iter_init() implementation. - */ - -/** IO types */ -enum cl_io_type { - /** read system call */ - CIT_READ = 1, - /** write system call */ - CIT_WRITE, - /** truncate, utime system calls */ - CIT_SETATTR, - /** get data version */ - CIT_DATA_VERSION, - /** - * page fault handling - */ - CIT_FAULT, - /** - * fsync system call handling - * To write out a range of file - */ - CIT_FSYNC, - /** - * Miscellaneous io. This is used for occasional io activity that - * doesn't fit into other types. Currently this is used for: - * - * - cancellation of an extent lock. This io exists as a context - * to write dirty pages from under the lock being canceled back - * to the server; - * - * - VM induced page write-out. An io context for writing page out - * for memory cleansing; - * - * - glimpse. An io context to acquire glimpse lock. - * - * - grouplock. An io context to acquire group lock. - * - * CIT_MISC io is used simply as a context in which locks and pages - * are manipulated. Such io has no internal "process", that is, - * cl_io_loop() is never called for it.
- */ - CIT_MISC, - CIT_OP_NR -}; - -/** - * States of cl_io state machine - */ -enum cl_io_state { - /** Not initialized. */ - CIS_ZERO, - /** Initialized. */ - CIS_INIT, - /** IO iteration started. */ - CIS_IT_STARTED, - /** Locks taken. */ - CIS_LOCKED, - /** Actual IO is in progress. */ - CIS_IO_GOING, - /** IO for the current iteration finished. */ - CIS_IO_FINISHED, - /** Locks released. */ - CIS_UNLOCKED, - /** Iteration completed. */ - CIS_IT_ENDED, - /** cl_io finalized. */ - CIS_FINI -}; - -/** - * IO state private for a layer. - * - * This is usually embedded into layer session data, rather than allocated - * dynamically. - * - * \see vvp_io, lov_io, osc_io - */ -struct cl_io_slice { - struct cl_io *cis_io; - /** corresponding object slice. Immutable after creation. */ - struct cl_object *cis_obj; - /** io operations. Immutable after creation. */ - const struct cl_io_operations *cis_iop; - /** - * linkage into a list of all slices for a given cl_io, hanging off - * cl_io::ci_layers. Immutable after creation. - */ - struct list_head cis_linkage; -}; - -typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *, - struct cl_page *); - -struct cl_read_ahead { - /* - * Maximum page index the readahead window will end. - * This is determined DLM lock coverage, RPC and stripe boundary. - * cra_end is included. - */ - pgoff_t cra_end; - /* optimal RPC size for this read, by pages */ - unsigned long cra_rpc_size; - /* - * Release callback. If readahead holds resources underneath, this - * function should be called to release it. - */ - void (*cra_release)(const struct lu_env *env, void *cbdata); - /* Callback data for cra_release routine */ - void *cra_cbdata; -}; - -static inline void cl_read_ahead_release(const struct lu_env *env, - struct cl_read_ahead *ra) -{ - if (ra->cra_release) - ra->cra_release(env, ra->cra_cbdata); - memset(ra, 0, sizeof(*ra)); -} - -/** - * Per-layer io operations. 
- * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops - */ -struct cl_io_operations { - /** - * Vector of io state transition methods for every io type. - * - * \see cl_page_operations::io - */ - struct { - /** - * Prepare io iteration at a given layer. - * - * Called top-to-bottom at the beginning of each iteration of - * "io loop" (if it makes sense for this type of io). Here - * layer selects what work it will do during this iteration. - * - * \see cl_io_operations::cio_iter_fini() - */ - int (*cio_iter_init)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Finalize io iteration. - * - * Called bottom-to-top at the end of each iteration of "io - * loop". Here layers can decide whether IO has to be - * continued. - * - * \see cl_io_operations::cio_iter_init() - */ - void (*cio_iter_fini)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Collect locks for the current iteration of io. - * - * Called top-to-bottom to collect all locks necessary for - * this iteration. This methods shouldn't actually enqueue - * anything, instead it should post a lock through - * cl_io_lock_add(). Once all locks are collected, they are - * sorted and enqueued in the proper order. - */ - int (*cio_lock)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Finalize unlocking. - * - * Called bottom-to-top to finish layer specific unlocking - * functionality, after generic code released all locks - * acquired by cl_io_operations::cio_lock(). - */ - void (*cio_unlock)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Start io iteration. - * - * Once all locks are acquired, called top-to-bottom to - * commence actual IO. In the current implementation, - * top-level vvp_io_{read,write}_start() does all the work - * synchronously by calling generic_file_*(), so other layers - * are called when everything is done. 
- */ - int (*cio_start)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Called top-to-bottom at the end of io loop. Here layer - * might wait for an unfinished asynchronous io. - */ - void (*cio_end)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Called bottom-to-top to notify layers that read/write IO - * iteration finished, with \a nob bytes transferred. - */ - void (*cio_advance)(const struct lu_env *env, - const struct cl_io_slice *slice, - size_t nob); - /** - * Called once per io, bottom-to-top to release io resources. - */ - void (*cio_fini)(const struct lu_env *env, - const struct cl_io_slice *slice); - } op[CIT_OP_NR]; - - /** - * Submit pages from \a queue->c2_qin for IO, and move - * successfully submitted pages into \a queue->c2_qout. Return - * non-zero if failed to submit even the single page. If - * submission failed after some pages were moved into \a - * queue->c2_qout, completion callback with non-zero ioret is - * executed on them. - */ - int (*cio_submit)(const struct lu_env *env, - const struct cl_io_slice *slice, - enum cl_req_type crt, - struct cl_2queue *queue); - /** - * Queue async page for write. - * The difference between cio_submit and cio_commit_async is that - * cio_submit is for urgent request. - */ - int (*cio_commit_async)(const struct lu_env *env, - const struct cl_io_slice *slice, - struct cl_page_list *queue, int from, int to, - cl_commit_cbt cb); - /** - * Decide maximum read ahead extent - * - * \pre io->ci_type == CIT_READ - */ - int (*cio_read_ahead)(const struct lu_env *env, - const struct cl_io_slice *slice, - pgoff_t start, struct cl_read_ahead *ra); - /** - * Optional debugging helper. Print given io slice. - */ - int (*cio_print)(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct cl_io_slice *slice); -}; - -/** - * Flags to lock enqueue procedure.
- * \ingroup cl_lock - */ -enum cl_enq_flags { - /** - * instruct server to not block, if conflicting lock is found. Instead - * -EWOULDBLOCK is returned immediately. - */ - CEF_NONBLOCK = 0x00000001, - /** - * take lock asynchronously (out of order), as it cannot - * deadlock. This is for LDLM_FL_HAS_INTENT locks used for glimpsing. - */ - CEF_ASYNC = 0x00000002, - /** - * tell the server to instruct (though a flag in the blocking ast) an - * owner of the conflicting lock, that it can drop dirty pages - * protected by this lock, without sending them to the server. - */ - CEF_DISCARD_DATA = 0x00000004, - /** - * tell the sub layers that it must be a `real' lock. This is used for - * mmapped-buffer locks and glimpse locks that must be never converted - * into lockless mode. - * - * \see vvp_mmap_locks(), cl_glimpse_lock(). - */ - CEF_MUST = 0x00000008, - /** - * tell the sub layers that never request a `real' lock. This flag is - * not used currently. - * - * cl_io::ci_lockreq and CEF_{MUST,NEVER} flags specify lockless - * conversion policy: ci_lockreq describes generic information of lock - * requirement for this IO, especially for locks which belong to the - * object doing IO; however, lock itself may have precise requirements - * that are described by the enqueue flags. - */ - CEF_NEVER = 0x00000010, - /** - * for async glimpse lock. - */ - CEF_AGL = 0x00000020, - /** - * enqueue a lock to test DLM lock existence. - */ - CEF_PEEK = 0x00000040, - /** - * Lock match only. Used by group lock in I/O as group lock - * is known to exist. - */ - CEF_LOCK_MATCH = BIT(7), - /** - * mask of enq_flags. - */ - CEF_MASK = 0x000000ff, -}; - -/** - * Link between lock and io. Intermediate structure is needed, because the - * same lock can be part of multiple io's simultaneously. - */ -struct cl_io_lock_link { - /** linkage into one of cl_lockset lists. 
*/ - struct list_head cill_linkage; - struct cl_lock cill_lock; - /** optional destructor */ - void (*cill_fini)(const struct lu_env *env, - struct cl_io_lock_link *link); -}; -#define cill_descr cill_lock.cll_descr - -/** - * Lock-set represents a collection of locks, that io needs at a - * time. Generally speaking, client tries to avoid holding multiple locks when - * possible, because - * - * - holding extent locks over multiple ost's introduces the danger of - * "cascading timeouts"; - * - * - holding multiple locks over the same ost is still dead-lock prone, - * see comment in osc_lock_enqueue(), - * - * but there are certain situations where this is unavoidable: - * - * - O_APPEND writes have to take [0, EOF] lock for correctness; - * - * - truncate has to take [new-size, EOF] lock for correctness; - * - * - SNS has to take locks across full stripe for correctness; - * - * - in the case when user level buffer, supplied to {read,write}(file0), - * is a part of a memory mapped lustre file, client has to take a dlm - * locks on file0, and all files that back up the buffer (or a part of - * the buffer, that is being processed in the current chunk, in any - * case, there are situations where at least 2 locks are necessary). - * - * In such cases we at least try to take locks in the same consistent - * order. To this end, all locks are first collected, then sorted, and then - * enqueued. - */ -struct cl_lockset { - /** locks to be acquired. */ - struct list_head cls_todo; - /** locks acquired. */ - struct list_head cls_done; -}; - -/** - * Lock requirements(demand) for IO. It should be cl_io_lock_req, - * but 'req' is always to be thought as 'request' :-) - */ -enum cl_io_lock_dmd { - /** Always lock data (e.g., O_APPEND). */ - CILR_MANDATORY = 0, - /** Layers are free to decide between local and global locking. */ - CILR_MAYBE, - /** Never lock: there is no cache (e.g., lockless IO). 
*/ - CILR_NEVER -}; - -enum cl_fsync_mode { - /** start writeback, do not wait for them to finish */ - CL_FSYNC_NONE = 0, - /** start writeback and wait for them to finish */ - CL_FSYNC_LOCAL = 1, - /** discard all of dirty pages in a specific file range */ - CL_FSYNC_DISCARD = 2, - /** start writeback and make sure they have reached storage before - * return. OST_SYNC RPC must be issued and finished - */ - CL_FSYNC_ALL = 3 -}; - -struct cl_io_rw_common { - loff_t crw_pos; - size_t crw_count; - int crw_nonblock; -}; - -/** - * State for io. - * - * cl_io is shared by all threads participating in this IO (in current - * implementation only one thread advances IO, but parallel IO design and - * concurrent copy_*_user() require multiple threads acting on the same IO). It - * is up to these threads to serialize their activities, including updates to - * mutable cl_io fields. - */ -struct cl_io { - /** type of this IO. Immutable after creation. */ - enum cl_io_type ci_type; - /** current state of cl_io state machine. */ - enum cl_io_state ci_state; - /** main object this io is against. Immutable after creation. */ - struct cl_object *ci_obj; - /** - * Upper layer io, of which this io is a part. Immutable after - * creation. - */ - struct cl_io *ci_parent; - /** List of slices. Immutable after creation. */ - struct list_head ci_layers; - /** list of locks (to be) acquired by this io. */ - struct cl_lockset ci_lockset; - /** lock requirements, this is just a help info for sublayers.
*/ - enum cl_io_lock_dmd ci_lockreq; - union { - struct cl_rd_io { - struct cl_io_rw_common rd; - } ci_rd; - struct cl_wr_io { - struct cl_io_rw_common wr; - int wr_append; - int wr_sync; - } ci_wr; - struct cl_io_rw_common ci_rw; - struct cl_setattr_io { - struct ost_lvb sa_attr; - unsigned int sa_attr_flags; - unsigned int sa_valid; - int sa_stripe_index; - const struct lu_fid *sa_parent_fid; - } ci_setattr; - struct cl_data_version_io { - u64 dv_data_version; - int dv_flags; - } ci_data_version; - struct cl_fault_io { - /** page index within file. */ - pgoff_t ft_index; - /** valid bytes on a faulted page. */ - size_t ft_nob; - /** writable page? for nopage() only */ - int ft_writable; - /** page of an executable? */ - int ft_executable; - /** page_mkwrite() */ - int ft_mkwrite; - /** resulting page */ - struct cl_page *ft_page; - } ci_fault; - struct cl_fsync_io { - loff_t fi_start; - loff_t fi_end; - /** file system level fid */ - struct lu_fid *fi_fid; - enum cl_fsync_mode fi_mode; - /* how many pages were written/discarded */ - unsigned int fi_nr_written; - } ci_fsync; - } u; - struct cl_2queue ci_queue; - size_t ci_nob; - int ci_result; - unsigned int ci_continue:1, - /** - * This io has held grouplock, to inform sublayers that - * don't do lockless i/o. - */ - ci_no_srvlock:1, - /** - * The whole IO needs to be restarted because layout has been changed - */ - ci_need_restart:1, - /** - * to not refresh layout - the IO issuer knows that the layout won't - * change(page operations, layout change causes all page to be - * discarded), or it doesn't matter if it changes(sync). - */ - ci_ignore_layout:1, - /** - * Check if layout changed after the IO finishes. Mainly for HSM - * requirement. If IO occurs to opening files, it doesn't need to - * verify layout because HSM won't release opening files. - * Right now, only two operations need to verify layout: glimpse - * and setattr.
- */ - ci_verify_layout:1, - /** - * file is released, restore has to be triggered by vvp layer - */ - ci_restore_needed:1, - /** - * O_NOATIME - */ - ci_noatime:1; - /** - * Number of pages owned by this IO. For invariant checking. - */ - unsigned int ci_owned_nr; -}; - -/** @} cl_io */ - -/** - * Per-transfer attributes. - */ -struct cl_req_attr { - enum cl_req_type cra_type; - u64 cra_flags; - struct cl_page *cra_page; - - /** Generic attributes for the server consumption. */ - struct obdo *cra_oa; - /** Jobid */ - char cra_jobid[LUSTRE_JOBID_SIZE]; -}; - -enum cache_stats_item { - /** how many cache lookups were performed */ - CS_lookup = 0, - /** how many times cache lookup resulted in a hit */ - CS_hit, - /** how many entities are in the cache right now */ - CS_total, - /** how many entities in the cache are actively used (and cannot be - * evicted) right now - */ - CS_busy, - /** how many entities were created at all */ - CS_create, - CS_NR -}; - -#define CS_NAMES { "lookup", "hit", "total", "busy", "create" } - -/** - * Stats for a generic cache (similar to inode, lu_object, etc. caches). - */ -struct cache_stats { - const char *cs_name; - atomic_t cs_stats[CS_NR]; -}; - -/** These are not exported so far */ -void cache_stats_init(struct cache_stats *cs, const char *name); - -/** - * Client-side site. This represents particular client stack. "Global" - * variables should (directly or indirectly) be added here to allow multiple - * clients to co-exist in the single address space. - */ -struct cl_site { - struct lu_site cs_lu; - /** - * Statistical counters. Atomics do not scale, something better like - * per-cpu counters is needed. - * - * These are exported as /sys/kernel/debug/lustre/llite/.../site - * - * When interpreting keep in mind that both sub-locks (and sub-pages) - * and top-locks (and top-pages) are accounted here. 
- */ - struct cache_stats cs_pages; - atomic_t cs_pages_state[CPS_NR]; -}; - -int cl_site_init(struct cl_site *s, struct cl_device *top); -void cl_site_fini(struct cl_site *s); -void cl_stack_fini(const struct lu_env *env, struct cl_device *cl); - -/** - * Output client site statistical counters into a buffer. Suitable for - * ll_rd_*()-style functions. - */ -int cl_site_stats_print(const struct cl_site *site, struct seq_file *m); - -/** - * \name helpers - * - * Type conversion and accessory functions. - */ -/** @{ */ - -static inline struct cl_site *lu2cl_site(const struct lu_site *site) -{ - return container_of(site, struct cl_site, cs_lu); -} - -static inline int lu_device_is_cl(const struct lu_device *d) -{ - return d->ld_type->ldt_tags & LU_DEVICE_CL; -} - -static inline struct cl_device *lu2cl_dev(const struct lu_device *d) -{ - LASSERT(!d || IS_ERR(d) || lu_device_is_cl(d)); - return container_of0(d, struct cl_device, cd_lu_dev); -} - -static inline struct lu_device *cl2lu_dev(struct cl_device *d) -{ - return &d->cd_lu_dev; -} - -static inline struct cl_object *lu2cl(const struct lu_object *o) -{ - LASSERT(!o || IS_ERR(o) || lu_device_is_cl(o->lo_dev)); - return container_of0(o, struct cl_object, co_lu); -} - -static inline const struct cl_object_conf * -lu2cl_conf(const struct lu_object_conf *conf) -{ - return container_of0(conf, struct cl_object_conf, coc_lu); -} - -static inline struct cl_object *cl_object_next(const struct cl_object *obj) -{ - return obj ? 
lu2cl(lu_object_next(&obj->co_lu)) : NULL; -} - -static inline struct cl_device *cl_object_device(const struct cl_object *o) -{ - LASSERT(!o || IS_ERR(o) || lu_device_is_cl(o->co_lu.lo_dev)); - return container_of0(o->co_lu.lo_dev, struct cl_device, cd_lu_dev); -} - -static inline struct cl_object_header *luh2coh(const struct lu_object_header *h) -{ - return container_of0(h, struct cl_object_header, coh_lu); -} - -static inline struct cl_site *cl_object_site(const struct cl_object *obj) -{ - return lu2cl_site(obj->co_lu.lo_dev->ld_site); -} - -static inline -struct cl_object_header *cl_object_header(const struct cl_object *obj) -{ - return luh2coh(obj->co_lu.lo_header); -} - -static inline int cl_device_init(struct cl_device *d, struct lu_device_type *t) -{ - return lu_device_init(&d->cd_lu_dev, t); -} - -static inline void cl_device_fini(struct cl_device *d) -{ - lu_device_fini(&d->cd_lu_dev); -} - -void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice, - struct cl_object *obj, pgoff_t index, - const struct cl_page_operations *ops); -void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, - struct cl_object *obj, - const struct cl_lock_operations *ops); -void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice, - struct cl_object *obj, const struct cl_io_operations *ops); -/** @} helpers */ - -/** \defgroup cl_object cl_object - * @{ - */ -struct cl_object *cl_object_top(struct cl_object *o); -struct cl_object *cl_object_find(const struct lu_env *env, struct cl_device *cd, - const struct lu_fid *fid, - const struct cl_object_conf *c); - -int cl_object_header_init(struct cl_object_header *h); -void cl_object_put(const struct lu_env *env, struct cl_object *o); -void cl_object_get(struct cl_object *o); -void cl_object_attr_lock(struct cl_object *o); -void cl_object_attr_unlock(struct cl_object *o); -int cl_object_attr_get(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr); -int 
cl_object_attr_update(const struct lu_env *env, struct cl_object *obj, - const struct cl_attr *attr, unsigned int valid); -int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj, - struct ost_lvb *lvb); -int cl_conf_set(const struct lu_env *env, struct cl_object *obj, - const struct cl_object_conf *conf); -int cl_object_prune(const struct lu_env *env, struct cl_object *obj); -void cl_object_kill(const struct lu_env *env, struct cl_object *obj); -int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj, - struct lov_user_md __user *lum); -int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj, - struct ll_fiemap_info_key *fmkey, struct fiemap *fiemap, - size_t *buflen); -int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj, - struct cl_layout *cl); -loff_t cl_object_maxbytes(struct cl_object *obj); - -/** - * Returns true, iff \a o0 and \a o1 are slices of the same object. - */ -static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1) -{ - return cl_object_header(o0) == cl_object_header(o1); -} - -static inline void cl_object_page_init(struct cl_object *clob, int size) -{ - clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize; - cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size); - WARN_ON(cl_object_header(clob)->coh_page_bufsize > 512); -} - -static inline void *cl_object_page_slice(struct cl_object *clob, - struct cl_page *page) -{ - return (void *)((char *)page + clob->co_slice_off); -} - -/** - * Return refcount of cl_object. 
- */ -static inline int cl_object_refc(struct cl_object *clob) -{ - struct lu_object_header *header = clob->co_lu.lo_header; - - return atomic_read(&header->loh_ref); -} - -/** @} cl_object */ - -/** \defgroup cl_page cl_page - * @{ - */ -enum { - CLP_GANG_OKAY = 0, - CLP_GANG_RESCHED, - CLP_GANG_AGAIN, - CLP_GANG_ABORT -}; - -/* callback of cl_page_gang_lookup() */ -struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj, - pgoff_t idx, struct page *vmpage, - enum cl_page_type type); -struct cl_page *cl_page_alloc(const struct lu_env *env, - struct cl_object *o, pgoff_t ind, - struct page *vmpage, - enum cl_page_type type); -void cl_page_get(struct cl_page *page); -void cl_page_put(const struct lu_env *env, struct cl_page *page); -void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer, - const struct cl_page *pg); -void cl_page_header_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct cl_page *pg); -struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj); - -const struct cl_page_slice *cl_page_at(const struct cl_page *page, - const struct lu_device_type *dtype); - -/** - * \name ownership - * - * Functions dealing with the ownership of page by io. 
- */ -/** @{ */ - -int cl_page_own(const struct lu_env *env, - struct cl_io *io, struct cl_page *page); -int cl_page_own_try(const struct lu_env *env, - struct cl_io *io, struct cl_page *page); -void cl_page_assume(const struct lu_env *env, - struct cl_io *io, struct cl_page *page); -void cl_page_unassume(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg); -void cl_page_disown(const struct lu_env *env, - struct cl_io *io, struct cl_page *page); -void cl_page_disown0(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg); -int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io); - -/** @} ownership */ - -/** - * \name transfer - * - * Functions dealing with the preparation of a page for a transfer, and - * tracking transfer state. - */ -/** @{ */ -int cl_page_prep(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg, enum cl_req_type crt); -void cl_page_completion(const struct lu_env *env, - struct cl_page *pg, enum cl_req_type crt, int ioret); -int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg, - enum cl_req_type crt); -int cl_page_cache_add(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg, enum cl_req_type crt); -void cl_page_clip(const struct lu_env *env, struct cl_page *pg, - int from, int to); -int cl_page_cancel(const struct lu_env *env, struct cl_page *page); -int cl_page_flush(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg); - -/** @} transfer */ - -/** - * \name helper routines - * Functions to discard, delete and export a cl_page. 
- */ -/** @{ */ -void cl_page_discard(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg); -void cl_page_delete(const struct lu_env *env, struct cl_page *pg); -int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg); -void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate); -loff_t cl_offset(const struct cl_object *obj, pgoff_t idx); -pgoff_t cl_index(const struct cl_object *obj, loff_t offset); -size_t cl_page_size(const struct cl_object *obj); -int cl_pages_prune(const struct lu_env *env, struct cl_object *obj); - -void cl_lock_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct cl_lock *lock); -void cl_lock_descr_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, - const struct cl_lock_descr *descr); -/* @} helper */ - -/** - * Data structure managing a client's cached pages. A count of - * "unstable" pages is maintained, and an LRU of clean pages is - * maintained. "unstable" pages are pages pinned by the ptlrpc - * layer for recovery purposes. - */ -struct cl_client_cache { - /** - * # of client cache refcount - * # of users (OSCs) + 2 (held by llite and lov) - */ - atomic_t ccc_users; - /** - * # of threads are doing shrinking - */ - unsigned int ccc_lru_shrinkers; - /** - * # of LRU entries available - */ - atomic_long_t ccc_lru_left; - /** - * List of entities(OSCs) for this LRU cache - */ - struct list_head ccc_lru; - /** - * Max # of LRU entries - */ - unsigned long ccc_lru_max; - /** - * Lock to protect ccc_lru list - */ - spinlock_t ccc_lru_lock; - /** - * Set if unstable check is enabled - */ - unsigned int ccc_unstable_check:1; - /** - * # of unstable pages for this mount point - */ - atomic_long_t ccc_unstable_nr; - /** - * Waitq for awaiting unstable pages to reach zero. 
- * Used at umounting time and signaled on BRW commit - */ - wait_queue_head_t ccc_unstable_waitq; - -}; - -/** - * cl_cache functions - */ -struct cl_client_cache *cl_cache_init(unsigned long lru_page_max); -void cl_cache_incref(struct cl_client_cache *cache); -void cl_cache_decref(struct cl_client_cache *cache); - -/** @} cl_page */ - -/** \defgroup cl_lock cl_lock - * @{ - */ - -int cl_lock_request(const struct lu_env *env, struct cl_io *io, - struct cl_lock *lock); -int cl_lock_init(const struct lu_env *env, struct cl_lock *lock, - const struct cl_io *io); -void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock); -const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, - const struct lu_device_type *dtype); -void cl_lock_release(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io, - struct cl_lock *lock, struct cl_sync_io *anchor); -void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock); - -/** @} cl_lock */ - -/** \defgroup cl_io cl_io - * @{ - */ - -int cl_io_init(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, struct cl_object *obj); -int cl_io_sub_init(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, struct cl_object *obj); -int cl_io_rw_init(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, loff_t pos, size_t count); -int cl_io_loop(const struct lu_env *env, struct cl_io *io); - -void cl_io_fini(const struct lu_env *env, struct cl_io *io); -int cl_io_iter_init(const struct lu_env *env, struct cl_io *io); -void cl_io_iter_fini(const struct lu_env *env, struct cl_io *io); -int cl_io_lock(const struct lu_env *env, struct cl_io *io); -void cl_io_unlock(const struct lu_env *env, struct cl_io *io); -int cl_io_start(const struct lu_env *env, struct cl_io *io); -void cl_io_end(const struct lu_env *env, struct cl_io *io); -int cl_io_lock_add(const struct lu_env *env, struct cl_io *io, - struct cl_io_lock_link *link); 
-int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, - struct cl_lock_descr *descr); -int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, - enum cl_req_type iot, struct cl_2queue *queue); -int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, - enum cl_req_type iot, struct cl_2queue *queue, - long timeout); -int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, int from, int to, - cl_commit_cbt cb); -int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io, - pgoff_t start, struct cl_read_ahead *ra); -int cl_io_is_going(const struct lu_env *env); - -/** - * True, iff \a io is an O_APPEND write(2). - */ -static inline int cl_io_is_append(const struct cl_io *io) -{ - return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append; -} - -static inline int cl_io_is_sync_write(const struct cl_io *io) -{ - return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_sync; -} - -static inline int cl_io_is_mkwrite(const struct cl_io *io) -{ - return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite; -} - -/** - * True, iff \a io is a truncate(2). - */ -static inline int cl_io_is_trunc(const struct cl_io *io) -{ - return io->ci_type == CIT_SETATTR && - (io->u.ci_setattr.sa_valid & ATTR_SIZE); -} - -struct cl_io *cl_io_top(struct cl_io *io); - -#define CL_IO_SLICE_CLEAN(foo_io, base) \ -do { \ - typeof(foo_io) __foo_io = (foo_io); \ - \ - BUILD_BUG_ON(offsetof(typeof(*__foo_io), base) != 0); \ - memset(&__foo_io->base + 1, 0, \ - sizeof(*__foo_io) - sizeof(__foo_io->base)); \ -} while (0) - -/** @} cl_io */ - -/** \defgroup cl_page_list cl_page_list - * @{ - */ - -/** - * Last page in the page list. 
- */ -static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist) -{ - LASSERT(plist->pl_nr > 0); - return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch); -} - -static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist) -{ - LASSERT(plist->pl_nr > 0); - return list_entry(plist->pl_pages.next, struct cl_page, cp_batch); -} - -/** - * Iterate over pages in a page list. - */ -#define cl_page_list_for_each(page, list) \ - list_for_each_entry((page), &(list)->pl_pages, cp_batch) - -/** - * Iterate over pages in a page list, taking possible removals into account. - */ -#define cl_page_list_for_each_safe(page, temp, list) \ - list_for_each_entry_safe((page), (temp), &(list)->pl_pages, cp_batch) - -void cl_page_list_init(struct cl_page_list *plist); -void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page); -void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src, - struct cl_page *page); -void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src, - struct cl_page *page); -void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head); -void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist, - struct cl_page *page); -void cl_page_list_disown(const struct lu_env *env, - struct cl_io *io, struct cl_page_list *plist); -void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist); - -void cl_2queue_init(struct cl_2queue *queue); -void cl_2queue_disown(const struct lu_env *env, - struct cl_io *io, struct cl_2queue *queue); -void cl_2queue_discard(const struct lu_env *env, - struct cl_io *io, struct cl_2queue *queue); -void cl_2queue_fini(const struct lu_env *env, struct cl_2queue *queue); -void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page); - -/** @} cl_page_list */ - -void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj, - struct cl_req_attr *attr); - -/** \defgroup cl_sync_io cl_sync_io - * 
@{ - */ - -/** - * Anchor for synchronous transfer. This is allocated on a stack by thread - * doing synchronous transfer, and a pointer to this structure is set up in - * every page submitted for transfer. Transfer completion routine updates - * anchor and wakes up waiting thread when transfer is complete. - */ -struct cl_sync_io { - /** number of pages yet to be transferred. */ - atomic_t csi_sync_nr; - /** error code. */ - int csi_sync_rc; - /** barrier of destroy this structure */ - atomic_t csi_barrier; - /** completion to be signaled when transfer is complete. */ - wait_queue_head_t csi_waitq; - /** callback to invoke when this IO is finished */ - void (*csi_end_io)(const struct lu_env *, - struct cl_sync_io *); -}; - -void cl_sync_io_init(struct cl_sync_io *anchor, int nr, - void (*end)(const struct lu_env *, struct cl_sync_io *)); -int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor, - long timeout); -void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor, - int ioret); -void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor); - -/** @} cl_sync_io */ - -/** \defgroup cl_env cl_env - * - * lu_env handling for a client. - * - * lu_env is an environment within which lustre code executes. Its major part - * is lu_context---a fast memory allocation mechanism that is used to conserve - * precious kernel stack space. Originally lu_env was designed for a server, - * where - * - * - there is a (mostly) fixed number of threads, and - * - * - call chains have no non-lustre portions inserted between lustre code. - * - * On a client both these assumption fails, because every user thread can - * potentially execute lustre code as part of a system call, and lustre calls - * into VFS or MM that call back into lustre. 
- * - * To deal with that, cl_env wrapper functions implement the following - * optimizations: - * - * - allocation and destruction of environment is amortized by caching no - * longer used environments instead of destroying them; - * - * \see lu_env, lu_context, lu_context_key - * @{ - */ - -struct lu_env *cl_env_get(u16 *refcheck); -struct lu_env *cl_env_alloc(u16 *refcheck, __u32 tags); -void cl_env_put(struct lu_env *env, u16 *refcheck); -unsigned int cl_env_cache_purge(unsigned int nr); -struct lu_env *cl_env_percpu_get(void); -void cl_env_percpu_put(struct lu_env *env); - -/** @} cl_env */ - -/* - * Misc - */ -void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb); - -struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site, - struct lu_device_type *ldt, - struct lu_device *next); -/** @} clio */ - -int cl_global_init(void); -void cl_global_fini(void); - -#endif /* _LINUX_CL_OBJECT_H */ diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h deleted file mode 100644 index 7d119c1a0469..000000000000 --- a/drivers/staging/lustre/lustre/include/interval_tree.h +++ /dev/null @@ -1,119 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/interval_tree.h - * - * Author: Huang Wei <huangwei@clusterfs.com> - * Author: Jay Xiong <jinshan.xiong@sun.com> - */ - -#ifndef _INTERVAL_H__ -#define _INTERVAL_H__ - -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> - -struct interval_node { - struct interval_node *in_left; - struct interval_node *in_right; - struct interval_node *in_parent; - unsigned in_color:1, - in_intree:1, /** set if the node is in tree */ - in_res1:30; - __u8 in_res2[4]; /** tags, 8-bytes aligned */ - __u64 in_max_high; - struct interval_node_extent { - __u64 start; - __u64 end; - } in_extent; -}; - -enum interval_iter { - INTERVAL_ITER_CONT = 1, - INTERVAL_ITER_STOP = 2 -}; - -static inline int interval_is_intree(struct interval_node *node) -{ - return node->in_intree == 1; -} - -static inline __u64 interval_low(struct interval_node *node) -{ - return node->in_extent.start; -} - -static inline __u64 interval_high(struct interval_node *node) -{ - return node->in_extent.end; -} - -static inline int interval_set(struct interval_node *node, - __u64 start, __u64 end) -{ - if (start > end) - return -ERANGE; - node->in_extent.start = start; - node->in_extent.end = end; - node->in_max_high = end; - return 0; -} - -/* - * Rules to write an interval callback. - * - the callback returns INTERVAL_ITER_STOP when it thinks the iteration - * should be stopped. It will then cause the iteration function to return - * immediately with return value INTERVAL_ITER_STOP. 
- * - callbacks for interval_iterate and interval_iterate_reverse: Every - * nodes in the tree will be set to @node before the callback being called - * - callback for interval_search: Only overlapped node will be set to @node - * before the callback being called. - */ -typedef enum interval_iter (*interval_callback_t)(struct interval_node *node, - void *args); - -struct interval_node *interval_insert(struct interval_node *node, - struct interval_node **root); -void interval_erase(struct interval_node *node, struct interval_node **root); - -/* - * Search the extents in the tree and call @func for each overlapped - * extents. - */ -enum interval_iter interval_search(struct interval_node *root, - struct interval_node_extent *ex, - interval_callback_t func, void *data); - -enum interval_iter interval_iterate_reverse(struct interval_node *root, - interval_callback_t func, - void *data); - -#endif diff --git a/drivers/staging/lustre/lustre/include/llog_swab.h b/drivers/staging/lustre/lustre/include/llog_swab.h deleted file mode 100644 index 0433b79efdcb..000000000000 --- a/drivers/staging/lustre/lustre/include/llog_swab.h +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - * - * Copyright 2015 Cray Inc, all rights reserved. - * Author: Ben Evans. - * - * We assume all nodes are either little-endian or big-endian, and we - * always send messages in the sender's native format. The receiver - * detects the message format by checking the 'magic' field of the message - * (see lustre_msg_swabbed() below). - * - * Each type has corresponding 'lustre_swab_xxxtypexxx()' routines - * are implemented in ptlrpc/pack_generic.c. These 'swabbers' convert the - * type from "other" endian, in-place in the message buffer. - * - * A swabber takes a single pointer argument. The caller must already have - * verified that the length of the message buffer >= sizeof (type). - * - * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine - * may be defined that swabs just the variable part, after the caller has - * verified that the message buffer is large enough. 
- */ - -#ifndef _LLOG_SWAB_H_ -#define _LLOG_SWAB_H_ - -#include <uapi/linux/lustre/lustre_idl.h> - -struct lustre_cfg; - -void lustre_swab_lu_fid(struct lu_fid *fid); -void lustre_swab_ost_id(struct ost_id *oid); -void lustre_swab_llogd_body(struct llogd_body *d); -void lustre_swab_llog_hdr(struct llog_log_hdr *h); -void lustre_swab_llogd_conn_body(struct llogd_conn_body *d); -void lustre_swab_llog_rec(struct llog_rec_hdr *rec); -void lustre_swab_lu_seq_range(struct lu_seq_range *range); -void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); -void lustre_swab_cfg_marker(struct cfg_marker *marker, - int swab, int size); - -#endif diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h deleted file mode 100644 index 426e8f3c9809..000000000000 --- a/drivers/staging/lustre/lustre/include/lprocfs_status.h +++ /dev/null @@ -1,672 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lprocfs_status.h - * - * Top level header file for LProc SNMP - * - * Author: Hariharan Thantry thantry@users.sourceforge.net - */ -#ifndef _LPROCFS_SNMP_H -#define _LPROCFS_SNMP_H - -#include <linux/debugfs.h> -#include <linux/seq_file.h> -#include <linux/spinlock.h> -#include <linux/types.h> - -#include <linux/libcfs/libcfs.h> -#include <uapi/linux/lustre/lustre_cfg.h> -#include <uapi/linux/lustre/lustre_idl.h> - -struct lprocfs_vars { - const char *name; - const struct file_operations *fops; - void *data; - /** - * sysfs file mode. - */ - umode_t proc_mode; -}; - -struct lprocfs_static_vars { - struct lprocfs_vars *obd_vars; - const struct attribute_group *sysfs_vars; -}; - -/* if we find more consumers this could be generalized */ -#define OBD_HIST_MAX 32 -struct obd_histogram { - spinlock_t oh_lock; - unsigned long oh_buckets[OBD_HIST_MAX]; -}; - -enum { - BRW_R_PAGES = 0, - BRW_W_PAGES, - BRW_R_RPC_HIST, - BRW_W_RPC_HIST, - BRW_R_IO_TIME, - BRW_W_IO_TIME, - BRW_R_DISCONT_PAGES, - BRW_W_DISCONT_PAGES, - BRW_R_DISCONT_BLOCKS, - BRW_W_DISCONT_BLOCKS, - BRW_R_DISK_IOSIZE, - BRW_W_DISK_IOSIZE, - BRW_R_DIO_FRAGS, - BRW_W_DIO_FRAGS, - BRW_LAST, -}; - -struct brw_stats { - struct obd_histogram hist[BRW_LAST]; -}; - -enum { - RENAME_SAMEDIR_SIZE = 0, - RENAME_CROSSDIR_SRC_SIZE, - RENAME_CROSSDIR_TGT_SIZE, - RENAME_LAST, -}; - -struct rename_stats { - struct obd_histogram hist[RENAME_LAST]; -}; - -/* An lprocfs counter can be configured using the enum bit masks below. - * - * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already - * protects this counter from concurrent updates. If not specified, - * lprocfs an internal per-counter lock variable. External locks are - * not used to protect counter increments, but are used to protect - * counter readout and resets. 
- * - * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter samples, - * (i.e. counter can be incremented by more than "1"). When specified, - * the counter maintains min, max and sum in addition to a simple - * invocation count. This allows averages to be computed. - * If not specified, the counter is an increment-by-1 counter. - * min, max, sum, etc. are not maintained. - * - * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of - * squares (for multi-valued counter samples only). This allows - * external computation of standard deviation, but involves a 64-bit - * multiply per counter increment. - */ - -enum { - LPROCFS_CNTR_EXTERNALLOCK = 0x0001, - LPROCFS_CNTR_AVGMINMAX = 0x0002, - LPROCFS_CNTR_STDDEV = 0x0004, - - /* counter data type */ - LPROCFS_TYPE_REGS = 0x0100, - LPROCFS_TYPE_BYTES = 0x0200, - LPROCFS_TYPE_PAGES = 0x0400, - LPROCFS_TYPE_CYCLE = 0x0800, -}; - -#define LC_MIN_INIT ((~(__u64)0) >> 1) - -struct lprocfs_counter_header { - unsigned int lc_config; - const char *lc_name; /* must be static */ - const char *lc_units; /* must be static */ -}; - -struct lprocfs_counter { - __s64 lc_count; - __s64 lc_min; - __s64 lc_max; - __s64 lc_sumsquare; - /* - * Every counter has lc_array_sum[0], while lc_array_sum[1] is only - * for irq context counter, i.e. 
stats with - * LPROCFS_STATS_FLAG_IRQ_SAFE flag, its counter need - * lc_array_sum[1] - */ - __s64 lc_array_sum[1]; -}; - -#define lc_sum lc_array_sum[0] -#define lc_sum_irq lc_array_sum[1] - -struct lprocfs_percpu { -#ifndef __GNUC__ - __s64 pad; -#endif - struct lprocfs_counter lp_cntr[0]; -}; - -enum lprocfs_stats_lock_ops { - LPROCFS_GET_NUM_CPU = 0x0001, /* number allocated per-CPU stats */ - LPROCFS_GET_SMP_ID = 0x0002, /* current stat to be updated */ -}; - -enum lprocfs_stats_flags { - LPROCFS_STATS_FLAG_NONE = 0x0000, /* per cpu counter */ - LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu - * area and need locking - */ - LPROCFS_STATS_FLAG_IRQ_SAFE = 0x0002, /* alloc need irq safe */ -}; - -enum lprocfs_fields_flags { - LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001, - LPROCFS_FIELDS_FLAGS_SUM = 0x0002, - LPROCFS_FIELDS_FLAGS_MIN = 0x0003, - LPROCFS_FIELDS_FLAGS_MAX = 0x0004, - LPROCFS_FIELDS_FLAGS_AVG = 0x0005, - LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006, - LPROCFS_FIELDS_FLAGS_COUNT = 0x0007, -}; - -struct lprocfs_stats { - /* # of counters */ - unsigned short ls_num; - /* 1 + the biggest cpu # whose ls_percpu slot has been allocated */ - unsigned short ls_biggest_alloc_num; - enum lprocfs_stats_flags ls_flags; - /* Lock used when there are no percpu stats areas; For percpu stats, - * it is used to protect ls_biggest_alloc_num change - */ - spinlock_t ls_lock; - - /* has ls_num of counter headers */ - struct lprocfs_counter_header *ls_cnt_header; - struct lprocfs_percpu *ls_percpu[0]; -}; - -#define OPC_RANGE(seg) (seg ## _LAST_OPC - seg ## _FIRST_OPC) - -/* Pack all opcodes down into a single monotonically increasing index */ -static inline int opcode_offset(__u32 opc) -{ - if (opc < OST_LAST_OPC) { - /* OST opcode */ - return (opc - OST_FIRST_OPC); - } else if (opc < MDS_LAST_OPC) { - /* MDS opcode */ - return (opc - MDS_FIRST_OPC + - OPC_RANGE(OST)); - } else if (opc < LDLM_LAST_OPC) { - /* LDLM Opcode */ - return (opc - LDLM_FIRST_OPC + - 
OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < MGS_LAST_OPC) { - /* MGS Opcode */ - return (opc - MGS_FIRST_OPC + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < OBD_LAST_OPC) { - /* OBD Ping */ - return (opc - OBD_FIRST_OPC + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < LLOG_LAST_OPC) { - /* LLOG Opcode */ - return (opc - LLOG_FIRST_OPC + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < QUOTA_LAST_OPC) { - /* LQUOTA Opcode */ - return (opc - QUOTA_FIRST_OPC + - OPC_RANGE(LLOG) + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < SEQ_LAST_OPC) { - /* SEQ opcode */ - return (opc - SEQ_FIRST_OPC + - OPC_RANGE(QUOTA) + - OPC_RANGE(LLOG) + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < SEC_LAST_OPC) { - /* SEC opcode */ - return (opc - SEC_FIRST_OPC + - OPC_RANGE(SEQ) + - OPC_RANGE(QUOTA) + - OPC_RANGE(LLOG) + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < FLD_LAST_OPC) { - /* FLD opcode */ - return (opc - FLD_FIRST_OPC + - OPC_RANGE(SEC) + - OPC_RANGE(SEQ) + - OPC_RANGE(QUOTA) + - OPC_RANGE(LLOG) + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else { - /* Unknown Opcode */ - return -1; - } -} - -#define LUSTRE_MAX_OPCODES (OPC_RANGE(OST) + \ - OPC_RANGE(MDS) + \ - OPC_RANGE(LDLM) + \ - OPC_RANGE(MGS) + \ - OPC_RANGE(OBD) + \ - OPC_RANGE(LLOG) + \ - OPC_RANGE(SEC) + \ - OPC_RANGE(SEQ) + \ - OPC_RANGE(SEC) + \ - OPC_RANGE(FLD)) - -#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR) + \ - OPC_RANGE(EXTRA)) - -enum { - PTLRPC_REQWAIT_CNTR = 0, - PTLRPC_REQQDEPTH_CNTR, - PTLRPC_REQACTIVE_CNTR, - PTLRPC_TIMEOUT, - PTLRPC_REQBUF_AVAIL_CNTR, - PTLRPC_LAST_CNTR 
-}; - -#define PTLRPC_FIRST_CNTR PTLRPC_REQWAIT_CNTR - -enum { - LDLM_GLIMPSE_ENQUEUE = 0, - LDLM_PLAIN_ENQUEUE, - LDLM_EXTENT_ENQUEUE, - LDLM_FLOCK_ENQUEUE, - LDLM_IBITS_ENQUEUE, - MDS_REINT_SETATTR, - MDS_REINT_CREATE, - MDS_REINT_LINK, - MDS_REINT_UNLINK, - MDS_REINT_RENAME, - MDS_REINT_OPEN, - MDS_REINT_SETXATTR, - BRW_READ_BYTES, - BRW_WRITE_BYTES, - EXTRA_LAST_OPC -}; - -#define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE -/* class_obd.c */ -extern struct dentry *debugfs_lustre_root; -extern struct kobject *lustre_kobj; - -struct obd_device; -struct obd_histogram; - -/* Days / hours / mins / seconds format */ -struct dhms { - int d, h, m, s; -}; - -static inline void s2dhms(struct dhms *ts, time64_t secs64) -{ - unsigned int secs; - - ts->d = div_u64_rem(secs64, 86400, &secs); - ts->h = secs / 3600; - secs = secs % 3600; - ts->m = secs / 60; - ts->s = secs % 60; -} - -#define DHMS_FMT "%dd%dh%02dm%02ds" -#define DHMS_VARS(x) (x)->d, (x)->h, (x)->m, (x)->s - -#define JOBSTATS_JOBID_VAR_MAX_LEN 20 -#define JOBSTATS_DISABLE "disable" -#define JOBSTATS_PROCNAME_UID "procname_uid" -#define JOBSTATS_NODELOCAL "nodelocal" - -/* obd_config.c */ -void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg)); - -int lprocfs_write_frac_helper(const char __user *buffer, - unsigned long count, int *val, int mult); -int lprocfs_read_frac_helper(char *buffer, unsigned long count, - long val, int mult); - -int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, - unsigned int cpuid); -int lprocfs_stats_lock(struct lprocfs_stats *stats, - enum lprocfs_stats_lock_ops opc, - unsigned long *flags); -void lprocfs_stats_unlock(struct lprocfs_stats *stats, - enum lprocfs_stats_lock_ops opc, - unsigned long *flags); - -static inline unsigned int -lprocfs_stats_counter_size(struct lprocfs_stats *stats) -{ - unsigned int percpusize; - - percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]); - - /* irq safe stats need lc_array_sum[1] */ - if ((stats->ls_flags 
& LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpusize += stats->ls_num * sizeof(__s64); - - if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0) - percpusize = L1_CACHE_ALIGN(percpusize); - - return percpusize; -} - -static inline struct lprocfs_counter * -lprocfs_stats_counter_get(struct lprocfs_stats *stats, unsigned int cpuid, - int index) -{ - struct lprocfs_counter *cntr; - - cntr = &stats->ls_percpu[cpuid]->lp_cntr[index]; - - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - cntr = (void *)cntr + index * sizeof(__s64); - - return cntr; -} - -/* Two optimized LPROCFS counter increment functions are provided: - * lprocfs_counter_incr(cntr, value) - optimized for by-one counters - * lprocfs_counter_add(cntr) - use for multi-valued counters - * Counter data layout allows config flag, counter lock and the - * count itself to reside within a single cache line. - */ - -void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount); -void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount); - -#define lprocfs_counter_incr(stats, idx) \ - lprocfs_counter_add(stats, idx, 1) -#define lprocfs_counter_decr(stats, idx) \ - lprocfs_counter_sub(stats, idx, 1) - -__s64 lprocfs_read_helper(struct lprocfs_counter *lc, - struct lprocfs_counter_header *header, - enum lprocfs_stats_flags flags, - enum lprocfs_fields_flags field); -__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx, - enum lprocfs_fields_flags field); - -extern struct lprocfs_stats * -lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags); -void lprocfs_clear_stats(struct lprocfs_stats *stats); -void lprocfs_free_stats(struct lprocfs_stats **stats); -void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned int conf, const char *name, - const char *units); -struct obd_export; -int lprocfs_exp_cleanup(struct obd_export *exp); -struct dentry *ldebugfs_add_simple(struct dentry *root, - char *name, - void *data, - const struct 
file_operations *fops); - -int ldebugfs_register_stats(struct dentry *parent, - const char *name, - struct lprocfs_stats *stats); - -/* lprocfs_status.c */ -int ldebugfs_add_vars(struct dentry *parent, - struct lprocfs_vars *var, - void *data); - -struct dentry *ldebugfs_register(const char *name, - struct dentry *parent, - struct lprocfs_vars *list, - void *data); - -void ldebugfs_remove(struct dentry **entryp); - -int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list, - const struct attribute_group *attrs); -int lprocfs_obd_cleanup(struct obd_device *obd); - -int ldebugfs_seq_create(struct dentry *parent, - const char *name, - umode_t mode, - const struct file_operations *seq_fops, - void *data); -int ldebugfs_obd_seq_create(struct obd_device *dev, - const char *name, - umode_t mode, - const struct file_operations *seq_fops, - void *data); - -/* Generic callbacks */ - -int lprocfs_rd_uint(struct seq_file *m, void *data); -int lprocfs_wr_uint(struct file *file, const char __user *buffer, - unsigned long count, void *data); -int lprocfs_rd_server_uuid(struct seq_file *m, void *data); -int lprocfs_rd_conn_uuid(struct seq_file *m, void *data); -int lprocfs_rd_import(struct seq_file *m, void *data); -int lprocfs_rd_state(struct seq_file *m, void *data); -int lprocfs_rd_connect_flags(struct seq_file *m, void *data); - -struct adaptive_timeout; -int lprocfs_at_hist_helper(struct seq_file *m, struct adaptive_timeout *at); -int lprocfs_rd_timeouts(struct seq_file *m, void *data); -int lprocfs_wr_ping(struct file *file, const char __user *buffer, - size_t count, loff_t *off); -int lprocfs_wr_import(struct file *file, const char __user *buffer, - size_t count, loff_t *off); -int lprocfs_rd_pinger_recov(struct seq_file *m, void *n); -int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer, - size_t count, loff_t *off); - -/* Statfs helpers */ - -int lprocfs_write_helper(const char __user *buffer, unsigned long count, - int *val); -int 
lprocfs_write_u64_helper(const char __user *buffer, - unsigned long count, __u64 *val); -int lprocfs_write_frac_u64_helper(const char __user *buffer, - unsigned long count, - __u64 *val, int mult); -char *lprocfs_find_named_value(const char *buffer, const char *name, - size_t *count); -void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value); -void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value); -void lprocfs_oh_clear(struct obd_histogram *oh); -unsigned long lprocfs_oh_sum(struct obd_histogram *oh); - -void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx, - struct lprocfs_counter *cnt); - -int lprocfs_single_release(struct inode *inode, struct file *file); -int lprocfs_seq_release(struct inode *inode, struct file *file); - -/* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only - * proc entries; otherwise, you will define name##_seq_write function also for - * a read-write proc entry, and then call LPROC_SEQ_SEQ instead. Finally, - * call ldebugfs_obd_seq_create(obd, filename, 0444, &name#_fops, data); - */ -#define __LPROC_SEQ_FOPS(name, custom_seq_write) \ -static int name##_single_open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, name##_seq_show, inode->i_private); \ -} \ -static const struct file_operations name##_fops = { \ - .owner = THIS_MODULE, \ - .open = name##_single_open, \ - .read = seq_read, \ - .write = custom_seq_write, \ - .llseek = seq_lseek, \ - .release = lprocfs_single_release, \ -} - -#define LPROC_SEQ_FOPS_RO(name) __LPROC_SEQ_FOPS(name, NULL) -#define LPROC_SEQ_FOPS(name) __LPROC_SEQ_FOPS(name, name##_seq_write) - -#define LPROC_SEQ_FOPS_RO_TYPE(name, type) \ - static int name##_##type##_seq_show(struct seq_file *m, void *v)\ - { \ - return lprocfs_rd_##type(m, m->private); \ - } \ - LPROC_SEQ_FOPS_RO(name##_##type) - -#define LPROC_SEQ_FOPS_RW_TYPE(name, type) \ - static int name##_##type##_seq_show(struct seq_file *m, void *v)\ - { \ - return 
lprocfs_rd_##type(m, m->private); \ - } \ - static ssize_t name##_##type##_seq_write(struct file *file, \ - const char __user *buffer, size_t count, \ - loff_t *off) \ - { \ - struct seq_file *seq = file->private_data; \ - return lprocfs_wr_##type(file, buffer, \ - count, seq->private); \ - } \ - LPROC_SEQ_FOPS(name##_##type) - -#define LPROC_SEQ_FOPS_WR_ONLY(name, type) \ - static ssize_t name##_##type##_write(struct file *file, \ - const char __user *buffer, size_t count, \ - loff_t *off) \ - { \ - return lprocfs_wr_##type(file, buffer, count, off); \ - } \ - static int name##_##type##_open(struct inode *inode, struct file *file) \ - { \ - return single_open(file, NULL, inode->i_private); \ - } \ - static const struct file_operations name##_##type##_fops = { \ - .open = name##_##type##_open, \ - .write = name##_##type##_write, \ - .release = lprocfs_single_release, \ - } - -struct lustre_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, struct attribute *attr, - char *buf); - ssize_t (*store)(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len); -}; - -#define LUSTRE_ATTR(name, mode, show, store) \ -static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store) - -#define LUSTRE_RO_ATTR(name) LUSTRE_ATTR(name, 0444, name##_show, NULL) -#define LUSTRE_RW_ATTR(name) LUSTRE_ATTR(name, 0644, name##_show, name##_store) - -extern const struct sysfs_ops lustre_sysfs_ops; - -struct root_squash_info; -int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count, - struct root_squash_info *squash, char *name); -int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count, - struct root_squash_info *squash, char *name); - -/* all quota proc functions */ -int lprocfs_quota_rd_bunit(char *page, char **start, - loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_bunit(struct file *file, const char *buffer, - unsigned long count, void *data); -int 
lprocfs_quota_rd_btune(char *page, char **start, - loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_btune(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_iunit(char *page, char **start, - loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_iunit(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_itune(char *page, char **start, - loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_itune(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_type(char *page, char **start, loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_type(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_switch_seconds(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_switch_seconds(struct file *file, - const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_sync_blk(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_switch_qs(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_switch_qs(struct file *file, - const char *buffer, unsigned long count, - void *data); -int lprocfs_quota_rd_boundary_factor(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_boundary_factor(struct file *file, - const char *buffer, unsigned long count, - void *data); -int lprocfs_quota_rd_least_bunit(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_least_bunit(struct file *file, - const char *buffer, unsigned long count, - void *data); -int lprocfs_quota_rd_least_iunit(char *page, char **start, loff_t off, - int 
count, int *eof, void *data); -int lprocfs_quota_wr_least_iunit(struct file *file, - const char *buffer, unsigned long count, - void *data); -int lprocfs_quota_rd_qs_factor(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_qs_factor(struct file *file, - const char *buffer, unsigned long count, - void *data); -#endif /* LPROCFS_SNMP_H */ diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h deleted file mode 100644 index 35c7b582f36d..000000000000 --- a/drivers/staging/lustre/lustre/include/lu_object.h +++ /dev/null @@ -1,1335 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef __LUSTRE_LU_OBJECT_H -#define __LUSTRE_LU_OBJECT_H - -#include <stdarg.h> -#include <linux/percpu_counter.h> -#include <linux/libcfs/libcfs.h> -#include <uapi/linux/lustre/lustre_idl.h> -#include <lu_ref.h> - -struct seq_file; -struct lustre_cfg; -struct lprocfs_stats; - -/** \defgroup lu lu - * lu_* data-types represent server-side entities shared by data and meta-data - * stacks. - * - * Design goals: - * - * -# support for layering. - * - * Server side object is split into layers, one per device in the - * corresponding device stack. Individual layer is represented by struct - * lu_object. Compound layered object --- by struct lu_object_header. Most - * interface functions take lu_object as an argument and operate on the - * whole compound object. This decision was made due to the following - * reasons: - * - * - it's envisaged that lu_object will be used much more often than - * lu_object_header; - * - * - we want lower (non-top) layers to be able to initiate operations - * on the whole object. - * - * Generic code supports layering more complex than simple stacking, e.g., - * it is possible that at some layer object "spawns" multiple sub-objects - * on the lower layer. - * - * -# fid-based identification. - * - * Compound object is uniquely identified by its fid. Objects are indexed - * by their fids (hash table is used for index). - * - * -# caching and life-cycle management. - * - * Object's life-time is controlled by reference counting. When reference - * count drops to 0, object is returned to cache. Cached objects still - * retain their identity (i.e., fid), and can be recovered from cache. - * - * Objects are kept in the global LRU list, and lu_site_purge() function - * can be used to reclaim given number of unused objects from the tail of - * the LRU. - * - * -# avoiding recursion. - * - * Generic code tries to replace recursion through layers by iterations - * where possible. 
Additionally to the end of reducing stack consumption, - * data, when practically possible, are allocated through lu_context_key - * interface rather than on stack. - * @{ - */ - -struct lu_site; -struct lu_object; -struct lu_device; -struct lu_object_header; -struct lu_context; -struct lu_env; - -/** - * Operations common for data and meta-data devices. - */ -struct lu_device_operations { - /** - * Allocate object for the given device (without lower-layer - * parts). This is called by lu_object_operations::loo_object_init() - * from the parent layer, and should setup at least lu_object::lo_dev - * and lu_object::lo_ops fields of resulting lu_object. - * - * Object creation protocol. - * - * Due to design goal of avoiding recursion, object creation (see - * lu_object_alloc()) is somewhat involved: - * - * - first, lu_device_operations::ldo_object_alloc() method of the - * top-level device in the stack is called. It should allocate top - * level object (including lu_object_header), but without any - * lower-layer sub-object(s). - * - * - then lu_object_alloc() sets fid in the header of newly created - * object. - * - * - then lu_object_operations::loo_object_init() is called. It has - * to allocate lower-layer object(s). To do this, - * lu_object_operations::loo_object_init() calls ldo_object_alloc() - * of the lower-layer device(s). - * - * - for all new objects allocated by - * lu_object_operations::loo_object_init() (and inserted into object - * stack), lu_object_operations::loo_object_init() is called again - * repeatedly, until no new objects are created. - * - * \post ergo(!IS_ERR(result), result->lo_dev == d && - * result->lo_ops != NULL); - */ - struct lu_object *(*ldo_object_alloc)(const struct lu_env *env, - const struct lu_object_header *h, - struct lu_device *d); - /** - * process config specific for device. 
- */ - int (*ldo_process_config)(const struct lu_env *env, - struct lu_device *, struct lustre_cfg *); - int (*ldo_recovery_complete)(const struct lu_env *, - struct lu_device *); - - /** - * initialize local objects for device. this method called after layer - * has been initialized (after LCFG_SETUP stage) and before it starts - * serving user requests. - */ - - int (*ldo_prepare)(const struct lu_env *, - struct lu_device *parent, - struct lu_device *dev); - -}; - -/** - * For lu_object_conf flags - */ -enum loc_flags { - /* This is a new object to be allocated, or the file - * corresponding to the object does not exists. - */ - LOC_F_NEW = 0x00000001, -}; - -/** - * Object configuration, describing particulars of object being created. On - * server this is not used, as server objects are full identified by fid. On - * client configuration contains struct lustre_md. - */ -struct lu_object_conf { - /** - * Some hints for obj find and alloc. - */ - enum loc_flags loc_flags; -}; - -/** - * Type of "printer" function used by lu_object_operations::loo_object_print() - * method. - * - * Printer function is needed to provide some flexibility in (semi-)debugging - * output: possible implementations: printk, CDEBUG, sysfs/seq_file - */ -typedef int (*lu_printer_t)(const struct lu_env *env, - void *cookie, const char *format, ...) - __printf(3, 4); - -/** - * Operations specific for particular lu_object. - */ -struct lu_object_operations { - /** - * Allocate lower-layer parts of the object by calling - * lu_device_operations::ldo_object_alloc() of the corresponding - * underlying device. - * - * This method is called once for each object inserted into object - * stack. It's responsibility of this method to insert lower-layer - * object(s) it create into appropriate places of object stack. 
- */ - int (*loo_object_init)(const struct lu_env *env, - struct lu_object *o, - const struct lu_object_conf *conf); - /** - * Called (in top-to-bottom order) during object allocation after all - * layers were allocated and initialized. Can be used to perform - * initialization depending on lower layers. - */ - int (*loo_object_start)(const struct lu_env *env, - struct lu_object *o); - /** - * Called before lu_object_operations::loo_object_free() to signal - * that object is being destroyed. Dual to - * lu_object_operations::loo_object_init(). - */ - void (*loo_object_delete)(const struct lu_env *env, - struct lu_object *o); - /** - * Dual to lu_device_operations::ldo_object_alloc(). Called when - * object is removed from memory. - */ - void (*loo_object_free)(const struct lu_env *env, - struct lu_object *o); - /** - * Called when last active reference to the object is released (and - * object returns to the cache). This method is optional. - */ - void (*loo_object_release)(const struct lu_env *env, - struct lu_object *o); - /** - * Optional debugging helper. Print given object. - */ - int (*loo_object_print)(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o); - /** - * Optional debugging method. Returns true iff method is internally - * consistent. - */ - int (*loo_object_invariant)(const struct lu_object *o); -}; - -/** - * Type of lu_device. - */ -struct lu_device_type; - -/** - * Device: a layer in the server side abstraction stacking. - */ -struct lu_device { - /** - * reference count. This is incremented, in particular, on each object - * created at this layer. - * - * \todo XXX which means that atomic_t is probably too small. - */ - atomic_t ld_ref; - /** - * Pointer to device type. Never modified once set. - */ - struct lu_device_type *ld_type; - /** - * Operation vector for this device. - */ - const struct lu_device_operations *ld_ops; - /** - * Stack this device belongs to. 
- */ - struct lu_site *ld_site; - - /** \todo XXX: temporary back pointer into obd. */ - struct obd_device *ld_obd; - /** - * A list of references to this object, for debugging. - */ - struct lu_ref ld_reference; - /** - * Link the device to the site. - **/ - struct list_head ld_linkage; -}; - -struct lu_device_type_operations; - -/** - * Tag bits for device type. They are used to distinguish certain groups of - * device types. - */ -enum lu_device_tag { - /** this is meta-data device */ - LU_DEVICE_MD = (1 << 0), - /** this is data device */ - LU_DEVICE_DT = (1 << 1), - /** data device in the client stack */ - LU_DEVICE_CL = (1 << 2) -}; - -/** - * Type of device. - */ -struct lu_device_type { - /** - * Tag bits. Taken from enum lu_device_tag. Never modified once set. - */ - __u32 ldt_tags; - /** - * Name of this class. Unique system-wide. Never modified once set. - */ - char *ldt_name; - /** - * Operations for this type. - */ - const struct lu_device_type_operations *ldt_ops; - /** - * \todo XXX: temporary pointer to associated obd_type. - */ - struct obd_type *ldt_obd_type; - /** - * \todo XXX: temporary: context tags used by obd_*() calls. - */ - __u32 ldt_ctx_tags; - /** - * Number of existing device type instances. - */ - atomic_t ldt_device_nr; - /** - * Linkage into a global list of all device types. - * - * \see lu_device_types. - */ - struct list_head ldt_linkage; -}; - -/** - * Operations on a device type. - */ -struct lu_device_type_operations { - /** - * Allocate new device. - */ - struct lu_device *(*ldto_device_alloc)(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *lcfg); - /** - * Free device. Dual to - * lu_device_type_operations::ldto_device_alloc(). Returns pointer to - * the next device in the stack. 
- */ - struct lu_device *(*ldto_device_free)(const struct lu_env *, - struct lu_device *); - - /** - * Initialize the devices after allocation - */ - int (*ldto_device_init)(const struct lu_env *env, - struct lu_device *, const char *, - struct lu_device *); - /** - * Finalize device. Dual to - * lu_device_type_operations::ldto_device_init(). Returns pointer to - * the next device in the stack. - */ - struct lu_device *(*ldto_device_fini)(const struct lu_env *env, - struct lu_device *); - /** - * Initialize device type. This is called on module load. - */ - int (*ldto_init)(struct lu_device_type *t); - /** - * Finalize device type. Dual to - * lu_device_type_operations::ldto_init(). Called on module unload. - */ - void (*ldto_fini)(struct lu_device_type *t); - /** - * Called when the first device is created. - */ - void (*ldto_start)(struct lu_device_type *t); - /** - * Called when number of devices drops to 0. - */ - void (*ldto_stop)(struct lu_device_type *t); -}; - -static inline int lu_device_is_md(const struct lu_device *d) -{ - return ergo(d, d->ld_type->ldt_tags & LU_DEVICE_MD); -} - -/** - * Common object attributes. 
- */ -struct lu_attr { - /** size in bytes */ - __u64 la_size; - /** modification time in seconds since Epoch */ - s64 la_mtime; - /** access time in seconds since Epoch */ - s64 la_atime; - /** change time in seconds since Epoch */ - s64 la_ctime; - /** 512-byte blocks allocated to object */ - __u64 la_blocks; - /** permission bits and file type */ - __u32 la_mode; - /** owner id */ - __u32 la_uid; - /** group id */ - __u32 la_gid; - /** object flags */ - __u32 la_flags; - /** number of persistent references to this object */ - __u32 la_nlink; - /** blk bits of the object*/ - __u32 la_blkbits; - /** blk size of the object*/ - __u32 la_blksize; - /** real device */ - __u32 la_rdev; - /** - * valid bits - * - * \see enum la_valid - */ - __u64 la_valid; -}; - -/** Bit-mask of valid attributes */ -enum la_valid { - LA_ATIME = 1 << 0, - LA_MTIME = 1 << 1, - LA_CTIME = 1 << 2, - LA_SIZE = 1 << 3, - LA_MODE = 1 << 4, - LA_UID = 1 << 5, - LA_GID = 1 << 6, - LA_BLOCKS = 1 << 7, - LA_TYPE = 1 << 8, - LA_FLAGS = 1 << 9, - LA_NLINK = 1 << 10, - LA_RDEV = 1 << 11, - LA_BLKSIZE = 1 << 12, - LA_KILL_SUID = 1 << 13, - LA_KILL_SGID = 1 << 14, -}; - -/** - * Layer in the layered object. - */ -struct lu_object { - /** - * Header for this object. - */ - struct lu_object_header *lo_header; - /** - * Device for this layer. - */ - struct lu_device *lo_dev; - /** - * Operations for this object. - */ - const struct lu_object_operations *lo_ops; - /** - * Linkage into list of all layers. - */ - struct list_head lo_linkage; - /** - * Link to the device, for debugging. - */ - struct lu_ref_link lo_dev_ref; -}; - -enum lu_object_header_flags { - /** - * Don't keep this object in cache. Object will be destroyed as soon - * as last reference to it is released. This flag cannot be cleared - * once set. - */ - LU_OBJECT_HEARD_BANSHEE = 0, - /** - * Mark this object has already been taken out of cache. 
- */ - LU_OBJECT_UNHASHED = 1, -}; - -enum lu_object_header_attr { - LOHA_EXISTS = 1 << 0, - LOHA_REMOTE = 1 << 1, - /** - * UNIX file type is stored in S_IFMT bits. - */ - LOHA_FT_START = 001 << 12, /**< S_IFIFO */ - LOHA_FT_END = 017 << 12, /**< S_IFMT */ -}; - -/** - * "Compound" object, consisting of multiple layers. - * - * Compound object with given fid is unique with given lu_site. - * - * Note, that object does *not* necessary correspond to the real object in the - * persistent storage: object is an anchor for locking and method calling, so - * it is created for things like not-yet-existing child created by mkdir or - * create calls. lu_object_operations::loo_exists() can be used to check - * whether object is backed by persistent storage entity. - */ -struct lu_object_header { - /** - * Fid, uniquely identifying this object. - */ - struct lu_fid loh_fid; - /** - * Object flags from enum lu_object_header_flags. Set and checked - * atomically. - */ - unsigned long loh_flags; - /** - * Object reference count. Protected by lu_site::ls_guard. - */ - atomic_t loh_ref; - /** - * Common object attributes, cached for efficiency. From enum - * lu_object_header_attr. - */ - __u32 loh_attr; - /** - * Linkage into per-site hash table. Protected by lu_site::ls_guard. - */ - struct hlist_node loh_hash; - /** - * Linkage into per-site LRU list. Protected by lu_site::ls_guard. - */ - struct list_head loh_lru; - /** - * Linkage into list of layers. Never modified once set (except lately - * during object destruction). No locking is necessary. - */ - struct list_head loh_layers; - /** - * A list of references to this object, for debugging. - */ - struct lu_ref loh_reference; -}; - -struct fld; - -struct lu_site_bkt_data { - /** - * number of object in this bucket on the lsb_lru list. - */ - long lsb_lru_len; - /** - * LRU list, updated on each access to object. Protected by - * bucket lock of lu_site::ls_obj_hash. - * - * "Cold" end of LRU is lu_site::ls_lru.next. 
Accessed object are - * moved to the lu_site::ls_lru.prev (this is due to the non-existence - * of list_for_each_entry_safe_reverse()). - */ - struct list_head lsb_lru; - /** - * Wait-queue signaled when an object in this site is ultimately - * destroyed (lu_object_free()). It is used by lu_object_find() to - * wait before re-trying when object in the process of destruction is - * found in the hash table. - * - * \see htable_lookup(). - */ - wait_queue_head_t lsb_marche_funebre; -}; - -enum { - LU_SS_CREATED = 0, - LU_SS_CACHE_HIT, - LU_SS_CACHE_MISS, - LU_SS_CACHE_RACE, - LU_SS_CACHE_DEATH_RACE, - LU_SS_LRU_PURGED, - LU_SS_LAST_STAT -}; - -/** - * lu_site is a "compartment" within which objects are unique, and LRU - * discipline is maintained. - * - * lu_site exists so that multiple layered stacks can co-exist in the same - * address space. - * - * lu_site has the same relation to lu_device as lu_object_header to - * lu_object. - */ -struct lu_site { - /** - * objects hash table - */ - struct cfs_hash *ls_obj_hash; - /** - * index of bucket on hash table while purging - */ - unsigned int ls_purge_start; - /** - * Top-level device for this stack. - */ - struct lu_device *ls_top_dev; - /** - * Bottom-level device for this stack - */ - struct lu_device *ls_bottom_dev; - /** - * Linkage into global list of sites. - */ - struct list_head ls_linkage; - /** - * List for lu device for this site, protected - * by ls_ld_lock. - **/ - struct list_head ls_ld_linkage; - spinlock_t ls_ld_lock; - - /** - * Lock to serialize site purge. - */ - struct mutex ls_purge_mutex; - - /** - * lu_site stats - */ - struct lprocfs_stats *ls_stats; - /** - * XXX: a hack! 
fld has to find md_site via site, remove when possible - */ - struct seq_server_site *ld_seq_site; - /** - * Number of objects in lsb_lru_lists - used for shrinking - */ - struct percpu_counter ls_lru_len_counter; -}; - -static inline struct lu_site_bkt_data * -lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid) -{ - struct cfs_hash_bd bd; - - cfs_hash_bd_get(site->ls_obj_hash, fid, &bd); - return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd); -} - -static inline struct seq_server_site *lu_site2seq(const struct lu_site *s) -{ - return s->ld_seq_site; -} - -/** \name ctors - * Constructors/destructors. - * @{ - */ - -int lu_site_init(struct lu_site *s, struct lu_device *d); -void lu_site_fini(struct lu_site *s); -int lu_site_init_finish(struct lu_site *s); -void lu_stack_fini(const struct lu_env *env, struct lu_device *top); -void lu_device_get(struct lu_device *d); -void lu_device_put(struct lu_device *d); -int lu_device_init(struct lu_device *d, struct lu_device_type *t); -void lu_device_fini(struct lu_device *d); -int lu_object_header_init(struct lu_object_header *h); -void lu_object_header_fini(struct lu_object_header *h); -int lu_object_init(struct lu_object *o, - struct lu_object_header *h, struct lu_device *d); -void lu_object_fini(struct lu_object *o); -void lu_object_add_top(struct lu_object_header *h, struct lu_object *o); -void lu_object_add(struct lu_object *before, struct lu_object *o); - -/** - * Helpers to initialize and finalize device types. - */ - -int lu_device_type_init(struct lu_device_type *ldt); -void lu_device_type_fini(struct lu_device_type *ldt); - -/** @} ctors */ - -/** \name caching - * Caching and reference counting. - * @{ - */ - -/** - * Acquire additional reference to the given object. This function is used to - * attain additional reference. To acquire initial reference use - * lu_object_find(). 
- */ -static inline void lu_object_get(struct lu_object *o) -{ - LASSERT(atomic_read(&o->lo_header->loh_ref) > 0); - atomic_inc(&o->lo_header->loh_ref); -} - -/** - * Return true of object will not be cached after last reference to it is - * released. - */ -static inline int lu_object_is_dying(const struct lu_object_header *h) -{ - return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags); -} - -void lu_object_put(const struct lu_env *env, struct lu_object *o); -void lu_object_unhash(const struct lu_env *env, struct lu_object *o); -int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s, int nr, - bool canblock); - -static inline int lu_site_purge(const struct lu_env *env, struct lu_site *s, - int nr) -{ - return lu_site_purge_objects(env, s, nr, true); -} - -void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie, - lu_printer_t printer); -struct lu_object *lu_object_find_at(const struct lu_env *env, - struct lu_device *dev, - const struct lu_fid *f, - const struct lu_object_conf *conf); -struct lu_object *lu_object_find_slice(const struct lu_env *env, - struct lu_device *dev, - const struct lu_fid *f, - const struct lu_object_conf *conf); -/** @} caching */ - -/** \name helpers - * Helpers. - * @{ - */ - -/** - * First (topmost) sub-object of given compound object - */ -static inline struct lu_object *lu_object_top(struct lu_object_header *h) -{ - LASSERT(!list_empty(&h->loh_layers)); - return container_of0(h->loh_layers.next, struct lu_object, lo_linkage); -} - -/** - * Next sub-object in the layering - */ -static inline struct lu_object *lu_object_next(const struct lu_object *o) -{ - return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage); -} - -/** - * Pointer to the fid of this object. 
- */ -static inline const struct lu_fid *lu_object_fid(const struct lu_object *o) -{ - return &o->lo_header->loh_fid; -} - -/** - * return device operations vector for this object - */ -static inline const struct lu_device_operations * -lu_object_ops(const struct lu_object *o) -{ - return o->lo_dev->ld_ops; -} - -/** - * Given a compound object, find its slice, corresponding to the device type - * \a dtype. - */ -struct lu_object *lu_object_locate(struct lu_object_header *h, - const struct lu_device_type *dtype); - -/** - * Printer function emitting messages through libcfs_debug_msg(). - */ -int lu_cdebug_printer(const struct lu_env *env, - void *cookie, const char *format, ...); - -/** - * Print object description followed by a user-supplied message. - */ -#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \ -do { \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - lu_object_print(env, &msgdata, lu_cdebug_printer, object);\ - CDEBUG(mask, format "\n", ## __VA_ARGS__); \ - } \ -} while (0) - -/** - * Print short object description followed by a user-supplied message. - */ -#define LU_OBJECT_HEADER(mask, env, object, format, ...) \ -do { \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - lu_object_header_print(env, &msgdata, lu_cdebug_printer,\ - (object)->lo_header); \ - lu_cdebug_printer(env, &msgdata, "\n"); \ - CDEBUG(mask, format, ## __VA_ARGS__); \ - } \ -} while (0) - -void lu_object_print (const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct lu_object *o); -void lu_object_header_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, - const struct lu_object_header *hdr); - -/** - * Check object consistency. - */ -int lu_object_invariant(const struct lu_object *o); - -/** - * Check whether object exists, no matter on local or remote storage. 
- * Note: LOHA_EXISTS will be set once some one created the object, - * and it does not needs to be committed to storage. - */ -#define lu_object_exists(o) ((o)->lo_header->loh_attr & LOHA_EXISTS) - -/** - * Check whether object on the remote storage. - */ -#define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE) - -static inline int lu_object_assert_exists(const struct lu_object *o) -{ - return lu_object_exists(o); -} - -static inline int lu_object_assert_not_exists(const struct lu_object *o) -{ - return !lu_object_exists(o); -} - -/** - * Attr of this object. - */ -static inline __u32 lu_object_attr(const struct lu_object *o) -{ - LASSERT(lu_object_exists(o) != 0); - return o->lo_header->loh_attr; -} - -static inline void lu_object_ref_add(struct lu_object *o, - const char *scope, - const void *source) -{ - lu_ref_add(&o->lo_header->loh_reference, scope, source); -} - -static inline void lu_object_ref_add_at(struct lu_object *o, - struct lu_ref_link *link, - const char *scope, - const void *source) -{ - lu_ref_add_at(&o->lo_header->loh_reference, link, scope, source); -} - -static inline void lu_object_ref_del(struct lu_object *o, - const char *scope, const void *source) -{ - lu_ref_del(&o->lo_header->loh_reference, scope, source); -} - -static inline void lu_object_ref_del_at(struct lu_object *o, - struct lu_ref_link *link, - const char *scope, const void *source) -{ - lu_ref_del_at(&o->lo_header->loh_reference, link, scope, source); -} - -/** input params, should be filled out by mdt */ -struct lu_rdpg { - /** hash */ - __u64 rp_hash; - /** count in bytes */ - unsigned int rp_count; - /** number of pages */ - unsigned int rp_npages; - /** requested attr */ - __u32 rp_attrs; - /** pointers to pages */ - struct page **rp_pages; -}; - -enum lu_xattr_flags { - LU_XATTR_REPLACE = (1 << 0), - LU_XATTR_CREATE = (1 << 1) -}; - -/** @} helpers */ - -/** \name lu_context - * @{ - */ - -/** For lu_context health-checks */ -enum lu_context_state { - 
LCS_INITIALIZED = 1, - LCS_ENTERED, - LCS_LEFT, - LCS_FINALIZED -}; - -/** - * lu_context. Execution context for lu_object methods. Currently associated - * with thread. - * - * All lu_object methods, except device and device type methods (called during - * system initialization and shutdown) are executed "within" some - * lu_context. This means, that pointer to some "current" lu_context is passed - * as an argument to all methods. - * - * All service ptlrpc threads create lu_context as part of their - * initialization. It is possible to create "stand-alone" context for other - * execution environments (like system calls). - * - * lu_object methods mainly use lu_context through lu_context_key interface - * that allows each layer to associate arbitrary pieces of data with each - * context (see pthread_key_create(3) for similar interface). - * - * On a client, lu_context is bound to a thread, see cl_env_get(). - * - * \see lu_context_key - */ -struct lu_context { - /** - * lu_context is used on the client side too. Yet we don't want to - * allocate values of server-side keys for the client contexts and - * vice versa. - * - * To achieve this, set of tags in introduced. Contexts and keys are - * marked with tags. Key value are created only for context whose set - * of tags has non-empty intersection with one for key. Tags are taken - * from enum lu_context_tag. - */ - __u32 lc_tags; - enum lu_context_state lc_state; - /** - * Pointer to the home service thread. NULL for other execution - * contexts. - */ - struct ptlrpc_thread *lc_thread; - /** - * Pointer to an array with key values. Internal implementation - * detail. - */ - void **lc_value; - /** - * Linkage into a list of all remembered contexts. Only - * `non-transient' contexts, i.e., ones created for service threads - * are placed here. - */ - struct list_head lc_remember; - /** - * Version counter used to skip calls to lu_context_refill() when no - * keys were registered. 
- */ - unsigned int lc_version; - /** - * Debugging cookie. - */ - unsigned int lc_cookie; -}; - -/** - * lu_context_key interface. Similar to pthread_key. - */ - -enum lu_context_tag { - /** - * Thread on md server - */ - LCT_MD_THREAD = 1 << 0, - /** - * Thread on dt server - */ - LCT_DT_THREAD = 1 << 1, - /** - * Context for transaction handle - */ - LCT_TX_HANDLE = 1 << 2, - /** - * Thread on client - */ - LCT_CL_THREAD = 1 << 3, - /** - * A per-request session on a server, and a per-system-call session on - * a client. - */ - LCT_SESSION = 1 << 4, - /** - * A per-request data on OSP device - */ - LCT_OSP_THREAD = 1 << 5, - /** - * MGS device thread - */ - LCT_MG_THREAD = 1 << 6, - /** - * Context for local operations - */ - LCT_LOCAL = 1 << 7, - /** - * session for server thread - **/ - LCT_SERVER_SESSION = BIT(8), - /** - * Set when at least one of keys, having values in this context has - * non-NULL lu_context_key::lct_exit() method. This is used to - * optimize lu_context_exit() call. - */ - LCT_HAS_EXIT = 1 << 28, - /** - * Don't add references for modules creating key values in that context. - * This is only for contexts used internally by lu_object framework. - */ - LCT_NOREF = 1 << 29, - /** - * Key is being prepared for retiring, don't create new values for it. - */ - LCT_QUIESCENT = 1 << 30, - /** - * Context should be remembered. - */ - LCT_REMEMBER = 1 << 31, - /** - * Contexts usable in cache shrinker thread. - */ - LCT_SHRINKER = LCT_MD_THREAD | LCT_DT_THREAD | LCT_CL_THREAD | - LCT_NOREF -}; - -/** - * Key. Represents per-context value slot. - * - * Keys are usually registered when module owning the key is initialized, and - * de-registered when module is unloaded. Once key is registered, all new - * contexts with matching tags, will get key value. "Old" contexts, already - * initialized at the time of key registration, can be forced to get key value - * by calling lu_context_refill(). 
- * - * Every key value is counted in lu_context_key::lct_used and acquires a - * reference on an owning module. This means, that all key values have to be - * destroyed before module can be unloaded. This is usually achieved by - * stopping threads started by the module, that created contexts in their - * entry functions. Situation is complicated by the threads shared by multiple - * modules, like ptlrpcd daemon on a client. To work around this problem, - * contexts, created in such threads, are `remembered' (see - * LCT_REMEMBER)---i.e., added into a global list. When module is preparing - * for unloading it does the following: - * - * - marks its keys as `quiescent' (lu_context_tag::LCT_QUIESCENT) - * preventing new key values from being allocated in the new contexts, - * and - * - * - scans a list of remembered contexts, destroying values of module - * keys, thus releasing references to the module. - * - * This is done by lu_context_key_quiesce(). If module is re-activated - * before key has been de-registered, lu_context_key_revive() call clears - * `quiescent' marker. - * - * lu_context code doesn't provide any internal synchronization for these - * activities---it's assumed that startup (including threads start-up) and - * shutdown are serialized by some external means. - * - * \see lu_context - */ -struct lu_context_key { - /** - * Set of tags for which values of this key are to be instantiated. - */ - __u32 lct_tags; - /** - * Value constructor. This is called when new value is created for a - * context. Returns pointer to new value of error pointer. - */ - void *(*lct_init)(const struct lu_context *ctx, - struct lu_context_key *key); - /** - * Value destructor. Called when context with previously allocated - * value of this slot is destroyed. \a data is a value that was returned - * by a matching call to lu_context_key::lct_init(). 
- */ - void (*lct_fini)(const struct lu_context *ctx, - struct lu_context_key *key, void *data); - /** - * Optional method called on lu_context_exit() for all allocated - * keys. Can be used by debugging code checking that locks are - * released, etc. - */ - void (*lct_exit)(const struct lu_context *ctx, - struct lu_context_key *key, void *data); - /** - * Internal implementation detail: index within lu_context::lc_value[] - * reserved for this key. - */ - int lct_index; - /** - * Internal implementation detail: number of values created for this - * key. - */ - atomic_t lct_used; - /** - * Internal implementation detail: module for this key. - */ - struct module *lct_owner; - /** - * References to this key. For debugging. - */ - struct lu_ref lct_reference; -}; - -#define LU_KEY_INIT(mod, type) \ - static void *mod##_key_init(const struct lu_context *ctx, \ - struct lu_context_key *key) \ - { \ - type *value; \ - \ - BUILD_BUG_ON(sizeof(*value) > PAGE_SIZE); \ - \ - value = kzalloc(sizeof(*value), GFP_NOFS); \ - if (!value) \ - value = ERR_PTR(-ENOMEM); \ - \ - return value; \ - } \ - struct __##mod##__dummy_init {; } /* semicolon catcher */ - -#define LU_KEY_FINI(mod, type) \ - static void mod##_key_fini(const struct lu_context *ctx, \ - struct lu_context_key *key, void *data) \ - { \ - type *info = data; \ - \ - kfree(info); \ - } \ - struct __##mod##__dummy_fini {; } /* semicolon catcher */ - -#define LU_KEY_INIT_FINI(mod, type) \ - LU_KEY_INIT(mod, type); \ - LU_KEY_FINI(mod, type) - -#define LU_CONTEXT_KEY_DEFINE(mod, tags) \ - struct lu_context_key mod##_thread_key = { \ - .lct_tags = tags, \ - .lct_init = mod##_key_init, \ - .lct_fini = mod##_key_fini \ - } - -#define LU_CONTEXT_KEY_INIT(key) \ -do { \ - (key)->lct_owner = THIS_MODULE; \ -} while (0) - -int lu_context_key_register(struct lu_context_key *key); -void lu_context_key_degister(struct lu_context_key *key); -void *lu_context_key_get(const struct lu_context *ctx, - const struct lu_context_key *key); 
-void lu_context_key_quiesce(struct lu_context_key *key); -void lu_context_key_revive(struct lu_context_key *key); - -/* - * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an - * owning module. - */ - -#define LU_KEY_INIT_GENERIC(mod) \ - static void mod##_key_init_generic(struct lu_context_key *k, ...) \ - { \ - struct lu_context_key *key = k; \ - va_list args; \ - \ - va_start(args, k); \ - do { \ - LU_CONTEXT_KEY_INIT(key); \ - key = va_arg(args, struct lu_context_key *); \ - } while (key); \ - va_end(args); \ - } - -#define LU_TYPE_INIT(mod, ...) \ - LU_KEY_INIT_GENERIC(mod) \ - static int mod##_type_init(struct lu_device_type *t) \ - { \ - mod##_key_init_generic(__VA_ARGS__, NULL); \ - return lu_context_key_register_many(__VA_ARGS__, NULL); \ - } \ - struct __##mod##_dummy_type_init {; } - -#define LU_TYPE_FINI(mod, ...) \ - static void mod##_type_fini(struct lu_device_type *t) \ - { \ - lu_context_key_degister_many(__VA_ARGS__, NULL); \ - } \ - struct __##mod##_dummy_type_fini {; } - -#define LU_TYPE_START(mod, ...) \ - static void mod##_type_start(struct lu_device_type *t) \ - { \ - lu_context_key_revive_many(__VA_ARGS__, NULL); \ - } \ - struct __##mod##_dummy_type_start {; } - -#define LU_TYPE_STOP(mod, ...) \ - static void mod##_type_stop(struct lu_device_type *t) \ - { \ - lu_context_key_quiesce_many(__VA_ARGS__, NULL); \ - } \ - struct __##mod##_dummy_type_stop {; } - -#define LU_TYPE_INIT_FINI(mod, ...) \ - LU_TYPE_INIT(mod, __VA_ARGS__); \ - LU_TYPE_FINI(mod, __VA_ARGS__); \ - LU_TYPE_START(mod, __VA_ARGS__); \ - LU_TYPE_STOP(mod, __VA_ARGS__) - -int lu_context_init(struct lu_context *ctx, __u32 tags); -void lu_context_fini(struct lu_context *ctx); -void lu_context_enter(struct lu_context *ctx); -void lu_context_exit(struct lu_context *ctx); -int lu_context_refill(struct lu_context *ctx); - -/* - * Helper functions to operate on multiple keys. These are used by the default - * device type operations, defined by LU_TYPE_INIT_FINI(). 
- */ - -int lu_context_key_register_many(struct lu_context_key *k, ...); -void lu_context_key_degister_many(struct lu_context_key *k, ...); -void lu_context_key_revive_many(struct lu_context_key *k, ...); -void lu_context_key_quiesce_many(struct lu_context_key *k, ...); - -/** - * Environment. - */ -struct lu_env { - /** - * "Local" context, used to store data instead of stack. - */ - struct lu_context le_ctx; - /** - * "Session" context for per-request data. - */ - struct lu_context *le_ses; -}; - -int lu_env_init(struct lu_env *env, __u32 tags); -void lu_env_fini(struct lu_env *env); -int lu_env_refill(struct lu_env *env); - -/** @} lu_context */ - -/** - * Output site statistical counters into a buffer. Suitable for - * ll_rd_*()-style functions. - */ -int lu_site_stats_print(const struct lu_site *s, struct seq_file *m); - -/** - * Common name structure to be passed around for various name related methods. - */ -struct lu_name { - const char *ln_name; - int ln_namelen; -}; - -/** - * Validate names (path components) - * - * To be valid \a name must be non-empty, '\0' terminated of length \a - * name_len, and not contain '/'. The maximum length of a name (before - * say -ENAMETOOLONG will be returned) is really controlled by llite - * and the server. We only check for something insane coming from bad - * integer handling here. - */ -static inline bool lu_name_is_valid_2(const char *name, size_t name_len) -{ - return name && name_len > 0 && name_len < INT_MAX && - name[name_len] == '\0' && strlen(name) == name_len && - !memchr(name, '/', name_len); -} - -/** - * Common buffer structure to be passed around for various xattr_{s,g}et() - * methods. - */ -struct lu_buf { - void *lb_buf; - size_t lb_len; -}; - -/** - * One-time initializers, called at obdclass module initialization, not - * exported. - */ - -/** - * Initialization of global lu_* data. - */ -int lu_global_init(void); - -/** - * Dual to lu_global_init(). 
- */ -void lu_global_fini(void); - -struct lu_kmem_descr { - struct kmem_cache **ckd_cache; - const char *ckd_name; - const size_t ckd_size; -}; - -int lu_kmem_init(struct lu_kmem_descr *caches); -void lu_kmem_fini(struct lu_kmem_descr *caches); - -extern __u32 lu_context_tags_default; -extern __u32 lu_session_tags_default; - -/** @} lu */ -#endif /* __LUSTRE_LU_OBJECT_H */ diff --git a/drivers/staging/lustre/lustre/include/lu_ref.h b/drivers/staging/lustre/lustre/include/lu_ref.h deleted file mode 100644 index ad0c24d29ffa..000000000000 --- a/drivers/staging/lustre/lustre/include/lu_ref.h +++ /dev/null @@ -1,178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - * - * Author: Nikita Danilov <nikita.danilov@sun.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __LUSTRE_LU_REF_H -#define __LUSTRE_LU_REF_H - -#include <linux/list.h> - -/** \defgroup lu_ref lu_ref - * - * An interface to track references between objects. Mostly for debugging. - * - * Suppose there is a reference counted data-structure struct foo. To track - * who acquired references to instance of struct foo, add lu_ref field to it: - * - * \code - * struct foo { - * atomic_t foo_refcount; - * struct lu_ref foo_reference; - * ... - * }; - * \endcode - * - * foo::foo_reference has to be initialized by calling - * lu_ref_init(). 
Typically there will be functions or macros to increment and - * decrement foo::foo_refcount, let's say they are foo_get(struct foo *foo) - * and foo_put(struct foo *foo), respectively. - * - * Whenever foo_get() is called to acquire a reference on a foo, lu_ref_add() - * has to be called to insert into foo::foo_reference a record, describing - * acquired reference. Dually, lu_ref_del() removes matching record. Typical - * usages are: - * - * \code - * struct bar *bar; - * - * // bar owns a reference to foo. - * bar->bar_foo = foo_get(foo); - * lu_ref_add(&foo->foo_reference, "bar", bar); - * - * ... - * - * // reference from bar to foo is released. - * lu_ref_del(&foo->foo_reference, "bar", bar); - * foo_put(bar->bar_foo); - * - * - * // current thread acquired a temporary reference to foo. - * foo_get(foo); - * lu_ref_add(&foo->reference, __func__, current); - * - * ... - * - * // temporary reference is released. - * lu_ref_del(&foo->reference, __func__, current); - * foo_put(foo); - * \endcode - * - * \e Et \e cetera. Often it makes sense to include lu_ref_add() and - * lu_ref_del() calls into foo_get() and foo_put(). When an instance of struct - * foo is destroyed, lu_ref_fini() has to be called that checks that no - * pending references remain. lu_ref_print() can be used to dump a list of - * pending references, while hunting down a leak. - * - * For objects to which a large number of references can be acquired, - * lu_ref_del() can become cpu consuming, as it has to scan the list of - * references. To work around this, remember result of lu_ref_add() (usually - * in the same place where pointer to struct foo is stored), and use - * lu_ref_del_at(): - * - * \code - * // There is a large number of bar's for a single foo. - * bar->bar_foo = foo_get(foo); - * bar->bar_foo_ref = lu_ref_add(&foo->foo_reference, "bar", bar); - * - * ... - * - * // reference from bar to foo is released. 
- * lu_ref_del_at(&foo->foo_reference, bar->bar_foo_ref, "bar", bar); - * foo_put(bar->bar_foo); - * \endcode - * - * lu_ref interface degrades gracefully in case of memory shortages. - * - * @{ - */ - -/* - * dummy data structures/functions to pass compile for now. - * We need to reimplement them with kref. - */ -struct lu_ref {}; -struct lu_ref_link {}; - -static inline void lu_ref_init(struct lu_ref *ref) -{ -} - -static inline void lu_ref_fini(struct lu_ref *ref) -{ -} - -static inline struct lu_ref_link *lu_ref_add(struct lu_ref *ref, - const char *scope, - const void *source) -{ - return NULL; -} - -static inline struct lu_ref_link *lu_ref_add_atomic(struct lu_ref *ref, - const char *scope, - const void *source) -{ - return NULL; -} - -static inline void lu_ref_add_at(struct lu_ref *ref, - struct lu_ref_link *link, - const char *scope, - const void *source) -{ -} - -static inline void lu_ref_del(struct lu_ref *ref, const char *scope, - const void *source) -{ -} - -static inline void lu_ref_set_at(struct lu_ref *ref, struct lu_ref_link *link, - const char *scope, const void *source0, - const void *source1) -{ -} - -static inline void lu_ref_del_at(struct lu_ref *ref, struct lu_ref_link *link, - const char *scope, const void *source) -{ -} - -static inline int lu_ref_global_init(void) -{ - return 0; -} - -static inline void lu_ref_global_fini(void) -{ -} - -static inline void lu_ref_print(const struct lu_ref *ref) -{ -} - -static inline void lu_ref_print_all(void) -{ -} - -/** @} lu */ - -#endif /* __LUSTRE_LU_REF_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h b/drivers/staging/lustre/lustre/include/lustre_acl.h deleted file mode 100644 index 35ff61ce4e9d..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_acl.h +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_acl.h - */ - -#ifndef _LUSTRE_ACL_H -#define _LUSTRE_ACL_H - -#include <linux/fs.h> -#include <linux/dcache.h> -#include <linux/posix_acl_xattr.h> - -#define LUSTRE_POSIX_ACL_MAX_ENTRIES 32 -#define LUSTRE_POSIX_ACL_MAX_SIZE \ - (sizeof(struct posix_acl_xattr_header) + \ - LUSTRE_POSIX_ACL_MAX_ENTRIES * sizeof(struct posix_acl_xattr_entry)) - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h deleted file mode 100644 index 9f488e605083..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_compat.h +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef _LUSTRE_COMPAT_H -#define _LUSTRE_COMPAT_H - -#include <linux/fs_struct.h> -#include <linux/namei.h> -#include <linux/cred.h> - -#include <lustre_patchless_compat.h> - -/* - * set ATTR_BLOCKS to a high value to avoid any risk of collision with other - * ATTR_* attributes (see bug 13828) - */ -#define ATTR_BLOCKS (1 << 27) - -#define current_ngroups current_cred()->group_info->ngroups -#define current_groups current_cred()->group_info->small_block - -/* - * OBD need working random driver, thus all our - * initialization routines must be called after device - * driver initialization - */ -#ifndef MODULE -#undef module_init -#define module_init(a) late_initcall(a) -#endif - -#define LTIME_S(time) (time.tv_sec) - -#ifndef QUOTA_OK -# define QUOTA_OK 0 -#endif -#ifndef NO_QUOTA -# define NO_QUOTA (-EDQUOT) -#endif - -#if !defined(_ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_) && !defined(ext2_set_bit) -# define ext2_set_bit __test_and_set_bit_le -# define ext2_clear_bit __test_and_clear_bit_le -# define ext2_test_bit test_bit_le -# define ext2_find_first_zero_bit find_first_zero_bit_le -# define ext2_find_next_zero_bit find_next_zero_bit_le -#endif - 
-#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) - -#endif /* _LUSTRE_COMPAT_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_debug.h b/drivers/staging/lustre/lustre/include/lustre_debug.h deleted file mode 100644 index 721a81f923e3..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_debug.h +++ /dev/null @@ -1,52 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef _LUSTRE_DEBUG_H -#define _LUSTRE_DEBUG_H - -/** \defgroup debug debug - * - * @{ - */ - -#include <lustre_net.h> -#include <obd.h> - -/* lib/debug.c */ -int dump_req(struct ptlrpc_request *req); -int block_debug_setup(void *addr, int len, __u64 off, __u64 id); -int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id); - -/** @} debug */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h deleted file mode 100644 index 100e993ab00b..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_disk.h +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_disk.h - * - * Lustre disk format definitions. 
- * - * Author: Nathan Rutman <nathan@clusterfs.com> - */ - -#ifndef _LUSTRE_DISK_H -#define _LUSTRE_DISK_H - -/** \defgroup disk disk - * - * @{ - */ - -#include <asm/byteorder.h> -#include <linux/types.h> -#include <linux/backing-dev.h> -#include <linux/libcfs/libcfs.h> - -/****************** persistent mount data *********************/ - -#define LDD_F_SV_TYPE_MDT 0x0001 -#define LDD_F_SV_TYPE_OST 0x0002 -#define LDD_F_SV_TYPE_MGS 0x0004 -#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT | \ - LDD_F_SV_TYPE_OST | \ - LDD_F_SV_TYPE_MGS) -#define LDD_F_SV_ALL 0x0008 - -/****************** mount command *********************/ - -/* The lmd is only used internally by Lustre; mount simply passes - * everything as string options - */ - -#define LMD_MAGIC 0xbdacbd03 -#define LMD_PARAMS_MAXLEN 4096 - -/* gleaned from the mount command - no persistent info here */ -struct lustre_mount_data { - __u32 lmd_magic; - __u32 lmd_flags; /* lustre mount flags */ - int lmd_mgs_failnodes; /* mgs failover node count */ - int lmd_exclude_count; - int lmd_recovery_time_soft; - int lmd_recovery_time_hard; - char *lmd_dev; /* device name */ - char *lmd_profile; /* client only */ - char *lmd_mgssec; /* sptlrpc flavor to mgs */ - char *lmd_opts; /* lustre mount options (as opposed to - * _device_ mount options) - */ - char *lmd_params; /* lustre params */ - __u32 *lmd_exclude; /* array of OSTs to ignore */ - char *lmd_mgs; /* MGS nid */ - char *lmd_osd_type; /* OSD type */ -}; - -#define LMD_FLG_SERVER 0x0001 /* Mounting a server */ -#define LMD_FLG_CLIENT 0x0002 /* Mounting a client */ -#define LMD_FLG_ABORT_RECOV 0x0008 /* Abort recovery */ -#define LMD_FLG_NOSVC 0x0010 /* Only start MGS/MGC for servers, - * no other services - */ -#define LMD_FLG_NOMGS 0x0020 /* Only start target for servers, - * reusing existing MGS services - */ -#define LMD_FLG_WRITECONF 0x0040 /* Rewrite config log */ -#define LMD_FLG_NOIR 0x0080 /* NO imperative recovery */ -#define LMD_FLG_NOSCRUB 0x0100 /* Do not 
trigger scrub automatically */ -#define LMD_FLG_MGS 0x0200 /* Also start MGS along with server */ -#define LMD_FLG_IAM 0x0400 /* IAM dir */ -#define LMD_FLG_NO_PRIMNODE 0x0800 /* all nodes are service nodes */ -#define LMD_FLG_VIRGIN 0x1000 /* the service registers first time */ -#define LMD_FLG_UPDATE 0x2000 /* update parameters */ -#define LMD_FLG_HSM 0x4000 /* Start coordinator */ - -#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) - -/****************** superblock additional info *********************/ - -struct ll_sb_info; - -struct lustre_sb_info { - int lsi_flags; - struct obd_device *lsi_mgc; /* mgc obd */ - struct lustre_mount_data *lsi_lmd; /* mount command info */ - struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */ - struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/ - atomic_t lsi_mounts; /* references to the srv_mnt */ - char lsi_svname[MTI_NAME_MAXLEN]; - char lsi_osd_obdname[64]; - char lsi_osd_uuid[64]; - struct obd_export *lsi_osd_exp; - char lsi_osd_type[16]; - char lsi_fstype[16]; -}; - -#define LSI_UMOUNT_FAILOVER 0x00200000 - -#define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info)) -#define s2lsi_nocast(sb) ((sb)->s_fs_info) - -#define get_profile_name(sb) (s2lsi(sb)->lsi_lmd->lmd_profile) - -/****************** prototypes *********************/ - -/* obd_mount.c */ - -int lustre_start_mgc(struct super_block *sb); -void lustre_register_super_ops(struct module *mod, - int (*cfs)(struct super_block *sb), - void (*ksc)(struct super_block *sb)); -int lustre_common_put_super(struct super_block *sb); - -int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type); - -/** @} disk */ - -#endif /* _LUSTRE_DISK_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h deleted file mode 100644 index 239aa2b1268f..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h +++ /dev/null @@ -1,1354 +0,0 @@ -// SPDX-License-Identifier: 
GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -/** \defgroup LDLM Lustre Distributed Lock Manager - * - * Lustre DLM is based on VAX DLM. - * Its two main roles are: - * - To provide locking assuring consistency of data on all Lustre nodes. - * - To allow clients to cache state protected by a lock by holding the - * lock until a conflicting lock is requested or it is expired by the LRU. 
- * - * @{ - */ - -#ifndef _LUSTRE_DLM_H__ -#define _LUSTRE_DLM_H__ - -#include <lustre_lib.h> -#include <lustre_net.h> -#include <lustre_import.h> -#include <lustre_handles.h> -#include <interval_tree.h> /* for interval_node{}, ldlm_extent */ -#include <lu_ref.h> - -#include "lustre_dlm_flags.h" - -struct obd_ops; -struct obd_device; - -#define OBD_LDLM_DEVICENAME "ldlm" - -#define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) -#define LDLM_DEFAULT_MAX_ALIVE (65 * 60 * HZ) /* 65 min */ -#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024 - -/** - * LDLM non-error return states - */ -enum ldlm_error { - ELDLM_OK = 0, - ELDLM_LOCK_MATCHED = 1, - - ELDLM_LOCK_CHANGED = 300, - ELDLM_LOCK_ABORTED = 301, - ELDLM_LOCK_REPLACED = 302, - ELDLM_NO_LOCK_DATA = 303, - ELDLM_LOCK_WOULDBLOCK = 304, - - ELDLM_NAMESPACE_EXISTS = 400, - ELDLM_BAD_NAMESPACE = 401 -}; - -/** - * LDLM namespace type. - * The "client" type is actually an indication that this is a narrow local view - * into complete namespace on the server. Such namespaces cannot make any - * decisions about lack of conflicts or do any autonomous lock granting without - * first speaking to a server. - */ -enum ldlm_side { - LDLM_NAMESPACE_SERVER = 1 << 0, - LDLM_NAMESPACE_CLIENT = 1 << 1 -}; - -/** - * The blocking callback is overloaded to perform two functions. These flags - * indicate which operation should be performed. - */ -#define LDLM_CB_BLOCKING 1 -#define LDLM_CB_CANCELING 2 - -/** - * \name Lock Compatibility Matrix. - * - * A lock has both a type (extent, flock, inode bits, or plain) and a mode. - * Lock types are described in their respective implementation files: - * ldlm_{extent,flock,inodebits,plain}.c. - * - * There are six lock modes along with a compatibility matrix to indicate if - * two locks are compatible. - * - * - EX: Exclusive mode. Before a new file is created, MDS requests EX lock - * on the parent. - * - PW: Protective Write (normal write) mode. 
When a client requests a write - * lock from an OST, a lock with PW mode will be issued. - * - PR: Protective Read (normal read) mode. When a client requests a read from - * an OST, a lock with PR mode will be issued. Also, if the client opens a - * file for execution, it is granted a lock with PR mode. - * - CW: Concurrent Write mode. The type of lock that the MDS grants if a client - * requests a write lock during a file open operation. - * - CR Concurrent Read mode. When a client performs a path lookup, MDS grants - * an inodebit lock with the CR mode on the intermediate path component. - * - NL Null mode. - * - * <PRE> - * NL CR CW PR PW EX - * NL 1 1 1 1 1 1 - * CR 1 1 1 1 1 0 - * CW 1 1 1 0 0 0 - * PR 1 1 0 1 0 0 - * PW 1 1 0 0 0 0 - * EX 1 0 0 0 0 0 - * </PRE> - */ -/** @{ */ -#define LCK_COMPAT_EX LCK_NL -#define LCK_COMPAT_PW (LCK_COMPAT_EX | LCK_CR) -#define LCK_COMPAT_PR (LCK_COMPAT_PW | LCK_PR) -#define LCK_COMPAT_CW (LCK_COMPAT_PW | LCK_CW) -#define LCK_COMPAT_CR (LCK_COMPAT_CW | LCK_PR | LCK_PW) -#define LCK_COMPAT_NL (LCK_COMPAT_CR | LCK_EX | LCK_GROUP) -#define LCK_COMPAT_GROUP (LCK_GROUP | LCK_NL) -#define LCK_COMPAT_COS (LCK_COS) -/** @} Lock Compatibility Matrix */ - -extern enum ldlm_mode lck_compat_array[]; - -static inline void lockmode_verify(enum ldlm_mode mode) -{ - LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE); -} - -static inline int lockmode_compat(enum ldlm_mode exist_mode, - enum ldlm_mode new_mode) -{ - return (lck_compat_array[exist_mode] & new_mode); -} - -/* - * - * cluster name spaces - * - */ - -#define DLM_OST_NAMESPACE 1 -#define DLM_MDS_NAMESPACE 2 - -/* XXX - - do we just separate this by security domains and use a prefix for - multiple namespaces in the same domain? 
- - -*/ - -/** - * Locking rules for LDLM: - * - * lr_lock - * - * lr_lock - * waiting_locks_spinlock - * - * lr_lock - * led_lock - * - * lr_lock - * ns_lock - * - * lr_lvb_mutex - * lr_lock - * - */ - -struct ldlm_pool; -struct ldlm_lock; -struct ldlm_resource; -struct ldlm_namespace; - -/** - * Operations on LDLM pools. - * LDLM pool is a pool of locks in the namespace without any implicitly - * specified limits. - * Locks in the pool are organized in LRU. - * Local memory pressure or server instructions (e.g. mempressure on server) - * can trigger freeing of locks from the pool - */ -struct ldlm_pool_ops { - /** Recalculate pool \a pl usage */ - int (*po_recalc)(struct ldlm_pool *pl); - /** Cancel at least \a nr locks from pool \a pl */ - int (*po_shrink)(struct ldlm_pool *pl, int nr, - gfp_t gfp_mask); -}; - -/** One second for pools thread check interval. Each pool has own period. */ -#define LDLM_POOLS_THREAD_PERIOD (1) - -/** ~6% margin for modest pools. See ldlm_pool.c for details. */ -#define LDLM_POOLS_MODEST_MARGIN_SHIFT (4) - -/** Default recalc period for server side pools in sec. */ -#define LDLM_POOL_SRV_DEF_RECALC_PERIOD (1) - -/** Default recalc period for client side pools in sec. */ -#define LDLM_POOL_CLI_DEF_RECALC_PERIOD (10) - -/** - * LDLM pool structure to track granted locks. - * For purposes of determining when to release locks on e.g. memory pressure. - * This feature is commonly referred to as lru_resize. - */ -struct ldlm_pool { - /** Pool debugfs directory. */ - struct dentry *pl_debugfs_entry; - /** Pool name, must be long enough to hold compound proc entry name. */ - char pl_name[100]; - /** Lock for protecting SLV/CLV updates. */ - spinlock_t pl_lock; - /** Number of allowed locks in in pool, both, client and server side. */ - atomic_t pl_limit; - /** Number of granted locks in */ - atomic_t pl_granted; - /** Grant rate per T. */ - atomic_t pl_grant_rate; - /** Cancel rate per T. 
*/ - atomic_t pl_cancel_rate; - /** Server lock volume (SLV). Protected by pl_lock. */ - __u64 pl_server_lock_volume; - /** Current biggest client lock volume. Protected by pl_lock. */ - __u64 pl_client_lock_volume; - /** Lock volume factor. SLV on client is calculated as following: - * server_slv * lock_volume_factor. - */ - atomic_t pl_lock_volume_factor; - /** Time when last SLV from server was obtained. */ - time64_t pl_recalc_time; - /** Recalculation period for pool. */ - time64_t pl_recalc_period; - /** Recalculation and shrink operations. */ - const struct ldlm_pool_ops *pl_ops; - /** Number of planned locks for next period. */ - int pl_grant_plan; - /** Pool statistics. */ - struct lprocfs_stats *pl_stats; - - /* sysfs object */ - struct kobject pl_kobj; - struct completion pl_kobj_unregister; -}; - -typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock); - -/** - * LVB operations. - * LVB is Lock Value Block. This is a special opaque (to LDLM) value that could - * be associated with an LDLM lock and transferred from client to server and - * back. - * - * Currently LVBs are used by: - * - OSC-OST code to maintain current object size/times - * - layout lock code to return the layout when the layout lock is granted - */ -struct ldlm_valblock_ops { - int (*lvbo_init)(struct ldlm_resource *res); - int (*lvbo_update)(struct ldlm_resource *res, - struct ptlrpc_request *r, - int increase); - int (*lvbo_free)(struct ldlm_resource *res); - /* Return size of lvb data appropriate RPC size can be reserved */ - int (*lvbo_size)(struct ldlm_lock *lock); - /* Called to fill in lvb data to RPC buffer @buf */ - int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int buflen); -}; - -/** - * LDLM pools related, type of lock pool in the namespace. 
- * Greedy means release cached locks aggressively - */ -enum ldlm_appetite { - LDLM_NAMESPACE_GREEDY = 1 << 0, - LDLM_NAMESPACE_MODEST = 1 << 1 -}; - -struct ldlm_ns_bucket { - /** back pointer to namespace */ - struct ldlm_namespace *nsb_namespace; - /** - * Estimated lock callback time. Used by adaptive timeout code to - * avoid spurious client evictions due to unresponsiveness when in - * fact the network or overall system load is at fault - */ - struct adaptive_timeout nsb_at_estimate; -}; - -enum { - /** LDLM namespace lock stats */ - LDLM_NSS_LOCKS = 0, - LDLM_NSS_LAST -}; - -enum ldlm_ns_type { - /** invalid type */ - LDLM_NS_TYPE_UNKNOWN = 0, - /** mdc namespace */ - LDLM_NS_TYPE_MDC, - /** mds namespace */ - LDLM_NS_TYPE_MDT, - /** osc namespace */ - LDLM_NS_TYPE_OSC, - /** ost namespace */ - LDLM_NS_TYPE_OST, - /** mgc namespace */ - LDLM_NS_TYPE_MGC, - /** mgs namespace */ - LDLM_NS_TYPE_MGT, -}; - -/** - * LDLM Namespace. - * - * Namespace serves to contain locks related to a particular service. - * There are two kinds of namespaces: - * - Server namespace has knowledge of all locks and is therefore authoritative - * to make decisions like what locks could be granted and what conflicts - * exist during new lock enqueue. - * - Client namespace only has limited knowledge about locks in the namespace, - * only seeing locks held by the client. - * - * Every Lustre service has one server namespace present on the server serving - * that service. Every client connected to the service has a client namespace - * for it. - * Every lock obtained by client in that namespace is actually represented by - * two in-memory locks. One on the server and one on the client. The locks are - * linked by a special cookie by which one node can tell to the other which lock - * it actually means during communications. Such locks are called remote locks. - * The locks held by server only without any reference to a client are called - * local locks. 
- */ -struct ldlm_namespace { - /** Backward link to OBD, required for LDLM pool to store new SLV. */ - struct obd_device *ns_obd; - - /** Flag indicating if namespace is on client instead of server */ - enum ldlm_side ns_client; - - /** Resource hash table for namespace. */ - struct cfs_hash *ns_rs_hash; - - /** serialize */ - spinlock_t ns_lock; - - /** big refcount (by bucket) */ - atomic_t ns_bref; - - /** - * Namespace connect flags supported by server (may be changed via - * sysfs, LRU resize may be disabled/enabled). - */ - __u64 ns_connect_flags; - - /** Client side original connect flags supported by server. */ - __u64 ns_orig_connect_flags; - - /* namespace debugfs dir entry */ - struct dentry *ns_debugfs_entry; - - /** - * Position in global namespace list linking all namespaces on - * the node. - */ - struct list_head ns_list_chain; - - /** - * List of unused locks for this namespace. This list is also called - * LRU lock list. - * Unused locks are locks with zero reader/writer reference counts. - * This list is only used on clients for lock caching purposes. - * When we want to release some locks voluntarily or if server wants - * us to release some locks due to e.g. memory pressure, we take locks - * to release from the head of this list. - * Locks are linked via l_lru field in \see struct ldlm_lock. - */ - struct list_head ns_unused_list; - /** Number of locks in the LRU list above */ - int ns_nr_unused; - - /** - * Maximum number of locks permitted in the LRU. If 0, means locks - * are managed by pools and there is no preset limit, rather it is all - * controlled by available memory on this client and on server. - */ - unsigned int ns_max_unused; - /** Maximum allowed age (last used time) for locks in the LRU */ - unsigned int ns_max_age; - - /** - * Used to rate-limit ldlm_namespace_dump calls. - * \see ldlm_namespace_dump. Increased by 10 seconds every time - * it is called. 
- */ - unsigned long ns_next_dump; - - /** - * LVB operations for this namespace. - * \see struct ldlm_valblock_ops - */ - struct ldlm_valblock_ops *ns_lvbo; - - /** - * Used by filter code to store pointer to OBD of the service. - * Should be dropped in favor of \a ns_obd - */ - void *ns_lvbp; - - /** - * Wait queue used by __ldlm_namespace_free. Gets woken up every time - * a resource is removed. - */ - wait_queue_head_t ns_waitq; - /** LDLM pool structure for this namespace */ - struct ldlm_pool ns_pool; - /** Definition of how eagerly unused locks will be released from LRU */ - enum ldlm_appetite ns_appetite; - - /** Limit of parallel AST RPC count. */ - unsigned ns_max_parallel_ast; - - /** - * Callback to check if a lock is good to be canceled by ELC or - * during recovery. - */ - ldlm_cancel_cbt ns_cancel; - - /** LDLM lock stats */ - struct lprocfs_stats *ns_stats; - - /** - * Flag to indicate namespace is being freed. Used to determine if - * recalculation of LDLM pool statistics should be skipped. - */ - unsigned ns_stopping:1; - - struct kobject ns_kobj; /* sysfs object */ - struct completion ns_kobj_unregister; -}; - -/** - * Returns 1 if namespace \a ns supports early lock cancel (ELC). - */ -static inline int ns_connect_cancelset(struct ldlm_namespace *ns) -{ - return !!(ns->ns_connect_flags & OBD_CONNECT_CANCELSET); -} - -/** - * Returns 1 if this namespace supports lru_resize. - */ -static inline int ns_connect_lru_resize(struct ldlm_namespace *ns) -{ - return !!(ns->ns_connect_flags & OBD_CONNECT_LRU_RESIZE); -} - -static inline void ns_register_cancel(struct ldlm_namespace *ns, - ldlm_cancel_cbt arg) -{ - ns->ns_cancel = arg; -} - -struct ldlm_lock; - -/** Type for blocking callback function of a lock. */ -typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock, - struct ldlm_lock_desc *new, void *data, - int flag); -/** Type for completion callback function of a lock. 
*/ -typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, __u64 flags, - void *data); -/** Type for glimpse callback function of a lock. */ -typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data); - -/** Work list for sending GL ASTs to multiple locks. */ -struct ldlm_glimpse_work { - struct ldlm_lock *gl_lock; /* lock to glimpse */ - struct list_head gl_list; /* linkage to other gl work structs */ - __u32 gl_flags;/* see LDLM_GL_WORK_* below */ - union ldlm_gl_desc *gl_desc; /* glimpse descriptor to be packed in - * glimpse callback request - */ -}; - -/** The ldlm_glimpse_work is allocated on the stack and should not be freed. */ -#define LDLM_GL_WORK_NOFREE 0x1 - -/** Interval node data for each LDLM_EXTENT lock. */ -struct ldlm_interval { - struct interval_node li_node; /* node for tree management */ - struct list_head li_group; /* the locks which have the same - * policy - group of the policy - */ -}; - -#define to_ldlm_interval(n) container_of(n, struct ldlm_interval, li_node) - -/** - * Interval tree for extent locks. - * The interval tree must be accessed under the resource lock. - * Interval trees are used for granted extent locks to speed up conflicts - * lookup. See ldlm/interval_tree.c for more details. - */ -struct ldlm_interval_tree { - /** Tree size. */ - int lit_size; - enum ldlm_mode lit_mode; /* lock mode */ - struct interval_node *lit_root; /* actual ldlm_interval */ -}; - -/** Whether to track references to exports by LDLM locks. */ -#define LUSTRE_TRACKS_LOCK_EXP_REFS (0) - -/** Cancel flags. */ -enum ldlm_cancel_flags { - LCF_ASYNC = 0x1, /* Cancel locks asynchronously. 
*/ - LCF_LOCAL = 0x2, /* Cancel locks locally, not notifing server */ - LCF_BL_AST = 0x4, /* Cancel locks marked as LDLM_FL_BL_AST - * in the same RPC - */ -}; - -struct ldlm_flock { - __u64 start; - __u64 end; - __u64 owner; - __u64 blocking_owner; - struct obd_export *blocking_export; - __u32 pid; -}; - -union ldlm_policy_data { - struct ldlm_extent l_extent; - struct ldlm_flock l_flock; - struct ldlm_inodebits l_inodebits; -}; - -void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type, - const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); - -enum lvb_type { - LVB_T_NONE = 0, - LVB_T_OST = 1, - LVB_T_LQUOTA = 2, - LVB_T_LAYOUT = 3, -}; - -/** - * LDLM_GID_ANY is used to match any group id in ldlm_lock_match(). - */ -#define LDLM_GID_ANY ((__u64)-1) - -/** - * LDLM lock structure - * - * Represents a single LDLM lock and its state in memory. Each lock is - * associated with a single ldlm_resource, the object which is being - * locked. There may be multiple ldlm_locks on a single resource, - * depending on the lock type and whether the locks are conflicting or - * not. - */ -struct ldlm_lock { - /** - * Local lock handle. - * When remote side wants to tell us about a lock, they address - * it by this opaque handle. The handle does not hold a - * reference on the ldlm_lock, so it can be safely passed to - * other threads or nodes. When the lock needs to be accessed - * from the handle, it is looked up again in the lock table, and - * may no longer exist. - * - * Must be first in the structure. - */ - struct portals_handle l_handle; - /** - * Lock reference count. - * This is how many users have pointers to actual structure, so that - * we do not accidentally free lock structure that is in use. - */ - atomic_t l_refc; - /** - * Internal spinlock protects l_resource. We should hold this lock - * first before taking res_lock. - */ - spinlock_t l_lock; - /** - * Pointer to actual resource this lock is in. 
- * ldlm_lock_change_resource() can change this. - */ - struct ldlm_resource *l_resource; - /** - * List item for client side LRU list. - * Protected by ns_lock in struct ldlm_namespace. - */ - struct list_head l_lru; - /** - * Linkage to resource's lock queues according to current lock state. - * (could be granted, waiting or converting) - * Protected by lr_lock in struct ldlm_resource. - */ - struct list_head l_res_link; - /** - * Tree node for ldlm_extent. - */ - struct ldlm_interval *l_tree_node; - /** - * Per export hash of locks. - * Protected by per-bucket exp->exp_lock_hash locks. - */ - struct hlist_node l_exp_hash; - /** - * Per export hash of flock locks. - * Protected by per-bucket exp->exp_flock_hash locks. - */ - struct hlist_node l_exp_flock_hash; - /** - * Requested mode. - * Protected by lr_lock. - */ - enum ldlm_mode l_req_mode; - /** - * Granted mode, also protected by lr_lock. - */ - enum ldlm_mode l_granted_mode; - /** Lock completion handler pointer. Called when lock is granted. */ - ldlm_completion_callback l_completion_ast; - /** - * Lock blocking AST handler pointer. - * It plays two roles: - * - as a notification of an attempt to queue a conflicting lock (once) - * - as a notification when the lock is being cancelled. - * - * As such it's typically called twice: once for the initial conflict - * and then once more when the last user went away and the lock is - * cancelled (could happen recursively). - */ - ldlm_blocking_callback l_blocking_ast; - /** - * Lock glimpse handler. - * Glimpse handler is used to obtain LVB updates from a client by - * server - */ - ldlm_glimpse_callback l_glimpse_ast; - - /** - * Lock export. - * This is a pointer to actual client export for locks that were granted - * to clients. Used server-side. - */ - struct obd_export *l_export; - /** - * Lock connection export. - * Pointer to server export on a client. - */ - struct obd_export *l_conn_export; - - /** - * Remote lock handle. 
- * If the lock is remote, this is the handle of the other side lock - * (l_handle) - */ - struct lustre_handle l_remote_handle; - - /** - * Representation of private data specific for a lock type. - * Examples are: extent range for extent lock or bitmask for ibits locks - */ - union ldlm_policy_data l_policy_data; - - /** - * Lock state flags. Protected by lr_lock. - * \see lustre_dlm_flags.h where the bits are defined. - */ - __u64 l_flags; - - /** - * Lock r/w usage counters. - * Protected by lr_lock. - */ - __u32 l_readers; - __u32 l_writers; - /** - * If the lock is granted, a process sleeps on this waitq to learn when - * it's no longer in use. If the lock is not granted, a process sleeps - * on this waitq to learn when it becomes granted. - */ - wait_queue_head_t l_waitq; - - /** - * Seconds. It will be updated if there is any activity related to - * the lock, e.g. enqueue the lock or send blocking AST. - */ - time64_t l_last_activity; - - /** - * Time last used by e.g. being matched by lock match. - * Jiffies. Should be converted to time if needed. - */ - unsigned long l_last_used; - - /** Originally requested extent for the extent lock. */ - struct ldlm_extent l_req_extent; - - /* - * Client-side-only members. - */ - - enum lvb_type l_lvb_type; - - /** - * Temporary storage for a LVB received during an enqueue operation. - */ - __u32 l_lvb_len; - void *l_lvb_data; - - /** Private storage for lock user. Opaque to LDLM. */ - void *l_ast_data; - - /* - * Server-side-only members. - */ - - /** - * Connection cookie for the client originating the operation. - * Used by Commit on Share (COS) code. Currently only used for - * inodebits locks on MDS. - */ - __u64 l_client_cookie; - - /** - * List item for locks waiting for cancellation from clients. 
- * The lists this could be linked into are: - * waiting_locks_list (protected by waiting_locks_spinlock), - * then if the lock timed out, it is moved to - * expired_lock_thread.elt_expired_locks for further processing. - * Protected by elt_lock. - */ - struct list_head l_pending_chain; - - /** - * Set when lock is sent a blocking AST. Time in seconds when timeout - * is reached and client holding this lock could be evicted. - * This timeout could be further extended by e.g. certain IO activity - * under this lock. - * \see ost_rw_prolong_locks - */ - unsigned long l_callback_timeout; - - /** Local PID of process which created this lock. */ - __u32 l_pid; - - /** - * Number of times blocking AST was sent for this lock. - * This is for debugging. Valid values are 0 and 1, if there is an - * attempt to send blocking AST more than once, an assertion would be - * hit. \see ldlm_work_bl_ast_lock - */ - int l_bl_ast_run; - /** List item ldlm_add_ast_work_item() for case of blocking ASTs. */ - struct list_head l_bl_ast; - /** List item ldlm_add_ast_work_item() for case of completion ASTs. */ - struct list_head l_cp_ast; - /** For ldlm_add_ast_work_item() for "revoke" AST used in COS. */ - struct list_head l_rk_ast; - - /** - * Pointer to a conflicting lock that caused blocking AST to be sent - * for this lock - */ - struct ldlm_lock *l_blocking_lock; - - /** - * Protected by lr_lock, linkages to "skip lists". - * For more explanations of skip lists see ldlm/ldlm_inodebits.c - */ - struct list_head l_sl_mode; - struct list_head l_sl_policy; - - /** Reference tracking structure to debug leaked locks. */ - struct lu_ref l_reference; -#if LUSTRE_TRACKS_LOCK_EXP_REFS - /* Debugging stuff for bug 20498, for tracking export references. 
*/ - /** number of export references taken */ - int l_exp_refs_nr; - /** link all locks referencing one export */ - struct list_head l_exp_refs_link; - /** referenced export object */ - struct obd_export *l_exp_refs_target; -#endif -}; - -/** - * LDLM resource description. - * Basically, resource is a representation for a single object. - * Object has a name which is currently 4 64-bit integers. LDLM user is - * responsible for creation of a mapping between objects it wants to be - * protected and resource names. - * - * A resource can only hold locks of a single lock type, though there may be - * multiple ldlm_locks on a single resource, depending on the lock type and - * whether the locks are conflicting or not. - */ -struct ldlm_resource { - struct ldlm_ns_bucket *lr_ns_bucket; - - /** - * List item for list in namespace hash. - * protected by ns_lock - */ - struct hlist_node lr_hash; - - /** Spinlock to protect locks under this resource. */ - spinlock_t lr_lock; - - /** - * protected by lr_lock - * @{ - */ - /** List of locks in granted state */ - struct list_head lr_granted; - /** - * List of locks that could not be granted due to conflicts and - * that are waiting for conflicts to go away - */ - struct list_head lr_waiting; - /** @} */ - - /** Type of locks this resource can hold. Only one type per resource. */ - enum ldlm_type lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */ - - /** Resource name */ - struct ldlm_res_id lr_name; - /** Reference count for this resource */ - atomic_t lr_refcount; - - /** - * Interval trees (only for extent locks) for all modes of this resource - */ - struct ldlm_interval_tree lr_itree[LCK_MODE_NUM]; - - /** - * Server-side-only lock value block elements. - * To serialize lvbo_init. - */ - struct mutex lr_lvb_mutex; - int lr_lvb_len; - - /** When the resource was considered as contended. */ - unsigned long lr_contention_time; - /** List of references to this resource. For debugging. 
*/ - struct lu_ref lr_reference; - - struct inode *lr_lvb_inode; -}; - -static inline bool ldlm_has_layout(struct ldlm_lock *lock) -{ - return lock->l_resource->lr_type == LDLM_IBITS && - lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_LAYOUT; -} - -static inline char * -ldlm_ns_name(struct ldlm_namespace *ns) -{ - return ns->ns_rs_hash->hs_name; -} - -static inline struct ldlm_namespace * -ldlm_res_to_ns(struct ldlm_resource *res) -{ - return res->lr_ns_bucket->nsb_namespace; -} - -static inline struct ldlm_namespace * -ldlm_lock_to_ns(struct ldlm_lock *lock) -{ - return ldlm_res_to_ns(lock->l_resource); -} - -static inline char * -ldlm_lock_to_ns_name(struct ldlm_lock *lock) -{ - return ldlm_ns_name(ldlm_lock_to_ns(lock)); -} - -static inline struct adaptive_timeout * -ldlm_lock_to_ns_at(struct ldlm_lock *lock) -{ - return &lock->l_resource->lr_ns_bucket->nsb_at_estimate; -} - -static inline int ldlm_lvbo_init(struct ldlm_resource *res) -{ - struct ldlm_namespace *ns = ldlm_res_to_ns(res); - - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) - return ns->ns_lvbo->lvbo_init(res); - - return 0; -} - -static inline int ldlm_lvbo_size(struct ldlm_lock *lock) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_size) - return ns->ns_lvbo->lvbo_size(lock); - - return 0; -} - -static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int len) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - if (ns->ns_lvbo) - return ns->ns_lvbo->lvbo_fill(lock, buf, len); - - return 0; -} - -struct ldlm_ast_work { - struct ldlm_lock *w_lock; - int w_blocking; - struct ldlm_lock_desc w_desc; - struct list_head w_list; - int w_flags; - void *w_data; - int w_datalen; -}; - -/** - * Common ldlm_enqueue parameters - */ -struct ldlm_enqueue_info { - enum ldlm_type ei_type; /** Type of the lock being enqueued. */ - enum ldlm_mode ei_mode; /** Mode of the lock being enqueued. 
*/ - void *ei_cb_bl; /** blocking lock callback */ - void *ei_cb_cp; /** lock completion callback */ - void *ei_cb_gl; /** lock glimpse callback */ - void *ei_cbdata; /** Data to be passed into callbacks. */ - unsigned int ei_enq_slave:1; /* whether enqueue slave stripes */ -}; - -extern struct obd_ops ldlm_obd_ops; - -extern char *ldlm_lockname[]; -const char *ldlm_it2str(enum ldlm_intent_flags it); - -/** - * Just a fancy CDEBUG call with log level preset to LDLM_DEBUG. - * For the cases where we do not have actual lock to print along - * with a debugging message that is ldlm-related - */ -#define LDLM_DEBUG_NOLOCK(format, a...) \ - CDEBUG(D_DLMTRACE, "### " format "\n", ##a) - -/** - * Support function for lock information printing into debug logs. - * \see LDLM_DEBUG - */ -#define ldlm_lock_debug(msgdata, mask, cdls, lock, fmt, a...) do { \ - CFS_CHECK_STACK(msgdata, mask, cdls); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - _ldlm_lock_debug(lock, msgdata, fmt, ##a); \ -} while (0) - -void _ldlm_lock_debug(struct ldlm_lock *lock, - struct libcfs_debug_msg_data *data, - const char *fmt, ...) - __printf(3, 4); - -/** - * Rate-limited version of lock printing function. - */ -#define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do { \ - static struct cfs_debug_limit_state _ldlm_cdls; \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, &_ldlm_cdls); \ - ldlm_lock_debug(&msgdata, mask, &_ldlm_cdls, lock, "### " fmt, ##a);\ -} while (0) - -#define LDLM_ERROR(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_ERROR, lock, fmt, ## a) -#define LDLM_WARN(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_WARNING, lock, fmt, ## a) - -/** Non-rate-limited lock printing function for debugging purposes. */ -#define LDLM_DEBUG(lock, fmt, a...) 
do { \ - if (likely(lock)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_DLMTRACE, NULL); \ - ldlm_lock_debug(&msgdata, D_DLMTRACE, NULL, lock, \ - "### " fmt, ##a); \ - } else { \ - LDLM_DEBUG_NOLOCK("no dlm lock: " fmt, ##a); \ - } \ -} while (0) - -typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, __u64 *flags, - int first_enq, enum ldlm_error *err, - struct list_head *work_list); - -/** - * Return values for lock iterators. - * Also used during deciding of lock grants and cancellations. - */ -#define LDLM_ITER_CONTINUE 1 /* keep iterating */ -#define LDLM_ITER_STOP 2 /* stop iterating */ - -typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *); -typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *); - -/** \defgroup ldlm_iterator Lock iterators - * - * LDLM provides for a way to iterate through every lock on a resource or - * namespace or every resource in a namespace. - * @{ - */ -int ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *, - ldlm_iterator_t iter, void *data); -/** @} ldlm_iterator */ - -int ldlm_replay_locks(struct obd_import *imp); - -/* ldlm_flock.c */ -int ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data); - -/* ldlm_extent.c */ -__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms); - -struct ldlm_callback_suite { - ldlm_completion_callback lcs_completion; - ldlm_blocking_callback lcs_blocking; - ldlm_glimpse_callback lcs_glimpse; -}; - -/* ldlm_lockd.c */ -int ldlm_get_ref(void); -void ldlm_put_ref(void); -struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req); - -/* ldlm_lock.c */ -void ldlm_lock2handle(const struct ldlm_lock *lock, - struct lustre_handle *lockh); -struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, __u64 flags); -void ldlm_cancel_callback(struct ldlm_lock *); -int ldlm_lock_remove_from_lru(struct ldlm_lock *); -int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data); - -/** - * Obtain a lock 
reference by its handle. - */ -static inline struct ldlm_lock *ldlm_handle2lock(const struct lustre_handle *h) -{ - return __ldlm_handle2lock(h, 0); -} - -#define LDLM_LOCK_REF_DEL(lock) \ - lu_ref_del(&lock->l_reference, "handle", current) - -static inline struct ldlm_lock * -ldlm_handle2lock_long(const struct lustre_handle *h, __u64 flags) -{ - struct ldlm_lock *lock; - - lock = __ldlm_handle2lock(h, flags); - if (lock) - LDLM_LOCK_REF_DEL(lock); - return lock; -} - -/** - * Update Lock Value Block Operations (LVBO) on a resource taking into account - * data from request \a r - */ -static inline int ldlm_res_lvbo_update(struct ldlm_resource *res, - struct ptlrpc_request *r, int increase) -{ - if (ldlm_res_to_ns(res)->ns_lvbo && - ldlm_res_to_ns(res)->ns_lvbo->lvbo_update) { - return ldlm_res_to_ns(res)->ns_lvbo->lvbo_update(res, r, - increase); - } - return 0; -} - -int ldlm_error2errno(enum ldlm_error error); - -#if LUSTRE_TRACKS_LOCK_EXP_REFS -void ldlm_dump_export_locks(struct obd_export *exp); -#endif - -/** - * Release a temporary lock reference obtained by ldlm_handle2lock() or - * __ldlm_handle2lock(). - */ -#define LDLM_LOCK_PUT(lock) \ -do { \ - LDLM_LOCK_REF_DEL(lock); \ - /*LDLM_DEBUG((lock), "put");*/ \ - ldlm_lock_put(lock); \ -} while (0) - -/** - * Release a lock reference obtained by some other means (see - * LDLM_LOCK_PUT()). 
- */ -#define LDLM_LOCK_RELEASE(lock) \ -do { \ - /*LDLM_DEBUG((lock), "put");*/ \ - ldlm_lock_put(lock); \ -} while (0) - -#define LDLM_LOCK_GET(lock) \ -({ \ - ldlm_lock_get(lock); \ - /*LDLM_DEBUG((lock), "get");*/ \ - lock; \ -}) - -#define ldlm_lock_list_put(head, member, count) \ -({ \ - struct ldlm_lock *_lock, *_next; \ - int c = count; \ - list_for_each_entry_safe(_lock, _next, head, member) { \ - if (c-- == 0) \ - break; \ - list_del_init(&_lock->member); \ - LDLM_LOCK_RELEASE(_lock); \ - } \ - LASSERT(c <= 0); \ -}) - -struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); -void ldlm_lock_put(struct ldlm_lock *lock); -void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); -void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode); -int ldlm_lock_addref_try(const struct lustre_handle *lockh, - enum ldlm_mode mode); -void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode); -void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, - enum ldlm_mode mode); -void ldlm_lock_fail_match_locked(struct ldlm_lock *lock); -void ldlm_lock_allow_match(struct ldlm_lock *lock); -void ldlm_lock_allow_match_locked(struct ldlm_lock *lock); -enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, - const struct ldlm_res_id *, - enum ldlm_type type, union ldlm_policy_data *, - enum ldlm_mode mode, struct lustre_handle *, - int unref); -enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, - __u64 *bits); -void ldlm_lock_cancel(struct ldlm_lock *lock); -void ldlm_lock_dump_handle(int level, const struct lustre_handle *); -void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); - -/* resource.c */ -struct ldlm_namespace * -ldlm_namespace_new(struct obd_device *obd, char *name, - enum ldlm_side client, enum ldlm_appetite apt, - enum ldlm_ns_type ns_type); -int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags); -void ldlm_namespace_free_prior(struct 
ldlm_namespace *ns, - struct obd_import *imp, - int force); -void ldlm_namespace_free_post(struct ldlm_namespace *ns); -void ldlm_namespace_get(struct ldlm_namespace *ns); -void ldlm_namespace_put(struct ldlm_namespace *ns); -int ldlm_debugfs_setup(void); -void ldlm_debugfs_cleanup(void); - -/* resource.c - internal */ -struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, - struct ldlm_resource *parent, - const struct ldlm_res_id *, - enum ldlm_type type, int create); -int ldlm_resource_putref(struct ldlm_resource *res); -void ldlm_resource_add_lock(struct ldlm_resource *res, - struct list_head *head, - struct ldlm_lock *lock); -void ldlm_resource_unlink_lock(struct ldlm_lock *lock); -void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); -void ldlm_dump_all_namespaces(enum ldlm_side client, int level); -void ldlm_namespace_dump(int level, struct ldlm_namespace *); -void ldlm_resource_dump(int level, struct ldlm_resource *); -int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, - const struct ldlm_res_id *); - -#define LDLM_RESOURCE_ADDREF(res) do { \ - lu_ref_add_atomic(&(res)->lr_reference, __func__, current); \ -} while (0) - -#define LDLM_RESOURCE_DELREF(res) do { \ - lu_ref_del(&(res)->lr_reference, __func__, current); \ -} while (0) - -/* ldlm_request.c */ -/** \defgroup ldlm_local_ast Default AST handlers for local locks - * These AST handlers are typically used for server-side local locks and are - * also used by client-side lock handlers to perform minimum level base - * processing. - * @{ - */ -int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data); -int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data); -/** @} ldlm_local_ast */ - -/** \defgroup ldlm_cli_api API to operate on locks from actual LDLM users. - * These are typically used by client and server (*_local versions) - * to obtain and release locks. 
- * @{ - */ -int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, - struct ldlm_enqueue_info *einfo, - const struct ldlm_res_id *res_id, - union ldlm_policy_data const *policy, __u64 *flags, - void *lvb, __u32 lvb_len, enum lvb_type lvb_type, - struct lustre_handle *lockh, int async); -int ldlm_prep_enqueue_req(struct obd_export *exp, - struct ptlrpc_request *req, - struct list_head *cancels, - int count); -int ldlm_prep_elc_req(struct obd_export *exp, - struct ptlrpc_request *req, - int version, int opc, int canceloff, - struct list_head *cancels, int count); - -int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, - enum ldlm_type type, __u8 with_policy, - enum ldlm_mode mode, - __u64 *flags, void *lvb, __u32 lvb_len, - const struct lustre_handle *lockh, int rc); -int ldlm_cli_update_pool(struct ptlrpc_request *req); -int ldlm_cli_cancel(const struct lustre_handle *lockh, - enum ldlm_cancel_flags cancel_flags); -int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *, - enum ldlm_cancel_flags flags, void *opaque); -int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - enum ldlm_cancel_flags flags, - void *opaque); -int ldlm_cancel_resource_local(struct ldlm_resource *res, - struct list_head *cancels, - union ldlm_policy_data *policy, - enum ldlm_mode mode, __u64 lock_flags, - enum ldlm_cancel_flags cancel_flags, - void *opaque); -int ldlm_cli_cancel_list_local(struct list_head *cancels, int count, - enum ldlm_cancel_flags flags); -int ldlm_cli_cancel_list(struct list_head *head, int count, - struct ptlrpc_request *req, - enum ldlm_cancel_flags flags); -/** @} ldlm_cli_api */ - -/* mds/handler.c */ -/* This has to be here because recursive inclusion sucks. 
*/ -int intent_disposition(struct ldlm_reply *rep, int flag); -void intent_set_disposition(struct ldlm_reply *rep, int flag); - -/** - * "Modes" of acquiring lock_res, necessary to tell lockdep that taking more - * than one lock_res is dead-lock safe. - */ -enum lock_res_type { - LRT_NORMAL, - LRT_NEW -}; - -/** Lock resource. */ -static inline void lock_res(struct ldlm_resource *res) -{ - spin_lock(&res->lr_lock); -} - -/** Lock resource with a way to instruct lockdep code about nestedness-safe. */ -static inline void lock_res_nested(struct ldlm_resource *res, - enum lock_res_type mode) -{ - spin_lock_nested(&res->lr_lock, mode); -} - -/** Unlock resource. */ -static inline void unlock_res(struct ldlm_resource *res) -{ - spin_unlock(&res->lr_lock); -} - -/** Check if resource is already locked, assert if not. */ -static inline void check_res_locked(struct ldlm_resource *res) -{ - assert_spin_locked(&res->lr_lock); -} - -struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock); -void unlock_res_and_lock(struct ldlm_lock *lock); - -/* ldlm_pool.c */ -/** \defgroup ldlm_pools Various LDLM pool related functions - * There are not used outside of ldlm. 
- * @{ - */ -int ldlm_pools_init(void); -void ldlm_pools_fini(void); - -int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, - int idx, enum ldlm_side client); -void ldlm_pool_fini(struct ldlm_pool *pl); -void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock); -void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock); -/** @} */ - -static inline int ldlm_extent_overlap(const struct ldlm_extent *ex1, - const struct ldlm_extent *ex2) -{ - return ex1->start <= ex2->end && ex2->start <= ex1->end; -} - -/* check if @ex1 contains @ex2 */ -static inline int ldlm_extent_contain(const struct ldlm_extent *ex1, - const struct ldlm_extent *ex2) -{ - return ex1->start <= ex2->start && ex1->end >= ex2->end; -} - -#endif -/** @} LDLM */ diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h deleted file mode 100644 index 53db031c4c8c..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h +++ /dev/null @@ -1,402 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* -*- buffer-read-only: t -*- vi: set ro: - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - */ -/** - * \file lustre_dlm_flags.h - * The flags and collections of flags (masks) for \see struct ldlm_lock. - * - * \addtogroup LDLM Lustre Distributed Lock Manager - * @{ - * - * \name flags - * The flags and collections of flags (masks) for \see struct ldlm_lock. 
- * @{ - */ -#ifndef LDLM_ALL_FLAGS_MASK - -/** l_flags bits marked as "all_flags" bits */ -#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F932FULL - -/** extent, mode, or resource changed */ -#define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL /* bit 0 */ -#define ldlm_is_lock_changed(_l) LDLM_TEST_FLAG((_l), 1ULL << 0) -#define ldlm_set_lock_changed(_l) LDLM_SET_FLAG((_l), 1ULL << 0) -#define ldlm_clear_lock_changed(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 0) - -/** - * Server placed lock on granted list, or a recovering client wants the - * lock added to the granted list, no questions asked. - */ -#define LDLM_FL_BLOCK_GRANTED 0x0000000000000002ULL /* bit 1 */ -#define ldlm_is_block_granted(_l) LDLM_TEST_FLAG((_l), 1ULL << 1) -#define ldlm_set_block_granted(_l) LDLM_SET_FLAG((_l), 1ULL << 1) -#define ldlm_clear_block_granted(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 1) - -/** - * Server placed lock on conv list, or a recovering client wants the lock - * added to the conv list, no questions asked. - */ -#define LDLM_FL_BLOCK_CONV 0x0000000000000004ULL /* bit 2 */ -#define ldlm_is_block_conv(_l) LDLM_TEST_FLAG((_l), 1ULL << 2) -#define ldlm_set_block_conv(_l) LDLM_SET_FLAG((_l), 1ULL << 2) -#define ldlm_clear_block_conv(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 2) - -/** - * Server placed lock on wait list, or a recovering client wants the lock - * added to the wait list, no questions asked. - */ -#define LDLM_FL_BLOCK_WAIT 0x0000000000000008ULL /* bit 3 */ -#define ldlm_is_block_wait(_l) LDLM_TEST_FLAG((_l), 1ULL << 3) -#define ldlm_set_block_wait(_l) LDLM_SET_FLAG((_l), 1ULL << 3) -#define ldlm_clear_block_wait(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 3) - -/** blocking or cancel packet was queued for sending. */ -#define LDLM_FL_AST_SENT 0x0000000000000020ULL /* bit 5 */ -#define ldlm_is_ast_sent(_l) LDLM_TEST_FLAG((_l), 1ULL << 5) -#define ldlm_set_ast_sent(_l) LDLM_SET_FLAG((_l), 1ULL << 5) -#define ldlm_clear_ast_sent(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 5) - -/** - * Lock is being replayed. 
This could probably be implied by the fact that - * one of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous. - */ -#define LDLM_FL_REPLAY 0x0000000000000100ULL /* bit 8 */ -#define ldlm_is_replay(_l) LDLM_TEST_FLAG((_l), 1ULL << 8) -#define ldlm_set_replay(_l) LDLM_SET_FLAG((_l), 1ULL << 8) -#define ldlm_clear_replay(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 8) - -/** Don't grant lock, just do intent. */ -#define LDLM_FL_INTENT_ONLY 0x0000000000000200ULL /* bit 9 */ -#define ldlm_is_intent_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 9) -#define ldlm_set_intent_only(_l) LDLM_SET_FLAG((_l), 1ULL << 9) -#define ldlm_clear_intent_only(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 9) - -/** lock request has intent */ -#define LDLM_FL_HAS_INTENT 0x0000000000001000ULL /* bit 12 */ -#define ldlm_is_has_intent(_l) LDLM_TEST_FLAG((_l), 1ULL << 12) -#define ldlm_set_has_intent(_l) LDLM_SET_FLAG((_l), 1ULL << 12) -#define ldlm_clear_has_intent(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 12) - -/** flock deadlock detected */ -#define LDLM_FL_FLOCK_DEADLOCK 0x0000000000008000ULL /* bit 15 */ -#define ldlm_is_flock_deadlock(_l) LDLM_TEST_FLAG((_l), 1ULL << 15) -#define ldlm_set_flock_deadlock(_l) LDLM_SET_FLAG((_l), 1ULL << 15) -#define ldlm_clear_flock_deadlock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 15) - -/** discard (no writeback) on cancel */ -#define LDLM_FL_DISCARD_DATA 0x0000000000010000ULL /* bit 16 */ -#define ldlm_is_discard_data(_l) LDLM_TEST_FLAG((_l), 1ULL << 16) -#define ldlm_set_discard_data(_l) LDLM_SET_FLAG((_l), 1ULL << 16) -#define ldlm_clear_discard_data(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 16) - -/** Blocked by group lock - wait indefinitely */ -#define LDLM_FL_NO_TIMEOUT 0x0000000000020000ULL /* bit 17 */ -#define ldlm_is_no_timeout(_l) LDLM_TEST_FLAG((_l), 1ULL << 17) -#define ldlm_set_no_timeout(_l) LDLM_SET_FLAG((_l), 1ULL << 17) -#define ldlm_clear_no_timeout(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 17) - -/** - * Server told not to wait if blocked. 
For AGL, OST will not send glimpse - * callback. - */ -#define LDLM_FL_BLOCK_NOWAIT 0x0000000000040000ULL /* bit 18 */ -#define ldlm_is_block_nowait(_l) LDLM_TEST_FLAG((_l), 1ULL << 18) -#define ldlm_set_block_nowait(_l) LDLM_SET_FLAG((_l), 1ULL << 18) -#define ldlm_clear_block_nowait(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 18) - -/** return blocking lock */ -#define LDLM_FL_TEST_LOCK 0x0000000000080000ULL /* bit 19 */ -#define ldlm_is_test_lock(_l) LDLM_TEST_FLAG((_l), 1ULL << 19) -#define ldlm_set_test_lock(_l) LDLM_SET_FLAG((_l), 1ULL << 19) -#define ldlm_clear_test_lock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 19) - -/** match lock only */ -#define LDLM_FL_MATCH_LOCK 0x0000000000100000ULL /* bit 20 */ - -/** - * Immediately cancel such locks when they block some other locks. Send - * cancel notification to original lock holder, but expect no reply. This - * is for clients (like liblustre) that cannot be expected to reliably - * response to blocking AST. - */ -#define LDLM_FL_CANCEL_ON_BLOCK 0x0000000000800000ULL /* bit 23 */ -#define ldlm_is_cancel_on_block(_l) LDLM_TEST_FLAG((_l), 1ULL << 23) -#define ldlm_set_cancel_on_block(_l) LDLM_SET_FLAG((_l), 1ULL << 23) -#define ldlm_clear_cancel_on_block(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 23) - -/** - * measure lock contention and return -EUSERS if locking contention is high - */ -#define LDLM_FL_DENY_ON_CONTENTION 0x0000000040000000ULL /* bit 30 */ -#define ldlm_is_deny_on_contention(_l) LDLM_TEST_FLAG((_l), 1ULL << 30) -#define ldlm_set_deny_on_contention(_l) LDLM_SET_FLAG((_l), 1ULL << 30) -#define ldlm_clear_deny_on_contention(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 30) - -/** - * These are flags that are mapped into the flags and ASTs of blocking - * locks Add FL_DISCARD to blocking ASTs - */ -#define LDLM_FL_AST_DISCARD_DATA 0x0000000080000000ULL /* bit 31 */ -#define ldlm_is_ast_discard_data(_l) LDLM_TEST_FLAG((_l), 1ULL << 31) -#define ldlm_set_ast_discard_data(_l) LDLM_SET_FLAG((_l), 1ULL << 31) -#define 
ldlm_clear_ast_discard_data(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 31) - -/** - * Used for marking lock as a target for -EINTR while cp_ast sleep emulation - * + race with upcoming bl_ast. - */ -#define LDLM_FL_FAIL_LOC 0x0000000100000000ULL /* bit 32 */ -#define ldlm_is_fail_loc(_l) LDLM_TEST_FLAG((_l), 1ULL << 32) -#define ldlm_set_fail_loc(_l) LDLM_SET_FLAG((_l), 1ULL << 32) -#define ldlm_clear_fail_loc(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 32) - -/** - * Used while processing the unused list to know that we have already - * handled this lock and decided to skip it. - */ -#define LDLM_FL_SKIPPED 0x0000000200000000ULL /* bit 33 */ -#define ldlm_is_skipped(_l) LDLM_TEST_FLAG((_l), 1ULL << 33) -#define ldlm_set_skipped(_l) LDLM_SET_FLAG((_l), 1ULL << 33) -#define ldlm_clear_skipped(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 33) - -/** this lock is being destroyed */ -#define LDLM_FL_CBPENDING 0x0000000400000000ULL /* bit 34 */ -#define ldlm_is_cbpending(_l) LDLM_TEST_FLAG((_l), 1ULL << 34) -#define ldlm_set_cbpending(_l) LDLM_SET_FLAG((_l), 1ULL << 34) -#define ldlm_clear_cbpending(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 34) - -/** not a real flag, not saved in lock */ -#define LDLM_FL_WAIT_NOREPROC 0x0000000800000000ULL /* bit 35 */ -#define ldlm_is_wait_noreproc(_l) LDLM_TEST_FLAG((_l), 1ULL << 35) -#define ldlm_set_wait_noreproc(_l) LDLM_SET_FLAG((_l), 1ULL << 35) -#define ldlm_clear_wait_noreproc(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 35) - -/** cancellation callback already run */ -#define LDLM_FL_CANCEL 0x0000001000000000ULL /* bit 36 */ -#define ldlm_is_cancel(_l) LDLM_TEST_FLAG((_l), 1ULL << 36) -#define ldlm_set_cancel(_l) LDLM_SET_FLAG((_l), 1ULL << 36) -#define ldlm_clear_cancel(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 36) - -/** whatever it might mean -- never transmitted? 
*/ -#define LDLM_FL_LOCAL_ONLY 0x0000002000000000ULL /* bit 37 */ -#define ldlm_is_local_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 37) -#define ldlm_set_local_only(_l) LDLM_SET_FLAG((_l), 1ULL << 37) -#define ldlm_clear_local_only(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 37) - -/** don't run the cancel callback under ldlm_cli_cancel_unused */ -#define LDLM_FL_FAILED 0x0000004000000000ULL /* bit 38 */ -#define ldlm_is_failed(_l) LDLM_TEST_FLAG((_l), 1ULL << 38) -#define ldlm_set_failed(_l) LDLM_SET_FLAG((_l), 1ULL << 38) -#define ldlm_clear_failed(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 38) - -/** lock cancel has already been sent */ -#define LDLM_FL_CANCELING 0x0000008000000000ULL /* bit 39 */ -#define ldlm_is_canceling(_l) LDLM_TEST_FLAG((_l), 1ULL << 39) -#define ldlm_set_canceling(_l) LDLM_SET_FLAG((_l), 1ULL << 39) -#define ldlm_clear_canceling(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 39) - -/** local lock (ie, no srv/cli split) */ -#define LDLM_FL_LOCAL 0x0000010000000000ULL /* bit 40 */ -#define ldlm_is_local(_l) LDLM_TEST_FLAG((_l), 1ULL << 40) -#define ldlm_set_local(_l) LDLM_SET_FLAG((_l), 1ULL << 40) -#define ldlm_clear_local(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 40) - -/** - * XXX FIXME: This is being added to b_size as a low-risk fix to the - * fact that the LVB filling happens _after_ the lock has been granted, - * so another thread can match it before the LVB has been updated. As a - * dirty hack, we set LDLM_FL_LVB_READY only after we've done the LVB poop. - * this is only needed on LOV/OSC now, where LVB is actually used and - * callers must set it in input flags. - * - * The proper fix is to do the granting inside of the completion AST, - * which can be replaced with a LVB-aware wrapping function for OSC locks. - * That change is pretty high-risk, though, and would need a lot more - * testing. 
- */ -#define LDLM_FL_LVB_READY 0x0000020000000000ULL /* bit 41 */ -#define ldlm_is_lvb_ready(_l) LDLM_TEST_FLAG((_l), 1ULL << 41) -#define ldlm_set_lvb_ready(_l) LDLM_SET_FLAG((_l), 1ULL << 41) -#define ldlm_clear_lvb_ready(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 41) - -/** - * A lock contributes to the known minimum size (KMS) calculation until it - * has finished the part of its cancellation that performs write back on its - * dirty pages. It can remain on the granted list during this whole time. - * Threads racing to update the KMS after performing their writeback need - * to know to exclude each other's locks from the calculation as they walk - * the granted list. - */ -#define LDLM_FL_KMS_IGNORE 0x0000040000000000ULL /* bit 42 */ -#define ldlm_is_kms_ignore(_l) LDLM_TEST_FLAG((_l), 1ULL << 42) -#define ldlm_set_kms_ignore(_l) LDLM_SET_FLAG((_l), 1ULL << 42) -#define ldlm_clear_kms_ignore(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 42) - -/** completion AST to be executed */ -#define LDLM_FL_CP_REQD 0x0000080000000000ULL /* bit 43 */ -#define ldlm_is_cp_reqd(_l) LDLM_TEST_FLAG((_l), 1ULL << 43) -#define ldlm_set_cp_reqd(_l) LDLM_SET_FLAG((_l), 1ULL << 43) -#define ldlm_clear_cp_reqd(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 43) - -/** cleanup_resource has already handled the lock */ -#define LDLM_FL_CLEANED 0x0000100000000000ULL /* bit 44 */ -#define ldlm_is_cleaned(_l) LDLM_TEST_FLAG((_l), 1ULL << 44) -#define ldlm_set_cleaned(_l) LDLM_SET_FLAG((_l), 1ULL << 44) -#define ldlm_clear_cleaned(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 44) - -/** - * optimization hint: LDLM can run blocking callback from current context - * w/o involving separate thread. 
in order to decrease cs rate - */ -#define LDLM_FL_ATOMIC_CB 0x0000200000000000ULL /* bit 45 */ -#define ldlm_is_atomic_cb(_l) LDLM_TEST_FLAG((_l), 1ULL << 45) -#define ldlm_set_atomic_cb(_l) LDLM_SET_FLAG((_l), 1ULL << 45) -#define ldlm_clear_atomic_cb(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 45) - -/** - * It may happen that a client initiates two operations, e.g. unlink and - * mkdir, such that the server sends a blocking AST for conflicting locks - * to this client for the first operation, whereas the second operation - * has canceled this lock and is waiting for rpc_lock which is taken by - * the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in - * the lock to prevent the Early Lock Cancel (ELC) code from cancelling it. - */ -#define LDLM_FL_BL_AST 0x0000400000000000ULL /* bit 46 */ -#define ldlm_is_bl_ast(_l) LDLM_TEST_FLAG((_l), 1ULL << 46) -#define ldlm_set_bl_ast(_l) LDLM_SET_FLAG((_l), 1ULL << 46) -#define ldlm_clear_bl_ast(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 46) - -/** - * Set by ldlm_cancel_callback() when lock cache is dropped to let - * ldlm_callback_handler() return EINVAL to the server. It is used when - * ELC RPC is already prepared and is waiting for rpc_lock, too late to - * send a separate CANCEL RPC. - */ -#define LDLM_FL_BL_DONE 0x0000800000000000ULL /* bit 47 */ -#define ldlm_is_bl_done(_l) LDLM_TEST_FLAG((_l), 1ULL << 47) -#define ldlm_set_bl_done(_l) LDLM_SET_FLAG((_l), 1ULL << 47) -#define ldlm_clear_bl_done(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 47) - -/** - * Don't put lock into the LRU list, so that it is not canceled due - * to aging. Used by MGC locks, they are cancelled only at unmount or - * by callback. - */ -#define LDLM_FL_NO_LRU 0x0001000000000000ULL /* bit 48 */ -#define ldlm_is_no_lru(_l) LDLM_TEST_FLAG((_l), 1ULL << 48) -#define ldlm_set_no_lru(_l) LDLM_SET_FLAG((_l), 1ULL << 48) -#define ldlm_clear_no_lru(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 48) - -/** - * Set for locks that failed and where the server has been notified. 
- * - * Protected by lock and resource locks. - */ -#define LDLM_FL_FAIL_NOTIFIED 0x0002000000000000ULL /* bit 49 */ -#define ldlm_is_fail_notified(_l) LDLM_TEST_FLAG((_l), 1ULL << 49) -#define ldlm_set_fail_notified(_l) LDLM_SET_FLAG((_l), 1ULL << 49) -#define ldlm_clear_fail_notified(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 49) - -/** - * Set for locks that were removed from class hash table and will - * be destroyed when last reference to them is released. Set by - * ldlm_lock_destroy_internal(). - * - * Protected by lock and resource locks. - */ -#define LDLM_FL_DESTROYED 0x0004000000000000ULL /* bit 50 */ -#define ldlm_is_destroyed(_l) LDLM_TEST_FLAG((_l), 1ULL << 50) -#define ldlm_set_destroyed(_l) LDLM_SET_FLAG((_l), 1ULL << 50) -#define ldlm_clear_destroyed(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 50) - -/** flag whether this is a server namespace lock */ -#define LDLM_FL_SERVER_LOCK 0x0008000000000000ULL /* bit 51 */ -#define ldlm_is_server_lock(_l) LDLM_TEST_FLAG((_l), 1ULL << 51) -#define ldlm_set_server_lock(_l) LDLM_SET_FLAG((_l), 1ULL << 51) -#define ldlm_clear_server_lock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 51) - -/** - * It's set in lock_res_and_lock() and unset in unlock_res_and_lock(). - * - * NB: compared with check_res_locked(), checking this bit is cheaper. - * Also, spin_is_locked() is deprecated for kernel code; one reason is - * because it works only for SMP so user needs to add extra macros like - * LASSERT_SPIN_LOCKED for uniprocessor kernels. - */ -#define LDLM_FL_RES_LOCKED 0x0010000000000000ULL /* bit 52 */ -#define ldlm_is_res_locked(_l) LDLM_TEST_FLAG((_l), 1ULL << 52) -#define ldlm_set_res_locked(_l) LDLM_SET_FLAG((_l), 1ULL << 52) -#define ldlm_clear_res_locked(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 52) - -/** - * It's set once we call ldlm_add_waiting_lock_res_locked() to start the - * lock-timeout timer and it will never be reset. - * - * Protected by lock and resource locks. 
- */ -#define LDLM_FL_WAITED 0x0020000000000000ULL /* bit 53 */ -#define ldlm_is_waited(_l) LDLM_TEST_FLAG((_l), 1ULL << 53) -#define ldlm_set_waited(_l) LDLM_SET_FLAG((_l), 1ULL << 53) -#define ldlm_clear_waited(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 53) - -/** Flag whether this is a server namespace lock. */ -#define LDLM_FL_NS_SRV 0x0040000000000000ULL /* bit 54 */ -#define ldlm_is_ns_srv(_l) LDLM_TEST_FLAG((_l), 1ULL << 54) -#define ldlm_set_ns_srv(_l) LDLM_SET_FLAG((_l), 1ULL << 54) -#define ldlm_clear_ns_srv(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 54) - -/** Flag whether this lock can be reused. Used by exclusive open. */ -#define LDLM_FL_EXCL 0x0080000000000000ULL /* bit 55 */ -#define ldlm_is_excl(_l) LDLM_TEST_FLAG((_l), 1ULL << 55) -#define ldlm_set_excl(_l) LDLM_SET_FLAG((_l), 1ULL << 55) -#define ldlm_clear_excl(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 55) - -/** l_flags bits marked as "ast" bits */ -#define LDLM_FL_AST_MASK (LDLM_FL_FLOCK_DEADLOCK |\ - LDLM_FL_AST_DISCARD_DATA) - -/** l_flags bits marked as "blocked" bits */ -#define LDLM_FL_BLOCKED_MASK (LDLM_FL_BLOCK_GRANTED |\ - LDLM_FL_BLOCK_CONV |\ - LDLM_FL_BLOCK_WAIT) - -/** l_flags bits marked as "gone" bits */ -#define LDLM_FL_GONE_MASK (LDLM_FL_DESTROYED |\ - LDLM_FL_FAILED) - -/** l_flags bits marked as "inherit" bits */ -/* Flags inherited from wire on enqueue/reply between client/server. */ -/* NO_TIMEOUT flag to force ldlm_lock_match() to wait with no timeout. */ -/* TEST_LOCK flag to not let TEST lock to be granted. */ -#define LDLM_FL_INHERIT_MASK (LDLM_FL_CANCEL_ON_BLOCK |\ - LDLM_FL_NO_TIMEOUT |\ - LDLM_FL_TEST_LOCK) - -/** test for ldlm_lock flag bit set */ -#define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0) - -/** multi-bit test: are any of mask bits set? 
*/ -#define LDLM_HAVE_MASK(_l, _m) ((_l)->l_flags & LDLM_FL_##_m##_MASK) - -/** set a ldlm_lock flag bit */ -#define LDLM_SET_FLAG(_l, _b) ((_l)->l_flags |= (_b)) - -/** clear a ldlm_lock flag bit */ -#define LDLM_CLEAR_FLAG(_l, _b) ((_l)->l_flags &= ~(_b)) - -/** @} subgroup */ -/** @} group */ - -#endif /* LDLM_ALL_FLAGS_MASK */ diff --git a/drivers/staging/lustre/lustre/include/lustre_errno.h b/drivers/staging/lustre/lustre/include/lustre_errno.h deleted file mode 100644 index 59fbb9f47ff1..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_errno.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.txt - * - * GPL HEADER END - */ -/* - * Copyright (C) 2011 FUJITSU LIMITED. All rights reserved. - * - * Copyright (c) 2013, Intel Corporation. - */ - -#ifndef LUSTRE_ERRNO_H -#define LUSTRE_ERRNO_H - -/* - * Only "network" errnos, which are defined below, are allowed on wire (or on - * disk). Generic routines exist to help translate between these and a subset - * of the "host" errnos. Some host errnos (e.g., EDEADLOCK) are intentionally - * left out. See also the comment on lustre_errno_hton_mapping[]. 
- * - * To maintain compatibility with existing x86 clients and servers, each of - * these network errnos has the same numerical value as its corresponding host - * errno on x86. - */ -#define LUSTRE_EPERM 1 /* Operation not permitted */ -#define LUSTRE_ENOENT 2 /* No such file or directory */ -#define LUSTRE_ESRCH 3 /* No such process */ -#define LUSTRE_EINTR 4 /* Interrupted system call */ -#define LUSTRE_EIO 5 /* I/O error */ -#define LUSTRE_ENXIO 6 /* No such device or address */ -#define LUSTRE_E2BIG 7 /* Argument list too long */ -#define LUSTRE_ENOEXEC 8 /* Exec format error */ -#define LUSTRE_EBADF 9 /* Bad file number */ -#define LUSTRE_ECHILD 10 /* No child processes */ -#define LUSTRE_EAGAIN 11 /* Try again */ -#define LUSTRE_ENOMEM 12 /* Out of memory */ -#define LUSTRE_EACCES 13 /* Permission denied */ -#define LUSTRE_EFAULT 14 /* Bad address */ -#define LUSTRE_ENOTBLK 15 /* Block device required */ -#define LUSTRE_EBUSY 16 /* Device or resource busy */ -#define LUSTRE_EEXIST 17 /* File exists */ -#define LUSTRE_EXDEV 18 /* Cross-device link */ -#define LUSTRE_ENODEV 19 /* No such device */ -#define LUSTRE_ENOTDIR 20 /* Not a directory */ -#define LUSTRE_EISDIR 21 /* Is a directory */ -#define LUSTRE_EINVAL 22 /* Invalid argument */ -#define LUSTRE_ENFILE 23 /* File table overflow */ -#define LUSTRE_EMFILE 24 /* Too many open files */ -#define LUSTRE_ENOTTY 25 /* Not a typewriter */ -#define LUSTRE_ETXTBSY 26 /* Text file busy */ -#define LUSTRE_EFBIG 27 /* File too large */ -#define LUSTRE_ENOSPC 28 /* No space left on device */ -#define LUSTRE_ESPIPE 29 /* Illegal seek */ -#define LUSTRE_EROFS 30 /* Read-only file system */ -#define LUSTRE_EMLINK 31 /* Too many links */ -#define LUSTRE_EPIPE 32 /* Broken pipe */ -#define LUSTRE_EDOM 33 /* Math argument out of func domain */ -#define LUSTRE_ERANGE 34 /* Math result not representable */ -#define LUSTRE_EDEADLK 35 /* Resource deadlock would occur */ -#define LUSTRE_ENAMETOOLONG 36 /* File name too long 
*/ -#define LUSTRE_ENOLCK 37 /* No record locks available */ -#define LUSTRE_ENOSYS 38 /* Function not implemented */ -#define LUSTRE_ENOTEMPTY 39 /* Directory not empty */ -#define LUSTRE_ELOOP 40 /* Too many symbolic links found */ -#define LUSTRE_ENOMSG 42 /* No message of desired type */ -#define LUSTRE_EIDRM 43 /* Identifier removed */ -#define LUSTRE_ECHRNG 44 /* Channel number out of range */ -#define LUSTRE_EL2NSYNC 45 /* Level 2 not synchronized */ -#define LUSTRE_EL3HLT 46 /* Level 3 halted */ -#define LUSTRE_EL3RST 47 /* Level 3 reset */ -#define LUSTRE_ELNRNG 48 /* Link number out of range */ -#define LUSTRE_EUNATCH 49 /* Protocol driver not attached */ -#define LUSTRE_ENOCSI 50 /* No CSI structure available */ -#define LUSTRE_EL2HLT 51 /* Level 2 halted */ -#define LUSTRE_EBADE 52 /* Invalid exchange */ -#define LUSTRE_EBADR 53 /* Invalid request descriptor */ -#define LUSTRE_EXFULL 54 /* Exchange full */ -#define LUSTRE_ENOANO 55 /* No anode */ -#define LUSTRE_EBADRQC 56 /* Invalid request code */ -#define LUSTRE_EBADSLT 57 /* Invalid slot */ -#define LUSTRE_EBFONT 59 /* Bad font file format */ -#define LUSTRE_ENOSTR 60 /* Device not a stream */ -#define LUSTRE_ENODATA 61 /* No data available */ -#define LUSTRE_ETIME 62 /* Timer expired */ -#define LUSTRE_ENOSR 63 /* Out of streams resources */ -#define LUSTRE_ENONET 64 /* Machine is not on the network */ -#define LUSTRE_ENOPKG 65 /* Package not installed */ -#define LUSTRE_EREMOTE 66 /* Object is remote */ -#define LUSTRE_ENOLINK 67 /* Link has been severed */ -#define LUSTRE_EADV 68 /* Advertise error */ -#define LUSTRE_ESRMNT 69 /* Srmount error */ -#define LUSTRE_ECOMM 70 /* Communication error on send */ -#define LUSTRE_EPROTO 71 /* Protocol error */ -#define LUSTRE_EMULTIHOP 72 /* Multihop attempted */ -#define LUSTRE_EDOTDOT 73 /* RFS specific error */ -#define LUSTRE_EBADMSG 74 /* Not a data message */ -#define LUSTRE_EOVERFLOW 75 /* Value too large for data type */ -#define LUSTRE_ENOTUNIQ 76 
/* Name not unique on network */ -#define LUSTRE_EBADFD 77 /* File descriptor in bad state */ -#define LUSTRE_EREMCHG 78 /* Remote address changed */ -#define LUSTRE_ELIBACC 79 /* Can't access needed shared library */ -#define LUSTRE_ELIBBAD 80 /* Access corrupted shared library */ -#define LUSTRE_ELIBSCN 81 /* .lib section in a.out corrupted */ -#define LUSTRE_ELIBMAX 82 /* Trying to link too many libraries */ -#define LUSTRE_ELIBEXEC 83 /* Cannot exec a shared lib directly */ -#define LUSTRE_EILSEQ 84 /* Illegal byte sequence */ -#define LUSTRE_ERESTART 85 /* Restart interrupted system call */ -#define LUSTRE_ESTRPIPE 86 /* Streams pipe error */ -#define LUSTRE_EUSERS 87 /* Too many users */ -#define LUSTRE_ENOTSOCK 88 /* Socket operation on non-socket */ -#define LUSTRE_EDESTADDRREQ 89 /* Destination address required */ -#define LUSTRE_EMSGSIZE 90 /* Message too long */ -#define LUSTRE_EPROTOTYPE 91 /* Protocol wrong type for socket */ -#define LUSTRE_ENOPROTOOPT 92 /* Protocol not available */ -#define LUSTRE_EPROTONOSUPPORT 93 /* Protocol not supported */ -#define LUSTRE_ESOCKTNOSUPPORT 94 /* Socket type not supported */ -#define LUSTRE_EOPNOTSUPP 95 /* Operation not supported */ -#define LUSTRE_EPFNOSUPPORT 96 /* Protocol family not supported */ -#define LUSTRE_EAFNOSUPPORT 97 /* Address family not supported */ -#define LUSTRE_EADDRINUSE 98 /* Address already in use */ -#define LUSTRE_EADDRNOTAVAIL 99 /* Cannot assign requested address */ -#define LUSTRE_ENETDOWN 100 /* Network is down */ -#define LUSTRE_ENETUNREACH 101 /* Network is unreachable */ -#define LUSTRE_ENETRESET 102 /* Network connection drop for reset */ -#define LUSTRE_ECONNABORTED 103 /* Software caused connection abort */ -#define LUSTRE_ECONNRESET 104 /* Connection reset by peer */ -#define LUSTRE_ENOBUFS 105 /* No buffer space available */ -#define LUSTRE_EISCONN 106 /* Transport endpoint is connected */ -#define LUSTRE_ENOTCONN 107 /* Transport endpoint not connected */ -#define 
LUSTRE_ESHUTDOWN 108 /* Cannot send after shutdown */ -#define LUSTRE_ETOOMANYREFS 109 /* Too many references: cannot splice */ -#define LUSTRE_ETIMEDOUT 110 /* Connection timed out */ -#define LUSTRE_ECONNREFUSED 111 /* Connection refused */ -#define LUSTRE_EHOSTDOWN 112 /* Host is down */ -#define LUSTRE_EHOSTUNREACH 113 /* No route to host */ -#define LUSTRE_EALREADY 114 /* Operation already in progress */ -#define LUSTRE_EINPROGRESS 115 /* Operation now in progress */ -#define LUSTRE_ESTALE 116 /* Stale file handle */ -#define LUSTRE_EUCLEAN 117 /* Structure needs cleaning */ -#define LUSTRE_ENOTNAM 118 /* Not a XENIX named type file */ -#define LUSTRE_ENAVAIL 119 /* No XENIX semaphores available */ -#define LUSTRE_EISNAM 120 /* Is a named type file */ -#define LUSTRE_EREMOTEIO 121 /* Remote I/O error */ -#define LUSTRE_EDQUOT 122 /* Quota exceeded */ -#define LUSTRE_ENOMEDIUM 123 /* No medium found */ -#define LUSTRE_EMEDIUMTYPE 124 /* Wrong medium type */ -#define LUSTRE_ECANCELED 125 /* Operation Canceled */ -#define LUSTRE_ENOKEY 126 /* Required key not available */ -#define LUSTRE_EKEYEXPIRED 127 /* Key has expired */ -#define LUSTRE_EKEYREVOKED 128 /* Key has been revoked */ -#define LUSTRE_EKEYREJECTED 129 /* Key was rejected by service */ -#define LUSTRE_EOWNERDEAD 130 /* Owner died */ -#define LUSTRE_ENOTRECOVERABLE 131 /* State not recoverable */ -#define LUSTRE_ERESTARTSYS 512 -#define LUSTRE_ERESTARTNOINTR 513 -#define LUSTRE_ERESTARTNOHAND 514 /* restart if no handler.. 
*/ -#define LUSTRE_ENOIOCTLCMD 515 /* No ioctl command */ -#define LUSTRE_ERESTART_RESTARTBLOCK 516 /* restart via sys_restart_syscall */ -#define LUSTRE_EBADHANDLE 521 /* Illegal NFS file handle */ -#define LUSTRE_ENOTSYNC 522 /* Update synchronization mismatch */ -#define LUSTRE_EBADCOOKIE 523 /* Cookie is stale */ -#define LUSTRE_ENOTSUPP 524 /* Operation is not supported */ -#define LUSTRE_ETOOSMALL 525 /* Buffer or request is too small */ -#define LUSTRE_ESERVERFAULT 526 /* An untranslatable error occurred */ -#define LUSTRE_EBADTYPE 527 /* Type not supported by server */ -#define LUSTRE_EJUKEBOX 528 /* Request won't finish until timeout */ -#define LUSTRE_EIOCBQUEUED 529 /* iocb queued await completion event */ -#define LUSTRE_EIOCBRETRY 530 /* iocb queued, will trigger a retry */ - -/* - * Translations are optimized away on x86. Host errnos that shouldn't be put - * on wire could leak through as a result. Do not count on this side effect. - */ -#ifdef CONFIG_LUSTRE_TRANSLATE_ERRNOS -unsigned int lustre_errno_hton(unsigned int h); -unsigned int lustre_errno_ntoh(unsigned int n); -#else -#define lustre_errno_hton(h) (h) -#define lustre_errno_ntoh(n) (n) -#endif - -#endif /* LUSTRE_ERRNO_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_export.h b/drivers/staging/lustre/lustre/include/lustre_export.h deleted file mode 100644 index 40cd168ed2ea..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_export.h +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -/** \defgroup obd_export PortalRPC export definitions - * - * @{ - */ - -#ifndef __EXPORT_H -#define __EXPORT_H - -/** \defgroup export export - * - * @{ - */ - -#include <lprocfs_status.h> -#include <uapi/linux/lustre/lustre_idl.h> -#include <lustre_dlm.h> - -enum obd_option { - OBD_OPT_FORCE = 0x0001, - OBD_OPT_FAILOVER = 0x0002, - OBD_OPT_ABORT_RECOV = 0x0004, -}; - -/** - * Export structure. Represents target-side of connection in portals. - * Also used in Lustre to connect between layers on the same node when - * there is no network-connection in-between. - * For every connected client there is an export structure on the server - * attached to the same obd device. - */ -struct obd_export { - /** - * Export handle, it's id is provided to client on connect - * Subsequent client RPCs contain this handle id to identify - * what export they are talking to. - */ - struct portals_handle exp_handle; - atomic_t exp_refcount; - /** - * Set of counters below is to track where export references are - * kept. The exp_rpc_count is used for reconnect handling also, - * the cb_count and locks_count are for debug purposes only for now. 
- * The sum of them should be less than exp_refcount by 3 - */ - atomic_t exp_rpc_count; /* RPC references */ - atomic_t exp_cb_count; /* Commit callback references */ - /** Number of queued replay requests to be processes */ - atomic_t exp_replay_count; - atomic_t exp_locks_count; /** Lock references */ -#if LUSTRE_TRACKS_LOCK_EXP_REFS - struct list_head exp_locks_list; - spinlock_t exp_locks_list_guard; -#endif - /** UUID of client connected to this export */ - struct obd_uuid exp_client_uuid; - /** To link all exports on an obd device */ - struct list_head exp_obd_chain; - /** work_struct for destruction of export */ - struct work_struct exp_zombie_work; - struct hlist_node exp_uuid_hash; /** uuid-export hash*/ - /** Obd device of this export */ - struct obd_device *exp_obd; - /** - * "reverse" import to send requests (e.g. from ldlm) back to client - * exp_lock protect its change - */ - struct obd_import *exp_imp_reverse; - struct lprocfs_stats *exp_md_stats; - /** Active connection */ - struct ptlrpc_connection *exp_connection; - /** Connection count value from last successful reconnect rpc */ - __u32 exp_conn_cnt; - /** Hash list of all ldlm locks granted on this export */ - struct cfs_hash *exp_lock_hash; - /** - * Hash list for Posix lock deadlock detection, added with - * ldlm_lock::l_exp_flock_hash. 
- */ - struct cfs_hash *exp_flock_hash; - struct list_head exp_outstanding_replies; - struct list_head exp_uncommitted_replies; - spinlock_t exp_uncommitted_replies_lock; - /** Last committed transno for this export */ - __u64 exp_last_committed; - /** On replay all requests waiting for replay are linked here */ - struct list_head exp_req_replay_queue; - /** - * protects exp_flags, exp_outstanding_replies and the change - * of exp_imp_reverse - */ - spinlock_t exp_lock; - /** Compatibility flags for this export are embedded into - * exp_connect_data - */ - struct obd_connect_data exp_connect_data; - enum obd_option exp_flags; - unsigned long exp_failed:1, - exp_disconnected:1, - exp_connecting:1, - exp_flvr_changed:1, - exp_flvr_adapt:1; - /* also protected by exp_lock */ - enum lustre_sec_part exp_sp_peer; - struct sptlrpc_flavor exp_flvr; /* current */ - struct sptlrpc_flavor exp_flvr_old[2]; /* about-to-expire */ - time64_t exp_flvr_expire[2]; /* seconds */ - - /** protects exp_hp_rpcs */ - spinlock_t exp_rpc_lock; - struct list_head exp_hp_rpcs; /* (potential) HP RPCs */ - - /** blocking dlm lock list, protected by exp_bl_list_lock */ - struct list_head exp_bl_list; - spinlock_t exp_bl_list_lock; -}; - -static inline __u64 *exp_connect_flags_ptr(struct obd_export *exp) -{ - return &exp->exp_connect_data.ocd_connect_flags; -} - -static inline __u64 exp_connect_flags(struct obd_export *exp) -{ - return *exp_connect_flags_ptr(exp); -} - -static inline int exp_max_brw_size(struct obd_export *exp) -{ - if (exp_connect_flags(exp) & OBD_CONNECT_BRW_SIZE) - return exp->exp_connect_data.ocd_brw_size; - - return ONE_MB_BRW_SIZE; -} - -static inline int exp_connect_multibulk(struct obd_export *exp) -{ - return exp_max_brw_size(exp) > ONE_MB_BRW_SIZE; -} - -static inline int exp_connect_cancelset(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_CANCELSET); -} - -static inline int exp_connect_lru_resize(struct obd_export *exp) -{ - return 
!!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE); -} - -static inline int exp_connect_vbr(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR); -} - -static inline int exp_connect_som(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_SOM); -} - -static inline int exp_connect_umask(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_UMASK); -} - -static inline int imp_connect_lru_resize(struct obd_import *imp) -{ - struct obd_connect_data *ocd; - - ocd = &imp->imp_connect_data; - return !!(ocd->ocd_connect_flags & OBD_CONNECT_LRU_RESIZE); -} - -static inline int exp_connect_layout(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_LAYOUTLOCK); -} - -static inline bool exp_connect_lvb_type(struct obd_export *exp) -{ - if (exp_connect_flags(exp) & OBD_CONNECT_LVB_TYPE) - return true; - else - return false; -} - -static inline bool imp_connect_lvb_type(struct obd_import *imp) -{ - struct obd_connect_data *ocd; - - ocd = &imp->imp_connect_data; - if (ocd->ocd_connect_flags & OBD_CONNECT_LVB_TYPE) - return true; - else - return false; -} - -static inline __u64 exp_connect_ibits(struct obd_export *exp) -{ - struct obd_connect_data *ocd; - - ocd = &exp->exp_connect_data; - return ocd->ocd_ibits_known; -} - -static inline bool imp_connect_disp_stripe(struct obd_import *imp) -{ - struct obd_connect_data *ocd; - - ocd = &imp->imp_connect_data; - return ocd->ocd_connect_flags & OBD_CONNECT_DISP_STRIPE; -} - -struct obd_export *class_conn2export(struct lustre_handle *conn); - -#define KKUC_CT_DATA_MAGIC 0x092013cea -struct kkuc_ct_data { - __u32 kcd_magic; - struct obd_uuid kcd_uuid; - __u32 kcd_archive; -}; - -/** @} export */ - -#endif /* __EXPORT_H */ -/** @} obd_export */ diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h deleted file mode 100644 index 094ad282de2c..000000000000 --- 
a/drivers/staging/lustre/lustre/include/lustre_fid.h +++ /dev/null @@ -1,676 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_fid.h - * - * Author: Yury Umanets <umka@clusterfs.com> - */ - -#ifndef __LUSTRE_FID_H -#define __LUSTRE_FID_H - -/** \defgroup fid fid - * - * @{ - * - * http://wiki.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs - * describes the FID namespace and interoperability requirements for FIDs. - * The important parts of that document are included here for reference. - * - * FID - * File IDentifier generated by client from range allocated by the SEQuence - * service and stored in struct lu_fid. The FID is composed of three parts: - * SEQuence, ObjectID, and VERsion. The SEQ component is a filesystem - * unique 64-bit integer, and only one client is ever assigned any SEQ value. 
- * The first 0x400 FID_SEQ_NORMAL [2^33, 2^33 + 0x400] values are reserved - * for system use. The OID component is a 32-bit value generated by the - * client on a per-SEQ basis to allow creating many unique FIDs without - * communication with the server. The VER component is a 32-bit value that - * distinguishes between different FID instantiations, such as snapshots or - * separate subtrees within the filesystem. FIDs with the same VER field - * are considered part of the same namespace. - * - * OLD filesystems are those upgraded from Lustre 1.x that predate FIDs, and - * MDTs use 32-bit ldiskfs internal inode/generation numbers (IGIFs), while - * OSTs use 64-bit Lustre object IDs and generation numbers. - * - * NEW filesystems are those formatted since the introduction of FIDs. - * - * IGIF - * Inode and Generation In FID, a surrogate FID used to globally identify - * an existing object on OLD formatted MDT file system. This would only be - * used on MDT0 in a DNE filesystem, because there cannot be more than one - * MDT in an OLD formatted filesystem. Belongs to sequence in [12, 2^32 - 1] - * range, where inode number is stored in SEQ, and inode generation is in OID. - * NOTE: This assumes no more than 2^32-1 inodes exist in the MDT filesystem, - * which is the maximum possible for an ldiskfs backend. It also assumes - * that the reserved ext3/ext4/ldiskfs inode numbers [0-11] are never visible - * to clients, which has always been true. - * - * IDIF - * object ID In FID, a surrogate FID used to globally identify an existing - * OST object on OLD formatted OST file system. Belongs to a sequence in - * [2^32, 2^33 - 1]. Sequence number is calculated as: - * - * 1 << 32 | (ost_index << 16) | ((objid >> 32) & 0xffff) - * - * that is, SEQ consists of 16-bit OST index, and higher 16 bits of object - * ID. The generation of unique SEQ values per OST allows the IDIF FIDs to - * be identified in the FLD correctly. 
The OID field is calculated as: - * - * objid & 0xffffffff - * - * that is, it consists of lower 32 bits of object ID. For objects within - * the IDIF range, object ID extraction will be: - * - * o_id = (fid->f_seq & 0x7fff) << 16 | fid->f_oid; - * o_seq = 0; // formerly group number - * - * NOTE: This assumes that no more than 2^48-1 objects have ever been created - * on any OST, and that no more than 65535 OSTs are in use. Both are very - * reasonable assumptions, i.e. an IDIF can uniquely map all objects assuming - * a maximum creation rate of 1M objects per second for a maximum of 9 years, - * or combinations thereof. - * - * OST_MDT0 - * Surrogate FID used to identify an existing object on OLD formatted OST - * filesystem. Belongs to the reserved SEQuence 0, and is used prior to - * the introduction of FID-on-OST, at which point IDIF will be used to - * identify objects as residing on a specific OST. - * - * LLOG - * For Lustre Log objects the object sequence 1 is used. This is compatible - * with both OLD and NEW namespaces, as this SEQ number is in the - * ext3/ldiskfs reserved inode range and does not conflict with IGIF - * sequence numbers. - * - * ECHO - * For testing OST IO performance the object sequence 2 is used. This is - * compatible with both OLD and NEW namespaces, as this SEQ number is in - * the ext3/ldiskfs reserved inode range and does not conflict with IGIF - * sequence numbers. - * - * OST_MDT1 .. OST_MAX - * For testing with multiple MDTs the object sequence 3 through 9 is used, - * allowing direct mapping of MDTs 1 through 7 respectively, for a total - * of 8 MDTs including OST_MDT0. This matches the legacy CMD project "group" - * mappings. However, this SEQ range is only for testing prior to any - * production DNE release, as the objects in this range conflict across all - * OSTs, as the OST index is not part of the FID. For production DNE usage, - * OST objects created by MDT1+ will use FID_SEQ_NORMAL FIDs. 
- * - * DLM OST objid to IDIF mapping - * For compatibility with existing OLD OST network protocol structures, the - * FID must map onto the o_id and o_seq in a manner that ensures existing - * objects are identified consistently for IO, as well as onto the LDLM - * namespace to ensure IDIFs there is only a single resource name for any - * object in the DLM. The OLD OST object DLM resource mapping is: - * - * resource[] = {o_id, o_seq, 0, 0}; // o_seq == 0 for production releases - * - * The NEW OST object DLM resource mapping is the same for both MDT and OST: - * - * resource[] = {SEQ, OID, VER, HASH}; - * - * NOTE: for mapping IDIF values to DLM resource names the o_id may be - * larger than the 2^33 reserved sequence numbers for IDIF, so it is possible - * for the o_id numbers to overlap FID SEQ numbers in the resource. However, - * in all production releases the OLD o_seq field is always zero, and all - * valid FID OID values are non-zero, so the lock resources will not collide. - * Even so, the MDT and OST resources are also in different LDLM namespaces. 
- */ - -#include <linux/libcfs/libcfs.h> -#include <uapi/linux/lustre/lustre_fid.h> -#include <uapi/linux/lustre/lustre_idl.h> -#include <uapi/linux/lustre/lustre_ostid.h> - -struct lu_env; -struct lu_site; -struct lu_context; -struct obd_device; -struct obd_export; - -/* Whole sequences space range and zero range definitions */ -extern const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE; -extern const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE; -extern const struct lu_fid LUSTRE_BFL_FID; -extern const struct lu_fid LU_OBF_FID; -extern const struct lu_fid LU_DOT_LUSTRE_FID; - -enum { - /* - * This is how may metadata FIDs may be allocated in one sequence(128k) - */ - LUSTRE_METADATA_SEQ_MAX_WIDTH = 0x0000000000020000ULL, - - /* - * This is how many data FIDs could be allocated in one sequence(4B - 1) - */ - LUSTRE_DATA_SEQ_MAX_WIDTH = 0x00000000FFFFFFFFULL, - - /* - * How many sequences to allocate to a client at once. - */ - LUSTRE_SEQ_META_WIDTH = 0x0000000000000001ULL, - - /* - * seq allocation pool size. - */ - LUSTRE_SEQ_BATCH_WIDTH = LUSTRE_SEQ_META_WIDTH * 1000, - - /* - * This is how many sequences may be in one super-sequence allocated to - * MDTs. 
- */ - LUSTRE_SEQ_SUPER_WIDTH = ((1ULL << 30ULL) * LUSTRE_SEQ_META_WIDTH) -}; - -enum { - /** 2^6 FIDs for OI containers */ - OSD_OI_FID_OID_BITS = 6, - /** reserve enough FIDs in case we want more in the future */ - OSD_OI_FID_OID_BITS_MAX = 10, -}; - -/** special OID for local objects */ -enum local_oid { - /** \see fld_mod_init */ - FLD_INDEX_OID = 3UL, - /** \see fid_mod_init */ - FID_SEQ_CTL_OID = 4UL, - FID_SEQ_SRV_OID = 5UL, - /** \see mdd_mod_init */ - MDD_ROOT_INDEX_OID = 6UL, /* deprecated in 2.4 */ - MDD_ORPHAN_OID = 7UL, /* deprecated in 2.4 */ - MDD_LOV_OBJ_OID = 8UL, - MDD_CAPA_KEYS_OID = 9UL, - /** \see mdt_mod_init */ - LAST_RECV_OID = 11UL, - OSD_FS_ROOT_OID = 13UL, - ACCT_USER_OID = 15UL, - ACCT_GROUP_OID = 16UL, - LFSCK_BOOKMARK_OID = 17UL, - OTABLE_IT_OID = 18UL, - /* These two definitions are obsolete - * OFD_GROUP0_LAST_OID = 20UL, - * OFD_GROUP4K_LAST_OID = 20UL+4096, - */ - OFD_LAST_GROUP_OID = 4117UL, - LLOG_CATALOGS_OID = 4118UL, - MGS_CONFIGS_OID = 4119UL, - OFD_HEALTH_CHECK_OID = 4120UL, - MDD_LOV_OBJ_OSEQ = 4121UL, - LFSCK_NAMESPACE_OID = 4122UL, - REMOTE_PARENT_DIR_OID = 4123UL, - SLAVE_LLOG_CATALOGS_OID = 4124UL, -}; - -static inline void lu_local_obj_fid(struct lu_fid *fid, __u32 oid) -{ - fid->f_seq = FID_SEQ_LOCAL_FILE; - fid->f_oid = oid; - fid->f_ver = 0; -} - -static inline void lu_local_name_obj_fid(struct lu_fid *fid, __u32 oid) -{ - fid->f_seq = FID_SEQ_LOCAL_NAME; - fid->f_oid = oid; - fid->f_ver = 0; -} - -/* For new FS (>= 2.4), the root FID will be changed to - * [FID_SEQ_ROOT:1:0], for existing FS, (upgraded to 2.4), - * the root FID will still be IGIF - */ -static inline int fid_is_root(const struct lu_fid *fid) -{ - return unlikely((fid_seq(fid) == FID_SEQ_ROOT && - fid_oid(fid) == 1)); -} - -static inline int fid_is_dot_lustre(const struct lu_fid *fid) -{ - return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE && - fid_oid(fid) == FID_OID_DOT_LUSTRE); -} - -static inline int fid_is_obf(const struct lu_fid *fid) -{ - 
return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE && - fid_oid(fid) == FID_OID_DOT_LUSTRE_OBF); -} - -static inline int fid_is_otable_it(const struct lu_fid *fid) -{ - return unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE && - fid_oid(fid) == OTABLE_IT_OID); -} - -static inline int fid_is_acct(const struct lu_fid *fid) -{ - return fid_seq(fid) == FID_SEQ_LOCAL_FILE && - (fid_oid(fid) == ACCT_USER_OID || - fid_oid(fid) == ACCT_GROUP_OID); -} - -static inline int fid_is_quota(const struct lu_fid *fid) -{ - return fid_seq(fid) == FID_SEQ_QUOTA || - fid_seq(fid) == FID_SEQ_QUOTA_GLB; -} - -static inline int fid_seq_in_fldb(__u64 seq) -{ - return fid_seq_is_igif(seq) || fid_seq_is_norm(seq) || - fid_seq_is_root(seq) || fid_seq_is_dot(seq); -} - -static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx) -{ - if (fid_seq_is_mdt0(seq)) { - fid->f_seq = fid_idif_seq(0, ost_idx); - } else { - LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) || - fid_seq_is_idif(seq), "%#llx\n", seq); - fid->f_seq = seq; - } - fid->f_oid = 0; - fid->f_ver = 0; -} - -/* seq client type */ -enum lu_cli_type { - LUSTRE_SEQ_METADATA = 1, - LUSTRE_SEQ_DATA -}; - -enum lu_mgr_type { - LUSTRE_SEQ_SERVER, - LUSTRE_SEQ_CONTROLLER -}; - -/* Client sequence manager interface. */ -struct lu_client_seq { - /* Sequence-controller export. */ - struct obd_export *lcs_exp; - spinlock_t lcs_lock; - - /* - * Range of allowed for allocation sequences. When using lu_client_seq on - * clients, this contains meta-sequence range. And for servers this - * contains super-sequence range. - */ - struct lu_seq_range lcs_space; - - /* Seq related proc */ - struct dentry *lcs_debugfs_entry; - - /* This holds last allocated fid in last obtained seq */ - struct lu_fid lcs_fid; - - /* LUSTRE_SEQ_METADATA or LUSTRE_SEQ_DATA */ - enum lu_cli_type lcs_type; - - /* - * Service uuid, passed from MDT + seq name to form unique seq name to - * use it with procfs. 
- */ - char lcs_name[LUSTRE_MDT_MAXNAMELEN]; - - /* - * Sequence width, that is how many objects may be allocated in one - * sequence. Default value for it is LUSTRE_SEQ_MAX_WIDTH. - */ - __u64 lcs_width; - - /* wait queue for fid allocation and update indicator */ - wait_queue_head_t lcs_waitq; - int lcs_update; -}; - -/* Client methods */ -void seq_client_flush(struct lu_client_seq *seq); - -int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq, - struct lu_fid *fid); -/* Fids common stuff */ -int fid_is_local(const struct lu_env *env, - struct lu_site *site, const struct lu_fid *fid); - -enum lu_cli_type; -int client_fid_init(struct obd_device *obd, struct obd_export *exp, - enum lu_cli_type type); -int client_fid_fini(struct obd_device *obd); - -/* fid locking */ - -struct ldlm_namespace; - -/* - * Build (DLM) resource name from FID. - * - * NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2], - * but was moved into name[1] along with the OID to avoid consuming the - * renaming name[2,3] fields that need to be used for the quota identifier. - */ -static inline void -fid_build_reg_res_name(const struct lu_fid *fid, struct ldlm_res_id *res) -{ - memset(res, 0, sizeof(*res)); - res->name[LUSTRE_RES_ID_SEQ_OFF] = fid_seq(fid); - res->name[LUSTRE_RES_ID_VER_OID_OFF] = fid_ver_oid(fid); -} - -/* - * Return true if resource is for object identified by FID. - */ -static inline bool fid_res_name_eq(const struct lu_fid *fid, - const struct ldlm_res_id *res) -{ - return res->name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(fid) && - res->name[LUSTRE_RES_ID_VER_OID_OFF] == fid_ver_oid(fid); -} - -/* - * Extract FID from LDLM resource. Reverse of fid_build_reg_res_name(). 
- */ -static inline void -fid_extract_from_res_name(struct lu_fid *fid, const struct ldlm_res_id *res) -{ - fid->f_seq = res->name[LUSTRE_RES_ID_SEQ_OFF]; - fid->f_oid = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF]); - fid->f_ver = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32); - LASSERT(fid_res_name_eq(fid, res)); -} - -/* - * Build (DLM) resource identifier from global quota FID and quota ID. - */ -static inline void -fid_build_quota_res_name(const struct lu_fid *glb_fid, union lquota_id *qid, - struct ldlm_res_id *res) -{ - fid_build_reg_res_name(glb_fid, res); - res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] = fid_seq(&qid->qid_fid); - res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] = fid_ver_oid(&qid->qid_fid); -} - -/* - * Extract global FID and quota ID from resource name - */ -static inline void fid_extract_from_quota_res(struct lu_fid *glb_fid, - union lquota_id *qid, - const struct ldlm_res_id *res) -{ - fid_extract_from_res_name(glb_fid, res); - qid->qid_fid.f_seq = res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF]; - qid->qid_fid.f_oid = (__u32)res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF]; - qid->qid_fid.f_ver = - (__u32)(res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] >> 32); -} - -static inline void -fid_build_pdo_res_name(const struct lu_fid *fid, unsigned int hash, - struct ldlm_res_id *res) -{ - fid_build_reg_res_name(fid, res); - res->name[LUSTRE_RES_ID_HSH_OFF] = hash; -} - -/** - * Build DLM resource name from object id & seq, which will be removed - * finally, when we replace ost_id with FID in data stack. - * - * Currently, resid from the old client, whose res[0] = object_id, - * res[1] = object_seq, is just opposite with Metatdata - * resid, where, res[0] = fid->f_seq, res[1] = fid->f_oid. - * To unify the resid identification, we will reverse the data - * resid to keep it same with Metadata resid, i.e. - * - * For resid from the old client, - * res[0] = objid, res[1] = 0, still keep the original order, - * for compatibility. 
- * - * For new resid - * res will be built from normal FID directly, i.e. res[0] = f_seq, - * res[1] = f_oid + f_ver. - */ -static inline void ostid_build_res_name(const struct ost_id *oi, - struct ldlm_res_id *name) -{ - memset(name, 0, sizeof(*name)); - if (fid_seq_is_mdt0(ostid_seq(oi))) { - name->name[LUSTRE_RES_ID_SEQ_OFF] = ostid_id(oi); - name->name[LUSTRE_RES_ID_VER_OID_OFF] = ostid_seq(oi); - } else { - fid_build_reg_res_name(&oi->oi_fid, name); - } -} - -/** - * Return true if the resource is for the object identified by this id & group. - */ -static inline int ostid_res_name_eq(const struct ost_id *oi, - const struct ldlm_res_id *name) -{ - /* Note: it is just a trick here to save some effort, probably the - * correct way would be turn them into the FID and compare - */ - if (fid_seq_is_mdt0(ostid_seq(oi))) { - return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_id(oi) && - name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_seq(oi); - } else { - return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_seq(oi) && - name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_id(oi); - } -} - -/** - * Note: we need check oi_seq to decide where to set oi_id, - * so oi_seq should always be set ahead of oi_id. 
- */ -static inline int ostid_set_id(struct ost_id *oi, __u64 oid) -{ - if (fid_seq_is_mdt0(oi->oi.oi_seq)) { - if (oid >= IDIF_MAX_OID) - return -E2BIG; - oi->oi.oi_id = oid; - } else if (fid_is_idif(&oi->oi_fid)) { - if (oid >= IDIF_MAX_OID) - return -E2BIG; - oi->oi_fid.f_seq = fid_idif_seq(oid, - fid_idif_ost_idx(&oi->oi_fid)); - oi->oi_fid.f_oid = oid; - oi->oi_fid.f_ver = oid >> 48; - } else { - if (oid >= OBIF_MAX_OID) - return -E2BIG; - oi->oi_fid.f_oid = oid; - } - return 0; -} - -/* pack any OST FID into an ostid (id/seq) for the wire/disk */ -static inline int fid_to_ostid(const struct lu_fid *fid, struct ost_id *ostid) -{ - int rc = 0; - - if (fid_seq_is_igif(fid->f_seq)) - return -EBADF; - - if (fid_is_idif(fid)) { - u64 objid = fid_idif_id(fid_seq(fid), fid_oid(fid), - fid_ver(fid)); - - ostid_set_seq_mdt0(ostid); - rc = ostid_set_id(ostid, objid); - } else { - ostid->oi_fid = *fid; - } - - return rc; -} - -/* The same as osc_build_res_name() */ -static inline void ost_fid_build_resid(const struct lu_fid *fid, - struct ldlm_res_id *resname) -{ - if (fid_is_mdt0(fid) || fid_is_idif(fid)) { - struct ost_id oi; - - oi.oi.oi_id = 0; /* gcc 4.7.2 complains otherwise */ - if (fid_to_ostid(fid, &oi) != 0) - return; - ostid_build_res_name(&oi, resname); - } else { - fid_build_reg_res_name(fid, resname); - } -} - -/** - * Flatten 128-bit FID values into a 64-bit value for use as an inode number. - * For non-IGIF FIDs this starts just over 2^32, and continues without - * conflict until 2^64, at which point we wrap the high 24 bits of the SEQ - * into the range where there may not be many OID values in use, to minimize - * the risk of conflict. - * - * Suppose LUSTRE_SEQ_MAX_WIDTH less than (1 << 24) which is currently true, - * the time between re-used inode numbers is very long - 2^40 SEQ numbers, - * or about 2^40 client mounts, if clients create less than 2^24 files/mount. 
- */ -static inline __u64 fid_flatten(const struct lu_fid *fid) -{ - __u64 ino; - __u64 seq; - - if (fid_is_igif(fid)) { - ino = lu_igif_ino(fid); - return ino; - } - - seq = fid_seq(fid); - - ino = (seq << 24) + ((seq >> 24) & 0xffffff0000ULL) + fid_oid(fid); - - return ino ? ino : fid_oid(fid); -} - -static inline __u32 fid_hash(const struct lu_fid *f, int bits) -{ - /* all objects with same id and different versions will belong to same - * collisions list. - */ - return hash_long(fid_flatten(f), bits); -} - -/** - * map fid to 32 bit value for ino on 32bit systems. - */ -static inline __u32 fid_flatten32(const struct lu_fid *fid) -{ - __u32 ino; - __u64 seq; - - if (fid_is_igif(fid)) { - ino = lu_igif_ino(fid); - return ino; - } - - seq = fid_seq(fid) - FID_SEQ_START; - - /* Map the high bits of the OID into higher bits of the inode number so - * that inodes generated at about the same time have a reduced chance - * of collisions. This will give a period of 2^12 = 1024 unique clients - * (from SEQ) and up to min(LUSTRE_SEQ_MAX_WIDTH, 2^20) = 128k objects - * (from OID), or up to 128M inodes without collisions for new files. - */ - ino = ((seq & 0x000fffffULL) << 12) + ((seq >> 8) & 0xfffff000) + - (seq >> (64 - (40 - 8)) & 0xffffff00) + - (fid_oid(fid) & 0xff000fff) + ((fid_oid(fid) & 0x00fff000) << 8); - - return ino ? 
ino : fid_oid(fid); -} - -static inline int lu_fid_diff(const struct lu_fid *fid1, - const struct lu_fid *fid2) -{ - LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:" DFID ", fid2:" DFID "\n", - PFID(fid1), PFID(fid2)); - - if (fid_is_idif(fid1) && fid_is_idif(fid2)) - return fid_idif_id(fid1->f_seq, fid1->f_oid, fid1->f_ver) - - fid_idif_id(fid2->f_seq, fid2->f_oid, fid2->f_ver); - - return fid_oid(fid1) - fid_oid(fid2); -} - -#define LUSTRE_SEQ_SRV_NAME "seq_srv" -#define LUSTRE_SEQ_CTL_NAME "seq_ctl" - -/* Range common stuff */ -static inline void range_cpu_to_le(struct lu_seq_range *dst, const struct lu_seq_range *src) -{ - dst->lsr_start = cpu_to_le64(src->lsr_start); - dst->lsr_end = cpu_to_le64(src->lsr_end); - dst->lsr_index = cpu_to_le32(src->lsr_index); - dst->lsr_flags = cpu_to_le32(src->lsr_flags); -} - -static inline void range_le_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src) -{ - dst->lsr_start = le64_to_cpu(src->lsr_start); - dst->lsr_end = le64_to_cpu(src->lsr_end); - dst->lsr_index = le32_to_cpu(src->lsr_index); - dst->lsr_flags = le32_to_cpu(src->lsr_flags); -} - -static inline void range_cpu_to_be(struct lu_seq_range *dst, const struct lu_seq_range *src) -{ - dst->lsr_start = cpu_to_be64(src->lsr_start); - dst->lsr_end = cpu_to_be64(src->lsr_end); - dst->lsr_index = cpu_to_be32(src->lsr_index); - dst->lsr_flags = cpu_to_be32(src->lsr_flags); -} - -static inline void range_be_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src) -{ - dst->lsr_start = be64_to_cpu(src->lsr_start); - dst->lsr_end = be64_to_cpu(src->lsr_end); - dst->lsr_index = be32_to_cpu(src->lsr_index); - dst->lsr_flags = be32_to_cpu(src->lsr_flags); -} - -/** @} fid */ - -#endif /* __LUSTRE_FID_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h deleted file mode 100644 index 4055bbd24c55..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_fld.h +++ /dev/null @@ -1,138 
+0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LINUX_FLD_H -#define __LINUX_FLD_H - -/** \defgroup fld fld - * - * @{ - */ - -#include <uapi/linux/lustre/lustre_idl.h> -#include <linux/libcfs/libcfs.h> -#include <seq_range.h> - -struct lu_client_fld; -struct lu_server_fld; -struct lu_fld_hash; -struct fld_cache; - -extern const struct dt_index_features fld_index_features; -extern const char fld_index_name[]; - -/* - * FLD (Fid Location Database) interface. - */ -enum { - LUSTRE_CLI_FLD_HASH_DHT = 0, - LUSTRE_CLI_FLD_HASH_RRB -}; - -struct lu_fld_target { - struct list_head ft_chain; - struct obd_export *ft_exp; - struct lu_server_fld *ft_srv; - __u64 ft_idx; -}; - -struct lu_server_fld { - /** - * super sequence controller export, needed to forward fld - * lookup request. - */ - struct obd_export *lsf_control_exp; - - /** Client FLD cache. 
*/ - struct fld_cache *lsf_cache; - - /** Protect index modifications */ - struct mutex lsf_lock; - - /** Fld service name in form "fld-srv-lustre-MDTXXX" */ - char lsf_name[LUSTRE_MDT_MAXNAMELEN]; - -}; - -struct lu_client_fld { - /** Client side debugfs entry. */ - struct dentry *lcf_debugfs_entry; - - /** List of exports client FLD knows about. */ - struct list_head lcf_targets; - - /** Current hash to be used to chose an export. */ - struct lu_fld_hash *lcf_hash; - - /** Exports count. */ - int lcf_count; - - /** Lock protecting exports list and fld_hash. */ - spinlock_t lcf_lock; - - /** Client FLD cache. */ - struct fld_cache *lcf_cache; - - /** Client fld debugfs entry name. */ - char lcf_name[LUSTRE_MDT_MAXNAMELEN]; -}; - -/* Client methods */ -int fld_client_init(struct lu_client_fld *fld, - const char *prefix, int hash); - -void fld_client_fini(struct lu_client_fld *fld); - -void fld_client_flush(struct lu_client_fld *fld); - -int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds, - __u32 flags, const struct lu_env *env); - -int fld_client_create(struct lu_client_fld *fld, - struct lu_seq_range *range, - const struct lu_env *env); - -int fld_client_delete(struct lu_client_fld *fld, u64 seq, - const struct lu_env *env); - -int fld_client_add_target(struct lu_client_fld *fld, - struct lu_fld_target *tar); - -int fld_client_del_target(struct lu_client_fld *fld, - __u64 idx); - -void fld_client_debugfs_fini(struct lu_client_fld *fld); - -/** @} fld */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h deleted file mode 100644 index cbd68985ada9..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_ha.h +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef _LUSTRE_HA_H -#define _LUSTRE_HA_H - -/** \defgroup ha ha - * - * @{ - */ - -struct obd_import; -struct obd_export; -struct obd_device; -struct ptlrpc_request; - -int ptlrpc_replay(struct obd_import *imp); -int ptlrpc_resend(struct obd_import *imp); -void ptlrpc_free_committed(struct obd_import *imp); -void ptlrpc_wake_delayed(struct obd_import *imp); -int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async); -int ptlrpc_set_import_active(struct obd_import *imp, int active); -void ptlrpc_activate_import(struct obd_import *imp); -void ptlrpc_deactivate_import(struct obd_import *imp); -void ptlrpc_invalidate_import(struct obd_import *imp); -void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt); -void ptlrpc_pinger_force(struct obd_import *imp); - -/** @} ha */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h deleted file mode 100644 index c48c97362cf6..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_handles.h +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LUSTRE_HANDLES_H_ -#define __LUSTRE_HANDLES_H_ - -/** \defgroup handles handles - * - * @{ - */ - -#include <linux/atomic.h> -#include <linux/list.h> -#include <linux/rcupdate.h> -#include <linux/spinlock.h> -#include <linux/types.h> - -#include <linux/libcfs/libcfs.h> - -struct portals_handle_ops { - void (*hop_addref)(void *object); - void (*hop_free)(void *object, int size); -}; - -/* These handles are most easily used by having them appear at the very top of - * whatever object that you want to make handles for. ie: - * - * struct ldlm_lock { - * struct portals_handle handle; - * ... - * }; - * - * Now you're able to assign the results of cookie2handle directly to an - * ldlm_lock. If it's not at the top, you'll want to use container_of() - * to compute the start of the structure based on the handle field. - */ -struct portals_handle { - struct list_head h_link; - __u64 h_cookie; - const void *h_owner; - struct portals_handle_ops *h_ops; - - /* newly added fields to handle the RCU issue. 
-jxiong */ - struct rcu_head h_rcu; - spinlock_t h_lock; - unsigned int h_size:31; - unsigned int h_in:1; -}; - -/* handles.c */ - -/* Add a handle to the hash table */ -void class_handle_hash(struct portals_handle *, - struct portals_handle_ops *ops); -void class_handle_unhash(struct portals_handle *); -void *class_handle2object(__u64 cookie, const void *owner); -void class_handle_free_cb(struct rcu_head *rcu); -int class_handle_init(void); -void class_handle_cleanup(void); - -/** @} handles */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h deleted file mode 100644 index 1731048f1ff2..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_import.h +++ /dev/null @@ -1,368 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ -/** \defgroup obd_import PtlRPC import definitions - * Imports are client-side representation of remote obd target. - * - * @{ - */ - -#ifndef __IMPORT_H -#define __IMPORT_H - -/** \defgroup export export - * - * @{ - */ - -#include <lustre_handles.h> -#include <uapi/linux/lustre/lustre_idl.h> - -/** - * Adaptive Timeout stuff - * - * @{ - */ -#define D_ADAPTTO D_OTHER -#define AT_BINS 4 /* "bin" means "N seconds of history" */ -#define AT_FLG_NOHIST 0x1 /* use last reported value only */ - -struct adaptive_timeout { - time64_t at_binstart; /* bin start time */ - unsigned int at_hist[AT_BINS]; /* timeout history bins */ - unsigned int at_flags; - unsigned int at_current; /* current timeout value */ - unsigned int at_worst_ever; /* worst-ever timeout value */ - time64_t at_worst_time; /* worst-ever timeout timestamp */ - spinlock_t at_lock; -}; - -struct ptlrpc_at_array { - struct list_head *paa_reqs_array; /** array to hold requests */ - __u32 paa_size; /** the size of array */ - __u32 paa_count; /** the total count of reqs */ - time64_t paa_deadline; /** the earliest deadline of reqs */ - __u32 *paa_reqs_count; /** the count of reqs in each entry */ -}; - -#define IMP_AT_MAX_PORTALS 8 -struct imp_at { - int iat_portal[IMP_AT_MAX_PORTALS]; - struct adaptive_timeout iat_net_latency; - struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS]; -}; - -/** @} */ - -/** Possible import states */ -enum lustre_imp_state { - LUSTRE_IMP_CLOSED = 1, - LUSTRE_IMP_NEW = 2, - LUSTRE_IMP_DISCON = 3, - LUSTRE_IMP_CONNECTING = 4, - LUSTRE_IMP_REPLAY = 5, - LUSTRE_IMP_REPLAY_LOCKS = 6, - LUSTRE_IMP_REPLAY_WAIT = 7, - LUSTRE_IMP_RECOVER = 8, - LUSTRE_IMP_FULL = 9, - LUSTRE_IMP_EVICTED = 10, -}; - -/** Returns test string representation of numeric import state \a state */ -static inline char *ptlrpc_import_state_name(enum lustre_imp_state state) -{ - static char *import_state_names[] = { - "<UNKNOWN>", "CLOSED", "NEW", "DISCONN", - "CONNECTING", "REPLAY", "REPLAY_LOCKS", 
"REPLAY_WAIT", - "RECOVER", "FULL", "EVICTED", - }; - - LASSERT(state <= LUSTRE_IMP_EVICTED); - return import_state_names[state]; -} - -/** - * List of import event types - */ -enum obd_import_event { - IMP_EVENT_DISCON = 0x808001, - IMP_EVENT_INACTIVE = 0x808002, - IMP_EVENT_INVALIDATE = 0x808003, - IMP_EVENT_ACTIVE = 0x808004, - IMP_EVENT_OCD = 0x808005, - IMP_EVENT_DEACTIVATE = 0x808006, - IMP_EVENT_ACTIVATE = 0x808007, -}; - -/** - * Definition of import connection structure - */ -struct obd_import_conn { - /** Item for linking connections together */ - struct list_head oic_item; - /** Pointer to actual PortalRPC connection */ - struct ptlrpc_connection *oic_conn; - /** uuid of remote side */ - struct obd_uuid oic_uuid; - /** - * Time (64 bit jiffies) of last connection attempt on this connection - */ - __u64 oic_last_attempt; -}; - -/* state history */ -#define IMP_STATE_HIST_LEN 16 -struct import_state_hist { - enum lustre_imp_state ish_state; - time64_t ish_time; -}; - -/** - * Definition of PortalRPC import structure. - * Imports are representing client-side view to remote target. - */ -struct obd_import { - /** Local handle (== id) for this import. */ - struct portals_handle imp_handle; - /** Reference counter */ - atomic_t imp_refcount; - struct lustre_handle imp_dlm_handle; /* client's ldlm export */ - /** Currently active connection */ - struct ptlrpc_connection *imp_connection; - /** PortalRPC client structure for this import */ - struct ptlrpc_client *imp_client; - /** List element for linking into pinger chain */ - struct list_head imp_pinger_chain; - /** work struct for destruction of import */ - struct work_struct imp_zombie_work; - - /** - * Lists of requests that are retained for replay, waiting for a reply, - * or waiting for recovery to complete, respectively. 
- * @{ - */ - struct list_head imp_replay_list; - struct list_head imp_sending_list; - struct list_head imp_delayed_list; - /** @} */ - - /** - * List of requests that are retained for committed open replay. Once - * open is committed, open replay request will be moved from the - * imp_replay_list into the imp_committed_list. - * The imp_replay_cursor is for accelerating searching during replay. - * @{ - */ - struct list_head imp_committed_list; - struct list_head *imp_replay_cursor; - /** @} */ - - /** List of not replied requests */ - struct list_head imp_unreplied_list; - /** Known maximal replied XID */ - __u64 imp_known_replied_xid; - - /** obd device for this import */ - struct obd_device *imp_obd; - - /** - * some seciruty-related fields - * @{ - */ - struct ptlrpc_sec *imp_sec; - struct mutex imp_sec_mutex; - time64_t imp_sec_expire; - /** @} */ - - /** Wait queue for those who need to wait for recovery completion */ - wait_queue_head_t imp_recovery_waitq; - - /** Number of requests currently in-flight */ - atomic_t imp_inflight; - /** Number of requests currently unregistering */ - atomic_t imp_unregistering; - /** Number of replay requests inflight */ - atomic_t imp_replay_inflight; - /** Number of currently happening import invalidations */ - atomic_t imp_inval_count; - /** Numbner of request timeouts */ - atomic_t imp_timeouts; - /** Current import state */ - enum lustre_imp_state imp_state; - /** Last replay state */ - enum lustre_imp_state imp_replay_state; - /** History of import states */ - struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN]; - int imp_state_hist_idx; - /** Current import generation. 
Incremented on every reconnect */ - int imp_generation; - /** Incremented every time we send reconnection request */ - __u32 imp_conn_cnt; - /** - * \see ptlrpc_free_committed remembers imp_generation value here - * after a check to save on unnecessary replay list iterations - */ - int imp_last_generation_checked; - /** Last transno we replayed */ - __u64 imp_last_replay_transno; - /** Last transno committed on remote side */ - __u64 imp_peer_committed_transno; - /** - * \see ptlrpc_free_committed remembers last_transno since its last - * check here and if last_transno did not change since last run of - * ptlrpc_free_committed and import generation is the same, we can - * skip looking for requests to remove from replay list as optimisation - */ - __u64 imp_last_transno_checked; - /** - * Remote export handle. This is how remote side knows what export - * we are talking to. Filled from response to connect request - */ - struct lustre_handle imp_remote_handle; - /** When to perform next ping. time in jiffies. */ - unsigned long imp_next_ping; - /** When we last successfully connected. time in 64bit jiffies */ - __u64 imp_last_success_conn; - - /** List of all possible connection for import. */ - struct list_head imp_conn_list; - /** - * Current connection. 
\a imp_connection is imp_conn_current->oic_conn - */ - struct obd_import_conn *imp_conn_current; - - /** Protects flags, level, generation, conn_cnt, *_list */ - spinlock_t imp_lock; - - /* flags */ - unsigned long imp_no_timeout:1, /* timeouts are disabled */ - imp_invalid:1, /* evicted */ - /* administratively disabled */ - imp_deactive:1, - /* try to recover the import */ - imp_replayable:1, - /* don't run recovery (timeout instead) */ - imp_dlm_fake:1, - /* use 1/2 timeout on MDS' OSCs */ - imp_server_timeout:1, - /* VBR: imp in delayed recovery */ - imp_delayed_recovery:1, - /* VBR: if gap was found then no lock replays - */ - imp_no_lock_replay:1, - /* recovery by versions was failed */ - imp_vbr_failed:1, - /* force an immediate ping */ - imp_force_verify:1, - /* force a scheduled ping */ - imp_force_next_verify:1, - /* pingable */ - imp_pingable:1, - /* resend for replay */ - imp_resend_replay:1, - /* disable normal recovery, for test only. */ - imp_no_pinger_recover:1, -#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE - /* need IR MNE swab */ - imp_need_mne_swab:1, -#endif - /* import must be reconnected instead of - * chosing new connection - */ - imp_force_reconnect:1, - /* import has tried to connect with server */ - imp_connect_tried:1, - /* connected but not FULL yet */ - imp_connected:1; - __u32 imp_connect_op; - struct obd_connect_data imp_connect_data; - __u64 imp_connect_flags_orig; - int imp_connect_error; - - __u32 imp_msg_magic; - __u32 imp_msghdr_flags; /* adjusted based on server capability */ - - struct imp_at imp_at; /* adaptive timeout data */ - time64_t imp_last_reply_time; /* for health check */ -}; - -/* import.c */ -static inline unsigned int at_est2timeout(unsigned int val) -{ - /* add an arbitrary minimum: 125% +5 sec */ - return (val + (val >> 2) + 5); -} - -static inline unsigned int at_timeout2est(unsigned int val) -{ - /* restore estimate value from timeout: e=4/5(t-5) */ - LASSERT(val); - return (max((val << 2) / 5, 5U) - 
4); -} - -static inline void at_reset(struct adaptive_timeout *at, int val) -{ - spin_lock(&at->at_lock); - at->at_current = val; - at->at_worst_ever = val; - at->at_worst_time = ktime_get_real_seconds(); - spin_unlock(&at->at_lock); -} - -static inline void at_init(struct adaptive_timeout *at, int val, int flags) -{ - memset(at, 0, sizeof(*at)); - spin_lock_init(&at->at_lock); - at->at_flags = flags; - at_reset(at, val); -} - -extern unsigned int at_min; -static inline int at_get(struct adaptive_timeout *at) -{ - return (at->at_current > at_min) ? at->at_current : at_min; -} - -int at_measured(struct adaptive_timeout *at, unsigned int val); -int import_at_get_index(struct obd_import *imp, int portal); -extern unsigned int at_max; -#define AT_OFF (at_max == 0) - -/* genops.c */ -struct obd_export; -struct obd_import *class_exp2cliimp(struct obd_export *); - -/** @} import */ - -#endif /* __IMPORT_H */ - -/** @} obd_import */ diff --git a/drivers/staging/lustre/lustre/include/lustre_intent.h b/drivers/staging/lustre/lustre/include/lustre_intent.h deleted file mode 100644 index 519e94fc089d..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_intent.h +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef LUSTRE_INTENT_H -#define LUSTRE_INTENT_H - -/* intent IT_XXX are defined in lustre/include/obd.h */ - -struct lookup_intent { - int it_op; - int it_create_mode; - __u64 it_flags; - int it_disposition; - int it_status; - __u64 it_lock_handle; - __u64 it_lock_bits; - int it_lock_mode; - int it_remote_lock_mode; - __u64 it_remote_lock_handle; - struct ptlrpc_request *it_request; - unsigned int it_lock_set:1; -}; - -static inline int it_disposition(struct lookup_intent *it, int flag) -{ - return it->it_disposition & flag; -} - -static inline void it_set_disposition(struct lookup_intent *it, int flag) -{ - it->it_disposition |= flag; -} - -static inline void it_clear_disposition(struct lookup_intent *it, int flag) -{ - it->it_disposition &= ~flag; -} - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_kernelcomm.h b/drivers/staging/lustre/lustre/include/lustre_kernelcomm.h deleted file mode 100644 index 2b3fa8430185..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_kernelcomm.h +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2013 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * - * Author: Nathan Rutman <nathan.rutman@sun.com> - * - * Kernel <-> userspace communication routines. - * The definitions below are used in the kernel and userspace. - */ - -#ifndef __LUSTRE_KERNELCOMM_H__ -#define __LUSTRE_KERNELCOMM_H__ - -/* For declarations shared with userspace */ -#include <uapi/linux/lustre/lustre_kernelcomm.h> - -/* prototype for callback function on kuc groups */ -typedef int (*libcfs_kkuc_cb_t)(void *data, void *cb_arg); - -/* Kernel methods */ -int libcfs_kkuc_msg_put(struct file *fp, void *payload); -int libcfs_kkuc_group_put(unsigned int group, void *payload); -int libcfs_kkuc_group_add(struct file *fp, int uid, unsigned int group, - void *data, size_t data_len); -int libcfs_kkuc_group_rem(int uid, unsigned int group); -int libcfs_kkuc_group_foreach(unsigned int group, libcfs_kkuc_cb_t cb_func, - void *cb_arg); - -#endif /* __LUSTRE_KERNELCOMM_H__ */ diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h deleted file mode 100644 index 0053eafc1c10..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * 
DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_lib.h - * - * Basic Lustre library routines. 
- */ - -#ifndef _LUSTRE_LIB_H -#define _LUSTRE_LIB_H - -/** \defgroup lib lib - * - * @{ - */ - -#include <linux/sched/signal.h> -#include <linux/signal.h> -#include <linux/types.h> -#include <linux/libcfs/libcfs.h> -#include <uapi/linux/lustre/lustre_idl.h> -#include <uapi/linux/lustre/lustre_ver.h> -#include <uapi/linux/lustre/lustre_cfg.h> - -/* target.c */ -struct ptlrpc_request; -struct obd_export; -struct lu_target; -struct l_wait_info; -#include <lustre_ha.h> -#include <lustre_net.h> - -#define LI_POISON 0x5a5a5a5a -#if BITS_PER_LONG > 32 -# define LL_POISON 0x5a5a5a5a5a5a5a5aL -#else -# define LL_POISON 0x5a5a5a5aL -#endif -#define LP_POISON ((void *)LL_POISON) - -int target_pack_pool_reply(struct ptlrpc_request *req); -int do_set_info_async(struct obd_import *imp, - int opcode, int version, - u32 keylen, void *key, - u32 vallen, void *val, - struct ptlrpc_request_set *set); - -void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); - -#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \ - sigmask(SIGTERM) | sigmask(SIGQUIT) | \ - sigmask(SIGALRM)) -static inline int l_fatal_signal_pending(struct task_struct *p) -{ - return signal_pending(p) && sigtestsetmask(&p->pending.signal, LUSTRE_FATAL_SIGS); -} - -/** @} lib */ - - - -/* l_wait_event_abortable() is a bit like wait_event_killable() - * except there is a fixed set of signals which will abort: - * LUSTRE_FATAL_SIGS - */ -#define l_wait_event_abortable(wq, condition) \ -({ \ - sigset_t __old_blocked; \ - int __ret = 0; \ - cfs_block_sigsinv(LUSTRE_FATAL_SIGS, &__old_blocked); \ - __ret = wait_event_interruptible(wq, condition); \ - cfs_restore_sigs(&__old_blocked); \ - __ret; \ -}) - -#define l_wait_event_abortable_timeout(wq, condition, timeout) \ -({ \ - sigset_t __old_blocked; \ - int __ret = 0; \ - cfs_block_sigsinv(LUSTRE_FATAL_SIGS, &__old_blocked); \ - __ret = wait_event_interruptible_timeout(wq, condition, timeout);\ - cfs_restore_sigs(&__old_blocked); \ - __ret; \ 
-}) - -#define l_wait_event_abortable_exclusive(wq, condition) \ -({ \ - sigset_t __old_blocked; \ - int __ret = 0; \ - cfs_block_sigsinv(LUSTRE_FATAL_SIGS, &__old_blocked); \ - __ret = wait_event_interruptible_exclusive(wq, condition); \ - cfs_restore_sigs(&__old_blocked); \ - __ret; \ -}) -#endif /* _LUSTRE_LIB_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_linkea.h b/drivers/staging/lustre/lustre/include/lustre_linkea.h deleted file mode 100644 index 03db1511bfd3..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_linkea.h +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2013, 2014, Intel Corporation. - * Use is subject to license terms. - * - * Author: di wang <di.wang@intel.com> - */ - -/* There are several reasons to restrict the linkEA size: - * - * 1. Under DNE mode, if we do not restrict the linkEA size, and if there - * are too many cross-MDTs hard links to the same object, then it will - * casue the llog overflow. - * - * 2. Some backend has limited size for EA. For example, if without large - * EA enabled, the ldiskfs will make all EAs to share one (4K) EA block. 
- * - * 3. Too many entries in linkEA will seriously affect linkEA performance - * because we only support to locate linkEA entry consecutively. - */ -#define MAX_LINKEA_SIZE 4096 - -struct linkea_data { - /** - * Buffer to keep link EA body. - */ - struct lu_buf *ld_buf; - /** - * The matched header, entry and its length in the EA - */ - struct link_ea_header *ld_leh; - struct link_ea_entry *ld_lee; - int ld_reclen; -}; - -int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf); -int linkea_init(struct linkea_data *ldata); -int linkea_init_with_rec(struct linkea_data *ldata); -void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen, - struct lu_name *lname, struct lu_fid *pfid); -int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname, - const struct lu_fid *pfid); -int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname, - const struct lu_fid *pfid); -void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname); -int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname, - const struct lu_fid *pfid); - -static inline void linkea_first_entry(struct linkea_data *ldata) -{ - LASSERT(ldata); - LASSERT(ldata->ld_leh); - - if (ldata->ld_leh->leh_reccount == 0) - ldata->ld_lee = NULL; - else - ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1); -} - -static inline void linkea_next_entry(struct linkea_data *ldata) -{ - LASSERT(ldata); - LASSERT(ldata->ld_leh); - - if (ldata->ld_lee) { - ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee + - ldata->ld_reclen); - if ((char *)ldata->ld_lee >= ((char *)ldata->ld_leh + - ldata->ld_leh->leh_len)) - ldata->ld_lee = NULL; - } -} diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h deleted file mode 100644 index 080ec1f8e19f..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_lmv.h +++ /dev/null @@ -1,174 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. A copy is - * included in the COPYING file that accompanied this code. - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * GPL HEADER END - */ -/* - * Copyright (c) 2013, Intel Corporation. - */ -/* - * lustre/include/lustre_lmv.h - * - * Lustre LMV structures and functions. 
- * - * Author: Di Wang <di.wang@intel.com> - */ - -#ifndef _LUSTRE_LMV_H -#define _LUSTRE_LMV_H -#include <uapi/linux/lustre/lustre_idl.h> - -struct lmv_oinfo { - struct lu_fid lmo_fid; - u32 lmo_mds; - struct inode *lmo_root; -}; - -struct lmv_stripe_md { - __u32 lsm_md_magic; - __u32 lsm_md_stripe_count; - __u32 lsm_md_master_mdt_index; - __u32 lsm_md_hash_type; - __u32 lsm_md_layout_version; - __u32 lsm_md_default_count; - __u32 lsm_md_default_index; - char lsm_md_pool_name[LOV_MAXPOOLNAME + 1]; - struct lmv_oinfo lsm_md_oinfo[0]; -}; - -static inline bool -lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2) -{ - __u32 idx; - - if (lsm1->lsm_md_magic != lsm2->lsm_md_magic || - lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count || - lsm1->lsm_md_master_mdt_index != lsm2->lsm_md_master_mdt_index || - lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type || - lsm1->lsm_md_layout_version != lsm2->lsm_md_layout_version || - strcmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name) != 0) - return false; - - for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) { - if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid, - &lsm2->lsm_md_oinfo[idx].lmo_fid)) - return false; - } - - return true; -} - -union lmv_mds_md; - -void lmv_free_memmd(struct lmv_stripe_md *lsm); - -static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst, - const struct lmv_mds_md_v1 *lmv_src) -{ - __u32 i; - - lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic); - lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count); - lmv_dst->lmv_master_mdt_index = - le32_to_cpu(lmv_src->lmv_master_mdt_index); - lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type); - lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version); - - for (i = 0; i < lmv_src->lmv_stripe_count; i++) - fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i], - &lmv_src->lmv_stripe_fids[i]); -} - -static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst, - const union lmv_mds_md *lmv_src) 
-{ - switch (le32_to_cpu(lmv_src->lmv_magic)) { - case LMV_MAGIC_V1: - lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1); - break; - default: - break; - } -} - -/* This hash is only for testing purpose */ -static inline unsigned int -lmv_hash_all_chars(unsigned int count, const char *name, int namelen) -{ - const unsigned char *p = (const unsigned char *)name; - unsigned int c = 0; - - while (--namelen >= 0) - c += p[namelen]; - - c = c % count; - - return c; -} - -static inline unsigned int -lmv_hash_fnv1a(unsigned int count, const char *name, int namelen) -{ - __u64 hash; - - hash = lustre_hash_fnv_1a_64(name, namelen); - - return do_div(hash, count); -} - -static inline int lmv_name_to_stripe_index(__u32 lmv_hash_type, - unsigned int stripe_count, - const char *name, int namelen) -{ - __u32 hash_type = lmv_hash_type & LMV_HASH_TYPE_MASK; - int idx; - - LASSERT(namelen > 0); - if (stripe_count <= 1) - return 0; - - /* for migrating object, always start from 0 stripe */ - if (lmv_hash_type & LMV_HASH_FLAG_MIGRATION) - return 0; - - switch (hash_type) { - case LMV_HASH_TYPE_ALL_CHARS: - idx = lmv_hash_all_chars(stripe_count, name, namelen); - break; - case LMV_HASH_TYPE_FNV_1A_64: - idx = lmv_hash_fnv1a(stripe_count, name, namelen); - break; - default: - idx = -EBADFD; - break; - } - CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name, - hash_type, idx); - - return idx; -} - -static inline bool lmv_is_known_hash_type(__u32 type) -{ - return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 || - (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS; -} - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h deleted file mode 100644 index 07f4e600386b..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_log.h +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS 
FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_log.h - * - * Generic infrastructure for managing a collection of logs. 
- * These logs are used for: - * - * - orphan recovery: OST adds record on create - * - mtime/size consistency: the OST adds a record on first write - * - open/unlinked objects: OST adds a record on destroy - * - * - mds unlink log: the MDS adds an entry upon delete - * - * - raid1 replication log between OST's - * - MDS replication logs - */ - -#ifndef _LUSTRE_LOG_H -#define _LUSTRE_LOG_H - -/** \defgroup log log - * - * @{ - */ - -#include <obd_class.h> -#include <uapi/linux/lustre/lustre_idl.h> - -#define LOG_NAME_LIMIT(logname, name) \ - snprintf(logname, sizeof(logname), "LOGS/%s", name) -#define LLOG_EEMPTY 4711 - -enum llog_open_param { - LLOG_OPEN_EXISTS = 0x0000, - LLOG_OPEN_NEW = 0x0001, -}; - -struct plain_handle_data { - struct list_head phd_entry; - struct llog_handle *phd_cat_handle; - struct llog_cookie phd_cookie; /* cookie of this log in its cat */ -}; - -struct cat_handle_data { - struct list_head chd_head; - struct llog_handle *chd_current_log; /* currently open log */ - struct llog_handle *chd_next_log; /* llog to be used next */ -}; - -struct llog_handle; - -/* llog.c - general API */ -int llog_init_handle(const struct lu_env *env, struct llog_handle *handle, - int flags, struct obd_uuid *uuid); -int llog_process(const struct lu_env *env, struct llog_handle *loghandle, - llog_cb_t cb, void *data, void *catdata); -int llog_process_or_fork(const struct lu_env *env, - struct llog_handle *loghandle, - llog_cb_t cb, void *data, void *catdata, bool fork); -int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt, - struct llog_handle **lgh, struct llog_logid *logid, - char *name, enum llog_open_param open_param); -int llog_close(const struct lu_env *env, struct llog_handle *cathandle); - -/* llog_process flags */ -#define LLOG_FLAG_NODEAMON 0x0001 - -/* llog_cat.c - catalog api */ -struct llog_process_data { - /** - * Any useful data needed while processing catalog. This is - * passed later to process callback. 
- */ - void *lpd_data; - /** - * Catalog process callback function, called for each record - * in catalog. - */ - llog_cb_t lpd_cb; - /** - * Start processing the catalog from startcat/startidx - */ - int lpd_startcat; - int lpd_startidx; -}; - -struct llog_process_cat_data { - /** - * Temporary stored first_idx while scanning log. - */ - int lpcd_first_idx; - /** - * Temporary stored last_idx while scanning log. - */ - int lpcd_last_idx; -}; - -struct thandle; - -int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle); -int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh, - llog_cb_t cb, void *data, int startcat, int startidx); - -/* llog_obd.c */ -int llog_setup(const struct lu_env *env, struct obd_device *obd, - struct obd_llog_group *olg, int index, - struct obd_device *disk_obd, struct llog_operations *op); -int __llog_ctxt_put(const struct lu_env *env, struct llog_ctxt *ctxt); -int llog_cleanup(const struct lu_env *env, struct llog_ctxt *); - -/* llog_net.c */ -int llog_initiator_connect(struct llog_ctxt *ctxt); - -struct llog_operations { - int (*lop_next_block)(const struct lu_env *env, struct llog_handle *h, - int *curr_idx, int next_idx, __u64 *offset, - void *buf, int len); - int (*lop_prev_block)(const struct lu_env *env, struct llog_handle *h, - int prev_idx, void *buf, int len); - int (*lop_read_header)(const struct lu_env *env, - struct llog_handle *handle); - int (*lop_setup)(const struct lu_env *env, struct obd_device *obd, - struct obd_llog_group *olg, int ctxt_idx, - struct obd_device *disk_obd); - int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp, - int flags); - int (*lop_cleanup)(const struct lu_env *env, struct llog_ctxt *ctxt); - int (*lop_cancel)(const struct lu_env *env, struct llog_ctxt *ctxt, - struct llog_cookie *cookies, int flags); - int (*lop_connect)(struct llog_ctxt *ctxt, struct llog_logid *logid, - struct llog_gen *gen, struct obd_uuid *uuid); - /** - * Any llog file must be 
opened first using llog_open(). Llog can be - * opened by name, logid or without both, in last case the new logid - * will be generated. - */ - int (*lop_open)(const struct lu_env *env, struct llog_handle *lgh, - struct llog_logid *logid, char *name, - enum llog_open_param); - /** - * Opened llog may not exist and this must be checked where needed using - * the llog_exist() call. - */ - int (*lop_exist)(struct llog_handle *lgh); - /** - * Close llog file and calls llog_free_handle() implicitly. - * Any opened llog must be closed by llog_close() call. - */ - int (*lop_close)(const struct lu_env *env, struct llog_handle *handle); - /** - * Create new llog file. The llog must be opened. - * Must be used only for local llog operations. - */ - int (*lop_declare_create)(const struct lu_env *env, - struct llog_handle *handle, - struct thandle *th); - /** - * write new record in llog. It appends records usually but can edit - * existing records too. - */ - int (*lop_declare_write_rec)(const struct lu_env *env, - struct llog_handle *lgh, - struct llog_rec_hdr *rec, - int idx, struct thandle *th); - int (*lop_write_rec)(const struct lu_env *env, - struct llog_handle *loghandle, - struct llog_rec_hdr *rec, - struct llog_cookie *cookie, int cookiecount, - void *buf, int idx, struct thandle *th); - /** - * Add new record in llog catalog. Does the same as llog_write_rec() - * but using llog catalog. 
- */ - int (*lop_declare_add)(const struct lu_env *env, - struct llog_handle *lgh, - struct llog_rec_hdr *rec, struct thandle *th); - int (*lop_add)(const struct lu_env *env, struct llog_handle *lgh, - struct llog_rec_hdr *rec, struct llog_cookie *cookie, - void *buf, struct thandle *th); -}; - -/* In-memory descriptor for a log object or log catalog */ -struct llog_handle { - struct rw_semaphore lgh_lock; - spinlock_t lgh_hdr_lock; /* protect lgh_hdr data */ - struct llog_logid lgh_id; /* id of this log */ - struct llog_log_hdr *lgh_hdr; - size_t lgh_hdr_size; - int lgh_last_idx; - int lgh_cur_idx; /* used during llog_process */ - __u64 lgh_cur_offset; /* used during llog_process */ - struct llog_ctxt *lgh_ctxt; - union { - struct plain_handle_data phd; - struct cat_handle_data chd; - } u; - char *lgh_name; - void *private_data; - struct llog_operations *lgh_logops; - atomic_t lgh_refcount; -}; - -#define LLOG_CTXT_FLAG_UNINITIALIZED 0x00000001 -#define LLOG_CTXT_FLAG_STOP 0x00000002 - -struct llog_ctxt { - int loc_idx; /* my index the obd array of ctxt's */ - struct obd_device *loc_obd; /* points back to the containing obd*/ - struct obd_llog_group *loc_olg; /* group containing that ctxt */ - struct obd_export *loc_exp; /* parent "disk" export (e.g. 
MDS) */ - struct obd_import *loc_imp; /* to use in RPC's: can be backward - * pointing import - */ - struct llog_operations *loc_logops; - struct llog_handle *loc_handle; - struct mutex loc_mutex; /* protect loc_imp */ - atomic_t loc_refcount; - long loc_flags; /* flags, see above defines */ - /* - * llog chunk size, and llog record size can not be bigger than - * loc_chunk_size - */ - __u32 loc_chunk_size; -}; - -#define LLOG_PROC_BREAK 0x0001 -#define LLOG_DEL_RECORD 0x0002 - -static inline int llog_handle2ops(struct llog_handle *loghandle, - struct llog_operations **lop) -{ - if (!loghandle || !loghandle->lgh_logops) - return -EINVAL; - - *lop = loghandle->lgh_logops; - return 0; -} - -static inline struct llog_ctxt *llog_ctxt_get(struct llog_ctxt *ctxt) -{ - atomic_inc(&ctxt->loc_refcount); - CDEBUG(D_INFO, "GETting ctxt %p : new refcount %d\n", ctxt, - atomic_read(&ctxt->loc_refcount)); - return ctxt; -} - -static inline void llog_ctxt_put(struct llog_ctxt *ctxt) -{ - if (!ctxt) - return; - LASSERT_ATOMIC_GT_LT(&ctxt->loc_refcount, 0, LI_POISON); - CDEBUG(D_INFO, "PUTting ctxt %p : new refcount %d\n", ctxt, - atomic_read(&ctxt->loc_refcount) - 1); - __llog_ctxt_put(NULL, ctxt); -} - -static inline void llog_group_init(struct obd_llog_group *olg) -{ - init_waitqueue_head(&olg->olg_waitq); - spin_lock_init(&olg->olg_lock); - mutex_init(&olg->olg_cat_processing); -} - -static inline int llog_group_set_ctxt(struct obd_llog_group *olg, - struct llog_ctxt *ctxt, int index) -{ - LASSERT(index >= 0 && index < LLOG_MAX_CTXTS); - - spin_lock(&olg->olg_lock); - if (olg->olg_ctxts[index]) { - spin_unlock(&olg->olg_lock); - return -EEXIST; - } - olg->olg_ctxts[index] = ctxt; - spin_unlock(&olg->olg_lock); - return 0; -} - -static inline struct llog_ctxt *llog_group_get_ctxt(struct obd_llog_group *olg, - int index) -{ - struct llog_ctxt *ctxt; - - LASSERT(index >= 0 && index < LLOG_MAX_CTXTS); - - spin_lock(&olg->olg_lock); - if (!olg->olg_ctxts[index]) - ctxt = NULL; - 
else - ctxt = llog_ctxt_get(olg->olg_ctxts[index]); - spin_unlock(&olg->olg_lock); - return ctxt; -} - -static inline void llog_group_clear_ctxt(struct obd_llog_group *olg, int index) -{ - LASSERT(index >= 0 && index < LLOG_MAX_CTXTS); - spin_lock(&olg->olg_lock); - olg->olg_ctxts[index] = NULL; - spin_unlock(&olg->olg_lock); -} - -static inline struct llog_ctxt *llog_get_context(struct obd_device *obd, - int index) -{ - return llog_group_get_ctxt(&obd->obd_olg, index); -} - -static inline int llog_group_ctxt_null(struct obd_llog_group *olg, int index) -{ - return (!olg->olg_ctxts[index]); -} - -static inline int llog_ctxt_null(struct obd_device *obd, int index) -{ - return llog_group_ctxt_null(&obd->obd_olg, index); -} - -static inline int llog_next_block(const struct lu_env *env, - struct llog_handle *loghandle, int *cur_idx, - int next_idx, __u64 *cur_offset, void *buf, - int len) -{ - struct llog_operations *lop; - int rc; - - rc = llog_handle2ops(loghandle, &lop); - if (rc) - return rc; - if (!lop->lop_next_block) - return -EOPNOTSUPP; - - rc = lop->lop_next_block(env, loghandle, cur_idx, next_idx, - cur_offset, buf, len); - return rc; -} - -/* llog.c */ -int llog_declare_write_rec(const struct lu_env *env, - struct llog_handle *handle, - struct llog_rec_hdr *rec, int idx, - struct thandle *th); -int llog_write_rec(const struct lu_env *env, struct llog_handle *handle, - struct llog_rec_hdr *rec, struct llog_cookie *logcookies, - int numcookies, void *buf, int idx, struct thandle *th); -int lustre_process_log(struct super_block *sb, char *logname, - struct config_llog_instance *cfg); -int lustre_end_log(struct super_block *sb, char *logname, - struct config_llog_instance *cfg); -/** @} log */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h deleted file mode 100644 index a9c9992a2502..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_mdc.h +++ /dev/null @@ -1,229 +0,0 @@ 
-// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_mdc.h - * - * MDS data structures. - * See also lustre_idl.h for wire formats of requests. - */ - -#ifndef _LUSTRE_MDC_H -#define _LUSTRE_MDC_H - -/** \defgroup mdc mdc - * - * @{ - */ - -#include <linux/fs.h> -#include <linux/dcache.h> -#include <lustre_intent.h> -#include <lustre_handles.h> -#include <linux/libcfs/libcfs.h> -#include <obd_class.h> -#include <lustre_lib.h> -#include <lustre_dlm.h> -#include <lustre_export.h> - -struct ptlrpc_client; -struct obd_export; -struct ptlrpc_request; -struct obd_device; - -/** - * Serializes in-flight MDT-modifying RPC requests to preserve idempotency. - * - * This mutex is used to implement execute-once semantics on the MDT. - * The MDT stores the last transaction ID and result for every client in - * its last_rcvd file. 
If the client doesn't get a reply, it can safely - * resend the request and the MDT will reconstruct the reply being aware - * that the request has already been executed. Without this lock, - * execution status of concurrent in-flight requests would be - * overwritten. - * - * This design limits the extent to which we can keep a full pipeline of - * in-flight requests from a single client. This limitation could be - * overcome by allowing multiple slots per client in the last_rcvd file. - */ -struct mdc_rpc_lock { - /** Lock protecting in-flight RPC concurrency. */ - struct mutex rpcl_mutex; - /** Intent associated with currently executing request. */ - struct lookup_intent *rpcl_it; - /** Used for MDS/RPC load testing purposes. */ - int rpcl_fakes; -}; - -#define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL) - -static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck) -{ - mutex_init(&lck->rpcl_mutex); - lck->rpcl_it = NULL; -} - -static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, - struct lookup_intent *it) -{ - if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || - it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) - return; - - /* This would normally block until the existing request finishes. - * If fail_loc is set it will block until the regular request is - * done, then set rpcl_it to MDC_FAKE_RPCL_IT. Once that is set - * it will only be cleared when all fake requests are finished. - * Only when all fake requests are finished can normal requests - * be sent, to ensure they are recoverable again. - */ - again: - mutex_lock(&lck->rpcl_mutex); - - if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) { - lck->rpcl_it = MDC_FAKE_RPCL_IT; - lck->rpcl_fakes++; - mutex_unlock(&lck->rpcl_mutex); - return; - } - - /* This will only happen when the CFS_FAIL_CHECK() was - * just turned off but there are still requests in progress. - * Wait until they finish. 
It doesn't need to be efficient - * in this extremely rare case, just have low overhead in - * the common case when it isn't true. - */ - while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) { - mutex_unlock(&lck->rpcl_mutex); - schedule_timeout(HZ / 4); - goto again; - } - - LASSERT(!lck->rpcl_it); - lck->rpcl_it = it; -} - -static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, - struct lookup_intent *it) -{ - if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || - it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) - return; - - if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */ - mutex_lock(&lck->rpcl_mutex); - - LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes); - lck->rpcl_fakes--; - - if (lck->rpcl_fakes == 0) - lck->rpcl_it = NULL; - - } else { - LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it); - lck->rpcl_it = NULL; - } - - mutex_unlock(&lck->rpcl_mutex); -} - -static inline void mdc_get_mod_rpc_slot(struct ptlrpc_request *req, - struct lookup_intent *it) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - u32 opc; - u16 tag; - - opc = lustre_msg_get_opc(req->rq_reqmsg); - tag = obd_get_mod_rpc_slot(cli, opc, it); - lustre_msg_set_tag(req->rq_reqmsg, tag); -} - -static inline void mdc_put_mod_rpc_slot(struct ptlrpc_request *req, - struct lookup_intent *it) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - u32 opc; - u16 tag; - - opc = lustre_msg_get_opc(req->rq_reqmsg); - tag = lustre_msg_get_tag(req->rq_reqmsg); - obd_put_mod_rpc_slot(cli, opc, it, tag); -} - -/** - * Update the maximum possible easize. - * - * This value is learned from ptlrpc replies sent by the MDT. The - * default easize is initialized to the minimum value but allowed - * to grow up to a single page in size if required to handle the - * common case. 
- * - * \see client_obd::cl_default_mds_easize - * - * \param[in] exp export for MDC device - * \param[in] body body of ptlrpc reply from MDT - * - */ -static inline void mdc_update_max_ea_from_body(struct obd_export *exp, - struct mdt_body *body) -{ - if (body->mbo_valid & OBD_MD_FLMODEASIZE) { - struct client_obd *cli = &exp->exp_obd->u.cli; - u32 def_easize; - - if (cli->cl_max_mds_easize < body->mbo_max_mdsize) - cli->cl_max_mds_easize = body->mbo_max_mdsize; - - def_easize = min_t(__u32, body->mbo_max_mdsize, - OBD_MAX_DEFAULT_EA_SIZE); - cli->cl_default_mds_easize = def_easize; - } -} - -/* mdc/mdc_locks.c */ -int it_open_error(int phase, struct lookup_intent *it); - -static inline bool cl_is_lov_delay_create(unsigned int flags) -{ - return (flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE; -} - -static inline void cl_lov_delay_create_clear(unsigned int *flags) -{ - if ((*flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE) - *flags &= ~O_LOV_DELAY_CREATE; -} - -/** @} mdc */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h deleted file mode 100644 index 6937546f1d46..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_mds.h +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_mds.h - * - * MDS data structures. - * See also lustre_idl.h for wire formats of requests. - */ - -#ifndef _LUSTRE_MDS_H -#define _LUSTRE_MDS_H - -/** \defgroup mds mds - * - * @{ - */ - -#include <lustre_handles.h> -#include <linux/libcfs/libcfs.h> -#include <lustre_lib.h> -#include <lustre_dlm.h> -#include <lustre_export.h> - -struct mds_group_info { - struct obd_uuid *uuid; - int group; -}; - -#define MDD_OBD_NAME "mdd_obd" -#define MDD_OBD_UUID "mdd_obd_uuid" - -/** @} mds */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h deleted file mode 100644 index d35ae0cda8d2..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ /dev/null @@ -1,2359 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -/** \defgroup PtlRPC Portal RPC and networking module. - * - * PortalRPC is the layer used by rest of lustre code to achieve network - * communications: establish connections with corresponding export and import - * states, listen for a service, send and receive RPCs. - * PortalRPC also includes base recovery framework: packet resending and - * replaying, reconnections, pinger. - * - * PortalRPC utilizes LNet as its transport layer. - * - * @{ - */ - -#ifndef _LUSTRE_NET_H -#define _LUSTRE_NET_H - -/** \defgroup net net - * - * @{ - */ - -#include <linux/uio.h> -#include <linux/libcfs/libcfs.h> -#include <uapi/linux/lnet/nidstr.h> -#include <linux/lnet/api.h> -#include <uapi/linux/lustre/lustre_idl.h> -#include <lustre_errno.h> -#include <lustre_ha.h> -#include <lustre_sec.h> -#include <lustre_import.h> -#include <lprocfs_status.h> -#include <lu_object.h> -#include <lustre_req_layout.h> - -#include <obd_support.h> -#include <uapi/linux/lustre/lustre_ver.h> - -/* MD flags we _always_ use */ -#define PTLRPC_MD_OPTIONS 0 - -/** - * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ... - * In order for the client and server to properly negotiate the maximum - * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two - * value. The client is free to limit the actual RPC size for any bulk - * transfer via cl_max_pages_per_rpc to some non-power-of-two value. - * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS. 
- */ -#define PTLRPC_BULK_OPS_BITS 4 -#if PTLRPC_BULK_OPS_BITS > 16 -#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS." -#endif -#define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS) -/** - * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and - * should not be used on the server at all. Otherwise, it imposes a - * protocol limitation on the maximum RPC size that can be used by any - * RPC sent to that server in the future. Instead, the server should - * use the negotiated per-client ocd_brw_size to determine the bulk - * RPC count. - */ -#define PTLRPC_BULK_OPS_MASK (~((__u64)PTLRPC_BULK_OPS_COUNT - 1)) - -/** - * Define maxima for bulk I/O. - * - * A single PTLRPC BRW request is sent via up to PTLRPC_BULK_OPS_COUNT - * of LNET_MTU sized RDMA transfers. Clients and servers negotiate the - * currently supported maximum between peers at connect via ocd_brw_size. - */ -#define PTLRPC_MAX_BRW_BITS (LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS) -#define PTLRPC_MAX_BRW_SIZE (1 << PTLRPC_MAX_BRW_BITS) -#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE >> PAGE_SHIFT) - -#define ONE_MB_BRW_SIZE (1 << LNET_MTU_BITS) -#define MD_MAX_BRW_SIZE (1 << LNET_MTU_BITS) -#define MD_MAX_BRW_PAGES (MD_MAX_BRW_SIZE >> PAGE_SHIFT) -#define DT_MAX_BRW_SIZE PTLRPC_MAX_BRW_SIZE -#define DT_MAX_BRW_PAGES (DT_MAX_BRW_SIZE >> PAGE_SHIFT) -#define OFD_MAX_BRW_SIZE (1 << LNET_MTU_BITS) - -/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! 
*/ -# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0) -# error "PTLRPC_MAX_BRW_PAGES isn't a power of two" -# endif -# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE)) -# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE" -# endif -# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT) -# error "PTLRPC_MAX_BRW_SIZE too big" -# endif -# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV * PTLRPC_BULK_OPS_COUNT) -# error "PTLRPC_MAX_BRW_PAGES too big" -# endif - -#define PTLRPC_NTHRS_INIT 2 - -/** - * Buffer Constants - * - * Constants determine how memory is used to buffer incoming service requests. - * - * ?_NBUFS # buffers to allocate when growing the pool - * ?_BUFSIZE # bytes in a single request buffer - * ?_MAXREQSIZE # maximum request service will receive - * - * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk - * of ?_NBUFS is added to the pool. - * - * Messages larger than ?_MAXREQSIZE are dropped. Request buffers are - * considered full when less than ?_MAXREQSIZE is left in them. - */ -/** - * Thread Constants - * - * Constants determine how threads are created for ptlrpc service. - * - * ?_NTHRS_INIT # threads to create for each service partition on - * initializing. If it's non-affinity service and - * there is only one partition, it's the overall # - * threads for the service while initializing. - * ?_NTHRS_BASE # threads should be created at least for each - * ptlrpc partition to keep the service healthy. - * It's the low-water mark of threads upper-limit - * for each partition. - * ?_THR_FACTOR # threads can be added on threads upper-limit for - * each CPU core. This factor is only for reference, - * we might decrease value of factor if number of cores - * per CPT is above a limit. 
- * ?_NTHRS_MAX # overall threads can be created for a service, - * it's a soft limit because if service is running - * on machine with hundreds of cores and tens of - * CPU partitions, we need to guarantee each partition - * has ?_NTHRS_BASE threads, which means total threads - * will be ?_NTHRS_BASE * number_of_cpts which can - * exceed ?_NTHRS_MAX. - * - * Examples - * - * #define MDS_NTHRS_INIT 2 - * #define MDS_NTHRS_BASE 64 - * #define MDS_NTHRS_FACTOR 8 - * #define MDS_NTHRS_MAX 1024 - * - * Example 1): - * --------------------------------------------------------------------- - * Server(A) has 16 cores, user configured it to 4 partitions so each - * partition has 4 cores, then actual number of service threads on each - * partition is: - * MDS_NTHRS_BASE(64) + cores(4) * MDS_NTHRS_FACTOR(8) = 96 - * - * Total number of threads for the service is: - * 96 * partitions(4) = 384 - * - * Example 2): - * --------------------------------------------------------------------- - * Server(B) has 32 cores, user configured it to 4 partitions so each - * partition has 8 cores, then actual number of service threads on each - * partition is: - * MDS_NTHRS_BASE(64) + cores(8) * MDS_NTHRS_FACTOR(8) = 128 - * - * Total number of threads for the service is: - * 128 * partitions(4) = 512 - * - * Example 3): - * --------------------------------------------------------------------- - * Server(B) has 96 cores, user configured it to 8 partitions so each - * partition has 12 cores, then actual number of service threads on each - * partition is: - * MDS_NTHRS_BASE(64) + cores(12) * MDS_NTHRS_FACTOR(8) = 160 - * - * Total number of threads for the service is: - * 160 * partitions(8) = 1280 - * - * However, it's above the soft limit MDS_NTHRS_MAX, so we choose this number - * as upper limit of threads number for each partition: - * MDS_NTHRS_MAX(1024) / partitions(8) = 128 - * - * Example 4): - * --------------------------------------------------------------------- - * Server(C) have a 
thousand of cores and user configured it to 32 partitions - * MDS_NTHRS_BASE(64) * 32 = 2048 - * - * which is already above soft limit MDS_NTHRS_MAX(1024), but we still need - * to guarantee that each partition has at least MDS_NTHRS_BASE(64) threads - * to keep service healthy, so total number of threads will just be 2048. - * - * NB: we don't suggest to choose server with that many cores because backend - * filesystem itself, buffer cache, or underlying network stack might - * have some SMP scalability issues at that large scale. - * - * If user already has a fat machine with hundreds or thousands of cores, - * there are two choices for configuration: - * a) create CPU table from subset of all CPUs and run Lustre on - * top of this subset - * b) bind service threads on a few partitions, see modparameters of - * MDS and OSS for details -* - * NB: these calculations (and examples below) are simplified to help - * understanding, the real implementation is a little more complex, - * please see ptlrpc_server_nthreads_check() for details. - * - */ - - /* - * LDLM threads constants: - * - * Given 8 as factor and 24 as base threads number - * - * example 1) - * On 4-core machine we will have 24 + 8 * 4 = 56 threads. - * - * example 2) - * On 8-core machine with 2 partitions we will have 24 + 4 * 8 = 56 - * threads for each partition and total threads number will be 112. - * - * example 3) - * On 64-core machine with 8 partitions we will need LDLM_NTHRS_BASE(24) - * threads for each partition to keep service healthy, so total threads - * number should be 24 * 8 = 192. - * - * So with these constants, threads number will be at the similar level - * of old versions, unless target machine has over a hundred cores - */ -#define LDLM_THR_FACTOR 8 -#define LDLM_NTHRS_INIT PTLRPC_NTHRS_INIT -#define LDLM_NTHRS_BASE 24 -#define LDLM_NTHRS_MAX (num_online_cpus() == 1 ? 
64 : 128) - -#define LDLM_BL_THREADS LDLM_NTHRS_AUTO_INIT -#define LDLM_CLIENT_NBUFS 1 -#define LDLM_SERVER_NBUFS 64 -#define LDLM_BUFSIZE (8 * 1024) -#define LDLM_MAXREQSIZE (5 * 1024) -#define LDLM_MAXREPSIZE (1024) - -#define MDS_MAXREQSIZE (5 * 1024) /* >= 4736 */ - -/** - * FIEMAP request can be 4K+ for now - */ -#define OST_MAXREQSIZE (16 * 1024) - -/* Macro to hide a typecast. */ -#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args) - -struct ptlrpc_replay_async_args { - int praa_old_state; - int praa_old_status; -}; - -/** - * Structure to single define portal connection. - */ -struct ptlrpc_connection { - /** linkage for connections hash table */ - struct hlist_node c_hash; - /** Our own lnet nid for this connection */ - lnet_nid_t c_self; - /** Remote side nid for this connection */ - struct lnet_process_id c_peer; - /** UUID of the other side */ - struct obd_uuid c_remote_uuid; - /** reference counter for this connection */ - atomic_t c_refcount; -}; - -/** Client definition for PortalRPC */ -struct ptlrpc_client { - /** What lnet portal does this client send messages to by default */ - __u32 cli_request_portal; - /** What portal do we expect replies on */ - __u32 cli_reply_portal; - /** Name of the client */ - char *cli_name; -}; - -/** state flags of requests */ -/* XXX only ones left are those used by the bulk descs as well! */ -#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */ -#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */ - -#define REQ_MAX_ACK_LOCKS 8 - -union ptlrpc_async_args { - /** - * Scratchpad for passing args to completion interpreter. Users - * cast to the struct of their choosing, and BUILD_BUG_ON oversized - * arguments. For _tons_ of context, kmalloc a struct and store - * a pointer to it here. The pointer_arg ensures this struct is at - * least big enough for that. 
- */ - void *pointer_arg[11]; - __u64 space[7]; -}; - -struct ptlrpc_request_set; -typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int); -typedef int (*set_producer_func)(struct ptlrpc_request_set *, void *); - -/** - * Definition of request set structure. - * Request set is a list of requests (not necessary to the same target) that - * once populated with RPCs could be sent in parallel. - * There are two kinds of request sets. General purpose and with dedicated - * serving thread. Example of the latter is ptlrpcd set. - * For general purpose sets once request set started sending it is impossible - * to add new requests to such set. - * Provides a way to call "completion callbacks" when all requests in the set - * returned. - */ -struct ptlrpc_request_set { - atomic_t set_refcount; - /** number of in queue requests */ - atomic_t set_new_count; - /** number of uncompleted requests */ - atomic_t set_remaining; - /** wait queue to wait on for request events */ - wait_queue_head_t set_waitq; - wait_queue_head_t *set_wakeup_ptr; - /** List of requests in the set */ - struct list_head set_requests; - /** - * List of completion callbacks to be called when the set is completed - * This is only used if \a set_interpret is NULL. - * Links struct ptlrpc_set_cbdata. - */ - struct list_head set_cblist; - /** Completion callback, if only one. */ - set_interpreter_func set_interpret; - /** opaq argument passed to completion \a set_interpret callback. */ - void *set_arg; - /** - * Lock for \a set_new_requests manipulations - * locked so that any old caller can communicate requests to - * the set holder who can then fold them into the lock-free set - */ - spinlock_t set_new_req_lock; - /** List of new yet unsent requests. Only used with ptlrpcd now. 
*/ - struct list_head set_new_requests; - - /** rq_status of requests that have been freed already */ - int set_rc; - /** Additional fields used by the flow control extension */ - /** Maximum number of RPCs in flight */ - int set_max_inflight; - /** Callback function used to generate RPCs */ - set_producer_func set_producer; - /** opaq argument passed to the producer callback */ - void *set_producer_arg; -}; - -/** - * Description of a single ptrlrpc_set callback - */ -struct ptlrpc_set_cbdata { - /** List linkage item */ - struct list_head psc_item; - /** Pointer to interpreting function */ - set_interpreter_func psc_interpret; - /** Opaq argument to pass to the callback */ - void *psc_data; -}; - -struct ptlrpc_bulk_desc; -struct ptlrpc_service_part; -struct ptlrpc_service; - -/** - * ptlrpc callback & work item stuff - */ -struct ptlrpc_cb_id { - void (*cbid_fn)(struct lnet_event *ev); /* specific callback fn */ - void *cbid_arg; /* additional arg */ -}; - -/** Maximum number of locks to fit into reply state */ -#define RS_MAX_LOCKS 8 -#define RS_DEBUG 0 - -/** - * Structure to define reply state on the server - * Reply state holds various reply message information. Also for "difficult" - * replies (rep-ack case) we store the state after sending reply and wait - * for the client to acknowledge the reception. In these cases locks could be - * added to the state for replay/failover consistency guarantees. 
- */ -struct ptlrpc_reply_state { - /** Callback description */ - struct ptlrpc_cb_id rs_cb_id; - /** Linkage for list of all reply states in a system */ - struct list_head rs_list; - /** Linkage for list of all reply states on same export */ - struct list_head rs_exp_list; - /** Linkage for list of all reply states for same obd */ - struct list_head rs_obd_list; -#if RS_DEBUG - struct list_head rs_debug_list; -#endif - /** A spinlock to protect the reply state flags */ - spinlock_t rs_lock; - /** Reply state flags */ - unsigned long rs_difficult:1; /* ACK/commit stuff */ - unsigned long rs_no_ack:1; /* no ACK, even for - * difficult requests - */ - unsigned long rs_scheduled:1; /* being handled? */ - unsigned long rs_scheduled_ever:1;/* any schedule attempts? */ - unsigned long rs_handled:1; /* been handled yet? */ - unsigned long rs_on_net:1; /* reply_out_callback pending? */ - unsigned long rs_prealloc:1; /* rs from prealloc list */ - unsigned long rs_committed:1;/* the transaction was committed - * and the rs was dispatched - */ - atomic_t rs_refcount; /* number of users */ - /** Number of locks awaiting client ACK */ - int rs_nlocks; - - /** Size of the state */ - int rs_size; - /** opcode */ - __u32 rs_opc; - /** Transaction number */ - __u64 rs_transno; - /** xid */ - __u64 rs_xid; - struct obd_export *rs_export; - struct ptlrpc_service_part *rs_svcpt; - /** Lnet metadata handle for the reply */ - struct lnet_handle_md rs_md_h; - - /** Context for the service thread */ - struct ptlrpc_svc_ctx *rs_svc_ctx; - /** Reply buffer (actually sent to the client), encoded if needed */ - struct lustre_msg *rs_repbuf; /* wrapper */ - /** Size of the reply buffer */ - int rs_repbuf_len; /* wrapper buf length */ - /** Size of the reply message */ - int rs_repdata_len; /* wrapper msg length */ - /** - * Actual reply message. Its content is encrypted (if needed) to - * produce reply buffer for actual sending. 
In simple case - * of no network encryption we just set \a rs_repbuf to \a rs_msg - */ - struct lustre_msg *rs_msg; /* reply message */ - - /** Handles of locks awaiting client reply ACK */ - struct lustre_handle rs_locks[RS_MAX_LOCKS]; - /** Lock modes of locks in \a rs_locks */ - enum ldlm_mode rs_modes[RS_MAX_LOCKS]; -}; - -struct ptlrpc_thread; - -/** RPC stages */ -enum rq_phase { - RQ_PHASE_NEW = 0xebc0de00, - RQ_PHASE_RPC = 0xebc0de01, - RQ_PHASE_BULK = 0xebc0de02, - RQ_PHASE_INTERPRET = 0xebc0de03, - RQ_PHASE_COMPLETE = 0xebc0de04, - RQ_PHASE_UNREG_RPC = 0xebc0de05, - RQ_PHASE_UNREG_BULK = 0xebc0de06, - RQ_PHASE_UNDEFINED = 0xebc0de07 -}; - -/** Type of request interpreter call-back */ -typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env, - struct ptlrpc_request *req, - void *arg, int rc); - -/** - * Definition of request pool structure. - * The pool is used to store empty preallocated requests for the case - * when we would actually need to send something without performing - * any allocations (to avoid e.g. OOM). - */ -struct ptlrpc_request_pool { - /** Locks the list */ - spinlock_t prp_lock; - /** list of ptlrpc_request structs */ - struct list_head prp_req_list; - /** Maximum message size that would fit into a request from this pool */ - int prp_rq_size; - /** Function to allocate more requests for this pool */ - int (*prp_populate)(struct ptlrpc_request_pool *, int); -}; - -struct lu_context; -struct lu_env; - -struct ldlm_lock; - -#include <lustre_nrs.h> - -/** - * Basic request prioritization operations structure. - * The whole idea is centered around locks and RPCs that might affect locks. - * When a lock is contended we try to give priority to RPCs that might lead - * to fastest release of that lock. - * Currently only implemented for OSTs only in a way that makes all - * IO and truncate RPCs that are coming from a locked region where a lock is - * contended a priority over other requests. 
- */ -struct ptlrpc_hpreq_ops { - /** - * Check if the lock handle of the given lock is the same as - * taken from the request. - */ - int (*hpreq_lock_match)(struct ptlrpc_request *, struct ldlm_lock *); - /** - * Check if the request is a high priority one. - */ - int (*hpreq_check)(struct ptlrpc_request *); - /** - * Called after the request has been handled. - */ - void (*hpreq_fini)(struct ptlrpc_request *); -}; - -struct ptlrpc_cli_req { - /** For bulk requests on client only: bulk descriptor */ - struct ptlrpc_bulk_desc *cr_bulk; - /** optional time limit for send attempts */ - long cr_delay_limit; - /** time request was first queued */ - time_t cr_queued_time; - /** request sent timeval */ - struct timespec64 cr_sent_tv; - /** time for request really sent out */ - time64_t cr_sent_out; - /** when req reply unlink must finish. */ - time64_t cr_reply_deadline; - /** when req bulk unlink must finish. */ - time64_t cr_bulk_deadline; - /** when req unlink must finish. */ - time64_t cr_req_deadline; - /** Portal to which this request would be sent */ - short cr_req_ptl; - /** Portal where to wait for reply and where reply would be sent */ - short cr_rep_ptl; - /** request resending number */ - unsigned int cr_resend_nr; - /** What was import generation when this request was sent */ - int cr_imp_gen; - enum lustre_imp_state cr_send_state; - /** Per-request waitq introduced by bug 21938 for recovery waiting */ - wait_queue_head_t cr_set_waitq; - /** Link item for request set lists */ - struct list_head cr_set_chain; - /** link to waited ctx */ - struct list_head cr_ctx_chain; - - /** client's half ctx */ - struct ptlrpc_cli_ctx *cr_cli_ctx; - /** Link back to the request set */ - struct ptlrpc_request_set *cr_set; - /** outgoing request MD handle */ - struct lnet_handle_md cr_req_md_h; - /** request-out callback parameter */ - struct ptlrpc_cb_id cr_req_cbid; - /** incoming reply MD handle */ - struct lnet_handle_md cr_reply_md_h; - wait_queue_head_t 
cr_reply_waitq; - /** reply callback parameter */ - struct ptlrpc_cb_id cr_reply_cbid; - /** Async completion handler, called when reply is received */ - ptlrpc_interpterer_t cr_reply_interp; - /** Async completion context */ - union ptlrpc_async_args cr_async_args; - /** Opaq data for replay and commit callbacks. */ - void *cr_cb_data; - /** Link to the imp->imp_unreplied_list */ - struct list_head cr_unreplied_list; - /** - * Commit callback, called when request is committed and about to be - * freed. - */ - void (*cr_commit_cb)(struct ptlrpc_request *); - /** Replay callback, called after request is replayed at recovery */ - void (*cr_replay_cb)(struct ptlrpc_request *); -}; - -/** client request member alias */ -/* NB: these alias should NOT be used by any new code, instead they should - * be removed step by step to avoid potential abuse - */ -#define rq_bulk rq_cli.cr_bulk -#define rq_delay_limit rq_cli.cr_delay_limit -#define rq_queued_time rq_cli.cr_queued_time -#define rq_sent_tv rq_cli.cr_sent_tv -#define rq_real_sent rq_cli.cr_sent_out -#define rq_reply_deadline rq_cli.cr_reply_deadline -#define rq_bulk_deadline rq_cli.cr_bulk_deadline -#define rq_req_deadline rq_cli.cr_req_deadline -#define rq_nr_resend rq_cli.cr_resend_nr -#define rq_request_portal rq_cli.cr_req_ptl -#define rq_reply_portal rq_cli.cr_rep_ptl -#define rq_import_generation rq_cli.cr_imp_gen -#define rq_send_state rq_cli.cr_send_state -#define rq_set_chain rq_cli.cr_set_chain -#define rq_ctx_chain rq_cli.cr_ctx_chain -#define rq_set rq_cli.cr_set -#define rq_set_waitq rq_cli.cr_set_waitq -#define rq_cli_ctx rq_cli.cr_cli_ctx -#define rq_req_md_h rq_cli.cr_req_md_h -#define rq_req_cbid rq_cli.cr_req_cbid -#define rq_reply_md_h rq_cli.cr_reply_md_h -#define rq_reply_waitq rq_cli.cr_reply_waitq -#define rq_reply_cbid rq_cli.cr_reply_cbid -#define rq_interpret_reply rq_cli.cr_reply_interp -#define rq_async_args rq_cli.cr_async_args -#define rq_cb_data rq_cli.cr_cb_data -#define 
rq_unreplied_list rq_cli.cr_unreplied_list -#define rq_commit_cb rq_cli.cr_commit_cb -#define rq_replay_cb rq_cli.cr_replay_cb - -struct ptlrpc_srv_req { - /** initial thread servicing this request */ - struct ptlrpc_thread *sr_svc_thread; - /** - * Server side list of incoming unserved requests sorted by arrival - * time. Traversed from time to time to notice about to expire - * requests and sent back "early replies" to clients to let them - * know server is alive and well, just very busy to service their - * requests in time - */ - struct list_head sr_timed_list; - /** server-side per-export list */ - struct list_head sr_exp_list; - /** server-side history, used for debuging purposes. */ - struct list_head sr_hist_list; - /** history sequence # */ - __u64 sr_hist_seq; - /** the index of service's srv_at_array into which request is linked */ - time64_t sr_at_index; - /** authed uid */ - uid_t sr_auth_uid; - /** authed uid mapped to */ - uid_t sr_auth_mapped_uid; - /** RPC is generated from what part of Lustre */ - enum lustre_sec_part sr_sp_from; - /** request session context */ - struct lu_context sr_ses; - /** \addtogroup nrs - * @{ - */ - /** stub for NRS request */ - struct ptlrpc_nrs_request sr_nrq; - /** @} nrs */ - /** request arrival time */ - struct timespec64 sr_arrival_time; - /** server's half ctx */ - struct ptlrpc_svc_ctx *sr_svc_ctx; - /** (server side), pointed directly into req buffer */ - struct ptlrpc_user_desc *sr_user_desc; - /** separated reply state */ - struct ptlrpc_reply_state *sr_reply_state; - /** server-side hp handlers */ - struct ptlrpc_hpreq_ops *sr_ops; - /** incoming request buffer */ - struct ptlrpc_request_buffer_desc *sr_rqbd; -}; - -/** server request member alias */ -/* NB: these alias should NOT be used by any new code, instead they should - * be removed step by step to avoid potential abuse - */ -#define rq_svc_thread rq_srv.sr_svc_thread -#define rq_timed_list rq_srv.sr_timed_list -#define rq_exp_list rq_srv.sr_exp_list 
-#define rq_history_list rq_srv.sr_hist_list -#define rq_history_seq rq_srv.sr_hist_seq -#define rq_at_index rq_srv.sr_at_index -#define rq_auth_uid rq_srv.sr_auth_uid -#define rq_auth_mapped_uid rq_srv.sr_auth_mapped_uid -#define rq_sp_from rq_srv.sr_sp_from -#define rq_session rq_srv.sr_ses -#define rq_nrq rq_srv.sr_nrq -#define rq_arrival_time rq_srv.sr_arrival_time -#define rq_reply_state rq_srv.sr_reply_state -#define rq_svc_ctx rq_srv.sr_svc_ctx -#define rq_user_desc rq_srv.sr_user_desc -#define rq_ops rq_srv.sr_ops -#define rq_rqbd rq_srv.sr_rqbd - -/** - * Represents remote procedure call. - * - * This is a staple structure used by everybody wanting to send a request - * in Lustre. - */ -struct ptlrpc_request { - /* Request type: one of PTL_RPC_MSG_* */ - int rq_type; - /** Result of request processing */ - int rq_status; - /** - * Linkage item through which this request is included into - * sending/delayed lists on client and into rqbd list on server - */ - struct list_head rq_list; - /** Lock to protect request flags and some other important bits, like - * rq_list - */ - spinlock_t rq_lock; - /** client-side flags are serialized by rq_lock @{ */ - unsigned int rq_intr:1, rq_replied:1, rq_err:1, - rq_timedout:1, rq_resend:1, rq_restart:1, - /** - * when ->rq_replay is set, request is kept by the client even - * after server commits corresponding transaction. This is - * used for operations that require sequence of multiple - * requests to be replayed. The only example currently is file - * open/close. When last request in such a sequence is - * committed, ->rq_replay is cleared on all requests in the - * sequence. 
- */ - rq_replay:1, - rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, - rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1, - rq_early:1, - rq_req_unlinked:1, /* unlinked request buffer from lnet */ - rq_reply_unlinked:1, /* unlinked reply buffer from lnet */ - rq_memalloc:1, /* req originated from "kswapd" */ - rq_committed:1, - rq_reply_truncated:1, - /** whether the "rq_set" is a valid one */ - rq_invalid_rqset:1, - rq_generation_set:1, - /** do not resend request on -EINPROGRESS */ - rq_no_retry_einprogress:1, - /* allow the req to be sent if the import is in recovery - * status - */ - rq_allow_replay:1, - /* bulk request, sent to server, but uncommitted */ - rq_unstable:1; - /** @} */ - - /** server-side flags @{ */ - unsigned int - rq_hp:1, /**< high priority RPC */ - rq_at_linked:1, /**< link into service's srv_at_array */ - rq_packed_final:1; /**< packed final reply */ - /** @} */ - - /** one of RQ_PHASE_* */ - enum rq_phase rq_phase; - /** one of RQ_PHASE_* to be used next */ - enum rq_phase rq_next_phase; - /** - * client-side refcount for SENT race, server-side refcount - * for multiple replies - */ - atomic_t rq_refcount; - /** - * client-side: - * !rq_truncate : # reply bytes actually received, - * rq_truncate : required repbuf_len for resend - */ - int rq_nob_received; - /** Request length */ - int rq_reqlen; - /** Reply length */ - int rq_replen; - /** Pool if request is from preallocated list */ - struct ptlrpc_request_pool *rq_pool; - /** Request message - what client sent */ - struct lustre_msg *rq_reqmsg; - /** Reply message - server response */ - struct lustre_msg *rq_repmsg; - /** Transaction number */ - __u64 rq_transno; - /** xid */ - __u64 rq_xid; - /** bulk match bits */ - u64 rq_mbits; - /** - * List item to for replay list. Not yet committed requests get linked - * there. - * Also see \a rq_replay comment above. 
- * It's also link chain on obd_export::exp_req_replay_queue - */ - struct list_head rq_replay_list; - /** non-shared members for client & server request*/ - union { - struct ptlrpc_cli_req rq_cli; - struct ptlrpc_srv_req rq_srv; - }; - /** - * security and encryption data - * @{ - */ - /** description of flavors for client & server */ - struct sptlrpc_flavor rq_flvr; - - /* client/server security flags */ - unsigned int - rq_ctx_init:1, /* context initiation */ - rq_ctx_fini:1, /* context destroy */ - rq_bulk_read:1, /* request bulk read */ - rq_bulk_write:1, /* request bulk write */ - /* server authentication flags */ - rq_auth_gss:1, /* authenticated by gss */ - rq_auth_usr_root:1, /* authed as root */ - rq_auth_usr_mdt:1, /* authed as mdt */ - rq_auth_usr_ost:1, /* authed as ost */ - /* security tfm flags */ - rq_pack_udesc:1, - rq_pack_bulk:1, - /* doesn't expect reply FIXME */ - rq_no_reply:1, - rq_pill_init:1, /* pill initialized */ - rq_srv_req:1; /* server request */ - - /** various buffer pointers */ - struct lustre_msg *rq_reqbuf; /**< req wrapper */ - char *rq_repbuf; /**< rep buffer */ - struct lustre_msg *rq_repdata; /**< rep wrapper msg */ - /** only in priv mode */ - struct lustre_msg *rq_clrbuf; - int rq_reqbuf_len; /* req wrapper buf len */ - int rq_reqdata_len; /* req wrapper msg len */ - int rq_repbuf_len; /* rep buffer len */ - int rq_repdata_len; /* rep wrapper msg len */ - int rq_clrbuf_len; /* only in priv mode */ - int rq_clrdata_len; /* only in priv mode */ - - /** early replies go to offset 0, regular replies go after that */ - unsigned int rq_reply_off; - - /** @} */ - - /** Fields that help to see if request and reply were swabbed or not */ - __u32 rq_req_swab_mask; - __u32 rq_rep_swab_mask; - - /** how many early replies (for stats) */ - int rq_early_count; - - /** Server-side, export on which request was received */ - struct obd_export *rq_export; - /** import where request is being sent */ - struct obd_import *rq_import; - /** our 
LNet NID */ - lnet_nid_t rq_self; - /** Peer description (the other side) */ - struct lnet_process_id rq_peer; - /** - * service time estimate (secs) - * If the request is not served by this time, it is marked as timed out. - */ - int rq_timeout; - /** - * when request/reply sent (secs), or time when request should be sent - */ - time64_t rq_sent; - /** when request must finish. */ - time64_t rq_deadline; - /** request format description */ - struct req_capsule rq_pill; -}; - -/** - * Call completion handler for rpc if any, return it's status or original - * rc if there was no handler defined for this request. - */ -static inline int ptlrpc_req_interpret(const struct lu_env *env, - struct ptlrpc_request *req, int rc) -{ - if (req->rq_interpret_reply) { - req->rq_status = req->rq_interpret_reply(env, req, - &req->rq_async_args, - rc); - return req->rq_status; - } - return rc; -} - -/* - * Can the request be moved from the regular NRS head to the high-priority NRS - * head (of the same PTLRPC service partition), if any? - * - * For a reliable result, this should be checked under svcpt->scp_req lock. - */ -static inline bool ptlrpc_nrs_req_can_move(struct ptlrpc_request *req) -{ - struct ptlrpc_nrs_request *nrq = &req->rq_nrq; - - /** - * LU-898: Check ptlrpc_nrs_request::nr_enqueued to make sure the - * request has been enqueued first, and ptlrpc_nrs_request::nr_started - * to make sure it has not been scheduled yet (analogous to previous - * (non-NRS) checking of !list_empty(&ptlrpc_request::rq_list). 
- */ - return nrq->nr_enqueued && !nrq->nr_started && !req->rq_hp; -} - -/** @} nrs */ - -/** - * Returns 1 if request buffer at offset \a index was already swabbed - */ -static inline int lustre_req_swabbed(struct ptlrpc_request *req, size_t index) -{ - LASSERT(index < sizeof(req->rq_req_swab_mask) * 8); - return req->rq_req_swab_mask & (1 << index); -} - -/** - * Returns 1 if request reply buffer at offset \a index was already swabbed - */ -static inline int lustre_rep_swabbed(struct ptlrpc_request *req, size_t index) -{ - LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8); - return req->rq_rep_swab_mask & (1 << index); -} - -/** - * Returns 1 if request needs to be swabbed into local cpu byteorder - */ -static inline int ptlrpc_req_need_swab(struct ptlrpc_request *req) -{ - return lustre_req_swabbed(req, MSG_PTLRPC_HEADER_OFF); -} - -/** - * Returns 1 if request reply needs to be swabbed into local cpu byteorder - */ -static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req) -{ - return lustre_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF); -} - -/** - * Mark request buffer at offset \a index that it was already swabbed - */ -static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, - size_t index) -{ - LASSERT(index < sizeof(req->rq_req_swab_mask) * 8); - LASSERT((req->rq_req_swab_mask & (1 << index)) == 0); - req->rq_req_swab_mask |= 1 << index; -} - -/** - * Mark request reply buffer at offset \a index that it was already swabbed - */ -static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, - size_t index) -{ - LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8); - LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0); - req->rq_rep_swab_mask |= 1 << index; -} - -/** - * Convert numerical request phase value \a phase into text string description - */ -static inline const char * -ptlrpc_phase2str(enum rq_phase phase) -{ - switch (phase) { - case RQ_PHASE_NEW: - return "New"; - case RQ_PHASE_RPC: - return "Rpc"; - case RQ_PHASE_BULK: - 
return "Bulk"; - case RQ_PHASE_INTERPRET: - return "Interpret"; - case RQ_PHASE_COMPLETE: - return "Complete"; - case RQ_PHASE_UNREG_RPC: - return "UnregRPC"; - case RQ_PHASE_UNREG_BULK: - return "UnregBULK"; - default: - return "?Phase?"; - } -} - -/** - * Convert numerical request phase of the request \a req into text stringi - * description - */ -static inline const char * -ptlrpc_rqphase2str(struct ptlrpc_request *req) -{ - return ptlrpc_phase2str(req->rq_phase); -} - -/** - * Debugging functions and helpers to print request structure into debug log - * @{ - */ -/* Spare the preprocessor, spoil the bugs. */ -#define FLAG(field, str) (field ? str : "") - -/** Convert bit flags into a string */ -#define DEBUG_REQ_FLAGS(req) \ - ptlrpc_rqphase2str(req), \ - FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ - FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \ - FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ - FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ - FLAG(req->rq_no_resend, "N"), \ - FLAG(req->rq_waiting, "W"), \ - FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \ - FLAG(req->rq_committed, "M") - -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s" - -void _debug_req(struct ptlrpc_request *req, - struct libcfs_debug_msg_data *data, const char *fmt, ...) - __printf(3, 4); - -/** - * Helper that decides if we need to print request according to current debug - * level settings - */ -#define debug_req(msgdata, mask, cdls, req, fmt, a...) \ -do { \ - CFS_CHECK_STACK(msgdata, mask, cdls); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - _debug_req((req), msgdata, fmt, ##a); \ -} while (0) - -/** - * This is the debug print function you need to use to print request structure - * content into lustre debug log. 
- * for most callers (level is a constant) this is resolved at compile time - */ -#define DEBUG_REQ(level, req, fmt, args...) \ -do { \ - if ((level) & (D_ERROR | D_WARNING)) { \ - static struct cfs_debug_limit_state cdls; \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \ - debug_req(&msgdata, level, &cdls, req, "@@@ "fmt" ", ## args);\ - } else { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, NULL); \ - debug_req(&msgdata, level, NULL, req, "@@@ "fmt" ", ## args); \ - } \ -} while (0) -/** @} */ - -/** - * Structure that defines a single page of a bulk transfer - */ -struct ptlrpc_bulk_page { - /** Linkage to list of pages in a bulk */ - struct list_head bp_link; - /** - * Number of bytes in a page to transfer starting from \a bp_pageoffset - */ - int bp_buflen; - /** offset within a page */ - int bp_pageoffset; - /** The page itself */ - struct page *bp_page; -}; - -enum ptlrpc_bulk_op_type { - PTLRPC_BULK_OP_ACTIVE = 0x00000001, - PTLRPC_BULK_OP_PASSIVE = 0x00000002, - PTLRPC_BULK_OP_PUT = 0x00000004, - PTLRPC_BULK_OP_GET = 0x00000008, - PTLRPC_BULK_BUF_KVEC = 0x00000010, - PTLRPC_BULK_BUF_KIOV = 0x00000020, - PTLRPC_BULK_GET_SOURCE = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET, - PTLRPC_BULK_PUT_SINK = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT, - PTLRPC_BULK_GET_SINK = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET, - PTLRPC_BULK_PUT_SOURCE = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT, -}; - -static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET; -} - -static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE; -} - -static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK; -} - -static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_GET_SINK) == 
PTLRPC_BULK_GET_SINK; -} - -static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE; -} - -static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) - == PTLRPC_BULK_BUF_KVEC; -} - -static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) - == PTLRPC_BULK_BUF_KIOV; -} - -static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_OP_ACTIVE) | - (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE; -} - -static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_OP_ACTIVE) | - (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE; -} - -struct ptlrpc_bulk_frag_ops { - /** - * Add a page \a page to the bulk descriptor \a desc - * Data to transfer in the page starts at offset \a pageoffset and - * amount of data to transfer from the page is \a len - */ - void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len); - - /* - * Add a \a fragment to the bulk descriptor \a desc. - * Data to transfer in the fragment is pointed to by \a frag - * The size of the fragment is \a len - */ - int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len); - - /** - * Uninitialize and free bulk descriptor \a desc. - * Works on bulk descriptors both from server and client side. - */ - void (*release_frags)(struct ptlrpc_bulk_desc *desc); -}; - -extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops; -extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops; - -/** - * Definition of bulk descriptor. 
- * Bulks are special "Two phase" RPCs where initial request message - * is sent first and it is followed bt a transfer (o receiving) of a large - * amount of data to be settled into pages referenced from the bulk descriptors. - * Bulks transfers (the actual data following the small requests) are done - * on separate LNet portals. - * In lustre we use bulk transfers for READ and WRITE transfers from/to OSTs. - * Another user is readpage for MDT. - */ -struct ptlrpc_bulk_desc { - /** completed with failure */ - unsigned long bd_failure:1; - /** client side */ - unsigned long bd_registered:1; - /** For serialization with callback */ - spinlock_t bd_lock; - /** Import generation when request for this bulk was sent */ - int bd_import_generation; - /** {put,get}{source,sink}{kvec,kiov} */ - enum ptlrpc_bulk_op_type bd_type; - /** LNet portal for this bulk */ - __u32 bd_portal; - /** Server side - export this bulk created for */ - struct obd_export *bd_export; - /** Client side - import this bulk was sent on */ - struct obd_import *bd_import; - /** Back pointer to the request */ - struct ptlrpc_request *bd_req; - struct ptlrpc_bulk_frag_ops *bd_frag_ops; - wait_queue_head_t bd_waitq; /* server side only WQ */ - int bd_iov_count; /* # entries in bd_iov */ - int bd_max_iov; /* allocated size of bd_iov */ - int bd_nob; /* # bytes covered */ - int bd_nob_transferred; /* # bytes GOT/PUT */ - - u64 bd_last_mbits; - - struct ptlrpc_cb_id bd_cbid; /* network callback info */ - lnet_nid_t bd_sender; /* stash event::sender */ - int bd_md_count; /* # valid entries in bd_mds */ - int bd_md_max_brw; /* max entries in bd_mds */ - /** array of associated MDs */ - struct lnet_handle_md bd_mds[PTLRPC_BULK_OPS_COUNT]; - - union { - struct { - /* - * encrypt iov, size is either 0 or bd_iov_count. 
- */ - struct bio_vec *bd_enc_vec; - struct bio_vec *bd_vec; /* Array of bio_vecs */ - } bd_kiov; - - struct { - struct kvec *bd_enc_kvec; - struct kvec *bd_kvec; /* Array of kvecs */ - } bd_kvec; - } bd_u; -}; - -#define GET_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_vec) -#define BD_GET_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_vec[i]) -#define GET_ENC_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_enc_vec) -#define BD_GET_ENC_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_enc_vec[i]) -#define GET_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_kvec) -#define BD_GET_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_kvec[i]) -#define GET_ENC_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_enc_kvec) -#define BD_GET_ENC_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_enc_kvec[i]) - -enum { - SVC_STOPPED = 1 << 0, - SVC_STOPPING = 1 << 1, - SVC_STARTING = 1 << 2, - SVC_RUNNING = 1 << 3, -}; - -#define PTLRPC_THR_NAME_LEN 32 -/** - * Definition of server service thread structure - */ -struct ptlrpc_thread { - /** - * List of active threads in svc->srv_threads - */ - struct list_head t_link; - /** - * thread-private data (preallocated memory) - */ - void *t_data; - __u32 t_flags; - /** - * service thread index, from ptlrpc_start_threads - */ - unsigned int t_id; - /** - * service thread pid - */ - pid_t t_pid; - /** - * put watchdog in the structure per thread b=14840 - * - * Lustre watchdog is removed for client in the hope - * of a generic watchdog can be merged in kernel. - * When that happens, we should add below back. 
- * - * struct lc_watchdog *t_watchdog; - */ - /** - * the svc this thread belonged to b=18582 - */ - struct ptlrpc_service_part *t_svcpt; - wait_queue_head_t t_ctl_waitq; - struct lu_env *t_env; - char t_name[PTLRPC_THR_NAME_LEN]; -}; - -static inline int thread_is_stopped(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_STOPPED); -} - -static inline int thread_is_stopping(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_STOPPING); -} - -static inline int thread_is_starting(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_STARTING); -} - -static inline int thread_is_running(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_RUNNING); -} - -static inline void thread_clear_flags(struct ptlrpc_thread *thread, __u32 flags) -{ - thread->t_flags &= ~flags; -} - -static inline void thread_set_flags(struct ptlrpc_thread *thread, __u32 flags) -{ - thread->t_flags = flags; -} - -static inline void thread_add_flags(struct ptlrpc_thread *thread, __u32 flags) -{ - thread->t_flags |= flags; -} - -static inline int thread_test_and_clear_flags(struct ptlrpc_thread *thread, - __u32 flags) -{ - if (thread->t_flags & flags) { - thread->t_flags &= ~flags; - return 1; - } - return 0; -} - -/** - * Request buffer descriptor structure. - * This is a structure that contains one posted request buffer for service. - * Once data land into a buffer, event callback creates actual request and - * notifies wakes one of the service threads to process new incoming request. - * More than one request can fit into the buffer. 
- */ -struct ptlrpc_request_buffer_desc { - /** Link item for rqbds on a service */ - struct list_head rqbd_list; - /** History of requests for this buffer */ - struct list_head rqbd_reqs; - /** Back pointer to service for which this buffer is registered */ - struct ptlrpc_service_part *rqbd_svcpt; - /** LNet descriptor */ - struct lnet_handle_md rqbd_md_h; - int rqbd_refcount; - /** The buffer itself */ - char *rqbd_buffer; - struct ptlrpc_cb_id rqbd_cbid; - /** - * This "embedded" request structure is only used for the - * last request to fit into the buffer - */ - struct ptlrpc_request rqbd_req; -}; - -typedef int (*svc_handler_t)(struct ptlrpc_request *req); - -struct ptlrpc_service_ops { - /** - * if non-NULL called during thread creation (ptlrpc_start_thread()) - * to initialize service specific per-thread state. - */ - int (*so_thr_init)(struct ptlrpc_thread *thr); - /** - * if non-NULL called during thread shutdown (ptlrpc_main()) to - * destruct state created by ->srv_init(). - */ - void (*so_thr_done)(struct ptlrpc_thread *thr); - /** - * Handler function for incoming requests for this service - */ - int (*so_req_handler)(struct ptlrpc_request *req); - /** - * function to determine priority of the request, it's called - * on every new request - */ - int (*so_hpreq_handler)(struct ptlrpc_request *); - /** - * service-specific print fn - */ - void (*so_req_printer)(void *, struct ptlrpc_request *); -}; - -#ifndef __cfs_cacheline_aligned -/* NB: put it here for reducing patche dependence */ -# define __cfs_cacheline_aligned -#endif - -/** - * How many high priority requests to serve before serving one normal - * priority request - */ -#define PTLRPC_SVC_HP_RATIO 10 - -/** - * Definition of PortalRPC service. - * The service is listening on a particular portal (like tcp port) - * and perform actions for a specific server like IO service for OST - * or general metadata service for MDS. 
- */ -struct ptlrpc_service { - /** serialize sysfs operations */ - spinlock_t srv_lock; - /** most often accessed fields */ - /** chain thru all services */ - struct list_head srv_list; - /** service operations table */ - struct ptlrpc_service_ops srv_ops; - /** only statically allocated strings here; we don't clean them */ - char *srv_name; - /** only statically allocated strings here; we don't clean them */ - char *srv_thread_name; - /** service thread list */ - struct list_head srv_threads; - /** threads # should be created for each partition on initializing */ - int srv_nthrs_cpt_init; - /** limit of threads number for each partition */ - int srv_nthrs_cpt_limit; - /** Root of debugfs dir tree for this service */ - struct dentry *srv_debugfs_entry; - /** Pointer to statistic data for this service */ - struct lprocfs_stats *srv_stats; - /** # hp per lp reqs to handle */ - int srv_hpreq_ratio; - /** biggest request to receive */ - int srv_max_req_size; - /** biggest reply to send */ - int srv_max_reply_size; - /** size of individual buffers */ - int srv_buf_size; - /** # buffers to allocate in 1 group */ - int srv_nbuf_per_group; - /** Local portal on which to receive requests */ - __u32 srv_req_portal; - /** Portal on the client to send replies to */ - __u32 srv_rep_portal; - /** - * Tags for lu_context associated with this thread, see struct - * lu_context. 
- */ - __u32 srv_ctx_tags; - /** soft watchdog timeout multiplier */ - int srv_watchdog_factor; - /** under unregister_service */ - unsigned srv_is_stopping:1; - - /** max # request buffers in history per partition */ - int srv_hist_nrqbds_cpt_max; - /** number of CPTs this service bound on */ - int srv_ncpts; - /** CPTs array this service bound on */ - __u32 *srv_cpts; - /** 2^srv_cptab_bits >= cfs_cpt_numbert(srv_cptable) */ - int srv_cpt_bits; - /** CPT table this service is running over */ - struct cfs_cpt_table *srv_cptable; - - /* sysfs object */ - struct kobject srv_kobj; - struct completion srv_kobj_unregister; - /** - * partition data for ptlrpc service - */ - struct ptlrpc_service_part *srv_parts[0]; -}; - -/** - * Definition of PortalRPC service partition data. - * Although a service only has one instance of it right now, but we - * will have multiple instances very soon (instance per CPT). - * - * it has four locks: - * \a scp_lock - * serialize operations on rqbd and requests waiting for preprocess - * \a scp_req_lock - * serialize operations active requests sent to this portal - * \a scp_at_lock - * serialize adaptive timeout stuff - * \a scp_rep_lock - * serialize operations on RS list (reply states) - * - * We don't have any use-case to take two or more locks at the same time - * for now, so there is no lock order issue. 
- */ -struct ptlrpc_service_part { - /** back reference to owner */ - struct ptlrpc_service *scp_service __cfs_cacheline_aligned; - /* CPT id, reserved */ - int scp_cpt; - /** always increasing number */ - int scp_thr_nextid; - /** # of starting threads */ - int scp_nthrs_starting; - /** # of stopping threads, reserved for shrinking threads */ - int scp_nthrs_stopping; - /** # running threads */ - int scp_nthrs_running; - /** service threads list */ - struct list_head scp_threads; - - /** - * serialize the following fields, used for protecting - * rqbd list and incoming requests waiting for preprocess, - * threads starting & stopping are also protected by this lock. - */ - spinlock_t scp_lock __cfs_cacheline_aligned; - /** total # req buffer descs allocated */ - int scp_nrqbds_total; - /** # posted request buffers for receiving */ - int scp_nrqbds_posted; - /** in progress of allocating rqbd */ - int scp_rqbd_allocating; - /** # incoming reqs */ - int scp_nreqs_incoming; - /** request buffers to be reposted */ - struct list_head scp_rqbd_idle; - /** req buffers receiving */ - struct list_head scp_rqbd_posted; - /** incoming reqs */ - struct list_head scp_req_incoming; - /** timeout before re-posting reqs, in tick */ - long scp_rqbd_timeout; - /** - * all threads sleep on this. This wait-queue is signalled when new - * incoming request arrives and when difficult reply has to be handled. 
- */ - wait_queue_head_t scp_waitq; - - /** request history */ - struct list_head scp_hist_reqs; - /** request buffer history */ - struct list_head scp_hist_rqbds; - /** # request buffers in history */ - int scp_hist_nrqbds; - /** sequence number for request */ - __u64 scp_hist_seq; - /** highest seq culled from history */ - __u64 scp_hist_seq_culled; - - /** - * serialize the following fields, used for processing requests - * sent to this portal - */ - spinlock_t scp_req_lock __cfs_cacheline_aligned; - /** # reqs in either of the NRS heads below */ - /** # reqs being served */ - int scp_nreqs_active; - /** # HPreqs being served */ - int scp_nhreqs_active; - /** # hp requests handled */ - int scp_hreq_count; - - /** NRS head for regular requests */ - struct ptlrpc_nrs scp_nrs_reg; - /** NRS head for HP requests; this is only valid for services that can - * handle HP requests - */ - struct ptlrpc_nrs *scp_nrs_hp; - - /** AT stuff */ - /** @{ */ - /** - * serialize the following fields, used for changes on - * adaptive timeout - */ - spinlock_t scp_at_lock __cfs_cacheline_aligned; - /** estimated rpc service time */ - struct adaptive_timeout scp_at_estimate; - /** reqs waiting for replies */ - struct ptlrpc_at_array scp_at_array; - /** early reply timer */ - struct timer_list scp_at_timer; - /** debug */ - unsigned long scp_at_checktime; - /** check early replies */ - unsigned scp_at_check; - /** @} */ - - /** - * serialize the following fields, used for processing - * replies for this portal - */ - spinlock_t scp_rep_lock __cfs_cacheline_aligned; - /** all the active replies */ - struct list_head scp_rep_active; - /** List of free reply_states */ - struct list_head scp_rep_idle; - /** waitq to run, when adding stuff to srv_free_rs_list */ - wait_queue_head_t scp_rep_waitq; - /** # 'difficult' replies */ - atomic_t scp_nreps_difficult; -}; - -#define ptlrpc_service_for_each_part(part, i, svc) \ - for (i = 0; \ - i < (svc)->srv_ncpts && \ - (svc)->srv_parts && \ - 
((part) = (svc)->srv_parts[i]); i++) - -/** - * Declaration of ptlrpcd control structure - */ -struct ptlrpcd_ctl { - /** - * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE) - */ - unsigned long pc_flags; - /** - * Thread lock protecting structure fields. - */ - spinlock_t pc_lock; - /** - * Start completion. - */ - struct completion pc_starting; - /** - * Stop completion. - */ - struct completion pc_finishing; - /** - * Thread requests set. - */ - struct ptlrpc_request_set *pc_set; - /** - * Thread name used in kthread_run() - */ - char pc_name[16]; - /** - * CPT the thread is bound on. - */ - int pc_cpt; - /** - * Index of ptlrpcd thread in the array. - */ - int pc_index; - /** - * Pointer to the array of partners' ptlrpcd_ctl structure. - */ - struct ptlrpcd_ctl **pc_partners; - /** - * Number of the ptlrpcd's partners. - */ - int pc_npartners; - /** - * Record the partner index to be processed next. - */ - int pc_cursor; - /** - * Error code if the thread failed to fully start. - */ - int pc_error; -}; - -/* Bits for pc_flags */ -enum ptlrpcd_ctl_flags { - /** - * Ptlrpc thread start flag. - */ - LIOD_START = 1 << 0, - /** - * Ptlrpc thread stop flag. - */ - LIOD_STOP = 1 << 1, - /** - * Ptlrpc thread force flag (only stop force so far). - * This will cause aborting any inflight rpcs handled - * by thread if LIOD_STOP is specified. - */ - LIOD_FORCE = 1 << 2, - /** - * This is a recovery ptlrpc thread. - */ - LIOD_RECOVERY = 1 << 3, -}; - -/** - * \addtogroup nrs - * @{ - * - * Service compatibility function; the policy is compatible with all services. - * - * \param[in] svc The service the policy is attempting to register with. 
- * \param[in] desc The policy descriptor - * - * \retval true The policy is compatible with the service - * - * \see ptlrpc_nrs_pol_desc::pd_compat() - */ -static inline bool nrs_policy_compat_all(const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc) -{ - return true; -} - -/** - * Service compatibility function; the policy is compatible with only a specific - * service which is identified by its human-readable name at - * ptlrpc_service::srv_name. - * - * \param[in] svc The service the policy is attempting to register with. - * \param[in] desc The policy descriptor - * - * \retval false The policy is not compatible with the service - * \retval true The policy is compatible with the service - * - * \see ptlrpc_nrs_pol_desc::pd_compat() - */ -static inline bool nrs_policy_compat_one(const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc) -{ - return strcmp(svc->srv_name, desc->pd_compat_svc_name) == 0; -} - -/** @} nrs */ - -/* ptlrpc/events.c */ -extern struct lnet_handle_eq ptlrpc_eq_h; -int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, - struct lnet_process_id *peer, lnet_nid_t *self); -/** - * These callbacks are invoked by LNet when something happened to - * underlying buffer - * @{ - */ -void request_out_callback(struct lnet_event *ev); -void reply_in_callback(struct lnet_event *ev); -void client_bulk_callback(struct lnet_event *ev); -void request_in_callback(struct lnet_event *ev); -void reply_out_callback(struct lnet_event *ev); -/** @} */ - -/* ptlrpc/connection.c */ -struct ptlrpc_connection *ptlrpc_connection_get(struct lnet_process_id peer, - lnet_nid_t self, - struct obd_uuid *uuid); -int ptlrpc_connection_put(struct ptlrpc_connection *c); -struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); -int ptlrpc_connection_init(void); -void ptlrpc_connection_fini(void); - -/* ptlrpc/niobuf.c */ -/** - * Actual interfacing with LNet to put/get/register/unregister stuff - * @{ - */ - -int 
ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async); - -static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req) -{ - struct ptlrpc_bulk_desc *desc; - int rc; - - desc = req->rq_bulk; - - if (req->rq_bulk_deadline > ktime_get_real_seconds()) - return 1; - - if (!desc) - return 0; - - spin_lock(&desc->bd_lock); - rc = desc->bd_md_count; - spin_unlock(&desc->bd_lock); - return rc; -} - -#define PTLRPC_REPLY_MAYBE_DIFFICULT 0x01 -#define PTLRPC_REPLY_EARLY 0x02 -int ptlrpc_send_reply(struct ptlrpc_request *req, int flags); -int ptlrpc_reply(struct ptlrpc_request *req); -int ptlrpc_send_error(struct ptlrpc_request *req, int difficult); -int ptlrpc_error(struct ptlrpc_request *req); -int ptlrpc_at_get_net_latency(struct ptlrpc_request *req); -int ptl_send_rpc(struct ptlrpc_request *request, int noreply); -int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd); -/** @} */ - -/* ptlrpc/client.c */ -/** - * Client-side portals API. Everything to send requests, receive replies, - * request queues, request management, etc. 
- * @{ - */ -void ptlrpc_request_committed(struct ptlrpc_request *req, int force); - -int ptlrpc_inc_ref(void); -void ptlrpc_dec_ref(void); - -void ptlrpc_init_client(int req_portal, int rep_portal, char *name, - struct ptlrpc_client *); -struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid); - -int ptlrpc_queue_wait(struct ptlrpc_request *req); -int ptlrpc_replay_req(struct ptlrpc_request *req); -void ptlrpc_abort_inflight(struct obd_import *imp); -void ptlrpc_abort_set(struct ptlrpc_request_set *set); - -struct ptlrpc_request_set *ptlrpc_prep_set(void); -struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func, - void *arg); -int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set); -int ptlrpc_set_wait(struct ptlrpc_request_set *); -void ptlrpc_mark_interrupted(struct ptlrpc_request *req); -void ptlrpc_set_destroy(struct ptlrpc_request_set *); -void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *); - -void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool); -int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq); - -struct ptlrpc_request_pool * -ptlrpc_init_rq_pool(int, int, - int (*populate_pool)(struct ptlrpc_request_pool *, int)); - -void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req); -struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp, - const struct req_format *format); -struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp, - struct ptlrpc_request_pool *, - const struct req_format *); -void ptlrpc_request_free(struct ptlrpc_request *request); -int ptlrpc_request_pack(struct ptlrpc_request *request, - __u32 version, int opcode); -struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *, - const struct req_format *, - __u32, int); -int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, - __u32 version, int opcode, char **bufs, - struct ptlrpc_cli_ctx *ctx); -void ptlrpc_req_finished(struct 
ptlrpc_request *request); -struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req); -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, - unsigned int nfrags, - unsigned int max_brw, - unsigned int type, - unsigned int portal, - const struct ptlrpc_bulk_frag_ops *ops); - -int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc, - void *frag, int len); -void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len, - int pin); -static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, - int len) -{ - __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1); -} - -static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, - int len) -{ - __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0); -} - -void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk); - -static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc) -{ - int i; - - for (i = 0; i < desc->bd_iov_count ; i++) - put_page(BD_GET_KIOV(desc, i).bv_page); -} - -void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, - struct obd_import *imp); -__u64 ptlrpc_next_xid(void); -__u64 ptlrpc_sample_next_xid(void); -__u64 ptlrpc_req_xid(struct ptlrpc_request *request); - -/* Set of routines to run a function in ptlrpcd context */ -void *ptlrpcd_alloc_work(struct obd_import *imp, - int (*cb)(const struct lu_env *, void *), void *data); -void ptlrpcd_destroy_work(void *handler); -int ptlrpcd_queue_work(void *handler); - -/** @} */ -struct ptlrpc_service_buf_conf { - /* nbufs is buffers # to allocate when growing the pool */ - unsigned int bc_nbufs; - /* buffer size to post */ - unsigned int bc_buf_size; - /* portal to listed for requests on */ - unsigned int bc_req_portal; - /* portal of where to send replies to */ - unsigned int bc_rep_portal; - /* maximum request size to be accepted for this service 
*/ - unsigned int bc_req_max_size; - /* maximum reply size this service can ever send */ - unsigned int bc_rep_max_size; -}; - -struct ptlrpc_service_thr_conf { - /* threadname should be 8 characters or less - 6 will be added on */ - char *tc_thr_name; - /* threads increasing factor for each CPU */ - unsigned int tc_thr_factor; - /* service threads # to start on each partition while initializing */ - unsigned int tc_nthrs_init; - /* - * low water of threads # upper-limit on each partition while running, - * service availability may be impacted if threads number is lower - * than this value. It can be ZERO if the service doesn't require - * CPU affinity or there is only one partition. - */ - unsigned int tc_nthrs_base; - /* "soft" limit for total threads number */ - unsigned int tc_nthrs_max; - /* user specified threads number, it will be validated due to - * other members of this structure. - */ - unsigned int tc_nthrs_user; - /* set NUMA node affinity for service threads */ - unsigned int tc_cpu_affinity; - /* Tags for lu_context associated with service thread */ - __u32 tc_ctx_tags; -}; - -struct ptlrpc_service_cpt_conf { - struct cfs_cpt_table *cc_cptable; - /* string pattern to describe CPTs for a service */ - char *cc_pattern; -}; - -struct ptlrpc_service_conf { - /* service name */ - char *psc_name; - /* soft watchdog timeout multiplifier to print stuck service traces */ - unsigned int psc_watchdog_factor; - /* buffer information */ - struct ptlrpc_service_buf_conf psc_buf; - /* thread information */ - struct ptlrpc_service_thr_conf psc_thr; - /* CPU partition information */ - struct ptlrpc_service_cpt_conf psc_cpt; - /* function table */ - struct ptlrpc_service_ops psc_ops; -}; - -/* ptlrpc/service.c */ -/** - * Server-side services API. 
Register/unregister service, request state - * management, service thread management - * - * @{ - */ -void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs); -void ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs); -struct ptlrpc_service *ptlrpc_register_service(struct ptlrpc_service_conf *conf, - struct kset *parent, - struct dentry *debugfs_entry); - -int ptlrpc_start_threads(struct ptlrpc_service *svc); -int ptlrpc_unregister_service(struct ptlrpc_service *service); - -int ptlrpc_hr_init(void); -void ptlrpc_hr_fini(void); - -/** @} */ - -/* ptlrpc/import.c */ -/** - * Import API - * @{ - */ -int ptlrpc_connect_import(struct obd_import *imp); -int ptlrpc_init_import(struct obd_import *imp); -int ptlrpc_disconnect_import(struct obd_import *imp, int noclose); -int ptlrpc_import_recovery_state_machine(struct obd_import *imp); - -/* ptlrpc/pack_generic.c */ -int ptlrpc_reconnect_import(struct obd_import *imp); -/** @} */ - -/** - * ptlrpc msg buffer and swab interface - * - * @{ - */ -int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout, - u32 index); -void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout, - u32 index); -int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len); -int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len); - -void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens, - char **bufs); -int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count, - __u32 *lens, char **bufs); -int lustre_pack_reply(struct ptlrpc_request *, int count, __u32 *lens, - char **bufs); -int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, - __u32 *lens, char **bufs, int flags); -#define LPRFL_EARLY_REPLY 1 -int lustre_pack_reply_flags(struct ptlrpc_request *, int count, __u32 *lens, - char **bufs, int flags); -int lustre_shrink_msg(struct lustre_msg *msg, int segment, - unsigned int newlen, int move_data); -void lustre_free_reply_state(struct 
ptlrpc_reply_state *rs); -int __lustre_unpack_msg(struct lustre_msg *m, int len); -u32 lustre_msg_hdr_size(__u32 magic, u32 count); -u32 lustre_msg_size(__u32 magic, int count, __u32 *lengths); -u32 lustre_msg_size_v2(int count, __u32 *lengths); -u32 lustre_packed_msg_size(struct lustre_msg *msg); -u32 lustre_msg_early_size(void); -void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size); -void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 minlen); -u32 lustre_msg_buflen(struct lustre_msg *m, u32 n); -u32 lustre_msg_bufcount(struct lustre_msg *m); -char *lustre_msg_string(struct lustre_msg *m, u32 n, u32 max_len); -__u32 lustre_msghdr_get_flags(struct lustre_msg *msg); -void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags); -__u32 lustre_msg_get_flags(struct lustre_msg *msg); -void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags); -void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags); -void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags); -__u32 lustre_msg_get_op_flags(struct lustre_msg *msg); -void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags); -struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg); -__u32 lustre_msg_get_type(struct lustre_msg *msg); -void lustre_msg_add_version(struct lustre_msg *msg, u32 version); -__u32 lustre_msg_get_opc(struct lustre_msg *msg); -__u16 lustre_msg_get_tag(struct lustre_msg *msg); -__u64 lustre_msg_get_last_committed(struct lustre_msg *msg); -__u64 *lustre_msg_get_versions(struct lustre_msg *msg); -__u64 lustre_msg_get_transno(struct lustre_msg *msg); -__u64 lustre_msg_get_slv(struct lustre_msg *msg); -__u32 lustre_msg_get_limit(struct lustre_msg *msg); -void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv); -void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit); -int lustre_msg_get_status(struct lustre_msg *msg); -__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg); -__u32 lustre_msg_get_magic(struct lustre_msg *msg); -__u32 
lustre_msg_get_timeout(struct lustre_msg *msg); -__u32 lustre_msg_get_service_time(struct lustre_msg *msg); -__u32 lustre_msg_get_cksum(struct lustre_msg *msg); -__u32 lustre_msg_calc_cksum(struct lustre_msg *msg); -void lustre_msg_set_handle(struct lustre_msg *msg, - struct lustre_handle *handle); -void lustre_msg_set_type(struct lustre_msg *msg, __u32 type); -void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc); -void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid); -void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag); -void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions); -void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno); -void lustre_msg_set_status(struct lustre_msg *msg, __u32 status); -void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt); -void ptlrpc_request_set_replen(struct ptlrpc_request *req); -void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout); -void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time); -void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid); -void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum); -void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits); - -static inline void -lustre_shrink_reply(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data) -{ - LASSERT(req->rq_reply_state); - LASSERT(req->rq_repmsg); - req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment, - newlen, move_data); -} - -#ifdef CONFIG_LUSTRE_TRANSLATE_ERRNOS - -static inline int ptlrpc_status_hton(int h) -{ - /* - * Positive errnos must be network errnos, such as LUSTRE_EDEADLK, - * ELDLM_LOCK_ABORTED, etc. - */ - if (h < 0) - return -lustre_errno_hton(-h); - else - return h; -} - -static inline int ptlrpc_status_ntoh(int n) -{ - /* - * See the comment in ptlrpc_status_hton(). 
- */ - if (n < 0) - return -lustre_errno_ntoh(-n); - else - return n; -} - -#else - -#define ptlrpc_status_hton(h) (h) -#define ptlrpc_status_ntoh(n) (n) - -#endif -/** @} */ - -/** Change request phase of \a req to \a new_phase */ -static inline void -ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase) -{ - if (req->rq_phase == new_phase) - return; - - if (new_phase == RQ_PHASE_UNREG_RPC || - new_phase == RQ_PHASE_UNREG_BULK) { - /* No embedded unregistering phases */ - if (req->rq_phase == RQ_PHASE_UNREG_RPC || - req->rq_phase == RQ_PHASE_UNREG_BULK) - return; - - req->rq_next_phase = req->rq_phase; - if (req->rq_import) - atomic_inc(&req->rq_import->imp_unregistering); - } - - if (req->rq_phase == RQ_PHASE_UNREG_RPC || - req->rq_phase == RQ_PHASE_UNREG_BULK) { - if (req->rq_import) - atomic_dec(&req->rq_import->imp_unregistering); - } - - DEBUG_REQ(D_INFO, req, "move req \"%s\" -> \"%s\"", - ptlrpc_rqphase2str(req), ptlrpc_phase2str(new_phase)); - - req->rq_phase = new_phase; -} - -/** - * Returns true if request \a req got early reply and hard deadline is not met - */ -static inline int -ptlrpc_client_early(struct ptlrpc_request *req) -{ - return req->rq_early; -} - -/** - * Returns true if we got real reply from server for this request - */ -static inline int -ptlrpc_client_replied(struct ptlrpc_request *req) -{ - if (req->rq_reply_deadline > ktime_get_real_seconds()) - return 0; - return req->rq_replied; -} - -/** Returns true if request \a req is in process of receiving server reply */ -static inline int -ptlrpc_client_recv(struct ptlrpc_request *req) -{ - if (req->rq_reply_deadline > ktime_get_real_seconds()) - return 1; - return req->rq_receiving_reply; -} - -static inline int -ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req) -{ - int rc; - - spin_lock(&req->rq_lock); - if (req->rq_reply_deadline > ktime_get_real_seconds()) { - spin_unlock(&req->rq_lock); - return 1; - } - if (req->rq_req_deadline > ktime_get_real_seconds()) { 
- spin_unlock(&req->rq_lock); - return 1; - } - rc = !req->rq_req_unlinked || !req->rq_reply_unlinked || - req->rq_receiving_reply; - spin_unlock(&req->rq_lock); - return rc; -} - -static inline void -ptlrpc_client_wake_req(struct ptlrpc_request *req) -{ - if (!req->rq_set) - wake_up(&req->rq_reply_waitq); - else - wake_up(&req->rq_set->set_waitq); -} - -static inline void -ptlrpc_rs_addref(struct ptlrpc_reply_state *rs) -{ - LASSERT(atomic_read(&rs->rs_refcount) > 0); - atomic_inc(&rs->rs_refcount); -} - -static inline void -ptlrpc_rs_decref(struct ptlrpc_reply_state *rs) -{ - LASSERT(atomic_read(&rs->rs_refcount) > 0); - if (atomic_dec_and_test(&rs->rs_refcount)) - lustre_free_reply_state(rs); -} - -/* Should only be called once per req */ -static inline void ptlrpc_req_drop_rs(struct ptlrpc_request *req) -{ - if (!req->rq_reply_state) - return; /* shouldn't occur */ - ptlrpc_rs_decref(req->rq_reply_state); - req->rq_reply_state = NULL; - req->rq_repmsg = NULL; -} - -static inline __u32 lustre_request_magic(struct ptlrpc_request *req) -{ - return lustre_msg_get_magic(req->rq_reqmsg); -} - -static inline int ptlrpc_req_get_repsize(struct ptlrpc_request *req) -{ - switch (req->rq_reqmsg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return req->rq_reqmsg->lm_repsize; - default: - LASSERTF(0, "incorrect message magic: %08x\n", - req->rq_reqmsg->lm_magic); - return -EFAULT; - } -} - -static inline int ptlrpc_send_limit_expired(struct ptlrpc_request *req) -{ - if (req->rq_delay_limit != 0 && - time_before(cfs_time_add(req->rq_queued_time, - req->rq_delay_limit * HZ), - cfs_time_current())) { - return 1; - } - return 0; -} - -static inline int ptlrpc_no_resend(struct ptlrpc_request *req) -{ - if (!req->rq_no_resend && ptlrpc_send_limit_expired(req)) { - spin_lock(&req->rq_lock); - req->rq_no_resend = 1; - spin_unlock(&req->rq_lock); - } - return req->rq_no_resend; -} - -static inline int -ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt) -{ - int at = AT_OFF ? 
0 : at_get(&svcpt->scp_at_estimate); - - return svcpt->scp_service->srv_watchdog_factor * - max_t(int, at, obd_timeout); -} - -static inline struct ptlrpc_service * -ptlrpc_req2svc(struct ptlrpc_request *req) -{ - return req->rq_rqbd->rqbd_svcpt->scp_service; -} - -/* ldlm/ldlm_lib.c */ -/** - * Target client logic - * @{ - */ -int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg); -int client_obd_cleanup(struct obd_device *obddev); -int client_connect_import(const struct lu_env *env, - struct obd_export **exp, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *, - void *localdata); -int client_disconnect_export(struct obd_export *exp); -int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority); -int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid); -int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer, - struct obd_uuid *uuid); -int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid); -void client_destroy_import(struct obd_import *imp); -/** @} */ - -/* ptlrpc/pinger.c */ -/** - * Pinger API (client side only) - * @{ - */ -enum timeout_event { - TIMEOUT_GRANT = 1 -}; - -struct timeout_item; -typedef int (*timeout_cb_t)(struct timeout_item *, void *); -int ptlrpc_pinger_add_import(struct obd_import *imp); -int ptlrpc_pinger_del_import(struct obd_import *imp); -int ptlrpc_add_timeout_client(int time, enum timeout_event event, - timeout_cb_t cb, void *data, - struct list_head *obd_list); -int ptlrpc_del_timeout_client(struct list_head *obd_list, - enum timeout_event event); -struct ptlrpc_request *ptlrpc_prep_ping(struct obd_import *imp); -int ptlrpc_obd_ping(struct obd_device *obd); -void ptlrpc_pinger_ir_up(void); -void ptlrpc_pinger_ir_down(void); -/** @} */ -int ptlrpc_pinger_suppress_pings(void); - -/* ptlrpc/ptlrpcd.c */ -void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force); -void ptlrpcd_free(struct ptlrpcd_ctl *pc); -void 
ptlrpcd_wake(struct ptlrpc_request *req); -void ptlrpcd_add_req(struct ptlrpc_request *req); -int ptlrpcd_addref(void); -void ptlrpcd_decref(void); - -/* ptlrpc/lproc_ptlrpc.c */ -/** - * procfs output related functions - * @{ - */ -const char *ll_opcode2str(__u32 opcode); -void ptlrpc_lprocfs_register_obd(struct obd_device *obd); -void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd); -void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes); -/** @} */ - -/* ptlrpc/llog_client.c */ -extern struct llog_operations llog_client_ops; -/** @} net */ - -#endif -/** @} PtlRPC */ diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs.h b/drivers/staging/lustre/lustre/include/lustre_nrs.h deleted file mode 100644 index ffa7317da35b..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_nrs.h +++ /dev/null @@ -1,718 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2014, Intel Corporation. 
- * - * Copyright 2012 Xyratex Technology Limited - */ -/* - * - * Network Request Scheduler (NRS) - * - */ - -#ifndef _LUSTRE_NRS_H -#define _LUSTRE_NRS_H - -/** - * \defgroup nrs Network Request Scheduler - * @{ - */ -struct ptlrpc_nrs_policy; -struct ptlrpc_nrs_resource; -struct ptlrpc_nrs_request; - -/** - * NRS control operations. - * - * These are common for all policies. - */ -enum ptlrpc_nrs_ctl { - /** - * Not a valid opcode. - */ - PTLRPC_NRS_CTL_INVALID, - /** - * Activate the policy. - */ - PTLRPC_NRS_CTL_START, - /** - * Reserved for multiple primary policies, which may be a possibility - * in the future. - */ - PTLRPC_NRS_CTL_STOP, - /** - * Policies can start using opcodes from this value and onwards for - * their own purposes; the assigned value itself is arbitrary. - */ - PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20, -}; - -/** - * NRS policy operations. - * - * These determine the behaviour of a policy, and are called in response to - * NRS core events. - */ -struct ptlrpc_nrs_pol_ops { - /** - * Called during policy registration; this operation is optional. - * - * \param[in,out] policy The policy being initialized - */ - int (*op_policy_init)(struct ptlrpc_nrs_policy *policy); - /** - * Called during policy unregistration; this operation is optional. - * - * \param[in,out] policy The policy being unregistered/finalized - */ - void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy); - /** - * Called when activating a policy via lprocfs; policies allocate and - * initialize their resources here; this operation is optional. 
- * - * \param[in,out] policy The policy being started - * - * \see nrs_policy_start_locked() - */ - int (*op_policy_start)(struct ptlrpc_nrs_policy *policy); - /** - * Called when deactivating a policy via lprocfs; policies deallocate - * their resources here; this operation is optional - * - * \param[in,out] policy The policy being stopped - * - * \see nrs_policy_stop0() - */ - void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy); - /** - * Used for policy-specific operations; i.e. not generic ones like - * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous - * to an ioctl; this operation is optional. - * - * \param[in,out] policy The policy carrying out operation \a opc - * \param[in] opc The command operation being carried out - * \param[in,out] arg An generic buffer for communication between the - * user and the control operation - * - * \retval -ve error - * \retval 0 success - * - * \see ptlrpc_nrs_policy_control() - */ - int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy, - enum ptlrpc_nrs_ctl opc, void *arg); - - /** - * Called when obtaining references to the resources of the resource - * hierarchy for a request that has arrived for handling at the PTLRPC - * service. Policies should return -ve for requests they do not wish - * to handle. This operation is mandatory. - * - * \param[in,out] policy The policy we're getting resources for. - * \param[in,out] nrq The request we are getting resources for. - * \param[in] parent The parent resource of the resource being - * requested; set to NULL if none. - * \param[out] resp The resource is to be returned here; the - * fallback policy in an NRS head should - * \e always return a non-NULL pointer value. - * \param[in] moving_req When set, signifies that this is an attempt - * to obtain resources for a request being moved - * to the high-priority NRS head by - * ldlm_lock_reorder_req(). - * This implies two things: - * 1. We are under obd_export::exp_rpc_lock and - * so should not sleep. 
- * 2. We should not perform non-idempotent or can - * skip performing idempotent operations that - * were carried out when resources were first - * taken for the request when it was initialized - * in ptlrpc_nrs_req_initialize(). - * - * \retval 0, +ve The level of the returned resource in the resource - * hierarchy; currently only 0 (for a non-leaf resource) - * and 1 (for a leaf resource) are supported by the - * framework. - * \retval -ve error - * - * \see ptlrpc_nrs_req_initialize() - * \see ptlrpc_nrs_hpreq_add_nolock() - * \see ptlrpc_nrs_req_hp_move() - */ - int (*op_res_get)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq, - const struct ptlrpc_nrs_resource *parent, - struct ptlrpc_nrs_resource **resp, - bool moving_req); - /** - * Called when releasing references taken for resources in the resource - * hierarchy for the request; this operation is optional. - * - * \param[in,out] policy The policy the resource belongs to - * \param[in] res The resource to be freed - * - * \see ptlrpc_nrs_req_finalize() - * \see ptlrpc_nrs_hpreq_add_nolock() - * \see ptlrpc_nrs_req_hp_move() - */ - void (*op_res_put)(struct ptlrpc_nrs_policy *policy, - const struct ptlrpc_nrs_resource *res); - - /** - * Obtains a request for handling from the policy, and optionally - * removes the request from the policy; this operation is mandatory. - * - * \param[in,out] policy The policy to poll - * \param[in] peek When set, signifies that we just want to - * examine the request, and not handle it, so the - * request is not removed from the policy. - * \param[in] force When set, it will force a policy to return a - * request if it has one queued. 
- * - * \retval NULL No request available for handling - * \retval valid-pointer The request polled for handling - * - * \see ptlrpc_nrs_req_get_nolock() - */ - struct ptlrpc_nrs_request * - (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek, - bool force); - /** - * Called when attempting to add a request to a policy for later - * handling; this operation is mandatory. - * - * \param[in,out] policy The policy on which to enqueue \a nrq - * \param[in,out] nrq The request to enqueue - * - * \retval 0 success - * \retval != 0 error - * - * \see ptlrpc_nrs_req_add_nolock() - */ - int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Removes a request from the policy's set of pending requests. Normally - * called after a request has been polled successfully from the policy - * for handling; this operation is mandatory. - * - * \param[in,out] policy The policy the request \a nrq belongs to - * \param[in,out] nrq The request to dequeue - * - * \see ptlrpc_nrs_req_del_nolock() - */ - void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Called after the request being carried out. Could be used for - * job/resource control; this operation is optional. - * - * \param[in,out] policy The policy which is stopping to handle request - * \a nrq - * \param[in,out] nrq The request - * - * \pre assert_spin_locked(&svcpt->scp_req_lock) - * - * \see ptlrpc_nrs_req_stop_nolock() - */ - void (*op_req_stop)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Registers the policy's lprocfs interface with a PTLRPC service. - * - * \param[in] svc The service - * - * \retval 0 success - * \retval != 0 error - */ - int (*op_lprocfs_init)(struct ptlrpc_service *svc); - /** - * Unegisters the policy's lprocfs interface with a PTLRPC service. 
- * - * In cases of failed policy registration in - * \e ptlrpc_nrs_policy_register(), this function may be called for a - * service which has not registered the policy successfully, so - * implementations of this method should make sure their operations are - * safe in such cases. - * - * \param[in] svc The service - */ - void (*op_lprocfs_fini)(struct ptlrpc_service *svc); -}; - -/** - * Policy flags - */ -enum nrs_policy_flags { - /** - * Fallback policy, use this flag only on a single supported policy per - * service. The flag cannot be used on policies that use - * \e PTLRPC_NRS_FL_REG_EXTERN - */ - PTLRPC_NRS_FL_FALLBACK = BIT(0), - /** - * Start policy immediately after registering. - */ - PTLRPC_NRS_FL_REG_START = BIT(1), - /** - * This is a policy registering from a module different to the one NRS - * core ships in (currently ptlrpc). - */ - PTLRPC_NRS_FL_REG_EXTERN = BIT(2), -}; - -/** - * NRS queue type. - * - * Denotes whether an NRS instance is for handling normal or high-priority - * RPCs, or whether an operation pertains to one or both of the NRS instances - * in a service. - */ -enum ptlrpc_nrs_queue_type { - PTLRPC_NRS_QUEUE_REG = BIT(0), - PTLRPC_NRS_QUEUE_HP = BIT(1), - PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP) -}; - -/** - * NRS head - * - * A PTLRPC service has at least one NRS head instance for handling normal - * priority RPCs, and may optionally have a second NRS head instance for - * handling high-priority RPCs. Each NRS head maintains a list of available - * policies, of which one and only one policy is acting as the fallback policy, - * and optionally a different policy may be acting as the primary policy. For - * all RPCs handled by this NRS head instance, NRS core will first attempt to - * enqueue the RPC using the primary policy (if any). 
The fallback policy is - * used in the following cases: - * - when there was no primary policy in the - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request - * was initialized. - * - when the primary policy that was at the - * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the - * RPC was initialized, denoted it did not wish, or for some other reason was - * not able to handle the request, by returning a non-valid NRS resource - * reference. - * - when the primary policy that was at the - * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the - * RPC was initialized, fails later during the request enqueueing stage. - * - * \see nrs_resource_get_safe() - * \see nrs_request_enqueue() - */ -struct ptlrpc_nrs { - spinlock_t nrs_lock; - /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */ - /** - * List of registered policies - */ - struct list_head nrs_policy_list; - /** - * List of policies with queued requests. Policies that have any - * outstanding requests are queued here, and this list is queried - * in a round-robin manner from NRS core when obtaining a request - * for handling. This ensures that requests from policies that at some - * point transition away from the - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained. 
- */ - struct list_head nrs_policy_queued; - /** - * Service partition for this NRS head - */ - struct ptlrpc_service_part *nrs_svcpt; - /** - * Primary policy, which is the preferred policy for handling RPCs - */ - struct ptlrpc_nrs_policy *nrs_policy_primary; - /** - * Fallback policy, which is the backup policy for handling RPCs - */ - struct ptlrpc_nrs_policy *nrs_policy_fallback; - /** - * This NRS head handles either HP or regular requests - */ - enum ptlrpc_nrs_queue_type nrs_queue_type; - /** - * # queued requests from all policies in this NRS head - */ - unsigned long nrs_req_queued; - /** - * # scheduled requests from all policies in this NRS head - */ - unsigned long nrs_req_started; - /** - * # policies on this NRS - */ - unsigned int nrs_num_pols; - /** - * This NRS head is in progress of starting a policy - */ - unsigned int nrs_policy_starting:1; - /** - * In progress of shutting down the whole NRS head; used during - * unregistration - */ - unsigned int nrs_stopping:1; - /** - * NRS policy is throttling request - */ - unsigned int nrs_throttling:1; -}; - -#define NRS_POL_NAME_MAX 16 -#define NRS_POL_ARG_MAX 16 - -struct ptlrpc_nrs_pol_desc; - -/** - * Service compatibility predicate; this determines whether a policy is adequate - * for handling RPCs of a particular PTLRPC service. - * - * XXX:This should give the same result during policy registration and - * unregistration, and for all partitions of a service; so the result should not - * depend on temporal service or other properties, that may influence the - * result. 
- */ -typedef bool (*nrs_pol_desc_compat_t)(const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc); - -struct ptlrpc_nrs_pol_conf { - /** - * Human-readable policy name - */ - char nc_name[NRS_POL_NAME_MAX]; - /** - * NRS operations for this policy - */ - const struct ptlrpc_nrs_pol_ops *nc_ops; - /** - * Service compatibility predicate - */ - nrs_pol_desc_compat_t nc_compat; - /** - * Set for policies that support a single ptlrpc service, i.e. ones that - * have \a pd_compat set to nrs_policy_compat_one(). The variable value - * depicts the name of the single service that such policies are - * compatible with. - */ - const char *nc_compat_svc_name; - /** - * Owner module for this policy descriptor; policies registering from a - * different module to the one the NRS framework is held within - * (currently ptlrpc), should set this field to THIS_MODULE. - */ - struct module *nc_owner; - /** - * Policy registration flags; a bitmask of \e nrs_policy_flags - */ - unsigned int nc_flags; -}; - -/** - * NRS policy registering descriptor - * - * Is used to hold a description of a policy that can be passed to NRS core in - * order to register the policy with NRS heads in different PTLRPC services. - */ -struct ptlrpc_nrs_pol_desc { - /** - * Human-readable policy name - */ - char pd_name[NRS_POL_NAME_MAX]; - /** - * Link into nrs_core::nrs_policies - */ - struct list_head pd_list; - /** - * NRS operations for this policy - */ - const struct ptlrpc_nrs_pol_ops *pd_ops; - /** - * Service compatibility predicate - */ - nrs_pol_desc_compat_t pd_compat; - /** - * Set for policies that are compatible with only one PTLRPC service. - * - * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name - */ - const char *pd_compat_svc_name; - /** - * Owner module for this policy descriptor. - * - * We need to hold a reference to the module whenever we might make use - * of any of the module's contents, i.e. 
- * - If one or more instances of the policy are at a state where they - * might be handling a request, i.e. - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to - * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference - * is taken on the module when - * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it - * becomes 0, so that we hold only one reference to the module maximum - * at any time. - * - * We do not need to hold a reference to the module, even though we - * might use code and data from the module, in the following cases: - * - During external policy registration, because this should happen in - * the module's init() function, in which case the module is safe from - * removal because a reference is being held on the module by the - * kernel, and iirc kmod (and I guess module-init-tools also) will - * serialize any racing processes properly anyway. - * - During external policy unregistration, because this should happen - * in a module's exit() function, and any attempts to start a policy - * instance would need to take a reference on the module, and this is - * not possible once we have reached the point where the exit() - * handler is called. - * - During service registration and unregistration, as service setup - * and cleanup, and policy registration, unregistration and policy - * instance starting, are serialized by \e nrs_core::nrs_mutex, so - * as long as users adhere to the convention of registering policies - * in init() and unregistering them in module exit() functions, there - * should not be a race between these operations. - * - During any policy-specific lprocfs operations, because a reference - * is held by the kernel on a proc entry that has been entered by a - * syscall, so as long as proc entries are removed during - * unregistration time, then unregistration and lprocfs operations - * will be properly serialized. 
- */ - struct module *pd_owner; - /** - * Bitmask of \e nrs_policy_flags - */ - unsigned int pd_flags; - /** - * # of references on this descriptor - */ - atomic_t pd_refs; -}; - -/** - * NRS policy state - * - * Policies transition from one state to the other during their lifetime - */ -enum ptlrpc_nrs_pol_state { - /** - * Not a valid policy state. - */ - NRS_POL_STATE_INVALID, - /** - * Policies are at this state either at the start of their life, or - * transition here when the user selects a different policy to act - * as the primary one. - */ - NRS_POL_STATE_STOPPED, - /** - * Policy is progress of stopping - */ - NRS_POL_STATE_STOPPING, - /** - * Policy is in progress of starting - */ - NRS_POL_STATE_STARTING, - /** - * A policy is in this state in two cases: - * - it is the fallback policy, which is always in this state. - * - it has been activated by the user; i.e. it is the primary policy, - */ - NRS_POL_STATE_STARTED, -}; - -/** - * NRS policy information - * - * Used for obtaining information for the status of a policy via lprocfs - */ -struct ptlrpc_nrs_pol_info { - /** - * Policy name - */ - char pi_name[NRS_POL_NAME_MAX]; - /** - * Policy argument - */ - char pi_arg[NRS_POL_ARG_MAX]; - /** - * Current policy state - */ - enum ptlrpc_nrs_pol_state pi_state; - /** - * # RPCs enqueued for later dispatching by the policy - */ - long pi_req_queued; - /** - * # RPCs started for dispatch by the policy - */ - long pi_req_started; - /** - * Is this a fallback policy? - */ - unsigned pi_fallback:1; -}; - -/** - * NRS policy - * - * There is one instance of this for each policy in each NRS head of each - * PTLRPC service partition. 
- */ -struct ptlrpc_nrs_policy { - /** - * Linkage into the NRS head's list of policies, - * ptlrpc_nrs:nrs_policy_list - */ - struct list_head pol_list; - /** - * Linkage into the NRS head's list of policies with enqueued - * requests ptlrpc_nrs:nrs_policy_queued - */ - struct list_head pol_list_queued; - /** - * Current state of this policy - */ - enum ptlrpc_nrs_pol_state pol_state; - /** - * Bitmask of nrs_policy_flags - */ - unsigned int pol_flags; - /** - * # RPCs enqueued for later dispatching by the policy - */ - long pol_req_queued; - /** - * # RPCs started for dispatch by the policy - */ - long pol_req_started; - /** - * Usage Reference count taken on the policy instance - */ - long pol_ref; - /** - * Human-readable policy argument - */ - char pol_arg[NRS_POL_ARG_MAX]; - /** - * The NRS head this policy has been created at - */ - struct ptlrpc_nrs *pol_nrs; - /** - * Private policy data; varies by policy type - */ - void *pol_private; - /** - * Policy descriptor for this policy instance. - */ - struct ptlrpc_nrs_pol_desc *pol_desc; -}; - -/** - * NRS resource - * - * Resources are embedded into two types of NRS entities: - * - Inside NRS policies, in the policy's private data in - * ptlrpc_nrs_policy::pol_private - * - In objects that act as prime-level scheduling entities in different NRS - * policies; e.g. on a policy that performs round robin or similar order - * scheduling across client NIDs, there would be one NRS resource per unique - * client NID. On a policy which performs round robin scheduling across - * backend filesystem objects, there would be one resource associated with - * each of the backend filesystem objects partaking in the scheduling - * performed by the policy. - * - * NRS resources share a parent-child relationship, in which resources embedded - * in policy instances are the parent entities, with all scheduling entities - * a policy schedules across being the children, thus forming a simple resource - * hierarchy. 
This hierarchy may be extended with one or more levels in the - * future if the ability to have more than one primary policy is added. - * - * Upon request initialization, references to the then active NRS policies are - * taken and used to later handle the dispatching of the request with one of - * these policies. - * - * \see nrs_resource_get_safe() - * \see ptlrpc_nrs_req_add() - */ -struct ptlrpc_nrs_resource { - /** - * This NRS resource's parent; is NULL for resources embedded in NRS - * policy instances; i.e. those are top-level ones. - */ - struct ptlrpc_nrs_resource *res_parent; - /** - * The policy associated with this resource. - */ - struct ptlrpc_nrs_policy *res_policy; -}; - -enum { - NRS_RES_FALLBACK, - NRS_RES_PRIMARY, - NRS_RES_MAX -}; - -#include <lustre_nrs_fifo.h> - -/** - * NRS request - * - * Instances of this object exist embedded within ptlrpc_request; the main - * purpose of this object is to hold references to the request's resources - * for the lifetime of the request, and to hold properties that policies use - * use for determining the request's scheduling priority. - **/ -struct ptlrpc_nrs_request { - /** - * The request's resource hierarchy. - */ - struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX]; - /** - * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the - * policy that was used to enqueue the request. - * - * \see nrs_request_enqueue() - */ - unsigned int nr_res_idx; - unsigned int nr_initialized:1; - unsigned int nr_enqueued:1; - unsigned int nr_started:1; - unsigned int nr_finalized:1; - - /** - * Policy-specific fields, used for determining a request's scheduling - * priority, and other supporting functionality. - */ - union { - /** - * Fields for the FIFO policy - */ - struct nrs_fifo_req fifo; - } nr_u; - /** - * Externally-registering policies may want to use this to allocate - * their own request properties. 
- */ - void *ext; -}; - -/** @} nrs */ -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h deleted file mode 100644 index b70d97d4acbb..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2014, Intel Corporation. - * - * Copyright 2012 Xyratex Technology Limited - */ -/* - * - * Network Request Scheduler (NRS) First-in First-out (FIFO) policy - * - */ - -#ifndef _LUSTRE_NRS_FIFO_H -#define _LUSTRE_NRS_FIFO_H - -/* \name fifo - * - * FIFO policy - * - * This policy is a logical wrapper around previous, non-NRS functionality. - * It dispatches RPCs in the same order as they arrive from the network. This - * policy is currently used as the fallback policy, and the only enabled policy - * on all NRS heads of all PTLRPC service partitions. - * @{ - */ - -/** - * Private data structure for the FIFO policy - */ -struct nrs_fifo_head { - /** - * Resource object for policy instance. - */ - struct ptlrpc_nrs_resource fh_res; - /** - * List of queued requests. - */ - struct list_head fh_list; - /** - * For debugging purposes. 
- */ - __u64 fh_sequence; -}; - -struct nrs_fifo_req { - struct list_head fr_list; - __u64 fr_sequence; -}; - -/** @} fifo */ -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_obdo.h b/drivers/staging/lustre/lustre/include/lustre_obdo.h deleted file mode 100644 index d67dcbb84f18..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_obdo.h +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - * - * Copyright 2015 Cray Inc, all rights reserved. - * Author: Ben Evans. - * - * Define obdo associated functions - * obdo: OBject Device o... 
- */ - -#ifndef _LUSTRE_OBDO_H_ -#define _LUSTRE_OBDO_H_ - -#include <uapi/linux/lustre/lustre_idl.h> - -/** - * Create an obdo to send over the wire - */ -void lustre_set_wire_obdo(const struct obd_connect_data *ocd, - struct obdo *wobdo, - const struct obdo *lobdo); - -/** - * Create a local obdo from a wire based odbo - */ -void lustre_get_wire_obdo(const struct obd_connect_data *ocd, - struct obdo *lobdo, - const struct obdo *wobdo); - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h deleted file mode 100644 index ce28ed5c1ef8..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef LUSTRE_PATCHLESS_COMPAT_H -#define LUSTRE_PATCHLESS_COMPAT_H - -#include <linux/fs.h> - -#include <linux/list.h> -#include <linux/mm.h> -#include <linux/hash.h> - -#define ll_delete_from_page_cache(page) delete_from_page_cache(page) - -static inline void -truncate_complete_page(struct address_space *mapping, struct page *page) -{ - if (page->mapping != mapping) - return; - - if (PagePrivate(page)) - page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); - - cancel_dirty_page(page); - ClearPageMappedToDisk(page); - ll_delete_from_page_cache(page); -} - -#ifndef ATTR_CTIME_SET -/* - * set ATTR_CTIME_SET to a high value to avoid any risk of collision with other - * ATTR_* attributes (see bug 13828) - */ -#define ATTR_CTIME_SET (1 << 28) -#endif - -#endif /* LUSTRE_PATCHLESS_COMPAT_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h deleted file mode 100644 index 213d0a01adcf..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h +++ /dev/null @@ -1,307 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_req_layout.h - * - * Lustre Metadata Target (mdt) request handler - * - * Author: Nikita Danilov <nikita@clusterfs.com> - */ - -#ifndef _LUSTRE_REQ_LAYOUT_H__ -#define _LUSTRE_REQ_LAYOUT_H__ - -#include <linux/types.h> - -/** \defgroup req_layout req_layout - * - * @{ - */ - -struct req_msg_field; -struct req_format; -struct req_capsule; - -struct ptlrpc_request; - -enum req_location { - RCL_CLIENT, - RCL_SERVER, - RCL_NR -}; - -/* Maximal number of fields (buffers) in a request message. 
*/ -#define REQ_MAX_FIELD_NR 9 - -struct req_capsule { - struct ptlrpc_request *rc_req; - const struct req_format *rc_fmt; - enum req_location rc_loc; - __u32 rc_area[RCL_NR][REQ_MAX_FIELD_NR]; -}; - -void req_capsule_init(struct req_capsule *pill, struct ptlrpc_request *req, - enum req_location location); -void req_capsule_fini(struct req_capsule *pill); - -void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt); -size_t req_capsule_filled_sizes(struct req_capsule *pill, - enum req_location loc); -int req_capsule_server_pack(struct req_capsule *pill); - -void *req_capsule_client_get(struct req_capsule *pill, - const struct req_msg_field *field); -void *req_capsule_client_swab_get(struct req_capsule *pill, - const struct req_msg_field *field, - void *swabber); -void *req_capsule_client_sized_get(struct req_capsule *pill, - const struct req_msg_field *field, - u32 len); -void *req_capsule_server_get(struct req_capsule *pill, - const struct req_msg_field *field); -void *req_capsule_server_sized_get(struct req_capsule *pill, - const struct req_msg_field *field, - u32 len); -void *req_capsule_server_swab_get(struct req_capsule *pill, - const struct req_msg_field *field, - void *swabber); -void *req_capsule_server_sized_swab_get(struct req_capsule *pill, - const struct req_msg_field *field, - u32 len, void *swabber); - -void req_capsule_set_size(struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc, u32 size); -u32 req_capsule_get_size(const struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc); -u32 req_capsule_msg_size(struct req_capsule *pill, enum req_location loc); -u32 req_capsule_fmt_size(__u32 magic, const struct req_format *fmt, - enum req_location loc); -void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt); - -int req_capsule_has_field(const struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc); -void 
req_capsule_shrink(struct req_capsule *pill, - const struct req_msg_field *field, - u32 newlen, enum req_location loc); -int req_layout_init(void); -void req_layout_fini(void); - -extern struct req_format RQF_OBD_PING; -extern struct req_format RQF_OBD_SET_INFO; -extern struct req_format RQF_SEC_CTX; -/* MGS req_format */ -extern struct req_format RQF_MGS_TARGET_REG; -extern struct req_format RQF_MGS_SET_INFO; -extern struct req_format RQF_MGS_CONFIG_READ; -/* fid/fld req_format */ -extern struct req_format RQF_SEQ_QUERY; -extern struct req_format RQF_FLD_QUERY; -extern struct req_format RQF_FLD_READ; -/* MDS req_format */ -extern struct req_format RQF_MDS_CONNECT; -extern struct req_format RQF_MDS_DISCONNECT; -extern struct req_format RQF_MDS_STATFS; -extern struct req_format RQF_MDS_GETSTATUS; -extern struct req_format RQF_MDS_SYNC; -extern struct req_format RQF_MDS_GETXATTR; -extern struct req_format RQF_MDS_GETATTR; - -/* - * This is format of direct (non-intent) MDS_GETATTR_NAME request. 
- */ -extern struct req_format RQF_MDS_GETATTR_NAME; -extern struct req_format RQF_MDS_CLOSE; -extern struct req_format RQF_MDS_INTENT_CLOSE; -extern struct req_format RQF_MDS_CONNECT; -extern struct req_format RQF_MDS_DISCONNECT; -extern struct req_format RQF_MDS_GET_INFO; -extern struct req_format RQF_MDS_READPAGE; -extern struct req_format RQF_MDS_WRITEPAGE; -extern struct req_format RQF_MDS_REINT; -extern struct req_format RQF_MDS_REINT_CREATE; -extern struct req_format RQF_MDS_REINT_CREATE_ACL; -extern struct req_format RQF_MDS_REINT_CREATE_SLAVE; -extern struct req_format RQF_MDS_REINT_CREATE_SYM; -extern struct req_format RQF_MDS_REINT_OPEN; -extern struct req_format RQF_MDS_REINT_UNLINK; -extern struct req_format RQF_MDS_REINT_LINK; -extern struct req_format RQF_MDS_REINT_RENAME; -extern struct req_format RQF_MDS_REINT_SETATTR; -extern struct req_format RQF_MDS_REINT_SETXATTR; -extern struct req_format RQF_MDS_QUOTACTL; -extern struct req_format RQF_MDS_SWAP_LAYOUTS; -extern struct req_format RQF_MDS_REINT_MIGRATE; -/* MDS hsm formats */ -extern struct req_format RQF_MDS_HSM_STATE_GET; -extern struct req_format RQF_MDS_HSM_STATE_SET; -extern struct req_format RQF_MDS_HSM_ACTION; -extern struct req_format RQF_MDS_HSM_PROGRESS; -extern struct req_format RQF_MDS_HSM_CT_REGISTER; -extern struct req_format RQF_MDS_HSM_CT_UNREGISTER; -extern struct req_format RQF_MDS_HSM_REQUEST; -/* OST req_format */ -extern struct req_format RQF_OST_CONNECT; -extern struct req_format RQF_OST_DISCONNECT; -extern struct req_format RQF_OST_QUOTACTL; -extern struct req_format RQF_OST_GETATTR; -extern struct req_format RQF_OST_SETATTR; -extern struct req_format RQF_OST_CREATE; -extern struct req_format RQF_OST_PUNCH; -extern struct req_format RQF_OST_SYNC; -extern struct req_format RQF_OST_DESTROY; -extern struct req_format RQF_OST_BRW_READ; -extern struct req_format RQF_OST_BRW_WRITE; -extern struct req_format RQF_OST_STATFS; -extern struct req_format RQF_OST_SET_GRANT_INFO; 
-extern struct req_format RQF_OST_GET_INFO; -extern struct req_format RQF_OST_GET_INFO_LAST_ID; -extern struct req_format RQF_OST_GET_INFO_LAST_FID; -extern struct req_format RQF_OST_SET_INFO_LAST_FID; -extern struct req_format RQF_OST_GET_INFO_FIEMAP; - -/* LDLM req_format */ -extern struct req_format RQF_LDLM_ENQUEUE; -extern struct req_format RQF_LDLM_ENQUEUE_LVB; -extern struct req_format RQF_LDLM_CONVERT; -extern struct req_format RQF_LDLM_INTENT; -extern struct req_format RQF_LDLM_INTENT_BASIC; -extern struct req_format RQF_LDLM_INTENT_LAYOUT; -extern struct req_format RQF_LDLM_INTENT_GETATTR; -extern struct req_format RQF_LDLM_INTENT_OPEN; -extern struct req_format RQF_LDLM_INTENT_CREATE; -extern struct req_format RQF_LDLM_INTENT_UNLINK; -extern struct req_format RQF_LDLM_INTENT_GETXATTR; -extern struct req_format RQF_LDLM_CANCEL; -extern struct req_format RQF_LDLM_CALLBACK; -extern struct req_format RQF_LDLM_CP_CALLBACK; -extern struct req_format RQF_LDLM_BL_CALLBACK; -extern struct req_format RQF_LDLM_GL_CALLBACK; -extern struct req_format RQF_LDLM_GL_DESC_CALLBACK; -/* LOG req_format */ -extern struct req_format RQF_LOG_CANCEL; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER; -extern struct req_format RQF_LLOG_ORIGIN_CONNECT; - -extern struct req_format RQF_CONNECT; - -extern struct req_msg_field RMF_GENERIC_DATA; -extern struct req_msg_field RMF_PTLRPC_BODY; -extern struct req_msg_field RMF_MDT_BODY; -extern struct req_msg_field RMF_MDT_EPOCH; -extern struct req_msg_field RMF_OBD_STATFS; -extern struct req_msg_field RMF_NAME; -extern struct req_msg_field RMF_SYMTGT; -extern struct req_msg_field RMF_TGTUUID; -extern struct req_msg_field RMF_CLUUID; -extern struct req_msg_field RMF_SETINFO_VAL; -extern struct 
req_msg_field RMF_SETINFO_KEY; -extern struct req_msg_field RMF_GETINFO_VAL; -extern struct req_msg_field RMF_GETINFO_VALLEN; -extern struct req_msg_field RMF_GETINFO_KEY; -extern struct req_msg_field RMF_CLOSE_DATA; - -/* - * connection handle received in MDS_CONNECT request. - */ -extern struct req_msg_field RMF_CONN; -extern struct req_msg_field RMF_CONNECT_DATA; -extern struct req_msg_field RMF_DLM_REQ; -extern struct req_msg_field RMF_DLM_REP; -extern struct req_msg_field RMF_DLM_LVB; -extern struct req_msg_field RMF_DLM_GL_DESC; -extern struct req_msg_field RMF_LDLM_INTENT; -extern struct req_msg_field RMF_LAYOUT_INTENT; -extern struct req_msg_field RMF_MDT_MD; -extern struct req_msg_field RMF_REC_REINT; -extern struct req_msg_field RMF_EADATA; -extern struct req_msg_field RMF_EAVALS; -extern struct req_msg_field RMF_EAVALS_LENS; -extern struct req_msg_field RMF_ACL; -extern struct req_msg_field RMF_LOGCOOKIES; -extern struct req_msg_field RMF_CAPA1; -extern struct req_msg_field RMF_CAPA2; -extern struct req_msg_field RMF_OBD_QUOTACHECK; -extern struct req_msg_field RMF_OBD_QUOTACTL; -extern struct req_msg_field RMF_STRING; -extern struct req_msg_field RMF_SWAP_LAYOUTS; -extern struct req_msg_field RMF_MDS_HSM_PROGRESS; -extern struct req_msg_field RMF_MDS_HSM_REQUEST; -extern struct req_msg_field RMF_MDS_HSM_USER_ITEM; -extern struct req_msg_field RMF_MDS_HSM_ARCHIVE; -extern struct req_msg_field RMF_HSM_USER_STATE; -extern struct req_msg_field RMF_HSM_STATE_SET; -extern struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION; -extern struct req_msg_field RMF_MDS_HSM_REQUEST; - -/* seq-mgr fields */ -extern struct req_msg_field RMF_SEQ_OPC; -extern struct req_msg_field RMF_SEQ_RANGE; -extern struct req_msg_field RMF_FID_SPACE; - -/* FLD fields */ -extern struct req_msg_field RMF_FLD_OPC; -extern struct req_msg_field RMF_FLD_MDFLD; - -extern struct req_msg_field RMF_LLOGD_BODY; -extern struct req_msg_field RMF_LLOG_LOG_HDR; -extern struct req_msg_field 
RMF_LLOGD_CONN_BODY; - -extern struct req_msg_field RMF_MGS_TARGET_INFO; -extern struct req_msg_field RMF_MGS_SEND_PARAM; - -extern struct req_msg_field RMF_OST_BODY; -extern struct req_msg_field RMF_OBD_IOOBJ; -extern struct req_msg_field RMF_OBD_ID; -extern struct req_msg_field RMF_FID; -extern struct req_msg_field RMF_NIOBUF_REMOTE; -extern struct req_msg_field RMF_RCS; -extern struct req_msg_field RMF_FIEMAP_KEY; -extern struct req_msg_field RMF_FIEMAP_VAL; -extern struct req_msg_field RMF_OST_ID; - -/* MGS config read message format */ -extern struct req_msg_field RMF_MGS_CONFIG_BODY; -extern struct req_msg_field RMF_MGS_CONFIG_RES; - -/* generic uint32 */ -extern struct req_msg_field RMF_U32; - -/** @} req_layout */ - -#endif /* _LUSTRE_REQ_LAYOUT_H__ */ diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h deleted file mode 100644 index c5cb07acd0da..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_sec.h +++ /dev/null @@ -1,1070 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef _LUSTRE_SEC_H_ -#define _LUSTRE_SEC_H_ - -/** \defgroup sptlrpc sptlrpc - * - * @{ - */ - -/* - * to avoid include - */ -struct obd_import; -struct obd_export; -struct ptlrpc_request; -struct ptlrpc_reply_state; -struct ptlrpc_bulk_desc; -struct brw_page; -/* Linux specific */ -struct key; -struct seq_file; -struct lustre_cfg; - -/* - * forward declaration - */ -struct ptlrpc_sec_policy; -struct ptlrpc_sec_cops; -struct ptlrpc_sec_sops; -struct ptlrpc_sec; -struct ptlrpc_svc_ctx; -struct ptlrpc_cli_ctx; -struct ptlrpc_ctx_ops; - -/** - * \addtogroup flavor flavor - * - * RPC flavor is represented by a 32 bits integer. Currently the high 12 bits - * are unused, must be set to 0 for future expansion. - * <pre> - * ------------------------------------------------------------------------ - * | 4b (bulk svc) | 4b (bulk type) | 4b (svc) | 4b (mech) | 4b (policy) | - * ------------------------------------------------------------------------ - * </pre> - * - * @{ - */ - -/* - * flavor constants - */ -enum sptlrpc_policy { - SPTLRPC_POLICY_NULL = 0, - SPTLRPC_POLICY_PLAIN = 1, - SPTLRPC_POLICY_GSS = 2, - SPTLRPC_POLICY_MAX, -}; - -enum sptlrpc_mech_null { - SPTLRPC_MECH_NULL = 0, - SPTLRPC_MECH_NULL_MAX, -}; - -enum sptlrpc_mech_plain { - SPTLRPC_MECH_PLAIN = 0, - SPTLRPC_MECH_PLAIN_MAX, -}; - -enum sptlrpc_mech_gss { - SPTLRPC_MECH_GSS_NULL = 0, - SPTLRPC_MECH_GSS_KRB5 = 1, - SPTLRPC_MECH_GSS_MAX, -}; - -enum sptlrpc_service_type { - SPTLRPC_SVC_NULL = 0, /**< no security */ - SPTLRPC_SVC_AUTH = 1, /**< authentication only */ - SPTLRPC_SVC_INTG = 2, /**< integrity */ - SPTLRPC_SVC_PRIV = 3, /**< privacy */ - SPTLRPC_SVC_MAX, -}; - -enum sptlrpc_bulk_type { - SPTLRPC_BULK_DEFAULT = 0, /**< follow rpc flavor */ - SPTLRPC_BULK_HASH = 1, /**< hash integrity */ - 
SPTLRPC_BULK_MAX, -}; - -enum sptlrpc_bulk_service { - SPTLRPC_BULK_SVC_NULL = 0, /**< no security */ - SPTLRPC_BULK_SVC_AUTH = 1, /**< authentication only */ - SPTLRPC_BULK_SVC_INTG = 2, /**< integrity */ - SPTLRPC_BULK_SVC_PRIV = 3, /**< privacy */ - SPTLRPC_BULK_SVC_MAX, -}; - -/* - * compose/extract macros - */ -#define FLVR_POLICY_OFFSET (0) -#define FLVR_MECH_OFFSET (4) -#define FLVR_SVC_OFFSET (8) -#define FLVR_BULK_TYPE_OFFSET (12) -#define FLVR_BULK_SVC_OFFSET (16) - -#define MAKE_FLVR(policy, mech, svc, btype, bsvc) \ - (((__u32)(policy) << FLVR_POLICY_OFFSET) | \ - ((__u32)(mech) << FLVR_MECH_OFFSET) | \ - ((__u32)(svc) << FLVR_SVC_OFFSET) | \ - ((__u32)(btype) << FLVR_BULK_TYPE_OFFSET) | \ - ((__u32)(bsvc) << FLVR_BULK_SVC_OFFSET)) - -/* - * extraction - */ -#define SPTLRPC_FLVR_POLICY(flavor) \ - ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xF) -#define SPTLRPC_FLVR_MECH(flavor) \ - ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xF) -#define SPTLRPC_FLVR_SVC(flavor) \ - ((((__u32)(flavor)) >> FLVR_SVC_OFFSET) & 0xF) -#define SPTLRPC_FLVR_BULK_TYPE(flavor) \ - ((((__u32)(flavor)) >> FLVR_BULK_TYPE_OFFSET) & 0xF) -#define SPTLRPC_FLVR_BULK_SVC(flavor) \ - ((((__u32)(flavor)) >> FLVR_BULK_SVC_OFFSET) & 0xF) - -#define SPTLRPC_FLVR_BASE(flavor) \ - ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xFFF) -#define SPTLRPC_FLVR_BASE_SUB(flavor) \ - ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xFF) - -/* - * gss subflavors - */ -#define MAKE_BASE_SUBFLVR(mech, svc) \ - ((__u32)(mech) | \ - ((__u32)(svc) << (FLVR_SVC_OFFSET - FLVR_MECH_OFFSET))) - -#define SPTLRPC_SUBFLVR_KRB5N \ - MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_NULL) -#define SPTLRPC_SUBFLVR_KRB5A \ - MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_AUTH) -#define SPTLRPC_SUBFLVR_KRB5I \ - MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_INTG) -#define SPTLRPC_SUBFLVR_KRB5P \ - MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_PRIV) - -/* - * "end user" flavors - */ -#define 
SPTLRPC_FLVR_NULL \ - MAKE_FLVR(SPTLRPC_POLICY_NULL, \ - SPTLRPC_MECH_NULL, \ - SPTLRPC_SVC_NULL, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_NULL) -#define SPTLRPC_FLVR_PLAIN \ - MAKE_FLVR(SPTLRPC_POLICY_PLAIN, \ - SPTLRPC_MECH_PLAIN, \ - SPTLRPC_SVC_NULL, \ - SPTLRPC_BULK_HASH, \ - SPTLRPC_BULK_SVC_INTG) -#define SPTLRPC_FLVR_KRB5N \ - MAKE_FLVR(SPTLRPC_POLICY_GSS, \ - SPTLRPC_MECH_GSS_KRB5, \ - SPTLRPC_SVC_NULL, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_NULL) -#define SPTLRPC_FLVR_KRB5A \ - MAKE_FLVR(SPTLRPC_POLICY_GSS, \ - SPTLRPC_MECH_GSS_KRB5, \ - SPTLRPC_SVC_AUTH, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_NULL) -#define SPTLRPC_FLVR_KRB5I \ - MAKE_FLVR(SPTLRPC_POLICY_GSS, \ - SPTLRPC_MECH_GSS_KRB5, \ - SPTLRPC_SVC_INTG, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_INTG) -#define SPTLRPC_FLVR_KRB5P \ - MAKE_FLVR(SPTLRPC_POLICY_GSS, \ - SPTLRPC_MECH_GSS_KRB5, \ - SPTLRPC_SVC_PRIV, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_PRIV) - -#define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL - -#define SPTLRPC_FLVR_INVALID ((__u32)0xFFFFFFFF) -#define SPTLRPC_FLVR_ANY ((__u32)0xFFF00000) - -/** - * extract the useful part from wire flavor - */ -#define WIRE_FLVR(wflvr) (((__u32)(wflvr)) & 0x000FFFFF) - -/** @} flavor */ - -static inline void flvr_set_svc(__u32 *flvr, __u32 svc) -{ - LASSERT(svc < SPTLRPC_SVC_MAX); - *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr), - SPTLRPC_FLVR_MECH(*flvr), - svc, - SPTLRPC_FLVR_BULK_TYPE(*flvr), - SPTLRPC_FLVR_BULK_SVC(*flvr)); -} - -static inline void flvr_set_bulk_svc(__u32 *flvr, __u32 svc) -{ - LASSERT(svc < SPTLRPC_BULK_SVC_MAX); - *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr), - SPTLRPC_FLVR_MECH(*flvr), - SPTLRPC_FLVR_SVC(*flvr), - SPTLRPC_FLVR_BULK_TYPE(*flvr), - svc); -} - -struct bulk_spec_hash { - __u8 hash_alg; -}; - -/** - * Full description of flavors being used on a ptlrpc connection, include - * both regular RPC and bulk transfer parts. 
- */ -struct sptlrpc_flavor { - /** - * wire flavor, should be renamed to sf_wire. - */ - __u32 sf_rpc; - /** - * general flags of PTLRPC_SEC_FL_* - */ - __u32 sf_flags; - /** - * rpc flavor specification - */ - union { - /* nothing for now */ - } u_rpc; - /** - * bulk flavor specification - */ - union { - struct bulk_spec_hash hash; - } u_bulk; -}; - -/** - * identify the RPC is generated from what part of Lustre. It's encoded into - * RPC requests and to be checked by ptlrpc service. - */ -enum lustre_sec_part { - LUSTRE_SP_CLI = 0, - LUSTRE_SP_MDT, - LUSTRE_SP_OST, - LUSTRE_SP_MGC, - LUSTRE_SP_MGS, - LUSTRE_SP_ANY = 0xFF -}; - -enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd); - -/** - * A rule specifies a flavor to be used by a ptlrpc connection between - * two Lustre parts. - */ -struct sptlrpc_rule { - __u32 sr_netid; /* LNET network ID */ - __u8 sr_from; /* sec_part */ - __u8 sr_to; /* sec_part */ - __u16 sr_padding; - struct sptlrpc_flavor sr_flvr; -}; - -/** - * A set of rules in memory. - * - * Rules are generated and stored on MGS, and propagated to MDT, OST, - * and client when needed. - */ -struct sptlrpc_rule_set { - int srs_nslot; - int srs_nrule; - struct sptlrpc_rule *srs_rules; -}; - -int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr); -bool sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr); - -static inline void sptlrpc_rule_set_init(struct sptlrpc_rule_set *set) -{ - memset(set, 0, sizeof(*set)); -} - -int sptlrpc_process_config(struct lustre_cfg *lcfg); -void sptlrpc_conf_log_start(const char *logname); -void sptlrpc_conf_log_stop(const char *logname); -void sptlrpc_conf_log_update_begin(const char *logname); -void sptlrpc_conf_log_update_end(const char *logname); -void sptlrpc_conf_client_adapt(struct obd_device *obd); - -/* The maximum length of security payload. 1024 is enough for Kerberos 5, - * and should be enough for other future mechanisms but not sure. 
- * Only used by pre-allocated request/reply pool. - */ -#define SPTLRPC_MAX_PAYLOAD (1024) - -struct vfs_cred { - u32 vc_uid; - u32 vc_gid; -}; - -struct ptlrpc_ctx_ops { - /** - * To determine whether it's suitable to use the \a ctx for \a vcred. - */ - int (*match)(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred); - - /** - * To bring the \a ctx uptodate. - */ - int (*refresh)(struct ptlrpc_cli_ctx *ctx); - - /** - * Validate the \a ctx. - */ - int (*validate)(struct ptlrpc_cli_ctx *ctx); - - /** - * Force the \a ctx to die. - */ - void (*force_die)(struct ptlrpc_cli_ctx *ctx, int grace); - int (*display)(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize); - - /** - * Sign the request message using \a ctx. - * - * \pre req->rq_reqmsg point to request message. - * \pre req->rq_reqlen is the request message length. - * \post req->rq_reqbuf point to request message with signature. - * \post req->rq_reqdata_len is set to the final request message size. - * - * \see null_ctx_sign(), plain_ctx_sign(), gss_cli_ctx_sign(). - */ - int (*sign)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req); - - /** - * Verify the reply message using \a ctx. - * - * \pre req->rq_repdata point to reply message with signature. - * \pre req->rq_repdata_len is the total reply message length. - * \post req->rq_repmsg point to reply message without signature. - * \post req->rq_replen is the reply message length. - * - * \see null_ctx_verify(), plain_ctx_verify(), gss_cli_ctx_verify(). - */ - int (*verify)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req); - - /** - * Encrypt the request message using \a ctx. - * - * \pre req->rq_reqmsg point to request message in clear text. - * \pre req->rq_reqlen is the request message length. - * \post req->rq_reqbuf point to request message. - * \post req->rq_reqdata_len is set to the final request message size. - * - * \see gss_cli_ctx_seal(). 
- */ - int (*seal)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req); - - /** - * Decrypt the reply message using \a ctx. - * - * \pre req->rq_repdata point to encrypted reply message. - * \pre req->rq_repdata_len is the total cipher text length. - * \post req->rq_repmsg point to reply message in clear text. - * \post req->rq_replen is the reply message length in clear text. - * - * \see gss_cli_ctx_unseal(). - */ - int (*unseal)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req); - - /** - * Wrap bulk request data. This is called before wrapping RPC - * request message. - * - * \pre bulk buffer is descripted by desc->bd_iov and - * desc->bd_iov_count. note for read it's just buffer, no data - * need to be sent; for write it contains data in clear text. - * \post when necessary, ptlrpc_bulk_sec_desc was properly prepared - * (usually inside of RPC request message). - * - encryption: cipher text bulk buffer is descripted by - * desc->bd_enc_iov and desc->bd_iov_count (currently assume iov - * count remains the same). - * - otherwise: bulk buffer is still desc->bd_iov and - * desc->bd_iov_count. - * - * \return 0: success. - * \return -ev: error code. - * - * \see plain_cli_wrap_bulk(), gss_cli_ctx_wrap_bulk(). - */ - int (*wrap_bulk)(struct ptlrpc_cli_ctx *ctx, - struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); - - /** - * Unwrap bulk reply data. This is called after wrapping RPC - * reply message. - * - * \pre bulk buffer is descripted by desc->bd_iov/desc->bd_enc_iov and - * desc->bd_iov_count, according to wrap_bulk(). - * \post final bulk data in clear text is placed in buffer described - * by desc->bd_iov and desc->bd_iov_count. - * \return +ve nob of actual bulk data in clear text. - * \return -ve error code. - * - * \see plain_cli_unwrap_bulk(), gss_cli_ctx_unwrap_bulk(). 
- */ - int (*unwrap_bulk)(struct ptlrpc_cli_ctx *ctx, - struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); -}; - -#define PTLRPC_CTX_NEW_BIT (0) /* newly created */ -#define PTLRPC_CTX_UPTODATE_BIT (1) /* uptodate */ -#define PTLRPC_CTX_DEAD_BIT (2) /* mark expired gracefully */ -#define PTLRPC_CTX_ERROR_BIT (3) /* fatal error (refresh, etc.) */ -#define PTLRPC_CTX_CACHED_BIT (8) /* in ctx cache (hash etc.) */ -#define PTLRPC_CTX_ETERNAL_BIT (9) /* always valid */ - -#define PTLRPC_CTX_NEW (1 << PTLRPC_CTX_NEW_BIT) -#define PTLRPC_CTX_UPTODATE (1 << PTLRPC_CTX_UPTODATE_BIT) -#define PTLRPC_CTX_DEAD (1 << PTLRPC_CTX_DEAD_BIT) -#define PTLRPC_CTX_ERROR (1 << PTLRPC_CTX_ERROR_BIT) -#define PTLRPC_CTX_CACHED (1 << PTLRPC_CTX_CACHED_BIT) -#define PTLRPC_CTX_ETERNAL (1 << PTLRPC_CTX_ETERNAL_BIT) - -#define PTLRPC_CTX_STATUS_MASK (PTLRPC_CTX_NEW_BIT | \ - PTLRPC_CTX_UPTODATE | \ - PTLRPC_CTX_DEAD | \ - PTLRPC_CTX_ERROR) - -struct ptlrpc_cli_ctx { - struct hlist_node cc_cache; /* linked into ctx cache */ - atomic_t cc_refcount; - struct ptlrpc_sec *cc_sec; - struct ptlrpc_ctx_ops *cc_ops; - unsigned long cc_expire; /* in seconds */ - unsigned int cc_early_expire:1; - unsigned long cc_flags; - struct vfs_cred cc_vcred; - spinlock_t cc_lock; - struct list_head cc_req_list; /* waiting reqs linked here */ - struct list_head cc_gc_chain; /* linked to gc chain */ -}; - -/** - * client side policy operation vector. - */ -struct ptlrpc_sec_cops { - /** - * Given an \a imp, create and initialize a ptlrpc_sec structure. - * \param ctx service context: - * - regular import: \a ctx should be NULL; - * - reverse import: \a ctx is obtained from incoming request. - * \param flavor specify what flavor to use. - * - * When necessary, policy module is responsible for taking reference - * on the import. - * - * \see null_create_sec(), plain_create_sec(), gss_sec_create_kr(). 
- */ - struct ptlrpc_sec *(*create_sec)(struct obd_import *imp, - struct ptlrpc_svc_ctx *ctx, - struct sptlrpc_flavor *flavor); - - /** - * Destructor of ptlrpc_sec. When called, refcount has been dropped - * to 0 and all contexts has been destroyed. - * - * \see null_destroy_sec(), plain_destroy_sec(), gss_sec_destroy_kr(). - */ - void (*destroy_sec)(struct ptlrpc_sec *sec); - - /** - * Notify that this ptlrpc_sec is going to die. Optionally, policy - * module is supposed to set sec->ps_dying and whatever necessary - * actions. - * - * \see plain_kill_sec(), gss_sec_kill(). - */ - void (*kill_sec)(struct ptlrpc_sec *sec); - - /** - * Given \a vcred, lookup and/or create its context. The policy module - * is supposed to maintain its own context cache. - * XXX currently \a create and \a remove_dead is always 1, perhaps - * should be removed completely. - * - * \see null_lookup_ctx(), plain_lookup_ctx(), gss_sec_lookup_ctx_kr(). - */ - struct ptlrpc_cli_ctx *(*lookup_ctx)(struct ptlrpc_sec *sec, - struct vfs_cred *vcred, - int create, int remove_dead); - - /** - * Called then the reference of \a ctx dropped to 0. The policy module - * is supposed to destroy this context or whatever else according to - * its cache maintenance mechanism. - * - * \param sync if zero, we shouldn't wait for the context being - * destroyed completely. - * - * \see plain_release_ctx(), gss_sec_release_ctx_kr(). - */ - void (*release_ctx)(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *ctx, - int sync); - - /** - * Flush the context cache. - * - * \param uid context of which user, -1 means all contexts. - * \param grace if zero, the PTLRPC_CTX_UPTODATE_BIT of affected - * contexts should be cleared immediately. - * \param force if zero, only idle contexts will be flushed. - * - * \see plain_flush_ctx_cache(), gss_sec_flush_ctx_cache_kr(). 
- */ - int (*flush_ctx_cache)(struct ptlrpc_sec *sec, uid_t uid, - int grace, int force); - - /** - * Called periodically by garbage collector to remove dead contexts - * from cache. - * - * \see gss_sec_gc_ctx_kr(). - */ - void (*gc_ctx)(struct ptlrpc_sec *sec); - - /** - * Given an context \a ctx, install a corresponding reverse service - * context on client side. - * XXX currently it's only used by GSS module, maybe we should remove - * this from general API. - */ - int (*install_rctx)(struct obd_import *imp, struct ptlrpc_sec *sec, - struct ptlrpc_cli_ctx *ctx); - - /** - * To allocate request buffer for \a req. - * - * \pre req->rq_reqmsg == NULL. - * \pre req->rq_reqbuf == NULL, otherwise it must be pre-allocated, - * we are not supposed to free it. - * \post if success, req->rq_reqmsg point to a buffer with size - * at least \a lustre_msg_size. - * - * \see null_alloc_reqbuf(), plain_alloc_reqbuf(), gss_alloc_reqbuf(). - */ - int (*alloc_reqbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req, - int lustre_msg_size); - - /** - * To free request buffer for \a req. - * - * \pre req->rq_reqbuf != NULL. - * - * \see null_free_reqbuf(), plain_free_reqbuf(), gss_free_reqbuf(). - */ - void (*free_reqbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req); - - /** - * To allocate reply buffer for \a req. - * - * \pre req->rq_repbuf == NULL. - * \post if success, req->rq_repbuf point to a buffer with size - * req->rq_repbuf_len, the size should be large enough to receive - * reply which be transformed from \a lustre_msg_size of clear text. - * - * \see null_alloc_repbuf(), plain_alloc_repbuf(), gss_alloc_repbuf(). - */ - int (*alloc_repbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req, - int lustre_msg_size); - - /** - * To free reply buffer for \a req. - * - * \pre req->rq_repbuf != NULL. - * \post req->rq_repbuf == NULL. - * \post req->rq_repbuf_len == 0. - * - * \see null_free_repbuf(), plain_free_repbuf(), gss_free_repbuf(). 
- */ - void (*free_repbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req); - - /** - * To expand the request buffer of \a req, thus the \a segment in - * the request message pointed by req->rq_reqmsg can accommodate - * at least \a newsize of data. - * - * \pre req->rq_reqmsg->lm_buflens[segment] < newsize. - * - * \see null_enlarge_reqbuf(), plain_enlarge_reqbuf(), - * gss_enlarge_reqbuf(). - */ - int (*enlarge_reqbuf)(struct ptlrpc_sec *sec, - struct ptlrpc_request *req, - int segment, int newsize); - /* - * misc - */ - int (*display)(struct ptlrpc_sec *sec, struct seq_file *seq); -}; - -/** - * server side policy operation vector. - */ -struct ptlrpc_sec_sops { - /** - * verify an incoming request. - * - * \pre request message is pointed by req->rq_reqbuf, size is - * req->rq_reqdata_len; and the message has been unpacked to - * host byte order. - * - * \retval SECSVC_OK success, req->rq_reqmsg point to request message - * in clear text, size is req->rq_reqlen; req->rq_svc_ctx is set; - * req->rq_sp_from is decoded from request. - * \retval SECSVC_COMPLETE success, the request has been fully - * processed, and reply message has been prepared; req->rq_sp_from is - * decoded from request. - * \retval SECSVC_DROP failed, this request should be dropped. - * - * \see null_accept(), plain_accept(), gss_svc_accept_kr(). - */ - int (*accept)(struct ptlrpc_request *req); - - /** - * Perform security transformation upon reply message. - * - * \pre reply message is pointed by req->rq_reply_state->rs_msg, size - * is req->rq_replen. - * \post req->rs_repdata_len is the final message size. - * \post req->rq_reply_off is set. - * - * \see null_authorize(), plain_authorize(), gss_svc_authorize(). - */ - int (*authorize)(struct ptlrpc_request *req); - - /** - * Invalidate server context \a ctx. - * - * \see gss_svc_invalidate_ctx(). - */ - void (*invalidate_ctx)(struct ptlrpc_svc_ctx *ctx); - - /** - * Allocate a ptlrpc_reply_state. 
- * - * \param msgsize size of the reply message in clear text. - * \pre if req->rq_reply_state != NULL, then it's pre-allocated, we - * should simply use it; otherwise we'll responsible for allocating - * a new one. - * \post req->rq_reply_state != NULL; - * \post req->rq_reply_state->rs_msg != NULL; - * - * \see null_alloc_rs(), plain_alloc_rs(), gss_svc_alloc_rs(). - */ - int (*alloc_rs)(struct ptlrpc_request *req, int msgsize); - - /** - * Free a ptlrpc_reply_state. - */ - void (*free_rs)(struct ptlrpc_reply_state *rs); - - /** - * Release the server context \a ctx. - * - * \see gss_svc_free_ctx(). - */ - void (*free_ctx)(struct ptlrpc_svc_ctx *ctx); - - /** - * Install a reverse context based on the server context \a ctx. - * - * \see gss_svc_install_rctx_kr(). - */ - int (*install_rctx)(struct obd_import *imp, struct ptlrpc_svc_ctx *ctx); - - /** - * Prepare buffer for incoming bulk write. - * - * \pre desc->bd_iov and desc->bd_iov_count describes the buffer - * intended to receive the write. - * - * \see gss_svc_prep_bulk(). - */ - int (*prep_bulk)(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); - - /** - * Unwrap the bulk write data. - * - * \see plain_svc_unwrap_bulk(), gss_svc_unwrap_bulk(). - */ - int (*unwrap_bulk)(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); - - /** - * Wrap the bulk read data. - * - * \see plain_svc_wrap_bulk(), gss_svc_wrap_bulk(). 
- */ - int (*wrap_bulk)(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); -}; - -struct ptlrpc_sec_policy { - struct module *sp_owner; - char *sp_name; - __u16 sp_policy; /* policy number */ - struct ptlrpc_sec_cops *sp_cops; /* client ops */ - struct ptlrpc_sec_sops *sp_sops; /* server ops */ -}; - -#define PTLRPC_SEC_FL_REVERSE 0x0001 /* reverse sec */ -#define PTLRPC_SEC_FL_ROOTONLY 0x0002 /* treat everyone as root */ -#define PTLRPC_SEC_FL_UDESC 0x0004 /* ship udesc */ -#define PTLRPC_SEC_FL_BULK 0x0008 /* intensive bulk i/o expected */ -#define PTLRPC_SEC_FL_PAG 0x0010 /* PAG mode */ - -/** - * The ptlrpc_sec represents the client side ptlrpc security facilities, - * each obd_import (both regular and reverse import) must associate with - * a ptlrpc_sec. - * - * \see sptlrpc_import_sec_adapt(). - */ -struct ptlrpc_sec { - struct ptlrpc_sec_policy *ps_policy; - atomic_t ps_refcount; - /** statistic only */ - atomic_t ps_nctx; - /** unique identifier */ - int ps_id; - struct sptlrpc_flavor ps_flvr; - enum lustre_sec_part ps_part; - /** after set, no more new context will be created */ - unsigned int ps_dying:1; - /** owning import */ - struct obd_import *ps_import; - spinlock_t ps_lock; - - /* - * garbage collection - */ - struct list_head ps_gc_list; - unsigned long ps_gc_interval; /* in seconds */ - time64_t ps_gc_next; /* in seconds */ -}; - -static inline int sec_is_reverse(struct ptlrpc_sec *sec) -{ - return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE); -} - -static inline int sec_is_rootonly(struct ptlrpc_sec *sec) -{ - return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_ROOTONLY); -} - -struct ptlrpc_svc_ctx { - atomic_t sc_refcount; - struct ptlrpc_sec_policy *sc_policy; -}; - -/* - * user identity descriptor - */ -#define LUSTRE_MAX_GROUPS (128) - -struct ptlrpc_user_desc { - __u32 pud_uid; - __u32 pud_gid; - __u32 pud_fsuid; - __u32 pud_fsgid; - __u32 pud_cap; - __u32 pud_ngroups; - __u32 pud_groups[0]; -}; - -/* - * bulk flavors - */ -enum 
sptlrpc_bulk_hash_alg { - BULK_HASH_ALG_NULL = 0, - BULK_HASH_ALG_ADLER32, - BULK_HASH_ALG_CRC32, - BULK_HASH_ALG_MD5, - BULK_HASH_ALG_SHA1, - BULK_HASH_ALG_SHA256, - BULK_HASH_ALG_SHA384, - BULK_HASH_ALG_SHA512, - BULK_HASH_ALG_MAX -}; - -const char *sptlrpc_get_hash_name(__u8 hash_alg); -__u8 sptlrpc_get_hash_alg(const char *algname); - -enum { - BSD_FL_ERR = 1, -}; - -struct ptlrpc_bulk_sec_desc { - __u8 bsd_version; /* 0 */ - __u8 bsd_type; /* SPTLRPC_BULK_XXX */ - __u8 bsd_svc; /* SPTLRPC_BULK_SVC_XXXX */ - __u8 bsd_flags; /* flags */ - __u32 bsd_nob; /* nob of bulk data */ - __u8 bsd_data[0]; /* policy-specific token */ -}; - -/* - * round size up to next power of 2, for slab allocation. - * @size must be sane (can't overflow after round up) - */ -static inline int size_roundup_power2(int size) -{ - size--; - size |= size >> 1; - size |= size >> 2; - size |= size >> 4; - size |= size >> 8; - size |= size >> 16; - size++; - return size; -} - -/* - * internal support libraries - */ -void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg, - int segment, int newsize); - -/* - * security policies - */ -int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy); -int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy); - -__u32 sptlrpc_name2flavor_base(const char *name); -const char *sptlrpc_flavor2name_base(__u32 flvr); -char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf, - char *buf, int bufsize); -char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize); - -static inline -struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy) -{ - __module_get(policy->sp_owner); - return policy; -} - -static inline -void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy) -{ - module_put(policy->sp_owner); -} - -/* - * client credential - */ -static inline -unsigned long cli_ctx_status(struct ptlrpc_cli_ctx *ctx) -{ - return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK); -} - -static inline -int cli_ctx_is_ready(struct 
ptlrpc_cli_ctx *ctx) -{ - return (cli_ctx_status(ctx) == PTLRPC_CTX_UPTODATE); -} - -static inline -int cli_ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx) -{ - return (cli_ctx_status(ctx) != 0); -} - -static inline -int cli_ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx) -{ - return ((ctx->cc_flags & PTLRPC_CTX_UPTODATE) != 0); -} - -static inline -int cli_ctx_is_error(struct ptlrpc_cli_ctx *ctx) -{ - return ((ctx->cc_flags & PTLRPC_CTX_ERROR) != 0); -} - -static inline -int cli_ctx_is_dead(struct ptlrpc_cli_ctx *ctx) -{ - return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0); -} - -static inline -int cli_ctx_is_eternal(struct ptlrpc_cli_ctx *ctx) -{ - return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0); -} - -/* - * sec get/put - */ -void sptlrpc_sec_put(struct ptlrpc_sec *sec); - -/* - * internal apis which only used by policy implementation - */ -int sptlrpc_get_next_secid(void); - -/* - * exported client context api - */ -struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx); -void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync); - -/* - * exported client context wrap/buffers - */ -int sptlrpc_cli_wrap_request(struct ptlrpc_request *req); -int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req); -int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize); -void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req); -int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize); -void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req); -int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req, - int segment, int newsize); -int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, - struct ptlrpc_request **req_ret); -void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req); - -void sptlrpc_request_out_callback(struct ptlrpc_request *req); - -/* - * exported higher interface of import & request - */ -int sptlrpc_import_sec_adapt(struct obd_import *imp, - struct ptlrpc_svc_ctx *ctx, - struct 
sptlrpc_flavor *flvr); -struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp); -void sptlrpc_import_sec_put(struct obd_import *imp); - -int sptlrpc_import_check_ctx(struct obd_import *imp); -void sptlrpc_import_flush_root_ctx(struct obd_import *imp); -void sptlrpc_import_flush_my_ctx(struct obd_import *imp); -void sptlrpc_import_flush_all_ctx(struct obd_import *imp); -int sptlrpc_req_get_ctx(struct ptlrpc_request *req); -void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync); -int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout); -void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode); - -/* gc */ -void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec); -void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec); - -/* misc */ -const char *sec2target_str(struct ptlrpc_sec *sec); -/* - * lprocfs - */ -int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev); - -/* - * server side - */ -enum secsvc_accept_res { - SECSVC_OK = 0, - SECSVC_COMPLETE, - SECSVC_DROP, -}; - -int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req); -int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen); -int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req); -void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs); -void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req); -void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req); - -int sptlrpc_target_export_check(struct obd_export *exp, - struct ptlrpc_request *req); - -/* bulk security api */ -void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc); -int get_free_pages_in_pool(void); -int pool_is_at_full_capacity(void); - -int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); -int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc, - int nob); -int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); - -/* bulk helpers (internal use only by policies) */ -int 
sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, - void *buf, int buflen); - -int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed); - -/* user descriptor helpers */ -static inline int sptlrpc_user_desc_size(int ngroups) -{ - return sizeof(struct ptlrpc_user_desc) + ngroups * sizeof(__u32); -} - -int sptlrpc_current_user_desc_size(void); -int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset); -int sptlrpc_unpack_user_desc(struct lustre_msg *req, int offset, int swabbed); - -enum { - LUSTRE_SEC_NONE = 0, - LUSTRE_SEC_REMOTE = 1, - LUSTRE_SEC_SPECIFY = 2, - LUSTRE_SEC_ALL = 3 -}; - -/** @} sptlrpc */ - -#endif /* _LUSTRE_SEC_H_ */ diff --git a/drivers/staging/lustre/lustre/include/lustre_swab.h b/drivers/staging/lustre/lustre/include/lustre_swab.h deleted file mode 100644 index 9d786bbe7f3f..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_swab.h +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. 
- * - * Copyright 2015 Cray Inc, all rights reserved. - * Author: Ben Evans. - * - * We assume all nodes are either little-endian or big-endian, and we - * always send messages in the sender's native format. The receiver - * detects the message format by checking the 'magic' field of the message - * (see lustre_msg_swabbed() below). - * - * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines - * are implemented in ptlrpc/lustre_swab.c. These 'swabbers' convert the - * type from "other" endian, in-place in the message buffer. - * - * A swabber takes a single pointer argument. The caller must already have - * verified that the length of the message buffer >= sizeof (type). - * - * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine - * may be defined that swabs just the variable part, after the caller has - * verified that the message buffer is large enough. - */ - -#ifndef _LUSTRE_SWAB_H_ -#define _LUSTRE_SWAB_H_ - -#include <uapi/linux/lustre/lustre_idl.h> - -void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); -void lustre_swab_connect(struct obd_connect_data *ocd); -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_state_set(struct hsm_state_set *hss); -void lustre_swab_obd_statfs(struct obd_statfs *os); -void lustre_swab_obd_ioobj(struct obd_ioobj *ioo); -void lustre_swab_niobuf_remote(struct niobuf_remote *nbr); -void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb); -void lustre_swab_ost_lvb(struct ost_lvb *lvb); -void lustre_swab_obd_quotactl(struct obd_quotactl *q); -void lustre_swab_lquota_lvb(struct lquota_lvb *lvb); -void lustre_swab_generic_32s(__u32 *val); -void lustre_swab_mdt_body(struct mdt_body *b); -void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b); -void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); -void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); -void lustre_swab_lmv_desc(struct lmv_desc *ld); -void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm); -void 
lustre_swab_lov_desc(struct lov_desc *ld); -void lustre_swab_gl_desc(union ldlm_gl_desc *desc); -void lustre_swab_ldlm_intent(struct ldlm_intent *i); -void lustre_swab_ldlm_request(struct ldlm_request *rq); -void lustre_swab_ldlm_reply(struct ldlm_reply *r); -void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo); -void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo); -void lustre_swab_mgs_config_body(struct mgs_config_body *body); -void lustre_swab_mgs_config_res(struct mgs_config_res *body); -void lustre_swab_ost_body(struct ost_body *b); -void lustre_swab_ost_last_id(__u64 *id); -void lustre_swab_fiemap(struct fiemap *fiemap); -void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum); -void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum); -void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod, - int stripe_count); -void lustre_swab_lov_mds_md(struct lov_mds_md *lmm); -void lustre_swab_lustre_capa(struct lustre_capa *c); -void lustre_swab_lustre_capa_key(struct lustre_capa_key *k); -void lustre_swab_fid2path(struct getinfo_fid2path *gf); -void lustre_swab_layout_intent(struct layout_intent *li); -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_current_action(struct hsm_current_action *action); -void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk); -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_user_item(struct hsm_user_item *hui); -void lustre_swab_hsm_request(struct hsm_request *hr); -void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl); -void lustre_swab_close_data(struct close_data *data); -void lustre_swab_lmv_user_md(struct lmv_user_md *lum); - -/* Functions for dumping PTLRPC fields */ -void dump_rniobuf(struct niobuf_remote *rnb); -void dump_ioo(struct obd_ioobj *nb); -void dump_ost_body(struct ost_body *ob); -void dump_rcs(__u32 *rc); - -#endif diff --git a/drivers/staging/lustre/lustre/include/obd.h 
b/drivers/staging/lustre/lustre/include/obd.h deleted file mode 100644 index f1233ca7d337..000000000000 --- a/drivers/staging/lustre/lustre/include/obd.h +++ /dev/null @@ -1,1101 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef __OBD_H -#define __OBD_H - -#include <linux/spinlock.h> - -#include <uapi/linux/lustre/lustre_idl.h> -#include <lustre_lib.h> -#include <lu_ref.h> -#include <lustre_export.h> -#include <lustre_fid.h> -#include <lustre_fld.h> -#include <lustre_handles.h> -#include <lustre_intent.h> -#include <cl_object.h> - -#define MAX_OBD_DEVICES 8192 - -struct osc_async_rc { - int ar_rc; - int ar_force_sync; - __u64 ar_min_xid; -}; - -struct lov_oinfo { /* per-stripe data structure */ - struct ost_id loi_oi; /* object ID/Sequence on the target OST */ - int loi_ost_idx; /* OST stripe index in lov_tgt_desc->tgts */ - int loi_ost_gen; /* generation of this loi_ost_idx */ - - unsigned long loi_kms_valid:1; - __u64 loi_kms; /* known minimum size */ - struct ost_lvb loi_lvb; - struct osc_async_rc loi_ar; -}; - -static inline void loi_kms_set(struct lov_oinfo *oinfo, __u64 kms) -{ - oinfo->loi_kms = kms; - oinfo->loi_kms_valid = 1; -} - -static inline void loi_init(struct lov_oinfo *loi) -{ -} - -struct lov_stripe_md; -struct obd_info; - -int lov_read_and_clear_async_rc(struct cl_object *clob); - -typedef int (*obd_enqueue_update_f)(void *cookie, int rc); - -/* obd info for a particular level (lov, osc). */ -struct obd_info { - /* OBD_STATFS_* flags */ - __u64 oi_flags; - /* lsm data specific for every OSC. */ - struct lov_stripe_md *oi_md; - /* statfs data specific for every OSC, if needed at all. */ - struct obd_statfs *oi_osfs; - /* An update callback which is called to update some data on upper - * level. E.g. it is used for update lsm->lsm_oinfo at every received - * request in osc level for enqueue requests. It is also possible to - * update some caller data from LOV layer if needed. 
- */ - obd_enqueue_update_f oi_cb_up; -}; - -struct obd_type { - struct list_head typ_chain; - struct obd_ops *typ_dt_ops; - struct md_ops *typ_md_ops; - struct dentry *typ_debugfs_entry; - char *typ_name; - int typ_refcnt; - struct lu_device_type *typ_lu; - spinlock_t obd_type_lock; - struct kobject *typ_kobj; -}; - -struct brw_page { - u64 off; - struct page *pg; - unsigned int count; - u32 flag; -}; - -struct timeout_item { - enum timeout_event ti_event; - unsigned long ti_timeout; - timeout_cb_t ti_cb; - void *ti_cb_data; - struct list_head ti_obd_list; - struct list_head ti_chain; -}; - -#define OBD_MAX_RIF_DEFAULT 8 -#define OBD_MAX_RIF_MAX 512 -#define OSC_MAX_RIF_MAX 256 -#define OSC_MAX_DIRTY_DEFAULT (OBD_MAX_RIF_DEFAULT * 4) -#define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */ -#define OSC_DEFAULT_RESENDS 10 - -/* possible values for fo_sync_lock_cancel */ -enum { - NEVER_SYNC_ON_CANCEL = 0, - BLOCKING_SYNC_ON_CANCEL = 1, - ALWAYS_SYNC_ON_CANCEL = 2, - NUM_SYNC_ON_CANCEL_STATES -}; - -enum obd_cl_sem_lock_class { - OBD_CLI_SEM_NORMAL, - OBD_CLI_SEM_MGC, - OBD_CLI_SEM_MDCOSC, -}; - -/* - * Limit reply buffer size for striping data to one x86_64 page. This - * value is chosen to fit the striping data for common use cases while - * staying well below the limit at which the buffer must be backed by - * vmalloc(). Excessive use of vmalloc() may cause spinlock contention - * on the MDS. - */ -#define OBD_MAX_DEFAULT_EA_SIZE 4096 - -struct mdc_rpc_lock; -struct obd_import; -struct client_obd { - struct rw_semaphore cl_sem; - struct obd_uuid cl_target_uuid; - struct obd_import *cl_import; /* ptlrpc connection state */ - size_t cl_conn_count; - /* - * Cache maximum and default values for easize. This is - * strictly a performance optimization to minimize calls to - * obd_size_diskmd(). The default values are used to calculate the - * initial size of a request buffer. 
The ptlrpc layer will resize the - * buffer as needed to accommodate a larger reply from the - * server. The default values should be small enough to avoid wasted - * memory and excessive use of vmalloc(), yet large enough to avoid - * reallocating the buffer in the common use case. - */ - /* - * Default EA size for striping attributes. It is initialized at - * mount-time based on the default stripe width of the filesystem, - * then it tracks the largest observed EA size advertised by - * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE. - */ - u32 cl_default_mds_easize; - /* Maximum possible EA size computed at mount-time based on - * the number of OSTs in the filesystem. May be increased at - * run-time if a larger observed size is advertised by the MDT. - */ - u32 cl_max_mds_easize; - - enum lustre_sec_part cl_sp_me; - enum lustre_sec_part cl_sp_to; - struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ - - /* the grant values are protected by loi_list_lock below */ - unsigned long cl_dirty_pages; /* all _dirty_ in pages */ - unsigned long cl_dirty_max_pages; /* allowed w/o rpc */ - unsigned long cl_dirty_transit; /* dirty synchronous */ - unsigned long cl_avail_grant; /* bytes of credit for ost */ - unsigned long cl_lost_grant; /* lost credits (trunc) */ - - /* since we allocate grant by blocks, we don't know how many grant will - * be used to add a page into cache. As a solution, we reserve maximum - * grant before trying to dirty a page and unreserve the rest. - * See osc_{reserve|unreserve}_grant for details. - */ - long cl_reserved_grant; - struct list_head cl_cache_waiters; /* waiting for cache/grant */ - unsigned long cl_next_shrink_grant; /* jiffies */ - struct list_head cl_grant_shrink_list; /* Timeout event list */ - int cl_grant_shrink_interval; /* seconds */ - - /* A chunk is an optimal size used by osc_extent to determine - * the extent size. 
A chunk is max(PAGE_SIZE, OST block size) - */ - int cl_chunkbits; - unsigned int cl_extent_tax; /* extent overhead, by bytes */ - - /* keep track of objects that have lois that contain pages which - * have been queued for async brw. this lock also protects the - * lists of osc_client_pages that hang off of the loi - */ - /* - * ->cl_loi_list_lock protects consistency of - * ->cl_loi_{ready,read,write}_list. ->ap_make_ready() and - * ->ap_completion() call-backs are executed under this lock. As we - * cannot guarantee that these call-backs never block on all platforms - * (as a matter of fact they do block on Mac OS X), type of - * ->cl_loi_list_lock is platform dependent: it's a spin-lock on Linux - * and blocking mutex on Mac OS X. (Alternative is to make this lock - * blocking everywhere, but we don't want to slow down fast-path of - * our main platform.) - * - * NB by Jinshan: though field names are still _loi_, but actually - * osc_object{}s are in the list. - */ - spinlock_t cl_loi_list_lock; - struct list_head cl_loi_ready_list; - struct list_head cl_loi_hp_ready_list; - struct list_head cl_loi_write_list; - struct list_head cl_loi_read_list; - __u32 cl_r_in_flight; - __u32 cl_w_in_flight; - /* just a sum of the loi/lop pending numbers to be exported by sysfs */ - atomic_t cl_pending_w_pages; - atomic_t cl_pending_r_pages; - __u32 cl_max_pages_per_rpc; - __u32 cl_max_rpcs_in_flight; - struct obd_histogram cl_read_rpc_hist; - struct obd_histogram cl_write_rpc_hist; - struct obd_histogram cl_read_page_hist; - struct obd_histogram cl_write_page_hist; - struct obd_histogram cl_read_offset_hist; - struct obd_histogram cl_write_offset_hist; - - /* LRU for osc caching pages */ - struct cl_client_cache *cl_cache; - /** member of cl_cache->ccc_lru */ - struct list_head cl_lru_osc; - /** # of available LRU slots left in the per-OSC cache. 
- * Available LRU slots are shared by all OSCs of the same file system, - * therefore this is a pointer to cl_client_cache::ccc_lru_left. - */ - atomic_long_t *cl_lru_left; - /** # of busy LRU pages. A page is considered busy if it's in writeback - * queue, or in transfer. Busy pages can't be discarded so they are not - * in LRU cache. - */ - atomic_long_t cl_lru_busy; - /** # of LRU pages in the cache for this client_obd */ - atomic_long_t cl_lru_in_list; - /** # of threads are shrinking LRU cache. To avoid contention, it's not - * allowed to have multiple threads shrinking LRU cache. - */ - atomic_t cl_lru_shrinkers; - /** The time when this LRU cache was last used. */ - time64_t cl_lru_last_used; - /** stats: how many reclaims have happened for this client_obd. - * reclaim and shrink - shrink is async, voluntarily rebalancing; - * reclaim is sync, initiated by IO thread when the LRU slots are - * in shortage. - */ - u64 cl_lru_reclaim; - /** List of LRU pages for this client_obd */ - struct list_head cl_lru_list; - /** Lock for LRU page list */ - spinlock_t cl_lru_list_lock; - /** # of unstable pages in this client_obd. - * An unstable page is a page state that WRITE RPC has finished but - * the transaction has NOT yet committed. 
- */ - atomic_long_t cl_unstable_count; - /** Link to osc_shrinker_list */ - struct list_head cl_shrink_list; - - /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ - atomic_t cl_destroy_in_flight; - wait_queue_head_t cl_destroy_waitq; - - struct mdc_rpc_lock *cl_rpc_lock; - - /* modify rpcs in flight - * currently used for metadata only - */ - spinlock_t cl_mod_rpcs_lock; - u16 cl_max_mod_rpcs_in_flight; - u16 cl_mod_rpcs_in_flight; - u16 cl_close_rpcs_in_flight; - wait_queue_head_t cl_mod_rpcs_waitq; - unsigned long *cl_mod_tag_bitmap; - struct obd_histogram cl_mod_rpcs_hist; - - /* mgc datastruct */ - atomic_t cl_mgc_refcount; - struct obd_export *cl_mgc_mgsexp; - - /* checksumming for data sent over the network */ - unsigned int cl_checksum:1; /* 0 = disabled, 1 = enabled */ - /* supported checksum types that are worked out at connect time */ - __u32 cl_supp_cksum_types; - /* checksum algorithm to be used */ - enum cksum_type cl_cksum_type; - - /* also protected by the poorly named _loi_list_lock lock above */ - struct osc_async_rc cl_ar; - - /* sequence manager */ - struct lu_client_seq *cl_seq; - - atomic_t cl_resends; /* resend count */ - - /* ptlrpc work for writeback in ptlrpcd context */ - void *cl_writeback_work; - void *cl_lru_work; - /* hash tables for osc_quota_info */ - struct cfs_hash *cl_quota_hash[MAXQUOTAS]; -}; - -#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) - -struct obd_id_info { - __u32 idx; - u64 *data; -}; - -struct echo_client_obd { - struct obd_export *ec_exp; /* the local connection to osc/lov */ - spinlock_t ec_lock; - struct list_head ec_objects; - struct list_head ec_locks; - __u64 ec_unique; -}; - -/* Generic subset of OSTs */ -struct ost_pool { - __u32 *op_array; /* array of index of lov_obd->lov_tgts */ - unsigned int op_count; /* number of OSTs in the array */ - unsigned int op_size; /* allocated size of lp_array */ - struct rw_semaphore op_rw_sem; /* to protect ost_pool use */ -}; - -/* 
allow statfs data caching for 1 second */ -#define OBD_STATFS_CACHE_SECONDS 1 - -struct lov_tgt_desc { - struct list_head ltd_kill; - struct obd_uuid ltd_uuid; - struct obd_device *ltd_obd; - struct obd_export *ltd_exp; - __u32 ltd_gen; - __u32 ltd_index; /* index in lov_obd->tgts */ - unsigned long ltd_active:1,/* is this target up for requests */ - ltd_activate:1,/* should target be activated */ - ltd_reap:1; /* should this target be deleted */ -}; - -struct lov_obd { - struct lov_desc desc; - struct lov_tgt_desc **lov_tgts; /* sparse array */ - struct ost_pool lov_packed; /* all OSTs in a packed array */ - struct mutex lov_lock; - struct obd_connect_data lov_ocd; - atomic_t lov_refcount; - __u32 lov_death_row;/* tgts scheduled to be deleted */ - __u32 lov_tgt_size; /* size of tgts array */ - int lov_connects; - int lov_pool_count; - struct cfs_hash *lov_pools_hash_body; /* used for key access */ - struct list_head lov_pool_list; /* used for sequential access */ - struct dentry *lov_pool_debugfs_entry; - enum lustre_sec_part lov_sp_me; - - /* Cached LRU and unstable data from upper layer */ - struct cl_client_cache *lov_cache; - - struct rw_semaphore lov_notify_lock; - - struct kobject *lov_tgts_kobj; -}; - -struct lmv_tgt_desc { - struct obd_uuid ltd_uuid; - struct obd_export *ltd_exp; - u32 ltd_idx; - struct mutex ltd_fid_mutex; - unsigned long ltd_active:1; /* target up for requests */ -}; - -struct lmv_obd { - struct lu_client_fld lmv_fld; - spinlock_t lmv_lock; - struct lmv_desc desc; - struct obd_uuid cluuid; - - struct mutex lmv_init_mutex; - int connected; - int max_easize; - int max_def_easize; - - u32 tgts_size; /* size of tgts array */ - struct lmv_tgt_desc **tgts; - - struct obd_connect_data conn_data; - struct kobject *lmv_tgts_kobj; -}; - -struct niobuf_local { - __u64 lnb_file_offset; - __u32 lnb_page_offset; - __u32 lnb_len; - __u32 lnb_flags; - int lnb_rc; - struct page *lnb_page; - void *lnb_data; -}; - -#define LUSTRE_FLD_NAME "fld" -#define 
LUSTRE_SEQ_NAME "seq" - -#define LUSTRE_MDD_NAME "mdd" -#define LUSTRE_OSD_LDISKFS_NAME "osd-ldiskfs" -#define LUSTRE_OSD_ZFS_NAME "osd-zfs" -#define LUSTRE_VVP_NAME "vvp" -#define LUSTRE_LMV_NAME "lmv" -#define LUSTRE_SLP_NAME "slp" -#define LUSTRE_LOD_NAME "lod" -#define LUSTRE_OSP_NAME "osp" -#define LUSTRE_LWP_NAME "lwp" - -/* obd device type names */ - /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */ -#define LUSTRE_MDS_NAME "mds" -#define LUSTRE_MDT_NAME "mdt" -#define LUSTRE_MDC_NAME "mdc" -#define LUSTRE_OSS_NAME "ost" /* FIXME change name to oss */ -#define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost */ -#define LUSTRE_OSC_NAME "osc" -#define LUSTRE_LOV_NAME "lov" -#define LUSTRE_MGS_NAME "mgs" -#define LUSTRE_MGC_NAME "mgc" - -#define LUSTRE_ECHO_NAME "obdecho" -#define LUSTRE_ECHO_CLIENT_NAME "echo_client" -#define LUSTRE_QMT_NAME "qmt" - -/* Constant obd names (post-rename) */ -#define LUSTRE_MDS_OBDNAME "MDS" -#define LUSTRE_OSS_OBDNAME "OSS" -#define LUSTRE_MGS_OBDNAME "MGS" -#define LUSTRE_MGC_OBDNAME "MGC" - -/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ -#define N_LOCAL_TEMP_PAGE 0x10000000 - -/* - * Events signalled through obd_notify() upcall-chain. - */ -enum obd_notify_event { - /* Device connect start */ - OBD_NOTIFY_CONNECT, - /* Device activated */ - OBD_NOTIFY_ACTIVE, - /* Device deactivated */ - OBD_NOTIFY_INACTIVE, - /* Connect data for import were changed */ - OBD_NOTIFY_OCD, - /* Sync request */ - OBD_NOTIFY_SYNC_NONBLOCK, - OBD_NOTIFY_SYNC, - /* Configuration event */ - OBD_NOTIFY_CONFIG, - /* Administratively deactivate/activate event */ - OBD_NOTIFY_DEACTIVATE, - OBD_NOTIFY_ACTIVATE -}; - -/* - * Data structure used to pass obd_notify()-event to non-obd listeners (llite - * being main example). 
- */ -struct obd_notify_upcall { - int (*onu_upcall)(struct obd_device *host, struct obd_device *watched, - enum obd_notify_event ev, void *owner, void *data); - /* Opaque datum supplied by upper layer listener */ - void *onu_owner; -}; - -struct target_recovery_data { - svc_handler_t trd_recovery_handler; - pid_t trd_processing_task; - struct completion trd_starting; - struct completion trd_finishing; -}; - -struct obd_llog_group { - struct llog_ctxt *olg_ctxts[LLOG_MAX_CTXTS]; - wait_queue_head_t olg_waitq; - spinlock_t olg_lock; - struct mutex olg_cat_processing; -}; - -/* corresponds to one of the obd's */ -#define OBD_DEVICE_MAGIC 0XAB5CD6EF - -struct lvfs_run_ctxt { - struct dt_device *dt; -}; - -struct obd_device { - struct obd_type *obd_type; - u32 obd_magic; /* OBD_DEVICE_MAGIC */ - int obd_minor; /* device number: lctl dl */ - struct lu_device *obd_lu_dev; - - /* common and UUID name of this device */ - struct obd_uuid obd_uuid; - char obd_name[MAX_OBD_NAME]; - - /* bitfield modification is protected by obd_dev_lock */ - unsigned long obd_attached:1, /* finished attach */ - obd_set_up:1, /* finished setup */ - obd_version_recov:1, /* obd uses version checking */ - obd_replayable:1,/* recovery is enabled; inform clients */ - obd_no_transno:1, /* no committed-transno notification */ - obd_no_recov:1, /* fail instead of retry messages */ - obd_stopping:1, /* started cleanup */ - obd_starting:1, /* started setup */ - obd_force:1, /* cleanup with > 0 obd refcount */ - obd_fail:1, /* cleanup with failover */ - obd_no_conn:1, /* deny new connections */ - obd_inactive:1, /* device active/inactive - * (for sysfs status only!!) - */ - obd_no_ir:1, /* no imperative recovery. 
*/ - obd_process_conf:1; /* device is processing mgs config */ - /* use separate field as it is set in interrupt to don't mess with - * protection of other bits using _bh lock - */ - unsigned long obd_recovery_expired:1; - /* uuid-export hash body */ - struct cfs_hash *obd_uuid_hash; - wait_queue_head_t obd_refcount_waitq; - struct list_head obd_exports; - struct list_head obd_unlinked_exports; - struct list_head obd_delayed_exports; - atomic_t obd_refcount; - int obd_num_exports; - spinlock_t obd_nid_lock; - struct ldlm_namespace *obd_namespace; - struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ - /* a spinlock is OK for what we do now, may need a semaphore later */ - spinlock_t obd_dev_lock; /* protect OBD bitfield above */ - spinlock_t obd_osfs_lock; - struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ - __u64 obd_osfs_age; - u64 obd_last_committed; - struct mutex obd_dev_mutex; - struct lvfs_run_ctxt obd_lvfs_ctxt; - struct obd_llog_group obd_olg; /* default llog group */ - struct obd_device *obd_observer; - struct rw_semaphore obd_observer_link_sem; - struct obd_notify_upcall obd_upcall; - struct obd_export *obd_self_export; - - union { - struct client_obd cli; - struct echo_client_obd echo_client; - struct lov_obd lov; - struct lmv_obd lmv; - } u; - - /* Fields used by LProcFS */ - struct lprocfs_stats *obd_stats; - unsigned int obd_cntr_base; - - struct lprocfs_stats *md_stats; - unsigned int md_cntr_base; - - struct dentry *obd_debugfs_entry; - struct dentry *obd_svc_debugfs_entry; - struct lprocfs_stats *obd_svc_stats; - atomic_t obd_evict_inprogress; - wait_queue_head_t obd_evict_inprogress_waitq; - struct list_head obd_evict_list; /* protected with pet_lock */ - - /** - * Ldlm pool part. Save last calculated SLV and Limit. - */ - rwlock_t obd_pool_lock; - u64 obd_pool_slv; - int obd_pool_limit; - - int obd_conn_inprogress; - - /** - * A list of outstanding class_incref()'s against this obd. For - * debugging. 
- */ - struct lu_ref obd_reference; - - struct kobject obd_kobj; /* sysfs object */ - struct completion obd_kobj_unregister; -}; - -/* get/set_info keys */ -#define KEY_ASYNC "async" -#define KEY_CHANGELOG_CLEAR "changelog_clear" -#define KEY_FID2PATH "fid2path" -#define KEY_CHECKSUM "checksum" -#define KEY_CLEAR_FS "clear_fs" -#define KEY_CONN_DATA "conn_data" -#define KEY_EVICT_BY_NID "evict_by_nid" -#define KEY_FIEMAP "fiemap" -#define KEY_FLUSH_CTX "flush_ctx" -#define KEY_GRANT_SHRINK "grant_shrink" -#define KEY_HSM_COPYTOOL_SEND "hsm_send" -#define KEY_INIT_RECOV_BACKUP "init_recov_bk" -#define KEY_INTERMDS "inter_mds" -#define KEY_LAST_ID "last_id" -#define KEY_LAST_FID "last_fid" -#define KEY_MAX_EASIZE "max_easize" -#define KEY_DEFAULT_EASIZE "default_easize" -#define KEY_MGSSEC "mgssec" -#define KEY_READ_ONLY "read-only" -#define KEY_REGISTER_TARGET "register_target" -#define KEY_SET_FS "set_fs" -#define KEY_TGT_COUNT "tgt_count" -/* KEY_SET_INFO in lustre_idl.h */ -#define KEY_SPTLRPC_CONF "sptlrpc_conf" - -#define KEY_CACHE_SET "cache_set" -#define KEY_CACHE_LRU_SHRINK "cache_lru_shrink" - -struct lu_context; - -static inline int it_to_lock_mode(struct lookup_intent *it) -{ - /* CREAT needs to be tested before open (both could be set) */ - if (it->it_op & IT_CREAT) - return LCK_CW; - else if (it->it_op & (IT_GETATTR | IT_OPEN | IT_LOOKUP | - IT_LAYOUT)) - return LCK_CR; - else if (it->it_op & IT_READDIR) - return LCK_PR; - else if (it->it_op & IT_GETXATTR) - return LCK_PR; - else if (it->it_op & IT_SETXATTR) - return LCK_PW; - - LASSERTF(0, "Invalid it_op: %d\n", it->it_op); - return -EINVAL; -} - -enum md_op_flags { - MF_MDC_CANCEL_FID1 = BIT(0), - MF_MDC_CANCEL_FID2 = BIT(1), - MF_MDC_CANCEL_FID3 = BIT(2), - MF_MDC_CANCEL_FID4 = BIT(3), - MF_GET_MDT_IDX = BIT(4), -}; - -enum md_cli_flags { - CLI_SET_MEA = BIT(0), - CLI_RM_ENTRY = BIT(1), - CLI_HASH64 = BIT(2), - CLI_API32 = BIT(3), - CLI_MIGRATE = BIT(4), -}; - -struct md_op_data { - struct lu_fid 
op_fid1; /* operation fid1 (usually parent) */ - struct lu_fid op_fid2; /* operation fid2 (usually child) */ - struct lu_fid op_fid3; /* 2 extra fids to find conflicting */ - struct lu_fid op_fid4; /* to the operation locks. */ - u32 op_mds; /* what mds server open will go to */ - struct lustre_handle op_handle; - s64 op_mod_time; - const char *op_name; - size_t op_namelen; - __u32 op_mode; - struct lmv_stripe_md *op_mea1; - struct lmv_stripe_md *op_mea2; - __u32 op_suppgids[2]; - __u32 op_fsuid; - __u32 op_fsgid; - cfs_cap_t op_cap; - void *op_data; - size_t op_data_size; - - /* iattr fields and blocks. */ - struct iattr op_attr; - unsigned int op_attr_flags; - __u64 op_valid; - loff_t op_attr_blocks; - - __u32 op_flags; - - /* Various operation flags. */ - enum mds_op_bias op_bias; - - /* Used by readdir */ - __u64 op_offset; - - /* Used by readdir */ - __u32 op_max_pages; - - /* used to transfer info between the stacks of MD client - * see enum op_cli_flags - */ - enum md_cli_flags op_cli_flags; - - /* File object data version for HSM release, on client */ - __u64 op_data_version; - struct lustre_handle op_lease_handle; - - /* default stripe offset */ - __u32 op_default_stripe_offset; -}; - -struct md_callback { - int (*md_blocking_ast)(struct ldlm_lock *lock, - struct ldlm_lock_desc *desc, - void *data, int flag); -}; - -struct md_enqueue_info; -/* metadata stat-ahead */ - -struct md_enqueue_info { - struct md_op_data mi_data; - struct lookup_intent mi_it; - struct lustre_handle mi_lockh; - struct inode *mi_dir; - struct ldlm_enqueue_info mi_einfo; - int (*mi_cb)(struct ptlrpc_request *req, - struct md_enqueue_info *minfo, int rc); - void *mi_cbdata; -}; - -struct obd_ops { - struct module *owner; - int (*iocontrol)(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void __user *uarg); - int (*get_info)(const struct lu_env *env, struct obd_export *, - __u32 keylen, void *key, __u32 *vallen, void *val); - int (*set_info_async)(const struct lu_env 
*, struct obd_export *, - __u32 keylen, void *key, - __u32 vallen, void *val, - struct ptlrpc_request_set *set); - int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg); - int (*precleanup)(struct obd_device *dev); - int (*cleanup)(struct obd_device *dev); - int (*process_config)(struct obd_device *dev, u32 len, void *data); - int (*postrecov)(struct obd_device *dev); - int (*add_conn)(struct obd_import *imp, struct obd_uuid *uuid, - int priority); - int (*del_conn)(struct obd_import *imp, struct obd_uuid *uuid); - /* connect to the target device with given connection - * data. @ocd->ocd_connect_flags is modified to reflect flags actually - * granted by the target, which are guaranteed to be a subset of flags - * asked for. If @ocd == NULL, use default parameters. - */ - int (*connect)(const struct lu_env *env, - struct obd_export **exp, struct obd_device *src, - struct obd_uuid *cluuid, struct obd_connect_data *ocd, - void *localdata); - int (*reconnect)(const struct lu_env *env, - struct obd_export *exp, struct obd_device *src, - struct obd_uuid *cluuid, - struct obd_connect_data *ocd, - void *localdata); - int (*disconnect)(struct obd_export *exp); - - /* Initialize/finalize fids infrastructure. */ - int (*fid_init)(struct obd_device *obd, - struct obd_export *exp, enum lu_cli_type type); - int (*fid_fini)(struct obd_device *obd); - - /* Allocate new fid according to passed @hint. */ - int (*fid_alloc)(const struct lu_env *env, struct obd_export *exp, - struct lu_fid *fid, struct md_op_data *op_data); - - /* - * Object with @fid is getting deleted, we may want to do something - * about this. 
- */ - int (*statfs)(const struct lu_env *, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, __u32 flags); - int (*statfs_async)(struct obd_export *exp, struct obd_info *oinfo, - __u64 max_age, struct ptlrpc_request_set *set); - int (*create)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa); - int (*destroy)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa); - int (*setattr)(const struct lu_env *, struct obd_export *exp, - struct obdo *oa); - int (*getattr)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa); - int (*preprw)(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, int objcount, - struct obd_ioobj *obj, struct niobuf_remote *remote, - int *nr_pages, struct niobuf_local *local); - int (*commitrw)(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - struct niobuf_remote *remote, int pages, - struct niobuf_local *local, int rc); - int (*init_export)(struct obd_export *exp); - int (*destroy_export)(struct obd_export *exp); - - /* metadata-only methods */ - int (*import_event)(struct obd_device *, struct obd_import *, - enum obd_import_event); - - int (*notify)(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data); - - int (*health_check)(const struct lu_env *env, struct obd_device *); - struct obd_uuid *(*get_uuid)(struct obd_export *exp); - - /* quota methods */ - int (*quotactl)(struct obd_device *, struct obd_export *, - struct obd_quotactl *); - - /* pools methods */ - int (*pool_new)(struct obd_device *obd, char *poolname); - int (*pool_del)(struct obd_device *obd, char *poolname); - int (*pool_add)(struct obd_device *obd, char *poolname, - char *ostname); - int (*pool_rem)(struct obd_device *obd, char *poolname, - char *ostname); - void (*getref)(struct obd_device *obd); - void (*putref)(struct obd_device *obd); - /* - * NOTE: If adding ops, add another 
LPROCFS_OBD_OP_INIT() line - * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. - * Also, add a wrapper function in include/linux/obd_class.h. - */ -}; - -/* lmv structures */ -struct lustre_md { - struct mdt_body *body; - struct lu_buf layout; - struct lmv_stripe_md *lmv; -#ifdef CONFIG_FS_POSIX_ACL - struct posix_acl *posix_acl; -#endif - struct mdt_remote_perm *remote_perm; -}; - -struct md_open_data { - struct obd_client_handle *mod_och; - struct ptlrpc_request *mod_open_req; - struct ptlrpc_request *mod_close_req; - atomic_t mod_refcount; - bool mod_is_create; -}; - -struct obd_client_handle { - struct lustre_handle och_fh; - struct lu_fid och_fid; - struct md_open_data *och_mod; - struct lustre_handle och_lease_handle; /* open lock for lease */ - __u32 och_magic; - fmode_t och_flags; -}; - -#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed - -struct lookup_intent; -struct cl_attr; - -struct md_ops { - int (*getstatus)(struct obd_export *, struct lu_fid *); - int (*null_inode)(struct obd_export *, const struct lu_fid *); - int (*close)(struct obd_export *, struct md_op_data *, - struct md_open_data *, struct ptlrpc_request **); - int (*create)(struct obd_export *, struct md_op_data *, - const void *, size_t, umode_t, uid_t, gid_t, - cfs_cap_t, __u64, struct ptlrpc_request **); - int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *, - const union ldlm_policy_data *, - struct lookup_intent *, struct md_op_data *, - struct lustre_handle *, __u64); - int (*getattr)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - int (*getattr_name)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - int (*intent_lock)(struct obd_export *, struct md_op_data *, - struct lookup_intent *, - struct ptlrpc_request **, - ldlm_blocking_callback, __u64); - int (*link)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - int (*rename)(struct obd_export *, struct md_op_data *, - const char *, size_t, const char *, 
size_t, - struct ptlrpc_request **); - int (*setattr)(struct obd_export *, struct md_op_data *, void *, - size_t, struct ptlrpc_request **); - int (*sync)(struct obd_export *, const struct lu_fid *, - struct ptlrpc_request **); - int (*read_page)(struct obd_export *, struct md_op_data *, - struct md_callback *cb_op, __u64 hash_offset, - struct page **ppage); - int (*unlink)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - - int (*setxattr)(struct obd_export *, const struct lu_fid *, - u64, const char *, const char *, int, int, int, __u32, - struct ptlrpc_request **); - - int (*getxattr)(struct obd_export *, const struct lu_fid *, - u64, const char *, const char *, int, int, int, - struct ptlrpc_request **); - - int (*init_ea_size)(struct obd_export *, u32, u32); - - int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *, - struct obd_export *, struct obd_export *, - struct lustre_md *); - - int (*free_lustre_md)(struct obd_export *, struct lustre_md *); - - int (*merge_attr)(struct obd_export *, - const struct lmv_stripe_md *lsm, - struct cl_attr *attr, ldlm_blocking_callback); - - int (*set_open_replay_data)(struct obd_export *, - struct obd_client_handle *, - struct lookup_intent *); - int (*clear_open_replay_data)(struct obd_export *, - struct obd_client_handle *); - int (*set_lock_data)(struct obd_export *, const struct lustre_handle *, - void *, __u64 *); - - enum ldlm_mode (*lock_match)(struct obd_export *, __u64, - const struct lu_fid *, enum ldlm_type, - union ldlm_policy_data *, enum ldlm_mode, - struct lustre_handle *); - - int (*cancel_unused)(struct obd_export *, const struct lu_fid *, - union ldlm_policy_data *, enum ldlm_mode, - enum ldlm_cancel_flags flags, void *opaque); - - int (*get_fid_from_lsm)(struct obd_export *, - const struct lmv_stripe_md *, - const char *name, int namelen, - struct lu_fid *fid); - - int (*intent_getattr_async)(struct obd_export *, - struct md_enqueue_info *); - - int 
/* Take an additional reference on @mod. */
#define obd_mod_get(mod) atomic_inc(&(mod)->mod_refcount)

/*
 * Drop one reference on @mod.  When the count reaches zero the saved open
 * request, if any, is released with ptlrpc_req_finished() and @mod itself
 * is freed.
 *
 * @mod is evaluated exactly once, so an argument with side effects is safe.
 */
#define obd_mod_put(mod)						\
({									\
	struct md_open_data *__mod = (mod);				\
									\
	if (atomic_dec_and_test(&__mod->mod_refcount)) {		\
		if (__mod->mod_open_req)				\
			ptlrpc_req_finished(__mod->mod_open_req);	\
		kfree(__mod);						\
	}								\
})
- atomic_read(&cli->cl_resends) > resend : 1; -} - -/** - * Return device name for this device - * - * XXX: lu_device is declared before obd_device, while a pointer pointing - * back to obd_device in lu_device, so this helper function defines here - * instead of in lu_object.h - */ -static inline const char *lu_dev_name(const struct lu_device *lu_dev) -{ - return lu_dev->ld_obd->obd_name; -} - -static inline bool filename_is_volatile(const char *name, size_t namelen, - int *idx) -{ - const char *start; - char *end; - - if (strncmp(name, LUSTRE_VOLATILE_HDR, LUSTRE_VOLATILE_HDR_LEN) != 0) - return false; - - /* caller does not care of idx */ - if (!idx) - return true; - - /* volatile file, the MDT can be set from name */ - /* name format is LUSTRE_VOLATILE_HDR:[idx]: */ - /* if no MDT is specified, use std way */ - if (namelen < LUSTRE_VOLATILE_HDR_LEN + 2) - goto bad_format; - /* test for no MDT idx case */ - if ((*(name + LUSTRE_VOLATILE_HDR_LEN) == ':') && - (*(name + LUSTRE_VOLATILE_HDR_LEN + 1) == ':')) { - *idx = -1; - return true; - } - /* we have an idx, read it */ - start = name + LUSTRE_VOLATILE_HDR_LEN + 1; - *idx = simple_strtoul(start, &end, 0); - /* error cases: - * no digit, no trailing :, negative value - */ - if (((*idx == 0) && (end == start)) || - (*end != ':') || (*idx < 0)) - goto bad_format; - - return true; -bad_format: - /* bad format of mdt idx, we cannot return an error - * to caller so we use hash algo - */ - CERROR("Bad volatile file name format: %s\n", - name + LUSTRE_VOLATILE_HDR_LEN); - return false; -} - -static inline int cli_brw_size(struct obd_device *obd) -{ - return obd->u.cli.cl_max_pages_per_rpc << PAGE_SHIFT; -} - -/* - * when RPC size or the max RPCs in flight is increased, the max dirty pages - * of the client should be increased accordingly to avoid sending fragmented - * RPCs over the network when the client runs out of the maximum dirty space - * when so many RPCs are being generated. 
- */ -static inline void client_adjust_max_dirty(struct client_obd *cli) -{ - /* initializing */ - if (cli->cl_dirty_max_pages <= 0) - cli->cl_dirty_max_pages = - (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024) >> PAGE_SHIFT; - else { - unsigned long dirty_max = cli->cl_max_rpcs_in_flight * - cli->cl_max_pages_per_rpc; - - if (dirty_max > cli->cl_dirty_max_pages) - cli->cl_dirty_max_pages = dirty_max; - } - - if (cli->cl_dirty_max_pages > totalram_pages / 8) - cli->cl_dirty_max_pages = totalram_pages / 8; -} - -#endif /* __OBD_H */ diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h deleted file mode 100644 index e5f7bb20415d..000000000000 --- a/drivers/staging/lustre/lustre/include/obd_cksum.h +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef __OBD_CKSUM -#define __OBD_CKSUM -#include <linux/libcfs/libcfs.h> -#include <linux/libcfs/libcfs_crypto.h> -#include <uapi/linux/lustre/lustre_idl.h> - -static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type) -{ - switch (cksum_type) { - case OBD_CKSUM_CRC32: - return CFS_HASH_ALG_CRC32; - case OBD_CKSUM_ADLER: - return CFS_HASH_ALG_ADLER32; - case OBD_CKSUM_CRC32C: - return CFS_HASH_ALG_CRC32C; - default: - CERROR("Unknown checksum type (%x)!!!\n", cksum_type); - LBUG(); - } - return 0; -} - -/* The OBD_FL_CKSUM_* flags is packed into 5 bits of o_flags, since there can - * only be a single checksum type per RPC. - * - * The OBD_CHECKSUM_* type bits passed in ocd_cksum_types are a 32-bit bitmask - * since they need to represent the full range of checksum algorithms that - * both the client and server can understand. - * - * In case of an unsupported types/flags we fall back to ADLER - * because that is supported by all clients since 1.8 - * - * In case multiple algorithms are supported the best one is used. 
- */ -static inline u32 cksum_type_pack(enum cksum_type cksum_type) -{ - unsigned int performance = 0, tmp; - u32 flag = OBD_FL_CKSUM_ADLER; - - if (cksum_type & OBD_CKSUM_CRC32) { - tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)); - if (tmp > performance) { - performance = tmp; - flag = OBD_FL_CKSUM_CRC32; - } - } - if (cksum_type & OBD_CKSUM_CRC32C) { - tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)); - if (tmp > performance) { - performance = tmp; - flag = OBD_FL_CKSUM_CRC32C; - } - } - if (cksum_type & OBD_CKSUM_ADLER) { - tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER)); - if (tmp > performance) { - performance = tmp; - flag = OBD_FL_CKSUM_ADLER; - } - } - if (unlikely(cksum_type && !(cksum_type & (OBD_CKSUM_CRC32C | - OBD_CKSUM_CRC32 | - OBD_CKSUM_ADLER)))) - CWARN("unknown cksum type %x\n", cksum_type); - - return flag; -} - -static inline enum cksum_type cksum_type_unpack(u32 o_flags) -{ - switch (o_flags & OBD_FL_CKSUM_ALL) { - case OBD_FL_CKSUM_CRC32C: - return OBD_CKSUM_CRC32C; - case OBD_FL_CKSUM_CRC32: - return OBD_CKSUM_CRC32; - default: - break; - } - - return OBD_CKSUM_ADLER; -} - -/* Return a bitmask of the checksum types supported on this system. - * 1.8 supported ADLER it is base and not depend on hw - * Client uses all available local algos - */ -static inline enum cksum_type cksum_types_supported_client(void) -{ - enum cksum_type ret = OBD_CKSUM_ADLER; - - CDEBUG(D_INFO, "Crypto hash speed: crc %d, crc32c %d, adler %d\n", - cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)), - cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)), - cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER))); - - if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)) > 0) - ret |= OBD_CKSUM_CRC32C; - if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)) > 0) - ret |= OBD_CKSUM_CRC32; - - return ret; -} - -/* Select the best checksum algorithm among those supplied in the cksum_types - * input. 
#endif /* __OBD_CKSUM */
/* requests should be sent without delay and without resends, to avoid deadlocks */
obd_zombie_barrier(void); - -int obd_get_request_slot(struct client_obd *cli); -void obd_put_request_slot(struct client_obd *cli); -__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli); -int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max); -int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, u16 max); -int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq); - -u16 obd_get_mod_rpc_slot(struct client_obd *cli, u32 opc, - struct lookup_intent *it); -void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc, - struct lookup_intent *it, u16 tag); - -struct llog_handle; -struct llog_rec_hdr; -typedef int (*llog_cb_t)(const struct lu_env *, struct llog_handle *, - struct llog_rec_hdr *, void *); - -/* obd_config.c */ -char *lustre_cfg_string(struct lustre_cfg *lcfg, u32 index); -int class_process_config(struct lustre_cfg *lcfg); -int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, - struct lustre_cfg *lcfg, void *data); - -/* For interoperability */ -struct cfg_interop_param { - char *old_param; - char *new_param; -}; - -int class_find_param(char *buf, char *key, char **valp); -struct cfg_interop_param *class_find_old_param(const char *param, - struct cfg_interop_param *ptr); -int class_get_next_param(char **params, char *copy); -int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh); -int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh); -int class_parse_net(char *buf, u32 *net, char **endh); -int class_match_nid(char *buf, char *key, lnet_nid_t nid); -int class_match_net(char *buf, char *key, u32 net); - -struct obd_device *class_incref(struct obd_device *obd, - const char *scope, const void *source); -void class_decref(struct obd_device *obd, - const char *scope, const void *source); -int class_config_llog_handler(const struct lu_env *env, - struct llog_handle *handle, - struct llog_rec_hdr *rec, void *data); -int class_add_uuid(const char *uuid, __u64 nid); - -/* obdecho */ -void 
#define CFG_F_MARKER 0x02 /* We are within a marker */
__class_export_add_lock_ref(struct obd_export *exp, - struct ldlm_lock *lock); -void __class_export_del_lock_ref(struct obd_export *exp, - struct ldlm_lock *lock); -extern void (*class_export_dump_hook)(struct obd_export *exp); - -#else - -#define __class_export_add_lock_ref(exp, lock) do {} while (0) -#define __class_export_del_lock_ref(exp, lock) do {} while (0) - -#endif - -/* genops.c */ -struct obd_export *class_export_get(struct obd_export *exp); -void class_export_put(struct obd_export *exp); -struct obd_export *class_new_export(struct obd_device *obddev, - struct obd_uuid *cluuid); -void class_unlink_export(struct obd_export *exp); - -struct obd_import *class_import_get(struct obd_import *imp); -void class_import_put(struct obd_import *imp); -struct obd_import *class_new_import(struct obd_device *obd); -void class_destroy_import(struct obd_import *exp); - -void class_put_type(struct obd_type *type); -int class_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid); -int class_disconnect(struct obd_export *exp); -void class_fail_export(struct obd_export *exp); -int class_manual_cleanup(struct obd_device *obd); - -static inline void class_export_rpc_inc(struct obd_export *exp) -{ - atomic_inc(&(exp)->exp_rpc_count); - CDEBUG(D_INFO, "RPC GETting export %p : new rpc_count %d\n", - (exp), atomic_read(&(exp)->exp_rpc_count)); -} - -static inline void class_export_rpc_dec(struct obd_export *exp) -{ - LASSERT_ATOMIC_POS(&exp->exp_rpc_count); - atomic_dec(&(exp)->exp_rpc_count); - CDEBUG(D_INFO, "RPC PUTting export %p : new rpc_count %d\n", - (exp), atomic_read(&(exp)->exp_rpc_count)); -} - -static inline struct obd_export *class_export_lock_get(struct obd_export *exp, - struct ldlm_lock *lock) -{ - atomic_inc(&(exp)->exp_locks_count); - __class_export_add_lock_ref(exp, lock); - CDEBUG(D_INFO, "lock GETting export %p : new locks_count %d\n", - (exp), atomic_read(&(exp)->exp_locks_count)); - return class_export_get(exp); -} - -static 
/*
 * Take the client import semaphore (obd->u.cli.cl_sem) for reading and make
 * sure the client has an import.
 *
 * Returns 0 with the semaphore STILL HELD for read; the caller is expected
 * to drop it with up_read() when done.  Returns -ENODEV, with the semaphore
 * already released, when there is no import.
 */
static inline int lprocfs_climp_check(struct obd_device *obd)
{
	down_read(&(obd)->u.cli.cl_sem);
	if (!(obd)->u.cli.cl_import) {
		/* nothing to protect: release before reporting the error */
		up_read(&(obd)->u.cli.cl_sem);
		return -ENODEV;
	}
	return 0;
}
/*
 * Index of metadata operation @op within struct md_ops, counted in
 * function-pointer slots from the first method (getstatus).
 */
#define MD_COUNTER_OFFSET(op)						\
	((offsetof(struct md_ops, op) -					\
	  offsetof(struct md_ops, getstatus))				\
	 / sizeof(((struct md_ops *)(0))->getstatus))

/*
 * Bump the per-device metadata counter for @op, if @obdx has md_stats.
 *
 * Only the macro parameter is referenced; the macro no longer depends on a
 * variable that happens to be called "obd" at the call site.
 */
#define MD_COUNTER_INCREMENT(obdx, op)					\
do {									\
	if ((obdx)->md_stats) {						\
		unsigned int coffset;					\
		coffset = (unsigned int)((obdx)->md_cntr_base) +	\
			  MD_COUNTER_OFFSET(op);			\
		LASSERT(coffset < (obdx)->md_stats->ls_num);		\
		lprocfs_counter_incr((obdx)->md_stats, coffset);	\
	}								\
} while (0)

/*
 * Bump the metadata counter for @op on the device behind @export and, when
 * present, on the export's own exp_md_stats.
 *
 * The guard tests md_stats, which is the table actually dereferenced below,
 * rather than the unrelated obd_stats.
 */
#define EXP_MD_COUNTER_INCREMENT(export, op)				\
do {									\
	if ((export)->exp_obd->md_stats) {				\
		unsigned int coffset;					\
		coffset = (unsigned int)((export)->exp_obd->md_cntr_base) + \
			  MD_COUNTER_OFFSET(op);			\
		LASSERT(coffset < (export)->exp_obd->md_stats->ls_num);	\
		lprocfs_counter_incr((export)->exp_obd->md_stats, coffset); \
		if ((export)->exp_md_stats)				\
			lprocfs_counter_incr(				\
				(export)->exp_md_stats, coffset);	\
	}								\
} while (0)
/*
 * Return @err from the calling function when the llog context @ctxt has no
 * device type or does not implement log operation lop_<op>.  A message is
 * logged only when @err is non-zero.
 *
 * @ctxt is parenthesized at every use so that any pointer expression may be
 * passed, not just a plain identifier.
 */
#define CTXT_CHECK_OP(ctxt, op, err)					\
do {									\
	if (!OBT((ctxt)->loc_obd) || !CTXTP((ctxt), op)) {		\
		if (err)						\
			CERROR("lop_" #op ": dev %d no operation\n",	\
			       (ctxt)->loc_obd->obd_minor);		\
		return err;						\
	}								\
} while (0)
/*
 * Set up @obd from configuration record @cfg.
 *
 * If the device type has an lu_device_type (typ_lu), the device is created
 * with ldto_device_alloc() under a temporary session context and
 * environment, then linked to @obd in both directions (obd_lu_dev /
 * ld_obd).  Otherwise the ->setup() method of the type's obd_ops is used.
 *
 * Returns 0 on success.  On failure returns the lu_env_init() error, the
 * PTR_ERR() of the failed allocation, -EOPNOTSUPP when there is no
 * ->setup() method, or whatever ->setup() itself returns.
 */
static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
{
	int rc;
	struct lu_device_type *ldt;
	struct lu_device *d;

	ldt = obd->obd_type->typ_lu;
	if (ldt) {
		struct lu_context session_ctx;
		struct lu_env env;

		/*
		 * NOTE(review): any result of lu_context_init() is ignored
		 * here -- confirm it cannot fail for these tags.
		 */
		lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION);
		session_ctx.lc_thread = NULL;
		lu_context_enter(&session_ctx);

		rc = lu_env_init(&env, ldt->ldt_ctx_tags);
		if (rc == 0) {
			env.le_ses = &session_ctx;
			d = ldt->ldt_ops->ldto_device_alloc(&env, ldt, cfg);
			/* the environment is only needed for the allocation */
			lu_env_fini(&env);
			if (!IS_ERR(d)) {
				obd->obd_lu_dev = d;
				d->ld_obd = obd;
				rc = 0;
			} else {
				rc = PTR_ERR(d);
			}
		}
		/* the session context is torn down on success and failure */
		lu_context_exit(&session_ctx);
		lu_context_fini(&session_ctx);

	} else {
		/* returns -EOPNOTSUPP from here if ->setup() is missing */
		OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP);
		OBD_COUNTER_INCREMENT(obd, setup);
		rc = OBP(obd, setup)(obd, cfg);
	}
	return rc;
}
obd->obd_type->typ_lu; - d = obd->obd_lu_dev; - if (ldt && d) { - struct lu_env env; - - rc = lu_env_init(&env, ldt->ldt_ctx_tags); - if (rc == 0) { - ldt->ldt_ops->ldto_device_free(&env, d); - lu_env_fini(&env); - obd->obd_lu_dev = NULL; - } - } - OBD_CHECK_DT_OP(obd, cleanup, 0); - OBD_COUNTER_INCREMENT(obd, cleanup); - - rc = OBP(obd, cleanup)(obd); - return rc; -} - -static inline void obd_cleanup_client_import(struct obd_device *obd) -{ - /* - * If we set up but never connected, the - * client import will not have been cleaned. - */ - down_write(&obd->u.cli.cl_sem); - if (obd->u.cli.cl_import) { - struct obd_import *imp; - - imp = obd->u.cli.cl_import; - CDEBUG(D_CONFIG, "%s: client import never connected\n", - obd->obd_name); - ptlrpc_invalidate_import(imp); - client_destroy_import(imp); - obd->u.cli.cl_import = NULL; - } - up_write(&obd->u.cli.cl_sem); -} - -static inline int -obd_process_config(struct obd_device *obd, int datalen, void *data) -{ - int rc; - struct lu_device_type *ldt; - struct lu_device *d; - - rc = obd_check_dev(obd); - if (rc) - return rc; - - obd->obd_process_conf = 1; - ldt = obd->obd_type->typ_lu; - d = obd->obd_lu_dev; - if (ldt && d) { - struct lu_env env; - - rc = lu_env_init(&env, ldt->ldt_ctx_tags); - if (rc == 0) { - rc = d->ld_ops->ldo_process_config(&env, d, data); - lu_env_fini(&env); - } - } else { - OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP); - rc = OBP(obd, process_config)(obd, datalen, data); - } - OBD_COUNTER_INCREMENT(obd, process_config); - obd->obd_process_conf = 0; - - return rc; -} - -static inline int obd_create(const struct lu_env *env, struct obd_export *exp, - struct obdo *obdo) -{ - int rc; - - EXP_CHECK_DT_OP(exp, create); - EXP_COUNTER_INCREMENT(exp, create); - - rc = OBP(exp->exp_obd, create)(env, exp, obdo); - return rc; -} - -static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp, - struct obdo *obdo) -{ - int rc; - - EXP_CHECK_DT_OP(exp, destroy); - EXP_COUNTER_INCREMENT(exp, 
destroy); - - rc = OBP(exp->exp_obd, destroy)(env, exp, obdo); - return rc; -} - -static inline int obd_getattr(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa) -{ - int rc; - - EXP_CHECK_DT_OP(exp, getattr); - EXP_COUNTER_INCREMENT(exp, getattr); - - rc = OBP(exp->exp_obd, getattr)(env, exp, oa); - return rc; -} - -static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa) -{ - int rc; - - EXP_CHECK_DT_OP(exp, setattr); - EXP_COUNTER_INCREMENT(exp, setattr); - - rc = OBP(exp->exp_obd, setattr)(env, exp, oa); - return rc; -} - -static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority) -{ - struct obd_device *obd = imp->imp_obd; - int rc; - - rc = obd_check_dev_active(obd); - if (rc) - return rc; - OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, add_conn); - - rc = OBP(obd, add_conn)(imp, uuid, priority); - return rc; -} - -static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid) -{ - struct obd_device *obd = imp->imp_obd; - int rc; - - rc = obd_check_dev_active(obd); - if (rc) - return rc; - OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, del_conn); - - rc = OBP(obd, del_conn)(imp, uuid); - return rc; -} - -static inline struct obd_uuid *obd_get_uuid(struct obd_export *exp) -{ - struct obd_uuid *uuid; - - OBD_CHECK_DT_OP(exp->exp_obd, get_uuid, NULL); - EXP_COUNTER_INCREMENT(exp, get_uuid); - - uuid = OBP(exp->exp_obd, get_uuid)(exp); - return uuid; -} - -/* - * Create a new /a exp on device /a obd for the uuid /a cluuid - * @param exp New export handle - * @param d Connect data, supported flags are set, flags also understood - * by obd are returned. - */ -static inline int obd_connect(const struct lu_env *env, - struct obd_export **exp, struct obd_device *obd, - struct obd_uuid *cluuid, - struct obd_connect_data *data, - void *localdata) -{ - int rc; - __u64 ocf = data ? 
data->ocd_connect_flags : 0; /* - * for post-condition - * check - */ - - rc = obd_check_dev_active(obd); - if (rc) - return rc; - OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, connect); - - rc = OBP(obd, connect)(env, exp, obd, cluuid, data, localdata); - /* check that only subset is granted */ - LASSERT(ergo(data, (data->ocd_connect_flags & ocf) == - data->ocd_connect_flags)); - return rc; -} - -static inline int obd_reconnect(const struct lu_env *env, - struct obd_export *exp, - struct obd_device *obd, - struct obd_uuid *cluuid, - struct obd_connect_data *d, - void *localdata) -{ - int rc; - __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition check */ - - rc = obd_check_dev_active(obd); - if (rc) - return rc; - OBD_CHECK_DT_OP(obd, reconnect, 0); - OBD_COUNTER_INCREMENT(obd, reconnect); - - rc = OBP(obd, reconnect)(env, exp, obd, cluuid, d, localdata); - /* check that only subset is granted */ - LASSERT(ergo(d, (d->ocd_connect_flags & ocf) == d->ocd_connect_flags)); - return rc; -} - -static inline int obd_disconnect(struct obd_export *exp) -{ - int rc; - - EXP_CHECK_DT_OP(exp, disconnect); - EXP_COUNTER_INCREMENT(exp, disconnect); - - rc = OBP(exp->exp_obd, disconnect)(exp); - return rc; -} - -static inline int obd_fid_init(struct obd_device *obd, struct obd_export *exp, - enum lu_cli_type type) -{ - int rc; - - OBD_CHECK_DT_OP(obd, fid_init, 0); - OBD_COUNTER_INCREMENT(obd, fid_init); - - rc = OBP(obd, fid_init)(obd, exp, type); - return rc; -} - -static inline int obd_fid_fini(struct obd_device *obd) -{ - int rc; - - OBD_CHECK_DT_OP(obd, fid_fini, 0); - OBD_COUNTER_INCREMENT(obd, fid_fini); - - rc = OBP(obd, fid_fini)(obd); - return rc; -} - -static inline int obd_fid_alloc(const struct lu_env *env, - struct obd_export *exp, - struct lu_fid *fid, - struct md_op_data *op_data) -{ - int rc; - - EXP_CHECK_DT_OP(exp, fid_alloc); - EXP_COUNTER_INCREMENT(exp, fid_alloc); - - rc = OBP(exp->exp_obd, fid_alloc)(env, exp, fid, 
op_data); - return rc; -} - -static inline int obd_pool_new(struct obd_device *obd, char *poolname) -{ - int rc; - - OBD_CHECK_DT_OP(obd, pool_new, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, pool_new); - - rc = OBP(obd, pool_new)(obd, poolname); - return rc; -} - -static inline int obd_pool_del(struct obd_device *obd, char *poolname) -{ - int rc; - - OBD_CHECK_DT_OP(obd, pool_del, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, pool_del); - - rc = OBP(obd, pool_del)(obd, poolname); - return rc; -} - -static inline int obd_pool_add(struct obd_device *obd, - char *poolname, - char *ostname) -{ - int rc; - - OBD_CHECK_DT_OP(obd, pool_add, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, pool_add); - - rc = OBP(obd, pool_add)(obd, poolname, ostname); - return rc; -} - -static inline int obd_pool_rem(struct obd_device *obd, - char *poolname, - char *ostname) -{ - int rc; - - OBD_CHECK_DT_OP(obd, pool_rem, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, pool_rem); - - rc = OBP(obd, pool_rem)(obd, poolname, ostname); - return rc; -} - -static inline void obd_getref(struct obd_device *obd) -{ - if (OBT(obd) && OBP(obd, getref)) { - OBD_COUNTER_INCREMENT(obd, getref); - OBP(obd, getref)(obd); - } -} - -static inline void obd_putref(struct obd_device *obd) -{ - if (OBT(obd) && OBP(obd, putref)) { - OBD_COUNTER_INCREMENT(obd, putref); - OBP(obd, putref)(obd); - } -} - -static inline int obd_init_export(struct obd_export *exp) -{ - int rc = 0; - - if ((exp)->exp_obd && OBT((exp)->exp_obd) && - OBP((exp)->exp_obd, init_export)) - rc = OBP(exp->exp_obd, init_export)(exp); - return rc; -} - -static inline int obd_destroy_export(struct obd_export *exp) -{ - if ((exp)->exp_obd && OBT((exp)->exp_obd) && - OBP((exp)->exp_obd, destroy_export)) - OBP(exp->exp_obd, destroy_export)(exp); - return 0; -} - -/* - * @max_age is the oldest time in jiffies that we accept using a cached data. - * If the cache is older than @max_age we will get a new value from the - * target. 
Use a value of "cfs_time_current() + HZ" to guarantee freshness. - */ -static inline int obd_statfs_async(struct obd_export *exp, - struct obd_info *oinfo, - __u64 max_age, - struct ptlrpc_request_set *rqset) -{ - int rc = 0; - struct obd_device *obd; - - if (!exp || !exp->exp_obd) - return -EINVAL; - - obd = exp->exp_obd; - OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, statfs); - - CDEBUG(D_SUPER, "%s: osfs %p age %llu, max_age %llu\n", - obd->obd_name, &obd->obd_osfs, obd->obd_osfs_age, max_age); - if (cfs_time_before_64(obd->obd_osfs_age, max_age)) { - rc = OBP(obd, statfs_async)(exp, oinfo, max_age, rqset); - } else { - CDEBUG(D_SUPER, - "%s: use %p cache blocks %llu/%llu objects %llu/%llu\n", - obd->obd_name, &obd->obd_osfs, - obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks, - obd->obd_osfs.os_ffree, obd->obd_osfs.os_files); - spin_lock(&obd->obd_osfs_lock); - memcpy(oinfo->oi_osfs, &obd->obd_osfs, sizeof(*oinfo->oi_osfs)); - spin_unlock(&obd->obd_osfs_lock); - oinfo->oi_flags |= OBD_STATFS_FROM_CACHE; - if (oinfo->oi_cb_up) - oinfo->oi_cb_up(oinfo, 0); - } - return rc; -} - -static inline int obd_statfs_rqset(struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, - __u32 flags) -{ - struct ptlrpc_request_set *set = NULL; - struct obd_info oinfo = { - .oi_osfs = osfs, - .oi_flags = flags, - }; - int rc = 0; - - set = ptlrpc_prep_set(); - if (!set) - return -ENOMEM; - - rc = obd_statfs_async(exp, &oinfo, max_age, set); - if (rc == 0) - rc = ptlrpc_set_wait(set); - ptlrpc_set_destroy(set); - return rc; -} - -/* - * @max_age is the oldest time in jiffies that we accept using a cached data. - * If the cache is older than @max_age we will get a new value from the - * target. Use a value of "cfs_time_current() + HZ" to guarantee freshness. 
- */ -static inline int obd_statfs(const struct lu_env *env, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, - __u32 flags) -{ - int rc = 0; - struct obd_device *obd = exp->exp_obd; - - if (!obd) - return -EINVAL; - - OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, statfs); - - CDEBUG(D_SUPER, "osfs %llu, max_age %llu\n", - obd->obd_osfs_age, max_age); - if (cfs_time_before_64(obd->obd_osfs_age, max_age)) { - rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags); - if (rc == 0) { - spin_lock(&obd->obd_osfs_lock); - memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs)); - obd->obd_osfs_age = cfs_time_current_64(); - spin_unlock(&obd->obd_osfs_lock); - } - } else { - CDEBUG(D_SUPER, - "%s: use %p cache blocks %llu/%llu objects %llu/%llu\n", - obd->obd_name, &obd->obd_osfs, - obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks, - obd->obd_osfs.os_ffree, obd->obd_osfs.os_files); - spin_lock(&obd->obd_osfs_lock); - memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); - spin_unlock(&obd->obd_osfs_lock); - } - return rc; -} - -static inline int obd_preprw(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - struct niobuf_remote *remote, int *pages, - struct niobuf_local *local) -{ - int rc; - - EXP_CHECK_DT_OP(exp, preprw); - EXP_COUNTER_INCREMENT(exp, preprw); - - rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote, - pages, local); - return rc; -} - -static inline int obd_commitrw(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - struct niobuf_remote *rnb, int pages, - struct niobuf_local *local, int rc) -{ - EXP_CHECK_DT_OP(exp, commitrw); - EXP_COUNTER_INCREMENT(exp, commitrw); - - rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj, - rnb, pages, local, rc); - return rc; -} - -static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp, - int len, void *karg, 
void __user *uarg) -{ - int rc; - - EXP_CHECK_DT_OP(exp, iocontrol); - EXP_COUNTER_INCREMENT(exp, iocontrol); - - rc = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg); - return rc; -} - -static inline void obd_import_event(struct obd_device *obd, - struct obd_import *imp, - enum obd_import_event event) -{ - if (!obd) { - CERROR("NULL device\n"); - return; - } - if (obd->obd_set_up && OBP(obd, import_event)) { - OBD_COUNTER_INCREMENT(obd, import_event); - OBP(obd, import_event)(obd, imp, event); - } -} - -static inline int obd_notify(struct obd_device *obd, - struct obd_device *watched, - enum obd_notify_event ev, - void *data) -{ - int rc; - - rc = obd_check_dev(obd); - if (rc) - return rc; - - if (!obd->obd_set_up) { - CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name); - return -EINVAL; - } - - if (!OBP(obd, notify)) { - CDEBUG(D_HA, "obd %s has no notify handler\n", obd->obd_name); - return -ENOSYS; - } - - OBD_COUNTER_INCREMENT(obd, notify); - rc = OBP(obd, notify)(obd, watched, ev, data); - return rc; -} - -static inline int obd_notify_observer(struct obd_device *observer, - struct obd_device *observed, - enum obd_notify_event ev, - void *data) -{ - int rc1; - int rc2; - - struct obd_notify_upcall *onu; - - if (observer->obd_observer) - rc1 = obd_notify(observer->obd_observer, observed, ev, data); - else - rc1 = 0; - /* - * Also, call non-obd listener, if any - */ - onu = &observer->obd_upcall; - if (onu->onu_upcall) - rc2 = onu->onu_upcall(observer, observed, ev, - onu->onu_owner, NULL); - else - rc2 = 0; - - return rc1 ? 
rc1 : rc2; -} - -static inline int obd_quotactl(struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - int rc; - - EXP_CHECK_DT_OP(exp, quotactl); - EXP_COUNTER_INCREMENT(exp, quotactl); - - rc = OBP(exp->exp_obd, quotactl)(exp->exp_obd, exp, oqctl); - return rc; -} - -static inline int obd_health_check(const struct lu_env *env, - struct obd_device *obd) -{ - /* - * returns: 0 on healthy - * >0 on unhealthy + reason code/flag - * however the only supported reason == 1 right now - * We'll need to define some better reasons - * or flags in the future. - * <0 on error - */ - int rc; - - /* don't use EXP_CHECK_DT_OP, because NULL method is normal here */ - if (!obd || !OBT(obd)) { - CERROR("cleaned up obd\n"); - return -EOPNOTSUPP; - } - if (!obd->obd_set_up || obd->obd_stopping) - return 0; - if (!OBP(obd, health_check)) - return 0; - - rc = OBP(obd, health_check)(env, obd); - return rc; -} - -static inline int obd_register_observer(struct obd_device *obd, - struct obd_device *observer) -{ - int rc; - - rc = obd_check_dev(obd); - if (rc) - return rc; - down_write(&obd->obd_observer_link_sem); - if (obd->obd_observer && observer) { - up_write(&obd->obd_observer_link_sem); - return -EALREADY; - } - obd->obd_observer = observer; - up_write(&obd->obd_observer_link_sem); - return 0; -} - -/* metadata helpers */ -static inline int md_getstatus(struct obd_export *exp, struct lu_fid *fid) -{ - int rc; - - EXP_CHECK_MD_OP(exp, getstatus); - EXP_MD_COUNTER_INCREMENT(exp, getstatus); - rc = MDP(exp->exp_obd, getstatus)(exp, fid); - return rc; -} - -static inline int md_getattr(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, getattr); - EXP_MD_COUNTER_INCREMENT(exp, getattr); - rc = MDP(exp->exp_obd, getattr)(exp, op_data, request); - return rc; -} - -static inline int md_null_inode(struct obd_export *exp, - const struct lu_fid *fid) -{ - int rc; - - EXP_CHECK_MD_OP(exp, null_inode); - 
EXP_MD_COUNTER_INCREMENT(exp, null_inode); - rc = MDP(exp->exp_obd, null_inode)(exp, fid); - return rc; -} - -static inline int md_close(struct obd_export *exp, struct md_op_data *op_data, - struct md_open_data *mod, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, close); - EXP_MD_COUNTER_INCREMENT(exp, close); - rc = MDP(exp->exp_obd, close)(exp, op_data, mod, request); - return rc; -} - -static inline int md_create(struct obd_export *exp, struct md_op_data *op_data, - const void *data, size_t datalen, umode_t mode, - uid_t uid, gid_t gid, cfs_cap_t cap_effective, - __u64 rdev, struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, create); - EXP_MD_COUNTER_INCREMENT(exp, create); - rc = MDP(exp->exp_obd, create)(exp, op_data, data, datalen, mode, - uid, gid, cap_effective, rdev, request); - return rc; -} - -static inline int md_enqueue(struct obd_export *exp, - struct ldlm_enqueue_info *einfo, - const union ldlm_policy_data *policy, - struct lookup_intent *it, - struct md_op_data *op_data, - struct lustre_handle *lockh, - __u64 extra_lock_flags) -{ - int rc; - - EXP_CHECK_MD_OP(exp, enqueue); - EXP_MD_COUNTER_INCREMENT(exp, enqueue); - rc = MDP(exp->exp_obd, enqueue)(exp, einfo, policy, it, op_data, lockh, - extra_lock_flags); - return rc; -} - -static inline int md_getattr_name(struct obd_export *exp, - struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, getattr_name); - EXP_MD_COUNTER_INCREMENT(exp, getattr_name); - rc = MDP(exp->exp_obd, getattr_name)(exp, op_data, request); - return rc; -} - -static inline int md_intent_lock(struct obd_export *exp, - struct md_op_data *op_data, - struct lookup_intent *it, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - __u64 extra_lock_flags) -{ - int rc; - - EXP_CHECK_MD_OP(exp, intent_lock); - EXP_MD_COUNTER_INCREMENT(exp, intent_lock); - rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, it, reqp, - cb_blocking, 
extra_lock_flags); - return rc; -} - -static inline int md_link(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, link); - EXP_MD_COUNTER_INCREMENT(exp, link); - rc = MDP(exp->exp_obd, link)(exp, op_data, request); - return rc; -} - -static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data, - const char *old, size_t oldlen, const char *new, - size_t newlen, struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, rename); - EXP_MD_COUNTER_INCREMENT(exp, rename); - rc = MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen, new, - newlen, request); - return rc; -} - -static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, setattr); - EXP_MD_COUNTER_INCREMENT(exp, setattr); - rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, request); - return rc; -} - -static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, sync); - EXP_MD_COUNTER_INCREMENT(exp, sync); - rc = MDP(exp->exp_obd, sync)(exp, fid, request); - return rc; -} - -static inline int md_read_page(struct obd_export *exp, - struct md_op_data *op_data, - struct md_callback *cb_op, - __u64 hash_offset, - struct page **ppage) -{ - int rc; - - EXP_CHECK_MD_OP(exp, read_page); - EXP_MD_COUNTER_INCREMENT(exp, read_page); - rc = MDP(exp->exp_obd, read_page)(exp, op_data, cb_op, hash_offset, - ppage); - return rc; -} - -static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, unlink); - EXP_MD_COUNTER_INCREMENT(exp, unlink); - rc = MDP(exp->exp_obd, unlink)(exp, op_data, request); - return rc; -} - -static inline int md_get_lustre_md(struct obd_export *exp, - struct ptlrpc_request *req, - struct 
obd_export *dt_exp, - struct obd_export *md_exp, - struct lustre_md *md) -{ - EXP_CHECK_MD_OP(exp, get_lustre_md); - EXP_MD_COUNTER_INCREMENT(exp, get_lustre_md); - return MDP(exp->exp_obd, get_lustre_md)(exp, req, dt_exp, md_exp, md); -} - -static inline int md_free_lustre_md(struct obd_export *exp, - struct lustre_md *md) -{ - EXP_CHECK_MD_OP(exp, free_lustre_md); - EXP_MD_COUNTER_INCREMENT(exp, free_lustre_md); - return MDP(exp->exp_obd, free_lustre_md)(exp, md); -} - -static inline int md_merge_attr(struct obd_export *exp, - const struct lmv_stripe_md *lsm, - struct cl_attr *attr, - ldlm_blocking_callback cb) -{ - EXP_CHECK_MD_OP(exp, merge_attr); - EXP_MD_COUNTER_INCREMENT(exp, merge_attr); - return MDP(exp->exp_obd, merge_attr)(exp, lsm, attr, cb); -} - -static inline int md_setxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 valid, const char *name, - const char *input, int input_size, - int output_size, int flags, __u32 suppgid, - struct ptlrpc_request **request) -{ - EXP_CHECK_MD_OP(exp, setxattr); - EXP_MD_COUNTER_INCREMENT(exp, setxattr); - return MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input, - input_size, output_size, flags, - suppgid, request); -} - -static inline int md_getxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 valid, const char *name, - const char *input, int input_size, - int output_size, int flags, - struct ptlrpc_request **request) -{ - EXP_CHECK_MD_OP(exp, getxattr); - EXP_MD_COUNTER_INCREMENT(exp, getxattr); - return MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input, - input_size, output_size, flags, - request); -} - -static inline int md_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct lookup_intent *it) -{ - EXP_CHECK_MD_OP(exp, set_open_replay_data); - EXP_MD_COUNTER_INCREMENT(exp, set_open_replay_data); - return MDP(exp->exp_obd, set_open_replay_data)(exp, och, it); -} - -static inline int md_clear_open_replay_data(struct obd_export *exp, - struct 
obd_client_handle *och) -{ - EXP_CHECK_MD_OP(exp, clear_open_replay_data); - EXP_MD_COUNTER_INCREMENT(exp, clear_open_replay_data); - return MDP(exp->exp_obd, clear_open_replay_data)(exp, och); -} - -static inline int md_set_lock_data(struct obd_export *exp, - const struct lustre_handle *lockh, - void *data, __u64 *bits) -{ - EXP_CHECK_MD_OP(exp, set_lock_data); - EXP_MD_COUNTER_INCREMENT(exp, set_lock_data); - return MDP(exp->exp_obd, set_lock_data)(exp, lockh, data, bits); -} - -static inline int md_cancel_unused(struct obd_export *exp, - const struct lu_fid *fid, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - enum ldlm_cancel_flags flags, - void *opaque) -{ - int rc; - - EXP_CHECK_MD_OP(exp, cancel_unused); - EXP_MD_COUNTER_INCREMENT(exp, cancel_unused); - - rc = MDP(exp->exp_obd, cancel_unused)(exp, fid, policy, mode, - flags, opaque); - return rc; -} - -static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags, - const struct lu_fid *fid, - enum ldlm_type type, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - struct lustre_handle *lockh) -{ - EXP_CHECK_MD_OP(exp, lock_match); - EXP_MD_COUNTER_INCREMENT(exp, lock_match); - return MDP(exp->exp_obd, lock_match)(exp, flags, fid, type, - policy, mode, lockh); -} - -static inline int md_init_ea_size(struct obd_export *exp, u32 easize, - u32 def_asize) -{ - EXP_CHECK_MD_OP(exp, init_ea_size); - EXP_MD_COUNTER_INCREMENT(exp, init_ea_size); - return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize); -} - -static inline int md_intent_getattr_async(struct obd_export *exp, - struct md_enqueue_info *minfo) -{ - int rc; - - EXP_CHECK_MD_OP(exp, intent_getattr_async); - EXP_MD_COUNTER_INCREMENT(exp, intent_getattr_async); - rc = MDP(exp->exp_obd, intent_getattr_async)(exp, minfo); - return rc; -} - -static inline int md_revalidate_lock(struct obd_export *exp, - struct lookup_intent *it, - struct lu_fid *fid, __u64 *bits) -{ - int rc; - - EXP_CHECK_MD_OP(exp, revalidate_lock); 
- EXP_MD_COUNTER_INCREMENT(exp, revalidate_lock); - rc = MDP(exp->exp_obd, revalidate_lock)(exp, it, fid, bits); - return rc; -} - -static inline int md_get_fid_from_lsm(struct obd_export *exp, - const struct lmv_stripe_md *lsm, - const char *name, int namelen, - struct lu_fid *fid) -{ - int rc; - - EXP_CHECK_MD_OP(exp, get_fid_from_lsm); - EXP_MD_COUNTER_INCREMENT(exp, get_fid_from_lsm); - rc = MDP(exp->exp_obd, get_fid_from_lsm)(exp, lsm, name, namelen, fid); - return rc; -} - -/* - * Unpack an MD struct from disk to in-memory format. - * Returns +ve size of unpacked MD (0 for free), or -ve error. - * - * If *plsm != NULL and lmm == NULL then *lsm will be freed. - * If *plsm == NULL then it will be allocated. - */ -static inline int md_unpackmd(struct obd_export *exp, - struct lmv_stripe_md **plsm, - const union lmv_mds_md *lmm, size_t lmm_size) -{ - int rc; - - EXP_CHECK_MD_OP(exp, unpackmd); - EXP_MD_COUNTER_INCREMENT(exp, unpackmd); - rc = MDP(exp->exp_obd, unpackmd)(exp, plsm, lmm, lmm_size); - return rc; -} - -/* OBD Metadata Support */ - -int obd_init_caches(void); -void obd_cleanup_caches(void); - -/* support routines */ -extern struct kmem_cache *obdo_cachep; - -typedef int (*register_lwp_cb)(void *data); - -struct lwp_register_item { - struct obd_export **lri_exp; - register_lwp_cb lri_cb_func; - void *lri_cb_data; - struct list_head lri_list; - char lri_name[MTI_NAME_MAXLEN]; -}; - -/* - * I'm as embarrassed about this as you are. 
- * - * <shaver> // XXX do not look into _superhack with remaining eye - * <shaver> // XXX if this were any uglier, I'd get my own show on MTV - */ -extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); - -/* obd_mount.c */ -int lustre_unregister_fs(void); -int lustre_register_fs(void); -int lustre_check_exclusion(struct super_block *sb, char *svname); - -/* sysctl.c */ -int obd_sysctl_init(void); - -/* uuid.c */ -typedef __u8 class_uuid_t[16]; -void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out); - -/* lustre_peer.c */ -int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index); -int class_add_uuid(const char *uuid, __u64 nid); -int class_del_uuid(const char *uuid); -int class_check_uuid(struct obd_uuid *uuid, __u64 nid); -void class_init_uuidlist(void); -void class_exit_uuidlist(void); - -/* class_obd.c */ -extern char obd_jobid_node[]; -extern struct miscdevice obd_psdev; -extern spinlock_t obd_types_lock; -int class_procfs_init(void); -int class_procfs_clean(void); - -/* prng.c */ -#define ll_generate_random_uuid(uuid_out) \ - get_random_bytes(uuid_out, sizeof(class_uuid_t)) - -/* statfs_pack.c */ -struct kstatfs; -void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs); -void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs); - -/* root squash info */ -struct rw_semaphore; -struct root_squash_info { - uid_t rsi_uid; - gid_t rsi_gid; - struct list_head rsi_nosquash_nids; - struct rw_semaphore rsi_sem; -}; - -/* linux-module.c */ -int obd_ioctl_getdata(char **buf, int *len, void __user *arg); - -#endif /* __LINUX_OBD_CLASS_H */ diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h deleted file mode 100644 index 8595091b8b86..000000000000 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ /dev/null @@ -1,545 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT 
NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef _OBD_SUPPORT -#define _OBD_SUPPORT - -#include <linux/slab.h> -#include <linux/sched/signal.h> - -#include <linux/libcfs/libcfs.h> -#include <lustre_compat.h> -#include <lprocfs_status.h> - -/* global variables */ -extern unsigned int obd_debug_peer_on_timeout; -extern unsigned int obd_dump_on_timeout; -extern unsigned int obd_dump_on_eviction; -/* obd_timeout should only be used for recovery, not for - * networking / disk / timings affected by load (use Adaptive Timeouts) - */ -extern unsigned int obd_timeout; /* seconds */ -extern unsigned int obd_timeout_set; -extern unsigned int at_min; -extern unsigned int at_max; -extern unsigned int at_history; -extern int at_early_margin; -extern int at_extra; -extern unsigned long obd_max_dirty_pages; -extern atomic_long_t obd_dirty_pages; -extern atomic_long_t obd_dirty_transit_pages; -extern char obd_jobid_var[]; - -/* Some hash init argument constants */ -#define HASH_POOLS_BKT_BITS 3 -#define HASH_POOLS_CUR_BITS 3 -#define HASH_POOLS_MAX_BITS 7 -#define HASH_UUID_BKT_BITS 5 -#define HASH_UUID_CUR_BITS 7 -#define HASH_UUID_MAX_BITS 12 -#define HASH_NID_BKT_BITS 5 -#define HASH_NID_CUR_BITS 7 -#define HASH_NID_MAX_BITS 12 -#define HASH_NID_STATS_BKT_BITS 5 -#define HASH_NID_STATS_CUR_BITS 7 -#define HASH_NID_STATS_MAX_BITS 12 -#define HASH_LQE_BKT_BITS 5 -#define HASH_LQE_CUR_BITS 7 -#define HASH_LQE_MAX_BITS 12 -#define HASH_CONN_BKT_BITS 5 -#define HASH_CONN_CUR_BITS 5 -#define HASH_CONN_MAX_BITS 15 -#define HASH_EXP_LOCK_BKT_BITS 5 -#define HASH_EXP_LOCK_CUR_BITS 7 -#define HASH_EXP_LOCK_MAX_BITS 16 -#define HASH_CL_ENV_BKT_BITS 5 -#define HASH_CL_ENV_BITS 10 -#define HASH_JOB_STATS_BKT_BITS 5 -#define HASH_JOB_STATS_CUR_BITS 7 -#define HASH_JOB_STATS_MAX_BITS 12 - -/* Timeout definitions */ -#define OBD_TIMEOUT_DEFAULT 100 -/* Time to wait for all clients to reconnect during recovery (hard limit) */ -#define OBD_RECOVERY_TIME_HARD (obd_timeout * 9) -/* Time to wait for all clients to reconnect during 
recovery (soft limit) */ -/* Should be very conservative; must catch the first reconnect after reboot */ -#define OBD_RECOVERY_TIME_SOFT (obd_timeout * 3) -/* Change recovery-small 26b time if you change this */ -#define PING_INTERVAL max(obd_timeout / 4, 1U) -/* a bit more than maximal journal commit time in seconds */ -#define PING_INTERVAL_SHORT min(PING_INTERVAL, 7U) -/* Client may skip 1 ping; we must wait at least 2.5. But for multiple - * failover targets the client only pings one server at a time, and pings - * can be lost on a loaded network. Since eviction has serious consequences, - * and there's no urgent need to evict a client just because it's idle, we - * should be very conservative here. - */ -#define PING_EVICT_TIMEOUT (PING_INTERVAL * 6) -#define DISK_TIMEOUT 50 /* Beyond this we warn about disk speed */ -#define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */ -/* Max connect interval for nonresponsive servers; ~50s to avoid building up - * connect requests in the LND queues, but within obd_timeout so we don't - * miss the recovery window - */ -#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN, obd_timeout)) -#define CONNECTION_SWITCH_INC 5 /* Connection timeout backoff */ -/* In general this should be low to have quick detection of a system - * running on a backup server. (If it's too low, import_select_connection - * will increase the timeout anyhow.) 
- */ -#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN, obd_timeout / 20) -/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */ -#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \ - INITIAL_CONNECT_TIMEOUT) -/* The min time a target should wait for clients to reconnect in recovery */ -#define OBD_RECOVERY_TIME_MIN (2 * RECONNECT_DELAY_MAX) -#define OBD_IR_FACTOR_MIN 1 -#define OBD_IR_FACTOR_MAX 10 -#define OBD_IR_FACTOR_DEFAULT (OBD_IR_FACTOR_MAX / 2) -/* default timeout for the MGS to become IR_FULL */ -#define OBD_IR_MGS_TIMEOUT (4 * obd_timeout) -#define LONG_UNLINK 300 /* Unlink should happen before now */ - -/** - * Time interval of shrink, if the client is "idle" more than this interval, - * then the ll_grant thread will return the requested grant space to filter - */ -#define GRANT_SHRINK_INTERVAL 1200/*20 minutes*/ - -#define OBD_FAIL_MDS 0x100 -#define OBD_FAIL_MDS_HANDLE_UNPACK 0x101 -#define OBD_FAIL_MDS_GETATTR_NET 0x102 -#define OBD_FAIL_MDS_GETATTR_PACK 0x103 -#define OBD_FAIL_MDS_READPAGE_NET 0x104 -#define OBD_FAIL_MDS_READPAGE_PACK 0x105 -#define OBD_FAIL_MDS_SENDPAGE 0x106 -#define OBD_FAIL_MDS_REINT_NET 0x107 -#define OBD_FAIL_MDS_REINT_UNPACK 0x108 -#define OBD_FAIL_MDS_REINT_SETATTR 0x109 -#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a -#define OBD_FAIL_MDS_REINT_CREATE 0x10b -#define OBD_FAIL_MDS_REINT_CREATE_WRITE 0x10c -#define OBD_FAIL_MDS_REINT_UNLINK 0x10d -#define OBD_FAIL_MDS_REINT_UNLINK_WRITE 0x10e -#define OBD_FAIL_MDS_REINT_LINK 0x10f -#define OBD_FAIL_MDS_REINT_LINK_WRITE 0x110 -#define OBD_FAIL_MDS_REINT_RENAME 0x111 -#define OBD_FAIL_MDS_REINT_RENAME_WRITE 0x112 -#define OBD_FAIL_MDS_OPEN_NET 0x113 -#define OBD_FAIL_MDS_OPEN_PACK 0x114 -#define OBD_FAIL_MDS_CLOSE_NET 0x115 -#define OBD_FAIL_MDS_CLOSE_PACK 0x116 -#define OBD_FAIL_MDS_CONNECT_NET 0x117 -#define OBD_FAIL_MDS_CONNECT_PACK 0x118 -#define OBD_FAIL_MDS_REINT_NET_REP 0x119 -#define OBD_FAIL_MDS_DISCONNECT_NET 
0x11a -#define OBD_FAIL_MDS_GETSTATUS_NET 0x11b -#define OBD_FAIL_MDS_GETSTATUS_PACK 0x11c -#define OBD_FAIL_MDS_STATFS_PACK 0x11d -#define OBD_FAIL_MDS_STATFS_NET 0x11e -#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f -#define OBD_FAIL_MDS_PIN_NET 0x120 -#define OBD_FAIL_MDS_UNPIN_NET 0x121 -#define OBD_FAIL_MDS_ALL_REPLY_NET 0x122 -#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123 -#define OBD_FAIL_MDS_SYNC_NET 0x124 -#define OBD_FAIL_MDS_SYNC_PACK 0x125 -/* OBD_FAIL_MDS_DONE_WRITING_NET 0x126 obsolete since 2.8.0 */ -/* OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 obsolete since 2.8.0 */ -#define OBD_FAIL_MDS_ALLOC_OBDO 0x128 -#define OBD_FAIL_MDS_PAUSE_OPEN 0x129 -#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a -#define OBD_FAIL_MDS_OPEN_CREATE 0x12b -#define OBD_FAIL_MDS_OST_SETATTR 0x12c -/* OBD_FAIL_MDS_QUOTACHECK_NET 0x12d obsolete since 2.4 */ -#define OBD_FAIL_MDS_QUOTACTL_NET 0x12e -#define OBD_FAIL_MDS_CLIENT_ADD 0x12f -#define OBD_FAIL_MDS_GETXATTR_NET 0x130 -#define OBD_FAIL_MDS_GETXATTR_PACK 0x131 -#define OBD_FAIL_MDS_SETXATTR_NET 0x132 -#define OBD_FAIL_MDS_SETXATTR 0x133 -#define OBD_FAIL_MDS_SETXATTR_WRITE 0x134 -#define OBD_FAIL_MDS_FS_SETUP 0x135 -#define OBD_FAIL_MDS_RESEND 0x136 -#define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x137 -#define OBD_FAIL_MDS_LOV_SYNC_RACE 0x138 -#define OBD_FAIL_MDS_OSC_PRECREATE 0x139 -#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a -#define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b -#define OBD_FAIL_MDS_BLOCK_QUOTA_REQ 0x13c -#define OBD_FAIL_MDS_DROP_QUOTA_REQ 0x13d -#define OBD_FAIL_MDS_REMOVE_COMMON_EA 0x13e -#define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING 0x13f -#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140 -#define OBD_FAIL_MDS_LOV_PREP_CREATE 0x141 -#define OBD_FAIL_MDS_REINT_DELAY 0x142 -#define OBD_FAIL_MDS_READLINK_EPROTO 0x143 -#define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144 -#define OBD_FAIL_MDS_PDO_LOCK 0x145 -#define OBD_FAIL_MDS_PDO_LOCK2 0x146 -#define OBD_FAIL_MDS_OSC_CREATE_FAIL 0x147 -#define OBD_FAIL_MDS_NEGATIVE_POSITIVE 0x148 -#define 
OBD_FAIL_MDS_HSM_STATE_GET_NET 0x149 -#define OBD_FAIL_MDS_HSM_STATE_SET_NET 0x14a -#define OBD_FAIL_MDS_HSM_PROGRESS_NET 0x14b -#define OBD_FAIL_MDS_HSM_REQUEST_NET 0x14c -#define OBD_FAIL_MDS_HSM_CT_REGISTER_NET 0x14d -#define OBD_FAIL_MDS_HSM_CT_UNREGISTER_NET 0x14e -#define OBD_FAIL_MDS_SWAP_LAYOUTS_NET 0x14f -#define OBD_FAIL_MDS_HSM_ACTION_NET 0x150 -#define OBD_FAIL_MDS_CHANGELOG_INIT 0x151 - -/* layout lock */ -#define OBD_FAIL_MDS_NO_LL_GETATTR 0x170 -#define OBD_FAIL_MDS_NO_LL_OPEN 0x171 -#define OBD_FAIL_MDS_LL_BLOCK 0x172 - -/* CMD */ -#define OBD_FAIL_MDS_IS_SUBDIR_NET 0x180 -#define OBD_FAIL_MDS_IS_SUBDIR_PACK 0x181 -#define OBD_FAIL_MDS_SET_INFO_NET 0x182 -#define OBD_FAIL_MDS_WRITEPAGE_NET 0x183 -#define OBD_FAIL_MDS_WRITEPAGE_PACK 0x184 -#define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x185 -#define OBD_FAIL_MDS_GET_INFO_NET 0x186 -#define OBD_FAIL_MDS_DQACQ_NET 0x187 - -/* OI scrub */ -#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 -#define OBD_FAIL_OSD_SCRUB_CRASH 0x191 -#define OBD_FAIL_OSD_SCRUB_FATAL 0x192 -#define OBD_FAIL_OSD_FID_MAPPING 0x193 -#define OBD_FAIL_OSD_LMA_INCOMPAT 0x194 -#define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195 - -#define OBD_FAIL_OST 0x200 -#define OBD_FAIL_OST_CONNECT_NET 0x201 -#define OBD_FAIL_OST_DISCONNECT_NET 0x202 -#define OBD_FAIL_OST_GET_INFO_NET 0x203 -#define OBD_FAIL_OST_CREATE_NET 0x204 -#define OBD_FAIL_OST_DESTROY_NET 0x205 -#define OBD_FAIL_OST_GETATTR_NET 0x206 -#define OBD_FAIL_OST_SETATTR_NET 0x207 -#define OBD_FAIL_OST_OPEN_NET 0x208 -#define OBD_FAIL_OST_CLOSE_NET 0x209 -#define OBD_FAIL_OST_BRW_NET 0x20a -#define OBD_FAIL_OST_PUNCH_NET 0x20b -#define OBD_FAIL_OST_STATFS_NET 0x20c -#define OBD_FAIL_OST_HANDLE_UNPACK 0x20d -#define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e -#define OBD_FAIL_OST_BRW_READ_BULK 0x20f -#define OBD_FAIL_OST_SYNC_NET 0x210 -#define OBD_FAIL_OST_ALL_REPLY_NET 0x211 -#define OBD_FAIL_OST_ALL_REQUEST_NET 0x212 -#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213 -#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 
-#define OBD_FAIL_OST_ENOSPC 0x215 -#define OBD_FAIL_OST_EROFS 0x216 -#define OBD_FAIL_OST_ENOENT 0x217 -/* OBD_FAIL_OST_QUOTACHECK_NET 0x218 obsolete since 2.4 */ -#define OBD_FAIL_OST_QUOTACTL_NET 0x219 -#define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a -#define OBD_FAIL_OST_CHECKSUM_SEND 0x21b -#define OBD_FAIL_OST_BRW_SIZE 0x21c -#define OBD_FAIL_OST_DROP_REQ 0x21d -#define OBD_FAIL_OST_SETATTR_CREDITS 0x21e -#define OBD_FAIL_OST_HOLD_WRITE_RPC 0x21f -#define OBD_FAIL_OST_BRW_WRITE_BULK2 0x220 -#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221 -#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222 -#define OBD_FAIL_OST_PAUSE_CREATE 0x223 -#define OBD_FAIL_OST_BRW_PAUSE_PACK 0x224 -#define OBD_FAIL_OST_CONNECT_NET2 0x225 -#define OBD_FAIL_OST_NOMEM 0x226 -#define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227 -#define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228 -#define OBD_FAIL_OST_ENOINO 0x229 -#define OBD_FAIL_OST_DQACQ_NET 0x230 -#define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231 -#define OBD_FAIL_OST_SET_INFO_NET 0x232 - -#define OBD_FAIL_LDLM 0x300 -#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 -#define OBD_FAIL_LDLM_ENQUEUE_NET 0x302 -#define OBD_FAIL_LDLM_CONVERT_NET 0x303 -#define OBD_FAIL_LDLM_CANCEL_NET 0x304 -#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305 -#define OBD_FAIL_LDLM_CP_CALLBACK_NET 0x306 -#define OBD_FAIL_LDLM_GL_CALLBACK_NET 0x307 -#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308 -#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309 -#define OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a -#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b -#define OBD_FAIL_LDLM_REPLY 0x30c -#define OBD_FAIL_LDLM_RECOV_CLIENTS 0x30d -#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e -#define OBD_FAIL_LDLM_GLIMPSE 0x30f -#define OBD_FAIL_LDLM_CANCEL_RACE 0x310 -#define OBD_FAIL_LDLM_CANCEL_EVICT_RACE 0x311 -#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312 -#define OBD_FAIL_LDLM_CLOSE_THREAD 0x313 -#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 -#define OBD_FAIL_LDLM_CP_CB_WAIT 0x315 -#define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316 -#define 
OBD_FAIL_LDLM_INTR_CP_AST 0x317 -#define OBD_FAIL_LDLM_CP_BL_RACE 0x318 -#define OBD_FAIL_LDLM_NEW_LOCK 0x319 -#define OBD_FAIL_LDLM_AGL_DELAY 0x31a -#define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b -#define OBD_FAIL_LDLM_OST_LVB 0x31c -#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d -#define OBD_FAIL_LDLM_PAUSE_CANCEL2 0x31f -#define OBD_FAIL_LDLM_CP_CB_WAIT2 0x320 -#define OBD_FAIL_LDLM_CP_CB_WAIT3 0x321 -#define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322 -#define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323 - -#define OBD_FAIL_LDLM_GRANT_CHECK 0x32a - -/* LOCKLESS IO */ -#define OBD_FAIL_LDLM_SET_CONTENTION 0x385 - -#define OBD_FAIL_OSC 0x400 -#define OBD_FAIL_OSC_BRW_READ_BULK 0x401 -#define OBD_FAIL_OSC_BRW_WRITE_BULK 0x402 -#define OBD_FAIL_OSC_LOCK_BL_AST 0x403 -#define OBD_FAIL_OSC_LOCK_CP_AST 0x404 -#define OBD_FAIL_OSC_MATCH 0x405 -#define OBD_FAIL_OSC_BRW_PREP_REQ 0x406 -#define OBD_FAIL_OSC_SHUTDOWN 0x407 -#define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 -#define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 -#define OBD_FAIL_OSC_BRW_PREP_REQ2 0x40a -#define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b -#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c -#define OBD_FAIL_OSC_DIO_PAUSE 0x40d -#define OBD_FAIL_OSC_OBJECT_CONTENTION 0x40e -#define OBD_FAIL_OSC_CP_CANCEL_RACE 0x40f -#define OBD_FAIL_OSC_CP_ENQ_RACE 0x410 -#define OBD_FAIL_OSC_NO_GRANT 0x411 -#define OBD_FAIL_OSC_DELAY_SETTIME 0x412 -#define OBD_FAIL_OSC_DELAY_IO 0x414 - -#define OBD_FAIL_PTLRPC 0x500 -#define OBD_FAIL_PTLRPC_ACK 0x501 -#define OBD_FAIL_PTLRPC_RQBD 0x502 -#define OBD_FAIL_PTLRPC_BULK_GET_NET 0x503 -#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 -#define OBD_FAIL_PTLRPC_DROP_RPC 0x505 -#define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 -#define OBD_FAIL_PTLRPC_DELAY_RECOV 0x507 -#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB 0x508 -#define OBD_FAIL_PTLRPC_PAUSE_REQ 0x50a -#define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c -#define OBD_FAIL_PTLRPC_IMP_DEACTIVE 0x50d -#define OBD_FAIL_PTLRPC_DUMP_LOG 0x50e -#define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f -#define 
OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510 -#define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT 0x511 -#define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT 0x512 -#define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513 -#define OBD_FAIL_PTLRPC_FINISH_REPLAY 0x514 -#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515 -#define OBD_FAIL_PTLRPC_DELAY_IMP_FULL 0x516 -#define OBD_FAIL_PTLRPC_CANCEL_RESEND 0x517 -#define OBD_FAIL_PTLRPC_DROP_BULK 0x51a -#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b -#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c - -#define OBD_FAIL_OBD_PING_NET 0x600 -#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 -#define OBD_FAIL_OBD_LOGD_NET 0x602 -/* OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 obsolete since 2.4 */ -#define OBD_FAIL_OBD_DQACQ 0x604 -#define OBD_FAIL_OBD_LLOG_SETUP 0x605 -#define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606 -#define OBD_FAIL_OBD_IDX_READ_NET 0x607 -#define OBD_FAIL_OBD_IDX_READ_BREAK 0x608 -#define OBD_FAIL_OBD_NO_LRU 0x609 - -#define OBD_FAIL_TGT_REPLY_NET 0x700 -#define OBD_FAIL_TGT_CONN_RACE 0x701 -#define OBD_FAIL_TGT_FORCE_RECONNECT 0x702 -#define OBD_FAIL_TGT_DELAY_CONNECT 0x703 -#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 -#define OBD_FAIL_TGT_DELAY_PRECREATE 0x705 -#define OBD_FAIL_TGT_TOOMANY_THREADS 0x706 -#define OBD_FAIL_TGT_REPLAY_DROP 0x707 -#define OBD_FAIL_TGT_FAKE_EXP 0x708 -#define OBD_FAIL_TGT_REPLAY_DELAY 0x709 -#define OBD_FAIL_TGT_LAST_REPLAY 0x710 -#define OBD_FAIL_TGT_CLIENT_ADD 0x711 -#define OBD_FAIL_TGT_RCVG_FLAG 0x712 -#define OBD_FAIL_TGT_DELAY_CONDITIONAL 0x713 - -#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 -#define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 -#define OBD_FAIL_MDC_OLD_EXT_FLAGS 0x802 -#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803 -#define OBD_FAIL_MDC_RPCS_SEM 0x804 -#define OBD_FAIL_MDC_LIGHTWEIGHT 0x805 -#define OBD_FAIL_MDC_CLOSE 0x806 - -#define OBD_FAIL_MGS 0x900 -#define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901 -#define OBD_FAIL_MGS_ALL_REPLY_NET 0x902 -#define OBD_FAIL_MGC_PAUSE_PROCESS_LOG 0x903 -#define OBD_FAIL_MGS_PAUSE_REQ 0x904 -#define 
OBD_FAIL_MGS_PAUSE_TARGET_REG 0x905 -#define OBD_FAIL_MGS_CONNECT_NET 0x906 -#define OBD_FAIL_MGS_DISCONNECT_NET 0x907 -#define OBD_FAIL_MGS_SET_INFO_NET 0x908 -#define OBD_FAIL_MGS_EXCEPTION_NET 0x909 -#define OBD_FAIL_MGS_TARGET_REG_NET 0x90a -#define OBD_FAIL_MGS_TARGET_DEL_NET 0x90b -#define OBD_FAIL_MGS_CONFIG_READ_NET 0x90c - -#define OBD_FAIL_QUOTA_DQACQ_NET 0xA01 -#define OBD_FAIL_QUOTA_EDQUOT 0xA02 -#define OBD_FAIL_QUOTA_DELAY_REINT 0xA03 -#define OBD_FAIL_QUOTA_RECOVERABLE_ERR 0xA04 - -#define OBD_FAIL_LPROC_REMOVE 0xB00 - -#define OBD_FAIL_SEQ 0x1000 -#define OBD_FAIL_SEQ_QUERY_NET 0x1001 -#define OBD_FAIL_SEQ_EXHAUST 0x1002 - -#define OBD_FAIL_FLD 0x1100 -#define OBD_FAIL_FLD_QUERY_NET 0x1101 -#define OBD_FAIL_FLD_READ_NET 0x1102 - -#define OBD_FAIL_SEC_CTX 0x1200 -#define OBD_FAIL_SEC_CTX_INIT_NET 0x1201 -#define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1202 -#define OBD_FAIL_SEC_CTX_FINI_NET 0x1203 -#define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204 - -#define OBD_FAIL_LLOG 0x1300 -#define OBD_FAIL_LLOG_ORIGIN_CONNECT_NET 0x1301 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CREATE_NET 0x1302 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_DESTROY_NET 0x1303 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_READ_HEADER_NET 0x1304 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_NEXT_BLOCK_NET 0x1305 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_PREV_BLOCK_NET 0x1306 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_WRITE_REC_NET 0x1307 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CLOSE_NET 0x1308 -#define OBD_FAIL_LLOG_CATINFO_NET 0x1309 -#define OBD_FAIL_MDS_SYNC_CAPA_SL 0x1310 -#define OBD_FAIL_SEQ_ALLOC 0x1311 - -#define OBD_FAIL_LLITE 0x1400 -#define OBD_FAIL_LLITE_FAULT_TRUNC_RACE 0x1401 -#define OBD_FAIL_LOCK_STATE_WAIT_INTR 0x1402 -#define OBD_FAIL_LOV_INIT 0x1403 -#define OBD_FAIL_GLIMPSE_DELAY 0x1404 -#define OBD_FAIL_LLITE_XATTR_ENOMEM 0x1405 -#define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406 -#define OBD_FAIL_LLITE_LOST_LAYOUT 0x1407 -#define OBD_FAIL_GETATTR_DELAY 0x1409 - -#define OBD_FAIL_FID_INDIR 0x1501 -#define OBD_FAIL_FID_INLMA 
0x1502 -#define OBD_FAIL_FID_IGIF 0x1504 -#define OBD_FAIL_FID_LOOKUP 0x1505 -#define OBD_FAIL_FID_NOLMA 0x1506 - -/* LFSCK */ -#define OBD_FAIL_LFSCK_DELAY1 0x1600 -#define OBD_FAIL_LFSCK_DELAY2 0x1601 -#define OBD_FAIL_LFSCK_DELAY3 0x1602 -#define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 -#define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 -#define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605 -#define OBD_FAIL_LFSCK_FATAL1 0x1608 -#define OBD_FAIL_LFSCK_FATAL2 0x1609 -#define OBD_FAIL_LFSCK_CRASH 0x160a -#define OBD_FAIL_LFSCK_NO_AUTO 0x160b -#define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c -#define OBD_FAIL_LFSCK_INVALID_PFID 0x1619 -#define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628 - -/* UPDATE */ -#define OBD_FAIL_UPDATE_OBJ_NET 0x1700 -#define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701 - -/* LMV */ -#define OBD_FAIL_UNKNOWN_LMV_STRIPE 0x1901 - -/* Assign references to moved code to reduce code changes */ -#define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id) -#define OBD_FAIL_CHECK(id) CFS_FAIL_CHECK(id) -#define OBD_FAIL_CHECK_VALUE(id, value) CFS_FAIL_CHECK_VALUE(id, value) -#define OBD_FAIL_CHECK_ORSET(id, value) CFS_FAIL_CHECK_ORSET(id, value) -#define OBD_FAIL_CHECK_RESET(id, value) CFS_FAIL_CHECK_RESET(id, value) -#define OBD_FAIL_RETURN(id, ret) CFS_FAIL_RETURN(id, ret) -#define OBD_FAIL_TIMEOUT(id, secs) CFS_FAIL_TIMEOUT(id, secs) -#define OBD_FAIL_TIMEOUT_MS(id, ms) CFS_FAIL_TIMEOUT_MS(id, ms) -#define OBD_FAIL_TIMEOUT_ORSET(id, value, secs) CFS_FAIL_TIMEOUT_ORSET(id, value, secs) -#define OBD_RACE(id) CFS_RACE(id) -#define OBD_FAIL_ONCE CFS_FAIL_ONCE -#define OBD_FAILED CFS_FAILED - -#ifdef CONFIG_DEBUG_SLAB -#define POISON(ptr, c, s) do {} while (0) -#define POISON_PTR(ptr) ((void)0) -#else -#define POISON(ptr, c, s) memset(ptr, c, s) -#define POISON_PTR(ptr) ((ptr) = (void *)0xdeadbeef) -#endif - -#ifdef POISON_BULK -#define POISON_PAGE(page, val) do { \ - memset(kmap(page), val, PAGE_SIZE); \ - kunmap(page); \ -} while (0) -#else -#define POISON_PAGE(page, val) do { } while (0) -#endif - 
-#define OBD_FREE_RCU(ptr, size, handle) \ -do { \ - struct portals_handle *__h = (handle); \ - \ - __h->h_cookie = (unsigned long)(ptr); \ - __h->h_size = (size); \ - call_rcu(&__h->h_rcu, class_handle_free_cb); \ - POISON_PTR(ptr); \ -} while (0) - -#define KEY_IS(str) \ - (keylen >= (sizeof(str) - 1) && \ - memcmp(key, str, (sizeof(str) - 1)) == 0) - -#endif diff --git a/drivers/staging/lustre/lustre/include/seq_range.h b/drivers/staging/lustre/lustre/include/seq_range.h deleted file mode 100644 index 9450da728160..000000000000 --- a/drivers/staging/lustre/lustre/include/seq_range.h +++ /dev/null @@ -1,200 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - * - * Copyright 2015 Cray Inc, all rights reserved. - * Author: Ben Evans. 
- * - * Define lu_seq_range associated functions - */ - -#ifndef _SEQ_RANGE_H_ -#define _SEQ_RANGE_H_ - -#include <uapi/linux/lustre/lustre_idl.h> - -/** - * computes the sequence range type \a range - */ - -static inline unsigned int fld_range_type(const struct lu_seq_range *range) -{ - return range->lsr_flags & LU_SEQ_RANGE_MASK; -} - -/** - * Is this sequence range an OST? \a range - */ - -static inline bool fld_range_is_ost(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_OST; -} - -/** - * Is this sequence range an MDT? \a range - */ - -static inline bool fld_range_is_mdt(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_MDT; -} - -/** - * ANY range is only used when the fld client sends a fld query request, - * but it does not know whether the seq is an MDT or OST, so it will send the - * request with ANY type, which means any seq type from the lookup can be - * expected. /a range - */ -static inline unsigned int fld_range_is_any(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_ANY; -} - -/** - * Apply flags to range \a range \a flags - */ - -static inline void fld_range_set_type(struct lu_seq_range *range, - unsigned int flags) -{ - range->lsr_flags |= flags; -} - -/** - * Add MDT to range type \a range - */ - -static inline void fld_range_set_mdt(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_MDT); -} - -/** - * Add OST to range type \a range - */ - -static inline void fld_range_set_ost(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_OST); -} - -/** - * Add ANY to range type \a range - */ - -static inline void fld_range_set_any(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_ANY); -} - -/** - * computes width of given sequence range \a range - */ - -static inline u64 lu_seq_range_space(const struct lu_seq_range *range) -{ - return range->lsr_end - range->lsr_start; -} - -/** - * 
initialize range to zero \a range - */ - -static inline void lu_seq_range_init(struct lu_seq_range *range) -{ - memset(range, 0, sizeof(*range)); -} - -/** - * check if given seq id \a s is within given range \a range - */ - -static inline bool lu_seq_range_within(const struct lu_seq_range *range, - u64 seq) -{ - return seq >= range->lsr_start && seq < range->lsr_end; -} - -/** - * Is the range sane? Is the end after the beginning? \a range - */ - -static inline bool lu_seq_range_is_sane(const struct lu_seq_range *range) -{ - return range->lsr_end >= range->lsr_start; -} - -/** - * Is the range 0? \a range - */ - -static inline bool lu_seq_range_is_zero(const struct lu_seq_range *range) -{ - return range->lsr_start == 0 && range->lsr_end == 0; -} - -/** - * Is the range out of space? \a range - */ - -static inline bool lu_seq_range_is_exhausted(const struct lu_seq_range *range) -{ - return lu_seq_range_space(range) == 0; -} - -/** - * return 0 if two ranges have the same location, nonzero if they are - * different \a r1 \a r2 - */ - -static inline int lu_seq_range_compare_loc(const struct lu_seq_range *r1, - const struct lu_seq_range *r2) -{ - return r1->lsr_index != r2->lsr_index || - r1->lsr_flags != r2->lsr_flags; -} - -#if !defined(__REQ_LAYOUT_USER__) -/** - * byte swap range structure \a range - */ - -void lustre_swab_lu_seq_range(struct lu_seq_range *range); -#endif -/** - * printf string and argument list for sequence range - */ -#define DRANGE "[%#16.16llx-%#16.16llx]:%x:%s" - -#define PRANGE(range) \ - (range)->lsr_start, \ - (range)->lsr_end, \ - (range)->lsr_index, \ - fld_range_is_mdt(range) ? "mdt" : "ost" - -#endif |