diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/include/lustre_net.h')
-rw-r--r-- | drivers/staging/lustre/lustre/include/lustre_net.h | 2359 |
1 files changed, 0 insertions, 2359 deletions
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h deleted file mode 100644 index d35ae0cda8d2..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ /dev/null @@ -1,2359 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -/** \defgroup PtlRPC Portal RPC and networking module. - * - * PortalRPC is the layer used by rest of lustre code to achieve network - * communications: establish connections with corresponding export and import - * states, listen for a service, send and receive RPCs. - * PortalRPC also includes base recovery framework: packet resending and - * replaying, reconnections, pinger. - * - * PortalRPC utilizes LNet as its transport layer. - * - * @{ - */ - -#ifndef _LUSTRE_NET_H -#define _LUSTRE_NET_H - -/** \defgroup net net - * - * @{ - */ - -#include <linux/uio.h> -#include <linux/libcfs/libcfs.h> -#include <uapi/linux/lnet/nidstr.h> -#include <linux/lnet/api.h> -#include <uapi/linux/lustre/lustre_idl.h> -#include <lustre_errno.h> -#include <lustre_ha.h> -#include <lustre_sec.h> -#include <lustre_import.h> -#include <lprocfs_status.h> -#include <lu_object.h> -#include <lustre_req_layout.h> - -#include <obd_support.h> -#include <uapi/linux/lustre/lustre_ver.h> - -/* MD flags we _always_ use */ -#define PTLRPC_MD_OPTIONS 0 - -/** - * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ... - * In order for the client and server to properly negotiate the maximum - * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two - * value. The client is free to limit the actual RPC size for any bulk - * transfer via cl_max_pages_per_rpc to some non-power-of-two value. - * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS. - */ -#define PTLRPC_BULK_OPS_BITS 4 -#if PTLRPC_BULK_OPS_BITS > 16 -#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS." -#endif -#define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS) -/** - * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and - * should not be used on the server at all. Otherwise, it imposes a - * protocol limitation on the maximum RPC size that can be used by any - * RPC sent to that server in the future. Instead, the server should - * use the negotiated per-client ocd_brw_size to determine the bulk - * RPC count. - */ -#define PTLRPC_BULK_OPS_MASK (~((__u64)PTLRPC_BULK_OPS_COUNT - 1)) - -/** - * Define maxima for bulk I/O. - * - * A single PTLRPC BRW request is sent via up to PTLRPC_BULK_OPS_COUNT - * of LNET_MTU sized RDMA transfers. Clients and servers negotiate the - * currently supported maximum between peers at connect via ocd_brw_size. - */ -#define PTLRPC_MAX_BRW_BITS (LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS) -#define PTLRPC_MAX_BRW_SIZE (1 << PTLRPC_MAX_BRW_BITS) -#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE >> PAGE_SHIFT) - -#define ONE_MB_BRW_SIZE (1 << LNET_MTU_BITS) -#define MD_MAX_BRW_SIZE (1 << LNET_MTU_BITS) -#define MD_MAX_BRW_PAGES (MD_MAX_BRW_SIZE >> PAGE_SHIFT) -#define DT_MAX_BRW_SIZE PTLRPC_MAX_BRW_SIZE -#define DT_MAX_BRW_PAGES (DT_MAX_BRW_SIZE >> PAGE_SHIFT) -#define OFD_MAX_BRW_SIZE (1 << LNET_MTU_BITS) - -/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */ -# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0) -# error "PTLRPC_MAX_BRW_PAGES isn't a power of two" -# endif -# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE)) -# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE" -# endif -# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT) -# error "PTLRPC_MAX_BRW_SIZE too big" -# endif -# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV * PTLRPC_BULK_OPS_COUNT) -# error "PTLRPC_MAX_BRW_PAGES too big" -# endif - -#define PTLRPC_NTHRS_INIT 2 - -/** - * Buffer Constants - * - * Constants determine how memory is used to buffer incoming service requests. - * - * ?_NBUFS # buffers to allocate when growing the pool - * ?_BUFSIZE # bytes in a single request buffer - * ?_MAXREQSIZE # maximum request service will receive - * - * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk - * of ?_NBUFS is added to the pool. - * - * Messages larger than ?_MAXREQSIZE are dropped. Request buffers are - * considered full when less than ?_MAXREQSIZE is left in them. - */ -/** - * Thread Constants - * - * Constants determine how threads are created for ptlrpc service. - * - * ?_NTHRS_INIT # threads to create for each service partition on - * initializing. If it's non-affinity service and - * there is only one partition, it's the overall # - * threads for the service while initializing. - * ?_NTHRS_BASE # threads should be created at least for each - * ptlrpc partition to keep the service healthy. - * It's the low-water mark of threads upper-limit - * for each partition. - * ?_THR_FACTOR # threads can be added on threads upper-limit for - * each CPU core. This factor is only for reference, - * we might decrease value of factor if number of cores - * per CPT is above a limit. - * ?_NTHRS_MAX # overall threads can be created for a service, - * it's a soft limit because if service is running - * on machine with hundreds of cores and tens of - * CPU partitions, we need to guarantee each partition - * has ?_NTHRS_BASE threads, which means total threads - * will be ?_NTHRS_BASE * number_of_cpts which can - * exceed ?_NTHRS_MAX. - * - * Examples - * - * #define MDS_NTHRS_INIT 2 - * #define MDS_NTHRS_BASE 64 - * #define MDS_NTHRS_FACTOR 8 - * #define MDS_NTHRS_MAX 1024 - * - * Example 1): - * --------------------------------------------------------------------- - * Server(A) has 16 cores, user configured it to 4 partitions so each - * partition has 4 cores, then actual number of service threads on each - * partition is: - * MDS_NTHRS_BASE(64) + cores(4) * MDS_NTHRS_FACTOR(8) = 96 - * - * Total number of threads for the service is: - * 96 * partitions(4) = 384 - * - * Example 2): - * --------------------------------------------------------------------- - * Server(B) has 32 cores, user configured it to 4 partitions so each - * partition has 8 cores, then actual number of service threads on each - * partition is: - * MDS_NTHRS_BASE(64) + cores(8) * MDS_NTHRS_FACTOR(8) = 128 - * - * Total number of threads for the service is: - * 128 * partitions(4) = 512 - * - * Example 3): - * --------------------------------------------------------------------- - * Server(B) has 96 cores, user configured it to 8 partitions so each - * partition has 12 cores, then actual number of service threads on each - * partition is: - * MDS_NTHRS_BASE(64) + cores(12) * MDS_NTHRS_FACTOR(8) = 160 - * - * Total number of threads for the service is: - * 160 * partitions(8) = 1280 - * - * However, it's above the soft limit MDS_NTHRS_MAX, so we choose this number - * as upper limit of threads number for each partition: - * MDS_NTHRS_MAX(1024) / partitions(8) = 128 - * - * Example 4): - * --------------------------------------------------------------------- - * Server(C) have a thousand of cores and user configured it to 32 partitions - * MDS_NTHRS_BASE(64) * 32 = 2048 - * - * which is already above soft limit MDS_NTHRS_MAX(1024), but we still need - * to guarantee that each partition has at least MDS_NTHRS_BASE(64) threads - * to keep service healthy, so total number of threads will just be 2048. - * - * NB: we don't suggest to choose server with that many cores because backend - * filesystem itself, buffer cache, or underlying network stack might - * have some SMP scalability issues at that large scale. - * - * If user already has a fat machine with hundreds or thousands of cores, - * there are two choices for configuration: - * a) create CPU table from subset of all CPUs and run Lustre on - * top of this subset - * b) bind service threads on a few partitions, see modparameters of - * MDS and OSS for details -* - * NB: these calculations (and examples below) are simplified to help - * understanding, the real implementation is a little more complex, - * please see ptlrpc_server_nthreads_check() for details. - * - */ - - /* - * LDLM threads constants: - * - * Given 8 as factor and 24 as base threads number - * - * example 1) - * On 4-core machine we will have 24 + 8 * 4 = 56 threads. - * - * example 2) - * On 8-core machine with 2 partitions we will have 24 + 4 * 8 = 56 - * threads for each partition and total threads number will be 112. - * - * example 3) - * On 64-core machine with 8 partitions we will need LDLM_NTHRS_BASE(24) - * threads for each partition to keep service healthy, so total threads - * number should be 24 * 8 = 192. - * - * So with these constants, threads number will be at the similar level - * of old versions, unless target machine has over a hundred cores - */ -#define LDLM_THR_FACTOR 8 -#define LDLM_NTHRS_INIT PTLRPC_NTHRS_INIT -#define LDLM_NTHRS_BASE 24 -#define LDLM_NTHRS_MAX (num_online_cpus() == 1 ? 64 : 128) - -#define LDLM_BL_THREADS LDLM_NTHRS_AUTO_INIT -#define LDLM_CLIENT_NBUFS 1 -#define LDLM_SERVER_NBUFS 64 -#define LDLM_BUFSIZE (8 * 1024) -#define LDLM_MAXREQSIZE (5 * 1024) -#define LDLM_MAXREPSIZE (1024) - -#define MDS_MAXREQSIZE (5 * 1024) /* >= 4736 */ - -/** - * FIEMAP request can be 4K+ for now - */ -#define OST_MAXREQSIZE (16 * 1024) - -/* Macro to hide a typecast. */ -#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args) - -struct ptlrpc_replay_async_args { - int praa_old_state; - int praa_old_status; -}; - -/** - * Structure to single define portal connection. - */ -struct ptlrpc_connection { - /** linkage for connections hash table */ - struct hlist_node c_hash; - /** Our own lnet nid for this connection */ - lnet_nid_t c_self; - /** Remote side nid for this connection */ - struct lnet_process_id c_peer; - /** UUID of the other side */ - struct obd_uuid c_remote_uuid; - /** reference counter for this connection */ - atomic_t c_refcount; -}; - -/** Client definition for PortalRPC */ -struct ptlrpc_client { - /** What lnet portal does this client send messages to by default */ - __u32 cli_request_portal; - /** What portal do we expect replies on */ - __u32 cli_reply_portal; - /** Name of the client */ - char *cli_name; -}; - -/** state flags of requests */ -/* XXX only ones left are those used by the bulk descs as well! */ -#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */ -#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */ - -#define REQ_MAX_ACK_LOCKS 8 - -union ptlrpc_async_args { - /** - * Scratchpad for passing args to completion interpreter. Users - * cast to the struct of their choosing, and BUILD_BUG_ON oversized - * arguments. For _tons_ of context, kmalloc a struct and store - * a pointer to it here. The pointer_arg ensures this struct is at - * least big enough for that. - */ - void *pointer_arg[11]; - __u64 space[7]; -}; - -struct ptlrpc_request_set; -typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int); -typedef int (*set_producer_func)(struct ptlrpc_request_set *, void *); - -/** - * Definition of request set structure. - * Request set is a list of requests (not necessary to the same target) that - * once populated with RPCs could be sent in parallel. - * There are two kinds of request sets. General purpose and with dedicated - * serving thread. Example of the latter is ptlrpcd set. - * For general purpose sets once request set started sending it is impossible - * to add new requests to such set. - * Provides a way to call "completion callbacks" when all requests in the set - * returned. - */ -struct ptlrpc_request_set { - atomic_t set_refcount; - /** number of in queue requests */ - atomic_t set_new_count; - /** number of uncompleted requests */ - atomic_t set_remaining; - /** wait queue to wait on for request events */ - wait_queue_head_t set_waitq; - wait_queue_head_t *set_wakeup_ptr; - /** List of requests in the set */ - struct list_head set_requests; - /** - * List of completion callbacks to be called when the set is completed - * This is only used if \a set_interpret is NULL. - * Links struct ptlrpc_set_cbdata. - */ - struct list_head set_cblist; - /** Completion callback, if only one. */ - set_interpreter_func set_interpret; - /** opaq argument passed to completion \a set_interpret callback. */ - void *set_arg; - /** - * Lock for \a set_new_requests manipulations - * locked so that any old caller can communicate requests to - * the set holder who can then fold them into the lock-free set - */ - spinlock_t set_new_req_lock; - /** List of new yet unsent requests. Only used with ptlrpcd now. */ - struct list_head set_new_requests; - - /** rq_status of requests that have been freed already */ - int set_rc; - /** Additional fields used by the flow control extension */ - /** Maximum number of RPCs in flight */ - int set_max_inflight; - /** Callback function used to generate RPCs */ - set_producer_func set_producer; - /** opaq argument passed to the producer callback */ - void *set_producer_arg; -}; - -/** - * Description of a single ptrlrpc_set callback - */ -struct ptlrpc_set_cbdata { - /** List linkage item */ - struct list_head psc_item; - /** Pointer to interpreting function */ - set_interpreter_func psc_interpret; - /** Opaq argument to pass to the callback */ - void *psc_data; -}; - -struct ptlrpc_bulk_desc; -struct ptlrpc_service_part; -struct ptlrpc_service; - -/** - * ptlrpc callback & work item stuff - */ -struct ptlrpc_cb_id { - void (*cbid_fn)(struct lnet_event *ev); /* specific callback fn */ - void *cbid_arg; /* additional arg */ -}; - -/** Maximum number of locks to fit into reply state */ -#define RS_MAX_LOCKS 8 -#define RS_DEBUG 0 - -/** - * Structure to define reply state on the server - * Reply state holds various reply message information. Also for "difficult" - * replies (rep-ack case) we store the state after sending reply and wait - * for the client to acknowledge the reception. In these cases locks could be - * added to the state for replay/failover consistency guarantees. - */ -struct ptlrpc_reply_state { - /** Callback description */ - struct ptlrpc_cb_id rs_cb_id; - /** Linkage for list of all reply states in a system */ - struct list_head rs_list; - /** Linkage for list of all reply states on same export */ - struct list_head rs_exp_list; - /** Linkage for list of all reply states for same obd */ - struct list_head rs_obd_list; -#if RS_DEBUG - struct list_head rs_debug_list; -#endif - /** A spinlock to protect the reply state flags */ - spinlock_t rs_lock; - /** Reply state flags */ - unsigned long rs_difficult:1; /* ACK/commit stuff */ - unsigned long rs_no_ack:1; /* no ACK, even for - * difficult requests - */ - unsigned long rs_scheduled:1; /* being handled? */ - unsigned long rs_scheduled_ever:1;/* any schedule attempts? */ - unsigned long rs_handled:1; /* been handled yet? */ - unsigned long rs_on_net:1; /* reply_out_callback pending? */ - unsigned long rs_prealloc:1; /* rs from prealloc list */ - unsigned long rs_committed:1;/* the transaction was committed - * and the rs was dispatched - */ - atomic_t rs_refcount; /* number of users */ - /** Number of locks awaiting client ACK */ - int rs_nlocks; - - /** Size of the state */ - int rs_size; - /** opcode */ - __u32 rs_opc; - /** Transaction number */ - __u64 rs_transno; - /** xid */ - __u64 rs_xid; - struct obd_export *rs_export; - struct ptlrpc_service_part *rs_svcpt; - /** Lnet metadata handle for the reply */ - struct lnet_handle_md rs_md_h; - - /** Context for the service thread */ - struct ptlrpc_svc_ctx *rs_svc_ctx; - /** Reply buffer (actually sent to the client), encoded if needed */ - struct lustre_msg *rs_repbuf; /* wrapper */ - /** Size of the reply buffer */ - int rs_repbuf_len; /* wrapper buf length */ - /** Size of the reply message */ - int rs_repdata_len; /* wrapper msg length */ - /** - * Actual reply message. Its content is encrypted (if needed) to - * produce reply buffer for actual sending. In simple case - * of no network encryption we just set \a rs_repbuf to \a rs_msg - */ - struct lustre_msg *rs_msg; /* reply message */ - - /** Handles of locks awaiting client reply ACK */ - struct lustre_handle rs_locks[RS_MAX_LOCKS]; - /** Lock modes of locks in \a rs_locks */ - enum ldlm_mode rs_modes[RS_MAX_LOCKS]; -}; - -struct ptlrpc_thread; - -/** RPC stages */ -enum rq_phase { - RQ_PHASE_NEW = 0xebc0de00, - RQ_PHASE_RPC = 0xebc0de01, - RQ_PHASE_BULK = 0xebc0de02, - RQ_PHASE_INTERPRET = 0xebc0de03, - RQ_PHASE_COMPLETE = 0xebc0de04, - RQ_PHASE_UNREG_RPC = 0xebc0de05, - RQ_PHASE_UNREG_BULK = 0xebc0de06, - RQ_PHASE_UNDEFINED = 0xebc0de07 -}; - -/** Type of request interpreter call-back */ -typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env, - struct ptlrpc_request *req, - void *arg, int rc); - -/** - * Definition of request pool structure. - * The pool is used to store empty preallocated requests for the case - * when we would actually need to send something without performing - * any allocations (to avoid e.g. OOM). - */ -struct ptlrpc_request_pool { - /** Locks the list */ - spinlock_t prp_lock; - /** list of ptlrpc_request structs */ - struct list_head prp_req_list; - /** Maximum message size that would fit into a request from this pool */ - int prp_rq_size; - /** Function to allocate more requests for this pool */ - int (*prp_populate)(struct ptlrpc_request_pool *, int); -}; - -struct lu_context; -struct lu_env; - -struct ldlm_lock; - -#include <lustre_nrs.h> - -/** - * Basic request prioritization operations structure. - * The whole idea is centered around locks and RPCs that might affect locks. - * When a lock is contended we try to give priority to RPCs that might lead - * to fastest release of that lock. - * Currently only implemented for OSTs only in a way that makes all - * IO and truncate RPCs that are coming from a locked region where a lock is - * contended a priority over other requests. - */ -struct ptlrpc_hpreq_ops { - /** - * Check if the lock handle of the given lock is the same as - * taken from the request. - */ - int (*hpreq_lock_match)(struct ptlrpc_request *, struct ldlm_lock *); - /** - * Check if the request is a high priority one. - */ - int (*hpreq_check)(struct ptlrpc_request *); - /** - * Called after the request has been handled. - */ - void (*hpreq_fini)(struct ptlrpc_request *); -}; - -struct ptlrpc_cli_req { - /** For bulk requests on client only: bulk descriptor */ - struct ptlrpc_bulk_desc *cr_bulk; - /** optional time limit for send attempts */ - long cr_delay_limit; - /** time request was first queued */ - time_t cr_queued_time; - /** request sent timeval */ - struct timespec64 cr_sent_tv; - /** time for request really sent out */ - time64_t cr_sent_out; - /** when req reply unlink must finish. */ - time64_t cr_reply_deadline; - /** when req bulk unlink must finish. */ - time64_t cr_bulk_deadline; - /** when req unlink must finish. */ - time64_t cr_req_deadline; - /** Portal to which this request would be sent */ - short cr_req_ptl; - /** Portal where to wait for reply and where reply would be sent */ - short cr_rep_ptl; - /** request resending number */ - unsigned int cr_resend_nr; - /** What was import generation when this request was sent */ - int cr_imp_gen; - enum lustre_imp_state cr_send_state; - /** Per-request waitq introduced by bug 21938 for recovery waiting */ - wait_queue_head_t cr_set_waitq; - /** Link item for request set lists */ - struct list_head cr_set_chain; - /** link to waited ctx */ - struct list_head cr_ctx_chain; - - /** client's half ctx */ - struct ptlrpc_cli_ctx *cr_cli_ctx; - /** Link back to the request set */ - struct ptlrpc_request_set *cr_set; - /** outgoing request MD handle */ - struct lnet_handle_md cr_req_md_h; - /** request-out callback parameter */ - struct ptlrpc_cb_id cr_req_cbid; - /** incoming reply MD handle */ - struct lnet_handle_md cr_reply_md_h; - wait_queue_head_t cr_reply_waitq; - /** reply callback parameter */ - struct ptlrpc_cb_id cr_reply_cbid; - /** Async completion handler, called when reply is received */ - ptlrpc_interpterer_t cr_reply_interp; - /** Async completion context */ - union ptlrpc_async_args cr_async_args; - /** Opaq data for replay and commit callbacks. */ - void *cr_cb_data; - /** Link to the imp->imp_unreplied_list */ - struct list_head cr_unreplied_list; - /** - * Commit callback, called when request is committed and about to be - * freed. - */ - void (*cr_commit_cb)(struct ptlrpc_request *); - /** Replay callback, called after request is replayed at recovery */ - void (*cr_replay_cb)(struct ptlrpc_request *); -}; - -/** client request member alias */ -/* NB: these alias should NOT be used by any new code, instead they should - * be removed step by step to avoid potential abuse - */ -#define rq_bulk rq_cli.cr_bulk -#define rq_delay_limit rq_cli.cr_delay_limit -#define rq_queued_time rq_cli.cr_queued_time -#define rq_sent_tv rq_cli.cr_sent_tv -#define rq_real_sent rq_cli.cr_sent_out -#define rq_reply_deadline rq_cli.cr_reply_deadline -#define rq_bulk_deadline rq_cli.cr_bulk_deadline -#define rq_req_deadline rq_cli.cr_req_deadline -#define rq_nr_resend rq_cli.cr_resend_nr -#define rq_request_portal rq_cli.cr_req_ptl -#define rq_reply_portal rq_cli.cr_rep_ptl -#define rq_import_generation rq_cli.cr_imp_gen -#define rq_send_state rq_cli.cr_send_state -#define rq_set_chain rq_cli.cr_set_chain -#define rq_ctx_chain rq_cli.cr_ctx_chain -#define rq_set rq_cli.cr_set -#define rq_set_waitq rq_cli.cr_set_waitq -#define rq_cli_ctx rq_cli.cr_cli_ctx -#define rq_req_md_h rq_cli.cr_req_md_h -#define rq_req_cbid rq_cli.cr_req_cbid -#define rq_reply_md_h rq_cli.cr_reply_md_h -#define rq_reply_waitq rq_cli.cr_reply_waitq -#define rq_reply_cbid rq_cli.cr_reply_cbid -#define rq_interpret_reply rq_cli.cr_reply_interp -#define rq_async_args rq_cli.cr_async_args -#define rq_cb_data rq_cli.cr_cb_data -#define rq_unreplied_list rq_cli.cr_unreplied_list -#define rq_commit_cb rq_cli.cr_commit_cb -#define rq_replay_cb rq_cli.cr_replay_cb - -struct ptlrpc_srv_req { - /** initial thread servicing this request */ - struct ptlrpc_thread *sr_svc_thread; - /** - * Server side list of incoming unserved requests sorted by arrival - * time. Traversed from time to time to notice about to expire - * requests and sent back "early replies" to clients to let them - * know server is alive and well, just very busy to service their - * requests in time - */ - struct list_head sr_timed_list; - /** server-side per-export list */ - struct list_head sr_exp_list; - /** server-side history, used for debuging purposes. */ - struct list_head sr_hist_list; - /** history sequence # */ - __u64 sr_hist_seq; - /** the index of service's srv_at_array into which request is linked */ - time64_t sr_at_index; - /** authed uid */ - uid_t sr_auth_uid; - /** authed uid mapped to */ - uid_t sr_auth_mapped_uid; - /** RPC is generated from what part of Lustre */ - enum lustre_sec_part sr_sp_from; - /** request session context */ - struct lu_context sr_ses; - /** \addtogroup nrs - * @{ - */ - /** stub for NRS request */ - struct ptlrpc_nrs_request sr_nrq; - /** @} nrs */ - /** request arrival time */ - struct timespec64 sr_arrival_time; - /** server's half ctx */ - struct ptlrpc_svc_ctx *sr_svc_ctx; - /** (server side), pointed directly into req buffer */ - struct ptlrpc_user_desc *sr_user_desc; - /** separated reply state */ - struct ptlrpc_reply_state *sr_reply_state; - /** server-side hp handlers */ - struct ptlrpc_hpreq_ops *sr_ops; - /** incoming request buffer */ - struct ptlrpc_request_buffer_desc *sr_rqbd; -}; - -/** server request member alias */ -/* NB: these alias should NOT be used by any new code, instead they should - * be removed step by step to avoid potential abuse - */ -#define rq_svc_thread rq_srv.sr_svc_thread -#define rq_timed_list rq_srv.sr_timed_list -#define rq_exp_list rq_srv.sr_exp_list -#define rq_history_list rq_srv.sr_hist_list -#define rq_history_seq rq_srv.sr_hist_seq -#define rq_at_index rq_srv.sr_at_index -#define rq_auth_uid rq_srv.sr_auth_uid -#define rq_auth_mapped_uid rq_srv.sr_auth_mapped_uid -#define rq_sp_from rq_srv.sr_sp_from -#define rq_session rq_srv.sr_ses -#define rq_nrq rq_srv.sr_nrq -#define rq_arrival_time rq_srv.sr_arrival_time -#define rq_reply_state rq_srv.sr_reply_state -#define rq_svc_ctx rq_srv.sr_svc_ctx -#define rq_user_desc rq_srv.sr_user_desc -#define rq_ops rq_srv.sr_ops -#define rq_rqbd rq_srv.sr_rqbd - -/** - * Represents remote procedure call. - * - * This is a staple structure used by everybody wanting to send a request - * in Lustre. - */ -struct ptlrpc_request { - /* Request type: one of PTL_RPC_MSG_* */ - int rq_type; - /** Result of request processing */ - int rq_status; - /** - * Linkage item through which this request is included into - * sending/delayed lists on client and into rqbd list on server - */ - struct list_head rq_list; - /** Lock to protect request flags and some other important bits, like - * rq_list - */ - spinlock_t rq_lock; - /** client-side flags are serialized by rq_lock @{ */ - unsigned int rq_intr:1, rq_replied:1, rq_err:1, - rq_timedout:1, rq_resend:1, rq_restart:1, - /** - * when ->rq_replay is set, request is kept by the client even - * after server commits corresponding transaction. This is - * used for operations that require sequence of multiple - * requests to be replayed. The only example currently is file - * open/close. When last request in such a sequence is - * committed, ->rq_replay is cleared on all requests in the - * sequence. - */ - rq_replay:1, - rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, - rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1, - rq_early:1, - rq_req_unlinked:1, /* unlinked request buffer from lnet */ - rq_reply_unlinked:1, /* unlinked reply buffer from lnet */ - rq_memalloc:1, /* req originated from "kswapd" */ - rq_committed:1, - rq_reply_truncated:1, - /** whether the "rq_set" is a valid one */ - rq_invalid_rqset:1, - rq_generation_set:1, - /** do not resend request on -EINPROGRESS */ - rq_no_retry_einprogress:1, - /* allow the req to be sent if the import is in recovery - * status - */ - rq_allow_replay:1, - /* bulk request, sent to server, but uncommitted */ - rq_unstable:1; - /** @} */ - - /** server-side flags @{ */ - unsigned int - rq_hp:1, /**< high priority RPC */ - rq_at_linked:1, /**< link into service's srv_at_array */ - rq_packed_final:1; /**< packed final reply */ - /** @} */ - - /** one of RQ_PHASE_* */ - enum rq_phase rq_phase; - /** one of RQ_PHASE_* to be used next */ - enum rq_phase rq_next_phase; - /** - * client-side refcount for SENT race, server-side refcount - * for multiple replies - */ - atomic_t rq_refcount; - /** - * client-side: - * !rq_truncate : # reply bytes actually received, - * rq_truncate : required repbuf_len for resend - */ - int rq_nob_received; - /** Request length */ - int rq_reqlen; - /** Reply length */ - int rq_replen; - /** Pool if request is from preallocated list */ - struct ptlrpc_request_pool *rq_pool; - /** Request message - what client sent */ - struct lustre_msg *rq_reqmsg; - /** Reply message - server response */ - struct lustre_msg *rq_repmsg; - /** Transaction number */ - __u64 rq_transno; - /** xid */ - __u64 rq_xid; - /** bulk match bits */ - u64 rq_mbits; - /** - * List item to for replay list. Not yet committed requests get linked - * there. - * Also see \a rq_replay comment above. - * It's also link chain on obd_export::exp_req_replay_queue - */ - struct list_head rq_replay_list; - /** non-shared members for client & server request*/ - union { - struct ptlrpc_cli_req rq_cli; - struct ptlrpc_srv_req rq_srv; - }; - /** - * security and encryption data - * @{ - */ - /** description of flavors for client & server */ - struct sptlrpc_flavor rq_flvr; - - /* client/server security flags */ - unsigned int - rq_ctx_init:1, /* context initiation */ - rq_ctx_fini:1, /* context destroy */ - rq_bulk_read:1, /* request bulk read */ - rq_bulk_write:1, /* request bulk write */ - /* server authentication flags */ - rq_auth_gss:1, /* authenticated by gss */ - rq_auth_usr_root:1, /* authed as root */ - rq_auth_usr_mdt:1, /* authed as mdt */ - rq_auth_usr_ost:1, /* authed as ost */ - /* security tfm flags */ - rq_pack_udesc:1, - rq_pack_bulk:1, - /* doesn't expect reply FIXME */ - rq_no_reply:1, - rq_pill_init:1, /* pill initialized */ - rq_srv_req:1; /* server request */ - - /** various buffer pointers */ - struct lustre_msg *rq_reqbuf; /**< req wrapper */ - char *rq_repbuf; /**< rep buffer */ - struct lustre_msg *rq_repdata; /**< rep wrapper msg */ - /** only in priv mode */ - struct lustre_msg *rq_clrbuf; - int rq_reqbuf_len; /* req wrapper buf len */ - int rq_reqdata_len; /* req wrapper msg len */ - int rq_repbuf_len; /* rep buffer len */ - int rq_repdata_len; /* rep wrapper msg len */ - int rq_clrbuf_len; /* only in priv mode */ - int rq_clrdata_len; /* only in priv mode */ - - /** early replies go to offset 0, regular replies go after that */ - unsigned int rq_reply_off; - - /** @} */ - - /** Fields that help to see if request and reply were swabbed or not */ - __u32 rq_req_swab_mask; - __u32 rq_rep_swab_mask; - - /** how many early replies (for stats) */ - int rq_early_count; - - /** Server-side, export on which request was received */ - struct obd_export *rq_export; - /** import where request is being sent */ - struct obd_import *rq_import; - /** our LNet NID */ - lnet_nid_t rq_self; - /** Peer description (the other side) */ - struct lnet_process_id rq_peer; - /** - * service time estimate (secs) - * If the request is not served by this time, it is marked as timed out. - */ - int rq_timeout; - /** - * when request/reply sent (secs), or time when request should be sent - */ - time64_t rq_sent; - /** when request must finish. */ - time64_t rq_deadline; - /** request format description */ - struct req_capsule rq_pill; -}; - -/** - * Call completion handler for rpc if any, return it's status or original - * rc if there was no handler defined for this request. - */ -static inline int ptlrpc_req_interpret(const struct lu_env *env, - struct ptlrpc_request *req, int rc) -{ - if (req->rq_interpret_reply) { - req->rq_status = req->rq_interpret_reply(env, req, - &req->rq_async_args, - rc); - return req->rq_status; - } - return rc; -} - -/* - * Can the request be moved from the regular NRS head to the high-priority NRS - * head (of the same PTLRPC service partition), if any? - * - * For a reliable result, this should be checked under svcpt->scp_req lock. - */ -static inline bool ptlrpc_nrs_req_can_move(struct ptlrpc_request *req) -{ - struct ptlrpc_nrs_request *nrq = &req->rq_nrq; - - /** - * LU-898: Check ptlrpc_nrs_request::nr_enqueued to make sure the - * request has been enqueued first, and ptlrpc_nrs_request::nr_started - * to make sure it has not been scheduled yet (analogous to previous - * (non-NRS) checking of !list_empty(&ptlrpc_request::rq_list). - */ - return nrq->nr_enqueued && !nrq->nr_started && !req->rq_hp; -} - -/** @} nrs */ - -/** - * Returns 1 if request buffer at offset \a index was already swabbed - */ -static inline int lustre_req_swabbed(struct ptlrpc_request *req, size_t index) -{ - LASSERT(index < sizeof(req->rq_req_swab_mask) * 8); - return req->rq_req_swab_mask & (1 << index); -} - -/** - * Returns 1 if request reply buffer at offset \a index was already swabbed - */ -static inline int lustre_rep_swabbed(struct ptlrpc_request *req, size_t index) -{ - LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8); - return req->rq_rep_swab_mask & (1 << index); -} - -/** - * Returns 1 if request needs to be swabbed into local cpu byteorder - */ -static inline int ptlrpc_req_need_swab(struct ptlrpc_request *req) -{ - return lustre_req_swabbed(req, MSG_PTLRPC_HEADER_OFF); -} - -/** - * Returns 1 if request reply needs to be swabbed into local cpu byteorder - */ -static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req) -{ - return lustre_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF); -} - -/** - * Mark request buffer at offset \a index that it was already swabbed - */ -static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, - size_t index) -{ - LASSERT(index < sizeof(req->rq_req_swab_mask) * 8); - LASSERT((req->rq_req_swab_mask & (1 << index)) == 0); - req->rq_req_swab_mask |= 1 << index; -} - -/** - * Mark request reply buffer at offset \a index that it was already swabbed - */ -static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, - size_t index) -{ - LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8); - LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0); - req->rq_rep_swab_mask |= 1 << index; -} - -/** - * Convert numerical request phase value \a phase into text string description - */ -static inline const char * -ptlrpc_phase2str(enum rq_phase phase) -{ - switch (phase) { - case RQ_PHASE_NEW: - return "New"; - case RQ_PHASE_RPC: - return "Rpc"; - case RQ_PHASE_BULK: - return "Bulk"; - case RQ_PHASE_INTERPRET: - return "Interpret"; - case RQ_PHASE_COMPLETE: - return "Complete"; - case RQ_PHASE_UNREG_RPC: - return "UnregRPC"; - case RQ_PHASE_UNREG_BULK: - return "UnregBULK"; - default: - return "?Phase?"; - } -} - -/** - * Convert numerical request phase of the request \a req into text stringi - * description - */ -static inline const char * -ptlrpc_rqphase2str(struct ptlrpc_request *req) -{ - return ptlrpc_phase2str(req->rq_phase); -} - -/** - * Debugging functions and helpers to print request structure into debug log - * @{ - */ -/* Spare the preprocessor, spoil the bugs. */ -#define FLAG(field, str) (field ? str : "") - -/** Convert bit flags into a string */ -#define DEBUG_REQ_FLAGS(req) \ - ptlrpc_rqphase2str(req), \ - FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ - FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \ - FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ - FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ - FLAG(req->rq_no_resend, "N"), \ - FLAG(req->rq_waiting, "W"), \ - FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \ - FLAG(req->rq_committed, "M") - -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s" - -void _debug_req(struct ptlrpc_request *req, - struct libcfs_debug_msg_data *data, const char *fmt, ...) - __printf(3, 4); - -/** - * Helper that decides if we need to print request according to current debug - * level settings - */ -#define debug_req(msgdata, mask, cdls, req, fmt, a...) \ -do { \ - CFS_CHECK_STACK(msgdata, mask, cdls); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - _debug_req((req), msgdata, fmt, ##a); \ -} while (0) - -/** - * This is the debug print function you need to use to print request structure - * content into lustre debug log. - * for most callers (level is a constant) this is resolved at compile time - */ -#define DEBUG_REQ(level, req, fmt, args...) \ -do { \ - if ((level) & (D_ERROR | D_WARNING)) { \ - static struct cfs_debug_limit_state cdls; \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \ - debug_req(&msgdata, level, &cdls, req, "@@@ "fmt" ", ## args);\ - } else { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, NULL); \ - debug_req(&msgdata, level, NULL, req, "@@@ "fmt" ", ## args); \ - } \ -} while (0) -/** @} */ - -/** - * Structure that defines a single page of a bulk transfer - */ -struct ptlrpc_bulk_page { - /** Linkage to list of pages in a bulk */ - struct list_head bp_link; - /** - * Number of bytes in a page to transfer starting from \a bp_pageoffset - */ - int bp_buflen; - /** offset within a page */ - int bp_pageoffset; - /** The page itself */ - struct page *bp_page; -}; - -enum ptlrpc_bulk_op_type { - PTLRPC_BULK_OP_ACTIVE = 0x00000001, - PTLRPC_BULK_OP_PASSIVE = 0x00000002, - PTLRPC_BULK_OP_PUT = 0x00000004, - PTLRPC_BULK_OP_GET = 0x00000008, - PTLRPC_BULK_BUF_KVEC = 0x00000010, - PTLRPC_BULK_BUF_KIOV = 0x00000020, - PTLRPC_BULK_GET_SOURCE = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET, - PTLRPC_BULK_PUT_SINK = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT, - PTLRPC_BULK_GET_SINK = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET, - PTLRPC_BULK_PUT_SOURCE = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT, -}; - -static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET; -} - -static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE; -} - -static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK; -} - -static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_GET_SINK) == PTLRPC_BULK_GET_SINK; -} - -static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE; -} - -static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) - == PTLRPC_BULK_BUF_KVEC; -} - -static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) - == PTLRPC_BULK_BUF_KIOV; -} - -static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_OP_ACTIVE) | - (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE; -} - -static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_OP_ACTIVE) | - (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE; -} - -struct ptlrpc_bulk_frag_ops { - /** - * Add a page \a page to the bulk descriptor \a desc - * Data to transfer in the page starts at offset \a pageoffset and - * amount of data to transfer from the page is \a len - */ - void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len); - - /* - * Add a \a fragment to the bulk descriptor \a desc. - * Data to transfer in the fragment is pointed to by \a frag - * The size of the fragment is \a len - */ - int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len); - - /** - * Uninitialize and free bulk descriptor \a desc. - * Works on bulk descriptors both from server and client side. - */ - void (*release_frags)(struct ptlrpc_bulk_desc *desc); -}; - -extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops; -extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops; - -/** - * Definition of bulk descriptor. - * Bulks are special "Two phase" RPCs where initial request message - * is sent first and it is followed bt a transfer (o receiving) of a large - * amount of data to be settled into pages referenced from the bulk descriptors. - * Bulks transfers (the actual data following the small requests) are done - * on separate LNet portals. - * In lustre we use bulk transfers for READ and WRITE transfers from/to OSTs. - * Another user is readpage for MDT. - */ -struct ptlrpc_bulk_desc { - /** completed with failure */ - unsigned long bd_failure:1; - /** client side */ - unsigned long bd_registered:1; - /** For serialization with callback */ - spinlock_t bd_lock; - /** Import generation when request for this bulk was sent */ - int bd_import_generation; - /** {put,get}{source,sink}{kvec,kiov} */ - enum ptlrpc_bulk_op_type bd_type; - /** LNet portal for this bulk */ - __u32 bd_portal; - /** Server side - export this bulk created for */ - struct obd_export *bd_export; - /** Client side - import this bulk was sent on */ - struct obd_import *bd_import; - /** Back pointer to the request */ - struct ptlrpc_request *bd_req; - struct ptlrpc_bulk_frag_ops *bd_frag_ops; - wait_queue_head_t bd_waitq; /* server side only WQ */ - int bd_iov_count; /* # entries in bd_iov */ - int bd_max_iov; /* allocated size of bd_iov */ - int bd_nob; /* # bytes covered */ - int bd_nob_transferred; /* # bytes GOT/PUT */ - - u64 bd_last_mbits; - - struct ptlrpc_cb_id bd_cbid; /* network callback info */ - lnet_nid_t bd_sender; /* stash event::sender */ - int bd_md_count; /* # valid entries in bd_mds */ - int bd_md_max_brw; /* max entries in bd_mds */ - /** array of associated MDs */ - struct lnet_handle_md bd_mds[PTLRPC_BULK_OPS_COUNT]; - - union { - struct { - /* - * encrypt iov, size is either 0 or bd_iov_count. - */ - struct bio_vec *bd_enc_vec; - struct bio_vec *bd_vec; /* Array of bio_vecs */ - } bd_kiov; - - struct { - struct kvec *bd_enc_kvec; - struct kvec *bd_kvec; /* Array of kvecs */ - } bd_kvec; - } bd_u; -}; - -#define GET_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_vec) -#define BD_GET_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_vec[i]) -#define GET_ENC_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_enc_vec) -#define BD_GET_ENC_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_enc_vec[i]) -#define GET_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_kvec) -#define BD_GET_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_kvec[i]) -#define GET_ENC_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_enc_kvec) -#define BD_GET_ENC_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_enc_kvec[i]) - -enum { - SVC_STOPPED = 1 << 0, - SVC_STOPPING = 1 << 1, - SVC_STARTING = 1 << 2, - SVC_RUNNING = 1 << 3, -}; - -#define PTLRPC_THR_NAME_LEN 32 -/** - * Definition of server service thread structure - */ -struct ptlrpc_thread { - /** - * List of active threads in svc->srv_threads - */ - struct list_head t_link; - /** - * thread-private data (preallocated memory) - */ - void *t_data; - __u32 t_flags; - /** - * service thread index, from ptlrpc_start_threads - */ - unsigned int t_id; - /** - * service thread pid - */ - pid_t t_pid; - /** - * put watchdog in the structure per thread b=14840 - * - * Lustre watchdog is removed for client in the hope - * of a generic watchdog can be merged in kernel. - * When that happens, we should add below back. - * - * struct lc_watchdog *t_watchdog; - */ - /** - * the svc this thread belonged to b=18582 - */ - struct ptlrpc_service_part *t_svcpt; - wait_queue_head_t t_ctl_waitq; - struct lu_env *t_env; - char t_name[PTLRPC_THR_NAME_LEN]; -}; - -static inline int thread_is_stopped(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_STOPPED); -} - -static inline int thread_is_stopping(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_STOPPING); -} - -static inline int thread_is_starting(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_STARTING); -} - -static inline int thread_is_running(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_RUNNING); -} - -static inline void thread_clear_flags(struct ptlrpc_thread *thread, __u32 flags) -{ - thread->t_flags &= ~flags; -} - -static inline void thread_set_flags(struct ptlrpc_thread *thread, __u32 flags) -{ - thread->t_flags = flags; -} - -static inline void thread_add_flags(struct ptlrpc_thread *thread, __u32 flags) -{ - thread->t_flags |= flags; -} - -static inline int thread_test_and_clear_flags(struct ptlrpc_thread *thread, - __u32 flags) -{ - if (thread->t_flags & flags) { - thread->t_flags &= ~flags; - return 1; - } - return 0; -} - -/** - * Request buffer descriptor structure. - * This is a structure that contains one posted request buffer for service. - * Once data land into a buffer, event callback creates actual request and - * notifies wakes one of the service threads to process new incoming request. - * More than one request can fit into the buffer. - */ -struct ptlrpc_request_buffer_desc { - /** Link item for rqbds on a service */ - struct list_head rqbd_list; - /** History of requests for this buffer */ - struct list_head rqbd_reqs; - /** Back pointer to service for which this buffer is registered */ - struct ptlrpc_service_part *rqbd_svcpt; - /** LNet descriptor */ - struct lnet_handle_md rqbd_md_h; - int rqbd_refcount; - /** The buffer itself */ - char *rqbd_buffer; - struct ptlrpc_cb_id rqbd_cbid; - /** - * This "embedded" request structure is only used for the - * last request to fit into the buffer - */ - struct ptlrpc_request rqbd_req; -}; - -typedef int (*svc_handler_t)(struct ptlrpc_request *req); - -struct ptlrpc_service_ops { - /** - * if non-NULL called during thread creation (ptlrpc_start_thread()) - * to initialize service specific per-thread state. - */ - int (*so_thr_init)(struct ptlrpc_thread *thr); - /** - * if non-NULL called during thread shutdown (ptlrpc_main()) to - * destruct state created by ->srv_init(). - */ - void (*so_thr_done)(struct ptlrpc_thread *thr); - /** - * Handler function for incoming requests for this service - */ - int (*so_req_handler)(struct ptlrpc_request *req); - /** - * function to determine priority of the request, it's called - * on every new request - */ - int (*so_hpreq_handler)(struct ptlrpc_request *); - /** - * service-specific print fn - */ - void (*so_req_printer)(void *, struct ptlrpc_request *); -}; - -#ifndef __cfs_cacheline_aligned -/* NB: put it here for reducing patche dependence */ -# define __cfs_cacheline_aligned -#endif - -/** - * How many high priority requests to serve before serving one normal - * priority request - */ -#define PTLRPC_SVC_HP_RATIO 10 - -/** - * Definition of PortalRPC service. - * The service is listening on a particular portal (like tcp port) - * and perform actions for a specific server like IO service for OST - * or general metadata service for MDS. - */ -struct ptlrpc_service { - /** serialize sysfs operations */ - spinlock_t srv_lock; - /** most often accessed fields */ - /** chain thru all services */ - struct list_head srv_list; - /** service operations table */ - struct ptlrpc_service_ops srv_ops; - /** only statically allocated strings here; we don't clean them */ - char *srv_name; - /** only statically allocated strings here; we don't clean them */ - char *srv_thread_name; - /** service thread list */ - struct list_head srv_threads; - /** threads # should be created for each partition on initializing */ - int srv_nthrs_cpt_init; - /** limit of threads number for each partition */ - int srv_nthrs_cpt_limit; - /** Root of debugfs dir tree for this service */ - struct dentry *srv_debugfs_entry; - /** Pointer to statistic data for this service */ - struct lprocfs_stats *srv_stats; - /** # hp per lp reqs to handle */ - int srv_hpreq_ratio; - /** biggest request to receive */ - int srv_max_req_size; - /** biggest reply to send */ - int srv_max_reply_size; - /** size of individual buffers */ - int srv_buf_size; - /** # buffers to allocate in 1 group */ - int srv_nbuf_per_group; - /** Local portal on which to receive requests */ - __u32 srv_req_portal; - /** Portal on the client to send replies to */ - __u32 srv_rep_portal; - /** - * Tags for lu_context associated with this thread, see struct - * lu_context. - */ - __u32 srv_ctx_tags; - /** soft watchdog timeout multiplier */ - int srv_watchdog_factor; - /** under unregister_service */ - unsigned srv_is_stopping:1; - - /** max # request buffers in history per partition */ - int srv_hist_nrqbds_cpt_max; - /** number of CPTs this service bound on */ - int srv_ncpts; - /** CPTs array this service bound on */ - __u32 *srv_cpts; - /** 2^srv_cptab_bits >= cfs_cpt_numbert(srv_cptable) */ - int srv_cpt_bits; - /** CPT table this service is running over */ - struct cfs_cpt_table *srv_cptable; - - /* sysfs object */ - struct kobject srv_kobj; - struct completion srv_kobj_unregister; - /** - * partition data for ptlrpc service - */ - struct ptlrpc_service_part *srv_parts[0]; -}; - -/** - * Definition of PortalRPC service partition data. - * Although a service only has one instance of it right now, but we - * will have multiple instances very soon (instance per CPT). - * - * it has four locks: - * \a scp_lock - * serialize operations on rqbd and requests waiting for preprocess - * \a scp_req_lock - * serialize operations active requests sent to this portal - * \a scp_at_lock - * serialize adaptive timeout stuff - * \a scp_rep_lock - * serialize operations on RS list (reply states) - * - * We don't have any use-case to take two or more locks at the same time - * for now, so there is no lock order issue. - */ -struct ptlrpc_service_part { - /** back reference to owner */ - struct ptlrpc_service *scp_service __cfs_cacheline_aligned; - /* CPT id, reserved */ - int scp_cpt; - /** always increasing number */ - int scp_thr_nextid; - /** # of starting threads */ - int scp_nthrs_starting; - /** # of stopping threads, reserved for shrinking threads */ - int scp_nthrs_stopping; - /** # running threads */ - int scp_nthrs_running; - /** service threads list */ - struct list_head scp_threads; - - /** - * serialize the following fields, used for protecting - * rqbd list and incoming requests waiting for preprocess, - * threads starting & stopping are also protected by this lock. - */ - spinlock_t scp_lock __cfs_cacheline_aligned; - /** total # req buffer descs allocated */ - int scp_nrqbds_total; - /** # posted request buffers for receiving */ - int scp_nrqbds_posted; - /** in progress of allocating rqbd */ - int scp_rqbd_allocating; - /** # incoming reqs */ - int scp_nreqs_incoming; - /** request buffers to be reposted */ - struct list_head scp_rqbd_idle; - /** req buffers receiving */ - struct list_head scp_rqbd_posted; - /** incoming reqs */ - struct list_head scp_req_incoming; - /** timeout before re-posting reqs, in tick */ - long scp_rqbd_timeout; - /** - * all threads sleep on this. This wait-queue is signalled when new - * incoming request arrives and when difficult reply has to be handled. - */ - wait_queue_head_t scp_waitq; - - /** request history */ - struct list_head scp_hist_reqs; - /** request buffer history */ - struct list_head scp_hist_rqbds; - /** # request buffers in history */ - int scp_hist_nrqbds; - /** sequence number for request */ - __u64 scp_hist_seq; - /** highest seq culled from history */ - __u64 scp_hist_seq_culled; - - /** - * serialize the following fields, used for processing requests - * sent to this portal - */ - spinlock_t scp_req_lock __cfs_cacheline_aligned; - /** # reqs in either of the NRS heads below */ - /** # reqs being served */ - int scp_nreqs_active; - /** # HPreqs being served */ - int scp_nhreqs_active; - /** # hp requests handled */ - int scp_hreq_count; - - /** NRS head for regular requests */ - struct ptlrpc_nrs scp_nrs_reg; - /** NRS head for HP requests; this is only valid for services that can - * handle HP requests - */ - struct ptlrpc_nrs *scp_nrs_hp; - - /** AT stuff */ - /** @{ */ - /** - * serialize the following fields, used for changes on - * adaptive timeout - */ - spinlock_t scp_at_lock __cfs_cacheline_aligned; - /** estimated rpc service time */ - struct adaptive_timeout scp_at_estimate; - /** reqs waiting for replies */ - struct ptlrpc_at_array scp_at_array; - /** early reply timer */ - struct timer_list scp_at_timer; - /** debug */ - unsigned long scp_at_checktime; - /** check early replies */ - unsigned scp_at_check; - /** @} */ - - /** - * serialize the following fields, used for processing - * replies for this portal - */ - spinlock_t scp_rep_lock __cfs_cacheline_aligned; - /** all the active replies */ - struct list_head scp_rep_active; - /** List of free reply_states */ - struct list_head scp_rep_idle; - /** waitq to run, when adding stuff to srv_free_rs_list */ - wait_queue_head_t scp_rep_waitq; - /** # 'difficult' replies */ - atomic_t scp_nreps_difficult; -}; - -#define ptlrpc_service_for_each_part(part, i, svc) \ - for (i = 0; \ - i < (svc)->srv_ncpts && \ - (svc)->srv_parts && \ - ((part) = (svc)->srv_parts[i]); i++) - -/** - * Declaration of ptlrpcd control structure - */ -struct ptlrpcd_ctl { - /** - * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE) - */ - unsigned long pc_flags; - /** - * Thread lock protecting structure fields. - */ - spinlock_t pc_lock; - /** - * Start completion. - */ - struct completion pc_starting; - /** - * Stop completion. - */ - struct completion pc_finishing; - /** - * Thread requests set. - */ - struct ptlrpc_request_set *pc_set; - /** - * Thread name used in kthread_run() - */ - char pc_name[16]; - /** - * CPT the thread is bound on. - */ - int pc_cpt; - /** - * Index of ptlrpcd thread in the array. - */ - int pc_index; - /** - * Pointer to the array of partners' ptlrpcd_ctl structure. - */ - struct ptlrpcd_ctl **pc_partners; - /** - * Number of the ptlrpcd's partners. - */ - int pc_npartners; - /** - * Record the partner index to be processed next. - */ - int pc_cursor; - /** - * Error code if the thread failed to fully start. - */ - int pc_error; -}; - -/* Bits for pc_flags */ -enum ptlrpcd_ctl_flags { - /** - * Ptlrpc thread start flag. - */ - LIOD_START = 1 << 0, - /** - * Ptlrpc thread stop flag. - */ - LIOD_STOP = 1 << 1, - /** - * Ptlrpc thread force flag (only stop force so far). - * This will cause aborting any inflight rpcs handled - * by thread if LIOD_STOP is specified. - */ - LIOD_FORCE = 1 << 2, - /** - * This is a recovery ptlrpc thread. - */ - LIOD_RECOVERY = 1 << 3, -}; - -/** - * \addtogroup nrs - * @{ - * - * Service compatibility function; the policy is compatible with all services. - * - * \param[in] svc The service the policy is attempting to register with. - * \param[in] desc The policy descriptor - * - * \retval true The policy is compatible with the service - * - * \see ptlrpc_nrs_pol_desc::pd_compat() - */ -static inline bool nrs_policy_compat_all(const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc) -{ - return true; -} - -/** - * Service compatibility function; the policy is compatible with only a specific - * service which is identified by its human-readable name at - * ptlrpc_service::srv_name. - * - * \param[in] svc The service the policy is attempting to register with. - * \param[in] desc The policy descriptor - * - * \retval false The policy is not compatible with the service - * \retval true The policy is compatible with the service - * - * \see ptlrpc_nrs_pol_desc::pd_compat() - */ -static inline bool nrs_policy_compat_one(const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc) -{ - return strcmp(svc->srv_name, desc->pd_compat_svc_name) == 0; -} - -/** @} nrs */ - -/* ptlrpc/events.c */ -extern struct lnet_handle_eq ptlrpc_eq_h; -int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, - struct lnet_process_id *peer, lnet_nid_t *self); -/** - * These callbacks are invoked by LNet when something happened to - * underlying buffer - * @{ - */ -void request_out_callback(struct lnet_event *ev); -void reply_in_callback(struct lnet_event *ev); -void client_bulk_callback(struct lnet_event *ev); -void request_in_callback(struct lnet_event *ev); -void reply_out_callback(struct lnet_event *ev); -/** @} */ - -/* ptlrpc/connection.c */ -struct ptlrpc_connection *ptlrpc_connection_get(struct lnet_process_id peer, - lnet_nid_t self, - struct obd_uuid *uuid); -int ptlrpc_connection_put(struct ptlrpc_connection *c); -struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); -int ptlrpc_connection_init(void); -void ptlrpc_connection_fini(void); - -/* ptlrpc/niobuf.c */ -/** - * Actual interfacing with LNet to put/get/register/unregister stuff - * @{ - */ - -int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async); - -static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req) -{ - struct ptlrpc_bulk_desc *desc; - int rc; - - desc = req->rq_bulk; - - if (req->rq_bulk_deadline > ktime_get_real_seconds()) - return 1; - - if (!desc) - return 0; - - spin_lock(&desc->bd_lock); - rc = desc->bd_md_count; - spin_unlock(&desc->bd_lock); - return rc; -} - -#define PTLRPC_REPLY_MAYBE_DIFFICULT 0x01 -#define PTLRPC_REPLY_EARLY 0x02 -int ptlrpc_send_reply(struct ptlrpc_request *req, int flags); -int ptlrpc_reply(struct ptlrpc_request *req); -int ptlrpc_send_error(struct ptlrpc_request *req, int difficult); -int ptlrpc_error(struct ptlrpc_request *req); -int ptlrpc_at_get_net_latency(struct ptlrpc_request *req); -int ptl_send_rpc(struct ptlrpc_request *request, int noreply); -int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd); -/** @} */ - -/* ptlrpc/client.c */ -/** - * Client-side portals API. Everything to send requests, receive replies, - * request queues, request management, etc. - * @{ - */ -void ptlrpc_request_committed(struct ptlrpc_request *req, int force); - -int ptlrpc_inc_ref(void); -void ptlrpc_dec_ref(void); - -void ptlrpc_init_client(int req_portal, int rep_portal, char *name, - struct ptlrpc_client *); -struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid); - -int ptlrpc_queue_wait(struct ptlrpc_request *req); -int ptlrpc_replay_req(struct ptlrpc_request *req); -void ptlrpc_abort_inflight(struct obd_import *imp); -void ptlrpc_abort_set(struct ptlrpc_request_set *set); - -struct ptlrpc_request_set *ptlrpc_prep_set(void); -struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func, - void *arg); -int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set); -int ptlrpc_set_wait(struct ptlrpc_request_set *); -void ptlrpc_mark_interrupted(struct ptlrpc_request *req); -void ptlrpc_set_destroy(struct ptlrpc_request_set *); -void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *); - -void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool); -int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq); - -struct ptlrpc_request_pool * -ptlrpc_init_rq_pool(int, int, - int (*populate_pool)(struct ptlrpc_request_pool *, int)); - -void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req); -struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp, - const struct req_format *format); -struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp, - struct ptlrpc_request_pool *, - const struct req_format *); -void ptlrpc_request_free(struct ptlrpc_request *request); -int ptlrpc_request_pack(struct ptlrpc_request *request, - __u32 version, int opcode); -struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *, - const struct req_format *, - __u32, int); -int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, - __u32 version, int opcode, char **bufs, - struct ptlrpc_cli_ctx *ctx); -void ptlrpc_req_finished(struct ptlrpc_request *request); -struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req); -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, - unsigned int nfrags, - unsigned int max_brw, - unsigned int type, - unsigned int portal, - const struct ptlrpc_bulk_frag_ops *ops); - -int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc, - void *frag, int len); -void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len, - int pin); -static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, - int len) -{ - __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1); -} - -static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, - int len) -{ - __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0); -} - -void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk); - -static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc) -{ - int i; - - for (i = 0; i < desc->bd_iov_count ; i++) - put_page(BD_GET_KIOV(desc, i).bv_page); -} - -void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, - struct obd_import *imp); -__u64 ptlrpc_next_xid(void); -__u64 ptlrpc_sample_next_xid(void); -__u64 ptlrpc_req_xid(struct ptlrpc_request *request); - -/* Set of routines to run a function in ptlrpcd context */ -void *ptlrpcd_alloc_work(struct obd_import *imp, - int (*cb)(const struct lu_env *, void *), void *data); -void ptlrpcd_destroy_work(void *handler); -int ptlrpcd_queue_work(void *handler); - -/** @} */ -struct ptlrpc_service_buf_conf { - /* nbufs is buffers # to allocate when growing the pool */ - unsigned int bc_nbufs; - /* buffer size to post */ - unsigned int bc_buf_size; - /* portal to listed for requests on */ - unsigned int bc_req_portal; - /* portal of where to send replies to */ - unsigned int bc_rep_portal; - /* maximum request size to be accepted for this service */ - unsigned int bc_req_max_size; - /* maximum reply size this service can ever send */ - unsigned int bc_rep_max_size; -}; - -struct ptlrpc_service_thr_conf { - /* threadname should be 8 characters or less - 6 will be added on */ - char *tc_thr_name; - /* threads increasing factor for each CPU */ - unsigned int tc_thr_factor; - /* service threads # to start on each partition while initializing */ - unsigned int tc_nthrs_init; - /* - * low water of threads # upper-limit on each partition while running, - * service availability may be impacted if threads number is lower - * than this value. It can be ZERO if the service doesn't require - * CPU affinity or there is only one partition. - */ - unsigned int tc_nthrs_base; - /* "soft" limit for total threads number */ - unsigned int tc_nthrs_max; - /* user specified threads number, it will be validated due to - * other members of this structure. - */ - unsigned int tc_nthrs_user; - /* set NUMA node affinity for service threads */ - unsigned int tc_cpu_affinity; - /* Tags for lu_context associated with service thread */ - __u32 tc_ctx_tags; -}; - -struct ptlrpc_service_cpt_conf { - struct cfs_cpt_table *cc_cptable; - /* string pattern to describe CPTs for a service */ - char *cc_pattern; -}; - -struct ptlrpc_service_conf { - /* service name */ - char *psc_name; - /* soft watchdog timeout multiplifier to print stuck service traces */ - unsigned int psc_watchdog_factor; - /* buffer information */ - struct ptlrpc_service_buf_conf psc_buf; - /* thread information */ - struct ptlrpc_service_thr_conf psc_thr; - /* CPU partition information */ - struct ptlrpc_service_cpt_conf psc_cpt; - /* function table */ - struct ptlrpc_service_ops psc_ops; -}; - -/* ptlrpc/service.c */ -/** - * Server-side services API. Register/unregister service, request state - * management, service thread management - * - * @{ - */ -void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs); -void ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs); -struct ptlrpc_service *ptlrpc_register_service(struct ptlrpc_service_conf *conf, - struct kset *parent, - struct dentry *debugfs_entry); - -int ptlrpc_start_threads(struct ptlrpc_service *svc); -int ptlrpc_unregister_service(struct ptlrpc_service *service); - -int ptlrpc_hr_init(void); -void ptlrpc_hr_fini(void); - -/** @} */ - -/* ptlrpc/import.c */ -/** - * Import API - * @{ - */ -int ptlrpc_connect_import(struct obd_import *imp); -int ptlrpc_init_import(struct obd_import *imp); -int ptlrpc_disconnect_import(struct obd_import *imp, int noclose); -int ptlrpc_import_recovery_state_machine(struct obd_import *imp); - -/* ptlrpc/pack_generic.c */ -int ptlrpc_reconnect_import(struct obd_import *imp); -/** @} */ - -/** - * ptlrpc msg buffer and swab interface - * - * @{ - */ -int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout, - u32 index); -void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout, - u32 index); -int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len); -int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len); - -void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens, - char **bufs); -int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count, - __u32 *lens, char **bufs); -int lustre_pack_reply(struct ptlrpc_request *, int count, __u32 *lens, - char **bufs); -int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, - __u32 *lens, char **bufs, int flags); -#define LPRFL_EARLY_REPLY 1 -int lustre_pack_reply_flags(struct ptlrpc_request *, int count, __u32 *lens, - char **bufs, int flags); -int lustre_shrink_msg(struct lustre_msg *msg, int segment, - unsigned int newlen, int move_data); -void lustre_free_reply_state(struct ptlrpc_reply_state *rs); -int __lustre_unpack_msg(struct lustre_msg *m, int len); -u32 lustre_msg_hdr_size(__u32 magic, u32 count); -u32 lustre_msg_size(__u32 magic, int count, __u32 *lengths); -u32 lustre_msg_size_v2(int count, __u32 *lengths); -u32 lustre_packed_msg_size(struct lustre_msg *msg); -u32 lustre_msg_early_size(void); -void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size); -void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 minlen); -u32 lustre_msg_buflen(struct lustre_msg *m, u32 n); -u32 lustre_msg_bufcount(struct lustre_msg *m); -char *lustre_msg_string(struct lustre_msg *m, u32 n, u32 max_len); -__u32 lustre_msghdr_get_flags(struct lustre_msg *msg); -void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags); -__u32 lustre_msg_get_flags(struct lustre_msg *msg); -void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags); -void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags); -void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags); -__u32 lustre_msg_get_op_flags(struct lustre_msg *msg); -void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags); -struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg); -__u32 lustre_msg_get_type(struct lustre_msg *msg); -void lustre_msg_add_version(struct lustre_msg *msg, u32 version); -__u32 lustre_msg_get_opc(struct lustre_msg *msg); -__u16 lustre_msg_get_tag(struct lustre_msg *msg); -__u64 lustre_msg_get_last_committed(struct lustre_msg *msg); -__u64 *lustre_msg_get_versions(struct lustre_msg *msg); -__u64 lustre_msg_get_transno(struct lustre_msg *msg); -__u64 lustre_msg_get_slv(struct lustre_msg *msg); -__u32 lustre_msg_get_limit(struct lustre_msg *msg); -void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv); -void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit); -int lustre_msg_get_status(struct lustre_msg *msg); -__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg); -__u32 lustre_msg_get_magic(struct lustre_msg *msg); -__u32 lustre_msg_get_timeout(struct lustre_msg *msg); -__u32 lustre_msg_get_service_time(struct lustre_msg *msg); -__u32 lustre_msg_get_cksum(struct lustre_msg *msg); -__u32 lustre_msg_calc_cksum(struct lustre_msg *msg); -void lustre_msg_set_handle(struct lustre_msg *msg, - struct lustre_handle *handle); -void lustre_msg_set_type(struct lustre_msg *msg, __u32 type); -void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc); -void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid); -void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag); -void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions); -void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno); -void lustre_msg_set_status(struct lustre_msg *msg, __u32 status); -void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt); -void ptlrpc_request_set_replen(struct ptlrpc_request *req); -void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout); -void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time); -void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid); -void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum); -void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits); - -static inline void -lustre_shrink_reply(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data) -{ - LASSERT(req->rq_reply_state); - LASSERT(req->rq_repmsg); - req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment, - newlen, move_data); -} - -#ifdef CONFIG_LUSTRE_TRANSLATE_ERRNOS - -static inline int ptlrpc_status_hton(int h) -{ - /* - * Positive errnos must be network errnos, such as LUSTRE_EDEADLK, - * ELDLM_LOCK_ABORTED, etc. - */ - if (h < 0) - return -lustre_errno_hton(-h); - else - return h; -} - -static inline int ptlrpc_status_ntoh(int n) -{ - /* - * See the comment in ptlrpc_status_hton(). - */ - if (n < 0) - return -lustre_errno_ntoh(-n); - else - return n; -} - -#else - -#define ptlrpc_status_hton(h) (h) -#define ptlrpc_status_ntoh(n) (n) - -#endif -/** @} */ - -/** Change request phase of \a req to \a new_phase */ -static inline void -ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase) -{ - if (req->rq_phase == new_phase) - return; - - if (new_phase == RQ_PHASE_UNREG_RPC || - new_phase == RQ_PHASE_UNREG_BULK) { - /* No embedded unregistering phases */ - if (req->rq_phase == RQ_PHASE_UNREG_RPC || - req->rq_phase == RQ_PHASE_UNREG_BULK) - return; - - req->rq_next_phase = req->rq_phase; - if (req->rq_import) - atomic_inc(&req->rq_import->imp_unregistering); - } - - if (req->rq_phase == RQ_PHASE_UNREG_RPC || - req->rq_phase == RQ_PHASE_UNREG_BULK) { - if (req->rq_import) - atomic_dec(&req->rq_import->imp_unregistering); - } - - DEBUG_REQ(D_INFO, req, "move req \"%s\" -> \"%s\"", - ptlrpc_rqphase2str(req), ptlrpc_phase2str(new_phase)); - - req->rq_phase = new_phase; -} - -/** - * Returns true if request \a req got early reply and hard deadline is not met - */ -static inline int -ptlrpc_client_early(struct ptlrpc_request *req) -{ - return req->rq_early; -} - -/** - * Returns true if we got real reply from server for this request - */ -static inline int -ptlrpc_client_replied(struct ptlrpc_request *req) -{ - if (req->rq_reply_deadline > ktime_get_real_seconds()) - return 0; - return req->rq_replied; -} - -/** Returns true if request \a req is in process of receiving server reply */ -static inline int -ptlrpc_client_recv(struct ptlrpc_request *req) -{ - if (req->rq_reply_deadline > ktime_get_real_seconds()) - return 1; - return req->rq_receiving_reply; -} - -static inline int -ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req) -{ - int rc; - - spin_lock(&req->rq_lock); - if (req->rq_reply_deadline > ktime_get_real_seconds()) { - spin_unlock(&req->rq_lock); - return 1; - } - if (req->rq_req_deadline > ktime_get_real_seconds()) { - spin_unlock(&req->rq_lock); - return 1; - } - rc = !req->rq_req_unlinked || !req->rq_reply_unlinked || - req->rq_receiving_reply; - spin_unlock(&req->rq_lock); - return rc; -} - -static inline void -ptlrpc_client_wake_req(struct ptlrpc_request *req) -{ - if (!req->rq_set) - wake_up(&req->rq_reply_waitq); - else - wake_up(&req->rq_set->set_waitq); -} - -static inline void -ptlrpc_rs_addref(struct ptlrpc_reply_state *rs) -{ - LASSERT(atomic_read(&rs->rs_refcount) > 0); - atomic_inc(&rs->rs_refcount); -} - -static inline void -ptlrpc_rs_decref(struct ptlrpc_reply_state *rs) -{ - LASSERT(atomic_read(&rs->rs_refcount) > 0); - if (atomic_dec_and_test(&rs->rs_refcount)) - lustre_free_reply_state(rs); -} - -/* Should only be called once per req */ -static inline void ptlrpc_req_drop_rs(struct ptlrpc_request *req) -{ - if (!req->rq_reply_state) - return; /* shouldn't occur */ - ptlrpc_rs_decref(req->rq_reply_state); - req->rq_reply_state = NULL; - req->rq_repmsg = NULL; -} - -static inline __u32 lustre_request_magic(struct ptlrpc_request *req) -{ - return lustre_msg_get_magic(req->rq_reqmsg); -} - -static inline int ptlrpc_req_get_repsize(struct ptlrpc_request *req) -{ - switch (req->rq_reqmsg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return req->rq_reqmsg->lm_repsize; - default: - LASSERTF(0, "incorrect message magic: %08x\n", - req->rq_reqmsg->lm_magic); - return -EFAULT; - } -} - -static inline int ptlrpc_send_limit_expired(struct ptlrpc_request *req) -{ - if (req->rq_delay_limit != 0 && - time_before(cfs_time_add(req->rq_queued_time, - req->rq_delay_limit * HZ), - cfs_time_current())) { - return 1; - } - return 0; -} - -static inline int ptlrpc_no_resend(struct ptlrpc_request *req) -{ - if (!req->rq_no_resend && ptlrpc_send_limit_expired(req)) { - spin_lock(&req->rq_lock); - req->rq_no_resend = 1; - spin_unlock(&req->rq_lock); - } - return req->rq_no_resend; -} - -static inline int -ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt) -{ - int at = AT_OFF ? 0 : at_get(&svcpt->scp_at_estimate); - - return svcpt->scp_service->srv_watchdog_factor * - max_t(int, at, obd_timeout); -} - -static inline struct ptlrpc_service * -ptlrpc_req2svc(struct ptlrpc_request *req) -{ - return req->rq_rqbd->rqbd_svcpt->scp_service; -} - -/* ldlm/ldlm_lib.c */ -/** - * Target client logic - * @{ - */ -int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg); -int client_obd_cleanup(struct obd_device *obddev); -int client_connect_import(const struct lu_env *env, - struct obd_export **exp, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *, - void *localdata); -int client_disconnect_export(struct obd_export *exp); -int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority); -int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid); -int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer, - struct obd_uuid *uuid); -int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid); -void client_destroy_import(struct obd_import *imp); -/** @} */ - -/* ptlrpc/pinger.c */ -/** - * Pinger API (client side only) - * @{ - */ -enum timeout_event { - TIMEOUT_GRANT = 1 -}; - -struct timeout_item; -typedef int (*timeout_cb_t)(struct timeout_item *, void *); -int ptlrpc_pinger_add_import(struct obd_import *imp); -int ptlrpc_pinger_del_import(struct obd_import *imp); -int ptlrpc_add_timeout_client(int time, enum timeout_event event, - timeout_cb_t cb, void *data, - struct list_head *obd_list); -int ptlrpc_del_timeout_client(struct list_head *obd_list, - enum timeout_event event); -struct ptlrpc_request *ptlrpc_prep_ping(struct obd_import *imp); -int ptlrpc_obd_ping(struct obd_device *obd); -void ptlrpc_pinger_ir_up(void); -void ptlrpc_pinger_ir_down(void); -/** @} */ -int ptlrpc_pinger_suppress_pings(void); - -/* ptlrpc/ptlrpcd.c */ -void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force); -void ptlrpcd_free(struct ptlrpcd_ctl *pc); -void ptlrpcd_wake(struct ptlrpc_request *req); -void ptlrpcd_add_req(struct ptlrpc_request *req); -int ptlrpcd_addref(void); -void ptlrpcd_decref(void); - -/* ptlrpc/lproc_ptlrpc.c */ -/** - * procfs output related functions - * @{ - */ -const char *ll_opcode2str(__u32 opcode); -void ptlrpc_lprocfs_register_obd(struct obd_device *obd); -void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd); -void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes); -/** @} */ - -/* ptlrpc/llog_client.c */ -extern struct llog_operations llog_client_ops; -/** @} net */ - -#endif -/** @} PtlRPC */ |