diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/include/lustre_net.h')
-rw-r--r-- | drivers/staging/lustre/lustre/include/lustre_net.h | 421 |
1 files changed, 247 insertions, 174 deletions
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index a7973d5de168..d5debd615fdf 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -270,6 +266,11 @@ /* Macro to hide a typecast. */ #define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args) +struct ptlrpc_replay_async_args { + int praa_old_state; + int praa_old_status; +}; + /** * Structure to single define portal connection. */ @@ -479,8 +480,9 @@ enum rq_phase { RQ_PHASE_BULK = 0xebc0de02, RQ_PHASE_INTERPRET = 0xebc0de03, RQ_PHASE_COMPLETE = 0xebc0de04, - RQ_PHASE_UNREGISTERING = 0xebc0de05, - RQ_PHASE_UNDEFINED = 0xebc0de06 + RQ_PHASE_UNREG_RPC = 0xebc0de05, + RQ_PHASE_UNREG_BULK = 0xebc0de06, + RQ_PHASE_UNDEFINED = 0xebc0de07 }; /** Type of request interpreter call-back */ @@ -1247,22 +1249,103 @@ struct ptlrpc_hpreq_ops { void (*hpreq_fini)(struct ptlrpc_request *); }; -/** - * Represents remote procedure call. - * - * This is a staple structure used by everybody wanting to send a request - * in Lustre. - */ -struct ptlrpc_request { - /* Request type: one of PTL_RPC_MSG_* */ - int rq_type; - /** Result of request processing */ - int rq_status; +struct ptlrpc_cli_req { + /** For bulk requests on client only: bulk descriptor */ + struct ptlrpc_bulk_desc *cr_bulk; + /** optional time limit for send attempts */ + long cr_delay_limit; + /** time request was first queued */ + time_t cr_queued_time; + /** request sent timeval */ + struct timespec64 cr_sent_tv; + /** time for request really sent out */ + time_t cr_sent_out; + /** when req reply unlink must finish. */ + time_t cr_reply_deadline; + /** when req bulk unlink must finish. */ + time_t cr_bulk_deadline; + /** when req unlink must finish. */ + time_t cr_req_deadline; + /** Portal to which this request would be sent */ + short cr_req_ptl; + /** Portal where to wait for reply and where reply would be sent */ + short cr_rep_ptl; + /** request resending number */ + unsigned int cr_resend_nr; + /** What was import generation when this request was sent */ + int cr_imp_gen; + enum lustre_imp_state cr_send_state; + /** Per-request waitq introduced by bug 21938 for recovery waiting */ + wait_queue_head_t cr_set_waitq; + /** Link item for request set lists */ + struct list_head cr_set_chain; + /** link to waited ctx */ + struct list_head cr_ctx_chain; + + /** client's half ctx */ + struct ptlrpc_cli_ctx *cr_cli_ctx; + /** Link back to the request set */ + struct ptlrpc_request_set *cr_set; + /** outgoing request MD handle */ + lnet_handle_md_t cr_req_md_h; + /** request-out callback parameter */ + struct ptlrpc_cb_id cr_req_cbid; + /** incoming reply MD handle */ + lnet_handle_md_t cr_reply_md_h; + wait_queue_head_t cr_reply_waitq; + /** reply callback parameter */ + struct ptlrpc_cb_id cr_reply_cbid; + /** Async completion handler, called when reply is received */ + ptlrpc_interpterer_t cr_reply_interp; + /** Async completion context */ + union ptlrpc_async_args cr_async_args; + /** Opaq data for replay and commit callbacks. */ + void *cr_cb_data; /** - * Linkage item through which this request is included into - * sending/delayed lists on client and into rqbd list on server + * Commit callback, called when request is committed and about to be + * freed. */ - struct list_head rq_list; + void (*cr_commit_cb)(struct ptlrpc_request *); + /** Replay callback, called after request is replayed at recovery */ + void (*cr_replay_cb)(struct ptlrpc_request *); +}; + +/** client request member alias */ +/* NB: these alias should NOT be used by any new code, instead they should + * be removed step by step to avoid potential abuse + */ +#define rq_bulk rq_cli.cr_bulk +#define rq_delay_limit rq_cli.cr_delay_limit +#define rq_queued_time rq_cli.cr_queued_time +#define rq_sent_tv rq_cli.cr_sent_tv +#define rq_real_sent rq_cli.cr_sent_out +#define rq_reply_deadline rq_cli.cr_reply_deadline +#define rq_bulk_deadline rq_cli.cr_bulk_deadline +#define rq_req_deadline rq_cli.cr_req_deadline +#define rq_nr_resend rq_cli.cr_resend_nr +#define rq_request_portal rq_cli.cr_req_ptl +#define rq_reply_portal rq_cli.cr_rep_ptl +#define rq_import_generation rq_cli.cr_imp_gen +#define rq_send_state rq_cli.cr_send_state +#define rq_set_chain rq_cli.cr_set_chain +#define rq_ctx_chain rq_cli.cr_ctx_chain +#define rq_set rq_cli.cr_set +#define rq_set_waitq rq_cli.cr_set_waitq +#define rq_cli_ctx rq_cli.cr_cli_ctx +#define rq_req_md_h rq_cli.cr_req_md_h +#define rq_req_cbid rq_cli.cr_req_cbid +#define rq_reply_md_h rq_cli.cr_reply_md_h +#define rq_reply_waitq rq_cli.cr_reply_waitq +#define rq_reply_cbid rq_cli.cr_reply_cbid +#define rq_interpret_reply rq_cli.cr_reply_interp +#define rq_async_args rq_cli.cr_async_args +#define rq_cb_data rq_cli.cr_cb_data +#define rq_commit_cb rq_cli.cr_commit_cb +#define rq_replay_cb rq_cli.cr_replay_cb + +struct ptlrpc_srv_req { + /** initial thread servicing this request */ + struct ptlrpc_thread *sr_svc_thread; /** * Server side list of incoming unserved requests sorted by arrival * time. Traversed from time to time to notice about to expire @@ -1270,32 +1353,86 @@ struct ptlrpc_request { * know server is alive and well, just very busy to service their * requests in time */ - struct list_head rq_timed_list; - /** server-side history, used for debugging purposes. */ - struct list_head rq_history_list; + struct list_head sr_timed_list; /** server-side per-export list */ - struct list_head rq_exp_list; - /** server-side hp handlers */ - struct ptlrpc_hpreq_ops *rq_ops; - - /** initial thread servicing this request */ - struct ptlrpc_thread *rq_svc_thread; - + struct list_head sr_exp_list; + /** server-side history, used for debuging purposes. */ + struct list_head sr_hist_list; /** history sequence # */ - __u64 rq_history_seq; + __u64 sr_hist_seq; + /** the index of service's srv_at_array into which request is linked */ + time_t sr_at_index; + /** authed uid */ + uid_t sr_auth_uid; + /** authed uid mapped to */ + uid_t sr_auth_mapped_uid; + /** RPC is generated from what part of Lustre */ + enum lustre_sec_part sr_sp_from; + /** request session context */ + struct lu_context sr_ses; /** \addtogroup nrs * @{ */ /** stub for NRS request */ - struct ptlrpc_nrs_request rq_nrq; + struct ptlrpc_nrs_request sr_nrq; /** @} nrs */ - /** the index of service's srv_at_array into which request is linked */ - u32 rq_at_index; + /** request arrival time */ + struct timespec64 sr_arrival_time; + /** server's half ctx */ + struct ptlrpc_svc_ctx *sr_svc_ctx; + /** (server side), pointed directly into req buffer */ + struct ptlrpc_user_desc *sr_user_desc; + /** separated reply state */ + struct ptlrpc_reply_state *sr_reply_state; + /** server-side hp handlers */ + struct ptlrpc_hpreq_ops *sr_ops; + /** incoming request buffer */ + struct ptlrpc_request_buffer_desc *sr_rqbd; +}; + +/** server request member alias */ +/* NB: these alias should NOT be used by any new code, instead they should + * be removed step by step to avoid potential abuse + */ +#define rq_svc_thread rq_srv.sr_svc_thread +#define rq_timed_list rq_srv.sr_timed_list +#define rq_exp_list rq_srv.sr_exp_list +#define rq_history_list rq_srv.sr_hist_list +#define rq_history_seq rq_srv.sr_hist_seq +#define rq_at_index rq_srv.sr_at_index +#define rq_auth_uid rq_srv.sr_auth_uid +#define rq_auth_mapped_uid rq_srv.sr_auth_mapped_uid +#define rq_sp_from rq_srv.sr_sp_from +#define rq_session rq_srv.sr_ses +#define rq_nrq rq_srv.sr_nrq +#define rq_arrival_time rq_srv.sr_arrival_time +#define rq_reply_state rq_srv.sr_reply_state +#define rq_svc_ctx rq_srv.sr_svc_ctx +#define rq_user_desc rq_srv.sr_user_desc +#define rq_ops rq_srv.sr_ops +#define rq_rqbd rq_srv.sr_rqbd + +/** + * Represents remote procedure call. + * + * This is a staple structure used by everybody wanting to send a request + * in Lustre. + */ +struct ptlrpc_request { + /* Request type: one of PTL_RPC_MSG_* */ + int rq_type; + /** Result of request processing */ + int rq_status; + /** + * Linkage item through which this request is included into + * sending/delayed lists on client and into rqbd list on server + */ + struct list_head rq_list; /** Lock to protect request flags and some other important bits, like * rq_list */ spinlock_t rq_lock; - /** client-side flags are serialized by rq_lock */ + /** client-side flags are serialized by rq_lock @{ */ unsigned int rq_intr:1, rq_replied:1, rq_err:1, rq_timedout:1, rq_resend:1, rq_restart:1, /** @@ -1311,18 +1448,15 @@ struct ptlrpc_request { rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1, rq_early:1, - rq_req_unlink:1, rq_reply_unlink:1, + rq_req_unlinked:1, /* unlinked request buffer from lnet */ + rq_reply_unlinked:1, /* unlinked reply buffer from lnet */ rq_memalloc:1, /* req originated from "kswapd" */ - /* server-side flags */ - rq_packed_final:1, /* packed final reply */ - rq_hp:1, /* high priority RPC */ - rq_at_linked:1, /* link into service's srv_at_array */ - rq_reply_truncate:1, rq_committed:1, - /* whether the "rq_set" is a valid one */ + rq_reply_truncated:1, + /** whether the "rq_set" is a valid one */ rq_invalid_rqset:1, rq_generation_set:1, - /* do not resend request on -EINPROGRESS */ + /** do not resend request on -EINPROGRESS */ rq_no_retry_einprogress:1, /* allow the req to be sent if the import is in recovery * status @@ -1330,20 +1464,24 @@ struct ptlrpc_request { rq_allow_replay:1, /* bulk request, sent to server, but uncommitted */ rq_unstable:1; + /** @} */ - unsigned int rq_nr_resend; - - enum rq_phase rq_phase; /* one of RQ_PHASE_* */ - enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */ - atomic_t rq_refcount; /* client-side refcount for SENT race, - * server-side refcount for multiple replies - */ - - /** Portal to which this request would be sent */ - short rq_request_portal; /* XXX FIXME bug 249 */ - /** Portal where to wait for reply and where reply would be sent */ - short rq_reply_portal; /* XXX FIXME bug 249 */ + /** server-side flags @{ */ + unsigned int + rq_hp:1, /**< high priority RPC */ + rq_at_linked:1, /**< link into service's srv_at_array */ + rq_packed_final:1; /**< packed final reply */ + /** @} */ + /** one of RQ_PHASE_* */ + enum rq_phase rq_phase; + /** one of RQ_PHASE_* to be used next */ + enum rq_phase rq_next_phase; + /** + * client-side refcount for SENT race, server-side refcount + * for multiple replies + */ + atomic_t rq_refcount; /** * client-side: * !rq_truncate : # reply bytes actually received, @@ -1354,6 +1492,8 @@ struct ptlrpc_request { int rq_reqlen; /** Reply length */ int rq_replen; + /** Pool if request is from preallocated list */ + struct ptlrpc_request_pool *rq_pool; /** Request message - what client sent */ struct lustre_msg *rq_reqmsg; /** Reply message - server response */ @@ -1366,19 +1506,20 @@ struct ptlrpc_request { * List item to for replay list. Not yet committed requests get linked * there. * Also see \a rq_replay comment above. + * It's also link chain on obd_export::exp_req_replay_queue */ struct list_head rq_replay_list; - + /** non-shared members for client & server request*/ + union { + struct ptlrpc_cli_req rq_cli; + struct ptlrpc_srv_req rq_srv; + }; /** * security and encryption data * @{ */ - struct ptlrpc_cli_ctx *rq_cli_ctx; /**< client's half ctx */ - struct ptlrpc_svc_ctx *rq_svc_ctx; /**< server's half ctx */ - struct list_head rq_ctx_chain; /**< link to waited ctx */ - - struct sptlrpc_flavor rq_flvr; /**< for client & server */ - enum lustre_sec_part rq_sp_from; + /** description of flavors for client & server */ + struct sptlrpc_flavor rq_flvr; /* client/server security flags */ unsigned int @@ -1388,7 +1529,6 @@ struct ptlrpc_request { rq_bulk_write:1, /* request bulk write */ /* server authentication flags */ rq_auth_gss:1, /* authenticated by gss */ - rq_auth_remote:1, /* authed as remote user */ rq_auth_usr_root:1, /* authed as root */ rq_auth_usr_mdt:1, /* authed as mdt */ rq_auth_usr_ost:1, /* authed as ost */ @@ -1397,19 +1537,15 @@ struct ptlrpc_request { rq_pack_bulk:1, /* doesn't expect reply FIXME */ rq_no_reply:1, - rq_pill_init:1; /* pill initialized */ - - uid_t rq_auth_uid; /* authed uid */ - uid_t rq_auth_mapped_uid; /* authed uid mapped to */ - - /* (server side), pointed directly into req buffer */ - struct ptlrpc_user_desc *rq_user_desc; - - /* various buffer pointers */ - struct lustre_msg *rq_reqbuf; /* req wrapper */ - char *rq_repbuf; /* rep buffer */ - struct lustre_msg *rq_repdata; /* rep wrapper msg */ - struct lustre_msg *rq_clrbuf; /* only in priv mode */ + rq_pill_init:1, /* pill initialized */ + rq_srv_req:1; /* server request */ + + /** various buffer pointers */ + struct lustre_msg *rq_reqbuf; /**< req wrapper */ + char *rq_repbuf; /**< rep buffer */ + struct lustre_msg *rq_repdata; /**< rep wrapper msg */ + /** only in priv mode */ + struct lustre_msg *rq_clrbuf; int rq_reqbuf_len; /* req wrapper buf len */ int rq_reqdata_len; /* req wrapper msg len */ int rq_repbuf_len; /* rep buffer len */ @@ -1426,97 +1562,28 @@ struct ptlrpc_request { __u32 rq_req_swab_mask; __u32 rq_rep_swab_mask; - /** What was import generation when this request was sent */ - int rq_import_generation; - enum lustre_imp_state rq_send_state; - /** how many early replies (for stats) */ int rq_early_count; - /** client+server request */ - lnet_handle_md_t rq_req_md_h; - struct ptlrpc_cb_id rq_req_cbid; - /** optional time limit for send attempts */ - long rq_delay_limit; - /** time request was first queued */ - unsigned long rq_queued_time; - - /* server-side... */ - /** request arrival time */ - struct timespec64 rq_arrival_time; - /** separated reply state */ - struct ptlrpc_reply_state *rq_reply_state; - /** incoming request buffer */ - struct ptlrpc_request_buffer_desc *rq_rqbd; - - /** client-only incoming reply */ - lnet_handle_md_t rq_reply_md_h; - wait_queue_head_t rq_reply_waitq; - struct ptlrpc_cb_id rq_reply_cbid; - + /** Server-side, export on which request was received */ + struct obd_export *rq_export; + /** import where request is being sent */ + struct obd_import *rq_import; /** our LNet NID */ lnet_nid_t rq_self; /** Peer description (the other side) */ lnet_process_id_t rq_peer; - /** Server-side, export on which request was received */ - struct obd_export *rq_export; - /** Client side, import where request is being sent */ - struct obd_import *rq_import; - - /** Replay callback, called after request is replayed at recovery */ - void (*rq_replay_cb)(struct ptlrpc_request *); /** - * Commit callback, called when request is committed and about to be - * freed. + * service time estimate (secs) + * If the request is not served by this time, it is marked as timed out. */ - void (*rq_commit_cb)(struct ptlrpc_request *); - /** Opaq data for replay and commit callbacks. */ - void *rq_cb_data; - - /** For bulk requests on client only: bulk descriptor */ - struct ptlrpc_bulk_desc *rq_bulk; - - /** client outgoing req */ + int rq_timeout; /** * when request/reply sent (secs), or time when request should be sent */ time64_t rq_sent; - /** time for request really sent out */ - time64_t rq_real_sent; - - /** when request must finish. volatile - * so that servers' early reply updates to the deadline aren't - * kept in per-cpu cache - */ - volatile time64_t rq_deadline; - /** when req reply unlink must finish. */ - time64_t rq_reply_deadline; - /** when req bulk unlink must finish. */ - time64_t rq_bulk_deadline; - /** - * service time estimate (secs) - * If the requestsis not served by this time, it is marked as timed out. - */ - int rq_timeout; - - /** Multi-rpc bits */ - /** Per-request waitq introduced by bug 21938 for recovery waiting */ - wait_queue_head_t rq_set_waitq; - /** Link item for request set lists */ - struct list_head rq_set_chain; - /** Link back to the request set */ - struct ptlrpc_request_set *rq_set; - /** Async completion handler, called when reply is received */ - ptlrpc_interpterer_t rq_interpret_reply; - /** Async completion context */ - union ptlrpc_async_args rq_async_args; - - /** Pool if request is from preallocated list */ - struct ptlrpc_request_pool *rq_pool; - - struct lu_context rq_session; - struct lu_context rq_recov_session; - + /** when request must finish. */ + time64_t rq_deadline; /** request format description */ struct req_capsule rq_pill; }; @@ -1629,8 +1696,10 @@ ptlrpc_phase2str(enum rq_phase phase) return "Interpret"; case RQ_PHASE_COMPLETE: return "Complete"; - case RQ_PHASE_UNREGISTERING: - return "Unregistering"; + case RQ_PHASE_UNREG_RPC: + return "UnregRPC"; + case RQ_PHASE_UNREG_BULK: + return "UnregBULK"; default: return "?Phase?"; } @@ -1657,7 +1726,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req) #define DEBUG_REQ_FLAGS(req) \ ptlrpc_rqphase2str(req), \ FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ - FLAG(req->rq_err, "E"), \ + FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \ FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ FLAG(req->rq_no_resend, "N"), \ @@ -1665,7 +1734,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req) FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \ FLAG(req->rq_committed, "M") -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s" +#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s" void _debug_req(struct ptlrpc_request *req, struct libcfs_debug_msg_data *data, const char *fmt, ...) @@ -2316,8 +2385,7 @@ static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req) desc = req->rq_bulk; - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && - req->rq_bulk_deadline > ktime_get_real_seconds()) + if (req->rq_bulk_deadline > ktime_get_real_seconds()) return 1; if (!desc) @@ -2664,13 +2732,20 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase) if (req->rq_phase == new_phase) return; - if (new_phase == RQ_PHASE_UNREGISTERING) { + if (new_phase == RQ_PHASE_UNREG_RPC || + new_phase == RQ_PHASE_UNREG_BULK) { + /* No embedded unregistering phases */ + if (req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK) + return; + req->rq_next_phase = req->rq_phase; if (req->rq_import) atomic_inc(&req->rq_import->imp_unregistering); } - if (req->rq_phase == RQ_PHASE_UNREGISTERING) { + if (req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK) { if (req->rq_import) atomic_dec(&req->rq_import->imp_unregistering); } @@ -2687,9 +2762,6 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase) static inline int ptlrpc_client_early(struct ptlrpc_request *req) { - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) - return 0; return req->rq_early; } @@ -2699,8 +2771,7 @@ ptlrpc_client_early(struct ptlrpc_request *req) static inline int ptlrpc_client_replied(struct ptlrpc_request *req) { - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) + if (req->rq_reply_deadline > ktime_get_real_seconds()) return 0; return req->rq_replied; } @@ -2709,8 +2780,7 @@ ptlrpc_client_replied(struct ptlrpc_request *req) static inline int ptlrpc_client_recv(struct ptlrpc_request *req) { - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) + if (req->rq_reply_deadline > ktime_get_real_seconds()) return 1; return req->rq_receiving_reply; } @@ -2721,13 +2791,16 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req) int rc; spin_lock(&req->rq_lock); - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) { + if (req->rq_reply_deadline > ktime_get_real_seconds()) { + spin_unlock(&req->rq_lock); + return 1; + } + if (req->rq_req_deadline > ktime_get_real_seconds()) { spin_unlock(&req->rq_lock); return 1; } - rc = req->rq_receiving_reply; - rc = rc || req->rq_req_unlink || req->rq_reply_unlink; + rc = !req->rq_req_unlinked || !req->rq_reply_unlinked || + req->rq_receiving_reply; spin_unlock(&req->rq_lock); return rc; } |