diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/ptlrpc/client.c')
-rw-r--r-- | drivers/staging/lustre/lustre/ptlrpc/client.c | 187 |
1 files changed, 128 insertions, 59 deletions
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c index d90efe408414..4c9e00695087 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/client.c +++ b/drivers/staging/lustre/lustre/ptlrpc/client.c @@ -48,6 +48,7 @@ #include "ptlrpc_internal.h" static int ptlrpc_send_new_req(struct ptlrpc_request *req); +static int ptlrpcd_check_work(struct ptlrpc_request *req); /** * Initialize passed in client structure \a cl. @@ -62,7 +63,7 @@ void ptlrpc_init_client(int req_portal, int rep_portal, char *name, EXPORT_SYMBOL(ptlrpc_init_client); /** - * Return PortalRPC connection for remore uud \a uuid + * Return PortalRPC connection for remote uud \a uuid */ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid) { @@ -127,7 +128,7 @@ struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw, * Prepare bulk descriptor for specified outgoing request \a req that * can fit \a npages * pages. \a type is bulk type. \a portal is where * the bulk to be sent. Used on client-side. - * Returns pointer to newly allocatrd initialized bulk descriptor or NULL on + * Returns pointer to newly allocated initialized bulk descriptor or NULL on * error. */ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, @@ -631,7 +632,7 @@ int ptlrpc_request_pack(struct ptlrpc_request *request, /* For some old 1.8 clients (< 1.8.7), they will LASSERT the size of * ptlrpc_body sent from server equal to local ptlrpc_body size, so we - * have to send old ptlrpc_body to keep interoprability with these + * have to send old ptlrpc_body to keep interoperability with these * clients. * * Only three kinds of server->client RPCs so far: @@ -639,7 +640,7 @@ int ptlrpc_request_pack(struct ptlrpc_request *request, * - LDLM_CP_CALLBACK * - LDLM_GL_CALLBACK * - * XXX This should be removed whenever we drop the interoprability with + * XXX This should be removed whenever we drop the interoperability with * the these old clients. */ if (opcode == LDLM_BL_CALLBACK || opcode == LDLM_CP_CALLBACK || @@ -686,7 +687,7 @@ struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp, /** * Helper function for creating a request. - * Calls __ptlrpc_request_alloc to allocate new request sturcture and inits + * Calls __ptlrpc_request_alloc to allocate new request structure and inits * buffer structures according to capsule template \a format. * Returns allocated request structure pointer or NULL on error. */ @@ -743,7 +744,7 @@ void ptlrpc_request_free(struct ptlrpc_request *request) EXPORT_SYMBOL(ptlrpc_request_free); /** - * Allocate new request for operatione \a opcode and immediatelly pack it for + * Allocate new request for operation \a opcode and immediately pack it for * network transfer. * Only used for simple requests like OBD_PING where the only important * part of the request is operation itself. @@ -768,7 +769,7 @@ struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp, EXPORT_SYMBOL(ptlrpc_request_alloc_pack); /** - * Prepare request (fetched from pool \a poolif not NULL) on import \a imp + * Prepare request (fetched from pool \a pool if not NULL) on import \a imp * for operation \a opcode. Request would contain \a count buffers. * Sizes of buffers are described in array \a lengths and buffers themselves * are provided by a pointer \a bufs. @@ -1073,7 +1074,7 @@ static int ptlrpc_import_delay_req(struct obd_import *imp, } /** - * Decide if the eror message regarding provided request \a req + * Decide if the error message regarding provided request \a req * should be printed to the console or not. * Makes it's decision on request status and other properties. * Returns 1 to print error on the system console or 0 if not. @@ -1159,7 +1160,7 @@ static void ptlrpc_save_versions(struct ptlrpc_request *req) /** * Callback function called when client receives RPC reply for \a req. * Returns 0 on success or error code. - * The return alue would be assigned to req->rq_status by the caller + * The return value would be assigned to req->rq_status by the caller * as request processing status. * This function also decides if the request needs to be saved for later replay. */ @@ -1190,7 +1191,9 @@ static int after_reply(struct ptlrpc_request *req) * will roundup it */ req->rq_replen = req->rq_nob_received; req->rq_nob_received = 0; + spin_lock(&req->rq_lock); req->rq_resend = 1; + spin_unlock(&req->rq_lock); return 0; } @@ -1313,7 +1316,11 @@ static int after_reply(struct ptlrpc_request *req) /** version recovery */ ptlrpc_save_versions(req); ptlrpc_retain_replayable_request(req, imp); - } else if (req->rq_commit_cb != NULL) { + } else if (req->rq_commit_cb != NULL && + list_empty(&req->rq_replay_list)) { + /* NB: don't call rq_commit_cb if it's already on + * rq_replay_list, ptlrpc_free_committed() will call + * it later, see LU-3618 for details */ spin_unlock(&imp->imp_lock); req->rq_commit_cb(req); spin_lock(&imp->imp_lock); @@ -1408,7 +1415,9 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) req->rq_status = rc; return 1; } else { + spin_lock(&req->rq_lock); req->rq_wait_ctx = 1; + spin_unlock(&req->rq_lock); return 0; } } @@ -1423,7 +1432,9 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) rc = ptl_send_rpc(req, 0); if (rc) { DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc); + spin_lock(&req->rq_lock); req->rq_net_err = 1; + spin_unlock(&req->rq_lock); return rc; } return 0; @@ -1688,6 +1699,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) spin_lock(&req->rq_lock); req->rq_net_err = 1; spin_unlock(&req->rq_lock); + continue; } /* need to reset the timeout */ force_timer_recalc = 1; @@ -1773,6 +1785,10 @@ interpret: ptlrpc_req_interpret(env, req, req->rq_status); + if (ptlrpcd_check_work(req)) { + atomic_dec(&set->set_remaining); + continue; + } ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE); CDEBUG(req->rq_reqmsg != NULL ? D_RPCTRACE : 0, @@ -2031,7 +2047,7 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) EXPORT_SYMBOL(ptlrpc_set_next_timeout); /** - * Send all unset request from the set and then wait untill all + * Send all unset request from the set and then wait until all * requests in the set complete (either get a reply, timeout, get an * error or otherwise be interrupted). * Returns 0 on success or error code otherwise. @@ -2156,7 +2172,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) EXPORT_SYMBOL(ptlrpc_set_wait); /** - * Helper fuction for request freeing. + * Helper function for request freeing. * Called when request count reached zero and request needs to be freed. * Removes request from all sorts of sending/replay lists it might be on, * frees network buffers if any are present. @@ -2223,7 +2239,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked); /** * Drop one request reference. Must be called with import imp_lock held. - * When reference count drops to zero, reuqest is freed. + * When reference count drops to zero, request is freed. */ void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request) { @@ -2236,7 +2252,7 @@ EXPORT_SYMBOL(ptlrpc_req_finished_with_imp_lock); * Helper function * Drops one reference count for request \a request. * \a locked set indicates that caller holds import imp_lock. - * Frees the request whe reference count reaches zero. + * Frees the request when reference count reaches zero. */ static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked) { @@ -2360,19 +2376,52 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) } EXPORT_SYMBOL(ptlrpc_unregister_reply); +static void ptlrpc_free_request(struct ptlrpc_request *req) +{ + spin_lock(&req->rq_lock); + req->rq_replay = 0; + spin_unlock(&req->rq_lock); + + if (req->rq_commit_cb != NULL) + req->rq_commit_cb(req); + list_del_init(&req->rq_replay_list); + + __ptlrpc_req_finished(req, 1); +} + +/** + * the request is committed and dropped from the replay list of its import + */ +void ptlrpc_request_committed(struct ptlrpc_request *req, int force) +{ + struct obd_import *imp = req->rq_import; + + spin_lock(&imp->imp_lock); + if (list_empty(&req->rq_replay_list)) { + spin_unlock(&imp->imp_lock); + return; + } + + if (force || req->rq_transno <= imp->imp_peer_committed_transno) + ptlrpc_free_request(req); + + spin_unlock(&imp->imp_lock); +} +EXPORT_SYMBOL(ptlrpc_request_committed); + /** * Iterates through replay_list on import and prunes * all requests have transno smaller than last_committed for the * import and don't have rq_replay set. - * Since requests are sorted in transno order, stops when meetign first + * Since requests are sorted in transno order, stops when meeting first * transno bigger than last_committed. * caller must hold imp->imp_lock */ void ptlrpc_free_committed(struct obd_import *imp) { - struct list_head *tmp, *saved; - struct ptlrpc_request *req; + struct ptlrpc_request *req, *saved; struct ptlrpc_request *last_req = NULL; /* temporary fire escape */ + bool skip_committed_list = true; LASSERT(imp != NULL); @@ -2388,13 +2437,15 @@ void ptlrpc_free_committed(struct obd_import *imp) CDEBUG(D_RPCTRACE, "%s: committing for last_committed "LPU64" gen %d\n", imp->imp_obd->obd_name, imp->imp_peer_committed_transno, imp->imp_generation); + + if (imp->imp_generation != imp->imp_last_generation_checked) + skip_committed_list = false; + imp->imp_last_transno_checked = imp->imp_peer_committed_transno; imp->imp_last_generation_checked = imp->imp_generation; - list_for_each_safe(tmp, saved, &imp->imp_replay_list) { - req = list_entry(tmp, struct ptlrpc_request, - rq_replay_list); - + list_for_each_entry_safe(req, saved, &imp->imp_replay_list, + rq_replay_list) { /* XXX ok to remove when 1357 resolved - rread 05/29/03 */ LASSERT(req != last_req); last_req = req; @@ -2408,27 +2459,34 @@ void ptlrpc_free_committed(struct obd_import *imp) GOTO(free_req, 0); } - if (req->rq_replay) { - DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)"); - continue; - } - /* not yet committed */ if (req->rq_transno > imp->imp_peer_committed_transno) { DEBUG_REQ(D_RPCTRACE, req, "stopping search"); break; } + if (req->rq_replay) { + DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)"); + list_move_tail(&req->rq_replay_list, + &imp->imp_committed_list); + continue; + } + DEBUG_REQ(D_INFO, req, "commit (last_committed "LPU64")", imp->imp_peer_committed_transno); free_req: - spin_lock(&req->rq_lock); - req->rq_replay = 0; - spin_unlock(&req->rq_lock); - if (req->rq_commit_cb != NULL) - req->rq_commit_cb(req); - list_del_init(&req->rq_replay_list); - __ptlrpc_req_finished(req, 1); + ptlrpc_free_request(req); + } + if (skip_committed_list) + return; + + list_for_each_entry_safe(req, saved, &imp->imp_committed_list, + rq_replay_list) { + LASSERT(req->rq_transno != 0); + if (req->rq_import_generation < imp->imp_generation) { + DEBUG_REQ(D_RPCTRACE, req, "free stale open request"); + ptlrpc_free_request(req); + } } } @@ -2585,7 +2643,7 @@ struct ptlrpc_replay_async_args { /** * Callback used for replayed requests reply processing. - * In case of succesful reply calls registeresd request replay callback. + * In case of successful reply calls registered request replay callback. * In case of error restart replay process. */ static int ptlrpc_replay_interpret(const struct lu_env *env, @@ -2834,7 +2892,7 @@ void ptlrpc_init_xid(void) ptlrpc_last_xid = (__u64)now << 20; } - /* Need to always be aligned to a power-of-two for mutli-bulk BRW */ + /* Always need to be aligned to a power-of-two for multi-bulk BRW */ CLASSERT((PTLRPC_BULK_OPS_COUNT & (PTLRPC_BULK_OPS_COUNT - 1)) == 0); ptlrpc_last_xid &= PTLRPC_BULK_OPS_MASK; } @@ -2904,22 +2962,50 @@ EXPORT_SYMBOL(ptlrpc_sample_next_xid); * have delay before it really runs by ptlrpcd thread. */ struct ptlrpc_work_async_args { - __u64 magic; int (*cb)(const struct lu_env *, void *); void *cbdata; }; -#define PTLRPC_WORK_MAGIC 0x6655436b676f4f44ULL /* magic code */ +static void ptlrpcd_add_work_req(struct ptlrpc_request *req) +{ + /* re-initialize the req */ + req->rq_timeout = obd_timeout; + req->rq_sent = cfs_time_current_sec(); + req->rq_deadline = req->rq_sent + req->rq_timeout; + req->rq_reply_deadline = req->rq_deadline; + req->rq_phase = RQ_PHASE_INTERPRET; + req->rq_next_phase = RQ_PHASE_COMPLETE; + req->rq_xid = ptlrpc_next_xid(); + req->rq_import_generation = req->rq_import->imp_generation; + + ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1); +} static int work_interpreter(const struct lu_env *env, struct ptlrpc_request *req, void *data, int rc) { struct ptlrpc_work_async_args *arg = data; - LASSERT(arg->magic == PTLRPC_WORK_MAGIC); + LASSERT(ptlrpcd_check_work(req)); LASSERT(arg->cb != NULL); - return arg->cb(env, arg->cbdata); + rc = arg->cb(env, arg->cbdata); + + list_del_init(&req->rq_set_chain); + req->rq_set = NULL; + + if (atomic_dec_return(&req->rq_refcount) > 1) { + atomic_set(&req->rq_refcount, 2); + ptlrpcd_add_work_req(req); + } + return rc; +} + +static int worker_format; + +static int ptlrpcd_check_work(struct ptlrpc_request *req) +{ + return req->rq_pill.rc_fmt == (void *)&worker_format; } /** @@ -2952,6 +3038,7 @@ void *ptlrpcd_alloc_work(struct obd_import *imp, req->rq_receiving_reply = 0; req->rq_must_unlink = 0; req->rq_no_delay = req->rq_no_resend = 1; + req->rq_pill.rc_fmt = (void *)&worker_format; spin_lock_init(&req->rq_lock); INIT_LIST_HEAD(&req->rq_list); @@ -2965,7 +3052,6 @@ void *ptlrpcd_alloc_work(struct obd_import *imp, CLASSERT(sizeof(*args) <= sizeof(req->rq_async_args)); args = ptlrpc_req_async_args(req); - args->magic = PTLRPC_WORK_MAGIC; args->cb = cb; args->cbdata = cbdata; @@ -2995,25 +3081,8 @@ int ptlrpcd_queue_work(void *handler) * req as opaque data. - Jinshan */ LASSERT(atomic_read(&req->rq_refcount) > 0); - if (atomic_read(&req->rq_refcount) > 1) - return -EBUSY; - - if (atomic_inc_return(&req->rq_refcount) > 2) { /* race */ - atomic_dec(&req->rq_refcount); - return -EBUSY; - } - - /* re-initialize the req */ - req->rq_timeout = obd_timeout; - req->rq_sent = cfs_time_current_sec(); - req->rq_deadline = req->rq_sent + req->rq_timeout; - req->rq_reply_deadline = req->rq_deadline; - req->rq_phase = RQ_PHASE_INTERPRET; - req->rq_next_phase = RQ_PHASE_COMPLETE; - req->rq_xid = ptlrpc_next_xid(); - req->rq_import_generation = req->rq_import->imp_generation; - - ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1); + if (atomic_inc_return(&req->rq_refcount) == 2) + ptlrpcd_add_work_req(req); return 0; } EXPORT_SYMBOL(ptlrpcd_queue_work); |