aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lustre/osc/osc_request.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lustre/osc/osc_request.c')
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_request.c238
1 files changed, 112 insertions, 126 deletions
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 12113dfd87b8..367f83af12c0 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -38,7 +38,6 @@
#include "../../include/linux/libcfs/libcfs.h"
-
#include "../include/lustre_dlm.h"
#include "../include/lustre_net.h"
#include "../include/lustre/lustre_user.h"
@@ -50,9 +49,18 @@
#include "../include/lustre_param.h"
#include "../include/lustre_fid.h"
#include "../include/obd_class.h"
+#include "../include/obd.h"
#include "osc_internal.h"
#include "osc_cl_internal.h"
+atomic_t osc_pool_req_count;
+unsigned int osc_reqpool_maxreqcount;
+struct ptlrpc_request_pool *osc_rq_pool;
+
+/* max memory used for request pool, unit is MB */
+static unsigned int osc_reqpool_mem_max = 5;
+module_param(osc_reqpool_mem_max, uint, 0444);
+
struct osc_brw_async_args {
struct obdo *aa_oa;
int aa_requested_nob;
@@ -63,7 +71,6 @@ struct osc_brw_async_args {
struct client_obd *aa_cli;
struct list_head aa_oaps;
struct list_head aa_exts;
- struct obd_capa *aa_ocapa;
struct cl_req *aa_clerq;
};
@@ -191,22 +198,6 @@ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
return lsm_size;
}
-static inline void osc_pack_capa(struct ptlrpc_request *req,
- struct ost_body *body, void *capa)
-{
- struct obd_capa *oc = (struct obd_capa *)capa;
- struct lustre_capa *c;
-
- if (!capa)
- return;
-
- c = req_capsule_client_get(&req->rq_pill, &RMF_CAPA1);
- LASSERT(c);
- capa_cpy(c, oc);
- body->oa.o_valid |= OBD_MD_FLOSSCAPA;
- DEBUG_CAPA(D_SEC, c, "pack");
-}
-
static inline void osc_pack_req_body(struct ptlrpc_request *req,
struct obd_info *oinfo)
{
@@ -217,18 +208,6 @@ static inline void osc_pack_req_body(struct ptlrpc_request *req,
lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
oinfo->oi_oa);
- osc_pack_capa(req, body, oinfo->oi_capa);
-}
-
-static inline void osc_set_capa_size(struct ptlrpc_request *req,
- const struct req_msg_field *field,
- struct obd_capa *oc)
-{
- if (oc == NULL)
- req_capsule_set_size(&req->rq_pill, field, RCL_CLIENT, 0);
- else
- /* it is already calculated as sizeof struct obd_capa */
- ;
}
static int osc_getattr_interpret(const struct lu_env *env,
@@ -270,7 +249,6 @@ static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo,
if (req == NULL)
return -ENOMEM;
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
if (rc) {
ptlrpc_request_free(req);
@@ -301,7 +279,6 @@ static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
if (req == NULL)
return -ENOMEM;
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
if (rc) {
ptlrpc_request_free(req);
@@ -347,7 +324,6 @@ static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
if (req == NULL)
return -ENOMEM;
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
if (rc) {
ptlrpc_request_free(req);
@@ -411,7 +387,6 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
if (req == NULL)
return -ENOMEM;
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
if (rc) {
ptlrpc_request_free(req);
@@ -428,19 +403,19 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
/* do mds to ost setattr asynchronously */
if (!rqset) {
/* Do not wait for response. */
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ ptlrpcd_add_req(req);
} else {
req->rq_interpret_reply =
(ptlrpc_interpterer_t)osc_setattr_interpret;
- CLASSERT (sizeof(*sa) <= sizeof(req->rq_async_args));
+ CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
sa = ptlrpc_req_async_args(req);
sa->sa_oa = oinfo->oi_oa;
sa->sa_upcall = upcall;
sa->sa_cookie = cookie;
if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ ptlrpcd_add_req(req);
else
ptlrpc_set_add_req(rqset, req);
}
@@ -557,7 +532,6 @@ int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
if (req == NULL)
return -ENOMEM;
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_PUNCH);
if (rc) {
ptlrpc_request_free(req);
@@ -570,18 +544,17 @@ int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
LASSERT(body);
lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
oinfo->oi_oa);
- osc_pack_capa(req, body, oinfo->oi_capa);
ptlrpc_request_set_replen(req);
req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
- CLASSERT (sizeof(*sa) <= sizeof(req->rq_async_args));
+ CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
sa = ptlrpc_req_async_args(req);
sa->sa_oa = oinfo->oi_oa;
sa->sa_upcall = upcall;
sa->sa_cookie = cookie;
if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ ptlrpcd_add_req(req);
else
ptlrpc_set_add_req(rqset, req);
@@ -600,7 +573,7 @@ static int osc_sync_interpret(const struct lu_env *env,
body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
if (body == NULL) {
- CERROR ("can't unpack ost_body\n");
+ CERROR("can't unpack ost_body\n");
rc = -EPROTO;
goto out;
}
@@ -624,7 +597,6 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
if (req == NULL)
return -ENOMEM;
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SYNC);
if (rc) {
ptlrpc_request_free(req);
@@ -636,7 +608,6 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
LASSERT(body);
lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
oinfo->oi_oa);
- osc_pack_capa(req, body, oinfo->oi_capa);
ptlrpc_request_set_replen(req);
req->rq_interpret_reply = osc_sync_interpret;
@@ -648,7 +619,7 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
fa->fa_cookie = cookie;
if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ ptlrpcd_add_req(req);
else
ptlrpc_set_add_req(rqset, req);
@@ -754,8 +725,7 @@ int osc_create(const struct lu_env *env, struct obd_export *exp,
* cookies to the MDS after committing destroy transactions. */
static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
struct obdo *oa, struct lov_stripe_md *ea,
- struct obd_trans_info *oti, struct obd_export *md_export,
- void *capa)
+ struct obd_trans_info *oti, struct obd_export *md_export)
{
struct client_obd *cli = &exp->exp_obd->u.cli;
struct ptlrpc_request *req;
@@ -777,7 +747,6 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
return -ENOMEM;
}
- osc_set_capa_size(req, &RMF_CAPA1, (struct obd_capa *)capa);
rc = ldlm_prep_elc_req(exp, req, LUSTRE_OST_VERSION, OST_DESTROY,
0, &cancels, count);
if (rc) {
@@ -794,7 +763,6 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
LASSERT(body);
lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
- osc_pack_capa(req, body, (struct obd_capa *)capa);
ptlrpc_request_set_replen(req);
/* If osc_destroy is for destroying the unlink orphan,
@@ -817,7 +785,7 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
}
/* Do not wait for response */
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ ptlrpcd_add_req(req);
return 0;
}
@@ -910,7 +878,7 @@ static int osc_shrink_grant_interpret(const struct lu_env *env,
LASSERT(body);
osc_update_grant(cli, body);
out:
- OBDO_FREE(oa);
+ kmem_cache_free(obdo_cachep, oa);
return rc;
}
@@ -1100,7 +1068,7 @@ static void handle_short_read(int nob_read, u32 page_count,
/* skip bytes read OK */
while (nob_read > 0) {
- LASSERT (page_count > 0);
+ LASSERT(page_count > 0);
if (pga[i]->count > nob_read) {
/* EOF inside this page */
@@ -1210,6 +1178,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
unsigned char *ptr = kmap(pga[i]->pg);
int off = pga[i]->off & ~CFS_PAGE_MASK;
+
memcpy(ptr + off, "bad1", min(4, nob));
kunmap(pga[i]->pg);
}
@@ -1247,7 +1216,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
struct lov_stripe_md *lsm, u32 page_count,
struct brw_page **pga,
struct ptlrpc_request **reqp,
- struct obd_capa *ocapa, int reserve,
+ int reserve,
int resend)
{
struct ptlrpc_request *req;
@@ -1268,7 +1237,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
if ((cmd & OBD_BRW_WRITE) != 0) {
opc = OST_WRITE;
req = ptlrpc_request_alloc_pool(cli->cl_import,
- cli->cl_import->imp_rq_pool,
+ osc_rq_pool,
&RQF_OST_BRW_WRITE);
} else {
opc = OST_READ;
@@ -1287,7 +1256,6 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
sizeof(*ioobj));
req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT,
niocount * sizeof(*niobuf));
- osc_set_capa_size(req, &RMF_CAPA1, ocapa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, opc);
if (rc) {
@@ -1326,7 +1294,6 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
* "max - 1" for old client compatibility sending "0", and also so the
* the actual maximum is a power-of-two number, not one less. LU-1431 */
ioobj_max_brw_set(ioobj, desc->bd_md_max_brw);
- osc_pack_capa(req, body, ocapa);
LASSERT(page_count > 0);
pg_prev = pga[0];
for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
@@ -1435,8 +1402,6 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
aa->aa_ppga = pga;
aa->aa_cli = cli;
INIT_LIST_HEAD(&aa->aa_oaps);
- if (ocapa && reserve)
- aa->aa_ocapa = capa_get(ocapa);
*reqp = req;
return 0;
@@ -1571,7 +1536,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
}
if (rc != req->rq_bulk->bd_nob_transferred) {
- CERROR ("Unexpected rc %d (%d transferred)\n",
+ CERROR("Unexpected rc %d (%d transferred)\n",
rc, req->rq_bulk->bd_nob_transferred);
return -EPROTO;
}
@@ -1582,19 +1547,17 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
if (body->oa.o_valid & OBD_MD_FLCKSUM) {
static int cksum_counter;
__u32 server_cksum = body->oa.o_cksum;
- char *via;
- char *router;
+ char *via = "";
+ char *router = "";
cksum_type_t cksum_type;
- cksum_type = cksum_type_unpack(body->oa.o_valid &OBD_MD_FLFLAGS?
+ cksum_type = cksum_type_unpack(body->oa.o_valid&OBD_MD_FLFLAGS ?
body->oa.o_flags : 0);
client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
aa->aa_ppga, OST_READ,
cksum_type);
- if (peer->nid == req->rq_bulk->bd_sender) {
- via = router = "";
- } else {
+ if (peer->nid != req->rq_bulk->bd_sender) {
via = " via ";
router = libcfs_nid2str(req->rq_bulk->bd_sender);
}
@@ -1654,11 +1617,11 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
"redo for recoverable error %d", rc);
rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
- OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ,
+ OST_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
aa->aa_cli, aa->aa_oa,
NULL /* lsm unused by osc currently */,
aa->aa_page_count, aa->aa_ppga,
- &new_req, aa->aa_ocapa, 0, 1);
+ &new_req, 0, 1);
if (rc)
return rc;
@@ -1681,9 +1644,9 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
/* cap resend delay to the current request timeout, this is similar to
* what ptlrpc does (see after_reply()) */
if (aa->aa_resends > new_req->rq_timeout)
- new_req->rq_sent = get_seconds() + new_req->rq_timeout;
+ new_req->rq_sent = ktime_get_real_seconds() + new_req->rq_timeout;
else
- new_req->rq_sent = get_seconds() + aa->aa_resends;
+ new_req->rq_sent = ktime_get_real_seconds() + aa->aa_resends;
new_req->rq_generation_set = 1;
new_req->rq_import_generation = request->rq_import_generation;
@@ -1702,14 +1665,11 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
}
}
- new_aa->aa_ocapa = aa->aa_ocapa;
- aa->aa_ocapa = NULL;
-
/* XXX: This code will run into problem if we're going to support
* to add a series of BRW RPCs into a self-defined ptlrpc_request_set
* and wait for all of them to be finished. We should inherit request
* set from old request. */
- ptlrpcd_add_req(new_req, PDL_POLICY_SAME, -1);
+ ptlrpcd_add_req(new_req);
DEBUG_REQ(D_INFO, new_req, "new request");
return 0;
@@ -1786,11 +1746,6 @@ static int brw_interpret(const struct lu_env *env,
rc = -EIO;
}
- if (aa->aa_ocapa) {
- capa_put(aa->aa_ocapa);
- aa->aa_ocapa = NULL;
- }
-
list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
if (obj == NULL && rc == 0) {
obj = osc2cl(ext->oe_obj);
@@ -1832,7 +1787,7 @@ static int brw_interpret(const struct lu_env *env,
}
cl_object_put(env, obj);
}
- OBDO_FREE(aa->aa_oa);
+ kmem_cache_free(obdo_cachep, aa->aa_oa);
cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
req->rq_bulk->bd_nob_transferred);
@@ -1850,7 +1805,7 @@ static int brw_interpret(const struct lu_env *env,
osc_wake_cache_waiters(cli);
client_obd_list_unlock(&cli->cl_loi_list_lock);
- osc_io_unplug(env, cli, NULL, PDL_POLICY_SAME);
+ osc_io_unplug(env, cli, NULL);
return rc;
}
@@ -1860,7 +1815,7 @@ static int brw_interpret(const struct lu_env *env,
* Extents in the list must be in OES_RPC state.
*/
int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
- struct list_head *ext_list, int cmd, pdl_policy_t pol)
+ struct list_head *ext_list, int cmd)
{
struct ptlrpc_request *req = NULL;
struct osc_extent *ext;
@@ -1920,7 +1875,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
goto out;
}
- OBDO_ALLOC(oa);
+ oa = kmem_cache_alloc(obdo_cachep, GFP_NOFS | __GFP_ZERO);
if (oa == NULL) {
rc = -ENOMEM;
goto out;
@@ -1929,6 +1884,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
i = 0;
list_for_each_entry(oap, &rpc_list, oap_rpc_item) {
struct cl_page *page = oap2cl_page(oap);
+
if (clerq == NULL) {
clerq = cl_req_alloc(env, page, crt,
1 /* only 1-object rpcs for now */);
@@ -1966,7 +1922,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
sort_brw_pages(pga, page_count);
rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count,
- pga, &req, crattr->cra_capa, 1, 0);
+ pga, &req, 1, 0);
if (rc != 0) {
CERROR("prep_req failed: %d\n", rc);
goto out;
@@ -2034,35 +1990,20 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
page_count, aa, cli->cl_r_in_flight,
cli->cl_w_in_flight);
- /* XXX: Maybe the caller can check the RPC bulk descriptor to
- * see which CPU/NUMA node the majority of pages were allocated
- * on, and try to assign the async RPC to the CPU core
- * (PDL_POLICY_PREFERRED) to reduce cross-CPU memory traffic.
- *
- * But on the other hand, we expect that multiple ptlrpcd
- * threads and the initial write sponsor can run in parallel,
- * especially when data checksum is enabled, which is CPU-bound
- * operation and single ptlrpcd thread cannot process in time.
- * So more ptlrpcd threads sharing BRW load
- * (with PDL_POLICY_ROUND) seems better.
- */
- ptlrpcd_add_req(req, pol, -1);
+ ptlrpcd_add_req(req);
rc = 0;
out:
if (mem_tight != 0)
cfs_memory_pressure_restore(mpflag);
- if (crattr != NULL) {
- capa_put(crattr->cra_capa);
- kfree(crattr);
- }
+ kfree(crattr);
if (rc != 0) {
LASSERT(req == NULL);
if (oa)
- OBDO_FREE(oa);
+ kmem_cache_free(obdo_cachep, oa);
kfree(pga);
/* this should happen rarely and is pretty bad, it makes the
* pending list not follow the dirty order */
@@ -2149,6 +2090,7 @@ static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
/* The request was created before ldlm_cli_enqueue call. */
if (rc == ELDLM_LOCK_ABORTED) {
struct ldlm_reply *rep;
+
rep = req_capsule_server_get(&req->rq_pill,
&RMF_DLM_REP);
@@ -2335,6 +2277,7 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
no_match:
if (intent) {
LIST_HEAD(cancels);
+
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
&RQF_LDLM_ENQUEUE_LVB);
if (req == NULL)
@@ -2359,6 +2302,7 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
if (rqset) {
if (!rc) {
struct osc_enqueue_args *aa;
+
CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
aa = ptlrpc_req_async_args(req);
aa->oa_ei = einfo;
@@ -2373,7 +2317,7 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
req->rq_interpret_reply =
(ptlrpc_interpterer_t)osc_enqueue_interpret;
if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ ptlrpcd_add_req(req);
else
ptlrpc_set_add_req(rqset, req);
} else if (intent) {
@@ -2645,7 +2589,6 @@ static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump)
return rc;
}
-
static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
void *karg, void *uarg)
{
@@ -2720,7 +2663,7 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
data->ioc_offset);
goto out;
case OBD_IOC_POLL_QUOTACHECK:
- err = osc_quota_poll_check(exp, (struct if_quotacheck *)karg);
+ err = osc_quota_poll_check(exp, karg);
goto out;
case OBD_IOC_PING_TARGET:
err = ptlrpc_obd_ping(obd);
@@ -2787,8 +2730,7 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
ptlrpc_req_finished(req);
return rc;
} else if (KEY_IS(KEY_FIEMAP)) {
- struct ll_fiemap_info_key *fm_key =
- (struct ll_fiemap_info_key *)key;
+ struct ll_fiemap_info_key *fm_key = key;
struct ldlm_res_id res_id;
ldlm_policy_data_t policy;
struct lustre_handle lockh;
@@ -2910,7 +2852,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
struct client_obd *cli = &obd->u.cli;
LASSERT(cli->cl_cache == NULL); /* only once */
- cli->cl_cache = (struct cl_client_cache *)val;
+ cli->cl_cache = val;
atomic_inc(&cli->cl_cache->ccc_users);
cli->cl_lru_left = &cli->cl_cache->ccc_lru_left;
@@ -2973,7 +2915,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
aa = ptlrpc_req_async_args(req);
- OBDO_ALLOC(oa);
+ oa = kmem_cache_alloc(obdo_cachep, GFP_NOFS | __GFP_ZERO);
if (!oa) {
ptlrpc_req_finished(req);
return -ENOMEM;
@@ -2988,8 +2930,9 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
LASSERT(set != NULL);
ptlrpc_set_add_req(set, req);
ptlrpc_check_set(NULL, set);
- } else
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ } else {
+ ptlrpcd_add_req(req);
+ }
return 0;
}
@@ -3081,7 +3024,7 @@ static int osc_import_event(struct obd_device *obd,
cli = &obd->u.cli;
/* all pages go to failing rpcs due to the invalid
* import */
- osc_io_unplug(env, cli, NULL, PDL_POLICY_ROUND);
+ osc_io_unplug(env, cli, NULL);
ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
cl_env_put(env, &refcheck);
@@ -3101,7 +3044,7 @@ static int osc_import_event(struct obd_device *obd,
/* See bug 7198 */
if (ocd->ocd_connect_flags & OBD_CONNECT_REQPORTAL)
- imp->imp_client->cli_request_portal =OST_REQUEST_PORTAL;
+ imp->imp_client->cli_request_portal = OST_REQUEST_PORTAL;
rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
break;
@@ -3153,7 +3096,7 @@ static int brw_queue_work(const struct lu_env *env, void *data)
CDEBUG(D_CACHE, "Run writeback work for client obd %p.\n", cli);
- osc_io_unplug(env, cli, NULL, PDL_POLICY_SAME);
+ osc_io_unplug(env, cli, NULL);
return 0;
}
@@ -3163,6 +3106,9 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
struct client_obd *cli = &obd->u.cli;
void *handler;
int rc;
+ int adding;
+ int added;
+ int req_count;
rc = ptlrpcd_addref();
if (rc)
@@ -3191,15 +3137,20 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
ptlrpc_lprocfs_register_obd(obd);
}
- /* We need to allocate a few requests more, because
- * brw_interpret tries to create new requests before freeing
- * previous ones, Ideally we want to have 2x max_rpcs_in_flight
- * reserved, but I'm afraid that might be too much wasted RAM
- * in fact, so 2 is just my guess and still should work. */
- cli->cl_import->imp_rq_pool =
- ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
- OST_MAXREQSIZE,
- ptlrpc_add_rqs_to_pool);
+ /*
+ * We try to control the total number of requests with a upper limit
+ * osc_reqpool_maxreqcount. There might be some race which will cause
+ * over-limit allocation, but it is fine.
+ */
+ req_count = atomic_read(&osc_pool_req_count);
+ if (req_count < osc_reqpool_maxreqcount) {
+ adding = cli->cl_max_rpcs_in_flight + 2;
+ if (req_count + adding > osc_reqpool_maxreqcount)
+ adding = osc_reqpool_maxreqcount - req_count;
+
+ added = ptlrpc_add_rqs_to_pool(osc_rq_pool, adding);
+ atomic_add(added, &osc_pool_req_count);
+ }
INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
@@ -3219,6 +3170,7 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
switch (stage) {
case OBD_CLEANUP_EARLY: {
struct obd_import *imp;
+
imp = obd->u.cli.cl_import;
CDEBUG(D_HA, "Deactivating import %s\n", obd->obd_name);
/* ptlrpc_abort_inflight to stop an mds_lov_synchronize */
@@ -3339,6 +3291,8 @@ extern struct lock_class_key osc_ast_guard_class;
static int __init osc_init(void)
{
struct lprocfs_static_vars lvars = { NULL };
+ unsigned int reqpool_size;
+ unsigned int reqsize;
int rc;
/* print an address of _any_ initialized kernel symbol from this
@@ -3354,14 +3308,45 @@ static int __init osc_init(void)
rc = class_register_type(&osc_obd_ops, NULL,
LUSTRE_OSC_NAME, &osc_device_type);
- if (rc) {
- lu_kmem_fini(osc_caches);
- return rc;
- }
+ if (rc)
+ goto out_kmem;
spin_lock_init(&osc_ast_guard);
lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
+ /* This is obviously too much memory, only prevent overflow here */
+ if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
+ rc = -EINVAL;
+ goto out_type;
+ }
+
+ reqpool_size = osc_reqpool_mem_max << 20;
+
+ reqsize = 1;
+ while (reqsize < OST_MAXREQSIZE)
+ reqsize = reqsize << 1;
+
+ /*
+ * We don't enlarge the request count in OSC pool according to
+ * cl_max_rpcs_in_flight. The allocation from the pool will only be
+ * tried after normal allocation failed. So a small OSC pool won't
+ * cause much performance degression in most of cases.
+ */
+ osc_reqpool_maxreqcount = reqpool_size / reqsize;
+
+ atomic_set(&osc_pool_req_count, 0);
+ osc_rq_pool = ptlrpc_init_rq_pool(0, OST_MAXREQSIZE,
+ ptlrpc_add_rqs_to_pool);
+
+ if (osc_rq_pool)
+ return 0;
+
+ rc = -ENOMEM;
+
+out_type:
+ class_unregister_type(LUSTRE_OSC_NAME);
+out_kmem:
+ lu_kmem_fini(osc_caches);
return rc;
}
@@ -3369,6 +3354,7 @@ static void /*__exit*/ osc_exit(void)
{
class_unregister_type(LUSTRE_OSC_NAME);
lu_kmem_fini(osc_caches);
+ ptlrpc_free_rq_pool(osc_rq_pool);
}
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");