Diffstat (limited to 'drivers/staging/lustre/lustre/obdclass')
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_io.c               |  430
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_lock.c             | 2086
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_object.c           |  303
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_page.c             |  659
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/class_obd.c           |    5
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/debug.c               |    4
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/genops.c              |    1
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/linux/linux-module.c  |    4
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/llog.c                |    1
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lprocfs_status.c      |   72
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lu_object.c           |    9
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lustre_peer.c         |    3
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/obd_config.c          |   26
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/obd_mount.c           |   15
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/obdo.c                |    3
15 files changed, 628 insertions, 2993 deletions
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c
index f5128b4f176f..583fb5f33889 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_io.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c
@@ -36,6 +36,7 @@
* Client IO.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_CLASS
@@ -132,6 +133,7 @@ void cl_io_fini(const struct lu_env *env, struct cl_io *io)
case CIT_WRITE:
break;
case CIT_FAULT:
+ break;
case CIT_FSYNC:
LASSERT(!io->ci_need_restart);
break;
@@ -159,7 +161,6 @@ static int cl_io_init0(const struct lu_env *env, struct cl_io *io,
io->ci_type = iot;
INIT_LIST_HEAD(&io->ci_lockset.cls_todo);
- INIT_LIST_HEAD(&io->ci_lockset.cls_curr);
INIT_LIST_HEAD(&io->ci_lockset.cls_done);
INIT_LIST_HEAD(&io->ci_layers);
@@ -241,37 +242,7 @@ static int cl_lock_descr_sort(const struct cl_lock_descr *d0,
const struct cl_lock_descr *d1)
{
return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
- lu_object_fid(&d1->cld_obj->co_lu)) ?:
- __diff_normalize(d0->cld_start, d1->cld_start);
-}
-
-static int cl_lock_descr_cmp(const struct cl_lock_descr *d0,
- const struct cl_lock_descr *d1)
-{
- int ret;
-
- ret = lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
- lu_object_fid(&d1->cld_obj->co_lu));
- if (ret)
- return ret;
- if (d0->cld_end < d1->cld_start)
- return -1;
- if (d0->cld_start > d0->cld_end)
- return 1;
- return 0;
-}
-
-static void cl_lock_descr_merge(struct cl_lock_descr *d0,
- const struct cl_lock_descr *d1)
-{
- d0->cld_start = min(d0->cld_start, d1->cld_start);
- d0->cld_end = max(d0->cld_end, d1->cld_end);
-
- if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
- d0->cld_mode = CLM_WRITE;
-
- if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
- d0->cld_mode = CLM_GROUP;
+ lu_object_fid(&d1->cld_obj->co_lu));
}
/*
@@ -320,33 +291,35 @@ static void cl_io_locks_sort(struct cl_io *io)
} while (!done);
}
-/**
- * Check whether \a queue contains locks matching \a need.
- *
- * \retval +ve there is a matching lock in the \a queue
- * \retval 0 there are no matching locks in the \a queue
- */
-int cl_queue_match(const struct list_head *queue,
- const struct cl_lock_descr *need)
+static void cl_lock_descr_merge(struct cl_lock_descr *d0,
+ const struct cl_lock_descr *d1)
{
- struct cl_io_lock_link *scan;
+ d0->cld_start = min(d0->cld_start, d1->cld_start);
+ d0->cld_end = max(d0->cld_end, d1->cld_end);
- list_for_each_entry(scan, queue, cill_linkage) {
- if (cl_lock_descr_match(&scan->cill_descr, need))
- return 1;
- }
- return 0;
+ if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
+ d0->cld_mode = CLM_WRITE;
+
+ if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
+ d0->cld_mode = CLM_GROUP;
}
-EXPORT_SYMBOL(cl_queue_match);
-static int cl_queue_merge(const struct list_head *queue,
- const struct cl_lock_descr *need)
+static int cl_lockset_merge(const struct cl_lockset *set,
+ const struct cl_lock_descr *need)
{
struct cl_io_lock_link *scan;
- list_for_each_entry(scan, queue, cill_linkage) {
- if (cl_lock_descr_cmp(&scan->cill_descr, need))
+ list_for_each_entry(scan, &set->cls_todo, cill_linkage) {
+ if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj))
continue;
+
+ /* Merge locks for the same object, because the ldlm lock server
+ * may expand the lock extent; otherwise there is a deadlock
+ * case if two conflicting locks are queued for the same object
+ * and the lock server expands one lock to overlap the other.
+ * The side effect is that it can generate a multi-stripe lock
+ * that may cause a cascading problem.
+ */
cl_lock_descr_merge(&scan->cill_descr, need);
CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
scan->cill_descr.cld_mode, scan->cill_descr.cld_start,
@@ -356,87 +329,20 @@ static int cl_queue_merge(const struct list_head *queue,
return 0;
}
-static int cl_lockset_match(const struct cl_lockset *set,
- const struct cl_lock_descr *need)
-{
- return cl_queue_match(&set->cls_curr, need) ||
- cl_queue_match(&set->cls_done, need);
-}
-
-static int cl_lockset_merge(const struct cl_lockset *set,
- const struct cl_lock_descr *need)
-{
- return cl_queue_merge(&set->cls_todo, need) ||
- cl_lockset_match(set, need);
-}
-
-static int cl_lockset_lock_one(const struct lu_env *env,
- struct cl_io *io, struct cl_lockset *set,
- struct cl_io_lock_link *link)
-{
- struct cl_lock *lock;
- int result;
-
- lock = cl_lock_request(env, io, &link->cill_descr, "io", io);
-
- if (!IS_ERR(lock)) {
- link->cill_lock = lock;
- list_move(&link->cill_linkage, &set->cls_curr);
- if (!(link->cill_descr.cld_enq_flags & CEF_ASYNC)) {
- result = cl_wait(env, lock);
- if (result == 0)
- list_move(&link->cill_linkage, &set->cls_done);
- } else
- result = 0;
- } else
- result = PTR_ERR(lock);
- return result;
-}
-
-static void cl_lock_link_fini(const struct lu_env *env, struct cl_io *io,
- struct cl_io_lock_link *link)
-{
- struct cl_lock *lock = link->cill_lock;
-
- list_del_init(&link->cill_linkage);
- if (lock) {
- cl_lock_release(env, lock, "io", io);
- link->cill_lock = NULL;
- }
- if (link->cill_fini)
- link->cill_fini(env, link);
-}
-
static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io,
struct cl_lockset *set)
{
struct cl_io_lock_link *link;
struct cl_io_lock_link *temp;
- struct cl_lock *lock;
int result;
result = 0;
list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
- if (!cl_lockset_match(set, &link->cill_descr)) {
- /* XXX some locking to guarantee that locks aren't
- * expanded in between.
- */
- result = cl_lockset_lock_one(env, io, set, link);
- if (result != 0)
- break;
- } else
- cl_lock_link_fini(env, io, link);
- }
- if (result == 0) {
- list_for_each_entry_safe(link, temp,
- &set->cls_curr, cill_linkage) {
- lock = link->cill_lock;
- result = cl_wait(env, lock);
- if (result == 0)
- list_move(&link->cill_linkage, &set->cls_done);
- else
- break;
- }
+ result = cl_lock_request(env, io, &link->cill_lock);
+ if (result < 0)
+ break;
+
+ list_move(&link->cill_linkage, &set->cls_done);
}
return result;
}
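For readers comparing the old and new schemes: with cls_curr and cl_wait() gone, a queued lock link goes straight from cls_todo to cls_done once cl_lock_request() succeeds, and is dropped later by cl_io_unlock() via cl_lock_release(). The following is an illustrative sketch only, condensed from the hunks above and the cl_io_unlock() hunk below; lockset_flow_sketch() is an invented name and is not part of this patch.

/*
 * Illustrative only -- condensed from cl_lockset_lock() and cl_io_unlock()
 * as changed by this patch; not literal patch text.
 */
static int lockset_flow_sketch(const struct lu_env *env, struct cl_io *io,
			       struct cl_lockset *set)
{
	struct cl_io_lock_link *link;
	struct cl_io_lock_link *temp;
	int rc = 0;

	/* enqueue every queued link; a granted lock is parked on cls_done */
	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
		rc = cl_lock_request(env, io, &link->cill_lock);
		if (rc < 0)
			break;
		list_move(&link->cill_linkage, &set->cls_done);
	}

	/* release path, normally run from cl_io_unlock(); links still on
	 * cls_todo after an error are finalized there as well
	 */
	list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) {
		list_del_init(&link->cill_linkage);
		cl_lock_release(env, &link->cill_lock);
		if (link->cill_fini)
			link->cill_fini(env, link);
	}
	return rc;
}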
@@ -492,16 +398,19 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io)
set = &io->ci_lockset;
- list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage)
- cl_lock_link_fini(env, io, link);
-
- list_for_each_entry_safe(link, temp, &set->cls_curr, cill_linkage)
- cl_lock_link_fini(env, io, link);
+ list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
+ list_del_init(&link->cill_linkage);
+ if (link->cill_fini)
+ link->cill_fini(env, link);
+ }
list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) {
- cl_unuse(env, link->cill_lock);
- cl_lock_link_fini(env, io, link);
+ list_del_init(&link->cill_linkage);
+ cl_lock_release(env, &link->cill_lock);
+ if (link->cill_fini)
+ link->cill_fini(env, link);
}
+
cl_io_for_each_reverse(scan, io) {
if (scan->cis_iop->op[io->ci_type].cio_unlock)
scan->cis_iop->op[io->ci_type].cio_unlock(env, scan);
@@ -595,9 +504,9 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
{
int result;
- if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr))
+ if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) {
result = 1;
- else {
+ } else {
list_add(&link->cill_linkage, &io->ci_lockset.cls_todo);
result = 0;
}
@@ -627,8 +536,9 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
result = cl_io_lock_add(env, io, link);
if (result) /* lock match */
link->cill_fini(env, link);
- } else
+ } else {
result = -ENOMEM;
+ }
return result;
}
@@ -692,42 +602,6 @@ cl_io_slice_page(const struct cl_io_slice *ios, struct cl_page *page)
}
/**
- * True iff \a page is within \a io range.
- */
-static int cl_page_in_io(const struct cl_page *page, const struct cl_io *io)
-{
- int result = 1;
- loff_t start;
- loff_t end;
- pgoff_t idx;
-
- idx = page->cp_index;
- switch (io->ci_type) {
- case CIT_READ:
- case CIT_WRITE:
- /*
- * check that [start, end) and [pos, pos + count) extents
- * overlap.
- */
- if (!cl_io_is_append(io)) {
- const struct cl_io_rw_common *crw = &(io->u.ci_rw);
-
- start = cl_offset(page->cp_obj, idx);
- end = cl_offset(page->cp_obj, idx + 1);
- result = crw->crw_pos < end &&
- start < crw->crw_pos + crw->crw_count;
- }
- break;
- case CIT_FAULT:
- result = io->u.ci_fault.ft_index == idx;
- break;
- default:
- LBUG();
- }
- return result;
-}
-
-/**
* Called by read io, when page has to be read from the server.
*
* \see cl_io_operations::cio_read_page()
@@ -742,7 +616,6 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT);
LINVRNT(cl_page_is_owned(page, io));
LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
- LINVRNT(cl_page_in_io(page, io));
LINVRNT(cl_io_invariant(io));
queue = &io->ci_queue;
@@ -769,7 +642,7 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
break;
}
}
- if (result == 0)
+ if (result == 0 && queue->c2_qin.pl_nr > 0)
result = cl_io_submit_rw(env, io, CRT_READ, queue);
/*
* Unlock unsent pages in case of error.
@@ -781,77 +654,29 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
EXPORT_SYMBOL(cl_io_read_page);
/**
- * Called by write io to prepare page to receive data from user buffer.
+ * Commit a list of contiguous pages into writeback cache.
*
- * \see cl_io_operations::cio_prepare_write()
+ * \returns 0 if all pages committed, or errcode if error occurred.
+ * \see cl_io_operations::cio_commit_async()
*/
-int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, unsigned from, unsigned to)
+int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue, int from, int to,
+ cl_commit_cbt cb)
{
const struct cl_io_slice *scan;
int result = 0;
- LINVRNT(io->ci_type == CIT_WRITE);
- LINVRNT(cl_page_is_owned(page, io));
- LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
- LINVRNT(cl_io_invariant(io));
- LASSERT(cl_page_in_io(page, io));
-
- cl_io_for_each_reverse(scan, io) {
- if (scan->cis_iop->cio_prepare_write) {
- const struct cl_page_slice *slice;
-
- slice = cl_io_slice_page(scan, page);
- result = scan->cis_iop->cio_prepare_write(env, scan,
- slice,
- from, to);
- if (result != 0)
- break;
- }
- }
- return result;
-}
-EXPORT_SYMBOL(cl_io_prepare_write);
-
-/**
- * Called by write io after user data were copied into a page.
- *
- * \see cl_io_operations::cio_commit_write()
- */
-int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, unsigned from, unsigned to)
-{
- const struct cl_io_slice *scan;
- int result = 0;
-
- LINVRNT(io->ci_type == CIT_WRITE);
- LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
- LINVRNT(cl_io_invariant(io));
- /*
- * XXX Uh... not nice. Top level cl_io_commit_write() call (vvp->lov)
- * already called cl_page_cache_add(), moving page into CPS_CACHED
- * state. Better (and more general) way of dealing with such situation
- * is needed.
- */
- LASSERT(cl_page_is_owned(page, io) || page->cp_parent);
- LASSERT(cl_page_in_io(page, io));
-
cl_io_for_each(scan, io) {
- if (scan->cis_iop->cio_commit_write) {
- const struct cl_page_slice *slice;
-
- slice = cl_io_slice_page(scan, page);
- result = scan->cis_iop->cio_commit_write(env, scan,
- slice,
- from, to);
- if (result != 0)
- break;
- }
+ if (!scan->cis_iop->cio_commit_async)
+ continue;
+ result = scan->cis_iop->cio_commit_async(env, scan, queue,
+ from, to, cb);
+ if (result != 0)
+ break;
}
- LINVRNT(result <= 0);
return result;
}
-EXPORT_SYMBOL(cl_io_commit_write);
+EXPORT_SYMBOL(cl_io_commit_async);
/**
* Submits a list of pages for immediate io.
@@ -869,13 +694,10 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
const struct cl_io_slice *scan;
int result = 0;
- LINVRNT(crt < ARRAY_SIZE(scan->cis_iop->req_op));
-
cl_io_for_each(scan, io) {
- if (!scan->cis_iop->req_op[crt].cio_submit)
+ if (!scan->cis_iop->cio_submit)
continue;
- result = scan->cis_iop->req_op[crt].cio_submit(env, scan, crt,
- queue);
+ result = scan->cis_iop->cio_submit(env, scan, crt, queue);
if (result != 0)
break;
}
@@ -887,6 +709,9 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
}
EXPORT_SYMBOL(cl_io_submit_rw);
+static void cl_page_list_assume(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
+
/**
* Submit a sync_io and wait for the IO to be finished, or error happens.
* If \a timeout is zero, it means to wait for the IO unconditionally.
@@ -904,7 +729,7 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
pg->cp_sync_io = anchor;
}
- cl_sync_io_init(anchor, queue->c2_qin.pl_nr);
+ cl_sync_io_init(anchor, queue->c2_qin.pl_nr, &cl_sync_io_end);
rc = cl_io_submit_rw(env, io, iot, queue);
if (rc == 0) {
/*
@@ -915,12 +740,12 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
*/
cl_page_list_for_each(pg, &queue->c2_qin) {
pg->cp_sync_io = NULL;
- cl_sync_io_note(anchor, 1);
+ cl_sync_io_note(env, anchor, 1);
}
/* wait for the IO to be finished. */
- rc = cl_sync_io_wait(env, io, &queue->c2_qout,
- anchor, timeout);
+ rc = cl_sync_io_wait(env, anchor, timeout);
+ cl_page_list_assume(env, io, &queue->c2_qout);
} else {
LASSERT(list_empty(&queue->c2_qout.pl_pages));
cl_page_list_for_each(pg, &queue->c2_qin)
@@ -931,26 +756,6 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
EXPORT_SYMBOL(cl_io_submit_sync);
/**
- * Cancel an IO which has been submitted by cl_io_submit_rw.
- */
-static int cl_io_cancel(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue)
-{
- struct cl_page *page;
- int result = 0;
-
- CERROR("Canceling ongoing page transmission\n");
- cl_page_list_for_each(page, queue) {
- int rc;
-
- LINVRNT(cl_page_in_io(page, io));
- rc = cl_page_cancel(env, page);
- result = result ?: rc;
- }
- return result;
-}
-
-/**
* Main io loop.
*
* Pumps io through iterations calling
@@ -1072,8 +877,8 @@ EXPORT_SYMBOL(cl_page_list_add);
/**
* Removes a page from a page list.
*/
-static void cl_page_list_del(const struct lu_env *env,
- struct cl_page_list *plist, struct cl_page *page)
+void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
+ struct cl_page *page)
{
LASSERT(plist->pl_nr > 0);
LINVRNT(plist->pl_owner == current);
@@ -1086,6 +891,7 @@ static void cl_page_list_del(const struct lu_env *env,
lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist);
cl_page_put(env, page);
}
+EXPORT_SYMBOL(cl_page_list_del);
/**
* Moves a page from one page list to another.
@@ -1106,6 +912,24 @@ void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
EXPORT_SYMBOL(cl_page_list_move);
/**
+ * Moves a page from one page list to the head of another list.
+ */
+void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
+ struct cl_page *page)
+{
+ LASSERT(src->pl_nr > 0);
+ LINVRNT(dst->pl_owner == current);
+ LINVRNT(src->pl_owner == current);
+
+ list_move(&page->cp_batch, &dst->pl_pages);
+ --src->pl_nr;
+ ++dst->pl_nr;
+ lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue",
+ src, dst);
+}
+EXPORT_SYMBOL(cl_page_list_move_head);
+
+/**
* splice the cl_page_list, just as list head does
*/
void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head)
@@ -1162,8 +986,7 @@ EXPORT_SYMBOL(cl_page_list_disown);
/**
* Releases pages from queue.
*/
-static void cl_page_list_fini(const struct lu_env *env,
- struct cl_page_list *plist)
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist)
{
struct cl_page *page;
struct cl_page *temp;
@@ -1174,6 +997,7 @@ static void cl_page_list_fini(const struct lu_env *env,
cl_page_list_del(env, plist, page);
LASSERT(plist->pl_nr == 0);
}
+EXPORT_SYMBOL(cl_page_list_fini);
/**
* Assumes all pages in a queue.
@@ -1260,7 +1084,7 @@ EXPORT_SYMBOL(cl_2queue_init_page);
/**
* Returns top-level io.
*
- * \see cl_object_top(), cl_page_top().
+ * \see cl_object_top()
*/
struct cl_io *cl_io_top(struct cl_io *io)
{
@@ -1323,19 +1147,14 @@ static int cl_req_init(const struct lu_env *env, struct cl_req *req,
int result;
result = 0;
- page = cl_page_top(page);
- do {
- list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
- dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
- if (dev->cd_ops->cdo_req_init) {
- result = dev->cd_ops->cdo_req_init(env,
- dev, req);
- if (result != 0)
- break;
- }
+ list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+ dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
+ if (dev->cd_ops->cdo_req_init) {
+ result = dev->cd_ops->cdo_req_init(env, dev, req);
+ if (result != 0)
+ break;
}
- page = page->cp_child;
- } while (page && result == 0);
+ }
return result;
}
@@ -1384,14 +1203,16 @@ struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
if (req->crq_o) {
req->crq_nrobjs = nr_objects;
result = cl_req_init(env, req, page);
- } else
+ } else {
result = -ENOMEM;
+ }
if (result != 0) {
cl_req_completion(env, req, result);
req = ERR_PTR(result);
}
- } else
+ } else {
req = ERR_PTR(-ENOMEM);
+ }
return req;
}
EXPORT_SYMBOL(cl_req_alloc);
@@ -1406,8 +1227,6 @@ void cl_req_page_add(const struct lu_env *env,
struct cl_req_obj *rqo;
int i;
- page = cl_page_top(page);
-
LASSERT(list_empty(&page->cp_flight));
LASSERT(!page->cp_req);
@@ -1438,8 +1257,6 @@ void cl_req_page_done(const struct lu_env *env, struct cl_page *page)
{
struct cl_req *req = page->cp_req;
- page = cl_page_top(page);
-
LASSERT(!list_empty(&page->cp_flight));
LASSERT(req->crq_nrpages > 0);
@@ -1511,25 +1328,39 @@ void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
}
EXPORT_SYMBOL(cl_req_attr_set);
+/* cl_sync_io_callback assumes the caller must call cl_sync_io_wait() to
+ * wait for the IO to finish.
+ */
+void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor)
+{
+ wake_up_all(&anchor->csi_waitq);
+
+ /* it's safe to nuke or reuse anchor now */
+ atomic_set(&anchor->csi_barrier, 0);
+}
+EXPORT_SYMBOL(cl_sync_io_end);
/**
- * Initialize synchronous io wait anchor, for transfer of \a nrpages pages.
+ * Initialize synchronous io wait anchor
*/
-void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages)
+void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
+ void (*end)(const struct lu_env *, struct cl_sync_io *))
{
+ memset(anchor, 0, sizeof(*anchor));
init_waitqueue_head(&anchor->csi_waitq);
- atomic_set(&anchor->csi_sync_nr, nrpages);
- atomic_set(&anchor->csi_barrier, nrpages > 0);
+ atomic_set(&anchor->csi_sync_nr, nr);
+ atomic_set(&anchor->csi_barrier, nr > 0);
anchor->csi_sync_rc = 0;
+ anchor->csi_end_io = end;
+ LASSERT(end);
}
EXPORT_SYMBOL(cl_sync_io_init);
/**
- * Wait until all transfer completes. Transfer completion routine has to call
- * cl_sync_io_note() for every page.
+ * Wait until all IO completes. Transfer completion routine has to call
+ * cl_sync_io_note() for every entity.
*/
-int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct cl_sync_io *anchor,
+int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
long timeout)
{
struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout),
@@ -1542,11 +1373,9 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
atomic_read(&anchor->csi_sync_nr) == 0,
&lwi);
if (rc < 0) {
- CERROR("SYNC IO failed with error: %d, try to cancel %d remaining pages\n",
+ CERROR("IO failed: %d, still wait for %d remaining entries\n",
rc, atomic_read(&anchor->csi_sync_nr));
- (void)cl_io_cancel(env, io, queue);
-
lwi = (struct l_wait_info) { 0 };
(void)l_wait_event(anchor->csi_waitq,
atomic_read(&anchor->csi_sync_nr) == 0,
@@ -1555,14 +1384,12 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
rc = anchor->csi_sync_rc;
}
LASSERT(atomic_read(&anchor->csi_sync_nr) == 0);
- cl_page_list_assume(env, io, queue);
/* wait until cl_sync_io_note() has done wakeup */
while (unlikely(atomic_read(&anchor->csi_barrier) != 0)) {
cpu_relax();
}
- POISON(anchor, 0x5a, sizeof(*anchor));
return rc;
}
EXPORT_SYMBOL(cl_sync_io_wait);
@@ -1570,7 +1397,8 @@ EXPORT_SYMBOL(cl_sync_io_wait);
/**
* Indicate that transfer of a single page completed.
*/
-void cl_sync_io_note(struct cl_sync_io *anchor, int ioret)
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+ int ioret)
{
if (anchor->csi_sync_rc == 0 && ioret < 0)
anchor->csi_sync_rc = ioret;
@@ -1581,9 +1409,9 @@ void cl_sync_io_note(struct cl_sync_io *anchor, int ioret)
*/
LASSERT(atomic_read(&anchor->csi_sync_nr) > 0);
if (atomic_dec_and_test(&anchor->csi_sync_nr)) {
- wake_up_all(&anchor->csi_waitq);
- /* it's safe to nuke or reuse anchor now */
- atomic_set(&anchor->csi_barrier, 0);
+ LASSERT(anchor->csi_end_io);
+ anchor->csi_end_io(env, anchor);
+ /* Can't access anchor any more */
}
}
EXPORT_SYMBOL(cl_sync_io_note);
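The cl_io.c hunks above also rework the cl_sync_io interface: cl_sync_io_init() now takes an explicit end-io callback, while cl_sync_io_wait() and cl_sync_io_note() take the lu_env and operate on the anchor alone. Below is a minimal usage sketch mirroring the new cl_io_submit_sync() flow shown above; sync_submit_sketch() is an invented name, and the sketch omits the cl_page_list_assume() step for brevity.

/* Illustrative only -- mirrors the reworked cl_io_submit_sync() above. */
static int sync_submit_sketch(const struct lu_env *env, struct cl_io *io,
			      enum cl_req_type iot, struct cl_2queue *queue,
			      long timeout)
{
	struct cl_sync_io anchor;
	struct cl_page *pg;
	int rc;

	cl_page_list_for_each(pg, &queue->c2_qin)
		pg->cp_sync_io = &anchor;

	/* one counter per queued page; cl_sync_io_end() wakes the waiter */
	cl_sync_io_init(&anchor, queue->c2_qin.pl_nr, &cl_sync_io_end);

	rc = cl_io_submit_rw(env, io, iot, queue);
	if (rc == 0) {
		/*
		 * Pages still on c2_qin were not sent; drop their counters
		 * by hand, then wait for the in-flight pages on c2_qout.
		 */
		cl_page_list_for_each(pg, &queue->c2_qin) {
			pg->cp_sync_io = NULL;
			cl_sync_io_note(env, &anchor, 1);
		}
		rc = cl_sync_io_wait(env, &anchor, timeout);
	} else {
		cl_page_list_for_each(pg, &queue->c2_qin)
			pg->cp_sync_io = NULL;
	}
	return rc;
}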
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
index aec644eb4db9..26a576b63a72 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_lock.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
@@ -36,6 +36,7 @@
* Client Extent Lock.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_CLASS
@@ -47,138 +48,18 @@
#include "../include/cl_object.h"
#include "cl_internal.h"
-/** Lock class of cl_lock::cll_guard */
-static struct lock_class_key cl_lock_guard_class;
-static struct kmem_cache *cl_lock_kmem;
-
-static struct lu_kmem_descr cl_lock_caches[] = {
- {
- .ckd_cache = &cl_lock_kmem,
- .ckd_name = "cl_lock_kmem",
- .ckd_size = sizeof (struct cl_lock)
- },
- {
- .ckd_cache = NULL
- }
-};
-
-#define CS_LOCK_INC(o, item)
-#define CS_LOCK_DEC(o, item)
-#define CS_LOCKSTATE_INC(o, state)
-#define CS_LOCKSTATE_DEC(o, state)
-
-/**
- * Basic lock invariant that is maintained at all times. Caller either has a
- * reference to \a lock, or somehow assures that \a lock cannot be freed.
- *
- * \see cl_lock_invariant()
- */
-static int cl_lock_invariant_trusted(const struct lu_env *env,
- const struct cl_lock *lock)
-{
- return ergo(lock->cll_state == CLS_FREEING, lock->cll_holds == 0) &&
- atomic_read(&lock->cll_ref) >= lock->cll_holds &&
- lock->cll_holds >= lock->cll_users &&
- lock->cll_holds >= 0 &&
- lock->cll_users >= 0 &&
- lock->cll_depth >= 0;
-}
-
-/**
- * Stronger lock invariant, checking that caller has a reference on a lock.
- *
- * \see cl_lock_invariant_trusted()
- */
-static int cl_lock_invariant(const struct lu_env *env,
- const struct cl_lock *lock)
-{
- int result;
-
- result = atomic_read(&lock->cll_ref) > 0 &&
- cl_lock_invariant_trusted(env, lock);
- if (!result && env)
- CL_LOCK_DEBUG(D_ERROR, env, lock, "invariant broken\n");
- return result;
-}
-
-/**
- * Returns lock "nesting": 0 for a top-lock and 1 for a sub-lock.
- */
-static enum clt_nesting_level cl_lock_nesting(const struct cl_lock *lock)
-{
- return cl_object_header(lock->cll_descr.cld_obj)->coh_nesting;
-}
-
-/**
- * Returns a set of counters for this lock, depending on a lock nesting.
- */
-static struct cl_thread_counters *cl_lock_counters(const struct lu_env *env,
- const struct cl_lock *lock)
-{
- struct cl_thread_info *info;
- enum clt_nesting_level nesting;
-
- info = cl_env_info(env);
- nesting = cl_lock_nesting(lock);
- LASSERT(nesting < ARRAY_SIZE(info->clt_counters));
- return &info->clt_counters[nesting];
-}
-
static void cl_lock_trace0(int level, const struct lu_env *env,
const char *prefix, const struct cl_lock *lock,
const char *func, const int line)
{
struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj);
- CDEBUG(level, "%s: %p@(%d %p %d %d %d %d %d %lx)(%p/%d/%d) at %s():%d\n",
- prefix, lock, atomic_read(&lock->cll_ref),
- lock->cll_guarder, lock->cll_depth,
- lock->cll_state, lock->cll_error, lock->cll_holds,
- lock->cll_users, lock->cll_flags,
- env, h->coh_nesting, cl_lock_nr_mutexed(env),
- func, line);
+ CDEBUG(level, "%s: %p (%p/%d) at %s():%d\n",
+ prefix, lock, env, h->coh_nesting, func, line);
}
-
-#define cl_lock_trace(level, env, prefix, lock) \
+#define cl_lock_trace(level, env, prefix, lock) \
cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__)
-#define RETIP ((unsigned long)__builtin_return_address(0))
-
-#ifdef CONFIG_LOCKDEP
-static struct lock_class_key cl_lock_key;
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{
- lockdep_set_class_and_name(lock, &cl_lock_key, "EXT");
-}
-
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
- struct cl_lock *lock, __u32 enqflags)
-{
- cl_lock_counters(env, lock)->ctc_nr_locks_acquired++;
- lock_map_acquire(&lock->dep_map);
-}
-
-static void cl_lock_lockdep_release(const struct lu_env *env,
- struct cl_lock *lock)
-{
- cl_lock_counters(env, lock)->ctc_nr_locks_acquired--;
- lock_release(&lock->dep_map, 0, RETIP);
-}
-
-#else /* !CONFIG_LOCKDEP */
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{}
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
- struct cl_lock *lock, __u32 enqflags)
-{}
-static void cl_lock_lockdep_release(const struct lu_env *env,
- struct cl_lock *lock)
-{}
-
-#endif /* !CONFIG_LOCKDEP */
-
/**
* Adds lock slice to the compound lock.
*
@@ -199,62 +80,10 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
}
EXPORT_SYMBOL(cl_lock_slice_add);
-/**
- * Returns true iff a lock with the mode \a has provides at least the same
- * guarantees as a lock with the mode \a need.
- */
-int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need)
-{
- LINVRNT(need == CLM_READ || need == CLM_WRITE ||
- need == CLM_PHANTOM || need == CLM_GROUP);
- LINVRNT(has == CLM_READ || has == CLM_WRITE ||
- has == CLM_PHANTOM || has == CLM_GROUP);
- CLASSERT(CLM_PHANTOM < CLM_READ);
- CLASSERT(CLM_READ < CLM_WRITE);
- CLASSERT(CLM_WRITE < CLM_GROUP);
-
- if (has != CLM_GROUP)
- return need <= has;
- else
- return need == has;
-}
-EXPORT_SYMBOL(cl_lock_mode_match);
-
-/**
- * Returns true iff extent portions of lock descriptions match.
- */
-int cl_lock_ext_match(const struct cl_lock_descr *has,
- const struct cl_lock_descr *need)
-{
- return
- has->cld_start <= need->cld_start &&
- has->cld_end >= need->cld_end &&
- cl_lock_mode_match(has->cld_mode, need->cld_mode) &&
- (has->cld_mode != CLM_GROUP || has->cld_gid == need->cld_gid);
-}
-EXPORT_SYMBOL(cl_lock_ext_match);
-
-/**
- * Returns true iff a lock with the description \a has provides at least the
- * same guarantees as a lock with the description \a need.
- */
-int cl_lock_descr_match(const struct cl_lock_descr *has,
- const struct cl_lock_descr *need)
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock)
{
- return
- cl_object_same(has->cld_obj, need->cld_obj) &&
- cl_lock_ext_match(has, need);
-}
-EXPORT_SYMBOL(cl_lock_descr_match);
+ cl_lock_trace(D_DLMTRACE, env, "destroy lock", lock);
-static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_object *obj = lock->cll_descr.cld_obj;
-
- LINVRNT(!cl_lock_is_mutexed(lock));
-
- cl_lock_trace(D_DLMTRACE, env, "free lock", lock);
- might_sleep();
while (!list_empty(&lock->cll_layers)) {
struct cl_lock_slice *slice;
@@ -263,350 +92,36 @@ static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
list_del_init(lock->cll_layers.next);
slice->cls_ops->clo_fini(env, slice);
}
- CS_LOCK_DEC(obj, total);
- CS_LOCKSTATE_DEC(obj, lock->cll_state);
- lu_object_ref_del_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock", lock);
- cl_object_put(env, obj);
- lu_ref_fini(&lock->cll_reference);
- lu_ref_fini(&lock->cll_holders);
- mutex_destroy(&lock->cll_guard);
- kmem_cache_free(cl_lock_kmem, lock);
-}
-
-/**
- * Releases a reference on a lock.
- *
- * When last reference is released, lock is returned to the cache, unless it
- * is in cl_lock_state::CLS_FREEING state, in which case it is destroyed
- * immediately.
- *
- * \see cl_object_put(), cl_page_put()
- */
-void cl_lock_put(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_object *obj;
-
- LINVRNT(cl_lock_invariant(env, lock));
- obj = lock->cll_descr.cld_obj;
- LINVRNT(obj);
-
- CDEBUG(D_TRACE, "releasing reference: %d %p %lu\n",
- atomic_read(&lock->cll_ref), lock, RETIP);
-
- if (atomic_dec_and_test(&lock->cll_ref)) {
- if (lock->cll_state == CLS_FREEING) {
- LASSERT(list_empty(&lock->cll_linkage));
- cl_lock_free(env, lock);
- }
- CS_LOCK_DEC(obj, busy);
- }
-}
-EXPORT_SYMBOL(cl_lock_put);
-
-/**
- * Acquires an additional reference to a lock.
- *
- * This can be called only by caller already possessing a reference to \a
- * lock.
- *
- * \see cl_object_get(), cl_page_get()
- */
-void cl_lock_get(struct cl_lock *lock)
-{
- LINVRNT(cl_lock_invariant(NULL, lock));
- CDEBUG(D_TRACE, "acquiring reference: %d %p %lu\n",
- atomic_read(&lock->cll_ref), lock, RETIP);
- atomic_inc(&lock->cll_ref);
-}
-EXPORT_SYMBOL(cl_lock_get);
-
-/**
- * Acquires a reference to a lock.
- *
- * This is much like cl_lock_get(), except that this function can be used to
- * acquire initial reference to the cached lock. Caller has to deal with all
- * possible races. Use with care!
- *
- * \see cl_page_get_trust()
- */
-void cl_lock_get_trust(struct cl_lock *lock)
-{
- CDEBUG(D_TRACE, "acquiring trusted reference: %d %p %lu\n",
- atomic_read(&lock->cll_ref), lock, RETIP);
- if (atomic_inc_return(&lock->cll_ref) == 1)
- CS_LOCK_INC(lock->cll_descr.cld_obj, busy);
-}
-EXPORT_SYMBOL(cl_lock_get_trust);
-
-/**
- * Helper function destroying the lock that wasn't completely initialized.
- *
- * Other threads can acquire references to the top-lock through its
- * sub-locks. Hence, it cannot be cl_lock_free()-ed immediately.
- */
-static void cl_lock_finish(const struct lu_env *env, struct cl_lock *lock)
-{
- cl_lock_mutex_get(env, lock);
- cl_lock_cancel(env, lock);
- cl_lock_delete(env, lock);
- cl_lock_mutex_put(env, lock);
- cl_lock_put(env, lock);
-}
-
-static struct cl_lock *cl_lock_alloc(const struct lu_env *env,
- struct cl_object *obj,
- const struct cl_io *io,
- const struct cl_lock_descr *descr)
-{
- struct cl_lock *lock;
- struct lu_object_header *head;
-
- lock = kmem_cache_zalloc(cl_lock_kmem, GFP_NOFS);
- if (lock) {
- atomic_set(&lock->cll_ref, 1);
- lock->cll_descr = *descr;
- lock->cll_state = CLS_NEW;
- cl_object_get(obj);
- lu_object_ref_add_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock",
- lock);
- INIT_LIST_HEAD(&lock->cll_layers);
- INIT_LIST_HEAD(&lock->cll_linkage);
- INIT_LIST_HEAD(&lock->cll_inclosure);
- lu_ref_init(&lock->cll_reference);
- lu_ref_init(&lock->cll_holders);
- mutex_init(&lock->cll_guard);
- lockdep_set_class(&lock->cll_guard, &cl_lock_guard_class);
- init_waitqueue_head(&lock->cll_wq);
- head = obj->co_lu.lo_header;
- CS_LOCKSTATE_INC(obj, CLS_NEW);
- CS_LOCK_INC(obj, total);
- CS_LOCK_INC(obj, create);
- cl_lock_lockdep_init(lock);
- list_for_each_entry(obj, &head->loh_layers, co_lu.lo_linkage) {
- int err;
-
- err = obj->co_ops->coo_lock_init(env, obj, lock, io);
- if (err != 0) {
- cl_lock_finish(env, lock);
- lock = ERR_PTR(err);
- break;
- }
- }
- } else
- lock = ERR_PTR(-ENOMEM);
- return lock;
-}
-
-/**
- * Transfer the lock into INTRANSIT state and return the original state.
- *
- * \pre state: CLS_CACHED, CLS_HELD or CLS_ENQUEUED
- * \post state: CLS_INTRANSIT
- * \see CLS_INTRANSIT
- */
-static enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
- struct cl_lock *lock)
-{
- enum cl_lock_state state = lock->cll_state;
-
- LASSERT(cl_lock_is_mutexed(lock));
- LASSERT(state != CLS_INTRANSIT);
- LASSERTF(state >= CLS_ENQUEUED && state <= CLS_CACHED,
- "Malformed lock state %d.\n", state);
-
- cl_lock_state_set(env, lock, CLS_INTRANSIT);
- lock->cll_intransit_owner = current;
- cl_lock_hold_add(env, lock, "intransit", current);
- return state;
-}
-
-/**
- * Exit the intransit state and restore the lock state to the original state
- */
-static void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
- enum cl_lock_state state)
-{
- LASSERT(cl_lock_is_mutexed(lock));
- LASSERT(lock->cll_state == CLS_INTRANSIT);
- LASSERT(state != CLS_INTRANSIT);
- LASSERT(lock->cll_intransit_owner == current);
-
- lock->cll_intransit_owner = NULL;
- cl_lock_state_set(env, lock, state);
- cl_lock_unhold(env, lock, "intransit", current);
-}
-
-/**
- * Checking whether the lock is intransit state
- */
-int cl_lock_is_intransit(struct cl_lock *lock)
-{
- LASSERT(cl_lock_is_mutexed(lock));
- return lock->cll_state == CLS_INTRANSIT &&
- lock->cll_intransit_owner != current;
-}
-EXPORT_SYMBOL(cl_lock_is_intransit);
-/**
- * Returns true iff lock is "suitable" for given io. E.g., locks acquired by
- * truncate and O_APPEND cannot be reused for read/non-append-write, as they
- * cover multiple stripes and can trigger cascading timeouts.
- */
-static int cl_lock_fits_into(const struct lu_env *env,
- const struct cl_lock *lock,
- const struct cl_lock_descr *need,
- const struct cl_io *io)
-{
- const struct cl_lock_slice *slice;
-
- LINVRNT(cl_lock_invariant_trusted(env, lock));
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_fits_into &&
- !slice->cls_ops->clo_fits_into(env, slice, need, io))
- return 0;
- }
- return 1;
+ POISON(lock, 0x5a, sizeof(*lock));
}
+EXPORT_SYMBOL(cl_lock_fini);
-static struct cl_lock *cl_lock_lookup(const struct lu_env *env,
- struct cl_object *obj,
- const struct cl_io *io,
- const struct cl_lock_descr *need)
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+ const struct cl_io *io)
{
- struct cl_lock *lock;
- struct cl_object_header *head;
-
- head = cl_object_header(obj);
- assert_spin_locked(&head->coh_lock_guard);
- CS_LOCK_INC(obj, lookup);
- list_for_each_entry(lock, &head->coh_locks, cll_linkage) {
- int matched;
-
- matched = cl_lock_ext_match(&lock->cll_descr, need) &&
- lock->cll_state < CLS_FREEING &&
- lock->cll_error == 0 &&
- !(lock->cll_flags & CLF_CANCELLED) &&
- cl_lock_fits_into(env, lock, need, io);
- CDEBUG(D_DLMTRACE, "has: "DDESCR"(%d) need: "DDESCR": %d\n",
- PDESCR(&lock->cll_descr), lock->cll_state, PDESCR(need),
- matched);
- if (matched) {
- cl_lock_get_trust(lock);
- CS_LOCK_INC(obj, hit);
- return lock;
- }
- }
- return NULL;
-}
-
-/**
- * Returns a lock matching description \a need.
- *
- * This is the main entry point into the cl_lock caching interface. First, a
- * cache (implemented as a per-object linked list) is consulted. If lock is
- * found there, it is returned immediately. Otherwise new lock is allocated
- * and returned. In any case, additional reference to lock is acquired.
- *
- * \see cl_object_find(), cl_page_find()
- */
-static struct cl_lock *cl_lock_find(const struct lu_env *env,
- const struct cl_io *io,
- const struct cl_lock_descr *need)
-{
- struct cl_object_header *head;
- struct cl_object *obj;
- struct cl_lock *lock;
-
- obj = need->cld_obj;
- head = cl_object_header(obj);
-
- spin_lock(&head->coh_lock_guard);
- lock = cl_lock_lookup(env, obj, io, need);
- spin_unlock(&head->coh_lock_guard);
+ struct cl_object *obj = lock->cll_descr.cld_obj;
+ struct cl_object *scan;
+ int result = 0;
- if (!lock) {
- lock = cl_lock_alloc(env, obj, io, need);
- if (!IS_ERR(lock)) {
- struct cl_lock *ghost;
+ /* Make sure cl_lock::cll_descr is initialized. */
+ LASSERT(obj);
- spin_lock(&head->coh_lock_guard);
- ghost = cl_lock_lookup(env, obj, io, need);
- if (!ghost) {
- cl_lock_get_trust(lock);
- list_add_tail(&lock->cll_linkage,
- &head->coh_locks);
- spin_unlock(&head->coh_lock_guard);
- CS_LOCK_INC(obj, busy);
- } else {
- spin_unlock(&head->coh_lock_guard);
- /*
- * Other threads can acquire references to the
- * top-lock through its sub-locks. Hence, it
- * cannot be cl_lock_free()-ed immediately.
- */
- cl_lock_finish(env, lock);
- lock = ghost;
- }
+ INIT_LIST_HEAD(&lock->cll_layers);
+ list_for_each_entry(scan, &obj->co_lu.lo_header->loh_layers,
+ co_lu.lo_linkage) {
+ result = scan->co_ops->coo_lock_init(env, scan, lock, io);
+ if (result != 0) {
+ cl_lock_fini(env, lock);
+ break;
}
}
- return lock;
-}
-/**
- * Returns existing lock matching given description. This is similar to
- * cl_lock_find() except that no new lock is created, and returned lock is
- * guaranteed to be in enum cl_lock_state::CLS_HELD state.
- */
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source)
-{
- struct cl_object_header *head;
- struct cl_object *obj;
- struct cl_lock *lock;
-
- obj = need->cld_obj;
- head = cl_object_header(obj);
-
- do {
- spin_lock(&head->coh_lock_guard);
- lock = cl_lock_lookup(env, obj, io, need);
- spin_unlock(&head->coh_lock_guard);
- if (!lock)
- return NULL;
-
- cl_lock_mutex_get(env, lock);
- if (lock->cll_state == CLS_INTRANSIT)
- /* Don't care return value. */
- cl_lock_state_wait(env, lock);
- if (lock->cll_state == CLS_FREEING) {
- cl_lock_mutex_put(env, lock);
- cl_lock_put(env, lock);
- lock = NULL;
- }
- } while (!lock);
-
- cl_lock_hold_add(env, lock, scope, source);
- cl_lock_user_add(env, lock);
- if (lock->cll_state == CLS_CACHED)
- cl_use_try(env, lock, 1);
- if (lock->cll_state == CLS_HELD) {
- cl_lock_mutex_put(env, lock);
- cl_lock_lockdep_acquire(env, lock, 0);
- cl_lock_put(env, lock);
- } else {
- cl_unuse_try(env, lock);
- cl_lock_unhold(env, lock, scope, source);
- cl_lock_mutex_put(env, lock);
- cl_lock_put(env, lock);
- lock = NULL;
- }
-
- return lock;
+ return result;
}
-EXPORT_SYMBOL(cl_lock_peek);
+EXPORT_SYMBOL(cl_lock_init);
/**
- * Returns a slice within a lock, corresponding to the given layer in the
+ * Returns a slice with a lock, corresponding to the given layer in the
* device stack.
*
* \see cl_page_at()
@@ -616,8 +131,6 @@ const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
{
const struct cl_lock_slice *slice;
- LINVRNT(cl_lock_invariant_trusted(NULL, lock));
-
list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype)
return slice;
@@ -626,1537 +139,96 @@ const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
}
EXPORT_SYMBOL(cl_lock_at);
-static void cl_lock_mutex_tail(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_thread_counters *counters;
-
- counters = cl_lock_counters(env, lock);
- lock->cll_depth++;
- counters->ctc_nr_locks_locked++;
- lu_ref_add(&counters->ctc_locks_locked, "cll_guard", lock);
- cl_lock_trace(D_TRACE, env, "got mutex", lock);
-}
-
-/**
- * Locks cl_lock object.
- *
- * This is used to manipulate cl_lock fields, and to serialize state
- * transitions in the lock state machine.
- *
- * \post cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_put()
- */
-void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_invariant(env, lock));
-
- if (lock->cll_guarder == current) {
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(lock->cll_depth > 0);
- } else {
- struct cl_object_header *hdr;
- struct cl_thread_info *info;
- int i;
-
- LINVRNT(lock->cll_guarder != current);
- hdr = cl_object_header(lock->cll_descr.cld_obj);
- /*
- * Check that mutices are taken in the bottom-to-top order.
- */
- info = cl_env_info(env);
- for (i = 0; i < hdr->coh_nesting; ++i)
- LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0);
- mutex_lock_nested(&lock->cll_guard, hdr->coh_nesting);
- lock->cll_guarder = current;
- LINVRNT(lock->cll_depth == 0);
- }
- cl_lock_mutex_tail(env, lock);
-}
-EXPORT_SYMBOL(cl_lock_mutex_get);
-
-/**
- * Try-locks cl_lock object.
- *
- * \retval 0 \a lock was successfully locked
- *
- * \retval -EBUSY \a lock cannot be locked right now
- *
- * \post ergo(result == 0, cl_lock_is_mutexed(lock))
- *
- * \see cl_lock_mutex_get()
- */
-static int cl_lock_mutex_try(const struct lu_env *env, struct cl_lock *lock)
-{
- int result;
-
- LINVRNT(cl_lock_invariant_trusted(env, lock));
-
- result = 0;
- if (lock->cll_guarder == current) {
- LINVRNT(lock->cll_depth > 0);
- cl_lock_mutex_tail(env, lock);
- } else if (mutex_trylock(&lock->cll_guard)) {
- LINVRNT(lock->cll_depth == 0);
- lock->cll_guarder = current;
- cl_lock_mutex_tail(env, lock);
- } else
- result = -EBUSY;
- return result;
-}
-
-/**
- * Unlocks cl_lock object.
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_get()
- */
-void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_thread_counters *counters;
-
- LINVRNT(cl_lock_invariant(env, lock));
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(lock->cll_guarder == current);
- LINVRNT(lock->cll_depth > 0);
-
- counters = cl_lock_counters(env, lock);
- LINVRNT(counters->ctc_nr_locks_locked > 0);
-
- cl_lock_trace(D_TRACE, env, "put mutex", lock);
- lu_ref_del(&counters->ctc_locks_locked, "cll_guard", lock);
- counters->ctc_nr_locks_locked--;
- if (--lock->cll_depth == 0) {
- lock->cll_guarder = NULL;
- mutex_unlock(&lock->cll_guard);
- }
-}
-EXPORT_SYMBOL(cl_lock_mutex_put);
-
-/**
- * Returns true iff lock's mutex is owned by the current thread.
- */
-int cl_lock_is_mutexed(struct cl_lock *lock)
-{
- return lock->cll_guarder == current;
-}
-EXPORT_SYMBOL(cl_lock_is_mutexed);
-
-/**
- * Returns number of cl_lock mutices held by the current thread (environment).
- */
-int cl_lock_nr_mutexed(const struct lu_env *env)
-{
- struct cl_thread_info *info;
- int i;
- int locked;
-
- /*
- * NOTE: if summation across all nesting levels (currently 2) proves
- * too expensive, a summary counter can be added to
- * struct cl_thread_info.
- */
- info = cl_env_info(env);
- for (i = 0, locked = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
- locked += info->clt_counters[i].ctc_nr_locks_locked;
- return locked;
-}
-EXPORT_SYMBOL(cl_lock_nr_mutexed);
-
-static void cl_lock_cancel0(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- if (!(lock->cll_flags & CLF_CANCELLED)) {
- const struct cl_lock_slice *slice;
-
- lock->cll_flags |= CLF_CANCELLED;
- list_for_each_entry_reverse(slice, &lock->cll_layers,
- cls_linkage) {
- if (slice->cls_ops->clo_cancel)
- slice->cls_ops->clo_cancel(env, slice);
- }
- }
-}
-
-static void cl_lock_delete0(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_object_header *head;
- const struct cl_lock_slice *slice;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- if (lock->cll_state < CLS_FREEING) {
- bool in_cache;
-
- LASSERT(lock->cll_state != CLS_INTRANSIT);
- cl_lock_state_set(env, lock, CLS_FREEING);
-
- head = cl_object_header(lock->cll_descr.cld_obj);
-
- spin_lock(&head->coh_lock_guard);
- in_cache = !list_empty(&lock->cll_linkage);
- if (in_cache)
- list_del_init(&lock->cll_linkage);
- spin_unlock(&head->coh_lock_guard);
-
- if (in_cache) /* coh_locks cache holds a refcount. */
- cl_lock_put(env, lock);
-
- /*
- * From now on, no new references to this lock can be acquired
- * by cl_lock_lookup().
- */
- list_for_each_entry_reverse(slice, &lock->cll_layers,
- cls_linkage) {
- if (slice->cls_ops->clo_delete)
- slice->cls_ops->clo_delete(env, slice);
- }
- /*
- * From now on, no new references to this lock can be acquired
- * by layer-specific means (like a pointer from struct
- * ldlm_lock in osc, or a pointer from top-lock to sub-lock in
- * lov).
- *
- * Lock will be finally freed in cl_lock_put() when last of
- * existing references goes away.
- */
- }
-}
-
-/**
- * Mod(ifie)s cl_lock::cll_holds counter for a given lock. Also, for a
- * top-lock (nesting == 0) accounts for this modification in the per-thread
- * debugging counters. Sub-lock holds can be released by a thread different
- * from one that acquired it.
- */
-static void cl_lock_hold_mod(const struct lu_env *env, struct cl_lock *lock,
- int delta)
-{
- struct cl_thread_counters *counters;
- enum clt_nesting_level nesting;
-
- lock->cll_holds += delta;
- nesting = cl_lock_nesting(lock);
- if (nesting == CNL_TOP) {
- counters = &cl_env_info(env)->clt_counters[CNL_TOP];
- counters->ctc_nr_held += delta;
- LASSERT(counters->ctc_nr_held >= 0);
- }
-}
-
-/**
- * Mod(ifie)s cl_lock::cll_users counter for a given lock. See
- * cl_lock_hold_mod() for the explanation of the debugging code.
- */
-static void cl_lock_used_mod(const struct lu_env *env, struct cl_lock *lock,
- int delta)
-{
- struct cl_thread_counters *counters;
- enum clt_nesting_level nesting;
-
- lock->cll_users += delta;
- nesting = cl_lock_nesting(lock);
- if (nesting == CNL_TOP) {
- counters = &cl_env_info(env)->clt_counters[CNL_TOP];
- counters->ctc_nr_used += delta;
- LASSERT(counters->ctc_nr_used >= 0);
- }
-}
-
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_holds > 0);
-
- cl_lock_trace(D_DLMTRACE, env, "hold release lock", lock);
- lu_ref_del(&lock->cll_holders, scope, source);
- cl_lock_hold_mod(env, lock, -1);
- if (lock->cll_holds == 0) {
- CL_LOCK_ASSERT(lock->cll_state != CLS_HELD, env, lock);
- if (lock->cll_descr.cld_mode == CLM_PHANTOM ||
- lock->cll_descr.cld_mode == CLM_GROUP ||
- lock->cll_state != CLS_CACHED)
- /*
- * If lock is still phantom or grouplock when user is
- * done with it---destroy the lock.
- */
- lock->cll_flags |= CLF_CANCELPEND|CLF_DOOMED;
- if (lock->cll_flags & CLF_CANCELPEND) {
- lock->cll_flags &= ~CLF_CANCELPEND;
- cl_lock_cancel0(env, lock);
- }
- if (lock->cll_flags & CLF_DOOMED) {
- /* no longer doomed: it's dead... Jim. */
- lock->cll_flags &= ~CLF_DOOMED;
- cl_lock_delete0(env, lock);
- }
- }
-}
-EXPORT_SYMBOL(cl_lock_hold_release);
-
-/**
- * Waits until lock state is changed.
- *
- * This function is called with cl_lock mutex locked, atomically releases
- * mutex and goes to sleep, waiting for a lock state change (signaled by
- * cl_lock_signal()), and re-acquires the mutex before return.
- *
- * This function is used to wait until lock state machine makes some progress
- * and to emulate synchronous operations on top of asynchronous lock
- * interface.
- *
- * \retval -EINTR wait was interrupted
- *
- * \retval 0 wait wasn't interrupted
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_signal()
- */
-int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock)
-{
- wait_queue_t waiter;
- sigset_t blocked;
- int result;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_depth == 1);
- LASSERT(lock->cll_state != CLS_FREEING); /* too late to wait */
-
- cl_lock_trace(D_DLMTRACE, env, "state wait lock", lock);
- result = lock->cll_error;
- if (result == 0) {
- /* To avoid being interrupted by the 'non-fatal' signals
- * (SIGCHLD, for instance), we'd block them temporarily.
- * LU-305
- */
- blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
-
- init_waitqueue_entry(&waiter, current);
- add_wait_queue(&lock->cll_wq, &waiter);
- set_current_state(TASK_INTERRUPTIBLE);
- cl_lock_mutex_put(env, lock);
-
- LASSERT(cl_lock_nr_mutexed(env) == 0);
-
- /* Returning ERESTARTSYS instead of EINTR so syscalls
- * can be restarted if signals are pending here
- */
- result = -ERESTARTSYS;
- if (likely(!OBD_FAIL_CHECK(OBD_FAIL_LOCK_STATE_WAIT_INTR))) {
- schedule();
- if (!cfs_signal_pending())
- result = 0;
- }
-
- cl_lock_mutex_get(env, lock);
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&lock->cll_wq, &waiter);
-
- /* Restore old blocked signals */
- cfs_restore_sigs(blocked);
- }
- return result;
-}
-EXPORT_SYMBOL(cl_lock_state_wait);
-
-static void cl_lock_state_signal(const struct lu_env *env, struct cl_lock *lock,
- enum cl_lock_state state)
-{
- const struct cl_lock_slice *slice;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage)
- if (slice->cls_ops->clo_state)
- slice->cls_ops->clo_state(env, slice, state);
- wake_up_all(&lock->cll_wq);
-}
-
-/**
- * Notifies waiters that lock state changed.
- *
- * Wakes up all waiters sleeping in cl_lock_state_wait(), also notifies all
- * layers about state change by calling cl_lock_operations::clo_state()
- * top-to-bottom.
- */
-void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock)
-{
- cl_lock_trace(D_DLMTRACE, env, "state signal lock", lock);
- cl_lock_state_signal(env, lock, lock->cll_state);
-}
-EXPORT_SYMBOL(cl_lock_signal);
-
-/**
- * Changes lock state.
- *
- * This function is invoked to notify layers that lock state changed, possible
- * as a result of an asynchronous event such as call-back reception.
- *
- * \post lock->cll_state == state
- *
- * \see cl_lock_operations::clo_state()
- */
-void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
- enum cl_lock_state state)
-{
- LASSERT(lock->cll_state <= state ||
- (lock->cll_state == CLS_CACHED &&
- (state == CLS_HELD || /* lock found in cache */
- state == CLS_NEW || /* sub-lock canceled */
- state == CLS_INTRANSIT)) ||
- /* lock is in transit state */
- lock->cll_state == CLS_INTRANSIT);
-
- if (lock->cll_state != state) {
- CS_LOCKSTATE_DEC(lock->cll_descr.cld_obj, lock->cll_state);
- CS_LOCKSTATE_INC(lock->cll_descr.cld_obj, state);
-
- cl_lock_state_signal(env, lock, state);
- lock->cll_state = state;
- }
-}
-EXPORT_SYMBOL(cl_lock_state_set);
-
-static int cl_unuse_try_internal(const struct lu_env *env, struct cl_lock *lock)
-{
- const struct cl_lock_slice *slice;
- int result;
-
- do {
- result = 0;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_state == CLS_INTRANSIT);
-
- result = -ENOSYS;
- list_for_each_entry_reverse(slice, &lock->cll_layers,
- cls_linkage) {
- if (slice->cls_ops->clo_unuse) {
- result = slice->cls_ops->clo_unuse(env, slice);
- if (result != 0)
- break;
- }
- }
- LASSERT(result != -ENOSYS);
- } while (result == CLO_REPEAT);
-
- return result;
-}
-
-/**
- * Yanks lock from the cache (cl_lock_state::CLS_CACHED state) by calling
- * cl_lock_operations::clo_use() top-to-bottom to notify layers.
- * @atomic = 1, it must unuse the lock to recovery the lock to keep the
- * use process atomic
- */
-int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic)
+void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
{
const struct cl_lock_slice *slice;
- int result;
- enum cl_lock_state state;
-
- cl_lock_trace(D_DLMTRACE, env, "use lock", lock);
-
- LASSERT(lock->cll_state == CLS_CACHED);
- if (lock->cll_error)
- return lock->cll_error;
-
- result = -ENOSYS;
- state = cl_lock_intransit(env, lock);
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_use) {
- result = slice->cls_ops->clo_use(env, slice);
- if (result != 0)
- break;
- }
- }
- LASSERT(result != -ENOSYS);
-
- LASSERTF(lock->cll_state == CLS_INTRANSIT, "Wrong state %d.\n",
- lock->cll_state);
-
- if (result == 0) {
- state = CLS_HELD;
- } else {
- if (result == -ESTALE) {
- /*
- * ESTALE means sublock being cancelled
- * at this time, and set lock state to
- * be NEW here and ask the caller to repeat.
- */
- state = CLS_NEW;
- result = CLO_REPEAT;
- }
-
- /* @atomic means back-off-on-failure. */
- if (atomic) {
- int rc;
-
- rc = cl_unuse_try_internal(env, lock);
- /* Vet the results. */
- if (rc < 0 && result > 0)
- result = rc;
- }
+ cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
+ list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_ops->clo_cancel)
+ slice->cls_ops->clo_cancel(env, slice);
}
- cl_lock_extransit(env, lock, state);
- return result;
}
-EXPORT_SYMBOL(cl_use_try);
+EXPORT_SYMBOL(cl_lock_cancel);
/**
- * Helper for cl_enqueue_try() that calls ->clo_enqueue() across all layers
- * top-to-bottom.
+ * Enqueue a lock.
+ * \param anchor: if we need to wait for resources before getting the lock,
+ * use @anchor for the purpose.
+ * \retval 0 enqueue successfully
+ * \retval <0 error code
*/
-static int cl_enqueue_kick(const struct lu_env *env,
- struct cl_lock *lock,
- struct cl_io *io, __u32 flags)
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock *lock, struct cl_sync_io *anchor)
{
- int result;
const struct cl_lock_slice *slice;
+ int rc = -ENOSYS;
- result = -ENOSYS;
list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_enqueue) {
- result = slice->cls_ops->clo_enqueue(env,
- slice, io, flags);
- if (result != 0)
- break;
- }
- }
- LASSERT(result != -ENOSYS);
- return result;
-}
-
-/**
- * Tries to enqueue a lock.
- *
- * This function is called repeatedly by cl_enqueue() until either lock is
- * enqueued, or error occurs. This function does not block waiting for
- * networking communication to complete.
- *
- * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- * lock->cll_state == CLS_HELD)
- *
- * \see cl_enqueue() cl_lock_operations::clo_enqueue()
- * \see cl_lock_state::CLS_ENQUEUED
- */
-int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
- struct cl_io *io, __u32 flags)
-{
- int result;
-
- cl_lock_trace(D_DLMTRACE, env, "enqueue lock", lock);
- do {
- LINVRNT(cl_lock_is_mutexed(lock));
-
- result = lock->cll_error;
- if (result != 0)
- break;
-
- switch (lock->cll_state) {
- case CLS_NEW:
- cl_lock_state_set(env, lock, CLS_QUEUING);
- /* fall-through */
- case CLS_QUEUING:
- /* kick layers. */
- result = cl_enqueue_kick(env, lock, io, flags);
- /* For AGL case, the cl_lock::cll_state may
- * become CLS_HELD already.
- */
- if (result == 0 && lock->cll_state == CLS_QUEUING)
- cl_lock_state_set(env, lock, CLS_ENQUEUED);
- break;
- case CLS_INTRANSIT:
- LASSERT(cl_lock_is_intransit(lock));
- result = CLO_WAIT;
- break;
- case CLS_CACHED:
- /* yank lock from the cache. */
- result = cl_use_try(env, lock, 0);
- break;
- case CLS_ENQUEUED:
- case CLS_HELD:
- result = 0;
- break;
- default:
- case CLS_FREEING:
- /*
- * impossible, only held locks with increased
- * ->cll_holds can be enqueued, and they cannot be
- * freed.
- */
- LBUG();
- }
- } while (result == CLO_REPEAT);
- return result;
-}
-EXPORT_SYMBOL(cl_enqueue_try);
-
-/**
- * Cancel the conflicting lock found during previous enqueue.
- *
- * \retval 0 conflicting lock has been canceled.
- * \retval -ve error code.
- */
-int cl_lock_enqueue_wait(const struct lu_env *env,
- struct cl_lock *lock,
- int keep_mutex)
-{
- struct cl_lock *conflict;
- int rc = 0;
-
- LASSERT(cl_lock_is_mutexed(lock));
- LASSERT(lock->cll_state == CLS_QUEUING);
- LASSERT(lock->cll_conflict);
-
- conflict = lock->cll_conflict;
- lock->cll_conflict = NULL;
+ if (!slice->cls_ops->clo_enqueue)
+ continue;
- cl_lock_mutex_put(env, lock);
- LASSERT(cl_lock_nr_mutexed(env) == 0);
-
- cl_lock_mutex_get(env, conflict);
- cl_lock_trace(D_DLMTRACE, env, "enqueue wait", conflict);
- cl_lock_cancel(env, conflict);
- cl_lock_delete(env, conflict);
-
- while (conflict->cll_state != CLS_FREEING) {
- rc = cl_lock_state_wait(env, conflict);
+ rc = slice->cls_ops->clo_enqueue(env, slice, io, anchor);
if (rc != 0)
break;
- }
- cl_lock_mutex_put(env, conflict);
- lu_ref_del(&conflict->cll_reference, "cancel-wait", lock);
- cl_lock_put(env, conflict);
-
- if (keep_mutex)
- cl_lock_mutex_get(env, lock);
-
- LASSERT(rc <= 0);
- return rc;
-}
-EXPORT_SYMBOL(cl_lock_enqueue_wait);
-
-static int cl_enqueue_locked(const struct lu_env *env, struct cl_lock *lock,
- struct cl_io *io, __u32 enqflags)
-{
- int result;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_holds > 0);
-
- cl_lock_user_add(env, lock);
- do {
- result = cl_enqueue_try(env, lock, io, enqflags);
- if (result == CLO_WAIT) {
- if (lock->cll_conflict)
- result = cl_lock_enqueue_wait(env, lock, 1);
- else
- result = cl_lock_state_wait(env, lock);
- if (result == 0)
- continue;
- }
- break;
- } while (1);
- if (result != 0)
- cl_unuse_try(env, lock);
- LASSERT(ergo(result == 0 && !(enqflags & CEF_AGL),
- lock->cll_state == CLS_ENQUEUED ||
- lock->cll_state == CLS_HELD));
- return result;
-}
-
-/**
- * Tries to unlock a lock.
- *
- * This function is called to release underlying resource:
- * 1. for top lock, the resource is sublocks it held;
- * 2. for sublock, the resource is the reference to dlmlock.
- *
- * cl_unuse_try is a one-shot operation, so it must NOT return CLO_WAIT.
- *
- * \see cl_unuse() cl_lock_operations::clo_unuse()
- * \see cl_lock_state::CLS_CACHED
- */
-int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock)
-{
- int result;
- enum cl_lock_state state = CLS_NEW;
-
- cl_lock_trace(D_DLMTRACE, env, "unuse lock", lock);
-
- if (lock->cll_users > 1) {
- cl_lock_user_del(env, lock);
- return 0;
- }
-
- /* Only if the lock is in CLS_HELD or CLS_ENQUEUED state, it can hold
- * underlying resources.
- */
- if (!(lock->cll_state == CLS_HELD || lock->cll_state == CLS_ENQUEUED)) {
- cl_lock_user_del(env, lock);
- return 0;
- }
-
- /*
- * New lock users (->cll_users) are not protecting unlocking
- * from proceeding. From this point, lock eventually reaches
- * CLS_CACHED, is reinitialized to CLS_NEW or fails into
- * CLS_FREEING.
- */
- state = cl_lock_intransit(env, lock);
-
- result = cl_unuse_try_internal(env, lock);
- LASSERT(lock->cll_state == CLS_INTRANSIT);
- LASSERT(result != CLO_WAIT);
- cl_lock_user_del(env, lock);
- if (result == 0 || result == -ESTALE) {
- /*
- * Return lock back to the cache. This is the only
- * place where lock is moved into CLS_CACHED state.
- *
- * If one of ->clo_unuse() methods returned -ESTALE, lock
- * cannot be placed into cache and has to be
- * re-initialized. This happens e.g., when a sub-lock was
- * canceled while unlocking was in progress.
- */
- if (state == CLS_HELD && result == 0)
- state = CLS_CACHED;
- else
- state = CLS_NEW;
- cl_lock_extransit(env, lock, state);
-
- /*
- * Hide -ESTALE error.
- * If the lock is a glimpse lock, and it has multiple
- * stripes. Assuming that one of its sublock returned -ENAVAIL,
- * and other sublocks are matched write locks. In this case,
- * we can't set this lock to error because otherwise some of
- * its sublocks may not be canceled. This causes some dirty
- * pages won't be written to OSTs. -jay
- */
- result = 0;
- } else {
- CERROR("result = %d, this is unlikely!\n", result);
- state = CLS_NEW;
- cl_lock_extransit(env, lock, state);
- }
- return result ?: lock->cll_error;
-}
-EXPORT_SYMBOL(cl_unuse_try);
-
-static void cl_unuse_locked(const struct lu_env *env, struct cl_lock *lock)
-{
- int result;
-
- result = cl_unuse_try(env, lock);
- if (result)
- CL_LOCK_DEBUG(D_ERROR, env, lock, "unuse return %d\n", result);
-}
-
-/**
- * Unlocks a lock.
- */
-void cl_unuse(const struct lu_env *env, struct cl_lock *lock)
-{
- cl_lock_mutex_get(env, lock);
- cl_unuse_locked(env, lock);
- cl_lock_mutex_put(env, lock);
- cl_lock_lockdep_release(env, lock);
-}
-EXPORT_SYMBOL(cl_unuse);
-
-/**
- * Tries to wait for a lock.
- *
- * This function is called repeatedly by cl_wait() until either lock is
- * granted, or error occurs. This function does not block waiting for network
- * communication to complete.
- *
- * \see cl_wait() cl_lock_operations::clo_wait()
- * \see cl_lock_state::CLS_HELD
- */
-int cl_wait_try(const struct lu_env *env, struct cl_lock *lock)
-{
- const struct cl_lock_slice *slice;
- int result;
-
- cl_lock_trace(D_DLMTRACE, env, "wait lock try", lock);
- do {
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERTF(lock->cll_state == CLS_QUEUING ||
- lock->cll_state == CLS_ENQUEUED ||
- lock->cll_state == CLS_HELD ||
- lock->cll_state == CLS_INTRANSIT,
- "lock state: %d\n", lock->cll_state);
- LASSERT(lock->cll_users > 0);
- LASSERT(lock->cll_holds > 0);
-
- result = lock->cll_error;
- if (result != 0)
- break;
-
- if (cl_lock_is_intransit(lock)) {
- result = CLO_WAIT;
- break;
- }
-
- if (lock->cll_state == CLS_HELD)
- /* nothing to do */
- break;
-
- result = -ENOSYS;
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_wait) {
- result = slice->cls_ops->clo_wait(env, slice);
- if (result != 0)
- break;
- }
- }
- LASSERT(result != -ENOSYS);
- if (result == 0) {
- LASSERT(lock->cll_state != CLS_INTRANSIT);
- cl_lock_state_set(env, lock, CLS_HELD);
- }
- } while (result == CLO_REPEAT);
- return result;
-}
-EXPORT_SYMBOL(cl_wait_try);
-
-/**
- * Waits until enqueued lock is granted.
- *
- * \pre current thread or io owns a hold on the lock
- * \pre ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- * lock->cll_state == CLS_HELD)
- *
- * \post ergo(result == 0, lock->cll_state == CLS_HELD)
- */
-int cl_wait(const struct lu_env *env, struct cl_lock *lock)
-{
- int result;
-
- cl_lock_mutex_get(env, lock);
-
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERTF(lock->cll_state == CLS_ENQUEUED || lock->cll_state == CLS_HELD,
- "Wrong state %d\n", lock->cll_state);
- LASSERT(lock->cll_holds > 0);
-
- do {
- result = cl_wait_try(env, lock);
- if (result == CLO_WAIT) {
- result = cl_lock_state_wait(env, lock);
- if (result == 0)
- continue;
- }
- break;
- } while (1);
- if (result < 0) {
- cl_unuse_try(env, lock);
- cl_lock_lockdep_release(env, lock);
- }
- cl_lock_trace(D_DLMTRACE, env, "wait lock", lock);
- cl_lock_mutex_put(env, lock);
- LASSERT(ergo(result == 0, lock->cll_state == CLS_HELD));
- return result;
-}
-EXPORT_SYMBOL(cl_wait);
-
-/**
- * Executes cl_lock_operations::clo_weigh(), and sums results to estimate lock
- * value.
- */
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock)
-{
- const struct cl_lock_slice *slice;
- unsigned long pound;
- unsigned long ounce;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- pound = 0;
- list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_weigh) {
- ounce = slice->cls_ops->clo_weigh(env, slice);
- pound += ounce;
- if (pound < ounce) /* over-weight^Wflow */
- pound = ~0UL;
- }
- }
- return pound;
-}
-EXPORT_SYMBOL(cl_lock_weigh);
-
-/**
- * Notifies layers that lock description changed.
- *
- * The server can grant client a lock different from one that was requested
- * (e.g., larger in extent). This method is called when actually granted lock
- * description becomes known to let layers to accommodate for changed lock
- * description.
- *
- * \see cl_lock_operations::clo_modify()
- */
-int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
- const struct cl_lock_descr *desc)
-{
- const struct cl_lock_slice *slice;
- struct cl_object *obj = lock->cll_descr.cld_obj;
- struct cl_object_header *hdr = cl_object_header(obj);
- int result;
-
- cl_lock_trace(D_DLMTRACE, env, "modify lock", lock);
- /* don't allow object to change */
- LASSERT(obj == desc->cld_obj);
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_modify) {
- result = slice->cls_ops->clo_modify(env, slice, desc);
- if (result != 0)
- return result;
- }
- }
- CL_LOCK_DEBUG(D_DLMTRACE, env, lock, " -> "DDESCR"@"DFID"\n",
- PDESCR(desc), PFID(lu_object_fid(&desc->cld_obj->co_lu)));
- /*
- * Just replace description in place. Nothing more is needed for
- * now. If locks were indexed according to their extent and/or mode,
- * that index would have to be updated here.
- */
- spin_lock(&hdr->coh_lock_guard);
- lock->cll_descr = *desc;
- spin_unlock(&hdr->coh_lock_guard);
- return 0;
-}
-EXPORT_SYMBOL(cl_lock_modify);
-
-/**
- * Initializes lock closure with a given origin.
- *
- * \see cl_lock_closure
- */
-void cl_lock_closure_init(const struct lu_env *env,
- struct cl_lock_closure *closure,
- struct cl_lock *origin, int wait)
-{
- LINVRNT(cl_lock_is_mutexed(origin));
- LINVRNT(cl_lock_invariant(env, origin));
-
- INIT_LIST_HEAD(&closure->clc_list);
- closure->clc_origin = origin;
- closure->clc_wait = wait;
- closure->clc_nr = 0;
-}
-EXPORT_SYMBOL(cl_lock_closure_init);
-
-/**
- * Builds a closure of \a lock.
- *
- * Building of a closure consists of adding initial lock (\a lock) into it,
- * and calling cl_lock_operations::clo_closure() methods of \a lock. These
- * methods might call cl_lock_closure_build() recursively again, adding more
- * locks to the closure, etc.
- *
- * \see cl_lock_closure
- */
-int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
- struct cl_lock_closure *closure)
-{
- const struct cl_lock_slice *slice;
- int result;
-
- LINVRNT(cl_lock_is_mutexed(closure->clc_origin));
- LINVRNT(cl_lock_invariant(env, closure->clc_origin));
-
- result = cl_lock_enclosure(env, lock, closure);
- if (result == 0) {
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_closure) {
- result = slice->cls_ops->clo_closure(env, slice,
- closure);
- if (result != 0)
- break;
- }
- }
- }
- if (result != 0)
- cl_lock_disclosure(env, closure);
- return result;
-}
-EXPORT_SYMBOL(cl_lock_closure_build);
-
-/**
- * Adds new lock to a closure.
- *
- * Try-locks \a lock and if succeeded, adds it to the closure (never more than
- * once). If try-lock failed, returns CLO_REPEAT, after optionally waiting
- * until next try-lock is likely to succeed.
- */
-int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
- struct cl_lock_closure *closure)
-{
- int result = 0;
-
- cl_lock_trace(D_DLMTRACE, env, "enclosure lock", lock);
- if (!cl_lock_mutex_try(env, lock)) {
- /*
- * If lock->cll_inclosure is not empty, lock is already in
- * this closure.
- */
- if (list_empty(&lock->cll_inclosure)) {
- cl_lock_get_trust(lock);
- lu_ref_add(&lock->cll_reference, "closure", closure);
- list_add(&lock->cll_inclosure, &closure->clc_list);
- closure->clc_nr++;
- } else
- cl_lock_mutex_put(env, lock);
- result = 0;
- } else {
- cl_lock_disclosure(env, closure);
- if (closure->clc_wait) {
- cl_lock_get_trust(lock);
- lu_ref_add(&lock->cll_reference, "closure-w", closure);
- cl_lock_mutex_put(env, closure->clc_origin);
-
- LASSERT(cl_lock_nr_mutexed(env) == 0);
- cl_lock_mutex_get(env, lock);
- cl_lock_mutex_put(env, lock);
-
- cl_lock_mutex_get(env, closure->clc_origin);
- lu_ref_del(&lock->cll_reference, "closure-w", closure);
- cl_lock_put(env, lock);
- }
- result = CLO_REPEAT;
- }
- return result;
-}
-EXPORT_SYMBOL(cl_lock_enclosure);
-
-/** Releases mutices of enclosed locks. */
-void cl_lock_disclosure(const struct lu_env *env,
- struct cl_lock_closure *closure)
-{
- struct cl_lock *scan;
- struct cl_lock *temp;
-
- cl_lock_trace(D_DLMTRACE, env, "disclosure lock", closure->clc_origin);
- list_for_each_entry_safe(scan, temp, &closure->clc_list,
- cll_inclosure) {
- list_del_init(&scan->cll_inclosure);
- cl_lock_mutex_put(env, scan);
- lu_ref_del(&scan->cll_reference, "closure", closure);
- cl_lock_put(env, scan);
- closure->clc_nr--;
- }
- LASSERT(closure->clc_nr == 0);
-}
-EXPORT_SYMBOL(cl_lock_disclosure);
-
-/** Finalizes a closure. */
-void cl_lock_closure_fini(struct cl_lock_closure *closure)
-{
- LASSERT(closure->clc_nr == 0);
- LASSERT(list_empty(&closure->clc_list));
-}
-EXPORT_SYMBOL(cl_lock_closure_fini);
-
-/**
- * Destroys this lock. Notifies layers (bottom-to-top) that lock is being
- * destroyed, then destroy the lock. If there are holds on the lock, postpone
- * destruction until all holds are released. This is called when a decision is
- * made to destroy the lock in the future. E.g., when a blocking AST is
- * received on it, or fatal communication error happens.
- *
- * Caller must have a reference on this lock to prevent a situation, when
- * deleted lock lingers in memory for indefinite time, because nobody calls
- * cl_lock_put() to finish it.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
- * cl_lock_nr_mutexed(env) == 1)
- * [i.e., if a top-lock is deleted, mutices of no other locks can be
- * held, as deletion of sub-locks might require releasing a top-lock
- * mutex]
- *
- * \see cl_lock_operations::clo_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP,
- cl_lock_nr_mutexed(env) == 1));
-
- cl_lock_trace(D_DLMTRACE, env, "delete lock", lock);
- if (lock->cll_holds == 0)
- cl_lock_delete0(env, lock);
- else
- lock->cll_flags |= CLF_DOOMED;
-}
-EXPORT_SYMBOL(cl_lock_delete);
-
-/**
- * Mark lock as irrecoverably failed, and mark it for destruction. This
- * happens when, e.g., server fails to grant a lock to us, or networking
- * time-out happens.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- *
- * \see clo_lock_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- if (lock->cll_error == 0 && error != 0) {
- cl_lock_trace(D_DLMTRACE, env, "set lock error", lock);
- lock->cll_error = error;
- cl_lock_signal(env, lock);
- cl_lock_cancel(env, lock);
- cl_lock_delete(env, lock);
- }
-}
-EXPORT_SYMBOL(cl_lock_error);
-
-/**
- * Cancels this lock. Notifies layers
- * (bottom-to-top) that lock is being cancelled, then destroy the lock. If
- * there are holds on the lock, postpone cancellation until
- * all holds are released.
- *
- * Cancellation notification is delivered to layers at most once.
- *
- * \see cl_lock_operations::clo_cancel()
- * \see cl_lock::cll_holds
- */
-void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
- if (lock->cll_holds == 0)
- cl_lock_cancel0(env, lock);
- else
- lock->cll_flags |= CLF_CANCELPEND;
-}
-EXPORT_SYMBOL(cl_lock_cancel);
-
-/**
- * Finds an existing lock covering given index and optionally different from a
- * given \a except lock.
- */
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
- struct cl_object *obj, pgoff_t index,
- struct cl_lock *except,
- int pending, int canceld)
-{
- struct cl_object_header *head;
- struct cl_lock *scan;
- struct cl_lock *lock;
- struct cl_lock_descr *need;
-
- head = cl_object_header(obj);
- need = &cl_env_info(env)->clt_descr;
- lock = NULL;
-
- need->cld_mode = CLM_READ; /* CLM_READ matches both READ & WRITE, but
- * not PHANTOM
- */
- need->cld_start = need->cld_end = index;
- need->cld_enq_flags = 0;
-
- spin_lock(&head->coh_lock_guard);
- /* It is fine to match any group lock since there could be only one
- * with a uniq gid and it conflicts with all other lock modes too
- */
- list_for_each_entry(scan, &head->coh_locks, cll_linkage) {
- if (scan != except &&
- (scan->cll_descr.cld_mode == CLM_GROUP ||
- cl_lock_ext_match(&scan->cll_descr, need)) &&
- scan->cll_state >= CLS_HELD &&
- scan->cll_state < CLS_FREEING &&
- /*
- * This check is racy as the lock can be canceled right
- * after it is done, but this is fine, because page exists
- * already.
- */
- (canceld || !(scan->cll_flags & CLF_CANCELLED)) &&
- (pending || !(scan->cll_flags & CLF_CANCELPEND))) {
- /* Don't increase cs_hit here since this
- * is just a helper function.
- */
- cl_lock_get_trust(scan);
- lock = scan;
- break;
}
- }
- spin_unlock(&head->coh_lock_guard);
- return lock;
-}
-EXPORT_SYMBOL(cl_lock_at_pgoff);
-
-/**
- * Calculate the page offset at the layer of @lock.
- * At the time of this writing, @page is top page and @lock is sub lock.
- */
-static pgoff_t pgoff_at_lock(struct cl_page *page, struct cl_lock *lock)
-{
- struct lu_device_type *dtype;
- const struct cl_page_slice *slice;
-
- dtype = lock->cll_descr.cld_obj->co_lu.lo_dev->ld_type;
- slice = cl_page_at(page, dtype);
- return slice->cpl_page->cp_index;
+ return rc;
}
+EXPORT_SYMBOL(cl_lock_enqueue);
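The rewritten enqueue path is a single pass over the lock's layer list: each layer's optional clo_enqueue() method is invoked in order and the walk stops at the first failure, with the result left at -ENOSYS when no layer implements the method. A minimal standalone sketch of that dispatch pattern, using simplified stand-in structures rather than the real cl_lock_slice/cl_lock_slice_ops definitions:

#include <errno.h>
#include <stdio.h>

struct slice;

/* Simplified stand-ins for the per-layer lock slice and its ops table. */
struct slice_ops {
        int (*clo_enqueue)(struct slice *s);    /* optional per layer */
};

struct slice {
        const char *name;
        const struct slice_ops *ops;
};

/* Walk the layers top to bottom; the first failing layer stops the enqueue. */
static int lock_enqueue(struct slice **layers, int nr)
{
        int rc = -ENOSYS;       /* stays -ENOSYS if no layer implements it */
        int i;

        for (i = 0; i < nr; i++) {
                if (!layers[i]->ops->clo_enqueue)
                        continue;
                rc = layers[i]->ops->clo_enqueue(layers[i]);
                if (rc != 0)
                        break;
        }
        return rc;
}

static int lov_enqueue(struct slice *s)
{
        printf("%s: enqueued\n", s->name);
        return 0;
}

int main(void)
{
        static const struct slice_ops lov_ops = { .clo_enqueue = lov_enqueue };
        static const struct slice_ops osc_ops = { .clo_enqueue = NULL };
        struct slice lov = { "lov", &lov_ops };
        struct slice osc = { "osc", &osc_ops };
        struct slice *layers[] = { &lov, &osc };

        return lock_enqueue(layers, 2) ? 1 : 0;
}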
/**
- * Check if page @page is covered by an extra lock or discard it.
+ * Main high-level entry point of cl_lock interface that finds existing or
+ * enqueues new lock matching given description.
*/
-static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, void *cbdata)
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock *lock)
{
- struct cl_thread_info *info = cl_env_info(env);
- struct cl_lock *lock = cbdata;
- pgoff_t index = pgoff_at_lock(page, lock);
+ struct cl_sync_io *anchor = NULL;
+ __u32 enq_flags = lock->cll_descr.cld_enq_flags;
+ int rc;
- if (index >= info->clt_fn_index) {
- struct cl_lock *tmp;
+ rc = cl_lock_init(env, lock, io);
+ if (rc < 0)
+ return rc;
- /* refresh non-overlapped index */
- tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj, index,
- lock, 1, 0);
- if (tmp) {
- /* Cache the first-non-overlapped index so as to skip
- * all pages within [index, clt_fn_index). This
- * is safe because if tmp lock is canceled, it will
- * discard these pages.
- */
- info->clt_fn_index = tmp->cll_descr.cld_end + 1;
- if (tmp->cll_descr.cld_end == CL_PAGE_EOF)
- info->clt_fn_index = CL_PAGE_EOF;
- cl_lock_put(env, tmp);
- } else if (cl_page_own(env, io, page) == 0) {
- /* discard the page */
- cl_page_unmap(env, io, page);
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- } else {
- LASSERT(page->cp_state == CPS_FREEING);
- }
+ if ((enq_flags & CEF_ASYNC) && !(enq_flags & CEF_AGL)) {
+ anchor = &cl_env_info(env)->clt_anchor;
+ cl_sync_io_init(anchor, 1, cl_sync_io_end);
}
- info->clt_next_index = index + 1;
- return CLP_GANG_OKAY;
-}
+ rc = cl_lock_enqueue(env, io, lock, anchor);
-static int discard_cb(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, void *cbdata)
-{
- struct cl_thread_info *info = cl_env_info(env);
- struct cl_lock *lock = cbdata;
+ if (anchor) {
+ int rc2;
- LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
- KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
- !PageWriteback(cl_page_vmpage(env, page))));
- KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
- !PageDirty(cl_page_vmpage(env, page))));
-
- info->clt_next_index = pgoff_at_lock(page, lock) + 1;
- if (cl_page_own(env, io, page) == 0) {
- /* discard the page */
- cl_page_unmap(env, io, page);
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- } else {
- LASSERT(page->cp_state == CPS_FREEING);
+ /* drop the reference count held at initialization time */
+ cl_sync_io_note(env, anchor, 0);
+ rc2 = cl_sync_io_wait(env, anchor, 0);
+ if (rc2 < 0 && rc == 0)
+ rc = rc2;
}
- return CLP_GANG_OKAY;
-}
+ if (rc < 0)
+ cl_lock_release(env, lock);
-/**
- * Discard pages protected by the given lock. This function traverses radix
- * tree to find all covering pages and discard them. If a page is being covered
- * by other locks, it should remain in cache.
- *
- * If error happens on any step, the process continues anyway (the reasoning
- * behind this being that lock cancellation cannot be delayed indefinitely).
- */
-int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_thread_info *info = cl_env_info(env);
- struct cl_io *io = &info->clt_io;
- struct cl_lock_descr *descr = &lock->cll_descr;
- cl_page_gang_cb_t cb;
- int res;
- int result;
-
- LINVRNT(cl_lock_invariant(env, lock));
-
- io->ci_obj = cl_object_top(descr->cld_obj);
- io->ci_ignore_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (result != 0)
- goto out;
-
- cb = descr->cld_mode == CLM_READ ? check_and_discard_cb : discard_cb;
- info->clt_fn_index = info->clt_next_index = descr->cld_start;
- do {
- res = cl_page_gang_lookup(env, descr->cld_obj, io,
- info->clt_next_index, descr->cld_end,
- cb, (void *)lock);
- if (info->clt_next_index > descr->cld_end)
- break;
-
- if (res == CLP_GANG_RESCHED)
- cond_resched();
- } while (res != CLP_GANG_OKAY);
-out:
- cl_io_fini(env, io);
- return result;
-}
-EXPORT_SYMBOL(cl_lock_discard_pages);
-
-/**
- * Eliminate all locks for a given object.
- *
- * Caller has to guarantee that no lock is in active use.
- *
- * \param cancel when this is set, cl_locks_prune() cancels locks before
- * destroying.
- */
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int cancel)
-{
- struct cl_object_header *head;
- struct cl_lock *lock;
-
- head = cl_object_header(obj);
- /*
- * If locks are destroyed without cancellation, all pages must be
- * already destroyed (as otherwise they will be left unprotected).
- */
- LASSERT(ergo(!cancel,
- !head->coh_tree.rnode && head->coh_pages == 0));
-
- spin_lock(&head->coh_lock_guard);
- while (!list_empty(&head->coh_locks)) {
- lock = container_of(head->coh_locks.next,
- struct cl_lock, cll_linkage);
- cl_lock_get_trust(lock);
- spin_unlock(&head->coh_lock_guard);
- lu_ref_add(&lock->cll_reference, "prune", current);
-
-again:
- cl_lock_mutex_get(env, lock);
- if (lock->cll_state < CLS_FREEING) {
- LASSERT(lock->cll_users <= 1);
- if (unlikely(lock->cll_users == 1)) {
- struct l_wait_info lwi = { 0 };
-
- cl_lock_mutex_put(env, lock);
- l_wait_event(lock->cll_wq,
- lock->cll_users == 0,
- &lwi);
- goto again;
- }
-
- if (cancel)
- cl_lock_cancel(env, lock);
- cl_lock_delete(env, lock);
- }
- cl_lock_mutex_put(env, lock);
- lu_ref_del(&lock->cll_reference, "prune", current);
- cl_lock_put(env, lock);
- spin_lock(&head->coh_lock_guard);
- }
- spin_unlock(&head->coh_lock_guard);
-}
-EXPORT_SYMBOL(cl_locks_prune);
-
-static struct cl_lock *cl_lock_hold_mutex(const struct lu_env *env,
- const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source)
-{
- struct cl_lock *lock;
-
- while (1) {
- lock = cl_lock_find(env, io, need);
- if (IS_ERR(lock))
- break;
- cl_lock_mutex_get(env, lock);
- if (lock->cll_state < CLS_FREEING &&
- !(lock->cll_flags & CLF_CANCELLED)) {
- cl_lock_hold_mod(env, lock, 1);
- lu_ref_add(&lock->cll_holders, scope, source);
- lu_ref_add(&lock->cll_reference, scope, source);
- break;
- }
- cl_lock_mutex_put(env, lock);
- cl_lock_put(env, lock);
- }
- return lock;
-}
-
-/**
- * Returns a lock matching \a need description with a reference and a hold on
- * it.
- *
- * This is much like cl_lock_find(), except that cl_lock_hold() additionally
- * guarantees that lock is not in the CLS_FREEING state on return.
- */
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source)
-{
- struct cl_lock *lock;
-
- lock = cl_lock_hold_mutex(env, io, need, scope, source);
- if (!IS_ERR(lock))
- cl_lock_mutex_put(env, lock);
- return lock;
-}
-EXPORT_SYMBOL(cl_lock_hold);
-
-/**
- * Main high-level entry point of cl_lock interface that finds existing or
- * enqueues new lock matching given description.
- */
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source)
-{
- struct cl_lock *lock;
- int rc;
- __u32 enqflags = need->cld_enq_flags;
-
- do {
- lock = cl_lock_hold_mutex(env, io, need, scope, source);
- if (IS_ERR(lock))
- break;
-
- rc = cl_enqueue_locked(env, lock, io, enqflags);
- if (rc == 0) {
- if (cl_lock_fits_into(env, lock, need, io)) {
- if (!(enqflags & CEF_AGL)) {
- cl_lock_mutex_put(env, lock);
- cl_lock_lockdep_acquire(env, lock,
- enqflags);
- break;
- }
- rc = 1;
- }
- cl_unuse_locked(env, lock);
- }
- cl_lock_trace(D_DLMTRACE, env,
- rc <= 0 ? "enqueue failed" : "agl succeed", lock);
- cl_lock_hold_release(env, lock, scope, source);
- cl_lock_mutex_put(env, lock);
- lu_ref_del(&lock->cll_reference, scope, source);
- cl_lock_put(env, lock);
- if (rc > 0) {
- LASSERT(enqflags & CEF_AGL);
- lock = NULL;
- } else if (rc != 0) {
- lock = ERR_PTR(rc);
- }
- } while (rc == 0);
- return lock;
+ return rc;
}
EXPORT_SYMBOL(cl_lock_request);
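cl_lock_request() now carries the enqueue bookkeeping itself: for an asynchronous (CEF_ASYNC, non-AGL) enqueue it initializes a cl_sync_io anchor with one reference, enqueues against it, drops that initialization reference, and waits for the remaining events before deciding whether to release the lock. A toy single-threaded model of that anchor accounting, with cl_sync_io reduced to a plain counter (the real implementation sleeps in cl_sync_io_wait() until the count reaches zero):

#include <assert.h>
#include <stdio.h>

/* Toy stand-in for cl_sync_io: a count of outstanding events plus the
 * first error reported against the anchor.
 */
struct sync_anchor {
        int nr;
        int status;
};

static void sync_init(struct sync_anchor *a, int nr)
{
        a->nr = nr;
        a->status = 0;
}

static void sync_note(struct sync_anchor *a, int rc)
{
        if (rc != 0 && a->status == 0)
                a->status = rc;
        a->nr--;
}

static int sync_wait(struct sync_anchor *a)
{
        assert(a->nr == 0);     /* the real code sleeps until this holds */
        return a->status;
}

/* Pretend a lower layer starts an asynchronous enqueue and, in this toy,
 * completes it immediately by signalling the anchor.
 */
static int lock_enqueue(struct sync_anchor *a)
{
        if (a) {
                a->nr++;
                sync_note(a, 0);
        }
        return 0;
}

int main(void)
{
        struct sync_anchor anchor;
        int rc, rc2;

        sync_init(&anchor, 1);          /* hold one ref across the enqueue */
        rc = lock_enqueue(&anchor);
        sync_note(&anchor, 0);          /* drop the initialization ref */
        rc2 = sync_wait(&anchor);
        if (rc2 < 0 && rc == 0)
                rc = rc2;
        printf("request result: %d\n", rc);
        return rc ? 1 : 0;
}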
/**
- * Adds a hold to a known lock.
- */
-void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_state != CLS_FREEING);
-
- cl_lock_hold_mod(env, lock, 1);
- cl_lock_get(lock);
- lu_ref_add(&lock->cll_holders, scope, source);
- lu_ref_add(&lock->cll_reference, scope, source);
-}
-EXPORT_SYMBOL(cl_lock_hold_add);
-
-/**
- * Releases a hold and a reference on a lock, on which caller acquired a
- * mutex.
- */
-void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source)
-{
- LINVRNT(cl_lock_invariant(env, lock));
- cl_lock_hold_release(env, lock, scope, source);
- lu_ref_del(&lock->cll_reference, scope, source);
- cl_lock_put(env, lock);
-}
-EXPORT_SYMBOL(cl_lock_unhold);
-
-/**
* Releases a hold and a reference on a lock, obtained by cl_lock_hold().
*/
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source)
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock)
{
- LINVRNT(cl_lock_invariant(env, lock));
cl_lock_trace(D_DLMTRACE, env, "release lock", lock);
- cl_lock_mutex_get(env, lock);
- cl_lock_hold_release(env, lock, scope, source);
- cl_lock_mutex_put(env, lock);
- lu_ref_del(&lock->cll_reference, scope, source);
- cl_lock_put(env, lock);
+ cl_lock_cancel(env, lock);
+ cl_lock_fini(env, lock);
}
EXPORT_SYMBOL(cl_lock_release);
-void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- cl_lock_used_mod(env, lock, 1);
-}
-EXPORT_SYMBOL(cl_lock_user_add);
-
-void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_users > 0);
-
- cl_lock_used_mod(env, lock, -1);
- if (lock->cll_users == 0)
- wake_up_all(&lock->cll_wq);
-}
-EXPORT_SYMBOL(cl_lock_user_del);
-
const char *cl_lock_mode_name(const enum cl_lock_mode mode)
{
static const char *names[] = {
- [CLM_PHANTOM] = "P",
[CLM_READ] = "R",
[CLM_WRITE] = "W",
[CLM_GROUP] = "G"
@@ -2189,10 +261,8 @@ void cl_lock_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_lock *lock)
{
const struct cl_lock_slice *slice;
- (*printer)(env, cookie, "lock@%p[%d %d %d %d %d %08lx] ",
- lock, atomic_read(&lock->cll_ref),
- lock->cll_state, lock->cll_error, lock->cll_holds,
- lock->cll_users, lock->cll_flags);
+
+ (*printer)(env, cookie, "lock@%p", lock);
cl_lock_descr_print(env, cookie, printer, &lock->cll_descr);
(*printer)(env, cookie, " {\n");
@@ -2207,13 +277,3 @@ void cl_lock_print(const struct lu_env *env, void *cookie,
(*printer)(env, cookie, "} lock@%p\n", lock);
}
EXPORT_SYMBOL(cl_lock_print);
-
-int cl_lock_init(void)
-{
- return lu_kmem_init(cl_lock_caches);
-}
-
-void cl_lock_fini(void)
-{
- lu_kmem_fini(cl_lock_caches);
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
index 43e299d4d416..5940f30318ec 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c
@@ -36,6 +36,7 @@
* Client Lustre Object.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
/*
@@ -43,8 +44,6 @@
*
* i_mutex
* PG_locked
- * ->coh_page_guard
- * ->coh_lock_guard
* ->coh_attr_guard
* ->ls_guard
*/
@@ -63,10 +62,6 @@
static struct kmem_cache *cl_env_kmem;
-/** Lock class of cl_object_header::coh_page_guard */
-static struct lock_class_key cl_page_guard_class;
-/** Lock class of cl_object_header::coh_lock_guard */
-static struct lock_class_key cl_lock_guard_class;
/** Lock class of cl_object_header::coh_attr_guard */
static struct lock_class_key cl_attr_guard_class;
@@ -81,17 +76,9 @@ int cl_object_header_init(struct cl_object_header *h)
result = lu_object_header_init(&h->coh_lu);
if (result == 0) {
- spin_lock_init(&h->coh_page_guard);
- spin_lock_init(&h->coh_lock_guard);
spin_lock_init(&h->coh_attr_guard);
- lockdep_set_class(&h->coh_page_guard, &cl_page_guard_class);
- lockdep_set_class(&h->coh_lock_guard, &cl_lock_guard_class);
lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class);
- h->coh_pages = 0;
- /* XXX hard coded GFP_* mask. */
- INIT_RADIX_TREE(&h->coh_tree, GFP_ATOMIC);
- INIT_LIST_HEAD(&h->coh_locks);
- h->coh_page_bufsize = ALIGN(sizeof(struct cl_page), 8);
+ h->coh_page_bufsize = 0;
}
return result;
}
@@ -145,7 +132,7 @@ EXPORT_SYMBOL(cl_object_get);
/**
* Returns the top-object for a given \a o.
*
- * \see cl_page_top(), cl_io_top()
+ * \see cl_io_top()
*/
struct cl_object *cl_object_top(struct cl_object *o)
{
@@ -315,6 +302,29 @@ int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
EXPORT_SYMBOL(cl_conf_set);
/**
+ * Prunes caches of pages and locks for this object.
+ */
+int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+ struct lu_object_header *top;
+ struct cl_object *o;
+ int result;
+
+ top = obj->co_lu.lo_header;
+ result = 0;
+ list_for_each_entry(o, &top->loh_layers, co_lu.lo_linkage) {
+ if (o->co_ops->coo_prune) {
+ result = o->co_ops->coo_prune(env, o);
+ if (result != 0)
+ break;
+ }
+ }
+
+ return result;
+}
+EXPORT_SYMBOL(cl_object_prune);
+
+/**
* Helper function removing all object locks, and marking object for
* deletion. All object pages must have been deleted at this point.
*
@@ -323,34 +333,12 @@ EXPORT_SYMBOL(cl_conf_set);
*/
void cl_object_kill(const struct lu_env *env, struct cl_object *obj)
{
- struct cl_object_header *hdr;
-
- hdr = cl_object_header(obj);
- LASSERT(!hdr->coh_tree.rnode);
- LASSERT(hdr->coh_pages == 0);
+ struct cl_object_header *hdr = cl_object_header(obj);
set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags);
- /*
- * Destroy all locks. Object destruction (including cl_inode_fini())
- * cannot cancel the locks, because in the case of a local client,
- * where client and server share the same thread running
- * prune_icache(), this can dead-lock with ldlm_cancel_handler()
- * waiting on __wait_on_freeing_inode().
- */
- cl_locks_prune(env, obj, 0);
}
EXPORT_SYMBOL(cl_object_kill);
-/**
- * Prunes caches of pages and locks for this object.
- */
-void cl_object_prune(const struct lu_env *env, struct cl_object *obj)
-{
- cl_pages_prune(env, obj);
- cl_locks_prune(env, obj, 1);
-}
-EXPORT_SYMBOL(cl_object_prune);
-
void cache_stats_init(struct cache_stats *cs, const char *name)
{
int i;
@@ -383,6 +371,8 @@ static int cache_stats_print(const struct cache_stats *cs,
return 0;
}
+static void cl_env_percpu_refill(void);
+
/**
* Initialize client site.
*
@@ -397,11 +387,9 @@ int cl_site_init(struct cl_site *s, struct cl_device *d)
result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
if (result == 0) {
cache_stats_init(&s->cs_pages, "pages");
- cache_stats_init(&s->cs_locks, "locks");
for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i)
atomic_set(&s->cs_pages_state[0], 0);
- for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i)
- atomic_set(&s->cs_locks_state[i], 0);
+ cl_env_percpu_refill();
}
return result;
}
@@ -435,15 +423,6 @@ int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
[CPS_PAGEIN] = "r",
[CPS_FREEING] = "f"
};
- static const char *lstate[] = {
- [CLS_NEW] = "n",
- [CLS_QUEUING] = "q",
- [CLS_ENQUEUED] = "e",
- [CLS_HELD] = "h",
- [CLS_INTRANSIT] = "t",
- [CLS_CACHED] = "c",
- [CLS_FREEING] = "f"
- };
/*
lookup hit total busy create
pages: ...... ...... ...... ...... ...... [...... ...... ...... ......]
@@ -457,12 +436,6 @@ locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......]
seq_printf(m, "%s: %u ", pstate[i],
atomic_read(&site->cs_pages_state[i]));
seq_printf(m, "]\n");
- cache_stats_print(&site->cs_locks, m, 0);
- seq_printf(m, " [");
- for (i = 0; i < ARRAY_SIZE(site->cs_locks_state); ++i)
- seq_printf(m, "%s: %u ", lstate[i],
- atomic_read(&site->cs_locks_state[i]));
- seq_printf(m, "]\n");
cache_stats_print(&cl_env_stats, m, 0);
seq_printf(m, "\n");
return 0;
@@ -492,6 +465,13 @@ EXPORT_SYMBOL(cl_site_stats_print);
* bz20044, bz22683.
*/
+static LIST_HEAD(cl_envs);
+static unsigned int cl_envs_cached_nr;
+static unsigned int cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit
+ * for now.
+ */
+static DEFINE_SPINLOCK(cl_envs_guard);
+
struct cl_env {
void *ce_magic;
struct lu_env ce_lu;
@@ -674,8 +654,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
lu_context_enter(&cle->ce_ses);
env->le_ses = &cle->ce_ses;
cl_env_init0(cle, debug);
- } else
+ } else {
lu_env_fini(env);
+ }
}
if (rc != 0) {
kmem_cache_free(cl_env_kmem, cle);
@@ -684,8 +665,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
CL_ENV_INC(create);
CL_ENV_INC(total);
}
- } else
+ } else {
env = ERR_PTR(-ENOMEM);
+ }
return env;
}
@@ -697,6 +679,39 @@ static void cl_env_fini(struct cl_env *cle)
kmem_cache_free(cl_env_kmem, cle);
}
+static struct lu_env *cl_env_obtain(void *debug)
+{
+ struct cl_env *cle;
+ struct lu_env *env;
+
+ spin_lock(&cl_envs_guard);
+ LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
+ if (cl_envs_cached_nr > 0) {
+ int rc;
+
+ cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+ list_del_init(&cle->ce_linkage);
+ cl_envs_cached_nr--;
+ spin_unlock(&cl_envs_guard);
+
+ env = &cle->ce_lu;
+ rc = lu_env_refill(env);
+ if (rc == 0) {
+ cl_env_init0(cle, debug);
+ lu_context_enter(&env->le_ctx);
+ lu_context_enter(&cle->ce_ses);
+ } else {
+ cl_env_fini(cle);
+ env = ERR_PTR(rc);
+ }
+ } else {
+ spin_unlock(&cl_envs_guard);
+ env = cl_env_new(lu_context_tags_default,
+ lu_session_tags_default, debug);
+ }
+ return env;
+}
+
static inline struct cl_env *cl_env_container(struct lu_env *env)
{
return container_of(env, struct cl_env, ce_lu);
@@ -727,6 +742,8 @@ static struct lu_env *cl_env_peek(int *refcheck)
* Returns lu_env: if there already is an environment associated with the
* current thread, it is returned, otherwise, new environment is allocated.
*
+ * Allocations are amortized through the global cache of environments.
+ *
* \param refcheck pointer to a counter used to detect environment leaks. In
* the usual case cl_env_get() and cl_env_put() are called in the same lexical
* scope and pointer to the same integer is passed as \a refcheck. This is
@@ -740,10 +757,7 @@ struct lu_env *cl_env_get(int *refcheck)
env = cl_env_peek(refcheck);
if (!env) {
- env = cl_env_new(lu_context_tags_default,
- lu_session_tags_default,
- __builtin_return_address(0));
-
+ env = cl_env_obtain(__builtin_return_address(0));
if (!IS_ERR(env)) {
struct cl_env *cle;
@@ -787,6 +801,32 @@ static void cl_env_exit(struct cl_env *cle)
}
/**
+ * Finalizes and frees a given number of cached environments. This is done to
+ * (1) free some memory (not currently hooked into VM), or (2) release
+ * references to modules.
+ */
+unsigned int cl_env_cache_purge(unsigned int nr)
+{
+ struct cl_env *cle;
+
+ spin_lock(&cl_envs_guard);
+ for (; !list_empty(&cl_envs) && nr > 0; --nr) {
+ cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+ list_del_init(&cle->ce_linkage);
+ LASSERT(cl_envs_cached_nr > 0);
+ cl_envs_cached_nr--;
+ spin_unlock(&cl_envs_guard);
+
+ cl_env_fini(cle);
+ spin_lock(&cl_envs_guard);
+ }
+ LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
+ spin_unlock(&cl_envs_guard);
+ return nr;
+}
+EXPORT_SYMBOL(cl_env_cache_purge);
+
+/**
* Release an environment.
*
* Decrement \a env reference counter. When counter drops to 0, nothing in
@@ -808,7 +848,22 @@ void cl_env_put(struct lu_env *env, int *refcheck)
cl_env_detach(cle);
cle->ce_debug = NULL;
cl_env_exit(cle);
- cl_env_fini(cle);
+ /*
+ * Don't bother to take a lock here.
+ *
+ * Return environment to the cache only when it was allocated
+ * with the standard tags.
+ */
+ if (cl_envs_cached_nr < cl_envs_cached_max &&
+ (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD &&
+ (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) {
+ spin_lock(&cl_envs_guard);
+ list_add(&cle->ce_linkage, &cl_envs);
+ cl_envs_cached_nr++;
+ spin_unlock(&cl_envs_guard);
+ } else {
+ cl_env_fini(cle);
+ }
}
}
EXPORT_SYMBOL(cl_env_put);
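The comments above describe cl_env_obtain(), cl_env_put() and cl_env_cache_purge() amortizing environment allocation through a bounded global free list guarded by cl_envs_guard. A minimal single-threaded sketch of that bounded-cache pattern (no locking, and the real code additionally refills contexts and only caches envs created with the default tags):

#include <stdio.h>
#include <stdlib.h>

/* Toy model of the cl_env free-list cache; not the Lustre types. */
struct env {
        struct env *next;
};

static struct env *cache;               /* free list of cached envs */
static unsigned int cached_nr;
static const unsigned int cached_max = 128;

static struct env *env_get(void)
{
        struct env *e = cache;

        if (e) {                        /* reuse a cached environment */
                cache = e->next;
                cached_nr--;
                return e;
        }
        return calloc(1, sizeof(*e));   /* otherwise allocate a fresh one */
}

static void env_put(struct env *e)
{
        if (cached_nr < cached_max) {   /* return it to the cache ... */
                e->next = cache;
                cache = e;
                cached_nr++;
        } else {                        /* ... or free it outright */
                free(e);
        }
}

/* Free up to 'nr' cached envs; returns the unused part of the budget. */
static unsigned int env_cache_purge(unsigned int nr)
{
        while (cache && nr > 0) {
                struct env *e = cache;

                cache = e->next;
                cached_nr--;
                free(e);
                nr--;
        }
        return nr;
}

int main(void)
{
        struct env *e = env_get();
        unsigned int left;

        if (!e)
                return 1;
        env_put(e);
        left = env_cache_purge(16);
        printf("cached=%u, purge budget left=%u\n", cached_nr, left);
        return 0;
}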
@@ -914,6 +969,104 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
}
EXPORT_SYMBOL(cl_lvb2attr);
+static struct cl_env cl_env_percpu[NR_CPUS];
+
+static int cl_env_percpu_init(void)
+{
+ struct cl_env *cle;
+ int tags = LCT_REMEMBER | LCT_NOREF;
+ int i, j;
+ int rc = 0;
+
+ for_each_possible_cpu(i) {
+ struct lu_env *env;
+
+ cle = &cl_env_percpu[i];
+ env = &cle->ce_lu;
+
+ INIT_LIST_HEAD(&cle->ce_linkage);
+ cle->ce_magic = &cl_env_init0;
+ rc = lu_env_init(env, LCT_CL_THREAD | tags);
+ if (rc == 0) {
+ rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags);
+ if (rc == 0) {
+ lu_context_enter(&cle->ce_ses);
+ env->le_ses = &cle->ce_ses;
+ } else {
+ lu_env_fini(env);
+ }
+ }
+ if (rc != 0)
+ break;
+ }
+ if (rc != 0) {
+ /* Indices 0 to i (excluding i) were correctly initialized,
+ * thus we must uninitialize up to i, the rest are undefined.
+ */
+ for (j = 0; j < i; j++) {
+ cle = &cl_env_percpu[j];
+ lu_context_exit(&cle->ce_ses);
+ lu_context_fini(&cle->ce_ses);
+ lu_env_fini(&cle->ce_lu);
+ }
+ }
+
+ return rc;
+}
+
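The cleanup branch above follows the usual partial-initialization idiom spelled out in its comment: entries [0, i) were initialized when entry i failed, so exactly those entries are torn down by indexing with the inner loop variable. A small standalone sketch of the idiom (the item_init()/item_fini() pair is hypothetical, standing in for the per-CPU env setup):

#include <stdio.h>

#define NR 4

static int initialized[NR];     /* hypothetical per-index resources */

static int item_init(int i)
{
        if (i == 2)             /* pretend the third item fails */
                return -1;
        initialized[i] = 1;
        return 0;
}

static void item_fini(int i)
{
        initialized[i] = 0;
}

/* Initialize all items; on failure undo exactly the ones that succeeded. */
static int all_init(void)
{
        int i, j, rc = 0;

        for (i = 0; i < NR; i++) {
                rc = item_init(i);
                if (rc != 0)
                        break;
        }
        if (rc != 0) {
                /* entries [0, i) succeeded; entry i and later were not run */
                for (j = 0; j < i; j++)
                        item_fini(j);
        }
        return rc;
}

int main(void)
{
        printf("all_init: %d\n", all_init());
        return 0;
}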
+static void cl_env_percpu_fini(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct cl_env *cle = &cl_env_percpu[i];
+
+ lu_context_exit(&cle->ce_ses);
+ lu_context_fini(&cle->ce_ses);
+ lu_env_fini(&cle->ce_lu);
+ }
+}
+
+static void cl_env_percpu_refill(void)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ lu_env_refill(&cl_env_percpu[i].ce_lu);
+}
+
+void cl_env_percpu_put(struct lu_env *env)
+{
+ struct cl_env *cle;
+ int cpu;
+
+ cpu = smp_processor_id();
+ cle = cl_env_container(env);
+ LASSERT(cle == &cl_env_percpu[cpu]);
+
+ cle->ce_ref--;
+ LASSERT(cle->ce_ref == 0);
+
+ CL_ENV_DEC(busy);
+ cl_env_detach(cle);
+ cle->ce_debug = NULL;
+
+ put_cpu();
+}
+EXPORT_SYMBOL(cl_env_percpu_put);
+
+struct lu_env *cl_env_percpu_get(void)
+{
+ struct cl_env *cle;
+
+ cle = &cl_env_percpu[get_cpu()];
+ cl_env_init0(cle, __builtin_return_address(0));
+
+ cl_env_attach(cle);
+ return &cle->ce_lu;
+}
+EXPORT_SYMBOL(cl_env_percpu_get);
+
/*****************************************************************************
*
* Temporary prototype thing: mirror obd-devices into cl devices.
@@ -944,8 +1097,9 @@ struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site,
CERROR("can't init device '%s', %d\n", typename, rc);
d = ERR_PTR(rc);
}
- } else
+ } else {
CERROR("Cannot allocate device: '%s'\n", typename);
+ }
return lu2cl_dev(d);
}
EXPORT_SYMBOL(cl_type_setup);
@@ -959,12 +1113,6 @@ void cl_stack_fini(const struct lu_env *env, struct cl_device *cl)
}
EXPORT_SYMBOL(cl_stack_fini);
-int cl_lock_init(void);
-void cl_lock_fini(void);
-
-int cl_page_init(void);
-void cl_page_fini(void);
-
static struct lu_context_key cl_key;
struct cl_thread_info *cl_env_info(const struct lu_env *env)
@@ -1059,17 +1207,13 @@ int cl_global_init(void)
if (result)
goto out_kmem;
- result = cl_lock_init();
+ result = cl_env_percpu_init();
if (result)
+ /* no cl_env_percpu_fini on error */
goto out_context;
- result = cl_page_init();
- if (result)
- goto out_lock;
-
return 0;
-out_lock:
- cl_lock_fini();
+
out_context:
lu_context_key_degister(&cl_key);
out_kmem:
@@ -1084,8 +1228,7 @@ out_store:
*/
void cl_global_fini(void)
{
- cl_lock_fini();
- cl_page_fini();
+ cl_env_percpu_fini();
lu_context_key_degister(&cl_key);
lu_kmem_fini(cl_object_caches);
cl_env_store_fini();
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
index 394580016638..b754f516e557 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_page.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c
@@ -36,6 +36,7 @@
* Client Lustre Page.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_CLASS
@@ -48,8 +49,7 @@
#include "../include/cl_object.h"
#include "cl_internal.h"
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
- int radix);
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
# define PASSERT(env, page, expr) \
do { \
@@ -63,24 +63,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
/**
- * Internal version of cl_page_top, it should be called if the page is
- * known to be not freed, says with page referenced, or radix tree lock held,
- * or page owned.
- */
-static struct cl_page *cl_page_top_trusted(struct cl_page *page)
-{
- while (page->cp_parent)
- page = page->cp_parent;
- return page;
-}
-
-/**
* Internal version of cl_page_get().
*
* This function can be used to obtain initial reference to previously
* unreferenced cached object. It can be called only if concurrent page
- * reclamation is somehow prevented, e.g., by locking page radix-tree
- * (cl_object_header::hdr->coh_page_guard), or by keeping a lock on a VM page,
+ * reclamation is somehow prevented, e.g., by keeping a lock on a VM page,
* associated with \a page.
*
* Use with care! Not exported.
@@ -103,142 +90,12 @@ cl_page_at_trusted(const struct cl_page *page,
{
const struct cl_page_slice *slice;
- page = cl_page_top_trusted((struct cl_page *)page);
- do {
- list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
- if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
- return slice;
- }
- page = page->cp_child;
- } while (page);
- return NULL;
-}
-
-/**
- * Returns a page with given index in the given object, or NULL if no page is
- * found. Acquires a reference on \a page.
- *
- * Locking: called under cl_object_header::coh_page_guard spin-lock.
- */
-struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
-{
- struct cl_page *page;
-
- assert_spin_locked(&hdr->coh_page_guard);
-
- page = radix_tree_lookup(&hdr->coh_tree, index);
- if (page)
- cl_page_get_trust(page);
- return page;
-}
-EXPORT_SYMBOL(cl_page_lookup);
-
-/**
- * Returns a list of pages by a given [start, end] of \a obj.
- *
- * \param resched If not NULL, then we give up before hogging CPU for too
- * long and set *resched = 1, in that case caller should implement a retry
- * logic.
- *
- * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
- * crucial in the face of [offset, EOF] locks.
- *
- * Return at least one page in @queue unless there is no covered page.
- */
-int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, pgoff_t start, pgoff_t end,
- cl_page_gang_cb_t cb, void *cbdata)
-{
- struct cl_object_header *hdr;
- struct cl_page *page;
- struct cl_page **pvec;
- const struct cl_page_slice *slice;
- const struct lu_device_type *dtype;
- pgoff_t idx;
- unsigned int nr;
- unsigned int i;
- unsigned int j;
- int res = CLP_GANG_OKAY;
- int tree_lock = 1;
-
- idx = start;
- hdr = cl_object_header(obj);
- pvec = cl_env_info(env)->clt_pvec;
- dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type;
- spin_lock(&hdr->coh_page_guard);
- while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
- idx, CLT_PVEC_SIZE)) > 0) {
- int end_of_region = 0;
-
- idx = pvec[nr - 1]->cp_index + 1;
- for (i = 0, j = 0; i < nr; ++i) {
- page = pvec[i];
- pvec[i] = NULL;
-
- LASSERT(page->cp_type == CPT_CACHEABLE);
- if (page->cp_index > end) {
- end_of_region = 1;
- break;
- }
- if (page->cp_state == CPS_FREEING)
- continue;
-
- slice = cl_page_at_trusted(page, dtype);
- /*
- * Pages for lsm-less file has no underneath sub-page
- * for osc, in case of ...
- */
- PASSERT(env, page, slice);
-
- page = slice->cpl_page;
- /*
- * Can safely call cl_page_get_trust() under
- * radix-tree spin-lock.
- *
- * XXX not true, because @page is from object another
- * than @hdr and protected by different tree lock.
- */
- cl_page_get_trust(page);
- lu_ref_add_atomic(&page->cp_reference,
- "gang_lookup", current);
- pvec[j++] = page;
- }
-
- /*
- * Here a delicate locking dance is performed. Current thread
- * holds a reference to a page, but has to own it before it
- * can be placed into queue. Owning implies waiting, so
- * radix-tree lock is to be released. After a wait one has to
- * check that pages weren't truncated (cl_page_own() returns
- * error in the latter case).
- */
- spin_unlock(&hdr->coh_page_guard);
- tree_lock = 0;
-
- for (i = 0; i < j; ++i) {
- page = pvec[i];
- if (res == CLP_GANG_OKAY)
- res = (*cb)(env, io, page, cbdata);
- lu_ref_del(&page->cp_reference,
- "gang_lookup", current);
- cl_page_put(env, page);
- }
- if (nr < CLT_PVEC_SIZE || end_of_region)
- break;
-
- if (res == CLP_GANG_OKAY && need_resched())
- res = CLP_GANG_RESCHED;
- if (res != CLP_GANG_OKAY)
- break;
-
- spin_lock(&hdr->coh_page_guard);
- tree_lock = 1;
+ list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+ if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
+ return slice;
}
- if (tree_lock)
- spin_unlock(&hdr->coh_page_guard);
- return res;
+ return NULL;
}
-EXPORT_SYMBOL(cl_page_gang_lookup);
static void cl_page_free(const struct lu_env *env, struct cl_page *page)
{
@@ -247,17 +104,16 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
PASSERT(env, page, list_empty(&page->cp_batch));
PASSERT(env, page, !page->cp_owner);
PASSERT(env, page, !page->cp_req);
- PASSERT(env, page, !page->cp_parent);
PASSERT(env, page, page->cp_state == CPS_FREEING);
- might_sleep();
while (!list_empty(&page->cp_layers)) {
struct cl_page_slice *slice;
slice = list_entry(page->cp_layers.next,
struct cl_page_slice, cpl_linkage);
list_del_init(page->cp_layers.next);
- slice->cpl_ops->cpo_fini(env, slice);
+ if (unlikely(slice->cpl_ops->cpo_fini))
+ slice->cpl_ops->cpo_fini(env, slice);
}
lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
cl_object_put(env, obj);
@@ -276,10 +132,10 @@ static inline void cl_page_state_set_trust(struct cl_page *page,
*(enum cl_page_state *)&page->cp_state = state;
}
-static struct cl_page *cl_page_alloc(const struct lu_env *env,
- struct cl_object *o, pgoff_t ind,
- struct page *vmpage,
- enum cl_page_type type)
+struct cl_page *cl_page_alloc(const struct lu_env *env,
+ struct cl_object *o, pgoff_t ind,
+ struct page *vmpage,
+ enum cl_page_type type)
{
struct cl_page *page;
struct lu_object_header *head;
@@ -289,13 +145,11 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
int result = 0;
atomic_set(&page->cp_ref, 1);
- if (type == CPT_CACHEABLE) /* for radix tree */
- atomic_inc(&page->cp_ref);
page->cp_obj = o;
cl_object_get(o);
lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
page);
- page->cp_index = ind;
+ page->cp_vmpage = vmpage;
cl_page_state_set_trust(page, CPS_CACHED);
page->cp_type = type;
INIT_LIST_HEAD(&page->cp_layers);
@@ -306,10 +160,10 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
head = o->co_lu.lo_header;
list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
if (o->co_ops->coo_page_init) {
- result = o->co_ops->coo_page_init(env, o,
- page, vmpage);
+ result = o->co_ops->coo_page_init(env, o, page,
+ ind);
if (result != 0) {
- cl_page_delete0(env, page, 0);
+ cl_page_delete0(env, page);
cl_page_free(env, page);
page = ERR_PTR(result);
break;
@@ -321,6 +175,7 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
}
return page;
}
+EXPORT_SYMBOL(cl_page_alloc);
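cl_page_alloc() builds a page by letting every object layer attach its slice through coo_page_init(), and on any failure it unwinds by deleting the page and letting cl_page_free() run the cpo_fini() of whatever slices were already attached. A simplified standalone sketch of that construct-or-tear-down pattern, with stand-in page and slice types rather than the Lustre ones:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for cl_page and cl_page_slice. */
struct slice {
        struct slice *next;
        void (*fini)(struct slice *s);  /* optional per-layer cleanup */
};

struct page {
        struct slice *layers;
};

static void page_free(struct page *pg)
{
        while (pg->layers) {
                struct slice *s = pg->layers;

                pg->layers = s->next;
                if (s->fini)
                        s->fini(s);
        }
        free(pg);
}

static void slice_fini(struct slice *s)
{
        free(s);
}

/* One layer's page_init(): allocate its slice and attach it to the page. */
static int layer_page_init(struct page *pg, int fail)
{
        struct slice *s;

        if (fail)
                return -EIO;
        s = calloc(1, sizeof(*s));
        if (!s)
                return -ENOMEM;
        s->fini = slice_fini;
        s->next = pg->layers;
        pg->layers = s;
        return 0;
}

static struct page *page_alloc(int nr_layers, int fail_at)
{
        struct page *pg = calloc(1, sizeof(*pg));
        int i;

        if (!pg)
                return NULL;
        for (i = 0; i < nr_layers; i++) {
                if (layer_page_init(pg, i == fail_at) != 0) {
                        page_free(pg);  /* tear down attached slices */
                        return NULL;
                }
        }
        return pg;
}

int main(void)
{
        struct page *pg = page_alloc(3, -1);

        printf("alloc %s\n", pg ? "ok" : "failed");
        if (pg)
                page_free(pg);
        return 0;
}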
/**
* Returns a cl_page with index \a idx at the object \a o, and associated with
@@ -333,16 +188,13 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
*
* \see cl_object_find(), cl_lock_find()
*/
-static struct cl_page *cl_page_find0(const struct lu_env *env,
- struct cl_object *o,
- pgoff_t idx, struct page *vmpage,
- enum cl_page_type type,
- struct cl_page *parent)
+struct cl_page *cl_page_find(const struct lu_env *env,
+ struct cl_object *o,
+ pgoff_t idx, struct page *vmpage,
+ enum cl_page_type type)
{
struct cl_page *page = NULL;
- struct cl_page *ghost = NULL;
struct cl_object_header *hdr;
- int err;
LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
might_sleep();
@@ -368,120 +220,25 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
* reference on it.
*/
page = cl_vmpage_page(vmpage, o);
- PINVRNT(env, page,
- ergo(page,
- cl_page_vmpage(env, page) == vmpage &&
- (void *)radix_tree_lookup(&hdr->coh_tree,
- idx) == page));
- }
- if (page)
- return page;
+ if (page)
+ return page;
+ }
/* allocate and initialize cl_page */
page = cl_page_alloc(env, o, idx, vmpage, type);
- if (IS_ERR(page))
- return page;
-
- if (type == CPT_TRANSIENT) {
- if (parent) {
- LASSERT(!page->cp_parent);
- page->cp_parent = parent;
- parent->cp_child = page;
- }
- return page;
- }
-
- /*
- * XXX optimization: use radix_tree_preload() here, and change tree
- * gfp mask to GFP_KERNEL in cl_object_header_init().
- */
- spin_lock(&hdr->coh_page_guard);
- err = radix_tree_insert(&hdr->coh_tree, idx, page);
- if (err != 0) {
- ghost = page;
- /*
- * Noted by Jay: a lock on \a vmpage protects cl_page_find()
- * from this race, but
- *
- * 0. it's better to have cl_page interface "locally
- * consistent" so that its correctness can be reasoned
- * about without appealing to the (obscure world of) VM
- * locking.
- *
- * 1. handling this race allows ->coh_tree to remain
- * consistent even when VM locking is somehow busted,
- * which is very useful during diagnosing and debugging.
- */
- page = ERR_PTR(err);
- CL_PAGE_DEBUG(D_ERROR, env, ghost,
- "fail to insert into radix tree: %d\n", err);
- } else {
- if (parent) {
- LASSERT(!page->cp_parent);
- page->cp_parent = parent;
- parent->cp_child = page;
- }
- hdr->coh_pages++;
- }
- spin_unlock(&hdr->coh_page_guard);
-
- if (unlikely(ghost)) {
- cl_page_delete0(env, ghost, 0);
- cl_page_free(env, ghost);
- }
return page;
}
-
-struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o,
- pgoff_t idx, struct page *vmpage,
- enum cl_page_type type)
-{
- return cl_page_find0(env, o, idx, vmpage, type, NULL);
-}
EXPORT_SYMBOL(cl_page_find);
-struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o,
- pgoff_t idx, struct page *vmpage,
- struct cl_page *parent)
-{
- return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent);
-}
-EXPORT_SYMBOL(cl_page_find_sub);
-
static inline int cl_page_invariant(const struct cl_page *pg)
{
- struct cl_object_header *header;
- struct cl_page *parent;
- struct cl_page *child;
- struct cl_io *owner;
-
/*
* Page invariant is protected by a VM lock.
*/
LINVRNT(cl_page_is_vmlocked(NULL, pg));
- header = cl_object_header(pg->cp_obj);
- parent = pg->cp_parent;
- child = pg->cp_child;
- owner = pg->cp_owner;
-
- return cl_page_in_use(pg) &&
- ergo(parent, parent->cp_child == pg) &&
- ergo(child, child->cp_parent == pg) &&
- ergo(child, pg->cp_obj != child->cp_obj) &&
- ergo(parent, pg->cp_obj != parent->cp_obj) &&
- ergo(owner && parent,
- parent->cp_owner == pg->cp_owner->ci_parent) &&
- ergo(owner && child, child->cp_owner->ci_parent == owner) &&
- /*
- * Either page is early in initialization (has neither child
- * nor parent yet), or it is in the object radix tree.
- */
- ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE,
- (void *)radix_tree_lookup(&header->coh_tree,
- pg->cp_index) == pg ||
- (!child && !parent));
+ return cl_page_in_use_noref(pg);
}
static void cl_page_state_set0(const struct lu_env *env,
@@ -534,13 +291,9 @@ static void cl_page_state_set0(const struct lu_env *env,
old = page->cp_state;
PASSERT(env, page, allowed_transitions[old][state]);
CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
- for (; page; page = page->cp_child) {
- PASSERT(env, page, page->cp_state == old);
- PASSERT(env, page,
- equi(state == CPS_OWNED, page->cp_owner));
-
- cl_page_state_set_trust(page, state);
- }
+ PASSERT(env, page, page->cp_state == old);
+ PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
+ cl_page_state_set_trust(page, state);
}
static void cl_page_state_set(const struct lu_env *env,
@@ -574,8 +327,6 @@ EXPORT_SYMBOL(cl_page_get);
*/
void cl_page_put(const struct lu_env *env, struct cl_page *page)
{
- PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent);
-
CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
atomic_read(&page->cp_ref));
@@ -595,34 +346,10 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page)
EXPORT_SYMBOL(cl_page_put);
/**
- * Returns a VM page associated with a given cl_page.
- */
-struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
-{
- const struct cl_page_slice *slice;
-
- /*
- * Find uppermost layer with ->cpo_vmpage() method, and return its
- * result.
- */
- page = cl_page_top(page);
- do {
- list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
- if (slice->cpl_ops->cpo_vmpage)
- return slice->cpl_ops->cpo_vmpage(env, slice);
- }
- page = page->cp_child;
- } while (page);
- LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */
-}
-EXPORT_SYMBOL(cl_page_vmpage);
-
-/**
* Returns a cl_page associated with a VM page, and given cl_object.
*/
struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
{
- struct cl_page *top;
struct cl_page *page;
KLASSERT(PageLocked(vmpage));
@@ -633,36 +360,15 @@ struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
* bottom-to-top pass.
*/
- /*
- * This loop assumes that ->private points to the top-most page. This
- * can be rectified easily.
- */
- top = (struct cl_page *)vmpage->private;
- if (!top)
- return NULL;
-
- for (page = top; page; page = page->cp_child) {
- if (cl_object_same(page->cp_obj, obj)) {
- cl_page_get_trust(page);
- break;
- }
+ page = (struct cl_page *)vmpage->private;
+ if (page) {
+ cl_page_get_trust(page);
+ LASSERT(page->cp_type == CPT_CACHEABLE);
}
- LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
return page;
}
EXPORT_SYMBOL(cl_vmpage_page);
-/**
- * Returns the top-page for a given page.
- *
- * \see cl_object_top(), cl_io_top()
- */
-struct cl_page *cl_page_top(struct cl_page *page)
-{
- return cl_page_top_trusted(page);
-}
-EXPORT_SYMBOL(cl_page_top);
-
const struct cl_page_slice *cl_page_at(const struct cl_page *page,
const struct lu_device_type *dtype)
{
@@ -682,26 +388,43 @@ EXPORT_SYMBOL(cl_page_at);
int (*__method)_proto; \
\
__result = 0; \
- __page = cl_page_top(__page); \
- do { \
- list_for_each_entry(__scan, &__page->cp_layers, \
- cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + \
- __op); \
- if (__method) { \
- __result = (*__method)(__env, __scan, \
- ## __VA_ARGS__); \
- if (__result != 0) \
- break; \
- } \
- } \
- __page = __page->cp_child; \
- } while (__page && __result == 0); \
+ list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + __op); \
+ if (__method) { \
+ __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
+ if (__result != 0) \
+ break; \
+ } \
+ } \
if (__result > 0) \
__result = 0; \
__result; \
})
+#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...) \
+({ \
+ const struct lu_env *__env = (_env); \
+ struct cl_page *__page = (_page); \
+ const struct cl_page_slice *__scan; \
+ int __result; \
+ ptrdiff_t __op = (_op); \
+ int (*__method)_proto; \
+ \
+ __result = 0; \
+ list_for_each_entry_reverse(__scan, &__page->cp_layers, \
+ cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + __op); \
+ if (__method) { \
+ __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
+ if (__result != 0) \
+ break; \
+ } \
+ } \
+ if (__result > 0) \
+ __result = 0; \
+ __result; \
+})
+
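The CL_PAGE_INVOKE()/CL_PAGE_INVOKE_REVERSE() macros above dispatch by byte offset: the caller passes the offsetof() of a method slot in the page operations table, and each layer's slot is loaded and called if non-NULL, stopping at the first non-zero result. A standalone sketch of that offset-based dispatch over layers in reverse order (memcpy() is used here to fetch the slot portably; the kernel macros dereference it directly, and the types are simplified stand-ins):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct slice;

struct slice_ops {
        int (*cpo_own)(const struct slice *s);  /* another slot, unused here */
        int (*cpo_print)(const struct slice *s);
};

struct slice {
        const char *name;
        const struct slice_ops *ops;
};

/* Call the method found at byte offset 'op' in each layer's ops table,
 * walking the layers bottom to top and stopping at the first failure.
 */
static int invoke_reverse(struct slice **layers, int nr, ptrdiff_t op)
{
        int result = 0;
        int i;

        for (i = nr - 1; i >= 0; i--) {
                const void *slot = (const char *)layers[i]->ops + op;
                int (*method)(const struct slice *);

                memcpy(&method, slot, sizeof(method));
                if (method) {
                        result = method(layers[i]);
                        if (result != 0)
                                break;
                }
        }
        return result > 0 ? 0 : result;
}

static int print_slice(const struct slice *s)
{
        printf("layer %s\n", s->name);
        return 0;
}

int main(void)
{
        static const struct slice_ops vvp_ops = { .cpo_print = print_slice };
        static const struct slice_ops osc_ops = { .cpo_print = print_slice };
        struct slice vvp = { "vvp", &vvp_ops };
        struct slice osc = { "osc", &osc_ops };
        struct slice *layers[] = { &vvp, &osc };

        return invoke_reverse(layers, 2,
                              offsetof(struct slice_ops, cpo_print));
}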
#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \
do { \
const struct lu_env *__env = (_env); \
@@ -710,18 +433,11 @@ do { \
ptrdiff_t __op = (_op); \
void (*__method)_proto; \
\
- __page = cl_page_top(__page); \
- do { \
- list_for_each_entry(__scan, &__page->cp_layers, \
- cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + \
- __op); \
- if (__method) \
- (*__method)(__env, __scan, \
- ## __VA_ARGS__); \
- } \
- __page = __page->cp_child; \
- } while (__page); \
+ list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + __op); \
+ if (__method) \
+ (*__method)(__env, __scan, ## __VA_ARGS__); \
+ } \
} while (0)
#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...) \
@@ -732,20 +448,11 @@ do { \
ptrdiff_t __op = (_op); \
void (*__method)_proto; \
\
- /* get to the bottom page. */ \
- while (__page->cp_child) \
- __page = __page->cp_child; \
- do { \
- list_for_each_entry_reverse(__scan, &__page->cp_layers, \
- cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + \
- __op); \
- if (__method) \
- (*__method)(__env, __scan, \
- ## __VA_ARGS__); \
- } \
- __page = __page->cp_parent; \
- } while (__page); \
+ list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + __op); \
+ if (__method) \
+ (*__method)(__env, __scan, ## __VA_ARGS__); \
+ } \
} while (0)
static int cl_page_invoke(const struct lu_env *env,
@@ -771,20 +478,17 @@ static void cl_page_invoid(const struct lu_env *env,
static void cl_page_owner_clear(struct cl_page *page)
{
- for (page = cl_page_top(page); page; page = page->cp_child) {
- if (page->cp_owner) {
- LASSERT(page->cp_owner->ci_owned_nr > 0);
- page->cp_owner->ci_owned_nr--;
- page->cp_owner = NULL;
- page->cp_task = NULL;
- }
+ if (page->cp_owner) {
+ LASSERT(page->cp_owner->ci_owned_nr > 0);
+ page->cp_owner->ci_owned_nr--;
+ page->cp_owner = NULL;
+ page->cp_task = NULL;
}
}
static void cl_page_owner_set(struct cl_page *page)
{
- for (page = cl_page_top(page); page; page = page->cp_child)
- page->cp_owner->ci_owned_nr++;
+ page->cp_owner->ci_owned_nr++;
}
void cl_page_disown0(const struct lu_env *env,
@@ -794,7 +498,7 @@ void cl_page_disown0(const struct lu_env *env,
state = pg->cp_state;
PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
- PINVRNT(env, pg, cl_page_invariant(pg));
+ PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
cl_page_owner_clear(pg);
if (state == CPS_OWNED)
@@ -815,8 +519,9 @@ void cl_page_disown0(const struct lu_env *env,
*/
int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
{
+ struct cl_io *top = cl_io_top((struct cl_io *)io);
LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
- return pg->cp_state == CPS_OWNED && pg->cp_owner == io;
+ return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
}
EXPORT_SYMBOL(cl_page_is_owned);
@@ -847,7 +552,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
PINVRNT(env, pg, !cl_page_is_owned(pg, io));
- pg = cl_page_top(pg);
io = cl_io_top(io);
if (pg->cp_state == CPS_FREEING) {
@@ -861,7 +565,7 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
if (result == 0) {
PASSERT(env, pg, !pg->cp_owner);
PASSERT(env, pg, !pg->cp_req);
- pg->cp_owner = io;
+ pg->cp_owner = cl_io_top(io);
pg->cp_task = current;
cl_page_owner_set(pg);
if (pg->cp_state != CPS_FREEING) {
@@ -914,12 +618,11 @@ void cl_page_assume(const struct lu_env *env,
{
PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
- pg = cl_page_top(pg);
io = cl_io_top(io);
cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
PASSERT(env, pg, !pg->cp_owner);
- pg->cp_owner = io;
+ pg->cp_owner = cl_io_top(io);
pg->cp_task = current;
cl_page_owner_set(pg);
cl_page_state_set(env, pg, CPS_OWNED);
@@ -943,7 +646,6 @@ void cl_page_unassume(const struct lu_env *env,
PINVRNT(env, pg, cl_page_is_owned(pg, io));
PINVRNT(env, pg, cl_page_invariant(pg));
- pg = cl_page_top(pg);
io = cl_io_top(io);
cl_page_owner_clear(pg);
cl_page_state_set(env, pg, CPS_CACHED);
@@ -968,9 +670,9 @@ EXPORT_SYMBOL(cl_page_unassume);
void cl_page_disown(const struct lu_env *env,
struct cl_io *io, struct cl_page *pg)
{
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
+ pg->cp_state == CPS_FREEING);
- pg = cl_page_top(pg);
io = cl_io_top(io);
cl_page_disown0(env, io, pg);
}
@@ -1001,12 +703,8 @@ EXPORT_SYMBOL(cl_page_discard);
 * pages, e.g., in an error handling cl_page_find()->cl_page_delete0()
* path. Doesn't check page invariant.
*/
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
- int radix)
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
{
- struct cl_page *tmp = pg;
-
- PASSERT(env, pg, pg == cl_page_top(pg));
PASSERT(env, pg, pg->cp_state != CPS_FREEING);
/*
@@ -1014,41 +712,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
*/
cl_page_owner_clear(pg);
- /*
- * unexport the page firstly before freeing it so that
- * the page content is considered to be invalid.
- * We have to do this because a CPS_FREEING cl_page may
- * be NOT under the protection of a cl_lock.
- * Afterwards, if this page is found by other threads, then this
- * page will be forced to reread.
- */
- cl_page_export(env, pg, 0);
cl_page_state_set0(env, pg, CPS_FREEING);
- CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
- (const struct lu_env *, const struct cl_page_slice *));
-
- if (tmp->cp_type == CPT_CACHEABLE) {
- if (!radix)
- /* !radix means that @pg is not yet in the radix tree,
- * skip removing it.
- */
- tmp = pg->cp_child;
- for (; tmp; tmp = tmp->cp_child) {
- void *value;
- struct cl_object_header *hdr;
-
- hdr = cl_object_header(tmp->cp_obj);
- spin_lock(&hdr->coh_page_guard);
- value = radix_tree_delete(&hdr->coh_tree,
- tmp->cp_index);
- PASSERT(env, tmp, value == tmp);
- PASSERT(env, tmp, hdr->coh_pages > 0);
- hdr->coh_pages--;
- spin_unlock(&hdr->coh_page_guard);
- cl_page_put(env, tmp);
- }
- }
+ CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
+ (const struct lu_env *,
+ const struct cl_page_slice *));
}
/**
@@ -1070,7 +738,6 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
* Once page reaches cl_page_state::CPS_FREEING, all remaining references will
* drain after some time, at which point page will be recycled.
*
- * \pre pg == cl_page_top(pg)
* \pre VM page is locked
* \post pg->cp_state == CPS_FREEING
*
@@ -1079,30 +746,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
{
PINVRNT(env, pg, cl_page_invariant(pg));
- cl_page_delete0(env, pg, 1);
+ cl_page_delete0(env, pg);
}
EXPORT_SYMBOL(cl_page_delete);
/**
- * Unmaps page from user virtual memory.
- *
- * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The
- * layer responsible for VM interaction has to unmap page from user space
- * virtual memory.
- *
- * \see cl_page_operations::cpo_unmap()
- */
-int cl_page_unmap(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg)
-{
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
- PINVRNT(env, pg, cl_page_invariant(pg));
-
- return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap));
-}
-EXPORT_SYMBOL(cl_page_unmap);
-
-/**
* Marks page up-to-date.
*
* Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
@@ -1129,7 +777,6 @@ int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
int result;
const struct cl_page_slice *slice;
- pg = cl_page_top_trusted((struct cl_page *)pg);
slice = container_of(pg->cp_layers.next,
const struct cl_page_slice, cpl_linkage);
PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
@@ -1241,7 +888,7 @@ void cl_page_completion(const struct lu_env *env,
cl_page_put(env, pg);
if (anchor)
- cl_sync_io_note(anchor, ioret);
+ cl_sync_io_note(env, anchor, ioret);
}
EXPORT_SYMBOL(cl_page_completion);
@@ -1276,44 +923,6 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
EXPORT_SYMBOL(cl_page_make_ready);
/**
- * Notify layers that high level io decided to place this page into a cache
- * for future transfer.
- *
- * The layer implementing transfer engine (osc) has to register this page in
- * its queues.
- *
- * \pre cl_page_is_owned(pg, io)
- * \post cl_page_is_owned(pg, io)
- *
- * \see cl_page_operations::cpo_cache_add()
- */
-int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg, enum cl_req_type crt)
-{
- const struct cl_page_slice *scan;
- int result = 0;
-
- PINVRNT(env, pg, crt < CRT_NR);
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
- PINVRNT(env, pg, cl_page_invariant(pg));
-
- if (crt >= CRT_NR)
- return -EINVAL;
-
- list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
- if (!scan->cpl_ops->io[crt].cpo_cache_add)
- continue;
-
- result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
- if (result != 0)
- break;
- }
- CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
- return result;
-}
-EXPORT_SYMBOL(cl_page_cache_add);
-
-/**
 * Called if a page is being written back at the kernel's initiative.
*
* \pre cl_page_is_owned(pg, io)
@@ -1344,68 +953,21 @@ EXPORT_SYMBOL(cl_page_flush);
* \see cl_page_operations::cpo_is_under_lock()
*/
int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
+ struct cl_page *page, pgoff_t *max_index)
{
int rc;
PINVRNT(env, page, cl_page_invariant(page));
- rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock),
- (const struct lu_env *,
- const struct cl_page_slice *, struct cl_io *),
- io);
- PASSERT(env, page, rc != 0);
+ rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
+ (const struct lu_env *,
+ const struct cl_page_slice *,
+ struct cl_io *, pgoff_t *),
+ io, max_index);
return rc;
}
EXPORT_SYMBOL(cl_page_is_under_lock);
-static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, void *cbdata)
-{
- cl_page_own(env, io, page);
- cl_page_unmap(env, io, page);
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- return CLP_GANG_OKAY;
-}
-
-/**
- * Purges all cached pages belonging to the object \a obj.
- */
-int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
-{
- struct cl_thread_info *info;
- struct cl_object *obj = cl_object_top(clobj);
- struct cl_io *io;
- int result;
-
- info = cl_env_info(env);
- io = &info->clt_io;
-
- /*
- * initialize the io. This is ugly since we never do IO in this
- * function, we just make cl_page_list functions happy. -jay
- */
- io->ci_obj = obj;
- io->ci_ignore_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, obj);
- if (result != 0) {
- cl_io_fini(env, io);
- return io->ci_result;
- }
-
- do {
- result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
- page_prune_cb, NULL);
- if (result == CLP_GANG_RESCHED)
- cond_resched();
- } while (result != CLP_GANG_OKAY);
-
- cl_io_fini(env, io);
- return result;
-}
-EXPORT_SYMBOL(cl_pages_prune);
-
/**
* Tells transfer engine that only part of a page is to be transmitted.
*
@@ -1431,9 +993,8 @@ void cl_page_header_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_page *pg)
{
(*printer)(env, cookie,
- "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n",
+ "page@%p[%d %p %d %d %d %p %p %#x]\n",
pg, atomic_read(&pg->cp_ref), pg->cp_obj,
- pg->cp_index, pg->cp_parent, pg->cp_child,
pg->cp_state, pg->cp_error, pg->cp_type,
pg->cp_owner, pg->cp_req, pg->cp_flags);
}
@@ -1445,11 +1006,7 @@ EXPORT_SYMBOL(cl_page_header_print);
void cl_page_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_page *pg)
{
- struct cl_page *scan;
-
- for (scan = cl_page_top((struct cl_page *)pg); scan;
- scan = scan->cp_child)
- cl_page_header_print(env, cookie, printer, scan);
+ cl_page_header_print(env, cookie, printer, pg);
CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
(const struct lu_env *env,
const struct cl_page_slice *slice,
@@ -1509,21 +1066,13 @@ EXPORT_SYMBOL(cl_page_size);
* \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
*/
void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
- struct cl_object *obj,
+ struct cl_object *obj, pgoff_t index,
const struct cl_page_operations *ops)
{
list_add_tail(&slice->cpl_linkage, &page->cp_layers);
slice->cpl_obj = obj;
+ slice->cpl_index = index;
slice->cpl_ops = ops;
slice->cpl_page = page;
}
EXPORT_SYMBOL(cl_page_slice_add);
-
-int cl_page_init(void)
-{
- return 0;
-}
-
-void cl_page_fini(void)
-{
-}
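
The cl_page.c changes above collapse the old parent/child chain of cl_pages into a single page carrying a flat list of layer slices, with the page index moving into each slice (see the new cl_page_slice_add() signature taking a pgoff_t index). Below is a purely illustrative userspace model of that shape, not the kernel structures; every name in it is invented for the sketch.

#include <stdio.h>

struct demo_slice {
	const char *layer_name;
	unsigned long index;			/* per-slice index, in the spirit of cpl_index */
	void (*op_delete)(struct demo_slice *sl);	/* optional per-layer hook */
	struct demo_slice *next;
};

struct demo_page {
	struct demo_slice *layers;		/* flat, top-to-bottom list of slices */
};

/* Loosely mirrors cl_page_slice_add(): record the index in the slice itself. */
static void demo_page_slice_add(struct demo_page *pg, struct demo_slice *sl,
				unsigned long index)
{
	struct demo_slice **pos = &pg->layers;

	sl->index = index;
	sl->next = NULL;
	while (*pos)				/* append at the tail */
		pos = &(*pos)->next;
	*pos = sl;
}

/* One flat walk replaces the old per-child-page loops in the invoke macros. */
static void demo_page_invoke_delete(struct demo_page *pg)
{
	struct demo_slice *sl;

	for (sl = pg->layers; sl; sl = sl->next)
		if (sl->op_delete)
			sl->op_delete(sl);
}

static void demo_delete(struct demo_slice *sl)
{
	printf("%s: delete slice at index %lu\n", sl->layer_name, sl->index);
}

int main(void)
{
	struct demo_page pg = { NULL };
	struct demo_slice top = { "top-layer", 0, demo_delete, NULL };
	struct demo_slice bottom = { "bottom-layer", 0, demo_delete, NULL };

	demo_page_slice_add(&pg, &top, 42);
	demo_page_slice_add(&pg, &bottom, 42);
	demo_page_invoke_delete(&pg);
	return 0;
}
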
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index c2cf015962dd..f48816af8be7 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -60,6 +60,8 @@ unsigned int obd_dump_on_eviction;
EXPORT_SYMBOL(obd_dump_on_eviction);
unsigned int obd_max_dirty_pages = 256;
EXPORT_SYMBOL(obd_max_dirty_pages);
+atomic_t obd_unstable_pages;
+EXPORT_SYMBOL(obd_unstable_pages);
atomic_t obd_dirty_pages;
EXPORT_SYMBOL(obd_dirty_pages);
unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */
@@ -335,7 +337,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
err = 0;
goto out;
}
-
}
if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
@@ -461,7 +462,7 @@ static int obd_init_checks(void)
CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
ret = -EINVAL;
}
- if ((u64val & ~CFS_PAGE_MASK) >= PAGE_SIZE) {
+ if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
CWARN("mask failed: u64val %llu >= %llu\n", u64val,
(__u64)PAGE_SIZE);
ret = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c
index 43a7f7a79b35..e4edfb2c0a20 100644
--- a/drivers/staging/lustre/lustre/obdclass/debug.c
+++ b/drivers/staging/lustre/lustre/obdclass/debug.c
@@ -68,8 +68,8 @@ int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id)
LASSERT(addr);
- ne_off = le64_to_cpu (off);
- id = le64_to_cpu (id);
+ ne_off = le64_to_cpu(off);
+ id = le64_to_cpu(id);
if (memcmp(addr, (char *)&ne_off, LPDS)) {
CDEBUG(D_ERROR, "%s: id %#llx offset %llu off: %#llx != %#llx\n",
who, id, off, *(__u64 *)addr, ne_off);
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
index cf97b8f06764..d95f11d62a32 100644
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -604,7 +604,6 @@ int obd_init_caches(void)
out:
obd_cleanup_caches();
return -ENOMEM;
-
}
/* map connection to client */
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
index 8eddf206f1ed..2cd4522462d9 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
@@ -158,9 +158,7 @@ int obd_ioctl_popdata(void __user *arg, void *data, int len)
{
int err;
- err = copy_to_user(arg, data, len);
- if (err)
- err = -EFAULT;
+ err = copy_to_user(arg, data, len) ? -EFAULT : 0;
return err;
}
EXPORT_SYMBOL(obd_ioctl_popdata);
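
The obd_ioctl_popdata() hunk above folds the copy_to_user() check into a single expression. A minimal kernel-style sketch of the same idiom, using a hypothetical helper name (popdata_to_user() is not a function in the tree):

#include <linux/uaccess.h>
#include <linux/errno.h>

/* copy_to_user() returns the number of bytes it could NOT copy, so any
 * non-zero result is collapsed straight to -EFAULT.
 */
static int popdata_to_user(void __user *arg, const void *data, size_t len)
{
	return copy_to_user(arg, data, len) ? -EFAULT : 0;
}
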
diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c
index 992573eae1b1..79194d8cb587 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog.c
@@ -265,7 +265,6 @@ repeat:
for (rec = (struct llog_rec_hdr *)buf;
(char *)rec < buf + LLOG_CHUNK_SIZE;
rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)) {
-
CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
rec, rec->lrh_type);
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
index d93f42fee420..5a1eae1de2ec 100644
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
@@ -49,7 +49,7 @@
static const char * const obd_connect_names[] = {
"read_only",
"lov_index",
- "unused",
+ "connect_from_mds",
"write_grant",
"server_lock",
"version",
@@ -122,6 +122,56 @@ int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep)
}
EXPORT_SYMBOL(obd_connect_flags2str);
+static void obd_connect_data_seqprint(struct seq_file *m,
+ struct obd_connect_data *ocd)
+{
+	u64 flags;
+
+ LASSERT(ocd);
+ flags = ocd->ocd_connect_flags;
+
+ seq_printf(m, " connect_data:\n"
+ " flags: %llx\n"
+ " instance: %u\n",
+ ocd->ocd_connect_flags,
+ ocd->ocd_instance);
+ if (flags & OBD_CONNECT_VERSION)
+ seq_printf(m, " target_version: %u.%u.%u.%u\n",
+ OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+ OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+ OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+ OBD_OCD_VERSION_FIX(ocd->ocd_version));
+ if (flags & OBD_CONNECT_MDS)
+ seq_printf(m, " mdt_index: %d\n", ocd->ocd_group);
+ if (flags & OBD_CONNECT_GRANT)
+ seq_printf(m, " initial_grant: %d\n", ocd->ocd_grant);
+ if (flags & OBD_CONNECT_INDEX)
+ seq_printf(m, " target_index: %u\n", ocd->ocd_index);
+ if (flags & OBD_CONNECT_BRW_SIZE)
+ seq_printf(m, " max_brw_size: %d\n", ocd->ocd_brw_size);
+ if (flags & OBD_CONNECT_IBITS)
+ seq_printf(m, " ibits_known: %llx\n",
+ ocd->ocd_ibits_known);
+ if (flags & OBD_CONNECT_GRANT_PARAM)
+ seq_printf(m, " grant_block_size: %d\n"
+ " grant_inode_size: %d\n"
+ " grant_extent_overhead: %d\n",
+ ocd->ocd_blocksize,
+ ocd->ocd_inodespace,
+ ocd->ocd_grant_extent);
+ if (flags & OBD_CONNECT_TRANSNO)
+ seq_printf(m, " first_transno: %llx\n",
+ ocd->ocd_transno);
+ if (flags & OBD_CONNECT_CKSUM)
+ seq_printf(m, " cksum_types: %#x\n",
+ ocd->ocd_cksum_types);
+ if (flags & OBD_CONNECT_MAX_EASIZE)
+ seq_printf(m, " max_easize: %d\n", ocd->ocd_max_easize);
+ if (flags & OBD_CONNECT_MAXBYTES)
+ seq_printf(m, " max_object_bytes: %llx\n",
+ ocd->ocd_maxbytes);
+}
+
int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
int mult)
{
@@ -624,6 +674,7 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
struct obd_device *obd = data;
struct obd_import *imp;
struct obd_import_conn *conn;
+ struct obd_connect_data *ocd;
int j;
int k;
int rw = 0;
@@ -635,9 +686,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
return rc;
imp = obd->u.cli.cl_import;
+ ocd = &imp->imp_connect_data;
- seq_printf(m,
- "import:\n"
+ seq_printf(m, "import:\n"
" name: %s\n"
" target: %s\n"
" state: %s\n"
@@ -649,9 +700,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
imp->imp_connect_data.ocd_instance);
obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags,
", ");
- seq_printf(m,
- " ]\n"
- " import_flags: [ ");
+ seq_printf(m, " ]\n");
+ obd_connect_data_seqprint(m, ocd);
+ seq_printf(m, " import_flags: [ ");
obd_import_flags2str(imp, m);
seq_printf(m,
@@ -694,8 +745,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
do_div(sum, ret.lc_count);
ret.lc_sum = sum;
- } else
+ } else {
ret.lc_sum = 0;
+ }
seq_printf(m,
" rpcs:\n"
" inflight: %u\n"
@@ -1471,10 +1523,10 @@ EXPORT_SYMBOL(lprocfs_oh_tally);
void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
{
- unsigned int val;
+ unsigned int val = 0;
- for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++)
- ;
+ if (likely(value != 0))
+ val = min(fls(value - 1), OBD_HIST_MAX);
lprocfs_oh_tally(oh, val);
}
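
For reference, the lprocfs_oh_tally_log2() hunk above replaces the open-coded "smallest val with (1 << val) >= value" loop with fls(value - 1). A userspace sketch, with a local stand-in for the kernel's fls() and the OBD_HIST_MAX clamp omitted, checking that the two computations agree:

#include <assert.h>
#include <stdio.h>

static int fls_demo(unsigned int x)	/* 1-based index of highest set bit, 0 for 0 */
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

static int old_bucket(unsigned int value)
{
	int val;

	/* original loop, bounded here only to avoid an undefined shift */
	for (val = 0; (1U << val) < value && val < 31; val++)
		;
	return val;
}

int main(void)
{
	unsigned int v;

	for (v = 1; v < 100000; v++)
		assert(fls_demo(v - 1) == old_bucket(v));
	printf("fls(value - 1) reproduces the old loop for value >= 1\n");
	return 0;
}
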
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 978568ada8e9..e04385760f21 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -55,6 +55,7 @@
#include "../include/lustre_disk.h"
#include "../include/lustre_fid.h"
#include "../include/lu_object.h"
+#include "../include/cl_object.h"
#include "../include/lu_ref.h"
#include <linux/list.h>
@@ -103,7 +104,6 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) {
if (lu_object_is_dying(top)) {
-
/*
* somebody may be waiting for this, currently only
* used for cl_object, see cl_object_put_last().
@@ -357,7 +357,6 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
if (count > 0 && --count == 0)
break;
-
}
cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1);
cond_resched();
@@ -715,8 +714,9 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env,
obj = lu_object_locate(top->lo_header, dev->ld_type);
if (!obj)
lu_object_put(env, top);
- } else
+ } else {
obj = top;
+ }
return obj;
}
EXPORT_SYMBOL(lu_object_find_slice);
@@ -935,7 +935,7 @@ static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
* Initialize site \a s, with \a d as the top level device.
*/
#define LU_SITE_BITS_MIN 12
-#define LU_SITE_BITS_MAX 24
+#define LU_SITE_BITS_MAX 19
/**
* total 256 buckets, we don't want too many buckets because:
* - consume too much memory
@@ -1468,6 +1468,7 @@ void lu_context_key_quiesce(struct lu_context_key *key)
/*
* XXX layering violation.
*/
+ cl_env_cache_purge(~0);
key->lct_tags |= LCT_QUIESCENT;
/*
* XXX memory barrier has to go here.
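
A quick back-of-the-envelope for the LU_SITE_BITS_MAX hunk earlier in this file: the object hash uses roughly (1 << bits) buckets, so lowering the cap from 24 to 19 bounds the table at 512K buckets rather than 16M. A trivial userspace check of those numbers:

#include <stdio.h>

int main(void)
{
	printf("LU_SITE_BITS_MIN 12 -> %u buckets\n", 1U << 12);
	printf("old LU_SITE_BITS_MAX 24 -> %u buckets\n", 1U << 24);
	printf("new LU_SITE_BITS_MAX 19 -> %u buckets\n", 1U << 19);
	return 0;
}
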
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
index 5f812460b3ea..b1abe023bb35 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
@@ -163,8 +163,9 @@ int class_del_uuid(const char *uuid)
break;
}
}
- } else
+ } else {
list_splice_init(&g_uuid_list, &deathrow);
+ }
spin_unlock(&g_uuid_lock);
if (uuid && list_empty(&deathrow)) {
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c
index 5395e994deab..cb1d65c3d95d 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_config.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c
@@ -606,7 +606,7 @@ static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
return rc;
}
-LIST_HEAD(lustre_profile_list);
+static LIST_HEAD(lustre_profile_list);
struct lustre_profile *class_get_profile(const char *prof)
{
@@ -961,7 +961,6 @@ int class_process_config(struct lustre_cfg *lcfg)
default: {
err = obd_process_config(obd, sizeof(*lcfg), lcfg);
goto out;
-
}
}
out:
@@ -1001,7 +1000,13 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
for (i = 1; i < lcfg->lcfg_bufcount; i++) {
key = lustre_cfg_buf(lcfg, i);
/* Strip off prefix */
- class_match_param(key, prefix, &key);
+ if (class_match_param(key, prefix, &key)) {
+ /*
+ * If the prefix doesn't match, return error so we
+ * can pass it down the stack
+ */
+ return -ENOSYS;
+ }
sval = strchr(key, '=');
if (!sval || (*(sval + 1) == 0)) {
CERROR("Can't parse param %s (missing '=')\n", key);
@@ -1034,18 +1039,14 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
j++;
}
if (!matched) {
- /* If the prefix doesn't match, return error so we
- * can pass it down the stack
- */
- if (strnchr(key, keylen, '.'))
- return -ENOSYS;
- CERROR("%s: unknown param %s\n",
+ CERROR("%.*s: %s unknown param %s\n",
+ (int)strlen(prefix) - 1, prefix,
(char *)lustre_cfg_string(lcfg, 0), key);
/* rc = -EINVAL; continue parsing other params */
skip++;
} else if (rc < 0) {
- CERROR("writing proc entry %s err %d\n",
- var->name, rc);
+ CERROR("%s: error writing proc entry '%s': rc = %d\n",
+ prefix, var->name, rc);
rc = 0;
} else {
CDEBUG(D_CONFIG, "%s.%.*s: Set parameter %.*s=%s\n",
@@ -1350,6 +1351,7 @@ static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf,
lustre_cfg_string(lcfg, i));
}
}
+ ptr += snprintf(ptr, end - ptr, "\n");
/* return consumed bytes */
rc = ptr - buf;
return rc;
@@ -1368,7 +1370,7 @@ int class_config_dump_handler(const struct lu_env *env,
if (rec->lrh_type == OBD_CFG_REC) {
class_config_parse_rec(rec, outstr, 256);
- LCONSOLE(D_WARNING, " %s\n", outstr);
+ LCONSOLE(D_WARNING, " %s", outstr);
} else {
LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type);
rc = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
index d3e28a389ac1..e0c90adc72a7 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
@@ -102,7 +102,7 @@ int lustre_process_log(struct super_block *sb, char *logname,
LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n",
mgc->obd_name, logname, rc);
- if (rc)
+ else if (rc)
LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
mgc->obd_name, logname,
rc);
@@ -307,7 +307,8 @@ int lustre_start_mgc(struct super_block *sb)
while (class_parse_nid(ptr, &nid, &ptr) == 0) {
rc = do_lcfg(mgcname, nid,
LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
- i++;
+ if (!rc)
+ i++;
/* Stop at the first failover nid */
if (*ptr == ':')
break;
@@ -345,16 +346,18 @@ int lustre_start_mgc(struct super_block *sb)
sprintf(niduuid, "%s_%x", mgcname, i);
j = 0;
while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
- j++;
- rc = do_lcfg(mgcname, nid,
- LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
+ rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid,
+ NULL, NULL, NULL);
+ if (!rc)
+ ++j;
if (*ptr == ':')
break;
}
if (j > 0) {
rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
niduuid, NULL, NULL, NULL);
- i++;
+ if (!rc)
+ i++;
} else {
/* at ":/fsname" */
break;
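
Both NID-parsing loops in lustre_start_mgc() now bump their counters only when do_lcfg() succeeds, so the later "j > 0" check and the niduuid bookkeeping reflect how many NIDs were actually registered. A short userspace sketch of that count-only-on-success pattern; demo_add_nid() is an invented stand-in for do_lcfg():

#include <stdio.h>

static int demo_add_nid(int nid)
{
	return (nid % 2) ? -1 : 0;	/* pretend odd nids fail to register */
}

int main(void)
{
	int nids[] = { 2, 3, 4, 7, 8 };
	int i, registered = 0;

	for (i = 0; i < 5; i++)
		if (demo_add_nid(nids[i]) == 0)
			registered++;		/* bump only on success */

	if (registered > 0)
		printf("registered %d of 5 nids\n", registered);
	else
		printf("no nids registered, skip the connection setup\n");
	return 0;
}
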
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c
index e6436cb4ac62..748e33f017d5 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -185,8 +185,7 @@ void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid)
op_data->op_attr.ia_valid |= ATTR_BLOCKS;
}
if (valid & OBD_MD_FLFLAGS) {
- ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
- oa->o_flags;
+ op_data->op_attr_flags = oa->o_flags;
op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
}
}