diff options
Diffstat (limited to 'drivers/staging/lustre/lustre')
194 files changed, 8022 insertions, 15655 deletions
diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig index 8ac7cd4d6fdb..9f5d75f166e7 100644 --- a/drivers/staging/lustre/lustre/Kconfig +++ b/drivers/staging/lustre/lustre/Kconfig @@ -54,9 +54,3 @@ config LUSTRE_TRANSLATE_ERRNOS bool depends on LUSTRE_FS && !X86 default y - -config LUSTRE_LLITE_LLOOP - tristate "Lustre virtual block device" - depends on LUSTRE_FS && BLOCK - depends on !PPC_64K_PAGES && !ARM64_64K_PAGES && !MICROBLAZE_64K_PAGES && !PAGE_SIZE_64KB && !IA64_PAGE_SIZE_64KB && !PARISC_PAGE_SIZE_64KB - default m diff --git a/drivers/staging/lustre/lustre/fid/fid_internal.h b/drivers/staging/lustre/lustre/fid/fid_internal.h index b79a813977cf..5c53773ecc5a 100644 --- a/drivers/staging/lustre/lustre/fid/fid_internal.h +++ b/drivers/staging/lustre/lustre/fid/fid_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/fid/fid_lib.c b/drivers/staging/lustre/lustre/fid/fid_lib.c index dd65159ebb38..99ae7eb6720e 100644 --- a/drivers/staging/lustre/lustre/fid/fid_lib.c +++ b/drivers/staging/lustre/lustre/fid/fid_lib.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c index 39269c3c56a6..454744d25956 100644 --- a/drivers/staging/lustre/lustre/fid/fid_request.c +++ b/drivers/staging/lustre/lustre/fid/fid_request.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -66,6 +62,7 @@ static int seq_client_rpc(struct lu_client_seq *seq, unsigned int debug_mask; int rc; + LASSERT(exp && !IS_ERR(exp)); req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY, LUSTRE_MDS_VERSION, SEQ_QUERY); if (!req) @@ -97,23 +94,28 @@ static int seq_client_rpc(struct lu_client_seq *seq, * request here, otherwise if MDT0 is failed(umounted), * it can not release the export of MDT0 */ - if (seq->lcs_type == LUSTRE_SEQ_DATA) - req->rq_no_delay = req->rq_no_resend = 1; + if (seq->lcs_type == LUSTRE_SEQ_DATA) { + req->rq_no_delay = 1; + req->rq_no_resend = 1; + } debug_mask = D_CONSOLE; } else { - if (seq->lcs_type == LUSTRE_SEQ_METADATA) + if (seq->lcs_type == LUSTRE_SEQ_METADATA) { + req->rq_reply_portal = MDC_REPLY_PORTAL; req->rq_request_portal = SEQ_METADATA_PORTAL; - else + } else { + req->rq_reply_portal = OSC_REPLY_PORTAL; req->rq_request_portal = SEQ_DATA_PORTAL; + } debug_mask = D_INFO; } ptlrpc_at_set_req_timeout(req); - if (seq->lcs_type == LUSTRE_SEQ_METADATA) + if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA) mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); rc = ptlrpc_queue_wait(req); - if (seq->lcs_type == LUSTRE_SEQ_METADATA) + if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA) mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); if (rc) goto out_req; diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c index 1f0e78686278..81b7ca9ea2fd 100644 --- a/drivers/staging/lustre/lustre/fid/lproc_fid.c +++ b/drivers/staging/lustre/lustre/fid/lproc_fid.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c index 062f388cf38a..0100a935f4ff 100644 --- a/drivers/staging/lustre/lustre/fld/fld_cache.c +++ b/drivers/staging/lustre/lustre/fld/fld_cache.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -178,8 +174,9 @@ restart_fixup: if (n_range->lsr_end <= c_range->lsr_end) { *n_range = *c_range; fld_cache_entry_delete(cache, f_curr); - } else + } else { n_range->lsr_start = c_range->lsr_end; + } } /* we could have overlap over next diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h index e8a3caf20c9b..f0efe5b9fbec 100644 --- a/drivers/staging/lustre/lustre/fld/fld_internal.h +++ b/drivers/staging/lustre/lustre/fld/fld_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -101,12 +97,6 @@ struct fld_cache { unsigned int fci_no_shrink:1; }; -enum fld_op { - FLD_CREATE = 0, - FLD_DELETE = 1, - FLD_LOOKUP = 2 -}; - enum { /* 4M of FLD cache will not hurt client a lot. */ FLD_SERVER_CACHE_SIZE = (4 * 0x100000), @@ -126,7 +116,8 @@ enum { extern struct lu_fld_hash fld_hash[]; int fld_client_rpc(struct obd_export *exp, - struct lu_seq_range *range, __u32 fld_op); + struct lu_seq_range *range, __u32 fld_op, + struct ptlrpc_request **reqp); extern struct lprocfs_vars fld_client_debugfs_list[]; diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c index a3d122d85c8d..e59d626a1548 100644 --- a/drivers/staging/lustre/lustre/fld/fld_request.c +++ b/drivers/staging/lustre/lustre/fld/fld_request.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -64,9 +60,9 @@ static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) { int rc; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); rc = list_empty(&mcw->mcw_entry); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return rc; }; @@ -75,15 +71,15 @@ static void fld_enter_request(struct client_obd *cli) struct mdc_cache_waiter mcw; struct l_wait_info lwi = { 0 }; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters); init_waitqueue_head(&mcw.mcw_waitq); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi); } else { cli->cl_r_in_flight++; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } } @@ -92,10 +88,9 @@ static void fld_exit_request(struct client_obd *cli) struct list_head *l, *tmp; struct mdc_cache_waiter *mcw; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_r_in_flight--; list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { - if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { /* No free request slots anymore */ break; @@ -106,7 +101,7 @@ static void fld_exit_request(struct client_obd *cli) cli->cl_r_in_flight++; wake_up(&mcw->mcw_waitq); } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq) @@ -392,55 +387,82 @@ void fld_client_fini(struct lu_client_fld *fld) EXPORT_SYMBOL(fld_client_fini); int fld_client_rpc(struct obd_export *exp, - struct lu_seq_range *range, __u32 fld_op) + struct lu_seq_range *range, __u32 fld_op, + struct ptlrpc_request **reqp) { - struct ptlrpc_request *req; + struct ptlrpc_request *req = NULL; struct lu_seq_range *prange; __u32 *op; - int rc; + int rc = 0; struct obd_import *imp; LASSERT(exp); imp = class_exp2cliimp(exp); - req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, LUSTRE_MDS_VERSION, - FLD_QUERY); - if (!req) - return -ENOMEM; - - op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC); - *op = fld_op; + switch (fld_op) { + case FLD_QUERY: + req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, + LUSTRE_MDS_VERSION, FLD_QUERY); + if (!req) + return -ENOMEM; + + /* + * XXX: only needed when talking to old server(< 2.6), it should + * be removed when < 2.6 server is not supported + */ + op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC); + *op = FLD_LOOKUP; + + if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) + req->rq_allow_replay = 1; + break; + case FLD_READ: + req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_READ, + LUSTRE_MDS_VERSION, FLD_READ); + if (!req) + return -ENOMEM; + + req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA, + RCL_SERVER, PAGE_SIZE); + break; + default: + rc = -EINVAL; + break; + } + if (rc) + return rc; prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD); *prange = *range; - ptlrpc_request_set_replen(req); req->rq_request_portal = FLD_REQUEST_PORTAL; req->rq_reply_portal = MDC_REPLY_PORTAL; ptlrpc_at_set_req_timeout(req); - if (fld_op == FLD_LOOKUP && - imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) - req->rq_allow_replay = 1; - - if (fld_op != FLD_LOOKUP) - mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); fld_enter_request(&exp->exp_obd->u.cli); rc = ptlrpc_queue_wait(req); fld_exit_request(&exp->exp_obd->u.cli); - if (fld_op != FLD_LOOKUP) - mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); if (rc) goto out_req; - prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD); - if (!prange) { - rc = -EFAULT; - goto out_req; + if (fld_op == FLD_QUERY) { + prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD); + if (!prange) { + rc = -EFAULT; + goto out_req; + } + *range = *prange; } - *range = *prange; + out_req: - ptlrpc_req_finished(req); + if (rc || !reqp) { + ptlrpc_req_finished(req); + req = NULL; + } + + if (reqp) + *reqp = req; + return rc; } @@ -468,7 +490,7 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds, res.lsr_start = seq; fld_range_set_type(&res, flags); - rc = fld_client_rpc(target->ft_exp, &res, FLD_LOOKUP); + rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL); if (rc == 0) { *mds = res.lsr_index; diff --git a/drivers/staging/lustre/lustre/fld/lproc_fld.c b/drivers/staging/lustre/lustre/fld/lproc_fld.c index ca898befeba6..61ac420798af 100644 --- a/drivers/staging/lustre/lustre/fld/lproc_fld.c +++ b/drivers/staging/lustre/lustre/fld/lproc_fld.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h index fb971ded5a1b..3cd4a2577d90 100644 --- a/drivers/staging/lustre/lustre/include/cl_object.h +++ b/drivers/staging/lustre/lustre/include/cl_object.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -82,7 +78,6 @@ * - i_mutex * - PG_locked * - cl_object_header::coh_page_guard - * - cl_object_header::coh_lock_guard * - lu_site::ls_guard * * See the top comment in cl_object.c for the description of overall locking and @@ -98,9 +93,12 @@ * super-class definitions. */ #include "lu_object.h" +#include <linux/atomic.h> #include "linux/lustre_compat25.h" #include <linux/mutex.h> #include <linux/radix-tree.h> +#include <linux/spinlock.h> +#include <linux/wait.h> struct inode; @@ -138,7 +136,7 @@ struct cl_device_operations { * cl_req_slice_add(). * * \see osc_req_init(), lov_req_init(), lovsub_req_init() - * \see ccc_req_init() + * \see vvp_req_init() */ int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev, struct cl_req *req); @@ -147,7 +145,7 @@ struct cl_device_operations { /** * Device in the client stack. * - * \see ccc_device, lov_device, lovsub_device, osc_device + * \see vvp_device, lov_device, lovsub_device, osc_device */ struct cl_device { /** Super-class. */ @@ -243,7 +241,7 @@ enum cl_attr_valid { * be discarded from the memory, all its sub-objects are torn-down and * destroyed too. * - * \see ccc_object, lov_object, lovsub_object, osc_object + * \see vvp_object, lov_object, lovsub_object, osc_object */ struct cl_object { /** super class */ @@ -322,7 +320,7 @@ struct cl_object_operations { * to be used instead of newly created. */ int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); /** * Initialize lock slice for this layer. Called top-to-bottom through * every object layer when a new cl_lock is instantiated. Layer @@ -383,11 +381,17 @@ struct cl_object_operations { * object. Layers are supposed to fill parts of \a lvb that will be * shipped to the glimpse originator as a glimpse result. * - * \see ccc_object_glimpse(), lovsub_object_glimpse(), + * \see vvp_object_glimpse(), lovsub_object_glimpse(), * \see osc_object_glimpse() */ int (*coo_glimpse)(const struct lu_env *env, const struct cl_object *obj, struct ost_lvb *lvb); + /** + * Object prune method. Called when the layout is going to change on + * this object, therefore each layer has to clean up their cache, + * mainly pages and locks. + */ + int (*coo_prune)(const struct lu_env *env, struct cl_object *obj); }; /** @@ -398,22 +402,6 @@ struct cl_object_header { * here. */ struct lu_object_header coh_lu; - /** \name locks - * \todo XXX move locks below to the separate cache-lines, they are - * mostly useless otherwise. - */ - /** @{ */ - /** Lock protecting page tree. */ - spinlock_t coh_page_guard; - /** Lock protecting lock list. */ - spinlock_t coh_lock_guard; - /** @} locks */ - /** Radix tree of cl_page's, cached for this object. */ - struct radix_tree_root coh_tree; - /** # of pages in radix tree. */ - unsigned long coh_pages; - /** List of cl_lock's granted for this object. */ - struct list_head coh_locks; /** * Parent object. It is assumed that an object has a well-defined @@ -460,10 +448,6 @@ struct cl_object_header { co_lu.lo_linkage) /** @} cl_object */ -#ifndef pgoff_t -#define pgoff_t unsigned long -#endif - #define CL_PAGE_EOF ((pgoff_t)~0ull) /** \addtogroup cl_page cl_page @@ -727,16 +711,10 @@ struct cl_page { atomic_t cp_ref; /** An object this page is a part of. Immutable after creation. */ struct cl_object *cp_obj; - /** Logical page index within the object. Immutable after creation. */ - pgoff_t cp_index; /** List of slices. Immutable after creation. */ struct list_head cp_layers; - /** Parent page, NULL for top-level page. Immutable after creation. */ - struct cl_page *cp_parent; - /** Lower-layer page. NULL for bottommost page. Immutable after - * creation. - */ - struct cl_page *cp_child; + /** vmpage */ + struct page *cp_vmpage; /** * Page state. This field is const to avoid accidental update, it is * modified only internally within cl_page.c. Protected by a VM lock. @@ -787,10 +765,11 @@ struct cl_page { /** * Per-layer part of cl_page. * - * \see ccc_page, lov_page, osc_page + * \see vvp_page, lov_page, osc_page */ struct cl_page_slice { struct cl_page *cpl_page; + pgoff_t cpl_index; /** * Object slice corresponding to this page slice. Immutable after * creation. @@ -804,16 +783,9 @@ struct cl_page_slice { /** * Lock mode. For the client extent locks. * - * \warning: cl_lock_mode_match() assumes particular ordering here. * \ingroup cl_lock */ enum cl_lock_mode { - /** - * Mode of a lock that protects no data, and exists only as a - * placeholder. This is used for `glimpse' requests. A phantom lock - * might get promoted to real lock at some point. - */ - CLM_PHANTOM, CLM_READ, CLM_WRITE, CLM_GROUP @@ -846,11 +818,6 @@ struct cl_page_operations { */ /** - * \return the underlying VM page. Optional. - */ - struct page *(*cpo_vmpage)(const struct lu_env *env, - const struct cl_page_slice *slice); - /** * Called when \a io acquires this page into the exclusive * ownership. When this method returns, it is guaranteed that the is * not owned by other io, and no transfer is going on against @@ -897,14 +864,6 @@ struct cl_page_operations { void (*cpo_export)(const struct lu_env *env, const struct cl_page_slice *slice, int uptodate); /** - * Unmaps page from the user space (if it is mapped). - * - * \see cl_page_unmap() - * \see vvp_page_unmap() - */ - int (*cpo_unmap)(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); - /** * Checks whether underlying VM page is locked (in the suitable * sense). Used for assertions. * @@ -957,7 +916,7 @@ struct cl_page_operations { */ int (*cpo_is_under_lock)(const struct lu_env *env, const struct cl_page_slice *slice, - struct cl_io *io); + struct cl_io *io, pgoff_t *max); /** * Optional debugging helper. Prints given page slice. @@ -1027,26 +986,6 @@ struct cl_page_operations { */ int (*cpo_make_ready)(const struct lu_env *env, const struct cl_page_slice *slice); - /** - * Announce that this page is to be written out - * opportunistically, that is, page is dirty, it is not - * necessary to start write-out transfer right now, but - * eventually page has to be written out. - * - * Main caller of this is the write path (see - * vvp_io_commit_write()), using this method to build a - * "transfer cache" from which large transfers are then - * constructed by the req-formation engine. - * - * \todo XXX it would make sense to add page-age tracking - * semantics here, and to oblige the req-formation engine to - * send the page out not later than it is too old. - * - * \see cl_page_cache_add() - */ - int (*cpo_cache_add)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); } io[CRT_NR]; /** * Tell transfer engine that only [to, from] part of a page should be @@ -1098,9 +1037,8 @@ struct cl_page_operations { */ #define CL_PAGE_DEBUG(mask, env, page, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ cl_page_print(env, &msgdata, lu_cdebug_printer, page); \ CDEBUG(mask, format, ## __VA_ARGS__); \ } \ @@ -1111,9 +1049,8 @@ do { \ */ #define CL_PAGE_HEADER(mask, env, page, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \ CDEBUG(mask, format, ## __VA_ARGS__); \ } \ @@ -1130,6 +1067,12 @@ static inline int __page_in_use(const struct cl_page *page, int refc) #define cl_page_in_use(pg) __page_in_use(pg, 1) #define cl_page_in_use_noref(pg) __page_in_use(pg, 0) +static inline struct page *cl_page_vmpage(struct cl_page *page) +{ + LASSERT(page->cp_vmpage); + return page->cp_vmpage; +} + /** @} cl_page */ /** \addtogroup cl_lock cl_lock @@ -1150,12 +1093,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to * cl_lock::cll_layers list through cl_lock_slice::cls_linkage. * - * All locks for a given object are linked into cl_object_header::coh_locks - * list (protected by cl_object_header::coh_lock_guard spin-lock) through - * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can - * sort it in starting lock offset, or use altogether different data structure - * like a tree. - * * Typical cl_lock consists of the two layers: * * - vvp_lock (vvp specific data), and @@ -1177,111 +1114,29 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * * LIFE CYCLE * - * cl_lock is reference counted. When reference counter drops to 0, lock is - * placed in the cache, except when lock is in CLS_FREEING state. CLS_FREEING - * lock is destroyed when last reference is released. Referencing between - * top-lock and its sub-locks is described in the lov documentation module. - * - * STATE MACHINE - * - * Also, cl_lock is a state machine. This requires some clarification. One of - * the goals of client IO re-write was to make IO path non-blocking, or at - * least to make it easier to make it non-blocking in the future. Here - * `non-blocking' means that when a system call (read, write, truncate) - * reaches a situation where it has to wait for a communication with the - * server, it should --instead of waiting-- remember its current state and - * switch to some other work. E.g,. instead of waiting for a lock enqueue, - * client should proceed doing IO on the next stripe, etc. Obviously this is - * rather radical redesign, and it is not planned to be fully implemented at - * this time, instead we are putting some infrastructure in place, that would - * make it easier to do asynchronous non-blocking IO easier in the - * future. Specifically, where old locking code goes to sleep (waiting for - * enqueue, for example), new code returns cl_lock_transition::CLO_WAIT. When - * enqueue reply comes, its completion handler signals that lock state-machine - * is ready to transit to the next state. There is some generic code in - * cl_lock.c that sleeps, waiting for these signals. As a result, for users of - * this cl_lock.c code, it looks like locking is done in normal blocking - * fashion, and it the same time it is possible to switch to the non-blocking - * locking (simply by returning cl_lock_transition::CLO_WAIT from cl_lock.c - * functions). - * - * For a description of state machine states and transitions see enum - * cl_lock_state. - * - * There are two ways to restrict a set of states which lock might move to: - * - * - placing a "hold" on a lock guarantees that lock will not be moved - * into cl_lock_state::CLS_FREEING state until hold is released. Hold - * can be only acquired on a lock that is not in - * cl_lock_state::CLS_FREEING. All holds on a lock are counted in - * cl_lock::cll_holds. Hold protects lock from cancellation and - * destruction. Requests to cancel and destroy a lock on hold will be - * recorded, but only honored when last hold on a lock is released; - * - * - placing a "user" on a lock guarantees that lock will not leave - * cl_lock_state::CLS_NEW, cl_lock_state::CLS_QUEUING, - * cl_lock_state::CLS_ENQUEUED and cl_lock_state::CLS_HELD set of - * states, once it enters this set. That is, if a user is added onto a - * lock in a state not from this set, it doesn't immediately enforce - * lock to move to this set, but once lock enters this set it will - * remain there until all users are removed. Lock users are counted in - * cl_lock::cll_users. - * - * User is used to assure that lock is not canceled or destroyed while - * it is being enqueued, or actively used by some IO. - * - * Currently, a user always comes with a hold (cl_lock_invariant() - * checks that a number of holds is not less than a number of users). - * - * CONCURRENCY - * - * This is how lock state-machine operates. struct cl_lock contains a mutex - * cl_lock::cll_guard that protects struct fields. - * - * - mutex is taken, and cl_lock::cll_state is examined. - * - * - for every state there are possible target states where lock can move - * into. They are tried in order. Attempts to move into next state are - * done by _try() functions in cl_lock.c:cl_{enqueue,unlock,wait}_try(). - * - * - if the transition can be performed immediately, state is changed, - * and mutex is released. - * - * - if the transition requires blocking, _try() function returns - * cl_lock_transition::CLO_WAIT. Caller unlocks mutex and goes to - * sleep, waiting for possibility of lock state change. It is woken - * up when some event occurs, that makes lock state change possible - * (e.g., the reception of the reply from the server), and repeats - * the loop. - * - * Top-lock and sub-lock has separate mutexes and the latter has to be taken - * first to avoid dead-lock. - * - * To see an example of interaction of all these issues, take a look at the - * lov_cl.c:lov_lock_enqueue() function. It is called as a part of - * cl_enqueue_try(), and tries to advance top-lock to ENQUEUED state, by - * advancing state-machines of its sub-locks (lov_lock_enqueue_one()). Note - * also, that it uses trylock to grab sub-lock mutex to avoid dead-lock. It - * also has to handle CEF_ASYNC enqueue, when sub-locks enqueues have to be - * done in parallel, rather than one after another (this is used for glimpse - * locks, that cannot dead-lock). + * cl_lock is a cacheless data container for the requirements of locks to + * complete the IO. cl_lock is created before I/O starts and destroyed when the + * I/O is complete. + * + * cl_lock depends on LDLM lock to fulfill lock semantics. LDLM lock is attached + * to cl_lock at OSC layer. LDLM lock is still cacheable. * * INTERFACE AND USAGE * - * struct cl_lock_operations provide a number of call-backs that are invoked - * when events of interest occurs. Layers can intercept and handle glimpse, - * blocking, cancel ASTs and a reception of the reply from the server. + * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel. A + * cl_lock is enqueued by cl_lock_request(), which will call clo_enqueue() + * methods for each layer to enqueue the lock. At the LOV layer, if a cl_lock + * consists of multiple sub cl_locks, each sub locks will be enqueued + * correspondingly. At OSC layer, the lock enqueue request will tend to reuse + * cached LDLM lock; otherwise a new LDLM lock will have to be requested from + * OST side. * - * One important difference with the old client locking model is that new - * client has a representation for the top-lock, whereas in the old code only - * sub-locks existed as real data structures and file-level locks are - * represented by "request sets" that are created and destroyed on each and - * every lock creation. + * cl_lock_cancel() must be called to release a cl_lock after use. clo_cancel() + * method will be called for each layer to release the resource held by this + * lock. At OSC layer, the reference count of LDLM lock, which is held at + * clo_enqueue time, is released. * - * Top-locks are cached, and can be found in the cache by the system calls. It - * is possible that top-lock is in cache, but some of its sub-locks were - * canceled and destroyed. In that case top-lock has to be enqueued again - * before it can be used. + * LDLM lock can only be canceled if there is no cl_lock using it. * * Overall process of the locking during IO operation is as following: * @@ -1294,7 +1149,7 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * * - when all locks are acquired, IO is performed; * - * - locks are released into cache. + * - locks are released after IO is complete. * * Striping introduces major additional complexity into locking. The * fundamental problem is that it is generally unsafe to actively use (hold) @@ -1316,16 +1171,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * buf is a part of memory mapped Lustre file, a lock or locks protecting buf * has to be held together with the usual lock on [offset, offset + count]. * - * As multi-stripe locks have to be allowed, it makes sense to cache them, so - * that, for example, a sequence of O_APPEND writes can proceed quickly - * without going down to the individual stripes to do lock matching. On the - * other hand, multi-stripe locks shouldn't be used by normal read/write - * calls. To achieve this, every layer can implement ->clo_fits_into() method, - * that is called by lock matching code (cl_lock_lookup()), and that can be - * used to selectively disable matching of certain locks for certain IOs. For - * example, lov layer implements lov_lock_fits_into() that allow multi-stripe - * locks to be matched only for truncates and O_APPEND writes. - * * Interaction with DLM * * In the expected setup, cl_lock is ultimately backed up by a collection of @@ -1356,295 +1201,27 @@ struct cl_lock_descr { __u32 cld_enq_flags; }; -#define DDESCR "%s(%d):[%lu, %lu]" +#define DDESCR "%s(%d):[%lu, %lu]:%x" #define PDESCR(descr) \ cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \ - (descr)->cld_start, (descr)->cld_end + (descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags const char *cl_lock_mode_name(const enum cl_lock_mode mode); /** - * Lock state-machine states. - * - * \htmlonly - * <pre> - * - * Possible state transitions: - * - * +------------------>NEW - * | | - * | | cl_enqueue_try() - * | | - * | cl_unuse_try() V - * | +--------------QUEUING (*) - * | | | - * | | | cl_enqueue_try() - * | | | - * | | cl_unuse_try() V - * sub-lock | +-------------ENQUEUED (*) - * canceled | | | - * | | | cl_wait_try() - * | | | - * | | (R) - * | | | - * | | V - * | | HELD<---------+ - * | | | | - * | | | | cl_use_try() - * | | cl_unuse_try() | | - * | | | | - * | | V ---+ - * | +------------>INTRANSIT (D) <--+ - * | | | - * | cl_unuse_try() | | cached lock found - * | | | cl_use_try() - * | | | - * | V | - * +------------------CACHED---------+ - * | - * (C) - * | - * V - * FREEING - * - * Legend: - * - * In states marked with (*) transition to the same state (i.e., a loop - * in the diagram) is possible. - * - * (R) is the point where Receive call-back is invoked: it allows layers - * to handle arrival of lock reply. - * - * (C) is the point where Cancellation call-back is invoked. - * - * (D) is the transit state which means the lock is changing. - * - * Transition to FREEING state is possible from any other state in the - * diagram in case of unrecoverable error. - * </pre> - * \endhtmlonly - * - * These states are for individual cl_lock object. Top-lock and its sub-locks - * can be in the different states. Another way to say this is that we have - * nested state-machines. - * - * Separate QUEUING and ENQUEUED states are needed to support non-blocking - * operation for locks with multiple sub-locks. Imagine lock on a file F, that - * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send - * enqueue to S0, wait for its completion, then send enqueue for S1, wait for - * its completion and at last enqueue lock for S2, and wait for its - * completion. In that case, top-lock is in QUEUING state while S0, S1 are - * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note - * that in this case, sub-locks move from state to state, and top-lock remains - * in the same state). - */ -enum cl_lock_state { - /** - * Lock that wasn't yet enqueued - */ - CLS_NEW, - /** - * Enqueue is in progress, blocking for some intermediate interaction - * with the other side. - */ - CLS_QUEUING, - /** - * Lock is fully enqueued, waiting for server to reply when it is - * granted. - */ - CLS_ENQUEUED, - /** - * Lock granted, actively used by some IO. - */ - CLS_HELD, - /** - * This state is used to mark the lock is being used, or unused. - * We need this state because the lock may have several sublocks, - * so it's impossible to have an atomic way to bring all sublocks - * into CLS_HELD state at use case, or all sublocks to CLS_CACHED - * at unuse case. - * If a thread is referring to a lock, and it sees the lock is in this - * state, it must wait for the lock. - * See state diagram for details. - */ - CLS_INTRANSIT, - /** - * Lock granted, not used. - */ - CLS_CACHED, - /** - * Lock is being destroyed. - */ - CLS_FREEING, - CLS_NR -}; - -enum cl_lock_flags { - /** - * lock has been cancelled. This flag is never cleared once set (by - * cl_lock_cancel0()). - */ - CLF_CANCELLED = 1 << 0, - /** cancellation is pending for this lock. */ - CLF_CANCELPEND = 1 << 1, - /** destruction is pending for this lock. */ - CLF_DOOMED = 1 << 2, - /** from enqueue RPC reply upcall. */ - CLF_FROM_UPCALL = 1 << 3, -}; - -/** - * Lock closure. - * - * Lock closure is a collection of locks (both top-locks and sub-locks) that - * might be updated in a result of an operation on a certain lock (which lock - * this is a closure of). - * - * Closures are needed to guarantee dead-lock freedom in the presence of - * - * - nested state-machines (top-lock state-machine composed of sub-lock - * state-machines), and - * - * - shared sub-locks. - * - * Specifically, many operations, such as lock enqueue, wait, unlock, - * etc. start from a top-lock, and then operate on a sub-locks of this - * top-lock, holding a top-lock mutex. When sub-lock state changes as a result - * of such operation, this change has to be propagated to all top-locks that - * share this sub-lock. Obviously, no natural lock ordering (e.g., - * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has - * to be used. Lock closure systematizes this try-and-repeat logic. - */ -struct cl_lock_closure { - /** - * Lock that is mutexed when closure construction is started. When - * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on - * origin is released before waiting. - */ - struct cl_lock *clc_origin; - /** - * List of enclosed locks, so far. Locks are linked here through - * cl_lock::cll_inclosure. - */ - struct list_head clc_list; - /** - * True iff closure is in a `wait' mode. This determines what - * cl_lock_enclosure() does when a lock L to be added to the closure - * is currently mutexed by some other thread. - * - * If cl_lock_closure::clc_wait is not set, then closure construction - * fails with CLO_REPEAT immediately. - * - * In wait mode, cl_lock_enclosure() waits until next attempt to build - * a closure might succeed. To this end it releases an origin mutex - * (cl_lock_closure::clc_origin), that has to be the only lock mutex - * owned by the current thread, and then waits on L mutex (by grabbing - * it and immediately releasing), before returning CLO_REPEAT to the - * caller. - */ - int clc_wait; - /** Number of locks in the closure. */ - int clc_nr; -}; - -/** * Layered client lock. */ struct cl_lock { - /** Reference counter. */ - atomic_t cll_ref; /** List of slices. Immutable after creation. */ struct list_head cll_layers; - /** - * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected - * by cl_lock::cll_descr::cld_obj::coh_lock_guard. - */ - struct list_head cll_linkage; - /** - * Parameters of this lock. Protected by - * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within - * cl_lock::cll_guard. Modified only on lock creation and in - * cl_lock_modify(). - */ + /** lock attribute, extent, cl_object, etc. */ struct cl_lock_descr cll_descr; - /** Protected by cl_lock::cll_guard. */ - enum cl_lock_state cll_state; - /** signals state changes. */ - wait_queue_head_t cll_wq; - /** - * Recursive lock, most fields in cl_lock{} are protected by this. - * - * Locking rules: this mutex is never held across network - * communication, except when lock is being canceled. - * - * Lock ordering: a mutex of a sub-lock is taken first, then a mutex - * on a top-lock. Other direction is implemented through a - * try-lock-repeat loop. Mutices of unrelated locks can be taken only - * by try-locking. - * - * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait(). - */ - struct mutex cll_guard; - struct task_struct *cll_guarder; - int cll_depth; - - /** - * the owner for INTRANSIT state - */ - struct task_struct *cll_intransit_owner; - int cll_error; - /** - * Number of holds on a lock. A hold prevents a lock from being - * canceled and destroyed. Protected by cl_lock::cll_guard. - * - * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release() - */ - int cll_holds; - /** - * Number of lock users. Valid in cl_lock_state::CLS_HELD state - * only. Lock user pins lock in CLS_HELD state. Protected by - * cl_lock::cll_guard. - * - * \see cl_wait(), cl_unuse(). - */ - int cll_users; - /** - * Flag bit-mask. Values from enum cl_lock_flags. Updates are - * protected by cl_lock::cll_guard. - */ - unsigned long cll_flags; - /** - * A linkage into a list of locks in a closure. - * - * \see cl_lock_closure - */ - struct list_head cll_inclosure; - /** - * Confict lock at queuing time. - */ - struct cl_lock *cll_conflict; - /** - * A list of references to this lock, for debugging. - */ - struct lu_ref cll_reference; - /** - * A list of holds on this lock, for debugging. - */ - struct lu_ref cll_holders; - /** - * A reference for cl_lock::cll_descr::cld_obj. For debugging. - */ - struct lu_ref_link cll_obj_ref; -#ifdef CONFIG_LOCKDEP - /* "dep_map" name is assumed by lockdep.h macros. */ - struct lockdep_map dep_map; -#endif }; /** * Per-layer part of cl_lock * - * \see ccc_lock, lov_lock, lovsub_lock, osc_lock + * \see vvp_lock, lov_lock, lovsub_lock, osc_lock */ struct cl_lock_slice { struct cl_lock *cls_lock; @@ -1658,174 +1235,36 @@ struct cl_lock_slice { }; /** - * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}(). - * - * NOTE: lov_subresult() depends on ordering here. - */ -enum cl_lock_transition { - /** operation cannot be completed immediately. Wait for state change. */ - CLO_WAIT = 1, - /** operation had to release lock mutex, restart. */ - CLO_REPEAT = 2, - /** lower layer re-enqueued. */ - CLO_REENQUEUED = 3, -}; - -/** * * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops */ struct cl_lock_operations { - /** - * \name statemachine - * - * State machine transitions. These 3 methods are called to transfer - * lock from one state to another, as described in the commentary - * above enum #cl_lock_state. - * - * \retval 0 this layer has nothing more to do to before - * transition to the target state happens; - * - * \retval CLO_REPEAT method had to release and re-acquire cl_lock - * mutex, repeat invocation of transition method - * across all layers; - * - * \retval CLO_WAIT this layer cannot move to the target state - * immediately, as it has to wait for certain event - * (e.g., the communication with the server). It - * is guaranteed, that when the state transfer - * becomes possible, cl_lock::cll_wq wait-queue - * is signaled. Caller can wait for this event by - * calling cl_lock_state_wait(); - * - * \retval -ve failure, abort state transition, move the lock - * into cl_lock_state::CLS_FREEING state, and set - * cl_lock::cll_error. - * - * Once all layers voted to agree to transition (by returning 0), lock - * is moved into corresponding target state. All state transition - * methods are optional. - */ /** @{ */ /** * Attempts to enqueue the lock. Called top-to-bottom. * - * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(), + * \retval 0 this layer has enqueued the lock successfully + * \retval >0 this layer has enqueued the lock, but need to wait on + * @anchor for resources + * \retval -ve failure + * + * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(), * \see osc_lock_enqueue() */ int (*clo_enqueue)(const struct lu_env *env, const struct cl_lock_slice *slice, - struct cl_io *io, __u32 enqflags); - /** - * Attempts to wait for enqueue result. Called top-to-bottom. - * - * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait() - */ - int (*clo_wait)(const struct lu_env *env, - const struct cl_lock_slice *slice); + struct cl_io *io, struct cl_sync_io *anchor); /** - * Attempts to unlock the lock. Called bottom-to-top. In addition to - * usual return values of lock state-machine methods, this can return - * -ESTALE to indicate that lock cannot be returned to the cache, and - * has to be re-initialized. - * unuse is a one-shot operation, so it must NOT return CLO_WAIT. - * - * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse() - */ - int (*clo_unuse)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** - * Notifies layer that cached lock is started being used. - * - * \pre lock->cll_state == CLS_CACHED - * - * \see lov_lock_use(), osc_lock_use() - */ - int (*clo_use)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** @} statemachine */ - /** - * A method invoked when lock state is changed (as a result of state - * transition). This is used, for example, to track when the state of - * a sub-lock changes, to propagate this change to the corresponding - * top-lock. Optional - * - * \see lovsub_lock_state() - */ - void (*clo_state)(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state st); - /** - * Returns true, iff given lock is suitable for the given io, idea - * being, that there are certain "unsafe" locks, e.g., ones acquired - * for O_APPEND writes, that we don't want to re-use for a normal - * write, to avoid the danger of cascading evictions. Optional. Runs - * under cl_object_header::coh_lock_guard. - * - * XXX this should take more information about lock needed by - * io. Probably lock description or something similar. - * - * \see lov_fits_into() - */ - int (*clo_fits_into)(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io); - /** - * \name ast - * Asynchronous System Traps. All of then are optional, all are - * executed bottom-to-top. - */ - /** @{ */ - - /** - * Cancellation callback. Cancel a lock voluntarily, or under - * the request of server. + * Cancel a lock, release its DLM lock ref, while does not cancel the + * DLM lock */ void (*clo_cancel)(const struct lu_env *env, const struct cl_lock_slice *slice); - /** - * Lock weighting ast. Executed to estimate how precious this lock - * is. The sum of results across all layers is used to determine - * whether lock worth keeping in cache given present memory usage. - * - * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh(). - */ - unsigned long (*clo_weigh)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** @} ast */ - - /** - * \see lovsub_lock_closure() - */ - int (*clo_closure)(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_lock_closure *closure); - /** - * Executed bottom-to-top when lock description changes (e.g., as a - * result of server granting more generous lock than was requested). - * - * \see lovsub_lock_modify() - */ - int (*clo_modify)(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *updated); - /** - * Notifies layers (bottom-to-top) that lock is going to be - * destroyed. Responsibility of layers is to prevent new references on - * this lock from being acquired once this method returns. - * - * This can be called multiple times due to the races. - * - * \see cl_lock_delete() - * \see osc_lock_delete(), lovsub_lock_delete() - */ - void (*clo_delete)(const struct lu_env *env, - const struct cl_lock_slice *slice); + /** @} */ /** * Destructor. Frees resources and the slice. * - * \see ccc_lock_fini(), lov_lock_fini(), lovsub_lock_fini(), + * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(), * \see osc_lock_fini() */ void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice); @@ -2016,7 +1455,7 @@ enum cl_io_state { * This is usually embedded into layer session data, rather than allocated * dynamically. * - * \see vvp_io, lov_io, osc_io, ccc_io + * \see vvp_io, lov_io, osc_io */ struct cl_io_slice { struct cl_io *cis_io; @@ -2031,6 +1470,8 @@ struct cl_io_slice { struct list_head cis_linkage; }; +typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *, + struct cl_page *); /** * Per-layer io operations. * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops @@ -2114,7 +1555,7 @@ struct cl_io_operations { void (*cio_fini)(const struct lu_env *env, const struct cl_io_slice *slice); } op[CIT_OP_NR]; - struct { + /** * Submit pages from \a queue->c2_qin for IO, and move * successfully submitted pages into \a queue->c2_qout. Return @@ -2127,7 +1568,15 @@ struct cl_io_operations { const struct cl_io_slice *slice, enum cl_req_type crt, struct cl_2queue *queue); - } req_op[CRT_NR]; + /** + * Queue async page for write. + * The difference between cio_submit and cio_queue is that + * cio_submit is for urgent request. + */ + int (*cio_commit_async)(const struct lu_env *env, + const struct cl_io_slice *slice, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb); /** * Read missing page. * @@ -2140,31 +1589,6 @@ struct cl_io_operations { const struct cl_io_slice *slice, const struct cl_page_slice *page); /** - * Prepare write of a \a page. Called bottom-to-top by a top-level - * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for - * get data from user-level buffer. - * - * \pre io->ci_type == CIT_WRITE - * - * \see vvp_io_prepare_write(), lov_io_prepare_write(), - * osc_io_prepare_write(). - */ - int (*cio_prepare_write)(const struct lu_env *env, - const struct cl_io_slice *slice, - const struct cl_page_slice *page, - unsigned from, unsigned to); - /** - * - * \pre io->ci_type == CIT_WRITE - * - * \see vvp_io_commit_write(), lov_io_commit_write(), - * osc_io_commit_write(). - */ - int (*cio_commit_write)(const struct lu_env *env, - const struct cl_io_slice *slice, - const struct cl_page_slice *page, - unsigned from, unsigned to); - /** * Optional debugging helper. Print given io slice. */ int (*cio_print)(const struct lu_env *env, void *cookie, @@ -2216,9 +1640,13 @@ enum cl_enq_flags { */ CEF_AGL = 0x00000020, /** + * enqueue a lock to test DLM lock existence. + */ + CEF_PEEK = 0x00000040, + /** * mask of enq_flags. */ - CEF_MASK = 0x0000003f, + CEF_MASK = 0x0000007f, }; /** @@ -2228,12 +1656,12 @@ enum cl_enq_flags { struct cl_io_lock_link { /** linkage into one of cl_lockset lists. */ struct list_head cill_linkage; - struct cl_lock_descr cill_descr; - struct cl_lock *cill_lock; + struct cl_lock cill_lock; /** optional destructor */ void (*cill_fini)(const struct lu_env *env, struct cl_io_lock_link *link); }; +#define cill_descr cill_lock.cll_descr /** * Lock-set represents a collection of locks, that io needs at a @@ -2267,8 +1695,6 @@ struct cl_io_lock_link { struct cl_lockset { /** locks to be acquired. */ struct list_head cls_todo; - /** locks currently being processed. */ - struct list_head cls_curr; /** locks acquired. */ struct list_head cls_done; }; @@ -2632,9 +2058,7 @@ struct cl_site { * and top-locks (and top-pages) are accounted here. */ struct cache_stats cs_pages; - struct cache_stats cs_locks; atomic_t cs_pages_state[CPS_NR]; - atomic_t cs_locks_state[CLS_NR]; }; int cl_site_init(struct cl_site *s, struct cl_device *top); @@ -2725,7 +2149,7 @@ static inline void cl_device_fini(struct cl_device *d) } void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice, - struct cl_object *obj, + struct cl_object *obj, pgoff_t index, const struct cl_page_operations *ops); void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, struct cl_object *obj, @@ -2758,7 +2182,7 @@ int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj, struct ost_lvb *lvb); int cl_conf_set(const struct lu_env *env, struct cl_object *obj, const struct cl_object_conf *conf); -void cl_object_prune(const struct lu_env *env, struct cl_object *obj); +int cl_object_prune(const struct lu_env *env, struct cl_object *obj); void cl_object_kill(const struct lu_env *env, struct cl_object *obj); /** @@ -2772,7 +2196,7 @@ static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1) static inline void cl_object_page_init(struct cl_object *clob, int size) { clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize; - cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8); + cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size); } static inline void *cl_object_page_slice(struct cl_object *clob, @@ -2781,6 +2205,16 @@ static inline void *cl_object_page_slice(struct cl_object *clob, return (void *)((char *)page + clob->co_slice_off); } +/** + * Return refcount of cl_object. + */ +static inline int cl_object_refc(struct cl_object *clob) +{ + struct lu_object_header *header = clob->co_lu.lo_header; + + return atomic_read(&header->loh_ref); +} + /** @} cl_object */ /** \defgroup cl_page cl_page @@ -2794,28 +2228,20 @@ enum { }; /* callback of cl_page_gang_lookup() */ -typedef int (*cl_page_gang_cb_t) (const struct lu_env *, struct cl_io *, - struct cl_page *, void *); -int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, pgoff_t start, pgoff_t end, - cl_page_gang_cb_t cb, void *cbdata); -struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index); struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj, pgoff_t idx, struct page *vmpage, enum cl_page_type type); -struct cl_page *cl_page_find_sub(const struct lu_env *env, - struct cl_object *obj, - pgoff_t idx, struct page *vmpage, - struct cl_page *parent); +struct cl_page *cl_page_alloc(const struct lu_env *env, + struct cl_object *o, pgoff_t ind, + struct page *vmpage, + enum cl_page_type type); void cl_page_get(struct cl_page *page); void cl_page_put(const struct lu_env *env, struct cl_page *page); void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg); void cl_page_header_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg); -struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page); struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj); -struct cl_page *cl_page_top(struct cl_page *page); const struct cl_page_slice *cl_page_at(const struct cl_page *page, const struct lu_device_type *dtype); @@ -2872,12 +2298,10 @@ int cl_page_flush(const struct lu_env *env, struct cl_io *io, void cl_page_discard(const struct lu_env *env, struct cl_io *io, struct cl_page *pg); void cl_page_delete(const struct lu_env *env, struct cl_page *pg); -int cl_page_unmap(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg); int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg); void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate); int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io, - struct cl_page *page); + struct cl_page *page, pgoff_t *max_index); loff_t cl_offset(const struct cl_object *obj, pgoff_t idx); pgoff_t cl_index(const struct cl_object *obj, loff_t offset); int cl_page_size(const struct cl_object *obj); @@ -2890,138 +2314,74 @@ void cl_lock_descr_print(const struct lu_env *env, void *cookie, const struct cl_lock_descr *descr); /* @} helper */ -/** @} cl_page */ - -/** \defgroup cl_lock cl_lock - * @{ +/** + * Data structure managing a client's cached pages. A count of + * "unstable" pages is maintained, and an LRU of clean pages is + * maintained. "unstable" pages are pages pinned by the ptlrpc + * layer for recovery purposes. */ +struct cl_client_cache { + /** + * # of client cache refcount + * # of users (OSCs) + 2 (held by llite and lov) + */ + atomic_t ccc_users; + /** + * # of threads are doing shrinking + */ + unsigned int ccc_lru_shrinkers; + /** + * # of LRU entries available + */ + atomic_t ccc_lru_left; + /** + * List of entities(OSCs) for this LRU cache + */ + struct list_head ccc_lru; + /** + * Max # of LRU entries + */ + unsigned long ccc_lru_max; + /** + * Lock to protect ccc_lru list + */ + spinlock_t ccc_lru_lock; + /** + * # of unstable pages for this mount point + */ + atomic_t ccc_unstable_nr; + /** + * Waitq for awaiting unstable pages to reach zero. + * Used at umounting time and signaled on BRW commit + */ + wait_queue_head_t ccc_unstable_waitq; -struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source); -struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source); -struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source); -struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env, - struct cl_object *obj, pgoff_t index, - struct cl_lock *except, int pending, - int canceld); -static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env, - struct cl_object *obj, - struct cl_page *page, - struct cl_lock *except, - int pending, int canceld) -{ - LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj)); - return cl_lock_at_pgoff(env, obj, page->cp_index, except, - pending, canceld); -} - -const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, - const struct lu_device_type *dtype); - -void cl_lock_get(struct cl_lock *lock); -void cl_lock_get_trust(struct cl_lock *lock); -void cl_lock_put(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock); +}; -int cl_lock_is_intransit(struct cl_lock *lock); +/** + * cl_cache functions + */ +struct cl_client_cache *cl_cache_init(unsigned long lru_page_max); +void cl_cache_incref(struct cl_client_cache *cache); +void cl_cache_decref(struct cl_client_cache *cache); -int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock, - int keep_mutex); +/** @} cl_page */ -/** \name statemachine statemachine - * Interface to lock state machine consists of 3 parts: - * - * - "try" functions that attempt to effect a state transition. If state - * transition is not possible right now (e.g., if it has to wait for some - * asynchronous event to occur), these functions return - * cl_lock_transition::CLO_WAIT. - * - * - "non-try" functions that implement synchronous blocking interface on - * top of non-blocking "try" functions. These functions repeatedly call - * corresponding "try" versions, and if state transition is not possible - * immediately, wait for lock state change. - * - * - methods from cl_lock_operations, called by "try" functions. Lock can - * be advanced to the target state only when all layers voted that they - * are ready for this transition. "Try" functions call methods under lock - * mutex. If a layer had to release a mutex, it re-acquires it and returns - * cl_lock_transition::CLO_REPEAT, causing "try" function to call all - * layers again. - * - * TRY NON-TRY METHOD FINAL STATE - * - * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED - * - * cl_wait_try() cl_wait() cl_lock_operations::clo_wait() CLS_HELD - * - * cl_unuse_try() cl_unuse() cl_lock_operations::clo_unuse() CLS_CACHED - * - * cl_use_try() NONE cl_lock_operations::clo_use() CLS_HELD - * +/** \defgroup cl_lock cl_lock * @{ */ -int cl_wait(const struct lu_env *env, struct cl_lock *lock); -void cl_unuse(const struct lu_env *env, struct cl_lock *lock); -int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock, - struct cl_io *io, __u32 flags); -int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock); -int cl_wait_try(const struct lu_env *env, struct cl_lock *lock); -int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic); - -/** @} statemachine */ - -void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock, - enum cl_lock_state state); -int cl_queue_match(const struct list_head *queue, - const struct cl_lock_descr *need); - -void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_is_mutexed(struct cl_lock *lock); -int cl_lock_nr_mutexed(const struct lu_env *env); -int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_ext_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need); -int cl_lock_descr_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need); -int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need); -int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock, - const struct cl_lock_descr *desc); - -void cl_lock_closure_init(const struct lu_env *env, - struct cl_lock_closure *closure, - struct cl_lock *origin, int wait); -void cl_lock_closure_fini(struct cl_lock_closure *closure); -int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure); -void cl_lock_disclosure(const struct lu_env *env, - struct cl_lock_closure *closure); -int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure); - +int cl_lock_request(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock); +int cl_lock_init(const struct lu_env *env, struct cl_lock *lock, + const struct cl_io *io); +void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock); +const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, + const struct lu_device_type *dtype); +void cl_lock_release(const struct lu_env *env, struct cl_lock *lock); +int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock, struct cl_sync_io *anchor); void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error); -void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait); - -unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock); /** @} cl_lock */ @@ -3050,15 +2410,14 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, struct cl_lock_descr *descr); int cl_io_read_page(const struct lu_env *env, struct cl_io *io, struct cl_page *page); -int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to); -int cl_io_commit_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to); int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, enum cl_req_type iot, struct cl_2queue *queue); int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, enum cl_req_type iot, struct cl_2queue *queue, long timeout); +int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb); int cl_io_is_going(const struct lu_env *env); /** @@ -3114,6 +2473,12 @@ static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist) return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch); } +static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist) +{ + LASSERT(plist->pl_nr > 0); + return list_entry(plist->pl_pages.next, struct cl_page, cp_batch); +} + /** * Iterate over pages in a page list. */ @@ -3130,9 +2495,14 @@ void cl_page_list_init(struct cl_page_list *plist); void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page); void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src, struct cl_page *page); +void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src, + struct cl_page *page); void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head); +void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist, + struct cl_page *page); void cl_page_list_disown(const struct lu_env *env, struct cl_io *io, struct cl_page_list *plist); +void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist); void cl_2queue_init(struct cl_2queue *queue); void cl_2queue_disown(const struct lu_env *env, @@ -3177,13 +2547,18 @@ struct cl_sync_io { atomic_t csi_barrier; /** completion to be signaled when transfer is complete. */ wait_queue_head_t csi_waitq; + /** callback to invoke when this IO is finished */ + void (*csi_end_io)(const struct lu_env *, + struct cl_sync_io *); }; -void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages); -int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, struct cl_sync_io *anchor, +void cl_sync_io_init(struct cl_sync_io *anchor, int nr, + void (*end)(const struct lu_env *, struct cl_sync_io *)); +int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor, long timeout); -void cl_sync_io_note(struct cl_sync_io *anchor, int ioret); +void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor, + int ioret); +void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor); /** @} cl_sync_io */ @@ -3241,6 +2616,9 @@ void *cl_env_reenter(void); void cl_env_reexit(void *cookie); void cl_env_implant(struct lu_env *env, int *refcheck); void cl_env_unplant(struct lu_env *env, int *refcheck); +unsigned int cl_env_cache_purge(unsigned int nr); +struct lu_env *cl_env_percpu_get(void); +void cl_env_percpu_put(struct lu_env *env); /** @} cl_env */ diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h index f6df3f33e770..4a15228b5570 100644 --- a/drivers/staging/lustre/lustre/include/interval_tree.h +++ b/drivers/staging/lustre/lustre/include/interval_tree.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h deleted file mode 100644 index 5d839a9f789f..000000000000 --- a/drivers/staging/lustre/lustre/include/lclient.h +++ /dev/null @@ -1,408 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Definitions shared between vvp and liblustre, and other clients in the - * future. - * - * Author: Oleg Drokin <oleg.drokin@sun.com> - * Author: Nikita Danilov <nikita.danilov@sun.com> - */ - -#ifndef LCLIENT_H -#define LCLIENT_H - -blkcnt_t dirty_cnt(struct inode *inode); - -int cl_glimpse_size0(struct inode *inode, int agl); -int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, - struct inode *inode, struct cl_object *clob, int agl); - -static inline int cl_glimpse_size(struct inode *inode) -{ - return cl_glimpse_size0(inode, 0); -} - -static inline int cl_agl(struct inode *inode) -{ - return cl_glimpse_size0(inode, 1); -} - -/** - * Locking policy for setattr. - */ -enum ccc_setattr_lock_type { - /** Locking is done by server */ - SETATTR_NOLOCK, - /** Extent lock is enqueued */ - SETATTR_EXTENT_LOCK, - /** Existing local extent lock is used */ - SETATTR_MATCH_LOCK -}; - -/** - * IO state private to vvp or slp layers. - */ -struct ccc_io { - /** super class */ - struct cl_io_slice cui_cl; - struct cl_io_lock_link cui_link; - /** - * I/O vector information to or from which read/write is going. - */ - struct iov_iter *cui_iter; - /** - * Total size for the left IO. - */ - size_t cui_tot_count; - - union { - struct { - enum ccc_setattr_lock_type cui_local_lock; - } setattr; - } u; - /** - * True iff io is processing glimpse right now. - */ - int cui_glimpse; - /** - * Layout version when this IO is initialized - */ - __u32 cui_layout_gen; - /** - * File descriptor against which IO is done. - */ - struct ll_file_data *cui_fd; - struct kiocb *cui_iocb; -}; - -/** - * True, if \a io is a normal io, False for splice_{read,write}. - * must be implemented in arch specific code. - */ -int cl_is_normalio(const struct lu_env *env, const struct cl_io *io); - -extern struct lu_context_key ccc_key; -extern struct lu_context_key ccc_session_key; - -struct ccc_thread_info { - struct cl_lock_descr cti_descr; - struct cl_io cti_io; - struct cl_attr cti_attr; -}; - -static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env) -{ - struct ccc_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &ccc_key); - LASSERT(info); - return info; -} - -static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env) -{ - struct cl_attr *attr = &ccc_env_info(env)->cti_attr; - - memset(attr, 0, sizeof(*attr)); - return attr; -} - -static inline struct cl_io *ccc_env_thread_io(const struct lu_env *env) -{ - struct cl_io *io = &ccc_env_info(env)->cti_io; - - memset(io, 0, sizeof(*io)); - return io; -} - -struct ccc_session { - struct ccc_io cs_ios; -}; - -static inline struct ccc_session *ccc_env_session(const struct lu_env *env) -{ - struct ccc_session *ses; - - ses = lu_context_key_get(env->le_ses, &ccc_session_key); - LASSERT(ses); - return ses; -} - -static inline struct ccc_io *ccc_env_io(const struct lu_env *env) -{ - return &ccc_env_session(env)->cs_ios; -} - -/** - * ccc-private object state. - */ -struct ccc_object { - struct cl_object_header cob_header; - struct cl_object cob_cl; - struct inode *cob_inode; - - /** - * A list of dirty pages pending IO in the cache. Used by - * SOM. Protected by ll_inode_info::lli_lock. - * - * \see ccc_page::cpg_pending_linkage - */ - struct list_head cob_pending_list; - - /** - * Access this counter is protected by inode->i_sem. Now that - * the lifetime of transient pages must be covered by inode sem, - * we don't need to hold any lock.. - */ - int cob_transient_pages; - /** - * Number of outstanding mmaps on this file. - * - * \see ll_vm_open(), ll_vm_close(). - */ - atomic_t cob_mmap_cnt; - - /** - * various flags - * cob_discard_page_warned - * if pages belonging to this object are discarded when a client - * is evicted, some debug info will be printed, this flag will be set - * during processing the first discarded page, then avoid flooding - * debug message for lots of discarded pages. - * - * \see ll_dirty_page_discard_warn. - */ - unsigned int cob_discard_page_warned:1; -}; - -/** - * ccc-private page state. - */ -struct ccc_page { - struct cl_page_slice cpg_cl; - int cpg_defer_uptodate; - int cpg_ra_used; - int cpg_write_queued; - /** - * Non-empty iff this page is already counted in - * ccc_object::cob_pending_list. Protected by - * ccc_object::cob_pending_guard. This list is only used as a flag, - * that is, never iterated through, only checked for list_empty(), but - * having a list is useful for debugging. - */ - struct list_head cpg_pending_linkage; - /** VM page */ - struct page *cpg_page; -}; - -static inline struct ccc_page *cl2ccc_page(const struct cl_page_slice *slice) -{ - return container_of(slice, struct ccc_page, cpg_cl); -} - -struct ccc_device { - struct cl_device cdv_cl; - struct super_block *cdv_sb; - struct cl_device *cdv_next; -}; - -struct ccc_lock { - struct cl_lock_slice clk_cl; -}; - -struct ccc_req { - struct cl_req_slice crq_cl; -}; - -void *ccc_key_init (const struct lu_context *ctx, - struct lu_context_key *key); -void ccc_key_fini (const struct lu_context *ctx, - struct lu_context_key *key, void *data); -void *ccc_session_key_init(const struct lu_context *ctx, - struct lu_context_key *key); -void ccc_session_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data); - -int ccc_device_init (const struct lu_env *env, - struct lu_device *d, - const char *name, struct lu_device *next); -struct lu_device *ccc_device_fini (const struct lu_env *env, - struct lu_device *d); -struct lu_device *ccc_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg, - const struct lu_device_operations *luops, - const struct cl_device_operations *clops); -struct lu_device *ccc_device_free (const struct lu_env *env, - struct lu_device *d); -struct lu_object *ccc_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *dev, - const struct cl_object_operations *clops, - const struct lu_object_operations *luops); - -int ccc_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req); -void ccc_umount(const struct lu_env *env, struct cl_device *dev); -int ccc_global_init(struct lu_device_type *device_type); -void ccc_global_fini(struct lu_device_type *device_type); -int ccc_object_init0(const struct lu_env *env, struct ccc_object *vob, - const struct cl_object_conf *conf); -int ccc_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf); -void ccc_object_free(const struct lu_env *env, struct lu_object *obj); -int ccc_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io, - const struct cl_lock_operations *lkops); -int ccc_object_glimpse(const struct lu_env *env, - const struct cl_object *obj, struct ost_lvb *lvb); -struct page *ccc_page_vmpage(const struct lu_env *env, - const struct cl_page_slice *slice); -int ccc_page_is_under_lock(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); -int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice); -int ccc_transient_page_prep(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); -void ccc_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice); -void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice); -int ccc_lock_enqueue(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *io, __u32 enqflags); -int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice); -int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice); -int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice); -int ccc_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io); -void ccc_lock_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state); - -int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - pgoff_t start, pgoff_t end); -int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - loff_t start, loff_t end); -void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios); -void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios, - size_t nob); -void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio, - struct cl_io *io); -int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, loff_t start, size_t count, int *exceed); -void ccc_req_completion(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret); -void ccc_req_attr_set(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *oa, u64 flags); - -struct lu_device *ccc2lu_dev (struct ccc_device *vdv); -struct lu_object *ccc2lu (struct ccc_object *vob); -struct ccc_device *lu2ccc_dev (const struct lu_device *d); -struct ccc_device *cl2ccc_dev (const struct cl_device *d); -struct ccc_object *lu2ccc (const struct lu_object *obj); -struct ccc_object *cl2ccc (const struct cl_object *obj); -struct ccc_lock *cl2ccc_lock (const struct cl_lock_slice *slice); -struct ccc_io *cl2ccc_io (const struct lu_env *env, - const struct cl_io_slice *slice); -struct ccc_req *cl2ccc_req (const struct cl_req_slice *slice); -struct page *cl2vm_page (const struct cl_page_slice *slice); -struct inode *ccc_object_inode(const struct cl_object *obj); -struct ccc_object *cl_inode2ccc (struct inode *inode); - -int cl_setattr_ost(struct inode *inode, const struct iattr *attr); - -int ccc_object_invariant(const struct cl_object *obj); -int cl_file_inode_init(struct inode *inode, struct lustre_md *md); -void cl_inode_fini(struct inode *inode); -int cl_local_size(struct inode *inode); - -__u16 ll_dirent_type_get(struct lu_dirent *ent); -__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32); -__u32 cl_fid_build_gen(const struct lu_fid *fid); - -# define CLOBINVRNT(env, clob, expr) \ - ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr))) - -int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp); -int cl_ocd_update(struct obd_device *host, - struct obd_device *watched, - enum obd_notify_event ev, void *owner, void *data); - -struct ccc_grouplock { - struct lu_env *cg_env; - struct cl_io *cg_io; - struct cl_lock *cg_lock; - unsigned long cg_gid; -}; - -int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, - struct ccc_grouplock *cg); -void cl_put_grouplock(struct ccc_grouplock *cg); - -/** - * New interfaces to get and put lov_stripe_md from lov layer. This violates - * layering because lov_stripe_md is supposed to be a private data in lov. - * - * NB: If you find you have to use these interfaces for your new code, please - * think about it again. These interfaces may be removed in the future for - * better layering. - */ -struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj); -void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm); -int lov_read_and_clear_async_rc(struct cl_object *clob); - -struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode); -void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm); - -/** - * Data structure managing a client's cached clean pages. An LRU of - * pages is maintained, along with other statistics. - */ -struct cl_client_cache { - atomic_t ccc_users; /* # of users (OSCs) of this data */ - struct list_head ccc_lru; /* LRU list of cached clean pages */ - spinlock_t ccc_lru_lock; /* lock for list */ - atomic_t ccc_lru_left; /* # of LRU entries available */ - unsigned long ccc_lru_max; /* Max # of LRU entries possible */ - unsigned int ccc_lru_shrinkers; /* # of threads reclaiming */ -}; - -#endif /*LCLIENT_H */ diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h index 79d8f93075d1..1eb64ec4bed4 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h index 3420cfd1278d..d18e8a76bb25 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h index c6c7f54637fb..5842cb18b49e 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_user.h b/drivers/staging/lustre/lustre/include/linux/lustre_user.h index 9cc2849f3f85..e967950e8536 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_user.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_user.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/linux/obd.h b/drivers/staging/lustre/lustre/include/linux/obd.h deleted file mode 100644 index 3907bf4ce07c..000000000000 --- a/drivers/staging/lustre/lustre/include/linux/obd.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LINUX_OBD_H -#define __LINUX_OBD_H - -#ifndef __OBD_H -#error Do not #include this file directly. #include <obd.h> instead -#endif - -#include "../obd_support.h" - -#include <linux/fs.h> -#include <linux/list.h> -#include <linux/sched.h> /* for struct task_struct, for current.h */ -#include <linux/mount.h> - -#include "../lustre_intent.h" - -struct ll_iattr { - struct iattr iattr; - unsigned int ia_attr_flags; -}; - -#define CLIENT_OBD_LIST_LOCK_DEBUG 1 - -struct client_obd_lock { - spinlock_t lock; - - unsigned long time; - struct task_struct *task; - const char *func; - int line; -}; - -static inline void __client_obd_list_lock(struct client_obd_lock *lock, - const char *func, int line) -{ - unsigned long cur = jiffies; - - while (1) { - if (spin_trylock(&lock->lock)) { - LASSERT(!lock->task); - lock->task = current; - lock->func = func; - lock->line = line; - lock->time = jiffies; - break; - } - - if (time_before(cur + 5 * HZ, jiffies) && - time_before(lock->time + 5 * HZ, jiffies)) { - struct task_struct *task = lock->task; - - if (!task) - continue; - - LCONSOLE_WARN("%s:%d: lock %p was acquired by <%s:%d:%s:%d> for %lu seconds.\n", - current->comm, current->pid, - lock, task->comm, task->pid, - lock->func, lock->line, - (jiffies - lock->time) / HZ); - LCONSOLE_WARN("====== for current process =====\n"); - dump_stack(); - LCONSOLE_WARN("====== end =======\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1000 * HZ); - } - cpu_relax(); - } -} - -#define client_obd_list_lock(lock) \ - __client_obd_list_lock(lock, __func__, __LINE__) - -static inline void client_obd_list_unlock(struct client_obd_lock *lock) -{ - LASSERT(lock->task); - lock->task = NULL; - lock->time = jiffies; - spin_unlock(&lock->lock); -} - -static inline void client_obd_list_lock_init(struct client_obd_lock *lock) -{ - spin_lock_init(&lock->lock); -} - -static inline void client_obd_list_lock_done(struct client_obd_lock *lock) -{} - -#endif /* __LINUX_OBD_H */ diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h index 4146c9c3999f..d68e60e7fef7 100644 --- a/drivers/staging/lustre/lustre/include/lprocfs_status.h +++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h index 242bb1ef6245..6e25c1bb6aa3 100644 --- a/drivers/staging/lustre/lustre/include/lu_object.h +++ b/drivers/staging/lustre/lustre/include/lu_object.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -198,7 +194,6 @@ typedef int (*lu_printer_t)(const struct lu_env *env, * Operations specific for particular lu_object. */ struct lu_object_operations { - /** * Allocate lower-layer parts of the object by calling * lu_device_operations::ldo_object_alloc() of the corresponding @@ -656,21 +651,21 @@ static inline struct seq_server_site *lu_site2seq(const struct lu_site *s) * @{ */ -int lu_site_init (struct lu_site *s, struct lu_device *d); -void lu_site_fini (struct lu_site *s); -int lu_site_init_finish (struct lu_site *s); -void lu_stack_fini (const struct lu_env *env, struct lu_device *top); -void lu_device_get (struct lu_device *d); -void lu_device_put (struct lu_device *d); -int lu_device_init (struct lu_device *d, struct lu_device_type *t); -void lu_device_fini (struct lu_device *d); -int lu_object_header_init(struct lu_object_header *h); +int lu_site_init(struct lu_site *s, struct lu_device *d); +void lu_site_fini(struct lu_site *s); +int lu_site_init_finish(struct lu_site *s); +void lu_stack_fini(const struct lu_env *env, struct lu_device *top); +void lu_device_get(struct lu_device *d); +void lu_device_put(struct lu_device *d); +int lu_device_init(struct lu_device *d, struct lu_device_type *t); +void lu_device_fini(struct lu_device *d); +int lu_object_header_init(struct lu_object_header *h); void lu_object_header_fini(struct lu_object_header *h); -int lu_object_init (struct lu_object *o, - struct lu_object_header *h, struct lu_device *d); -void lu_object_fini (struct lu_object *o); -void lu_object_add_top (struct lu_object_header *h, struct lu_object *o); -void lu_object_add (struct lu_object *before, struct lu_object *o); +int lu_object_init(struct lu_object *o, + struct lu_object_header *h, struct lu_device *d); +void lu_object_fini(struct lu_object *o); +void lu_object_add_top(struct lu_object_header *h, struct lu_object *o); +void lu_object_add(struct lu_object *before, struct lu_object *o); /** * Helpers to initialize and finalize device types. @@ -781,11 +776,10 @@ int lu_cdebug_printer(const struct lu_env *env, */ #define LU_OBJECT_DEBUG(mask, env, object, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ lu_object_print(env, &msgdata, lu_cdebug_printer, object);\ - CDEBUG(mask, format, ## __VA_ARGS__); \ + CDEBUG(mask, format "\n", ## __VA_ARGS__); \ } \ } while (0) @@ -794,9 +788,8 @@ do { \ */ #define LU_OBJECT_HEADER(mask, env, object, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ lu_object_header_print(env, &msgdata, lu_cdebug_printer,\ (object)->lo_header); \ lu_cdebug_printer(env, &msgdata, "\n"); \ @@ -1007,6 +1000,10 @@ enum lu_context_tag { */ LCT_LOCAL = 1 << 7, /** + * session for server thread + **/ + LCT_SERVER_SESSION = BIT(8), + /** * Set when at least one of keys, having values in this context has * non-NULL lu_context_key::lct_exit() method. This is used to * optimize lu_context_exit() call. @@ -1118,7 +1115,7 @@ struct lu_context_key { { \ type *value; \ \ - CLASSERT(PAGE_SIZE >= sizeof (*value)); \ + CLASSERT(PAGE_SIZE >= sizeof(*value)); \ \ value = kzalloc(sizeof(*value), GFP_NOFS); \ if (!value) \ @@ -1154,12 +1151,12 @@ do { \ (key)->lct_owner = THIS_MODULE; \ } while (0) -int lu_context_key_register(struct lu_context_key *key); -void lu_context_key_degister(struct lu_context_key *key); -void *lu_context_key_get (const struct lu_context *ctx, - const struct lu_context_key *key); -void lu_context_key_quiesce (struct lu_context_key *key); -void lu_context_key_revive (struct lu_context_key *key); +int lu_context_key_register(struct lu_context_key *key); +void lu_context_key_degister(struct lu_context_key *key); +void *lu_context_key_get(const struct lu_context *ctx, + const struct lu_context_key *key); +void lu_context_key_quiesce(struct lu_context_key *key); +void lu_context_key_revive(struct lu_context_key *key); /* * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an @@ -1216,21 +1213,21 @@ void lu_context_key_revive (struct lu_context_key *key); LU_TYPE_START(mod, __VA_ARGS__); \ LU_TYPE_STOP(mod, __VA_ARGS__) -int lu_context_init (struct lu_context *ctx, __u32 tags); -void lu_context_fini (struct lu_context *ctx); -void lu_context_enter (struct lu_context *ctx); -void lu_context_exit (struct lu_context *ctx); -int lu_context_refill(struct lu_context *ctx); +int lu_context_init(struct lu_context *ctx, __u32 tags); +void lu_context_fini(struct lu_context *ctx); +void lu_context_enter(struct lu_context *ctx); +void lu_context_exit(struct lu_context *ctx); +int lu_context_refill(struct lu_context *ctx); /* * Helper functions to operate on multiple keys. These are used by the default * device type operations, defined by LU_TYPE_INIT_FINI(). */ -int lu_context_key_register_many(struct lu_context_key *k, ...); +int lu_context_key_register_many(struct lu_context_key *k, ...); void lu_context_key_degister_many(struct lu_context_key *k, ...); -void lu_context_key_revive_many (struct lu_context_key *k, ...); -void lu_context_key_quiesce_many (struct lu_context_key *k, ...); +void lu_context_key_revive_many(struct lu_context_key *k, ...); +void lu_context_key_quiesce_many(struct lu_context_key *k, ...); /** * Environment. @@ -1246,9 +1243,9 @@ struct lu_env { struct lu_context *le_ses; }; -int lu_env_init (struct lu_env *env, __u32 tags); -void lu_env_fini (struct lu_env *env); -int lu_env_refill(struct lu_env *env); +int lu_env_init(struct lu_env *env, __u32 tags); +void lu_env_fini(struct lu_env *env); +int lu_env_refill(struct lu_env *env); /** @} lu_context */ diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h index 07d45de69dd9..c2340d643e84 100644 --- a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h +++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h index 5aae1d06a5fa..051864c23b5b 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -183,6 +179,12 @@ struct lu_seq_range { __u32 lsr_flags; }; +struct lu_seq_range_array { + __u32 lsra_count; + __u32 lsra_padding; + struct lu_seq_range lsra_lsr[0]; +}; + #define LU_SEQ_RANGE_MDT 0x0 #define LU_SEQ_RANGE_OST 0x1 #define LU_SEQ_RANGE_ANY 0x3 @@ -380,7 +382,7 @@ static inline __u64 fid_ver_oid(const struct lu_fid *fid) * used for other purposes and not risk collisions with existing inodes. * * Different FID Format - * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0 + * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs */ enum fid_seq { FID_SEQ_OST_MDT0 = 0, @@ -578,7 +580,7 @@ static inline __u64 ostid_seq(const struct ost_id *ostid) if (fid_seq_is_mdt0(ostid->oi.oi_seq)) return FID_SEQ_OST_MDT0; - if (fid_seq_is_default(ostid->oi.oi_seq)) + if (unlikely(fid_seq_is_default(ostid->oi.oi_seq))) return FID_SEQ_LOV_DEFAULT; if (fid_is_idif(&ostid->oi_fid)) @@ -590,9 +592,12 @@ static inline __u64 ostid_seq(const struct ost_id *ostid) /* extract OST objid from a wire ost_id (id/seq) pair */ static inline __u64 ostid_id(const struct ost_id *ostid) { - if (fid_seq_is_mdt0(ostid_seq(ostid))) + if (fid_seq_is_mdt0(ostid->oi.oi_seq)) return ostid->oi.oi_id & IDIF_OID_MASK; + if (unlikely(fid_seq_is_default(ostid->oi.oi_seq))) + return ostid->oi.oi_id; + if (fid_is_idif(&ostid->oi_fid)) return fid_idif_id(fid_seq(&ostid->oi_fid), fid_oid(&ostid->oi_fid), 0); @@ -636,12 +641,22 @@ static inline void ostid_set_seq_llog(struct ost_id *oi) */ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) { - if (fid_seq_is_mdt0(ostid_seq(oi))) { + if (fid_seq_is_mdt0(oi->oi.oi_seq)) { if (oid >= IDIF_MAX_OID) { CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi)); return; } oi->oi.oi_id = oid; + } else if (fid_is_idif(&oi->oi_fid)) { + if (oid >= IDIF_MAX_OID) { + CERROR("Bad %llu to set "DOSTID"\n", + oid, POSTID(oi)); + return; + } + oi->oi_fid.f_seq = fid_idif_seq(oid, + fid_idif_ost_idx(&oi->oi_fid)); + oi->oi_fid.f_oid = oid; + oi->oi_fid.f_ver = oid >> 48; } else { if (oid > OBIF_MAX_OID) { CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi)); @@ -651,25 +666,31 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) } } -static inline void ostid_inc_id(struct ost_id *oi) +static inline int fid_set_id(struct lu_fid *fid, __u64 oid) { - if (fid_seq_is_mdt0(ostid_seq(oi))) { - if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) { - CERROR("Bad inc "DOSTID"\n", POSTID(oi)); - return; + if (unlikely(fid_seq_is_igif(fid->f_seq))) { + CERROR("bad IGIF, "DFID"\n", PFID(fid)); + return -EBADF; + } + + if (fid_is_idif(fid)) { + if (oid >= IDIF_MAX_OID) { + CERROR("Too large OID %#llx to set IDIF "DFID"\n", + (unsigned long long)oid, PFID(fid)); + return -EBADF; } - oi->oi.oi_id++; + fid->f_seq = fid_idif_seq(oid, fid_idif_ost_idx(fid)); + fid->f_oid = oid; + fid->f_ver = oid >> 48; } else { - oi->oi_fid.f_oid++; + if (oid > OBIF_MAX_OID) { + CERROR("Too large OID %#llx to set REG "DFID"\n", + (unsigned long long)oid, PFID(fid)); + return -EBADF; + } + fid->f_oid = oid; } -} - -static inline void ostid_dec_id(struct ost_id *oi) -{ - if (fid_seq_is_mdt0(ostid_seq(oi))) - oi->oi.oi_id--; - else - oi->oi_fid.f_oid--; + return 0; } /** @@ -679,35 +700,39 @@ static inline void ostid_dec_id(struct ost_id *oi) * be passed through unchanged. Only legacy OST objects in "group 0" * will be mapped into the IDIF namespace so that they can fit into the * struct lu_fid fields without loss. For reference see: - * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs + * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs */ static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid, __u32 ost_idx) { + __u64 seq = ostid_seq(ostid); + if (ost_idx > 0xffff) { CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid), ost_idx); return -EBADF; } - if (fid_seq_is_mdt0(ostid_seq(ostid))) { + if (fid_seq_is_mdt0(seq)) { + __u64 oid = ostid_id(ostid); + /* This is a "legacy" (old 1.x/2.early) OST object in "group 0" * that we map into the IDIF namespace. It allows up to 2^48 * objects per OST, as this is the object namespace that has * been in production for years. This can handle create rates * of 1M objects/s/OST for 9 years, or combinations thereof. */ - if (ostid_id(ostid) >= IDIF_MAX_OID) { + if (oid >= IDIF_MAX_OID) { CERROR("bad MDT0 id, " DOSTID " ost_idx:%u\n", POSTID(ostid), ost_idx); return -EBADF; } - fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx); + fid->f_seq = fid_idif_seq(oid, ost_idx); /* truncate to 32 bits by assignment */ - fid->f_oid = ostid_id(ostid); + fid->f_oid = oid; /* in theory, not currently used */ - fid->f_ver = ostid_id(ostid) >> 48; - } else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ { + fid->f_ver = oid >> 48; + } else if (likely(!fid_seq_is_default(seq))) { /* This is either an IDIF object, which identifies objects across * all OSTs, or a regular FID. The IDIF namespace maps legacy * OST objects into the FID namespace. In both cases, we just @@ -1001,8 +1026,9 @@ static inline int lu_dirent_calc_size(int namelen, __u16 attr) size = (sizeof(struct lu_dirent) + namelen + align) & ~align; size += sizeof(struct luda_type); - } else + } else { size = sizeof(struct lu_dirent) + namelen; + } return (size + 7) & ~7; } @@ -1211,8 +1237,16 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); */ #define OBD_CONNECT_ATTRFID 0x4000ULL /*Server can GetAttr By Fid*/ #define OBD_CONNECT_NODEVOH 0x8000ULL /*No open hndl on specl nodes*/ -#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /*Remote client */ -#define OBD_CONNECT_RMT_CLIENT_FORCE 0x20000ULL /*Remote client by force */ +#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /* Remote client, never used + * in production. Removed in + * 2.9. Keep this flag to + * avoid reuse. + */ +#define OBD_CONNECT_RMT_CLIENT_FORCE 0x20000ULL /* Remote client by force, + * never used in production. + * Removed in 2.9. Keep this + * flag to avoid reuse + */ #define OBD_CONNECT_BRW_SIZE 0x40000ULL /*Max bytes per rpc */ #define OBD_CONNECT_QUOTA64 0x80000ULL /*Not used since 2.4 */ #define OBD_CONNECT_MDS_CAPA 0x100000ULL /*MDS capability */ @@ -1256,6 +1290,9 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */ #define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* flock deadlock detection */ #define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/ +#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack + * name in request + */ /* XXX README XXX: * Please DO NOT add flag values here before first ensuring that this same @@ -1428,6 +1465,8 @@ enum obdo_flags { */ OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */ + OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */ + OBD_FL_SHORT_IO = 0x00400000, /* short io request */ /* Note that while these checksum values are currently separate bits, * in 2.x we can actually allow all values from 1-31 if we wanted. @@ -1525,6 +1564,11 @@ static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq) oi->oi.oi_seq = seq; } +static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid) +{ + oi->oi.oi_id = oid; +} + static inline __u64 lmm_oi_id(struct ost_id *oi) { return oi->oi.oi_id; @@ -1663,7 +1707,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */ #define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */ #define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */ -#define OBD_MD_FLRMTPERM (0x0000010000000000ULL) /* remote permission */ +/* OBD_MD_FLRMTPERM (0x0000010000000000ULL) remote perm, obsolete */ #define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */ #define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */ #define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */ @@ -1675,10 +1719,10 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) */ #define OBD_MD_FLOBJCOUNT (0x0000400000000000ULL) /* for multiple destroy */ -#define OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) /* lfs lsetfacl case */ -#define OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) /* lfs lgetfacl case */ -#define OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) /* lfs rsetfacl case */ -#define OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) /* lfs rgetfacl case */ +/* OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) lfs lsetfacl, obsolete */ +/* OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) lfs lgetfacl, obsolete */ +/* OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) lfs rsetfacl, obsolete */ +/* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */ #define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */ #define OBD_MD_FLRELEASED (0x0020000000000000ULL) /* file released */ @@ -1732,6 +1776,11 @@ void lustre_swab_obd_statfs(struct obd_statfs *os); #define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */ #define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */ #define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */ +#define OBD_BRW_SOFT_SYNC 0x4000 /* This flag notifies the server + * that the client is running low on + * space for unstable pages; asking + * it to sync quickly + */ #define OBD_OBJECT_EOF 0xffffffffffffffffULL @@ -2114,26 +2163,8 @@ enum { CFS_SETUID_PERM = 0x01, CFS_SETGID_PERM = 0x02, CFS_SETGRP_PERM = 0x04, - CFS_RMTACL_PERM = 0x08, - CFS_RMTOWN_PERM = 0x10 -}; - -/* inode access permission for remote user, the inode info are omitted, - * for client knows them. - */ -struct mdt_remote_perm { - __u32 rp_uid; - __u32 rp_gid; - __u32 rp_fsuid; - __u32 rp_fsuid_h; - __u32 rp_fsgid; - __u32 rp_fsgid_h; - __u32 rp_access_perm; /* MAY_READ/WRITE/EXEC */ - __u32 rp_padding; }; -void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p); - struct mdt_rec_setattr { __u32 sa_opcode; __u32 sa_cap; @@ -2436,6 +2467,7 @@ struct mdt_rec_reint { void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); +/* lmv structures */ struct lmv_desc { __u32 ld_tgt_count; /* how many MDS's */ __u32 ld_active_tgt_count; /* how many active */ @@ -2460,7 +2492,6 @@ struct lmv_stripe_md { struct lu_fid mea_ids[0]; }; -/* lmv structures */ #define MEA_MAGIC_LAST_CHAR 0xb2221ca1 #define MEA_MAGIC_ALL_CHARS 0xb222a11c #define MEA_MAGIC_HASH_SEGMENT 0xb222a11b @@ -2470,9 +2501,10 @@ struct lmv_stripe_md { #define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL enum fld_rpc_opc { - FLD_QUERY = 900, + FLD_QUERY = 900, + FLD_READ = 901, FLD_LAST_OPC, - FLD_FIRST_OPC = FLD_QUERY + FLD_FIRST_OPC = FLD_QUERY }; enum seq_rpc_opc { @@ -2486,6 +2518,12 @@ enum seq_op { SEQ_ALLOC_META = 1 }; +enum fld_op { + FLD_CREATE = 0, + FLD_DELETE = 1, + FLD_LOOKUP = 2, +}; + /* * LOV data structures */ @@ -2582,6 +2620,8 @@ struct ldlm_extent { __u64 gid; }; +#define LDLM_GID_ANY ((__u64)-1) + static inline int ldlm_extent_overlap(struct ldlm_extent *ex1, struct ldlm_extent *ex2) { @@ -3304,7 +3344,7 @@ struct getinfo_fid2path { char gf_path[0]; } __packed; -void lustre_swab_fid2path (struct getinfo_fid2path *gf); +void lustre_swab_fid2path(struct getinfo_fid2path *gf); enum { LAYOUT_INTENT_ACCESS = 0, diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h index 276906e646f5..ef6f38ff359e 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -193,37 +189,37 @@ struct ost_id { * *INFO - set/get lov_user_mds_data */ /* see <lustre_lib.h> for ioctl numberss 101-150 */ -#define LL_IOC_GETFLAGS _IOR ('f', 151, long) -#define LL_IOC_SETFLAGS _IOW ('f', 152, long) -#define LL_IOC_CLRFLAGS _IOW ('f', 153, long) +#define LL_IOC_GETFLAGS _IOR('f', 151, long) +#define LL_IOC_SETFLAGS _IOW('f', 152, long) +#define LL_IOC_CLRFLAGS _IOW('f', 153, long) /* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */ -#define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) +#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long) /* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */ -#define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) +#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long) /* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */ -#define LL_IOC_LOV_SETEA _IOW ('f', 156, long) -#define LL_IOC_RECREATE_OBJ _IOW ('f', 157, long) -#define LL_IOC_RECREATE_FID _IOW ('f', 157, struct lu_fid) -#define LL_IOC_GROUP_LOCK _IOW ('f', 158, long) -#define LL_IOC_GROUP_UNLOCK _IOW ('f', 159, long) +#define LL_IOC_LOV_SETEA _IOW('f', 156, long) +#define LL_IOC_RECREATE_OBJ _IOW('f', 157, long) +#define LL_IOC_RECREATE_FID _IOW('f', 157, struct lu_fid) +#define LL_IOC_GROUP_LOCK _IOW('f', 158, long) +#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long) /* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */ -#define LL_IOC_QUOTACHECK _IOW ('f', 160, int) +#define LL_IOC_QUOTACHECK _IOW('f', 160, int) /* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */ -#define LL_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) +#define LL_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) /* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */ #define LL_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) #define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *) -#define LL_IOC_FLUSHCTX _IOW ('f', 166, long) -#define LL_IOC_RMTACL _IOW ('f', 167, long) -#define LL_IOC_GETOBDCOUNT _IOR ('f', 168, long) +#define LL_IOC_FLUSHCTX _IOW('f', 166, long) +/* LL_IOC_RMTACL 167 obsolete */ +#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long) #define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long) #define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long) #define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid) #define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long) -#define LL_IOC_PATH2FID _IOR ('f', 173, long) +#define LL_IOC_PATH2FID _IOR('f', 173, long) #define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *) -#define LL_IOC_GET_MDTIDX _IOR ('f', 175, int) +#define LL_IOC_GET_MDTIDX _IOR('f', 175, int) /* see <lustre_lib.h> for ioctl numbers 177-210 */ @@ -542,19 +538,6 @@ struct identity_downcall_data { __u32 idd_groups[0]; }; -/* for non-mapped uid/gid */ -#define NOBODY_UID 99 -#define NOBODY_GID 99 - -#define INVALID_ID (-1) - -enum { - RMT_LSETFACL = 1, - RMT_LGETFACL = 2, - RMT_RSETFACL = 3, - RMT_RGETFACL = 4 -}; - /* lustre volatile file support * file name header: .^L^S^T^R:volatile" */ @@ -676,7 +659,12 @@ static inline const char *changelog_type2str(int type) #define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */ /* HSM cleaning needed */ /* Flags for rename */ -#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of target */ +#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of + * target + */ +#define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target + * has an archive in backend + */ /* Flags for HSM */ /* 12b used (from high weight to low weight): @@ -833,9 +821,8 @@ struct ioc_data_version { __u64 idv_flags; /* See LL_DV_xxx */ }; -#define LL_DV_NOFLUSH 0x01 /* Do not take READ EXTENT LOCK before sampling - * version. Dirty caches are left unchanged. - */ +#define LL_DV_RD_FLUSH BIT(0) /* Flush dirty pages from clients */ +#define LL_DV_WR_FLUSH BIT(1) /* Flush all caching pages from clients */ #ifndef offsetof # define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb))) @@ -1095,12 +1082,12 @@ struct hsm_action_list { __u32 padding1; char hal_fsname[0]; /* null-terminated */ /* struct hsm_action_item[hal_count] follows, aligned on 8-byte - * boundaries. See hai_zero + * boundaries. See hai_first */ } __packed; #ifndef HAVE_CFS_SIZE_ROUND -static inline int cfs_size_round (int val) +static inline int cfs_size_round(int val) { return (val + 7) & (~0x7); } @@ -1109,7 +1096,7 @@ static inline int cfs_size_round (int val) #endif /* Return pointer to first hai in action list */ -static inline struct hsm_action_item *hai_zero(struct hsm_action_list *hal) +static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal) { return (struct hsm_action_item *)(hal->hal_fsname + cfs_size_round(strlen(hal-> \ @@ -1131,7 +1118,7 @@ static inline int hal_size(struct hsm_action_list *hal) struct hsm_action_item *hai; sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1); - hai = hai_zero(hal); + hai = hai_first(hal); for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) sz += cfs_size_round(hai->hai_len); diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h b/drivers/staging/lustre/lustre/include/lustre_acl.h index aa4cfa7b749d..fecabe139b1f 100644 --- a/drivers/staging/lustre/lustre/include/lustre_acl.h +++ b/drivers/staging/lustre/lustre/include/lustre_acl.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h index bb16ae980b98..95a0be13c0fb 100644 --- a/drivers/staging/lustre/lustre/include/lustre_cfg.h +++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -161,7 +157,7 @@ static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index) int offset; int bufcount; - LASSERT (index >= 0); + LASSERT(index >= 0); bufcount = lcfg->lcfg_bufcount; if (index >= bufcount) diff --git a/drivers/staging/lustre/lustre/include/lustre_debug.h b/drivers/staging/lustre/lustre/include/lustre_debug.h index 8a089413c92e..93c1bdaf71a4 100644 --- a/drivers/staging/lustre/lustre/include/lustre_debug.h +++ b/drivers/staging/lustre/lustre/include/lustre_debug.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h index 95fd36063f55..8886458748c1 100644 --- a/drivers/staging/lustre/lustre/include/lustre_disk.h +++ b/drivers/staging/lustre/lustre/include/lustre_disk.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -130,7 +126,6 @@ struct lustre_sb_info { struct lustre_mount_data *lsi_lmd; /* mount command info */ struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */ struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/ - struct vfsmount *lsi_srv_mnt; /* the one server mount */ atomic_t lsi_mounts; /* references to the srv_mnt */ char lsi_svname[MTI_NAME_MAXLEN]; char lsi_osd_obdname[64]; @@ -158,7 +153,6 @@ struct lustre_sb_info { struct lustre_mount_info { char *lmi_name; struct super_block *lmi_sb; - struct vfsmount *lmi_mnt; struct list_head lmi_list_chain; }; diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h index 8b0364f71129..60051a5cfe20 100644 --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h +++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -71,6 +67,7 @@ struct obd_device; */ enum ldlm_error { ELDLM_OK = 0, + ELDLM_LOCK_MATCHED = 1, ELDLM_LOCK_CHANGED = 300, ELDLM_LOCK_ABORTED = 301, @@ -269,7 +266,7 @@ struct ldlm_pool { struct completion pl_kobj_unregister; }; -typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock); +typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock); /** * LVB operations. @@ -446,8 +443,11 @@ struct ldlm_namespace { /** Limit of parallel AST RPC count. */ unsigned ns_max_parallel_ast; - /** Callback to cancel locks before replaying it during recovery. */ - ldlm_cancel_for_recovery ns_cancel_for_recovery; + /** + * Callback to check if a lock is good to be canceled by ELC or + * during recovery. + */ + ldlm_cancel_cbt ns_cancel; /** LDLM lock stats */ struct lprocfs_stats *ns_stats; @@ -479,9 +479,9 @@ static inline int ns_connect_lru_resize(struct ldlm_namespace *ns) } static inline void ns_register_cancel(struct ldlm_namespace *ns, - ldlm_cancel_for_recovery arg) + ldlm_cancel_cbt arg) { - ns->ns_cancel_for_recovery = arg; + ns->ns_cancel = arg; } struct ldlm_lock; @@ -1073,7 +1073,7 @@ void ldlm_lock2handle(const struct ldlm_lock *lock, struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, __u64 flags); void ldlm_cancel_callback(struct ldlm_lock *); int ldlm_lock_remove_from_lru(struct ldlm_lock *); -int ldlm_lock_set_data(struct lustre_handle *, void *); +int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data); /** * Obtain a lock reference by its handle. @@ -1162,10 +1162,10 @@ do { \ struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); void ldlm_lock_put(struct ldlm_lock *lock); void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); -void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode); -int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode); +void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode); +int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode); +void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode); +void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode); void ldlm_lock_fail_match_locked(struct ldlm_lock *lock); void ldlm_lock_allow_match(struct ldlm_lock *lock); void ldlm_lock_allow_match_locked(struct ldlm_lock *lock); @@ -1174,10 +1174,10 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, enum ldlm_type type, ldlm_policy_data_t *, enum ldlm_mode mode, struct lustre_handle *, int unref); -enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh, +enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, __u64 *bits); void ldlm_lock_cancel(struct ldlm_lock *lock); -void ldlm_lock_dump_handle(int level, struct lustre_handle *); +void ldlm_lock_dump_handle(int level, const struct lustre_handle *); void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); /* resource.c */ @@ -1251,9 +1251,9 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, enum ldlm_type type, __u8 with_policy, enum ldlm_mode mode, __u64 *flags, void *lvb, __u32 lvb_len, - struct lustre_handle *lockh, int rc); + const struct lustre_handle *lockh, int rc); int ldlm_cli_update_pool(struct ptlrpc_request *req); -int ldlm_cli_cancel(struct lustre_handle *lockh, +int ldlm_cli_cancel(const struct lustre_handle *lockh, enum ldlm_cancel_flags cancel_flags); int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *, enum ldlm_cancel_flags flags, void *opaque); diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h index 7f2ba2ffe0eb..e7e0c21a9b40 100644 --- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h +++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h @@ -37,17 +37,11 @@ /** l_flags bits marked as "gone" bits */ #define LDLM_FL_GONE_MASK 0x0006004000000000ULL -/** l_flags bits marked as "hide_lock" bits */ -#define LDLM_FL_HIDE_LOCK_MASK 0x0000206400000000ULL - /** l_flags bits marked as "inherit" bits */ #define LDLM_FL_INHERIT_MASK 0x0000000000800000ULL -/** l_flags bits marked as "local_only" bits */ -#define LDLM_FL_LOCAL_ONLY_MASK 0x00FFFFFF00000000ULL - -/** l_flags bits marked as "on_wire" bits */ -#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F932FULL +/** l_flags bits marked as "off_wire" bits */ +#define LDLM_FL_OFF_WIRE_MASK 0x00FFFFFF00000000ULL /** extent, mode, or resource changed */ #define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL /* bit 0 */ @@ -204,7 +198,7 @@ #define ldlm_set_cancel(_l) LDLM_SET_FLAG((_l), 1ULL << 36) #define ldlm_clear_cancel(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 36) -/** whatever it might mean */ +/** whatever it might mean -- never transmitted? */ #define LDLM_FL_LOCAL_ONLY 0x0000002000000000ULL /* bit 37 */ #define ldlm_is_local_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 37) #define ldlm_set_local_only(_l) LDLM_SET_FLAG((_l), 1ULL << 37) @@ -287,18 +281,18 @@ * has canceled this lock and is waiting for rpc_lock which is taken by * the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in * the lock to prevent the Early Lock Cancel (ELC) code from cancelling it. - * - * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock cache is - * dropped to let ldlm_callback_handler() return EINVAL to the server. It - * is used when ELC RPC is already prepared and is waiting for rpc_lock, - * too late to send a separate CANCEL RPC. */ #define LDLM_FL_BL_AST 0x0000400000000000ULL /* bit 46 */ #define ldlm_is_bl_ast(_l) LDLM_TEST_FLAG((_l), 1ULL << 46) #define ldlm_set_bl_ast(_l) LDLM_SET_FLAG((_l), 1ULL << 46) #define ldlm_clear_bl_ast(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 46) -/** whatever it might mean */ +/** + * Set by ldlm_cancel_callback() when lock cache is dropped to let + * ldlm_callback_handler() return EINVAL to the server. It is used when + * ELC RPC is already prepared and is waiting for rpc_lock, too late to + * send a separate CANCEL RPC. + */ #define LDLM_FL_BL_DONE 0x0000800000000000ULL /* bit 47 */ #define ldlm_is_bl_done(_l) LDLM_TEST_FLAG((_l), 1ULL << 47) #define ldlm_set_bl_done(_l) LDLM_SET_FLAG((_l), 1ULL << 47) @@ -381,104 +375,16 @@ /** test for ldlm_lock flag bit set */ #define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0) +/** multi-bit test: are any of mask bits set? */ +#define LDLM_HAVE_MASK(_l, _m) ((_l)->l_flags & LDLM_FL_##_m##_MASK) + /** set a ldlm_lock flag bit */ #define LDLM_SET_FLAG(_l, _b) ((_l)->l_flags |= (_b)) /** clear a ldlm_lock flag bit */ #define LDLM_CLEAR_FLAG(_l, _b) ((_l)->l_flags &= ~(_b)) -/** Mask of flags inherited from parent lock when doing intents. */ -#define LDLM_INHERIT_FLAGS LDLM_FL_INHERIT_MASK - -/** Mask of Flags sent in AST lock_flags to map into the receiving lock. */ -#define LDLM_AST_FLAGS LDLM_FL_AST_MASK - /** @} subgroup */ /** @} group */ -#ifdef WIRESHARK_COMPILE -static int hf_lustre_ldlm_fl_lock_changed = -1; -static int hf_lustre_ldlm_fl_block_granted = -1; -static int hf_lustre_ldlm_fl_block_conv = -1; -static int hf_lustre_ldlm_fl_block_wait = -1; -static int hf_lustre_ldlm_fl_ast_sent = -1; -static int hf_lustre_ldlm_fl_replay = -1; -static int hf_lustre_ldlm_fl_intent_only = -1; -static int hf_lustre_ldlm_fl_has_intent = -1; -static int hf_lustre_ldlm_fl_flock_deadlock = -1; -static int hf_lustre_ldlm_fl_discard_data = -1; -static int hf_lustre_ldlm_fl_no_timeout = -1; -static int hf_lustre_ldlm_fl_block_nowait = -1; -static int hf_lustre_ldlm_fl_test_lock = -1; -static int hf_lustre_ldlm_fl_cancel_on_block = -1; -static int hf_lustre_ldlm_fl_deny_on_contention = -1; -static int hf_lustre_ldlm_fl_ast_discard_data = -1; -static int hf_lustre_ldlm_fl_fail_loc = -1; -static int hf_lustre_ldlm_fl_skipped = -1; -static int hf_lustre_ldlm_fl_cbpending = -1; -static int hf_lustre_ldlm_fl_wait_noreproc = -1; -static int hf_lustre_ldlm_fl_cancel = -1; -static int hf_lustre_ldlm_fl_local_only = -1; -static int hf_lustre_ldlm_fl_failed = -1; -static int hf_lustre_ldlm_fl_canceling = -1; -static int hf_lustre_ldlm_fl_local = -1; -static int hf_lustre_ldlm_fl_lvb_ready = -1; -static int hf_lustre_ldlm_fl_kms_ignore = -1; -static int hf_lustre_ldlm_fl_cp_reqd = -1; -static int hf_lustre_ldlm_fl_cleaned = -1; -static int hf_lustre_ldlm_fl_atomic_cb = -1; -static int hf_lustre_ldlm_fl_bl_ast = -1; -static int hf_lustre_ldlm_fl_bl_done = -1; -static int hf_lustre_ldlm_fl_no_lru = -1; -static int hf_lustre_ldlm_fl_fail_notified = -1; -static int hf_lustre_ldlm_fl_destroyed = -1; -static int hf_lustre_ldlm_fl_server_lock = -1; -static int hf_lustre_ldlm_fl_res_locked = -1; -static int hf_lustre_ldlm_fl_waited = -1; -static int hf_lustre_ldlm_fl_ns_srv = -1; -static int hf_lustre_ldlm_fl_excl = -1; - -const value_string lustre_ldlm_flags_vals[] = { - {LDLM_FL_LOCK_CHANGED, "LDLM_FL_LOCK_CHANGED"}, - {LDLM_FL_BLOCK_GRANTED, "LDLM_FL_BLOCK_GRANTED"}, - {LDLM_FL_BLOCK_CONV, "LDLM_FL_BLOCK_CONV"}, - {LDLM_FL_BLOCK_WAIT, "LDLM_FL_BLOCK_WAIT"}, - {LDLM_FL_AST_SENT, "LDLM_FL_AST_SENT"}, - {LDLM_FL_REPLAY, "LDLM_FL_REPLAY"}, - {LDLM_FL_INTENT_ONLY, "LDLM_FL_INTENT_ONLY"}, - {LDLM_FL_HAS_INTENT, "LDLM_FL_HAS_INTENT"}, - {LDLM_FL_FLOCK_DEADLOCK, "LDLM_FL_FLOCK_DEADLOCK"}, - {LDLM_FL_DISCARD_DATA, "LDLM_FL_DISCARD_DATA"}, - {LDLM_FL_NO_TIMEOUT, "LDLM_FL_NO_TIMEOUT"}, - {LDLM_FL_BLOCK_NOWAIT, "LDLM_FL_BLOCK_NOWAIT"}, - {LDLM_FL_TEST_LOCK, "LDLM_FL_TEST_LOCK"}, - {LDLM_FL_CANCEL_ON_BLOCK, "LDLM_FL_CANCEL_ON_BLOCK"}, - {LDLM_FL_DENY_ON_CONTENTION, "LDLM_FL_DENY_ON_CONTENTION"}, - {LDLM_FL_AST_DISCARD_DATA, "LDLM_FL_AST_DISCARD_DATA"}, - {LDLM_FL_FAIL_LOC, "LDLM_FL_FAIL_LOC"}, - {LDLM_FL_SKIPPED, "LDLM_FL_SKIPPED"}, - {LDLM_FL_CBPENDING, "LDLM_FL_CBPENDING"}, - {LDLM_FL_WAIT_NOREPROC, "LDLM_FL_WAIT_NOREPROC"}, - {LDLM_FL_CANCEL, "LDLM_FL_CANCEL"}, - {LDLM_FL_LOCAL_ONLY, "LDLM_FL_LOCAL_ONLY"}, - {LDLM_FL_FAILED, "LDLM_FL_FAILED"}, - {LDLM_FL_CANCELING, "LDLM_FL_CANCELING"}, - {LDLM_FL_LOCAL, "LDLM_FL_LOCAL"}, - {LDLM_FL_LVB_READY, "LDLM_FL_LVB_READY"}, - {LDLM_FL_KMS_IGNORE, "LDLM_FL_KMS_IGNORE"}, - {LDLM_FL_CP_REQD, "LDLM_FL_CP_REQD"}, - {LDLM_FL_CLEANED, "LDLM_FL_CLEANED"}, - {LDLM_FL_ATOMIC_CB, "LDLM_FL_ATOMIC_CB"}, - {LDLM_FL_BL_AST, "LDLM_FL_BL_AST"}, - {LDLM_FL_BL_DONE, "LDLM_FL_BL_DONE"}, - {LDLM_FL_NO_LRU, "LDLM_FL_NO_LRU"}, - {LDLM_FL_FAIL_NOTIFIED, "LDLM_FL_FAIL_NOTIFIED"}, - {LDLM_FL_DESTROYED, "LDLM_FL_DESTROYED"}, - {LDLM_FL_SERVER_LOCK, "LDLM_FL_SERVER_LOCK"}, - {LDLM_FL_RES_LOCKED, "LDLM_FL_RES_LOCKED"}, - {LDLM_FL_WAITED, "LDLM_FL_WAITED"}, - {LDLM_FL_NS_SRV, "LDLM_FL_NS_SRV"}, - {LDLM_FL_EXCL, "LDLM_FL_EXCL"}, - { 0, NULL } -}; -#endif /* WIRESHARK_COMPILE */ + #endif /* LDLM_ALL_FLAGS_MASK */ diff --git a/drivers/staging/lustre/lustre/include/lustre_eacl.h b/drivers/staging/lustre/lustre/include/lustre_eacl.h index 0b66593a9526..d1039e1ff70d 100644 --- a/drivers/staging/lustre/lustre/include/lustre_eacl.h +++ b/drivers/staging/lustre/lustre/include/lustre_eacl.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -70,17 +66,6 @@ typedef struct { #define CFS_ACL_XATTR_COUNT(size, prefix) \ (((size) - sizeof(prefix ## _header)) / sizeof(prefix ## _entry)) -extern ext_acl_xattr_header * -lustre_posix_acl_xattr_2ext(posix_acl_xattr_header *header, int size); -extern int -lustre_posix_acl_xattr_filter(posix_acl_xattr_header *header, size_t size, - posix_acl_xattr_header **out); -extern void -lustre_ext_acl_xattr_free(ext_acl_xattr_header *header); -extern ext_acl_xattr_header * -lustre_acl_xattr_merge2ext(posix_acl_xattr_header *posix_header, int size, - ext_acl_xattr_header *ext_header); - #endif /* CONFIG_FS_POSIX_ACL */ /** @} eacl */ diff --git a/drivers/staging/lustre/lustre/include/lustre_export.h b/drivers/staging/lustre/lustre/include/lustre_export.h index 3014d27e6dc2..6e7cc4689fb8 100644 --- a/drivers/staging/lustre/lustre/include/lustre_export.h +++ b/drivers/staging/lustre/lustre/include/lustre_export.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -180,19 +176,6 @@ static inline int exp_connect_lru_resize(struct obd_export *exp) return !!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE); } -static inline int exp_connect_rmtclient(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_RMT_CLIENT); -} - -static inline int client_is_remote(struct obd_export *exp) -{ - struct obd_import *imp = class_exp2cliimp(exp); - - return !!(imp->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_RMT_CLIENT); -} - static inline int exp_connect_vbr(struct obd_export *exp) { return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR); diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h index ab4a92390a43..743671a547ef 100644 --- a/drivers/staging/lustre/lustre/include/lustre_fid.h +++ b/drivers/staging/lustre/lustre/include/lustre_fid.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -45,7 +41,7 @@ * * @{ * - * http://wiki.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs + * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs * describes the FID namespace and interoperability requirements for FIDs. * The important parts of that document are included here for reference. * @@ -308,10 +304,10 @@ static inline int fid_seq_in_fldb(__u64 seq) fid_seq_is_root(seq) || fid_seq_is_dot(seq); } -static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq) +static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx) { if (fid_seq_is_mdt0(seq)) { - fid->f_seq = fid_idif_seq(0, 0); + fid->f_seq = fid_idif_seq(0, ost_idx); } else { LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) || fid_seq_is_idif(seq), "%#llx\n", seq); @@ -498,19 +494,6 @@ static inline void ostid_build_res_name(struct ost_id *oi, } } -static inline void ostid_res_name_to_id(struct ost_id *oi, - struct ldlm_res_id *name) -{ - if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_SEQ_OFF])) { - /* old resid */ - ostid_set_seq(oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]); - ostid_set_id(oi, name->name[LUSTRE_RES_ID_SEQ_OFF]); - } else { - /* new resid */ - fid_extract_from_res_name(&oi->oi_fid, name); - } -} - /** * Return true if the resource is for the object identified by this id & group. */ @@ -546,7 +529,8 @@ static inline void ost_fid_build_resid(const struct lu_fid *fid, } static inline void ost_fid_from_resid(struct lu_fid *fid, - const struct ldlm_res_id *name) + const struct ldlm_res_id *name, + int ost_idx) { if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) { /* old resid */ @@ -554,7 +538,7 @@ static inline void ost_fid_from_resid(struct lu_fid *fid, ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]); ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]); - ostid_to_fid(fid, &oi, 0); + ostid_to_fid(fid, &oi, ost_idx); } else { /* new resid */ fid_extract_from_res_name(fid, name); diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h index 4cf2b0e61672..932410d3e3cc 100644 --- a/drivers/staging/lustre/lustre/include/lustre_fld.h +++ b/drivers/staging/lustre/lustre/include/lustre_fld.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h index 5488a698dabd..cde7ed702c86 100644 --- a/drivers/staging/lustre/lustre/include/lustre_ha.h +++ b/drivers/staging/lustre/lustre/include/lustre_ha.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h index 27f169d2ed34..1a63a6b9e116 100644 --- a/drivers/staging/lustre/lustre/include/lustre_handles.h +++ b/drivers/staging/lustre/lustre/include/lustre_handles.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h index dac2d84d8266..4445be7a59dd 100644 --- a/drivers/staging/lustre/lustre/include/lustre_import.h +++ b/drivers/staging/lustre/lustre/include/lustre_import.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -109,7 +105,7 @@ static inline char *ptlrpc_import_state_name(enum lustre_imp_state state) "RECOVER", "FULL", "EVICTED", }; - LASSERT (state <= LUSTRE_IMP_EVICTED); + LASSERT(state <= LUSTRE_IMP_EVICTED); return import_state_names[state]; } diff --git a/drivers/staging/lustre/lustre/include/lustre_intent.h b/drivers/staging/lustre/lustre/include/lustre_intent.h index c491d52d86a2..ed2b6c674109 100644 --- a/drivers/staging/lustre/lustre/include/lustre_intent.h +++ b/drivers/staging/lustre/lustre/include/lustre_intent.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -38,7 +34,11 @@ #define LUSTRE_INTENT_H /* intent IT_XXX are defined in lustre/include/obd.h */ -struct lustre_intent_data { + +struct lookup_intent { + int it_op; + int it_create_mode; + __u64 it_flags; int it_disposition; int it_status; __u64 it_lock_handle; @@ -46,17 +46,23 @@ struct lustre_intent_data { int it_lock_mode; int it_remote_lock_mode; __u64 it_remote_lock_handle; - void *it_data; + struct ptlrpc_request *it_request; unsigned int it_lock_set:1; }; -struct lookup_intent { - int it_op; - int it_create_mode; - __u64 it_flags; - union { - struct lustre_intent_data lustre; - } d; -}; +static inline int it_disposition(struct lookup_intent *it, int flag) +{ + return it->it_disposition & flag; +} + +static inline void it_set_disposition(struct lookup_intent *it, int flag) +{ + it->it_disposition |= flag; +} + +static inline void it_clear_disposition(struct lookup_intent *it, int flag) +{ + it->it_disposition &= ~flag; +} #endif diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index f2223d55850a..06958f217fc8 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -280,16 +276,16 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_DATA_TYPE long #define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DESTROY _IOW ('f', 104, OBD_IOC_DATA_TYPE) +#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE) #define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SETATTR _IOW ('f', 107, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETATTR _IOWR ('f', 108, OBD_IOC_DATA_TYPE) #define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE) #define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE) #define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SYNC _IOW ('f', 114, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE) #define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) #define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) #define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) @@ -308,13 +304,13 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_GETDTNAME OBD_IOC_GETNAME #define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PING_TARGET _IOW ('f', 136, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE) #define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139) -#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE) -#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE) +#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE) +#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE) #define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) @@ -324,27 +320,27 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CHANGELOG_SEND _IOW ('f', 148, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) #define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE) /* see also <lustre/lustre_user.h> for ioctls 151-153 */ /* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */ -#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_SETSTRIPE _IOW('f', 154, OBD_IOC_DATA_TYPE) /* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */ -#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_GETSTRIPE _IOW('f', 155, OBD_IOC_DATA_TYPE) /* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */ -#define OBD_IOC_LOV_SETEA _IOW ('f', 156, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_SETEA _IOW('f', 156, OBD_IOC_DATA_TYPE) /* see <lustre/lustre_user.h> for ioctls 157-159 */ /* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */ -#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int) +#define OBD_IOC_QUOTACHECK _IOW('f', 160, int) /* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */ -#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) +#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) /* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */ #define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) /* see also <lustre/lustre_user.h> for ioctls 163-176 */ -#define OBD_IOC_CHANGELOG_REG _IOW ('f', 177, struct obd_ioctl_data) -#define OBD_IOC_CHANGELOG_DEREG _IOW ('f', 178, struct obd_ioctl_data) -#define OBD_IOC_CHANGELOG_CLEAR _IOW ('f', 179, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data) #define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) #define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) #define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) @@ -352,7 +348,7 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE) #define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) #define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE) #define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE) #define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE) @@ -522,6 +518,28 @@ struct l_wait_info { sigmask(SIGTERM) | sigmask(SIGQUIT) | \ sigmask(SIGALRM)) +/** + * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively + * waiting threads, which is not always desirable because all threads will + * be waken up again and again, even user only needs a few of them to be + * active most time. This is not good for performance because cache can + * be polluted by different threads. + * + * LIFO list can resolve this problem because we always wakeup the most + * recent active thread by default. + * + * NB: please don't call non-exclusive & exclusive wait on the same + * waitq if add_wait_queue_exclusive_head is used. + */ +#define add_wait_queue_exclusive_head(waitq, link) \ +{ \ + unsigned long flags; \ + \ + spin_lock_irqsave(&((waitq)->lock), flags); \ + __add_wait_queue_exclusive(waitq, link); \ + spin_unlock_irqrestore(&((waitq)->lock), flags); \ +} + /* * wait for @condition to become true, but no longer than timeout, specified * by @info. @@ -578,7 +596,7 @@ do { \ \ if (condition) \ break; \ - if (cfs_signal_pending()) { \ + if (signal_pending(current)) { \ if (info->lwi_on_signal && \ (__timeout == 0 || __allow_intr)) { \ if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \ diff --git a/drivers/staging/lustre/lustre/include/lustre_lite.h b/drivers/staging/lustre/lustre/include/lustre_lite.h index fcc5ebbceed8..b16897702559 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lite.h +++ b/drivers/staging/lustre/lustre/include/lustre_lite.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h index 49618e186824..b96e02317bfc 100644 --- a/drivers/staging/lustre/lustre/include/lustre_log.h +++ b/drivers/staging/lustre/lustre/include/lustre_log.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h index af77eb359c43..fa62b95d351f 100644 --- a/drivers/staging/lustre/lustre/include/lustre_mdc.h +++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -64,9 +60,27 @@ struct obd_export; struct ptlrpc_request; struct obd_device; +/** + * Serializes in-flight MDT-modifying RPC requests to preserve idempotency. + * + * This mutex is used to implement execute-once semantics on the MDT. + * The MDT stores the last transaction ID and result for every client in + * its last_rcvd file. If the client doesn't get a reply, it can safely + * resend the request and the MDT will reconstruct the reply being aware + * that the request has already been executed. Without this lock, + * execution status of concurrent in-flight requests would be + * overwritten. + * + * This design limits the extent to which we can keep a full pipeline of + * in-flight requests from a single client. This limitation could be + * overcome by allowing multiple slots per client in the last_rcvd file. + */ struct mdc_rpc_lock { + /** Lock protecting in-flight RPC concurrency. */ struct mutex rpcl_mutex; + /** Intent associated with currently executing request. */ struct lookup_intent *rpcl_it; + /** Used for MDS/RPC load testing purposes. */ int rpcl_fakes; }; @@ -171,9 +185,6 @@ struct mdc_cache_waiter { }; /* mdc/mdc_locks.c */ -int it_disposition(struct lookup_intent *it, int flag); -void it_clear_disposition(struct lookup_intent *it, int flag); -void it_set_disposition(struct lookup_intent *it, int flag); int it_open_error(int phase, struct lookup_intent *it); static inline bool cl_is_lov_delay_create(unsigned int flags) diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h index 95d27ddecfb3..4104bd9bd5c4 100644 --- a/drivers/staging/lustre/lustre/include/lustre_mds.h +++ b/drivers/staging/lustre/lustre/include/lustre_mds.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index 69586a522eb7..d5debd615fdf 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -270,6 +266,11 @@ /* Macro to hide a typecast. */ #define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args) +struct ptlrpc_replay_async_args { + int praa_old_state; + int praa_old_status; +}; + /** * Structure to single define portal connection. */ @@ -479,8 +480,9 @@ enum rq_phase { RQ_PHASE_BULK = 0xebc0de02, RQ_PHASE_INTERPRET = 0xebc0de03, RQ_PHASE_COMPLETE = 0xebc0de04, - RQ_PHASE_UNREGISTERING = 0xebc0de05, - RQ_PHASE_UNDEFINED = 0xebc0de06 + RQ_PHASE_UNREG_RPC = 0xebc0de05, + RQ_PHASE_UNREG_BULK = 0xebc0de06, + RQ_PHASE_UNDEFINED = 0xebc0de07 }; /** Type of request interpreter call-back */ @@ -1247,22 +1249,103 @@ struct ptlrpc_hpreq_ops { void (*hpreq_fini)(struct ptlrpc_request *); }; -/** - * Represents remote procedure call. - * - * This is a staple structure used by everybody wanting to send a request - * in Lustre. - */ -struct ptlrpc_request { - /* Request type: one of PTL_RPC_MSG_* */ - int rq_type; - /** Result of request processing */ - int rq_status; +struct ptlrpc_cli_req { + /** For bulk requests on client only: bulk descriptor */ + struct ptlrpc_bulk_desc *cr_bulk; + /** optional time limit for send attempts */ + long cr_delay_limit; + /** time request was first queued */ + time_t cr_queued_time; + /** request sent timeval */ + struct timespec64 cr_sent_tv; + /** time for request really sent out */ + time_t cr_sent_out; + /** when req reply unlink must finish. */ + time_t cr_reply_deadline; + /** when req bulk unlink must finish. */ + time_t cr_bulk_deadline; + /** when req unlink must finish. */ + time_t cr_req_deadline; + /** Portal to which this request would be sent */ + short cr_req_ptl; + /** Portal where to wait for reply and where reply would be sent */ + short cr_rep_ptl; + /** request resending number */ + unsigned int cr_resend_nr; + /** What was import generation when this request was sent */ + int cr_imp_gen; + enum lustre_imp_state cr_send_state; + /** Per-request waitq introduced by bug 21938 for recovery waiting */ + wait_queue_head_t cr_set_waitq; + /** Link item for request set lists */ + struct list_head cr_set_chain; + /** link to waited ctx */ + struct list_head cr_ctx_chain; + + /** client's half ctx */ + struct ptlrpc_cli_ctx *cr_cli_ctx; + /** Link back to the request set */ + struct ptlrpc_request_set *cr_set; + /** outgoing request MD handle */ + lnet_handle_md_t cr_req_md_h; + /** request-out callback parameter */ + struct ptlrpc_cb_id cr_req_cbid; + /** incoming reply MD handle */ + lnet_handle_md_t cr_reply_md_h; + wait_queue_head_t cr_reply_waitq; + /** reply callback parameter */ + struct ptlrpc_cb_id cr_reply_cbid; + /** Async completion handler, called when reply is received */ + ptlrpc_interpterer_t cr_reply_interp; + /** Async completion context */ + union ptlrpc_async_args cr_async_args; + /** Opaq data for replay and commit callbacks. */ + void *cr_cb_data; /** - * Linkage item through which this request is included into - * sending/delayed lists on client and into rqbd list on server + * Commit callback, called when request is committed and about to be + * freed. */ - struct list_head rq_list; + void (*cr_commit_cb)(struct ptlrpc_request *); + /** Replay callback, called after request is replayed at recovery */ + void (*cr_replay_cb)(struct ptlrpc_request *); +}; + +/** client request member alias */ +/* NB: these alias should NOT be used by any new code, instead they should + * be removed step by step to avoid potential abuse + */ +#define rq_bulk rq_cli.cr_bulk +#define rq_delay_limit rq_cli.cr_delay_limit +#define rq_queued_time rq_cli.cr_queued_time +#define rq_sent_tv rq_cli.cr_sent_tv +#define rq_real_sent rq_cli.cr_sent_out +#define rq_reply_deadline rq_cli.cr_reply_deadline +#define rq_bulk_deadline rq_cli.cr_bulk_deadline +#define rq_req_deadline rq_cli.cr_req_deadline +#define rq_nr_resend rq_cli.cr_resend_nr +#define rq_request_portal rq_cli.cr_req_ptl +#define rq_reply_portal rq_cli.cr_rep_ptl +#define rq_import_generation rq_cli.cr_imp_gen +#define rq_send_state rq_cli.cr_send_state +#define rq_set_chain rq_cli.cr_set_chain +#define rq_ctx_chain rq_cli.cr_ctx_chain +#define rq_set rq_cli.cr_set +#define rq_set_waitq rq_cli.cr_set_waitq +#define rq_cli_ctx rq_cli.cr_cli_ctx +#define rq_req_md_h rq_cli.cr_req_md_h +#define rq_req_cbid rq_cli.cr_req_cbid +#define rq_reply_md_h rq_cli.cr_reply_md_h +#define rq_reply_waitq rq_cli.cr_reply_waitq +#define rq_reply_cbid rq_cli.cr_reply_cbid +#define rq_interpret_reply rq_cli.cr_reply_interp +#define rq_async_args rq_cli.cr_async_args +#define rq_cb_data rq_cli.cr_cb_data +#define rq_commit_cb rq_cli.cr_commit_cb +#define rq_replay_cb rq_cli.cr_replay_cb + +struct ptlrpc_srv_req { + /** initial thread servicing this request */ + struct ptlrpc_thread *sr_svc_thread; /** * Server side list of incoming unserved requests sorted by arrival * time. Traversed from time to time to notice about to expire @@ -1270,32 +1353,86 @@ struct ptlrpc_request { * know server is alive and well, just very busy to service their * requests in time */ - struct list_head rq_timed_list; - /** server-side history, used for debugging purposes. */ - struct list_head rq_history_list; + struct list_head sr_timed_list; /** server-side per-export list */ - struct list_head rq_exp_list; - /** server-side hp handlers */ - struct ptlrpc_hpreq_ops *rq_ops; - - /** initial thread servicing this request */ - struct ptlrpc_thread *rq_svc_thread; - + struct list_head sr_exp_list; + /** server-side history, used for debuging purposes. */ + struct list_head sr_hist_list; /** history sequence # */ - __u64 rq_history_seq; + __u64 sr_hist_seq; + /** the index of service's srv_at_array into which request is linked */ + time_t sr_at_index; + /** authed uid */ + uid_t sr_auth_uid; + /** authed uid mapped to */ + uid_t sr_auth_mapped_uid; + /** RPC is generated from what part of Lustre */ + enum lustre_sec_part sr_sp_from; + /** request session context */ + struct lu_context sr_ses; /** \addtogroup nrs * @{ */ /** stub for NRS request */ - struct ptlrpc_nrs_request rq_nrq; + struct ptlrpc_nrs_request sr_nrq; /** @} nrs */ - /** the index of service's srv_at_array into which request is linked */ - u32 rq_at_index; + /** request arrival time */ + struct timespec64 sr_arrival_time; + /** server's half ctx */ + struct ptlrpc_svc_ctx *sr_svc_ctx; + /** (server side), pointed directly into req buffer */ + struct ptlrpc_user_desc *sr_user_desc; + /** separated reply state */ + struct ptlrpc_reply_state *sr_reply_state; + /** server-side hp handlers */ + struct ptlrpc_hpreq_ops *sr_ops; + /** incoming request buffer */ + struct ptlrpc_request_buffer_desc *sr_rqbd; +}; + +/** server request member alias */ +/* NB: these alias should NOT be used by any new code, instead they should + * be removed step by step to avoid potential abuse + */ +#define rq_svc_thread rq_srv.sr_svc_thread +#define rq_timed_list rq_srv.sr_timed_list +#define rq_exp_list rq_srv.sr_exp_list +#define rq_history_list rq_srv.sr_hist_list +#define rq_history_seq rq_srv.sr_hist_seq +#define rq_at_index rq_srv.sr_at_index +#define rq_auth_uid rq_srv.sr_auth_uid +#define rq_auth_mapped_uid rq_srv.sr_auth_mapped_uid +#define rq_sp_from rq_srv.sr_sp_from +#define rq_session rq_srv.sr_ses +#define rq_nrq rq_srv.sr_nrq +#define rq_arrival_time rq_srv.sr_arrival_time +#define rq_reply_state rq_srv.sr_reply_state +#define rq_svc_ctx rq_srv.sr_svc_ctx +#define rq_user_desc rq_srv.sr_user_desc +#define rq_ops rq_srv.sr_ops +#define rq_rqbd rq_srv.sr_rqbd + +/** + * Represents remote procedure call. + * + * This is a staple structure used by everybody wanting to send a request + * in Lustre. + */ +struct ptlrpc_request { + /* Request type: one of PTL_RPC_MSG_* */ + int rq_type; + /** Result of request processing */ + int rq_status; + /** + * Linkage item through which this request is included into + * sending/delayed lists on client and into rqbd list on server + */ + struct list_head rq_list; /** Lock to protect request flags and some other important bits, like * rq_list */ spinlock_t rq_lock; - /** client-side flags are serialized by rq_lock */ + /** client-side flags are serialized by rq_lock @{ */ unsigned int rq_intr:1, rq_replied:1, rq_err:1, rq_timedout:1, rq_resend:1, rq_restart:1, /** @@ -1311,37 +1448,40 @@ struct ptlrpc_request { rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1, rq_early:1, - rq_req_unlink:1, rq_reply_unlink:1, + rq_req_unlinked:1, /* unlinked request buffer from lnet */ + rq_reply_unlinked:1, /* unlinked reply buffer from lnet */ rq_memalloc:1, /* req originated from "kswapd" */ - /* server-side flags */ - rq_packed_final:1, /* packed final reply */ - rq_hp:1, /* high priority RPC */ - rq_at_linked:1, /* link into service's srv_at_array */ - rq_reply_truncate:1, rq_committed:1, - /* whether the "rq_set" is a valid one */ + rq_reply_truncated:1, + /** whether the "rq_set" is a valid one */ rq_invalid_rqset:1, rq_generation_set:1, - /* do not resend request on -EINPROGRESS */ + /** do not resend request on -EINPROGRESS */ rq_no_retry_einprogress:1, /* allow the req to be sent if the import is in recovery * status */ - rq_allow_replay:1; - - unsigned int rq_nr_resend; - - enum rq_phase rq_phase; /* one of RQ_PHASE_* */ - enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */ - atomic_t rq_refcount; /* client-side refcount for SENT race, - * server-side refcount for multiple replies - */ + rq_allow_replay:1, + /* bulk request, sent to server, but uncommitted */ + rq_unstable:1; + /** @} */ - /** Portal to which this request would be sent */ - short rq_request_portal; /* XXX FIXME bug 249 */ - /** Portal where to wait for reply and where reply would be sent */ - short rq_reply_portal; /* XXX FIXME bug 249 */ + /** server-side flags @{ */ + unsigned int + rq_hp:1, /**< high priority RPC */ + rq_at_linked:1, /**< link into service's srv_at_array */ + rq_packed_final:1; /**< packed final reply */ + /** @} */ + /** one of RQ_PHASE_* */ + enum rq_phase rq_phase; + /** one of RQ_PHASE_* to be used next */ + enum rq_phase rq_next_phase; + /** + * client-side refcount for SENT race, server-side refcount + * for multiple replies + */ + atomic_t rq_refcount; /** * client-side: * !rq_truncate : # reply bytes actually received, @@ -1352,6 +1492,8 @@ struct ptlrpc_request { int rq_reqlen; /** Reply length */ int rq_replen; + /** Pool if request is from preallocated list */ + struct ptlrpc_request_pool *rq_pool; /** Request message - what client sent */ struct lustre_msg *rq_reqmsg; /** Reply message - server response */ @@ -1364,19 +1506,20 @@ struct ptlrpc_request { * List item to for replay list. Not yet committed requests get linked * there. * Also see \a rq_replay comment above. + * It's also link chain on obd_export::exp_req_replay_queue */ struct list_head rq_replay_list; - + /** non-shared members for client & server request*/ + union { + struct ptlrpc_cli_req rq_cli; + struct ptlrpc_srv_req rq_srv; + }; /** * security and encryption data * @{ */ - struct ptlrpc_cli_ctx *rq_cli_ctx; /**< client's half ctx */ - struct ptlrpc_svc_ctx *rq_svc_ctx; /**< server's half ctx */ - struct list_head rq_ctx_chain; /**< link to waited ctx */ - - struct sptlrpc_flavor rq_flvr; /**< for client & server */ - enum lustre_sec_part rq_sp_from; + /** description of flavors for client & server */ + struct sptlrpc_flavor rq_flvr; /* client/server security flags */ unsigned int @@ -1386,7 +1529,6 @@ struct ptlrpc_request { rq_bulk_write:1, /* request bulk write */ /* server authentication flags */ rq_auth_gss:1, /* authenticated by gss */ - rq_auth_remote:1, /* authed as remote user */ rq_auth_usr_root:1, /* authed as root */ rq_auth_usr_mdt:1, /* authed as mdt */ rq_auth_usr_ost:1, /* authed as ost */ @@ -1395,19 +1537,15 @@ struct ptlrpc_request { rq_pack_bulk:1, /* doesn't expect reply FIXME */ rq_no_reply:1, - rq_pill_init:1; /* pill initialized */ - - uid_t rq_auth_uid; /* authed uid */ - uid_t rq_auth_mapped_uid; /* authed uid mapped to */ - - /* (server side), pointed directly into req buffer */ - struct ptlrpc_user_desc *rq_user_desc; - - /* various buffer pointers */ - struct lustre_msg *rq_reqbuf; /* req wrapper */ - char *rq_repbuf; /* rep buffer */ - struct lustre_msg *rq_repdata; /* rep wrapper msg */ - struct lustre_msg *rq_clrbuf; /* only in priv mode */ + rq_pill_init:1, /* pill initialized */ + rq_srv_req:1; /* server request */ + + /** various buffer pointers */ + struct lustre_msg *rq_reqbuf; /**< req wrapper */ + char *rq_repbuf; /**< rep buffer */ + struct lustre_msg *rq_repdata; /**< rep wrapper msg */ + /** only in priv mode */ + struct lustre_msg *rq_clrbuf; int rq_reqbuf_len; /* req wrapper buf len */ int rq_reqdata_len; /* req wrapper msg len */ int rq_repbuf_len; /* rep buffer len */ @@ -1424,97 +1562,28 @@ struct ptlrpc_request { __u32 rq_req_swab_mask; __u32 rq_rep_swab_mask; - /** What was import generation when this request was sent */ - int rq_import_generation; - enum lustre_imp_state rq_send_state; - /** how many early replies (for stats) */ int rq_early_count; - /** client+server request */ - lnet_handle_md_t rq_req_md_h; - struct ptlrpc_cb_id rq_req_cbid; - /** optional time limit for send attempts */ - long rq_delay_limit; - /** time request was first queued */ - unsigned long rq_queued_time; - - /* server-side... */ - /** request arrival time */ - struct timespec64 rq_arrival_time; - /** separated reply state */ - struct ptlrpc_reply_state *rq_reply_state; - /** incoming request buffer */ - struct ptlrpc_request_buffer_desc *rq_rqbd; - - /** client-only incoming reply */ - lnet_handle_md_t rq_reply_md_h; - wait_queue_head_t rq_reply_waitq; - struct ptlrpc_cb_id rq_reply_cbid; - + /** Server-side, export on which request was received */ + struct obd_export *rq_export; + /** import where request is being sent */ + struct obd_import *rq_import; /** our LNet NID */ lnet_nid_t rq_self; /** Peer description (the other side) */ lnet_process_id_t rq_peer; - /** Server-side, export on which request was received */ - struct obd_export *rq_export; - /** Client side, import where request is being sent */ - struct obd_import *rq_import; - - /** Replay callback, called after request is replayed at recovery */ - void (*rq_replay_cb)(struct ptlrpc_request *); /** - * Commit callback, called when request is committed and about to be - * freed. + * service time estimate (secs) + * If the request is not served by this time, it is marked as timed out. */ - void (*rq_commit_cb)(struct ptlrpc_request *); - /** Opaq data for replay and commit callbacks. */ - void *rq_cb_data; - - /** For bulk requests on client only: bulk descriptor */ - struct ptlrpc_bulk_desc *rq_bulk; - - /** client outgoing req */ + int rq_timeout; /** * when request/reply sent (secs), or time when request should be sent */ time64_t rq_sent; - /** time for request really sent out */ - time64_t rq_real_sent; - - /** when request must finish. volatile - * so that servers' early reply updates to the deadline aren't - * kept in per-cpu cache - */ - volatile time64_t rq_deadline; - /** when req reply unlink must finish. */ - time64_t rq_reply_deadline; - /** when req bulk unlink must finish. */ - time64_t rq_bulk_deadline; - /** - * service time estimate (secs) - * If the requestsis not served by this time, it is marked as timed out. - */ - int rq_timeout; - - /** Multi-rpc bits */ - /** Per-request waitq introduced by bug 21938 for recovery waiting */ - wait_queue_head_t rq_set_waitq; - /** Link item for request set lists */ - struct list_head rq_set_chain; - /** Link back to the request set */ - struct ptlrpc_request_set *rq_set; - /** Async completion handler, called when reply is received */ - ptlrpc_interpterer_t rq_interpret_reply; - /** Async completion context */ - union ptlrpc_async_args rq_async_args; - - /** Pool if request is from preallocated list */ - struct ptlrpc_request_pool *rq_pool; - - struct lu_context rq_session; - struct lu_context rq_recov_session; - + /** when request must finish. */ + time64_t rq_deadline; /** request format description */ struct req_capsule rq_pill; }; @@ -1627,8 +1696,10 @@ ptlrpc_phase2str(enum rq_phase phase) return "Interpret"; case RQ_PHASE_COMPLETE: return "Complete"; - case RQ_PHASE_UNREGISTERING: - return "Unregistering"; + case RQ_PHASE_UNREG_RPC: + return "UnregRPC"; + case RQ_PHASE_UNREG_BULK: + return "UnregBULK"; default: return "?Phase?"; } @@ -1655,7 +1726,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req) #define DEBUG_REQ_FLAGS(req) \ ptlrpc_rqphase2str(req), \ FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ - FLAG(req->rq_err, "E"), \ + FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \ FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ FLAG(req->rq_no_resend, "N"), \ @@ -1663,7 +1734,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req) FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \ FLAG(req->rq_committed, "M") -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s" +#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s" void _debug_req(struct ptlrpc_request *req, struct libcfs_debug_msg_data *data, const char *fmt, ...) @@ -2314,8 +2385,7 @@ static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req) desc = req->rq_bulk; - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && - req->rq_bulk_deadline > ktime_get_real_seconds()) + if (req->rq_bulk_deadline > ktime_get_real_seconds()) return 1; if (!desc) @@ -2662,13 +2732,20 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase) if (req->rq_phase == new_phase) return; - if (new_phase == RQ_PHASE_UNREGISTERING) { + if (new_phase == RQ_PHASE_UNREG_RPC || + new_phase == RQ_PHASE_UNREG_BULK) { + /* No embedded unregistering phases */ + if (req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK) + return; + req->rq_next_phase = req->rq_phase; if (req->rq_import) atomic_inc(&req->rq_import->imp_unregistering); } - if (req->rq_phase == RQ_PHASE_UNREGISTERING) { + if (req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK) { if (req->rq_import) atomic_dec(&req->rq_import->imp_unregistering); } @@ -2685,9 +2762,6 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase) static inline int ptlrpc_client_early(struct ptlrpc_request *req) { - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) - return 0; return req->rq_early; } @@ -2697,8 +2771,7 @@ ptlrpc_client_early(struct ptlrpc_request *req) static inline int ptlrpc_client_replied(struct ptlrpc_request *req) { - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) + if (req->rq_reply_deadline > ktime_get_real_seconds()) return 0; return req->rq_replied; } @@ -2707,8 +2780,7 @@ ptlrpc_client_replied(struct ptlrpc_request *req) static inline int ptlrpc_client_recv(struct ptlrpc_request *req) { - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) + if (req->rq_reply_deadline > ktime_get_real_seconds()) return 1; return req->rq_receiving_reply; } @@ -2719,13 +2791,16 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req) int rc; spin_lock(&req->rq_lock); - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) { + if (req->rq_reply_deadline > ktime_get_real_seconds()) { + spin_unlock(&req->rq_lock); + return 1; + } + if (req->rq_req_deadline > ktime_get_real_seconds()) { spin_unlock(&req->rq_lock); return 1; } - rc = req->rq_receiving_reply; - rc = rc || req->rq_req_unlink || req->rq_reply_unlink; + rc = !req->rq_req_unlinked || !req->rq_reply_unlinked || + req->rq_receiving_reply; spin_unlock(&req->rq_lock); return rc; } diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h index 383fe6febe4b..82aadd32c2b8 100644 --- a/drivers/staging/lustre/lustre/include/lustre_param.h +++ b/drivers/staging/lustre/lustre/include/lustre_param.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -89,6 +85,7 @@ int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh); /* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */ #define PARAM_OST "ost." +#define PARAM_OSD "osd." #define PARAM_OSC "osc." #define PARAM_MDT "mdt." #define PARAM_MDD "mdd." diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h index b2e67fcf9ef1..544a43c862b9 100644 --- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h +++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -137,6 +133,7 @@ extern struct req_format RQF_MGS_CONFIG_READ; /* fid/fld req_format */ extern struct req_format RQF_SEQ_QUERY; extern struct req_format RQF_FLD_QUERY; +extern struct req_format RQF_FLD_READ; /* MDS req_format */ extern struct req_format RQF_MDS_CONNECT; extern struct req_format RQF_MDS_DISCONNECT; @@ -163,7 +160,7 @@ extern struct req_format RQF_MDS_IS_SUBDIR; extern struct req_format RQF_MDS_DONE_WRITING; extern struct req_format RQF_MDS_REINT; extern struct req_format RQF_MDS_REINT_CREATE; -extern struct req_format RQF_MDS_REINT_CREATE_RMT_ACL; +extern struct req_format RQF_MDS_REINT_CREATE_ACL; extern struct req_format RQF_MDS_REINT_CREATE_SLAVE; extern struct req_format RQF_MDS_REINT_CREATE_SYM; extern struct req_format RQF_MDS_REINT_OPEN; @@ -199,7 +196,7 @@ extern struct req_format RQF_OST_BRW_READ; extern struct req_format RQF_OST_BRW_WRITE; extern struct req_format RQF_OST_STATFS; extern struct req_format RQF_OST_SET_GRANT_INFO; -extern struct req_format RQF_OST_GET_INFO_GENERIC; +extern struct req_format RQF_OST_GET_INFO; extern struct req_format RQF_OST_GET_INFO_LAST_ID; extern struct req_format RQF_OST_GET_INFO_LAST_FID; extern struct req_format RQF_OST_SET_INFO_LAST_FID; diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h index 01b4e6726a68..90c183424802 100644 --- a/drivers/staging/lustre/lustre/include/lustre_sec.h +++ b/drivers/staging/lustre/lustre/include/lustre_sec.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -221,13 +217,13 @@ enum sptlrpc_bulk_service { #define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL -#define SPTLRPC_FLVR_INVALID ((__u32) 0xFFFFFFFF) -#define SPTLRPC_FLVR_ANY ((__u32) 0xFFF00000) +#define SPTLRPC_FLVR_INVALID ((__u32)0xFFFFFFFF) +#define SPTLRPC_FLVR_ANY ((__u32)0xFFF00000) /** * extract the useful part from wire flavor */ -#define WIRE_FLVR(wflvr) (((__u32) (wflvr)) & 0x000FFFFF) +#define WIRE_FLVR(wflvr) (((__u32)(wflvr)) & 0x000FFFFF) /** @} flavor */ diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h index 4264d97650ec..a1bc2c478ff9 100644 --- a/drivers/staging/lustre/lustre/include/obd.h +++ b/drivers/staging/lustre/lustre/include/obd.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -37,7 +33,7 @@ #ifndef __OBD_H #define __OBD_H -#include "linux/obd.h" +#include <linux/spinlock.h> #define IOC_OSC_TYPE 'h' #define IOC_OSC_MIN_NR 20 @@ -54,6 +50,7 @@ #include "lustre_export.h" #include "lustre_fid.h" #include "lustre_fld.h" +#include "lustre_intent.h" #define MAX_OBD_DEVICES 8192 @@ -165,9 +162,6 @@ struct obd_info { obd_enqueue_update_f oi_cb_up; }; -void lov_stripe_lock(struct lov_stripe_md *md); -void lov_stripe_unlock(struct lov_stripe_md *md); - struct obd_type { struct list_head typ_chain; struct obd_ops *typ_dt_ops; @@ -234,6 +228,12 @@ enum { #define MDC_MAX_RIF_DEFAULT 8 #define MDC_MAX_RIF_MAX 512 +enum obd_cl_sem_lock_class { + OBD_CLI_SEM_NORMAL, + OBD_CLI_SEM_MGC, + OBD_CLI_SEM_MDCOSC, +}; + struct mdc_rpc_lock; struct obd_import; struct client_obd { @@ -293,14 +293,10 @@ struct client_obd { * blocking everywhere, but we don't want to slow down fast-path of * our main platform.) * - * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together - * with client_obd_list_{un,}lock() and - * client_obd_list_lock_{init,done}() functions. - * * NB by Jinshan: though field names are still _loi_, but actually * osc_object{}s are in the list. */ - struct client_obd_lock cl_loi_list_lock; + spinlock_t cl_loi_list_lock; struct list_head cl_loi_ready_list; struct list_head cl_loi_hp_ready_list; struct list_head cl_loi_write_list; @@ -327,7 +323,8 @@ struct client_obd { atomic_t cl_lru_shrinkers; atomic_t cl_lru_in_list; struct list_head cl_lru_list; /* lru page list */ - struct client_obd_lock cl_lru_list_lock; /* page list protector */ + spinlock_t cl_lru_list_lock; /* page list protector */ + atomic_t cl_unstable_count; /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ atomic_t cl_destroy_in_flight; @@ -364,6 +361,7 @@ struct client_obd { /* ptlrpc work for writeback in ptlrpcd context */ void *cl_writeback_work; + void *cl_lru_work; /* hash tables for osc_quota_info */ struct cfs_hash *cl_quota_hash[MAXQUOTAS]; }; @@ -391,45 +389,9 @@ struct ost_pool { struct rw_semaphore op_rw_sem; /* to protect ost_pool use */ }; -/* Round-robin allocator data */ -struct lov_qos_rr { - __u32 lqr_start_idx; /* start index of new inode */ - __u32 lqr_offset_idx; /* aliasing for start_idx */ - int lqr_start_count; /* reseed counter */ - struct ost_pool lqr_pool; /* round-robin optimized list */ - unsigned long lqr_dirty:1; /* recalc round-robin list */ -}; - /* allow statfs data caching for 1 second */ #define OBD_STATFS_CACHE_SECONDS 1 -struct lov_statfs_data { - struct obd_info lsd_oi; - struct obd_statfs lsd_statfs; -}; - -/* Stripe placement optimization */ -struct lov_qos { - struct list_head lq_oss_list; /* list of OSSs that targets use */ - struct rw_semaphore lq_rw_sem; - __u32 lq_active_oss_count; - unsigned int lq_prio_free; /* priority for free space */ - unsigned int lq_threshold_rr;/* priority for rr */ - struct lov_qos_rr lq_rr; /* round robin qos data */ - unsigned long lq_dirty:1, /* recalc qos data */ - lq_same_space:1,/* the ost's all have approx. - * the same space avail - */ - lq_reset:1, /* zero current penalties */ - lq_statfs_in_progress:1; /* statfs op in - progress */ - /* qos statfs data */ - struct lov_statfs_data *lq_statfs_data; - wait_queue_head_t lq_statfs_waitq; /* waitqueue to notify statfs - * requests completion - */ -}; - struct lov_tgt_desc { struct list_head ltd_kill; struct obd_uuid ltd_uuid; @@ -442,25 +404,6 @@ struct lov_tgt_desc { ltd_reap:1; /* should this target be deleted */ }; -/* Pool metadata */ -#define pool_tgt_size(_p) _p->pool_obds.op_size -#define pool_tgt_count(_p) _p->pool_obds.op_count -#define pool_tgt_array(_p) _p->pool_obds.op_array -#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem - -struct pool_desc { - char pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */ - struct ost_pool pool_obds; /* pool members */ - atomic_t pool_refcount; /* pool ref. counter */ - struct lov_qos_rr pool_rr; /* round robin qos */ - struct hlist_node pool_hash; /* access by poolname */ - struct list_head pool_list; /* serial access */ - struct dentry *pool_debugfs_entry; /* file in debugfs */ - struct obd_device *pool_lobd; /* obd of the lov/lod to which - * this pool belongs - */ -}; - struct lov_obd { struct lov_desc desc; struct lov_tgt_desc **lov_tgts; /* sparse array */ @@ -468,8 +411,6 @@ struct lov_obd { struct mutex lov_lock; struct obd_connect_data lov_ocd; atomic_t lov_refcount; - __u32 lov_tgt_count; /* how many OBD's */ - __u32 lov_active_tgt_count; /* how many active */ __u32 lov_death_row;/* tgts scheduled to be deleted */ __u32 lov_tgt_size; /* size of tgts array */ int lov_connects; @@ -479,8 +420,8 @@ struct lov_obd { struct dentry *lov_pool_debugfs_entry; enum lustre_sec_part lov_sp_me; - /* Cached LRU pages from upper layer */ - void *lov_cache; + /* Cached LRU and unstable data from upper layer */ + struct cl_client_cache *lov_cache; struct rw_semaphore lov_notify_lock; @@ -511,7 +452,7 @@ struct lmv_obd { struct obd_uuid cluuid; struct obd_export *exp; - struct mutex init_mutex; + struct mutex lmv_init_mutex; int connected; int max_easize; int max_def_easize; @@ -1180,9 +1121,6 @@ struct md_ops { ldlm_policy_data_t *, enum ldlm_mode, enum ldlm_cancel_flags flags, void *opaque); - int (*get_remote_perm)(struct obd_export *, const struct lu_fid *, - __u32, struct ptlrpc_request **); - int (*intent_getattr_async)(struct obd_export *, struct md_enqueue_info *, struct ldlm_enqueue_info *); diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h index 637fa22110a4..a8a81e662a56 100644 --- a/drivers/staging/lustre/lustre/include/obd_cksum.h +++ b/drivers/staging/lustre/lustre/include/obd_cksum.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -35,6 +31,7 @@ #ifndef __OBD_CKSUM #define __OBD_CKSUM #include "../../include/linux/libcfs/libcfs.h" +#include "../../include/linux/libcfs/libcfs_crypto.h" #include "lustre/lustre_idl.h" static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type) diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h index 706869f8c98f..6482a937000b 100644 --- a/drivers/staging/lustre/lustre/include/obd_class.h +++ b/drivers/staging/lustre/lustre/include/obd_class.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -477,7 +473,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) struct lu_context session_ctx; struct lu_env env; - lu_context_init(&session_ctx, LCT_SESSION); + lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION); session_ctx.lc_thread = NULL; lu_context_enter(&session_ctx); @@ -490,8 +486,9 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) obd->obd_lu_dev = d; d->ld_obd = obd; rc = 0; - } else + } else { rc = PTR_ERR(d); + } } lu_context_exit(&session_ctx); lu_context_fini(&session_ctx); @@ -1653,16 +1650,6 @@ static inline int md_init_ea_size(struct obd_export *exp, int easize, cookiesize, def_cookiesize); } -static inline int md_get_remote_perm(struct obd_export *exp, - const struct lu_fid *fid, __u32 suppgid, - struct ptlrpc_request **request) -{ - EXP_CHECK_MD_OP(exp, get_remote_perm); - EXP_MD_COUNTER_INCREMENT(exp, get_remote_perm); - return MDP(exp->exp_obd, get_remote_perm)(exp, fid, suppgid, - request); -} - static inline int md_intent_getattr_async(struct obd_export *exp, struct md_enqueue_info *minfo, struct ldlm_enqueue_info *einfo) diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index f8ee3a3254ba..845e64a56c21 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -58,6 +54,7 @@ extern int at_early_margin; extern int at_extra; extern unsigned int obd_sync_filter; extern unsigned int obd_max_dirty_pages; +extern atomic_t obd_unstable_pages; extern atomic_t obd_dirty_pages; extern atomic_t obd_dirty_transit_pages; extern char obd_jobid_var[]; @@ -289,6 +286,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_ENOINO 0x229 #define OBD_FAIL_OST_DQACQ_NET 0x230 #define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231 +#define OBD_FAIL_OST_SET_INFO_NET 0x232 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 @@ -319,6 +317,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LDLM_AGL_DELAY 0x31a #define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b #define OBD_FAIL_LDLM_OST_LVB 0x31c +#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 @@ -365,6 +364,9 @@ extern char obd_jobid_var[]; #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515 #define OBD_FAIL_PTLRPC_DELAY_IMP_FULL 0x516 #define OBD_FAIL_PTLRPC_CANCEL_RESEND 0x517 +#define OBD_FAIL_PTLRPC_DROP_BULK 0x51a +#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b +#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 @@ -426,6 +428,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_FLD 0x1100 #define OBD_FAIL_FLD_QUERY_NET 0x1101 +#define OBD_FAIL_FLD_READ_NET 0x1102 #define OBD_FAIL_SEC_CTX 0x1200 #define OBD_FAIL_SEC_CTX_INIT_NET 0x1201 diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c deleted file mode 100644 index 96141d17d07f..000000000000 --- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c +++ /dev/null @@ -1,1203 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * cl code shared between vvp and liblustre (and other Lustre clients in the - * future). - * - * Author: Nikita Danilov <nikita.danilov@sun.com> - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include "../../include/linux/libcfs/libcfs.h" -# include <linux/fs.h> -# include <linux/sched.h> -# include <linux/mm.h> -# include <linux/quotaops.h> -# include <linux/highmem.h> -# include <linux/pagemap.h> -# include <linux/rbtree.h> - -#include "../include/obd.h" -#include "../include/obd_support.h" -#include "../include/lustre_fid.h" -#include "../include/lustre_lite.h" -#include "../include/lustre_dlm.h" -#include "../include/lustre_ver.h" -#include "../include/lustre_mdc.h" -#include "../include/cl_object.h" - -#include "../include/lclient.h" - -#include "../llite/llite_internal.h" - -static const struct cl_req_operations ccc_req_ops; - -/* - * ccc_ prefix stands for "Common Client Code". - */ - -static struct kmem_cache *ccc_lock_kmem; -static struct kmem_cache *ccc_object_kmem; -static struct kmem_cache *ccc_thread_kmem; -static struct kmem_cache *ccc_session_kmem; -static struct kmem_cache *ccc_req_kmem; - -static struct lu_kmem_descr ccc_caches[] = { - { - .ckd_cache = &ccc_lock_kmem, - .ckd_name = "ccc_lock_kmem", - .ckd_size = sizeof(struct ccc_lock) - }, - { - .ckd_cache = &ccc_object_kmem, - .ckd_name = "ccc_object_kmem", - .ckd_size = sizeof(struct ccc_object) - }, - { - .ckd_cache = &ccc_thread_kmem, - .ckd_name = "ccc_thread_kmem", - .ckd_size = sizeof(struct ccc_thread_info), - }, - { - .ckd_cache = &ccc_session_kmem, - .ckd_name = "ccc_session_kmem", - .ckd_size = sizeof(struct ccc_session) - }, - { - .ckd_cache = &ccc_req_kmem, - .ckd_name = "ccc_req_kmem", - .ckd_size = sizeof(struct ccc_req) - }, - { - .ckd_cache = NULL - } -}; - -/***************************************************************************** - * - * Vvp device and device type functions. - * - */ - -void *ccc_key_init(const struct lu_context *ctx, struct lu_context_key *key) -{ - struct ccc_thread_info *info; - - info = kmem_cache_zalloc(ccc_thread_kmem, GFP_NOFS); - if (!info) - info = ERR_PTR(-ENOMEM); - return info; -} - -void ccc_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct ccc_thread_info *info = data; - - kmem_cache_free(ccc_thread_kmem, info); -} - -void *ccc_session_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct ccc_session *session; - - session = kmem_cache_zalloc(ccc_session_kmem, GFP_NOFS); - if (!session) - session = ERR_PTR(-ENOMEM); - return session; -} - -void ccc_session_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct ccc_session *session = data; - - kmem_cache_free(ccc_session_kmem, session); -} - -struct lu_context_key ccc_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = ccc_key_init, - .lct_fini = ccc_key_fini -}; - -struct lu_context_key ccc_session_key = { - .lct_tags = LCT_SESSION, - .lct_init = ccc_session_key_init, - .lct_fini = ccc_session_key_fini -}; - -/* type constructor/destructor: ccc_type_{init,fini,start,stop}(). */ -/* LU_TYPE_INIT_FINI(ccc, &ccc_key, &ccc_session_key); */ - -int ccc_device_init(const struct lu_env *env, struct lu_device *d, - const char *name, struct lu_device *next) -{ - struct ccc_device *vdv; - int rc; - - vdv = lu2ccc_dev(d); - vdv->cdv_next = lu2cl_dev(next); - - LASSERT(d->ld_site && next->ld_type); - next->ld_site = d->ld_site; - rc = next->ld_type->ldt_ops->ldto_device_init( - env, next, next->ld_type->ldt_name, NULL); - if (rc == 0) { - lu_device_get(next); - lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init); - } - return rc; -} - -struct lu_device *ccc_device_fini(const struct lu_env *env, - struct lu_device *d) -{ - return cl2lu_dev(lu2ccc_dev(d)->cdv_next); -} - -struct lu_device *ccc_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg, - const struct lu_device_operations *luops, - const struct cl_device_operations *clops) -{ - struct ccc_device *vdv; - struct lu_device *lud; - struct cl_site *site; - int rc; - - vdv = kzalloc(sizeof(*vdv), GFP_NOFS); - if (!vdv) - return ERR_PTR(-ENOMEM); - - lud = &vdv->cdv_cl.cd_lu_dev; - cl_device_init(&vdv->cdv_cl, t); - ccc2lu_dev(vdv)->ld_ops = luops; - vdv->cdv_cl.cd_ops = clops; - - site = kzalloc(sizeof(*site), GFP_NOFS); - if (site) { - rc = cl_site_init(site, &vdv->cdv_cl); - if (rc == 0) - rc = lu_site_init_finish(&site->cs_lu); - else { - LASSERT(!lud->ld_site); - CERROR("Cannot init lu_site, rc %d.\n", rc); - kfree(site); - } - } else - rc = -ENOMEM; - if (rc != 0) { - ccc_device_free(env, lud); - lud = ERR_PTR(rc); - } - return lud; -} - -struct lu_device *ccc_device_free(const struct lu_env *env, - struct lu_device *d) -{ - struct ccc_device *vdv = lu2ccc_dev(d); - struct cl_site *site = lu2cl_site(d->ld_site); - struct lu_device *next = cl2lu_dev(vdv->cdv_next); - - if (d->ld_site) { - cl_site_fini(site); - kfree(site); - } - cl_device_fini(lu2cl_dev(d)); - kfree(vdv); - return next; -} - -int ccc_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req) -{ - struct ccc_req *vrq; - int result; - - vrq = kmem_cache_zalloc(ccc_req_kmem, GFP_NOFS); - if (vrq) { - cl_req_slice_add(req, &vrq->crq_cl, dev, &ccc_req_ops); - result = 0; - } else - result = -ENOMEM; - return result; -} - -/** - * An `emergency' environment used by ccc_inode_fini() when cl_env_get() - * fails. Access to this environment is serialized by ccc_inode_fini_guard - * mutex. - */ -static struct lu_env *ccc_inode_fini_env; - -/** - * A mutex serializing calls to slp_inode_fini() under extreme memory - * pressure, when environments cannot be allocated. - */ -static DEFINE_MUTEX(ccc_inode_fini_guard); -static int dummy_refcheck; - -int ccc_global_init(struct lu_device_type *device_type) -{ - int result; - - result = lu_kmem_init(ccc_caches); - if (result) - return result; - - result = lu_device_type_init(device_type); - if (result) - goto out_kmem; - - ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck, - LCT_REMEMBER|LCT_NOREF); - if (IS_ERR(ccc_inode_fini_env)) { - result = PTR_ERR(ccc_inode_fini_env); - goto out_device; - } - - ccc_inode_fini_env->le_ctx.lc_cookie = 0x4; - return 0; -out_device: - lu_device_type_fini(device_type); -out_kmem: - lu_kmem_fini(ccc_caches); - return result; -} - -void ccc_global_fini(struct lu_device_type *device_type) -{ - if (ccc_inode_fini_env) { - cl_env_put(ccc_inode_fini_env, &dummy_refcheck); - ccc_inode_fini_env = NULL; - } - lu_device_type_fini(device_type); - lu_kmem_fini(ccc_caches); -} - -/***************************************************************************** - * - * Object operations. - * - */ - -struct lu_object *ccc_object_alloc(const struct lu_env *env, - const struct lu_object_header *unused, - struct lu_device *dev, - const struct cl_object_operations *clops, - const struct lu_object_operations *luops) -{ - struct ccc_object *vob; - struct lu_object *obj; - - vob = kmem_cache_zalloc(ccc_object_kmem, GFP_NOFS); - if (vob) { - struct cl_object_header *hdr; - - obj = ccc2lu(vob); - hdr = &vob->cob_header; - cl_object_header_init(hdr); - lu_object_init(obj, &hdr->coh_lu, dev); - lu_object_add_top(&hdr->coh_lu, obj); - - vob->cob_cl.co_ops = clops; - obj->lo_ops = luops; - } else - obj = NULL; - return obj; -} - -int ccc_object_init0(const struct lu_env *env, - struct ccc_object *vob, - const struct cl_object_conf *conf) -{ - vob->cob_inode = conf->coc_inode; - vob->cob_transient_pages = 0; - cl_object_page_init(&vob->cob_cl, sizeof(struct ccc_page)); - return 0; -} - -int ccc_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf) -{ - struct ccc_device *dev = lu2ccc_dev(obj->lo_dev); - struct ccc_object *vob = lu2ccc(obj); - struct lu_object *below; - struct lu_device *under; - int result; - - under = &dev->cdv_next->cd_lu_dev; - below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under); - if (below) { - const struct cl_object_conf *cconf; - - cconf = lu2cl_conf(conf); - INIT_LIST_HEAD(&vob->cob_pending_list); - lu_object_add(obj, below); - result = ccc_object_init0(env, vob, cconf); - } else - result = -ENOMEM; - return result; -} - -void ccc_object_free(const struct lu_env *env, struct lu_object *obj) -{ - struct ccc_object *vob = lu2ccc(obj); - - lu_object_fini(obj); - lu_object_header_fini(obj->lo_header); - kmem_cache_free(ccc_object_kmem, vob); -} - -int ccc_lock_init(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *unused, - const struct cl_lock_operations *lkops) -{ - struct ccc_lock *clk; - int result; - - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - - clk = kmem_cache_zalloc(ccc_lock_kmem, GFP_NOFS); - if (clk) { - cl_lock_slice_add(lock, &clk->clk_cl, obj, lkops); - result = 0; - } else - result = -ENOMEM; - return result; -} - -int ccc_object_glimpse(const struct lu_env *env, - const struct cl_object *obj, struct ost_lvb *lvb) -{ - struct inode *inode = ccc_object_inode(obj); - - lvb->lvb_mtime = cl_inode_mtime(inode); - lvb->lvb_atime = cl_inode_atime(inode); - lvb->lvb_ctime = cl_inode_ctime(inode); - /* - * LU-417: Add dirty pages block count lest i_blocks reports 0, some - * "cp" or "tar" on remote node may think it's a completely sparse file - * and skip it. - */ - if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0) - lvb->lvb_blocks = dirty_cnt(inode); - return 0; -} - -static void ccc_object_size_lock(struct cl_object *obj) -{ - struct inode *inode = ccc_object_inode(obj); - - ll_inode_size_lock(inode); - cl_object_attr_lock(obj); -} - -static void ccc_object_size_unlock(struct cl_object *obj) -{ - struct inode *inode = ccc_object_inode(obj); - - cl_object_attr_unlock(obj); - ll_inode_size_unlock(inode); -} - -/***************************************************************************** - * - * Page operations. - * - */ - -struct page *ccc_page_vmpage(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - return cl2vm_page(slice); -} - -int ccc_page_is_under_lock(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) -{ - struct ccc_io *cio = ccc_env_io(env); - struct cl_lock_descr *desc = &ccc_env_info(env)->cti_descr; - struct cl_page *page = slice->cpl_page; - - int result; - - if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE || - io->ci_type == CIT_FAULT) { - if (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED) - result = -EBUSY; - else { - desc->cld_start = page->cp_index; - desc->cld_end = page->cp_index; - desc->cld_obj = page->cp_obj; - desc->cld_mode = CLM_READ; - result = cl_queue_match(&io->ci_lockset.cls_done, - desc) ? -EBUSY : 0; - } - } else - result = 0; - return result; -} - -int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice) -{ - /* - * Cached read? - */ - LBUG(); - return 0; -} - -int ccc_transient_page_prep(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - /* transient page should always be sent. */ - return 0; -} - -/***************************************************************************** - * - * Lock operations. - * - */ - -void ccc_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); -} - -void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) -{ - struct ccc_lock *clk = cl2ccc_lock(slice); - - kmem_cache_free(ccc_lock_kmem, clk); -} - -int ccc_lock_enqueue(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *unused, __u32 enqflags) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); - return 0; -} - -int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); - return 0; -} - -int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); - return 0; -} - -int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); - return 0; -} - -/** - * Implementation of cl_lock_operations::clo_fits_into() methods for ccc - * layer. This function is executed every time io finds an existing lock in - * the lock cache while creating new lock. This function has to decide whether - * cached lock "fits" into io. - * - * \param slice lock to be checked - * \param io IO that wants a lock. - * - * \see lov_lock_fits_into(). - */ -int ccc_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - const struct cl_lock *lock = slice->cls_lock; - const struct cl_lock_descr *descr = &lock->cll_descr; - const struct ccc_io *cio = ccc_env_io(env); - int result; - - /* - * Work around DLM peculiarity: it assumes that glimpse - * (LDLM_FL_HAS_INTENT) lock is always LCK_PR, and returns reads lock - * when asked for LCK_PW lock with LDLM_FL_HAS_INTENT flag set. Make - * sure that glimpse doesn't get CLM_WRITE top-lock, so that it - * doesn't enqueue CLM_WRITE sub-locks. - */ - if (cio->cui_glimpse) - result = descr->cld_mode != CLM_WRITE; - - /* - * Also, don't match incomplete write locks for read, otherwise read - * would enqueue missing sub-locks in the write mode. - */ - else if (need->cld_mode != descr->cld_mode) - result = lock->cll_state >= CLS_ENQUEUED; - else - result = 1; - return result; -} - -/** - * Implements cl_lock_operations::clo_state() method for ccc layer, invoked - * whenever lock state changes. Transfers object attributes, that might be - * updated as a result of lock acquiring into inode. - */ -void ccc_lock_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state) -{ - struct cl_lock *lock = slice->cls_lock; - - /* - * Refresh inode attributes when the lock is moving into CLS_HELD - * state, and only when this is a result of real enqueue, rather than - * of finding lock in the cache. - */ - if (state == CLS_HELD && lock->cll_state < CLS_HELD) { - struct cl_object *obj; - struct inode *inode; - - obj = slice->cls_obj; - inode = ccc_object_inode(obj); - - /* vmtruncate() sets the i_size - * under both a DLM lock and the - * ll_inode_size_lock(). If we don't get the - * ll_inode_size_lock() here we can match the DLM lock and - * reset i_size. generic_file_write can then trust the - * stale i_size when doing appending writes and effectively - * cancel the result of the truncate. Getting the - * ll_inode_size_lock() after the enqueue maintains the DLM - * -> ll_inode_size_lock() acquiring order. - */ - if (lock->cll_descr.cld_start == 0 && - lock->cll_descr.cld_end == CL_PAGE_EOF) - cl_merge_lvb(env, inode); - } -} - -/***************************************************************************** - * - * io operations. - * - */ - -int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - pgoff_t start, pgoff_t end) -{ - struct ccc_io *cio = ccc_env_io(env); - struct cl_lock_descr *descr = &cio->cui_link.cill_descr; - struct cl_object *obj = io->ci_obj; - - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - - CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end); - - memset(&cio->cui_link, 0, sizeof(cio->cui_link)); - - if (cio->cui_fd && (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { - descr->cld_mode = CLM_GROUP; - descr->cld_gid = cio->cui_fd->fd_grouplock.cg_gid; - } else { - descr->cld_mode = mode; - } - descr->cld_obj = obj; - descr->cld_start = start; - descr->cld_end = end; - descr->cld_enq_flags = enqflags; - - cl_io_lock_add(env, io, &cio->cui_link); - return 0; -} - -void ccc_io_update_iov(const struct lu_env *env, - struct ccc_io *cio, struct cl_io *io) -{ - size_t size = io->u.ci_rw.crw_count; - - if (!cl_is_normalio(env, io) || !cio->cui_iter) - return; - - iov_iter_truncate(cio->cui_iter, size); -} - -int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - loff_t start, loff_t end) -{ - struct cl_object *obj = io->ci_obj; - - return ccc_io_one_lock_index(env, io, enqflags, mode, - cl_index(obj, start), cl_index(obj, end)); -} - -void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios) -{ - CLOBINVRNT(env, ios->cis_io->ci_obj, - ccc_object_invariant(ios->cis_io->ci_obj)); -} - -void ccc_io_advance(const struct lu_env *env, - const struct cl_io_slice *ios, - size_t nob) -{ - struct ccc_io *cio = cl2ccc_io(env, ios); - struct cl_io *io = ios->cis_io; - struct cl_object *obj = ios->cis_io->ci_obj; - - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - - if (!cl_is_normalio(env, io)) - return; - - iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count -= nob); -} - -/** - * Helper function that if necessary adjusts file size (inode->i_size), when - * position at the offset \a pos is accessed. File size can be arbitrary stale - * on a Lustre client, but client at least knows KMS. If accessed area is - * inside [0, KMS], set file size to KMS, otherwise glimpse file size. - * - * Locking: cl_isize_lock is used to serialize changes to inode size and to - * protect consistency between inode size and cl_object - * attributes. cl_object_size_lock() protects consistency between cl_attr's of - * top-object and sub-objects. - */ -int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, loff_t start, size_t count, int *exceed) -{ - struct cl_attr *attr = ccc_env_thread_attr(env); - struct inode *inode = ccc_object_inode(obj); - loff_t pos = start + count - 1; - loff_t kms; - int result; - - /* - * Consistency guarantees: following possibilities exist for the - * relation between region being accessed and real file size at this - * moment: - * - * (A): the region is completely inside of the file; - * - * (B-x): x bytes of region are inside of the file, the rest is - * outside; - * - * (C): the region is completely outside of the file. - * - * This classification is stable under DLM lock already acquired by - * the caller, because to change the class, other client has to take - * DLM lock conflicting with our lock. Also, any updates to ->i_size - * by other threads on this client are serialized by - * ll_inode_size_lock(). This guarantees that short reads are handled - * correctly in the face of concurrent writes and truncates. - */ - ccc_object_size_lock(obj); - result = cl_object_attr_get(env, obj, attr); - if (result == 0) { - kms = attr->cat_kms; - if (pos > kms) { - /* - * A glimpse is necessary to determine whether we - * return a short read (B) or some zeroes at the end - * of the buffer (C) - */ - ccc_object_size_unlock(obj); - result = cl_glimpse_lock(env, io, inode, obj, 0); - if (result == 0 && exceed) { - /* If objective page index exceed end-of-file - * page index, return directly. Do not expect - * kernel will check such case correctly. - * linux-2.6.18-128.1.1 miss to do that. - * --bug 17336 - */ - loff_t size = cl_isize_read(inode); - loff_t cur_index = start >> PAGE_SHIFT; - loff_t size_index = (size - 1) >> - PAGE_SHIFT; - - if ((size == 0 && cur_index != 0) || - size_index < cur_index) - *exceed = 1; - } - return result; - } - /* - * region is within kms and, hence, within real file - * size (A). We need to increase i_size to cover the - * read region so that generic_file_read() will do its - * job, but that doesn't mean the kms size is - * _correct_, it is only the _minimum_ size. If - * someone does a stat they will get the correct size - * which will always be >= the kms value here. - * b=11081 - */ - if (cl_isize_read(inode) < kms) { - cl_isize_write_nolock(inode, kms); - CDEBUG(D_VFSTRACE, - DFID" updating i_size %llu\n", - PFID(lu_object_fid(&obj->co_lu)), - (__u64)cl_isize_read(inode)); - - } - } - ccc_object_size_unlock(obj); - return result; -} - -/***************************************************************************** - * - * Transfer operations. - * - */ - -void ccc_req_completion(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret) -{ - struct ccc_req *vrq; - - if (ioret > 0) - cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret); - - vrq = cl2ccc_req(slice); - kmem_cache_free(ccc_req_kmem, vrq); -} - -/** - * Implementation of struct cl_req_operations::cro_attr_set() for ccc - * layer. ccc is responsible for - * - * - o_[mac]time - * - * - o_mode - * - * - o_parent_seq - * - * - o_[ug]id - * - * - o_parent_oid - * - * - o_parent_ver - * - * - o_ioepoch, - * - */ -void ccc_req_attr_set(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *attr, u64 flags) -{ - struct inode *inode; - struct obdo *oa; - u32 valid_flags; - - oa = attr->cra_oa; - inode = ccc_object_inode(obj); - valid_flags = OBD_MD_FLTYPE; - - if (slice->crs_req->crq_type == CRT_WRITE) { - if (flags & OBD_MD_FLEPOCH) { - oa->o_valid |= OBD_MD_FLEPOCH; - oa->o_ioepoch = cl_i2info(inode)->lli_ioepoch; - valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLUID | OBD_MD_FLGID; - } - } - obdo_from_inode(oa, inode, valid_flags & flags); - obdo_set_parent_fid(oa, &cl_i2info(inode)->lli_fid); - memcpy(attr->cra_jobid, cl_i2info(inode)->lli_jobid, - JOBSTATS_JOBID_SIZE); -} - -static const struct cl_req_operations ccc_req_ops = { - .cro_attr_set = ccc_req_attr_set, - .cro_completion = ccc_req_completion -}; - -int cl_setattr_ost(struct inode *inode, const struct iattr *attr) -{ - struct lu_env *env; - struct cl_io *io; - int result; - int refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - io = ccc_env_thread_io(env); - io->ci_obj = cl_i2info(inode)->lli_clob; - - io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); - io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); - io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime); - io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size; - io->u.ci_setattr.sa_valid = attr->ia_valid; - -again: - if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) { - struct ccc_io *cio = ccc_env_io(env); - - if (attr->ia_valid & ATTR_FILE) - /* populate the file descriptor for ftruncate to honor - * group lock - see LU-787 - */ - cio->cui_fd = cl_iattr2fd(inode, attr); - - result = cl_io_loop(env, io); - } else { - result = io->ci_result; - } - cl_io_fini(env, io); - if (unlikely(io->ci_need_restart)) - goto again; - /* HSM import case: file is released, cannot be restored - * no need to fail except if restore registration failed - * with -ENODATA - */ - if (result == -ENODATA && io->ci_restore_needed && - io->ci_result != -ENODATA) - result = 0; - cl_env_put(env, &refcheck); - return result; -} - -/***************************************************************************** - * - * Type conversions. - * - */ - -struct lu_device *ccc2lu_dev(struct ccc_device *vdv) -{ - return &vdv->cdv_cl.cd_lu_dev; -} - -struct ccc_device *lu2ccc_dev(const struct lu_device *d) -{ - return container_of0(d, struct ccc_device, cdv_cl.cd_lu_dev); -} - -struct ccc_device *cl2ccc_dev(const struct cl_device *d) -{ - return container_of0(d, struct ccc_device, cdv_cl); -} - -struct lu_object *ccc2lu(struct ccc_object *vob) -{ - return &vob->cob_cl.co_lu; -} - -struct ccc_object *lu2ccc(const struct lu_object *obj) -{ - return container_of0(obj, struct ccc_object, cob_cl.co_lu); -} - -struct ccc_object *cl2ccc(const struct cl_object *obj) -{ - return container_of0(obj, struct ccc_object, cob_cl); -} - -struct ccc_lock *cl2ccc_lock(const struct cl_lock_slice *slice) -{ - return container_of(slice, struct ccc_lock, clk_cl); -} - -struct ccc_io *cl2ccc_io(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct ccc_io *cio; - - cio = container_of(slice, struct ccc_io, cui_cl); - LASSERT(cio == ccc_env_io(env)); - return cio; -} - -struct ccc_req *cl2ccc_req(const struct cl_req_slice *slice) -{ - return container_of0(slice, struct ccc_req, crq_cl); -} - -struct page *cl2vm_page(const struct cl_page_slice *slice) -{ - return cl2ccc_page(slice)->cpg_page; -} - -/***************************************************************************** - * - * Accessors. - * - */ -int ccc_object_invariant(const struct cl_object *obj) -{ - struct inode *inode = ccc_object_inode(obj); - struct cl_inode_info *lli = cl_i2info(inode); - - return (S_ISREG(cl_inode_mode(inode)) || - /* i_mode of unlinked inode is zeroed. */ - cl_inode_mode(inode) == 0) && lli->lli_clob == obj; -} - -struct inode *ccc_object_inode(const struct cl_object *obj) -{ - return cl2ccc(obj)->cob_inode; -} - -/** - * Initialize or update CLIO structures for regular files when new - * meta-data arrives from the server. - * - * \param inode regular file inode - * \param md new file metadata from MDS - * - allocates cl_object if necessary, - * - updated layout, if object was already here. - */ -int cl_file_inode_init(struct inode *inode, struct lustre_md *md) -{ - struct lu_env *env; - struct cl_inode_info *lli; - struct cl_object *clob; - struct lu_site *site; - struct lu_fid *fid; - struct cl_object_conf conf = { - .coc_inode = inode, - .u = { - .coc_md = md - } - }; - int result = 0; - int refcheck; - - LASSERT(md->body->valid & OBD_MD_FLID); - LASSERT(S_ISREG(cl_inode_mode(inode))); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - site = cl_i2sbi(inode)->ll_site; - lli = cl_i2info(inode); - fid = &lli->lli_fid; - LASSERT(fid_is_sane(fid)); - - if (!lli->lli_clob) { - /* clob is slave of inode, empty lli_clob means for new inode, - * there is no clob in cache with the given fid, so it is - * unnecessary to perform lookup-alloc-lookup-insert, just - * alloc and insert directly. - */ - LASSERT(inode->i_state & I_NEW); - conf.coc_lu.loc_flags = LOC_F_NEW; - clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev), - fid, &conf); - if (!IS_ERR(clob)) { - /* - * No locking is necessary, as new inode is - * locked by I_NEW bit. - */ - lli->lli_clob = clob; - lli->lli_has_smd = lsm_has_objects(md->lsm); - lu_object_ref_add(&clob->co_lu, "inode", inode); - } else - result = PTR_ERR(clob); - } else { - result = cl_conf_set(env, lli->lli_clob, &conf); - } - - cl_env_put(env, &refcheck); - - if (result != 0) - CERROR("Failure to initialize cl object "DFID": %d\n", - PFID(fid), result); - return result; -} - -/** - * Wait for others drop their references of the object at first, then we drop - * the last one, which will lead to the object be destroyed immediately. - * Must be called after cl_object_kill() against this object. - * - * The reason we want to do this is: destroying top object will wait for sub - * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs) - * to initiate top object destroying which may deadlock. See bz22520. - */ -static void cl_object_put_last(struct lu_env *env, struct cl_object *obj) -{ - struct lu_object_header *header = obj->co_lu.lo_header; - wait_queue_t waiter; - - if (unlikely(atomic_read(&header->loh_ref) != 1)) { - struct lu_site *site = obj->co_lu.lo_dev->ld_site; - struct lu_site_bkt_data *bkt; - - bkt = lu_site_bkt_from_fid(site, &header->loh_fid); - - init_waitqueue_entry(&waiter, current); - add_wait_queue(&bkt->lsb_marche_funebre, &waiter); - - while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&header->loh_ref) == 1) - break; - schedule(); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&bkt->lsb_marche_funebre, &waiter); - } - - cl_object_put(env, obj); -} - -void cl_inode_fini(struct inode *inode) -{ - struct lu_env *env; - struct cl_inode_info *lli = cl_i2info(inode); - struct cl_object *clob = lli->lli_clob; - int refcheck; - int emergency; - - if (clob) { - void *cookie; - - cookie = cl_env_reenter(); - env = cl_env_get(&refcheck); - emergency = IS_ERR(env); - if (emergency) { - mutex_lock(&ccc_inode_fini_guard); - LASSERT(ccc_inode_fini_env); - cl_env_implant(ccc_inode_fini_env, &refcheck); - env = ccc_inode_fini_env; - } - /* - * cl_object cache is a slave to inode cache (which, in turn - * is a slave to dentry cache), don't keep cl_object in memory - * when its master is evicted. - */ - cl_object_kill(env, clob); - lu_object_ref_del(&clob->co_lu, "inode", inode); - cl_object_put_last(env, clob); - lli->lli_clob = NULL; - if (emergency) { - cl_env_unplant(ccc_inode_fini_env, &refcheck); - mutex_unlock(&ccc_inode_fini_guard); - } else - cl_env_put(env, &refcheck); - cl_env_reexit(cookie); - } -} - -/** - * return IF_* type for given lu_dirent entry. - * IF_* flag shld be converted to particular OS file type in - * platform llite module. - */ -__u16 ll_dirent_type_get(struct lu_dirent *ent) -{ - __u16 type = 0; - struct luda_type *lt; - int len = 0; - - if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) { - const unsigned align = sizeof(struct luda_type) - 1; - - len = le16_to_cpu(ent->lde_namelen); - len = (len + align) & ~align; - lt = (void *)ent->lde_name + len; - type = IFTODT(le16_to_cpu(lt->lt_type)); - } - return type; -} - -/** - * build inode number from passed @fid - */ -__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32) -{ - if (BITS_PER_LONG == 32 || api32) - return fid_flatten32(fid); - else - return fid_flatten(fid); -} - -/** - * build inode generation from passed @fid. If our FID overflows the 32-bit - * inode number then return a non-zero generation to distinguish them. - */ -__u32 cl_fid_build_gen(const struct lu_fid *fid) -{ - __u32 gen; - - if (fid_is_igif(fid)) { - gen = lu_igif_gen(fid); - return gen; - } - - gen = fid_flatten(fid) >> 32; - return gen; -} - -/* lsm is unreliable after hsm implementation as layout can be changed at - * any time. This is only to support old, non-clio-ized interfaces. It will - * cause deadlock if clio operations are called with this extra layout refcount - * because in case the layout changed during the IO, ll_layout_refresh() will - * have to wait for the refcount to become zero to destroy the older layout. - * - * Notice that the lsm returned by this function may not be valid unless called - * inside layout lock - MDS_INODELOCK_LAYOUT. - */ -struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode) -{ - return lov_lsm_get(cl_i2info(inode)->lli_clob); -} - -inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm) -{ - lov_lsm_put(cl_i2info(inode)->lli_clob, lsm); -} diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c index 323060626fdf..f4a70ebddeaf 100644 --- a/drivers/staging/lustre/lustre/ldlm/interval_tree.c +++ b/drivers/staging/lustre/lustre/ldlm/interval_tree.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c index e5d1344e817a..ea8840cb9056 100644 --- a/drivers/staging/lustre/lustre/ldlm/l_lock.c +++ b/drivers/staging/lustre/lustre/ldlm/l_lock.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -54,7 +50,7 @@ struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock) lock_res(lock->l_resource); - lock->l_flags |= LDLM_FL_RES_LOCKED; + ldlm_set_res_locked(lock); return lock->l_resource; } EXPORT_SYMBOL(lock_res_and_lock); @@ -65,7 +61,7 @@ EXPORT_SYMBOL(lock_res_and_lock); void unlock_res_and_lock(struct ldlm_lock *lock) { /* on server-side resource of lock doesn't change */ - lock->l_flags &= ~LDLM_FL_RES_LOCKED; + ldlm_clear_res_locked(lock); unlock_res(lock->l_resource); spin_unlock(&lock->l_lock); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c index a803e200f206..f5023d9b78f5 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -75,12 +71,12 @@ __u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms) * just after we finish and take our lock into account in its * calculation of the kms */ - lock->l_flags |= LDLM_FL_KMS_IGNORE; + ldlm_set_kms_ignore(lock); list_for_each(tmp, &res->lr_granted) { lck = list_entry(tmp, struct ldlm_lock, l_res_link); - if (lck->l_flags & LDLM_FL_KMS_IGNORE) + if (ldlm_is_kms_ignore(lck)) continue; if (lck->l_policy_data.l_extent.end >= old_kms) diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c index b88b78606aee..d6b61bc39135 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -101,8 +97,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode, __u64 flags) LASSERT(hlist_unhashed(&lock->l_exp_flock_hash)); list_del_init(&lock->l_res_link); - if (flags == LDLM_FL_WAIT_NOREPROC && - !(lock->l_flags & LDLM_FL_FAILED)) { + if (flags == LDLM_FL_WAIT_NOREPROC && !ldlm_is_failed(lock)) { /* client side - set a flag to prevent sending a CANCEL */ lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING; @@ -260,14 +255,13 @@ reprocess: * overflow and underflow. */ if ((new->l_policy_data.l_flock.start > - (lock->l_policy_data.l_flock.end + 1)) - && (lock->l_policy_data.l_flock.end != - OBD_OBJECT_EOF)) + (lock->l_policy_data.l_flock.end + 1)) && + (lock->l_policy_data.l_flock.end != OBD_OBJECT_EOF)) continue; if ((new->l_policy_data.l_flock.end < - (lock->l_policy_data.l_flock.start - 1)) - && (lock->l_policy_data.l_flock.start != 0)) + (lock->l_policy_data.l_flock.start - 1)) && + (lock->l_policy_data.l_flock.start != 0)) break; if (new->l_policy_data.l_flock.start < @@ -436,7 +430,7 @@ ldlm_flock_interrupted_wait(void *data) lock_res_and_lock(lock); /* client side - set flag to prevent lock from being put on LRU list */ - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); unlock_res_and_lock(lock); } @@ -520,30 +514,29 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) granted: OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10); - if (lock->l_flags & LDLM_FL_DESTROYED) { - LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); - return 0; - } - - if (lock->l_flags & LDLM_FL_FAILED) { + if (ldlm_is_failed(lock)) { LDLM_DEBUG(lock, "client-side enqueue waking up: failed"); return -EIO; } - if (rc) { - LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", - rc); - return rc; - } - LDLM_DEBUG(lock, "client-side enqueue granted"); lock_res_and_lock(lock); + /* + * Protect against race where lock could have been just destroyed + * due to overlap in ldlm_process_flock_lock(). + */ + if (ldlm_is_destroyed(lock)) { + unlock_res_and_lock(lock); + LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); + return 0; + } + /* ldlm_lock_enqueue() has already placed lock on the granted list. */ list_del_init(&lock->l_res_link); - if (lock->l_flags & LDLM_FL_FLOCK_DEADLOCK) { + if (ldlm_is_flock_deadlock(lock)) { LDLM_DEBUG(lock, "client-side enqueue deadlock received"); rc = -EDEADLK; } else if (flags & LDLM_FL_TEST_LOCK) { diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c index b1bed1e17d32..79f4e6fa193e 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h index e21373e7306f..e4cf65d2d3b1 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -95,9 +91,10 @@ enum { LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */ LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */ LDLM_CANCEL_LRUR = 1 << 3, /* Cancel locks from lru resize. */ - LDLM_CANCEL_NO_WAIT = 1 << 4 /* Cancel locks w/o blocking (neither - * sending nor waiting for any rpcs) - */ + LDLM_CANCEL_NO_WAIT = 1 << 4, /* Cancel locks w/o blocking (neither + * sending nor waiting for any rpcs) + */ + LDLM_CANCEL_LRUR_NO_WAIT = 1 << 5, /* LRUR + NO_WAIT */ }; int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, @@ -145,7 +142,8 @@ void ldlm_lock_decref_internal(struct ldlm_lock *, __u32 mode); void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, __u32 mode); int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list, enum ldlm_desc_ast_t ast_type); -int ldlm_lock_remove_from_lru(struct ldlm_lock *lock); +int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use); +#define ldlm_lock_remove_from_lru(lock) ldlm_lock_remove_from_lru_check(lock, 0) int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock); void ldlm_lock_destroy_nolock(struct ldlm_lock *lock); @@ -216,8 +214,6 @@ enum ldlm_policy_res { LDLM_POLICY_SKIP_LOCK }; -typedef enum ldlm_policy_res ldlm_policy_res_t; - #define LDLM_POOL_SYSFS_PRINT_int(v) sprintf(buf, "%d\n", v) #define LDLM_POOL_SYSFS_SET_int(a, b) { a = b; } #define LDLM_POOL_SYSFS_PRINT_u64(v) sprintf(buf, "%lld\n", v) @@ -305,9 +301,10 @@ static inline int is_granted_or_cancelled(struct ldlm_lock *lock) int ret = 0; lock_res_and_lock(lock); - if (((lock->l_req_mode == lock->l_granted_mode) && - !(lock->l_flags & LDLM_FL_CP_REQD)) || - (lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCEL))) + if ((lock->l_req_mode == lock->l_granted_mode) && + !ldlm_is_cp_reqd(lock)) + ret = 1; + else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock)) ret = 1; unlock_res_and_lock(lock); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c index 7dd7df59aa1f..7c832aae7d5e 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -314,7 +310,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list); INIT_LIST_HEAD(&cli->cl_loi_write_list); INIT_LIST_HEAD(&cli->cl_loi_read_list); - client_obd_list_lock_init(&cli->cl_loi_list_lock); + spin_lock_init(&cli->cl_loi_list_lock); atomic_set(&cli->cl_pending_w_pages, 0); atomic_set(&cli->cl_pending_r_pages, 0); cli->cl_r_in_flight = 0; @@ -333,7 +329,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) atomic_set(&cli->cl_lru_busy, 0); atomic_set(&cli->cl_lru_in_list, 0); INIT_LIST_HEAD(&cli->cl_lru_list); - client_obd_list_lock_init(&cli->cl_lru_list_lock); + spin_lock_init(&cli->cl_lru_list_lock); + atomic_set(&cli->cl_unstable_count, 0); init_waitqueue_head(&cli->cl_destroy_waitq); atomic_set(&cli->cl_destroy_in_flight, 0); @@ -344,7 +341,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) * Set cl_chksum* to CRC32 for now to avoid returning screwed info * through procfs. */ - cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; + cli->cl_cksum_type = OBD_CKSUM_CRC32; + cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); /* This value may be reduced at connect time in @@ -355,6 +353,12 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES, LNET_MTU >> PAGE_SHIFT); + /* + * set cl_chunkbits default value to PAGE_CACHE_SHIFT, + * it will be updated at OSC connection time. + */ + cli->cl_chunkbits = PAGE_SHIFT; + if (!strcmp(name, LUSTRE_MDC_NAME)) { cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT; } else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) { @@ -429,7 +433,6 @@ err_ldlm: ldlm_put_ref(); err: return rc; - } EXPORT_SYMBOL(client_obd_setup); @@ -438,6 +441,7 @@ int client_obd_cleanup(struct obd_device *obddev) ldlm_namespace_free_post(obddev->obd_namespace); obddev->obd_namespace = NULL; + obd_cleanup_client_import(obddev); LASSERT(!obddev->u.cli.cl_import); ldlm_put_ref(); @@ -748,6 +752,7 @@ int ldlm_error2errno(enum ldlm_error error) switch (error) { case ELDLM_OK: + case ELDLM_LOCK_MATCHED: result = 0; break; case ELDLM_LOCK_CHANGED: diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c index ecd65a7a3dc9..a5993f745ebe 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -185,7 +181,7 @@ void ldlm_lock_put(struct ldlm_lock *lock) "final lock_put on destroyed lock, freeing it."); res = lock->l_resource; - LASSERT(lock->l_flags & LDLM_FL_DESTROYED); + LASSERT(ldlm_is_destroyed(lock)); LASSERT(list_empty(&lock->l_res_link)); LASSERT(list_empty(&lock->l_pending_chain)); @@ -229,15 +225,25 @@ int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock) /** * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first. + * + * If \a last_use is non-zero, it will remove the lock from LRU only if + * it matches lock's l_last_used. + * + * \retval 0 if \a last_use is set, the lock is not in LRU list or \a last_use + * doesn't match lock's l_last_used; + * otherwise, the lock hasn't been in the LRU list. + * \retval 1 the lock was in LRU list and removed. */ -int ldlm_lock_remove_from_lru(struct ldlm_lock *lock) +int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use) { struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - int rc; + int rc = 0; spin_lock(&ns->ns_lock); - rc = ldlm_lock_remove_from_lru_nolock(lock); + if (last_use == 0 || last_use == lock->l_last_used) + rc = ldlm_lock_remove_from_lru_nolock(lock); spin_unlock(&ns->ns_lock); + return rc; } @@ -252,8 +258,7 @@ static void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock) LASSERT(list_empty(&lock->l_lru)); LASSERT(lock->l_resource->lr_type != LDLM_FLOCK); list_add_tail(&lock->l_lru, &ns->ns_unused_list); - if (lock->l_flags & LDLM_FL_SKIPPED) - lock->l_flags &= ~LDLM_FL_SKIPPED; + ldlm_clear_skipped(lock); LASSERT(ns->ns_nr_unused >= 0); ns->ns_nr_unused++; } @@ -318,11 +323,11 @@ static int ldlm_lock_destroy_internal(struct ldlm_lock *lock) LBUG(); } - if (lock->l_flags & LDLM_FL_DESTROYED) { + if (ldlm_is_destroyed(lock)) { LASSERT(list_empty(&lock->l_lru)); return 0; } - lock->l_flags |= LDLM_FL_DESTROYED; + ldlm_set_destroyed(lock); if (lock->l_export && lock->l_export->exp_lock_hash) { /* NB: it's safe to call cfs_hash_del() even lock isn't @@ -544,7 +549,7 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle, /* It's unlikely but possible that someone marked the lock as * destroyed after we did handle2object on it */ - if (flags == 0 && ((lock->l_flags & LDLM_FL_DESTROYED) == 0)) { + if (flags == 0 && !ldlm_is_destroyed(lock)) { lu_ref_add(&lock->l_reference, "handle", current); return lock; } @@ -554,21 +559,22 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle, LASSERT(lock->l_resource); lu_ref_add_atomic(&lock->l_reference, "handle", current); - if (unlikely(lock->l_flags & LDLM_FL_DESTROYED)) { + if (unlikely(ldlm_is_destroyed(lock))) { unlock_res_and_lock(lock); CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock); LDLM_LOCK_PUT(lock); return NULL; } - if (flags && (lock->l_flags & flags)) { - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - return NULL; - } + if (flags) { + if (lock->l_flags & flags) { + unlock_res_and_lock(lock); + LDLM_LOCK_PUT(lock); + return NULL; + } - if (flags) lock->l_flags |= flags; + } unlock_res_and_lock(lock); return lock; @@ -599,14 +605,14 @@ EXPORT_SYMBOL(ldlm_lock2desc); static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new, struct list_head *work_list) { - if ((lock->l_flags & LDLM_FL_AST_SENT) == 0) { + if (!ldlm_is_ast_sent(lock)) { LDLM_DEBUG(lock, "lock incompatible; sending blocking AST."); - lock->l_flags |= LDLM_FL_AST_SENT; + ldlm_set_ast_sent(lock); /* If the enqueuing client said so, tell the AST recipient to * discard dirty data, rather than writing back. */ - if (new->l_flags & LDLM_FL_AST_DISCARD_DATA) - lock->l_flags |= LDLM_FL_DISCARD_DATA; + if (ldlm_is_ast_discard_data(new)) + ldlm_set_discard_data(lock); LASSERT(list_empty(&lock->l_bl_ast)); list_add(&lock->l_bl_ast, work_list); LDLM_LOCK_GET(lock); @@ -621,8 +627,8 @@ static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new, static void ldlm_add_cp_work_item(struct ldlm_lock *lock, struct list_head *work_list) { - if ((lock->l_flags & LDLM_FL_CP_REQD) == 0) { - lock->l_flags |= LDLM_FL_CP_REQD; + if (!ldlm_is_cp_reqd(lock)) { + ldlm_set_cp_reqd(lock); LDLM_DEBUG(lock, "lock granted; sending completion AST."); LASSERT(list_empty(&lock->l_cp_ast)); list_add(&lock->l_cp_ast, work_list); @@ -652,12 +658,12 @@ static void ldlm_add_ast_work_item(struct ldlm_lock *lock, * r/w reference type is determined by \a mode * Calls ldlm_lock_addref_internal. */ -void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode) +void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode) { struct ldlm_lock *lock; lock = ldlm_handle2lock(lockh); - LASSERT(lock); + LASSERTF(lock, "Non-existing lock: %llx\n", lockh->cookie); ldlm_lock_addref_internal(lock, mode); LDLM_LOCK_PUT(lock); } @@ -694,7 +700,7 @@ void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode) * * \retval -EAGAIN lock is being canceled. */ -int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode) +int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode) { struct ldlm_lock *lock; int result; @@ -704,7 +710,7 @@ int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode) if (lock) { lock_res_and_lock(lock); if (lock->l_readers != 0 || lock->l_writers != 0 || - !(lock->l_flags & LDLM_FL_CBPENDING)) { + !ldlm_is_cbpending(lock)) { ldlm_lock_addref_internal_nolock(lock, mode); result = 0; } @@ -770,17 +776,17 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) ldlm_lock_decref_internal_nolock(lock, mode); - if (lock->l_flags & LDLM_FL_LOCAL && + if (ldlm_is_local(lock) && !lock->l_readers && !lock->l_writers) { /* If this is a local lock on a server namespace and this was * the last reference, cancel the lock. */ CDEBUG(D_INFO, "forcing cancel of local lock\n"); - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); } if (!lock->l_readers && !lock->l_writers && - (lock->l_flags & LDLM_FL_CBPENDING)) { + ldlm_is_cbpending(lock)) { /* If we received a blocked AST and this was the last reference, * run the callback. */ @@ -791,16 +797,14 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) ldlm_lock_remove_from_lru(lock); unlock_res_and_lock(lock); - if (lock->l_flags & LDLM_FL_FAIL_LOC) + if (ldlm_is_fail_loc(lock)) OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - if ((lock->l_flags & LDLM_FL_ATOMIC_CB) || + if (ldlm_is_atomic_cb(lock) || ldlm_bl_to_thread_lock(ns, NULL, lock) != 0) ldlm_handle_bl_callback(ns, NULL, lock); } else if (!lock->l_readers && !lock->l_writers && - !(lock->l_flags & LDLM_FL_NO_LRU) && - !(lock->l_flags & LDLM_FL_BL_AST)) { - + !ldlm_is_no_lru(lock) && !ldlm_is_bl_ast(lock)) { LDLM_DEBUG(lock, "add lock into lru list"); /* If this is a client-side namespace and this was the last @@ -809,7 +813,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) ldlm_lock_add_to_lru(lock); unlock_res_and_lock(lock); - if (lock->l_flags & LDLM_FL_FAIL_LOC) + if (ldlm_is_fail_loc(lock)) OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); /* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE @@ -828,7 +832,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) /** * Decrease reader/writer refcount for LDLM lock with handle \a lockh */ -void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode) +void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode) { struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); @@ -845,7 +849,7 @@ EXPORT_SYMBOL(ldlm_lock_decref); * * Typical usage is for GROUP locks which we cannot allow to be cached. */ -void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode) +void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode) { struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); @@ -853,7 +857,7 @@ void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode) LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); unlock_res_and_lock(lock); ldlm_lock_decref_internal(lock, mode); LDLM_LOCK_PUT(lock); @@ -971,7 +975,7 @@ static void ldlm_granted_list_add_lock(struct ldlm_lock *lock, ldlm_resource_dump(D_INFO, res); LDLM_DEBUG(lock, "About to add lock:"); - if (lock->l_flags & LDLM_FL_DESTROYED) { + if (ldlm_is_destroyed(lock)) { CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); return; } @@ -1073,10 +1077,9 @@ static struct ldlm_lock *search_queue(struct list_head *queue, * whose parents already hold a lock so forward progress * can still happen. */ - if (lock->l_flags & LDLM_FL_CBPENDING && - !(flags & LDLM_FL_CBPENDING)) + if (ldlm_is_cbpending(lock) && !(flags & LDLM_FL_CBPENDING)) continue; - if (!unref && lock->l_flags & LDLM_FL_CBPENDING && + if (!unref && ldlm_is_cbpending(lock) && lock->l_readers == 0 && lock->l_writers == 0) continue; @@ -1092,6 +1095,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue, if (unlikely(match == LCK_GROUP) && lock->l_resource->lr_type == LDLM_EXTENT && + policy->l_extent.gid != LDLM_GID_ANY && lock->l_policy_data.l_extent.gid != policy->l_extent.gid) continue; @@ -1104,11 +1108,10 @@ static struct ldlm_lock *search_queue(struct list_head *queue, policy->l_inodebits.bits)) continue; - if (!unref && (lock->l_flags & LDLM_FL_GONE_MASK)) + if (!unref && LDLM_HAVE_MASK(lock, GONE)) continue; - if ((flags & LDLM_FL_LOCAL_ONLY) && - !(lock->l_flags & LDLM_FL_LOCAL)) + if ((flags & LDLM_FL_LOCAL_ONLY) && !ldlm_is_local(lock)) continue; if (flags & LDLM_FL_TEST_LOCK) { @@ -1142,7 +1145,7 @@ EXPORT_SYMBOL(ldlm_lock_fail_match_locked); */ void ldlm_lock_allow_match_locked(struct ldlm_lock *lock) { - lock->l_flags |= LDLM_FL_LVB_READY; + ldlm_set_lvb_ready(lock); wake_up_all(&lock->l_waitq); } EXPORT_SYMBOL(ldlm_lock_allow_match_locked); @@ -1243,8 +1246,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, if (lock) { ldlm_lock2handle(lock, lockh); - if ((flags & LDLM_FL_LVB_READY) && - (!(lock->l_flags & LDLM_FL_LVB_READY))) { + if ((flags & LDLM_FL_LVB_READY) && !ldlm_is_lvb_ready(lock)) { __u64 wait_flags = LDLM_FL_LVB_READY | LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED; struct l_wait_info lwi; @@ -1271,7 +1273,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, l_wait_event(lock->l_waitq, lock->l_flags & wait_flags, &lwi); - if (!(lock->l_flags & LDLM_FL_LVB_READY)) { + if (!ldlm_is_lvb_ready(lock)) { if (flags & LDLM_FL_TEST_LOCK) LDLM_LOCK_RELEASE(lock); else @@ -1316,7 +1318,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, } EXPORT_SYMBOL(ldlm_lock_match); -enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh, +enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, __u64 *bits) { struct ldlm_lock *lock; @@ -1325,10 +1327,10 @@ enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh, lock = ldlm_handle2lock(lockh); if (lock) { lock_res_and_lock(lock); - if (lock->l_flags & LDLM_FL_GONE_MASK) + if (LDLM_HAVE_MASK(lock, GONE)) goto out; - if (lock->l_flags & LDLM_FL_CBPENDING && + if (ldlm_is_cbpending(lock) && lock->l_readers == 0 && lock->l_writers == 0) goto out; @@ -1438,7 +1440,7 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, memcpy(data, lvb, size); break; default: - LDLM_ERROR(lock, "Unknown LVB type: %d\n", lock->l_lvb_type); + LDLM_ERROR(lock, "Unknown LVB type: %d", lock->l_lvb_type); dump_stack(); return -EINVAL; } @@ -1542,7 +1544,8 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns, /* Some flags from the enqueue want to make it into the AST, via the * lock's l_flags. */ - lock->l_flags |= *flags & LDLM_FL_AST_DISCARD_DATA; + if (*flags & LDLM_FL_AST_DISCARD_DATA) + ldlm_set_ast_discard_data(lock); /* * This distinction between local lock trees is very important; a client @@ -1581,7 +1584,7 @@ ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) lock_res_and_lock(lock); list_del_init(&lock->l_bl_ast); - LASSERT(lock->l_flags & LDLM_FL_AST_SENT); + LASSERT(ldlm_is_ast_sent(lock)); LASSERT(lock->l_bl_ast_run == 0); LASSERT(lock->l_blocking_lock); lock->l_bl_ast_run++; @@ -1628,12 +1631,12 @@ ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) /* nobody should touch l_cp_ast */ lock_res_and_lock(lock); list_del_init(&lock->l_cp_ast); - LASSERT(lock->l_flags & LDLM_FL_CP_REQD); + LASSERT(ldlm_is_cp_reqd(lock)); /* save l_completion_ast since it can be changed by * mds_intent_policy(), see bug 14225 */ completion_callback = lock->l_completion_ast; - lock->l_flags &= ~LDLM_FL_CP_REQD; + ldlm_clear_cp_reqd(lock); unlock_res_and_lock(lock); if (completion_callback) @@ -1778,8 +1781,8 @@ out: void ldlm_cancel_callback(struct ldlm_lock *lock) { check_res_locked(lock->l_resource); - if (!(lock->l_flags & LDLM_FL_CANCEL)) { - lock->l_flags |= LDLM_FL_CANCEL; + if (!ldlm_is_cancel(lock)) { + ldlm_set_cancel(lock); if (lock->l_blocking_ast) { unlock_res_and_lock(lock); lock->l_blocking_ast(lock, NULL, lock->l_ast_data, @@ -1789,7 +1792,7 @@ void ldlm_cancel_callback(struct ldlm_lock *lock) LDLM_DEBUG(lock, "no blocking ast"); } } - lock->l_flags |= LDLM_FL_BL_DONE; + ldlm_set_bl_done(lock); } /** @@ -1846,7 +1849,7 @@ EXPORT_SYMBOL(ldlm_lock_cancel); /** * Set opaque data into the lock that only makes sense to upper layer. */ -int ldlm_lock_set_data(struct lustre_handle *lockh, void *data) +int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data) { struct ldlm_lock *lock = ldlm_handle2lock(lockh); int rc = -EINVAL; @@ -1872,7 +1875,7 @@ struct export_cl_data { * * Used when printing all locks on a resource for debug purposes. */ -void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh) +void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh) { struct ldlm_lock *lock; diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c index ebe9042adb25..821939ff2e6b 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -124,10 +120,10 @@ void ldlm_handle_bl_callback(struct ldlm_namespace *ns, LDLM_DEBUG(lock, "client blocking AST callback handler"); lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); - if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) - lock->l_flags |= LDLM_FL_CANCEL; + if (ldlm_is_cancel_on_block(lock)) + ldlm_set_cancel(lock); do_ast = !lock->l_readers && !lock->l_writers; unlock_res_and_lock(lock); @@ -172,7 +168,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(to); if (lock->l_granted_mode == lock->l_req_mode || - lock->l_flags & LDLM_FL_DESTROYED) + ldlm_is_destroyed(lock)) break; } } @@ -215,7 +211,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, } lock_res_and_lock(lock); - if ((lock->l_flags & LDLM_FL_DESTROYED) || + if (ldlm_is_destroyed(lock) || lock->l_granted_mode == lock->l_req_mode) { /* bug 11300: the lock has already been granted */ unlock_res_and_lock(lock); @@ -291,7 +287,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, out: if (rc < 0) { lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_FAILED; + ldlm_set_failed(lock); unlock_res_and_lock(lock); wake_up(&lock->l_waitq); } @@ -360,8 +356,7 @@ static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi, struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool; spin_lock(&blp->blp_lock); - if (blwi->blwi_lock && - blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) { + if (blwi->blwi_lock && ldlm_is_discard_data(blwi->blwi_lock)) { /* add LDLM_FL_DISCARD_DATA requests to the priority list */ list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list); } else { @@ -504,7 +499,7 @@ static int ldlm_handle_setinfo(struct ptlrpc_request *req) static inline void ldlm_callback_errmsg(struct ptlrpc_request *req, const char *msg, int rc, - struct lustre_handle *handle) + const struct lustre_handle *handle) { DEBUG_REQ((req->rq_no_reply || rc) ? D_WARNING : D_DLMTRACE, req, "%s: [nid %s] [rc %d] [lock %#llx]", @@ -626,24 +621,24 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) return 0; } - if ((lock->l_flags & LDLM_FL_FAIL_LOC) && + if (ldlm_is_fail_loc(lock) && lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */ lock_res_and_lock(lock); lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags & - LDLM_AST_FLAGS); + LDLM_FL_AST_MASK); if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { /* If somebody cancels lock and cache is already dropped, * or lock is failed before cp_ast received on client, * we can tell the server we have no lock. Otherwise, we * should send cancel after dropping the cache. */ - if (((lock->l_flags & LDLM_FL_CANCELING) && - (lock->l_flags & LDLM_FL_BL_DONE)) || - (lock->l_flags & LDLM_FL_FAILED)) { - LDLM_DEBUG(lock, "callback on lock %#llx - lock disappeared\n", + if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) || + ldlm_is_failed(lock)) { + LDLM_DEBUG(lock, + "callback on lock %#llx - lock disappeared", dlm_req->lock_handle[0].cookie); unlock_res_and_lock(lock); LDLM_LOCK_RELEASE(lock); @@ -656,7 +651,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) * Let ldlm_cancel_lru() be fast. */ ldlm_lock_remove_from_lru(lock); - lock->l_flags |= LDLM_FL_BL_AST; + ldlm_set_bl_ast(lock); } unlock_res_and_lock(lock); @@ -674,7 +669,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) case LDLM_BL_CALLBACK: CDEBUG(D_INODE, "blocking ast\n"); req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK); - if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) { + if (!ldlm_is_cancel_on_block(lock)) { rc = ldlm_callback_reply(req, 0); if (req->rq_no_reply || rc) ldlm_callback_errmsg(req, "Normal process", rc, @@ -1013,9 +1008,11 @@ static int ldlm_setup(void) blp->blp_min_threads = LDLM_NTHRS_INIT; blp->blp_max_threads = LDLM_NTHRS_MAX; } else { - blp->blp_min_threads = blp->blp_max_threads = - min_t(int, LDLM_NTHRS_MAX, max_t(int, LDLM_NTHRS_INIT, - ldlm_num_threads)); + blp->blp_min_threads = min_t(int, LDLM_NTHRS_MAX, + max_t(int, LDLM_NTHRS_INIT, + ldlm_num_threads)); + + blp->blp_max_threads = blp->blp_min_threads; } for (i = 0; i < blp->blp_min_threads; i++) { diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c index 0c1965ddabb9..0aed39c46154 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index b913ba9cf97c..657ed4012776 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c index 74e193e52cd6..af487f9937f4 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -153,7 +149,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock) long delay; int result; - if (lock->l_flags & (LDLM_FL_DESTROYED | LDLM_FL_FAILED)) { + if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) { LDLM_DEBUG(lock, "client-side enqueue: destroyed"); result = -EIO; } else { @@ -252,7 +248,7 @@ noreproc: lwd.lwd_lock = lock; - if (lock->l_flags & LDLM_FL_NO_TIMEOUT) { + if (ldlm_is_no_timeout(lock)) { LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT"); lwi = LWI_INTR(interrupted_completion_wait, &lwd); } else { @@ -269,7 +265,7 @@ noreproc: if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST, OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) { - lock->l_flags |= LDLM_FL_FAIL_LOC; + ldlm_set_fail_loc(lock); rc = -EINTR; } else { /* Go to sleep until the lock is granted or cancelled. */ @@ -296,7 +292,7 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns, lock_res_and_lock(lock); /* Check that lock is not granted or failed, we might race. */ if ((lock->l_req_mode != lock->l_granted_mode) && - !(lock->l_flags & LDLM_FL_FAILED)) { + !ldlm_is_failed(lock)) { /* Make sure that this lock will not be found by raced * bl_ast and -EINVAL reply is sent to server anyways. * bug 17645 @@ -340,14 +336,13 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, enum ldlm_type type, __u8 with_policy, enum ldlm_mode mode, __u64 *flags, void *lvb, __u32 lvb_len, - struct lustre_handle *lockh, int rc) + const struct lustre_handle *lockh, int rc) { struct ldlm_namespace *ns = exp->exp_obd->obd_namespace; int is_replay = *flags & LDLM_FL_REPLAY; struct ldlm_lock *lock; struct ldlm_reply *reply; int cleanup_phase = 1; - int size = 0; lock = ldlm_handle2lock(lockh); /* ldlm_cli_enqueue is holding a reference on this lock. */ @@ -375,8 +370,8 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, goto cleanup; } - if (lvb_len != 0) { - LASSERT(lvb); + if (lvb_len > 0) { + int size = 0; size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER); @@ -390,12 +385,13 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, rc = -EINVAL; goto cleanup; } + lvb_len = size; } if (rc == ELDLM_LOCK_ABORTED) { - if (lvb_len != 0) + if (lvb_len > 0 && lvb) rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER, - lvb, size); + lvb, lvb_len); if (rc == 0) rc = ELDLM_LOCK_ABORTED; goto cleanup; @@ -421,7 +417,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, *flags = ldlm_flags_from_wire(reply->lock_flags); lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags & - LDLM_INHERIT_FLAGS); + LDLM_FL_INHERIT_MASK); /* move NO_TIMEOUT flag to the lock to force ldlm_lock_match() * to wait with no timeout as well */ @@ -489,7 +485,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, /* If the lock has already been granted by a completion AST, don't * clobber the LVB with an older one. */ - if (lvb_len != 0) { + if (lvb_len > 0) { /* We must lock or a racing completion might update lvb without * letting us know and we'll clobber the correct value. * Cannot unlock after the check either, as that still leaves @@ -498,7 +494,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, lock_res_and_lock(lock); if (lock->l_req_mode != lock->l_granted_mode) rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER, - lock->l_lvb_data, size); + lock->l_lvb_data, lvb_len); unlock_res_and_lock(lock); if (rc < 0) { cleanup_phase = 1; @@ -518,7 +514,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, } } - if (lvb_len && lvb) { + if (lvb_len > 0 && lvb) { /* Copy the LVB here, and not earlier, because the completion * AST (if any) can override what we got in the reply */ @@ -601,7 +597,7 @@ int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req, avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff); flags = ns_connect_lru_resize(ns) ? - LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED; + LDLM_CANCEL_LRUR_NO_WAIT : LDLM_CANCEL_AGED; to_free = !ns_connect_lru_resize(ns) && opc == LDLM_ENQUEUE ? 1 : 0; @@ -715,7 +711,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, lock->l_req_extent = policy->l_extent; } - LDLM_DEBUG(lock, "client-side enqueue START, flags %llx\n", + LDLM_DEBUG(lock, "client-side enqueue START, flags %llx", *flags); } @@ -821,12 +817,11 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock) LDLM_DEBUG(lock, "client-side cancel"); /* Set this flag to prevent others from getting new references*/ lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); local_only = !!(lock->l_flags & (LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK)); ldlm_cancel_callback(lock); - rc = (lock->l_flags & LDLM_FL_BL_AST) ? - LDLM_FL_BL_AST : LDLM_FL_CANCELING; + rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING; unlock_res_and_lock(lock); if (local_only) { @@ -1028,7 +1023,7 @@ EXPORT_SYMBOL(ldlm_cli_update_pool); * * Lock must not have any readers or writers by this time. */ -int ldlm_cli_cancel(struct lustre_handle *lockh, +int ldlm_cli_cancel(const struct lustre_handle *lockh, enum ldlm_cancel_flags cancel_flags) { struct obd_export *exp; @@ -1131,31 +1126,30 @@ EXPORT_SYMBOL(ldlm_cli_cancel_list_local); * dirty data, to close a file, ...) or waiting for any RPCs in-flight (e.g. * readahead requests, ...) */ -static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res +ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock, + int unused, int added, int count) { - ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK; - ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery; - - lock_res_and_lock(lock); + enum ldlm_policy_res result = LDLM_POLICY_CANCEL_LOCK; /* don't check added & count since we want to process all locks - * from unused list + * from unused list. + * It's fine to not take lock to access lock->l_resource since + * the lock has already been granted so it won't change. */ switch (lock->l_resource->lr_type) { case LDLM_EXTENT: case LDLM_IBITS: - if (cb && cb(lock)) + if (ns->ns_cancel && ns->ns_cancel(lock) != 0) break; default: result = LDLM_POLICY_SKIP_LOCK; - lock->l_flags |= LDLM_FL_SKIPPED; + lock_res_and_lock(lock); + ldlm_set_skipped(lock); + unlock_res_and_lock(lock); break; } - unlock_res_and_lock(lock); return result; } @@ -1168,10 +1162,10 @@ static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, * * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU */ -static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { unsigned long cur = cfs_time_current(); struct ldlm_pool *pl = &ns->ns_pool; @@ -1196,8 +1190,13 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, /* Stop when SLV is not yet come from server or lv is smaller than * it is. */ - return (slv == 0 || lv < slv) ? - LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; + if (slv == 0 || lv < slv) + return LDLM_POLICY_KEEP_LOCK; + + if (ns->ns_cancel && ns->ns_cancel(lock) == 0) + return LDLM_POLICY_KEEP_LOCK; + + return LDLM_POLICY_CANCEL_LOCK; } /** @@ -1209,10 +1208,10 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, * * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU */ -static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res ldlm_cancel_passed_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { /* Stop LRU processing when we reach past @count or have checked all * locks in LRU. @@ -1230,16 +1229,35 @@ static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns, * * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU */ -static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { - /* Stop LRU processing if young lock is found and we reach past count */ - return ((added >= count) && - time_before(cfs_time_current(), - cfs_time_add(lock->l_last_used, ns->ns_max_age))) ? - LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; + if ((added >= count) && + time_before(cfs_time_current(), + cfs_time_add(lock->l_last_used, ns->ns_max_age))) + return LDLM_POLICY_KEEP_LOCK; + + if (ns->ns_cancel && ns->ns_cancel(lock) == 0) + return LDLM_POLICY_KEEP_LOCK; + + return LDLM_POLICY_CANCEL_LOCK; +} + +static enum ldlm_policy_res +ldlm_cancel_lrur_no_wait_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) +{ + enum ldlm_policy_res result; + + result = ldlm_cancel_lrur_policy(ns, lock, unused, added, count); + if (result == LDLM_POLICY_KEEP_LOCK) + return result; + + return ldlm_cancel_no_wait_policy(ns, lock, unused, added, count); } /** @@ -1251,10 +1269,9 @@ static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns, * * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU */ -static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res +ldlm_cancel_default_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock, + int unused, int added, int count) { /* Stop LRU processing when we reach past count or have checked all * locks in LRU. @@ -1263,7 +1280,8 @@ static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns, LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; } -typedef ldlm_policy_res_t (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *, +typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)( + struct ldlm_namespace *, struct ldlm_lock *, int, int, int); @@ -1281,6 +1299,8 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) return ldlm_cancel_lrur_policy; else if (flags & LDLM_CANCEL_PASSED) return ldlm_cancel_passed_policy; + else if (flags & LDLM_CANCEL_LRUR_NO_WAIT) + return ldlm_cancel_lrur_no_wait_policy; } else { if (flags & LDLM_CANCEL_AGED) return ldlm_cancel_aged_policy; @@ -1329,6 +1349,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, ldlm_cancel_lru_policy_t pf; struct ldlm_lock *lock, *next; int added = 0, unused, remained; + int no_wait = flags & (LDLM_CANCEL_NO_WAIT | LDLM_CANCEL_LRUR_NO_WAIT); spin_lock(&ns->ns_lock); unused = ns->ns_nr_unused; @@ -1341,7 +1362,8 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, LASSERT(pf); while (!list_empty(&ns->ns_unused_list)) { - ldlm_policy_res_t result; + enum ldlm_policy_res result; + time_t last_use = 0; /* all unused locks */ if (remained-- <= 0) @@ -1354,17 +1376,20 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, list_for_each_entry_safe(lock, next, &ns->ns_unused_list, l_lru) { /* No locks which got blocking requests. */ - LASSERT(!(lock->l_flags & LDLM_FL_BL_AST)); + LASSERT(!ldlm_is_bl_ast(lock)); - if (flags & LDLM_CANCEL_NO_WAIT && - lock->l_flags & LDLM_FL_SKIPPED) + if (no_wait && ldlm_is_skipped(lock)) /* already processed */ continue; + last_use = lock->l_last_used; + if (last_use == cfs_time_current()) + continue; + /* Somebody is already doing CANCEL. No need for this * lock in LRU, do not traverse it again. */ - if (!(lock->l_flags & LDLM_FL_CANCELING)) + if (!ldlm_is_canceling(lock)) break; ldlm_lock_remove_from_lru_nolock(lock); @@ -1407,12 +1432,14 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, lock_res_and_lock(lock); /* Check flags again under the lock. */ - if ((lock->l_flags & LDLM_FL_CANCELING) || - (ldlm_lock_remove_from_lru(lock) == 0)) { + if (ldlm_is_canceling(lock) || + (ldlm_lock_remove_from_lru_check(lock, last_use) == 0)) { /* Another thread is removing lock from LRU, or * somebody is already doing CANCEL, or there * is a blocking request which will send cancel - * by itself, or the lock is no longer unused. + * by itself, or the lock is no longer unused or + * the lock has been used since the pf() call and + * pages could be put under it. */ unlock_res_and_lock(lock); lu_ref_del(&lock->l_reference, @@ -1429,7 +1456,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, * where while we are doing cancel here, server is also * silently cancelling this lock. */ - lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; + ldlm_clear_cancel_on_block(lock); /* Setting the CBPENDING flag is a little misleading, * but prevents an important race; namely, once @@ -1526,8 +1553,7 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res, /* If somebody is already doing CANCEL, or blocking AST came, * skip this lock. */ - if (lock->l_flags & LDLM_FL_BL_AST || - lock->l_flags & LDLM_FL_CANCELING) + if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock)) continue; if (lockmode_compat(lock->l_granted_mode, mode)) @@ -1771,7 +1797,6 @@ static void ldlm_namespace_foreach(struct ldlm_namespace *ns, cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_res_iter_helper, &helper); - } /* non-blocking function to manipulate a lock whose cb_data is being put away. @@ -1887,7 +1912,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) int flags; /* Bug 11974: Do not replay a lock which is actively being canceled */ - if (lock->l_flags & LDLM_FL_CANCELING) { + if (ldlm_is_canceling(lock)) { LDLM_DEBUG(lock, "Not replaying canceled lock:"); return 0; } @@ -1896,7 +1921,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) * server might have long dropped it, but notification of that event was * lost by network. (and server granted conflicting lock already) */ - if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) { + if (ldlm_is_cancel_on_block(lock)) { LDLM_DEBUG(lock, "Not replaying reply-less lock:"); ldlm_lock_cancel(lock); return 0; diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c index 9dede87ad0a3..51a28d96af39 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -124,9 +120,15 @@ int ldlm_debugfs_setup(void) } rc = ldebugfs_add_vars(ldlm_debugfs_dir, ldlm_debugfs_list, NULL); + if (rc) { + CERROR("LProcFS failed in ldlm-init\n"); + goto err_svc; + } return 0; +err_svc: + ldebugfs_remove(&ldlm_svc_debugfs_dir); err_ns: ldebugfs_remove(&ldlm_ns_debugfs_dir); err_type: @@ -758,12 +760,12 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, list_for_each(tmp, q) { lock = list_entry(tmp, struct ldlm_lock, l_res_link); - if (lock->l_flags & LDLM_FL_CLEANED) { + if (ldlm_is_cleaned(lock)) { lock = NULL; continue; } LDLM_LOCK_GET(lock); - lock->l_flags |= LDLM_FL_CLEANED; + ldlm_set_cleaned(lock); break; } @@ -775,13 +777,13 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, /* Set CBPENDING so nothing in the cancellation path * can match this lock. */ - lock->l_flags |= LDLM_FL_CBPENDING; - lock->l_flags |= LDLM_FL_FAILED; + ldlm_set_cbpending(lock); + ldlm_set_failed(lock); lock->l_flags |= flags; /* ... without sending a CANCEL message for local_only. */ if (local_only) - lock->l_flags |= LDLM_FL_LOCAL_ONLY; + ldlm_set_local_only(lock); if (local_only && (lock->l_readers || lock->l_writers)) { /* This is a little bit gross, but much better than the @@ -1273,9 +1275,9 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, { check_res_locked(res); - LDLM_DEBUG(lock, "About to add this lock:\n"); + LDLM_DEBUG(lock, "About to add this lock:"); - if (lock->l_flags & LDLM_FL_DESTROYED) { + if (ldlm_is_destroyed(lock)) { CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); return; } @@ -1400,3 +1402,4 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res) LDLM_DEBUG_LIMIT(level, lock, "###"); } } +EXPORT_SYMBOL(ldlm_resource_dump); diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile index 9ac29e718da3..2cbb1b80bd41 100644 --- a/drivers/staging/lustre/lustre/llite/Makefile +++ b/drivers/staging/lustre/lustre/llite/Makefile @@ -1,10 +1,7 @@ obj-$(CONFIG_LUSTRE_FS) += lustre.o -obj-$(CONFIG_LUSTRE_LLITE_LLOOP) += llite_lloop.o lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \ rw.o namei.o symlink.o llite_mmap.o \ - xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o \ - rw26.o super25.o statahead.o \ - ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \ - vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o lproc_llite.o - -llite_lloop-y := lloop.o + xattr.o xattr_cache.o rw26.o super25.o statahead.o \ + glimpse.o lcommon_cl.o lcommon_misc.o \ + vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \ + lproc_llite.o diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index dd1c827013b9..463b1a360733 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -82,7 +78,7 @@ static void ll_release(struct dentry *de) * INVALID) so d_lookup() matches it, but we have no lock on it (so * lock_match() fails) and we spin around real_lookup(). */ -static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry, +static int ll_dcompare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { @@ -108,11 +104,8 @@ static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry, static inline int return_if_equal(struct ldlm_lock *lock, void *data) { - if ((lock->l_flags & - (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) == - (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) - return LDLM_ITER_CONTINUE; - return LDLM_ITER_STOP; + return (ldlm_is_canceling(lock) && ldlm_is_discard_data(lock)) ? + LDLM_ITER_CONTINUE : LDLM_ITER_STOP; } /* find any ldlm lock of the inode in mdc and lov @@ -209,27 +202,27 @@ int ll_d_init(struct dentry *de) void ll_intent_drop_lock(struct lookup_intent *it) { - if (it->it_op && it->d.lustre.it_lock_mode) { + if (it->it_op && it->it_lock_mode) { struct lustre_handle handle; - handle.cookie = it->d.lustre.it_lock_handle; + handle.cookie = it->it_lock_handle; CDEBUG(D_DLMTRACE, "releasing lock with cookie %#llx from it %p\n", handle.cookie, it); - ldlm_lock_decref(&handle, it->d.lustre.it_lock_mode); + ldlm_lock_decref(&handle, it->it_lock_mode); /* bug 494: intent_release may be called multiple times, from * this thread and we don't want to double-decref this lock */ - it->d.lustre.it_lock_mode = 0; - if (it->d.lustre.it_remote_lock_mode != 0) { - handle.cookie = it->d.lustre.it_remote_lock_handle; + it->it_lock_mode = 0; + if (it->it_remote_lock_mode != 0) { + handle.cookie = it->it_remote_lock_handle; CDEBUG(D_DLMTRACE, "releasing remote lock with cookie%#llx from it %p\n", handle.cookie, it); ldlm_lock_decref(&handle, - it->d.lustre.it_remote_lock_mode); - it->d.lustre.it_remote_lock_mode = 0; + it->it_remote_lock_mode); + it->it_remote_lock_mode = 0; } } } @@ -240,23 +233,23 @@ void ll_intent_release(struct lookup_intent *it) ll_intent_drop_lock(it); /* We are still holding extra reference on a request, need to free it */ if (it_disposition(it, DISP_ENQ_OPEN_REF)) - ptlrpc_req_finished(it->d.lustre.it_data); /* ll_file_open */ + ptlrpc_req_finished(it->it_request); /* ll_file_open */ if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */ - ptlrpc_req_finished(it->d.lustre.it_data); + ptlrpc_req_finished(it->it_request); - it->d.lustre.it_disposition = 0; - it->d.lustre.it_data = NULL; + it->it_disposition = 0; + it->it_request = NULL; } void ll_invalidate_aliases(struct inode *inode) { struct dentry *dentry; - CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_INODE, "marking dentries for ino "DFID"(%p) invalid\n", + PFID(ll_inode2fid(inode)), inode); - ll_lock_dcache(inode); + spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { CDEBUG(D_DENTRY, "dentry in drop %pd (%p) parent %p inode %p flags %d\n", dentry, dentry, dentry->d_parent, @@ -264,7 +257,7 @@ void ll_invalidate_aliases(struct inode *inode) d_lustre_invalidate(dentry, 0); } - ll_unlock_dcache(inode); + spin_unlock(&inode->i_lock); } int ll_revalidate_it_finish(struct ptlrpc_request *request, @@ -286,11 +279,11 @@ int ll_revalidate_it_finish(struct ptlrpc_request *request, void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode) { - if (it->d.lustre.it_lock_mode && inode) { + if (it->it_lock_mode && inode) { struct ll_sb_info *sbi = ll_i2sbi(inode); - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", - inode, inode->i_ino, inode->i_generation); + CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); } @@ -309,6 +302,17 @@ static int ll_revalidate_dentry(struct dentry *dentry, { struct inode *dir = d_inode(dentry->d_parent); + /* If this is intermediate component path lookup and we were able to get + * to this dentry, then its lock has not been revoked and the + * path component is valid. + */ + if (lookup_flags & LOOKUP_PARENT) + return 1; + + /* Symlink - always valid as long as the dentry was found */ + if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) + return 1; + /* * if open&create is set, talk to MDS to make sure file is created if * necessary, because we can't do this in ->open() later since that's diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index e4c82883e580..5b381779c827 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -158,11 +154,16 @@ static int ll_dir_filler(void *_hash, struct page *page0) int i; int rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash %llu\n", - inode->i_ino, inode->i_generation, inode, hash); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n", + PFID(ll_inode2fid(inode)), inode, hash); LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES); + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) + return PTR_ERR(op_data); + page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS); if (page_pool) { page_pool[0] = page0; @@ -177,8 +178,6 @@ static int ll_dir_filler(void *_hash, struct page *page0) page_pool[npages] = page; } - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); op_data->op_npages = npages; op_data->op_offset = hash; rc = md_readpage(exp, op_data, page_pool, &request); @@ -190,7 +189,7 @@ static int ll_dir_filler(void *_hash, struct page *page0) body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); /* Checked by mdc_readpage() */ if (body->valid & OBD_MD_FLSIZE) - cl_isize_write(inode, body->size); + i_size_write(inode, body->size); nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1) >> PAGE_SHIFT; @@ -363,7 +362,7 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash, ll_finish_md_op_data(op_data); - request = (struct ptlrpc_request *)it.d.lustre.it_data; + request = (struct ptlrpc_request *)it.it_request; if (request) ptlrpc_req_finished(request); if (rc < 0) { @@ -372,10 +371,10 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash, return ERR_PTR(rc); } - CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n", - dir, dir->i_ino, dir->i_generation); + CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n", + PFID(ll_inode2fid(dir)), dir); md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, - &it.d.lustre.it_lock_handle, dir, NULL); + &it.it_lock_handle, dir, NULL); } else { /* for cross-ref object, l_ast_data of the lock may not be set, * we reset it here @@ -468,6 +467,28 @@ fail: goto out_unlock; } +/** + * return IF_* type for given lu_dirent entry. + * IF_* flag shld be converted to particular OS file type in + * platform llite module. + */ +static __u16 ll_dirent_type_get(struct lu_dirent *ent) +{ + __u16 type = 0; + struct luda_type *lt; + int len = 0; + + if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) { + const unsigned int align = sizeof(struct luda_type) - 1; + + len = le16_to_cpu(ent->lde_namelen); + len = (len + align) & ~align; + lt = (void *)ent->lde_name + len; + type = IFTODT(le16_to_cpu(lt->lt_type)); + } + return type; +} + int ll_dir_read(struct inode *inode, struct dir_context *ctx) { struct ll_inode_info *info = ll_i2info(inode); @@ -589,15 +610,16 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx) struct inode *inode = file_inode(filp); struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp); struct ll_sb_info *sbi = ll_i2sbi(inode); + __u64 pos = lfd ? lfd->lfd_pos : 0; int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; int api32 = ll_need_32bit_api(sbi); int rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n", - inode->i_ino, inode->i_generation, - inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos %lu/%llu 32bit_api %d\n", + PFID(ll_inode2fid(inode)), inode, (unsigned long)pos, + i_size_read(inode), api32); - if (lfd->lfd_pos == MDS_DIR_END_OFF) { + if (pos == MDS_DIR_END_OFF) { /* * end-of-file. */ @@ -605,9 +627,10 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx) goto out; } - ctx->pos = lfd->lfd_pos; + ctx->pos = pos; rc = ll_dir_read(inode, ctx); - lfd->lfd_pos = ctx->pos; + if (lfd) + lfd->lfd_pos = ctx->pos; if (ctx->pos == MDS_DIR_END_OFF) { if (api32) ctx->pos = LL_DIR_END_OFF_32BIT; @@ -804,9 +827,8 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc < 0) { - CDEBUG(D_INFO, "md_getattr failed on inode %lu/%u: rc %d\n", - inode->i_ino, - inode->i_generation, rc); + CDEBUG(D_INFO, "md_getattr failed on inode "DFID": rc %d\n", + PFID(ll_inode2fid(inode)), rc); goto out; } @@ -916,7 +938,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) } /* Read current file data version */ - rc = ll_data_version(inode, &data_version, 1); + rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); iput(inode); if (rc != 0) { CDEBUG(D_HSM, "Could not read file data version of " @@ -936,6 +958,9 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) } progress: + /* On error, the request should be considered as completed */ + if (hpk.hpk_errval > 0) + hpk.hpk_flags |= HP_FLAG_COMPLETED; rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), &hpk, NULL); @@ -997,8 +1022,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) goto progress; } - rc = ll_data_version(inode, &data_version, - copy->hc_hai.hai_action == HSMA_ARCHIVE); + rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); iput(inode); if (rc) { CDEBUG(D_HSM, "Could not read file data version. Request could not be confirmed.\n"); @@ -1033,7 +1057,6 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) /* hpk_errval must be >= 0 */ hpk.hpk_errval = EBUSY; } - } progress: @@ -1049,17 +1072,11 @@ static int copy_and_ioctl(int cmd, struct obd_export *exp, void *copy; int rc; - copy = kzalloc(size, GFP_NOFS); - if (!copy) - return -ENOMEM; - - if (copy_from_user(copy, data, size)) { - rc = -EFAULT; - goto out; - } + copy = memdup_user(data, size); + if (IS_ERR(copy)) + return PTR_ERR(copy); rc = obd_iocontrol(cmd, exp, size, copy, NULL); -out: kfree(copy); return rc; @@ -1080,8 +1097,7 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) case Q_QUOTAOFF: case Q_SETQUOTA: case Q_SETINFO: - if (!capable(CFS_CAP_SYS_ADMIN) || - sbi->ll_flags & LL_SBI_RMT_CLIENT) + if (!capable(CFS_CAP_SYS_ADMIN)) return -EPERM; break; case Q_GETQUOTA: @@ -1089,8 +1105,7 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) || (type == GRPQUOTA && !in_egroup_p(make_kgid(&init_user_ns, id)))) && - (!capable(CFS_CAP_SYS_ADMIN) || - sbi->ll_flags & LL_SBI_RMT_CLIENT)) + !capable(CFS_CAP_SYS_ADMIN)) return -EPERM; break; case Q_GETINFO: @@ -1101,9 +1116,6 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) } if (valid != QC_GENERAL) { - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) - return -EOPNOTSUPP; - if (cmd == Q_GETINFO) qctl->qc_cmd = Q_GETOINFO; else if (cmd == Q_GETQUOTA) @@ -1242,8 +1254,8 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct obd_ioctl_data *data; int rc = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n", - inode->i_ino, inode->i_generation, inode, cmd); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%#x\n", + PFID(ll_inode2fid(inode)), inode, cmd); /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ @@ -1362,7 +1374,6 @@ out_free: lmv_out_free: obd_ioctl_freedata(buf, len); return rc; - } case LL_IOC_LOV_SETSTRIPE: { struct lov_user_md_v3 lumv3; @@ -1474,8 +1485,9 @@ free_lmv: cmd == LL_IOC_MDC_GETINFO)) { rc = 0; goto skip_lmm; - } else + } else { goto out_req; + } } if (cmd == IOC_MDC_GETFILESTRIPE || @@ -1511,7 +1523,9 @@ skip_lmm: st.st_atime = body->atime; st.st_mtime = body->mtime; st.st_ctime = body->ctime; - st.st_ino = inode->i_ino; + st.st_ino = cl_fid_build_ino(&body->fid1, + sbi->ll_flags & + LL_SBI_32BIT_API); lmdp = (struct lov_user_mds_data __user *)arg; if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) { @@ -1604,8 +1618,7 @@ free_lmm: struct obd_quotactl *oqctl; int error = 0; - if (!capable(CFS_CAP_SYS_ADMIN) || - sbi->ll_flags & LL_SBI_RMT_CLIENT) + if (!capable(CFS_CAP_SYS_ADMIN)) return -EPERM; oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS); @@ -1628,8 +1641,7 @@ free_lmm: case OBD_IOC_POLL_QUOTACHECK: { struct if_quotacheck *check; - if (!capable(CFS_CAP_SYS_ADMIN) || - sbi->ll_flags & LL_SBI_RMT_CLIENT) + if (!capable(CFS_CAP_SYS_ADMIN)) return -EPERM; check = kzalloc(sizeof(*check), GFP_NOFS); @@ -1686,19 +1698,6 @@ out_quotactl: return ll_get_obd_name(inode, cmd, arg); case LL_IOC_FLUSHCTX: return ll_flush_ctx(inode); -#ifdef CONFIG_FS_POSIX_ACL - case LL_IOC_RMTACL: { - if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) { - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - - rc = rct_add(&sbi->ll_rct, current_pid(), arg); - if (!rc) - fd->fd_flags |= LL_FILE_RMTACL; - return rc; - } else - return 0; - } -#endif case LL_IOC_GETOBDCOUNT: { int count, vallen; struct obd_export *exp; @@ -1817,6 +1816,9 @@ out_quotactl: return rc; } case LL_IOC_HSM_CT_START: + if (!capable(CFS_CAP_SYS_ADMIN)) + return -EPERM; + rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg, sizeof(struct lustre_kernelcomm)); return rc; @@ -1865,7 +1867,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) int api32 = ll_need_32bit_api(sbi); loff_t ret = -EINVAL; - inode_lock(inode); switch (origin) { case SEEK_SET: break; @@ -1903,7 +1904,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) goto out; out: - inode_unlock(inode); return ret; } @@ -1922,7 +1922,7 @@ const struct file_operations ll_dir_operations = { .open = ll_dir_open, .release = ll_dir_release, .read = generic_read_dir, - .iterate = ll_readdir, + .iterate_shared = ll_readdir, .unlocked_ioctl = ll_dir_ioctl, .fsync = ll_fsync, }; diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index cf619af3caf5..57281b9e31ff 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -45,6 +41,7 @@ #include "../include/lustre_lite.h" #include <linux/pagemap.h> #include <linux/file.h> +#include <linux/mount.h> #include "llite_internal.h" #include "../include/lustre/ll_fiemap.h" @@ -87,8 +84,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, op_data->op_attr.ia_ctime = inode->i_ctime; op_data->op_attr.ia_size = i_size_read(inode); op_data->op_attr_blocks = inode->i_blocks; - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = - ll_inode_to_ext_flags(inode->i_flags); + op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags); op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch; if (fh) op_data->op_handle = *fh; @@ -170,13 +166,15 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, */ rc = ll_som_update(inode, op_data); if (rc) { - CERROR("inode %lu mdc Size-on-MDS update failed: rc = %d\n", - inode->i_ino, rc); + CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n", + ll_i2mdexp(inode)->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); rc = 0; } } else if (rc) { - CERROR("inode %lu mdc close failed: rc = %d\n", - inode->i_ino, rc); + CERROR("%s: inode "DFID" mdc close failed: rc = %d\n", + ll_i2mdexp(inode)->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); } /* DATA_MODIFIED flag was successfully sent on close, cancel data @@ -278,7 +276,7 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode, /* clear group lock, if present */ if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) - ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid); + ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid); if (fd->fd_lease_och) { bool lease_broken; @@ -343,20 +341,8 @@ int ll_file_release(struct inode *inode, struct file *file) struct ll_inode_info *lli = ll_i2info(inode); int rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - -#ifdef CONFIG_FS_POSIX_ACL - if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) { - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - - if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) { - fd->fd_flags &= ~LL_FILE_RMTACL; - rct_del(&sbi->ll_rct, current_pid()); - et_search_free(&sbi->ll_et, current_pid()); - } - } -#endif + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); if (!is_root_inode(inode)) ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1); @@ -413,7 +399,19 @@ static int ll_intent_file_open(struct dentry *dentry, void *lmm, * parameters. No need for the open lock */ if (!lmm && lmmsize == 0) { - itp->it_flags |= MDS_OPEN_LOCK; + struct ll_dentry_data *ldd = ll_d2d(dentry); + /* + * If we came via ll_iget_for_nfs, then we need to request + * struct ll_dentry_data *ldd = ll_d2d(file->f_dentry); + * + * NB: when ldd is NULL, it must have come via normal + * lookup path only, since ll_iget_for_nfs always calls + * ll_d_init(). + */ + if (ldd && ldd->lld_nfs_dentry) { + ldd->lld_nfs_dentry = 0; + itp->it_flags |= MDS_OPEN_LOCK; + } if (itp->it_flags & FMODE_WRITE) opc = LUSTRE_OPC_CREATE; } @@ -451,7 +449,7 @@ static int ll_intent_file_open(struct dentry *dentry, void *lmm, } rc = ll_prep_inode(&inode, req, NULL, itp); - if (!rc && itp->d.lustre.it_lock_mode) + if (!rc && itp->it_lock_mode) ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL); out: @@ -478,13 +476,12 @@ void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch) static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, struct obd_client_handle *och) { - struct ptlrpc_request *req = it->d.lustre.it_data; struct mdt_body *body; - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY); och->och_fh = body->handle; och->och_fid = body->fid1; - och->och_lease_handle.cookie = it->d.lustre.it_lock_handle; + och->och_lease_handle.cookie = it->it_lock_handle; och->och_magic = OBD_CLIENT_HANDLE_MAGIC; och->och_flags = it->it_flags; @@ -502,7 +499,6 @@ static int ll_local_open(struct file *file, struct lookup_intent *it, LASSERT(fd); if (och) { - struct ptlrpc_request *req = it->d.lustre.it_data; struct mdt_body *body; int rc; @@ -510,13 +506,19 @@ static int ll_local_open(struct file *file, struct lookup_intent *it, if (rc != 0) return rc; - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + body = req_capsule_server_get(&it->it_request->rq_pill, + &RMF_MDT_BODY); ll_ioepoch_open(lli, body->ioepoch); } LUSTRE_FPRIVATE(file) = fd; ll_readahead_init(inode, &fd->fd_ras); fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + + /* ll_cl_context initialize */ + rwlock_init(&fd->fd_lock); + INIT_LIST_HEAD(&fd->fd_lccs); + return 0; } @@ -543,8 +545,8 @@ int ll_file_open(struct inode *inode, struct file *file) struct ll_file_data *fd; int rc = 0, opendir_set = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino, - inode->i_generation, inode, file->f_flags); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n", + PFID(ll_inode2fid(inode)), inode, file->f_flags); it = file->private_data; /* XXX: compat macro */ file->private_data = NULL; /* prevent ll_local_open assertion */ @@ -572,7 +574,7 @@ int ll_file_open(struct inode *inode, struct file *file) return 0; } - if (!it || !it->d.lustre.it_disposition) { + if (!it || !it->it_disposition) { /* Convert f_flags into access mode. We cannot use file->f_mode, * because everything but O_ACCMODE mask was stripped from * there @@ -642,7 +644,7 @@ restart: } } else { LASSERT(*och_usecount == 0); - if (!it->d.lustre.it_disposition) { + if (!it->it_disposition) { /* We cannot just request lock handle now, new ELC code * means that one of other OPEN locks for this file * could be cancelled, and since blocking ast handler @@ -677,7 +679,9 @@ restart: if (rc) goto out_och_free; - LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF)); + LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF), + "inode %p: disposition %x, status %d\n", inode, + it_disposition(it, ~0), it->it_status); rc = ll_local_open(file, it, fd, *och_p); if (rc) @@ -720,7 +724,7 @@ out_openerr: } if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) { - ptlrpc_req_finished(it->d.lustre.it_data); + ptlrpc_req_finished(it->it_request); it_clear_disposition(it, DISP_ENQ_OPEN_REF); } @@ -861,12 +865,12 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, /* already get lease, handle lease lock */ ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL); - if (it.d.lustre.it_lock_mode == 0 || - it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) { + if (it.it_lock_mode == 0 || + it.it_lock_bits != MDS_INODELOCK_OPEN) { /* open lock must return for lease */ CERROR(DFID "lease granted but no open lock, %d/%llu.\n", - PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode, - it.d.lustre.it_lock_bits); + PFID(ll_inode2fid(inode)), it.it_lock_mode, + it.it_lock_bits); rc = -EPROTO; goto out_close; } @@ -875,16 +879,19 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, return och; out_close: - rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL); - if (rc2) - CERROR("Close openhandle returned %d\n", rc2); - - /* cancel open lock */ - if (it.d.lustre.it_lock_mode != 0) { + /* Cancel open lock */ + if (it.it_lock_mode != 0) { ldlm_lock_decref_and_cancel(&och->och_lease_handle, - it.d.lustre.it_lock_mode); - it.d.lustre.it_lock_mode = 0; + it.it_lock_mode); + it.it_lock_mode = 0; + och->och_lease_handle.cookie = 0ULL; } + rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL); + if (rc2 < 0) + CERROR("%s: error closing file "DFID": %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&ll_i2info(inode)->lli_fid), rc2); + och = NULL; /* och has been freed in ll_close_inode_openhandle() */ out_release_it: ll_intent_release(&it); out: @@ -908,7 +915,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, lock_res_and_lock(lock); cancelled = ldlm_is_cancel(lock); unlock_res_and_lock(lock); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } CDEBUG(D_INODE, "lease for " DFID " broken? %d\n", @@ -926,7 +933,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, /* Fills the obdo with the attributes for the lsm */ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, - struct obdo *obdo, __u64 ioepoch, int sync) + struct obdo *obdo, __u64 ioepoch, int dv_flags) { struct ptlrpc_request_set *set; struct obd_info oinfo = { }; @@ -945,9 +952,11 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLGROUP | OBD_MD_FLEPOCH | OBD_MD_FLDATAVERSION; - if (sync) { + if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) { oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS; oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK; + if (dv_flags & LL_DV_WR_FLUSH) + oinfo.oi_oa->o_flags |= OBD_FL_FLUSH; } set = ptlrpc_prep_set(); @@ -960,11 +969,16 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); } - if (rc == 0) + if (rc == 0) { oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLSIZE | - OBD_MD_FLDATAVERSION); + OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS); + if (dv_flags & LL_DV_WR_FLUSH && + !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS && + oinfo.oi_oa->o_flags & OBD_FL_FLUSH)) + return -ENOTSUPP; + } return rc; } @@ -980,7 +994,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo, lsm = ccc_inode_lsm_get(inode); rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode), - obdo, ioepoch, sync); + obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0); if (rc == 0) { struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi; @@ -994,50 +1008,57 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo, return rc; } -int ll_merge_lvb(const struct lu_env *env, struct inode *inode) +int ll_merge_attr(const struct lu_env *env, struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); struct cl_object *obj = lli->lli_clob; - struct cl_attr *attr = ccc_env_thread_attr(env); - struct ost_lvb lvb; + struct cl_attr *attr = vvp_env_thread_attr(env); + s64 atime; + s64 mtime; + s64 ctime; int rc = 0; ll_inode_size_lock(inode); + /* merge timestamps the most recently obtained from mds with * timestamps obtained from osts */ - LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime; - LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime; - LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime; + LTIME_S(inode->i_atime) = lli->lli_atime; + LTIME_S(inode->i_mtime) = lli->lli_mtime; + LTIME_S(inode->i_ctime) = lli->lli_ctime; - lvb.lvb_size = i_size_read(inode); - lvb.lvb_blocks = inode->i_blocks; - lvb.lvb_mtime = LTIME_S(inode->i_mtime); - lvb.lvb_atime = LTIME_S(inode->i_atime); - lvb.lvb_ctime = LTIME_S(inode->i_ctime); + mtime = LTIME_S(inode->i_mtime); + atime = LTIME_S(inode->i_atime); + ctime = LTIME_S(inode->i_ctime); cl_object_attr_lock(obj); rc = cl_object_attr_get(env, obj, attr); cl_object_attr_unlock(obj); - if (rc == 0) { - if (lvb.lvb_atime < attr->cat_atime) - lvb.lvb_atime = attr->cat_atime; - if (lvb.lvb_ctime < attr->cat_ctime) - lvb.lvb_ctime = attr->cat_ctime; - if (lvb.lvb_mtime < attr->cat_mtime) - lvb.lvb_mtime = attr->cat_mtime; + if (rc != 0) + goto out_size_unlock; - CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", - PFID(&lli->lli_fid), attr->cat_size); - cl_isize_write_nolock(inode, attr->cat_size); + if (atime < attr->cat_atime) + atime = attr->cat_atime; - inode->i_blocks = attr->cat_blocks; + if (ctime < attr->cat_ctime) + ctime = attr->cat_ctime; - LTIME_S(inode->i_mtime) = lvb.lvb_mtime; - LTIME_S(inode->i_atime) = lvb.lvb_atime; - LTIME_S(inode->i_ctime) = lvb.lvb_ctime; - } + if (mtime < attr->cat_mtime) + mtime = attr->cat_mtime; + + CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", + PFID(&lli->lli_fid), attr->cat_size); + + i_size_write(inode, attr->cat_size); + + inode->i_blocks = attr->cat_blocks; + + LTIME_S(inode->i_mtime) = mtime; + LTIME_S(inode->i_atime) = atime; + LTIME_S(inode->i_ctime) = ctime; + +out_size_unlock: ll_inode_size_unlock(inode); return rc; @@ -1120,47 +1141,50 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, struct cl_io *io; ssize_t result; + CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n", + file->f_path.dentry->d_name.name, iot, *ppos, count); + restart: - io = ccc_env_thread_io(env); + io = vvp_env_thread_io(env); ll_io_init(io, file, iot == CIT_WRITE); if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { struct vvp_io *vio = vvp_env_io(env); - struct ccc_io *cio = ccc_env_io(env); int write_mutex_locked = 0; - cio->cui_fd = LUSTRE_FPRIVATE(file); - vio->cui_io_subtype = args->via_io_subtype; + vio->vui_fd = LUSTRE_FPRIVATE(file); + vio->vui_io_subtype = args->via_io_subtype; - switch (vio->cui_io_subtype) { + switch (vio->vui_io_subtype) { case IO_NORMAL: - cio->cui_iter = args->u.normal.via_iter; - cio->cui_iocb = args->u.normal.via_iocb; + vio->vui_iter = args->u.normal.via_iter; + vio->vui_iocb = args->u.normal.via_iocb; if ((iot == CIT_WRITE) && - !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { if (mutex_lock_interruptible(&lli-> lli_write_mutex)) { result = -ERESTARTSYS; goto out; } write_mutex_locked = 1; - } else if (iot == CIT_READ) { - down_read(&lli->lli_trunc_sem); } + down_read(&lli->lli_trunc_sem); break; case IO_SPLICE: - vio->u.splice.cui_pipe = args->u.splice.via_pipe; - vio->u.splice.cui_flags = args->u.splice.via_flags; + vio->u.splice.vui_pipe = args->u.splice.via_pipe; + vio->u.splice.vui_flags = args->u.splice.via_flags; break; default: - CERROR("Unknown IO type - %u\n", vio->cui_io_subtype); + CERROR("Unknown IO type - %u\n", vio->vui_io_subtype); LBUG(); } + ll_cl_add(file, env, io); result = cl_io_loop(env, io); + ll_cl_remove(file, env); + if (args->via_io_subtype == IO_NORMAL) + up_read(&lli->lli_trunc_sem); if (write_mutex_locked) mutex_unlock(&lli->lli_write_mutex); - else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ) - up_read(&lli->lli_trunc_sem); } else { /* cl_io_rw_init() handled IO */ result = io->ci_result; @@ -1197,6 +1221,7 @@ out: fd->fd_write_failed = true; } } + CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result); return result; } @@ -1212,7 +1237,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (IS_ERR(env)) return PTR_ERR(env); - args = vvp_env_args(env, IO_NORMAL); + args = ll_env_args(env, IO_NORMAL); args->u.normal.via_iter = to; args->u.normal.via_iocb = iocb; @@ -1236,7 +1261,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (IS_ERR(env)) return PTR_ERR(env); - args = vvp_env_args(env, IO_NORMAL); + args = ll_env_args(env, IO_NORMAL); args->u.normal.via_iter = from; args->u.normal.via_iocb = iocb; @@ -1262,7 +1287,7 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, if (IS_ERR(env)) return PTR_ERR(env); - args = vvp_env_args(env, IO_SPLICE); + args = ll_env_args(env, IO_SPLICE); args->u.splice.via_pipe = pipe; args->u.splice.via_flags = flags; @@ -1354,7 +1379,8 @@ static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg) } int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, - int flags, struct lov_user_md *lum, int lum_size) + __u64 flags, struct lov_user_md *lum, + int lum_size) { struct lov_stripe_md *lsm = NULL; struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; @@ -1363,8 +1389,8 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, lsm = ccc_inode_lsm_get(inode); if (lsm) { ccc_inode_lsm_put(inode, lsm); - CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n", - inode->i_ino); + CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n", + PFID(ll_inode2fid(inode))); rc = -EEXIST; goto out; } @@ -1373,7 +1399,7 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, rc = ll_intent_file_open(dentry, lum, lum_size, &oit); if (rc) goto out_unlock; - rc = oit.d.lustre.it_status; + rc = oit.it_status; if (rc < 0) goto out_req_free; @@ -1386,7 +1412,7 @@ out_unlock: out: return rc; out_req_free: - ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data); + ptlrpc_req_finished((struct ptlrpc_request *)oit.it_request); goto out; } @@ -1478,7 +1504,7 @@ out: static int ll_lov_setea(struct inode *inode, struct file *file, unsigned long arg) { - int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; + __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; struct lov_user_md *lump; int lum_size = sizeof(struct lov_user_md) + sizeof(struct lov_user_ost_data); @@ -1512,7 +1538,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file, struct lov_user_md_v1 __user *lumv1p = (void __user *)arg; struct lov_user_md_v3 __user *lumv3p = (void __user *)arg; int lum_size, rc; - int flags = FMODE_WRITE; + __u64 flags = FMODE_WRITE; /* first try with v1 which is smaller than v3 */ lum_size = sizeof(struct lov_user_md_v1); @@ -1561,7 +1587,7 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) { struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ccc_grouplock grouplock; + struct ll_grouplock grouplock; int rc; if (arg == 0) { @@ -1575,14 +1601,14 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) spin_lock(&lli->lli_lock); if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { CWARN("group lock already existed with gid %lu\n", - fd->fd_grouplock.cg_gid); + fd->fd_grouplock.lg_gid); spin_unlock(&lli->lli_lock); return -EINVAL; } - LASSERT(!fd->fd_grouplock.cg_lock); + LASSERT(!fd->fd_grouplock.lg_lock); spin_unlock(&lli->lli_lock); - rc = cl_get_grouplock(cl_i2info(inode)->lli_clob, + rc = cl_get_grouplock(ll_i2info(inode)->lli_clob, arg, (file->f_flags & O_NONBLOCK), &grouplock); if (rc) return rc; @@ -1608,7 +1634,7 @@ static int ll_put_grouplock(struct inode *inode, struct file *file, { struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ccc_grouplock grouplock; + struct ll_grouplock grouplock; spin_lock(&lli->lli_lock); if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) { @@ -1616,11 +1642,11 @@ static int ll_put_grouplock(struct inode *inode, struct file *file, CWARN("no group lock held\n"); return -EINVAL; } - LASSERT(fd->fd_grouplock.cg_lock); + LASSERT(fd->fd_grouplock.lg_lock); - if (fd->fd_grouplock.cg_gid != arg) { + if (fd->fd_grouplock.lg_gid != arg) { CWARN("group lock %lu doesn't match current id %lu\n", - arg, fd->fd_grouplock.cg_gid); + arg, fd->fd_grouplock.lg_gid); spin_unlock(&lli->lli_lock); return -EINVAL; } @@ -1674,7 +1700,7 @@ int ll_release_openhandle(struct inode *inode, struct lookup_intent *it) out: /* this one is in place of ll_file_open */ if (it_disposition(it, DISP_ENQ_OPEN_REF)) { - ptlrpc_req_finished(it->d.lustre.it_data); + ptlrpc_req_finished(it->it_request); it_clear_disposition(it, DISP_ENQ_OPEN_REF); } return rc; @@ -1861,11 +1887,12 @@ error: * This value is computed using stripe object version on OST. * Version is computed using server side locking. * - * @param extent_lock Take extent lock. Not needed if a process is already - * holding the OST object group locks. + * @param sync if do sync on the OST side; + * 0: no sync + * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs + * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs */ -int ll_data_version(struct inode *inode, __u64 *data_version, - int extent_lock) +int ll_data_version(struct inode *inode, __u64 *data_version, int flags) { struct lov_stripe_md *lsm = NULL; struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -1887,7 +1914,7 @@ int ll_data_version(struct inode *inode, __u64 *data_version, goto out; } - rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, extent_lock); + rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags); if (rc == 0) { if (!(obdo->o_valid & OBD_MD_FLDATAVERSION)) rc = -EOPNOTSUPP; @@ -1923,7 +1950,7 @@ int ll_hsm_release(struct inode *inode) } /* Grab latest data_version and [am]time values */ - rc = ll_data_version(inode, &data_version, 1); + rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH); if (rc != 0) goto out; @@ -1933,7 +1960,7 @@ int ll_hsm_release(struct inode *inode) goto out; } - ll_merge_lvb(env, inode); + ll_merge_attr(env, inode); cl_env_nested_put(&nest, env); /* Release the file. @@ -2227,8 +2254,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct ll_file_data *fd = LUSTRE_FPRIVATE(file); int flags, rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino, - inode->i_generation, inode, cmd); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),cmd=%x\n", + PFID(ll_inode2fid(inode)), inode, cmd); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ @@ -2331,9 +2358,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (copy_from_user(&idv, (char __user *)arg, sizeof(idv))) return -EFAULT; - rc = ll_data_version(inode, &idv.idv_version, - !(idv.idv_flags & LL_DV_NOFLUSH)); - + idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH; + rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags); if (rc == 0 && copy_to_user((char __user *)arg, &idv, sizeof(idv))) return -EFAULT; @@ -2499,7 +2525,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) rc = och->och_flags & (FMODE_READ | FMODE_WRITE); unlock_res_and_lock(lock); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } } mutex_unlock(&lli->lli_och_mutex); @@ -2537,9 +2563,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin) retval = offset + ((origin == SEEK_END) ? i_size_read(inode) : (origin == SEEK_CUR) ? file->f_pos : 0); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n", - inode->i_ino, inode->i_generation, inode, retval, retval, - origin); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n", + PFID(ll_inode2fid(inode)), inode, retval, retval, origin); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) { @@ -2603,8 +2628,8 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, if (IS_ERR(env)) return PTR_ERR(env); - io = ccc_env_thread_io(env); - io->ci_obj = cl_i2info(inode)->lli_clob; + io = vvp_env_thread_io(env); + io->ci_obj = ll_i2info(inode)->lli_clob; io->ci_ignore_layout = ignore_layout; /* initialize parameters for sync */ @@ -2634,8 +2659,8 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct ptlrpc_request *req; int rc, err; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1); rc = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -2693,8 +2718,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) int rc; int rc2 = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n", - inode->i_ino, file_lock); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n", + PFID(ll_inode2fid(inode)), file_lock); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1); @@ -2777,9 +2802,9 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) if (IS_ERR(op_data)) return PTR_ERR(op_data); - CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n", - inode->i_ino, flock.l_flock.pid, flags, einfo.ei_mode, - flock.l_flock.start, flock.l_flock.end); + CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n", + PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags, + einfo.ei_mode, flock.l_flock.start, flock.l_flock.end); rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, op_data, &lockh, &flock, 0, NULL /* req */, flags); @@ -2901,8 +2926,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) struct obd_export *exp; int rc = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%pd\n", - inode->i_ino, inode->i_generation, inode, dentry); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%pd\n", + PFID(ll_inode2fid(inode)), inode, dentry); exp = ll_i2mdexp(inode); @@ -2949,8 +2974,11 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) * here to preserve get_cwd functionality on 2.6. * Bug 10503 */ - if (!d_inode(dentry)->i_nlink) + if (!d_inode(dentry)->i_nlink) { + spin_lock(&inode->i_lock); d_lustre_invalidate(dentry, 0); + spin_unlock(&inode->i_lock); + } ll_lookup_finish_locks(&oit, inode); } else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) { @@ -2998,9 +3026,9 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) /* if object isn't regular file, don't validate size */ if (!S_ISREG(inode->i_mode)) { - LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime; - LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime; - LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime; + LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime; + LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime; + LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime; } else { /* In case of restore, the MDT has the right size and has * already send it back without granting the layout lock, @@ -3101,6 +3129,9 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type) spin_lock(&lli->lli_lock); /* VFS' acl_permission_check->check_acl will release the refcount */ acl = posix_acl_dup(lli->lli_posix_acl); +#ifdef CONFIG_FS_POSIX_ACL + forget_cached_acl(inode, type); +#endif spin_unlock(&lli->lli_lock); return acl; @@ -3124,11 +3155,8 @@ int ll_inode_permission(struct inode *inode, int mask) return rc; } - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n", - inode->i_ino, inode->i_generation, inode, inode->i_mode, mask); - - if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT) - return lustre_check_remote_perm(inode, mask); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n", + PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1); rc = generic_permission(inode, mask); @@ -3335,10 +3363,10 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) int rc; CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n", - PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY), + PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock), lock->l_lvb_data, lock->l_lvb_len); - if (lock->l_lvb_data && (lock->l_flags & LDLM_FL_LVB_READY)) + if (lock->l_lvb_data && ldlm_is_lvb_ready(lock)) return 0; /* if layout lock was granted right away, the layout is returned @@ -3415,14 +3443,14 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, LASSERT(lock); LASSERT(ldlm_has_layout(lock)); - LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d", - inode, PFID(&lli->lli_fid), reconf); + LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d", + PFID(&lli->lli_fid), inode, reconf); /* in case this is a caching lock and reinstate with new inode */ md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL); lock_res_and_lock(lock); - lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY); + lvb_ready = ldlm_is_lvb_ready(lock); unlock_res_and_lock(lock); /* checking lvb_ready is racy but this is okay. The worst case is * that multi processes may configure the file on the same time. @@ -3487,9 +3515,9 @@ out: /* wait for IO to complete if it's still being used. */ if (wait_layout) { - CDEBUG(D_INODE, "%s: %p/" DFID " wait for layout reconf.\n", + CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n", ll_get_fsname(inode->i_sb, NULL, 0), - inode, PFID(&lli->lli_fid)); + PFID(&lli->lli_fid), inode); memset(&conf, 0, sizeof(conf)); conf.coc_opc = OBJECT_CONF_WAIT; @@ -3498,7 +3526,8 @@ out: if (rc == 0) rc = -EAGAIN; - CDEBUG(D_INODE, "file: " DFID " waiting layout return: %d.\n", + CDEBUG(D_INODE, "%s: file="DFID" waiting layout return: %d.\n", + ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), rc); } return rc; @@ -3571,19 +3600,19 @@ again: it.it_op = IT_LAYOUT; lockh.cookie = 0ULL; - LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/" DFID "", - ll_get_fsname(inode->i_sb, NULL, 0), inode, - PFID(&lli->lli_fid)); + LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&lli->lli_fid), inode); rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh, NULL, 0, NULL, 0); - ptlrpc_req_finished(it.d.lustre.it_data); - it.d.lustre.it_data = NULL; + ptlrpc_req_finished(it.it_request); + it.it_request = NULL; ll_finish_md_op_data(op_data); - mode = it.d.lustre.it_lock_mode; - it.d.lustre.it_lock_mode = 0; + mode = it.it_lock_mode; + it.it_lock_mode = 0; ll_intent_drop_lock(&it); if (rc == 0) { @@ -3601,7 +3630,7 @@ again: /** * This function send a restore request to the MDT */ -int ll_layout_restore(struct inode *inode) +int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length) { struct hsm_user_request *hur; int len, rc; @@ -3617,9 +3646,10 @@ int ll_layout_restore(struct inode *inode) hur->hur_request.hr_flags = 0; memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid, sizeof(hur->hur_user_item[0].hui_fid)); - hur->hur_user_item[0].hui_extent.length = -1; + hur->hur_user_item[0].hui_extent.offset = offset; + hur->hur_user_item[0].hui_extent.length = length; hur->hur_request.hr_itemcount = 1; - rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp, + rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp, len, hur, NULL); kfree(hur); return rc; diff --git a/drivers/staging/lustre/lustre/lclient/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c index c4e8a0878ac8..92004a05f9ee 100644 --- a/drivers/staging/lustre/lustre/lclient/glimpse.c +++ b/drivers/staging/lustre/lustre/llite/glimpse.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -52,7 +48,6 @@ #include <linux/file.h> #include "../include/cl_object.h" -#include "../include/lclient.h" #include "../llite/llite_internal.h" static const struct cl_lock_descr whole_file = { @@ -70,14 +65,14 @@ static const struct cl_lock_descr whole_file = { blkcnt_t dirty_cnt(struct inode *inode) { blkcnt_t cnt = 0; - struct ccc_object *vob = cl_inode2ccc(inode); + struct vvp_object *vob = cl_inode2vvp(inode); void *results[1]; if (inode->i_mapping) cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree, results, 0, 1, PAGECACHE_TAG_DIRTY); - if (cnt == 0 && atomic_read(&vob->cob_mmap_cnt) > 0) + if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0) cnt = 1; return (cnt > 0) ? 1 : 0; @@ -86,17 +81,17 @@ blkcnt_t dirty_cnt(struct inode *inode) int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, struct inode *inode, struct cl_object *clob, int agl) { - struct cl_lock_descr *descr = &ccc_env_info(env)->cti_descr; - struct cl_inode_info *lli = cl_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); const struct lu_fid *fid = lu_object_fid(&clob->co_lu); - struct ccc_io *cio = ccc_env_io(env); - struct cl_lock *lock; int result; result = 0; if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) { - CDEBUG(D_DLMTRACE, "Glimpsing inode "DFID"\n", PFID(fid)); + CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid)); if (lli->lli_has_smd) { + struct cl_lock *lock = vvp_env_lock(env); + struct cl_lock_descr *descr = &lock->cll_descr; + /* NOTE: this looks like DLM lock request, but it may * not be one. Due to CEF_ASYNC flag (translated * to LDLM_FL_HAS_INTENT by osc), this is @@ -113,11 +108,10 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, */ *descr = whole_file; descr->cld_obj = clob; - descr->cld_mode = CLM_PHANTOM; + descr->cld_mode = CLM_READ; descr->cld_enq_flags = CEF_ASYNC | CEF_MUST; if (agl) descr->cld_enq_flags |= CEF_AGL; - cio->cui_glimpse = 1; /* * CEF_ASYNC is used because glimpse sub-locks cannot * deadlock (because they never conflict with other @@ -126,21 +120,13 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, * CEF_MUST protects glimpse lock from conversion into * a lockless mode. */ - lock = cl_lock_request(env, io, descr, "glimpse", - current); - cio->cui_glimpse = 0; - - if (!lock) - return 0; + result = cl_lock_request(env, io, lock); + if (result < 0) + return result; - if (IS_ERR(lock)) - return PTR_ERR(lock); - - LASSERT(agl == 0); - result = cl_wait(env, lock); - if (result == 0) { - cl_merge_lvb(env, inode); - if (cl_isize_read(inode) > 0 && + if (!agl) { + ll_merge_attr(env, inode); + if (i_size_read(inode) > 0 && inode->i_blocks == 0) { /* * LU-417: Add dirty pages block count @@ -150,12 +136,11 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, */ inode->i_blocks = dirty_cnt(inode); } - cl_unuse(env, lock); } - cl_lock_release(env, lock, "glimpse", current); + cl_lock_release(env, lock); } else { CDEBUG(D_DLMTRACE, "No objects for inode\n"); - cl_merge_lvb(env, inode); + ll_merge_attr(env, inode); } } @@ -167,22 +152,24 @@ static int cl_io_get(struct inode *inode, struct lu_env **envout, { struct lu_env *env; struct cl_io *io; - struct cl_inode_info *lli = cl_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); struct cl_object *clob = lli->lli_clob; int result; - if (S_ISREG(cl_inode_mode(inode))) { + if (S_ISREG(inode->i_mode)) { env = cl_env_get(refcheck); if (!IS_ERR(env)) { - io = ccc_env_thread_io(env); + io = vvp_env_thread_io(env); io->ci_obj = clob; *envout = env; *ioout = io; result = 1; - } else + } else { result = PTR_ERR(env); - } else + } + } else { result = 0; + } return result; } @@ -231,14 +218,11 @@ int cl_local_size(struct inode *inode) { struct lu_env *env = NULL; struct cl_io *io = NULL; - struct ccc_thread_info *cti; struct cl_object *clob; - struct cl_lock_descr *descr; - struct cl_lock *lock; int result; int refcheck; - if (!cl_i2info(inode)->lli_has_smd) + if (!ll_i2info(inode)->lli_has_smd) return 0; result = cl_io_get(inode, &env, &io, &refcheck); @@ -247,22 +231,19 @@ int cl_local_size(struct inode *inode) clob = io->ci_obj; result = cl_io_init(env, io, CIT_MISC, clob); - if (result > 0) + if (result > 0) { result = io->ci_result; - else if (result == 0) { - cti = ccc_env_info(env); - descr = &cti->cti_descr; - - *descr = whole_file; - descr->cld_obj = clob; - lock = cl_lock_peek(env, io, descr, "localsize", current); - if (lock) { - cl_merge_lvb(env, inode); - cl_unuse(env, lock); - cl_lock_release(env, lock, "localsize", current); - result = 0; - } else - result = -ENODATA; + } else if (result == 0) { + struct cl_lock *lock = vvp_env_lock(env); + + lock->cll_descr = whole_file; + lock->cll_descr.cld_enq_flags = CEF_PEEK; + lock->cll_descr.cld_obj = clob; + result = cl_lock_request(env, io, lock); + if (result == 0) { + ll_merge_attr(env, inode); + cl_lock_release(env, lock); + } } cl_io_fini(env, io); cl_env_put(env, &refcheck); diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c new file mode 100644 index 000000000000..396e4e4f0715 --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c @@ -0,0 +1,323 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * cl code shared between vvp and liblustre (and other Lustre clients in the + * future). + * + * Author: Nikita Danilov <nikita.danilov@sun.com> + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include "../../include/linux/libcfs/libcfs.h" +# include <linux/fs.h> +# include <linux/sched.h> +# include <linux/mm.h> +# include <linux/quotaops.h> +# include <linux/highmem.h> +# include <linux/pagemap.h> +# include <linux/rbtree.h> + +#include "../include/obd.h" +#include "../include/obd_support.h" +#include "../include/lustre_fid.h" +#include "../include/lustre_lite.h" +#include "../include/lustre_dlm.h" +#include "../include/lustre_ver.h" +#include "../include/lustre_mdc.h" +#include "../include/cl_object.h" + +#include "../llite/llite_internal.h" + +/* + * ccc_ prefix stands for "Common Client Code". + */ + +/***************************************************************************** + * + * Vvp device and device type functions. + * + */ + +/** + * An `emergency' environment used by cl_inode_fini() when cl_env_get() + * fails. Access to this environment is serialized by cl_inode_fini_guard + * mutex. + */ +struct lu_env *cl_inode_fini_env; +int cl_inode_fini_refcheck; + +/** + * A mutex serializing calls to slp_inode_fini() under extreme memory + * pressure, when environments cannot be allocated. + */ +static DEFINE_MUTEX(cl_inode_fini_guard); + +int cl_setattr_ost(struct inode *inode, const struct iattr *attr) +{ + struct lu_env *env; + struct cl_io *io; + int result; + int refcheck; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + io = vvp_env_thread_io(env); + io->ci_obj = ll_i2info(inode)->lli_clob; + + io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); + io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); + io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime); + io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size; + io->u.ci_setattr.sa_valid = attr->ia_valid; + +again: + if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) { + struct vvp_io *vio = vvp_env_io(env); + + if (attr->ia_valid & ATTR_FILE) + /* populate the file descriptor for ftruncate to honor + * group lock - see LU-787 + */ + vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file); + + result = cl_io_loop(env, io); + } else { + result = io->ci_result; + } + cl_io_fini(env, io); + if (unlikely(io->ci_need_restart)) + goto again; + /* HSM import case: file is released, cannot be restored + * no need to fail except if restore registration failed + * with -ENODATA + */ + if (result == -ENODATA && io->ci_restore_needed && + io->ci_result != -ENODATA) + result = 0; + cl_env_put(env, &refcheck); + return result; +} + +/** + * Initialize or update CLIO structures for regular files when new + * meta-data arrives from the server. + * + * \param inode regular file inode + * \param md new file metadata from MDS + * - allocates cl_object if necessary, + * - updated layout, if object was already here. + */ +int cl_file_inode_init(struct inode *inode, struct lustre_md *md) +{ + struct lu_env *env; + struct ll_inode_info *lli; + struct cl_object *clob; + struct lu_site *site; + struct lu_fid *fid; + struct cl_object_conf conf = { + .coc_inode = inode, + .u = { + .coc_md = md + } + }; + int result = 0; + int refcheck; + + LASSERT(md->body->valid & OBD_MD_FLID); + LASSERT(S_ISREG(inode->i_mode)); + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + site = ll_i2sbi(inode)->ll_site; + lli = ll_i2info(inode); + fid = &lli->lli_fid; + LASSERT(fid_is_sane(fid)); + + if (!lli->lli_clob) { + /* clob is slave of inode, empty lli_clob means for new inode, + * there is no clob in cache with the given fid, so it is + * unnecessary to perform lookup-alloc-lookup-insert, just + * alloc and insert directly. + */ + LASSERT(inode->i_state & I_NEW); + conf.coc_lu.loc_flags = LOC_F_NEW; + clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev), + fid, &conf); + if (!IS_ERR(clob)) { + /* + * No locking is necessary, as new inode is + * locked by I_NEW bit. + */ + lli->lli_clob = clob; + lli->lli_has_smd = lsm_has_objects(md->lsm); + lu_object_ref_add(&clob->co_lu, "inode", inode); + } else { + result = PTR_ERR(clob); + } + } else { + result = cl_conf_set(env, lli->lli_clob, &conf); + } + + cl_env_put(env, &refcheck); + + if (result != 0) + CERROR("Failure to initialize cl object " DFID ": %d\n", + PFID(fid), result); + return result; +} + +/** + * Wait for others drop their references of the object at first, then we drop + * the last one, which will lead to the object be destroyed immediately. + * Must be called after cl_object_kill() against this object. + * + * The reason we want to do this is: destroying top object will wait for sub + * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs) + * to initiate top object destroying which may deadlock. See bz22520. + */ +static void cl_object_put_last(struct lu_env *env, struct cl_object *obj) +{ + struct lu_object_header *header = obj->co_lu.lo_header; + wait_queue_t waiter; + + if (unlikely(atomic_read(&header->loh_ref) != 1)) { + struct lu_site *site = obj->co_lu.lo_dev->ld_site; + struct lu_site_bkt_data *bkt; + + bkt = lu_site_bkt_from_fid(site, &header->loh_fid); + + init_waitqueue_entry(&waiter, current); + add_wait_queue(&bkt->lsb_marche_funebre, &waiter); + + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (atomic_read(&header->loh_ref) == 1) + break; + schedule(); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&bkt->lsb_marche_funebre, &waiter); + } + + cl_object_put(env, obj); +} + +void cl_inode_fini(struct inode *inode) +{ + struct lu_env *env; + struct ll_inode_info *lli = ll_i2info(inode); + struct cl_object *clob = lli->lli_clob; + int refcheck; + int emergency; + + if (clob) { + void *cookie; + + cookie = cl_env_reenter(); + env = cl_env_get(&refcheck); + emergency = IS_ERR(env); + if (emergency) { + mutex_lock(&cl_inode_fini_guard); + LASSERT(cl_inode_fini_env); + cl_env_implant(cl_inode_fini_env, &refcheck); + env = cl_inode_fini_env; + } + /* + * cl_object cache is a slave to inode cache (which, in turn + * is a slave to dentry cache), don't keep cl_object in memory + * when its master is evicted. + */ + cl_object_kill(env, clob); + lu_object_ref_del(&clob->co_lu, "inode", inode); + cl_object_put_last(env, clob); + lli->lli_clob = NULL; + if (emergency) { + cl_env_unplant(cl_inode_fini_env, &refcheck); + mutex_unlock(&cl_inode_fini_guard); + } else { + cl_env_put(env, &refcheck); + } + cl_env_reexit(cookie); + } +} + +/** + * build inode number from passed @fid + */ +__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32) +{ + if (BITS_PER_LONG == 32 || api32) + return fid_flatten32(fid); + else + return fid_flatten(fid); +} + +/** + * build inode generation from passed @fid. If our FID overflows the 32-bit + * inode number then return a non-zero generation to distinguish them. + */ +__u32 cl_fid_build_gen(const struct lu_fid *fid) +{ + __u32 gen; + + if (fid_is_igif(fid)) { + gen = lu_igif_gen(fid); + return gen; + } + + gen = fid_flatten(fid) >> 32; + return gen; +} + +/* lsm is unreliable after hsm implementation as layout can be changed at + * any time. This is only to support old, non-clio-ized interfaces. It will + * cause deadlock if clio operations are called with this extra layout refcount + * because in case the layout changed during the IO, ll_layout_refresh() will + * have to wait for the refcount to become zero to destroy the older layout. + * + * Notice that the lsm returned by this function may not be valid unless called + * inside layout lock - MDS_INODELOCK_LAYOUT. + */ +struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode) +{ + return lov_lsm_get(ll_i2info(inode)->lli_clob); +} + +inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm) +{ + lov_lsm_put(ll_i2info(inode)->lli_clob, lsm); +} diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c index d80bcedd78d1..f6be105eeef7 100644 --- a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c +++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -41,9 +37,9 @@ #include "../include/obd_support.h" #include "../include/obd.h" #include "../include/cl_object.h" -#include "../include/lclient.h" #include "../include/lustre_lite.h" +#include "llite_internal.h" /* Initialize the default and maximum LOV EA and cookie sizes. This allows * us to make MDS RPCs with large enough reply buffers to hold the @@ -100,7 +96,8 @@ int cl_ocd_update(struct obd_device *host, __u64 flags; int result; - if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { + if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) && + watched->obd_set_up && !watched->obd_stopping) { cli = &watched->u.cli; lco = owner; flags = cli->cl_import->imp_connect_data.ocd_connect_flags; @@ -115,9 +112,10 @@ int cl_ocd_update(struct obd_device *host, mutex_unlock(&lco->lco_lock); result = 0; } else { - CERROR("unexpected notification from %s %s!\n", + CERROR("unexpected notification from %s %s (setup:%d,stopping:%d)!\n", watched->obd_type->typ_name, - watched->obd_name); + watched->obd_name, watched->obd_set_up, + watched->obd_stopping); result = -EINVAL; } return result; @@ -126,7 +124,7 @@ int cl_ocd_update(struct obd_device *host, #define GROUPLOCK_SCOPE "grouplock" int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, - struct ccc_grouplock *cg) + struct ll_grouplock *cg) { struct lu_env *env; struct cl_io *io; @@ -140,20 +138,22 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, if (IS_ERR(env)) return PTR_ERR(env); - io = ccc_env_thread_io(env); + io = vvp_env_thread_io(env); io->ci_obj = obj; io->ci_ignore_layout = 1; rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (rc) { + if (rc != 0) { + cl_io_fini(env, io); + cl_env_put(env, &refcheck); /* Does not make sense to take GL for released layout */ if (rc > 0) rc = -ENOTSUPP; - cl_env_put(env, &refcheck); return rc; } - descr = &ccc_env_info(env)->cti_descr; + lock = vvp_env_lock(env); + descr = &lock->cll_descr; descr->cld_obj = obj; descr->cld_start = 0; descr->cld_end = CL_PAGE_EOF; @@ -163,38 +163,37 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0); descr->cld_enq_flags = enqflags; - lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, current); - if (IS_ERR(lock)) { + rc = cl_lock_request(env, io, lock); + if (rc < 0) { cl_io_fini(env, io); cl_env_put(env, &refcheck); - return PTR_ERR(lock); + return rc; } - cg->cg_env = cl_env_get(&refcheck); - cg->cg_io = io; - cg->cg_lock = lock; - cg->cg_gid = gid; - LASSERT(cg->cg_env == env); + cg->lg_env = cl_env_get(&refcheck); + cg->lg_io = io; + cg->lg_lock = lock; + cg->lg_gid = gid; + LASSERT(cg->lg_env == env); cl_env_unplant(env, &refcheck); return 0; } -void cl_put_grouplock(struct ccc_grouplock *cg) +void cl_put_grouplock(struct ll_grouplock *cg) { - struct lu_env *env = cg->cg_env; - struct cl_io *io = cg->cg_io; - struct cl_lock *lock = cg->cg_lock; + struct lu_env *env = cg->lg_env; + struct cl_io *io = cg->lg_io; + struct cl_lock *lock = cg->lg_lock; int refcheck; - LASSERT(cg->cg_env); - LASSERT(cg->cg_gid); + LASSERT(cg->lg_env); + LASSERT(cg->lg_gid); cl_env_implant(env, &refcheck); cl_env_put(env, &refcheck); - cl_unuse(env, lock); - cl_lock_release(env, lock, GROUPLOCK_SCOPE, current); + cl_lock_release(env, lock); cl_io_fini(env, io); cl_env_put(env, NULL); } diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c index a55ac4dccd90..2326b40a0870 100644 --- a/drivers/staging/lustre/lustre/llite/llite_close.c +++ b/drivers/staging/lustre/lustre/llite/llite_close.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -46,31 +42,31 @@ #include "llite_internal.h" /** records that a write is in flight */ -void vvp_write_pending(struct ccc_object *club, struct ccc_page *page) +void vvp_write_pending(struct vvp_object *club, struct vvp_page *page) { - struct ll_inode_info *lli = ll_i2info(club->cob_inode); + struct ll_inode_info *lli = ll_i2info(club->vob_inode); spin_lock(&lli->lli_lock); lli->lli_flags |= LLIF_SOM_DIRTY; - if (page && list_empty(&page->cpg_pending_linkage)) - list_add(&page->cpg_pending_linkage, &club->cob_pending_list); + if (page && list_empty(&page->vpg_pending_linkage)) + list_add(&page->vpg_pending_linkage, &club->vob_pending_list); spin_unlock(&lli->lli_lock); } /** records that a write has completed */ -void vvp_write_complete(struct ccc_object *club, struct ccc_page *page) +void vvp_write_complete(struct vvp_object *club, struct vvp_page *page) { - struct ll_inode_info *lli = ll_i2info(club->cob_inode); + struct ll_inode_info *lli = ll_i2info(club->vob_inode); int rc = 0; spin_lock(&lli->lli_lock); - if (page && !list_empty(&page->cpg_pending_linkage)) { - list_del_init(&page->cpg_pending_linkage); + if (page && !list_empty(&page->vpg_pending_linkage)) { + list_del_init(&page->vpg_pending_linkage); rc = 1; } spin_unlock(&lli->lli_lock); if (rc) - ll_queue_done_writing(club->cob_inode, 0); + ll_queue_done_writing(club->vob_inode, 0); } /** Queues DONE_WRITING if @@ -80,25 +76,25 @@ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page) void ll_queue_done_writing(struct inode *inode, unsigned long flags) { struct ll_inode_info *lli = ll_i2info(inode); - struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob); + struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob); spin_lock(&lli->lli_lock); lli->lli_flags |= flags; if ((lli->lli_flags & LLIF_DONE_WRITING) && - list_empty(&club->cob_pending_list)) { + list_empty(&club->vob_pending_list)) { struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq; if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CWARN("ino %lu/%u(flags %u) som valid it just after recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CWARN("%s: file "DFID"(flags %u) Size-on-MDS valid, done writing allowed and no diry pages\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), lli->lli_flags); /* DONE_WRITING is allowed and inode has no dirty page. */ spin_lock(&lcq->lcq_lock); LASSERT(list_empty(&lli->lli_close_list)); - CDEBUG(D_INODE, "adding inode %lu/%u to close list\n", - inode->i_ino, inode->i_generation); + CDEBUG(D_INODE, "adding inode "DFID" to close list\n", + PFID(ll_inode2fid(inode))); list_add_tail(&lli->lli_close_list, &lcq->lcq_head); /* Avoid a concurrent insertion into the close thread queue: @@ -124,9 +120,9 @@ void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data) op_data->op_flags |= MF_SOM_CHANGE; /* Check if Size-on-MDS attributes are valid. */ if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), lli->lli_flags); if (!cl_local_size(inode)) { /* Send Size-on-MDS Attributes if valid. */ @@ -140,10 +136,10 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data, struct obd_client_handle **och, unsigned long flags) { struct ll_inode_info *lli = ll_i2info(inode); - struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob); + struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob); spin_lock(&lli->lli_lock); - if (!(list_empty(&club->cob_pending_list))) { + if (!(list_empty(&club->vob_pending_list))) { if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) { LASSERT(*och); LASSERT(!lli->lli_pending_och); @@ -198,7 +194,7 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data, } } - LASSERT(list_empty(&club->cob_pending_list)); + LASSERT(list_empty(&club->vob_pending_list)); lli->lli_flags &= ~LLIF_SOM_DIRTY; spin_unlock(&lli->lli_lock); ll_done_writing_attr(inode, op_data); @@ -221,9 +217,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data) LASSERT(op_data); if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), lli->lli_flags); oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); if (!oa) { @@ -241,9 +237,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data) if (rc) { oa->o_valid = 0; if (rc != -ENOENT) - CERROR("inode_getattr failed (%d): unable to send a Size-on-MDS attribute update for inode %lu/%u\n", - rc, inode->i_ino, - inode->i_generation); + CERROR("%s: inode_getattr failed - unable to send a Size-on-MDS attribute update for inode "DFID": rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), rc); } else { CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n", PFID(&lli->lli_fid)); @@ -302,9 +298,11 @@ static void ll_done_writing(struct inode *inode) * OSTs and send setattr to back to MDS. */ rc = ll_som_update(inode, op_data); - else if (rc) - CERROR("inode %lu mdc done_writing failed: rc = %d\n", - inode->i_ino, rc); + else if (rc) { + CERROR("%s: inode "DFID" mdc done_writing failed: rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), rc); + } out: ll_finish_md_op_data(op_data); if (och) { @@ -323,8 +321,9 @@ static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq) lli = list_entry(lcq->lcq_head.next, struct ll_inode_info, lli_close_list); list_del_init(&lli->lli_close_list); - } else if (atomic_read(&lcq->lcq_stop)) + } else if (atomic_read(&lcq->lcq_stop)) { lli = ERR_PTR(-EALREADY); + } spin_unlock(&lcq->lcq_lock); return lli; @@ -348,8 +347,8 @@ static int ll_close_thread(void *arg) break; inode = ll_info2i(lli); - CDEBUG(D_INFO, "done_writing for inode %lu/%u\n", - inode->i_ino, inode->i_generation); + CDEBUG(D_INFO, "done_writing for inode "DFID"\n", + PFID(ll_inode2fid(inode))); ll_done_writing(inode); iput(inode); } diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index e3c0f1dd4d31..4d6d589a1677 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -43,11 +39,11 @@ /* for struct cl_lock_descr and struct cl_io */ #include "../include/cl_object.h" -#include "../include/lclient.h" #include "../include/lustre_mdc.h" #include "../include/lustre_intent.h" #include <linux/compat.h> #include <linux/posix_acl_xattr.h> +#include "vvp_internal.h" #ifndef FMODE_EXEC #define FMODE_EXEC 0 @@ -68,6 +64,7 @@ struct ll_dentry_data { struct lookup_intent *lld_it; unsigned int lld_sa_generation; unsigned int lld_invalid:1; + unsigned int lld_nfs_dentry:1; struct rcu_head lld_rcu_head; }; @@ -76,9 +73,6 @@ struct ll_dentry_data { #define LLI_INODE_MAGIC 0x111d0de5 #define LLI_INODE_DEAD 0xdeadd00d -/* remote client permission cache */ -#define REMOTE_PERM_HASHSIZE 16 - struct ll_getname_data { struct dir_context ctx; char *lgd_name; /* points to a buffer with NAME_MAX+1 size */ @@ -86,17 +80,11 @@ struct ll_getname_data { int lgd_found; /* inode matched? */ }; -/* llite setxid/access permission for user on remote client */ -struct ll_remote_perm { - struct hlist_node lrp_list; - uid_t lrp_uid; - gid_t lrp_gid; - uid_t lrp_fsuid; - gid_t lrp_fsgid; - int lrp_access_perm; /* MAY_READ/WRITE/EXEC, this - * is access permission with - * lrp_fsuid/lrp_fsgid. - */ +struct ll_grouplock { + struct lu_env *lg_env; + struct cl_io *lg_io; + struct cl_lock *lg_lock; + unsigned long lg_gid; }; enum lli_flags { @@ -126,9 +114,6 @@ struct ll_inode_info { spinlock_t lli_lock; struct posix_acl *lli_posix_acl; - struct hlist_head *lli_remote_perms; - struct mutex lli_rmtperm_mutex; - /* identifying fields for both metadata and data stacks. */ struct lu_fid lli_fid; /* Parent fid for accessing default stripe data on parent directory @@ -138,8 +123,6 @@ struct ll_inode_info { struct list_head lli_close_list; - unsigned long lli_rmtperm_time; - /* handle is to be sent to MDS later on done_writing and setattr. * Open handle data are needed for the recovery to reconstruct * the inode state on the MDS. XXX: recovery is not ready yet. @@ -161,7 +144,9 @@ struct ll_inode_info { struct inode lli_vfs_inode; /* the most recent timestamps obtained from mds */ - struct ost_lvb lli_lvb; + s64 lli_atime; + s64 lli_mtime; + s64 lli_ctime; spinlock_t lli_agl_lock; /* Try to make the d::member and f::member are aligned. Before using @@ -328,6 +313,7 @@ enum ra_stat { RA_STAT_EOF, RA_STAT_MAX_IN_FLIGHT, RA_STAT_WRONG_GRAB_PAGE, + RA_STAT_FAILED_REACH_END, _NR_RA_STAT, }; @@ -401,7 +387,7 @@ enum stats_track_type { #define LL_SBI_FLOCK 0x04 #define LL_SBI_USER_XATTR 0x08 /* support user xattr */ #define LL_SBI_ACL 0x10 /* support ACL */ -#define LL_SBI_RMT_CLIENT 0x40 /* remote client */ +/* LL_SBI_RMT_CLIENT 0x40 remote client */ #define LL_SBI_MDS_CAPA 0x80 /* support mds capa, obsolete */ #define LL_SBI_OSS_CAPA 0x100 /* support oss capa, obsolete */ #define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */ @@ -423,7 +409,7 @@ enum stats_track_type { "xattr", \ "acl", \ "???", \ - "rmt_client", \ + "???", \ "mds_capa", \ "oss_capa", \ "flock", \ @@ -439,26 +425,6 @@ enum stats_track_type { "xattr", \ } -#define RCE_HASHES 32 - -struct rmtacl_ctl_entry { - struct list_head rce_list; - pid_t rce_key; /* hash key */ - int rce_ops; /* acl operation type */ -}; - -struct rmtacl_ctl_table { - spinlock_t rct_lock; - struct list_head rct_entries[RCE_HASHES]; -}; - -#define EE_HASHES 32 - -struct eacl_table { - spinlock_t et_lock; - struct list_head et_entries[EE_HASHES]; -}; - struct ll_sb_info { /* this protects pglist and ra_info. It isn't safe to * grab from interrupt contexts @@ -481,7 +447,13 @@ struct ll_sb_info { struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ - struct cl_client_cache ll_cache; + /* + * Used to track "unstable" pages on a client, and maintain a + * LRU list of clean pages. An "unstable" page is defined as + * any page which is sent to a server as part of a bulk request, + * but is uncommitted to stable storage. + */ + struct cl_client_cache *ll_cache; struct lprocfs_stats *ll_ra_stats; @@ -517,21 +489,12 @@ struct ll_sb_info { dev_t ll_sdev_orig; /* save s_dev before assign for * clustered nfs */ - struct rmtacl_ctl_table ll_rct; - struct eacl_table ll_et; __kernel_fsid_t ll_fsid; struct kobject ll_kobj; /* sysfs object */ struct super_block *ll_sb; /* struct super_block (for sysfs code)*/ struct completion ll_kobj_unregister; }; -struct ll_ra_read { - pgoff_t lrr_start; - pgoff_t lrr_count; - struct task_struct *lrr_reader; - struct list_head lrr_linkage; -}; - /* * per file-descriptor read-ahead data. */ @@ -590,12 +553,6 @@ struct ll_readahead_state { */ unsigned long ras_request_index; /* - * list of struct ll_ra_read's one per read(2) call current in - * progress against this file descriptor. Used by read-ahead code, - * protected by ->ras_lock. - */ - struct list_head ras_read_beads; - /* * The following 3 items are used for detecting the stride I/O * mode. * In stride I/O mode, @@ -622,7 +579,7 @@ extern struct kmem_cache *ll_file_data_slab; struct lustre_handle; struct ll_file_data { struct ll_readahead_state fd_ras; - struct ccc_grouplock fd_grouplock; + struct ll_grouplock fd_grouplock; __u64 lfd_pos; __u32 fd_flags; fmode_t fd_omode; @@ -637,6 +594,8 @@ struct ll_file_data { * false: unknown failure, should report. */ bool fd_write_failed; + rwlock_t fd_lock; /* protect lcc list */ + struct list_head fd_lccs; /* list of ll_cl_context */ }; struct lov_stripe_md; @@ -663,8 +622,16 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi) #endif } -void ll_ra_read_in(struct file *f, struct ll_ra_read *rar); -void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar); +void ll_ras_enter(struct file *f); + +/* llite/lcommon_misc.c */ +int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp); +int cl_ocd_update(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner, void *data); +int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, + struct ll_grouplock *cg); +void cl_put_grouplock(struct ll_grouplock *cg); /* llite/lproc_llite.c */ int ldebugfs_register_mountpoint(struct dentry *parent, @@ -697,15 +664,16 @@ int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de); /* llite/rw.c */ -int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to); -int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to); int ll_writepage(struct page *page, struct writeback_control *wbc); int ll_writepages(struct address_space *, struct writeback_control *wbc); int ll_readpage(struct file *file, struct page *page); void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras); int ll_readahead(const struct lu_env *env, struct cl_io *io, - struct ll_readahead_state *ras, struct address_space *mapping, - struct cl_page_list *queue, int flags); + struct cl_page_list *queue, struct ll_readahead_state *ras, + bool hit); +struct ll_cl_context *ll_cl_find(struct file *file); +void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io); +void ll_cl_remove(struct file *file, const struct lu_env *env); extern const struct address_space_operations ll_aops; @@ -740,7 +708,7 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type); int ll_inode_permission(struct inode *inode, int mask); int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, - int flags, struct lov_user_md *lum, + __u64 flags, struct lov_user_md *lum, int lum_size); int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, struct lov_mds_md **lmm, int *lmm_size, @@ -750,9 +718,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, int *lmm_size, struct ptlrpc_request **request); int ll_fsync(struct file *file, loff_t start, loff_t end, int data); -int ll_merge_lvb(const struct lu_env *env, struct inode *inode); +int ll_merge_attr(const struct lu_env *env, struct inode *inode); int ll_fid2path(struct inode *inode, void __user *arg); -int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock); +int ll_data_version(struct inode *inode, __u64 *data_version, int flags); int ll_hsm_release(struct inode *inode); /* llite/dcache.c */ @@ -824,65 +792,8 @@ struct ll_close_queue { atomic_t lcq_stop; }; -struct ccc_object *cl_inode2ccc(struct inode *inode); - -void vvp_write_pending (struct ccc_object *club, struct ccc_page *page); -void vvp_write_complete(struct ccc_object *club, struct ccc_page *page); - -/* specific architecture can implement only part of this list */ -enum vvp_io_subtype { - /** normal IO */ - IO_NORMAL, - /** io started from splice_{read|write} */ - IO_SPLICE -}; - -/* IO subtypes */ -struct vvp_io { - /** io subtype */ - enum vvp_io_subtype cui_io_subtype; - - union { - struct { - struct pipe_inode_info *cui_pipe; - unsigned int cui_flags; - } splice; - struct vvp_fault_io { - /** - * Inode modification time that is checked across DLM - * lock request. - */ - time64_t ft_mtime; - struct vm_area_struct *ft_vma; - /** - * locked page returned from vvp_io - */ - struct page *ft_vmpage; - struct vm_fault_api { - /** - * kernel fault info - */ - struct vm_fault *ft_vmf; - /** - * fault API used bitflags for return code. - */ - unsigned int ft_flags; - /** - * check that flags are from filemap_fault - */ - bool ft_flags_valid; - } fault; - } fault; - } u; - /** - * Read-ahead state used by read and page-fault IO contexts. - */ - struct ll_ra_read cui_bead; - /** - * Set when cui_bead has been initialized. - */ - int cui_ra_window_set; -}; +void vvp_write_pending(struct vvp_object *club, struct vvp_page *page); +void vvp_write_complete(struct vvp_object *club, struct vvp_page *page); /** * IO arguments for various VFS I/O interfaces. @@ -904,61 +815,39 @@ struct vvp_io_args { }; struct ll_cl_context { + struct list_head lcc_list; void *lcc_cookie; + const struct lu_env *lcc_env; struct cl_io *lcc_io; struct cl_page *lcc_page; - struct lu_env *lcc_env; - int lcc_refcheck; }; -struct vvp_thread_info { - struct vvp_io_args vti_args; - struct ra_io_arg vti_ria; - struct ll_cl_context vti_io_ctx; +struct ll_thread_info { + struct vvp_io_args lti_args; + struct ra_io_arg lti_ria; + struct ll_cl_context lti_io_ctx; }; -static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env) +extern struct lu_context_key ll_thread_key; +static inline struct ll_thread_info *ll_env_info(const struct lu_env *env) { - extern struct lu_context_key vvp_key; - struct vvp_thread_info *info; + struct ll_thread_info *lti; - info = lu_context_key_get(&env->le_ctx, &vvp_key); - LASSERT(info); - return info; + lti = lu_context_key_get(&env->le_ctx, &ll_thread_key); + LASSERT(lti); + return lti; } -static inline struct vvp_io_args *vvp_env_args(const struct lu_env *env, - enum vvp_io_subtype type) +static inline struct vvp_io_args *ll_env_args(const struct lu_env *env, + enum vvp_io_subtype type) { - struct vvp_io_args *ret = &vvp_env_info(env)->vti_args; + struct vvp_io_args *via = &ll_env_info(env)->lti_args; - ret->via_io_subtype = type; + via->via_io_subtype = type; - return ret; + return via; } -struct vvp_session { - struct vvp_io vs_ios; -}; - -static inline struct vvp_session *vvp_env_session(const struct lu_env *env) -{ - extern struct lu_context_key vvp_session_key; - struct vvp_session *ses; - - ses = lu_context_key_get(env->le_ses, &vvp_session_key); - LASSERT(ses); - return ses; -} - -static inline struct vvp_io *vvp_env_io(const struct lu_env *env) -{ - return &vvp_env_session(env)->vs_ios; -} - -int vvp_global_init(void); -void vvp_global_fini(void); - void ll_queue_done_writing(struct inode *inode, unsigned long flags); void ll_close_thread_shutdown(struct ll_close_queue *lcq); int ll_close_thread_start(struct ll_close_queue **lcq_ret); @@ -981,6 +870,10 @@ static inline void ll_invalidate_page(struct page *vmpage) if (!mapping) return; + /* + * truncate_complete_page() calls + * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete(). + */ ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE); truncate_complete_page(mapping, vmpage); } @@ -1040,24 +933,13 @@ static inline __u64 ll_file_maxbytes(struct inode *inode) } /* llite/xattr.c */ -int ll_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size); +int ll_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags); +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size); ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size); int ll_removexattr(struct dentry *dentry, const char *name); -/* llite/remote_perm.c */ -extern struct kmem_cache *ll_remote_perm_cachep; -extern struct kmem_cache *ll_rmtperm_hash_cachep; - -void free_rmtperm_hash(struct hlist_head *hash); -int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm); -int lustre_check_remote_perm(struct inode *inode, int mask); - -/* llite/llite_cl.c */ -extern struct lu_device_type vvp_device_type; - /** * Common IO arguments for various VFS I/O interfaces. */ @@ -1069,42 +951,9 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode, struct ll_readahead_state *ras, unsigned long index, unsigned hit); void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len); -void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which); - -/* llite/llite_rmtacl.c */ -#ifdef CONFIG_FS_POSIX_ACL -struct eacl_entry { - struct list_head ee_list; - pid_t ee_key; /* hash key */ - struct lu_fid ee_fid; - int ee_type; /* ACL type for ACCESS or DEFAULT */ - ext_acl_xattr_header *ee_acl; -}; - -u64 rce_ops2valid(int ops); -struct rmtacl_ctl_entry *rct_search(struct rmtacl_ctl_table *rct, pid_t key); -int rct_add(struct rmtacl_ctl_table *rct, pid_t key, int ops); -int rct_del(struct rmtacl_ctl_table *rct, pid_t key); -void rct_init(struct rmtacl_ctl_table *rct); -void rct_fini(struct rmtacl_ctl_table *rct); - -void ee_free(struct eacl_entry *ee); -int ee_add(struct eacl_table *et, pid_t key, struct lu_fid *fid, int type, - ext_acl_xattr_header *header); -struct eacl_entry *et_search_del(struct eacl_table *et, pid_t key, - struct lu_fid *fid, int type); -void et_search_free(struct eacl_table *et, pid_t key); -void et_init(struct eacl_table *et); -void et_fini(struct eacl_table *et); -#else -static inline u64 rce_ops2valid(int ops) -{ - return 0; -} -#endif +void ll_ra_stats_inc(struct inode *inode, enum ra_stat which); /* statahead.c */ - #define LL_SA_RPC_MIN 2 #define LL_SA_RPC_DEF 32 #define LL_SA_RPC_MAX 8192 @@ -1163,6 +1012,22 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentry, int only_unplug); void ll_stop_statahead(struct inode *dir, void *key); +blkcnt_t dirty_cnt(struct inode *inode); + +int cl_glimpse_size0(struct inode *inode, int agl); +int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, + struct inode *inode, struct cl_object *clob, int agl); + +static inline int cl_glimpse_size(struct inode *inode) +{ + return cl_glimpse_size0(inode, 0); +} + +static inline int cl_agl(struct inode *inode) +{ + return cl_glimpse_size0(inode, 1); +} + static inline int ll_glimpse_size(struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); @@ -1285,43 +1150,6 @@ typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode, void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd); void ll_iocontrol_unregister(void *magic); -/* lclient compat stuff */ -#define cl_inode_info ll_inode_info -#define cl_i2info(info) ll_i2info(info) -#define cl_inode_mode(inode) ((inode)->i_mode) -#define cl_i2sbi ll_i2sbi - -static inline struct ll_file_data *cl_iattr2fd(struct inode *inode, - const struct iattr *attr) -{ - LASSERT(attr->ia_valid & ATTR_FILE); - return LUSTRE_FPRIVATE(attr->ia_file); -} - -static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms) -{ - LASSERT(mutex_is_locked(&ll_i2info(inode)->lli_size_mutex)); - i_size_write(inode, kms); -} - -static inline void cl_isize_write(struct inode *inode, loff_t kms) -{ - ll_inode_size_lock(inode); - i_size_write(inode, kms); - ll_inode_size_unlock(inode); -} - -#define cl_isize_read(inode) i_size_read(inode) - -static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode) -{ - return ll_merge_lvb(env, inode); -} - -#define cl_inode_atime(inode) LTIME_S((inode)->i_atime) -#define cl_inode_ctime(inode) LTIME_S((inode)->i_ctime) -#define cl_inode_mtime(inode) LTIME_S((inode)->i_mtime) - int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, enum cl_fsync_mode mode, int ignore_layout); @@ -1350,7 +1178,7 @@ static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt, int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ : LPROC_LL_OSC_WRITE; - ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc); + ll_stats_ops_tally(ll_s2sbi(cl2vvp_dev(dev)->vdv_sb), opc, rc); } ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, @@ -1369,7 +1197,7 @@ static inline int ll_file_nolock(const struct file *file) static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode, struct lookup_intent *it, __u64 *bits) { - if (!it->d.lustre.it_lock_set) { + if (!it->it_lock_set) { struct lustre_handle handle; /* If this inode is a remote object, it will get two @@ -1380,38 +1208,26 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode, * LOOKUP and PERM locks, so revoking either locks will * case the dcache being cleared */ - if (it->d.lustre.it_remote_lock_mode) { - handle.cookie = it->d.lustre.it_remote_lock_handle; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p(%lu/%u) for remote lock %#llx\n", - inode, - inode->i_ino, inode->i_generation, + if (it->it_remote_lock_mode) { + handle.cookie = it->it_remote_lock_handle; + CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for remote lock %#llx\n", + PFID(ll_inode2fid(inode)), inode, handle.cookie); md_set_lock_data(exp, &handle.cookie, inode, NULL); } - handle.cookie = it->d.lustre.it_lock_handle; + handle.cookie = it->it_lock_handle; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u) for lock %#llx\n", - inode, inode->i_ino, - inode->i_generation, handle.cookie); + CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for lock %#llx\n", + PFID(ll_inode2fid(inode)), inode, handle.cookie); md_set_lock_data(exp, &handle.cookie, inode, - &it->d.lustre.it_lock_bits); - it->d.lustre.it_lock_set = 1; + &it->it_lock_bits); + it->it_lock_set = 1; } if (bits) - *bits = it->d.lustre.it_lock_bits; -} - -static inline void ll_lock_dcache(struct inode *inode) -{ - spin_lock(&inode->i_lock); -} - -static inline void ll_unlock_dcache(struct inode *inode) -{ - spin_unlock(&inode->i_lock); + *bits = it->it_lock_bits; } static inline int d_lustre_invalid(const struct dentry *dentry) @@ -1471,9 +1287,25 @@ enum { int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf); int ll_layout_refresh(struct inode *inode, __u32 *gen); -int ll_layout_restore(struct inode *inode); +int ll_layout_restore(struct inode *inode, loff_t start, __u64 length); int ll_xattr_init(void); void ll_xattr_fini(void); +int ll_page_sync_io(const struct lu_env *env, struct cl_io *io, + struct cl_page *page, enum cl_req_type crt); + +/* lcommon_cl.c */ +int cl_setattr_ost(struct inode *inode, const struct iattr *attr); + +extern struct lu_env *cl_inode_fini_env; +extern int cl_inode_fini_refcheck; + +int cl_file_inode_init(struct inode *inode, struct lustre_md *md); +void cl_inode_fini(struct inode *inode); +int cl_local_size(struct inode *inode); + +__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32); +__u32 cl_fid_build_gen(const struct lu_fid *fid); + #endif /* LLITE_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index b57a992688a8..546063e728db 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -85,17 +81,13 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb) si_meminfo(&si); pages = si.totalram - si.totalhigh; - if (pages >> (20 - PAGE_SHIFT) < 512) - lru_page_max = pages / 2; - else - lru_page_max = (pages / 4) * 3; + lru_page_max = pages / 2; - /* initialize lru data */ - atomic_set(&sbi->ll_cache.ccc_users, 0); - sbi->ll_cache.ccc_lru_max = lru_page_max; - atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max); - spin_lock_init(&sbi->ll_cache.ccc_lru_lock); - INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru); + sbi->ll_cache = cl_cache_init(lru_page_max); + if (!sbi->ll_cache) { + kfree(sbi); + return NULL; + } sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32, SBI_DEFAULT_READAHEAD_MAX); @@ -135,6 +127,11 @@ static void ll_free_sbi(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); + if (sbi->ll_cache) { + cl_cache_decref(sbi->ll_cache); + sbi->ll_cache = NULL; + } + kfree(sbi); } @@ -169,20 +166,14 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, return -ENOMEM; } - if (llite_root) { - err = ldebugfs_register_mountpoint(llite_root, sb, dt, md); - if (err < 0) - CERROR("could not register mount in <debugfs>/lustre/llite\n"); - } - /* indicate the features supported by this client */ data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION | OBD_CONNECT_BRW_SIZE | OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 | - OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_VBR | - OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH| + OBD_CONNECT_VBR | OBD_CONNECT_FULL20 | + OBD_CONNECT_64BITHASH | OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK | @@ -223,8 +214,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, /* real client */ data->ocd_connect_flags |= OBD_CONNECT_REAL; - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) - data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE; data->ocd_brw_size = MD_MAX_BRW_SIZE; @@ -317,18 +306,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, sbi->ll_flags &= ~LL_SBI_ACL; } - if (data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) { - if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) { - sbi->ll_flags |= LL_SBI_RMT_CLIENT; - LCONSOLE_INFO("client is set as remote by default.\n"); - } - } else { - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - sbi->ll_flags &= ~LL_SBI_RMT_CLIENT; - LCONSOLE_INFO("client claims to be remote, but server rejected, forced to be local.\n"); - } - } - if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH) sbi->ll_flags |= LL_SBI_64BIT_HASH; @@ -337,10 +314,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, else sbi->ll_md_brw_size = PAGE_SIZE; - if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) { - LCONSOLE_INFO("Layout lock feature supported.\n"); + if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) sbi->ll_flags |= LL_SBI_LAYOUT_LOCK; - } if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) { if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) { @@ -364,10 +339,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE | OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK| - OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT | - OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR| - OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH | - OBD_CONNECT_MAXBYTES | + OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA | + OBD_CONNECT_VBR | OBD_CONNECT_FULL20 | + OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS; @@ -390,8 +364,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, } data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) - data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE; CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n", data->ocd_connect_flags, @@ -453,10 +425,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, /* make root inode * XXX: move this to after cbd setup? */ - valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS; - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) - valid |= OBD_MD_FLRMTPERM; - else if (sbi->ll_flags & LL_SBI_ACL) + valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE; + if (sbi->ll_flags & LL_SBI_ACL) valid |= OBD_MD_FLACL; op_data = kzalloc(sizeof(*op_data), GFP_NOFS); @@ -512,13 +482,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, goto out_root; } -#ifdef CONFIG_FS_POSIX_ACL - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - rct_init(&sbi->ll_rct); - et_init(&sbi->ll_et); - } -#endif - checksum = sbi->ll_flags & LL_SBI_CHECKSUM; err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM), KEY_CHECKSUM, sizeof(checksum), &checksum, @@ -526,8 +489,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, cl_sb_init(sb); err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET), - KEY_CACHE_SET, sizeof(sbi->ll_cache), - &sbi->ll_cache, NULL); + KEY_CACHE_SET, sizeof(*sbi->ll_cache), + sbi->ll_cache, NULL); sb->s_root = d_make_root(root); if (!sb->s_root) { @@ -555,6 +518,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, kfree(data); kfree(osfs); + if (llite_root) { + err = ldebugfs_register_mountpoint(llite_root, sb, dt, md); + if (err < 0) { + CERROR("%s: could not register mount in debugfs: " + "rc = %d\n", ll_get_fsname(sb, NULL, 0), err); + err = 0; + } + } + return err; out_root: iput(root); @@ -563,8 +535,6 @@ out_lock_cn_cb: out_dt: obd_disconnect(sbi->ll_dt_exp); sbi->ll_dt_exp = NULL; - /* Make sure all OScs are gone, since cl_cache is accessing sbi. */ - obd_zombie_barrier(); out_md_fid: obd_fid_fini(sbi->ll_md_exp->exp_obd); out_md: @@ -573,7 +543,6 @@ out_md: out: kfree(data); kfree(osfs); - ldebugfs_unregister_mountpoint(sbi); return err; } @@ -608,13 +577,6 @@ static void client_common_put_super(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); -#ifdef CONFIG_FS_POSIX_ACL - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - et_fini(&sbi->ll_et); - rct_fini(&sbi->ll_rct); - } -#endif - ll_close_thread_shutdown(sbi->ll_lcq); cl_sb_fini(sb); @@ -622,10 +584,6 @@ static void client_common_put_super(struct super_block *sb) obd_fid_fini(sbi->ll_dt_exp->exp_obd); obd_disconnect(sbi->ll_dt_exp); sbi->ll_dt_exp = NULL; - /* wait till all OSCs are gone, since cl_cache is accessing sbi. - * see LU-2543. - */ - obd_zombie_barrier(); ldebugfs_unregister_mountpoint(sbi); @@ -704,11 +662,6 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } - tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT); - if (tmp) { - *flags |= tmp; - goto next; - } tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH); if (tmp) { *flags |= tmp; @@ -792,12 +745,9 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_maxbytes = MAX_LFS_FILESIZE; spin_lock_init(&lli->lli_lock); lli->lli_posix_acl = NULL; - lli->lli_remote_perms = NULL; - mutex_init(&lli->lli_rmtperm_mutex); /* Do not set lli_fid, it has been initialized already. */ fid_zero(&lli->lli_pfid); INIT_LIST_HEAD(&lli->lli_close_list); - lli->lli_rmtperm_time = 0; lli->lli_pending_och = NULL; lli->lli_mds_read_och = NULL; lli->lli_mds_write_och = NULL; @@ -864,7 +814,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) try_module_get(THIS_MODULE); /* client additional sb info */ - lsi->lsi_llsbi = sbi = ll_init_sbi(sb); + sbi = ll_init_sbi(sb); + lsi->lsi_llsbi = sbi; if (!sbi) { module_put(THIS_MODULE); kfree(cfg); @@ -897,10 +848,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) cfg->cfg_callback = class_config_llog_handler; /* set up client obds */ err = lustre_process_log(sb, profilenm, cfg); - if (err < 0) { - CERROR("Unable to process log: %d\n", err); + if (err < 0) goto out_free; - } /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */ lprof = class_get_profile(profilenm); @@ -947,7 +896,7 @@ void ll_put_super(struct super_block *sb) struct lustre_sb_info *lsi = s2lsi(sb); struct ll_sb_info *sbi = ll_s2sbi(sb); char *profilenm = get_profile_name(sb); - int next, force = 1; + int ccc_count, next, force = 1, rc = 0; CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm); @@ -963,6 +912,19 @@ void ll_put_super(struct super_block *sb) force = obd->obd_force; } + /* Wait for unstable pages to be committed to stable storage */ + if (!force) { + struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + + rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq, + !atomic_read(&sbi->ll_cache->ccc_unstable_nr), + &lwi); + } + + ccc_count = atomic_read(&sbi->ll_cache->ccc_unstable_nr); + if (!force && rc != -EINTR) + LASSERTF(!ccc_count, "count: %i\n", ccc_count); + /* We need to set force before the lov_disconnect in * lustre_common_put_super, since l_d cleans up osc's as well. */ @@ -999,6 +961,8 @@ void ll_put_super(struct super_block *sb) lustre_common_put_super(sb); + cl_env_cache_purge(~0); + module_put(THIS_MODULE); } /* client_put_super */ @@ -1032,8 +996,8 @@ void ll_clear_inode(struct inode *inode) struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); if (S_ISDIR(inode->i_mode)) { /* these should have been cleared in ll_file_release */ @@ -1065,17 +1029,9 @@ void ll_clear_inode(struct inode *inode) ll_xattr_cache_destroy(inode); - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - LASSERT(!lli->lli_posix_acl); - if (lli->lli_remote_perms) { - free_rmtperm_hash(lli->lli_remote_perms); - lli->lli_remote_perms = NULL; - } - } #ifdef CONFIG_FS_POSIX_ACL - else if (lli->lli_posix_acl) { + if (lli->lli_posix_acl) { LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1); - LASSERT(!lli->lli_remote_perms); posix_acl_release(lli->lli_posix_acl); lli->lli_posix_acl = NULL; } @@ -1180,9 +1136,11 @@ static int ll_setattr_done_writing(struct inode *inode, * from OSTs and send setattr to back to MDS. */ rc = ll_som_update(inode, op_data); - else if (rc) - CERROR("inode %lu mdc truncate failed: rc = %d\n", - inode->i_ino, rc); + else if (rc) { + CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n", + ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); + } return rc; } @@ -1210,12 +1168,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) bool file_is_released = false; int rc = 0, rc1 = 0; - CDEBUG(D_VFSTRACE, - "%s: setattr inode %p/fid:" DFID - " from %llu to %llu, valid %x, hsm_import %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), inode, - PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size, - attr->ia_valid, hsm_import); + CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode, + i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import); if (attr->ia_valid & ATTR_SIZE) { /* Check new size against VFS/VM file size limit and rlimit */ @@ -1265,14 +1220,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), (s64)ktime_get_real_seconds()); - /* If we are changing file size, file content is modified, flag it. */ - if (attr->ia_valid & ATTR_SIZE) { - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - /* We always do an MDS RPC, even if we're only changing the size; * only the MDS knows whether truncate() should fail with -ETXTBUSY */ @@ -1284,13 +1231,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) if (!S_ISDIR(inode->i_mode)) inode_unlock(inode); - memcpy(&op_data->op_attr, attr, sizeof(*attr)); - - /* Open epoch for truncate. */ - if (exp_connect_som(ll_i2mdexp(inode)) && - (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) - op_data->op_flags = MF_EPOCH_OPEN; - /* truncate on a released file must failed with -ENODATA, * so size must not be set on MDS for released file * but other attributes must be set @@ -1304,29 +1244,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED) file_is_released = true; ccc_inode_lsm_put(inode, lsm); + + if (!hsm_import && attr->ia_valid & ATTR_SIZE) { + if (file_is_released) { + rc = ll_layout_restore(inode, 0, attr->ia_size); + if (rc < 0) + goto out; + + file_is_released = false; + ll_layout_refresh(inode, &gen); + } + + /* + * If we are changing file size, file content is + * modified, flag it. + */ + attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_DATA_MODIFIED; + spin_unlock(&lli->lli_lock); + op_data->op_bias |= MDS_DATA_MODIFIED; + } } - /* if not in HSM import mode, clear size attr for released file - * we clear the attribute send to MDT in op_data, not the original - * received from caller in attr which is used later to - * decide return code - */ - if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import) - op_data->op_attr.ia_valid &= ~ATTR_SIZE; + memcpy(&op_data->op_attr, attr, sizeof(*attr)); + + /* Open epoch for truncate. */ + if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import && + (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) + op_data->op_flags = MF_EPOCH_OPEN; rc = ll_md_setattr(dentry, op_data, &mod); if (rc) goto out; - /* truncate failed (only when non HSM import), others succeed */ - if (file_is_released) { - if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) - rc = -ENODATA; - else - rc = 0; - goto out; - } - /* RPC to MDT is sent, cancel data modification flag */ if (op_data->op_bias & MDS_DATA_MODIFIED) { spin_lock(&lli->lli_lock); @@ -1335,7 +1286,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) } ll_ioepoch_open(lli, op_data->op_ioepoch); - if (!S_ISREG(inode->i_mode)) { + if (!S_ISREG(inode->i_mode) || file_is_released) { rc = 0; goto out; } @@ -1532,12 +1483,8 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) lli->lli_maxbytes = MAX_LFS_FILESIZE; } - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - if (body->valid & OBD_MD_FLRMTPERM) - ll_update_remote_perm(inode, md->remote_perm); - } #ifdef CONFIG_FS_POSIX_ACL - else if (body->valid & OBD_MD_FLACL) { + if (body->valid & OBD_MD_FLACL) { spin_lock(&lli->lli_lock); if (lli->lli_posix_acl) posix_acl_release(lli->lli_posix_acl); @@ -1552,7 +1499,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) if (body->valid & OBD_MD_FLATIME) { if (body->atime > LTIME_S(inode->i_atime)) LTIME_S(inode->i_atime) = body->atime; - lli->lli_lvb.lvb_atime = body->atime; + lli->lli_atime = body->atime; } if (body->valid & OBD_MD_FLMTIME) { if (body->mtime > LTIME_S(inode->i_mtime)) { @@ -1561,12 +1508,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) body->mtime); LTIME_S(inode->i_mtime) = body->mtime; } - lli->lli_lvb.lvb_mtime = body->mtime; + lli->lli_mtime = body->mtime; } if (body->valid & OBD_MD_FLCTIME) { if (body->ctime > LTIME_S(inode->i_ctime)) LTIME_S(inode->i_ctime) = body->ctime; - lli->lli_lvb.lvb_ctime = body->ctime; + lli->lli_ctime = body->ctime; } if (body->valid & OBD_MD_FLMODE) inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); @@ -1593,12 +1540,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) /* FID shouldn't be changed! */ if (fid_is_sane(&lli->lli_fid)) { LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1), - "Trying to change FID "DFID - " to the "DFID", inode %lu/%u(%p)\n", + "Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n", PFID(&lli->lli_fid), PFID(&body->fid1), - inode->i_ino, inode->i_generation, inode); - } else + PFID(ll_inode2fid(inode)), inode); + } else { lli->lli_fid = body->fid1; + } } LASSERT(fid_seq(&lli->lli_fid) != 0); @@ -1622,8 +1569,10 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) if (lli->lli_flags & (LLIF_DONE_WRITING | LLIF_EPOCH_PENDING | LLIF_SOM_DIRTY)) { - CERROR("ino %lu flags %u still has size authority! do not trust the size got from MDS\n", - inode->i_ino, lli->lli_flags); + CERROR("%s: inode "DFID" flags %u still has size authority! do not trust the size got from MDS\n", + sbi->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), + lli->lli_flags); } else { /* Use old size assignment to avoid * deadlock bz14138 & bz14326 @@ -1699,7 +1648,7 @@ void ll_read_inode2(struct inode *inode, void *opaque) void ll_delete_inode(struct inode *inode) { - struct cl_inode_info *lli = cl_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); if (S_ISREG(inode->i_mode) && lli->lli_clob) /* discard all dirty pages before truncating them, required by @@ -1715,8 +1664,8 @@ void ll_delete_inode(struct inode *inode) spin_lock_irq(&inode->i_data.tree_lock); spin_unlock_irq(&inode->i_data.tree_lock); LASSERTF(inode->i_data.nrpages == 0, - "inode=%lu/%u(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n", - inode->i_ino, inode->i_generation, inode, + "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n", + PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages); } /* Workaround end */ @@ -1747,7 +1696,9 @@ int ll_iocontrol(struct inode *inode, struct file *file, rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc) { - CERROR("failure %d inode %lu\n", rc, inode->i_ino); + CERROR("%s: failure inode "DFID": rc = %d\n", + sbi->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); return -abs(rc); } @@ -1772,7 +1723,7 @@ int ll_iocontrol(struct inode *inode, struct file *file, if (IS_ERR(op_data)) return PTR_ERR(op_data); - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags; + op_data->op_attr_flags = flags; op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &req, NULL); @@ -1967,7 +1918,13 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, * At this point server returns to client's same fid as client * generated for creating. So using ->fid1 is okay here. */ - LASSERT(fid_is_sane(&md.body->fid1)); + if (!fid_is_sane(&md.body->fid1)) { + CERROR("%s: Fid is insane " DFID "\n", + ll_get_fsname(sb, NULL, 0), + PFID(&md.body->fid1)); + rc = -EINVAL; + goto out; + } *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1, sbi->ll_flags & LL_SBI_32BIT_API), @@ -1994,11 +1951,11 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, * 3. proc2: refresh layout and layout lock granted * 4. proc1: to apply a stale layout */ - if (it && it->d.lustre.it_lock_mode != 0) { + if (it && it->it_lock_mode != 0) { struct lustre_handle lockh; struct ldlm_lock *lock; - lockh.cookie = it->d.lustre.it_lock_handle; + lockh.cookie = it->it_lock_handle; lock = ldlm_handle2lock(&lockh); LASSERT(lock); if (ldlm_has_layout(lock)) { @@ -2066,11 +2023,11 @@ int ll_obd_statfs(struct inode *inode, void __user *arg) } memcpy(&type, data->ioc_inlbuf1, sizeof(__u32)); - if (type & LL_STATFS_LMV) + if (type & LL_STATFS_LMV) { exp = sbi->ll_md_exp; - else if (type & LL_STATFS_LOV) + } else if (type & LL_STATFS_LOV) { exp = sbi->ll_dt_exp; - else { + } else { rc = -ENODEV; goto out_statfs; } @@ -2271,7 +2228,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) { char *buf, *path = NULL; struct dentry *dentry = NULL; - struct ccc_object *obj = cl_inode2ccc(page->mapping->host); + struct vvp_object *obj = cl_inode2vvp(page->mapping->host); /* this can be called inside spin lock so use GFP_ATOMIC. */ buf = (char *)__get_free_page(GFP_ATOMIC); @@ -2285,7 +2242,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0), s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev, - PFID(&obj->cob_header.coh_lu.loh_fid), + PFID(&obj->vob_header.coh_lu.loh_fid), (path && !IS_ERR(path)) ? path : "", ioret); if (dentry) diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index 5b484e62ffd0..66ee5db5fce8 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -57,10 +53,10 @@ void policy_from_vma(ldlm_policy_data_t *policy, struct vm_area_struct *vma, unsigned long addr, size_t count) { - policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) + + policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) + (vma->vm_pgoff << PAGE_SHIFT); policy->l_extent.end = (policy->l_extent.start + count - 1) | - ~CFS_PAGE_MASK; + ~PAGE_MASK; } struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr, @@ -123,7 +119,8 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret, *env_ret = env; - io = ccc_env_thread_io(env); +restart: + io = vvp_env_thread_io(env); io->ci_obj = ll_i2info(inode)->lli_clob; LASSERT(io->ci_obj); @@ -146,17 +143,20 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret, rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj); if (rc == 0) { - struct ccc_io *cio = ccc_env_io(env); + struct vvp_io *vio = vvp_env_io(env); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - LASSERT(cio->cui_cl.cis_io == io); + LASSERT(vio->vui_cl.cis_io == io); /* mmap lock must be MANDATORY it has to cache pages. */ io->ci_lockreq = CILR_MANDATORY; - cio->cui_fd = fd; + vio->vui_fd = fd; } else { LASSERT(rc < 0); cl_io_fini(env, io); + if (io->ci_need_restart) + goto restart; + cl_env_nested_put(nest, env); io = ERR_PTR(rc); } @@ -196,18 +196,11 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage, set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM)); - /* we grab lli_trunc_sem to exclude truncate case. - * Otherwise, we could add dirty pages into osc cache - * while truncate is on-going. - */ - inode = ccc_object_inode(io->ci_obj); + inode = vvp_object_inode(io->ci_obj); lli = ll_i2info(inode); - down_read(&lli->lli_trunc_sem); result = cl_io_loop(env, io); - up_read(&lli->lli_trunc_sem); - cfs_restore_sigs(set); if (result == 0) { @@ -307,17 +300,22 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) vio = vvp_env_io(env); vio->u.fault.ft_vma = vma; vio->u.fault.ft_vmpage = NULL; - vio->u.fault.fault.ft_vmf = vmf; - vio->u.fault.fault.ft_flags = 0; - vio->u.fault.fault.ft_flags_valid = false; + vio->u.fault.ft_vmf = vmf; + vio->u.fault.ft_flags = 0; + vio->u.fault.ft_flags_valid = false; + + /* May call ll_readpage() */ + ll_cl_add(vma->vm_file, env, io); result = cl_io_loop(env, io); + ll_cl_remove(vma->vm_file, env); + /* ft_flags are only valid if we reached * the call to filemap_fault */ - if (vio->u.fault.fault.ft_flags_valid) - fault_ret = vio->u.fault.fault.ft_flags; + if (vio->u.fault.ft_flags_valid) + fault_ret = vio->u.fault.ft_flags; vmpage = vio->u.fault.ft_vmpage; if (result != 0 && vmpage) { @@ -390,9 +388,11 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) result = ll_page_mkwrite0(vma, vmf->page, &retry); if (!printed && ++count > 16) { - CWARN("app(%s): the page %lu of file %lu is under heavy contention.\n", + const struct dentry *de = vma->vm_file->f_path.dentry; + + CWARN("app(%s): the page %lu of file "DFID" is under heavy contention\n", current->comm, vmf->pgoff, - file_inode(vma->vm_file)->i_ino); + PFID(ll_inode2fid(de->d_inode))); printed = true; } } while (retry); @@ -422,16 +422,16 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /** * To avoid cancel the locks covering mmapped region for lock cache pressure, - * we track the mapped vma count in ccc_object::cob_mmap_cnt. + * we track the mapped vma count in vvp_object::vob_mmap_cnt. */ static void ll_vm_open(struct vm_area_struct *vma) { struct inode *inode = file_inode(vma->vm_file); - struct ccc_object *vob = cl_inode2ccc(inode); + struct vvp_object *vob = cl_inode2vvp(inode); LASSERT(vma->vm_file); - LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0); - atomic_inc(&vob->cob_mmap_cnt); + LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0); + atomic_inc(&vob->vob_mmap_cnt); } /** @@ -440,11 +440,11 @@ static void ll_vm_open(struct vm_area_struct *vma) static void ll_vm_close(struct vm_area_struct *vma) { struct inode *inode = file_inode(vma->vm_file); - struct ccc_object *vob = cl_inode2ccc(inode); + struct vvp_object *vob = cl_inode2vvp(inode); LASSERT(vma->vm_file); - atomic_dec(&vob->cob_mmap_cnt); - LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0); + atomic_dec(&vob->vob_mmap_cnt); + LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0); } /* XXX put nice comment here. talk about __free_pte -> dirty pages and diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c index 193aab879709..65972c892731 100644 --- a/drivers/staging/lustre/lustre/llite/llite_nfs.c +++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -119,7 +115,7 @@ struct inode *search_inode_for_lustre(struct super_block *sb, rc = md_getattr(sbi->ll_md_exp, op_data, &req); kfree(op_data); if (rc) { - CERROR("can't get object attrs, fid "DFID", rc %d\n", + CDEBUG(D_INFO, "can't get object attrs, fid "DFID", rc %d\n", PFID(fid), rc); return ERR_PTR(rc); } @@ -172,6 +168,24 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren /* N.B. d_obtain_alias() drops inode ref on error */ result = d_obtain_alias(inode); + if (!IS_ERR(result)) { + int rc; + + rc = ll_d_init(result); + if (rc < 0) { + dput(result); + result = ERR_PTR(rc); + } else { + struct ll_dentry_data *ldd = ll_d2d(result); + + /* + * Need to signal to the ll_intent_file_open that + * we came from NFS and so opencache needs to be + * enabled for this one + */ + ldd->lld_nfs_dentry = 1; + } + } return result; } @@ -191,8 +205,9 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen, int fileid_len = sizeof(struct lustre_nfs_fid) / 4; struct lustre_nfs_fid *nfs_fid = (void *)fh; - CDEBUG(D_INFO, "encoding for (%lu," DFID ") maxlen=%d minlen=%d\n", - inode->i_ino, PFID(ll_inode2fid(inode)), *plen, fileid_len); + CDEBUG(D_INFO, "%s: encoding for ("DFID") maxlen=%d minlen=%d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), *plen, fileid_len); if (*plen < fileid_len) { *plen = fileid_len; @@ -298,8 +313,9 @@ static struct dentry *ll_get_parent(struct dentry *dchild) sbi = ll_s2sbi(dir->i_sb); - CDEBUG(D_INFO, "getting parent for (%lu," DFID ")\n", - dir->i_ino, PFID(ll_inode2fid(dir))); + CDEBUG(D_INFO, "%s: getting parent for ("DFID")\n", + ll_get_fsname(dir->i_sb, NULL, 0), + PFID(ll_inode2fid(dir))); rc = ll_get_default_mdsize(sbi, &lmmsize); if (rc != 0) @@ -314,15 +330,20 @@ static struct dentry *ll_get_parent(struct dentry *dchild) rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc) { - CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino); + CERROR("%s: failure inode "DFID" get parent: rc = %d\n", + ll_get_fsname(dir->i_sb, NULL, 0), + PFID(ll_inode2fid(dir)), rc); return ERR_PTR(rc); } body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - LASSERT(body->valid & OBD_MD_FLID); - - CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n", - PFID(ll_inode2fid(dir)), PFID(&body->fid1)); - + /* + * LU-3952: MDT may lost the FID of its parent, we should not crash + * the NFS server, ll_iget_for_nfs() will handle the error. + */ + if (body->valid & OBD_MD_FLID) { + CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n", + PFID(ll_inode2fid(dir)), PFID(&body->fid1)); + } result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL); ptlrpc_req_finished(req); diff --git a/drivers/staging/lustre/lustre/llite/llite_rmtacl.c b/drivers/staging/lustre/lustre/llite/llite_rmtacl.c deleted file mode 100644 index 8509b07cb5c7..000000000000 --- a/drivers/staging/lustre/lustre/llite/llite_rmtacl.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/llite/llite_rmtacl.c - * - * Lustre Remote User Access Control List. - * - * Author: Fan Yong <fanyong@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#ifdef CONFIG_FS_POSIX_ACL - -#include "../include/lustre_lite.h" -#include "../include/lustre_eacl.h" -#include "llite_internal.h" - -static inline __u32 rce_hashfunc(uid_t id) -{ - return id & (RCE_HASHES - 1); -} - -static inline __u32 ee_hashfunc(uid_t id) -{ - return id & (EE_HASHES - 1); -} - -u64 rce_ops2valid(int ops) -{ - switch (ops) { - case RMT_LSETFACL: - return OBD_MD_FLRMTLSETFACL; - case RMT_LGETFACL: - return OBD_MD_FLRMTLGETFACL; - case RMT_RSETFACL: - return OBD_MD_FLRMTRSETFACL; - case RMT_RGETFACL: - return OBD_MD_FLRMTRGETFACL; - default: - return 0; - } -} - -static struct rmtacl_ctl_entry *rce_alloc(pid_t key, int ops) -{ - struct rmtacl_ctl_entry *rce; - - rce = kzalloc(sizeof(*rce), GFP_NOFS); - if (!rce) - return NULL; - - INIT_LIST_HEAD(&rce->rce_list); - rce->rce_key = key; - rce->rce_ops = ops; - - return rce; -} - -static void rce_free(struct rmtacl_ctl_entry *rce) -{ - if (!list_empty(&rce->rce_list)) - list_del(&rce->rce_list); - - kfree(rce); -} - -static struct rmtacl_ctl_entry *__rct_search(struct rmtacl_ctl_table *rct, - pid_t key) -{ - struct rmtacl_ctl_entry *rce; - struct list_head *head = &rct->rct_entries[rce_hashfunc(key)]; - - list_for_each_entry(rce, head, rce_list) - if (rce->rce_key == key) - return rce; - - return NULL; -} - -struct rmtacl_ctl_entry *rct_search(struct rmtacl_ctl_table *rct, pid_t key) -{ - struct rmtacl_ctl_entry *rce; - - spin_lock(&rct->rct_lock); - rce = __rct_search(rct, key); - spin_unlock(&rct->rct_lock); - return rce; -} - -int rct_add(struct rmtacl_ctl_table *rct, pid_t key, int ops) -{ - struct rmtacl_ctl_entry *rce, *e; - - rce = rce_alloc(key, ops); - if (!rce) - return -ENOMEM; - - spin_lock(&rct->rct_lock); - e = __rct_search(rct, key); - if (unlikely(e)) { - CWARN("Unexpected stale rmtacl_entry found: [key: %d] [ops: %d]\n", - (int)key, ops); - rce_free(e); - } - list_add_tail(&rce->rce_list, &rct->rct_entries[rce_hashfunc(key)]); - spin_unlock(&rct->rct_lock); - - return 0; -} - -int rct_del(struct rmtacl_ctl_table *rct, pid_t key) -{ - struct rmtacl_ctl_entry *rce; - - spin_lock(&rct->rct_lock); - rce = __rct_search(rct, key); - if (rce) - rce_free(rce); - spin_unlock(&rct->rct_lock); - - return rce ? 0 : -ENOENT; -} - -void rct_init(struct rmtacl_ctl_table *rct) -{ - int i; - - spin_lock_init(&rct->rct_lock); - for (i = 0; i < RCE_HASHES; i++) - INIT_LIST_HEAD(&rct->rct_entries[i]); -} - -void rct_fini(struct rmtacl_ctl_table *rct) -{ - struct rmtacl_ctl_entry *rce; - int i; - - spin_lock(&rct->rct_lock); - for (i = 0; i < RCE_HASHES; i++) - while (!list_empty(&rct->rct_entries[i])) { - rce = list_entry(rct->rct_entries[i].next, - struct rmtacl_ctl_entry, rce_list); - rce_free(rce); - } - spin_unlock(&rct->rct_lock); -} - -static struct eacl_entry *ee_alloc(pid_t key, struct lu_fid *fid, int type, - ext_acl_xattr_header *header) -{ - struct eacl_entry *ee; - - ee = kzalloc(sizeof(*ee), GFP_NOFS); - if (!ee) - return NULL; - - INIT_LIST_HEAD(&ee->ee_list); - ee->ee_key = key; - ee->ee_fid = *fid; - ee->ee_type = type; - ee->ee_acl = header; - - return ee; -} - -void ee_free(struct eacl_entry *ee) -{ - if (!list_empty(&ee->ee_list)) - list_del(&ee->ee_list); - - if (ee->ee_acl) - lustre_ext_acl_xattr_free(ee->ee_acl); - - kfree(ee); -} - -static struct eacl_entry *__et_search_del(struct eacl_table *et, pid_t key, - struct lu_fid *fid, int type) -{ - struct eacl_entry *ee; - struct list_head *head = &et->et_entries[ee_hashfunc(key)]; - - LASSERT(fid); - list_for_each_entry(ee, head, ee_list) - if (ee->ee_key == key) { - if (lu_fid_eq(&ee->ee_fid, fid) && - ee->ee_type == type) { - list_del_init(&ee->ee_list); - return ee; - } - } - - return NULL; -} - -struct eacl_entry *et_search_del(struct eacl_table *et, pid_t key, - struct lu_fid *fid, int type) -{ - struct eacl_entry *ee; - - spin_lock(&et->et_lock); - ee = __et_search_del(et, key, fid, type); - spin_unlock(&et->et_lock); - return ee; -} - -void et_search_free(struct eacl_table *et, pid_t key) -{ - struct eacl_entry *ee, *next; - struct list_head *head = &et->et_entries[ee_hashfunc(key)]; - - spin_lock(&et->et_lock); - list_for_each_entry_safe(ee, next, head, ee_list) - if (ee->ee_key == key) - ee_free(ee); - - spin_unlock(&et->et_lock); -} - -int ee_add(struct eacl_table *et, pid_t key, struct lu_fid *fid, int type, - ext_acl_xattr_header *header) -{ - struct eacl_entry *ee, *e; - - ee = ee_alloc(key, fid, type, header); - if (!ee) - return -ENOMEM; - - spin_lock(&et->et_lock); - e = __et_search_del(et, key, fid, type); - if (unlikely(e)) { - CWARN("Unexpected stale eacl_entry found: [key: %d] [fid: " DFID "] [type: %d]\n", - (int)key, PFID(fid), type); - ee_free(e); - } - list_add_tail(&ee->ee_list, &et->et_entries[ee_hashfunc(key)]); - spin_unlock(&et->et_lock); - - return 0; -} - -void et_init(struct eacl_table *et) -{ - int i; - - spin_lock_init(&et->et_lock); - for (i = 0; i < EE_HASHES; i++) - INIT_LIST_HEAD(&et->et_entries[i]); -} - -void et_fini(struct eacl_table *et) -{ - struct eacl_entry *ee; - int i; - - spin_lock(&et->et_lock); - for (i = 0; i < EE_HASHES; i++) - while (!list_empty(&et->et_entries[i])) { - ee = list_entry(et->et_entries[i].next, - struct eacl_entry, ee_list); - ee_free(ee); - } - spin_unlock(&et->et_lock); -} - -#endif diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c deleted file mode 100644 index f169c0db63b4..000000000000 --- a/drivers/staging/lustre/lustre/llite/lloop.c +++ /dev/null @@ -1,882 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -/* - * linux/drivers/block/loop.c - * - * Written by Theodore Ts'o, 3/29/93 - * - * Copyright 1993 by Theodore Ts'o. Redistribution of this file is - * permitted under the GNU General Public License. - * - * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 - * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 - * - * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997 - * - * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998 - * - * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998 - * - * Loadable modules and other fixes by AK, 1998 - * - * Maximum number of loop devices now dynamic via max_loop module parameter. - * Russell Kroll <rkroll@exploits.org> 19990701 - * - * Maximum number of loop devices when compiled-in now selectable by passing - * max_loop=<1-255> to the kernel on boot. - * Erik I. Bols?, <eriki@himolde.no>, Oct 31, 1999 - * - * Completely rewrite request handling to be make_request_fn style and - * non blocking, pushing work to a helper thread. Lots of fixes from - * Al Viro too. - * Jens Axboe <axboe@suse.de>, Nov 2000 - * - * Support up to 256 loop devices - * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 - * - * Support for falling back on the write file operation when the address space - * operations prepare_write and/or commit_write are not available on the - * backing filesystem. - * Anton Altaparmakov, 16 Feb 2005 - * - * Still To Fix: - * - Advisory locking is ignored here. - * - Should use an own CAP_* category instead of CAP_SYS_ADMIN - * - */ - -#include <linux/module.h> - -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/major.h> -#include <linux/wait.h> -#include <linux/blkdev.h> -#include <linux/blkpg.h> -#include <linux/init.h> -#include <linux/swap.h> -#include <linux/slab.h> -#include <linux/suspend.h> -#include <linux/writeback.h> -#include <linux/buffer_head.h> /* for invalidate_bdev() */ -#include <linux/completion.h> -#include <linux/highmem.h> -#include <linux/gfp.h> -#include <linux/pagevec.h> -#include <linux/uaccess.h> - -#include "../include/lustre_lib.h" -#include "../include/lustre_lite.h" -#include "llite_internal.h" - -#define LLOOP_MAX_SEGMENTS LNET_MAX_IOV - -/* Possible states of device */ -enum { - LLOOP_UNBOUND, - LLOOP_BOUND, - LLOOP_RUNDOWN, -}; - -struct lloop_device { - int lo_number; - int lo_refcnt; - loff_t lo_offset; - loff_t lo_sizelimit; - int lo_flags; - struct file *lo_backing_file; - struct block_device *lo_device; - unsigned lo_blocksize; - - gfp_t old_gfp_mask; - - spinlock_t lo_lock; - struct bio *lo_bio; - struct bio *lo_biotail; - int lo_state; - struct semaphore lo_sem; - struct mutex lo_ctl_mutex; - atomic_t lo_pending; - wait_queue_head_t lo_bh_wait; - - struct request_queue *lo_queue; - - const struct lu_env *lo_env; - struct cl_io lo_io; - struct ll_dio_pages lo_pvec; - - /* data to handle bio for lustre. */ - struct lo_request_data { - struct page *lrd_pages[LLOOP_MAX_SEGMENTS]; - loff_t lrd_offsets[LLOOP_MAX_SEGMENTS]; - } lo_requests[1]; -}; - -/* - * Loop flags - */ -enum { - LO_FLAGS_READ_ONLY = 1, -}; - -static int lloop_major; -#define MAX_LOOP_DEFAULT 16 -static int max_loop = MAX_LOOP_DEFAULT; -static struct lloop_device *loop_dev; -static struct gendisk **disks; -static struct mutex lloop_mutex; -static void *ll_iocontrol_magic; - -static loff_t get_loop_size(struct lloop_device *lo, struct file *file) -{ - loff_t size, offset, loopsize; - - /* Compute loopsize in bytes */ - size = i_size_read(file->f_mapping->host); - offset = lo->lo_offset; - loopsize = size - offset; - if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) - loopsize = lo->lo_sizelimit; - - /* - * Unfortunately, if we want to do I/O on the device, - * the number of 512-byte sectors has to fit into a sector_t. - */ - return loopsize >> 9; -} - -static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head) -{ - const struct lu_env *env = lo->lo_env; - struct cl_io *io = &lo->lo_io; - struct inode *inode = file_inode(lo->lo_backing_file); - struct cl_object *obj = ll_i2info(inode)->lli_clob; - pgoff_t offset; - int ret; - int rw; - u32 page_count = 0; - struct bio_vec bvec; - struct bvec_iter iter; - struct bio *bio; - ssize_t bytes; - - struct ll_dio_pages *pvec = &lo->lo_pvec; - struct page **pages = pvec->ldp_pages; - loff_t *offsets = pvec->ldp_offsets; - - truncate_inode_pages(inode->i_mapping, 0); - - /* initialize the IO */ - memset(io, 0, sizeof(*io)); - io->ci_obj = obj; - ret = cl_io_init(env, io, CIT_MISC, obj); - if (ret) - return io->ci_result; - io->ci_lockreq = CILR_NEVER; - - rw = head->bi_rw; - for (bio = head; bio ; bio = bio->bi_next) { - LASSERT(rw == bio->bi_rw); - - offset = (pgoff_t)(bio->bi_iter.bi_sector << 9) + lo->lo_offset; - bio_for_each_segment(bvec, bio, iter) { - BUG_ON(bvec.bv_offset != 0); - BUG_ON(bvec.bv_len != PAGE_SIZE); - - pages[page_count] = bvec.bv_page; - offsets[page_count] = offset; - page_count++; - offset += bvec.bv_len; - } - LASSERT(page_count <= LLOOP_MAX_SEGMENTS); - } - - ll_stats_ops_tally(ll_i2sbi(inode), - (rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ, - page_count); - - pvec->ldp_size = page_count << PAGE_SHIFT; - pvec->ldp_nr = page_count; - - /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to - * write those pages into OST. Even worse case is that more pages - * would be asked to write out to swap space, and then finally get here - * again. - * Unfortunately this is NOT easy to fix. - * Thoughts on solution: - * 0. Define a reserved pool for cl_pages, which could be a list of - * pre-allocated cl_pages; - * 1. Define a new operation in cl_object_operations{}, says clo_depth, - * which measures how many layers for this lustre object. Generally - * speaking, the depth would be 2, one for llite, and one for lovsub. - * However, for SNS, there will be more since we need additional page - * to store parity; - * 2. Reserve the # of (page_count * depth) cl_pages from the reserved - * pool. Afterwards, the clio would allocate the pages from reserved - * pool, this guarantees we needn't allocate the cl_pages from - * generic cl_page slab cache. - * Of course, if there is NOT enough pages in the pool, we might - * be asked to write less pages once, this purely depends on - * implementation. Anyway, we should be careful to avoid deadlocking. - */ - inode_lock(inode); - bytes = ll_direct_rw_pages(env, io, rw, inode, pvec); - inode_unlock(inode); - cl_io_fini(env, io); - return (bytes == pvec->ldp_size) ? 0 : (int)bytes; -} - -/* - * Add bio to back of pending list - */ -static void loop_add_bio(struct lloop_device *lo, struct bio *bio) -{ - unsigned long flags; - - spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_biotail) { - lo->lo_biotail->bi_next = bio; - lo->lo_biotail = bio; - } else - lo->lo_bio = lo->lo_biotail = bio; - spin_unlock_irqrestore(&lo->lo_lock, flags); - - atomic_inc(&lo->lo_pending); - if (waitqueue_active(&lo->lo_bh_wait)) - wake_up(&lo->lo_bh_wait); -} - -/* - * Grab first pending buffer - */ -static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req) -{ - struct bio *first; - struct bio **bio; - unsigned int count = 0; - unsigned int page_count = 0; - int rw; - - spin_lock_irq(&lo->lo_lock); - first = lo->lo_bio; - if (unlikely(!first)) { - spin_unlock_irq(&lo->lo_lock); - return 0; - } - - /* TODO: need to split the bio, too bad. */ - LASSERT(first->bi_vcnt <= LLOOP_MAX_SEGMENTS); - - rw = first->bi_rw; - bio = &lo->lo_bio; - while (*bio && (*bio)->bi_rw == rw) { - CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u\n", - (unsigned long long)(*bio)->bi_iter.bi_sector, - (*bio)->bi_iter.bi_size, - page_count, (*bio)->bi_vcnt); - if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS) - break; - - page_count += (*bio)->bi_vcnt; - count++; - bio = &(*bio)->bi_next; - } - if (*bio) { - /* Some of bios can't be mergeable. */ - lo->lo_bio = *bio; - *bio = NULL; - } else { - /* Hit the end of queue */ - lo->lo_biotail = NULL; - lo->lo_bio = NULL; - } - *req = first; - spin_unlock_irq(&lo->lo_lock); - return count; -} - -static blk_qc_t loop_make_request(struct request_queue *q, struct bio *old_bio) -{ - struct lloop_device *lo = q->queuedata; - int rw = bio_rw(old_bio); - int inactive; - - blk_queue_split(q, &old_bio, q->bio_split); - - if (!lo) - goto err; - - CDEBUG(D_INFO, "submit bio sector %llu size %u\n", - (unsigned long long)old_bio->bi_iter.bi_sector, - old_bio->bi_iter.bi_size); - - spin_lock_irq(&lo->lo_lock); - inactive = lo->lo_state != LLOOP_BOUND; - spin_unlock_irq(&lo->lo_lock); - if (inactive) - goto err; - - if (rw == WRITE) { - if (lo->lo_flags & LO_FLAGS_READ_ONLY) - goto err; - } else if (rw == READA) { - rw = READ; - } else if (rw != READ) { - CERROR("lloop: unknown command (%x)\n", rw); - goto err; - } - loop_add_bio(lo, old_bio); - return BLK_QC_T_NONE; -err: - bio_io_error(old_bio); - return BLK_QC_T_NONE; -} - -static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio) -{ - int ret; - - ret = do_bio_lustrebacked(lo, bio); - while (bio) { - struct bio *tmp = bio->bi_next; - - bio->bi_next = NULL; - bio->bi_error = ret; - bio_endio(bio); - bio = tmp; - } -} - -static inline int loop_active(struct lloop_device *lo) -{ - return atomic_read(&lo->lo_pending) || - (lo->lo_state == LLOOP_RUNDOWN); -} - -/* - * worker thread that handles reads/writes to file backed loop devices, - * to avoid blocking in our make_request_fn. - */ -static int loop_thread(void *data) -{ - struct lloop_device *lo = data; - struct bio *bio; - unsigned int count; - unsigned long times = 0; - unsigned long total_count = 0; - - struct lu_env *env; - int refcheck; - int ret = 0; - - set_user_nice(current, MIN_NICE); - - lo->lo_state = LLOOP_BOUND; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) { - ret = PTR_ERR(env); - goto out; - } - - lo->lo_env = env; - memset(&lo->lo_pvec, 0, sizeof(lo->lo_pvec)); - lo->lo_pvec.ldp_pages = lo->lo_requests[0].lrd_pages; - lo->lo_pvec.ldp_offsets = lo->lo_requests[0].lrd_offsets; - - /* - * up sem, we are running - */ - up(&lo->lo_sem); - - for (;;) { - wait_event(lo->lo_bh_wait, loop_active(lo)); - if (!atomic_read(&lo->lo_pending)) { - int exiting = 0; - - spin_lock_irq(&lo->lo_lock); - exiting = (lo->lo_state == LLOOP_RUNDOWN); - spin_unlock_irq(&lo->lo_lock); - if (exiting) - break; - } - - bio = NULL; - count = loop_get_bio(lo, &bio); - if (!count) { - CWARN("lloop(minor: %d): missing bio\n", lo->lo_number); - continue; - } - - total_count += count; - if (total_count < count) { /* overflow */ - total_count = count; - times = 1; - } else { - times++; - } - if ((times & 127) == 0) { - CDEBUG(D_INFO, "total: %lu, count: %lu, avg: %lu\n", - total_count, times, total_count / times); - } - - LASSERT(bio); - LASSERT(count <= atomic_read(&lo->lo_pending)); - loop_handle_bio(lo, bio); - atomic_sub(count, &lo->lo_pending); - } - cl_env_put(env, &refcheck); - -out: - up(&lo->lo_sem); - return ret; -} - -static int loop_set_fd(struct lloop_device *lo, struct file *unused, - struct block_device *bdev, struct file *file) -{ - struct inode *inode; - struct address_space *mapping; - int lo_flags = 0; - int error; - loff_t size; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - error = -EBUSY; - if (lo->lo_state != LLOOP_UNBOUND) - goto out; - - mapping = file->f_mapping; - inode = mapping->host; - - error = -EINVAL; - if (!S_ISREG(inode->i_mode) || inode->i_sb->s_magic != LL_SUPER_MAGIC) - goto out; - - if (!(file->f_mode & FMODE_WRITE)) - lo_flags |= LO_FLAGS_READ_ONLY; - - size = get_loop_size(lo, file); - - if ((loff_t)(sector_t)size != size) { - error = -EFBIG; - goto out; - } - - /* remove all pages in cache so as dirty pages not to be existent. */ - truncate_inode_pages(mapping, 0); - - set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); - - lo->lo_blocksize = PAGE_SIZE; - lo->lo_device = bdev; - lo->lo_flags = lo_flags; - lo->lo_backing_file = file; - lo->lo_sizelimit = 0; - lo->old_gfp_mask = mapping_gfp_mask(mapping); - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); - - lo->lo_bio = lo->lo_biotail = NULL; - - /* - * set queue make_request_fn, and add limits based on lower level - * device - */ - blk_queue_make_request(lo->lo_queue, loop_make_request); - lo->lo_queue->queuedata = lo; - - /* queue parameters */ - CLASSERT(PAGE_SIZE < (1 << (sizeof(unsigned short) * 8))); - blk_queue_logical_block_size(lo->lo_queue, - (unsigned short)PAGE_SIZE); - blk_queue_max_hw_sectors(lo->lo_queue, - LLOOP_MAX_SEGMENTS << (PAGE_SHIFT - 9)); - blk_queue_max_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS); - - set_capacity(disks[lo->lo_number], size); - bd_set_size(bdev, size << 9); - - set_blocksize(bdev, lo->lo_blocksize); - - kthread_run(loop_thread, lo, "lloop%d", lo->lo_number); - down(&lo->lo_sem); - return 0; - -out: - /* This is safe: open() is still holding a reference. */ - module_put(THIS_MODULE); - return error; -} - -static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev, - int count) -{ - struct file *filp = lo->lo_backing_file; - gfp_t gfp = lo->old_gfp_mask; - - if (lo->lo_state != LLOOP_BOUND) - return -ENXIO; - - if (lo->lo_refcnt > count) /* we needed one fd for the ioctl */ - return -EBUSY; - - if (!filp) - return -EINVAL; - - spin_lock_irq(&lo->lo_lock); - lo->lo_state = LLOOP_RUNDOWN; - spin_unlock_irq(&lo->lo_lock); - wake_up(&lo->lo_bh_wait); - - down(&lo->lo_sem); - lo->lo_backing_file = NULL; - lo->lo_device = NULL; - lo->lo_offset = 0; - lo->lo_sizelimit = 0; - lo->lo_flags = 0; - invalidate_bdev(bdev); - set_capacity(disks[lo->lo_number], 0); - bd_set_size(bdev, 0); - mapping_set_gfp_mask(filp->f_mapping, gfp); - lo->lo_state = LLOOP_UNBOUND; - fput(filp); - /* This is safe: open() is still holding a reference. */ - module_put(THIS_MODULE); - return 0; -} - -static int lo_open(struct block_device *bdev, fmode_t mode) -{ - struct lloop_device *lo = bdev->bd_disk->private_data; - - mutex_lock(&lo->lo_ctl_mutex); - lo->lo_refcnt++; - mutex_unlock(&lo->lo_ctl_mutex); - - return 0; -} - -static void lo_release(struct gendisk *disk, fmode_t mode) -{ - struct lloop_device *lo = disk->private_data; - - mutex_lock(&lo->lo_ctl_mutex); - --lo->lo_refcnt; - mutex_unlock(&lo->lo_ctl_mutex); -} - -/* lloop device node's ioctl function. */ -static int lo_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct lloop_device *lo = bdev->bd_disk->private_data; - struct inode *inode = NULL; - int err = 0; - - mutex_lock(&lloop_mutex); - switch (cmd) { - case LL_IOC_LLOOP_DETACH: { - err = loop_clr_fd(lo, bdev, 2); - if (err == 0) - blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */ - break; - } - - case LL_IOC_LLOOP_INFO: { - struct lu_fid fid; - - if (!lo->lo_backing_file) { - err = -ENOENT; - break; - } - if (!inode) - inode = file_inode(lo->lo_backing_file); - if (lo->lo_state == LLOOP_BOUND) - fid = ll_i2info(inode)->lli_fid; - else - fid_zero(&fid); - - if (copy_to_user((void __user *)arg, &fid, sizeof(fid))) - err = -EFAULT; - break; - } - - default: - err = -EINVAL; - break; - } - mutex_unlock(&lloop_mutex); - - return err; -} - -static struct block_device_operations lo_fops = { - .owner = THIS_MODULE, - .open = lo_open, - .release = lo_release, - .ioctl = lo_ioctl, -}; - -/* dynamic iocontrol callback. - * This callback is registered in lloop_init and will be called by - * ll_iocontrol_call. - * - * This is a llite regular file ioctl function. It takes the responsibility - * of attaching or detaching a file by a lloop's device number. - */ -static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file, - unsigned int cmd, unsigned long arg, - void *magic, int *rcp) -{ - struct lloop_device *lo = NULL; - struct block_device *bdev = NULL; - int err = 0; - dev_t dev; - - if (magic != ll_iocontrol_magic) - return LLIOC_CONT; - - if (!disks) { - err = -ENODEV; - goto out1; - } - - CWARN("Enter llop_ioctl\n"); - - mutex_lock(&lloop_mutex); - switch (cmd) { - case LL_IOC_LLOOP_ATTACH: { - struct lloop_device *lo_free = NULL; - int i; - - for (i = 0; i < max_loop; i++, lo = NULL) { - lo = &loop_dev[i]; - if (lo->lo_state == LLOOP_UNBOUND) { - if (!lo_free) - lo_free = lo; - continue; - } - if (file_inode(lo->lo_backing_file) == file_inode(file)) - break; - } - if (lo || !lo_free) { - err = -EBUSY; - goto out; - } - - lo = lo_free; - dev = MKDEV(lloop_major, lo->lo_number); - - /* quit if the used pointer is writable */ - if (put_user((long)old_encode_dev(dev), (long __user *)arg)) { - err = -EFAULT; - goto out; - } - - bdev = blkdev_get_by_dev(dev, file->f_mode, NULL); - if (IS_ERR(bdev)) { - err = PTR_ERR(bdev); - goto out; - } - - get_file(file); - err = loop_set_fd(lo, NULL, bdev, file); - if (err) { - fput(file); - blkdev_put(bdev, 0); - } - - break; - } - - case LL_IOC_LLOOP_DETACH_BYDEV: { - int minor; - - dev = old_decode_dev(arg); - if (MAJOR(dev) != lloop_major) { - err = -EINVAL; - goto out; - } - - minor = MINOR(dev); - if (minor > max_loop - 1) { - err = -EINVAL; - goto out; - } - - lo = &loop_dev[minor]; - if (lo->lo_state != LLOOP_BOUND) { - err = -EINVAL; - goto out; - } - - bdev = lo->lo_device; - err = loop_clr_fd(lo, bdev, 1); - if (err == 0) - blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */ - - break; - } - - default: - err = -EINVAL; - break; - } - -out: - mutex_unlock(&lloop_mutex); -out1: - if (rcp) - *rcp = err; - return LLIOC_STOP; -} - -static int __init lloop_init(void) -{ - int i; - unsigned int cmdlist[] = { - LL_IOC_LLOOP_ATTACH, - LL_IOC_LLOOP_DETACH_BYDEV, - }; - - if (max_loop < 1 || max_loop > 256) { - max_loop = MAX_LOOP_DEFAULT; - CWARN("lloop: invalid max_loop (must be between 1 and 256), using default (%u)\n", - max_loop); - } - - lloop_major = register_blkdev(0, "lloop"); - if (lloop_major < 0) - return -EIO; - - CDEBUG(D_CONFIG, "registered lloop major %d with %u minors\n", - lloop_major, max_loop); - - ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist); - if (!ll_iocontrol_magic) - goto out_mem1; - - loop_dev = kcalloc(max_loop, sizeof(*loop_dev), GFP_KERNEL); - if (!loop_dev) - goto out_mem1; - - disks = kcalloc(max_loop, sizeof(*disks), GFP_KERNEL); - if (!disks) - goto out_mem2; - - for (i = 0; i < max_loop; i++) { - disks[i] = alloc_disk(1); - if (!disks[i]) - goto out_mem3; - } - - mutex_init(&lloop_mutex); - - for (i = 0; i < max_loop; i++) { - struct lloop_device *lo = &loop_dev[i]; - struct gendisk *disk = disks[i]; - - lo->lo_queue = blk_alloc_queue(GFP_KERNEL); - if (!lo->lo_queue) - goto out_mem4; - - mutex_init(&lo->lo_ctl_mutex); - sema_init(&lo->lo_sem, 0); - init_waitqueue_head(&lo->lo_bh_wait); - lo->lo_number = i; - spin_lock_init(&lo->lo_lock); - disk->major = lloop_major; - disk->first_minor = i; - disk->fops = &lo_fops; - sprintf(disk->disk_name, "lloop%d", i); - disk->private_data = lo; - disk->queue = lo->lo_queue; - } - - /* We cannot fail after we call this, so another loop!*/ - for (i = 0; i < max_loop; i++) - add_disk(disks[i]); - return 0; - -out_mem4: - while (i--) - blk_cleanup_queue(loop_dev[i].lo_queue); - i = max_loop; -out_mem3: - while (i--) - put_disk(disks[i]); - kfree(disks); -out_mem2: - kfree(loop_dev); -out_mem1: - unregister_blkdev(lloop_major, "lloop"); - ll_iocontrol_unregister(ll_iocontrol_magic); - CERROR("lloop: ran out of memory\n"); - return -ENOMEM; -} - -static void lloop_exit(void) -{ - int i; - - ll_iocontrol_unregister(ll_iocontrol_magic); - for (i = 0; i < max_loop; i++) { - del_gendisk(disks[i]); - blk_cleanup_queue(loop_dev[i].lo_queue); - put_disk(disks[i]); - } - - unregister_blkdev(lloop_major, "lloop"); - - kfree(disks); - kfree(loop_dev); -} - -module_param(max_loop, int, 0444); -MODULE_PARM_DESC(max_loop, "maximum of lloop_device"); -MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>"); -MODULE_DESCRIPTION("Lustre virtual block device"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(lloop_init); -module_exit(lloop_exit); diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 27ab1261400e..e86bf3c53be3 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -180,11 +176,7 @@ LUSTRE_RO_ATTR(filesfree); static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr, char *buf) { - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - - return sprintf(buf, "%s client\n", - sbi->ll_flags & LL_SBI_RMT_CLIENT ? "remote" : "local"); + return sprintf(buf, "local client\n"); } LUSTRE_RO_ATTR(client_type); @@ -254,7 +246,6 @@ static ssize_t max_read_ahead_mb_store(struct kobject *kobj, pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */ if (pages_number > totalram_pages / 2) { - CERROR("can't set file readahead more than %lu MB\n", totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/ return -ERANGE; @@ -365,7 +356,7 @@ static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v) { struct super_block *sb = m->private; struct ll_sb_info *sbi = ll_s2sbi(sb); - struct cl_client_cache *cache = &sbi->ll_cache; + struct cl_client_cache *cache = sbi->ll_cache; int shift = 20 - PAGE_SHIFT; int max_cached_mb; int unused_mb; @@ -392,7 +383,9 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, { struct super_block *sb = ((struct seq_file *)file->private_data)->private; struct ll_sb_info *sbi = ll_s2sbi(sb); - struct cl_client_cache *cache = &sbi->ll_cache; + struct cl_client_cache *cache = sbi->ll_cache; + struct lu_env *env; + int refcheck; int mult, rc, pages_number; int diff = 0; int nrpages = 0; @@ -430,6 +423,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, goto out; } + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return 0; + diff = -diff; while (diff > 0) { int tmp; @@ -455,19 +452,20 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, break; if (!sbi->ll_dt_exp) { /* being initialized */ - rc = -ENODEV; - break; + rc = 0; + goto out; } /* difficult - have to ask OSCs to drop LRU slots. */ tmp = diff << 1; - rc = obd_set_info_async(NULL, sbi->ll_dt_exp, + rc = obd_set_info_async(env, sbi->ll_dt_exp, sizeof(KEY_CACHE_LRU_SHRINK), KEY_CACHE_LRU_SHRINK, sizeof(tmp), &tmp, NULL); if (rc < 0) break; } + cl_env_put(env, &refcheck); out: if (rc >= 0) { @@ -818,6 +816,23 @@ static ssize_t xattr_cache_store(struct kobject *kobj, } LUSTRE_RW_ATTR(xattr_cache); +static ssize_t unstable_stats_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, + ll_kobj); + struct cl_client_cache *cache = sbi->ll_cache; + int pages, mb; + + pages = atomic_read(&cache->ccc_unstable_nr); + mb = (pages * PAGE_SIZE) >> 20; + + return sprintf(buf, "unstable_pages: %8d\n" + "unstable_mb: %8d\n", pages, mb); +} +LUSTRE_RO_ATTR(unstable_stats); + static struct lprocfs_vars lprocfs_llite_obd_vars[] = { /* { "mntpt_path", ll_rd_path, 0, 0 }, */ { "site", &ll_site_stats_fops, NULL, 0 }, @@ -853,6 +868,7 @@ static struct attribute *llite_attrs[] = { &lustre_attr_max_easize.attr, &lustre_attr_default_easize.attr, &lustre_attr_xattr_cache.attr, + &lustre_attr_unstable_stats.attr, NULL, }; @@ -953,6 +969,7 @@ static const char *ra_stat_string[] = { [RA_STAT_EOF] = "read-ahead to EOF", [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue", [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page", + [RA_STAT_FAILED_REACH_END] = "failed to reach end" }; int ldebugfs_register_mountpoint(struct dentry *parent, diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index f8f98e4e8258..2c4dc69731e8 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -128,12 +124,14 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, if (rc != 0) { iget_failed(inode); inode = NULL; - } else + } else { unlock_new_inode(inode); - } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) + } + } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) { ll_update_inode(inode, md); - CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n", - inode, PFID(&md->body->fid1)); + CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p)\n", + PFID(&md->body->fid1), inode); + } } return inode; } @@ -142,7 +140,7 @@ static void ll_invalidate_negative_children(struct inode *dir) { struct dentry *dentry, *tmp_subdir; - ll_lock_dcache(dir); + spin_lock(&dir->i_lock); hlist_for_each_entry(dentry, &dir->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (!list_empty(&dentry->d_subdirs)) { @@ -157,7 +155,7 @@ static void ll_invalidate_negative_children(struct inode *dir) } spin_unlock(&dentry->d_lock); } - ll_unlock_dcache(dir); + spin_unlock(&dir->i_lock); } int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, @@ -188,7 +186,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, break; /* Invalidate all dentries associated with this inode */ - LASSERT(lock->l_flags & LDLM_FL_CANCELING); + LASSERT(ldlm_is_canceling(lock)); if (!fid_res_name_eq(ll_inode2fid(inode), &lock->l_resource->lr_name)) { @@ -255,8 +253,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, } if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) { - CDEBUG(D_INODE, "invalidating inode %lu\n", - inode->i_ino); + CDEBUG(D_INODE, "invalidating inode "DFID"\n", + PFID(ll_inode2fid(inode))); truncate_inode_pages(inode->i_mapping, 0); ll_invalidate_negative_children(inode); } @@ -316,9 +314,10 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry) if (hlist_empty(&inode->i_dentry)) return NULL; - discon_alias = invalid_alias = NULL; + discon_alias = NULL; + invalid_alias = NULL; - ll_lock_dcache(inode); + spin_lock(&inode->i_lock); hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { LASSERT(alias != dentry); @@ -343,7 +342,7 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry) dget_dlock(alias); spin_unlock(&alias->d_lock); } - ll_unlock_dcache(inode); + spin_unlock(&inode->i_lock); return alias; } @@ -389,12 +388,13 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request, struct inode *inode = NULL; __u64 bits = 0; int rc = 0; + struct dentry *alias; /* NB 1 request reference will be taken away by ll_intent_lock() * when I return */ CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it, - it->d.lustre.it_disposition); + it->it_disposition); if (!it_disposition(it, DISP_LOOKUP_NEG)) { rc = ll_prep_inode(&inode, request, (*de)->d_sb, it); if (rc) @@ -413,26 +413,12 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request, */ } - /* Only hash *de if it is unhashed (new dentry). - * Atoimc_open may passing hashed dentries for open. - */ - if (d_unhashed(*de)) { - struct dentry *alias; - - alias = ll_splice_alias(inode, *de); - if (IS_ERR(alias)) { - rc = PTR_ERR(alias); - goto out; - } - *de = alias; - } else if (!it_disposition(it, DISP_LOOKUP_NEG) && - !it_disposition(it, DISP_OPEN_CREATE)) { - /* With DISP_OPEN_CREATE dentry will be - * instantiated in ll_create_it. - */ - LASSERT(!d_inode(*de)); - d_instantiate(*de, inode); + alias = ll_splice_alias(inode, *de); + if (IS_ERR(alias)) { + rc = PTR_ERR(alias); + goto out; } + *de = alias; if (!it_disposition(it, DISP_LOOKUP_NEG)) { /* we have lookup look - unhide dentry */ @@ -446,7 +432,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request, /* Check that parent has UPDATE lock. */ struct lookup_intent parent_it = { .it_op = IT_GETATTR, - .d.lustre.it_lock_handle = 0 }; + .it_lock_handle = 0 }; if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it, &ll_i2info(parent)->lli_fid, NULL)) { @@ -476,9 +462,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen) return ERR_PTR(-ENAMETOOLONG); - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n", - dentry, parent->i_ino, - parent->i_generation, parent, LL_IT2STR(it)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),intent=%s\n", + dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it)); if (d_mountpoint(dentry)) CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it)); @@ -553,9 +538,8 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, struct lookup_intent *itp, it = { .it_op = IT_GETATTR }; struct dentry *de; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u\n", - dentry, parent->i_ino, - parent->i_generation, parent, flags); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),flags=%u\n", + dentry, PFID(ll_inode2fid(parent)), parent, flags); /* Optimize away (CREATE && !OPEN). Let .create handle the race. */ if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN)) @@ -586,10 +570,27 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, long long lookup_flags = LOOKUP_OPEN; int rc = 0; - CDEBUG(D_VFSTRACE, - "VFS Op:name=%pd,dir=%lu/%u(%p),file %p,open_flags %x,mode %x opened %d\n", - dentry, dir->i_ino, - dir->i_generation, dir, file, open_flags, mode, *opened); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),file %p,open_flags %x,mode %x opened %d\n", + dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode, + *opened); + + /* Only negative dentries enter here */ + LASSERT(!d_inode(dentry)); + + if (!d_in_lookup(dentry)) { + /* A valid negative dentry that just passed revalidation, + * there's little point to try and open it server-side, + * even though there's a minuscle chance it might succeed. + * Either way it's a valid race to just return -ENOENT here. + */ + if (!(open_flags & O_CREAT)) + return -ENOENT; + + /* Otherwise we just unhash it to be rehashed afresh via + * lookup if necessary + */ + d_drop(dentry); + } it = kzalloc(sizeof(*it), GFP_NOFS); if (!it) @@ -626,13 +627,10 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, if (d_really_is_positive(dentry) && it_disposition(it, DISP_OPEN_OPEN)) { /* Open dentry. */ if (S_ISFIFO(d_inode(dentry)->i_mode)) { - /* We cannot call open here as it would - * deadlock. + /* We cannot call open here as it might + * deadlock. This case is unreachable in + * practice because of OBD_CONNECT_NODEVOH. */ - if (it_disposition(it, DISP_ENQ_OPEN_REF)) - ptlrpc_req_finished( - (struct ptlrpc_request *) - it->d.lustre.it_data); rc = finish_no_open(file, de); } else { file->private_data = it; @@ -663,10 +661,10 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it) struct ll_sb_info *sbi = ll_i2sbi(dir); int rc; - LASSERT(it && it->d.lustre.it_disposition); + LASSERT(it && it->it_disposition); LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF)); - request = it->d.lustre.it_data; + request = it->it_request; it_clear_disposition(it, DISP_ENQ_CREATE_REF); rc = ll_prep_inode(&inode, request, dir->i_sb, it); if (rc) { @@ -680,8 +678,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it) * lock on the inode. Since we finally have an inode pointer, * stuff it in the lock. */ - CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n", - inode, inode->i_ino, inode->i_generation); + CDEBUG(D_DLMTRACE, "setting l_ast_data to inode "DFID"(%p)\n", + PFID(ll_inode2fid(dir)), inode); ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); out: ptlrpc_req_finished(request); @@ -708,9 +706,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; int rc = 0; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n", - dentry, dir->i_ino, - dir->i_generation, dir, LL_IT2STR(it)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), intent=%s\n", + dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it)); rc = it_open_error(DISP_OPEN_CREATE, it); if (rc) @@ -733,8 +730,9 @@ static void ll_update_times(struct ptlrpc_request *request, LASSERT(body); if (body->valid & OBD_MD_FLMTIME && body->mtime > LTIME_S(inode->i_mtime)) { - CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n", - inode->i_ino, LTIME_S(inode->i_mtime), body->mtime); + CDEBUG(D_INODE, "setting fid "DFID" mtime from %lu to %llu\n", + PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime), + body->mtime); LTIME_S(inode->i_mtime) = body->mtime; } if (body->valid & OBD_MD_FLCTIME && @@ -791,9 +789,9 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild, { int err; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p) mode %o dev %x\n", - dchild, dir->i_ino, dir->i_generation, dir, - mode, old_encode_dev(rdev)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n", + dchild, PFID(ll_inode2fid(dir)), dir, mode, + old_encode_dev(rdev)); if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) mode &= ~current_umask(); @@ -831,9 +829,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry, { int rc; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u, excl=%d\n", - dentry, dir->i_ino, - dir->i_generation, dir, mode, want_excl); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u, excl=%d\n", + dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl); rc = ll_mknod(dir, dentry, mode, 0); @@ -845,12 +842,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry, return rc; } -static inline void ll_get_child_fid(struct dentry *child, struct lu_fid *fid) -{ - if (d_really_is_positive(child)) - *fid = *ll_inode2fid(d_inode(child)); -} - int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) { struct mdt_body *body; @@ -927,23 +918,25 @@ out: * is any lock existing. They will recycle dentries and inodes based upon locks * too. b=20433 */ -static int ll_unlink(struct inode *dir, struct dentry *dentry) +static int ll_unlink(struct inode *dir, struct dentry *dchild) { struct ptlrpc_request *request = NULL; struct md_op_data *op_data; int rc; CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dentry, dir->i_ino, dir->i_generation, dir); + dchild, dir->i_ino, dir->i_generation, dir); op_data = ll_prep_md_op_data(NULL, dir, NULL, - dentry->d_name.name, - dentry->d_name.len, + dchild->d_name.name, + dchild->d_name.len, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - ll_get_child_fid(dentry, &op_data->op_fid3); + if (dchild && dchild->d_inode) + op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); + op_data->op_fid2 = op_data->op_fid3; rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); ll_finish_md_op_data(op_data); @@ -963,8 +956,8 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { int err; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dentry, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir"DFID"(%p)\n", + dentry, PFID(ll_inode2fid(dir)), dir); if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) mode &= ~current_umask(); @@ -977,23 +970,25 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return err; } -static int ll_rmdir(struct inode *dir, struct dentry *dentry) +static int ll_rmdir(struct inode *dir, struct dentry *dchild) { struct ptlrpc_request *request = NULL; struct md_op_data *op_data; int rc; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dentry, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n", + dchild, PFID(ll_inode2fid(dir)), dir); op_data = ll_prep_md_op_data(NULL, dir, NULL, - dentry->d_name.name, - dentry->d_name.len, + dchild->d_name.name, + dchild->d_name.len, S_IFDIR, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - ll_get_child_fid(dentry, &op_data->op_fid3); + if (dchild && dchild->d_inode) + op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); + op_data->op_fid2 = op_data->op_fid3; rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); ll_finish_md_op_data(op_data); @@ -1011,9 +1006,8 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry, { int err; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),target=%.*s\n", - dentry, dir->i_ino, dir->i_generation, - dir, 3000, oldname); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),target=%.*s\n", + dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname); err = ll_new_node(dir, dentry, oldname, S_IFLNK | S_IRWXUGO, 0, LUSTRE_OPC_SYMLINK); @@ -1033,10 +1027,9 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, struct md_op_data *op_data; int err; - CDEBUG(D_VFSTRACE, - "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%pd\n", - src->i_ino, src->i_generation, src, dir->i_ino, - dir->i_generation, dir, new_dentry); + CDEBUG(D_VFSTRACE, "VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n", + PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir, + new_dentry); op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name, new_dentry->d_name.len, @@ -1056,42 +1049,45 @@ out: return err; } -static int ll_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +static int ll_rename(struct inode *src, struct dentry *src_dchild, + struct inode *tgt, struct dentry *tgt_dchild) { struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(old_dir); + struct ll_sb_info *sbi = ll_i2sbi(src); struct md_op_data *op_data; int err; CDEBUG(D_VFSTRACE, - "VFS Op:oldname=%pd,src_dir=%lu/%u(%p),newname=%pd,tgt_dir=%lu/%u(%p)\n", - old_dentry, old_dir->i_ino, old_dir->i_generation, old_dir, - new_dentry, new_dir->i_ino, new_dir->i_generation, new_dir); + "VFS Op:oldname=%pd, src_dir="DFID"(%p), newname=%pd, tgt_dir="DFID"(%p)\n", + src_dchild, PFID(ll_inode2fid(src)), src, + tgt_dchild, PFID(ll_inode2fid(tgt)), tgt); - op_data = ll_prep_md_op_data(NULL, old_dir, new_dir, NULL, 0, 0, + op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - ll_get_child_fid(old_dentry, &op_data->op_fid3); - ll_get_child_fid(new_dentry, &op_data->op_fid4); + if (src_dchild && src_dchild->d_inode) + op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode); + if (tgt_dchild && tgt_dchild->d_inode) + op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode); + err = md_rename(sbi->ll_md_exp, op_data, - old_dentry->d_name.name, - old_dentry->d_name.len, - new_dentry->d_name.name, - new_dentry->d_name.len, &request); + src_dchild->d_name.name, + src_dchild->d_name.len, + tgt_dchild->d_name.name, + tgt_dchild->d_name.len, &request); ll_finish_md_op_data(op_data); if (!err) { - ll_update_times(request, old_dir); - ll_update_times(request, new_dir); + ll_update_times(request, src); + ll_update_times(request, tgt); ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1); - err = ll_objects_destroy(request, old_dir); + err = ll_objects_destroy(request, src); } ptlrpc_req_finished(request); if (!err) - d_move(old_dentry, new_dentry); + d_move(src_dchild, tgt_dchild); return err; } diff --git a/drivers/staging/lustre/lustre/llite/remote_perm.c b/drivers/staging/lustre/lustre/llite/remote_perm.c deleted file mode 100644 index e9d25317cd28..000000000000 --- a/drivers/staging/lustre/lustre/llite/remote_perm.c +++ /dev/null @@ -1,324 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/llite/remote_perm.c - * - * Lustre Permission Cache for Remote Client - * - * Author: Lai Siyao <lsy@clusterfs.com> - * Author: Fan Yong <fanyong@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/module.h> -#include <linux/types.h> - -#include "../include/lustre_lite.h" -#include "../include/lustre_ha.h" -#include "../include/lustre_dlm.h" -#include "../include/lprocfs_status.h" -#include "../include/lustre_disk.h" -#include "../include/lustre_param.h" -#include "llite_internal.h" - -struct kmem_cache *ll_remote_perm_cachep; -struct kmem_cache *ll_rmtperm_hash_cachep; - -static inline struct ll_remote_perm *alloc_ll_remote_perm(void) -{ - struct ll_remote_perm *lrp; - - lrp = kmem_cache_zalloc(ll_remote_perm_cachep, GFP_KERNEL); - if (lrp) - INIT_HLIST_NODE(&lrp->lrp_list); - return lrp; -} - -static inline void free_ll_remote_perm(struct ll_remote_perm *lrp) -{ - if (!lrp) - return; - - if (!hlist_unhashed(&lrp->lrp_list)) - hlist_del(&lrp->lrp_list); - kmem_cache_free(ll_remote_perm_cachep, lrp); -} - -static struct hlist_head *alloc_rmtperm_hash(void) -{ - struct hlist_head *hash; - int i; - - hash = kmem_cache_zalloc(ll_rmtperm_hash_cachep, GFP_NOFS); - if (!hash) - return NULL; - - for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) - INIT_HLIST_HEAD(hash + i); - - return hash; -} - -void free_rmtperm_hash(struct hlist_head *hash) -{ - int i; - struct ll_remote_perm *lrp; - struct hlist_node *next; - - if (!hash) - return; - - for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) - hlist_for_each_entry_safe(lrp, next, hash + i, lrp_list) - free_ll_remote_perm(lrp); - kmem_cache_free(ll_rmtperm_hash_cachep, hash); -} - -static inline int remote_perm_hashfunc(uid_t uid) -{ - return uid & (REMOTE_PERM_HASHSIZE - 1); -} - -/* NB: setxid permission is not checked here, instead it's done on - * MDT when client get remote permission. - */ -static int do_check_remote_perm(struct ll_inode_info *lli, int mask) -{ - struct hlist_head *head; - struct ll_remote_perm *lrp; - int found = 0, rc; - - if (!lli->lli_remote_perms) - return -ENOENT; - - head = lli->lli_remote_perms + - remote_perm_hashfunc(from_kuid(&init_user_ns, current_uid())); - - spin_lock(&lli->lli_lock); - hlist_for_each_entry(lrp, head, lrp_list) { - if (lrp->lrp_uid != from_kuid(&init_user_ns, current_uid())) - continue; - if (lrp->lrp_gid != from_kgid(&init_user_ns, current_gid())) - continue; - if (lrp->lrp_fsuid != from_kuid(&init_user_ns, current_fsuid())) - continue; - if (lrp->lrp_fsgid != from_kgid(&init_user_ns, current_fsgid())) - continue; - found = 1; - break; - } - - if (!found) { - rc = -ENOENT; - goto out; - } - - CDEBUG(D_SEC, "found remote perm: %u/%u/%u/%u - %#x\n", - lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid, - lrp->lrp_access_perm); - rc = ((lrp->lrp_access_perm & mask) == mask) ? 0 : -EACCES; - -out: - spin_unlock(&lli->lli_lock); - return rc; -} - -int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_remote_perm *lrp = NULL, *tmp = NULL; - struct hlist_head *head, *perm_hash = NULL; - - LASSERT(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT); - -#if 0 - if (perm->rp_uid != current->uid || - perm->rp_gid != current->gid || - perm->rp_fsuid != current->fsuid || - perm->rp_fsgid != current->fsgid) { - /* user might setxid in this small period */ - CDEBUG(D_SEC, - "remote perm user %u/%u/%u/%u != current %u/%u/%u/%u\n", - perm->rp_uid, perm->rp_gid, perm->rp_fsuid, - perm->rp_fsgid, current->uid, current->gid, - current->fsuid, current->fsgid); - return -EAGAIN; - } -#endif - - if (!lli->lli_remote_perms) { - perm_hash = alloc_rmtperm_hash(); - if (!perm_hash) { - CERROR("alloc lli_remote_perms failed!\n"); - return -ENOMEM; - } - } - - spin_lock(&lli->lli_lock); - - if (!lli->lli_remote_perms) - lli->lli_remote_perms = perm_hash; - else - free_rmtperm_hash(perm_hash); - - head = lli->lli_remote_perms + remote_perm_hashfunc(perm->rp_uid); - -again: - hlist_for_each_entry(tmp, head, lrp_list) { - if (tmp->lrp_uid != perm->rp_uid) - continue; - if (tmp->lrp_gid != perm->rp_gid) - continue; - if (tmp->lrp_fsuid != perm->rp_fsuid) - continue; - if (tmp->lrp_fsgid != perm->rp_fsgid) - continue; - free_ll_remote_perm(lrp); - lrp = tmp; - break; - } - - if (!lrp) { - spin_unlock(&lli->lli_lock); - lrp = alloc_ll_remote_perm(); - if (!lrp) { - CERROR("alloc memory for ll_remote_perm failed!\n"); - return -ENOMEM; - } - spin_lock(&lli->lli_lock); - goto again; - } - - lrp->lrp_access_perm = perm->rp_access_perm; - if (lrp != tmp) { - lrp->lrp_uid = perm->rp_uid; - lrp->lrp_gid = perm->rp_gid; - lrp->lrp_fsuid = perm->rp_fsuid; - lrp->lrp_fsgid = perm->rp_fsgid; - hlist_add_head(&lrp->lrp_list, head); - } - lli->lli_rmtperm_time = cfs_time_current(); - spin_unlock(&lli->lli_lock); - - CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n", - lrp, lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid, - lrp->lrp_access_perm); - - return 0; -} - -int lustre_check_remote_perm(struct inode *inode, int mask) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *req = NULL; - struct mdt_remote_perm *perm; - unsigned long save; - int i = 0, rc; - - do { - save = lli->lli_rmtperm_time; - rc = do_check_remote_perm(lli, mask); - if (!rc || (rc != -ENOENT && i)) - break; - - might_sleep(); - - mutex_lock(&lli->lli_rmtperm_mutex); - /* check again */ - if (save != lli->lli_rmtperm_time) { - rc = do_check_remote_perm(lli, mask); - if (!rc || (rc != -ENOENT && i)) { - mutex_unlock(&lli->lli_rmtperm_mutex); - break; - } - } - - if (i++ > 5) { - CERROR("check remote perm falls in dead loop!\n"); - LBUG(); - } - - rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), - ll_i2suppgid(inode), &req); - if (rc) { - mutex_unlock(&lli->lli_rmtperm_mutex); - break; - } - - perm = req_capsule_server_swab_get(&req->rq_pill, &RMF_ACL, - lustre_swab_mdt_remote_perm); - if (unlikely(!perm)) { - mutex_unlock(&lli->lli_rmtperm_mutex); - rc = -EPROTO; - break; - } - - rc = ll_update_remote_perm(inode, perm); - mutex_unlock(&lli->lli_rmtperm_mutex); - if (rc == -ENOMEM) - break; - - ptlrpc_req_finished(req); - req = NULL; - } while (1); - ptlrpc_req_finished(req); - return rc; -} - -#if 0 /* NB: remote perms can't be freed in ll_mdc_blocking_ast of UPDATE lock, - * because it will fail sanity test 48. - */ -void ll_free_remote_perms(struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct hlist_head *hash = lli->lli_remote_perms; - struct ll_remote_perm *lrp; - struct hlist_node *node, *next; - int i; - - LASSERT(hash); - - spin_lock(&lli->lli_lock); - - for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) { - hlist_for_each_entry_safe(lrp, node, next, hash + i, lrp_list) - free_ll_remote_perm(lrp); - } - - spin_unlock(&lli->lli_lock); -} -#endif diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index edab6c5b7e50..87393c4bd51e 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -59,225 +55,6 @@ #include "llite_internal.h" #include "../include/linux/lustre_compat25.h" -/** - * Finalizes cl-data before exiting typical address_space operation. Dual to - * ll_cl_init(). - */ -static void ll_cl_fini(struct ll_cl_context *lcc) -{ - struct lu_env *env = lcc->lcc_env; - struct cl_io *io = lcc->lcc_io; - struct cl_page *page = lcc->lcc_page; - - LASSERT(lcc->lcc_cookie == current); - LASSERT(env); - - if (page) { - lu_ref_del(&page->cp_reference, "cl_io", io); - cl_page_put(env, page); - } - - cl_env_put(env, &lcc->lcc_refcheck); -} - -/** - * Initializes common cl-data at the typical address_space operation entry - * point. - */ -static struct ll_cl_context *ll_cl_init(struct file *file, - struct page *vmpage, int create) -{ - struct ll_cl_context *lcc; - struct lu_env *env; - struct cl_io *io; - struct cl_object *clob; - struct ccc_io *cio; - - int refcheck; - int result = 0; - - clob = ll_i2info(vmpage->mapping->host)->lli_clob; - LASSERT(clob); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return ERR_CAST(env); - - lcc = &vvp_env_info(env)->vti_io_ctx; - memset(lcc, 0, sizeof(*lcc)); - lcc->lcc_env = env; - lcc->lcc_refcheck = refcheck; - lcc->lcc_cookie = current; - - cio = ccc_env_io(env); - io = cio->cui_cl.cis_io; - if (!io && create) { - struct inode *inode = vmpage->mapping->host; - loff_t pos; - - if (inode_trylock(inode)) { - inode_unlock((inode)); - - /* this is too bad. Someone is trying to write the - * page w/o holding inode mutex. This means we can - * add dirty pages into cache during truncate - */ - CERROR("Proc %s is dirtying page w/o inode lock, this will break truncate\n", - current->comm); - dump_stack(); - LBUG(); - return ERR_PTR(-EIO); - } - - /* - * Loop-back driver calls ->prepare_write(). - * methods directly, bypassing file system ->write() operation, - * so cl_io has to be created here. - */ - io = ccc_env_thread_io(env); - ll_io_init(io, file, 1); - - /* No lock at all for this kind of IO - we can't do it because - * we have held page lock, it would cause deadlock. - * XXX: This causes poor performance to loop device - One page - * per RPC. - * In order to get better performance, users should use - * lloop driver instead. - */ - io->ci_lockreq = CILR_NEVER; - - pos = vmpage->index << PAGE_SHIFT; - - /* Create a temp IO to serve write. */ - result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_SIZE); - if (result == 0) { - cio->cui_fd = LUSTRE_FPRIVATE(file); - cio->cui_iter = NULL; - result = cl_io_iter_init(env, io); - if (result == 0) { - result = cl_io_lock(env, io); - if (result == 0) - result = cl_io_start(env, io); - } - } else - result = io->ci_result; - } - - lcc->lcc_io = io; - if (!io) - result = -EIO; - if (result == 0) { - struct cl_page *page; - - LASSERT(io->ci_state == CIS_IO_GOING); - LASSERT(cio->cui_fd == LUSTRE_FPRIVATE(file)); - page = cl_page_find(env, clob, vmpage->index, vmpage, - CPT_CACHEABLE); - if (!IS_ERR(page)) { - lcc->lcc_page = page; - lu_ref_add(&page->cp_reference, "cl_io", io); - result = 0; - } else - result = PTR_ERR(page); - } - if (result) { - ll_cl_fini(lcc); - lcc = ERR_PTR(result); - } - - CDEBUG(D_VFSTRACE, "%lu@"DFID" -> %d %p %p\n", - vmpage->index, PFID(lu_object_fid(&clob->co_lu)), result, - env, io); - return lcc; -} - -static struct ll_cl_context *ll_cl_get(void) -{ - struct ll_cl_context *lcc; - struct lu_env *env; - int refcheck; - - env = cl_env_get(&refcheck); - LASSERT(!IS_ERR(env)); - lcc = &vvp_env_info(env)->vti_io_ctx; - LASSERT(env == lcc->lcc_env); - LASSERT(current == lcc->lcc_cookie); - cl_env_put(env, &refcheck); - - /* env has got in ll_cl_init, so it is still usable. */ - return lcc; -} - -/** - * ->prepare_write() address space operation called by generic_file_write() - * for every page during write. - */ -int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from, - unsigned to) -{ - struct ll_cl_context *lcc; - int result; - - lcc = ll_cl_init(file, vmpage, 1); - if (!IS_ERR(lcc)) { - struct lu_env *env = lcc->lcc_env; - struct cl_io *io = lcc->lcc_io; - struct cl_page *page = lcc->lcc_page; - - cl_page_assume(env, io, page); - - result = cl_io_prepare_write(env, io, page, from, to); - if (result == 0) { - /* - * Add a reference, so that page is not evicted from - * the cache until ->commit_write() is called. - */ - cl_page_get(page); - lu_ref_add(&page->cp_reference, "prepare_write", - current); - } else { - cl_page_unassume(env, io, page); - ll_cl_fini(lcc); - } - /* returning 0 in prepare assumes commit must be called - * afterwards - */ - } else { - result = PTR_ERR(lcc); - } - return result; -} - -int ll_commit_write(struct file *file, struct page *vmpage, unsigned from, - unsigned to) -{ - struct ll_cl_context *lcc; - struct lu_env *env; - struct cl_io *io; - struct cl_page *page; - int result = 0; - - lcc = ll_cl_get(); - env = lcc->lcc_env; - page = lcc->lcc_page; - io = lcc->lcc_io; - - LASSERT(cl_page_is_owned(page, io)); - LASSERT(from <= to); - if (from != to) /* handle short write case. */ - result = cl_io_commit_write(env, io, page, from, to); - if (cl_page_is_owned(page, io)) - cl_page_unassume(env, io, page); - - /* - * Release reference acquired by ll_prepare_write(). - */ - lu_ref_del(&page->cp_reference, "prepare_write", current); - cl_page_put(env, page); - ll_cl_fini(lcc); - return result; -} - static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which); /** @@ -301,7 +78,7 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which); */ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, struct ra_io_arg *ria, - unsigned long pages) + unsigned long pages, unsigned long min) { struct ll_ra_info *ra = &sbi->ll_ra_info; long ret; @@ -341,6 +118,11 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, } out: + if (ret < min) { + /* override ra limit for maximum performance */ + atomic_add(min - ret, &ra->ra_cur_pages); + ret = min; + } return ret; } @@ -357,9 +139,9 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which) lprocfs_counter_incr(sbi->ll_ra_stats, which); } -void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which) +void ll_ra_stats_inc(struct inode *inode, enum ra_stat which) { - struct ll_sb_info *sbi = ll_i2sbi(mapping->host); + struct ll_sb_info *sbi = ll_i2sbi(inode); ll_ra_stats_inc_sbi(sbi, which); } @@ -388,61 +170,42 @@ static int index_in_window(unsigned long index, unsigned long point, return start <= index && index <= end; } -static struct ll_readahead_state *ll_ras_get(struct file *f) +void ll_ras_enter(struct file *f) { - struct ll_file_data *fd; - - fd = LUSTRE_FPRIVATE(f); - return &fd->fd_ras; -} - -void ll_ra_read_in(struct file *f, struct ll_ra_read *rar) -{ - struct ll_readahead_state *ras; - - ras = ll_ras_get(f); + struct ll_file_data *fd = LUSTRE_FPRIVATE(f); + struct ll_readahead_state *ras = &fd->fd_ras; spin_lock(&ras->ras_lock); ras->ras_requests++; ras->ras_request_index = 0; ras->ras_consecutive_requests++; - rar->lrr_reader = current; - - list_add(&rar->lrr_linkage, &ras->ras_read_beads); - spin_unlock(&ras->ras_lock); -} - -void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar) -{ - struct ll_readahead_state *ras; - - ras = ll_ras_get(f); - - spin_lock(&ras->ras_lock); - list_del_init(&rar->lrr_linkage); spin_unlock(&ras->ras_lock); } static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, struct cl_page *page, - struct page *vmpage) + struct cl_object *clob, pgoff_t *max_index) { - struct ccc_page *cp; + struct page *vmpage = page->cp_vmpage; + struct vvp_page *vpg; int rc; rc = 0; cl_page_assume(env, io, page); lu_ref_add(&page->cp_reference, "ra", current); - cp = cl2ccc_page(cl_page_at(page, &vvp_device_type)); - if (!cp->cpg_defer_uptodate && !PageUptodate(vmpage)) { - rc = cl_page_is_under_lock(env, io, page); - if (rc == -EBUSY) { - cp->cpg_defer_uptodate = 1; - cp->cpg_ra_used = 0; + vpg = cl2vvp_page(cl_object_page_slice(clob, page)); + if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) { + CDEBUG(D_READA, "page index %lu, max_index: %lu\n", + vvp_index(vpg), *max_index); + if (*max_index == 0 || vvp_index(vpg) > *max_index) + rc = cl_page_is_under_lock(env, io, page, max_index); + if (rc == 0) { + vpg->vpg_defer_uptodate = 1; + vpg->vpg_ra_used = 0; cl_page_list_add(queue, page); rc = 1; } else { - cl_page_delete(env, page); + cl_page_discard(env, io, page); rc = -ENOLCK; } } else { @@ -466,24 +229,25 @@ static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io, */ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, - pgoff_t index, struct address_space *mapping) + pgoff_t index, pgoff_t *max_index) { + struct cl_object *clob = io->ci_obj; + struct inode *inode = vvp_object_inode(clob); struct page *vmpage; - struct cl_object *clob = ll_i2info(mapping->host)->lli_clob; struct cl_page *page; enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */ int rc = 0; const char *msg = NULL; - vmpage = grab_cache_page_nowait(mapping, index); + vmpage = grab_cache_page_nowait(inode->i_mapping, index); if (vmpage) { /* Check if vmpage was truncated or reclaimed */ - if (vmpage->mapping == mapping) { + if (vmpage->mapping == inode->i_mapping) { page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); if (!IS_ERR(page)) { rc = cl_read_ahead_page(env, io, queue, - page, vmpage); + page, clob, max_index); if (rc == -ENOLCK) { which = RA_STAT_FAILED_MATCH; msg = "lock match failed"; @@ -504,7 +268,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, msg = "g_c_p_n failed"; } if (msg) { - ll_ra_stats_inc(mapping, which); + ll_ra_stats_inc(inode, which); CDEBUG(D_READA, "%s\n", msg); } return rc; @@ -616,11 +380,12 @@ static int ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, struct ra_io_arg *ria, unsigned long *reserved_pages, - struct address_space *mapping, unsigned long *ra_end) { - int rc, count = 0, stride_ria; - unsigned long page_idx; + int rc, count = 0; + bool stride_ria; + pgoff_t page_idx; + pgoff_t max_index = 0; LASSERT(ria); RIA_DEBUG(ria); @@ -631,12 +396,13 @@ static int ll_read_ahead_pages(const struct lu_env *env, if (ras_inside_ra_window(page_idx, ria)) { /* If the page is inside the read-ahead window*/ rc = ll_read_ahead_page(env, io, queue, - page_idx, mapping); + page_idx, &max_index); if (rc == 1) { (*reserved_pages)--; count++; - } else if (rc == -ENOLCK) + } else if (rc == -ENOLCK) { break; + } } else if (stride_ria) { /* If it is not in the read-ahead window, and it is * read-ahead mode, then check whether it should skip @@ -666,25 +432,22 @@ static int ll_read_ahead_pages(const struct lu_env *env, } int ll_readahead(const struct lu_env *env, struct cl_io *io, - struct ll_readahead_state *ras, struct address_space *mapping, - struct cl_page_list *queue, int flags) + struct cl_page_list *queue, struct ll_readahead_state *ras, + bool hit) { struct vvp_io *vio = vvp_env_io(env); - struct vvp_thread_info *vti = vvp_env_info(env); - struct cl_attr *attr = ccc_env_thread_attr(env); + struct ll_thread_info *lti = ll_env_info(env); + struct cl_attr *attr = vvp_env_thread_attr(env); unsigned long start = 0, end = 0, reserved; - unsigned long ra_end, len; + unsigned long ra_end, len, mlen = 0; struct inode *inode; - struct ll_ra_read *bead; - struct ra_io_arg *ria = &vti->vti_ria; - struct ll_inode_info *lli; + struct ra_io_arg *ria = <i->lti_ria; struct cl_object *clob; int ret = 0; __u64 kms; - inode = mapping->host; - lli = ll_i2info(inode); - clob = lli->lli_clob; + clob = io->ci_obj; + inode = vvp_object_inode(clob); memset(ria, 0, sizeof(*ria)); @@ -696,22 +459,20 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, return ret; kms = attr->cat_kms; if (kms == 0) { - ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN); + ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN); return 0; } spin_lock(&ras->ras_lock); - if (vio->cui_ra_window_set) - bead = &vio->cui_bead; - else - bead = NULL; /* Enlarge the RA window to encompass the full read */ - if (bead && ras->ras_window_start + ras->ras_window_len < - bead->lrr_start + bead->lrr_count) { - ras->ras_window_len = bead->lrr_start + bead->lrr_count - + if (vio->vui_ra_valid && + ras->ras_window_start + ras->ras_window_len < + vio->vui_ra_start + vio->vui_ra_count) { + ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count - ras->ras_window_start; } + /* Reserve a part of the read-ahead window that we'll be issuing */ if (ras->ras_window_len) { start = ras->ras_next_readahead; @@ -755,29 +516,48 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, spin_unlock(&ras->ras_lock); if (end == 0) { - ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW); + ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW); return 0; } len = ria_page_count(ria); - if (len == 0) + if (len == 0) { + ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW); return 0; + } + + CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n", + PFID(lu_object_fid(&clob->co_lu)), + ria->ria_start, ria->ria_end, + vio->vui_ra_valid ? vio->vui_ra_start : 0, + vio->vui_ra_valid ? vio->vui_ra_count : 0, + hit); + + /* at least to extend the readahead window to cover current read */ + if (!hit && vio->vui_ra_valid && + vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) { + /* to the end of current read window. */ + mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start; + /* trim to RPC boundary */ + start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1); + mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start); + } - reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len); + reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen); if (reserved < len) - ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT); + ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT); - CDEBUG(D_READA, "reserved page %lu ra_cur %d ra_max %lu\n", reserved, + CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n", + reserved, len, mlen, atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages), ll_i2sbi(inode)->ll_ra_info.ra_max_pages); - ret = ll_read_ahead_pages(env, io, queue, - ria, &reserved, mapping, &ra_end); + ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end); if (reserved != 0) ll_ra_count_put(ll_i2sbi(inode), reserved); if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT)) - ll_ra_stats_inc(mapping, RA_STAT_EOF); + ll_ra_stats_inc(inode, RA_STAT_EOF); /* if we didn't get to the end of the region we reserved from * the ras we need to go back and update the ras so that the @@ -789,6 +569,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, ra_end, end, ria->ria_end); if (ra_end != end + 1) { + ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END); spin_lock(&ras->ras_lock); if (ra_end < ras->ras_next_readahead && index_in_window(ra_end, ras->ras_window_start, 0, @@ -836,7 +617,6 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras) spin_lock_init(&ras->ras_lock); ras_reset(inode, ras, 0); ras->ras_requests = 0; - INIT_LIST_HEAD(&ras->ras_read_beads); } /* @@ -1059,15 +839,18 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode, ras->ras_last_readpage = index; ras_set_start(inode, ras, index); - if (stride_io_mode(ras)) + if (stride_io_mode(ras)) { /* Since stride readahead is sensitive to the offset * of read-ahead, so we use original offset here, * instead of ras_window_start, which is RPC aligned */ ras->ras_next_readahead = max(index, ras->ras_next_readahead); - else - ras->ras_next_readahead = max(ras->ras_window_start, - ras->ras_next_readahead); + } else { + if (ras->ras_next_readahead < ras->ras_window_start) + ras->ras_next_readahead = ras->ras_window_start; + if (!hit) + ras->ras_next_readahead = index + 1; + } RAS_CDEBUG(ras); /* Trigger RA in the mmap case where ras_consecutive_requests @@ -1129,7 +912,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc) clob = ll_i2info(inode)->lli_clob; LASSERT(clob); - io = ccc_env_thread_io(env); + io = vvp_env_thread_io(env); io->ci_obj = clob; io->ci_ignore_layout = 1; result = cl_io_init(env, io, CIT_MISC, clob); @@ -1240,23 +1023,77 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) { if (end == OBD_OBJECT_EOF) - end = i_size_read(inode); - mapping->writeback_index = (end >> PAGE_SHIFT) + 1; + mapping->writeback_index = 0; + else + mapping->writeback_index = (end >> PAGE_SHIFT) + 1; } return result; } +struct ll_cl_context *ll_cl_find(struct file *file) +{ + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_cl_context *lcc; + struct ll_cl_context *found = NULL; + + read_lock(&fd->fd_lock); + list_for_each_entry(lcc, &fd->fd_lccs, lcc_list) { + if (lcc->lcc_cookie == current) { + found = lcc; + break; + } + } + read_unlock(&fd->fd_lock); + + return found; +} + +void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io) +{ + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx; + + memset(lcc, 0, sizeof(*lcc)); + INIT_LIST_HEAD(&lcc->lcc_list); + lcc->lcc_cookie = current; + lcc->lcc_env = env; + lcc->lcc_io = io; + + write_lock(&fd->fd_lock); + list_add(&lcc->lcc_list, &fd->fd_lccs); + write_unlock(&fd->fd_lock); +} + +void ll_cl_remove(struct file *file, const struct lu_env *env) +{ + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx; + + write_lock(&fd->fd_lock); + list_del_init(&lcc->lcc_list); + write_unlock(&fd->fd_lock); +} + int ll_readpage(struct file *file, struct page *vmpage) { + struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob; struct ll_cl_context *lcc; + const struct lu_env *env; + struct cl_io *io; + struct cl_page *page; int result; - lcc = ll_cl_init(file, vmpage, 0); - if (!IS_ERR(lcc)) { - struct lu_env *env = lcc->lcc_env; - struct cl_io *io = lcc->lcc_io; - struct cl_page *page = lcc->lcc_page; + lcc = ll_cl_find(file); + if (!lcc) { + unlock_page(vmpage); + return -EIO; + } + env = lcc->lcc_env; + io = lcc->lcc_io; + LASSERT(io->ci_state == CIS_IO_GOING); + page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); + if (!IS_ERR(page)) { LASSERT(page->cp_type == CPT_CACHEABLE); if (likely(!PageUptodate(vmpage))) { cl_page_assume(env, io, page); @@ -1266,10 +1103,35 @@ int ll_readpage(struct file *file, struct page *vmpage) unlock_page(vmpage); result = 0; } - ll_cl_fini(lcc); + cl_page_put(env, page); } else { unlock_page(vmpage); - result = PTR_ERR(lcc); + result = PTR_ERR(page); } return result; } + +int ll_page_sync_io(const struct lu_env *env, struct cl_io *io, + struct cl_page *page, enum cl_req_type crt) +{ + struct cl_2queue *queue; + int result; + + LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); + + queue = &io->ci_queue; + cl_2queue_init_page(queue, page); + + result = cl_io_submit_sync(env, io, crt, queue, 0); + LASSERT(cl_page_is_owned(page, io)); + + if (crt == CRT_READ) + /* + * in CRT_WRITE case page is left locked even in case of + * error. + */ + cl_page_list_disown(env, io, &queue->c2_qin); + cl_2queue_fini(env, queue); + + return result; +} diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 69aa15e8e3ef..d98c7acc0832 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -95,15 +91,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset, if (obj) { page = cl_vmpage_page(vmpage, obj); if (page) { - lu_ref_add(&page->cp_reference, - "delete", vmpage); cl_page_delete(env, page); - lu_ref_del(&page->cp_reference, - "delete", vmpage); cl_page_put(env, page); } - } else + } else { LASSERT(vmpage->private == 0); + } cl_env_put(env, &refcheck); } } @@ -111,12 +104,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset, static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) { - struct cl_env_nest nest; struct lu_env *env; + void *cookie; struct cl_object *obj; struct cl_page *page; struct address_space *mapping; - int result; + int result = 0; LASSERT(PageLocked(vmpage)); if (PageWriteback(vmpage) || PageDirty(vmpage)) @@ -130,53 +123,42 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) if (!obj) return 1; - /* 1 for page allocator, 1 for cl_page and 1 for page cache */ + /* 1 for caller, 1 for cl_page and 1 for page cache */ if (page_count(vmpage) > 3) return 0; - /* TODO: determine what gfp should be used by @gfp_mask. */ - env = cl_env_nested_get(&nest); - if (IS_ERR(env)) - /* If we can't allocate an env we won't call cl_page_put() - * later on which further means it's impossible to drop - * page refcount by cl_page, so ask kernel to not free - * this page. - */ - return 0; - page = cl_vmpage_page(vmpage, obj); - result = !page; - if (page) { - if (!cl_page_in_use(page)) { - result = 1; - cl_page_delete(env, page); - } - cl_page_put(env, page); - } - cl_env_nested_put(&nest, env); - return result; -} + if (!page) + return 1; -static int ll_set_page_dirty(struct page *vmpage) -{ -#if 0 - struct cl_page *page = vvp_vmpage_page_transient(vmpage); - struct vvp_object *obj = cl_inode2vvp(vmpage->mapping->host); - struct vvp_page *cpg; + cookie = cl_env_reenter(); + env = cl_env_percpu_get(); + LASSERT(!IS_ERR(env)); - /* - * XXX should page method be called here? - */ - LASSERT(&obj->co_cl == page->cp_obj); - cpg = cl2vvp_page(cl_page_at(page, &vvp_device_type)); - /* - * XXX cannot do much here, because page is possibly not locked: - * sys_munmap()->... - * ->unmap_page_range()->zap_pte_range()->set_page_dirty(). + if (!cl_page_in_use(page)) { + result = 1; + cl_page_delete(env, page); + } + + /* To use percpu env array, the call path can not be rescheduled; + * otherwise percpu array will be messed if ll_releaspage() called + * again on the same CPU. + * + * If this page holds the last refc of cl_object, the following + * call path may cause reschedule: + * cl_page_put -> cl_page_free -> cl_object_put -> + * lu_object_put -> lu_object_free -> lov_delete_raid0. + * + * However, the kernel can't get rid of this inode until all pages have + * been cleaned up. Now that we hold page lock here, it's pretty safe + * that we won't get into object delete path. */ - vvp_write_pending(obj, cpg); -#endif - return __set_page_dirty_nobuffers(vmpage); + LASSERT(cl_object_refc(obj) > 1); + cl_page_put(env, page); + + cl_env_percpu_put(env); + cl_env_reexit(cookie); + return result; } #define MAX_DIRECTIO_SIZE (2*1024*1024*1024UL) @@ -266,7 +248,7 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, * write directly */ if (clp->cp_type == CPT_CACHEABLE) { - struct page *vmpage = cl_page_vmpage(env, clp); + struct page *vmpage = cl_page_vmpage(clp); struct page *src_page; struct page *dst_page; void *src; @@ -358,14 +340,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, */ #define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \ PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1)) -static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, - loff_t file_offset) +static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter) { struct lu_env *env; struct cl_io *io; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - struct ccc_object *obj = cl_inode2ccc(inode); + struct vvp_object *obj = cl_inode2vvp(inode); + loff_t file_offset = iocb->ki_pos; ssize_t count = iov_iter_count(iter); ssize_t tot_bytes = 0, result = 0; struct ll_inode_info *lli = ll_i2info(inode); @@ -376,22 +358,21 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, return -EBADF; /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */ - if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK)) + if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK)) return -EINVAL; - CDEBUG(D_VFSTRACE, - "VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n", - inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE, + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n", + PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE, file_offset, file_offset, count >> PAGE_SHIFT, MAX_DIO_SIZE >> PAGE_SHIFT); /* Check that all user buffers are aligned as well */ - if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK) + if (iov_iter_alignment(iter) & ~PAGE_MASK) return -EINVAL; env = cl_env_get(&refcheck); LASSERT(!IS_ERR(env)); - io = ccc_env_io(env)->cui_cl.cis_io; + io = vvp_env_io(env)->vui_cl.cis_io; LASSERT(io); /* 0. Need locking between buffered and direct access. and race with @@ -401,7 +382,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == READ) inode_lock(inode); - LASSERT(obj->cob_transient_pages == 0); + LASSERT(obj->vob_transient_pages == 0); while (iov_iter_count(iter)) { struct page **pages; size_t offs; @@ -435,8 +416,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, size > (PAGE_SIZE / sizeof(*pages)) * PAGE_SIZE) { size = ((((size / 2) - 1) | - ~CFS_PAGE_MASK) + 1) & - CFS_PAGE_MASK; + ~PAGE_MASK) + 1) & + PAGE_MASK; CDEBUG(D_VFSTRACE, "DIO size now %lu\n", size); continue; @@ -449,62 +430,214 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, file_offset += result; } out: - LASSERT(obj->cob_transient_pages == 0); + LASSERT(obj->vob_transient_pages == 0); if (iov_iter_rw(iter) == READ) inode_unlock(inode); if (tot_bytes > 0) { - if (iov_iter_rw(iter) == WRITE) { - struct lov_stripe_md *lsm; - - lsm = ccc_inode_lsm_get(inode); - LASSERT(lsm); - lov_stripe_lock(lsm); - obd_adjust_kms(ll_i2dtexp(inode), lsm, file_offset, 0); - lov_stripe_unlock(lsm); - ccc_inode_lsm_put(inode, lsm); - } + struct vvp_io *vio = vvp_env_io(env); + + /* no commit async for direct IO */ + vio->u.write.vui_written += tot_bytes; } cl_env_put(env, &refcheck); - return tot_bytes ? : result; + return tot_bytes ? tot_bytes : result; +} + +/** + * Prepare partially written-to page for a write. + */ +static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io, + struct cl_page *pg) +{ + struct cl_attr *attr = vvp_env_thread_attr(env); + struct cl_object *obj = io->ci_obj; + struct vvp_page *vpg = cl_object_page_slice(obj, pg); + loff_t offset = cl_offset(obj, vvp_index(vpg)); + int result; + + cl_object_attr_lock(obj); + result = cl_object_attr_get(env, obj, attr); + cl_object_attr_unlock(obj); + if (result == 0) { + /* + * If are writing to a new page, no need to read old data. + * The extent locking will have updated the KMS, and for our + * purposes here we can treat it like i_size. + */ + if (attr->cat_kms <= offset) { + char *kaddr = kmap_atomic(vpg->vpg_page); + + memset(kaddr, 0, cl_page_size(obj)); + kunmap_atomic(kaddr); + } else if (vpg->vpg_defer_uptodate) { + vpg->vpg_ra_used = 1; + } else { + result = ll_page_sync_io(env, io, pg, CRT_READ); + } + } + return result; } static int ll_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + struct ll_cl_context *lcc; + const struct lu_env *env; + struct cl_io *io; + struct cl_page *page; + struct cl_object *clob = ll_i2info(mapping->host)->lli_clob; pgoff_t index = pos >> PAGE_SHIFT; - struct page *page; - int rc; - unsigned from = pos & (PAGE_SIZE - 1); + struct page *vmpage = NULL; + unsigned int from = pos & (PAGE_SIZE - 1); + unsigned int to = from + len; + int result = 0; - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; + CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len); - *pagep = page; + lcc = ll_cl_find(file); + if (!lcc) { + result = -EIO; + goto out; + } + + env = lcc->lcc_env; + io = lcc->lcc_io; + + /* To avoid deadlock, try to lock page first. */ + vmpage = grab_cache_page_nowait(mapping, index); + if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) { + struct vvp_io *vio = vvp_env_io(env); + struct cl_page_list *plist = &vio->u.write.vui_queue; + + /* if the page is already in dirty cache, we have to commit + * the pages right now; otherwise, it may cause deadlock + * because it holds page lock of a dirty page and request for + * more grants. It's okay for the dirty page to be the first + * one in commit page list, though. + */ + if (vmpage && plist->pl_nr > 0) { + unlock_page(vmpage); + put_page(vmpage); + vmpage = NULL; + } + + /* commit pages and then wait for page lock */ + result = vvp_io_write_commit(env, io); + if (result < 0) + goto out; + + if (!vmpage) { + vmpage = grab_cache_page_write_begin(mapping, index, + flags); + if (!vmpage) { + result = -ENOMEM; + goto out; + } + } + } - rc = ll_prepare_write(file, page, from, from + len); - if (rc) { - unlock_page(page); - put_page(page); + page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); + if (IS_ERR(page)) { + result = PTR_ERR(page); + goto out; } - return rc; + + lcc->lcc_page = page; + lu_ref_add(&page->cp_reference, "cl_io", io); + + cl_page_assume(env, io, page); + if (!PageUptodate(vmpage)) { + /* + * We're completely overwriting an existing page, + * so _don't_ set it up to date until commit_write + */ + if (from == 0 && to == PAGE_SIZE) { + CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n"); + POISON_PAGE(vmpage, 0x11); + } else { + /* TODO: can be optimized at OSC layer to check if it + * is a lockless IO. In that case, it's not necessary + * to read the data. + */ + result = ll_prepare_partial_page(env, io, page); + if (result == 0) + SetPageUptodate(vmpage); + } + } + if (result < 0) + cl_page_unassume(env, io, page); +out: + if (result < 0) { + if (vmpage) { + unlock_page(vmpage); + put_page(vmpage); + } + } else { + *pagep = vmpage; + *fsdata = lcc; + } + return result; } static int ll_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct page *vmpage, void *fsdata) { + struct ll_cl_context *lcc = fsdata; + const struct lu_env *env; + struct cl_io *io; + struct vvp_io *vio; + struct cl_page *page; unsigned from = pos & (PAGE_SIZE - 1); - int rc; + bool unplug = false; + int result = 0; + + put_page(vmpage); + + env = lcc->lcc_env; + page = lcc->lcc_page; + io = lcc->lcc_io; + vio = vvp_env_io(env); + + LASSERT(cl_page_is_owned(page, io)); + if (copied > 0) { + struct cl_page_list *plist = &vio->u.write.vui_queue; + + lcc->lcc_page = NULL; /* page will be queued */ + + /* Add it into write queue */ + cl_page_list_add(plist, page); + if (plist->pl_nr == 1) /* first page */ + vio->u.write.vui_from = from; + else + LASSERT(from == 0); + vio->u.write.vui_to = from + copied; + + /* We may have one full RPC, commit it soon */ + if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES) + unplug = true; + + CL_PAGE_DEBUG(D_VFSTRACE, env, page, + "queued page: %d.\n", plist->pl_nr); + } else { + cl_page_disown(env, io, page); + + lcc->lcc_page = NULL; + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); + + /* page list is not contiguous now, commit it now */ + unplug = true; + } - rc = ll_commit_write(file, page, from, from + copied); - unlock_page(page); - put_page(page); + if (unplug || + file->f_flags & O_SYNC || IS_SYNC(file_inode(file))) + result = vvp_io_write_commit(env, io); - return rc ?: copied; + return result >= 0 ? copied : result; } #ifdef CONFIG_MIGRATION @@ -523,7 +656,7 @@ const struct address_space_operations ll_aops = { .direct_IO = ll_direct_IO_26, .writepage = ll_writepage, .writepages = ll_writepages, - .set_page_dirty = ll_set_page_dirty, + .set_page_dirty = __set_page_dirty_nobuffers, .write_begin = ll_write_begin, .write_end = ll_write_end, .invalidatepage = ll_invalidatepage, diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c index 99ffd1589df8..c1cb6b19e724 100644 --- a/drivers/staging/lustre/lustre/llite/statahead.c +++ b/drivers/staging/lustre/lustre/llite/statahead.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -174,7 +170,8 @@ static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index) * Insert it into sai_entries tail when init. */ static struct ll_sa_entry * -ll_sa_entry_alloc(struct ll_statahead_info *sai, __u64 index, +ll_sa_entry_alloc(struct dentry *parent, + struct ll_statahead_info *sai, __u64 index, const char *name, int len) { struct ll_inode_info *lli; @@ -221,7 +218,8 @@ ll_sa_entry_alloc(struct ll_statahead_info *sai, __u64 index, dname = (char *)entry + sizeof(struct ll_sa_entry); memcpy(dname, name, len); dname[len] = 0; - entry->se_qstr.hash = full_name_hash(name, len); + + entry->se_qstr.hash = full_name_hash(parent, name, len); entry->se_qstr.len = len; entry->se_qstr.name = dname; @@ -650,7 +648,7 @@ static void ll_post_statahead(struct ll_statahead_info *sai) } } - it->d.lustre.it_lock_handle = entry->se_handle; + it->it_lock_handle = entry->se_handle; rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL); if (rc != 1) { rc = -EAGAIN; @@ -661,8 +659,9 @@ static void ll_post_statahead(struct ll_statahead_info *sai) if (rc) goto out; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", - child, child->i_ino, child->i_generation); + CDEBUG(D_DLMTRACE, "%s: setting l_data to inode "DFID"%p\n", + ll_get_fsname(child->i_sb, NULL, 0), + PFID(ll_inode2fid(child)), child); ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL); entry->se_inode = child; @@ -703,7 +702,7 @@ static int ll_statahead_interpret(struct ptlrpc_request *req, * process enqueues lock on child with parent lock held, eg. * unlink. */ - handle = it->d.lustre.it_lock_handle; + handle = it->it_lock_handle; ll_intent_drop_lock(it); } @@ -782,7 +781,7 @@ static int sa_args_init(struct inode *dir, struct inode *child, struct ll_sa_entry *entry, struct md_enqueue_info **pmi, struct ldlm_enqueue_info **pei) { - struct qstr *qstr = &entry->se_qstr; + const struct qstr *qstr = &entry->se_qstr; struct ll_inode_info *lli = ll_i2info(dir); struct md_enqueue_info *minfo; struct ldlm_enqueue_info *einfo; @@ -853,7 +852,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry, { struct inode *inode = d_inode(dentry); struct lookup_intent it = { .it_op = IT_GETATTR, - .d.lustre.it_lock_handle = 0 }; + .it_lock_handle = 0 }; struct md_enqueue_info *minfo; struct ldlm_enqueue_info *einfo; int rc; @@ -868,7 +867,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry, rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode), NULL); if (rc == 1) { - entry->se_handle = it.d.lustre.it_lock_handle; + entry->se_handle = it.it_lock_handle; ll_intent_release(&it); return 1; } @@ -901,7 +900,7 @@ static void ll_statahead_one(struct dentry *parent, const char *entry_name, int rc; int rc1; - entry = ll_sa_entry_alloc(sai, sai->sai_index, entry_name, + entry = ll_sa_entry_alloc(parent, sai, sai->sai_index, entry_name, entry_name_len); if (IS_ERR(entry)) return; @@ -1341,7 +1340,7 @@ enum { static int is_first_dirent(struct inode *dir, struct dentry *dentry) { struct ll_dir_chain chain; - struct qstr *target = &dentry->d_name; + const struct qstr *target = &dentry->d_name; struct page *page; __u64 pos = 0; int dot_de; @@ -1572,7 +1571,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, if (entry->se_stat == SA_ENTRY_SUCC && entry->se_inode) { struct inode *inode = entry->se_inode; struct lookup_intent it = { .it_op = IT_GETATTR, - .d.lustre.it_lock_handle = + .it_lock_handle = entry->se_handle }; __u64 bits; @@ -1591,13 +1590,11 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, *dentryp = alias; } else if (d_inode(*dentryp) != inode) { /* revalidate, but inode is recreated */ - CDEBUG(D_READA, - "stale dentry %pd inode %lu/%u, statahead inode %lu/%u\n", - *dentryp, - d_inode(*dentryp)->i_ino, - d_inode(*dentryp)->i_generation, - inode->i_ino, - inode->i_generation); + CDEBUG(D_READA, "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n", + ll_get_fsname(d_inode(*dentryp)->i_sb, NULL, 0), + *dentryp, + PFID(ll_inode2fid(d_inode(*dentryp))), + PFID(ll_inode2fid(inode))); ll_sai_unplug(sai, entry); return -ESTALE; } else { diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c index 61856d37afc5..3dd7e0eb0b54 100644 --- a/drivers/staging/lustre/lustre/llite/super25.c +++ b/drivers/staging/lustre/lustre/llite/super25.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -118,19 +114,6 @@ static int __init lustre_init(void) if (!ll_file_data_slab) goto out_cache; - ll_remote_perm_cachep = kmem_cache_create("ll_remote_perm_cache", - sizeof(struct ll_remote_perm), - 0, 0, NULL); - if (!ll_remote_perm_cachep) - goto out_cache; - - ll_rmtperm_hash_cachep = kmem_cache_create("ll_rmtperm_hash_cache", - REMOTE_PERM_HASHSIZE * - sizeof(struct list_head), - 0, 0, NULL); - if (!ll_rmtperm_hash_cachep) - goto out_cache; - llite_root = debugfs_create_dir("llite", debugfs_lustre_root); if (IS_ERR_OR_NULL(llite_root)) { rc = llite_root ? PTR_ERR(llite_root) : -ENOMEM; @@ -164,9 +147,18 @@ static int __init lustre_init(void) if (rc != 0) goto out_sysfs; + cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck, + LCT_REMEMBER | LCT_NOREF); + if (IS_ERR(cl_inode_fini_env)) { + rc = PTR_ERR(cl_inode_fini_env); + goto out_vvp; + } + + cl_inode_fini_env->le_ctx.lc_cookie = 0x4; + rc = ll_xattr_init(); if (rc != 0) - goto out_vvp; + goto out_inode_fini_env; lustre_register_client_fill_super(ll_fill_super); lustre_register_kill_super_cb(ll_kill_super); @@ -174,6 +166,8 @@ static int __init lustre_init(void) return 0; +out_inode_fini_env: + cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck); out_vvp: vvp_global_fini(); out_sysfs: @@ -183,8 +177,6 @@ out_debugfs: out_cache: kmem_cache_destroy(ll_inode_cachep); kmem_cache_destroy(ll_file_data_slab); - kmem_cache_destroy(ll_remote_perm_cachep); - kmem_cache_destroy(ll_rmtperm_hash_cachep); return rc; } @@ -198,13 +190,10 @@ static void __exit lustre_exit(void) kset_unregister(llite_kset); ll_xattr_fini(); + cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck); vvp_global_fini(); kmem_cache_destroy(ll_inode_cachep); - kmem_cache_destroy(ll_rmtperm_hash_cachep); - - kmem_cache_destroy(ll_remote_perm_cachep); - kmem_cache_destroy(ll_file_data_slab); } diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c index 46d03ea48352..8c8bdfe1ad71 100644 --- a/drivers/staging/lustre/lustre/llite/symlink.c +++ b/drivers/staging/lustre/lustre/llite/symlink.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -77,7 +73,9 @@ static int ll_readlink_internal(struct inode *inode, ll_finish_md_op_data(op_data); if (rc) { if (rc != -ENOENT) - CERROR("inode %lu: rc = %d\n", inode->i_ino, rc); + CERROR("%s: inode "DFID": rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), rc); goto failed; } @@ -90,8 +88,10 @@ static int ll_readlink_internal(struct inode *inode, LASSERT(symlen != 0); if (body->eadatasize != symlen) { - CERROR("inode %lu: symlink length %d not expected %d\n", - inode->i_ino, body->eadatasize - 1, symlen - 1); + CERROR("%s: inode "DFID": symlink length %d not expected %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), body->eadatasize - 1, + symlen - 1); rc = -EPROTO; goto failed; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c index 282b70b776da..e623216e962d 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_dev.c +++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -36,6 +32,7 @@ * cl_device and cl_device_type implementation for VVP layer. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ #define DEBUG_SUBSYSTEM S_LLITE @@ -56,13 +53,33 @@ * "llite_" (var. "ll_") prefix. */ -static struct kmem_cache *vvp_thread_kmem; +static struct kmem_cache *ll_thread_kmem; +struct kmem_cache *vvp_lock_kmem; +struct kmem_cache *vvp_object_kmem; +struct kmem_cache *vvp_req_kmem; static struct kmem_cache *vvp_session_kmem; +static struct kmem_cache *vvp_thread_kmem; + static struct lu_kmem_descr vvp_caches[] = { { - .ckd_cache = &vvp_thread_kmem, - .ckd_name = "vvp_thread_kmem", - .ckd_size = sizeof(struct vvp_thread_info), + .ckd_cache = &ll_thread_kmem, + .ckd_name = "ll_thread_kmem", + .ckd_size = sizeof(struct ll_thread_info), + }, + { + .ckd_cache = &vvp_lock_kmem, + .ckd_name = "vvp_lock_kmem", + .ckd_size = sizeof(struct vvp_lock), + }, + { + .ckd_cache = &vvp_object_kmem, + .ckd_name = "vvp_object_kmem", + .ckd_size = sizeof(struct vvp_object), + }, + { + .ckd_cache = &vvp_req_kmem, + .ckd_name = "vvp_req_kmem", + .ckd_size = sizeof(struct vvp_req), }, { .ckd_cache = &vvp_session_kmem, @@ -70,29 +87,40 @@ static struct lu_kmem_descr vvp_caches[] = { .ckd_size = sizeof(struct vvp_session) }, { + .ckd_cache = &vvp_thread_kmem, + .ckd_name = "vvp_thread_kmem", + .ckd_size = sizeof(struct vvp_thread_info), + }, + { .ckd_cache = NULL } }; -static void *vvp_key_init(const struct lu_context *ctx, - struct lu_context_key *key) +static void *ll_thread_key_init(const struct lu_context *ctx, + struct lu_context_key *key) { struct vvp_thread_info *info; - info = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS); + info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS); if (!info) info = ERR_PTR(-ENOMEM); return info; } -static void vvp_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) +static void ll_thread_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) { struct vvp_thread_info *info = data; - kmem_cache_free(vvp_thread_kmem, info); + kmem_cache_free(ll_thread_kmem, info); } +struct lu_context_key ll_thread_key = { + .lct_tags = LCT_CL_THREAD, + .lct_init = ll_thread_key_init, + .lct_fini = ll_thread_key_fini +}; + static void *vvp_session_key_init(const struct lu_context *ctx, struct lu_context_key *key) { @@ -112,34 +140,127 @@ static void vvp_session_key_fini(const struct lu_context *ctx, kmem_cache_free(vvp_session_kmem, session); } -struct lu_context_key vvp_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = vvp_key_init, - .lct_fini = vvp_key_fini -}; - struct lu_context_key vvp_session_key = { .lct_tags = LCT_SESSION, .lct_init = vvp_session_key_init, .lct_fini = vvp_session_key_fini }; +static void *vvp_thread_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct vvp_thread_info *vti; + + vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS); + if (!vti) + vti = ERR_PTR(-ENOMEM); + return vti; +} + +static void vvp_thread_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct vvp_thread_info *vti = data; + + kmem_cache_free(vvp_thread_kmem, vti); +} + +struct lu_context_key vvp_thread_key = { + .lct_tags = LCT_CL_THREAD, + .lct_init = vvp_thread_key_init, + .lct_fini = vvp_thread_key_fini +}; + /* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */ -LU_TYPE_INIT_FINI(vvp, &ccc_key, &ccc_session_key, &vvp_key, &vvp_session_key); +LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key); static const struct lu_device_operations vvp_lu_ops = { .ldo_object_alloc = vvp_object_alloc }; static const struct cl_device_operations vvp_cl_ops = { - .cdo_req_init = ccc_req_init + .cdo_req_init = vvp_req_init }; +static struct lu_device *vvp_device_free(const struct lu_env *env, + struct lu_device *d) +{ + struct vvp_device *vdv = lu2vvp_dev(d); + struct cl_site *site = lu2cl_site(d->ld_site); + struct lu_device *next = cl2lu_dev(vdv->vdv_next); + + if (d->ld_site) { + cl_site_fini(site); + kfree(site); + } + cl_device_fini(lu2cl_dev(d)); + kfree(vdv); + return next; +} + static struct lu_device *vvp_device_alloc(const struct lu_env *env, struct lu_device_type *t, struct lustre_cfg *cfg) { - return ccc_device_alloc(env, t, cfg, &vvp_lu_ops, &vvp_cl_ops); + struct vvp_device *vdv; + struct lu_device *lud; + struct cl_site *site; + int rc; + + vdv = kzalloc(sizeof(*vdv), GFP_NOFS); + if (!vdv) + return ERR_PTR(-ENOMEM); + + lud = &vdv->vdv_cl.cd_lu_dev; + cl_device_init(&vdv->vdv_cl, t); + vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops; + vdv->vdv_cl.cd_ops = &vvp_cl_ops; + + site = kzalloc(sizeof(*site), GFP_NOFS); + if (site) { + rc = cl_site_init(site, &vdv->vdv_cl); + if (rc == 0) { + rc = lu_site_init_finish(&site->cs_lu); + } else { + LASSERT(!lud->ld_site); + CERROR("Cannot init lu_site, rc %d.\n", rc); + kfree(site); + } + } else { + rc = -ENOMEM; + } + if (rc != 0) { + vvp_device_free(env, lud); + lud = ERR_PTR(rc); + } + return lud; +} + +static int vvp_device_init(const struct lu_env *env, struct lu_device *d, + const char *name, struct lu_device *next) +{ + struct vvp_device *vdv; + int rc; + + vdv = lu2vvp_dev(d); + vdv->vdv_next = lu2cl_dev(next); + + LASSERT(d->ld_site && next->ld_type); + next->ld_site = d->ld_site; + rc = next->ld_type->ldt_ops->ldto_device_init(env, next, + next->ld_type->ldt_name, + NULL); + if (rc == 0) { + lu_device_get(next); + lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init); + } + return rc; +} + +static struct lu_device *vvp_device_fini(const struct lu_env *env, + struct lu_device *d) +{ + return cl2lu_dev(lu2vvp_dev(d)->vdv_next); } static const struct lu_device_type_operations vvp_device_type_ops = { @@ -150,9 +271,9 @@ static const struct lu_device_type_operations vvp_device_type_ops = { .ldto_stop = vvp_type_stop, .ldto_device_alloc = vvp_device_alloc, - .ldto_device_free = ccc_device_free, - .ldto_device_init = ccc_device_init, - .ldto_device_fini = ccc_device_fini + .ldto_device_free = vvp_device_free, + .ldto_device_init = vvp_device_init, + .ldto_device_fini = vvp_device_fini, }; struct lu_device_type vvp_device_type = { @@ -168,20 +289,27 @@ struct lu_device_type vvp_device_type = { */ int vvp_global_init(void) { - int result; + int rc; - result = lu_kmem_init(vvp_caches); - if (result == 0) { - result = ccc_global_init(&vvp_device_type); - if (result != 0) - lu_kmem_fini(vvp_caches); - } - return result; + rc = lu_kmem_init(vvp_caches); + if (rc != 0) + return rc; + + rc = lu_device_type_init(&vvp_device_type); + if (rc != 0) + goto out_kmem; + + return 0; + +out_kmem: + lu_kmem_fini(vvp_caches); + + return rc; } void vvp_global_fini(void) { - ccc_global_fini(&vvp_device_type); + lu_device_type_fini(&vvp_device_type); lu_kmem_fini(vvp_caches); } @@ -205,13 +333,14 @@ int cl_sb_init(struct super_block *sb) cl = cl_type_setup(env, NULL, &vvp_device_type, sbi->ll_dt_exp->exp_obd->obd_lu_dev); if (!IS_ERR(cl)) { - cl2ccc_dev(cl)->cdv_sb = sb; + cl2vvp_dev(cl)->vdv_sb = sb; sbi->ll_cl = cl; sbi->ll_site = cl2lu_dev(cl)->ld_site; } cl_env_put(env, &refcheck); - } else + } else { rc = PTR_ERR(env); + } return rc; } @@ -356,23 +485,18 @@ static loff_t vvp_pgcache_find(const struct lu_env *env, return ~0ULL; clob = vvp_pgcache_obj(env, dev, &id); if (clob) { - struct cl_object_header *hdr; - int nr; - struct cl_page *pg; - - /* got an object. Find next page. */ - hdr = cl_object_header(clob); + struct inode *inode = vvp_object_inode(clob); + struct page *vmpage; + int nr; - spin_lock(&hdr->coh_page_guard); - nr = radix_tree_gang_lookup(&hdr->coh_tree, - (void **)&pg, - id.vpi_index, 1); + nr = find_get_pages_contig(inode->i_mapping, + id.vpi_index, 1, &vmpage); if (nr > 0) { - id.vpi_index = pg->cp_index; + id.vpi_index = vmpage->index; /* Cant support over 16T file */ - nr = !(pg->cp_index > 0xffffffff); + nr = !(vmpage->index > 0xffffffff); + put_page(vmpage); } - spin_unlock(&hdr->coh_page_guard); lu_object_ref_del(&clob->co_lu, "dump", current); cl_object_put(env, clob); @@ -398,21 +522,20 @@ static loff_t vvp_pgcache_find(const struct lu_env *env, static void vvp_pgcache_page_show(const struct lu_env *env, struct seq_file *seq, struct cl_page *page) { - struct ccc_page *cpg; + struct vvp_page *vpg; struct page *vmpage; int has_flags; - cpg = cl2ccc_page(cl_page_at(page, &vvp_device_type)); - vmpage = cpg->cpg_page; - seq_printf(seq, " %5i | %p %p %s %s %s %s | %p %lu/%u(%p) %lu %u [", + vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type)); + vmpage = vpg->vpg_page; + seq_printf(seq, " %5i | %p %p %s %s %s %s | %p "DFID"(%p) %lu %u [", 0 /* gen */, - cpg, page, + vpg, page, "none", - cpg->cpg_write_queued ? "wq" : "- ", - cpg->cpg_defer_uptodate ? "du" : "- ", + vpg->vpg_write_queued ? "wq" : "- ", + vpg->vpg_defer_uptodate ? "du" : "- ", PageWriteback(vmpage) ? "wb" : "-", - vmpage, vmpage->mapping->host->i_ino, - vmpage->mapping->host->i_generation, + vmpage, PFID(ll_inode2fid(vmpage->mapping->host)), vmpage->mapping->host, vmpage->index, page_count(vmpage)); has_flags = 0; @@ -431,40 +554,49 @@ static int vvp_pgcache_show(struct seq_file *f, void *v) struct ll_sb_info *sbi; struct cl_object *clob; struct lu_env *env; - struct cl_page *page; - struct cl_object_header *hdr; struct vvp_pgcache_id id; int refcheck; int result; env = cl_env_get(&refcheck); if (!IS_ERR(env)) { - pos = *(loff_t *) v; + pos = *(loff_t *)v; vvp_pgcache_id_unpack(pos, &id); sbi = f->private; clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id); if (clob) { - hdr = cl_object_header(clob); - - spin_lock(&hdr->coh_page_guard); - page = cl_page_lookup(hdr, id.vpi_index); - spin_unlock(&hdr->coh_page_guard); + struct inode *inode = vvp_object_inode(clob); + struct cl_page *page = NULL; + struct page *vmpage; + + result = find_get_pages_contig(inode->i_mapping, + id.vpi_index, 1, + &vmpage); + if (result > 0) { + lock_page(vmpage); + page = cl_vmpage_page(vmpage, clob); + unlock_page(vmpage); + put_page(vmpage); + } - seq_printf(f, "%8x@"DFID": ", - id.vpi_index, PFID(&hdr->coh_lu.loh_fid)); + seq_printf(f, "%8x@" DFID ": ", id.vpi_index, + PFID(lu_object_fid(&clob->co_lu))); if (page) { vvp_pgcache_page_show(env, f, page); cl_page_put(env, page); - } else + } else { seq_puts(f, "missing\n"); + } lu_object_ref_del(&clob->co_lu, "dump", current); cl_object_put(env, clob); - } else + } else { seq_printf(f, "%llx missing\n", pos); + } cl_env_put(env, &refcheck); result = 0; - } else + } else { result = PTR_ERR(env); + } return result; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h index bb393378c9bb..79fc428461ed 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_internal.h +++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -41,21 +37,337 @@ #ifndef VVP_INTERNAL_H #define VVP_INTERNAL_H +#include "../include/lustre/lustre_idl.h" #include "../include/cl_object.h" -#include "llite_internal.h" -int vvp_io_init(const struct lu_env *env, - struct cl_object *obj, struct cl_io *io); -int vvp_lock_init(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *io); +enum obd_notify_event; +struct inode; +struct lov_stripe_md; +struct lustre_md; +struct obd_capa; +struct obd_device; +struct obd_export; +struct page; + +/* specific architecture can implement only part of this list */ +enum vvp_io_subtype { + /** normal IO */ + IO_NORMAL, + /** io started from splice_{read|write} */ + IO_SPLICE +}; + +/** + * IO state private to IO state private to VVP layer. + */ +struct vvp_io { + /** super class */ + struct cl_io_slice vui_cl; + struct cl_io_lock_link vui_link; + /** + * I/O vector information to or from which read/write is going. + */ + struct iov_iter *vui_iter; + /** + * Total size for the left IO. + */ + size_t vui_tot_count; + + union { + struct vvp_fault_io { + /** + * Inode modification time that is checked across DLM + * lock request. + */ + time64_t ft_mtime; + struct vm_area_struct *ft_vma; + /** + * locked page returned from vvp_io + */ + struct page *ft_vmpage; + /** + * kernel fault info + */ + struct vm_fault *ft_vmf; + /** + * fault API used bitflags for return code. + */ + unsigned int ft_flags; + /** + * check that flags are from filemap_fault + */ + bool ft_flags_valid; + } fault; + struct { + struct pipe_inode_info *vui_pipe; + unsigned int vui_flags; + } splice; + struct { + struct cl_page_list vui_queue; + unsigned long vui_written; + int vui_from; + int vui_to; + } write; + } u; + + enum vvp_io_subtype vui_io_subtype; + + /** + * Layout version when this IO is initialized + */ + __u32 vui_layout_gen; + /** + * File descriptor against which IO is done. + */ + struct ll_file_data *vui_fd; + struct kiocb *vui_iocb; + + /* Readahead state. */ + pgoff_t vui_ra_start; + pgoff_t vui_ra_count; + /* Set when vui_ra_{start,count} have been initialized. */ + bool vui_ra_valid; +}; + +extern struct lu_device_type vvp_device_type; + +extern struct lu_context_key vvp_session_key; +extern struct lu_context_key vvp_thread_key; + +extern struct kmem_cache *vvp_lock_kmem; +extern struct kmem_cache *vvp_object_kmem; +extern struct kmem_cache *vvp_req_kmem; + +struct vvp_thread_info { + struct cl_lock vti_lock; + struct cl_lock_descr vti_descr; + struct cl_io vti_io; + struct cl_attr vti_attr; +}; + +static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env) +{ + struct vvp_thread_info *vti; + + vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key); + LASSERT(vti); + + return vti; +} + +static inline struct cl_lock *vvp_env_lock(const struct lu_env *env) +{ + struct cl_lock *lock = &vvp_env_info(env)->vti_lock; + + memset(lock, 0, sizeof(*lock)); + return lock; +} + +static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env) +{ + struct cl_attr *attr = &vvp_env_info(env)->vti_attr; + + memset(attr, 0, sizeof(*attr)); + + return attr; +} + +static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env) +{ + struct cl_io *io = &vvp_env_info(env)->vti_io; + + memset(io, 0, sizeof(*io)); + + return io; +} + +struct vvp_session { + struct vvp_io cs_ios; +}; + +static inline struct vvp_session *vvp_env_session(const struct lu_env *env) +{ + struct vvp_session *ses; + + ses = lu_context_key_get(env->le_ses, &vvp_session_key); + LASSERT(ses); + + return ses; +} + +static inline struct vvp_io *vvp_env_io(const struct lu_env *env) +{ + return &vvp_env_session(env)->cs_ios; +} + +/** + * ccc-private object state. + */ +struct vvp_object { + struct cl_object_header vob_header; + struct cl_object vob_cl; + struct inode *vob_inode; + + /** + * A list of dirty pages pending IO in the cache. Used by + * SOM. Protected by ll_inode_info::lli_lock. + * + * \see vvp_page::vpg_pending_linkage + */ + struct list_head vob_pending_list; + + /** + * Access this counter is protected by inode->i_sem. Now that + * the lifetime of transient pages must be covered by inode sem, + * we don't need to hold any lock.. + */ + int vob_transient_pages; + /** + * Number of outstanding mmaps on this file. + * + * \see ll_vm_open(), ll_vm_close(). + */ + atomic_t vob_mmap_cnt; + + /** + * various flags + * vob_discard_page_warned + * if pages belonging to this object are discarded when a client + * is evicted, some debug info will be printed, this flag will be set + * during processing the first discarded page, then avoid flooding + * debug message for lots of discarded pages. + * + * \see ll_dirty_page_discard_warn. + */ + unsigned int vob_discard_page_warned:1; +}; + +/** + * VVP-private page state. + */ +struct vvp_page { + struct cl_page_slice vpg_cl; + int vpg_defer_uptodate; + int vpg_ra_used; + int vpg_write_queued; + /** + * Non-empty iff this page is already counted in + * vvp_object::vob_pending_list. This list is only used as a flag, + * that is, never iterated through, only checked for list_empty(), but + * having a list is useful for debugging. + */ + struct list_head vpg_pending_linkage; + /** VM page */ + struct page *vpg_page; +}; + +static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice) +{ + return container_of(slice, struct vvp_page, vpg_cl); +} + +static inline pgoff_t vvp_index(struct vvp_page *vvp) +{ + return vvp->vpg_cl.cpl_index; +} + +struct vvp_device { + struct cl_device vdv_cl; + struct super_block *vdv_sb; + struct cl_device *vdv_next; +}; + +struct vvp_lock { + struct cl_lock_slice vlk_cl; +}; + +struct vvp_req { + struct cl_req_slice vrq_cl; +}; + +void *ccc_key_init(const struct lu_context *ctx, + struct lu_context_key *key); +void ccc_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data); + +void ccc_umount(const struct lu_env *env, struct cl_device *dev); + +static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv) +{ + return &vdv->vdv_cl.cd_lu_dev; +} + +static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d) +{ + return container_of0(d, struct vvp_device, vdv_cl.cd_lu_dev); +} + +static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d) +{ + return container_of0(d, struct vvp_device, vdv_cl); +} + +static inline struct vvp_object *cl2vvp(const struct cl_object *obj) +{ + return container_of0(obj, struct vvp_object, vob_cl); +} + +static inline struct vvp_object *lu2vvp(const struct lu_object *obj) +{ + return container_of0(obj, struct vvp_object, vob_cl.co_lu); +} + +static inline struct inode *vvp_object_inode(const struct cl_object *obj) +{ + return cl2vvp(obj)->vob_inode; +} + +int vvp_object_invariant(const struct cl_object *obj); +struct vvp_object *cl_inode2vvp(struct inode *inode); + +static inline struct page *cl2vm_page(const struct cl_page_slice *slice) +{ + return cl2vvp_page(slice)->vpg_page; +} + +static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice) +{ + return container_of(slice, struct vvp_lock, vlk_cl); +} + +# define CLOBINVRNT(env, clob, expr) \ + ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr))) + +/** + * New interfaces to get and put lov_stripe_md from lov layer. This violates + * layering because lov_stripe_md is supposed to be a private data in lov. + * + * NB: If you find you have to use these interfaces for your new code, please + * think about it again. These interfaces may be removed in the future for + * better layering. + */ +struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj); +void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm); +int lov_read_and_clear_async_rc(struct cl_object *clob); + +struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode); +void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm); + +int vvp_io_init(const struct lu_env *env, struct cl_object *obj, + struct cl_io *io); +int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io); +int vvp_lock_init(const struct lu_env *env, struct cl_object *obj, + struct cl_lock *lock, const struct cl_io *io); int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); +int vvp_req_init(const struct lu_env *env, struct cl_device *dev, + struct cl_req *req); struct lu_object *vvp_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); -struct ccc_object *cl_inode2ccc(struct inode *inode); +int vvp_global_init(void); +void vvp_global_fini(void); extern const struct file_operations vvp_dump_pgcache_file_ops; diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 85a835976174..94916dcc6caa 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -44,21 +40,30 @@ #include "../include/obd.h" #include "../include/lustre_lite.h" +#include "llite_internal.h" #include "vvp_internal.h" static struct vvp_io *cl2vvp_io(const struct lu_env *env, - const struct cl_io_slice *slice); + const struct cl_io_slice *slice) +{ + struct vvp_io *vio; + + vio = container_of(slice, struct vvp_io, vui_cl); + LASSERT(vio == vvp_env_io(env)); + + return vio; +} /** * True, if \a io is a normal io, False for splice_{read,write} */ -int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) +static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) { struct vvp_io *vio = vvp_env_io(env); LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - return vio->cui_io_subtype == IO_NORMAL; + return vio->vui_io_subtype == IO_NORMAL; } /** @@ -71,7 +76,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); - struct ccc_io *cio = ccc_env_io(env); + struct vvp_io *vio = vvp_env_io(env); bool rc = true; switch (io->ci_type) { @@ -80,7 +85,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, /* don't need lock here to check lli_layout_gen as we have held * extent lock and GROUP lock has to hold to swap layout */ - if (ll_layout_version_get(lli) != cio->cui_layout_gen) { + if (ll_layout_version_get(lli) != vio->vui_layout_gen) { io->ci_need_restart = 1; /* this will return application a short read/write */ io->ci_continue = 0; @@ -95,20 +100,187 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, return rc; } +static void vvp_object_size_lock(struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + + ll_inode_size_lock(inode); + cl_object_attr_lock(obj); +} + +static void vvp_object_size_unlock(struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + + cl_object_attr_unlock(obj); + ll_inode_size_unlock(inode); +} + +/** + * Helper function that if necessary adjusts file size (inode->i_size), when + * position at the offset \a pos is accessed. File size can be arbitrary stale + * on a Lustre client, but client at least knows KMS. If accessed area is + * inside [0, KMS], set file size to KMS, otherwise glimpse file size. + * + * Locking: cl_isize_lock is used to serialize changes to inode size and to + * protect consistency between inode size and cl_object + * attributes. cl_object_size_lock() protects consistency between cl_attr's of + * top-object and sub-objects. + */ +static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj, + struct cl_io *io, loff_t start, size_t count, + int *exceed) +{ + struct cl_attr *attr = vvp_env_thread_attr(env); + struct inode *inode = vvp_object_inode(obj); + loff_t pos = start + count - 1; + loff_t kms; + int result; + + /* + * Consistency guarantees: following possibilities exist for the + * relation between region being accessed and real file size at this + * moment: + * + * (A): the region is completely inside of the file; + * + * (B-x): x bytes of region are inside of the file, the rest is + * outside; + * + * (C): the region is completely outside of the file. + * + * This classification is stable under DLM lock already acquired by + * the caller, because to change the class, other client has to take + * DLM lock conflicting with our lock. Also, any updates to ->i_size + * by other threads on this client are serialized by + * ll_inode_size_lock(). This guarantees that short reads are handled + * correctly in the face of concurrent writes and truncates. + */ + vvp_object_size_lock(obj); + result = cl_object_attr_get(env, obj, attr); + if (result == 0) { + kms = attr->cat_kms; + if (pos > kms) { + /* + * A glimpse is necessary to determine whether we + * return a short read (B) or some zeroes at the end + * of the buffer (C) + */ + vvp_object_size_unlock(obj); + result = cl_glimpse_lock(env, io, inode, obj, 0); + if (result == 0 && exceed) { + /* If objective page index exceed end-of-file + * page index, return directly. Do not expect + * kernel will check such case correctly. + * linux-2.6.18-128.1.1 miss to do that. + * --bug 17336 + */ + loff_t size = i_size_read(inode); + loff_t cur_index = start >> PAGE_SHIFT; + loff_t size_index = (size - 1) >> PAGE_SHIFT; + + if ((size == 0 && cur_index != 0) || + size_index < cur_index) + *exceed = 1; + } + return result; + } + /* + * region is within kms and, hence, within real file + * size (A). We need to increase i_size to cover the + * read region so that generic_file_read() will do its + * job, but that doesn't mean the kms size is + * _correct_, it is only the _minimum_ size. If + * someone does a stat they will get the correct size + * which will always be >= the kms value here. + * b=11081 + */ + if (i_size_read(inode) < kms) { + i_size_write(inode, kms); + CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", + PFID(lu_object_fid(&obj->co_lu)), + (__u64)i_size_read(inode)); + } + } + + vvp_object_size_unlock(obj); + + return result; +} + /***************************************************************************** * * io operations. * */ +static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io, + __u32 enqflags, enum cl_lock_mode mode, + pgoff_t start, pgoff_t end) +{ + struct vvp_io *vio = vvp_env_io(env); + struct cl_lock_descr *descr = &vio->vui_link.cill_descr; + struct cl_object *obj = io->ci_obj; + + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); + + CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end); + + memset(&vio->vui_link, 0, sizeof(vio->vui_link)); + + if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + descr->cld_mode = CLM_GROUP; + descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid; + } else { + descr->cld_mode = mode; + } + descr->cld_obj = obj; + descr->cld_start = start; + descr->cld_end = end; + descr->cld_enq_flags = enqflags; + + cl_io_lock_add(env, io, &vio->vui_link); + return 0; +} + +static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io, + __u32 enqflags, enum cl_lock_mode mode, + loff_t start, loff_t end) +{ + struct cl_object *obj = io->ci_obj; + + return vvp_io_one_lock_index(env, io, enqflags, mode, + cl_index(obj, start), cl_index(obj, end)); +} + +static int vvp_io_write_iter_init(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct vvp_io *vio = cl2vvp_io(env, ios); + + cl_page_list_init(&vio->u.write.vui_queue); + vio->u.write.vui_written = 0; + vio->u.write.vui_from = 0; + vio->u.write.vui_to = PAGE_SIZE; + + return 0; +} + +static void vvp_io_write_iter_fini(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct vvp_io *vio = cl2vvp_io(env, ios); + + LASSERT(vio->u.write.vui_queue.pl_nr == 0); +} + static int vvp_io_fault_iter_init(const struct lu_env *env, const struct cl_io_slice *ios) { struct vvp_io *vio = cl2vvp_io(env, ios); - struct inode *inode = ccc_object_inode(ios->cis_obj); + struct inode *inode = vvp_object_inode(ios->cis_obj); - LASSERT(inode == - file_inode(cl2ccc_io(env, ios)->cui_fd->fd_file)); + LASSERT(inode == file_inode(vio->vui_fd->fd_file)); vio->u.fault.ft_mtime = inode->i_mtime.tv_sec; return 0; } @@ -117,15 +289,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct ccc_io *cio = cl2ccc_io(env, ios); + struct vvp_io *vio = cl2vvp_io(env, ios); + struct inode *inode = vvp_object_inode(obj); - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, DFID " ignore/verify layout %d/%d, layout version %d restore needed %d\n", PFID(lu_object_fid(&obj->co_lu)), io->ci_ignore_layout, io->ci_verify_layout, - cio->cui_layout_gen, io->ci_restore_needed); + vio->vui_layout_gen, io->ci_restore_needed); if (io->ci_restore_needed == 1) { int rc; @@ -133,7 +306,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) /* file was detected release, we need to restore it * before finishing the io */ - rc = ll_layout_restore(ccc_object_inode(obj)); + rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF); /* if restore registration failed, no restart, * we will return -ENODATA */ @@ -159,16 +332,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) __u32 gen = 0; /* check layout version */ - ll_layout_refresh(ccc_object_inode(obj), &gen); - io->ci_need_restart = cio->cui_layout_gen != gen; + ll_layout_refresh(inode, &gen); + io->ci_need_restart = vio->vui_layout_gen != gen; if (io->ci_need_restart) { CDEBUG(D_VFSTRACE, DFID" layout changed from %d to %d.\n", PFID(lu_object_fid(&obj->co_lu)), - cio->cui_layout_gen, gen); + vio->vui_layout_gen, gen); /* today successful restore is the only possible case */ /* restore was done, clear restoring state */ - ll_i2info(ccc_object_inode(obj))->lli_flags &= + ll_i2info(vvp_object_inode(obj))->lli_flags &= ~LLIF_FILE_RESTORING; } } @@ -180,7 +353,7 @@ static void vvp_io_fault_fini(const struct lu_env *env, struct cl_io *io = ios->cis_io; struct cl_page *page = io->u.ci_fault.ft_page; - CLOBINVRNT(env, io->ci_obj, ccc_object_invariant(io->ci_obj)); + CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj)); if (page) { lu_ref_del(&page->cp_reference, "fault", io); @@ -203,16 +376,16 @@ static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma) } static int vvp_mmap_locks(const struct lu_env *env, - struct ccc_io *vio, struct cl_io *io) + struct vvp_io *vio, struct cl_io *io) { - struct ccc_thread_info *cti = ccc_env_info(env); + struct vvp_thread_info *cti = vvp_env_info(env); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - struct cl_lock_descr *descr = &cti->cti_descr; + struct cl_lock_descr *descr = &cti->vti_descr; ldlm_policy_data_t policy; unsigned long addr; ssize_t count; - int result; + int result = 0; struct iov_iter i; struct iovec iov; @@ -221,21 +394,21 @@ static int vvp_mmap_locks(const struct lu_env *env, if (!cl_is_normalio(env, io)) return 0; - if (!vio->cui_iter) /* nfs or loop back device write */ + if (!vio->vui_iter) /* nfs or loop back device write */ return 0; /* No MM (e.g. NFS)? No vmas too. */ if (!mm) return 0; - iov_for_each(iov, i, *(vio->cui_iter)) { + iov_for_each(iov, i, *vio->vui_iter) { addr = (unsigned long)iov.iov_base; count = iov.iov_len; if (count == 0) continue; - count += addr & (~CFS_PAGE_MASK); - addr &= CFS_PAGE_MASK; + count += addr & (~PAGE_MASK); + addr &= PAGE_MASK; down_read(&mm->mmap_sem); while ((vma = our_vma(mm, addr, count)) != NULL) { @@ -244,10 +417,10 @@ static int vvp_mmap_locks(const struct lu_env *env, if (ll_file_nolock(vma->vm_file)) { /* - * For no lock case, a lockless lock will be - * generated. + * For no lock case is not allowed for mmap */ - flags = CEF_NEVER; + result = -EINVAL; + break; } /* @@ -269,10 +442,8 @@ static int vvp_mmap_locks(const struct lu_env *env, descr->cld_mode, descr->cld_start, descr->cld_end); - if (result < 0) { - up_read(&mm->mmap_sem); - return result; - } + if (result < 0) + break; if (vma->vm_end - addr >= count) break; @@ -281,26 +452,55 @@ static int vvp_mmap_locks(const struct lu_env *env, addr = vma->vm_end; } up_read(&mm->mmap_sem); + if (result < 0) + break; } - return 0; + return result; +} + +static void vvp_io_advance(const struct lu_env *env, + const struct cl_io_slice *ios, + size_t nob) +{ + struct vvp_io *vio = cl2vvp_io(env, ios); + struct cl_io *io = ios->cis_io; + struct cl_object *obj = ios->cis_io->ci_obj; + + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); + + if (!cl_is_normalio(env, io)) + return; + + iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob); +} + +static void vvp_io_update_iov(const struct lu_env *env, + struct vvp_io *vio, struct cl_io *io) +{ + size_t size = io->u.ci_rw.crw_count; + + if (!cl_is_normalio(env, io) || !vio->vui_iter) + return; + + iov_iter_truncate(vio->vui_iter, size); } static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io, enum cl_lock_mode mode, loff_t start, loff_t end) { - struct ccc_io *cio = ccc_env_io(env); + struct vvp_io *vio = vvp_env_io(env); int result; int ast_flags = 0; LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - ccc_io_update_iov(env, cio, io); + vvp_io_update_iov(env, vio, io); if (io->u.ci_rw.crw_nonblock) ast_flags |= CEF_NONBLOCK; - result = vvp_mmap_locks(env, cio, io); + result = vvp_mmap_locks(env, vio, io); if (result == 0) - result = ccc_io_one_lock(env, io, ast_flags, mode, start, end); + result = vvp_io_one_lock(env, io, ast_flags, mode, start, end); return result; } @@ -325,9 +525,11 @@ static int vvp_io_fault_lock(const struct lu_env *env, /* * XXX LDLM_FL_CBPENDING */ - return ccc_io_one_lock_index - (env, io, 0, vvp_mode_from_vma(vio->u.fault.ft_vma), - io->u.ci_fault.ft_index, io->u.ci_fault.ft_index); + return vvp_io_one_lock_index(env, + io, 0, + vvp_mode_from_vma(vio->u.fault.ft_vma), + io->u.ci_fault.ft_index, + io->u.ci_fault.ft_index); } static int vvp_io_write_lock(const struct lu_env *env, @@ -354,14 +556,13 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env, } /** - * Implementation of cl_io_operations::cio_lock() method for CIT_SETATTR io. + * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io. * * Handles "lockless io" mode when extent locking is done by server. */ static int vvp_io_setattr_lock(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = ccc_env_io(env); struct cl_io *io = ios->cis_io; __u64 new_size; __u32 enqflags = 0; @@ -378,8 +579,8 @@ static int vvp_io_setattr_lock(const struct lu_env *env, return 0; new_size = 0; } - cio->u.setattr.cui_local_lock = SETATTR_EXTENT_LOCK; - return ccc_io_one_lock(env, io, enqflags, CLM_WRITE, + + return vvp_io_one_lock(env, io, enqflags, CLM_WRITE, new_size, OBD_OBJECT_EOF); } @@ -413,7 +614,7 @@ static int vvp_io_setattr_time(const struct lu_env *env, { struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct cl_attr *attr = ccc_env_thread_attr(env); + struct cl_attr *attr = vvp_env_thread_attr(env); int result; unsigned valid = CAT_CTIME; @@ -437,7 +638,7 @@ static int vvp_io_setattr_start(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; - struct inode *inode = ccc_object_inode(io->ci_obj); + struct inode *inode = vvp_object_inode(io->ci_obj); int result = 0; inode_lock(inode); @@ -453,7 +654,7 @@ static void vvp_io_setattr_end(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; - struct inode *inode = ccc_object_inode(io->ci_obj); + struct inode *inode = vvp_object_inode(io->ci_obj); if (cl_io_is_trunc(io)) /* Truncate in memory pages - they must be clean pages @@ -474,27 +675,25 @@ static int vvp_io_read_start(const struct lu_env *env, const struct cl_io_slice *ios) { struct vvp_io *vio = cl2vvp_io(env, ios); - struct ccc_io *cio = cl2ccc_io(env, ios); struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); - struct ll_ra_read *bead = &vio->cui_bead; - struct file *file = cio->cui_fd->fd_file; + struct inode *inode = vvp_object_inode(obj); + struct file *file = vio->vui_fd->fd_file; int result; loff_t pos = io->u.ci_rd.rd.crw_pos; long cnt = io->u.ci_rd.rd.crw_count; - long tot = cio->cui_tot_count; + long tot = vio->vui_tot_count; int exceed = 0; - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt); if (!can_populate_pages(env, io, inode)) return 0; - result = ccc_prep_size(env, obj, io, pos, tot, &exceed); + result = vvp_prep_size(env, obj, io, pos, tot, &exceed); if (result != 0) return result; else if (exceed != 0) @@ -505,30 +704,27 @@ static int vvp_io_read_start(const struct lu_env *env, inode->i_ino, cnt, pos, i_size_read(inode)); /* turn off the kernel's read-ahead */ - cio->cui_fd->fd_file->f_ra.ra_pages = 0; + vio->vui_fd->fd_file->f_ra.ra_pages = 0; /* initialize read-ahead window once per syscall */ - if (!vio->cui_ra_window_set) { - vio->cui_ra_window_set = 1; - bead->lrr_start = cl_index(obj, pos); - /* - * XXX: explicit PAGE_SIZE - */ - bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1); - ll_ra_read_in(file, bead); + if (!vio->vui_ra_valid) { + vio->vui_ra_valid = true; + vio->vui_ra_start = cl_index(obj, pos); + vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1); + ll_ras_enter(file); } /* BUG: 5972 */ file_accessed(file); - switch (vio->cui_io_subtype) { + switch (vio->vui_io_subtype) { case IO_NORMAL: - LASSERT(cio->cui_iocb->ki_pos == pos); - result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter); + LASSERT(vio->vui_iocb->ki_pos == pos); + result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter); break; case IO_SPLICE: result = generic_file_splice_read(file, &pos, - vio->u.splice.cui_pipe, cnt, - vio->u.splice.cui_flags); + vio->u.splice.vui_pipe, cnt, + vio->u.splice.vui_flags); /* LU-1109: do splice read stripe by stripe otherwise if it * may make nfsd stuck if this read occupied all internal pipe * buffers. @@ -536,7 +732,7 @@ static int vvp_io_read_start(const struct lu_env *env, io->ci_continue = 0; break; default: - CERROR("Wrong IO type %u\n", vio->cui_io_subtype); + CERROR("Wrong IO type %u\n", vio->vui_io_subtype); LBUG(); } @@ -546,30 +742,201 @@ out: io->ci_continue = 0; io->ci_nob += result; ll_rw_stats_tally(ll_i2sbi(inode), current->pid, - cio->cui_fd, pos, result, READ); + vio->vui_fd, pos, result, READ); result = 0; } return result; } -static void vvp_io_read_fini(const struct lu_env *env, const struct cl_io_slice *ios) +static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *plist, int from, int to) { - struct vvp_io *vio = cl2vvp_io(env, ios); - struct ccc_io *cio = cl2ccc_io(env, ios); + struct cl_2queue *queue = &io->ci_queue; + struct cl_page *page; + unsigned int bytes = 0; + int rc = 0; - if (vio->cui_ra_window_set) - ll_ra_read_ex(cio->cui_fd->fd_file, &vio->cui_bead); + if (plist->pl_nr == 0) + return 0; - vvp_io_fini(env, ios); + if (from > 0 || to != PAGE_SIZE) { + page = cl_page_list_first(plist); + if (plist->pl_nr == 1) { + cl_page_clip(env, page, from, to); + } else { + if (from > 0) + cl_page_clip(env, page, from, PAGE_SIZE); + if (to != PAGE_SIZE) { + page = cl_page_list_last(plist); + cl_page_clip(env, page, 0, to); + } + } + } + + cl_2queue_init(queue); + cl_page_list_splice(plist, &queue->c2_qin); + rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0); + + /* plist is not sorted any more */ + cl_page_list_splice(&queue->c2_qin, plist); + cl_page_list_splice(&queue->c2_qout, plist); + cl_2queue_fini(env, queue); + + if (rc == 0) { + /* calculate bytes */ + bytes = plist->pl_nr << PAGE_SHIFT; + bytes -= from + PAGE_SIZE - to; + + while (plist->pl_nr > 0) { + page = cl_page_list_first(plist); + cl_page_list_del(env, plist, page); + + cl_page_clip(env, page, 0, PAGE_SIZE); + + SetPageUptodate(cl_page_vmpage(page)); + cl_page_disown(env, io, page); + + /* held in ll_cl_init() */ + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); + } + } + + return bytes > 0 ? bytes : rc; +} + +static void write_commit_callback(const struct lu_env *env, struct cl_io *io, + struct cl_page *page) +{ + struct vvp_page *vpg; + struct page *vmpage = page->cp_vmpage; + struct cl_object *clob = cl_io_top(io)->ci_obj; + + SetPageUptodate(vmpage); + set_page_dirty(vmpage); + + vpg = cl2vvp_page(cl_object_page_slice(clob, page)); + vvp_write_pending(cl2vvp(clob), vpg); + + cl_page_disown(env, io, page); + + /* held in ll_cl_init() */ + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); +} + +/* make sure the page list is contiguous */ +static bool page_list_sanity_check(struct cl_object *obj, + struct cl_page_list *plist) +{ + struct cl_page *page; + pgoff_t index = CL_PAGE_EOF; + + cl_page_list_for_each(page, plist) { + struct vvp_page *vpg = cl_object_page_slice(obj, page); + + if (index == CL_PAGE_EOF) { + index = vvp_index(vpg); + continue; + } + + ++index; + if (index == vvp_index(vpg)) + continue; + + return false; + } + return true; +} + +/* Return how many bytes have queued or written */ +int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io) +{ + struct cl_object *obj = io->ci_obj; + struct inode *inode = vvp_object_inode(obj); + struct vvp_io *vio = vvp_env_io(env); + struct cl_page_list *queue = &vio->u.write.vui_queue; + struct cl_page *page; + int rc = 0; + int bytes = 0; + unsigned int npages = vio->u.write.vui_queue.pl_nr; + + if (npages == 0) + return 0; + + CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n", + npages, vio->u.write.vui_from, vio->u.write.vui_to); + + LASSERT(page_list_sanity_check(obj, queue)); + + /* submit IO with async write */ + rc = cl_io_commit_async(env, io, queue, + vio->u.write.vui_from, vio->u.write.vui_to, + write_commit_callback); + npages -= queue->pl_nr; /* already committed pages */ + if (npages > 0) { + /* calculate how many bytes were written */ + bytes = npages << PAGE_SHIFT; + + /* first page */ + bytes -= vio->u.write.vui_from; + if (queue->pl_nr == 0) /* last page */ + bytes -= PAGE_SIZE - vio->u.write.vui_to; + LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages); + + vio->u.write.vui_written += bytes; + + CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n", + npages, bytes, vio->u.write.vui_written); + + /* the first page must have been written. */ + vio->u.write.vui_from = 0; + } + LASSERT(page_list_sanity_check(obj, queue)); + LASSERT(ergo(rc == 0, queue->pl_nr == 0)); + + /* out of quota, try sync write */ + if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) { + rc = vvp_io_commit_sync(env, io, queue, + vio->u.write.vui_from, + vio->u.write.vui_to); + if (rc > 0) { + vio->u.write.vui_written += rc; + rc = 0; + } + } + + /* update inode size */ + ll_merge_attr(env, inode); + + /* Now the pages in queue were failed to commit, discard them + * unless they were dirtied before. + */ + while (queue->pl_nr > 0) { + page = cl_page_list_first(queue); + cl_page_list_del(env, queue, page); + + if (!PageDirty(cl_page_vmpage(page))) + cl_page_discard(env, io, page); + + cl_page_disown(env, io, page); + + /* held in ll_cl_init() */ + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); + } + cl_page_list_fini(env, queue); + + return rc; } static int vvp_io_write_start(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = cl2ccc_io(env, ios); + struct vvp_io *vio = cl2vvp_io(env, ios); struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); ssize_t result = 0; loff_t pos = io->u.ci_wr.wr.crw_pos; size_t cnt = io->u.ci_wr.wr.crw_count; @@ -582,25 +949,42 @@ static int vvp_io_write_start(const struct lu_env *env, * PARALLEL IO This has to be changed for parallel IO doing * out-of-order writes. */ - pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode); - cio->cui_iocb->ki_pos = pos; + ll_merge_attr(env, inode); + pos = i_size_read(inode); + io->u.ci_wr.wr.crw_pos = pos; + vio->vui_iocb->ki_pos = pos; } else { - LASSERT(cio->cui_iocb->ki_pos == pos); + LASSERT(vio->vui_iocb->ki_pos == pos); } CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt); - if (!cio->cui_iter) /* from a temp io in ll_cl_init(). */ + if (!vio->vui_iter) /* from a temp io in ll_cl_init(). */ result = 0; else - result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter); + result = generic_file_write_iter(vio->vui_iocb, vio->vui_iter); + + if (result > 0) { + result = vvp_io_write_commit(env, io); + if (vio->u.write.vui_written > 0) { + result = vio->u.write.vui_written; + io->ci_nob += result; + CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n", + io->ci_nob, result); + } + } if (result > 0) { + struct ll_inode_info *lli = ll_i2info(inode); + + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_DATA_MODIFIED; + spin_unlock(&lli->lli_lock); + if (result < cnt) io->ci_continue = 0; - io->ci_nob += result; ll_rw_stats_tally(ll_i2sbi(inode), current->pid, - cio->cui_fd, pos, result, WRITE); + vio->vui_fd, pos, result, WRITE); result = 0; } return result; @@ -608,10 +992,10 @@ static int vvp_io_write_start(const struct lu_env *env, static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) { - struct vm_fault *vmf = cfio->fault.ft_vmf; + struct vm_fault *vmf = cfio->ft_vmf; - cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf); - cfio->fault.ft_flags_valid = 1; + cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf); + cfio->ft_flags_valid = 1; if (vmf->page) { CDEBUG(D_PAGE, @@ -619,39 +1003,51 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) vmf->page, vmf->page->mapping, vmf->page->index, (long)vmf->page->flags, page_count(vmf->page), page_private(vmf->page), vmf->virtual_address); - if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) { + if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) { lock_page(vmf->page); - cfio->fault.ft_flags |= VM_FAULT_LOCKED; + cfio->ft_flags |= VM_FAULT_LOCKED; } cfio->ft_vmpage = vmf->page; return 0; } - if (cfio->fault.ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { + if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address); return -EFAULT; } - if (cfio->fault.ft_flags & VM_FAULT_OOM) { + if (cfio->ft_flags & VM_FAULT_OOM) { CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address); return -ENOMEM; } - if (cfio->fault.ft_flags & VM_FAULT_RETRY) + if (cfio->ft_flags & VM_FAULT_RETRY) return -EAGAIN; - CERROR("Unknown error in page fault %d!\n", cfio->fault.ft_flags); + CERROR("Unknown error in page fault %d!\n", cfio->ft_flags); return -EINVAL; } +static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io, + struct cl_page *page) +{ + struct vvp_page *vpg; + struct cl_object *clob = cl_io_top(io)->ci_obj; + + set_page_dirty(page->cp_vmpage); + + vpg = cl2vvp_page(cl_object_page_slice(clob, page)); + vvp_write_pending(cl2vvp(clob), vpg); +} + static int vvp_io_fault_start(const struct lu_env *env, const struct cl_io_slice *ios) { struct vvp_io *vio = cl2vvp_io(env, ios); struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); struct cl_fault_io *fio = &io->u.ci_fault; struct vvp_fault_io *cfio = &vio->u.fault; loff_t offset; @@ -659,7 +1055,7 @@ static int vvp_io_fault_start(const struct lu_env *env, struct page *vmpage = NULL; struct cl_page *page; loff_t size; - pgoff_t last; /* last page in a file data region */ + pgoff_t last_index; if (fio->ft_executable && inode->i_mtime.tv_sec != vio->u.fault.ft_mtime) @@ -670,7 +1066,7 @@ static int vvp_io_fault_start(const struct lu_env *env, /* offset of the last byte on the page */ offset = cl_offset(obj, fio->ft_index + 1) - 1; LASSERT(cl_index(obj, offset) == fio->ft_index); - result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL); + result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL); if (result != 0) return result; @@ -705,15 +1101,15 @@ static int vvp_io_fault_start(const struct lu_env *env, goto out; } + last_index = cl_index(obj, size - 1); + if (fio->ft_mkwrite) { - pgoff_t last_index; /* * Capture the size while holding the lli_trunc_sem from above * we want to make sure that we complete the mkwrite action * while holding this lock. We need to make sure that we are * not past the end of the file. */ - last_index = cl_index(obj, size - 1); if (last_index < fio->ft_index) { CDEBUG(D_PAGE, "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n", @@ -745,25 +1141,32 @@ static int vvp_io_fault_start(const struct lu_env *env, */ if (fio->ft_mkwrite) { wait_on_page_writeback(vmpage); - if (set_page_dirty(vmpage)) { - struct ccc_page *cp; + if (!PageDirty(vmpage)) { + struct cl_page_list *plist = &io->ci_queue.c2_qin; + struct vvp_page *vpg = cl_object_page_slice(obj, page); + int to = PAGE_SIZE; /* vvp_page_assume() calls wait_on_page_writeback(). */ cl_page_assume(env, io, page); - cp = cl2ccc_page(cl_page_at(page, &vvp_device_type)); - vvp_write_pending(cl2ccc(obj), cp); + cl_page_list_init(plist); + cl_page_list_add(plist, page); + + /* size fixup */ + if (last_index == vvp_index(vpg)) + to = size & ~PAGE_MASK; /* Do not set Dirty bit here so that in case IO is * started before the page is really made dirty, we * still have chance to detect it. */ - result = cl_page_cache_add(env, io, page, CRT_WRITE); + result = cl_io_commit_async(env, io, plist, 0, to, + mkwrite_commit_callback); LASSERT(cl_page_is_owned(page, io)); + cl_page_list_fini(env, plist); vmpage = NULL; if (result < 0) { - cl_page_unmap(env, io, page); cl_page_discard(env, io, page); cl_page_disown(env, io, page); @@ -773,20 +1176,20 @@ static int vvp_io_fault_start(const struct lu_env *env, if (result == -EDQUOT) result = -ENOSPC; goto out; - } else + } else { cl_page_disown(env, io, page); + } } } - last = cl_index(obj, size - 1); /* * The ft_index is only used in the case of * a mkwrite action. We need to check * our assertions are correct, since * we should have caught this above */ - LASSERT(!fio->ft_mkwrite || fio->ft_index <= last); - if (fio->ft_index == last) + LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index); + if (fio->ft_index == last_index) /* * Last page is mapped partially. */ @@ -801,7 +1204,9 @@ out: /* return unlocked vmpage to avoid deadlocking */ if (vmpage) unlock_page(vmpage); - cfio->fault.ft_flags &= ~VM_FAULT_LOCKED; + + cfio->ft_flags &= ~VM_FAULT_LOCKED; + return result; } @@ -820,293 +1225,58 @@ static int vvp_io_read_page(const struct lu_env *env, const struct cl_page_slice *slice) { struct cl_io *io = ios->cis_io; - struct cl_object *obj = slice->cpl_obj; - struct ccc_page *cp = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *page = slice->cpl_page; - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(slice->cpl_obj); struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_file_data *fd = cl2ccc_io(env, ios)->cui_fd; + struct ll_file_data *fd = cl2vvp_io(env, ios)->vui_fd; struct ll_readahead_state *ras = &fd->fd_ras; - struct page *vmpage = cp->cpg_page; struct cl_2queue *queue = &io->ci_queue; - int rc; - - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - LASSERT(slice->cpl_obj == obj); if (sbi->ll_ra_info.ra_max_pages_per_file && sbi->ll_ra_info.ra_max_pages) - ras_update(sbi, inode, ras, page->cp_index, - cp->cpg_defer_uptodate); - - /* Sanity check whether the page is protected by a lock. */ - rc = cl_page_is_under_lock(env, io, page); - if (rc != -EBUSY) { - CL_PAGE_HEADER(D_WARNING, env, page, "%s: %d\n", - rc == -ENODATA ? "without a lock" : - "match failed", rc); - if (rc != -ENODATA) - return rc; - } + ras_update(sbi, inode, ras, vvp_index(vpg), + vpg->vpg_defer_uptodate); - if (cp->cpg_defer_uptodate) { - cp->cpg_ra_used = 1; + if (vpg->vpg_defer_uptodate) { + vpg->vpg_ra_used = 1; cl_page_export(env, page, 1); } /* * Add page into the queue even when it is marked uptodate above. * this will unlock it automatically as part of cl_page_list_disown(). */ + cl_page_list_add(&queue->c2_qin, page); if (sbi->ll_ra_info.ra_max_pages_per_file && sbi->ll_ra_info.ra_max_pages) - ll_readahead(env, io, ras, - vmpage->mapping, &queue->c2_qin, fd->fd_flags); + ll_readahead(env, io, &queue->c2_qin, ras, + vpg->vpg_defer_uptodate); return 0; } -static int vvp_page_sync_io(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, struct ccc_page *cp, - enum cl_req_type crt) +static void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios) { - struct cl_2queue *queue; - int result; - - LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - - queue = &io->ci_queue; - cl_2queue_init_page(queue, page); - - result = cl_io_submit_sync(env, io, crt, queue, 0); - LASSERT(cl_page_is_owned(page, io)); - - if (crt == CRT_READ) - /* - * in CRT_WRITE case page is left locked even in case of - * error. - */ - cl_page_list_disown(env, io, &queue->c2_qin); - cl_2queue_fini(env, queue); - - return result; -} - -/** - * Prepare partially written-to page for a write. - */ -static int vvp_io_prepare_partial(const struct lu_env *env, struct cl_io *io, - struct cl_object *obj, struct cl_page *pg, - struct ccc_page *cp, - unsigned from, unsigned to) -{ - struct cl_attr *attr = ccc_env_thread_attr(env); - loff_t offset = cl_offset(obj, pg->cp_index); - int result; - - cl_object_attr_lock(obj); - result = cl_object_attr_get(env, obj, attr); - cl_object_attr_unlock(obj); - if (result == 0) { - /* - * If are writing to a new page, no need to read old data. - * The extent locking will have updated the KMS, and for our - * purposes here we can treat it like i_size. - */ - if (attr->cat_kms <= offset) { - char *kaddr = kmap_atomic(cp->cpg_page); - - memset(kaddr, 0, cl_page_size(obj)); - kunmap_atomic(kaddr); - } else if (cp->cpg_defer_uptodate) - cp->cpg_ra_used = 1; - else - result = vvp_page_sync_io(env, io, pg, cp, CRT_READ); - /* - * In older implementations, obdo_refresh_inode is called here - * to update the inode because the write might modify the - * object info at OST. However, this has been proven useless, - * since LVB functions will be called when user space program - * tries to retrieve inode attribute. Also, see bug 15909 for - * details. -jay - */ - if (result == 0) - cl_page_export(env, pg, 1); - } - return result; -} - -static int vvp_io_prepare_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct cl_object *obj = slice->cpl_obj; - struct ccc_page *cp = cl2ccc_page(slice); - struct cl_page *pg = slice->cpl_page; - struct page *vmpage = cp->cpg_page; - - int result; - - LINVRNT(cl_page_is_vmlocked(env, pg)); - LASSERT(vmpage->mapping->host == ccc_object_inode(obj)); - - result = 0; - - CL_PAGE_HEADER(D_PAGE, env, pg, "preparing: [%d, %d]\n", from, to); - if (!PageUptodate(vmpage)) { - /* - * We're completely overwriting an existing page, so _don't_ - * set it up to date until commit_write - */ - if (from == 0 && to == PAGE_SIZE) { - CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n"); - POISON_PAGE(page, 0x11); - } else - result = vvp_io_prepare_partial(env, ios->cis_io, obj, - pg, cp, from, to); - } else - CL_PAGE_HEADER(D_PAGE, env, pg, "uptodate\n"); - return result; -} - -static int vvp_io_commit_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct cl_object *obj = slice->cpl_obj; - struct cl_io *io = ios->cis_io; - struct ccc_page *cp = cl2ccc_page(slice); - struct cl_page *pg = slice->cpl_page; - struct inode *inode = ccc_object_inode(obj); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - struct page *vmpage = cp->cpg_page; - - int result; - int tallyop; - loff_t size; - - LINVRNT(cl_page_is_vmlocked(env, pg)); - LASSERT(vmpage->mapping->host == inode); - - LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "committing page write\n"); - CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to); - - /* - * queue a write for some time in the future the first time we - * dirty the page. - * - * This is different from what other file systems do: they usually - * just mark page (and some of its buffers) dirty and rely on - * balance_dirty_pages() to start a write-back. Lustre wants write-back - * to be started earlier for the following reasons: - * - * (1) with a large number of clients we need to limit the amount - * of cached data on the clients a lot; - * - * (2) large compute jobs generally want compute-only then io-only - * and the IO should complete as quickly as possible; - * - * (3) IO is batched up to the RPC size and is async until the - * client max cache is hit - * (/sys/fs/lustre/osc/OSC.../max_dirty_mb) - * - */ - if (!PageDirty(vmpage)) { - tallyop = LPROC_LL_DIRTY_MISSES; - result = cl_page_cache_add(env, io, pg, CRT_WRITE); - if (result == 0) { - /* page was added into cache successfully. */ - set_page_dirty(vmpage); - vvp_write_pending(cl2ccc(obj), cp); - } else if (result == -EDQUOT) { - pgoff_t last_index = i_size_read(inode) >> PAGE_SHIFT; - bool need_clip = true; - - /* - * Client ran out of disk space grant. Possible - * strategies are: - * - * (a) do a sync write, renewing grant; - * - * (b) stop writing on this stripe, switch to the - * next one. - * - * (b) is a part of "parallel io" design that is the - * ultimate goal. (a) is what "old" client did, and - * what the new code continues to do for the time - * being. - */ - if (last_index > pg->cp_index) { - to = PAGE_SIZE; - need_clip = false; - } else if (last_index == pg->cp_index) { - int size_to = i_size_read(inode) & ~CFS_PAGE_MASK; - - if (to < size_to) - to = size_to; - } - if (need_clip) - cl_page_clip(env, pg, 0, to); - result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE); - if (result) - CERROR("Write page %lu of inode %p failed %d\n", - pg->cp_index, inode, result); - } - } else { - tallyop = LPROC_LL_DIRTY_HITS; - result = 0; - } - ll_stats_ops_tally(sbi, tallyop, 1); - - /* Inode should be marked DIRTY even if no new page was marked DIRTY - * because page could have been not flushed between 2 modifications. - * It is important the file is marked DIRTY as soon as the I/O is done - * Indeed, when cache is flushed, file could be already closed and it - * is too late to warn the MDT. - * It is acceptable that file is marked DIRTY even if I/O is dropped - * for some reasons before being flushed to OST. - */ - if (result == 0) { - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - - size = cl_offset(obj, pg->cp_index) + to; - - ll_inode_size_lock(inode); - if (result == 0) { - if (size > i_size_read(inode)) { - cl_isize_write_nolock(inode, size); - CDEBUG(D_VFSTRACE, DFID" updating i_size %lu\n", - PFID(lu_object_fid(&obj->co_lu)), - (unsigned long)size); - } - cl_page_export(env, pg, 1); - } else { - if (size > i_size_read(inode)) - cl_page_discard(env, io, pg); - } - ll_inode_size_unlock(inode); - return result; + CLOBINVRNT(env, ios->cis_io->ci_obj, + vvp_object_invariant(ios->cis_io->ci_obj)); } static const struct cl_io_operations vvp_io_ops = { .op = { [CIT_READ] = { - .cio_fini = vvp_io_read_fini, + .cio_fini = vvp_io_fini, .cio_lock = vvp_io_read_lock, .cio_start = vvp_io_read_start, - .cio_advance = ccc_io_advance + .cio_advance = vvp_io_advance, }, [CIT_WRITE] = { .cio_fini = vvp_io_fini, + .cio_iter_init = vvp_io_write_iter_init, + .cio_iter_fini = vvp_io_write_iter_fini, .cio_lock = vvp_io_write_lock, .cio_start = vvp_io_write_start, - .cio_advance = ccc_io_advance + .cio_advance = vvp_io_advance, }, [CIT_SETATTR] = { .cio_fini = vvp_io_setattr_fini, @@ -1120,7 +1290,7 @@ static const struct cl_io_operations vvp_io_ops = { .cio_iter_init = vvp_io_fault_iter_init, .cio_lock = vvp_io_fault_lock, .cio_start = vvp_io_fault_start, - .cio_end = ccc_io_end + .cio_end = vvp_io_end, }, [CIT_FSYNC] = { .cio_start = vvp_io_fsync_start, @@ -1131,29 +1301,26 @@ static const struct cl_io_operations vvp_io_ops = { } }, .cio_read_page = vvp_io_read_page, - .cio_prepare_write = vvp_io_prepare_write, - .cio_commit_write = vvp_io_commit_write }; int vvp_io_init(const struct lu_env *env, struct cl_object *obj, struct cl_io *io) { struct vvp_io *vio = vvp_env_io(env); - struct ccc_io *cio = ccc_env_io(env); - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); int result; - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, DFID " ignore/verify layout %d/%d, layout version %d restore needed %d\n", PFID(lu_object_fid(&obj->co_lu)), io->ci_ignore_layout, io->ci_verify_layout, - cio->cui_layout_gen, io->ci_restore_needed); + vio->vui_layout_gen, io->ci_restore_needed); - CL_IO_SLICE_CLEAN(cio, cui_cl); - cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops); - vio->cui_ra_window_set = 0; + CL_IO_SLICE_CLEAN(vio, vui_cl); + cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops); + vio->vui_ra_valid = false; result = 0; if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) { size_t count; @@ -1166,7 +1333,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, if (count == 0) result = 1; else - cio->cui_tot_count = count; + vio->vui_tot_count = count; /* for read/write, we store the jobid in the inode, and * it'll be fetched by osc when building RPC. @@ -1192,7 +1359,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, * because it might not grant layout lock in IT_OPEN. */ if (result == 0 && !io->ci_ignore_layout) { - result = ll_layout_refresh(inode, &cio->cui_layout_gen); + result = ll_layout_refresh(inode, &vio->vui_layout_gen); if (result == -ENOENT) /* If the inode on MDS has been removed, but the objects * on OSTs haven't been destroyed (async unlink), layout @@ -1208,11 +1375,3 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, return result; } - -static struct vvp_io *cl2vvp_io(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - /* Calling just for assertion */ - cl2ccc_io(env, slice); - return vvp_env_io(env); -} diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c index ff0948043c7a..64be0c9df35b 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_lock.c +++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -40,7 +36,7 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include "../include/obd.h" +#include "../include/obd_support.h" #include "../include/lustre_lite.h" #include "vvp_internal.h" @@ -51,36 +47,41 @@ * */ -/** - * Estimates lock value for the purpose of managing the lock cache during - * memory shortages. - * - * Locks for memory mapped files are almost infinitely precious, others are - * junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are - * ordered within themselves by weights assigned from other layers. - */ -static unsigned long vvp_lock_weigh(const struct lu_env *env, - const struct cl_lock_slice *slice) +static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) { - struct ccc_object *cob = cl2ccc(slice->cls_obj); + struct vvp_lock *vlk = cl2vvp_lock(slice); - return atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0; + kmem_cache_free(vvp_lock_kmem, vlk); +} + +static int vvp_lock_enqueue(const struct lu_env *env, + const struct cl_lock_slice *slice, + struct cl_io *unused, struct cl_sync_io *anchor) +{ + CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj)); + + return 0; } static const struct cl_lock_operations vvp_lock_ops = { - .clo_delete = ccc_lock_delete, - .clo_fini = ccc_lock_fini, - .clo_enqueue = ccc_lock_enqueue, - .clo_wait = ccc_lock_wait, - .clo_use = ccc_lock_use, - .clo_unuse = ccc_lock_unuse, - .clo_fits_into = ccc_lock_fits_into, - .clo_state = ccc_lock_state, - .clo_weigh = vvp_lock_weigh + .clo_fini = vvp_lock_fini, + .clo_enqueue = vvp_lock_enqueue, }; int vvp_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io) + struct cl_lock *lock, const struct cl_io *unused) { - return ccc_lock_init(env, obj, lock, io, &vvp_lock_ops); + struct vvp_lock *vlk; + int result; + + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); + + vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS); + if (vlk) { + cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops); + result = 0; + } else { + result = -ENOMEM; + } + return result; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c index 03c887d8ed83..2c520b0bf6ca 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_object.c +++ b/drivers/staging/lustre/lustre/llite/vvp_object.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -45,6 +41,7 @@ #include "../include/obd.h" #include "../include/lustre_lite.h" +#include "llite_internal.h" #include "vvp_internal.h" /***************************************************************************** @@ -53,16 +50,25 @@ * */ +int vvp_object_invariant(const struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + struct ll_inode_info *lli = ll_i2info(inode); + + return (S_ISREG(inode->i_mode) || inode->i_mode == 0) && + lli->lli_clob == obj; +} + static int vvp_object_print(const struct lu_env *env, void *cookie, lu_printer_t p, const struct lu_object *o) { - struct ccc_object *obj = lu2ccc(o); - struct inode *inode = obj->cob_inode; + struct vvp_object *obj = lu2vvp(o); + struct inode *inode = obj->vob_inode; struct ll_inode_info *lli; (*p)(env, cookie, "(%s %d %d) inode: %p ", - list_empty(&obj->cob_pending_list) ? "-" : "+", - obj->cob_transient_pages, atomic_read(&obj->cob_mmap_cnt), + list_empty(&obj->vob_pending_list) ? "-" : "+", + obj->vob_transient_pages, atomic_read(&obj->vob_mmap_cnt), inode); if (inode) { lli = ll_i2info(inode); @@ -77,7 +83,7 @@ static int vvp_object_print(const struct lu_env *env, void *cookie, static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj, struct cl_attr *attr) { - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); /* * lov overwrites most of these fields in @@ -99,7 +105,7 @@ static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj, static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj, const struct cl_attr *attr, unsigned valid) { - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); if (valid & CAT_UID) inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid); @@ -112,7 +118,7 @@ static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj, if (valid & CAT_CTIME) inode->i_ctime.tv_sec = attr->cat_ctime; if (0 && valid & CAT_SIZE) - cl_isize_write_nolock(inode, attr->cat_size); + i_size_write(inode, attr->cat_size); /* not currently necessary */ if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE)) mark_inode_dirty(inode); @@ -165,6 +171,40 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, return 0; } +static int vvp_prune(const struct lu_env *env, struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + int rc; + + rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1); + if (rc < 0) { + CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n", + PFID(lu_object_fid(&obj->co_lu)), rc); + return rc; + } + + truncate_inode_pages(inode->i_mapping, 0); + return 0; +} + +static int vvp_object_glimpse(const struct lu_env *env, + const struct cl_object *obj, struct ost_lvb *lvb) +{ + struct inode *inode = vvp_object_inode(obj); + + lvb->lvb_mtime = LTIME_S(inode->i_mtime); + lvb->lvb_atime = LTIME_S(inode->i_atime); + lvb->lvb_ctime = LTIME_S(inode->i_ctime); + /* + * LU-417: Add dirty pages block count lest i_blocks reports 0, some + * "cp" or "tar" on remote node may think it's a completely sparse file + * and skip it. + */ + if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0) + lvb->lvb_blocks = dirty_cnt(inode); + return 0; +} + static const struct cl_object_operations vvp_ops = { .coo_page_init = vvp_page_init, .coo_lock_init = vvp_lock_init, @@ -172,29 +212,94 @@ static const struct cl_object_operations vvp_ops = { .coo_attr_get = vvp_attr_get, .coo_attr_set = vvp_attr_set, .coo_conf_set = vvp_conf_set, - .coo_glimpse = ccc_object_glimpse + .coo_prune = vvp_prune, + .coo_glimpse = vvp_object_glimpse }; +static int vvp_object_init0(const struct lu_env *env, + struct vvp_object *vob, + const struct cl_object_conf *conf) +{ + vob->vob_inode = conf->coc_inode; + vob->vob_transient_pages = 0; + cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page)); + return 0; +} + +static int vvp_object_init(const struct lu_env *env, struct lu_object *obj, + const struct lu_object_conf *conf) +{ + struct vvp_device *dev = lu2vvp_dev(obj->lo_dev); + struct vvp_object *vob = lu2vvp(obj); + struct lu_object *below; + struct lu_device *under; + int result; + + under = &dev->vdv_next->cd_lu_dev; + below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under); + if (below) { + const struct cl_object_conf *cconf; + + cconf = lu2cl_conf(conf); + INIT_LIST_HEAD(&vob->vob_pending_list); + lu_object_add(obj, below); + result = vvp_object_init0(env, vob, cconf); + } else { + result = -ENOMEM; + } + + return result; +} + +static void vvp_object_free(const struct lu_env *env, struct lu_object *obj) +{ + struct vvp_object *vob = lu2vvp(obj); + + lu_object_fini(obj); + lu_object_header_fini(obj->lo_header); + kmem_cache_free(vvp_object_kmem, vob); +} + static const struct lu_object_operations vvp_lu_obj_ops = { - .loo_object_init = ccc_object_init, - .loo_object_free = ccc_object_free, - .loo_object_print = vvp_object_print + .loo_object_init = vvp_object_init, + .loo_object_free = vvp_object_free, + .loo_object_print = vvp_object_print, }; -struct ccc_object *cl_inode2ccc(struct inode *inode) +struct vvp_object *cl_inode2vvp(struct inode *inode) { - struct cl_inode_info *lli = cl_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); struct cl_object *obj = lli->lli_clob; struct lu_object *lu; lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type); LASSERT(lu); - return lu2ccc(lu); + return lu2vvp(lu); } struct lu_object *vvp_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, + const struct lu_object_header *unused, struct lu_device *dev) { - return ccc_object_alloc(env, hdr, dev, &vvp_ops, &vvp_lu_obj_ops); + struct vvp_object *vob; + struct lu_object *obj; + + vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS); + if (vob) { + struct cl_object_header *hdr; + + obj = &vob->vob_cl.co_lu; + hdr = &vob->vob_header; + cl_object_header_init(hdr); + hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page)); + + lu_object_init(obj, &hdr->coh_lu, dev); + lu_object_add_top(&hdr->coh_lu, obj); + + vob->vob_cl.co_ops = &vvp_ops; + obj->lo_ops = &vvp_lu_obj_ops; + } else { + obj = NULL; + } + return obj; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c index 33ca3eb34965..2e566d90bb94 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_page.c +++ b/drivers/staging/lustre/lustre/llite/vvp_page.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -41,9 +37,16 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include "../include/obd.h" +#include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/page-flags.h> +#include <linux/pagemap.h> + #include "../include/lustre_lite.h" +#include "llite_internal.h" #include "vvp_internal.h" /***************************************************************************** @@ -52,9 +55,9 @@ * */ -static void vvp_page_fini_common(struct ccc_page *cp) +static void vvp_page_fini_common(struct vvp_page *vpg) { - struct page *vmpage = cp->cpg_page; + struct page *vmpage = vpg->vpg_page; LASSERT(vmpage); put_page(vmpage); @@ -63,23 +66,23 @@ static void vvp_page_fini_common(struct ccc_page *cp) static void vvp_page_fini(const struct lu_env *env, struct cl_page_slice *slice) { - struct ccc_page *cp = cl2ccc_page(slice); - struct page *vmpage = cp->cpg_page; + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; /* * vmpage->private was already cleared when page was moved into * VPG_FREEING state. */ LASSERT((struct cl_page *)vmpage->private != slice->cpl_page); - vvp_page_fini_common(cp); + vvp_page_fini_common(vpg); } static int vvp_page_own(const struct lu_env *env, const struct cl_page_slice *slice, struct cl_io *io, int nonblock) { - struct ccc_page *vpg = cl2ccc_page(slice); - struct page *vmpage = vpg->cpg_page; + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; LASSERT(vmpage); if (nonblock) { @@ -96,6 +99,7 @@ static int vvp_page_own(const struct lu_env *env, lock_page(vmpage); wait_on_page_writeback(vmpage); + return 0; } @@ -136,41 +140,15 @@ static void vvp_page_discard(const struct lu_env *env, struct cl_io *unused) { struct page *vmpage = cl2vm_page(slice); - struct address_space *mapping; - struct ccc_page *cpg = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); LASSERT(vmpage); LASSERT(PageLocked(vmpage)); - mapping = vmpage->mapping; + if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used) + ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED); - if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used) - ll_ra_stats_inc(mapping, RA_STAT_DISCARDED); - - /* - * truncate_complete_page() calls - * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete(). - */ - truncate_complete_page(mapping, vmpage); -} - -static int vvp_page_unmap(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - struct page *vmpage = cl2vm_page(slice); - __u64 offset; - - LASSERT(vmpage); - LASSERT(PageLocked(vmpage)); - - offset = vmpage->index << PAGE_SHIFT; - - /* - * XXX is it safe to call this with the page lock held? - */ - ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_SIZE); - return 0; + ll_invalidate_page(vmpage); } static void vvp_page_delete(const struct lu_env *env, @@ -179,12 +157,20 @@ static void vvp_page_delete(const struct lu_env *env, struct page *vmpage = cl2vm_page(slice); struct inode *inode = vmpage->mapping->host; struct cl_object *obj = slice->cpl_obj; + struct cl_page *page = slice->cpl_page; + int refc; LASSERT(PageLocked(vmpage)); - LASSERT((struct cl_page *)vmpage->private == slice->cpl_page); - LASSERT(inode == ccc_object_inode(obj)); + LASSERT((struct cl_page *)vmpage->private == page); + LASSERT(inode == vvp_object_inode(obj)); + + vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice)); + + /* Drop the reference count held in vvp_page_init */ + refc = atomic_dec_return(&page->cp_ref); + LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc); - vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice)); + ClearPageUptodate(vmpage); ClearPagePrivate(vmpage); vmpage->private = 0; /* @@ -237,7 +223,7 @@ static int vvp_page_prep_write(const struct lu_env *env, if (!pg->cp_sync_io) set_page_writeback(vmpage); - vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice)); + vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice)); return 0; } @@ -250,11 +236,11 @@ static int vvp_page_prep_write(const struct lu_env *env, */ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret) { - struct ccc_object *obj = cl_inode2ccc(inode); + struct vvp_object *obj = cl_inode2vvp(inode); if (ioret == 0) { ClearPageError(vmpage); - obj->cob_discard_page_warned = 0; + obj->vob_discard_page_warned = 0; } else { SetPageError(vmpage); if (ioret == -ENOSPC) @@ -263,8 +249,8 @@ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret set_bit(AS_EIO, &inode->i_mapping->flags); if ((ioret == -ESHUTDOWN || ioret == -EINTR) && - obj->cob_discard_page_warned == 0) { - obj->cob_discard_page_warned = 1; + obj->vob_discard_page_warned == 0) { + obj->vob_discard_page_warned = 1; ll_dirty_page_discard_warn(vmpage, ioret); } } @@ -274,22 +260,23 @@ static void vvp_page_completion_read(const struct lu_env *env, const struct cl_page_slice *slice, int ioret) { - struct ccc_page *cp = cl2ccc_page(slice); - struct page *vmpage = cp->cpg_page; - struct cl_page *page = cl_page_top(slice->cpl_page); - struct inode *inode = ccc_object_inode(page->cp_obj); + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; + struct cl_page *page = slice->cpl_page; + struct inode *inode = vvp_object_inode(page->cp_obj); LASSERT(PageLocked(vmpage)); CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret); - if (cp->cpg_defer_uptodate) + if (vpg->vpg_defer_uptodate) ll_ra_count_put(ll_i2sbi(inode), 1); if (ioret == 0) { - if (!cp->cpg_defer_uptodate) + if (!vpg->vpg_defer_uptodate) cl_page_export(env, page, 1); - } else - cp->cpg_defer_uptodate = 0; + } else { + vpg->vpg_defer_uptodate = 0; + } if (!page->cp_sync_io) unlock_page(vmpage); @@ -299,9 +286,9 @@ static void vvp_page_completion_write(const struct lu_env *env, const struct cl_page_slice *slice, int ioret) { - struct ccc_page *cp = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *pg = slice->cpl_page; - struct page *vmpage = cp->cpg_page; + struct page *vmpage = vpg->vpg_page; CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret); @@ -315,8 +302,8 @@ static void vvp_page_completion_write(const struct lu_env *env, * and then re-add the page into pending transfer queue. -jay */ - cp->cpg_write_queued = 0; - vvp_write_complete(cl2ccc(slice->cpl_obj), cp); + vpg->vpg_write_queued = 0; + vvp_write_complete(cl2vvp(slice->cpl_obj), vpg); if (pg->cp_sync_io) { LASSERT(PageLocked(vmpage)); @@ -327,7 +314,7 @@ static void vvp_page_completion_write(const struct lu_env *env, * Only mark the page error only when it's an async write * because applications won't wait for IO to finish. */ - vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret); + vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret); end_page_writeback(vmpage); } @@ -359,7 +346,7 @@ static int vvp_page_make_ready(const struct lu_env *env, LASSERT(pg->cp_state == CPS_CACHED); /* This actually clears the dirty bit in the radix tree. */ set_page_writeback(vmpage); - vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice)); + vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice)); CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n"); } else if (pg->cp_state == CPS_PAGEOUT) { /* is it possible for osc_flush_async_page() to already @@ -375,24 +362,51 @@ static int vvp_page_make_ready(const struct lu_env *env, return result; } +static int vvp_page_is_under_lock(const struct lu_env *env, + const struct cl_page_slice *slice, + struct cl_io *io, pgoff_t *max_index) +{ + if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE || + io->ci_type == CIT_FAULT) { + struct vvp_io *vio = vvp_env_io(env); + + if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) + *max_index = CL_PAGE_EOF; + } + return 0; +} + static int vvp_page_print(const struct lu_env *env, const struct cl_page_slice *slice, void *cookie, lu_printer_t printer) { - struct ccc_page *vp = cl2ccc_page(slice); - struct page *vmpage = vp->cpg_page; + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ", - vp, vp->cpg_defer_uptodate, vp->cpg_ra_used, - vp->cpg_write_queued, vmpage); + vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, + vpg->vpg_write_queued, vmpage); if (vmpage) { (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru", (long)vmpage->flags, page_count(vmpage), page_mapcount(vmpage), vmpage->private, - page_index(vmpage), + vmpage->index, list_empty(&vmpage->lru) ? "not-" : ""); } + (*printer)(env, cookie, "\n"); + + return 0; +} + +static int vvp_page_fail(const struct lu_env *env, + const struct cl_page_slice *slice) +{ + /* + * Cached read? + */ + LBUG(); + return 0; } @@ -401,32 +415,38 @@ static const struct cl_page_operations vvp_page_ops = { .cpo_assume = vvp_page_assume, .cpo_unassume = vvp_page_unassume, .cpo_disown = vvp_page_disown, - .cpo_vmpage = ccc_page_vmpage, .cpo_discard = vvp_page_discard, .cpo_delete = vvp_page_delete, - .cpo_unmap = vvp_page_unmap, .cpo_export = vvp_page_export, .cpo_is_vmlocked = vvp_page_is_vmlocked, .cpo_fini = vvp_page_fini, .cpo_print = vvp_page_print, - .cpo_is_under_lock = ccc_page_is_under_lock, + .cpo_is_under_lock = vvp_page_is_under_lock, .io = { [CRT_READ] = { .cpo_prep = vvp_page_prep_read, .cpo_completion = vvp_page_completion_read, - .cpo_make_ready = ccc_fail, + .cpo_make_ready = vvp_page_fail, }, [CRT_WRITE] = { .cpo_prep = vvp_page_prep_write, .cpo_completion = vvp_page_completion_write, .cpo_make_ready = vvp_page_make_ready, - } - } + }, + }, }; +static int vvp_transient_page_prep(const struct lu_env *env, + const struct cl_page_slice *slice, + struct cl_io *unused) +{ + /* transient page should always be sent. */ + return 0; +} + static void vvp_transient_page_verify(const struct cl_page *page) { - struct inode *inode = ccc_object_inode(page->cp_obj); + struct inode *inode = vvp_object_inode(page->cp_obj); LASSERT(!inode_trylock(inode)); } @@ -477,7 +497,7 @@ static void vvp_transient_page_discard(const struct lu_env *env, static int vvp_transient_page_is_vmlocked(const struct lu_env *env, const struct cl_page_slice *slice) { - struct inode *inode = ccc_object_inode(slice->cpl_obj); + struct inode *inode = vvp_object_inode(slice->cpl_obj); int locked; locked = !inode_trylock(inode); @@ -497,13 +517,13 @@ vvp_transient_page_completion(const struct lu_env *env, static void vvp_transient_page_fini(const struct lu_env *env, struct cl_page_slice *slice) { - struct ccc_page *cp = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *clp = slice->cpl_page; - struct ccc_object *clobj = cl2ccc(clp->cp_obj); + struct vvp_object *clobj = cl2vvp(clp->cp_obj); - vvp_page_fini_common(cp); - LASSERT(!inode_trylock(clobj->cob_inode)); - clobj->cob_transient_pages--; + vvp_page_fini_common(vpg); + LASSERT(!inode_trylock(clobj->vob_inode)); + clobj->vob_transient_pages--; } static const struct cl_page_operations vvp_transient_page_ops = { @@ -512,45 +532,48 @@ static const struct cl_page_operations vvp_transient_page_ops = { .cpo_unassume = vvp_transient_page_unassume, .cpo_disown = vvp_transient_page_disown, .cpo_discard = vvp_transient_page_discard, - .cpo_vmpage = ccc_page_vmpage, .cpo_fini = vvp_transient_page_fini, .cpo_is_vmlocked = vvp_transient_page_is_vmlocked, .cpo_print = vvp_page_print, - .cpo_is_under_lock = ccc_page_is_under_lock, + .cpo_is_under_lock = vvp_page_is_under_lock, .io = { [CRT_READ] = { - .cpo_prep = ccc_transient_page_prep, + .cpo_prep = vvp_transient_page_prep, .cpo_completion = vvp_transient_page_completion, }, [CRT_WRITE] = { - .cpo_prep = ccc_transient_page_prep, + .cpo_prep = vvp_transient_page_prep, .cpo_completion = vvp_transient_page_completion, } } }; int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { - struct ccc_page *cpg = cl_object_page_slice(obj, page); + struct vvp_page *vpg = cl_object_page_slice(obj, page); + struct page *vmpage = page->cp_vmpage; - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - cpg->cpg_page = vmpage; + vpg->vpg_page = vmpage; get_page(vmpage); - INIT_LIST_HEAD(&cpg->cpg_pending_linkage); + INIT_LIST_HEAD(&vpg->vpg_pending_linkage); if (page->cp_type == CPT_CACHEABLE) { + /* in cache, decref in vvp_page_delete */ + atomic_inc(&page->cp_ref); SetPagePrivate(vmpage); vmpage->private = (unsigned long)page; - cl_page_slice_add(page, &cpg->cpg_cl, obj, &vvp_page_ops); + cl_page_slice_add(page, &vpg->vpg_cl, obj, index, + &vvp_page_ops); } else { - struct ccc_object *clobj = cl2ccc(obj); + struct vvp_object *clobj = cl2vvp(obj); - LASSERT(!inode_trylock(clobj->cob_inode)); - cl_page_slice_add(page, &cpg->cpg_cl, obj, + LASSERT(!inode_trylock(clobj->vob_inode)); + cl_page_slice_add(page, &vpg->vpg_cl, obj, index, &vvp_transient_page_ops); - clobj->cob_transient_pages++; + clobj->vob_transient_pages++; } return 0; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c new file mode 100644 index 000000000000..9fe9d6c0a7d1 --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/vvp_req.c @@ -0,0 +1,121 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include "../include/lustre/lustre_idl.h" +#include "../include/cl_object.h" +#include "../include/obd.h" +#include "../include/obd_support.h" +#include "../include/lustre_lite.h" +#include "llite_internal.h" +#include "vvp_internal.h" + +static inline struct vvp_req *cl2vvp_req(const struct cl_req_slice *slice) +{ + return container_of0(slice, struct vvp_req, vrq_cl); +} + +/** + * Implementation of struct cl_req_operations::cro_attr_set() for VVP + * layer. VVP is responsible for + * + * - o_[mac]time + * + * - o_mode + * + * - o_parent_seq + * + * - o_[ug]id + * + * - o_parent_oid + * + * - o_parent_ver + * + * - o_ioepoch, + * + */ +static void vvp_req_attr_set(const struct lu_env *env, + const struct cl_req_slice *slice, + const struct cl_object *obj, + struct cl_req_attr *attr, u64 flags) +{ + struct inode *inode; + struct obdo *oa; + u32 valid_flags; + + oa = attr->cra_oa; + inode = vvp_object_inode(obj); + valid_flags = OBD_MD_FLTYPE; + + if (slice->crs_req->crq_type == CRT_WRITE) { + if (flags & OBD_MD_FLEPOCH) { + oa->o_valid |= OBD_MD_FLEPOCH; + oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch; + valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLUID | OBD_MD_FLGID; + } + } + obdo_from_inode(oa, inode, valid_flags & flags); + obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); + memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, + JOBSTATS_JOBID_SIZE); +} + +static void vvp_req_completion(const struct lu_env *env, + const struct cl_req_slice *slice, int ioret) +{ + struct vvp_req *vrq; + + if (ioret > 0) + cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret); + + vrq = cl2vvp_req(slice); + kmem_cache_free(vvp_req_kmem, vrq); +} + +static const struct cl_req_operations vvp_req_ops = { + .cro_attr_set = vvp_req_attr_set, + .cro_completion = vvp_req_completion +}; + +int vvp_req_init(const struct lu_env *env, struct cl_device *dev, + struct cl_req *req) +{ + struct vvp_req *vrq; + int result; + + vrq = kmem_cache_zalloc(vvp_req_kmem, GFP_NOFS); + if (vrq) { + cl_req_slice_add(req, &vrq->vrq_cl, dev, &vvp_req_ops); + result = 0; + } else { + result = -ENOMEM; + } + return result; +} diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index b68dcc921ca2..98303cf85815 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -111,11 +107,6 @@ int ll_setxattr_common(struct inode *inode, const char *name, struct ll_sb_info *sbi = ll_i2sbi(inode); struct ptlrpc_request *req = NULL; int xattr_type, rc; -#ifdef CONFIG_FS_POSIX_ACL - struct rmtacl_ctl_entry *rce = NULL; - posix_acl_xattr_header *new_value = NULL; - ext_acl_xattr_header *acl = NULL; -#endif const char *pv = value; xattr_type = get_xattr_type(name); @@ -143,61 +134,9 @@ int ll_setxattr_common(struct inode *inode, const char *name, strcmp(name, "security.selinux") == 0) return -EOPNOTSUPP; -#ifdef CONFIG_FS_POSIX_ACL - if (sbi->ll_flags & LL_SBI_RMT_CLIENT && - (xattr_type == XATTR_ACL_ACCESS_T || - xattr_type == XATTR_ACL_DEFAULT_T)) { - rce = rct_search(&sbi->ll_rct, current_pid()); - if (!rce || - (rce->rce_ops != RMT_LSETFACL && - rce->rce_ops != RMT_RSETFACL)) - return -EOPNOTSUPP; - - if (rce->rce_ops == RMT_LSETFACL) { - struct eacl_entry *ee; - - ee = et_search_del(&sbi->ll_et, current_pid(), - ll_inode2fid(inode), xattr_type); - if (valid & OBD_MD_FLXATTR) { - acl = lustre_acl_xattr_merge2ext( - (posix_acl_xattr_header *)value, - size, ee->ee_acl); - if (IS_ERR(acl)) { - ee_free(ee); - return PTR_ERR(acl); - } - size = CFS_ACL_XATTR_SIZE(\ - le32_to_cpu(acl->a_count), \ - ext_acl_xattr); - pv = (const char *)acl; - } - ee_free(ee); - } else if (rce->rce_ops == RMT_RSETFACL) { - rc = lustre_posix_acl_xattr_filter( - (posix_acl_xattr_header *)value, - size, &new_value); - if (unlikely(rc < 0)) - return rc; - size = rc; - - pv = (const char *)new_value; - } else - return -EOPNOTSUPP; - - valid |= rce_ops2valid(rce->rce_ops); - } -#endif rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, name, pv, size, 0, flags, ll_i2suppgid(inode), &req); -#ifdef CONFIG_FS_POSIX_ACL - /* - * Release the posix ACL space. - */ - kfree(new_value); - if (acl) - lustre_ext_acl_xattr_free(acl); -#endif if (rc) { if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) { LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n"); @@ -210,16 +149,14 @@ int ll_setxattr_common(struct inode *inode, const char *name, return 0; } -int ll_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +int ll_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags) { - struct inode *inode = d_inode(dentry); - LASSERT(inode); LASSERT(name); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", - inode->i_ino, inode->i_generation, inode, name); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1); @@ -243,12 +180,12 @@ int ll_setxattr(struct dentry *dentry, const char *name, lump->lmm_stripe_offset = -1; if (lump && S_ISREG(inode->i_mode)) { - int flags = FMODE_WRITE; + __u64 it_flags = FMODE_WRITE; int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ? sizeof(*lump) : sizeof(struct lov_user_md_v3); - rc = ll_lov_setstripe_ea_info(inode, dentry, flags, lump, - lum_size); + rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags, + lump, lum_size); /* b10667: rc always be 0 here for now */ rc = 0; } else if (S_ISDIR(inode->i_mode)) { @@ -272,8 +209,8 @@ int ll_removexattr(struct dentry *dentry, const char *name) LASSERT(inode); LASSERT(name); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", - inode->i_ino, inode->i_generation, inode, name); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1); return ll_setxattr_common(inode, name, NULL, 0, 0, @@ -289,11 +226,10 @@ int ll_getxattr_common(struct inode *inode, const char *name, struct mdt_body *body; int xattr_type, rc; void *xdata; - struct rmtacl_ctl_entry *rce = NULL; struct ll_inode_info *lli = ll_i2info(inode); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); /* listxattr have slightly different behavior from of ext3: * without 'user_xattr' ext3 will list all xattr names but @@ -320,25 +256,11 @@ int ll_getxattr_common(struct inode *inode, const char *name, return -EOPNOTSUPP; #ifdef CONFIG_FS_POSIX_ACL - if (sbi->ll_flags & LL_SBI_RMT_CLIENT && - (xattr_type == XATTR_ACL_ACCESS_T || - xattr_type == XATTR_ACL_DEFAULT_T)) { - rce = rct_search(&sbi->ll_rct, current_pid()); - if (!rce || - (rce->rce_ops != RMT_LSETFACL && - rce->rce_ops != RMT_LGETFACL && - rce->rce_ops != RMT_RSETFACL && - rce->rce_ops != RMT_RGETFACL)) - return -EOPNOTSUPP; - } - /* posix acl is under protection of LOOKUP lock. when calling to this, * we just have path resolution to the target inode, so we have great * chance that cached ACL is uptodate. */ - if (xattr_type == XATTR_ACL_ACCESS_T && - !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) { - + if (xattr_type == XATTR_ACL_ACCESS_T) { struct posix_acl *acl; spin_lock(&lli->lli_lock); @@ -380,9 +302,7 @@ do_getxattr: } else { getxattr_nocache: rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), - valid | (rce ? rce_ops2valid(rce->rce_ops) : 0), - name, NULL, 0, size, 0, &req); - + valid, name, NULL, 0, size, 0, &req); if (rc < 0) goto out_xattr; @@ -419,26 +339,6 @@ getxattr_nocache: rc = body->eadatasize; } -#ifdef CONFIG_FS_POSIX_ACL - if (rce && rce->rce_ops == RMT_LSETFACL) { - ext_acl_xattr_header *acl; - - acl = lustre_posix_acl_xattr_2ext( - (posix_acl_xattr_header *)buffer, rc); - if (IS_ERR(acl)) { - rc = PTR_ERR(acl); - goto out; - } - - rc = ee_add(&sbi->ll_et, current_pid(), ll_inode2fid(inode), - xattr_type, acl); - if (unlikely(rc < 0)) { - lustre_ext_acl_xattr_free(acl); - goto out; - } - } -#endif - out_xattr: if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) { LCONSOLE_INFO( @@ -451,16 +351,14 @@ out: return rc; } -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size) +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct inode *inode = d_inode(dentry); - LASSERT(inode); LASSERT(name); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", - inode->i_ino, inode->i_generation, inode, name); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1); @@ -554,8 +452,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size) LASSERT(inode); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1); diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c index 3480ce2bb3cc..8089da8143d9 100644 --- a/drivers/staging/lustre/lustre/llite/xattr_cache.c +++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c @@ -229,7 +229,6 @@ static int ll_xattr_cache_valid(struct ll_inode_info *lli) */ static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) { - if (!ll_xattr_cache_valid(lli)) return 0; @@ -289,8 +288,8 @@ static int ll_xattr_find_get_lock(struct inode *inode, LCK_PR); if (mode != 0) { /* fake oit in mdc_revalidate_lock() manner */ - oit->d.lustre.it_lock_handle = lockh.cookie; - oit->d.lustre.it_lock_mode = mode; + oit->it_lock_handle = lockh.cookie; + oit->it_lock_mode = mode; goto out; } } @@ -316,7 +315,7 @@ static int ll_xattr_find_get_lock(struct inode *inode, return rc; } - *req = (struct ptlrpc_request *)oit->d.lustre.it_data; + *req = oit->it_request; out: down_write(&lli->lli_xattrs_list_rwsem); mutex_unlock(&lli->lli_xattrs_enq_lock); @@ -363,10 +362,10 @@ static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit) goto out_maybe_drop; } - if (oit->d.lustre.it_status < 0) { + if (oit->it_status < 0) { CDEBUG(D_CACHE, "getxattr intent returned %d for fid "DFID"\n", - oit->d.lustre.it_status, PFID(ll_inode2fid(inode))); - rc = oit->d.lustre.it_status; + oit->it_status, PFID(ll_inode2fid(inode))); + rc = oit->it_status; /* xattr data is so large that we don't want to cache it */ if (rc == -ERANGE) rc = -EAGAIN; @@ -449,8 +448,8 @@ out_destroy: up_write(&lli->lli_xattrs_list_rwsem); ldlm_lock_decref_and_cancel((struct lustre_handle *) - &oit->d.lustre.it_lock_handle, - oit->d.lustre.it_lock_mode); + &oit->it_lock_handle, + oit->it_lock_mode); goto out_no_unlock; } diff --git a/drivers/staging/lustre/lustre/lmv/lmv_fld.c b/drivers/staging/lustre/lustre/lmv/lmv_fld.c index 378691b2a062..a3d170aa6fd2 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_fld.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_fld.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c index e0958eaed054..2f58fdab8d1e 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_intent.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -84,11 +80,11 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm, /* * We got LOOKUP lock, but we really need attrs. */ - pmode = it->d.lustre.it_lock_mode; + pmode = it->it_lock_mode; if (pmode) { - plock.cookie = it->d.lustre.it_lock_handle; - it->d.lustre.it_lock_mode = 0; - it->d.lustre.it_data = NULL; + plock.cookie = it->it_lock_handle; + it->it_lock_mode = 0; + it->it_request = NULL; } LASSERT(fid_is_sane(&body->fid1)); @@ -134,14 +130,14 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm, * maintain dcache consistency. Thus drop UPDATE|PERM lock here * and put LOOKUP in request. */ - if (it->d.lustre.it_lock_mode != 0) { - it->d.lustre.it_remote_lock_handle = - it->d.lustre.it_lock_handle; - it->d.lustre.it_remote_lock_mode = it->d.lustre.it_lock_mode; + if (it->it_lock_mode != 0) { + it->it_remote_lock_handle = + it->it_lock_handle; + it->it_remote_lock_mode = it->it_lock_mode; } - it->d.lustre.it_lock_handle = plock.cookie; - it->d.lustre.it_lock_mode = pmode; + it->it_lock_handle = plock.cookie; + it->it_lock_mode = pmode; out_free_op_data: kfree(op_data); @@ -201,9 +197,9 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, * Nothing is found, do not access body->fid1 as it is zero and thus * pointless. */ - if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) && - !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) && - !(it->d.lustre.it_disposition & DISP_OPEN_OPEN)) + if ((it->it_disposition & DISP_LOOKUP_NEG) && + !(it->it_disposition & DISP_OPEN_CREATE) && + !(it->it_disposition & DISP_OPEN_OPEN)) return rc; body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h index 8a0087190e23..0beafc49b8d2 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h +++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -42,9 +38,6 @@ #define LMV_MAX_TGT_COUNT 128 -#define lmv_init_lock(lmv) mutex_lock(&lmv->init_mutex) -#define lmv_init_unlock(lmv) mutex_unlock(&lmv->init_mutex) - #define LL_IT2STR(it) \ ((it) ? ldlm_it2str((it)->it_op) : "0") diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c index 9abb7c2b9231..0e1588a43187 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -132,8 +128,9 @@ static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid, static struct obd_uuid *lmv_get_uuid(struct obd_export *exp) { struct lmv_obd *lmv = &exp->exp_obd->u.lmv; + struct lmv_tgt_desc *tgt = lmv->tgts[0]; - return obd_get_uuid(lmv->tgts[0]->ltd_exp); + return tgt ? obd_get_uuid(tgt->ltd_exp) : NULL; } static int lmv_notify(struct obd_device *obd, struct obd_device *watched, @@ -249,7 +246,6 @@ static int lmv_connect(const struct lu_env *env, static void lmv_set_timeouts(struct obd_device *obd) { - struct lmv_tgt_desc *tgt; struct lmv_obd *lmv; int i; @@ -261,8 +257,10 @@ static void lmv_set_timeouts(struct obd_device *obd) return; for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + tgt = lmv->tgts[i]; - if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0) + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) continue; obd_set_info_async(NULL, tgt->ltd_exp, sizeof(KEY_INTERMDS), @@ -302,13 +300,14 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize, return 0; for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp || - lmv->tgts[i]->ltd_active == 0) { + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) { CWARN("%s: NULL export for %d\n", obd->obd_name, i); continue; } - rc = md_init_ea_size(lmv->tgts[i]->ltd_exp, easize, def_easize, + rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize, cookiesize, def_cookiesize); if (rc) { CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n", @@ -425,7 +424,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index); - lmv_init_lock(lmv); + mutex_lock(&lmv->lmv_init_mutex); if (lmv->desc.ld_tgt_count == 0) { struct obd_device *mdc_obd; @@ -433,7 +432,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME, &obd->obd_uuid); if (!mdc_obd) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); CERROR("%s: Target %s not attached: rc = %d\n", obd->obd_name, uuidp->uuid, -EINVAL); return -EINVAL; @@ -445,7 +444,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n", obd->obd_name, obd_uuid2str(&tgt->ltd_uuid), index, -EEXIST); - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return -EEXIST; } @@ -459,7 +458,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, newsize <<= 1; newtgts = kcalloc(newsize, sizeof(*newtgts), GFP_NOFS); if (!newtgts) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return -ENOMEM; } @@ -481,7 +480,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, tgt = kzalloc(sizeof(*tgt), GFP_NOFS); if (!tgt) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return -ENOMEM; } @@ -507,7 +506,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, } } - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return rc; } @@ -522,18 +521,27 @@ int lmv_check_connect(struct obd_device *obd) if (lmv->connected) return 0; - lmv_init_lock(lmv); + mutex_lock(&lmv->lmv_init_mutex); if (lmv->connected) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return 0; } if (lmv->desc.ld_tgt_count == 0) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); CERROR("%s: no targets configured.\n", obd->obd_name); return -EINVAL; } + LASSERT(lmv->tgts); + + if (!lmv->tgts[0]) { + mutex_unlock(&lmv->lmv_init_mutex); + CERROR("%s: no target configured for index 0.\n", + obd->obd_name); + return -EINVAL; + } + CDEBUG(D_CONFIG, "Time to connect %s to %s\n", lmv->cluuid.uuid, obd->obd_name); @@ -551,7 +559,7 @@ int lmv_check_connect(struct obd_device *obd) lmv->connected = 1; easize = lmv_get_easize(lmv); lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return 0; out_disc: @@ -572,7 +580,7 @@ int lmv_check_connect(struct obd_device *obd) } } class_disconnect(lmv->exp); - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return rc; } @@ -796,6 +804,11 @@ static int lmv_hsm_ct_unregister(struct lmv_obd *lmv, unsigned int cmd, int len, /* unregister request (call from llapi_hsm_copytool_fini) */ for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp) + continue; + /* best effort: try to clean as much as possible * (continue on error) */ @@ -825,20 +838,28 @@ static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len, * except if it because of inactive target. */ for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, lk, uarg); + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp) + continue; + + err = obd_iocontrol(cmd, tgt->ltd_exp, len, lk, uarg); if (err) { - if (lmv->tgts[i]->ltd_active) { + if (tgt->ltd_active) { /* permanent error */ CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n", - lmv->tgts[i]->ltd_uuid.uuid, - i, cmd, err); + tgt->ltd_uuid.uuid, i, cmd, err); rc = err; lk->lk_flags |= LK_FLG_STOP; /* unregister from previous MDS */ - for (j = 0; j < i; j++) - obd_iocontrol(cmd, - lmv->tgts[j]->ltd_exp, - len, lk, uarg); + for (j = 0; j < i; j++) { + tgt = lmv->tgts[j]; + + if (!tgt || !tgt->ltd_exp) + continue; + obd_iocontrol(cmd, tgt->ltd_exp, len, + lk, uarg); + } return rc; } /* else: transient error. @@ -877,6 +898,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, { struct obd_device *obddev = class_exp2obd(exp); struct lmv_obd *lmv = &obddev->u.lmv; + struct lmv_tgt_desc *tgt = NULL; int i = 0; int rc = 0; int set = 0; @@ -896,10 +918,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, if (index >= count) return -ENODEV; - if (!lmv->tgts[index] || lmv->tgts[index]->ltd_active == 0) + tgt = lmv->tgts[index]; + if (!tgt || !tgt->ltd_active) return -ENODATA; - mdc_obd = class_exp2obd(lmv->tgts[index]->ltd_exp); + mdc_obd = class_exp2obd(tgt->ltd_exp); if (!mdc_obd) return -EINVAL; @@ -909,7 +932,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, (int)sizeof(struct obd_uuid)))) return -EFAULT; - rc = obd_statfs(NULL, lmv->tgts[index]->ltd_exp, &stat_buf, + rc = obd_statfs(NULL, tgt->ltd_exp, &stat_buf, cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS), 0); if (rc) @@ -922,11 +945,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, } case OBD_IOC_QUOTACTL: { struct if_quotactl *qctl = karg; - struct lmv_tgt_desc *tgt = NULL; struct obd_quotactl *oqctl; if (qctl->qc_valid == QC_MDTIDX) { - if (qctl->qc_idx < 0 || count <= qctl->qc_idx) + if (count <= qctl->qc_idx) return -EINVAL; tgt = lmv->tgts[qctl->qc_idx]; @@ -975,18 +997,18 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, if (icc->icc_mdtindex >= count) return -ENODEV; - if (!lmv->tgts[icc->icc_mdtindex] || - !lmv->tgts[icc->icc_mdtindex]->ltd_exp || - lmv->tgts[icc->icc_mdtindex]->ltd_active == 0) + tgt = lmv->tgts[icc->icc_mdtindex]; + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) return -ENODEV; - rc = obd_iocontrol(cmd, lmv->tgts[icc->icc_mdtindex]->ltd_exp, - sizeof(*icc), icc, NULL); + rc = obd_iocontrol(cmd, tgt->ltd_exp, sizeof(*icc), icc, NULL); break; } case LL_IOC_GET_CONNECT_FLAGS: { - if (!lmv->tgts[0]) + tgt = lmv->tgts[0]; + + if (!tgt || !tgt->ltd_exp) return -ENODATA; - rc = obd_iocontrol(cmd, lmv->tgts[0]->ltd_exp, len, karg, uarg); + rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); break; } case OBD_IOC_FID2PATH: { @@ -997,7 +1019,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, case LL_IOC_HSM_STATE_SET: case LL_IOC_HSM_ACTION: { struct md_op_data *op_data = karg; - struct lmv_tgt_desc *tgt; tgt = lmv_find_target(lmv, &op_data->op_fid1); if (IS_ERR(tgt)) @@ -1011,7 +1032,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, } case LL_IOC_HSM_PROGRESS: { const struct hsm_progress_kernel *hpk = karg; - struct lmv_tgt_desc *tgt; tgt = lmv_find_target(lmv, &hpk->hpk_fid); if (IS_ERR(tgt)) @@ -1021,7 +1041,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, } case LL_IOC_HSM_REQUEST: { struct hsm_user_request *hur = karg; - struct lmv_tgt_desc *tgt; unsigned int reqcount = hur->hur_request.hr_itemcount; if (reqcount == 0) @@ -1044,7 +1063,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, int rc1; struct hsm_user_request *req; - nr = lmv_hsm_req_count(lmv, hur, lmv->tgts[i]); + tgt = lmv->tgts[i]; + if (!tgt || !tgt->ltd_exp) + continue; + + nr = lmv_hsm_req_count(lmv, hur, tgt); if (nr == 0) /* nothing for this MDS */ continue; @@ -1056,10 +1079,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, if (!req) return -ENOMEM; - lmv_hsm_req_build(lmv, hur, lmv->tgts[i], req); + lmv_hsm_req_build(lmv, hur, tgt, req); - rc1 = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, - reqlen, req, uarg); + rc1 = obd_iocontrol(cmd, tgt->ltd_exp, reqlen, + req, uarg); if (rc1 != 0 && rc == 0) rc = rc1; kvfree(req); @@ -1103,27 +1126,27 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, struct obd_device *mdc_obd; int err; - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp) + tgt = lmv->tgts[i]; + if (!tgt || !tgt->ltd_exp) continue; /* ll_umount_begin() sets force flag but for lmv, not * mdc. Let's pass it through */ - mdc_obd = class_exp2obd(lmv->tgts[i]->ltd_exp); + mdc_obd = class_exp2obd(tgt->ltd_exp); mdc_obd->obd_force = obddev->obd_force; - err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, - karg, uarg); + err = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) { return err; } else if (err) { - if (lmv->tgts[i]->ltd_active) { + if (tgt->ltd_active) { CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n", - lmv->tgts[i]->ltd_uuid.uuid, - i, cmd, err); + tgt->ltd_uuid.uuid, i, cmd, err); if (!rc) rc = err; } - } else + } else { set = 1; + } } if (!set && !rc) rc = -EIO; @@ -1269,7 +1292,7 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg) lmv->lmv_placement = PLACEMENT_CHAR_POLICY; spin_lock_init(&lmv->lmv_lock); - mutex_init(&lmv->init_mutex); + mutex_init(&lmv->lmv_init_mutex); lprocfs_lmv_init_vars(&lvars); @@ -1656,7 +1679,7 @@ lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo, struct lustre_handle *lockh, void *lmm, int lmmsize, __u64 extra_lock_flags) { - struct ptlrpc_request *req = it->d.lustre.it_data; + struct ptlrpc_request *req = it->it_request; struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; struct lustre_handle plock; @@ -1678,11 +1701,11 @@ lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo, /* * We got LOOKUP lock, but we really need attrs. */ - pmode = it->d.lustre.it_lock_mode; + pmode = it->it_lock_mode; LASSERT(pmode != 0); memcpy(&plock, lockh, sizeof(plock)); - it->d.lustre.it_lock_mode = 0; - it->d.lustre.it_data = NULL; + it->it_lock_mode = 0; + it->it_request = NULL; fid1 = body->fid1; ptlrpc_req_finished(req); @@ -2071,7 +2094,7 @@ static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs) dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE); /* Check if we've reached the end of the CFS_PAGE. */ - if (!((unsigned long)dp & ~CFS_PAGE_MASK)) + if (!((unsigned long)dp & ~PAGE_MASK)) break; /* Save the hash and flags of this lu_dirpage. */ @@ -2268,7 +2291,6 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, lmv = &obd->u.lmv; if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) { - struct lmv_tgt_desc *tgt; int i; rc = lmv_check_connect(obd); @@ -2277,7 +2299,8 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, LASSERT(*vallen == sizeof(__u32)); for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - tgt = lmv->tgts[i]; + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + /* * All tgts should be connected when this gets called. */ @@ -2466,12 +2489,13 @@ static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, LASSERT(fid); for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp || - lmv->tgts[i]->ltd_active == 0) + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) continue; - err = md_cancel_unused(lmv->tgts[i]->ltd_exp, fid, - policy, mode, flags, opaque); + err = md_cancel_unused(tgt->ltd_exp, fid, policy, mode, flags, + opaque); if (!rc) rc = err; } @@ -2482,9 +2506,13 @@ static int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data, __u64 *bits) { struct lmv_obd *lmv = &exp->exp_obd->u.lmv; + struct lmv_tgt_desc *tgt = lmv->tgts[0]; int rc; - rc = md_set_lock_data(lmv->tgts[0]->ltd_exp, lockh, data, bits); + if (!tgt || !tgt->ltd_exp) + return -EINVAL; + + rc = md_set_lock_data(tgt->ltd_exp, lockh, data, bits); return rc; } @@ -2509,12 +2537,13 @@ static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags, * one fid was created in. */ for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp || - lmv->tgts[i]->ltd_active == 0) + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) continue; - rc = md_lock_match(lmv->tgts[i]->ltd_exp, flags, fid, - type, policy, mode, lockh); + rc = md_lock_match(tgt->ltd_exp, flags, fid, type, policy, mode, + lockh); if (rc) return rc; } @@ -2529,18 +2558,24 @@ static int lmv_get_lustre_md(struct obd_export *exp, struct lustre_md *md) { struct lmv_obd *lmv = &exp->exp_obd->u.lmv; + struct lmv_tgt_desc *tgt = lmv->tgts[0]; - return md_get_lustre_md(lmv->tgts[0]->ltd_exp, req, dt_exp, md_exp, md); + if (!tgt || !tgt->ltd_exp) + return -EINVAL; + return md_get_lustre_md(tgt->ltd_exp, req, dt_exp, md_exp, md); } static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt = lmv->tgts[0]; if (md->mea) obd_free_memmd(exp, (void *)&md->mea); - return md_free_lustre_md(lmv->tgts[0]->ltd_exp, md); + if (!tgt || !tgt->ltd_exp) + return -EINVAL; + return md_free_lustre_md(tgt->ltd_exp, md); } static int lmv_set_open_replay_data(struct obd_export *exp, @@ -2572,27 +2607,6 @@ static int lmv_clear_open_replay_data(struct obd_export *exp, return md_clear_open_replay_data(tgt->ltd_exp, och); } -static int lmv_get_remote_perm(struct obd_export *exp, - const struct lu_fid *fid, - __u32 suppgid, struct ptlrpc_request **request) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - int rc; - - rc = lmv_check_connect(obd); - if (rc) - return rc; - - tgt = lmv_find_target(lmv, fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - rc = md_get_remote_perm(tgt->ltd_exp, fid, suppgid, request); - return rc; -} - static int lmv_intent_getattr_async(struct obd_export *exp, struct md_enqueue_info *minfo, struct ldlm_enqueue_info *einfo) @@ -2647,9 +2661,10 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp, struct lmv_obd *lmv = &obd->u.lmv; struct lmv_tgt_desc *tgt = lmv->tgts[0]; int rc = 0, i; - __u64 curspace, curinodes; + __u64 curspace = 0, curinodes = 0; - if (!lmv->desc.ld_tgt_count || !tgt->ltd_active) { + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active || + !lmv->desc.ld_tgt_count) { CERROR("master lmv inactive\n"); return -EIO; } @@ -2659,18 +2674,13 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp, return rc; } - curspace = curinodes = 0; for (i = 0; i < lmv->desc.ld_tgt_count; i++) { int err; tgt = lmv->tgts[i]; - if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0) - continue; - if (!tgt->ltd_active) { - CDEBUG(D_HA, "mdt %d is inactive.\n", i); + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) continue; - } err = obd_quotactl(tgt->ltd_exp, oqctl); if (err) { @@ -2760,7 +2770,6 @@ static struct md_ops lmv_md_ops = { .free_lustre_md = lmv_free_lustre_md, .set_open_replay_data = lmv_set_open_replay_data, .clear_open_replay_data = lmv_clear_open_replay_data, - .get_remote_perm = lmv_get_remote_perm, .intent_getattr_async = lmv_intent_getattr_async, .revalidate_lock = lmv_revalidate_lock }; diff --git a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c index b39e364a29ab..c29c361eb0cc 100644 --- a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c +++ b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h index 7dd3162b51e9..9740568d9521 100644 --- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h +++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -73,19 +69,6 @@ * - top-page keeps a reference to its sub-page, and destroys it when it * is destroyed. * - * - sub-lock keep a reference to its top-locks. Top-lock keeps a - * reference (and a hold, see cl_lock_hold()) on its sub-locks when it - * actively using them (that is, in cl_lock_state::CLS_QUEUING, - * cl_lock_state::CLS_ENQUEUED, cl_lock_state::CLS_HELD states). When - * moving into cl_lock_state::CLS_CACHED state, top-lock releases a - * hold. From this moment top-lock has only a 'weak' reference to its - * sub-locks. This reference is protected by top-lock - * cl_lock::cll_guard, and will be automatically cleared by the sub-lock - * when the latter is destroyed. When a sub-lock is canceled, a - * reference to it is removed from the top-lock array, and top-lock is - * moved into CLS_NEW state. It is guaranteed that all sub-locks exist - * while their top-lock is in CLS_HELD or CLS_CACHED states. - * * - IO's are not reference counted. * * To implement a connection between top and sub entities, lov layer is split @@ -281,24 +264,17 @@ struct lov_object { }; /** - * Flags that top-lock can set on each of its sub-locks. - */ -enum lov_sub_flags { - /** Top-lock acquired a hold (cl_lock_hold()) on a sub-lock. */ - LSF_HELD = 1 << 0 -}; - -/** * State lov_lock keeps for each sub-lock. */ struct lov_lock_sub { /** sub-lock itself */ - struct lovsub_lock *sub_lock; - /** An array of per-sub-lock flags, taken from enum lov_sub_flags */ - unsigned sub_flags; + struct cl_lock sub_lock; + /** Set if the sublock has ever been enqueued, meaning it may + * hold resources of underlying layers + */ + unsigned int sub_is_enqueued:1, + sub_initialized:1; int sub_stripe; - struct cl_lock_descr sub_descr; - struct cl_lock_descr sub_got; }; /** @@ -308,59 +284,8 @@ struct lov_lock { struct cl_lock_slice lls_cl; /** Number of sub-locks in this lock */ int lls_nr; - /** - * Number of existing sub-locks. - */ - unsigned lls_nr_filled; - /** - * Set when sub-lock was canceled, while top-lock was being - * used, or unused. - */ - unsigned int lls_cancel_race:1; - /** - * An array of sub-locks - * - * There are two issues with managing sub-locks: - * - * - sub-locks are concurrently canceled, and - * - * - sub-locks are shared with other top-locks. - * - * To manage cancellation, top-lock acquires a hold on a sublock - * (lov_sublock_adopt()) when the latter is inserted into - * lov_lock::lls_sub[]. This hold is released (lov_sublock_release()) - * when top-lock is going into CLS_CACHED state or destroyed. Hold - * prevents sub-lock from cancellation. - * - * Sub-lock sharing means, among other things, that top-lock that is - * in the process of creation (i.e., not yet inserted into lock list) - * is already accessible to other threads once at least one of its - * sub-locks is created, see lov_lock_sub_init(). - * - * Sub-lock can be in one of the following states: - * - * - doesn't exist, lov_lock::lls_sub[]::sub_lock == NULL. Such - * sub-lock was either never created (top-lock is in CLS_NEW - * state), or it was created, then canceled, then destroyed - * (lov_lock_unlink() cleared sub-lock pointer in the top-lock). - * - * - sub-lock exists and is on - * hold. (lov_lock::lls_sub[]::sub_flags & LSF_HELD). This is a - * normal state of a sub-lock in CLS_HELD and CLS_CACHED states - * of a top-lock. - * - * - sub-lock exists, but is not held by the top-lock. This - * happens after top-lock released a hold on sub-locks before - * going into cache (lov_lock_unuse()). - * - * \todo To support wide-striping, array has to be replaced with a set - * of queues to avoid scanning. - */ - struct lov_lock_sub *lls_sub; - /** - * Original description with which lock was enqueued. - */ - struct cl_lock_descr lls_orig; + /** sublock array */ + struct lov_lock_sub lls_sub[0]; }; struct lov_page { @@ -444,8 +369,9 @@ struct lov_thread_info { struct cl_lock_descr lti_ldescr; struct ost_lvb lti_lvb; struct cl_2queue lti_cl2q; - struct cl_lock_closure lti_closure; + struct cl_page_list lti_plist; wait_queue_t lti_waiter; + struct cl_attr lti_attr; }; /** @@ -611,14 +537,13 @@ int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov, const struct cl_lock_descr *d, int idx); int lov_page_init(const struct lu_env *env, struct cl_object *ob, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); int lovsub_page_init(const struct lu_env *env, struct cl_object *ob, - struct cl_page *page, struct page *vmpage); - + struct cl_page *page, pgoff_t index); int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); struct lu_object *lov_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); @@ -631,6 +556,7 @@ struct lov_lock_link *lov_lock_link_find(const struct lu_env *env, struct lovsub_lock *sub); struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio, const struct cl_page_slice *slice); +int lov_page_stripe(const struct cl_page *page); #define lov_foreach_target(lov, var) \ for (var = 0; var < lov_targets_nr(lov); ++var) @@ -789,11 +715,6 @@ static inline struct lovsub_req *cl2lovsub_req(const struct cl_req_slice *slice) return container_of0(slice, struct lovsub_req, lsrq_cl); } -static inline struct cl_page *lov_sub_page(const struct cl_page_slice *slice) -{ - return slice->cpl_page->cp_child; -} - static inline struct lov_io *cl2lov_io(const struct lu_env *env, const struct cl_io_slice *ios) { diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c index 532ef87dfb44..b1f260d43bc7 100644 --- a/drivers/staging/lustre/lustre/lov/lov_dev.c +++ b/drivers/staging/lustre/lustre/lov/lov_dev.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -143,9 +139,7 @@ static void *lov_key_init(const struct lu_context *ctx, struct lov_thread_info *info; info = kmem_cache_zalloc(lov_thread_kmem, GFP_NOFS); - if (info) - INIT_LIST_HEAD(&info->lti_closure.clc_list); - else + if (!info) info = ERR_PTR(-ENOMEM); return info; } @@ -155,7 +149,6 @@ static void lov_key_fini(const struct lu_context *ctx, { struct lov_thread_info *info = data; - LINVRNT(list_empty(&info->lti_closure.clc_list)); kmem_cache_free(lov_thread_kmem, info); } @@ -265,8 +258,9 @@ static int lov_req_init(const struct lu_env *env, struct cl_device *dev, if (lr) { cl_req_slice_add(req, &lr->lr_cl, dev, &lov_req_ops); result = 0; - } else + } else { result = -ENOMEM; + } return result; } @@ -335,14 +329,15 @@ static struct lov_device_emerg **lov_emerg_alloc(int nr) cl_page_list_init(&em->emrg_page_list); em->emrg_env = cl_env_alloc(&em->emrg_refcheck, LCT_REMEMBER | LCT_NOREF); - if (!IS_ERR(em->emrg_env)) + if (!IS_ERR(em->emrg_env)) { em->emrg_env->le_ctx.lc_cookie = 0x2; - else { + } else { result = PTR_ERR(em->emrg_env); em->emrg_env = NULL; } - } else + } else { result = -ENOMEM; + } } if (result != 0) { lov_emerg_free(emerg, nr); diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c index b6529401c713..5053dead17bb 100644 --- a/drivers/staging/lustre/lustre/lov/lov_ea.c +++ b/drivers/staging/lustre/lustre/lov/lov_ea.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -48,11 +44,6 @@ #include "lov_internal.h" -struct lovea_unpack_args { - struct lov_stripe_md *lsm; - int cursor; -}; - static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes, __u16 stripe_count) { diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h index 590f9326af37..12bd511e8988 100644 --- a/drivers/staging/lustre/lustre/lov/lov_internal.h +++ b/drivers/staging/lustre/lustre/lov/lov_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -72,6 +68,21 @@ }) #endif +#define pool_tgt_size(p) ((p)->pool_obds.op_size) +#define pool_tgt_count(p) ((p)->pool_obds.op_count) +#define pool_tgt_array(p) ((p)->pool_obds.op_array) +#define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem) + +struct pool_desc { + char pool_name[LOV_MAXPOOLNAME + 1]; + struct ost_pool pool_obds; + atomic_t pool_refcount; + struct hlist_node pool_hash; /* access by poolname */ + struct list_head pool_list; /* serial access */ + struct dentry *pool_debugfs_entry; /* file in debugfs */ + struct obd_device *pool_lobd; /* owner */ +}; + struct lov_request { struct obd_info rq_oi; struct lov_request_set *rq_rqset; @@ -88,7 +99,6 @@ struct lov_request { }; struct lov_request_set { - struct ldlm_enqueue_info *set_ei; struct obd_info *set_oi; atomic_t set_refcount; struct obd_export *set_exp; @@ -102,10 +112,8 @@ struct lov_request_set { atomic_t set_finish_checked; struct llog_cookie *set_cookies; int set_cookie_sent; - struct obd_trans_info *set_oti; struct list_head set_list; wait_queue_head_t set_waitq; - spinlock_t set_lock; }; extern struct kmem_cache *lov_oinfo_slab; @@ -114,12 +122,6 @@ extern struct lu_kmem_descr lov_caches[]; void lov_finish_set(struct lov_request_set *set); -static inline void lov_get_reqset(struct lov_request_set *set) -{ - LASSERT(atomic_read(&set->set_refcount) > 0); - atomic_inc(&set->set_refcount); -} - static inline void lov_put_reqset(struct lov_request_set *set) { if (atomic_dec_and_test(&set->set_refcount)) @@ -146,10 +148,8 @@ int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno, u64 start, u64 end, u64 *obd_start, u64 *obd_end); int lov_stripe_number(struct lov_stripe_md *lsm, u64 lov_off); - -/* lov_qos.c */ -#define LOV_USES_ASSIGNED_STRIPE 0 -#define LOV_USES_DEFAULT_STRIPE 1 +pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index, + int stripe); /* lov_request.c */ int lov_update_common_set(struct lov_request_set *set, @@ -176,6 +176,8 @@ int lov_fini_statfs_set(struct lov_request_set *set); int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc); /* lov_obd.c */ +void lov_stripe_lock(struct lov_stripe_md *md); +void lov_stripe_unlock(struct lov_stripe_md *md); void lov_fix_desc(struct lov_desc *desc); void lov_fix_desc_stripe_size(__u64 *val); void lov_fix_desc_stripe_count(__u32 *val); @@ -231,8 +233,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname); int lov_pool_del(struct obd_device *obd, char *poolname); int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname); int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname); -struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname); -int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool); void lov_pool_putref(struct pool_desc *pool); static inline struct lov_stripe_md *lsm_addref(struct lov_stripe_md *lsm) diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c index 4296aacd84fc..84032a510254 100644 --- a/drivers/staging/lustre/lustre/lov/lov_io.c +++ b/drivers/staging/lustre/lustre/lov/lov_io.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -225,8 +221,9 @@ struct lov_io_sub *lov_sub_get(const struct lu_env *env, if (!sub->sub_io_initialized) { sub->sub_stripe = stripe; rc = lov_io_sub_init(env, lio, sub); - } else + } else { rc = 0; + } if (rc == 0) lov_sub_enter(sub); else @@ -245,13 +242,15 @@ void lov_sub_put(struct lov_io_sub *sub) * */ -static int lov_page_stripe(const struct cl_page *page) +int lov_page_stripe(const struct cl_page *page) { struct lovsub_object *subobj; + const struct cl_page_slice *slice; - subobj = lu2lovsub( - lu_object_locate(page->cp_child->cp_obj->co_lu.lo_header, - &lovsub_device_type)); + slice = cl_page_at(page, &lovsub_device_type); + LASSERT(slice->cpl_obj); + + subobj = cl2lovsub(slice->cpl_obj); return subobj->lso_index; } @@ -274,10 +273,11 @@ struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio, static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, struct cl_io *io) { - struct lov_stripe_md *lsm = lio->lis_object->lo_lsm; + struct lov_stripe_md *lsm; int result; LASSERT(lio->lis_object); + lsm = lio->lis_object->lo_lsm; /* * Need to be optimized, we can't afford to allocate a piece of memory @@ -292,8 +292,9 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, lio->lis_single_subio_index = -1; lio->lis_active_subios = 0; result = 0; - } else + } else { result = -ENOMEM; + } return result; } @@ -411,8 +412,9 @@ static int lov_io_iter_init(const struct lu_env *env, lov_sub_put(sub); CDEBUG(D_VFSTRACE, "shrink: %d [%llu, %llu)\n", stripe, start, end); - } else + } else { rc = PTR_ERR(sub); + } if (!rc) list_add_tail(&sub->sub_linkage, &lio->lis_active); @@ -436,7 +438,6 @@ static int lov_io_rw_iter_init(const struct lu_env *env, /* fast path for common case. */ if (lio->lis_nr_subios != 1 && !cl_io_is_append(io)) { - lov_do_div64(start, ssize); next = (start + 1) * ssize; if (next <= start * ssize) @@ -543,13 +544,6 @@ static void lov_io_unlock(const struct lu_env *env, LASSERT(rc == 0); } -static struct cl_page_list *lov_io_submit_qin(struct lov_device *ld, - struct cl_page_list *qin, - int idx, int alloc) -{ - return alloc ? &qin[idx] : &ld->ld_emrg[idx]->emrg_page_list; -} - /** * lov implementation of cl_operations::cio_submit() method. It takes a list * of pages in \a queue, splits it into per-stripe sub-lists, invokes @@ -569,25 +563,17 @@ static int lov_io_submit(const struct lu_env *env, const struct cl_io_slice *ios, enum cl_req_type crt, struct cl_2queue *queue) { - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_object *obj = lio->lis_object; - struct lov_device *ld = lu2lov_dev(lov2cl(obj)->co_lu.lo_dev); - struct cl_page_list *qin = &queue->c2_qin; - struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q; - struct cl_page_list *stripes_qin = NULL; + struct cl_page_list *qin = &queue->c2_qin; + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_io_sub *sub; + struct cl_page_list *plist = &lov_env_info(env)->lti_plist; struct cl_page *page; - struct cl_page *tmp; int stripe; -#define QIN(stripe) lov_io_submit_qin(ld, stripes_qin, stripe, alloc) - int rc = 0; - int alloc = - !(current->flags & PF_MEMALLOC); if (lio->lis_active_subios == 1) { int idx = lio->lis_single_subio_index; - struct lov_io_sub *sub; LASSERT(idx < lio->lis_nr_subios); sub = lov_sub_get(env, lio, idx); @@ -600,119 +586,120 @@ static int lov_io_submit(const struct lu_env *env, } LASSERT(lio->lis_subs); - if (alloc) { - stripes_qin = - libcfs_kvzalloc(sizeof(*stripes_qin) * - lio->lis_nr_subios, - GFP_NOFS); - if (!stripes_qin) - return -ENOMEM; - - for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) - cl_page_list_init(&stripes_qin[stripe]); - } else { - /* - * If we get here, it means pageout & swap doesn't help. - * In order to not make things worse, even don't try to - * allocate the memory with __GFP_NOWARN. -jay - */ - mutex_lock(&ld->ld_mutex); - lio->lis_mem_frozen = 1; - } - cl_2queue_init(cl2q); - cl_page_list_for_each_safe(page, tmp, qin) { - stripe = lov_page_stripe(page); - cl_page_list_move(QIN(stripe), qin, page); - } + cl_page_list_init(plist); + while (qin->pl_nr > 0) { + struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q; - for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) { - struct lov_io_sub *sub; - struct cl_page_list *sub_qin = QIN(stripe); + cl_2queue_init(cl2q); - if (list_empty(&sub_qin->pl_pages)) - continue; + page = cl_page_list_first(qin); + cl_page_list_move(&cl2q->c2_qin, qin, page); + + stripe = lov_page_stripe(page); + while (qin->pl_nr > 0) { + page = cl_page_list_first(qin); + if (stripe != lov_page_stripe(page)) + break; + + cl_page_list_move(&cl2q->c2_qin, qin, page); + } - cl_page_list_splice(sub_qin, &cl2q->c2_qin); sub = lov_sub_get(env, lio, stripe); if (!IS_ERR(sub)) { rc = cl_io_submit_rw(sub->sub_env, sub->sub_io, crt, cl2q); lov_sub_put(sub); - } else + } else { rc = PTR_ERR(sub); - cl_page_list_splice(&cl2q->c2_qin, &queue->c2_qin); + } + + cl_page_list_splice(&cl2q->c2_qin, plist); cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout); + cl_2queue_fini(env, cl2q); + if (rc != 0) break; } - for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) { - struct cl_page_list *sub_qin = QIN(stripe); + cl_page_list_splice(plist, qin); + cl_page_list_fini(env, plist); - if (list_empty(&sub_qin->pl_pages)) - continue; + return rc; +} + +static int lov_io_commit_async(const struct lu_env *env, + const struct cl_io_slice *ios, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb) +{ + struct cl_page_list *plist = &lov_env_info(env)->lti_plist; + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_io_sub *sub; + struct cl_page *page; + int rc = 0; - cl_page_list_splice(sub_qin, qin); + if (lio->lis_active_subios == 1) { + int idx = lio->lis_single_subio_index; + + LASSERT(idx < lio->lis_nr_subios); + sub = lov_sub_get(env, lio, idx); + LASSERT(!IS_ERR(sub)); + LASSERT(sub->sub_io == &lio->lis_single_subio); + rc = cl_io_commit_async(sub->sub_env, sub->sub_io, queue, + from, to, cb); + lov_sub_put(sub); + return rc; } - if (alloc) { - kvfree(stripes_qin); - } else { - int i; + LASSERT(lio->lis_subs); - for (i = 0; i < lio->lis_nr_subios; i++) { - struct cl_io *cio = lio->lis_subs[i].sub_io; + cl_page_list_init(plist); + while (queue->pl_nr > 0) { + int stripe_to = to; + int stripe; - if (cio && cio == &ld->ld_emrg[i]->emrg_subio) - lov_io_sub_fini(env, lio, &lio->lis_subs[i]); + LASSERT(plist->pl_nr == 0); + page = cl_page_list_first(queue); + cl_page_list_move(plist, queue, page); + + stripe = lov_page_stripe(page); + while (queue->pl_nr > 0) { + page = cl_page_list_first(queue); + if (stripe != lov_page_stripe(page)) + break; + + cl_page_list_move(plist, queue, page); } - lio->lis_mem_frozen = 0; - mutex_unlock(&ld->ld_mutex); - } - return rc; -#undef QIN -} + if (queue->pl_nr > 0) /* still has more pages */ + stripe_to = PAGE_SIZE; -static int lov_io_prepare_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct cl_page *sub_page = lov_sub_page(slice); - struct lov_io_sub *sub; - int result; + sub = lov_sub_get(env, lio, stripe); + if (!IS_ERR(sub)) { + rc = cl_io_commit_async(sub->sub_env, sub->sub_io, + plist, from, stripe_to, cb); + lov_sub_put(sub); + } else { + rc = PTR_ERR(sub); + break; + } - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - result = cl_io_prepare_write(sub->sub_env, sub->sub_io, - sub_page, from, to); - lov_sub_put(sub); - } else - result = PTR_ERR(sub); - return result; -} + if (plist->pl_nr > 0) /* short write */ + break; -static int lov_io_commit_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct cl_page *sub_page = lov_sub_page(slice); - struct lov_io_sub *sub; - int result; + from = 0; + } - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - result = cl_io_commit_write(sub->sub_env, sub->sub_io, - sub_page, from, to); - lov_sub_put(sub); - } else - result = PTR_ERR(sub); - return result; + /* for error case, add the page back into the qin list */ + LASSERT(ergo(rc == 0, plist->pl_nr == 0)); + while (plist->pl_nr > 0) { + /* error occurred, add the uncommitted pages back into queue */ + page = cl_page_list_last(plist); + cl_page_list_move_head(queue, plist, page); + } + + return rc; } static int lov_io_fault_start(const struct lu_env *env, @@ -803,16 +790,8 @@ static const struct cl_io_operations lov_io_ops = { .cio_fini = lov_io_fini } }, - .req_op = { - [CRT_READ] = { - .cio_submit = lov_io_submit - }, - [CRT_WRITE] = { - .cio_submit = lov_io_submit - } - }, - .cio_prepare_write = lov_io_prepare_write, - .cio_commit_write = lov_io_commit_write + .cio_submit = lov_io_submit, + .cio_commit_async = lov_io_commit_async, }; /***************************************************************************** @@ -880,15 +859,8 @@ static const struct cl_io_operations lov_empty_io_ops = { .cio_fini = lov_empty_io_fini } }, - .req_op = { - [CRT_READ] = { - .cio_submit = LOV_EMPTY_IMPOSSIBLE - }, - [CRT_WRITE] = { - .cio_submit = LOV_EMPTY_IMPOSSIBLE - } - }, - .cio_commit_write = LOV_EMPTY_IMPOSSIBLE + .cio_submit = LOV_EMPTY_IMPOSSIBLE, + .cio_commit_async = LOV_EMPTY_IMPOSSIBLE }; int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj, @@ -943,7 +915,7 @@ int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj, } io->ci_result = result < 0 ? result : 0; - return result != 0; + return result; } int lov_io_init_released(const struct lu_env *env, struct cl_object *obj, @@ -986,7 +958,7 @@ int lov_io_init_released(const struct lu_env *env, struct cl_object *obj, } io->ci_result = result < 0 ? result : 0; - return result != 0; + return result; } /** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lov_lock.c b/drivers/staging/lustre/lustre/lov/lov_lock.c index ae854bc25dbe..f3a0583f28f5 100644 --- a/drivers/staging/lustre/lustre/lov/lov_lock.c +++ b/drivers/staging/lustre/lustre/lov/lov_lock.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -46,11 +42,6 @@ * @{ */ -static struct cl_lock_closure *lov_closure_get(const struct lu_env *env, - struct cl_lock *parent); - -static int lov_lock_unuse(const struct lu_env *env, - const struct cl_lock_slice *slice); /***************************************************************************** * * Lov lock operations. @@ -58,7 +49,7 @@ static int lov_lock_unuse(const struct lu_env *env, */ static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env, - struct cl_lock *parent, + const struct cl_lock *parent, struct lov_lock_sub *lls) { struct lov_sublock_env *subenv; @@ -100,185 +91,26 @@ static void lov_sublock_env_put(struct lov_sublock_env *subenv) lov_sub_put(subenv->lse_sub); } -static void lov_sublock_adopt(const struct lu_env *env, struct lov_lock *lck, - struct cl_lock *sublock, int idx, - struct lov_lock_link *link) +static int lov_sublock_init(const struct lu_env *env, + const struct cl_lock *parent, + struct lov_lock_sub *lls) { - struct lovsub_lock *lsl; - struct cl_lock *parent = lck->lls_cl.cls_lock; - int rc; - - LASSERT(cl_lock_is_mutexed(parent)); - LASSERT(cl_lock_is_mutexed(sublock)); - - lsl = cl2sub_lock(sublock); - /* - * check that sub-lock doesn't have lock link to this top-lock. - */ - LASSERT(!lov_lock_link_find(env, lck, lsl)); - LASSERT(idx < lck->lls_nr); - - lck->lls_sub[idx].sub_lock = lsl; - lck->lls_nr_filled++; - LASSERT(lck->lls_nr_filled <= lck->lls_nr); - list_add_tail(&link->lll_list, &lsl->lss_parents); - link->lll_idx = idx; - link->lll_super = lck; - cl_lock_get(parent); - lu_ref_add(&parent->cll_reference, "lov-child", sublock); - lck->lls_sub[idx].sub_flags |= LSF_HELD; - cl_lock_user_add(env, sublock); - - rc = lov_sublock_modify(env, lck, lsl, &sublock->cll_descr, idx); - LASSERT(rc == 0); /* there is no way this can fail, currently */ -} - -static struct cl_lock *lov_sublock_alloc(const struct lu_env *env, - const struct cl_io *io, - struct lov_lock *lck, - int idx, struct lov_lock_link **out) -{ - struct cl_lock *sublock; - struct cl_lock *parent; - struct lov_lock_link *link; - - LASSERT(idx < lck->lls_nr); - - link = kmem_cache_zalloc(lov_lock_link_kmem, GFP_NOFS); - if (link) { - struct lov_sublock_env *subenv; - struct lov_lock_sub *lls; - struct cl_lock_descr *descr; - - parent = lck->lls_cl.cls_lock; - lls = &lck->lls_sub[idx]; - descr = &lls->sub_got; - - subenv = lov_sublock_env_get(env, parent, lls); - if (!IS_ERR(subenv)) { - /* CAVEAT: Don't try to add a field in lov_lock_sub - * to remember the subio. This is because lock is able - * to be cached, but this is not true for IO. This - * further means a sublock might be referenced in - * different io context. -jay - */ - - sublock = cl_lock_hold(subenv->lse_env, subenv->lse_io, - descr, "lov-parent", parent); - lov_sublock_env_put(subenv); - } else { - /* error occurs. */ - sublock = (void *)subenv; - } - - if (!IS_ERR(sublock)) - *out = link; - else - kmem_cache_free(lov_lock_link_kmem, link); - } else - sublock = ERR_PTR(-ENOMEM); - return sublock; -} - -static void lov_sublock_unlock(const struct lu_env *env, - struct lovsub_lock *lsl, - struct cl_lock_closure *closure, - struct lov_sublock_env *subenv) -{ - lov_sublock_env_put(subenv); - lsl->lss_active = NULL; - cl_lock_disclosure(env, closure); -} - -static int lov_sublock_lock(const struct lu_env *env, - struct lov_lock *lck, - struct lov_lock_sub *lls, - struct cl_lock_closure *closure, - struct lov_sublock_env **lsep) -{ - struct lovsub_lock *sublock; - struct cl_lock *child; - int result = 0; - - LASSERT(list_empty(&closure->clc_list)); - - sublock = lls->sub_lock; - child = sublock->lss_cl.cls_lock; - result = cl_lock_closure_build(env, child, closure); - if (result == 0) { - struct cl_lock *parent = closure->clc_origin; - - LASSERT(cl_lock_is_mutexed(child)); - sublock->lss_active = parent; - - if (unlikely((child->cll_state == CLS_FREEING) || - (child->cll_flags & CLF_CANCELLED))) { - struct lov_lock_link *link; - /* - * we could race with lock deletion which temporarily - * put the lock in freeing state, bug 19080. - */ - LASSERT(!(lls->sub_flags & LSF_HELD)); - - link = lov_lock_link_find(env, lck, sublock); - LASSERT(link); - lov_lock_unlink(env, link, sublock); - lov_sublock_unlock(env, sublock, closure, NULL); - lck->lls_cancel_race = 1; - result = CLO_REPEAT; - } else if (lsep) { - struct lov_sublock_env *subenv; + struct lov_sublock_env *subenv; + int result; - subenv = lov_sublock_env_get(env, parent, lls); - if (IS_ERR(subenv)) { - lov_sublock_unlock(env, sublock, - closure, NULL); - result = PTR_ERR(subenv); - } else { - *lsep = subenv; - } - } + subenv = lov_sublock_env_get(env, parent, lls); + if (!IS_ERR(subenv)) { + result = cl_lock_init(subenv->lse_env, &lls->sub_lock, + subenv->lse_io); + lov_sublock_env_put(subenv); + } else { + /* error occurs. */ + result = PTR_ERR(subenv); } return result; } /** - * Updates the result of a top-lock operation from a result of sub-lock - * sub-operations. Top-operations like lov_lock_{enqueue,use,unuse}() iterate - * over sub-locks and lov_subresult() is used to calculate return value of a - * top-operation. To this end, possible return values of sub-operations are - * ordered as - * - * - 0 success - * - CLO_WAIT wait for event - * - CLO_REPEAT repeat top-operation - * - -ne fundamental error - * - * Top-level return code can only go down through this list. CLO_REPEAT - * overwrites CLO_WAIT, because lock mutex was released and sleeping condition - * has to be rechecked by the upper layer. - */ -static int lov_subresult(int result, int rc) -{ - int result_rank; - int rc_rank; - - LASSERTF(result <= 0 || result == CLO_REPEAT || result == CLO_WAIT, - "result = %d\n", result); - LASSERTF(rc <= 0 || rc == CLO_REPEAT || rc == CLO_WAIT, - "rc = %d\n", rc); - CLASSERT(CLO_WAIT < CLO_REPEAT); - - /* calculate ranks in the ordering above */ - result_rank = result < 0 ? 1 + CLO_REPEAT : result; - rc_rank = rc < 0 ? 1 + CLO_REPEAT : rc; - - if (result_rank < rc_rank) - result = rc; - return result; -} - -/** * Creates sub-locks for a given lov_lock for the first time. * * Goes through all sub-objects of top-object, and creates sub-locks on every @@ -286,8 +118,9 @@ static int lov_subresult(int result, int rc) * fact that top-lock (that is being created) can be accessed concurrently * through already created sub-locks (possibly shared with other top-locks). */ -static int lov_lock_sub_init(const struct lu_env *env, - struct lov_lock *lck, const struct cl_io *io) +static struct lov_lock *lov_lock_sub_init(const struct lu_env *env, + const struct cl_object *obj, + struct cl_lock *lock) { int result = 0; int i; @@ -297,241 +130,86 @@ static int lov_lock_sub_init(const struct lu_env *env, u64 file_start; u64 file_end; - struct lov_object *loo = cl2lov(lck->lls_cl.cls_obj); + struct lov_object *loo = cl2lov(obj); struct lov_layout_raid0 *r0 = lov_r0(loo); - struct cl_lock *parent = lck->lls_cl.cls_lock; + struct lov_lock *lovlck; - lck->lls_orig = parent->cll_descr; - file_start = cl_offset(lov2cl(loo), parent->cll_descr.cld_start); - file_end = cl_offset(lov2cl(loo), parent->cll_descr.cld_end + 1) - 1; + file_start = cl_offset(lov2cl(loo), lock->cll_descr.cld_start); + file_end = cl_offset(lov2cl(loo), lock->cll_descr.cld_end + 1) - 1; for (i = 0, nr = 0; i < r0->lo_nr; i++) { /* * XXX for wide striping smarter algorithm is desirable, * breaking out of the loop, early. */ - if (likely(r0->lo_sub[i]) && + if (likely(r0->lo_sub[i]) && /* spare layout */ lov_stripe_intersects(loo->lo_lsm, i, file_start, file_end, &start, &end)) nr++; } LASSERT(nr > 0); - lck->lls_sub = libcfs_kvzalloc(nr * sizeof(lck->lls_sub[0]), GFP_NOFS); - if (!lck->lls_sub) - return -ENOMEM; + lovlck = libcfs_kvzalloc(offsetof(struct lov_lock, lls_sub[nr]), + GFP_NOFS); + if (!lovlck) + return ERR_PTR(-ENOMEM); - lck->lls_nr = nr; - /* - * First, fill in sub-lock descriptions in - * lck->lls_sub[].sub_descr. They are used by lov_sublock_alloc() - * (called below in this function, and by lov_lock_enqueue()) to - * create sub-locks. At this moment, no other thread can access - * top-lock. - */ + lovlck->lls_nr = nr; for (i = 0, nr = 0; i < r0->lo_nr; ++i) { if (likely(r0->lo_sub[i]) && lov_stripe_intersects(loo->lo_lsm, i, file_start, file_end, &start, &end)) { + struct lov_lock_sub *lls = &lovlck->lls_sub[nr]; struct cl_lock_descr *descr; - descr = &lck->lls_sub[nr].sub_descr; + descr = &lls->sub_lock.cll_descr; LASSERT(!descr->cld_obj); descr->cld_obj = lovsub2cl(r0->lo_sub[i]); descr->cld_start = cl_index(descr->cld_obj, start); descr->cld_end = cl_index(descr->cld_obj, end); - descr->cld_mode = parent->cll_descr.cld_mode; - descr->cld_gid = parent->cll_descr.cld_gid; - descr->cld_enq_flags = parent->cll_descr.cld_enq_flags; - /* XXX has no effect */ - lck->lls_sub[nr].sub_got = *descr; - lck->lls_sub[nr].sub_stripe = i; + descr->cld_mode = lock->cll_descr.cld_mode; + descr->cld_gid = lock->cll_descr.cld_gid; + descr->cld_enq_flags = lock->cll_descr.cld_enq_flags; + lls->sub_stripe = i; + + /* initialize sub lock */ + result = lov_sublock_init(env, lock, lls); + if (result < 0) + break; + + lls->sub_initialized = 1; nr++; } } - LASSERT(nr == lck->lls_nr); - - /* - * Some sub-locks can be missing at this point. This is not a problem, - * because enqueue will create them anyway. Main duty of this function - * is to fill in sub-lock descriptions in a race free manner. - */ - return result; -} + LASSERT(ergo(result == 0, nr == lovlck->lls_nr)); -static int lov_sublock_release(const struct lu_env *env, struct lov_lock *lck, - int i, int deluser, int rc) -{ - struct cl_lock *parent = lck->lls_cl.cls_lock; - - LASSERT(cl_lock_is_mutexed(parent)); - - if (lck->lls_sub[i].sub_flags & LSF_HELD) { - struct cl_lock *sublock; - int dying; - - sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock; - LASSERT(cl_lock_is_mutexed(sublock)); + if (result != 0) { + for (i = 0; i < nr; ++i) { + if (!lovlck->lls_sub[i].sub_initialized) + break; - lck->lls_sub[i].sub_flags &= ~LSF_HELD; - if (deluser) - cl_lock_user_del(env, sublock); - /* - * If the last hold is released, and cancellation is pending - * for a sub-lock, release parent mutex, to avoid keeping it - * while sub-lock is being paged out. - */ - dying = (sublock->cll_descr.cld_mode == CLM_PHANTOM || - sublock->cll_descr.cld_mode == CLM_GROUP || - (sublock->cll_flags & (CLF_CANCELPEND|CLF_DOOMED))) && - sublock->cll_holds == 1; - if (dying) - cl_lock_mutex_put(env, parent); - cl_lock_unhold(env, sublock, "lov-parent", parent); - if (dying) { - cl_lock_mutex_get(env, parent); - rc = lov_subresult(rc, CLO_REPEAT); + cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock); } - /* - * From now on lck->lls_sub[i].sub_lock is a "weak" pointer, - * not backed by a reference on a - * sub-lock. lovsub_lock_delete() will clear - * lck->lls_sub[i].sub_lock under semaphores, just before - * sub-lock is destroyed. - */ + kvfree(lovlck); + lovlck = ERR_PTR(result); } - return rc; -} - -static void lov_sublock_hold(const struct lu_env *env, struct lov_lock *lck, - int i) -{ - struct cl_lock *parent = lck->lls_cl.cls_lock; - - LASSERT(cl_lock_is_mutexed(parent)); - - if (!(lck->lls_sub[i].sub_flags & LSF_HELD)) { - struct cl_lock *sublock; - - sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock; - LASSERT(cl_lock_is_mutexed(sublock)); - LASSERT(sublock->cll_state != CLS_FREEING); - lck->lls_sub[i].sub_flags |= LSF_HELD; - - cl_lock_get_trust(sublock); - cl_lock_hold_add(env, sublock, "lov-parent", parent); - cl_lock_user_add(env, sublock); - cl_lock_put(env, sublock); - } + return lovlck; } static void lov_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) { - struct lov_lock *lck; + struct lov_lock *lovlck; int i; - lck = cl2lov_lock(slice); - LASSERT(lck->lls_nr_filled == 0); - if (lck->lls_sub) { - for (i = 0; i < lck->lls_nr; ++i) - /* - * No sub-locks exists at this point, as sub-lock has - * a reference on its parent. - */ - LASSERT(!lck->lls_sub[i].sub_lock); - kvfree(lck->lls_sub); + lovlck = cl2lov_lock(slice); + for (i = 0; i < lovlck->lls_nr; ++i) { + LASSERT(!lovlck->lls_sub[i].sub_is_enqueued); + if (lovlck->lls_sub[i].sub_initialized) + cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock); } - kmem_cache_free(lov_lock_kmem, lck); -} - -static int lov_lock_enqueue_wait(const struct lu_env *env, - struct lov_lock *lck, - struct cl_lock *sublock) -{ - struct cl_lock *lock = lck->lls_cl.cls_lock; - int result; - - LASSERT(cl_lock_is_mutexed(lock)); - - cl_lock_mutex_put(env, lock); - result = cl_lock_enqueue_wait(env, sublock, 0); - cl_lock_mutex_get(env, lock); - return result ?: CLO_REPEAT; -} - -/** - * Tries to advance a state machine of a given sub-lock toward enqueuing of - * the top-lock. - * - * \retval 0 if state-transition can proceed - * \retval -ve otherwise. - */ -static int lov_lock_enqueue_one(const struct lu_env *env, struct lov_lock *lck, - struct cl_lock *sublock, - struct cl_io *io, __u32 enqflags, int last) -{ - int result; - - /* first, try to enqueue a sub-lock ... */ - result = cl_enqueue_try(env, sublock, io, enqflags); - if ((sublock->cll_state == CLS_ENQUEUED) && !(enqflags & CEF_AGL)) { - /* if it is enqueued, try to `wait' on it---maybe it's already - * granted - */ - result = cl_wait_try(env, sublock); - if (result == CLO_REENQUEUED) - result = CLO_WAIT; - } - /* - * If CEF_ASYNC flag is set, then all sub-locks can be enqueued in - * parallel, otherwise---enqueue has to wait until sub-lock is granted - * before proceeding to the next one. - */ - if ((result == CLO_WAIT) && (sublock->cll_state <= CLS_HELD) && - (enqflags & CEF_ASYNC) && (!last || (enqflags & CEF_AGL))) - result = 0; - return result; -} - -/** - * Helper function for lov_lock_enqueue() that creates missing sub-lock. - */ -static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent, - struct cl_io *io, struct lov_lock *lck, int idx) -{ - struct lov_lock_link *link = NULL; - struct cl_lock *sublock; - int result; - - LASSERT(parent->cll_depth == 1); - cl_lock_mutex_put(env, parent); - sublock = lov_sublock_alloc(env, io, lck, idx, &link); - if (!IS_ERR(sublock)) - cl_lock_mutex_get(env, sublock); - cl_lock_mutex_get(env, parent); - - if (!IS_ERR(sublock)) { - cl_lock_get_trust(sublock); - if (parent->cll_state == CLS_QUEUING && - !lck->lls_sub[idx].sub_lock) { - lov_sublock_adopt(env, lck, sublock, idx, link); - } else { - kmem_cache_free(lov_lock_link_kmem, link); - /* other thread allocated sub-lock, or enqueue is no - * longer going on - */ - cl_lock_mutex_put(env, parent); - cl_lock_unhold(env, sublock, "lov-parent", parent); - cl_lock_mutex_get(env, parent); - } - cl_lock_mutex_put(env, sublock); - cl_lock_put(env, sublock); - result = CLO_REPEAT; - } else - result = PTR_ERR(sublock); - return result; + kvfree(lovlck); } /** @@ -543,529 +221,59 @@ static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent, */ static int lov_lock_enqueue(const struct lu_env *env, const struct cl_lock_slice *slice, - struct cl_io *io, __u32 enqflags) + struct cl_io *io, struct cl_sync_io *anchor) { - struct cl_lock *lock = slice->cls_lock; - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, lock); + struct cl_lock *lock = slice->cls_lock; + struct lov_lock *lovlck = cl2lov_lock(slice); int i; - int result; - enum cl_lock_state minstate; + int rc = 0; - for (result = 0, minstate = CLS_FREEING, i = 0; i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct lov_lock_sub *lls; - struct cl_lock *sublock; + for (i = 0; i < lovlck->lls_nr; ++i) { + struct lov_lock_sub *lls = &lovlck->lls_sub[i]; struct lov_sublock_env *subenv; - if (lock->cll_state != CLS_QUEUING) { - /* - * Lock might have left QUEUING state if previous - * iteration released its mutex. Stop enqueing in this - * case and let the upper layer to decide what to do. - */ - LASSERT(i > 0 && result != 0); - break; - } - - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - /* - * Sub-lock might have been canceled, while top-lock was - * cached. - */ - if (!sub) { - result = lov_sublock_fill(env, lock, io, lck, i); - /* lov_sublock_fill() released @lock mutex, - * restart. - */ + subenv = lov_sublock_env_get(env, lock, lls); + if (IS_ERR(subenv)) { + rc = PTR_ERR(subenv); break; } - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - lov_sublock_hold(env, lck, i); - rc = lov_lock_enqueue_one(subenv->lse_env, lck, sublock, - subenv->lse_io, enqflags, - i == lck->lls_nr - 1); - minstate = min(minstate, sublock->cll_state); - if (rc == CLO_WAIT) { - switch (sublock->cll_state) { - case CLS_QUEUING: - /* take recursive mutex, the lock is - * released in lov_lock_enqueue_wait. - */ - cl_lock_mutex_get(env, sublock); - lov_sublock_unlock(env, sub, closure, - subenv); - rc = lov_lock_enqueue_wait(env, lck, - sublock); - break; - case CLS_CACHED: - cl_lock_get(sublock); - /* take recursive mutex of sublock */ - cl_lock_mutex_get(env, sublock); - /* need to release all locks in closure - * otherwise it may deadlock. LU-2683. - */ - lov_sublock_unlock(env, sub, closure, - subenv); - /* sublock and parent are held. */ - rc = lov_sublock_release(env, lck, i, - 1, rc); - cl_lock_mutex_put(env, sublock); - cl_lock_put(env, sublock); - break; - default: - lov_sublock_unlock(env, sub, closure, - subenv); - break; - } - } else { - LASSERT(!sublock->cll_conflict); - lov_sublock_unlock(env, sub, closure, subenv); - } - } - result = lov_subresult(result, rc); - if (result != 0) + rc = cl_lock_enqueue(subenv->lse_env, subenv->lse_io, + &lls->sub_lock, anchor); + lov_sublock_env_put(subenv); + if (rc != 0) break; - } - cl_lock_closure_fini(closure); - return result ?: minstate >= CLS_ENQUEUED ? 0 : CLO_WAIT; -} - -static int lov_lock_unuse(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); - int i; - int result; - - for (result = 0, i = 0; i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct cl_lock *sublock; - struct lov_lock_sub *lls; - struct lov_sublock_env *subenv; - /* top-lock state cannot change concurrently, because single - * thread (one that released the last hold) carries unlocking - * to the completion. - */ - LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT); - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - if (!sub) - continue; - - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - if (lls->sub_flags & LSF_HELD) { - LASSERT(sublock->cll_state == CLS_HELD || - sublock->cll_state == CLS_ENQUEUED); - rc = cl_unuse_try(subenv->lse_env, sublock); - rc = lov_sublock_release(env, lck, i, 0, rc); - } - lov_sublock_unlock(env, sub, closure, subenv); - } - result = lov_subresult(result, rc); + lls->sub_is_enqueued = 1; } - - if (result == 0 && lck->lls_cancel_race) { - lck->lls_cancel_race = 0; - result = -ESTALE; - } - cl_lock_closure_fini(closure); - return result; + return rc; } static void lov_lock_cancel(const struct lu_env *env, const struct cl_lock_slice *slice) { - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); + struct cl_lock *lock = slice->cls_lock; + struct lov_lock *lovlck = cl2lov_lock(slice); int i; - int result; - for (result = 0, i = 0; i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct cl_lock *sublock; - struct lov_lock_sub *lls; + for (i = 0; i < lovlck->lls_nr; ++i) { + struct lov_lock_sub *lls = &lovlck->lls_sub[i]; + struct cl_lock *sublock = &lls->sub_lock; struct lov_sublock_env *subenv; - /* top-lock state cannot change concurrently, because single - * thread (one that released the last hold) carries unlocking - * to the completion. - */ - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - if (!sub) - continue; - - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - if (!(lls->sub_flags & LSF_HELD)) { - lov_sublock_unlock(env, sub, closure, subenv); - continue; - } - - switch (sublock->cll_state) { - case CLS_HELD: - rc = cl_unuse_try(subenv->lse_env, sublock); - lov_sublock_release(env, lck, i, 0, 0); - break; - default: - lov_sublock_release(env, lck, i, 1, 0); - break; - } - lov_sublock_unlock(env, sub, closure, subenv); - } - - if (rc == CLO_REPEAT) { - --i; - continue; - } - - result = lov_subresult(result, rc); - } - - if (result) - CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock, - "lov_lock_cancel fails with %d.\n", result); - - cl_lock_closure_fini(closure); -} - -static int lov_lock_wait(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); - enum cl_lock_state minstate; - int reenqueued; - int result; - int i; - -again: - for (result = 0, minstate = CLS_FREEING, i = 0, reenqueued = 0; - i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct cl_lock *sublock; - struct lov_lock_sub *lls; - struct lov_sublock_env *subenv; - - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - LASSERT(sublock->cll_state >= CLS_ENQUEUED); - if (sublock->cll_state < CLS_HELD) - rc = cl_wait_try(env, sublock); - - minstate = min(minstate, sublock->cll_state); - lov_sublock_unlock(env, sub, closure, subenv); - } - if (rc == CLO_REENQUEUED) { - reenqueued++; - rc = 0; - } - result = lov_subresult(result, rc); - if (result != 0) - break; - } - /* Each sublock only can be reenqueued once, so will not loop - * forever. - */ - if (result == 0 && reenqueued != 0) - goto again; - cl_lock_closure_fini(closure); - return result ?: minstate >= CLS_HELD ? 0 : CLO_WAIT; -} - -static int lov_lock_use(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); - int result; - int i; - - LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT); - - for (result = 0, i = 0; i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct cl_lock *sublock; - struct lov_lock_sub *lls; - struct lov_sublock_env *subenv; - - LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT); - - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - if (!sub) { - /* - * Sub-lock might have been canceled, while top-lock was - * cached. - */ - result = -ESTALE; - break; - } - - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - LASSERT(sublock->cll_state != CLS_FREEING); - lov_sublock_hold(env, lck, i); - if (sublock->cll_state == CLS_CACHED) { - rc = cl_use_try(subenv->lse_env, sublock, 0); - if (rc != 0) - rc = lov_sublock_release(env, lck, - i, 1, rc); - } else if (sublock->cll_state == CLS_NEW) { - /* Sub-lock might have been canceled, while - * top-lock was cached. - */ - result = -ESTALE; - lov_sublock_release(env, lck, i, 1, result); - } - lov_sublock_unlock(env, sub, closure, subenv); - } - result = lov_subresult(result, rc); - if (result != 0) - break; - } - - if (lck->lls_cancel_race) { - /* - * If there is unlocking happened at the same time, then - * sublock_lock state should be FREEING, and lov_sublock_lock - * should return CLO_REPEAT. In this case, it should return - * ESTALE, and up layer should reset the lock state to be NEW. - */ - lck->lls_cancel_race = 0; - LASSERT(result != 0); - result = -ESTALE; - } - cl_lock_closure_fini(closure); - return result; -} - -/** - * Check if the extent region \a descr is covered by \a child against the - * specific \a stripe. - */ -static int lov_lock_stripe_is_matching(const struct lu_env *env, - struct lov_object *lov, int stripe, - const struct cl_lock_descr *child, - const struct cl_lock_descr *descr) -{ - struct lov_stripe_md *lsm = lov->lo_lsm; - u64 start; - u64 end; - int result; - - if (lov_r0(lov)->lo_nr == 1) - return cl_lock_ext_match(child, descr); - - /* - * For a multi-stripes object: - * - make sure the descr only covers child's stripe, and - * - check if extent is matching. - */ - start = cl_offset(&lov->lo_cl, descr->cld_start); - end = cl_offset(&lov->lo_cl, descr->cld_end + 1) - 1; - result = 0; - /* glimpse should work on the object with LOV EA hole. */ - if (end - start <= lsm->lsm_stripe_size) { - int idx; - - idx = lov_stripe_number(lsm, start); - if (idx == stripe || - unlikely(!lov_r0(lov)->lo_sub[idx])) { - idx = lov_stripe_number(lsm, end); - if (idx == stripe || - unlikely(!lov_r0(lov)->lo_sub[idx])) - result = 1; - } - } - - if (result != 0) { - struct cl_lock_descr *subd = &lov_env_info(env)->lti_ldescr; - u64 sub_start; - u64 sub_end; - - subd->cld_obj = NULL; /* don't need sub object at all */ - subd->cld_mode = descr->cld_mode; - subd->cld_gid = descr->cld_gid; - result = lov_stripe_intersects(lsm, stripe, start, end, - &sub_start, &sub_end); - LASSERT(result); - subd->cld_start = cl_index(child->cld_obj, sub_start); - subd->cld_end = cl_index(child->cld_obj, sub_end); - result = cl_lock_ext_match(child, subd); - } - return result; -} - -/** - * An implementation of cl_lock_operations::clo_fits_into() method. - * - * Checks whether a lock (given by \a slice) is suitable for \a - * io. Multi-stripe locks can be used only for "quick" io, like truncate, or - * O_APPEND write. - * - * \see ccc_lock_fits_into(). - */ -static int lov_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - struct lov_lock *lov = cl2lov_lock(slice); - struct lov_object *obj = cl2lov(slice->cls_obj); - int result; - - LASSERT(cl_object_same(need->cld_obj, slice->cls_obj)); - LASSERT(lov->lls_nr > 0); - - /* for top lock, it's necessary to match enq flags otherwise it will - * run into problem if a sublock is missing and reenqueue. - */ - if (need->cld_enq_flags != lov->lls_orig.cld_enq_flags) - return 0; - - if (need->cld_mode == CLM_GROUP) - /* - * always allow to match group lock. - */ - result = cl_lock_ext_match(&lov->lls_orig, need); - else if (lov->lls_nr == 1) { - struct cl_lock_descr *got = &lov->lls_sub[0].sub_got; - - result = lov_lock_stripe_is_matching(env, - cl2lov(slice->cls_obj), - lov->lls_sub[0].sub_stripe, - got, need); - } else if (io->ci_type != CIT_SETATTR && io->ci_type != CIT_MISC && - !cl_io_is_append(io) && need->cld_mode != CLM_PHANTOM) - /* - * Multi-stripe locks are only suitable for `quick' IO and for - * glimpse. - */ - result = 0; - else - /* - * Most general case: multi-stripe existing lock, and - * (potentially) multi-stripe @need lock. Check that @need is - * covered by @lov's sub-locks. - * - * For now, ignore lock expansions made by the server, and - * match against original lock extent. - */ - result = cl_lock_ext_match(&lov->lls_orig, need); - CDEBUG(D_DLMTRACE, DDESCR"/"DDESCR" %d %d/%d: %d\n", - PDESCR(&lov->lls_orig), PDESCR(&lov->lls_sub[0].sub_got), - lov->lls_sub[0].sub_stripe, lov->lls_nr, lov_r0(obj)->lo_nr, - result); - return result; -} - -void lov_lock_unlink(const struct lu_env *env, - struct lov_lock_link *link, struct lovsub_lock *sub) -{ - struct lov_lock *lck = link->lll_super; - struct cl_lock *parent = lck->lls_cl.cls_lock; - - LASSERT(cl_lock_is_mutexed(parent)); - LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock)); - - list_del_init(&link->lll_list); - LASSERT(lck->lls_sub[link->lll_idx].sub_lock == sub); - /* yank this sub-lock from parent's array */ - lck->lls_sub[link->lll_idx].sub_lock = NULL; - LASSERT(lck->lls_nr_filled > 0); - lck->lls_nr_filled--; - lu_ref_del(&parent->cll_reference, "lov-child", sub->lss_cl.cls_lock); - cl_lock_put(env, parent); - kmem_cache_free(lov_lock_link_kmem, link); -} - -struct lov_lock_link *lov_lock_link_find(const struct lu_env *env, - struct lov_lock *lck, - struct lovsub_lock *sub) -{ - struct lov_lock_link *scan; - - LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock)); - - list_for_each_entry(scan, &sub->lss_parents, lll_list) { - if (scan->lll_super == lck) - return scan; - } - return NULL; -} - -/** - * An implementation of cl_lock_operations::clo_delete() method. This is - * invoked for "top-to-bottom" delete, when lock destruction starts from the - * top-lock, e.g., as a result of inode destruction. - * - * Unlinks top-lock from all its sub-locks. Sub-locks are not deleted there: - * this is done separately elsewhere: - * - * - for inode destruction, lov_object_delete() calls cl_object_kill() for - * each sub-object, purging its locks; - * - * - in other cases (e.g., a fatal error with a top-lock) sub-locks are - * left in the cache. - */ -static void lov_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); - struct lov_lock_link *link; - int rc; - int i; - - LASSERT(slice->cls_lock->cll_state == CLS_FREEING); - - for (i = 0; i < lck->lls_nr; ++i) { - struct lov_lock_sub *lls = &lck->lls_sub[i]; - struct lovsub_lock *lsl = lls->sub_lock; - - if (!lsl) /* already removed */ + if (!lls->sub_is_enqueued) continue; - rc = lov_sublock_lock(env, lck, lls, closure, NULL); - if (rc == CLO_REPEAT) { - --i; - continue; + lls->sub_is_enqueued = 0; + subenv = lov_sublock_env_get(env, lock, lls); + if (!IS_ERR(subenv)) { + cl_lock_cancel(subenv->lse_env, sublock); + lov_sublock_env_put(subenv); + } else { + CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock, + "lov_lock_cancel fails with %ld.\n", + PTR_ERR(subenv)); } - - LASSERT(rc == 0); - LASSERT(lsl->lss_cl.cls_lock->cll_state < CLS_FREEING); - - if (lls->sub_flags & LSF_HELD) - lov_sublock_release(env, lck, i, 1, 0); - - link = lov_lock_link_find(env, lck, lsl); - LASSERT(link); - lov_lock_unlink(env, link, lsl); - LASSERT(!lck->lls_sub[i].sub_lock); - - lov_sublock_unlock(env, lsl, closure, NULL); } - - cl_lock_closure_fini(closure); } static int lov_lock_print(const struct lu_env *env, void *cookie, @@ -1079,12 +287,8 @@ static int lov_lock_print(const struct lu_env *env, void *cookie, struct lov_lock_sub *sub; sub = &lck->lls_sub[i]; - (*p)(env, cookie, " %d %x: ", i, sub->sub_flags); - if (sub->sub_lock) - cl_lock_print(env, cookie, p, - sub->sub_lock->lss_cl.cls_lock); - else - (*p)(env, cookie, "---\n"); + (*p)(env, cookie, " %d %x: ", i, sub->sub_is_enqueued); + cl_lock_print(env, cookie, p, &sub->sub_lock); } return 0; } @@ -1092,12 +296,7 @@ static int lov_lock_print(const struct lu_env *env, void *cookie, static const struct cl_lock_operations lov_lock_ops = { .clo_fini = lov_lock_fini, .clo_enqueue = lov_lock_enqueue, - .clo_wait = lov_lock_wait, - .clo_use = lov_lock_use, - .clo_unuse = lov_lock_unuse, .clo_cancel = lov_lock_cancel, - .clo_fits_into = lov_lock_fits_into, - .clo_delete = lov_lock_delete, .clo_print = lov_lock_print }; @@ -1105,14 +304,13 @@ int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, const struct cl_io *io) { struct lov_lock *lck; - int result; + int result = 0; - lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS); - if (lck) { + lck = lov_lock_sub_init(env, obj, lock); + if (!IS_ERR(lck)) cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops); - result = lov_lock_sub_init(env, lck, io); - } else - result = -ENOMEM; + else + result = PTR_ERR(lck); return result; } @@ -1147,21 +345,9 @@ int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj, lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS); if (lck) { cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops); - lck->lls_orig = lock->cll_descr; result = 0; } return result; } -static struct cl_lock_closure *lov_closure_get(const struct lu_env *env, - struct cl_lock *parent) -{ - struct cl_lock_closure *closure; - - closure = &lov_env_info(env)->lti_closure; - LASSERT(list_empty(&closure->clc_list)); - cl_lock_closure_init(env, closure, parent, 1); - return closure; -} - /** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c index 029cd4d62796..b9c90865fdfc 100644 --- a/drivers/staging/lustre/lustre/lov/lov_merge.c +++ b/drivers/staging/lustre/lustre/lov/lov_merge.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -154,6 +150,7 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid, valid &= src->o_valid; if (*set) { + tgt->o_valid &= valid; if (valid & OBD_MD_FLSIZE) { /* this handles sparse files properly */ u64 lov_size; @@ -172,12 +169,22 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid, tgt->o_mtime = src->o_mtime; if (valid & OBD_MD_FLDATAVERSION) tgt->o_data_version += src->o_data_version; + + /* handle flags */ + if (valid & OBD_MD_FLFLAGS) + tgt->o_flags &= src->o_flags; + else + tgt->o_flags = 0; } else { memcpy(tgt, src, sizeof(*tgt)); tgt->o_oi = lsm->lsm_oi; + tgt->o_valid = valid; if (valid & OBD_MD_FLSIZE) tgt->o_size = lov_stripe_size(lsm, src->o_size, stripeno); + tgt->o_flags = 0; + if (valid & OBD_MD_FLFLAGS) + tgt->o_flags = src->o_flags; } /* data_version needs to be valid on all stripes to be correct! */ diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c index 5daa7faf4dda..9b92d5522edb 100644 --- a/drivers/staging/lustre/lustre/lov/lov_obd.c +++ b/drivers/staging/lustre/lustre/lov/lov_obd.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -54,7 +50,6 @@ #include "../include/lprocfs_status.h" #include "../include/lustre_param.h" #include "../include/cl_object.h" -#include "../include/lclient.h" /* for cl_client_lru */ #include "../include/lustre/ll_fiemap.h" #include "../include/lustre_fid.h" @@ -124,7 +119,6 @@ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, static int lov_notify(struct obd_device *obd, struct obd_device *watched, enum obd_notify_event ev, void *data); -#define MAX_STRING_SIZE 128 int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, struct obd_connect_data *data) { @@ -898,6 +892,12 @@ static int lov_cleanup(struct obd_device *obd) kfree(lov->lov_tgts); lov->lov_tgt_size = 0; } + + if (lov->lov_cache) { + cl_cache_decref(lov->lov_cache); + lov->lov_cache = NULL; + } + return 0; } @@ -965,7 +965,6 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg, CERROR("Unknown command: %d\n", lcfg->lcfg_command); rc = -EINVAL; goto out; - } } out: @@ -1734,6 +1733,27 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, unsigned int buffer_size = FIEMAP_BUFFER_SIZE; if (!lsm_has_objects(lsm)) { + if (lsm && lsm_is_released(lsm) && (fm_key->fiemap.fm_start < + fm_key->oa.o_size)) { + /* + * released file, return a minimal FIEMAP if + * request fits in file-size. + */ + fiemap->fm_mapped_extents = 1; + fiemap->fm_extents[0].fe_logical = + fm_key->fiemap.fm_start; + if (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length < + fm_key->oa.o_size) { + fiemap->fm_extents[0].fe_length = + fm_key->fiemap.fm_length; + } else { + fiemap->fm_extents[0].fe_length = + fm_key->oa.o_size - fm_key->fiemap.fm_start; + fiemap->fm_extents[0].fe_flags |= + (FIEMAP_EXTENT_UNKNOWN | + FIEMAP_EXTENT_LAST); + } + } rc = 0; goto out; } @@ -1754,7 +1774,8 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, fm_start = fiemap->fm_start; fm_length = fiemap->fm_length; /* Calculate start stripe, last stripe and length of mapping */ - actual_start_stripe = start_stripe = lov_stripe_number(lsm, fm_start); + start_stripe = lov_stripe_number(lsm, fm_start); + actual_start_stripe = start_stripe; fm_end = (fm_length == ~0ULL ? fm_key->oa.o_size : fm_start + fm_length - 1); /* If fm_length != ~0ULL but fm_start+fm_length-1 exceeds file size */ @@ -2077,11 +2098,9 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp, u32 count; int i, rc = 0, err; struct lov_tgt_desc *tgt; - unsigned incr, check_uuid, - do_inactive, no_set; - unsigned next_id = 0, mds_con = 0; + unsigned int incr = 0, check_uuid = 0, do_inactive = 0, no_set = 0; + unsigned int next_id = 0, mds_con = 0; - incr = check_uuid = do_inactive = no_set = 0; if (!set) { no_set = 1; set = ptlrpc_prep_set(); @@ -2108,6 +2127,7 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp, LASSERT(!lov->lov_cache); lov->lov_cache = val; do_inactive = 1; + cl_cache_incref(lov->lov_cache); } for (i = 0; i < count; i++, val = (char *)val + incr) { @@ -2173,7 +2193,6 @@ void lov_stripe_lock(struct lov_stripe_md *md) LASSERT(md->lsm_lock_owner == 0); md->lsm_lock_owner = current_pid(); } -EXPORT_SYMBOL(lov_stripe_lock); void lov_stripe_unlock(struct lov_stripe_md *md) __releases(&md->lsm_lock) @@ -2182,7 +2201,6 @@ void lov_stripe_unlock(struct lov_stripe_md *md) md->lsm_lock_owner = 0; spin_unlock(&md->lsm_lock); } -EXPORT_SYMBOL(lov_stripe_unlock); static int lov_quotactl(struct obd_device *obd, struct obd_export *exp, struct obd_quotactl *oqctl) diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c index 1f8ed95a6d89..f9621b0fd469 100644 --- a/drivers/staging/lustre/lustre/lov/lov_object.c +++ b/drivers/staging/lustre/lustre/lov/lov_object.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -67,7 +63,7 @@ struct lov_layout_operations { int (*llo_print)(const struct lu_env *env, void *cookie, lu_printer_t p, const struct lu_object *o); int (*llo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); int (*llo_lock_init)(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, const struct cl_io *io); @@ -185,14 +181,26 @@ static int lov_init_sub(const struct lu_env *env, struct lov_object *lov, } LU_OBJECT_DEBUG(mask, env, &stripe->co_lu, - "stripe %d is already owned.\n", idx); - LU_OBJECT_DEBUG(mask, env, old_obj, "owned.\n"); + "stripe %d is already owned.", idx); + LU_OBJECT_DEBUG(mask, env, old_obj, "owned."); LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n"); cl_object_put(env, stripe); } return result; } +static int lov_page_slice_fixup(struct lov_object *lov, + struct cl_object *stripe) +{ + struct cl_object_header *hdr = cl_object_header(&lov->lo_cl); + struct cl_object *o; + + cl_object_for_each(o, stripe) + o->co_slice_off += hdr->coh_page_bufsize; + + return cl_object_header(stripe)->coh_page_bufsize; +} + static int lov_init_raid0(const struct lu_env *env, struct lov_device *dev, struct lov_object *lov, const struct cl_object_conf *conf, @@ -222,6 +230,8 @@ static int lov_init_raid0(const struct lu_env *env, r0->lo_sub = libcfs_kvzalloc(r0->lo_nr * sizeof(r0->lo_sub[0]), GFP_NOFS); if (r0->lo_sub) { + int psz = 0; + result = 0; subconf->coc_inode = conf->coc_inode; spin_lock_init(&r0->lo_sub_lock); @@ -254,13 +264,24 @@ static int lov_init_raid0(const struct lu_env *env, if (result == -EAGAIN) { /* try again */ --i; result = 0; + continue; } } else { result = PTR_ERR(stripe); } + + if (result == 0) { + int sz = lov_page_slice_fixup(lov, stripe); + + LASSERT(ergo(psz > 0, psz == sz)); + psz = sz; + } } - } else + if (result == 0) + cl_object_header(&lov->lo_cl)->coh_page_bufsize += psz; + } else { result = -ENOMEM; + } out: return result; } @@ -286,8 +307,6 @@ static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov, LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED); lov_layout_wait(env, lov); - - cl_object_prune(env, &lov->lo_cl); return 0; } @@ -355,7 +374,7 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, struct lovsub_object *los = r0->lo_sub[i]; if (los) { - cl_locks_prune(env, &los->lso_cl, 1); + cl_object_prune(env, &los->lso_cl); /* * If top-level object is to be evicted from * the cache, so are its sub-objects. @@ -364,7 +383,6 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, } } } - cl_object_prune(env, &lov->lo_cl); return 0; } @@ -666,7 +684,6 @@ static int lov_layout_change(const struct lu_env *unused, const struct lov_layout_operations *old_ops; const struct lov_layout_operations *new_ops; - struct cl_object_header *hdr = cl_object_header(&lov->lo_cl); void *cookie; struct lu_env *env; int refcheck; @@ -691,13 +708,15 @@ static int lov_layout_change(const struct lu_env *unused, old_ops = &lov_dispatch[lov->lo_type]; new_ops = &lov_dispatch[llt]; + result = cl_object_prune(env, &lov->lo_cl); + if (result != 0) + goto out; + result = old_ops->llo_delete(env, lov, &lov->u); if (result == 0) { old_ops->llo_fini(env, lov, &lov->u); LASSERT(atomic_read(&lov->lo_active_ios) == 0); - LASSERT(!hdr->coh_tree.rnode); - LASSERT(hdr->coh_pages == 0); lov->lo_type = LLT_EMPTY; result = new_ops->llo_init(env, @@ -713,6 +732,7 @@ static int lov_layout_change(const struct lu_env *unused, } } +out: cl_env_put(env, &refcheck); cl_env_reexit(cookie); return result; @@ -793,7 +813,8 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, goto out; } - lov->lo_layout_invalid = lov_layout_change(env, lov, conf); + result = lov_layout_change(env, lov, conf); + lov->lo_layout_invalid = result != 0; out: lov_conf_unlock(lov); @@ -825,10 +846,10 @@ static int lov_object_print(const struct lu_env *env, void *cookie, } int lov_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { - return LOV_2DISPATCH_NOLOCK(cl2lov(obj), - llo_page_init, env, obj, page, vmpage); + return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_page_init, env, obj, page, + index); } /** @@ -911,8 +932,9 @@ struct lu_object *lov_object_alloc(const struct lu_env *env, * for object with different layouts. */ obj->lo_ops = &lov_lu_obj_ops; - } else + } else { obj = NULL; + } return obj; } diff --git a/drivers/staging/lustre/lustre/lov/lov_offset.c b/drivers/staging/lustre/lustre/lov/lov_offset.c index ae83eb0f6f36..ecca74fbff00 100644 --- a/drivers/staging/lustre/lustre/lov/lov_offset.c +++ b/drivers/staging/lustre/lustre/lov/lov_offset.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -66,6 +62,18 @@ u64 lov_stripe_size(struct lov_stripe_md *lsm, u64 ost_size, int stripeno) return lov_size; } +/** + * Compute file level page index by stripe level page offset + */ +pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index, + int stripe) +{ + loff_t offset; + + offset = lov_stripe_size(lsm, (stripe_index << PAGE_SHIFT) + 1, stripe); + return offset >> PAGE_SHIFT; +} + /* we have an offset in file backed by an lov and want to find out where * that offset lands in our given stripe of the file. for the easy * case where the offset is within the stripe, we just have to scale the diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c index 3925633a99ec..869ef41b13ca 100644 --- a/drivers/staging/lustre/lustre/lov/lov_pack.c +++ b/drivers/staging/lustre/lustre/lov/lov_pack.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -136,7 +132,6 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n", lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3); return -EINVAL; - } if (lsm) { @@ -444,8 +439,7 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm, if (lum.lmm_magic == LOV_USER_MAGIC) { /* User request for v1, we need skip lmm_pool_name */ if (lmmk->lmm_magic == LOV_MAGIC_V3) { - memmove((char *)(&lmmk->lmm_stripe_count) + - sizeof(lmmk->lmm_stripe_count), + memmove(((struct lov_mds_md_v1 *)lmmk)->lmm_objects, ((struct lov_mds_md_v3 *)lmmk)->lmm_objects, lmmk->lmm_stripe_count * sizeof(struct lov_ost_data_v1)); @@ -457,9 +451,9 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm, } /* User wasn't expecting this many OST entries */ - if (lum.lmm_stripe_count == 0) + if (lum.lmm_stripe_count == 0) { lmm_size = lum_size; - else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) { + } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) { rc = -EOVERFLOW; goto out_set; } diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c index fdcaf8047ad8..c17026f14896 100644 --- a/drivers/staging/lustre/lustre/lov/lov_page.c +++ b/drivers/staging/lustre/lustre/lov/lov_page.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -36,6 +32,7 @@ * Implementation of cl_page for LOV layer. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ #define DEBUG_SUBSYSTEM S_LOV @@ -52,116 +49,66 @@ * */ -static int lov_page_invariant(const struct cl_page_slice *slice) +/** + * Adjust the stripe index by layout of raid0. @max_index is the maximum + * page index covered by an underlying DLM lock. + * This function converts max_index from stripe level to file level, and make + * sure it's not beyond one stripe. + */ +static int lov_raid0_page_is_under_lock(const struct lu_env *env, + const struct cl_page_slice *slice, + struct cl_io *unused, + pgoff_t *max_index) { - const struct cl_page *page = slice->cpl_page; - const struct cl_page *sub = lov_sub_page(slice); + struct lov_object *loo = cl2lov(slice->cpl_obj); + struct lov_layout_raid0 *r0 = lov_r0(loo); + pgoff_t index = *max_index; + unsigned int pps; /* pages per stripe */ - return ergo(sub, - page->cp_child == sub && - sub->cp_parent == page && - page->cp_state == sub->cp_state); -} + CDEBUG(D_READA, "*max_index = %lu, nr = %d\n", index, r0->lo_nr); + if (index == 0) /* the page is not covered by any lock */ + return 0; -static void lov_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ - struct cl_page *sub = lov_sub_page(slice); + if (r0->lo_nr == 1) /* single stripe file */ + return 0; - LINVRNT(lov_page_invariant(slice)); - - if (sub) { - LASSERT(sub->cp_state == CPS_FREEING); - lu_ref_del(&sub->cp_reference, "lov", sub->cp_parent); - sub->cp_parent = NULL; - slice->cpl_page->cp_child = NULL; - cl_page_put(env, sub); + /* max_index is stripe level, convert it into file level */ + if (index != CL_PAGE_EOF) { + int stripeno = lov_page_stripe(slice->cpl_page); + *max_index = lov_stripe_pgoff(loo->lo_lsm, index, stripeno); } -} - -static int lov_page_own(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io, - int nonblock) -{ - struct lov_io *lio = lov_env_io(env); - struct lov_io_sub *sub; - LINVRNT(lov_page_invariant(slice)); - LINVRNT(!cl2lov_page(slice)->lps_invalid); + /* calculate the end of current stripe */ + pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT; + index = ((slice->cpl_index + pps) & ~(pps - 1)) - 1; - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - lov_sub_page(slice)->cp_owner = sub->sub_io; - lov_sub_put(sub); - } else - LBUG(); /* Arrgh */ + /* never exceed the end of the stripe */ + *max_index = min_t(pgoff_t, *max_index, index); return 0; } -static void lov_page_assume(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io) -{ - lov_page_own(env, slice, io, 0); -} - -static int lov_page_cache_add(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) -{ - struct lov_io *lio = lov_env_io(env); - struct lov_io_sub *sub; - int rc = 0; - - LINVRNT(lov_page_invariant(slice)); - LINVRNT(!cl2lov_page(slice)->lps_invalid); - - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - rc = cl_page_cache_add(sub->sub_env, sub->sub_io, - slice->cpl_page->cp_child, CRT_WRITE); - lov_sub_put(sub); - } else { - rc = PTR_ERR(sub); - CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page, "rc = %d\n", rc); - } - return rc; -} - -static int lov_page_print(const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t printer) +static int lov_raid0_page_print(const struct lu_env *env, + const struct cl_page_slice *slice, + void *cookie, lu_printer_t printer) { struct lov_page *lp = cl2lov_page(slice); - return (*printer)(env, cookie, LUSTRE_LOV_NAME"-page@%p\n", lp); + return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, raid0\n", lp); } -static const struct cl_page_operations lov_page_ops = { - .cpo_fini = lov_page_fini, - .cpo_own = lov_page_own, - .cpo_assume = lov_page_assume, - .io = { - [CRT_WRITE] = { - .cpo_cache_add = lov_page_cache_add - } - }, - .cpo_print = lov_page_print +static const struct cl_page_operations lov_raid0_page_ops = { + .cpo_is_under_lock = lov_raid0_page_is_under_lock, + .cpo_print = lov_raid0_page_print }; -static void lov_empty_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ - LASSERT(!slice->cpl_page->cp_child); -} - int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { struct lov_object *loo = cl2lov(obj); struct lov_layout_raid0 *r0 = lov_r0(loo); struct lov_io *lio = lov_env_io(env); - struct cl_page *subpage; struct cl_object *subobj; + struct cl_object *o; struct lov_io_sub *sub; struct lov_page *lpg = cl_object_page_slice(obj, page); loff_t offset; @@ -169,59 +116,57 @@ int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj, int stripe; int rc; - offset = cl_offset(obj, page->cp_index); + offset = cl_offset(obj, index); stripe = lov_stripe_number(loo->lo_lsm, offset); LASSERT(stripe < r0->lo_nr); rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff); LASSERT(rc == 0); - lpg->lps_invalid = 1; - cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_page_ops); + cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_raid0_page_ops); sub = lov_sub_get(env, lio, stripe); - if (IS_ERR(sub)) { - rc = PTR_ERR(sub); - goto out; - } + if (IS_ERR(sub)) + return PTR_ERR(sub); subobj = lovsub2cl(r0->lo_sub[stripe]); - subpage = cl_page_find_sub(sub->sub_env, subobj, - cl_index(subobj, suboff), vmpage, page); - lov_sub_put(sub); - if (IS_ERR(subpage)) { - rc = PTR_ERR(subpage); - goto out; - } - - if (likely(subpage->cp_parent == page)) { - lu_ref_add(&subpage->cp_reference, "lov", page); - lpg->lps_invalid = 0; - rc = 0; - } else { - CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n"); - CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n"); - LASSERT(0); + list_for_each_entry(o, &subobj->co_lu.lo_header->loh_layers, + co_lu.lo_linkage) { + if (o->co_ops->coo_page_init) { + rc = o->co_ops->coo_page_init(sub->sub_env, o, page, + cl_index(subobj, suboff)); + if (rc != 0) + break; + } } + lov_sub_put(sub); -out: return rc; } +static int lov_empty_page_print(const struct lu_env *env, + const struct cl_page_slice *slice, + void *cookie, lu_printer_t printer) +{ + struct lov_page *lp = cl2lov_page(slice); + + return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, empty.\n", + lp); +} + static const struct cl_page_operations lov_empty_page_ops = { - .cpo_fini = lov_empty_page_fini, - .cpo_print = lov_page_print + .cpo_print = lov_empty_page_print }; int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { struct lov_page *lpg = cl_object_page_slice(obj, page); void *addr; - cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_empty_page_ops); - addr = kmap(vmpage); + cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_empty_page_ops); + addr = kmap(page->cp_vmpage); memset(addr, 0, cl_page_size(obj)); - kunmap(vmpage); + kunmap(page->cp_vmpage); cl_page_export(env, page, 1); return 0; } diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c index 9ae1d6f42d6e..4c2d21729589 100644 --- a/drivers/staging/lustre/lustre/lov/lov_pool.c +++ b/drivers/staging/lustre/lustre/lov/lov_pool.c @@ -14,12 +14,8 @@ * in the LICENSE file that accompanied this code). * * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see [sun.com URL with a - * copy of GPLv2]. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * version 2 along with this program; If not, see + * http://http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -65,7 +61,6 @@ void lov_pool_putref(struct pool_desc *pool) LASSERT(hlist_unhashed(&pool->pool_hash)); LASSERT(list_empty(&pool->pool_list)); LASSERT(!pool->pool_debugfs_entry); - lov_ost_pool_free(&(pool->pool_rr.lqr_pool)); lov_ost_pool_free(&(pool->pool_obds)); kfree(pool); } @@ -424,11 +419,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname) if (rc) goto out_err; - memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr)); - rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0); - if (rc) - goto out_free_pool_obds; - INIT_HLIST_NODE(&new_pool->pool_hash); /* get ref for debugfs file */ @@ -469,13 +459,10 @@ out_err: list_del_init(&new_pool->pool_list); lov->lov_pool_count--; spin_unlock(&obd->obd_dev_lock); - ldebugfs_remove(&new_pool->pool_debugfs_entry); - - lov_ost_pool_free(&new_pool->pool_rr.lqr_pool); -out_free_pool_obds: lov_ost_pool_free(&new_pool->pool_obds); kfree(new_pool); + return rc; } @@ -543,8 +530,6 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname) if (rc) goto out; - pool->pool_rr.lqr_dirty = 1; - CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n", ostname, poolname, pool_tgt_count(pool)); @@ -589,8 +574,6 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname) lov_ost_pool_remove(&pool->pool_obds, lov_idx); - pool->pool_rr.lqr_dirty = 1; - CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname, poolname); @@ -599,50 +582,3 @@ out: lov_pool_putref(pool); return rc; } - -int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool) -{ - int i, rc; - - /* caller may no have a ref on pool if it got the pool - * without calling lov_find_pool() (e.g. go through the lov pool - * list) - */ - lov_pool_getref(pool); - - down_read(&pool_tgt_rw_sem(pool)); - - for (i = 0; i < pool_tgt_count(pool); i++) { - if (pool_tgt_array(pool)[i] == idx) { - rc = 0; - goto out; - } - } - rc = -ENOENT; -out: - up_read(&pool_tgt_rw_sem(pool)); - - lov_pool_putref(pool); - return rc; -} - -struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname) -{ - struct pool_desc *pool; - - pool = NULL; - if (poolname[0] != '\0') { - pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname); - if (!pool) - CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n", - poolname); - if (pool && (pool_tgt_count(pool) == 0)) { - CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n", - poolname); - /* pool is ignored, so we remove ref on it */ - lov_pool_putref(pool); - pool = NULL; - } - } - return pool; -} diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c index 7178a02d6267..4099b51f826e 100644 --- a/drivers/staging/lustre/lustre/lov/lov_request.c +++ b/drivers/staging/lustre/lustre/lov/lov_request.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -52,7 +48,6 @@ static void lov_init_set(struct lov_request_set *set) INIT_LIST_HEAD(&set->set_list); atomic_set(&set->set_refcount, 1); init_waitqueue_head(&set->set_waitq); - spin_lock_init(&set->set_lock); } void lov_finish_set(struct lov_request_set *set) @@ -235,7 +230,6 @@ out: if (tmp_oa) kmem_cache_free(obdo_cachep, tmp_oa); return rc; - } int lov_fini_getattr_set(struct lov_request_set *set) @@ -363,7 +357,6 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo, set->set_oi = oinfo; set->set_oi->oi_md = lsm; set->set_oi->oi_oa = src_oa; - set->set_oti = oti; if (oti && src_oa->o_valid & OBD_MD_FLCOOKIE) set->set_cookies = oti->oti_logcookies; @@ -480,7 +473,6 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo, lov_init_set(set); set->set_exp = exp; - set->set_oti = oti; set->set_oi = oinfo; if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) set->set_cookies = oti->oti_logcookies; @@ -716,12 +708,15 @@ int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo, struct lov_request *req; if (!lov->lov_tgts[i] || - (!lov_check_and_wait_active(lov, i) && - (oinfo->oi_flags & OBD_STATFS_NODELAY))) { + (oinfo->oi_flags & OBD_STATFS_NODELAY && + !lov->lov_tgts[i]->ltd_active)) { CDEBUG(D_HA, "lov idx %d inactive\n", i); continue; } + if (!lov->lov_tgts[i]->ltd_active) + lov_check_and_wait_active(lov, i); + /* skip targets that have been explicitly disabled by the * administrator */ diff --git a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c index c335c020f4f4..b519a1940e1e 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_dev.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_dev.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -151,8 +147,9 @@ static int lovsub_req_init(const struct lu_env *env, struct cl_device *dev, if (lsr) { cl_req_slice_add(req, &lsr->lsrq_cl, dev, &lovsub_req_ops); result = 0; - } else + } else { result = -ENOMEM; + } return result; } @@ -182,10 +179,12 @@ static struct lu_device *lovsub_device_alloc(const struct lu_env *env, d = lovsub2lu_dev(lsd); d->ld_ops = &lovsub_lu_ops; lsd->acid_cl.cd_ops = &lovsub_cl_ops; - } else + } else { d = ERR_PTR(result); - } else + } + } else { d = ERR_PTR(-ENOMEM); + } return d; } diff --git a/drivers/staging/lustre/lustre/lov/lovsub_io.c b/drivers/staging/lustre/lustre/lov/lovsub_io.c index 783ec687a4e7..6a9820218a3e 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_io.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_io.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/lov/lovsub_lock.c b/drivers/staging/lustre/lustre/lov/lovsub_lock.c index 3bb0c9068a90..38f9b735c241 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_lock.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_lock.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -62,391 +58,8 @@ static void lovsub_lock_fini(const struct lu_env *env, kmem_cache_free(lovsub_lock_kmem, lsl); } -static void lovsub_parent_lock(const struct lu_env *env, struct lov_lock *lov) -{ - struct cl_lock *parent; - - parent = lov->lls_cl.cls_lock; - cl_lock_get(parent); - lu_ref_add(&parent->cll_reference, "lovsub-parent", current); - cl_lock_mutex_get(env, parent); -} - -static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov) -{ - struct cl_lock *parent; - - parent = lov->lls_cl.cls_lock; - cl_lock_mutex_put(env, lov->lls_cl.cls_lock); - lu_ref_del(&parent->cll_reference, "lovsub-parent", current); - cl_lock_put(env, parent); -} - -/** - * Implements cl_lock_operations::clo_state() method for lovsub layer, which - * method is called whenever sub-lock state changes. Propagates state change - * to the top-locks. - */ -static void lovsub_lock_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state) -{ - struct lovsub_lock *sub = cl2lovsub_lock(slice); - struct lov_lock_link *scan; - - LASSERT(cl_lock_is_mutexed(slice->cls_lock)); - - list_for_each_entry(scan, &sub->lss_parents, lll_list) { - struct lov_lock *lov = scan->lll_super; - struct cl_lock *parent = lov->lls_cl.cls_lock; - - if (sub->lss_active != parent) { - lovsub_parent_lock(env, lov); - cl_lock_signal(env, parent); - lovsub_parent_unlock(env, lov); - } - } -} - -/** - * Implementation of cl_lock_operation::clo_weigh() estimating lock weight by - * asking parent lock. - */ -static unsigned long lovsub_lock_weigh(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lovsub_lock *lock = cl2lovsub_lock(slice); - struct lov_lock *lov; - unsigned long dumbbell; - - LASSERT(cl_lock_is_mutexed(slice->cls_lock)); - - if (!list_empty(&lock->lss_parents)) { - /* - * It is not clear whether all parents have to be asked and - * their estimations summed, or it is enough to ask one. For - * the current usages, one is always enough. - */ - lov = container_of(lock->lss_parents.next, - struct lov_lock_link, lll_list)->lll_super; - - lovsub_parent_lock(env, lov); - dumbbell = cl_lock_weigh(env, lov->lls_cl.cls_lock); - lovsub_parent_unlock(env, lov); - } else - dumbbell = 0; - - return dumbbell; -} - -/** - * Maps start/end offsets within a stripe, to offsets within a file. - */ -static void lovsub_lock_descr_map(const struct cl_lock_descr *in, - struct lov_object *lov, - int stripe, struct cl_lock_descr *out) -{ - pgoff_t size; /* stripe size in pages */ - pgoff_t skip; /* how many pages in every stripe are occupied by - * "other" stripes - */ - pgoff_t start; - pgoff_t end; - - start = in->cld_start; - end = in->cld_end; - - if (lov->lo_lsm->lsm_stripe_count > 1) { - size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size); - skip = (lov->lo_lsm->lsm_stripe_count - 1) * size; - - /* XXX overflow check here? */ - start += start/size * skip + stripe * size; - - if (end != CL_PAGE_EOF) { - end += end/size * skip + stripe * size; - /* - * And check for overflow... - */ - if (end < in->cld_end) - end = CL_PAGE_EOF; - } - } - out->cld_start = start; - out->cld_end = end; -} - -/** - * Adjusts parent lock extent when a sub-lock is attached to a parent. This is - * called in two ways: - * - * - as part of receive call-back, when server returns granted extent to - * the client, and - * - * - when top-lock finds existing sub-lock in the cache. - * - * Note, that lock mode is not propagated to the parent: i.e., if CLM_READ - * top-lock matches CLM_WRITE sub-lock, top-lock is still CLM_READ. - */ -int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov, - struct lovsub_lock *sublock, - const struct cl_lock_descr *d, int idx) -{ - struct cl_lock *parent; - struct lovsub_object *subobj; - struct cl_lock_descr *pd; - struct cl_lock_descr *parent_descr; - int result; - - parent = lov->lls_cl.cls_lock; - parent_descr = &parent->cll_descr; - LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode)); - - subobj = cl2lovsub(sublock->lss_cl.cls_obj); - pd = &lov_env_info(env)->lti_ldescr; - - pd->cld_obj = parent_descr->cld_obj; - pd->cld_mode = parent_descr->cld_mode; - pd->cld_gid = parent_descr->cld_gid; - lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd); - lov->lls_sub[idx].sub_got = *d; - /* - * Notify top-lock about modification, if lock description changes - * materially. - */ - if (!cl_lock_ext_match(parent_descr, pd)) - result = cl_lock_modify(env, parent, pd); - else - result = 0; - return result; -} - -static int lovsub_lock_modify(const struct lu_env *env, - const struct cl_lock_slice *s, - const struct cl_lock_descr *d) -{ - struct lovsub_lock *lock = cl2lovsub_lock(s); - struct lov_lock_link *scan; - struct lov_lock *lov; - int result = 0; - - LASSERT(cl_lock_mode_match(d->cld_mode, - s->cls_lock->cll_descr.cld_mode)); - list_for_each_entry(scan, &lock->lss_parents, lll_list) { - int rc; - - lov = scan->lll_super; - lovsub_parent_lock(env, lov); - rc = lov_sublock_modify(env, lov, lock, d, scan->lll_idx); - lovsub_parent_unlock(env, lov); - result = result ?: rc; - } - return result; -} - -static int lovsub_lock_closure(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_lock_closure *closure) -{ - struct lovsub_lock *sub; - struct cl_lock *parent; - struct lov_lock_link *scan; - int result; - - LASSERT(cl_lock_is_mutexed(slice->cls_lock)); - - sub = cl2lovsub_lock(slice); - result = 0; - - list_for_each_entry(scan, &sub->lss_parents, lll_list) { - parent = scan->lll_super->lls_cl.cls_lock; - result = cl_lock_closure_build(env, parent, closure); - if (result != 0) - break; - } - return result; -} - -/** - * A helper function for lovsub_lock_delete() that deals with a given parent - * top-lock. - */ -static int lovsub_lock_delete_one(const struct lu_env *env, - struct cl_lock *child, struct lov_lock *lov) -{ - struct cl_lock *parent; - int result; - - parent = lov->lls_cl.cls_lock; - if (parent->cll_error) - return 0; - - result = 0; - switch (parent->cll_state) { - case CLS_ENQUEUED: - /* See LU-1355 for the case that a glimpse lock is - * interrupted by signal - */ - LASSERT(parent->cll_flags & CLF_CANCELLED); - break; - case CLS_QUEUING: - case CLS_FREEING: - cl_lock_signal(env, parent); - break; - case CLS_INTRANSIT: - /* - * Here lies a problem: a sub-lock is canceled while top-lock - * is being unlocked. Top-lock cannot be moved into CLS_NEW - * state, because unlocking has to succeed eventually by - * placing lock into CLS_CACHED (or failing it), see - * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED - * state, because lov maintains an invariant that all - * sub-locks exist in CLS_CACHED (this allows cached top-lock - * to be reused immediately). Nor can we wait for top-lock - * state to change, because this can be synchronous to the - * current thread. - * - * We know for sure that lov_lock_unuse() will be called at - * least one more time to finish un-using, so leave a mark on - * the top-lock, that will be seen by the next call to - * lov_lock_unuse(). - */ - if (cl_lock_is_intransit(parent)) - lov->lls_cancel_race = 1; - break; - case CLS_CACHED: - /* - * if a sub-lock is canceled move its top-lock into CLS_NEW - * state to preserve an invariant that a top-lock in - * CLS_CACHED is immediately ready for re-use (i.e., has all - * sub-locks), and so that next attempt to re-use the top-lock - * enqueues missing sub-lock. - */ - cl_lock_state_set(env, parent, CLS_NEW); - /* fall through */ - case CLS_NEW: - /* - * if last sub-lock is canceled, destroy the top-lock (which - * is now `empty') proactively. - */ - if (lov->lls_nr_filled == 0) { - /* ... but unfortunately, this cannot be done easily, - * as cancellation of a top-lock might acquire mutices - * of its other sub-locks, violating lock ordering, - * see cl_lock_{cancel,delete}() preconditions. - * - * To work around this, the mutex of this sub-lock is - * released, top-lock is destroyed, and sub-lock mutex - * acquired again. The list of parents has to be - * re-scanned from the beginning after this. - * - * Only do this if no mutices other than on @child and - * @parent are held by the current thread. - * - * TODO: The lock modal here is too complex, because - * the lock may be canceled and deleted by voluntarily: - * cl_lock_request - * -> osc_lock_enqueue_wait - * -> osc_lock_cancel_wait - * -> cl_lock_delete - * -> lovsub_lock_delete - * -> cl_lock_cancel/delete - * -> ... - * - * The better choice is to spawn a kernel thread for - * this purpose. -jay - */ - if (cl_lock_nr_mutexed(env) == 2) { - cl_lock_mutex_put(env, child); - cl_lock_cancel(env, parent); - cl_lock_delete(env, parent); - result = 1; - } - } - break; - case CLS_HELD: - CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n"); - default: - CERROR("Impossible state: %d\n", parent->cll_state); - LBUG(); - break; - } - - return result; -} - -/** - * An implementation of cl_lock_operations::clo_delete() method. This is - * invoked in "bottom-to-top" delete, when lock destruction starts from the - * sub-lock (e.g, as a result of ldlm lock LRU policy). - */ -static void lovsub_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct cl_lock *child = slice->cls_lock; - struct lovsub_lock *sub = cl2lovsub_lock(slice); - int restart; - - LASSERT(cl_lock_is_mutexed(child)); - - /* - * Destruction of a sub-lock might take multiple iterations, because - * when the last sub-lock of a given top-lock is deleted, top-lock is - * canceled proactively, and this requires to release sub-lock - * mutex. Once sub-lock mutex has been released, list of its parents - * has to be re-scanned from the beginning. - */ - do { - struct lov_lock *lov; - struct lov_lock_link *scan; - struct lov_lock_link *temp; - struct lov_lock_sub *subdata; - - restart = 0; - list_for_each_entry_safe(scan, temp, - &sub->lss_parents, lll_list) { - lov = scan->lll_super; - subdata = &lov->lls_sub[scan->lll_idx]; - lovsub_parent_lock(env, lov); - subdata->sub_got = subdata->sub_descr; - lov_lock_unlink(env, scan, sub); - restart = lovsub_lock_delete_one(env, child, lov); - lovsub_parent_unlock(env, lov); - - if (restart) { - cl_lock_mutex_get(env, child); - break; - } - } - } while (restart); -} - -static int lovsub_lock_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct cl_lock_slice *slice) -{ - struct lovsub_lock *sub = cl2lovsub_lock(slice); - struct lov_lock *lov; - struct lov_lock_link *scan; - - list_for_each_entry(scan, &sub->lss_parents, lll_list) { - lov = scan->lll_super; - (*p)(env, cookie, "[%d %p ", scan->lll_idx, lov); - if (lov) - cl_lock_descr_print(env, cookie, p, - &lov->lls_cl.cls_lock->cll_descr); - (*p)(env, cookie, "] "); - } - return 0; -} - static const struct cl_lock_operations lovsub_lock_ops = { .clo_fini = lovsub_lock_fini, - .clo_state = lovsub_lock_state, - .clo_delete = lovsub_lock_delete, - .clo_modify = lovsub_lock_modify, - .clo_closure = lovsub_lock_closure, - .clo_weigh = lovsub_lock_weigh, - .clo_print = lovsub_lock_print }; int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj, @@ -460,8 +73,9 @@ int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj, INIT_LIST_HEAD(&lsk->lss_parents); cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops); result = 0; - } else + } else { result = -ENOMEM; + } return result; } diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c index 6c5430d938d0..fb2f2660b3e9 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_object.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -67,10 +63,10 @@ int lovsub_object_init(const struct lu_env *env, struct lu_object *obj, lu_object_add(obj, below); cl_object_page_init(lu2cl(obj), sizeof(struct lovsub_page)); result = 0; - } else + } else { result = -ENOMEM; + } return result; - } static void lovsub_object_free(const struct lu_env *env, struct lu_object *obj) @@ -154,8 +150,9 @@ struct lu_object *lovsub_object_alloc(const struct lu_env *env, lu_object_add_top(&hdr->coh_lu, obj); los->lso_cl.co_ops = &lovsub_ops; obj->lo_ops = &lovsub_lu_obj_ops; - } else + } else { obj = NULL; + } return obj; } diff --git a/drivers/staging/lustre/lustre/lov/lovsub_page.c b/drivers/staging/lustre/lustre/lov/lovsub_page.c index 2d945532b78e..b2e68c3e820d 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_page.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_page.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -60,11 +56,11 @@ static const struct cl_page_operations lovsub_page_ops = { }; int lovsub_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *unused) + struct cl_page *page, pgoff_t index) { struct lovsub_page *lsb = cl_object_page_slice(obj, page); - cl_page_slice_add(page, &lsb->lsb_cl, obj, &lovsub_page_ops); + cl_page_slice_add(page, &lsb->lsb_cl, obj, index, &lovsub_page_ops); return 0; } diff --git a/drivers/staging/lustre/lustre/lov/lproc_lov.c b/drivers/staging/lustre/lustre/lov/lproc_lov.c index 0dcb6b6a7782..eb6d30d34e3a 100644 --- a/drivers/staging/lustre/lustre/lov/lproc_lov.c +++ b/drivers/staging/lustre/lustre/lov/lproc_lov.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c index 38f267a60f59..98d15fb247bc 100644 --- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c +++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -49,9 +45,9 @@ static ssize_t max_rpcs_in_flight_show(struct kobject *kobj, obd_kobj); struct client_obd *cli = &dev->u.cli; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); len = sprintf(buf, "%u\n", cli->cl_max_rpcs_in_flight); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return len; } @@ -74,9 +70,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj, if (val < 1 || val > MDC_MAX_RIF_MAX) return -ERANGE; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_max_rpcs_in_flight = val; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return count; } diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h index c5519aeb0d8a..58f2841cabe4 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h +++ b/drivers/staging/lustre/lustre/mdc/mdc_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c index b3bfdcb73670..143bd7628572 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -279,8 +275,7 @@ static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec, rec->sa_atime = LTIME_S(op_data->op_attr.ia_atime); rec->sa_mtime = LTIME_S(op_data->op_attr.ia_mtime); rec->sa_ctime = LTIME_S(op_data->op_attr.ia_ctime); - rec->sa_attr_flags = - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags; + rec->sa_attr_flags = op_data->op_attr_flags; if ((op_data->op_attr.ia_valid & ATTR_GID) && in_group_p(op_data->op_attr.ia_gid)) rec->sa_suppgid = @@ -439,7 +434,6 @@ void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags, char *tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME); LOGL0(op_data->op_name, op_data->op_namelen, tmp); - } } @@ -455,7 +449,7 @@ static void mdc_hsm_release_pack(struct ptlrpc_request *req, lock = ldlm_handle2lock(&op_data->op_lease_handle); if (lock) { data->cd_handle = lock->l_remote_handle; - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL); @@ -473,6 +467,18 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data) rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); mdc_setattr_pack_rec(rec, op_data); + /* + * The client will zero out local timestamps when losing the IBITS lock + * so any new RPC timestamps will update the client inode's timestamps. + * There was a defect on the server side which allowed the atime to be + * overwritten by a zeroed-out atime packed into the close RPC. + * + * Proactively clear the MDS_ATTR_ATIME flag in the RPC in this case + * to avoid zeroing the atime on old unpatched servers. See LU-8041. + */ + if (rec->sa_atime == 0) + rec->sa_valid &= ~MDS_ATTR_ATIME; + mdc_ioepoch_pack(epoch, op_data); mdc_hsm_release_pack(req, op_data); } @@ -481,9 +487,9 @@ static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) { int rc; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); rc = list_empty(&mcw->mcw_entry); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return rc; }; @@ -497,23 +503,23 @@ int mdc_enter_request(struct client_obd *cli) struct mdc_cache_waiter mcw; struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters); init_waitqueue_head(&mcw.mcw_waitq); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); rc = l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw), &lwi); if (rc) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (list_empty(&mcw.mcw_entry)) cli->cl_r_in_flight--; list_del_init(&mcw.mcw_entry); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } } else { cli->cl_r_in_flight++; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } return rc; } @@ -523,7 +529,7 @@ void mdc_exit_request(struct client_obd *cli) struct list_head *l, *tmp; struct mdc_cache_waiter *mcw; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_r_in_flight--; list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { @@ -538,5 +544,5 @@ void mdc_exit_request(struct client_obd *cli) } /* Empty waiting list? Decrease reqs in-flight number */ - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c index 958a164f620d..f48b58423307 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -54,61 +50,43 @@ struct mdc_getattr_args { struct ldlm_enqueue_info *ga_einfo; }; -int it_disposition(struct lookup_intent *it, int flag) -{ - return it->d.lustre.it_disposition & flag; -} -EXPORT_SYMBOL(it_disposition); - -void it_set_disposition(struct lookup_intent *it, int flag) -{ - it->d.lustre.it_disposition |= flag; -} -EXPORT_SYMBOL(it_set_disposition); - -void it_clear_disposition(struct lookup_intent *it, int flag) -{ - it->d.lustre.it_disposition &= ~flag; -} -EXPORT_SYMBOL(it_clear_disposition); - int it_open_error(int phase, struct lookup_intent *it) { if (it_disposition(it, DISP_OPEN_LEASE)) { if (phase >= DISP_OPEN_LEASE) - return it->d.lustre.it_status; + return it->it_status; else return 0; } if (it_disposition(it, DISP_OPEN_OPEN)) { if (phase >= DISP_OPEN_OPEN) - return it->d.lustre.it_status; + return it->it_status; else return 0; } if (it_disposition(it, DISP_OPEN_CREATE)) { if (phase >= DISP_OPEN_CREATE) - return it->d.lustre.it_status; + return it->it_status; else return 0; } if (it_disposition(it, DISP_LOOKUP_EXECD)) { if (phase >= DISP_LOOKUP_EXECD) - return it->d.lustre.it_status; + return it->it_status; else return 0; } if (it_disposition(it, DISP_IT_EXECD)) { if (phase >= DISP_IT_EXECD) - return it->d.lustre.it_status; + return it->it_status; else return 0; } - CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition, - it->d.lustre.it_status); + CERROR("it disp: %X, status: %d\n", it->it_disposition, + it->it_status); LBUG(); return 0; } @@ -347,10 +325,6 @@ static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp, mdc_open_pack(req, op_data, it->it_create_mode, 0, it->it_flags, lmm, lmmsize); - /* for remote client, fetch remote perm for current user */ - if (client_is_remote(exp)) - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - sizeof(struct mdt_remote_perm)); ptlrpc_request_set_replen(req); return req; } @@ -444,9 +418,7 @@ static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp, struct obd_device *obddev = class_exp2obd(exp); u64 valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA | - OBD_MD_MEA | - (client_is_remote(exp) ? - OBD_MD_FLRMTPERM : OBD_MD_FLACL); + OBD_MD_MEA | OBD_MD_FLACL; struct ldlm_intent *lit; int rc; int easize; @@ -478,9 +450,6 @@ static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp, mdc_getattr_pack(req, valid, it->it_flags, op_data, easize); req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, easize); - if (client_is_remote(exp)) - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - sizeof(struct mdt_remote_perm)); ptlrpc_request_set_replen(req); return req; } @@ -555,7 +524,6 @@ static int mdc_finish_enqueue(struct obd_export *exp, struct req_capsule *pill = &req->rq_pill; struct ldlm_request *lockreq; struct ldlm_reply *lockrep; - struct lustre_intent_data *intent = &it->d.lustre; struct ldlm_lock *lock; void *lvb_data = NULL; int lvb_len = 0; @@ -589,17 +557,17 @@ static int mdc_finish_enqueue(struct obd_export *exp, lockrep = req_capsule_server_get(pill, &RMF_DLM_REP); - intent->it_disposition = (int)lockrep->lock_policy_res1; - intent->it_status = (int)lockrep->lock_policy_res2; - intent->it_lock_mode = einfo->ei_mode; - intent->it_lock_handle = lockh->cookie; - intent->it_data = req; + it->it_disposition = (int)lockrep->lock_policy_res1; + it->it_status = (int)lockrep->lock_policy_res2; + it->it_lock_mode = einfo->ei_mode; + it->it_lock_handle = lockh->cookie; + it->it_request = req; /* Technically speaking rq_transno must already be zero if * it_status is in error, so the check is a bit redundant */ - if ((!req->rq_transno || intent->it_status < 0) && req->rq_replay) - mdc_clear_replay_flag(req, intent->it_status); + if ((!req->rq_transno || it->it_status < 0) && req->rq_replay) + mdc_clear_replay_flag(req, it->it_status); /* If we're doing an IT_OPEN which did not result in an actual * successful open, then we need to remove the bit which saves @@ -610,11 +578,11 @@ static int mdc_finish_enqueue(struct obd_export *exp, * (bug 3440) */ if (it->it_op & IT_OPEN && req->rq_replay && - (!it_disposition(it, DISP_OPEN_OPEN) || intent->it_status != 0)) - mdc_clear_replay_flag(req, intent->it_status); + (!it_disposition(it, DISP_OPEN_OPEN) || it->it_status != 0)) + mdc_clear_replay_flag(req, it->it_status); DEBUG_REQ(D_RPCTRACE, req, "op: %d disposition: %x, status: %d", - it->it_op, intent->it_disposition, intent->it_status); + it->it_op, it->it_disposition, it->it_status); /* We know what to expect, so we do any byte flipping required here */ if (it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR)) { @@ -687,16 +655,6 @@ static int mdc_finish_enqueue(struct obd_export *exp, memcpy(lmm, eadata, body->eadatasize); } } - - if (body->valid & OBD_MD_FLRMTPERM) { - struct mdt_remote_perm *perm; - - LASSERT(client_is_remote(exp)); - perm = req_capsule_server_swab_get(pill, &RMF_ACL, - lustre_swab_mdt_remote_perm); - if (!perm) - return -EPROTO; - } } else if (it->it_op & IT_LAYOUT) { /* maybe the lock was granted right away and layout * is packed into RMF_DLM_LVB of req @@ -715,7 +673,7 @@ static int mdc_finish_enqueue(struct obd_export *exp, if (lock && ldlm_has_layout(lock) && lvb_data) { void *lmm; - LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d\n", + LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d", ldlm_it2str(it->it_op), lvb_len); lmm = libcfs_kvzalloc(lvb_len, GFP_NOFS); @@ -869,7 +827,9 @@ resend: * (explicits or automatically generated by Kernel to clean * current FLocks upon exit) that can't be trashed */ - if ((rc == -EINTR) || (rc == -ETIMEDOUT)) + if (((rc == -EINTR) || (rc == -ETIMEDOUT)) && + (einfo->ei_type == LDLM_FLOCK) && + (einfo->ei_mode == LCK_NL)) goto resend; return rc; } @@ -921,9 +881,9 @@ resend: } ptlrpc_req_finished(req); - it->d.lustre.it_lock_handle = 0; - it->d.lustre.it_lock_mode = 0; - it->d.lustre.it_data = NULL; + it->it_lock_handle = 0; + it->it_lock_mode = 0; + it->it_request = NULL; } return rc; @@ -947,8 +907,8 @@ static int mdc_finish_intent_lock(struct obd_export *exp, /* The server failed before it even started executing the * intent, i.e. because it couldn't unpack the request. */ - LASSERT(it->d.lustre.it_status != 0); - return it->d.lustre.it_status; + LASSERT(it->it_status != 0); + return it->it_status; } rc = it_open_error(DISP_IT_EXECD, it); if (rc) @@ -963,7 +923,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp, if (fid_is_sane(&op_data->op_fid2) && it->it_create_mode & M_CHECK_STALE && it->it_op != IT_GETATTR) { - /* Also: did we find the same inode? */ /* sever can return one of two fids: * op_fid2 - new allocated fid - if file is created. @@ -1032,15 +991,15 @@ static int mdc_finish_intent_lock(struct obd_export *exp, LDLM_IBITS, &policy, LCK_NL, &old_lock, 0)) { ldlm_lock_decref_and_cancel(lockh, - it->d.lustre.it_lock_mode); + it->it_lock_mode); memcpy(lockh, &old_lock, sizeof(old_lock)); - it->d.lustre.it_lock_handle = lockh->cookie; + it->it_lock_handle = lockh->cookie; } } CDEBUG(D_DENTRY, "D_IT dentry %.*s intent: %s status %d disp %x rc %d\n", op_data->op_namelen, op_data->op_name, ldlm_it2str(it->it_op), - it->d.lustre.it_status, it->d.lustre.it_disposition, rc); + it->it_status, it->it_disposition, rc); return rc; } @@ -1056,8 +1015,8 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, ldlm_policy_data_t policy; enum ldlm_mode mode; - if (it->d.lustre.it_lock_handle) { - lockh.cookie = it->d.lustre.it_lock_handle; + if (it->it_lock_handle) { + lockh.cookie = it->it_lock_handle; mode = ldlm_revalidate_lock_handle(&lockh, bits); } else { fid_build_reg_res_name(fid, &res_id); @@ -1098,11 +1057,11 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, } if (mode) { - it->d.lustre.it_lock_handle = lockh.cookie; - it->d.lustre.it_lock_mode = mode; + it->it_lock_handle = lockh.cookie; + it->it_lock_mode = mode; } else { - it->d.lustre.it_lock_handle = 0; - it->d.lustre.it_lock_mode = 0; + it->it_lock_handle = 0; + it->it_lock_mode = 0; } return !!mode; @@ -1124,15 +1083,15 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, * ll_create/ll_open gets called. * * The server will return to us, in it_disposition, an indication of - * exactly what d.lustre.it_status refers to. + * exactly what it_status refers to. * - * If DISP_OPEN_OPEN is set, then d.lustre.it_status refers to the open() call, + * If DISP_OPEN_OPEN is set, then it_status refers to the open() call, * otherwise if DISP_OPEN_CREATE is set, then it status is the * creation failure mode. In either case, one of DISP_LOOKUP_NEG or * DISP_LOOKUP_POS will be set, indicating whether the child lookup * was successful. * - * Else, if DISP_LOOKUP_EXECD then d.lustre.it_status is the rc of the + * Else, if DISP_LOOKUP_EXECD then it_status is the rc of the * child lookup. */ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data, @@ -1165,7 +1124,7 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data, * be called in revalidate_it if we already have a lock, let's * verify that. */ - it->d.lustre.it_lock_handle = 0; + it->it_lock_handle = 0; rc = mdc_revalidate_lock(exp, it, &op_data->op_fid2, NULL); /* Only return failure if it was not GETATTR by cfid * (from inode_revalidate) @@ -1187,7 +1146,7 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data, if (rc < 0) return rc; - *reqp = it->d.lustre.it_data; + *reqp = it->it_request; rc = mdc_finish_intent_lock(exp, *reqp, op_data, it, &lockh); return rc; } diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c index 4ef3db147f87..5dba2c813857 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -234,7 +230,7 @@ rebuild: MDS_INODELOCK_UPDATE); req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_REINT_CREATE_RMT_ACL); + &RQF_MDS_REINT_CREATE_ACL); if (!req) { ldlm_lock_list_put(&cancels, l_bl_ast, count); return -ENOMEM; diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c index b91d3ff18b02..542801f04b0d 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_request.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -142,25 +138,14 @@ static int mdc_getattr_common(struct obd_export *exp, CDEBUG(D_NET, "mode: %o\n", body->mode); + mdc_update_max_ea_from_body(exp, body); if (body->eadatasize != 0) { - mdc_update_max_ea_from_body(exp, body); - eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD, body->eadatasize); if (!eadata) return -EPROTO; } - if (body->valid & OBD_MD_FLRMTPERM) { - struct mdt_remote_perm *perm; - - LASSERT(client_is_remote(exp)); - perm = req_capsule_server_swab_get(pill, &RMF_ACL, - lustre_swab_mdt_remote_perm); - if (!perm) - return -EPROTO; - } - return 0; } @@ -191,11 +176,6 @@ static int mdc_getattr(struct obd_export *exp, struct md_op_data *op_data, req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, op_data->op_mode); - if (op_data->op_valid & OBD_MD_FLRMTPERM) { - LASSERT(client_is_remote(exp)); - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - sizeof(struct mdt_remote_perm)); - } ptlrpc_request_set_replen(req); rc = mdc_getattr_common(exp, req); @@ -435,7 +415,7 @@ static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md) return rc; } - rc = posix_acl_valid(acl); + rc = posix_acl_valid(&init_user_ns, acl); if (rc) { CERROR("validate acl: %d\n", rc); posix_acl_release(acl); @@ -540,16 +520,7 @@ static int mdc_get_lustre_md(struct obd_export *exp, } rc = 0; - if (md->body->valid & OBD_MD_FLRMTPERM) { - /* remote permission */ - LASSERT(client_is_remote(exp)); - md->remote_perm = req_capsule_server_swab_get(pill, &RMF_ACL, - lustre_swab_mdt_remote_perm); - if (!md->remote_perm) { - rc = -EPROTO; - goto out; - } - } else if (md->body->valid & OBD_MD_FLACL) { + if (md->body->valid & OBD_MD_FLACL) { /* for ACL, it's possible that FLACL is set but aclsize is zero. * only when aclsize != 0 there's an actual segment for ACL * in reply buffer. @@ -666,7 +637,7 @@ int mdc_set_open_replay_data(struct obd_export *exp, struct md_open_data *mod; struct mdt_rec_create *rec; struct mdt_body *body; - struct ptlrpc_request *open_req = it->d.lustre.it_data; + struct ptlrpc_request *open_req = it->it_request; struct obd_import *imp = open_req->rq_import; if (!open_req->rq_replay) @@ -1169,7 +1140,7 @@ static int mdc_ioc_hsm_progress(struct obd_export *exp, goto out; } - mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + mdc_pack_body(req, NULL, 0, 0, -1, 0); /* Copy hsm_progress struct */ req_hpk = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_PROGRESS); @@ -1203,7 +1174,7 @@ static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archives) goto out; } - mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + mdc_pack_body(req, NULL, 0, 0, -1, 0); /* Copy hsm_progress struct */ archive_mask = req_capsule_client_get(&req->rq_pill, @@ -1242,7 +1213,7 @@ static int mdc_ioc_hsm_current_action(struct obd_export *exp, return rc; } - mdc_pack_body(req, &op_data->op_fid1, OBD_MD_FLRMTPERM, 0, + mdc_pack_body(req, &op_data->op_fid1, 0, 0, op_data->op_suppgids[0], 0); ptlrpc_request_set_replen(req); @@ -1278,7 +1249,7 @@ static int mdc_ioc_hsm_ct_unregister(struct obd_import *imp) goto out; } - mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + mdc_pack_body(req, NULL, 0, 0, -1, 0); ptlrpc_request_set_replen(req); @@ -1307,7 +1278,7 @@ static int mdc_ioc_hsm_state_get(struct obd_export *exp, return rc; } - mdc_pack_body(req, &op_data->op_fid1, OBD_MD_FLRMTPERM, 0, + mdc_pack_body(req, &op_data->op_fid1, 0, 0, op_data->op_suppgids[0], 0); ptlrpc_request_set_replen(req); @@ -1348,7 +1319,7 @@ static int mdc_ioc_hsm_state_set(struct obd_export *exp, return rc; } - mdc_pack_body(req, &op_data->op_fid1, OBD_MD_FLRMTPERM, 0, + mdc_pack_body(req, &op_data->op_fid1, 0, 0, op_data->op_suppgids[0], 0); /* Copy states */ @@ -1395,7 +1366,7 @@ static int mdc_ioc_hsm_request(struct obd_export *exp, return rc; } - mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + mdc_pack_body(req, NULL, 0, 0, -1, 0); /* Copy hsm_request struct */ req_hr = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_REQUEST); @@ -1808,7 +1779,7 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case IOC_OBD_STATFS: { struct obd_statfs stat_buf = {0}; - if (*((__u32 *) data->ioc_inlbuf2) != 0) { + if (*((__u32 *)data->ioc_inlbuf2) != 0) { rc = -ENODEV; goto out; } @@ -1952,7 +1923,7 @@ static void lustre_swab_hal(struct hsm_action_list *h) __swab32s(&h->hal_count); __swab32s(&h->hal_archive_id); __swab64s(&h->hal_flags); - hai = hai_zero(h); + hai = hai_first(h); for (i = 0; i < h->hal_count; i++, hai = hai_next(hai)) lustre_swab_hai(hai); } @@ -2002,7 +1973,7 @@ static int mdc_hsm_copytool_send(int len, void *val) if (len < sizeof(*lh) + sizeof(*hal)) { CERROR("Short HSM message %d < %d\n", len, - (int) (sizeof(*lh) + sizeof(*hal))); + (int)(sizeof(*lh) + sizeof(*hal))); return -EPROTO; } if (lh->kuc_magic == __swab16(KUC_MAGIC)) { @@ -2249,7 +2220,7 @@ static struct obd_uuid *mdc_get_uuid(struct obd_export *exp) * recovery, non zero value will be return if the lock can be canceled, * or zero returned for not */ -static int mdc_cancel_for_recovery(struct ldlm_lock *lock) +static int mdc_cancel_weight(struct ldlm_lock *lock) { if (lock->l_resource->lr_type != LDLM_IBITS) return 0; @@ -2314,12 +2285,14 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) return -ENOMEM; mdc_init_rpc_lock(cli->cl_rpc_lock); - ptlrpcd_addref(); + rc = ptlrpcd_addref(); + if (rc < 0) + goto err_rpc_lock; cli->cl_close_lock = kzalloc(sizeof(*cli->cl_close_lock), GFP_NOFS); if (!cli->cl_close_lock) { rc = -ENOMEM; - goto err_rpc_lock; + goto err_ptlrpcd_decref; } mdc_init_rpc_lock(cli->cl_close_lock); @@ -2331,7 +2304,7 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) sptlrpc_lprocfs_cliobd_attach(obd); ptlrpc_lprocfs_register_obd(obd); - ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery); + ns_register_cancel(obd->obd_namespace, mdc_cancel_weight); obd->obd_namespace->ns_lvbo = &inode_lvbo; @@ -2345,9 +2318,10 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) err_close_lock: kfree(cli->cl_close_lock); +err_ptlrpcd_decref: + ptlrpcd_decref(); err_rpc_lock: kfree(cli->cl_rpc_lock); - ptlrpcd_decref(); return rc; } @@ -2430,41 +2404,6 @@ static int mdc_process_config(struct obd_device *obd, u32 len, void *buf) return rc; } -/* get remote permission for current user on fid */ -static int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, - __u32 suppgid, struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - int rc; - - LASSERT(client_is_remote(exp)); - - *request = NULL; - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_GETATTR); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_pack_body(req, fid, OBD_MD_FLRMTPERM, 0, suppgid, 0); - - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - sizeof(struct mdt_remote_perm)); - - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - if (rc) - ptlrpc_req_finished(req); - else - *request = req; - return rc; -} - static struct obd_ops mdc_obd_ops = { .owner = THIS_MODULE, .setup = mdc_setup, @@ -2516,7 +2455,6 @@ static struct md_ops mdc_md_ops = { .free_lustre_md = mdc_free_lustre_md, .set_open_replay_data = mdc_set_open_replay_data, .clear_open_replay_data = mdc_clear_open_replay_data, - .get_remote_perm = mdc_get_remote_perm, .intent_getattr_async = mdc_intent_getattr_async, .revalidate_lock = mdc_revalidate_lock }; diff --git a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c index 8d5bc5a751a4..0735220b2a18 100644 --- a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c +++ b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/mgc/mgc_internal.h b/drivers/staging/lustre/lustre/mgc/mgc_internal.h index 82fb8f46e037..f146f7521c92 100644 --- a/drivers/staging/lustre/lustre/mgc/mgc_internal.h +++ b/drivers/staging/lustre/lustre/mgc/mgc_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c index 3924b095bfb0..9d0bd4745865 100644 --- a/drivers/staging/lustre/lustre/mgc/mgc_request.c +++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -500,10 +496,16 @@ static void do_requeue(struct config_llog_data *cld) * export which is being disconnected. Take the client * semaphore to make the check non-racy. */ - down_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem); + down_read_nested(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem, + OBD_CLI_SEM_MGC); + if (cld->cld_mgcexp->exp_obd->u.cli.cl_conn_count != 0) { + int rc; + CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname); - mgc_process_log(cld->cld_mgcexp->exp_obd, cld); + rc = mgc_process_log(cld->cld_mgcexp->exp_obd, cld); + if (rc && rc != -ENOENT) + CERROR("failed processing log: %d\n", rc); } else { CDEBUG(D_MGC, "disconnecting, won't update log %s\n", cld->cld_logname); @@ -734,7 +736,9 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) struct task_struct *task; int rc; - ptlrpcd_addref(); + rc = ptlrpcd_addref(); + if (rc < 0) + goto err_noref; rc = client_obd_setup(obd, lcfg); if (rc) @@ -773,6 +777,7 @@ err_cleanup: client_obd_cleanup(obd); err_decref: ptlrpcd_decref(); +err_noref: return rc; } @@ -1027,7 +1032,7 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp, rc = sptlrpc_parse_flavor(val, &flvr); if (rc) { CERROR("invalid sptlrpc flavor %s to MGS\n", - (char *) val); + (char *)val); return rc; } @@ -1043,7 +1048,7 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp, sptlrpc_flavor2name(&cli->cl_flvr_mgc, str, sizeof(str)); LCONSOLE_ERROR("asking sptlrpc flavor %s to MGS but currently %s is in use\n", - (char *) val, str); + (char *)val, str); rc = -EPERM; } return rc; @@ -1720,7 +1725,6 @@ static int mgc_process_config(struct obd_device *obd, u32 len, void *buf) CERROR("Unknown command: %d\n", lcfg->lcfg_command); rc = -EINVAL; goto out; - } } out: diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile index c404eb3864ff..df7e47f35a66 100644 --- a/drivers/staging/lustre/lustre/obdclass/Makefile +++ b/drivers/staging/lustre/lustre/obdclass/Makefile @@ -5,5 +5,4 @@ obdclass-y := linux/linux-module.o linux/linux-obdo.o linux/linux-sysctl.o \ genops.o uuid.o lprocfs_status.o lprocfs_counters.o \ lustre_handles.o lustre_peer.o statfs_pack.o \ obdo.o obd_config.o obd_mount.o lu_object.o lu_ref.o \ - cl_object.o cl_page.o cl_lock.o cl_io.o \ - acl.o kernelcomm.o + cl_object.o cl_page.o cl_lock.o cl_io.o kernelcomm.o diff --git a/drivers/staging/lustre/lustre/obdclass/acl.c b/drivers/staging/lustre/lustre/obdclass/acl.c deleted file mode 100644 index 0e02ae97b7ed..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/acl.c +++ /dev/null @@ -1,415 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/acl.c - * - * Lustre Access Control List. - * - * Author: Fan Yong <fanyong@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_SEC -#include "../include/lu_object.h" -#include "../include/lustre_acl.h" -#include "../include/lustre_eacl.h" -#include "../include/obd_support.h" - -#ifdef CONFIG_FS_POSIX_ACL - -#define CFS_ACL_XATTR_VERSION POSIX_ACL_XATTR_VERSION - -enum { - ES_UNK = 0, /* unknown stat */ - ES_UNC = 1, /* ACL entry is not changed */ - ES_MOD = 2, /* ACL entry is modified */ - ES_ADD = 3, /* ACL entry is added */ - ES_DEL = 4 /* ACL entry is deleted */ -}; - -static inline void lustre_ext_acl_le_to_cpu(ext_acl_xattr_entry *d, - ext_acl_xattr_entry *s) -{ - d->e_tag = le16_to_cpu(s->e_tag); - d->e_perm = le16_to_cpu(s->e_perm); - d->e_id = le32_to_cpu(s->e_id); - d->e_stat = le32_to_cpu(s->e_stat); -} - -static inline void lustre_ext_acl_cpu_to_le(ext_acl_xattr_entry *d, - ext_acl_xattr_entry *s) -{ - d->e_tag = cpu_to_le16(s->e_tag); - d->e_perm = cpu_to_le16(s->e_perm); - d->e_id = cpu_to_le32(s->e_id); - d->e_stat = cpu_to_le32(s->e_stat); -} - -static inline void lustre_posix_acl_le_to_cpu(posix_acl_xattr_entry *d, - posix_acl_xattr_entry *s) -{ - d->e_tag = le16_to_cpu(s->e_tag); - d->e_perm = le16_to_cpu(s->e_perm); - d->e_id = le32_to_cpu(s->e_id); -} - -static inline void lustre_posix_acl_cpu_to_le(posix_acl_xattr_entry *d, - posix_acl_xattr_entry *s) -{ - d->e_tag = cpu_to_le16(s->e_tag); - d->e_perm = cpu_to_le16(s->e_perm); - d->e_id = cpu_to_le32(s->e_id); -} - -/* if "new_count == 0", then "new = {a_version, NULL}", NOT NULL. */ -static int lustre_posix_acl_xattr_reduce_space(posix_acl_xattr_header **header, - int old_count, int new_count) -{ - int old_size = CFS_ACL_XATTR_SIZE(old_count, posix_acl_xattr); - int new_size = CFS_ACL_XATTR_SIZE(new_count, posix_acl_xattr); - posix_acl_xattr_header *new; - - if (unlikely(old_count <= new_count)) - return old_size; - - new = kmemdup(*header, new_size, GFP_NOFS); - if (unlikely(!new)) - return -ENOMEM; - - kfree(*header); - *header = new; - return new_size; -} - -/* if "new_count == 0", then "new = {0, NULL}", NOT NULL. */ -static int lustre_ext_acl_xattr_reduce_space(ext_acl_xattr_header **header, - int old_count) -{ - int ext_count = le32_to_cpu((*header)->a_count); - int ext_size = CFS_ACL_XATTR_SIZE(ext_count, ext_acl_xattr); - ext_acl_xattr_header *new; - - if (unlikely(old_count <= ext_count)) - return 0; - - new = kmemdup(*header, ext_size, GFP_NOFS); - if (unlikely(!new)) - return -ENOMEM; - - kfree(*header); - *header = new; - return 0; -} - -/* - * Generate new extended ACL based on the posix ACL. - */ -ext_acl_xattr_header * -lustre_posix_acl_xattr_2ext(posix_acl_xattr_header *header, int size) -{ - int count, i, esize; - ext_acl_xattr_header *new; - - if (unlikely(size < 0)) - return ERR_PTR(-EINVAL); - else if (!size) - count = 0; - else - count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr); - esize = CFS_ACL_XATTR_SIZE(count, ext_acl_xattr); - new = kzalloc(esize, GFP_NOFS); - if (unlikely(!new)) - return ERR_PTR(-ENOMEM); - - new->a_count = cpu_to_le32(count); - for (i = 0; i < count; i++) { - new->a_entries[i].e_tag = header->a_entries[i].e_tag; - new->a_entries[i].e_perm = header->a_entries[i].e_perm; - new->a_entries[i].e_id = header->a_entries[i].e_id; - new->a_entries[i].e_stat = cpu_to_le32(ES_UNK); - } - - return new; -} -EXPORT_SYMBOL(lustre_posix_acl_xattr_2ext); - -/* - * Filter out the "nobody" entries in the posix ACL. - */ -int lustre_posix_acl_xattr_filter(posix_acl_xattr_header *header, size_t size, - posix_acl_xattr_header **out) -{ - int count, i, j, rc = 0; - __u32 id; - posix_acl_xattr_header *new; - - if (!size) - return 0; - if (size < sizeof(*new)) - return -EINVAL; - - new = kzalloc(size, GFP_NOFS); - if (unlikely(!new)) - return -ENOMEM; - - new->a_version = cpu_to_le32(CFS_ACL_XATTR_VERSION); - count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr); - for (i = 0, j = 0; i < count; i++) { - id = le32_to_cpu(header->a_entries[i].e_id); - switch (le16_to_cpu(header->a_entries[i].e_tag)) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - if (id != ACL_UNDEFINED_ID) { - rc = -EIO; - goto _out; - } - - memcpy(&new->a_entries[j++], &header->a_entries[i], - sizeof(posix_acl_xattr_entry)); - break; - case ACL_USER: - if (id != NOBODY_UID) - memcpy(&new->a_entries[j++], - &header->a_entries[i], - sizeof(posix_acl_xattr_entry)); - break; - case ACL_GROUP: - if (id != NOBODY_GID) - memcpy(&new->a_entries[j++], - &header->a_entries[i], - sizeof(posix_acl_xattr_entry)); - break; - default: - rc = -EIO; - goto _out; - } - } - - /* free unused space. */ - rc = lustre_posix_acl_xattr_reduce_space(&new, count, j); - if (rc >= 0) { - size = rc; - *out = new; - rc = 0; - } - -_out: - if (rc) { - kfree(new); - size = rc; - } - return size; -} -EXPORT_SYMBOL(lustre_posix_acl_xattr_filter); - -/* - * Release the extended ACL space. - */ -void lustre_ext_acl_xattr_free(ext_acl_xattr_header *header) -{ - kfree(header); -} -EXPORT_SYMBOL(lustre_ext_acl_xattr_free); - -static ext_acl_xattr_entry * -lustre_ext_acl_xattr_search(ext_acl_xattr_header *header, - posix_acl_xattr_entry *entry, int *pos) -{ - int once, start, end, i, j, count = le32_to_cpu(header->a_count); - - once = 0; - start = *pos; - end = count; - -again: - for (i = start; i < end; i++) { - if (header->a_entries[i].e_tag == entry->e_tag && - header->a_entries[i].e_id == entry->e_id) { - j = i; - if (++i >= count) - i = 0; - *pos = i; - return &header->a_entries[j]; - } - } - - if (!once) { - once = 1; - start = 0; - end = *pos; - goto again; - } - - return NULL; -} - -/* - * Merge the posix ACL and the extended ACL into new extended ACL. - */ -ext_acl_xattr_header * -lustre_acl_xattr_merge2ext(posix_acl_xattr_header *posix_header, int size, - ext_acl_xattr_header *ext_header) -{ - int ori_ext_count, posix_count, ext_count, ext_size; - int i, j, pos = 0, rc = 0; - posix_acl_xattr_entry pae; - ext_acl_xattr_header *new; - ext_acl_xattr_entry *ee, eae; - - if (unlikely(size < 0)) - return ERR_PTR(-EINVAL); - else if (!size) - posix_count = 0; - else - posix_count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr); - ori_ext_count = le32_to_cpu(ext_header->a_count); - ext_count = posix_count + ori_ext_count; - ext_size = CFS_ACL_XATTR_SIZE(ext_count, ext_acl_xattr); - - new = kzalloc(ext_size, GFP_NOFS); - if (unlikely(!new)) - return ERR_PTR(-ENOMEM); - - for (i = 0, j = 0; i < posix_count; i++) { - lustre_posix_acl_le_to_cpu(&pae, &posix_header->a_entries[i]); - switch (pae.e_tag) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - if (pae.e_id != ACL_UNDEFINED_ID) { - rc = -EIO; - goto out; - } - case ACL_USER: - /* ignore "nobody" entry. */ - if (pae.e_id == NOBODY_UID) - break; - - new->a_entries[j].e_tag = - posix_header->a_entries[i].e_tag; - new->a_entries[j].e_perm = - posix_header->a_entries[i].e_perm; - new->a_entries[j].e_id = - posix_header->a_entries[i].e_id; - ee = lustre_ext_acl_xattr_search(ext_header, - &posix_header->a_entries[i], &pos); - if (ee) { - if (posix_header->a_entries[i].e_perm != - ee->e_perm) - /* entry modified. */ - ee->e_stat = - new->a_entries[j++].e_stat = - cpu_to_le32(ES_MOD); - else - /* entry unchanged. */ - ee->e_stat = - new->a_entries[j++].e_stat = - cpu_to_le32(ES_UNC); - } else { - /* new entry. */ - new->a_entries[j++].e_stat = - cpu_to_le32(ES_ADD); - } - break; - case ACL_GROUP: - /* ignore "nobody" entry. */ - if (pae.e_id == NOBODY_GID) - break; - new->a_entries[j].e_tag = - posix_header->a_entries[i].e_tag; - new->a_entries[j].e_perm = - posix_header->a_entries[i].e_perm; - new->a_entries[j].e_id = - posix_header->a_entries[i].e_id; - ee = lustre_ext_acl_xattr_search(ext_header, - &posix_header->a_entries[i], &pos); - if (ee) { - if (posix_header->a_entries[i].e_perm != - ee->e_perm) - /* entry modified. */ - ee->e_stat = - new->a_entries[j++].e_stat = - cpu_to_le32(ES_MOD); - else - /* entry unchanged. */ - ee->e_stat = - new->a_entries[j++].e_stat = - cpu_to_le32(ES_UNC); - } else { - /* new entry. */ - new->a_entries[j++].e_stat = - cpu_to_le32(ES_ADD); - } - break; - default: - rc = -EIO; - goto out; - } - } - - /* process deleted entries. */ - for (i = 0; i < ori_ext_count; i++) { - lustre_ext_acl_le_to_cpu(&eae, &ext_header->a_entries[i]); - if (eae.e_stat == ES_UNK) { - /* ignore "nobody" entry. */ - if ((eae.e_tag == ACL_USER && eae.e_id == NOBODY_UID) || - (eae.e_tag == ACL_GROUP && eae.e_id == NOBODY_GID)) - continue; - - new->a_entries[j].e_tag = - ext_header->a_entries[i].e_tag; - new->a_entries[j].e_perm = - ext_header->a_entries[i].e_perm; - new->a_entries[j].e_id = ext_header->a_entries[i].e_id; - new->a_entries[j++].e_stat = cpu_to_le32(ES_DEL); - } - } - - new->a_count = cpu_to_le32(j); - /* free unused space. */ - rc = lustre_ext_acl_xattr_reduce_space(&new, ext_count); - -out: - if (rc) { - kfree(new); - new = ERR_PTR(rc); - } - return new; -} -EXPORT_SYMBOL(lustre_acl_xattr_merge2ext); - -#endif diff --git a/drivers/staging/lustre/lustre/obdclass/cl_internal.h b/drivers/staging/lustre/lustre/obdclass/cl_internal.h index 7eb0ad7b3644..e866754a42d5 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_internal.h +++ b/drivers/staging/lustre/lustre/obdclass/cl_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c index f5128b4f176f..e72f1fc00a13 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_io.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -36,6 +32,7 @@ * Client IO. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ #define DEBUG_SUBSYSTEM S_CLASS @@ -132,6 +129,7 @@ void cl_io_fini(const struct lu_env *env, struct cl_io *io) case CIT_WRITE: break; case CIT_FAULT: + break; case CIT_FSYNC: LASSERT(!io->ci_need_restart); break; @@ -159,7 +157,6 @@ static int cl_io_init0(const struct lu_env *env, struct cl_io *io, io->ci_type = iot; INIT_LIST_HEAD(&io->ci_lockset.cls_todo); - INIT_LIST_HEAD(&io->ci_lockset.cls_curr); INIT_LIST_HEAD(&io->ci_lockset.cls_done); INIT_LIST_HEAD(&io->ci_layers); @@ -241,37 +238,7 @@ static int cl_lock_descr_sort(const struct cl_lock_descr *d0, const struct cl_lock_descr *d1) { return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu), - lu_object_fid(&d1->cld_obj->co_lu)) ?: - __diff_normalize(d0->cld_start, d1->cld_start); -} - -static int cl_lock_descr_cmp(const struct cl_lock_descr *d0, - const struct cl_lock_descr *d1) -{ - int ret; - - ret = lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu), - lu_object_fid(&d1->cld_obj->co_lu)); - if (ret) - return ret; - if (d0->cld_end < d1->cld_start) - return -1; - if (d0->cld_start > d0->cld_end) - return 1; - return 0; -} - -static void cl_lock_descr_merge(struct cl_lock_descr *d0, - const struct cl_lock_descr *d1) -{ - d0->cld_start = min(d0->cld_start, d1->cld_start); - d0->cld_end = max(d0->cld_end, d1->cld_end); - - if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE) - d0->cld_mode = CLM_WRITE; - - if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP) - d0->cld_mode = CLM_GROUP; + lu_object_fid(&d1->cld_obj->co_lu)); } /* @@ -320,33 +287,35 @@ static void cl_io_locks_sort(struct cl_io *io) } while (!done); } -/** - * Check whether \a queue contains locks matching \a need. - * - * \retval +ve there is a matching lock in the \a queue - * \retval 0 there are no matching locks in the \a queue - */ -int cl_queue_match(const struct list_head *queue, - const struct cl_lock_descr *need) +static void cl_lock_descr_merge(struct cl_lock_descr *d0, + const struct cl_lock_descr *d1) { - struct cl_io_lock_link *scan; + d0->cld_start = min(d0->cld_start, d1->cld_start); + d0->cld_end = max(d0->cld_end, d1->cld_end); - list_for_each_entry(scan, queue, cill_linkage) { - if (cl_lock_descr_match(&scan->cill_descr, need)) - return 1; - } - return 0; + if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE) + d0->cld_mode = CLM_WRITE; + + if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP) + d0->cld_mode = CLM_GROUP; } -EXPORT_SYMBOL(cl_queue_match); -static int cl_queue_merge(const struct list_head *queue, - const struct cl_lock_descr *need) +static int cl_lockset_merge(const struct cl_lockset *set, + const struct cl_lock_descr *need) { struct cl_io_lock_link *scan; - list_for_each_entry(scan, queue, cill_linkage) { - if (cl_lock_descr_cmp(&scan->cill_descr, need)) + list_for_each_entry(scan, &set->cls_todo, cill_linkage) { + if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj)) continue; + + /* Merge locks for the same object because ldlm lock server + * may expand the lock extent, otherwise there is a deadlock + * case if two conflicted locks are queueud for the same object + * and lock server expands one lock to overlap the another. + * The side effect is that it can generate a multi-stripe lock + * that may cause casacading problem + */ cl_lock_descr_merge(&scan->cill_descr, need); CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n", scan->cill_descr.cld_mode, scan->cill_descr.cld_start, @@ -356,87 +325,20 @@ static int cl_queue_merge(const struct list_head *queue, return 0; } -static int cl_lockset_match(const struct cl_lockset *set, - const struct cl_lock_descr *need) -{ - return cl_queue_match(&set->cls_curr, need) || - cl_queue_match(&set->cls_done, need); -} - -static int cl_lockset_merge(const struct cl_lockset *set, - const struct cl_lock_descr *need) -{ - return cl_queue_merge(&set->cls_todo, need) || - cl_lockset_match(set, need); -} - -static int cl_lockset_lock_one(const struct lu_env *env, - struct cl_io *io, struct cl_lockset *set, - struct cl_io_lock_link *link) -{ - struct cl_lock *lock; - int result; - - lock = cl_lock_request(env, io, &link->cill_descr, "io", io); - - if (!IS_ERR(lock)) { - link->cill_lock = lock; - list_move(&link->cill_linkage, &set->cls_curr); - if (!(link->cill_descr.cld_enq_flags & CEF_ASYNC)) { - result = cl_wait(env, lock); - if (result == 0) - list_move(&link->cill_linkage, &set->cls_done); - } else - result = 0; - } else - result = PTR_ERR(lock); - return result; -} - -static void cl_lock_link_fini(const struct lu_env *env, struct cl_io *io, - struct cl_io_lock_link *link) -{ - struct cl_lock *lock = link->cill_lock; - - list_del_init(&link->cill_linkage); - if (lock) { - cl_lock_release(env, lock, "io", io); - link->cill_lock = NULL; - } - if (link->cill_fini) - link->cill_fini(env, link); -} - static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io, struct cl_lockset *set) { struct cl_io_lock_link *link; struct cl_io_lock_link *temp; - struct cl_lock *lock; int result; result = 0; list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) { - if (!cl_lockset_match(set, &link->cill_descr)) { - /* XXX some locking to guarantee that locks aren't - * expanded in between. - */ - result = cl_lockset_lock_one(env, io, set, link); - if (result != 0) - break; - } else - cl_lock_link_fini(env, io, link); - } - if (result == 0) { - list_for_each_entry_safe(link, temp, - &set->cls_curr, cill_linkage) { - lock = link->cill_lock; - result = cl_wait(env, lock); - if (result == 0) - list_move(&link->cill_linkage, &set->cls_done); - else - break; - } + result = cl_lock_request(env, io, &link->cill_lock); + if (result < 0) + break; + + list_move(&link->cill_linkage, &set->cls_done); } return result; } @@ -492,16 +394,19 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io) set = &io->ci_lockset; - list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) - cl_lock_link_fini(env, io, link); - - list_for_each_entry_safe(link, temp, &set->cls_curr, cill_linkage) - cl_lock_link_fini(env, io, link); + list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) { + list_del_init(&link->cill_linkage); + if (link->cill_fini) + link->cill_fini(env, link); + } list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) { - cl_unuse(env, link->cill_lock); - cl_lock_link_fini(env, io, link); + list_del_init(&link->cill_linkage); + cl_lock_release(env, &link->cill_lock); + if (link->cill_fini) + link->cill_fini(env, link); } + cl_io_for_each_reverse(scan, io) { if (scan->cis_iop->op[io->ci_type].cio_unlock) scan->cis_iop->op[io->ci_type].cio_unlock(env, scan); @@ -595,9 +500,9 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io, { int result; - if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) + if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) { result = 1; - else { + } else { list_add(&link->cill_linkage, &io->ci_lockset.cls_todo); result = 0; } @@ -627,8 +532,9 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, result = cl_io_lock_add(env, io, link); if (result) /* lock match */ link->cill_fini(env, link); - } else + } else { result = -ENOMEM; + } return result; } @@ -692,42 +598,6 @@ cl_io_slice_page(const struct cl_io_slice *ios, struct cl_page *page) } /** - * True iff \a page is within \a io range. - */ -static int cl_page_in_io(const struct cl_page *page, const struct cl_io *io) -{ - int result = 1; - loff_t start; - loff_t end; - pgoff_t idx; - - idx = page->cp_index; - switch (io->ci_type) { - case CIT_READ: - case CIT_WRITE: - /* - * check that [start, end) and [pos, pos + count) extents - * overlap. - */ - if (!cl_io_is_append(io)) { - const struct cl_io_rw_common *crw = &(io->u.ci_rw); - - start = cl_offset(page->cp_obj, idx); - end = cl_offset(page->cp_obj, idx + 1); - result = crw->crw_pos < end && - start < crw->crw_pos + crw->crw_count; - } - break; - case CIT_FAULT: - result = io->u.ci_fault.ft_index == idx; - break; - default: - LBUG(); - } - return result; -} - -/** * Called by read io, when page has to be read from the server. * * \see cl_io_operations::cio_read_page() @@ -742,7 +612,6 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io, LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT); LINVRNT(cl_page_is_owned(page, io)); LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED); - LINVRNT(cl_page_in_io(page, io)); LINVRNT(cl_io_invariant(io)); queue = &io->ci_queue; @@ -769,7 +638,7 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io, break; } } - if (result == 0) + if (result == 0 && queue->c2_qin.pl_nr > 0) result = cl_io_submit_rw(env, io, CRT_READ, queue); /* * Unlock unsent pages in case of error. @@ -781,77 +650,29 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io, EXPORT_SYMBOL(cl_io_read_page); /** - * Called by write io to prepare page to receive data from user buffer. - * - * \see cl_io_operations::cio_prepare_write() - */ -int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to) -{ - const struct cl_io_slice *scan; - int result = 0; - - LINVRNT(io->ci_type == CIT_WRITE); - LINVRNT(cl_page_is_owned(page, io)); - LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED); - LINVRNT(cl_io_invariant(io)); - LASSERT(cl_page_in_io(page, io)); - - cl_io_for_each_reverse(scan, io) { - if (scan->cis_iop->cio_prepare_write) { - const struct cl_page_slice *slice; - - slice = cl_io_slice_page(scan, page); - result = scan->cis_iop->cio_prepare_write(env, scan, - slice, - from, to); - if (result != 0) - break; - } - } - return result; -} -EXPORT_SYMBOL(cl_io_prepare_write); - -/** - * Called by write io after user data were copied into a page. + * Commit a list of contiguous pages into writeback cache. * - * \see cl_io_operations::cio_commit_write() + * \returns 0 if all pages committed, or errcode if error occurred. + * \see cl_io_operations::cio_commit_async() */ -int cl_io_commit_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to) +int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb) { const struct cl_io_slice *scan; int result = 0; - LINVRNT(io->ci_type == CIT_WRITE); - LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED); - LINVRNT(cl_io_invariant(io)); - /* - * XXX Uh... not nice. Top level cl_io_commit_write() call (vvp->lov) - * already called cl_page_cache_add(), moving page into CPS_CACHED - * state. Better (and more general) way of dealing with such situation - * is needed. - */ - LASSERT(cl_page_is_owned(page, io) || page->cp_parent); - LASSERT(cl_page_in_io(page, io)); - cl_io_for_each(scan, io) { - if (scan->cis_iop->cio_commit_write) { - const struct cl_page_slice *slice; - - slice = cl_io_slice_page(scan, page); - result = scan->cis_iop->cio_commit_write(env, scan, - slice, - from, to); - if (result != 0) - break; - } + if (!scan->cis_iop->cio_commit_async) + continue; + result = scan->cis_iop->cio_commit_async(env, scan, queue, + from, to, cb); + if (result != 0) + break; } - LINVRNT(result <= 0); return result; } -EXPORT_SYMBOL(cl_io_commit_write); +EXPORT_SYMBOL(cl_io_commit_async); /** * Submits a list of pages for immediate io. @@ -869,13 +690,10 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, const struct cl_io_slice *scan; int result = 0; - LINVRNT(crt < ARRAY_SIZE(scan->cis_iop->req_op)); - cl_io_for_each(scan, io) { - if (!scan->cis_iop->req_op[crt].cio_submit) + if (!scan->cis_iop->cio_submit) continue; - result = scan->cis_iop->req_op[crt].cio_submit(env, scan, crt, - queue); + result = scan->cis_iop->cio_submit(env, scan, crt, queue); if (result != 0) break; } @@ -887,6 +705,9 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, } EXPORT_SYMBOL(cl_io_submit_rw); +static void cl_page_list_assume(const struct lu_env *env, + struct cl_io *io, struct cl_page_list *plist); + /** * Submit a sync_io and wait for the IO to be finished, or error happens. * If \a timeout is zero, it means to wait for the IO unconditionally. @@ -904,7 +725,7 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, pg->cp_sync_io = anchor; } - cl_sync_io_init(anchor, queue->c2_qin.pl_nr); + cl_sync_io_init(anchor, queue->c2_qin.pl_nr, &cl_sync_io_end); rc = cl_io_submit_rw(env, io, iot, queue); if (rc == 0) { /* @@ -915,12 +736,12 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, */ cl_page_list_for_each(pg, &queue->c2_qin) { pg->cp_sync_io = NULL; - cl_sync_io_note(anchor, 1); + cl_sync_io_note(env, anchor, 1); } /* wait for the IO to be finished. */ - rc = cl_sync_io_wait(env, io, &queue->c2_qout, - anchor, timeout); + rc = cl_sync_io_wait(env, anchor, timeout); + cl_page_list_assume(env, io, &queue->c2_qout); } else { LASSERT(list_empty(&queue->c2_qout.pl_pages)); cl_page_list_for_each(pg, &queue->c2_qin) @@ -931,26 +752,6 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, EXPORT_SYMBOL(cl_io_submit_sync); /** - * Cancel an IO which has been submitted by cl_io_submit_rw. - */ -static int cl_io_cancel(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue) -{ - struct cl_page *page; - int result = 0; - - CERROR("Canceling ongoing page transmission\n"); - cl_page_list_for_each(page, queue) { - int rc; - - LINVRNT(cl_page_in_io(page, io)); - rc = cl_page_cancel(env, page); - result = result ?: rc; - } - return result; -} - -/** * Main io loop. * * Pumps io through iterations calling @@ -1072,8 +873,8 @@ EXPORT_SYMBOL(cl_page_list_add); /** * Removes a page from a page list. */ -static void cl_page_list_del(const struct lu_env *env, - struct cl_page_list *plist, struct cl_page *page) +void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist, + struct cl_page *page) { LASSERT(plist->pl_nr > 0); LINVRNT(plist->pl_owner == current); @@ -1086,6 +887,7 @@ static void cl_page_list_del(const struct lu_env *env, lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist); cl_page_put(env, page); } +EXPORT_SYMBOL(cl_page_list_del); /** * Moves a page from one page list to another. @@ -1106,6 +908,24 @@ void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src, EXPORT_SYMBOL(cl_page_list_move); /** + * Moves a page from one page list to the head of another list. + */ +void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src, + struct cl_page *page) +{ + LASSERT(src->pl_nr > 0); + LINVRNT(dst->pl_owner == current); + LINVRNT(src->pl_owner == current); + + list_move(&page->cp_batch, &dst->pl_pages); + --src->pl_nr; + ++dst->pl_nr; + lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue", + src, dst); +} +EXPORT_SYMBOL(cl_page_list_move_head); + +/** * splice the cl_page_list, just as list head does */ void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head) @@ -1162,8 +982,7 @@ EXPORT_SYMBOL(cl_page_list_disown); /** * Releases pages from queue. */ -static void cl_page_list_fini(const struct lu_env *env, - struct cl_page_list *plist) +void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist) { struct cl_page *page; struct cl_page *temp; @@ -1174,6 +993,7 @@ static void cl_page_list_fini(const struct lu_env *env, cl_page_list_del(env, plist, page); LASSERT(plist->pl_nr == 0); } +EXPORT_SYMBOL(cl_page_list_fini); /** * Assumes all pages in a queue. @@ -1260,7 +1080,7 @@ EXPORT_SYMBOL(cl_2queue_init_page); /** * Returns top-level io. * - * \see cl_object_top(), cl_page_top(). + * \see cl_object_top() */ struct cl_io *cl_io_top(struct cl_io *io) { @@ -1323,19 +1143,14 @@ static int cl_req_init(const struct lu_env *env, struct cl_req *req, int result; result = 0; - page = cl_page_top(page); - do { - list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { - dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev); - if (dev->cd_ops->cdo_req_init) { - result = dev->cd_ops->cdo_req_init(env, - dev, req); - if (result != 0) - break; - } + list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { + dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev); + if (dev->cd_ops->cdo_req_init) { + result = dev->cd_ops->cdo_req_init(env, dev, req); + if (result != 0) + break; } - page = page->cp_child; - } while (page && result == 0); + } return result; } @@ -1384,14 +1199,16 @@ struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page, if (req->crq_o) { req->crq_nrobjs = nr_objects; result = cl_req_init(env, req, page); - } else + } else { result = -ENOMEM; + } if (result != 0) { cl_req_completion(env, req, result); req = ERR_PTR(result); } - } else + } else { req = ERR_PTR(-ENOMEM); + } return req; } EXPORT_SYMBOL(cl_req_alloc); @@ -1406,8 +1223,6 @@ void cl_req_page_add(const struct lu_env *env, struct cl_req_obj *rqo; int i; - page = cl_page_top(page); - LASSERT(list_empty(&page->cp_flight)); LASSERT(!page->cp_req); @@ -1438,8 +1253,6 @@ void cl_req_page_done(const struct lu_env *env, struct cl_page *page) { struct cl_req *req = page->cp_req; - page = cl_page_top(page); - LASSERT(!list_empty(&page->cp_flight)); LASSERT(req->crq_nrpages > 0); @@ -1511,25 +1324,39 @@ void cl_req_attr_set(const struct lu_env *env, struct cl_req *req, } EXPORT_SYMBOL(cl_req_attr_set); +/* cl_sync_io_callback assumes the caller must call cl_sync_io_wait() to + * wait for the IO to finish. + */ +void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor) +{ + wake_up_all(&anchor->csi_waitq); + + /* it's safe to nuke or reuse anchor now */ + atomic_set(&anchor->csi_barrier, 0); +} +EXPORT_SYMBOL(cl_sync_io_end); /** - * Initialize synchronous io wait anchor, for transfer of \a nrpages pages. + * Initialize synchronous io wait anchor */ -void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages) +void cl_sync_io_init(struct cl_sync_io *anchor, int nr, + void (*end)(const struct lu_env *, struct cl_sync_io *)) { + memset(anchor, 0, sizeof(*anchor)); init_waitqueue_head(&anchor->csi_waitq); - atomic_set(&anchor->csi_sync_nr, nrpages); - atomic_set(&anchor->csi_barrier, nrpages > 0); + atomic_set(&anchor->csi_sync_nr, nr); + atomic_set(&anchor->csi_barrier, nr > 0); anchor->csi_sync_rc = 0; + anchor->csi_end_io = end; + LASSERT(end); } EXPORT_SYMBOL(cl_sync_io_init); /** - * Wait until all transfer completes. Transfer completion routine has to call - * cl_sync_io_note() for every page. + * Wait until all IO completes. Transfer completion routine has to call + * cl_sync_io_note() for every entity. */ -int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, struct cl_sync_io *anchor, +int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor, long timeout) { struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout), @@ -1542,11 +1369,9 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, atomic_read(&anchor->csi_sync_nr) == 0, &lwi); if (rc < 0) { - CERROR("SYNC IO failed with error: %d, try to cancel %d remaining pages\n", + CERROR("IO failed: %d, still wait for %d remaining entries\n", rc, atomic_read(&anchor->csi_sync_nr)); - (void)cl_io_cancel(env, io, queue); - lwi = (struct l_wait_info) { 0 }; (void)l_wait_event(anchor->csi_waitq, atomic_read(&anchor->csi_sync_nr) == 0, @@ -1555,14 +1380,12 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, rc = anchor->csi_sync_rc; } LASSERT(atomic_read(&anchor->csi_sync_nr) == 0); - cl_page_list_assume(env, io, queue); /* wait until cl_sync_io_note() has done wakeup */ while (unlikely(atomic_read(&anchor->csi_barrier) != 0)) { cpu_relax(); } - POISON(anchor, 0x5a, sizeof(*anchor)); return rc; } EXPORT_SYMBOL(cl_sync_io_wait); @@ -1570,7 +1393,8 @@ EXPORT_SYMBOL(cl_sync_io_wait); /** * Indicate that transfer of a single page completed. */ -void cl_sync_io_note(struct cl_sync_io *anchor, int ioret) +void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor, + int ioret) { if (anchor->csi_sync_rc == 0 && ioret < 0) anchor->csi_sync_rc = ioret; @@ -1581,9 +1405,9 @@ void cl_sync_io_note(struct cl_sync_io *anchor, int ioret) */ LASSERT(atomic_read(&anchor->csi_sync_nr) > 0); if (atomic_dec_and_test(&anchor->csi_sync_nr)) { - wake_up_all(&anchor->csi_waitq); - /* it's safe to nuke or reuse anchor now */ - atomic_set(&anchor->csi_barrier, 0); + LASSERT(anchor->csi_end_io); + anchor->csi_end_io(env, anchor); + /* Can't access anchor any more */ } } EXPORT_SYMBOL(cl_sync_io_note); diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c index aec644eb4db9..9d7b5939b0fd 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_lock.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_lock.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -36,6 +32,7 @@ * Client Extent Lock. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ #define DEBUG_SUBSYSTEM S_CLASS @@ -47,138 +44,18 @@ #include "../include/cl_object.h" #include "cl_internal.h" -/** Lock class of cl_lock::cll_guard */ -static struct lock_class_key cl_lock_guard_class; -static struct kmem_cache *cl_lock_kmem; - -static struct lu_kmem_descr cl_lock_caches[] = { - { - .ckd_cache = &cl_lock_kmem, - .ckd_name = "cl_lock_kmem", - .ckd_size = sizeof (struct cl_lock) - }, - { - .ckd_cache = NULL - } -}; - -#define CS_LOCK_INC(o, item) -#define CS_LOCK_DEC(o, item) -#define CS_LOCKSTATE_INC(o, state) -#define CS_LOCKSTATE_DEC(o, state) - -/** - * Basic lock invariant that is maintained at all times. Caller either has a - * reference to \a lock, or somehow assures that \a lock cannot be freed. - * - * \see cl_lock_invariant() - */ -static int cl_lock_invariant_trusted(const struct lu_env *env, - const struct cl_lock *lock) -{ - return ergo(lock->cll_state == CLS_FREEING, lock->cll_holds == 0) && - atomic_read(&lock->cll_ref) >= lock->cll_holds && - lock->cll_holds >= lock->cll_users && - lock->cll_holds >= 0 && - lock->cll_users >= 0 && - lock->cll_depth >= 0; -} - -/** - * Stronger lock invariant, checking that caller has a reference on a lock. - * - * \see cl_lock_invariant_trusted() - */ -static int cl_lock_invariant(const struct lu_env *env, - const struct cl_lock *lock) -{ - int result; - - result = atomic_read(&lock->cll_ref) > 0 && - cl_lock_invariant_trusted(env, lock); - if (!result && env) - CL_LOCK_DEBUG(D_ERROR, env, lock, "invariant broken\n"); - return result; -} - -/** - * Returns lock "nesting": 0 for a top-lock and 1 for a sub-lock. - */ -static enum clt_nesting_level cl_lock_nesting(const struct cl_lock *lock) -{ - return cl_object_header(lock->cll_descr.cld_obj)->coh_nesting; -} - -/** - * Returns a set of counters for this lock, depending on a lock nesting. - */ -static struct cl_thread_counters *cl_lock_counters(const struct lu_env *env, - const struct cl_lock *lock) -{ - struct cl_thread_info *info; - enum clt_nesting_level nesting; - - info = cl_env_info(env); - nesting = cl_lock_nesting(lock); - LASSERT(nesting < ARRAY_SIZE(info->clt_counters)); - return &info->clt_counters[nesting]; -} - static void cl_lock_trace0(int level, const struct lu_env *env, const char *prefix, const struct cl_lock *lock, const char *func, const int line) { struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj); - CDEBUG(level, "%s: %p@(%d %p %d %d %d %d %d %lx)(%p/%d/%d) at %s():%d\n", - prefix, lock, atomic_read(&lock->cll_ref), - lock->cll_guarder, lock->cll_depth, - lock->cll_state, lock->cll_error, lock->cll_holds, - lock->cll_users, lock->cll_flags, - env, h->coh_nesting, cl_lock_nr_mutexed(env), - func, line); + CDEBUG(level, "%s: %p (%p/%d) at %s():%d\n", + prefix, lock, env, h->coh_nesting, func, line); } - -#define cl_lock_trace(level, env, prefix, lock) \ +#define cl_lock_trace(level, env, prefix, lock) \ cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__) -#define RETIP ((unsigned long)__builtin_return_address(0)) - -#ifdef CONFIG_LOCKDEP -static struct lock_class_key cl_lock_key; - -static void cl_lock_lockdep_init(struct cl_lock *lock) -{ - lockdep_set_class_and_name(lock, &cl_lock_key, "EXT"); -} - -static void cl_lock_lockdep_acquire(const struct lu_env *env, - struct cl_lock *lock, __u32 enqflags) -{ - cl_lock_counters(env, lock)->ctc_nr_locks_acquired++; - lock_map_acquire(&lock->dep_map); -} - -static void cl_lock_lockdep_release(const struct lu_env *env, - struct cl_lock *lock) -{ - cl_lock_counters(env, lock)->ctc_nr_locks_acquired--; - lock_release(&lock->dep_map, 0, RETIP); -} - -#else /* !CONFIG_LOCKDEP */ - -static void cl_lock_lockdep_init(struct cl_lock *lock) -{} -static void cl_lock_lockdep_acquire(const struct lu_env *env, - struct cl_lock *lock, __u32 enqflags) -{} -static void cl_lock_lockdep_release(const struct lu_env *env, - struct cl_lock *lock) -{} - -#endif /* !CONFIG_LOCKDEP */ - /** * Adds lock slice to the compound lock. * @@ -199,62 +76,10 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, } EXPORT_SYMBOL(cl_lock_slice_add); -/** - * Returns true iff a lock with the mode \a has provides at least the same - * guarantees as a lock with the mode \a need. - */ -int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need) -{ - LINVRNT(need == CLM_READ || need == CLM_WRITE || - need == CLM_PHANTOM || need == CLM_GROUP); - LINVRNT(has == CLM_READ || has == CLM_WRITE || - has == CLM_PHANTOM || has == CLM_GROUP); - CLASSERT(CLM_PHANTOM < CLM_READ); - CLASSERT(CLM_READ < CLM_WRITE); - CLASSERT(CLM_WRITE < CLM_GROUP); - - if (has != CLM_GROUP) - return need <= has; - else - return need == has; -} -EXPORT_SYMBOL(cl_lock_mode_match); - -/** - * Returns true iff extent portions of lock descriptions match. - */ -int cl_lock_ext_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need) +void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock) { - return - has->cld_start <= need->cld_start && - has->cld_end >= need->cld_end && - cl_lock_mode_match(has->cld_mode, need->cld_mode) && - (has->cld_mode != CLM_GROUP || has->cld_gid == need->cld_gid); -} -EXPORT_SYMBOL(cl_lock_ext_match); + cl_lock_trace(D_DLMTRACE, env, "destroy lock", lock); -/** - * Returns true iff a lock with the description \a has provides at least the - * same guarantees as a lock with the description \a need. - */ -int cl_lock_descr_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need) -{ - return - cl_object_same(has->cld_obj, need->cld_obj) && - cl_lock_ext_match(has, need); -} -EXPORT_SYMBOL(cl_lock_descr_match); - -static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_object *obj = lock->cll_descr.cld_obj; - - LINVRNT(!cl_lock_is_mutexed(lock)); - - cl_lock_trace(D_DLMTRACE, env, "free lock", lock); - might_sleep(); while (!list_empty(&lock->cll_layers)) { struct cl_lock_slice *slice; @@ -263,350 +88,36 @@ static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock) list_del_init(lock->cll_layers.next); slice->cls_ops->clo_fini(env, slice); } - CS_LOCK_DEC(obj, total); - CS_LOCKSTATE_DEC(obj, lock->cll_state); - lu_object_ref_del_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock", lock); - cl_object_put(env, obj); - lu_ref_fini(&lock->cll_reference); - lu_ref_fini(&lock->cll_holders); - mutex_destroy(&lock->cll_guard); - kmem_cache_free(cl_lock_kmem, lock); -} - -/** - * Releases a reference on a lock. - * - * When last reference is released, lock is returned to the cache, unless it - * is in cl_lock_state::CLS_FREEING state, in which case it is destroyed - * immediately. - * - * \see cl_object_put(), cl_page_put() - */ -void cl_lock_put(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_object *obj; - - LINVRNT(cl_lock_invariant(env, lock)); - obj = lock->cll_descr.cld_obj; - LINVRNT(obj); - - CDEBUG(D_TRACE, "releasing reference: %d %p %lu\n", - atomic_read(&lock->cll_ref), lock, RETIP); - - if (atomic_dec_and_test(&lock->cll_ref)) { - if (lock->cll_state == CLS_FREEING) { - LASSERT(list_empty(&lock->cll_linkage)); - cl_lock_free(env, lock); - } - CS_LOCK_DEC(obj, busy); - } -} -EXPORT_SYMBOL(cl_lock_put); - -/** - * Acquires an additional reference to a lock. - * - * This can be called only by caller already possessing a reference to \a - * lock. - * - * \see cl_object_get(), cl_page_get() - */ -void cl_lock_get(struct cl_lock *lock) -{ - LINVRNT(cl_lock_invariant(NULL, lock)); - CDEBUG(D_TRACE, "acquiring reference: %d %p %lu\n", - atomic_read(&lock->cll_ref), lock, RETIP); - atomic_inc(&lock->cll_ref); -} -EXPORT_SYMBOL(cl_lock_get); - -/** - * Acquires a reference to a lock. - * - * This is much like cl_lock_get(), except that this function can be used to - * acquire initial reference to the cached lock. Caller has to deal with all - * possible races. Use with care! - * - * \see cl_page_get_trust() - */ -void cl_lock_get_trust(struct cl_lock *lock) -{ - CDEBUG(D_TRACE, "acquiring trusted reference: %d %p %lu\n", - atomic_read(&lock->cll_ref), lock, RETIP); - if (atomic_inc_return(&lock->cll_ref) == 1) - CS_LOCK_INC(lock->cll_descr.cld_obj, busy); -} -EXPORT_SYMBOL(cl_lock_get_trust); - -/** - * Helper function destroying the lock that wasn't completely initialized. - * - * Other threads can acquire references to the top-lock through its - * sub-locks. Hence, it cannot be cl_lock_free()-ed immediately. - */ -static void cl_lock_finish(const struct lu_env *env, struct cl_lock *lock) -{ - cl_lock_mutex_get(env, lock); - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); - cl_lock_mutex_put(env, lock); - cl_lock_put(env, lock); -} - -static struct cl_lock *cl_lock_alloc(const struct lu_env *env, - struct cl_object *obj, - const struct cl_io *io, - const struct cl_lock_descr *descr) -{ - struct cl_lock *lock; - struct lu_object_header *head; - - lock = kmem_cache_zalloc(cl_lock_kmem, GFP_NOFS); - if (lock) { - atomic_set(&lock->cll_ref, 1); - lock->cll_descr = *descr; - lock->cll_state = CLS_NEW; - cl_object_get(obj); - lu_object_ref_add_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock", - lock); - INIT_LIST_HEAD(&lock->cll_layers); - INIT_LIST_HEAD(&lock->cll_linkage); - INIT_LIST_HEAD(&lock->cll_inclosure); - lu_ref_init(&lock->cll_reference); - lu_ref_init(&lock->cll_holders); - mutex_init(&lock->cll_guard); - lockdep_set_class(&lock->cll_guard, &cl_lock_guard_class); - init_waitqueue_head(&lock->cll_wq); - head = obj->co_lu.lo_header; - CS_LOCKSTATE_INC(obj, CLS_NEW); - CS_LOCK_INC(obj, total); - CS_LOCK_INC(obj, create); - cl_lock_lockdep_init(lock); - list_for_each_entry(obj, &head->loh_layers, co_lu.lo_linkage) { - int err; - - err = obj->co_ops->coo_lock_init(env, obj, lock, io); - if (err != 0) { - cl_lock_finish(env, lock); - lock = ERR_PTR(err); - break; - } - } - } else - lock = ERR_PTR(-ENOMEM); - return lock; -} - -/** - * Transfer the lock into INTRANSIT state and return the original state. - * - * \pre state: CLS_CACHED, CLS_HELD or CLS_ENQUEUED - * \post state: CLS_INTRANSIT - * \see CLS_INTRANSIT - */ -static enum cl_lock_state cl_lock_intransit(const struct lu_env *env, - struct cl_lock *lock) -{ - enum cl_lock_state state = lock->cll_state; - - LASSERT(cl_lock_is_mutexed(lock)); - LASSERT(state != CLS_INTRANSIT); - LASSERTF(state >= CLS_ENQUEUED && state <= CLS_CACHED, - "Malformed lock state %d.\n", state); - - cl_lock_state_set(env, lock, CLS_INTRANSIT); - lock->cll_intransit_owner = current; - cl_lock_hold_add(env, lock, "intransit", current); - return state; -} - -/** - * Exit the intransit state and restore the lock state to the original state - */ -static void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock, - enum cl_lock_state state) -{ - LASSERT(cl_lock_is_mutexed(lock)); - LASSERT(lock->cll_state == CLS_INTRANSIT); - LASSERT(state != CLS_INTRANSIT); - LASSERT(lock->cll_intransit_owner == current); - - lock->cll_intransit_owner = NULL; - cl_lock_state_set(env, lock, state); - cl_lock_unhold(env, lock, "intransit", current); -} - -/** - * Checking whether the lock is intransit state - */ -int cl_lock_is_intransit(struct cl_lock *lock) -{ - LASSERT(cl_lock_is_mutexed(lock)); - return lock->cll_state == CLS_INTRANSIT && - lock->cll_intransit_owner != current; -} -EXPORT_SYMBOL(cl_lock_is_intransit); -/** - * Returns true iff lock is "suitable" for given io. E.g., locks acquired by - * truncate and O_APPEND cannot be reused for read/non-append-write, as they - * cover multiple stripes and can trigger cascading timeouts. - */ -static int cl_lock_fits_into(const struct lu_env *env, - const struct cl_lock *lock, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - const struct cl_lock_slice *slice; - - LINVRNT(cl_lock_invariant_trusted(env, lock)); - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_fits_into && - !slice->cls_ops->clo_fits_into(env, slice, need, io)) - return 0; - } - return 1; + POISON(lock, 0x5a, sizeof(*lock)); } +EXPORT_SYMBOL(cl_lock_fini); -static struct cl_lock *cl_lock_lookup(const struct lu_env *env, - struct cl_object *obj, - const struct cl_io *io, - const struct cl_lock_descr *need) +int cl_lock_init(const struct lu_env *env, struct cl_lock *lock, + const struct cl_io *io) { - struct cl_lock *lock; - struct cl_object_header *head; - - head = cl_object_header(obj); - assert_spin_locked(&head->coh_lock_guard); - CS_LOCK_INC(obj, lookup); - list_for_each_entry(lock, &head->coh_locks, cll_linkage) { - int matched; - - matched = cl_lock_ext_match(&lock->cll_descr, need) && - lock->cll_state < CLS_FREEING && - lock->cll_error == 0 && - !(lock->cll_flags & CLF_CANCELLED) && - cl_lock_fits_into(env, lock, need, io); - CDEBUG(D_DLMTRACE, "has: "DDESCR"(%d) need: "DDESCR": %d\n", - PDESCR(&lock->cll_descr), lock->cll_state, PDESCR(need), - matched); - if (matched) { - cl_lock_get_trust(lock); - CS_LOCK_INC(obj, hit); - return lock; - } - } - return NULL; -} - -/** - * Returns a lock matching description \a need. - * - * This is the main entry point into the cl_lock caching interface. First, a - * cache (implemented as a per-object linked list) is consulted. If lock is - * found there, it is returned immediately. Otherwise new lock is allocated - * and returned. In any case, additional reference to lock is acquired. - * - * \see cl_object_find(), cl_page_find() - */ -static struct cl_lock *cl_lock_find(const struct lu_env *env, - const struct cl_io *io, - const struct cl_lock_descr *need) -{ - struct cl_object_header *head; - struct cl_object *obj; - struct cl_lock *lock; - - obj = need->cld_obj; - head = cl_object_header(obj); - - spin_lock(&head->coh_lock_guard); - lock = cl_lock_lookup(env, obj, io, need); - spin_unlock(&head->coh_lock_guard); - - if (!lock) { - lock = cl_lock_alloc(env, obj, io, need); - if (!IS_ERR(lock)) { - struct cl_lock *ghost; - - spin_lock(&head->coh_lock_guard); - ghost = cl_lock_lookup(env, obj, io, need); - if (!ghost) { - cl_lock_get_trust(lock); - list_add_tail(&lock->cll_linkage, - &head->coh_locks); - spin_unlock(&head->coh_lock_guard); - CS_LOCK_INC(obj, busy); - } else { - spin_unlock(&head->coh_lock_guard); - /* - * Other threads can acquire references to the - * top-lock through its sub-locks. Hence, it - * cannot be cl_lock_free()-ed immediately. - */ - cl_lock_finish(env, lock); - lock = ghost; - } - } - } - return lock; -} - -/** - * Returns existing lock matching given description. This is similar to - * cl_lock_find() except that no new lock is created, and returned lock is - * guaranteed to be in enum cl_lock_state::CLS_HELD state. - */ -struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source) -{ - struct cl_object_header *head; - struct cl_object *obj; - struct cl_lock *lock; - - obj = need->cld_obj; - head = cl_object_header(obj); + struct cl_object *obj = lock->cll_descr.cld_obj; + struct cl_object *scan; + int result = 0; - do { - spin_lock(&head->coh_lock_guard); - lock = cl_lock_lookup(env, obj, io, need); - spin_unlock(&head->coh_lock_guard); - if (!lock) - return NULL; + /* Make sure cl_lock::cll_descr is initialized. */ + LASSERT(obj); - cl_lock_mutex_get(env, lock); - if (lock->cll_state == CLS_INTRANSIT) - /* Don't care return value. */ - cl_lock_state_wait(env, lock); - if (lock->cll_state == CLS_FREEING) { - cl_lock_mutex_put(env, lock); - cl_lock_put(env, lock); - lock = NULL; + INIT_LIST_HEAD(&lock->cll_layers); + list_for_each_entry(scan, &obj->co_lu.lo_header->loh_layers, + co_lu.lo_linkage) { + result = scan->co_ops->coo_lock_init(env, scan, lock, io); + if (result != 0) { + cl_lock_fini(env, lock); + break; } - } while (!lock); - - cl_lock_hold_add(env, lock, scope, source); - cl_lock_user_add(env, lock); - if (lock->cll_state == CLS_CACHED) - cl_use_try(env, lock, 1); - if (lock->cll_state == CLS_HELD) { - cl_lock_mutex_put(env, lock); - cl_lock_lockdep_acquire(env, lock, 0); - cl_lock_put(env, lock); - } else { - cl_unuse_try(env, lock); - cl_lock_unhold(env, lock, scope, source); - cl_lock_mutex_put(env, lock); - cl_lock_put(env, lock); - lock = NULL; } - return lock; + return result; } -EXPORT_SYMBOL(cl_lock_peek); +EXPORT_SYMBOL(cl_lock_init); /** - * Returns a slice within a lock, corresponding to the given layer in the + * Returns a slice with a lock, corresponding to the given layer in the * device stack. * * \see cl_page_at() @@ -616,8 +127,6 @@ const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, { const struct cl_lock_slice *slice; - LINVRNT(cl_lock_invariant_trusted(NULL, lock)); - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype) return slice; @@ -626,1537 +135,96 @@ const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, } EXPORT_SYMBOL(cl_lock_at); -static void cl_lock_mutex_tail(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_thread_counters *counters; - - counters = cl_lock_counters(env, lock); - lock->cll_depth++; - counters->ctc_nr_locks_locked++; - lu_ref_add(&counters->ctc_locks_locked, "cll_guard", lock); - cl_lock_trace(D_TRACE, env, "got mutex", lock); -} - -/** - * Locks cl_lock object. - * - * This is used to manipulate cl_lock fields, and to serialize state - * transitions in the lock state machine. - * - * \post cl_lock_is_mutexed(lock) - * - * \see cl_lock_mutex_put() - */ -void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_invariant(env, lock)); - - if (lock->cll_guarder == current) { - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(lock->cll_depth > 0); - } else { - struct cl_object_header *hdr; - struct cl_thread_info *info; - int i; - - LINVRNT(lock->cll_guarder != current); - hdr = cl_object_header(lock->cll_descr.cld_obj); - /* - * Check that mutices are taken in the bottom-to-top order. - */ - info = cl_env_info(env); - for (i = 0; i < hdr->coh_nesting; ++i) - LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0); - mutex_lock_nested(&lock->cll_guard, hdr->coh_nesting); - lock->cll_guarder = current; - LINVRNT(lock->cll_depth == 0); - } - cl_lock_mutex_tail(env, lock); -} -EXPORT_SYMBOL(cl_lock_mutex_get); - -/** - * Try-locks cl_lock object. - * - * \retval 0 \a lock was successfully locked - * - * \retval -EBUSY \a lock cannot be locked right now - * - * \post ergo(result == 0, cl_lock_is_mutexed(lock)) - * - * \see cl_lock_mutex_get() - */ -static int cl_lock_mutex_try(const struct lu_env *env, struct cl_lock *lock) -{ - int result; - - LINVRNT(cl_lock_invariant_trusted(env, lock)); - - result = 0; - if (lock->cll_guarder == current) { - LINVRNT(lock->cll_depth > 0); - cl_lock_mutex_tail(env, lock); - } else if (mutex_trylock(&lock->cll_guard)) { - LINVRNT(lock->cll_depth == 0); - lock->cll_guarder = current; - cl_lock_mutex_tail(env, lock); - } else - result = -EBUSY; - return result; -} - -/** - {* Unlocks cl_lock object. - * - * \pre cl_lock_is_mutexed(lock) - * - * \see cl_lock_mutex_get() - */ -void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_thread_counters *counters; - - LINVRNT(cl_lock_invariant(env, lock)); - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(lock->cll_guarder == current); - LINVRNT(lock->cll_depth > 0); - - counters = cl_lock_counters(env, lock); - LINVRNT(counters->ctc_nr_locks_locked > 0); - - cl_lock_trace(D_TRACE, env, "put mutex", lock); - lu_ref_del(&counters->ctc_locks_locked, "cll_guard", lock); - counters->ctc_nr_locks_locked--; - if (--lock->cll_depth == 0) { - lock->cll_guarder = NULL; - mutex_unlock(&lock->cll_guard); - } -} -EXPORT_SYMBOL(cl_lock_mutex_put); - -/** - * Returns true iff lock's mutex is owned by the current thread. - */ -int cl_lock_is_mutexed(struct cl_lock *lock) -{ - return lock->cll_guarder == current; -} -EXPORT_SYMBOL(cl_lock_is_mutexed); - -/** - * Returns number of cl_lock mutices held by the current thread (environment). - */ -int cl_lock_nr_mutexed(const struct lu_env *env) -{ - struct cl_thread_info *info; - int i; - int locked; - - /* - * NOTE: if summation across all nesting levels (currently 2) proves - * too expensive, a summary counter can be added to - * struct cl_thread_info. - */ - info = cl_env_info(env); - for (i = 0, locked = 0; i < ARRAY_SIZE(info->clt_counters); ++i) - locked += info->clt_counters[i].ctc_nr_locks_locked; - return locked; -} -EXPORT_SYMBOL(cl_lock_nr_mutexed); - -static void cl_lock_cancel0(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - if (!(lock->cll_flags & CLF_CANCELLED)) { - const struct cl_lock_slice *slice; - - lock->cll_flags |= CLF_CANCELLED; - list_for_each_entry_reverse(slice, &lock->cll_layers, - cls_linkage) { - if (slice->cls_ops->clo_cancel) - slice->cls_ops->clo_cancel(env, slice); - } - } -} - -static void cl_lock_delete0(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_object_header *head; - const struct cl_lock_slice *slice; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - if (lock->cll_state < CLS_FREEING) { - bool in_cache; - - LASSERT(lock->cll_state != CLS_INTRANSIT); - cl_lock_state_set(env, lock, CLS_FREEING); - - head = cl_object_header(lock->cll_descr.cld_obj); - - spin_lock(&head->coh_lock_guard); - in_cache = !list_empty(&lock->cll_linkage); - if (in_cache) - list_del_init(&lock->cll_linkage); - spin_unlock(&head->coh_lock_guard); - - if (in_cache) /* coh_locks cache holds a refcount. */ - cl_lock_put(env, lock); - - /* - * From now on, no new references to this lock can be acquired - * by cl_lock_lookup(). - */ - list_for_each_entry_reverse(slice, &lock->cll_layers, - cls_linkage) { - if (slice->cls_ops->clo_delete) - slice->cls_ops->clo_delete(env, slice); - } - /* - * From now on, no new references to this lock can be acquired - * by layer-specific means (like a pointer from struct - * ldlm_lock in osc, or a pointer from top-lock to sub-lock in - * lov). - * - * Lock will be finally freed in cl_lock_put() when last of - * existing references goes away. - */ - } -} - -/** - * Mod(ifie)s cl_lock::cll_holds counter for a given lock. Also, for a - * top-lock (nesting == 0) accounts for this modification in the per-thread - * debugging counters. Sub-lock holds can be released by a thread different - * from one that acquired it. - */ -static void cl_lock_hold_mod(const struct lu_env *env, struct cl_lock *lock, - int delta) -{ - struct cl_thread_counters *counters; - enum clt_nesting_level nesting; - - lock->cll_holds += delta; - nesting = cl_lock_nesting(lock); - if (nesting == CNL_TOP) { - counters = &cl_env_info(env)->clt_counters[CNL_TOP]; - counters->ctc_nr_held += delta; - LASSERT(counters->ctc_nr_held >= 0); - } -} - -/** - * Mod(ifie)s cl_lock::cll_users counter for a given lock. See - * cl_lock_hold_mod() for the explanation of the debugging code. - */ -static void cl_lock_used_mod(const struct lu_env *env, struct cl_lock *lock, - int delta) -{ - struct cl_thread_counters *counters; - enum clt_nesting_level nesting; - - lock->cll_users += delta; - nesting = cl_lock_nesting(lock); - if (nesting == CNL_TOP) { - counters = &cl_env_info(env)->clt_counters[CNL_TOP]; - counters->ctc_nr_used += delta; - LASSERT(counters->ctc_nr_used >= 0); - } -} - -void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_holds > 0); - - cl_lock_trace(D_DLMTRACE, env, "hold release lock", lock); - lu_ref_del(&lock->cll_holders, scope, source); - cl_lock_hold_mod(env, lock, -1); - if (lock->cll_holds == 0) { - CL_LOCK_ASSERT(lock->cll_state != CLS_HELD, env, lock); - if (lock->cll_descr.cld_mode == CLM_PHANTOM || - lock->cll_descr.cld_mode == CLM_GROUP || - lock->cll_state != CLS_CACHED) - /* - * If lock is still phantom or grouplock when user is - * done with it---destroy the lock. - */ - lock->cll_flags |= CLF_CANCELPEND|CLF_DOOMED; - if (lock->cll_flags & CLF_CANCELPEND) { - lock->cll_flags &= ~CLF_CANCELPEND; - cl_lock_cancel0(env, lock); - } - if (lock->cll_flags & CLF_DOOMED) { - /* no longer doomed: it's dead... Jim. */ - lock->cll_flags &= ~CLF_DOOMED; - cl_lock_delete0(env, lock); - } - } -} -EXPORT_SYMBOL(cl_lock_hold_release); - -/** - * Waits until lock state is changed. - * - * This function is called with cl_lock mutex locked, atomically releases - * mutex and goes to sleep, waiting for a lock state change (signaled by - * cl_lock_signal()), and re-acquires the mutex before return. - * - * This function is used to wait until lock state machine makes some progress - * and to emulate synchronous operations on top of asynchronous lock - * interface. - * - * \retval -EINTR wait was interrupted - * - * \retval 0 wait wasn't interrupted - * - * \pre cl_lock_is_mutexed(lock) - * - * \see cl_lock_signal() - */ -int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock) -{ - wait_queue_t waiter; - sigset_t blocked; - int result; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_depth == 1); - LASSERT(lock->cll_state != CLS_FREEING); /* too late to wait */ - - cl_lock_trace(D_DLMTRACE, env, "state wait lock", lock); - result = lock->cll_error; - if (result == 0) { - /* To avoid being interrupted by the 'non-fatal' signals - * (SIGCHLD, for instance), we'd block them temporarily. - * LU-305 - */ - blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS); - - init_waitqueue_entry(&waiter, current); - add_wait_queue(&lock->cll_wq, &waiter); - set_current_state(TASK_INTERRUPTIBLE); - cl_lock_mutex_put(env, lock); - - LASSERT(cl_lock_nr_mutexed(env) == 0); - - /* Returning ERESTARTSYS instead of EINTR so syscalls - * can be restarted if signals are pending here - */ - result = -ERESTARTSYS; - if (likely(!OBD_FAIL_CHECK(OBD_FAIL_LOCK_STATE_WAIT_INTR))) { - schedule(); - if (!cfs_signal_pending()) - result = 0; - } - - cl_lock_mutex_get(env, lock); - set_current_state(TASK_RUNNING); - remove_wait_queue(&lock->cll_wq, &waiter); - - /* Restore old blocked signals */ - cfs_restore_sigs(blocked); - } - return result; -} -EXPORT_SYMBOL(cl_lock_state_wait); - -static void cl_lock_state_signal(const struct lu_env *env, struct cl_lock *lock, - enum cl_lock_state state) -{ - const struct cl_lock_slice *slice; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) - if (slice->cls_ops->clo_state) - slice->cls_ops->clo_state(env, slice, state); - wake_up_all(&lock->cll_wq); -} - -/** - * Notifies waiters that lock state changed. - * - * Wakes up all waiters sleeping in cl_lock_state_wait(), also notifies all - * layers about state change by calling cl_lock_operations::clo_state() - * top-to-bottom. - */ -void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock) -{ - cl_lock_trace(D_DLMTRACE, env, "state signal lock", lock); - cl_lock_state_signal(env, lock, lock->cll_state); -} -EXPORT_SYMBOL(cl_lock_signal); - -/** - * Changes lock state. - * - * This function is invoked to notify layers that lock state changed, possible - * as a result of an asynchronous event such as call-back reception. - * - * \post lock->cll_state == state - * - * \see cl_lock_operations::clo_state() - */ -void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock, - enum cl_lock_state state) -{ - LASSERT(lock->cll_state <= state || - (lock->cll_state == CLS_CACHED && - (state == CLS_HELD || /* lock found in cache */ - state == CLS_NEW || /* sub-lock canceled */ - state == CLS_INTRANSIT)) || - /* lock is in transit state */ - lock->cll_state == CLS_INTRANSIT); - - if (lock->cll_state != state) { - CS_LOCKSTATE_DEC(lock->cll_descr.cld_obj, lock->cll_state); - CS_LOCKSTATE_INC(lock->cll_descr.cld_obj, state); - - cl_lock_state_signal(env, lock, state); - lock->cll_state = state; - } -} -EXPORT_SYMBOL(cl_lock_state_set); - -static int cl_unuse_try_internal(const struct lu_env *env, struct cl_lock *lock) -{ - const struct cl_lock_slice *slice; - int result; - - do { - result = 0; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_state == CLS_INTRANSIT); - - result = -ENOSYS; - list_for_each_entry_reverse(slice, &lock->cll_layers, - cls_linkage) { - if (slice->cls_ops->clo_unuse) { - result = slice->cls_ops->clo_unuse(env, slice); - if (result != 0) - break; - } - } - LASSERT(result != -ENOSYS); - } while (result == CLO_REPEAT); - - return result; -} - -/** - * Yanks lock from the cache (cl_lock_state::CLS_CACHED state) by calling - * cl_lock_operations::clo_use() top-to-bottom to notify layers. - * @atomic = 1, it must unuse the lock to recovery the lock to keep the - * use process atomic - */ -int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic) +void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock) { const struct cl_lock_slice *slice; - int result; - enum cl_lock_state state; - - cl_lock_trace(D_DLMTRACE, env, "use lock", lock); - - LASSERT(lock->cll_state == CLS_CACHED); - if (lock->cll_error) - return lock->cll_error; - - result = -ENOSYS; - state = cl_lock_intransit(env, lock); - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_use) { - result = slice->cls_ops->clo_use(env, slice); - if (result != 0) - break; - } - } - LASSERT(result != -ENOSYS); - - LASSERTF(lock->cll_state == CLS_INTRANSIT, "Wrong state %d.\n", - lock->cll_state); - - if (result == 0) { - state = CLS_HELD; - } else { - if (result == -ESTALE) { - /* - * ESTALE means sublock being cancelled - * at this time, and set lock state to - * be NEW here and ask the caller to repeat. - */ - state = CLS_NEW; - result = CLO_REPEAT; - } - - /* @atomic means back-off-on-failure. */ - if (atomic) { - int rc; - - rc = cl_unuse_try_internal(env, lock); - /* Vet the results. */ - if (rc < 0 && result > 0) - result = rc; - } + cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock); + list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) { + if (slice->cls_ops->clo_cancel) + slice->cls_ops->clo_cancel(env, slice); } - cl_lock_extransit(env, lock, state); - return result; } -EXPORT_SYMBOL(cl_use_try); +EXPORT_SYMBOL(cl_lock_cancel); /** - * Helper for cl_enqueue_try() that calls ->clo_enqueue() across all layers - * top-to-bottom. + * Enqueue a lock. + * \param anchor: if we need to wait for resources before getting the lock, + * use @anchor for the purpose. + * \retval 0 enqueue successfully + * \retval <0 error code */ -static int cl_enqueue_kick(const struct lu_env *env, - struct cl_lock *lock, - struct cl_io *io, __u32 flags) +int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock, struct cl_sync_io *anchor) { - int result; const struct cl_lock_slice *slice; + int rc = -ENOSYS; - result = -ENOSYS; list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_enqueue) { - result = slice->cls_ops->clo_enqueue(env, - slice, io, flags); - if (result != 0) - break; - } - } - LASSERT(result != -ENOSYS); - return result; -} - -/** - * Tries to enqueue a lock. - * - * This function is called repeatedly by cl_enqueue() until either lock is - * enqueued, or error occurs. This function does not block waiting for - * networking communication to complete. - * - * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED || - * lock->cll_state == CLS_HELD) - * - * \see cl_enqueue() cl_lock_operations::clo_enqueue() - * \see cl_lock_state::CLS_ENQUEUED - */ -int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock, - struct cl_io *io, __u32 flags) -{ - int result; - - cl_lock_trace(D_DLMTRACE, env, "enqueue lock", lock); - do { - LINVRNT(cl_lock_is_mutexed(lock)); - - result = lock->cll_error; - if (result != 0) - break; - - switch (lock->cll_state) { - case CLS_NEW: - cl_lock_state_set(env, lock, CLS_QUEUING); - /* fall-through */ - case CLS_QUEUING: - /* kick layers. */ - result = cl_enqueue_kick(env, lock, io, flags); - /* For AGL case, the cl_lock::cll_state may - * become CLS_HELD already. - */ - if (result == 0 && lock->cll_state == CLS_QUEUING) - cl_lock_state_set(env, lock, CLS_ENQUEUED); - break; - case CLS_INTRANSIT: - LASSERT(cl_lock_is_intransit(lock)); - result = CLO_WAIT; - break; - case CLS_CACHED: - /* yank lock from the cache. */ - result = cl_use_try(env, lock, 0); - break; - case CLS_ENQUEUED: - case CLS_HELD: - result = 0; - break; - default: - case CLS_FREEING: - /* - * impossible, only held locks with increased - * ->cll_holds can be enqueued, and they cannot be - * freed. - */ - LBUG(); - } - } while (result == CLO_REPEAT); - return result; -} -EXPORT_SYMBOL(cl_enqueue_try); - -/** - * Cancel the conflicting lock found during previous enqueue. - * - * \retval 0 conflicting lock has been canceled. - * \retval -ve error code. - */ -int cl_lock_enqueue_wait(const struct lu_env *env, - struct cl_lock *lock, - int keep_mutex) -{ - struct cl_lock *conflict; - int rc = 0; - - LASSERT(cl_lock_is_mutexed(lock)); - LASSERT(lock->cll_state == CLS_QUEUING); - LASSERT(lock->cll_conflict); + if (!slice->cls_ops->clo_enqueue) + continue; - conflict = lock->cll_conflict; - lock->cll_conflict = NULL; - - cl_lock_mutex_put(env, lock); - LASSERT(cl_lock_nr_mutexed(env) == 0); - - cl_lock_mutex_get(env, conflict); - cl_lock_trace(D_DLMTRACE, env, "enqueue wait", conflict); - cl_lock_cancel(env, conflict); - cl_lock_delete(env, conflict); - - while (conflict->cll_state != CLS_FREEING) { - rc = cl_lock_state_wait(env, conflict); + rc = slice->cls_ops->clo_enqueue(env, slice, io, anchor); if (rc != 0) break; - } - cl_lock_mutex_put(env, conflict); - lu_ref_del(&conflict->cll_reference, "cancel-wait", lock); - cl_lock_put(env, conflict); - - if (keep_mutex) - cl_lock_mutex_get(env, lock); - - LASSERT(rc <= 0); - return rc; -} -EXPORT_SYMBOL(cl_lock_enqueue_wait); - -static int cl_enqueue_locked(const struct lu_env *env, struct cl_lock *lock, - struct cl_io *io, __u32 enqflags) -{ - int result; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_holds > 0); - - cl_lock_user_add(env, lock); - do { - result = cl_enqueue_try(env, lock, io, enqflags); - if (result == CLO_WAIT) { - if (lock->cll_conflict) - result = cl_lock_enqueue_wait(env, lock, 1); - else - result = cl_lock_state_wait(env, lock); - if (result == 0) - continue; - } - break; - } while (1); - if (result != 0) - cl_unuse_try(env, lock); - LASSERT(ergo(result == 0 && !(enqflags & CEF_AGL), - lock->cll_state == CLS_ENQUEUED || - lock->cll_state == CLS_HELD)); - return result; -} - -/** - * Tries to unlock a lock. - * - * This function is called to release underlying resource: - * 1. for top lock, the resource is sublocks it held; - * 2. for sublock, the resource is the reference to dlmlock. - * - * cl_unuse_try is a one-shot operation, so it must NOT return CLO_WAIT. - * - * \see cl_unuse() cl_lock_operations::clo_unuse() - * \see cl_lock_state::CLS_CACHED - */ -int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock) -{ - int result; - enum cl_lock_state state = CLS_NEW; - - cl_lock_trace(D_DLMTRACE, env, "unuse lock", lock); - - if (lock->cll_users > 1) { - cl_lock_user_del(env, lock); - return 0; - } - - /* Only if the lock is in CLS_HELD or CLS_ENQUEUED state, it can hold - * underlying resources. - */ - if (!(lock->cll_state == CLS_HELD || lock->cll_state == CLS_ENQUEUED)) { - cl_lock_user_del(env, lock); - return 0; - } - - /* - * New lock users (->cll_users) are not protecting unlocking - * from proceeding. From this point, lock eventually reaches - * CLS_CACHED, is reinitialized to CLS_NEW or fails into - * CLS_FREEING. - */ - state = cl_lock_intransit(env, lock); - - result = cl_unuse_try_internal(env, lock); - LASSERT(lock->cll_state == CLS_INTRANSIT); - LASSERT(result != CLO_WAIT); - cl_lock_user_del(env, lock); - if (result == 0 || result == -ESTALE) { - /* - * Return lock back to the cache. This is the only - * place where lock is moved into CLS_CACHED state. - * - * If one of ->clo_unuse() methods returned -ESTALE, lock - * cannot be placed into cache and has to be - * re-initialized. This happens e.g., when a sub-lock was - * canceled while unlocking was in progress. - */ - if (state == CLS_HELD && result == 0) - state = CLS_CACHED; - else - state = CLS_NEW; - cl_lock_extransit(env, lock, state); - - /* - * Hide -ESTALE error. - * If the lock is a glimpse lock, and it has multiple - * stripes. Assuming that one of its sublock returned -ENAVAIL, - * and other sublocks are matched write locks. In this case, - * we can't set this lock to error because otherwise some of - * its sublocks may not be canceled. This causes some dirty - * pages won't be written to OSTs. -jay - */ - result = 0; - } else { - CERROR("result = %d, this is unlikely!\n", result); - state = CLS_NEW; - cl_lock_extransit(env, lock, state); - } - return result ?: lock->cll_error; -} -EXPORT_SYMBOL(cl_unuse_try); - -static void cl_unuse_locked(const struct lu_env *env, struct cl_lock *lock) -{ - int result; - - result = cl_unuse_try(env, lock); - if (result) - CL_LOCK_DEBUG(D_ERROR, env, lock, "unuse return %d\n", result); -} - -/** - * Unlocks a lock. - */ -void cl_unuse(const struct lu_env *env, struct cl_lock *lock) -{ - cl_lock_mutex_get(env, lock); - cl_unuse_locked(env, lock); - cl_lock_mutex_put(env, lock); - cl_lock_lockdep_release(env, lock); -} -EXPORT_SYMBOL(cl_unuse); - -/** - * Tries to wait for a lock. - * - * This function is called repeatedly by cl_wait() until either lock is - * granted, or error occurs. This function does not block waiting for network - * communication to complete. - * - * \see cl_wait() cl_lock_operations::clo_wait() - * \see cl_lock_state::CLS_HELD - */ -int cl_wait_try(const struct lu_env *env, struct cl_lock *lock) -{ - const struct cl_lock_slice *slice; - int result; - - cl_lock_trace(D_DLMTRACE, env, "wait lock try", lock); - do { - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERTF(lock->cll_state == CLS_QUEUING || - lock->cll_state == CLS_ENQUEUED || - lock->cll_state == CLS_HELD || - lock->cll_state == CLS_INTRANSIT, - "lock state: %d\n", lock->cll_state); - LASSERT(lock->cll_users > 0); - LASSERT(lock->cll_holds > 0); - - result = lock->cll_error; - if (result != 0) - break; - - if (cl_lock_is_intransit(lock)) { - result = CLO_WAIT; - break; - } - - if (lock->cll_state == CLS_HELD) - /* nothing to do */ - break; - - result = -ENOSYS; - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_wait) { - result = slice->cls_ops->clo_wait(env, slice); - if (result != 0) - break; - } - } - LASSERT(result != -ENOSYS); - if (result == 0) { - LASSERT(lock->cll_state != CLS_INTRANSIT); - cl_lock_state_set(env, lock, CLS_HELD); - } - } while (result == CLO_REPEAT); - return result; -} -EXPORT_SYMBOL(cl_wait_try); - -/** - * Waits until enqueued lock is granted. - * - * \pre current thread or io owns a hold on the lock - * \pre ergo(result == 0, lock->cll_state == CLS_ENQUEUED || - * lock->cll_state == CLS_HELD) - * - * \post ergo(result == 0, lock->cll_state == CLS_HELD) - */ -int cl_wait(const struct lu_env *env, struct cl_lock *lock) -{ - int result; - - cl_lock_mutex_get(env, lock); - - LINVRNT(cl_lock_invariant(env, lock)); - LASSERTF(lock->cll_state == CLS_ENQUEUED || lock->cll_state == CLS_HELD, - "Wrong state %d\n", lock->cll_state); - LASSERT(lock->cll_holds > 0); - - do { - result = cl_wait_try(env, lock); - if (result == CLO_WAIT) { - result = cl_lock_state_wait(env, lock); - if (result == 0) - continue; - } - break; - } while (1); - if (result < 0) { - cl_unuse_try(env, lock); - cl_lock_lockdep_release(env, lock); - } - cl_lock_trace(D_DLMTRACE, env, "wait lock", lock); - cl_lock_mutex_put(env, lock); - LASSERT(ergo(result == 0, lock->cll_state == CLS_HELD)); - return result; -} -EXPORT_SYMBOL(cl_wait); - -/** - * Executes cl_lock_operations::clo_weigh(), and sums results to estimate lock - * value. - */ -unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock) -{ - const struct cl_lock_slice *slice; - unsigned long pound; - unsigned long ounce; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - pound = 0; - list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_weigh) { - ounce = slice->cls_ops->clo_weigh(env, slice); - pound += ounce; - if (pound < ounce) /* over-weight^Wflow */ - pound = ~0UL; - } - } - return pound; -} -EXPORT_SYMBOL(cl_lock_weigh); - -/** - * Notifies layers that lock description changed. - * - * The server can grant client a lock different from one that was requested - * (e.g., larger in extent). This method is called when actually granted lock - * description becomes known to let layers to accommodate for changed lock - * description. - * - * \see cl_lock_operations::clo_modify() - */ -int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock, - const struct cl_lock_descr *desc) -{ - const struct cl_lock_slice *slice; - struct cl_object *obj = lock->cll_descr.cld_obj; - struct cl_object_header *hdr = cl_object_header(obj); - int result; - - cl_lock_trace(D_DLMTRACE, env, "modify lock", lock); - /* don't allow object to change */ - LASSERT(obj == desc->cld_obj); - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_modify) { - result = slice->cls_ops->clo_modify(env, slice, desc); - if (result != 0) - return result; } - } - CL_LOCK_DEBUG(D_DLMTRACE, env, lock, " -> "DDESCR"@"DFID"\n", - PDESCR(desc), PFID(lu_object_fid(&desc->cld_obj->co_lu))); - /* - * Just replace description in place. Nothing more is needed for - * now. If locks were indexed according to their extent and/or mode, - * that index would have to be updated here. - */ - spin_lock(&hdr->coh_lock_guard); - lock->cll_descr = *desc; - spin_unlock(&hdr->coh_lock_guard); - return 0; -} -EXPORT_SYMBOL(cl_lock_modify); - -/** - * Initializes lock closure with a given origin. - * - * \see cl_lock_closure - */ -void cl_lock_closure_init(const struct lu_env *env, - struct cl_lock_closure *closure, - struct cl_lock *origin, int wait) -{ - LINVRNT(cl_lock_is_mutexed(origin)); - LINVRNT(cl_lock_invariant(env, origin)); - - INIT_LIST_HEAD(&closure->clc_list); - closure->clc_origin = origin; - closure->clc_wait = wait; - closure->clc_nr = 0; -} -EXPORT_SYMBOL(cl_lock_closure_init); - -/** - * Builds a closure of \a lock. - * - * Building of a closure consists of adding initial lock (\a lock) into it, - * and calling cl_lock_operations::clo_closure() methods of \a lock. These - * methods might call cl_lock_closure_build() recursively again, adding more - * locks to the closure, etc. - * - * \see cl_lock_closure - */ -int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure) -{ - const struct cl_lock_slice *slice; - int result; - - LINVRNT(cl_lock_is_mutexed(closure->clc_origin)); - LINVRNT(cl_lock_invariant(env, closure->clc_origin)); - - result = cl_lock_enclosure(env, lock, closure); - if (result == 0) { - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_closure) { - result = slice->cls_ops->clo_closure(env, slice, - closure); - if (result != 0) - break; - } - } - } - if (result != 0) - cl_lock_disclosure(env, closure); - return result; -} -EXPORT_SYMBOL(cl_lock_closure_build); - -/** - * Adds new lock to a closure. - * - * Try-locks \a lock and if succeeded, adds it to the closure (never more than - * once). If try-lock failed, returns CLO_REPEAT, after optionally waiting - * until next try-lock is likely to succeed. - */ -int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure) -{ - int result = 0; - - cl_lock_trace(D_DLMTRACE, env, "enclosure lock", lock); - if (!cl_lock_mutex_try(env, lock)) { - /* - * If lock->cll_inclosure is not empty, lock is already in - * this closure. - */ - if (list_empty(&lock->cll_inclosure)) { - cl_lock_get_trust(lock); - lu_ref_add(&lock->cll_reference, "closure", closure); - list_add(&lock->cll_inclosure, &closure->clc_list); - closure->clc_nr++; - } else - cl_lock_mutex_put(env, lock); - result = 0; - } else { - cl_lock_disclosure(env, closure); - if (closure->clc_wait) { - cl_lock_get_trust(lock); - lu_ref_add(&lock->cll_reference, "closure-w", closure); - cl_lock_mutex_put(env, closure->clc_origin); - - LASSERT(cl_lock_nr_mutexed(env) == 0); - cl_lock_mutex_get(env, lock); - cl_lock_mutex_put(env, lock); - - cl_lock_mutex_get(env, closure->clc_origin); - lu_ref_del(&lock->cll_reference, "closure-w", closure); - cl_lock_put(env, lock); - } - result = CLO_REPEAT; - } - return result; -} -EXPORT_SYMBOL(cl_lock_enclosure); - -/** Releases mutices of enclosed locks. */ -void cl_lock_disclosure(const struct lu_env *env, - struct cl_lock_closure *closure) -{ - struct cl_lock *scan; - struct cl_lock *temp; - - cl_lock_trace(D_DLMTRACE, env, "disclosure lock", closure->clc_origin); - list_for_each_entry_safe(scan, temp, &closure->clc_list, - cll_inclosure) { - list_del_init(&scan->cll_inclosure); - cl_lock_mutex_put(env, scan); - lu_ref_del(&scan->cll_reference, "closure", closure); - cl_lock_put(env, scan); - closure->clc_nr--; - } - LASSERT(closure->clc_nr == 0); -} -EXPORT_SYMBOL(cl_lock_disclosure); - -/** Finalizes a closure. */ -void cl_lock_closure_fini(struct cl_lock_closure *closure) -{ - LASSERT(closure->clc_nr == 0); - LASSERT(list_empty(&closure->clc_list)); -} -EXPORT_SYMBOL(cl_lock_closure_fini); - -/** - * Destroys this lock. Notifies layers (bottom-to-top) that lock is being - * destroyed, then destroy the lock. If there are holds on the lock, postpone - * destruction until all holds are released. This is called when a decision is - * made to destroy the lock in the future. E.g., when a blocking AST is - * received on it, or fatal communication error happens. - * - * Caller must have a reference on this lock to prevent a situation, when - * deleted lock lingers in memory for indefinite time, because nobody calls - * cl_lock_put() to finish it. - * - * \pre atomic_read(&lock->cll_ref) > 0 - * \pre ergo(cl_lock_nesting(lock) == CNL_TOP, - * cl_lock_nr_mutexed(env) == 1) - * [i.e., if a top-lock is deleted, mutices of no other locks can be - * held, as deletion of sub-locks might require releasing a top-lock - * mutex] - * - * \see cl_lock_operations::clo_delete() - * \see cl_lock::cll_holds - */ -void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP, - cl_lock_nr_mutexed(env) == 1)); - - cl_lock_trace(D_DLMTRACE, env, "delete lock", lock); - if (lock->cll_holds == 0) - cl_lock_delete0(env, lock); - else - lock->cll_flags |= CLF_DOOMED; -} -EXPORT_SYMBOL(cl_lock_delete); - -/** - * Mark lock as irrecoverably failed, and mark it for destruction. This - * happens when, e.g., server fails to grant a lock to us, or networking - * time-out happens. - * - * \pre atomic_read(&lock->cll_ref) > 0 - * - * \see clo_lock_delete() - * \see cl_lock::cll_holds - */ -void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - if (lock->cll_error == 0 && error != 0) { - cl_lock_trace(D_DLMTRACE, env, "set lock error", lock); - lock->cll_error = error; - cl_lock_signal(env, lock); - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); - } -} -EXPORT_SYMBOL(cl_lock_error); - -/** - * Cancels this lock. Notifies layers - * (bottom-to-top) that lock is being cancelled, then destroy the lock. If - * there are holds on the lock, postpone cancellation until - * all holds are released. - * - * Cancellation notification is delivered to layers at most once. - * - * \see cl_lock_operations::clo_cancel() - * \see cl_lock::cll_holds - */ -void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock); - if (lock->cll_holds == 0) - cl_lock_cancel0(env, lock); - else - lock->cll_flags |= CLF_CANCELPEND; -} -EXPORT_SYMBOL(cl_lock_cancel); - -/** - * Finds an existing lock covering given index and optionally different from a - * given \a except lock. - */ -struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env, - struct cl_object *obj, pgoff_t index, - struct cl_lock *except, - int pending, int canceld) -{ - struct cl_object_header *head; - struct cl_lock *scan; - struct cl_lock *lock; - struct cl_lock_descr *need; - - head = cl_object_header(obj); - need = &cl_env_info(env)->clt_descr; - lock = NULL; - - need->cld_mode = CLM_READ; /* CLM_READ matches both READ & WRITE, but - * not PHANTOM - */ - need->cld_start = need->cld_end = index; - need->cld_enq_flags = 0; - - spin_lock(&head->coh_lock_guard); - /* It is fine to match any group lock since there could be only one - * with a uniq gid and it conflicts with all other lock modes too - */ - list_for_each_entry(scan, &head->coh_locks, cll_linkage) { - if (scan != except && - (scan->cll_descr.cld_mode == CLM_GROUP || - cl_lock_ext_match(&scan->cll_descr, need)) && - scan->cll_state >= CLS_HELD && - scan->cll_state < CLS_FREEING && - /* - * This check is racy as the lock can be canceled right - * after it is done, but this is fine, because page exists - * already. - */ - (canceld || !(scan->cll_flags & CLF_CANCELLED)) && - (pending || !(scan->cll_flags & CLF_CANCELPEND))) { - /* Don't increase cs_hit here since this - * is just a helper function. - */ - cl_lock_get_trust(scan); - lock = scan; - break; - } - } - spin_unlock(&head->coh_lock_guard); - return lock; -} -EXPORT_SYMBOL(cl_lock_at_pgoff); - -/** - * Calculate the page offset at the layer of @lock. - * At the time of this writing, @page is top page and @lock is sub lock. - */ -static pgoff_t pgoff_at_lock(struct cl_page *page, struct cl_lock *lock) -{ - struct lu_device_type *dtype; - const struct cl_page_slice *slice; - - dtype = lock->cll_descr.cld_obj->co_lu.lo_dev->ld_type; - slice = cl_page_at(page, dtype); - return slice->cpl_page->cp_index; + return rc; } +EXPORT_SYMBOL(cl_lock_enqueue); /** - * Check if page @page is covered by an extra lock or discard it. + * Main high-level entry point of cl_lock interface that finds existing or + * enqueues new lock matching given description. */ -static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, void *cbdata) +int cl_lock_request(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock) { - struct cl_thread_info *info = cl_env_info(env); - struct cl_lock *lock = cbdata; - pgoff_t index = pgoff_at_lock(page, lock); + struct cl_sync_io *anchor = NULL; + __u32 enq_flags = lock->cll_descr.cld_enq_flags; + int rc; - if (index >= info->clt_fn_index) { - struct cl_lock *tmp; + rc = cl_lock_init(env, lock, io); + if (rc < 0) + return rc; - /* refresh non-overlapped index */ - tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj, index, - lock, 1, 0); - if (tmp) { - /* Cache the first-non-overlapped index so as to skip - * all pages within [index, clt_fn_index). This - * is safe because if tmp lock is canceled, it will - * discard these pages. - */ - info->clt_fn_index = tmp->cll_descr.cld_end + 1; - if (tmp->cll_descr.cld_end == CL_PAGE_EOF) - info->clt_fn_index = CL_PAGE_EOF; - cl_lock_put(env, tmp); - } else if (cl_page_own(env, io, page) == 0) { - /* discard the page */ - cl_page_unmap(env, io, page); - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); - } + if ((enq_flags & CEF_ASYNC) && !(enq_flags & CEF_AGL)) { + anchor = &cl_env_info(env)->clt_anchor; + cl_sync_io_init(anchor, 1, cl_sync_io_end); } - info->clt_next_index = index + 1; - return CLP_GANG_OKAY; -} + rc = cl_lock_enqueue(env, io, lock, anchor); -static int discard_cb(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, void *cbdata) -{ - struct cl_thread_info *info = cl_env_info(env); - struct cl_lock *lock = cbdata; + if (anchor) { + int rc2; - LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE); - KLASSERT(ergo(page->cp_type == CPT_CACHEABLE, - !PageWriteback(cl_page_vmpage(env, page)))); - KLASSERT(ergo(page->cp_type == CPT_CACHEABLE, - !PageDirty(cl_page_vmpage(env, page)))); - - info->clt_next_index = pgoff_at_lock(page, lock) + 1; - if (cl_page_own(env, io, page) == 0) { - /* discard the page */ - cl_page_unmap(env, io, page); - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); + /* drop the reference count held at initialization time */ + cl_sync_io_note(env, anchor, 0); + rc2 = cl_sync_io_wait(env, anchor, 0); + if (rc2 < 0 && rc == 0) + rc = rc2; } - return CLP_GANG_OKAY; -} + if (rc < 0) + cl_lock_release(env, lock); -/** - * Discard pages protected by the given lock. This function traverses radix - * tree to find all covering pages and discard them. If a page is being covered - * by other locks, it should remain in cache. - * - * If error happens on any step, the process continues anyway (the reasoning - * behind this being that lock cancellation cannot be delayed indefinitely). - */ -int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_thread_info *info = cl_env_info(env); - struct cl_io *io = &info->clt_io; - struct cl_lock_descr *descr = &lock->cll_descr; - cl_page_gang_cb_t cb; - int res; - int result; - - LINVRNT(cl_lock_invariant(env, lock)); - - io->ci_obj = cl_object_top(descr->cld_obj); - io->ci_ignore_layout = 1; - result = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (result != 0) - goto out; - - cb = descr->cld_mode == CLM_READ ? check_and_discard_cb : discard_cb; - info->clt_fn_index = info->clt_next_index = descr->cld_start; - do { - res = cl_page_gang_lookup(env, descr->cld_obj, io, - info->clt_next_index, descr->cld_end, - cb, (void *)lock); - if (info->clt_next_index > descr->cld_end) - break; - - if (res == CLP_GANG_RESCHED) - cond_resched(); - } while (res != CLP_GANG_OKAY); -out: - cl_io_fini(env, io); - return result; -} -EXPORT_SYMBOL(cl_lock_discard_pages); - -/** - * Eliminate all locks for a given object. - * - * Caller has to guarantee that no lock is in active use. - * - * \param cancel when this is set, cl_locks_prune() cancels locks before - * destroying. - */ -void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int cancel) -{ - struct cl_object_header *head; - struct cl_lock *lock; - - head = cl_object_header(obj); - /* - * If locks are destroyed without cancellation, all pages must be - * already destroyed (as otherwise they will be left unprotected). - */ - LASSERT(ergo(!cancel, - !head->coh_tree.rnode && head->coh_pages == 0)); - - spin_lock(&head->coh_lock_guard); - while (!list_empty(&head->coh_locks)) { - lock = container_of(head->coh_locks.next, - struct cl_lock, cll_linkage); - cl_lock_get_trust(lock); - spin_unlock(&head->coh_lock_guard); - lu_ref_add(&lock->cll_reference, "prune", current); - -again: - cl_lock_mutex_get(env, lock); - if (lock->cll_state < CLS_FREEING) { - LASSERT(lock->cll_users <= 1); - if (unlikely(lock->cll_users == 1)) { - struct l_wait_info lwi = { 0 }; - - cl_lock_mutex_put(env, lock); - l_wait_event(lock->cll_wq, - lock->cll_users == 0, - &lwi); - goto again; - } - - if (cancel) - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); - } - cl_lock_mutex_put(env, lock); - lu_ref_del(&lock->cll_reference, "prune", current); - cl_lock_put(env, lock); - spin_lock(&head->coh_lock_guard); - } - spin_unlock(&head->coh_lock_guard); -} -EXPORT_SYMBOL(cl_locks_prune); - -static struct cl_lock *cl_lock_hold_mutex(const struct lu_env *env, - const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source) -{ - struct cl_lock *lock; - - while (1) { - lock = cl_lock_find(env, io, need); - if (IS_ERR(lock)) - break; - cl_lock_mutex_get(env, lock); - if (lock->cll_state < CLS_FREEING && - !(lock->cll_flags & CLF_CANCELLED)) { - cl_lock_hold_mod(env, lock, 1); - lu_ref_add(&lock->cll_holders, scope, source); - lu_ref_add(&lock->cll_reference, scope, source); - break; - } - cl_lock_mutex_put(env, lock); - cl_lock_put(env, lock); - } - return lock; -} - -/** - * Returns a lock matching \a need description with a reference and a hold on - * it. - * - * This is much like cl_lock_find(), except that cl_lock_hold() additionally - * guarantees that lock is not in the CLS_FREEING state on return. - */ -struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source) -{ - struct cl_lock *lock; - - lock = cl_lock_hold_mutex(env, io, need, scope, source); - if (!IS_ERR(lock)) - cl_lock_mutex_put(env, lock); - return lock; -} -EXPORT_SYMBOL(cl_lock_hold); - -/** - * Main high-level entry point of cl_lock interface that finds existing or - * enqueues new lock matching given description. - */ -struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source) -{ - struct cl_lock *lock; - int rc; - __u32 enqflags = need->cld_enq_flags; - - do { - lock = cl_lock_hold_mutex(env, io, need, scope, source); - if (IS_ERR(lock)) - break; - - rc = cl_enqueue_locked(env, lock, io, enqflags); - if (rc == 0) { - if (cl_lock_fits_into(env, lock, need, io)) { - if (!(enqflags & CEF_AGL)) { - cl_lock_mutex_put(env, lock); - cl_lock_lockdep_acquire(env, lock, - enqflags); - break; - } - rc = 1; - } - cl_unuse_locked(env, lock); - } - cl_lock_trace(D_DLMTRACE, env, - rc <= 0 ? "enqueue failed" : "agl succeed", lock); - cl_lock_hold_release(env, lock, scope, source); - cl_lock_mutex_put(env, lock); - lu_ref_del(&lock->cll_reference, scope, source); - cl_lock_put(env, lock); - if (rc > 0) { - LASSERT(enqflags & CEF_AGL); - lock = NULL; - } else if (rc != 0) { - lock = ERR_PTR(rc); - } - } while (rc == 0); - return lock; + return rc; } EXPORT_SYMBOL(cl_lock_request); /** - * Adds a hold to a known lock. - */ -void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_state != CLS_FREEING); - - cl_lock_hold_mod(env, lock, 1); - cl_lock_get(lock); - lu_ref_add(&lock->cll_holders, scope, source); - lu_ref_add(&lock->cll_reference, scope, source); -} -EXPORT_SYMBOL(cl_lock_hold_add); - -/** - * Releases a hold and a reference on a lock, on which caller acquired a - * mutex. - */ -void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source) -{ - LINVRNT(cl_lock_invariant(env, lock)); - cl_lock_hold_release(env, lock, scope, source); - lu_ref_del(&lock->cll_reference, scope, source); - cl_lock_put(env, lock); -} -EXPORT_SYMBOL(cl_lock_unhold); - -/** * Releases a hold and a reference on a lock, obtained by cl_lock_hold(). */ -void cl_lock_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source) +void cl_lock_release(const struct lu_env *env, struct cl_lock *lock) { - LINVRNT(cl_lock_invariant(env, lock)); cl_lock_trace(D_DLMTRACE, env, "release lock", lock); - cl_lock_mutex_get(env, lock); - cl_lock_hold_release(env, lock, scope, source); - cl_lock_mutex_put(env, lock); - lu_ref_del(&lock->cll_reference, scope, source); - cl_lock_put(env, lock); + cl_lock_cancel(env, lock); + cl_lock_fini(env, lock); } EXPORT_SYMBOL(cl_lock_release); -void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - cl_lock_used_mod(env, lock, 1); -} -EXPORT_SYMBOL(cl_lock_user_add); - -void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_users > 0); - - cl_lock_used_mod(env, lock, -1); - if (lock->cll_users == 0) - wake_up_all(&lock->cll_wq); -} -EXPORT_SYMBOL(cl_lock_user_del); - const char *cl_lock_mode_name(const enum cl_lock_mode mode) { static const char *names[] = { - [CLM_PHANTOM] = "P", [CLM_READ] = "R", [CLM_WRITE] = "W", [CLM_GROUP] = "G" @@ -2189,10 +257,8 @@ void cl_lock_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_lock *lock) { const struct cl_lock_slice *slice; - (*printer)(env, cookie, "lock@%p[%d %d %d %d %d %08lx] ", - lock, atomic_read(&lock->cll_ref), - lock->cll_state, lock->cll_error, lock->cll_holds, - lock->cll_users, lock->cll_flags); + + (*printer)(env, cookie, "lock@%p", lock); cl_lock_descr_print(env, cookie, printer, &lock->cll_descr); (*printer)(env, cookie, " {\n"); @@ -2207,13 +273,3 @@ void cl_lock_print(const struct lu_env *env, void *cookie, (*printer)(env, cookie, "} lock@%p\n", lock); } EXPORT_SYMBOL(cl_lock_print); - -int cl_lock_init(void) -{ - return lu_kmem_init(cl_lock_caches); -} - -void cl_lock_fini(void) -{ - lu_kmem_fini(cl_lock_caches); -} diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c index 43e299d4d416..91a5806d0239 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_object.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -36,6 +32,7 @@ * Client Lustre Object. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ /* @@ -43,8 +40,6 @@ * * i_mutex * PG_locked - * ->coh_page_guard - * ->coh_lock_guard * ->coh_attr_guard * ->ls_guard */ @@ -63,10 +58,6 @@ static struct kmem_cache *cl_env_kmem; -/** Lock class of cl_object_header::coh_page_guard */ -static struct lock_class_key cl_page_guard_class; -/** Lock class of cl_object_header::coh_lock_guard */ -static struct lock_class_key cl_lock_guard_class; /** Lock class of cl_object_header::coh_attr_guard */ static struct lock_class_key cl_attr_guard_class; @@ -81,17 +72,9 @@ int cl_object_header_init(struct cl_object_header *h) result = lu_object_header_init(&h->coh_lu); if (result == 0) { - spin_lock_init(&h->coh_page_guard); - spin_lock_init(&h->coh_lock_guard); spin_lock_init(&h->coh_attr_guard); - lockdep_set_class(&h->coh_page_guard, &cl_page_guard_class); - lockdep_set_class(&h->coh_lock_guard, &cl_lock_guard_class); lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class); - h->coh_pages = 0; - /* XXX hard coded GFP_* mask. */ - INIT_RADIX_TREE(&h->coh_tree, GFP_ATOMIC); - INIT_LIST_HEAD(&h->coh_locks); - h->coh_page_bufsize = ALIGN(sizeof(struct cl_page), 8); + h->coh_page_bufsize = 0; } return result; } @@ -145,7 +128,7 @@ EXPORT_SYMBOL(cl_object_get); /** * Returns the top-object for a given \a o. * - * \see cl_page_top(), cl_io_top() + * \see cl_io_top() */ struct cl_object *cl_object_top(struct cl_object *o) { @@ -315,6 +298,29 @@ int cl_conf_set(const struct lu_env *env, struct cl_object *obj, EXPORT_SYMBOL(cl_conf_set); /** + * Prunes caches of pages and locks for this object. + */ +int cl_object_prune(const struct lu_env *env, struct cl_object *obj) +{ + struct lu_object_header *top; + struct cl_object *o; + int result; + + top = obj->co_lu.lo_header; + result = 0; + list_for_each_entry(o, &top->loh_layers, co_lu.lo_linkage) { + if (o->co_ops->coo_prune) { + result = o->co_ops->coo_prune(env, o); + if (result != 0) + break; + } + } + + return result; +} +EXPORT_SYMBOL(cl_object_prune); + +/** * Helper function removing all object locks, and marking object for * deletion. All object pages must have been deleted at this point. * @@ -323,34 +329,12 @@ EXPORT_SYMBOL(cl_conf_set); */ void cl_object_kill(const struct lu_env *env, struct cl_object *obj) { - struct cl_object_header *hdr; - - hdr = cl_object_header(obj); - LASSERT(!hdr->coh_tree.rnode); - LASSERT(hdr->coh_pages == 0); + struct cl_object_header *hdr = cl_object_header(obj); set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags); - /* - * Destroy all locks. Object destruction (including cl_inode_fini()) - * cannot cancel the locks, because in the case of a local client, - * where client and server share the same thread running - * prune_icache(), this can dead-lock with ldlm_cancel_handler() - * waiting on __wait_on_freeing_inode(). - */ - cl_locks_prune(env, obj, 0); } EXPORT_SYMBOL(cl_object_kill); -/** - * Prunes caches of pages and locks for this object. - */ -void cl_object_prune(const struct lu_env *env, struct cl_object *obj) -{ - cl_pages_prune(env, obj); - cl_locks_prune(env, obj, 1); -} -EXPORT_SYMBOL(cl_object_prune); - void cache_stats_init(struct cache_stats *cs, const char *name) { int i; @@ -383,6 +367,8 @@ static int cache_stats_print(const struct cache_stats *cs, return 0; } +static void cl_env_percpu_refill(void); + /** * Initialize client site. * @@ -397,11 +383,9 @@ int cl_site_init(struct cl_site *s, struct cl_device *d) result = lu_site_init(&s->cs_lu, &d->cd_lu_dev); if (result == 0) { cache_stats_init(&s->cs_pages, "pages"); - cache_stats_init(&s->cs_locks, "locks"); for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i) atomic_set(&s->cs_pages_state[0], 0); - for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i) - atomic_set(&s->cs_locks_state[i], 0); + cl_env_percpu_refill(); } return result; } @@ -435,15 +419,6 @@ int cl_site_stats_print(const struct cl_site *site, struct seq_file *m) [CPS_PAGEIN] = "r", [CPS_FREEING] = "f" }; - static const char *lstate[] = { - [CLS_NEW] = "n", - [CLS_QUEUING] = "q", - [CLS_ENQUEUED] = "e", - [CLS_HELD] = "h", - [CLS_INTRANSIT] = "t", - [CLS_CACHED] = "c", - [CLS_FREEING] = "f" - }; /* lookup hit total busy create pages: ...... ...... ...... ...... ...... [...... ...... ...... ......] @@ -457,12 +432,6 @@ locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......] seq_printf(m, "%s: %u ", pstate[i], atomic_read(&site->cs_pages_state[i])); seq_printf(m, "]\n"); - cache_stats_print(&site->cs_locks, m, 0); - seq_printf(m, " ["); - for (i = 0; i < ARRAY_SIZE(site->cs_locks_state); ++i) - seq_printf(m, "%s: %u ", lstate[i], - atomic_read(&site->cs_locks_state[i])); - seq_printf(m, "]\n"); cache_stats_print(&cl_env_stats, m, 0); seq_printf(m, "\n"); return 0; @@ -492,6 +461,13 @@ EXPORT_SYMBOL(cl_site_stats_print); * bz20044, bz22683. */ +static LIST_HEAD(cl_envs); +static unsigned int cl_envs_cached_nr; +static unsigned int cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit + * for now. + */ +static DEFINE_SPINLOCK(cl_envs_guard); + struct cl_env { void *ce_magic; struct lu_env ce_lu; @@ -597,7 +573,7 @@ static inline struct cl_env *cl_env_fetch(void) { struct cl_env *cle; - cle = cfs_hash_lookup(cl_env_hash, (void *) (long) current->pid); + cle = cfs_hash_lookup(cl_env_hash, (void *)(long)current->pid); LASSERT(ergo(cle, cle->ce_magic == &cl_env_init0)); return cle; } @@ -608,7 +584,7 @@ static inline void cl_env_attach(struct cl_env *cle) int rc; LASSERT(!cle->ce_owner); - cle->ce_owner = (void *) (long) current->pid; + cle->ce_owner = (void *)(long)current->pid; rc = cfs_hash_add_unique(cl_env_hash, cle->ce_owner, &cle->ce_node); LASSERT(rc == 0); @@ -619,7 +595,7 @@ static inline void cl_env_do_detach(struct cl_env *cle) { void *cookie; - LASSERT(cle->ce_owner == (void *) (long) current->pid); + LASSERT(cle->ce_owner == (void *)(long)current->pid); cookie = cfs_hash_del(cl_env_hash, cle->ce_owner, &cle->ce_node); LASSERT(cookie == cle); @@ -674,8 +650,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug) lu_context_enter(&cle->ce_ses); env->le_ses = &cle->ce_ses; cl_env_init0(cle, debug); - } else + } else { lu_env_fini(env); + } } if (rc != 0) { kmem_cache_free(cl_env_kmem, cle); @@ -684,8 +661,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug) CL_ENV_INC(create); CL_ENV_INC(total); } - } else + } else { env = ERR_PTR(-ENOMEM); + } return env; } @@ -697,6 +675,39 @@ static void cl_env_fini(struct cl_env *cle) kmem_cache_free(cl_env_kmem, cle); } +static struct lu_env *cl_env_obtain(void *debug) +{ + struct cl_env *cle; + struct lu_env *env; + + spin_lock(&cl_envs_guard); + LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs))); + if (cl_envs_cached_nr > 0) { + int rc; + + cle = container_of(cl_envs.next, struct cl_env, ce_linkage); + list_del_init(&cle->ce_linkage); + cl_envs_cached_nr--; + spin_unlock(&cl_envs_guard); + + env = &cle->ce_lu; + rc = lu_env_refill(env); + if (rc == 0) { + cl_env_init0(cle, debug); + lu_context_enter(&env->le_ctx); + lu_context_enter(&cle->ce_ses); + } else { + cl_env_fini(cle); + env = ERR_PTR(rc); + } + } else { + spin_unlock(&cl_envs_guard); + env = cl_env_new(lu_context_tags_default, + lu_session_tags_default, debug); + } + return env; +} + static inline struct cl_env *cl_env_container(struct lu_env *env) { return container_of(env, struct cl_env, ce_lu); @@ -727,6 +738,8 @@ static struct lu_env *cl_env_peek(int *refcheck) * Returns lu_env: if there already is an environment associated with the * current thread, it is returned, otherwise, new environment is allocated. * + * Allocations are amortized through the global cache of environments. + * * \param refcheck pointer to a counter used to detect environment leaks. In * the usual case cl_env_get() and cl_env_put() are called in the same lexical * scope and pointer to the same integer is passed as \a refcheck. This is @@ -740,10 +753,7 @@ struct lu_env *cl_env_get(int *refcheck) env = cl_env_peek(refcheck); if (!env) { - env = cl_env_new(lu_context_tags_default, - lu_session_tags_default, - __builtin_return_address(0)); - + env = cl_env_obtain(__builtin_return_address(0)); if (!IS_ERR(env)) { struct cl_env *cle; @@ -787,6 +797,32 @@ static void cl_env_exit(struct cl_env *cle) } /** + * Finalizes and frees a given number of cached environments. This is done to + * (1) free some memory (not currently hooked into VM), or (2) release + * references to modules. + */ +unsigned int cl_env_cache_purge(unsigned int nr) +{ + struct cl_env *cle; + + spin_lock(&cl_envs_guard); + for (; !list_empty(&cl_envs) && nr > 0; --nr) { + cle = container_of(cl_envs.next, struct cl_env, ce_linkage); + list_del_init(&cle->ce_linkage); + LASSERT(cl_envs_cached_nr > 0); + cl_envs_cached_nr--; + spin_unlock(&cl_envs_guard); + + cl_env_fini(cle); + spin_lock(&cl_envs_guard); + } + LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs))); + spin_unlock(&cl_envs_guard); + return nr; +} +EXPORT_SYMBOL(cl_env_cache_purge); + +/** * Release an environment. * * Decrement \a env reference counter. When counter drops to 0, nothing in @@ -808,7 +844,22 @@ void cl_env_put(struct lu_env *env, int *refcheck) cl_env_detach(cle); cle->ce_debug = NULL; cl_env_exit(cle); - cl_env_fini(cle); + /* + * Don't bother to take a lock here. + * + * Return environment to the cache only when it was allocated + * with the standard tags. + */ + if (cl_envs_cached_nr < cl_envs_cached_max && + (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD && + (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) { + spin_lock(&cl_envs_guard); + list_add(&cle->ce_linkage, &cl_envs); + cl_envs_cached_nr++; + spin_unlock(&cl_envs_guard); + } else { + cl_env_fini(cle); + } } } EXPORT_SYMBOL(cl_env_put); @@ -914,6 +965,104 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb) } EXPORT_SYMBOL(cl_lvb2attr); +static struct cl_env cl_env_percpu[NR_CPUS]; + +static int cl_env_percpu_init(void) +{ + struct cl_env *cle; + int tags = LCT_REMEMBER | LCT_NOREF; + int i, j; + int rc = 0; + + for_each_possible_cpu(i) { + struct lu_env *env; + + cle = &cl_env_percpu[i]; + env = &cle->ce_lu; + + INIT_LIST_HEAD(&cle->ce_linkage); + cle->ce_magic = &cl_env_init0; + rc = lu_env_init(env, LCT_CL_THREAD | tags); + if (rc == 0) { + rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags); + if (rc == 0) { + lu_context_enter(&cle->ce_ses); + env->le_ses = &cle->ce_ses; + } else { + lu_env_fini(env); + } + } + if (rc != 0) + break; + } + if (rc != 0) { + /* Indices 0 to i (excluding i) were correctly initialized, + * thus we must uninitialize up to i, the rest are undefined. + */ + for (j = 0; j < i; j++) { + cle = &cl_env_percpu[i]; + lu_context_exit(&cle->ce_ses); + lu_context_fini(&cle->ce_ses); + lu_env_fini(&cle->ce_lu); + } + } + + return rc; +} + +static void cl_env_percpu_fini(void) +{ + int i; + + for_each_possible_cpu(i) { + struct cl_env *cle = &cl_env_percpu[i]; + + lu_context_exit(&cle->ce_ses); + lu_context_fini(&cle->ce_ses); + lu_env_fini(&cle->ce_lu); + } +} + +static void cl_env_percpu_refill(void) +{ + int i; + + for_each_possible_cpu(i) + lu_env_refill(&cl_env_percpu[i].ce_lu); +} + +void cl_env_percpu_put(struct lu_env *env) +{ + struct cl_env *cle; + int cpu; + + cpu = smp_processor_id(); + cle = cl_env_container(env); + LASSERT(cle == &cl_env_percpu[cpu]); + + cle->ce_ref--; + LASSERT(cle->ce_ref == 0); + + CL_ENV_DEC(busy); + cl_env_detach(cle); + cle->ce_debug = NULL; + + put_cpu(); +} +EXPORT_SYMBOL(cl_env_percpu_put); + +struct lu_env *cl_env_percpu_get(void) +{ + struct cl_env *cle; + + cle = &cl_env_percpu[get_cpu()]; + cl_env_init0(cle, __builtin_return_address(0)); + + cl_env_attach(cle); + return &cle->ce_lu; +} +EXPORT_SYMBOL(cl_env_percpu_get); + /***************************************************************************** * * Temporary prototype thing: mirror obd-devices into cl devices. @@ -944,8 +1093,9 @@ struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site, CERROR("can't init device '%s', %d\n", typename, rc); d = ERR_PTR(rc); } - } else + } else { CERROR("Cannot allocate device: '%s'\n", typename); + } return lu2cl_dev(d); } EXPORT_SYMBOL(cl_type_setup); @@ -959,12 +1109,6 @@ void cl_stack_fini(const struct lu_env *env, struct cl_device *cl) } EXPORT_SYMBOL(cl_stack_fini); -int cl_lock_init(void); -void cl_lock_fini(void); - -int cl_page_init(void); -void cl_page_fini(void); - static struct lu_context_key cl_key; struct cl_thread_info *cl_env_info(const struct lu_env *env) @@ -1059,17 +1203,13 @@ int cl_global_init(void) if (result) goto out_kmem; - result = cl_lock_init(); + result = cl_env_percpu_init(); if (result) + /* no cl_env_percpu_fini on error */ goto out_context; - result = cl_page_init(); - if (result) - goto out_lock; - return 0; -out_lock: - cl_lock_fini(); + out_context: lu_context_key_degister(&cl_key); out_kmem: @@ -1084,8 +1224,7 @@ out_store: */ void cl_global_fini(void) { - cl_lock_fini(); - cl_page_fini(); + cl_env_percpu_fini(); lu_context_key_degister(&cl_key); lu_kmem_fini(cl_object_caches); cl_env_store_fini(); diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c index 394580016638..db2dc6b39073 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_page.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -36,6 +32,7 @@ * Client Lustre Page. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ #define DEBUG_SUBSYSTEM S_CLASS @@ -48,8 +45,7 @@ #include "../include/cl_object.h" #include "cl_internal.h" -static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, - int radix); +static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg); # define PASSERT(env, page, expr) \ do { \ @@ -63,24 +59,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp)) /** - * Internal version of cl_page_top, it should be called if the page is - * known to be not freed, says with page referenced, or radix tree lock held, - * or page owned. - */ -static struct cl_page *cl_page_top_trusted(struct cl_page *page) -{ - while (page->cp_parent) - page = page->cp_parent; - return page; -} - -/** * Internal version of cl_page_get(). * * This function can be used to obtain initial reference to previously * unreferenced cached object. It can be called only if concurrent page - * reclamation is somehow prevented, e.g., by locking page radix-tree - * (cl_object_header::hdr->coh_page_guard), or by keeping a lock on a VM page, + * reclamation is somehow prevented, e.g., by keeping a lock on a VM page, * associated with \a page. * * Use with care! Not exported. @@ -103,142 +86,12 @@ cl_page_at_trusted(const struct cl_page *page, { const struct cl_page_slice *slice; - page = cl_page_top_trusted((struct cl_page *)page); - do { - list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { - if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype) - return slice; - } - page = page->cp_child; - } while (page); - return NULL; -} - -/** - * Returns a page with given index in the given object, or NULL if no page is - * found. Acquires a reference on \a page. - * - * Locking: called under cl_object_header::coh_page_guard spin-lock. - */ -struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index) -{ - struct cl_page *page; - - assert_spin_locked(&hdr->coh_page_guard); - - page = radix_tree_lookup(&hdr->coh_tree, index); - if (page) - cl_page_get_trust(page); - return page; -} -EXPORT_SYMBOL(cl_page_lookup); - -/** - * Returns a list of pages by a given [start, end] of \a obj. - * - * \param resched If not NULL, then we give up before hogging CPU for too - * long and set *resched = 1, in that case caller should implement a retry - * logic. - * - * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely - * crucial in the face of [offset, EOF] locks. - * - * Return at least one page in @queue unless there is no covered page. - */ -int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, pgoff_t start, pgoff_t end, - cl_page_gang_cb_t cb, void *cbdata) -{ - struct cl_object_header *hdr; - struct cl_page *page; - struct cl_page **pvec; - const struct cl_page_slice *slice; - const struct lu_device_type *dtype; - pgoff_t idx; - unsigned int nr; - unsigned int i; - unsigned int j; - int res = CLP_GANG_OKAY; - int tree_lock = 1; - - idx = start; - hdr = cl_object_header(obj); - pvec = cl_env_info(env)->clt_pvec; - dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type; - spin_lock(&hdr->coh_page_guard); - while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec, - idx, CLT_PVEC_SIZE)) > 0) { - int end_of_region = 0; - - idx = pvec[nr - 1]->cp_index + 1; - for (i = 0, j = 0; i < nr; ++i) { - page = pvec[i]; - pvec[i] = NULL; - - LASSERT(page->cp_type == CPT_CACHEABLE); - if (page->cp_index > end) { - end_of_region = 1; - break; - } - if (page->cp_state == CPS_FREEING) - continue; - - slice = cl_page_at_trusted(page, dtype); - /* - * Pages for lsm-less file has no underneath sub-page - * for osc, in case of ... - */ - PASSERT(env, page, slice); - - page = slice->cpl_page; - /* - * Can safely call cl_page_get_trust() under - * radix-tree spin-lock. - * - * XXX not true, because @page is from object another - * than @hdr and protected by different tree lock. - */ - cl_page_get_trust(page); - lu_ref_add_atomic(&page->cp_reference, - "gang_lookup", current); - pvec[j++] = page; - } - - /* - * Here a delicate locking dance is performed. Current thread - * holds a reference to a page, but has to own it before it - * can be placed into queue. Owning implies waiting, so - * radix-tree lock is to be released. After a wait one has to - * check that pages weren't truncated (cl_page_own() returns - * error in the latter case). - */ - spin_unlock(&hdr->coh_page_guard); - tree_lock = 0; - - for (i = 0; i < j; ++i) { - page = pvec[i]; - if (res == CLP_GANG_OKAY) - res = (*cb)(env, io, page, cbdata); - lu_ref_del(&page->cp_reference, - "gang_lookup", current); - cl_page_put(env, page); - } - if (nr < CLT_PVEC_SIZE || end_of_region) - break; - - if (res == CLP_GANG_OKAY && need_resched()) - res = CLP_GANG_RESCHED; - if (res != CLP_GANG_OKAY) - break; - - spin_lock(&hdr->coh_page_guard); - tree_lock = 1; + list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { + if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype) + return slice; } - if (tree_lock) - spin_unlock(&hdr->coh_page_guard); - return res; + return NULL; } -EXPORT_SYMBOL(cl_page_gang_lookup); static void cl_page_free(const struct lu_env *env, struct cl_page *page) { @@ -247,17 +100,16 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page) PASSERT(env, page, list_empty(&page->cp_batch)); PASSERT(env, page, !page->cp_owner); PASSERT(env, page, !page->cp_req); - PASSERT(env, page, !page->cp_parent); PASSERT(env, page, page->cp_state == CPS_FREEING); - might_sleep(); while (!list_empty(&page->cp_layers)) { struct cl_page_slice *slice; slice = list_entry(page->cp_layers.next, struct cl_page_slice, cpl_linkage); list_del_init(page->cp_layers.next); - slice->cpl_ops->cpo_fini(env, slice); + if (unlikely(slice->cpl_ops->cpo_fini)) + slice->cpl_ops->cpo_fini(env, slice); } lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page); cl_object_put(env, obj); @@ -276,10 +128,10 @@ static inline void cl_page_state_set_trust(struct cl_page *page, *(enum cl_page_state *)&page->cp_state = state; } -static struct cl_page *cl_page_alloc(const struct lu_env *env, - struct cl_object *o, pgoff_t ind, - struct page *vmpage, - enum cl_page_type type) +struct cl_page *cl_page_alloc(const struct lu_env *env, + struct cl_object *o, pgoff_t ind, + struct page *vmpage, + enum cl_page_type type) { struct cl_page *page; struct lu_object_header *head; @@ -289,13 +141,11 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env, int result = 0; atomic_set(&page->cp_ref, 1); - if (type == CPT_CACHEABLE) /* for radix tree */ - atomic_inc(&page->cp_ref); page->cp_obj = o; cl_object_get(o); lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page", page); - page->cp_index = ind; + page->cp_vmpage = vmpage; cl_page_state_set_trust(page, CPS_CACHED); page->cp_type = type; INIT_LIST_HEAD(&page->cp_layers); @@ -306,10 +156,10 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env, head = o->co_lu.lo_header; list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) { if (o->co_ops->coo_page_init) { - result = o->co_ops->coo_page_init(env, o, - page, vmpage); + result = o->co_ops->coo_page_init(env, o, page, + ind); if (result != 0) { - cl_page_delete0(env, page, 0); + cl_page_delete0(env, page); cl_page_free(env, page); page = ERR_PTR(result); break; @@ -321,6 +171,7 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env, } return page; } +EXPORT_SYMBOL(cl_page_alloc); /** * Returns a cl_page with index \a idx at the object \a o, and associated with @@ -333,16 +184,13 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env, * * \see cl_object_find(), cl_lock_find() */ -static struct cl_page *cl_page_find0(const struct lu_env *env, - struct cl_object *o, - pgoff_t idx, struct page *vmpage, - enum cl_page_type type, - struct cl_page *parent) +struct cl_page *cl_page_find(const struct lu_env *env, + struct cl_object *o, + pgoff_t idx, struct page *vmpage, + enum cl_page_type type) { struct cl_page *page = NULL; - struct cl_page *ghost = NULL; struct cl_object_header *hdr; - int err; LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT); might_sleep(); @@ -368,120 +216,25 @@ static struct cl_page *cl_page_find0(const struct lu_env *env, * reference on it. */ page = cl_vmpage_page(vmpage, o); - PINVRNT(env, page, - ergo(page, - cl_page_vmpage(env, page) == vmpage && - (void *)radix_tree_lookup(&hdr->coh_tree, - idx) == page)); - } - if (page) - return page; + if (page) + return page; + } /* allocate and initialize cl_page */ page = cl_page_alloc(env, o, idx, vmpage, type); - if (IS_ERR(page)) - return page; - - if (type == CPT_TRANSIENT) { - if (parent) { - LASSERT(!page->cp_parent); - page->cp_parent = parent; - parent->cp_child = page; - } - return page; - } - - /* - * XXX optimization: use radix_tree_preload() here, and change tree - * gfp mask to GFP_KERNEL in cl_object_header_init(). - */ - spin_lock(&hdr->coh_page_guard); - err = radix_tree_insert(&hdr->coh_tree, idx, page); - if (err != 0) { - ghost = page; - /* - * Noted by Jay: a lock on \a vmpage protects cl_page_find() - * from this race, but - * - * 0. it's better to have cl_page interface "locally - * consistent" so that its correctness can be reasoned - * about without appealing to the (obscure world of) VM - * locking. - * - * 1. handling this race allows ->coh_tree to remain - * consistent even when VM locking is somehow busted, - * which is very useful during diagnosing and debugging. - */ - page = ERR_PTR(err); - CL_PAGE_DEBUG(D_ERROR, env, ghost, - "fail to insert into radix tree: %d\n", err); - } else { - if (parent) { - LASSERT(!page->cp_parent); - page->cp_parent = parent; - parent->cp_child = page; - } - hdr->coh_pages++; - } - spin_unlock(&hdr->coh_page_guard); - - if (unlikely(ghost)) { - cl_page_delete0(env, ghost, 0); - cl_page_free(env, ghost); - } return page; } - -struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o, - pgoff_t idx, struct page *vmpage, - enum cl_page_type type) -{ - return cl_page_find0(env, o, idx, vmpage, type, NULL); -} EXPORT_SYMBOL(cl_page_find); -struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o, - pgoff_t idx, struct page *vmpage, - struct cl_page *parent) -{ - return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent); -} -EXPORT_SYMBOL(cl_page_find_sub); - static inline int cl_page_invariant(const struct cl_page *pg) { - struct cl_object_header *header; - struct cl_page *parent; - struct cl_page *child; - struct cl_io *owner; - /* * Page invariant is protected by a VM lock. */ LINVRNT(cl_page_is_vmlocked(NULL, pg)); - header = cl_object_header(pg->cp_obj); - parent = pg->cp_parent; - child = pg->cp_child; - owner = pg->cp_owner; - - return cl_page_in_use(pg) && - ergo(parent, parent->cp_child == pg) && - ergo(child, child->cp_parent == pg) && - ergo(child, pg->cp_obj != child->cp_obj) && - ergo(parent, pg->cp_obj != parent->cp_obj) && - ergo(owner && parent, - parent->cp_owner == pg->cp_owner->ci_parent) && - ergo(owner && child, child->cp_owner->ci_parent == owner) && - /* - * Either page is early in initialization (has neither child - * nor parent yet), or it is in the object radix tree. - */ - ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE, - (void *)radix_tree_lookup(&header->coh_tree, - pg->cp_index) == pg || - (!child && !parent)); + return cl_page_in_use_noref(pg); } static void cl_page_state_set0(const struct lu_env *env, @@ -534,13 +287,9 @@ static void cl_page_state_set0(const struct lu_env *env, old = page->cp_state; PASSERT(env, page, allowed_transitions[old][state]); CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state); - for (; page; page = page->cp_child) { - PASSERT(env, page, page->cp_state == old); - PASSERT(env, page, - equi(state == CPS_OWNED, page->cp_owner)); - - cl_page_state_set_trust(page, state); - } + PASSERT(env, page, page->cp_state == old); + PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner)); + cl_page_state_set_trust(page, state); } static void cl_page_state_set(const struct lu_env *env, @@ -574,8 +323,6 @@ EXPORT_SYMBOL(cl_page_get); */ void cl_page_put(const struct lu_env *env, struct cl_page *page) { - PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent); - CL_PAGE_HEADER(D_TRACE, env, page, "%d\n", atomic_read(&page->cp_ref)); @@ -595,34 +342,10 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page) EXPORT_SYMBOL(cl_page_put); /** - * Returns a VM page associated with a given cl_page. - */ -struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page) -{ - const struct cl_page_slice *slice; - - /* - * Find uppermost layer with ->cpo_vmpage() method, and return its - * result. - */ - page = cl_page_top(page); - do { - list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { - if (slice->cpl_ops->cpo_vmpage) - return slice->cpl_ops->cpo_vmpage(env, slice); - } - page = page->cp_child; - } while (page); - LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */ -} -EXPORT_SYMBOL(cl_page_vmpage); - -/** * Returns a cl_page associated with a VM page, and given cl_object. */ struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj) { - struct cl_page *top; struct cl_page *page; KLASSERT(PageLocked(vmpage)); @@ -633,36 +356,15 @@ struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj) * bottom-to-top pass. */ - /* - * This loop assumes that ->private points to the top-most page. This - * can be rectified easily. - */ - top = (struct cl_page *)vmpage->private; - if (!top) - return NULL; - - for (page = top; page; page = page->cp_child) { - if (cl_object_same(page->cp_obj, obj)) { - cl_page_get_trust(page); - break; - } + page = (struct cl_page *)vmpage->private; + if (page) { + cl_page_get_trust(page); + LASSERT(page->cp_type == CPT_CACHEABLE); } - LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE)); return page; } EXPORT_SYMBOL(cl_vmpage_page); -/** - * Returns the top-page for a given page. - * - * \see cl_object_top(), cl_io_top() - */ -struct cl_page *cl_page_top(struct cl_page *page) -{ - return cl_page_top_trusted(page); -} -EXPORT_SYMBOL(cl_page_top); - const struct cl_page_slice *cl_page_at(const struct cl_page *page, const struct lu_device_type *dtype) { @@ -682,26 +384,43 @@ EXPORT_SYMBOL(cl_page_at); int (*__method)_proto; \ \ __result = 0; \ - __page = cl_page_top(__page); \ - do { \ - list_for_each_entry(__scan, &__page->cp_layers, \ - cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + \ - __op); \ - if (__method) { \ - __result = (*__method)(__env, __scan, \ - ## __VA_ARGS__); \ - if (__result != 0) \ - break; \ - } \ - } \ - __page = __page->cp_child; \ - } while (__page && __result == 0); \ + list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \ + __method = *(void **)((char *)__scan->cpl_ops + __op); \ + if (__method) { \ + __result = (*__method)(__env, __scan, ## __VA_ARGS__); \ + if (__result != 0) \ + break; \ + } \ + } \ if (__result > 0) \ __result = 0; \ __result; \ }) +#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...) \ +({ \ + const struct lu_env *__env = (_env); \ + struct cl_page *__page = (_page); \ + const struct cl_page_slice *__scan; \ + int __result; \ + ptrdiff_t __op = (_op); \ + int (*__method)_proto; \ + \ + __result = 0; \ + list_for_each_entry_reverse(__scan, &__page->cp_layers, \ + cpl_linkage) { \ + __method = *(void **)((char *)__scan->cpl_ops + __op); \ + if (__method) { \ + __result = (*__method)(__env, __scan, ## __VA_ARGS__); \ + if (__result != 0) \ + break; \ + } \ + } \ + if (__result > 0) \ + __result = 0; \ + __result; \ +}) + #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \ do { \ const struct lu_env *__env = (_env); \ @@ -710,18 +429,11 @@ do { \ ptrdiff_t __op = (_op); \ void (*__method)_proto; \ \ - __page = cl_page_top(__page); \ - do { \ - list_for_each_entry(__scan, &__page->cp_layers, \ - cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + \ - __op); \ - if (__method) \ - (*__method)(__env, __scan, \ - ## __VA_ARGS__); \ - } \ - __page = __page->cp_child; \ - } while (__page); \ + list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \ + __method = *(void **)((char *)__scan->cpl_ops + __op); \ + if (__method) \ + (*__method)(__env, __scan, ## __VA_ARGS__); \ + } \ } while (0) #define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...) \ @@ -732,20 +444,11 @@ do { \ ptrdiff_t __op = (_op); \ void (*__method)_proto; \ \ - /* get to the bottom page. */ \ - while (__page->cp_child) \ - __page = __page->cp_child; \ - do { \ - list_for_each_entry_reverse(__scan, &__page->cp_layers, \ - cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + \ - __op); \ - if (__method) \ - (*__method)(__env, __scan, \ - ## __VA_ARGS__); \ - } \ - __page = __page->cp_parent; \ - } while (__page); \ + list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \ + __method = *(void **)((char *)__scan->cpl_ops + __op); \ + if (__method) \ + (*__method)(__env, __scan, ## __VA_ARGS__); \ + } \ } while (0) static int cl_page_invoke(const struct lu_env *env, @@ -771,20 +474,17 @@ static void cl_page_invoid(const struct lu_env *env, static void cl_page_owner_clear(struct cl_page *page) { - for (page = cl_page_top(page); page; page = page->cp_child) { - if (page->cp_owner) { - LASSERT(page->cp_owner->ci_owned_nr > 0); - page->cp_owner->ci_owned_nr--; - page->cp_owner = NULL; - page->cp_task = NULL; - } + if (page->cp_owner) { + LASSERT(page->cp_owner->ci_owned_nr > 0); + page->cp_owner->ci_owned_nr--; + page->cp_owner = NULL; + page->cp_task = NULL; } } static void cl_page_owner_set(struct cl_page *page) { - for (page = cl_page_top(page); page; page = page->cp_child) - page->cp_owner->ci_owned_nr++; + page->cp_owner->ci_owned_nr++; } void cl_page_disown0(const struct lu_env *env, @@ -794,7 +494,7 @@ void cl_page_disown0(const struct lu_env *env, state = pg->cp_state; PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING); - PINVRNT(env, pg, cl_page_invariant(pg)); + PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING); cl_page_owner_clear(pg); if (state == CPS_OWNED) @@ -815,8 +515,9 @@ void cl_page_disown0(const struct lu_env *env, */ int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io) { + struct cl_io *top = cl_io_top((struct cl_io *)io); LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj)); - return pg->cp_state == CPS_OWNED && pg->cp_owner == io; + return pg->cp_state == CPS_OWNED && pg->cp_owner == top; } EXPORT_SYMBOL(cl_page_is_owned); @@ -847,7 +548,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io, PINVRNT(env, pg, !cl_page_is_owned(pg, io)); - pg = cl_page_top(pg); io = cl_io_top(io); if (pg->cp_state == CPS_FREEING) { @@ -861,7 +561,7 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io, if (result == 0) { PASSERT(env, pg, !pg->cp_owner); PASSERT(env, pg, !pg->cp_req); - pg->cp_owner = io; + pg->cp_owner = cl_io_top(io); pg->cp_task = current; cl_page_owner_set(pg); if (pg->cp_state != CPS_FREEING) { @@ -914,12 +614,11 @@ void cl_page_assume(const struct lu_env *env, { PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj)); - pg = cl_page_top(pg); io = cl_io_top(io); cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume)); PASSERT(env, pg, !pg->cp_owner); - pg->cp_owner = io; + pg->cp_owner = cl_io_top(io); pg->cp_task = current; cl_page_owner_set(pg); cl_page_state_set(env, pg, CPS_OWNED); @@ -943,7 +642,6 @@ void cl_page_unassume(const struct lu_env *env, PINVRNT(env, pg, cl_page_is_owned(pg, io)); PINVRNT(env, pg, cl_page_invariant(pg)); - pg = cl_page_top(pg); io = cl_io_top(io); cl_page_owner_clear(pg); cl_page_state_set(env, pg, CPS_CACHED); @@ -968,9 +666,9 @@ EXPORT_SYMBOL(cl_page_unassume); void cl_page_disown(const struct lu_env *env, struct cl_io *io, struct cl_page *pg) { - PINVRNT(env, pg, cl_page_is_owned(pg, io)); + PINVRNT(env, pg, cl_page_is_owned(pg, io) || + pg->cp_state == CPS_FREEING); - pg = cl_page_top(pg); io = cl_io_top(io); cl_page_disown0(env, io, pg); } @@ -1001,12 +699,8 @@ EXPORT_SYMBOL(cl_page_discard); * pages, e.g,. in a error handling cl_page_find()->cl_page_delete0() * path. Doesn't check page invariant. */ -static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, - int radix) +static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg) { - struct cl_page *tmp = pg; - - PASSERT(env, pg, pg == cl_page_top(pg)); PASSERT(env, pg, pg->cp_state != CPS_FREEING); /* @@ -1014,41 +708,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, */ cl_page_owner_clear(pg); - /* - * unexport the page firstly before freeing it so that - * the page content is considered to be invalid. - * We have to do this because a CPS_FREEING cl_page may - * be NOT under the protection of a cl_lock. - * Afterwards, if this page is found by other threads, then this - * page will be forced to reread. - */ - cl_page_export(env, pg, 0); cl_page_state_set0(env, pg, CPS_FREEING); - CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete), - (const struct lu_env *, const struct cl_page_slice *)); - - if (tmp->cp_type == CPT_CACHEABLE) { - if (!radix) - /* !radix means that @pg is not yet in the radix tree, - * skip removing it. - */ - tmp = pg->cp_child; - for (; tmp; tmp = tmp->cp_child) { - void *value; - struct cl_object_header *hdr; - - hdr = cl_object_header(tmp->cp_obj); - spin_lock(&hdr->coh_page_guard); - value = radix_tree_delete(&hdr->coh_tree, - tmp->cp_index); - PASSERT(env, tmp, value == tmp); - PASSERT(env, tmp, hdr->coh_pages > 0); - hdr->coh_pages--; - spin_unlock(&hdr->coh_page_guard); - cl_page_put(env, tmp); - } - } + CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete), + (const struct lu_env *, + const struct cl_page_slice *)); } /** @@ -1070,7 +734,6 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, * Once page reaches cl_page_state::CPS_FREEING, all remaining references will * drain after some time, at which point page will be recycled. * - * \pre pg == cl_page_top(pg) * \pre VM page is locked * \post pg->cp_state == CPS_FREEING * @@ -1079,30 +742,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, void cl_page_delete(const struct lu_env *env, struct cl_page *pg) { PINVRNT(env, pg, cl_page_invariant(pg)); - cl_page_delete0(env, pg, 1); + cl_page_delete0(env, pg); } EXPORT_SYMBOL(cl_page_delete); /** - * Unmaps page from user virtual memory. - * - * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The - * layer responsible for VM interaction has to unmap page from user space - * virtual memory. - * - * \see cl_page_operations::cpo_unmap() - */ -int cl_page_unmap(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg) -{ - PINVRNT(env, pg, cl_page_is_owned(pg, io)); - PINVRNT(env, pg, cl_page_invariant(pg)); - - return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap)); -} -EXPORT_SYMBOL(cl_page_unmap); - -/** * Marks page up-to-date. * * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The @@ -1129,7 +773,6 @@ int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg) int result; const struct cl_page_slice *slice; - pg = cl_page_top_trusted((struct cl_page *)pg); slice = container_of(pg->cp_layers.next, const struct cl_page_slice, cpl_linkage); PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked); @@ -1241,7 +884,7 @@ void cl_page_completion(const struct lu_env *env, cl_page_put(env, pg); if (anchor) - cl_sync_io_note(anchor, ioret); + cl_sync_io_note(env, anchor, ioret); } EXPORT_SYMBOL(cl_page_completion); @@ -1276,44 +919,6 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg, EXPORT_SYMBOL(cl_page_make_ready); /** - * Notify layers that high level io decided to place this page into a cache - * for future transfer. - * - * The layer implementing transfer engine (osc) has to register this page in - * its queues. - * - * \pre cl_page_is_owned(pg, io) - * \post cl_page_is_owned(pg, io) - * - * \see cl_page_operations::cpo_cache_add() - */ -int cl_page_cache_add(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg, enum cl_req_type crt) -{ - const struct cl_page_slice *scan; - int result = 0; - - PINVRNT(env, pg, crt < CRT_NR); - PINVRNT(env, pg, cl_page_is_owned(pg, io)); - PINVRNT(env, pg, cl_page_invariant(pg)); - - if (crt >= CRT_NR) - return -EINVAL; - - list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) { - if (!scan->cpl_ops->io[crt].cpo_cache_add) - continue; - - result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io); - if (result != 0) - break; - } - CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result); - return result; -} -EXPORT_SYMBOL(cl_page_cache_add); - -/** * Called if a pge is being written back by kernel's intention. * * \pre cl_page_is_owned(pg, io) @@ -1344,68 +949,21 @@ EXPORT_SYMBOL(cl_page_flush); * \see cl_page_operations::cpo_is_under_lock() */ int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io, - struct cl_page *page) + struct cl_page *page, pgoff_t *max_index) { int rc; PINVRNT(env, page, cl_page_invariant(page)); - rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock), - (const struct lu_env *, - const struct cl_page_slice *, struct cl_io *), - io); - PASSERT(env, page, rc != 0); + rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock), + (const struct lu_env *, + const struct cl_page_slice *, + struct cl_io *, pgoff_t *), + io, max_index); return rc; } EXPORT_SYMBOL(cl_page_is_under_lock); -static int page_prune_cb(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, void *cbdata) -{ - cl_page_own(env, io, page); - cl_page_unmap(env, io, page); - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - return CLP_GANG_OKAY; -} - -/** - * Purges all cached pages belonging to the object \a obj. - */ -int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj) -{ - struct cl_thread_info *info; - struct cl_object *obj = cl_object_top(clobj); - struct cl_io *io; - int result; - - info = cl_env_info(env); - io = &info->clt_io; - - /* - * initialize the io. This is ugly since we never do IO in this - * function, we just make cl_page_list functions happy. -jay - */ - io->ci_obj = obj; - io->ci_ignore_layout = 1; - result = cl_io_init(env, io, CIT_MISC, obj); - if (result != 0) { - cl_io_fini(env, io); - return io->ci_result; - } - - do { - result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF, - page_prune_cb, NULL); - if (result == CLP_GANG_RESCHED) - cond_resched(); - } while (result != CLP_GANG_OKAY); - - cl_io_fini(env, io); - return result; -} -EXPORT_SYMBOL(cl_pages_prune); - /** * Tells transfer engine that only part of a page is to be transmitted. * @@ -1431,9 +989,8 @@ void cl_page_header_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg) { (*printer)(env, cookie, - "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n", + "page@%p[%d %p %d %d %d %p %p %#x]\n", pg, atomic_read(&pg->cp_ref), pg->cp_obj, - pg->cp_index, pg->cp_parent, pg->cp_child, pg->cp_state, pg->cp_error, pg->cp_type, pg->cp_owner, pg->cp_req, pg->cp_flags); } @@ -1445,11 +1002,7 @@ EXPORT_SYMBOL(cl_page_header_print); void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg) { - struct cl_page *scan; - - for (scan = cl_page_top((struct cl_page *)pg); scan; - scan = scan->cp_child) - cl_page_header_print(env, cookie, printer, scan); + cl_page_header_print(env, cookie, printer, pg); CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print), (const struct lu_env *env, const struct cl_page_slice *slice, @@ -1509,21 +1062,59 @@ EXPORT_SYMBOL(cl_page_size); * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add() */ void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice, - struct cl_object *obj, + struct cl_object *obj, pgoff_t index, const struct cl_page_operations *ops) { list_add_tail(&slice->cpl_linkage, &page->cp_layers); slice->cpl_obj = obj; + slice->cpl_index = index; slice->cpl_ops = ops; slice->cpl_page = page; } EXPORT_SYMBOL(cl_page_slice_add); -int cl_page_init(void) +/** + * Allocate and initialize cl_cache, called by ll_init_sbi(). + */ +struct cl_client_cache *cl_cache_init(unsigned long lru_page_max) { - return 0; + struct cl_client_cache *cache = NULL; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return NULL; + + /* Initialize cache data */ + atomic_set(&cache->ccc_users, 1); + cache->ccc_lru_max = lru_page_max; + atomic_set(&cache->ccc_lru_left, lru_page_max); + spin_lock_init(&cache->ccc_lru_lock); + INIT_LIST_HEAD(&cache->ccc_lru); + + atomic_set(&cache->ccc_unstable_nr, 0); + init_waitqueue_head(&cache->ccc_unstable_waitq); + + return cache; +} +EXPORT_SYMBOL(cl_cache_init); + +/** + * Increase cl_cache refcount + */ +void cl_cache_incref(struct cl_client_cache *cache) +{ + atomic_inc(&cache->ccc_users); } +EXPORT_SYMBOL(cl_cache_incref); -void cl_page_fini(void) +/** + * Decrease cl_cache refcount and free the cache if refcount=0. + * Since llite, lov and osc all hold cl_cache refcount, + * the free will not cause race. (LU-6173) + */ +void cl_cache_decref(struct cl_client_cache *cache) { + if (atomic_dec_and_test(&cache->ccc_users)) + kfree(cache); } +EXPORT_SYMBOL(cl_cache_decref); diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c index c2cf015962dd..d9d2a1952b8b 100644 --- a/drivers/staging/lustre/lustre/obdclass/class_obd.c +++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -60,6 +56,8 @@ unsigned int obd_dump_on_eviction; EXPORT_SYMBOL(obd_dump_on_eviction); unsigned int obd_max_dirty_pages = 256; EXPORT_SYMBOL(obd_max_dirty_pages); +atomic_t obd_unstable_pages; +EXPORT_SYMBOL(obd_unstable_pages); atomic_t obd_dirty_pages; EXPORT_SYMBOL(obd_dirty_pages); unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */ @@ -335,7 +333,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) err = 0; goto out; } - } if (data->ioc_dev == OBD_DEV_BY_DEVNAME) { @@ -461,7 +458,7 @@ static int obd_init_checks(void) CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len); ret = -EINVAL; } - if ((u64val & ~CFS_PAGE_MASK) >= PAGE_SIZE) { + if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) { CWARN("mask failed: u64val %llu >= %llu\n", u64val, (__u64)PAGE_SIZE); ret = -EINVAL; diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c index 43a7f7a79b35..8acf67239fa8 100644 --- a/drivers/staging/lustre/lustre/obdclass/debug.c +++ b/drivers/staging/lustre/lustre/obdclass/debug.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -68,8 +64,8 @@ int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id) LASSERT(addr); - ne_off = le64_to_cpu (off); - id = le64_to_cpu (id); + ne_off = le64_to_cpu(off); + id = le64_to_cpu(id); if (memcmp(addr, (char *)&ne_off, LPDS)) { CDEBUG(D_ERROR, "%s: id %#llx offset %llu off: %#llx != %#llx\n", who, id, off, *(__u64 *)addr, ne_off); diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c index cf97b8f06764..99c2da632b51 100644 --- a/drivers/staging/lustre/lustre/obdclass/genops.c +++ b/drivers/staging/lustre/lustre/obdclass/genops.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -604,7 +600,6 @@ int obd_init_caches(void) out: obd_cleanup_caches(); return -ENOMEM; - } /* map connection to client */ diff --git a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c b/drivers/staging/lustre/lustre/obdclass/kernelcomm.c index 8405eccdac19..a0f65c470f4d 100644 --- a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c +++ b/drivers/staging/lustre/lustre/obdclass/kernelcomm.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c index 8eddf206f1ed..33342bfcc90e 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -158,9 +154,7 @@ int obd_ioctl_popdata(void __user *arg, void *data, int len) { int err; - err = copy_to_user(arg, data, len); - if (err) - err = -EFAULT; + err = copy_to_user(arg, data, len) ? -EFAULT : 0; return err; } EXPORT_SYMBOL(obd_ioctl_popdata); diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c index b41b65e2f021..c6cc6a7666e3 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c index e6bf414a4444..8f70dd2686f9 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c index 992573eae1b1..1784ca063428 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog.c +++ b/drivers/staging/lustre/lustre/obdclass/llog.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -123,8 +119,10 @@ static int llog_read_header(const struct lu_env *env, handle->lgh_last_idx = 0; /* header is record with index 0 */ llh->llh_count = 1; /* for the header record */ llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC; - llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE; - llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0; + llh->llh_hdr.lrh_len = LLOG_CHUNK_SIZE; + llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE; + llh->llh_hdr.lrh_index = 0; + llh->llh_tail.lrt_index = 0; llh->llh_timestamp = ktime_get_real_seconds(); if (uuid) memcpy(&llh->llh_tgtuuid, uuid, @@ -265,7 +263,6 @@ repeat: for (rec = (struct llog_rec_hdr *)buf; (char *)rec < buf + LLOG_CHUNK_SIZE; rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)) { - CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n", rec, rec->lrh_type); diff --git a/drivers/staging/lustre/lustre/obdclass/llog_cat.c b/drivers/staging/lustre/lustre/obdclass/llog_cat.c index c27d4ec1df9e..a82a2950295a 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog_cat.c +++ b/drivers/staging/lustre/lustre/obdclass/llog_cat.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/llog_internal.h b/drivers/staging/lustre/lustre/obdclass/llog_internal.h index 7fb48dda355e..f7949525d952 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog_internal.h +++ b/drivers/staging/lustre/lustre/obdclass/llog_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c index 826623f528da..6ace7e097859 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c +++ b/drivers/staging/lustre/lustre/obdclass/llog_obd.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c index 967ba2e1bfcb..f7b9b190350c 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog_swab.c +++ b/drivers/staging/lustre/lustre/obdclass/llog_swab.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c index d93f42fee420..279b625f1afe 100644 --- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c +++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -49,7 +45,7 @@ static const char * const obd_connect_names[] = { "read_only", "lov_index", - "unused", + "connect_from_mds", "write_grant", "server_lock", "version", @@ -122,6 +118,56 @@ int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep) } EXPORT_SYMBOL(obd_connect_flags2str); +static void obd_connect_data_seqprint(struct seq_file *m, + struct obd_connect_data *ocd) +{ + int flags; + + LASSERT(ocd); + flags = ocd->ocd_connect_flags; + + seq_printf(m, " connect_data:\n" + " flags: %llx\n" + " instance: %u\n", + ocd->ocd_connect_flags, + ocd->ocd_instance); + if (flags & OBD_CONNECT_VERSION) + seq_printf(m, " target_version: %u.%u.%u.%u\n", + OBD_OCD_VERSION_MAJOR(ocd->ocd_version), + OBD_OCD_VERSION_MINOR(ocd->ocd_version), + OBD_OCD_VERSION_PATCH(ocd->ocd_version), + OBD_OCD_VERSION_FIX(ocd->ocd_version)); + if (flags & OBD_CONNECT_MDS) + seq_printf(m, " mdt_index: %d\n", ocd->ocd_group); + if (flags & OBD_CONNECT_GRANT) + seq_printf(m, " initial_grant: %d\n", ocd->ocd_grant); + if (flags & OBD_CONNECT_INDEX) + seq_printf(m, " target_index: %u\n", ocd->ocd_index); + if (flags & OBD_CONNECT_BRW_SIZE) + seq_printf(m, " max_brw_size: %d\n", ocd->ocd_brw_size); + if (flags & OBD_CONNECT_IBITS) + seq_printf(m, " ibits_known: %llx\n", + ocd->ocd_ibits_known); + if (flags & OBD_CONNECT_GRANT_PARAM) + seq_printf(m, " grant_block_size: %d\n" + " grant_inode_size: %d\n" + " grant_extent_overhead: %d\n", + ocd->ocd_blocksize, + ocd->ocd_inodespace, + ocd->ocd_grant_extent); + if (flags & OBD_CONNECT_TRANSNO) + seq_printf(m, " first_transno: %llx\n", + ocd->ocd_transno); + if (flags & OBD_CONNECT_CKSUM) + seq_printf(m, " cksum_types: %#x\n", + ocd->ocd_cksum_types); + if (flags & OBD_CONNECT_MAX_EASIZE) + seq_printf(m, " max_easize: %d\n", ocd->ocd_max_easize); + if (flags & OBD_CONNECT_MAXBYTES) + seq_printf(m, " max_object_bytes: %llx\n", + ocd->ocd_maxbytes); +} + int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val, int mult) { @@ -624,6 +670,7 @@ int lprocfs_rd_import(struct seq_file *m, void *data) struct obd_device *obd = data; struct obd_import *imp; struct obd_import_conn *conn; + struct obd_connect_data *ocd; int j; int k; int rw = 0; @@ -635,9 +682,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data) return rc; imp = obd->u.cli.cl_import; + ocd = &imp->imp_connect_data; - seq_printf(m, - "import:\n" + seq_printf(m, "import:\n" " name: %s\n" " target: %s\n" " state: %s\n" @@ -649,9 +696,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data) imp->imp_connect_data.ocd_instance); obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags, ", "); - seq_printf(m, - " ]\n" - " import_flags: [ "); + seq_printf(m, " ]\n"); + obd_connect_data_seqprint(m, ocd); + seq_printf(m, " import_flags: [ "); obd_import_flags2str(imp, m); seq_printf(m, @@ -694,8 +741,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data) do_div(sum, ret.lc_count); ret.lc_sum = sum; - } else + } else { ret.lc_sum = 0; + } seq_printf(m, " rpcs:\n" " inflight: %u\n" @@ -1471,10 +1519,10 @@ EXPORT_SYMBOL(lprocfs_oh_tally); void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) { - unsigned int val; + unsigned int val = 0; - for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++) - ; + if (likely(value != 0)) + val = min(fls(value - 1), OBD_HIST_MAX); lprocfs_oh_tally(oh, val); } diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index 978568ada8e9..9b03059f34d6 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -55,6 +51,7 @@ #include "../include/lustre_disk.h" #include "../include/lustre_fid.h" #include "../include/lu_object.h" +#include "../include/cl_object.h" #include "../include/lu_ref.h" #include <linux/list.h> @@ -103,7 +100,6 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o) if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) { if (lu_object_is_dying(top)) { - /* * somebody may be waiting for this, currently only * used for cl_object, see cl_object_put_last(). @@ -357,7 +353,6 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr) if (count > 0 && --count == 0) break; - } cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1); cond_resched(); @@ -715,8 +710,9 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env, obj = lu_object_locate(top->lo_header, dev->ld_type); if (!obj) lu_object_put(env, top); - } else + } else { obj = top; + } return obj; } EXPORT_SYMBOL(lu_object_find_slice); @@ -935,7 +931,7 @@ static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d) * Initialize site \a s, with \a d as the top level device. */ #define LU_SITE_BITS_MIN 12 -#define LU_SITE_BITS_MAX 24 +#define LU_SITE_BITS_MAX 19 /** * total 256 buckets, we don't want too many buckets because: * - consume too much memory @@ -1468,6 +1464,7 @@ void lu_context_key_quiesce(struct lu_context_key *key) /* * XXX layering violation. */ + cl_env_cache_purge(~0); key->lct_tags |= LCT_QUIESCENT; /* * XXX memory barrier has to go here. diff --git a/drivers/staging/lustre/lustre/obdclass/lu_ref.c b/drivers/staging/lustre/lustre/obdclass/lu_ref.c index 993697b660f6..e9f6040d19eb 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_ref.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_ref.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c index 403ceea06186..082f530c527c 100644 --- a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c +++ b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c index 5f812460b3ea..5974a9bf77c0 100644 --- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c +++ b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -163,8 +159,9 @@ int class_del_uuid(const char *uuid) break; } } - } else + } else { list_splice_init(&g_uuid_list, &deathrow); + } spin_unlock(&g_uuid_lock); if (uuid && list_empty(&deathrow)) { diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c index 5395e994deab..0eab1236501b 100644 --- a/drivers/staging/lustre/lustre/obdclass/obd_config.c +++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -606,7 +602,7 @@ static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg) return rc; } -LIST_HEAD(lustre_profile_list); +static LIST_HEAD(lustre_profile_list); struct lustre_profile *class_get_profile(const char *prof) { @@ -961,7 +957,6 @@ int class_process_config(struct lustre_cfg *lcfg) default: { err = obd_process_config(obd, sizeof(*lcfg), lcfg); goto out; - } } out: @@ -1001,7 +996,13 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, for (i = 1; i < lcfg->lcfg_bufcount; i++) { key = lustre_cfg_buf(lcfg, i); /* Strip off prefix */ - class_match_param(key, prefix, &key); + if (class_match_param(key, prefix, &key)) { + /* + * If the prefix doesn't match, return error so we + * can pass it down the stack + */ + return -ENOSYS; + } sval = strchr(key, '='); if (!sval || (*(sval + 1) == 0)) { CERROR("Can't parse param %s (missing '=')\n", key); @@ -1016,8 +1017,8 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, /* Search proc entries */ while (lvars[j].name) { var = &lvars[j]; - if (!class_match_param(key, var->name, NULL) - && keylen == strlen(var->name)) { + if (!class_match_param(key, var->name, NULL) && + keylen == strlen(var->name)) { matched++; rc = -EROFS; if (var->fops && var->fops->write) { @@ -1034,18 +1035,14 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, j++; } if (!matched) { - /* If the prefix doesn't match, return error so we - * can pass it down the stack - */ - if (strnchr(key, keylen, '.')) - return -ENOSYS; - CERROR("%s: unknown param %s\n", + CERROR("%.*s: %s unknown param %s\n", + (int)strlen(prefix) - 1, prefix, (char *)lustre_cfg_string(lcfg, 0), key); /* rc = -EINVAL; continue parsing other params */ skip++; } else if (rc < 0) { - CERROR("writing proc entry %s err %d\n", - var->name, rc); + CERROR("%s: error writing proc entry '%s': rc = %d\n", + prefix, var->name, rc); rc = 0; } else { CDEBUG(D_CONFIG, "%s.%.*s: Set parameter %.*s=%s\n", @@ -1076,7 +1073,7 @@ int class_config_llog_handler(const struct lu_env *env, { struct config_llog_instance *clli = data; int cfg_len = rec->lrh_len; - char *cfg_buf = (char *) (rec + 1); + char *cfg_buf = (char *)(rec + 1); int rc = 0; switch (rec->lrh_type) { @@ -1350,6 +1347,7 @@ static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf, lustre_cfg_string(lcfg, i)); } } + ptr += snprintf(ptr, end - ptr, "\n"); /* return consumed bytes */ rc = ptr - buf; return rc; @@ -1368,7 +1366,7 @@ int class_config_dump_handler(const struct lu_env *env, if (rec->lrh_type == OBD_CFG_REC) { class_config_parse_rec(rec, outstr, 256); - LCONSOLE(D_WARNING, " %s\n", outstr); + LCONSOLE(D_WARNING, " %s", outstr); } else { LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type); rc = -EINVAL; diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c index d3e28a389ac1..aa84a50e9904 100644 --- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c +++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -102,7 +98,7 @@ int lustre_process_log(struct super_block *sb, char *logname, LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n", mgc->obd_name, logname, rc); - if (rc) + else if (rc) LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n", mgc->obd_name, logname, rc); @@ -192,7 +188,7 @@ static int lustre_start_simple(char *obdname, char *type, char *uuid, return rc; } -DEFINE_MUTEX(mgc_start_lock); +static DEFINE_MUTEX(mgc_start_lock); /** Set up a mgc obd to process startup logs * @@ -307,7 +303,8 @@ int lustre_start_mgc(struct super_block *sb) while (class_parse_nid(ptr, &nid, &ptr) == 0) { rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, NULL, NULL, NULL); - i++; + if (!rc) + i++; /* Stop at the first failover nid */ if (*ptr == ':') break; @@ -345,16 +342,18 @@ int lustre_start_mgc(struct super_block *sb) sprintf(niduuid, "%s_%x", mgcname, i); j = 0; while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) { - j++; - rc = do_lcfg(mgcname, nid, - LCFG_ADD_UUID, niduuid, NULL, NULL, NULL); + rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, + NULL, NULL, NULL); + if (!rc) + ++j; if (*ptr == ':') break; } if (j > 0) { rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN, niduuid, NULL, NULL, NULL); - i++; + if (!rc) + i++; } else { /* at ":/fsname" */ break; diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c index e6436cb4ac62..8583a4a8c206 100644 --- a/drivers/staging/lustre/lustre/obdclass/obdo.c +++ b/drivers/staging/lustre/lustre/obdclass/obdo.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -185,8 +181,7 @@ void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid) op_data->op_attr.ia_valid |= ATTR_BLOCKS; } if (valid & OBD_MD_FLFLAGS) { - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = - oa->o_flags; + op_data->op_attr_flags = oa->o_flags; op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; } } diff --git a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c index fb4e3ae845e0..4bad1fa27d40 100644 --- a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c +++ b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdclass/uuid.c b/drivers/staging/lustre/lustre/obdclass/uuid.c index b0b0157a6334..abd9b1ae72cd 100644 --- a/drivers/staging/lustre/lustre/obdclass/uuid.c +++ b/drivers/staging/lustre/lustre/obdclass/uuid.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c index 1e83669c204d..5b29c4a44fe5 100644 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -81,7 +77,6 @@ struct echo_object_conf { struct echo_page { struct cl_page_slice ep_cl; struct mutex ep_lock; - struct page *ep_vmpage; }; struct echo_lock { @@ -164,15 +159,13 @@ static int cl_echo_object_put(struct echo_object *eco); static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, struct page **pages, int npages, int async); -static struct echo_thread_info *echo_env_info(const struct lu_env *env); - struct echo_thread_info { struct echo_object_conf eti_conf; struct lustre_md eti_md; struct cl_2queue eti_queue; struct cl_io eti_io; - struct cl_lock_descr eti_descr; + struct cl_lock eti_lock; struct lu_fid eti_fid; struct lu_fid eti_fid2; }; @@ -219,12 +212,6 @@ static struct lu_kmem_descr echo_caches[] = { * * @{ */ -static struct page *echo_page_vmpage(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - return cl2echo_page(slice)->ep_vmpage; -} - static int echo_page_own(const struct lu_env *env, const struct cl_page_slice *slice, struct cl_io *io, int nonblock) @@ -273,12 +260,10 @@ static void echo_page_completion(const struct lu_env *env, static void echo_page_fini(const struct lu_env *env, struct cl_page_slice *slice) { - struct echo_page *ep = cl2echo_page(slice); struct echo_object *eco = cl2echo_obj(slice->cpl_obj); - struct page *vmpage = ep->ep_vmpage; atomic_dec(&eco->eo_npages); - put_page(vmpage); + put_page(slice->cpl_page->cp_vmpage); } static int echo_page_prep(const struct lu_env *env, @@ -295,7 +280,8 @@ static int echo_page_print(const struct lu_env *env, struct echo_page *ep = cl2echo_page(slice); (*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n", - ep, mutex_is_locked(&ep->ep_lock), ep->ep_vmpage); + ep, mutex_is_locked(&ep->ep_lock), + slice->cpl_page->cp_vmpage); return 0; } @@ -303,7 +289,6 @@ static const struct cl_page_operations echo_page_ops = { .cpo_own = echo_page_own, .cpo_disown = echo_page_disown, .cpo_discard = echo_page_discard, - .cpo_vmpage = echo_page_vmpage, .cpo_fini = echo_page_fini, .cpo_print = echo_page_print, .cpo_is_vmlocked = echo_page_is_vmlocked, @@ -336,26 +321,8 @@ static void echo_lock_fini(const struct lu_env *env, kmem_cache_free(echo_lock_kmem, ecl); } -static void echo_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct echo_lock *ecl = cl2echo_lock(slice); - - LASSERT(list_empty(&ecl->el_chain)); -} - -static int echo_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *unused) -{ - return 1; -} - static struct cl_lock_operations echo_lock_ops = { .clo_fini = echo_lock_fini, - .clo_delete = echo_lock_delete, - .clo_fits_into = echo_lock_fits_into }; /** @} echo_lock */ @@ -367,15 +334,14 @@ static struct cl_lock_operations echo_lock_ops = { * @{ */ static int echo_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { struct echo_page *ep = cl_object_page_slice(obj, page); struct echo_object *eco = cl2echo_obj(obj); - ep->ep_vmpage = vmpage; - get_page(vmpage); + get_page(page->cp_vmpage); mutex_init(&ep->ep_lock); - cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops); + cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops); atomic_inc(&eco->eo_npages); return 0; } @@ -568,6 +534,8 @@ static struct lu_object *echo_object_alloc(const struct lu_env *env, obj = &echo_obj2cl(eco)->co_lu; cl_object_header_init(hdr); + hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page)); + lu_object_init(obj, &hdr->coh_lu, dev); lu_object_add_top(&hdr->coh_lu, obj); @@ -694,8 +662,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, struct obd_device *obd = NULL; /* to keep compiler happy */ struct obd_device *tgt; const char *tgt_type_name; - int rc; - int cleanup = 0; + int rc, err; ed = kzalloc(sizeof(*ed), GFP_NOFS); if (!ed) { @@ -703,16 +670,14 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, goto out; } - cleanup = 1; cd = &ed->ed_cl; rc = cl_device_init(cd, t); if (rc) - goto out; + goto out_free; cd->cd_lu_dev.ld_ops = &echo_device_lu_ops; cd->cd_ops = &echo_device_cl_ops; - cleanup = 2; obd = class_name2obd(lustre_cfg_string(cfg, 0)); LASSERT(obd); LASSERT(env); @@ -722,28 +687,25 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, CERROR("Can not find tgt device %s\n", lustre_cfg_string(cfg, 1)); rc = -ENODEV; - goto out; + goto out_device_fini; } next = tgt->obd_lu_dev; if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) { CERROR("echo MDT client must be run on server\n"); rc = -EOPNOTSUPP; - goto out; + goto out_device_fini; } rc = echo_site_init(env, ed); if (rc) - goto out; - - cleanup = 3; + goto out_device_fini; rc = echo_client_setup(env, obd, cfg); if (rc) - goto out; + goto out_site_fini; ed->ed_ec = &obd->u.echo_client; - cleanup = 4; /* if echo client is to be stacked upon ost device, the next is * NULL since ost is not a clio device so far @@ -755,7 +717,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, if (next) { if (next->ld_site) { rc = -EBUSY; - goto out; + goto out_cleanup; } next->ld_site = &ed->ed_site->cs_lu; @@ -763,7 +725,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, next->ld_type->ldt_name, NULL); if (rc) - goto out; + goto out_cleanup; } else { LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0); @@ -771,27 +733,19 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, ed->ed_next = next; return &cd->cd_lu_dev; -out: - switch (cleanup) { - case 4: { - int rc2; - - rc2 = echo_client_cleanup(obd); - if (rc2) - CERROR("Cleanup obd device %s error(%d)\n", - obd->obd_name, rc2); - } - case 3: - echo_site_fini(env, ed); - case 2: - cl_device_fini(&ed->ed_cl); - case 1: - kfree(ed); - case 0: - default: - break; - } +out_cleanup: + err = echo_client_cleanup(obd); + if (err) + CERROR("Cleanup obd device %s error(%d)\n", + obd->obd_name, err); +out_site_fini: + echo_site_fini(env, ed); +out_device_fini: + cl_device_fini(&ed->ed_cl); +out_free: + kfree(ed); +out: return ERR_PTR(rc); } @@ -819,16 +773,7 @@ static void echo_lock_release(const struct lu_env *env, { struct cl_lock *clk = echo_lock2cl(ecl); - cl_lock_get(clk); - cl_unuse(env, clk); - cl_lock_release(env, clk, "ec enqueue", ecl->el_object); - if (!still_used) { - cl_lock_mutex_get(env, clk); - cl_lock_cancel(env, clk); - cl_lock_delete(env, clk); - cl_lock_mutex_put(env, clk); - } - cl_lock_put(env, clk); + cl_lock_release(env, clk); } static struct lu_device *echo_device_free(const struct lu_env *env, @@ -1022,9 +967,11 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco, info = echo_env_info(env); io = &info->eti_io; - descr = &info->eti_descr; + lck = &info->eti_lock; obj = echo_obj2cl(eco); + memset(lck, 0, sizeof(*lck)); + descr = &lck->cll_descr; descr->cld_obj = obj; descr->cld_start = cl_index(obj, start); descr->cld_end = cl_index(obj, end); @@ -1032,25 +979,20 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco, descr->cld_enq_flags = enqflags; io->ci_obj = obj; - lck = cl_lock_request(env, io, descr, "ec enqueue", eco); - if (lck) { + rc = cl_lock_request(env, io, lck); + if (rc == 0) { struct echo_client_obd *ec = eco->eo_dev->ed_ec; struct echo_lock *el; - rc = cl_wait(env, lck); - if (rc == 0) { - el = cl2echo_lock(cl_lock_at(lck, &echo_device_type)); - spin_lock(&ec->ec_lock); - if (list_empty(&el->el_chain)) { - list_add(&el->el_chain, &ec->ec_locks); - el->el_cookie = ++ec->ec_unique; - } - atomic_inc(&el->el_refcount); - *cookie = el->el_cookie; - spin_unlock(&ec->ec_lock); - } else { - cl_lock_release(env, lck, "ec enqueue", current); + el = cl2echo_lock(cl_lock_at(lck, &echo_device_type)); + spin_lock(&ec->ec_lock); + if (list_empty(&el->el_chain)) { + list_add(&el->el_chain, &ec->ec_locks); + el->el_cookie = ++ec->ec_unique; } + atomic_inc(&el->el_refcount); + *cookie = el->el_cookie; + spin_unlock(&ec->ec_lock); } return rc; } @@ -1085,22 +1027,17 @@ static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed, return 0; } -static int cl_echo_async_brw(const struct lu_env *env, struct cl_io *io, - enum cl_req_type unused, struct cl_2queue *queue) +static void echo_commit_callback(const struct lu_env *env, struct cl_io *io, + struct cl_page *page) { - struct cl_page *clp; - struct cl_page *temp; - int result = 0; + struct echo_thread_info *info; + struct cl_2queue *queue; - cl_page_list_for_each_safe(clp, temp, &queue->c2_qin) { - int rc; + info = echo_env_info(env); + LASSERT(io == &info->eti_io); - rc = cl_page_cache_add(env, io, clp, CRT_WRITE); - if (rc == 0) - continue; - result = result ?: rc; - } - return result; + queue = &info->eti_queue; + cl_page_list_add(&queue->c2_qout, page); } static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, @@ -1119,7 +1056,7 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, int rc; int i; - LASSERT((offset & ~CFS_PAGE_MASK) == 0); + LASSERT((offset & ~PAGE_MASK) == 0); LASSERT(ed->ed_next); env = cl_env_get(&refcheck); if (IS_ERR(env)) @@ -1179,7 +1116,9 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, async = async && (typ == CRT_WRITE); if (async) - rc = cl_echo_async_brw(env, io, typ, queue); + rc = cl_io_commit_async(env, io, &queue->c2_qin, + 0, PAGE_SIZE, + echo_commit_callback); else rc = cl_io_submit_sync(env, io, typ, queue, 0); CDEBUG(D_INFO, "echo_client %s write returns %d\n", @@ -1387,7 +1326,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); if (count <= 0 || - (count & (~CFS_PAGE_MASK)) != 0) + (count & (~PAGE_MASK)) != 0) return -EINVAL; /* XXX think again with misaligned I/O */ @@ -1409,7 +1348,6 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, for (i = 0, pgp = pga, off = offset; i < npages; i++, pgp++, off += PAGE_SIZE) { - LASSERT(!pgp->pg); /* for cleanup */ rc = -ENOMEM; @@ -1470,7 +1408,7 @@ static int echo_client_prep_commit(const struct lu_env *env, u64 npages, tot_pages; int i, ret = 0, brw_flags = 0; - if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0) + if (count <= 0 || (count & (~PAGE_MASK)) != 0) return -EINVAL; npages = batch >> PAGE_SHIFT; @@ -1886,7 +1824,6 @@ static int __init obdecho_init(void) static void /*__exit*/ obdecho_exit(void) { echo_client_exit(); - } MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>"); diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c index a3358c39b2f1..7e83d395b998 100644 --- a/drivers/staging/lustre/lustre/osc/lproc_osc.c +++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -121,9 +117,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj, atomic_add(added, &osc_pool_req_count); } - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_max_rpcs_in_flight = val; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return count; } @@ -139,9 +135,9 @@ static ssize_t max_dirty_mb_show(struct kobject *kobj, long val; int mult; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); val = cli->cl_dirty_max; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); mult = 1 << 20; return lprocfs_read_frac_helper(buf, PAGE_SIZE, val, mult); @@ -169,10 +165,10 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj, pages_number > totalram_pages / 4) /* 1/4 of RAM */ return -ERANGE; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT); osc_wake_cache_waiters(cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return count; } @@ -222,8 +218,16 @@ static ssize_t osc_cached_mb_seq_write(struct file *file, return -ERANGE; rc = atomic_read(&cli->cl_lru_in_list) - pages_number; - if (rc > 0) - (void)osc_lru_shrink(cli, rc); + if (rc > 0) { + struct lu_env *env; + int refcheck; + + env = cl_env_get(&refcheck); + if (!IS_ERR(env)) { + (void)osc_lru_shrink(env, cli, rc, true); + cl_env_put(env, &refcheck); + } + } return count; } @@ -239,9 +243,9 @@ static ssize_t cur_dirty_bytes_show(struct kobject *kobj, struct client_obd *cli = &dev->u.cli; int len; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); len = sprintf(buf, "%lu\n", cli->cl_dirty); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return len; } @@ -256,9 +260,9 @@ static ssize_t cur_grant_bytes_show(struct kobject *kobj, struct client_obd *cli = &dev->u.cli; int len; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); len = sprintf(buf, "%lu\n", cli->cl_avail_grant); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return len; } @@ -279,12 +283,12 @@ static ssize_t cur_grant_bytes_store(struct kobject *kobj, return rc; /* this is only for shrinking grant */ - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (val >= cli->cl_avail_grant) { - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return -EINVAL; } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); if (cli->cl_import->imp_state == LUSTRE_IMP_FULL) rc = osc_shrink_grant_to_target(cli, val); @@ -303,9 +307,9 @@ static ssize_t cur_lost_grant_bytes_show(struct kobject *kobj, struct client_obd *cli = &dev->u.cli; int len; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); len = sprintf(buf, "%lu\n", cli->cl_lost_grant); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return len; } @@ -577,14 +581,31 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj, if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) { return -ERANGE; } - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_max_pages_per_rpc = val; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return count; } LUSTRE_RW_ATTR(max_pages_per_rpc); +static ssize_t unstable_stats_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct obd_device *dev = container_of(kobj, struct obd_device, + obd_kobj); + struct client_obd *cli = &dev->u.cli; + int pages, mb; + + pages = atomic_read(&cli->cl_unstable_count); + mb = (pages * PAGE_SIZE) >> 20; + + return sprintf(buf, "unstable_pages: %8d\n" + "unstable_mb: %8d\n", pages, mb); +} +LUSTRE_RO_ATTR(unstable_stats); + LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags); LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid); LPROC_SEQ_FOPS_RO_TYPE(osc, conn_uuid); @@ -623,7 +644,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) ktime_get_real_ts64(&now); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); seq_printf(seq, "snapshot_time: %llu.%9lu (secs.usecs)\n", (s64)now.tv_sec, (unsigned long)now.tv_nsec); @@ -707,7 +728,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) break; } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return 0; } @@ -794,6 +815,7 @@ static struct attribute *osc_attrs[] = { &lustre_attr_max_pages_per_rpc.attr, &lustre_attr_max_rpcs_in_flight.attr, &lustre_attr_resend_count.attr, + &lustre_attr_unstable_stats.attr, NULL, }; diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index 5f25bf83dcfc..d011135802d5 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -76,6 +72,8 @@ static inline char *ext_flags(struct osc_extent *ext, char *flags) *buf++ = ext->oe_rw ? 'r' : 'w'; if (ext->oe_intree) *buf++ = 'i'; + if (ext->oe_sync) + *buf++ = 'S'; if (ext->oe_srvlock) *buf++ = 's'; if (ext->oe_hp) @@ -121,9 +119,13 @@ static const char *oes_strings[] = { __ext->oe_grants, __ext->oe_nr_pages, \ list_empty_marker(&__ext->oe_pages), \ waitqueue_active(&__ext->oe_waitq) ? '+' : '-', \ - __ext->oe_osclock, __ext->oe_mppr, __ext->oe_owner, \ + __ext->oe_dlmlock, __ext->oe_mppr, __ext->oe_owner, \ /* ----- part 4 ----- */ \ ## __VA_ARGS__); \ + if (lvl == D_ERROR && __ext->oe_dlmlock) \ + LDLM_ERROR(__ext->oe_dlmlock, "extent: %p", __ext); \ + else \ + LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p", __ext); \ } while (0) #undef EASSERTF @@ -240,20 +242,25 @@ static int osc_extent_sanity_check0(struct osc_extent *ext, goto out; } - if (!ext->oe_osclock && ext->oe_grants > 0) { + if (ext->oe_sync && ext->oe_grants > 0) { rc = 90; goto out; } - if (ext->oe_osclock) { - struct cl_lock_descr *descr; + if (ext->oe_dlmlock) { + struct ldlm_extent *extent; - descr = &ext->oe_osclock->cll_descr; - if (!(descr->cld_start <= ext->oe_start && - descr->cld_end >= ext->oe_max_end)) { + extent = &ext->oe_dlmlock->l_policy_data.l_extent; + if (!(extent->start <= cl_offset(osc2cl(obj), ext->oe_start) && + extent->end >= cl_offset(osc2cl(obj), ext->oe_max_end))) { rc = 100; goto out; } + + if (!(ext->oe_dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))) { + rc = 102; + goto out; + } } if (ext->oe_nr_pages > ext->oe_mppr) { @@ -276,7 +283,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext, page_count = 0; list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { - pgoff_t index = oap2cl_page(oap)->cp_index; + pgoff_t index = osc_index(oap2osc(oap)); ++page_count; if (index > ext->oe_end || index < ext->oe_start) { rc = 110; @@ -359,7 +366,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj) ext->oe_state = OES_INV; INIT_LIST_HEAD(&ext->oe_pages); init_waitqueue_head(&ext->oe_waitq); - ext->oe_osclock = NULL; + ext->oe_dlmlock = NULL; return ext; } @@ -385,9 +392,11 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext) LASSERT(ext->oe_state == OES_INV); LASSERT(!ext->oe_intree); - if (ext->oe_osclock) { - cl_lock_put(env, ext->oe_osclock); - ext->oe_osclock = NULL; + if (ext->oe_dlmlock) { + lu_ref_add(&ext->oe_dlmlock->l_reference, + "osc_extent", ext); + LDLM_LOCK_PUT(ext->oe_dlmlock); + ext->oe_dlmlock = NULL; } osc_extent_free(ext); } @@ -543,7 +552,7 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur, if (cur->oe_max_end != victim->oe_max_end) return -ERANGE; - LASSERT(cur->oe_osclock == victim->oe_osclock); + LASSERT(cur->oe_dlmlock == victim->oe_dlmlock); ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT; chunk_start = cur->oe_start >> ppc_bits; chunk_end = cur->oe_end >> ppc_bits; @@ -624,10 +633,10 @@ static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2) static struct osc_extent *osc_extent_find(const struct lu_env *env, struct osc_object *obj, pgoff_t index, int *grants) - { struct client_obd *cli = osc_cli(obj); - struct cl_lock *lock; + struct osc_lock *olck; + struct cl_lock_descr *descr; struct osc_extent *cur; struct osc_extent *ext; struct osc_extent *conflict = NULL; @@ -644,8 +653,12 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env, if (!cur) return ERR_PTR(-ENOMEM); - lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0); - LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE); + olck = osc_env_io(env)->oi_write_osclock; + LASSERTF(olck, "page %lu is not covered by lock\n", index); + LASSERT(olck->ols_state == OLS_GRANTED); + + descr = &olck->ols_cl.cls_lock->cll_descr; + LASSERT(descr->cld_mode >= CLM_WRITE); LASSERT(cli->cl_chunkbits >= PAGE_SHIFT); ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; @@ -657,19 +670,23 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env, max_pages = cli->cl_max_pages_per_rpc; LASSERT((max_pages & ~chunk_mask) == 0); max_end = index - (index % max_pages) + max_pages - 1; - max_end = min_t(pgoff_t, max_end, lock->cll_descr.cld_end); + max_end = min_t(pgoff_t, max_end, descr->cld_end); /* initialize new extent by parameters so far */ cur->oe_max_end = max_end; cur->oe_start = index & chunk_mask; cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1; - if (cur->oe_start < lock->cll_descr.cld_start) - cur->oe_start = lock->cll_descr.cld_start; + if (cur->oe_start < descr->cld_start) + cur->oe_start = descr->cld_start; if (cur->oe_end > max_end) cur->oe_end = max_end; - cur->oe_osclock = lock; cur->oe_grants = 0; cur->oe_mppr = max_pages; + if (olck->ols_dlmlock) { + LASSERT(olck->ols_hold); + cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock); + lu_ref_add(&olck->ols_dlmlock->l_reference, "osc_extent", cur); + } /* grants has been allocated by caller */ LASSERTF(*grants >= chunksize + cli->cl_extent_tax, @@ -691,7 +708,7 @@ restart: break; /* if covering by different locks, no chance to match */ - if (lock != ext->oe_osclock) { + if (olck->ols_dlmlock != ext->oe_dlmlock) { EASSERTF(!overlapped(ext, cur), ext, EXTSTR"\n", EXTPARA(cur)); @@ -795,7 +812,7 @@ restart: if (found) { LASSERT(!conflict); if (!IS_ERR(found)) { - LASSERT(found->oe_osclock == cur->oe_osclock); + LASSERT(found->oe_dlmlock == cur->oe_dlmlock); OSC_EXTENT_DUMP(D_CACHE, found, "found caching ext for %lu.\n", index); } @@ -810,7 +827,7 @@ restart: found = osc_extent_hold(cur); osc_extent_insert(obj, cur); OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n", - index, lock->cll_descr.cld_end); + index, descr->cld_end); } osc_object_unlock(obj); @@ -856,6 +873,8 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, ext->oe_rc = rc ?: ext->oe_nr_pages; EASSERT(ergo(rc == 0, ext->oe_state == OES_RPC), ext); + + osc_lru_add_batch(cli, &ext->oe_pages); list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) { list_del_init(&oap->oap_rpc_item); list_del_init(&oap->oap_pending_item); @@ -877,10 +896,9 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, * span a whole chunk on the OST side, or our accounting goes * wrong. Should match the code in filter_grant_check. */ - int offset = oap->oap_page_off & ~CFS_PAGE_MASK; - int count = oap->oap_count + (offset & (blocksize - 1)); - int end = (offset + oap->oap_count) & (blocksize - 1); - + int offset = last_off & ~PAGE_MASK; + int count = last_count + (offset & (blocksize - 1)); + int end = (offset + last_count) & (blocksize - 1); if (end) count += blocksize - end; @@ -943,7 +961,7 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, "%s: wait ext to %d timedout, recovery in progress?\n", osc_export(obj)->exp_obd->obd_name, state); - lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + lwi = LWI_INTR(NULL, NULL); rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), &lwi); } @@ -990,19 +1008,19 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, /* discard all pages with index greater then trunc_index */ list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) { - struct cl_page *sub = oap2cl_page(oap); - struct cl_page *page = cl_page_top(sub); + pgoff_t index = osc_index(oap2osc(oap)); + struct cl_page *page = oap2cl_page(oap); LASSERT(list_empty(&oap->oap_rpc_item)); /* only discard the pages with their index greater than * trunc_index, and ... */ - if (sub->cp_index < trunc_index || - (sub->cp_index == trunc_index && partial)) { + if (index < trunc_index || + (index == trunc_index && partial)) { /* accounting how many pages remaining in the chunk * so that we can calculate grants correctly. */ - if (sub->cp_index >> ppc_bits == trunc_chunk) + if (index >> ppc_bits == trunc_chunk) ++pages_in_chunk; continue; } @@ -1013,7 +1031,6 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, lu_ref_add(&page->cp_reference, "truncate", current); if (cl_page_own(env, io, page) == 0) { - cl_page_unmap(env, io, page); cl_page_discard(env, io, page); cl_page_disown(env, io, page); } else { @@ -1126,7 +1143,9 @@ static int osc_extent_make_ready(const struct lu_env *env, last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE); LASSERT(last->oap_count > 0); LASSERT(last->oap_page_off + last->oap_count <= PAGE_SIZE); + spin_lock(&last->oap_lock); last->oap_async_flags |= ASYNC_COUNT_STABLE; + spin_unlock(&last->oap_lock); } /* for the rest of pages, we don't need to call osf_refresh_count() @@ -1135,7 +1154,9 @@ static int osc_extent_make_ready(const struct lu_env *env, list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) { oap->oap_count = PAGE_SIZE - oap->oap_page_off; + spin_lock(&last->oap_lock); oap->oap_async_flags |= ASYNC_COUNT_STABLE; + spin_unlock(&last->oap_lock); } } @@ -1256,7 +1277,7 @@ static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap, int cmd) { struct osc_page *opg = oap2osc_page(oap); - struct cl_page *page = cl_page_top(oap2cl_page(oap)); + struct cl_page *page = oap2cl_page(oap); int result; LASSERT(cmd == OBD_BRW_WRITE); /* no cached reads */ @@ -1271,7 +1292,7 @@ static int osc_refresh_count(const struct lu_env *env, struct osc_async_page *oap, int cmd) { struct osc_page *opg = oap2osc_page(oap); - struct cl_page *page = oap2cl_page(oap); + pgoff_t index = osc_index(oap2osc(oap)); struct cl_object *obj; struct cl_attr *attr = &osc_env_info(env)->oti_attr; @@ -1288,10 +1309,10 @@ static int osc_refresh_count(const struct lu_env *env, if (result < 0) return result; kms = attr->cat_kms; - if (cl_offset(obj, page->cp_index) >= kms) + if (cl_offset(obj, index) >= kms) /* catch race with truncate */ return 0; - else if (cl_offset(obj, page->cp_index + 1) > kms) + else if (cl_offset(obj, index + 1) > kms) /* catch sub-page write at end of file */ return kms % PAGE_SIZE; else @@ -1302,14 +1323,16 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, int cmd, int rc) { struct osc_page *opg = oap2osc_page(oap); - struct cl_page *page = cl_page_top(oap2cl_page(oap)); + struct cl_page *page = oap2cl_page(oap); struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); enum cl_req_type crt; int srvlock; cmd &= ~OBD_BRW_NOQUOTA; - LASSERT(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ)); - LASSERT(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE)); + LASSERTF(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ), + "cp_state:%u, cmd:%d\n", page->cp_state, cmd); + LASSERTF(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE), + "cp_state:%u, cmd:%d\n", page->cp_state, cmd); LASSERT(opg->ops_transfer_pinned); /* @@ -1358,22 +1381,28 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, return 0; } -#define OSC_DUMP_GRANT(cli, fmt, args...) do { \ +#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \ struct client_obd *__tmp = (cli); \ - CDEBUG(D_CACHE, "%s: { dirty: %ld/%ld dirty_pages: %d/%d " \ - "dropped: %ld avail: %ld, reserved: %ld, flight: %d } " fmt, \ + CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d " \ + "unstable_pages: %d/%d dropped: %ld avail: %ld, " \ + "reserved: %ld, flight: %d } lru {in list: %d, " \ + "left: %d, waiters: %d }" fmt, \ __tmp->cl_import->imp_obd->obd_name, \ __tmp->cl_dirty, __tmp->cl_dirty_max, \ atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \ + atomic_read(&obd_unstable_pages), obd_max_dirty_pages, \ __tmp->cl_lost_grant, __tmp->cl_avail_grant, \ - __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, ##args); \ + __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \ + atomic_read(&__tmp->cl_lru_in_list), \ + atomic_read(&__tmp->cl_lru_busy), \ + atomic_read(&__tmp->cl_lru_shrinkers), ##args); \ } while (0) /* caller must hold loi_list_lock */ static void osc_consume_write_grant(struct client_obd *cli, struct brw_page *pga) { - assert_spin_locked(&cli->cl_loi_list_lock.lock); + assert_spin_locked(&cli->cl_loi_list_lock); LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT)); atomic_inc(&obd_dirty_pages); cli->cl_dirty += PAGE_SIZE; @@ -1389,7 +1418,7 @@ static void osc_consume_write_grant(struct client_obd *cli, static void osc_release_write_grant(struct client_obd *cli, struct brw_page *pga) { - assert_spin_locked(&cli->cl_loi_list_lock.lock); + assert_spin_locked(&cli->cl_loi_list_lock); if (!(pga->flag & OBD_BRW_FROM_GRANT)) { return; } @@ -1408,7 +1437,7 @@ static void osc_release_write_grant(struct client_obd *cli, * To avoid sleeping with object lock held, it's good for us allocate enough * grants before entering into critical section. * - * client_obd_list_lock held by caller + * spin_lock held by caller */ static int osc_reserve_grant(struct client_obd *cli, unsigned int bytes) { @@ -1442,11 +1471,11 @@ static void __osc_unreserve_grant(struct client_obd *cli, static void osc_unreserve_grant(struct client_obd *cli, unsigned int reserved, unsigned int unused) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); __osc_unreserve_grant(cli, reserved, unused); if (unused > 0) osc_wake_cache_waiters(cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } /** @@ -1467,7 +1496,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, { int grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); atomic_sub(nr_pages, &obd_dirty_pages); cli->cl_dirty -= nr_pages << PAGE_SHIFT; cli->cl_lost_grant += lost_grant; @@ -1479,7 +1508,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, cli->cl_avail_grant += grant; } osc_wake_cache_waiters(cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n", lost_grant, cli->cl_lost_grant, cli->cl_avail_grant, cli->cl_dirty); @@ -1491,9 +1520,9 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, */ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); osc_release_write_grant(cli, &oap->oap_brw_page); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } /** @@ -1506,14 +1535,15 @@ static int osc_enter_cache_try(struct client_obd *cli, { int rc; - OSC_DUMP_GRANT(cli, "need:%d.\n", bytes); + OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes); rc = osc_reserve_grant(cli, bytes); if (rc < 0) return 0; if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max && - atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) { + atomic_read(&obd_unstable_pages) + 1 + + atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) { osc_consume_write_grant(cli, &oap->oap_brw_page); if (transient) { cli->cl_dirty_transit += PAGE_SIZE; @@ -1532,9 +1562,9 @@ static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw) { int rc; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); rc = list_empty(&ocw->ocw_entry); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return rc; } @@ -1551,12 +1581,13 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, struct osc_object *osc = oap->oap_obj; struct lov_oinfo *loi = osc->oo_oinfo; struct osc_cache_waiter ocw; - struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL, + LWI_ON_SIGNAL_NOOP, NULL); int rc = -EDQUOT; - OSC_DUMP_GRANT(cli, "need:%d.\n", bytes); + OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); /* force the caller to try sync io. this can jump the list * of queued writes and create a discontiguous rpc stream @@ -1587,7 +1618,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) { list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters); ocw.ocw_rc = 0; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); osc_io_unplug_async(env, cli, NULL); @@ -1596,10 +1627,17 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); - /* l_wait_event is interrupted by signal */ + /* l_wait_event is interrupted by signal, or timed out */ if (rc < 0) { + if (rc == -ETIMEDOUT) { + OSC_DUMP_GRANT(D_ERROR, cli, + "try to reserve %d.\n", bytes); + osc_extent_tree_dump(D_ERROR, osc); + rc = -EDQUOT; + } + list_del_init(&ocw.ocw_entry); goto out; } @@ -1615,8 +1653,8 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, } } out: - client_obd_list_unlock(&cli->cl_loi_list_lock); - OSC_DUMP_GRANT(cli, "returned %d.\n", rc); + spin_unlock(&cli->cl_loi_list_lock); + OSC_DUMP_GRANT(D_CACHE, cli, "returned %d.\n", rc); return rc; } @@ -1633,8 +1671,8 @@ void osc_wake_cache_waiters(struct client_obd *cli) ocw->ocw_rc = -EDQUOT; /* we can't dirty more */ if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) || - (atomic_read(&obd_dirty_pages) + 1 > - obd_max_dirty_pages)) { + (atomic_read(&obd_unstable_pages) + 1 + + atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) { CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n", cli->cl_dirty, cli->cl_dirty_max, obd_max_dirty_pages); @@ -1776,9 +1814,9 @@ static int osc_list_maint(struct client_obd *cli, struct osc_object *osc) { int is_ready; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); is_ready = __osc_list_maint(cli, osc); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return is_ready; } @@ -1799,13 +1837,103 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid, ar->ar_force_sync = 1; ar->ar_min_xid = ptlrpc_sample_next_xid(); return; - } if (ar->ar_force_sync && (xid >= ar->ar_min_xid)) ar->ar_force_sync = 0; } +/** + * Performs "unstable" page accounting. This function balances the + * increment operations performed in osc_inc_unstable_pages. It is + * registered as the RPC request callback, and is executed when the + * bulk RPC is committed on the server. Thus at this point, the pages + * involved in the bulk transfer are no longer considered unstable. + */ +void osc_dec_unstable_pages(struct ptlrpc_request *req) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + struct ptlrpc_bulk_desc *desc = req->rq_bulk; + int page_count = desc->bd_iov_count; + int i; + + /* No unstable page tracking */ + if (!cli->cl_cache) + return; + + LASSERT(page_count >= 0); + + for (i = 0; i < page_count; i++) + dec_node_page_state(desc->bd_iov[i].kiov_page, + NR_UNSTABLE_NFS); + + atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr); + LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0); + + atomic_sub(page_count, &cli->cl_unstable_count); + LASSERT(atomic_read(&cli->cl_unstable_count) >= 0); + + atomic_sub(page_count, &obd_unstable_pages); + LASSERT(atomic_read(&obd_unstable_pages) >= 0); + + spin_lock(&req->rq_lock); + req->rq_committed = 1; + req->rq_unstable = 0; + spin_unlock(&req->rq_lock); + + wake_up_all(&cli->cl_cache->ccc_unstable_waitq); +} + +/* "unstable" page accounting. See: osc_dec_unstable_pages. */ +void osc_inc_unstable_pages(struct ptlrpc_request *req) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + struct ptlrpc_bulk_desc *desc = req->rq_bulk; + long page_count = desc->bd_iov_count; + int i; + + /* No unstable page tracking */ + if (!cli->cl_cache) + return; + + LASSERT(page_count >= 0); + + for (i = 0; i < page_count; i++) + inc_node_page_state(desc->bd_iov[i].kiov_page, + NR_UNSTABLE_NFS); + + LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0); + atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr); + + LASSERT(atomic_read(&cli->cl_unstable_count) >= 0); + atomic_add(page_count, &cli->cl_unstable_count); + + LASSERT(atomic_read(&obd_unstable_pages) >= 0); + atomic_add(page_count, &obd_unstable_pages); + + spin_lock(&req->rq_lock); + + /* + * If the request has already been committed (i.e. brw_commit + * called via rq_commit_cb), we need to undo the unstable page + * increments we just performed because rq_commit_cb wont be + * called again. Otherwise, just set the commit callback so the + * unstable page accounting is properly updated when the request + * is committed + */ + if (req->rq_committed) { + /* Drop lock before calling osc_dec_unstable_pages */ + spin_unlock(&req->rq_lock); + osc_dec_unstable_pages(req); + spin_lock(&req->rq_lock); + } else { + req->rq_unstable = 1; + req->rq_commit_cb = osc_dec_unstable_pages; + } + + spin_unlock(&req->rq_lock); +} + /* this must be called holding the loi list lock to give coverage to exit_cache, * async_flag maintenance, and oap_request */ @@ -1817,6 +1945,9 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli, __u64 xid = 0; if (oap->oap_request) { + if (!rc) + osc_inc_unstable_pages(oap->oap_request); + xid = ptlrpc_req_xid(oap->oap_request); ptlrpc_req_finished(oap->oap_request); oap->oap_request = NULL; @@ -1829,10 +1960,10 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli, oap->oap_interrupted = 0; if (oap->oap_cmd & OBD_BRW_WRITE && xid > 0) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); osc_process_ar(&cli->cl_ar, xid, rc); osc_process_ar(&loi->loi_ar, xid, rc); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } rc = osc_completion(env, oap, oap->oap_cmd, rc); @@ -2133,9 +2264,8 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli) } cl_object_get(obj); - client_obd_list_unlock(&cli->cl_loi_list_lock); - lu_object_ref_add_at(&obj->co_lu, &link, "check", - current); + spin_unlock(&cli->cl_loi_list_lock); + lu_object_ref_add_at(&obj->co_lu, &link, "check", current); /* attempt some read/write balancing by alternating between * reads and writes in an object. The makes_rpc checks here @@ -2178,11 +2308,10 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli) osc_object_unlock(osc); osc_list_maint(cli, osc); - lu_object_ref_del_at(&obj->co_lu, &link, "check", - current); + lu_object_ref_del_at(&obj->co_lu, &link, "check", current); cl_object_put(env, obj); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); } } @@ -2199,9 +2328,9 @@ static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli, * potential stack overrun problem. LU-2859 */ atomic_inc(&cli->cl_lru_shrinkers); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); osc_check_rpcs(env, cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); atomic_dec(&cli->cl_lru_shrinkers); } else { CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli); @@ -2238,9 +2367,9 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops, oap->oap_page = page; oap->oap_obj_off = offset; - LASSERT(!(offset & ~CFS_PAGE_MASK)); + LASSERT(!(offset & ~PAGE_MASK)); - if (!client_is_remote(exp) && capable(CFS_CAP_SYS_RESOURCE)) + if (capable(CFS_CAP_SYS_RESOURCE)) oap->oap_brw_flags = OBD_BRW_NOQUOTA; INIT_LIST_HEAD(&oap->oap_pending_item); @@ -2279,8 +2408,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, /* Set the OBD_BRW_SRVLOCK before the page is queued. */ brw_flags |= ops->ops_srvlock ? OBD_BRW_SRVLOCK : 0; - if (!client_is_remote(osc_export(osc)) && - capable(CFS_CAP_SYS_RESOURCE)) { + if (capable(CFS_CAP_SYS_RESOURCE)) { brw_flags |= OBD_BRW_NOQUOTA; cmd |= OBD_BRW_NOQUOTA; } @@ -2306,16 +2434,23 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, return rc; } + if (osc_over_unstable_soft_limit(cli)) + brw_flags |= OBD_BRW_SOFT_SYNC; + oap->oap_cmd = cmd; oap->oap_page_off = ops->ops_from; oap->oap_count = ops->ops_to - ops->ops_from; + /* + * No need to hold a lock here, + * since this page is not in any list yet. + */ oap->oap_async_flags = 0; oap->oap_brw_flags = brw_flags; OSC_IO_DEBUG(osc, "oap %p page %p added for cmd %d\n", oap, oap->oap_page, oap->oap_cmd & OBD_BRW_RWMASK); - index = oap2cl_page(oap)->cp_index; + index = osc_index(oap2osc(oap)); /* Add this page into extent by the following steps: * 1. if there exists an active extent for this IO, mostly this page @@ -2334,9 +2469,9 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, grants = 0; /* it doesn't need any grant to dirty this page */ - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); rc = osc_enter_cache_try(cli, oap, grants, 0); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); if (rc == 0) { /* try failed */ grants = 0; need_release = 1; @@ -2427,21 +2562,21 @@ int osc_teardown_async_page(const struct lu_env *env, LASSERT(oap->oap_magic == OAP_MAGIC); CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n", - oap, ops, oap2cl_page(oap)->cp_index); + oap, ops, osc_index(oap2osc(oap))); osc_object_lock(obj); if (!list_empty(&oap->oap_rpc_item)) { CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap); rc = -EBUSY; } else if (!list_empty(&oap->oap_pending_item)) { - ext = osc_extent_lookup(obj, oap2cl_page(oap)->cp_index); + ext = osc_extent_lookup(obj, osc_index(oap2osc(oap))); /* only truncated pages are allowed to be taken out. * See osc_extent_truncate() and osc_cache_truncate_start() * for details. */ if (ext && ext->oe_state != OES_TRUNC) { OSC_EXTENT_DUMP(D_ERROR, ext, "trunc at %lu.\n", - oap2cl_page(oap)->cp_index); + osc_index(oap2osc(oap))); rc = -EBUSY; } } @@ -2464,7 +2599,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, struct osc_extent *ext = NULL; struct osc_object *obj = cl2osc(ops->ops_cl.cpl_obj); struct cl_page *cp = ops->ops_cl.cpl_page; - pgoff_t index = cp->cp_index; + pgoff_t index = osc_index(ops); struct osc_async_page *oap = &ops->ops_oap; bool unplug = false; int rc = 0; @@ -2479,8 +2614,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, switch (ext->oe_state) { case OES_RPC: case OES_LOCK_DONE: - CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(cp), - "flush an in-rpc page?\n"); + CL_PAGE_DEBUG(D_ERROR, env, cp, "flush an in-rpc page?\n"); LASSERT(0); break; case OES_LOCKING: @@ -2506,7 +2640,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, break; } - rc = cl_page_prep(env, io, cl_page_top(cp), CRT_WRITE); + rc = cl_page_prep(env, io, cp, CRT_WRITE); if (rc) goto out; @@ -2550,7 +2684,7 @@ int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops) struct osc_extent *ext; struct osc_extent *found = NULL; struct list_head *plist; - pgoff_t index = oap2cl_page(oap)->cp_index; + pgoff_t index = osc_index(ops); int rc = -EBUSY; int cmd; @@ -2613,12 +2747,12 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, pgoff_t end = 0; list_for_each_entry(oap, list, oap_pending_item) { - struct cl_page *cp = oap2cl_page(oap); + pgoff_t index = osc_index(oap2osc(oap)); - if (cp->cp_index > end) - end = cp->cp_index; - if (cp->cp_index < start) - start = cp->cp_index; + if (index > end) + end = index; + if (index < start) + start = index; ++page_count; mppr <<= (page_count > mppr); } @@ -2633,9 +2767,11 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, } ext->oe_rw = !!(cmd & OBD_BRW_READ); + ext->oe_sync = 1; ext->oe_urgent = 1; ext->oe_start = start; - ext->oe_end = ext->oe_max_end = end; + ext->oe_end = end; + ext->oe_max_end = end; ext->oe_obj = obj; ext->oe_srvlock = !!(brw_flags & OBD_BRW_SRVLOCK); ext->oe_nr_pages = page_count; @@ -2988,7 +3124,201 @@ int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj, result = rc; } - OSC_IO_DEBUG(obj, "cache page out.\n"); + OSC_IO_DEBUG(obj, "pageout [%lu, %lu], %d.\n", start, end, result); + return result; +} + +/** + * Returns a list of pages by a given [start, end] of \a obj. + * + * \param resched If not NULL, then we give up before hogging CPU for too + * long and set *resched = 1, in that case caller should implement a retry + * logic. + * + * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely + * crucial in the face of [offset, EOF] locks. + * + * Return at least one page in @queue unless there is no covered page. + */ +int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, + struct osc_object *osc, pgoff_t start, pgoff_t end, + osc_page_gang_cbt cb, void *cbdata) +{ + struct osc_page *ops; + void **pvec; + pgoff_t idx; + unsigned int nr; + unsigned int i; + unsigned int j; + int res = CLP_GANG_OKAY; + bool tree_lock = true; + + idx = start; + pvec = osc_env_info(env)->oti_pvec; + spin_lock(&osc->oo_tree_lock); + while ((nr = radix_tree_gang_lookup(&osc->oo_tree, pvec, + idx, OTI_PVEC_SIZE)) > 0) { + struct cl_page *page; + bool end_of_region = false; + + for (i = 0, j = 0; i < nr; ++i) { + ops = pvec[i]; + pvec[i] = NULL; + + idx = osc_index(ops); + if (idx > end) { + end_of_region = true; + break; + } + + page = ops->ops_cl.cpl_page; + LASSERT(page->cp_type == CPT_CACHEABLE); + if (page->cp_state == CPS_FREEING) + continue; + + cl_page_get(page); + lu_ref_add_atomic(&page->cp_reference, + "gang_lookup", current); + pvec[j++] = ops; + } + ++idx; + + /* + * Here a delicate locking dance is performed. Current thread + * holds a reference to a page, but has to own it before it + * can be placed into queue. Owning implies waiting, so + * radix-tree lock is to be released. After a wait one has to + * check that pages weren't truncated (cl_page_own() returns + * error in the latter case). + */ + spin_unlock(&osc->oo_tree_lock); + tree_lock = false; + + for (i = 0; i < j; ++i) { + ops = pvec[i]; + if (res == CLP_GANG_OKAY) + res = (*cb)(env, io, ops, cbdata); + + page = ops->ops_cl.cpl_page; + lu_ref_del(&page->cp_reference, "gang_lookup", current); + cl_page_put(env, page); + } + if (nr < OTI_PVEC_SIZE || end_of_region) + break; + + if (res == CLP_GANG_OKAY && need_resched()) + res = CLP_GANG_RESCHED; + if (res != CLP_GANG_OKAY) + break; + + spin_lock(&osc->oo_tree_lock); + tree_lock = true; + } + if (tree_lock) + spin_unlock(&osc->oo_tree_lock); + return res; +} + +/** + * Check if page @page is covered by an extra lock or discard it. + */ +static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io, + struct osc_page *ops, void *cbdata) +{ + struct osc_thread_info *info = osc_env_info(env); + struct osc_object *osc = cbdata; + pgoff_t index; + + index = osc_index(ops); + if (index >= info->oti_fn_index) { + struct ldlm_lock *tmp; + struct cl_page *page = ops->ops_cl.cpl_page; + + /* refresh non-overlapped index */ + tmp = osc_dlmlock_at_pgoff(env, osc, index, 0, 0); + if (tmp) { + __u64 end = tmp->l_policy_data.l_extent.end; + /* Cache the first-non-overlapped index so as to skip + * all pages within [index, oti_fn_index). This is safe + * because if tmp lock is canceled, it will discard + * these pages. + */ + info->oti_fn_index = cl_index(osc2cl(osc), end + 1); + if (end == OBD_OBJECT_EOF) + info->oti_fn_index = CL_PAGE_EOF; + LDLM_LOCK_PUT(tmp); + } else if (cl_page_own(env, io, page) == 0) { + /* discard the page */ + cl_page_discard(env, io, page); + cl_page_disown(env, io, page); + } else { + LASSERT(page->cp_state == CPS_FREEING); + } + } + + info->oti_next_index = index + 1; + return CLP_GANG_OKAY; +} + +static int discard_cb(const struct lu_env *env, struct cl_io *io, + struct osc_page *ops, void *cbdata) +{ + struct osc_thread_info *info = osc_env_info(env); + struct cl_page *page = ops->ops_cl.cpl_page; + + /* page is top page. */ + info->oti_next_index = osc_index(ops) + 1; + if (cl_page_own(env, io, page) == 0) { + KLASSERT(ergo(page->cp_type == CPT_CACHEABLE, + !PageDirty(cl_page_vmpage(page)))); + + /* discard the page */ + cl_page_discard(env, io, page); + cl_page_disown(env, io, page); + } else { + LASSERT(page->cp_state == CPS_FREEING); + } + + return CLP_GANG_OKAY; +} + +/** + * Discard pages protected by the given lock. This function traverses radix + * tree to find all covering pages and discard them. If a page is being covered + * by other locks, it should remain in cache. + * + * If error happens on any step, the process continues anyway (the reasoning + * behind this being that lock cancellation cannot be delayed indefinitely). + */ +int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc, + pgoff_t start, pgoff_t end, enum cl_lock_mode mode) +{ + struct osc_thread_info *info = osc_env_info(env); + struct cl_io *io = &info->oti_io; + osc_page_gang_cbt cb; + int res; + int result; + + io->ci_obj = cl_object_top(osc2cl(osc)); + io->ci_ignore_layout = 1; + result = cl_io_init(env, io, CIT_MISC, io->ci_obj); + if (result != 0) + goto out; + + cb = mode == CLM_READ ? check_and_discard_cb : discard_cb; + info->oti_fn_index = start; + info->oti_next_index = start; + do { + res = osc_page_gang_lookup(env, io, osc, + info->oti_next_index, end, cb, osc); + if (info->oti_next_index > end) + break; + + if (res == CLP_GANG_RESCHED) + cond_resched(); + } while (res != CLP_GANG_OKAY); +out: + cl_io_fini(env, io); return result; } diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h index d55d04d0428b..c8c3f1ca77be 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h +++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -51,7 +47,6 @@ #include "../include/obd.h" /* osc_build_res_name() */ #include "../include/cl_object.h" -#include "../include/lclient.h" #include "osc_internal.h" /** \defgroup osc osc @@ -67,7 +62,10 @@ struct osc_io { /** super class */ struct cl_io_slice oi_cl; /** true if this io is lockless. */ - int oi_lockless; + unsigned int oi_lockless; + /** how many LRU pages are reserved for this IO */ + int oi_lru_reserved; + /** active extents, we know how many bytes is going to be written, * so having an active extent will prevent it from being fragmented */ @@ -77,6 +75,8 @@ struct osc_io { */ struct osc_extent *oi_trunc; + /** write osc_lock for this IO, used by osc_extent_find(). */ + struct osc_lock *oi_write_osclock; struct obd_info oi_info; struct obdo oi_oa; struct osc_async_cbargs { @@ -100,7 +100,7 @@ struct osc_session { struct osc_io os_io; }; -#define OTI_PVEC_SIZE 64 +#define OTI_PVEC_SIZE 256 struct osc_thread_info { struct ldlm_res_id oti_resname; ldlm_policy_data_t oti_policy; @@ -109,7 +109,13 @@ struct osc_thread_info { struct lustre_handle oti_handle; struct cl_page_list oti_plist; struct cl_io oti_io; - struct cl_page *oti_pvec[OTI_PVEC_SIZE]; + void *oti_pvec[OTI_PVEC_SIZE]; + /** + * Fields used by cl_lock_discard_pages(). + */ + pgoff_t oti_next_index; + pgoff_t oti_fn_index; /* first non-overlapped index */ + struct cl_sync_io oti_anchor; }; struct osc_object { @@ -125,7 +131,7 @@ struct osc_object { */ struct list_head oo_inflight[CRT_NR]; /** - * Lock, protecting ccc_object::cob_inflight, because a seat-belt is + * Lock, protecting osc_page::ops_inflight, because a seat-belt is * locked during take-off and landing. */ spinlock_t oo_seatbelt; @@ -159,6 +165,17 @@ struct osc_object { * oo_{read|write}_pages soon. */ spinlock_t oo_lock; + + /** + * Radix tree for caching pages + */ + struct radix_tree_root oo_tree; + spinlock_t oo_tree_lock; + unsigned long oo_npages; + + /* Protect osc_lock this osc_object has */ + spinlock_t oo_ol_spin; + struct list_head oo_ol_list; }; static inline void osc_object_lock(struct osc_object *obj) @@ -198,8 +215,6 @@ enum osc_lock_state { OLS_ENQUEUED, OLS_UPCALL_RECEIVED, OLS_GRANTED, - OLS_RELEASED, - OLS_BLOCKED, OLS_CANCELLED }; @@ -208,10 +223,8 @@ enum osc_lock_state { * * Interaction with DLM. * - * CLIO enqueues all DLM locks through ptlrpcd (that is, in "async" mode). - * * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in - * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_lock. + * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock. * * This pointer is protected through a reference, acquired by * osc_lock_upcall0(). Also, an additional reference is acquired by @@ -249,26 +262,27 @@ enum osc_lock_state { */ struct osc_lock { struct cl_lock_slice ols_cl; + /** Internal lock to protect states, etc. */ + spinlock_t ols_lock; + /** Owner sleeps on this channel for state change */ + struct cl_sync_io *ols_owner; + /** waiting list for this lock to be cancelled */ + struct list_head ols_waiting_list; + /** wait entry of ols_waiting_list */ + struct list_head ols_wait_entry; + /** list entry for osc_object::oo_ol_list */ + struct list_head ols_nextlock_oscobj; + /** underlying DLM lock */ - struct ldlm_lock *ols_lock; - /** lock value block */ - struct ost_lvb ols_lvb; + struct ldlm_lock *ols_dlmlock; /** DLM flags with which osc_lock::ols_lock was enqueued */ __u64 ols_flags; /** osc_lock::ols_lock handle */ struct lustre_handle ols_handle; struct ldlm_enqueue_info ols_einfo; enum osc_lock_state ols_state; - - /** - * How many pages are using this lock for io, currently only used by - * read-ahead. If non-zero, the underlying dlm lock won't be cancelled - * during recovery to avoid deadlock. see bz16774. - * - * \see osc_page::ops_lock - * \see osc_page_addref_lock(), osc_page_putref_lock() - */ - atomic_t ols_pageref; + /** lock value block */ + struct ost_lvb ols_lvb; /** * true, if ldlm_lock_addref() was called against @@ -299,16 +313,6 @@ struct osc_lock { */ ols_locklessable:1, /** - * set by osc_lock_use() to wait until blocking AST enters into - * osc_ldlm_blocking_ast0(), so that cl_lock mutex can be used for - * further synchronization. - */ - ols_ast_wait:1, - /** - * If the data of this lock has been flushed to server side. - */ - ols_flush:1, - /** * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat * the EVAVAIL error as tolerable, this will make upper logic happy * to wait all glimpse locks to each OSTs to be completed. @@ -321,15 +325,6 @@ struct osc_lock { * For async glimpse lock. */ ols_agl:1; - /** - * IO that owns this lock. This field is used for a dead-lock - * avoidance by osc_lock_enqueue_wait(). - * - * XXX: unfortunately, the owner of a osc_lock is not unique, - * the lock may have multiple users, if the lock is granted and - * then matched. - */ - struct osc_io *ols_owner; }; /** @@ -357,11 +352,6 @@ struct osc_page { */ unsigned ops_transfer_pinned:1, /** - * True for a `temporary page' created by read-ahead code, probably - * outside of any DLM lock. - */ - ops_temp:1, - /** * in LRU? */ ops_in_lru:1, @@ -369,18 +359,15 @@ struct osc_page { * Set if the page must be transferred with OBD_BRW_SRVLOCK. */ ops_srvlock:1; - union { - /** - * lru page list. ops_inflight and ops_lru are exclusive so - * that they can share the same data. - */ - struct list_head ops_lru; - /** - * Linkage into a per-osc_object list of pages in flight. For - * debugging. - */ - struct list_head ops_inflight; - }; + /** + * lru page list. See osc_lru_{del|use}() in osc_page.c for usage. + */ + struct list_head ops_lru; + /** + * Linkage into a per-osc_object list of pages in flight. For + * debugging. + */ + struct list_head ops_inflight; /** * Thread that submitted this page for transfer. For debugging. */ @@ -389,16 +376,6 @@ struct osc_page { * Submit time - the time when the page is starting RPC. For debugging. */ unsigned long ops_submit_time; - - /** - * A lock of which we hold a reference covers this page. Only used by - * read-ahead: for a readahead page, we hold it's covering lock to - * prevent it from being canceled during recovery. - * - * \see osc_lock::ols_pageref - * \see osc_page_addref_lock(), osc_page_putref_lock(). - */ - struct cl_lock *ops_lock; }; extern struct kmem_cache *osc_lock_kmem; @@ -417,21 +394,22 @@ extern struct lu_context_key osc_session_key; int osc_lock_init(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, const struct cl_io *io); -int osc_io_init (const struct lu_env *env, - struct cl_object *obj, struct cl_io *io); -int osc_req_init (const struct lu_env *env, struct cl_device *dev, - struct cl_req *req); +int osc_io_init(const struct lu_env *env, + struct cl_object *obj, struct cl_io *io); +int osc_req_init(const struct lu_env *env, struct cl_device *dev, + struct cl_req *req); struct lu_object *osc_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); int osc_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t ind); -void osc_index2policy (ldlm_policy_data_t *policy, const struct cl_object *obj, - pgoff_t start, pgoff_t end); -int osc_lvb_print (const struct lu_env *env, void *cookie, - lu_printer_t p, const struct ost_lvb *lvb); +void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj, + pgoff_t start, pgoff_t end); +int osc_lvb_print(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct ost_lvb *lvb); +void osc_lru_add_batch(struct client_obd *cli, struct list_head *list); void osc_page_submit(const struct lu_env *env, struct osc_page *opg, enum cl_req_type crt, int brw_flags); int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops); @@ -441,6 +419,8 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops, struct page *page, loff_t offset); int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, struct osc_page *ops); +int osc_page_cache_add(const struct lu_env *env, + const struct cl_page_slice *slice, struct cl_io *io); int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj, struct osc_page *ops); int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, @@ -457,12 +437,13 @@ int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj, pgoff_t start, pgoff_t end); void osc_io_unplug(const struct lu_env *env, struct client_obd *cli, struct osc_object *osc); +int lru_queue_work(const struct lu_env *env, void *data); -void osc_object_set_contended (struct osc_object *obj); +void osc_object_set_contended(struct osc_object *obj); void osc_object_clear_contended(struct osc_object *obj); -int osc_object_is_contended (struct osc_object *obj); +int osc_object_is_contended(struct osc_object *obj); -int osc_lock_is_lockless (const struct osc_lock *olck); +int osc_lock_is_lockless(const struct osc_lock *olck); /***************************************************************************** * @@ -558,6 +539,11 @@ static inline struct osc_page *oap2osc(struct osc_async_page *oap) return container_of0(oap, struct osc_page, ops_oap); } +static inline pgoff_t osc_index(struct osc_page *opg) +{ + return opg->ops_cl.cpl_index; +} + static inline struct cl_page *oap2cl_page(struct osc_async_page *oap) { return oap2osc(oap)->ops_cl.cpl_page; @@ -608,7 +594,7 @@ enum osc_extent_state { * * LOCKING ORDER * ============= - * page lock -> client_obd_list_lock -> object lock(osc_object::oo_lock) + * page lock -> cl_loi_list_lock -> object lock(osc_object::oo_lock) */ struct osc_extent { /** red-black tree node */ @@ -627,6 +613,8 @@ struct osc_extent { unsigned int oe_intree:1, /** 0 is write, 1 is read */ oe_rw:1, + /** sync extent, queued by osc_queue_sync_pages() */ + oe_sync:1, oe_srvlock:1, oe_memalloc:1, /** an ACTIVE extent is going to be truncated, so when this extent @@ -675,7 +663,7 @@ struct osc_extent { */ wait_queue_head_t oe_waitq; /** lock covering this extent */ - struct cl_lock *oe_osclock; + struct ldlm_lock *oe_dlmlock; /** terminator of this extent. Must be true if this extent is in IO. */ struct task_struct *oe_owner; /** return value of writeback. If somebody is waiting for this extent, @@ -690,6 +678,14 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, int sent, int rc); void osc_extent_release(const struct lu_env *env, struct osc_extent *ext); +int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc, + pgoff_t start, pgoff_t end, enum cl_lock_mode mode); + +typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *, + struct osc_page *, void *); +int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, + struct osc_object *osc, pgoff_t start, pgoff_t end, + osc_page_gang_cbt cb, void *cbdata); /** @} osc */ #endif /* OSC_CL_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/osc/osc_dev.c b/drivers/staging/lustre/lustre/osc/osc_dev.c index d4fe507f165f..83d30c135ba4 100644 --- a/drivers/staging/lustre/lustre/osc/osc_dev.c +++ b/drivers/staging/lustre/lustre/osc/osc_dev.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h index ea695c2099ee..7a27f0961955 100644 --- a/drivers/staging/lustre/lustre/osc/osc_internal.h +++ b/drivers/staging/lustre/lustre/osc/osc_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -83,6 +79,12 @@ struct osc_async_page { #define oap_count oap_brw_page.count #define oap_brw_flags oap_brw_page.flag +static inline struct osc_async_page *brw_page2oap(struct brw_page *pga) +{ + return (struct osc_async_page *)container_of(pga, struct osc_async_page, + oap_brw_page); +} + struct osc_cache_waiter { struct list_head ocw_entry; wait_queue_head_t ocw_waitq; @@ -102,12 +104,14 @@ void osc_update_next_shrink(struct client_obd *cli); extern struct ptlrpc_request_set *PTLRPCD_SET; +typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh, + int rc); + int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, __u64 *flags, ldlm_policy_data_t *policy, struct ost_lvb *lvb, int kms_valid, - obd_enqueue_update_f upcall, + osc_enqueue_upcall_f upcall, void *cookie, struct ldlm_enqueue_info *einfo, - struct lustre_handle *lockh, struct ptlrpc_request_set *rqset, int async, int agl); int osc_cancel_base(struct lustre_handle *lockh, __u32 mode); @@ -130,9 +134,11 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo, int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg); int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, struct list_head *ext_list, int cmd); -int osc_lru_shrink(struct client_obd *cli, int target); +int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, + int target, bool force); +int osc_lru_reclaim(struct client_obd *cli); -extern spinlock_t osc_ast_guard; +unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock); int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg); @@ -173,8 +179,6 @@ static inline struct osc_device *obd2osc_dev(const struct obd_device *d) return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev); } -int osc_dlm_lock_pageref(struct ldlm_lock *dlm); - extern struct kmem_cache *osc_quota_kmem; struct osc_quota_info { /** linkage for quota hash table */ @@ -192,5 +196,12 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp, int osc_quotacheck(struct obd_device *unused, struct obd_export *exp, struct obd_quotactl *oqctl); int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk); +void osc_inc_unstable_pages(struct ptlrpc_request *req); +void osc_dec_unstable_pages(struct ptlrpc_request *req); +int osc_over_unstable_soft_limit(struct client_obd *cli); + +struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, + struct osc_object *obj, pgoff_t index, + int pending, int canceling); #endif /* OSC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c index 6bd0a45d8b06..6e3dcd38913f 100644 --- a/drivers/staging/lustre/lustre/osc/osc_io.c +++ b/drivers/staging/lustre/lustre/osc/osc_io.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -68,11 +64,15 @@ static struct osc_io *cl2osc_io(const struct lu_env *env, return oio; } -static struct osc_page *osc_cl_page_osc(struct cl_page *page) +static struct osc_page *osc_cl_page_osc(struct cl_page *page, + struct osc_object *osc) { const struct cl_page_slice *slice; - slice = cl_page_at(page, &osc_device_type); + if (osc) + slice = cl_object_page_slice(&osc->oo_cl, page); + else + slice = cl_page_at(page, &osc_device_type); LASSERT(slice); return cl2osc_page(slice); @@ -137,7 +137,7 @@ static int osc_io_submit(const struct lu_env *env, io = page->cp_owner; LASSERT(io); - opg = osc_cl_page_osc(page); + opg = osc_cl_page_osc(page, osc); oap = &opg->ops_oap; LASSERT(osc == oap->oap_obj); @@ -164,8 +164,10 @@ static int osc_io_submit(const struct lu_env *env, } cl_page_list_move(qout, qin, page); + spin_lock(&oap->oap_lock); oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY; oap->oap_async_flags |= ASYNC_COUNT_STABLE; + spin_unlock(&oap->oap_lock); osc_page_submit(env, opg, crt, brw_flags); list_add_tail(&oap->oap_pending_item, &list); @@ -185,6 +187,13 @@ static int osc_io_submit(const struct lu_env *env, return qout->pl_nr > 0 ? 0 : result; } +/** + * This is called when a page is accessed within file in a way that creates + * new page, if one were missing (i.e., if there were a hole at that place in + * the file, or accessed page is beyond the current file size). + * + * Expand stripe KMS if necessary. + */ static void osc_page_touch_at(const struct lu_env *env, struct cl_object *obj, pgoff_t idx, unsigned to) { @@ -208,7 +217,9 @@ static void osc_page_touch_at(const struct lu_env *env, kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms, loi->loi_lvb.lvb_size); - valid = 0; + attr->cat_ctime = LTIME_S(CURRENT_TIME); + attr->cat_mtime = attr->cat_ctime; + valid = CAT_MTIME | CAT_CTIME; if (kms > loi->loi_kms) { attr->cat_kms = kms; valid |= CAT_KMS; @@ -221,91 +232,128 @@ static void osc_page_touch_at(const struct lu_env *env, cl_object_attr_unlock(obj); } -/** - * This is called when a page is accessed within file in a way that creates - * new page, if one were missing (i.e., if there were a hole at that place in - * the file, or accessed page is beyond the current file size). Examples: - * ->commit_write() and ->nopage() methods. - * - * Expand stripe KMS if necessary. - */ -static void osc_page_touch(const struct lu_env *env, - struct osc_page *opage, unsigned to) -{ - struct cl_page *page = opage->ops_cl.cpl_page; - struct cl_object *obj = opage->ops_cl.cpl_obj; - - osc_page_touch_at(env, obj, page->cp_index, to); -} - -/** - * Implements cl_io_operations::cio_prepare_write() method for osc layer. - * - * \retval -EIO transfer initiated against this osc will most likely fail - * \retval 0 transfer initiated against this osc will most likely succeed. - * - * The reason for this check is to immediately return an error to the caller - * in the case of a deactivated import. Note, that import can be deactivated - * later, while pages, dirtied by this IO, are still in the cache, but this is - * irrelevant, because that would still return an error to the application (if - * it does fsync), but many applications don't do fsync because of performance - * issues, and we wanted to return an -EIO at write time to notify the - * application. - */ -static int osc_io_prepare_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) +static int osc_io_commit_async(const struct lu_env *env, + const struct cl_io_slice *ios, + struct cl_page_list *qin, int from, int to, + cl_commit_cbt cb) { - struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev); - struct obd_import *imp = class_exp2cliimp(dev->od_exp); + struct cl_io *io = ios->cis_io; struct osc_io *oio = cl2osc_io(env, ios); + struct osc_object *osc = cl2osc(ios->cis_obj); + struct cl_page *page; + struct cl_page *last_page; + struct osc_page *opg; int result = 0; - /* - * This implements OBD_BRW_CHECK logic from old client. - */ + LASSERT(qin->pl_nr > 0); + + /* Handle partial page cases */ + last_page = cl_page_list_last(qin); + if (oio->oi_lockless) { + page = cl_page_list_first(qin); + if (page == last_page) { + cl_page_clip(env, page, from, to); + } else { + if (from != 0) + cl_page_clip(env, page, from, PAGE_SIZE); + if (to != PAGE_SIZE) + cl_page_clip(env, last_page, 0, to); + } + } + + while (qin->pl_nr > 0) { + struct osc_async_page *oap; - if (!imp || imp->imp_invalid) - result = -EIO; - if (result == 0 && oio->oi_lockless) - /* this page contains `invalid' data, but who cares? - * nobody can access the invalid data. - * in osc_io_commit_write(), we're going to write exact - * [from, to) bytes of this page to OST. -jay + page = cl_page_list_first(qin); + opg = osc_cl_page_osc(page, osc); + oap = &opg->ops_oap; + + if (!list_empty(&oap->oap_rpc_item)) { + CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n", + oap, opg); + result = -EBUSY; + break; + } + + /* The page may be already in dirty cache. */ + if (list_empty(&oap->oap_pending_item)) { + result = osc_page_cache_add(env, &opg->ops_cl, io); + if (result != 0) + break; + } + + osc_page_touch_at(env, osc2cl(osc), osc_index(opg), + page == last_page ? to : PAGE_SIZE); + + cl_page_list_del(env, qin, page); + + (*cb)(env, io, page); + /* Can't access page any more. Page can be in transfer and + * complete at any time. */ - cl_page_export(env, slice->cpl_page, 1); + } + /* for sync write, kernel will wait for this page to be flushed before + * osc_io_end() is called, so release it earlier. + * for mkwrite(), it's known there is no further pages. + */ + if (cl_io_is_sync_write(io) && oio->oi_active) { + osc_extent_release(env, oio->oi_active); + oio->oi_active = NULL; + } + + CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, result); return result; } -static int osc_io_commit_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) +static int osc_io_rw_iter_init(const struct lu_env *env, + const struct cl_io_slice *ios) { - struct osc_io *oio = cl2osc_io(env, ios); - struct osc_page *opg = cl2osc_page(slice); - struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); - struct osc_async_page *oap = &opg->ops_oap; + struct cl_io *io = ios->cis_io; + struct osc_io *oio = osc_env_io(env); + struct osc_object *osc = cl2osc(ios->cis_obj); + struct client_obd *cli = osc_cli(osc); + unsigned long c; + unsigned int npages; + unsigned int max_pages; + + if (cl_io_is_append(io)) + return 0; + + npages = io->u.ci_rw.crw_count >> PAGE_SHIFT; + if (io->u.ci_rw.crw_pos & ~PAGE_MASK) + ++npages; + + max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight; + if (npages > max_pages) + npages = max_pages; + + c = atomic_read(cli->cl_lru_left); + if (c < npages && osc_lru_reclaim(cli) > 0) + c = atomic_read(cli->cl_lru_left); + while (c >= npages) { + if (c == atomic_cmpxchg(cli->cl_lru_left, c, c - npages)) { + oio->oi_lru_reserved = npages; + break; + } + c = atomic_read(cli->cl_lru_left); + } - LASSERT(to > 0); - /* - * XXX instead of calling osc_page_touch() here and in - * osc_io_fault_start() it might be more logical to introduce - * cl_page_touch() method, that generic cl_io_commit_write() and page - * fault code calls. - */ - osc_page_touch(env, cl2osc_page(slice), to); - if (!client_is_remote(osc_export(obj)) && - capable(CFS_CAP_SYS_RESOURCE)) - oap->oap_brw_flags |= OBD_BRW_NOQUOTA; + return 0; +} - if (oio->oi_lockless) - /* see osc_io_prepare_write() for lockless io handling. */ - cl_page_clip(env, slice->cpl_page, from, to); +static void osc_io_rw_iter_fini(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct osc_io *oio = osc_env_io(env); + struct osc_object *osc = cl2osc(ios->cis_obj); + struct client_obd *cli = osc_cli(osc); - return 0; + if (oio->oi_lru_reserved > 0) { + atomic_add(oio->oi_lru_reserved, cli->cl_lru_left); + oio->oi_lru_reserved = 0; + } + oio->oi_write_osclock = NULL; } static int osc_io_fault_start(const struct lu_env *env, @@ -342,31 +390,21 @@ static int osc_async_upcall(void *a, int rc) * Checks that there are no pages being written in the extent being truncated. */ static int trunc_check_cb(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, void *cbdata) + struct osc_page *ops, void *cbdata) { - const struct cl_page_slice *slice; - struct osc_page *ops; + struct cl_page *page = ops->ops_cl.cpl_page; struct osc_async_page *oap; __u64 start = *(__u64 *)cbdata; - slice = cl_page_at(page, &osc_device_type); - LASSERT(slice); - ops = cl2osc_page(slice); oap = &ops->ops_oap; - if (oap->oap_cmd & OBD_BRW_WRITE && !list_empty(&oap->oap_pending_item)) CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n", start, current->comm); - { - struct page *vmpage = cl_page_vmpage(env, page); - - if (PageLocked(vmpage)) - CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n", - ops, page->cp_index, - (oap->oap_cmd & OBD_BRW_RWMASK)); - } + if (PageLocked(page->cp_vmpage)) + CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n", + ops, osc_index(ops), oap->oap_cmd & OBD_BRW_RWMASK); return CLP_GANG_OKAY; } @@ -385,8 +423,9 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, /* * Complain if there are pages in the truncated region. */ - cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, - trunc_check_cb, (void *)&size); + osc_page_gang_lookup(env, io, cl2osc(clob), + start + partial, CL_PAGE_EOF, + trunc_check_cb, (void *)&size); } static int osc_io_setattr_start(const struct lu_env *env, @@ -416,7 +455,8 @@ static int osc_io_setattr_start(const struct lu_env *env, unsigned int cl_valid = 0; if (ia_valid & ATTR_SIZE) { - attr->cat_size = attr->cat_kms = size; + attr->cat_size = size; + attr->cat_kms = size; cl_valid = CAT_SIZE | CAT_KMS; } if (ia_valid & ATTR_MTIME_SET) { @@ -484,7 +524,8 @@ static void osc_io_setattr_end(const struct lu_env *env, if (cbargs->opc_rpc_sent) { wait_for_completion(&cbargs->opc_sync); - result = io->ci_result = cbargs->opc_rc; + result = cbargs->opc_rc; + io->ci_result = cbargs->opc_rc; } if (result == 0) { if (oio->oi_lockless) { @@ -533,7 +574,8 @@ static int osc_io_write_start(const struct lu_env *env, OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_SETTIME, 1); cl_object_attr_lock(obj); - attr->cat_mtime = attr->cat_ctime = ktime_get_real_seconds(); + attr->cat_ctime = ktime_get_real_seconds(); + attr->cat_mtime = attr->cat_ctime; rc = cl_object_attr_set(env, obj, attr, CAT_MTIME | CAT_CTIME); cl_object_attr_unlock(obj); @@ -650,6 +692,8 @@ static const struct cl_io_operations osc_io_ops = { .cio_fini = osc_io_fini }, [CIT_WRITE] = { + .cio_iter_init = osc_io_rw_iter_init, + .cio_iter_fini = osc_io_rw_iter_fini, .cio_start = osc_io_write_start, .cio_end = osc_io_end, .cio_fini = osc_io_fini @@ -672,16 +716,8 @@ static const struct cl_io_operations osc_io_ops = { .cio_fini = osc_io_fini } }, - .req_op = { - [CRT_READ] = { - .cio_submit = osc_io_submit - }, - [CRT_WRITE] = { - .cio_submit = osc_io_submit - } - }, - .cio_prepare_write = osc_io_prepare_write, - .cio_commit_write = osc_io_commit_write + .cio_submit = osc_io_submit, + .cio_commit_async = osc_io_commit_async }; /***************************************************************************** @@ -718,8 +754,7 @@ static void osc_req_attr_set(const struct lu_env *env, struct lov_oinfo *oinfo; struct cl_req *clerq; struct cl_page *apage; /* _some_ page in @clerq */ - struct cl_lock *lock; /* _some_ lock protecting @apage */ - struct osc_lock *olck; + struct ldlm_lock *lock; /* _some_ lock protecting @apage */ struct osc_page *opg; struct obdo *oa; struct ost_lvb *lvb; @@ -753,31 +788,32 @@ static void osc_req_attr_set(const struct lu_env *env, LASSERT(!list_empty(&clerq->crq_pages)); apage = container_of(clerq->crq_pages.next, struct cl_page, cp_flight); - opg = osc_cl_page_osc(apage); - apage = opg->ops_cl.cpl_page; /* now apage is a sub-page */ - lock = cl_lock_at_page(env, apage->cp_obj, apage, NULL, 1, 1); - if (!lock) { - struct cl_object_header *head; - struct cl_lock *scan; - - head = cl_object_header(apage->cp_obj); - list_for_each_entry(scan, &head->coh_locks, cll_linkage) - CL_LOCK_DEBUG(D_ERROR, env, scan, - "no cover page!\n"); - CL_PAGE_DEBUG(D_ERROR, env, apage, - "dump uncover page!\n"); + opg = osc_cl_page_osc(apage, NULL); + lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg), + 1, 1); + if (!lock && !opg->ops_srvlock) { + struct ldlm_resource *res; + struct ldlm_res_id *resname; + + CL_PAGE_DEBUG(D_ERROR, env, apage, "uncovered page!\n"); + + resname = &osc_env_info(env)->oti_resname; + ostid_build_res_name(&oinfo->loi_oi, resname); + res = ldlm_resource_get( + osc_export(cl2osc(obj))->exp_obd->obd_namespace, + NULL, resname, LDLM_EXTENT, 0); + ldlm_resource_dump(D_ERROR, res); + dump_stack(); LBUG(); } - olck = osc_lock_at(lock); - LASSERT(ergo(opg->ops_srvlock, !olck->ols_lock)); /* check for lockless io. */ - if (olck->ols_lock) { - oa->o_handle = olck->ols_lock->l_remote_handle; + if (lock) { + oa->o_handle = lock->l_remote_handle; oa->o_valid |= OBD_MD_FLHANDLE; + LDLM_LOCK_PUT(lock); } - cl_lock_put(env, lock); } } @@ -807,8 +843,9 @@ int osc_req_init(const struct lu_env *env, struct cl_device *dev, if (or) { cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops); result = 0; - } else + } else { result = -ENOMEM; + } return result; } diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c index 013df9787f3e..717d3ffb6789 100644 --- a/drivers/staging/lustre/lustre/osc/osc_lock.c +++ b/drivers/staging/lustre/lustre/osc/osc_lock.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -36,6 +32,7 @@ * Implementation of cl_lock for OSC layer. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ #define DEBUG_SUBSYSTEM S_OSC @@ -50,8 +47,6 @@ * @{ */ -#define _PAGEREF_MAGIC (-10000000) - /***************************************************************************** * * Type conversions. @@ -62,7 +57,6 @@ static const struct cl_lock_operations osc_lock_ops; static const struct cl_lock_operations osc_lock_lockless_ops; static void osc_lock_to_lockless(const struct lu_env *env, struct osc_lock *ols, int force); -static int osc_lock_has_pages(struct osc_lock *olck); int osc_lock_is_lockless(const struct osc_lock *olck) { @@ -90,11 +84,11 @@ static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle) static int osc_lock_invariant(struct osc_lock *ols) { struct ldlm_lock *lock = osc_handle_ptr(&ols->ols_handle); - struct ldlm_lock *olock = ols->ols_lock; + struct ldlm_lock *olock = ols->ols_dlmlock; int handle_used = lustre_handle_is_used(&ols->ols_handle); if (ergo(osc_lock_is_lockless(ols), - ols->ols_locklessable && !ols->ols_lock)) + ols->ols_locklessable && !ols->ols_dlmlock)) return 1; /* @@ -111,7 +105,7 @@ static int osc_lock_invariant(struct osc_lock *ols) ergo(!lock, !olock))) return 0; /* - * Check that ->ols_handle and ->ols_lock are consistent, but + * Check that ->ols_handle and ->ols_dlmlock are consistent, but * take into account that they are set at the different time. */ if (!ergo(ols->ols_state == OLS_CANCELLED, @@ -122,7 +116,7 @@ static int osc_lock_invariant(struct osc_lock *ols) * ast. */ if (!ergo(olock && ols->ols_state < OLS_CANCELLED, - ((olock->l_flags & LDLM_FL_DESTROYED) == 0))) + !ldlm_is_destroyed(olock))) return 0; if (!ergo(ols->ols_state == OLS_GRANTED, @@ -138,117 +132,13 @@ static int osc_lock_invariant(struct osc_lock *ols) * */ -/** - * Breaks a link between osc_lock and dlm_lock. - */ -static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck) -{ - struct ldlm_lock *dlmlock; - - spin_lock(&osc_ast_guard); - dlmlock = olck->ols_lock; - if (!dlmlock) { - spin_unlock(&osc_ast_guard); - return; - } - - olck->ols_lock = NULL; - /* wb(); --- for all who checks (ols->ols_lock != NULL) before - * call to osc_lock_detach() - */ - dlmlock->l_ast_data = NULL; - olck->ols_handle.cookie = 0ULL; - spin_unlock(&osc_ast_guard); - - lock_res_and_lock(dlmlock); - if (dlmlock->l_granted_mode == dlmlock->l_req_mode) { - struct cl_object *obj = olck->ols_cl.cls_obj; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - __u64 old_kms; - - cl_object_attr_lock(obj); - /* Must get the value under the lock to avoid possible races. */ - old_kms = cl2osc(obj)->oo_oinfo->loi_kms; - /* Update the kms. Need to loop all granted locks. - * Not a problem for the client - */ - attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms); - - cl_object_attr_set(env, obj, attr, CAT_KMS); - cl_object_attr_unlock(obj); - } - unlock_res_and_lock(dlmlock); - - /* release a reference taken in osc_lock_upcall0(). */ - LASSERT(olck->ols_has_ref); - lu_ref_del(&dlmlock->l_reference, "osc_lock", olck); - LDLM_LOCK_RELEASE(dlmlock); - olck->ols_has_ref = 0; -} - -static int osc_lock_unhold(struct osc_lock *ols) -{ - int result = 0; - - if (ols->ols_hold) { - ols->ols_hold = 0; - result = osc_cancel_base(&ols->ols_handle, - ols->ols_einfo.ei_mode); - } - return result; -} - -static int osc_lock_unuse(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *ols = cl2osc_lock(slice); - - LINVRNT(osc_lock_invariant(ols)); - - switch (ols->ols_state) { - case OLS_NEW: - LASSERT(!ols->ols_hold); - LASSERT(ols->ols_agl); - return 0; - case OLS_UPCALL_RECEIVED: - osc_lock_unhold(ols); - case OLS_ENQUEUED: - LASSERT(!ols->ols_hold); - osc_lock_detach(env, ols); - ols->ols_state = OLS_NEW; - return 0; - case OLS_GRANTED: - LASSERT(!ols->ols_glimpse); - LASSERT(ols->ols_hold); - /* - * Move lock into OLS_RELEASED state before calling - * osc_cancel_base() so that possible synchronous cancellation - * sees that lock is released. - */ - ols->ols_state = OLS_RELEASED; - return osc_lock_unhold(ols); - default: - CERROR("Impossible state: %d\n", ols->ols_state); - LBUG(); - } -} - static void osc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) { struct osc_lock *ols = cl2osc_lock(slice); LINVRNT(osc_lock_invariant(ols)); - /* - * ->ols_hold can still be true at this point if, for example, a - * thread that requested a lock was killed (and released a reference - * to the lock), before reply from a server was received. In this case - * lock is destroyed immediately after upcall. - */ - osc_lock_unhold(ols); - LASSERT(!ols->ols_lock); - LASSERT(atomic_read(&ols->ols_pageref) == 0 || - atomic_read(&ols->ols_pageref) == _PAGEREF_MAGIC); + LASSERT(!ols->ols_dlmlock); kmem_cache_free(osc_lock_kmem, ols); } @@ -275,55 +165,12 @@ static __u64 osc_enq2ldlm_flags(__u32 enqflags) result |= LDLM_FL_HAS_INTENT; if (enqflags & CEF_DISCARD_DATA) result |= LDLM_FL_AST_DISCARD_DATA; + if (enqflags & CEF_PEEK) + result |= LDLM_FL_TEST_LOCK; return result; } /** - * Global spin-lock protecting consistency of ldlm_lock::l_ast_data - * pointers. Initialized in osc_init(). - */ -spinlock_t osc_ast_guard; - -static struct osc_lock *osc_ast_data_get(struct ldlm_lock *dlm_lock) -{ - struct osc_lock *olck; - - lock_res_and_lock(dlm_lock); - spin_lock(&osc_ast_guard); - olck = dlm_lock->l_ast_data; - if (olck) { - struct cl_lock *lock = olck->ols_cl.cls_lock; - /* - * If osc_lock holds a reference on ldlm lock, return it even - * when cl_lock is in CLS_FREEING state. This way - * - * osc_ast_data_get(dlmlock) == NULL - * - * guarantees that all osc references on dlmlock were - * released. osc_dlm_blocking_ast0() relies on that. - */ - if (lock->cll_state < CLS_FREEING || olck->ols_has_ref) { - cl_lock_get_trust(lock); - lu_ref_add_atomic(&lock->cll_reference, - "ast", current); - } else - olck = NULL; - } - spin_unlock(&osc_ast_guard); - unlock_res_and_lock(dlm_lock); - return olck; -} - -static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck) -{ - struct cl_lock *lock; - - lock = olck->ols_cl.cls_lock; - lu_ref_del(&lock->cll_reference, "ast", current); - cl_lock_put(env, lock); -} - -/** * Updates object attributes from a lock value block (lvb) received together * with the DLM lock reply from the server. Copy of osc_update_enqueue() * logic. @@ -333,35 +180,30 @@ static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck) * * Called under lock and resource spin-locks. */ -static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck, - int rc) +static void osc_lock_lvb_update(const struct lu_env *env, + struct osc_object *osc, + struct ldlm_lock *dlmlock, + struct ost_lvb *lvb) { - struct ost_lvb *lvb; - struct cl_object *obj; - struct lov_oinfo *oinfo; - struct cl_attr *attr; + struct cl_object *obj = osc2cl(osc); + struct lov_oinfo *oinfo = osc->oo_oinfo; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; unsigned valid; - if (!(olck->ols_flags & LDLM_FL_LVB_READY)) - return; - - lvb = &olck->ols_lvb; - obj = olck->ols_cl.cls_obj; - oinfo = cl2osc(obj)->oo_oinfo; - attr = &osc_env_info(env)->oti_attr; valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE; + if (!lvb) + lvb = dlmlock->l_lvb_data; + cl_lvb2attr(attr, lvb); cl_object_attr_lock(obj); - if (rc == 0) { - struct ldlm_lock *dlmlock; + if (dlmlock) { __u64 size; - dlmlock = olck->ols_lock; - - /* re-grab LVB from a dlm lock under DLM spin-locks. */ - *lvb = *(struct ost_lvb *)dlmlock->l_lvb_data; + check_res_locked(dlmlock->l_resource); + LASSERT(lvb == dlmlock->l_lvb_data); size = lvb->lvb_size; + /* Extend KMS up to the end of this lock and no further * A lock on [x,y] means a KMS of up to y + 1 bytes! */ @@ -378,102 +220,67 @@ static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck, dlmlock->l_policy_data.l_extent.end); } ldlm_lock_allow_match_locked(dlmlock); - } else if (rc == -ENAVAIL && olck->ols_glimpse) { - CDEBUG(D_INODE, "glimpsed, setting rss=%llu; leaving kms=%llu\n", - lvb->lvb_size, oinfo->loi_kms); - } else - valid = 0; - - if (valid != 0) - cl_object_attr_set(env, obj, attr, valid); + } + cl_object_attr_set(env, obj, attr, valid); cl_object_attr_unlock(obj); } -/** - * Called when a lock is granted, from an upcall (when server returned a - * granted lock), or from completion AST, when server returned a blocked lock. - * - * Called under lock and resource spin-locks, that are released temporarily - * here. - */ -static void osc_lock_granted(const struct lu_env *env, struct osc_lock *olck, - struct ldlm_lock *dlmlock, int rc) +static void osc_lock_granted(const struct lu_env *env, struct osc_lock *oscl, + struct lustre_handle *lockh, bool lvb_update) { - struct ldlm_extent *ext; - struct cl_lock *lock; - struct cl_lock_descr *descr; + struct ldlm_lock *dlmlock; - LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode); + dlmlock = ldlm_handle2lock_long(lockh, 0); + LASSERT(dlmlock); - if (olck->ols_state < OLS_GRANTED) { - lock = olck->ols_cl.cls_lock; - ext = &dlmlock->l_policy_data.l_extent; - descr = &osc_env_info(env)->oti_descr; - descr->cld_obj = lock->cll_descr.cld_obj; + /* lock reference taken by ldlm_handle2lock_long() is + * owned by osc_lock and released in osc_lock_detach() + */ + lu_ref_add(&dlmlock->l_reference, "osc_lock", oscl); + oscl->ols_has_ref = 1; - /* XXX check that ->l_granted_mode is valid. */ - descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode); - descr->cld_start = cl_index(descr->cld_obj, ext->start); - descr->cld_end = cl_index(descr->cld_obj, ext->end); - descr->cld_gid = ext->gid; - /* - * tell upper layers the extent of the lock that was actually - * granted - */ - olck->ols_state = OLS_GRANTED; - osc_lock_lvb_update(env, olck, rc); - - /* release DLM spin-locks to allow cl_lock_{modify,signal}() - * to take a semaphore on a parent lock. This is safe, because - * spin-locks are needed to protect consistency of - * dlmlock->l_*_mode and LVB, and we have finished processing - * them. + LASSERT(!oscl->ols_dlmlock); + oscl->ols_dlmlock = dlmlock; + + /* This may be a matched lock for glimpse request, do not hold + * lock reference in that case. + */ + if (!oscl->ols_glimpse) { + /* hold a refc for non glimpse lock which will + * be released in osc_lock_cancel() */ - unlock_res_and_lock(dlmlock); - cl_lock_modify(env, lock, descr); - cl_lock_signal(env, lock); - LINVRNT(osc_lock_invariant(olck)); - lock_res_and_lock(dlmlock); + lustre_handle_copy(&oscl->ols_handle, lockh); + ldlm_lock_addref(lockh, oscl->ols_einfo.ei_mode); + oscl->ols_hold = 1; } -} - -static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck) - -{ - struct ldlm_lock *dlmlock; - - dlmlock = ldlm_handle2lock_long(&olck->ols_handle, 0); - LASSERT(dlmlock); + /* Lock must have been granted. */ lock_res_and_lock(dlmlock); - spin_lock(&osc_ast_guard); - LASSERT(dlmlock->l_ast_data == olck); - LASSERT(!olck->ols_lock); - olck->ols_lock = dlmlock; - spin_unlock(&osc_ast_guard); + if (dlmlock->l_granted_mode == dlmlock->l_req_mode) { + struct ldlm_extent *ext = &dlmlock->l_policy_data.l_extent; + struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr; - /* - * Lock might be not yet granted. In this case, completion ast - * (osc_ldlm_completion_ast()) comes later and finishes lock - * granting. - */ - if (dlmlock->l_granted_mode == dlmlock->l_req_mode) - osc_lock_granted(env, olck, dlmlock, 0); - unlock_res_and_lock(dlmlock); + /* extend the lock extent, otherwise it will have problem when + * we decide whether to grant a lockless lock. + */ + descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode); + descr->cld_start = cl_index(descr->cld_obj, ext->start); + descr->cld_end = cl_index(descr->cld_obj, ext->end); + descr->cld_gid = ext->gid; - /* - * osc_enqueue_interpret() decrefs asynchronous locks, counter - * this. - */ - ldlm_lock_addref(&olck->ols_handle, olck->ols_einfo.ei_mode); - olck->ols_hold = 1; + /* no lvb update for matched lock */ + if (lvb_update) { + LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY); + osc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj), + dlmlock, NULL); + } + LINVRNT(osc_lock_invariant(oscl)); + } + unlock_res_and_lock(dlmlock); - /* lock reference taken by ldlm_handle2lock_long() is owned by - * osc_lock and released in osc_lock_detach() - */ - lu_ref_add(&dlmlock->l_reference, "osc_lock", olck); - olck->ols_has_ref = 1; + LASSERT(oscl->ols_state != OLS_GRANTED); + oscl->ols_state = OLS_GRANTED; } /** @@ -481,143 +288,124 @@ static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck) * received from a server, or after osc_enqueue_base() matched a local DLM * lock. */ -static int osc_lock_upcall(void *cookie, int errcode) +static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh, + int errcode) { - struct osc_lock *olck = cookie; - struct cl_lock_slice *slice = &olck->ols_cl; - struct cl_lock *lock = slice->cls_lock; + struct osc_lock *oscl = cookie; + struct cl_lock_slice *slice = &oscl->ols_cl; struct lu_env *env; struct cl_env_nest nest; + int rc; env = cl_env_nested_get(&nest); - if (!IS_ERR(env)) { - int rc; + /* should never happen, similar to osc_ldlm_blocking_ast(). */ + LASSERT(!IS_ERR(env)); + + rc = ldlm_error2errno(errcode); + if (oscl->ols_state == OLS_ENQUEUED) { + oscl->ols_state = OLS_UPCALL_RECEIVED; + } else if (oscl->ols_state == OLS_CANCELLED) { + rc = -EIO; + } else { + CERROR("Impossible state: %d\n", oscl->ols_state); + LBUG(); + } - cl_lock_mutex_get(env, lock); + if (rc == 0) + osc_lock_granted(env, oscl, lockh, errcode == ELDLM_OK); - LASSERT(lock->cll_state >= CLS_QUEUING); - if (olck->ols_state == OLS_ENQUEUED) { - olck->ols_state = OLS_UPCALL_RECEIVED; - rc = ldlm_error2errno(errcode); - } else if (olck->ols_state == OLS_CANCELLED) { - rc = -EIO; - } else { - CERROR("Impossible state: %d\n", olck->ols_state); - LBUG(); - } - if (rc) { - struct ldlm_lock *dlmlock; - - dlmlock = ldlm_handle2lock(&olck->ols_handle); - if (dlmlock) { - lock_res_and_lock(dlmlock); - spin_lock(&osc_ast_guard); - LASSERT(!olck->ols_lock); - dlmlock->l_ast_data = NULL; - olck->ols_handle.cookie = 0ULL; - spin_unlock(&osc_ast_guard); - ldlm_lock_fail_match_locked(dlmlock); - unlock_res_and_lock(dlmlock); - LDLM_LOCK_PUT(dlmlock); - } - } else { - if (olck->ols_glimpse) - olck->ols_glimpse = 0; - osc_lock_upcall0(env, olck); - } + /* Error handling, some errors are tolerable. */ + if (oscl->ols_locklessable && rc == -EUSERS) { + /* This is a tolerable error, turn this lock into + * lockless lock. + */ + osc_object_set_contended(cl2osc(slice->cls_obj)); + LASSERT(slice->cls_ops == &osc_lock_ops); + + /* Change this lock to ldlmlock-less lock. */ + osc_lock_to_lockless(env, oscl, 1); + oscl->ols_state = OLS_GRANTED; + rc = 0; + } else if (oscl->ols_glimpse && rc == -ENAVAIL) { + LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY); + osc_lock_lvb_update(env, cl2osc(slice->cls_obj), + NULL, &oscl->ols_lvb); + /* Hide the error. */ + rc = 0; + } - /* Error handling, some errors are tolerable. */ - if (olck->ols_locklessable && rc == -EUSERS) { - /* This is a tolerable error, turn this lock into - * lockless lock. - */ - osc_object_set_contended(cl2osc(slice->cls_obj)); - LASSERT(slice->cls_ops == &osc_lock_ops); + if (oscl->ols_owner) + cl_sync_io_note(env, oscl->ols_owner, rc); + cl_env_nested_put(&nest, env); - /* Change this lock to ldlmlock-less lock. */ - osc_lock_to_lockless(env, olck, 1); - olck->ols_state = OLS_GRANTED; - rc = 0; - } else if (olck->ols_glimpse && rc == -ENAVAIL) { - osc_lock_lvb_update(env, olck, rc); - cl_lock_delete(env, lock); - /* Hide the error. */ - rc = 0; - } - - if (rc == 0) { - /* For AGL case, the RPC sponsor may exits the cl_lock - * processing without wait() called before related OSC - * lock upcall(). So update the lock status according - * to the enqueue result inside AGL upcall(). - */ - if (olck->ols_agl) { - lock->cll_flags |= CLF_FROM_UPCALL; - cl_wait_try(env, lock); - lock->cll_flags &= ~CLF_FROM_UPCALL; - if (!olck->ols_glimpse) - olck->ols_agl = 0; - } - cl_lock_signal(env, lock); - /* del user for lock upcall cookie */ - cl_unuse_try(env, lock); - } else { - /* del user for lock upcall cookie */ - cl_lock_user_del(env, lock); - cl_lock_error(env, lock, rc); - } + return rc; +} - /* release cookie reference, acquired by osc_lock_enqueue() */ - cl_lock_hold_release(env, lock, "upcall", lock); - cl_lock_mutex_put(env, lock); +static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh, + int errcode) +{ + struct osc_object *osc = cookie; + struct ldlm_lock *dlmlock; + struct lu_env *env; + struct cl_env_nest nest; - lu_ref_del(&lock->cll_reference, "upcall", lock); - /* This maybe the last reference, so must be called after - * cl_lock_mutex_put(). - */ - cl_lock_put(env, lock); + env = cl_env_nested_get(&nest); + LASSERT(!IS_ERR(env)); - cl_env_nested_put(&nest, env); - } else { - /* should never happen, similar to osc_ldlm_blocking_ast(). */ - LBUG(); + if (errcode == ELDLM_LOCK_MATCHED) { + errcode = ELDLM_OK; + goto out; } - return errcode; + + if (errcode != ELDLM_OK) + goto out; + + dlmlock = ldlm_handle2lock(lockh); + LASSERT(dlmlock); + + lock_res_and_lock(dlmlock); + LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode); + + /* there is no osc_lock associated with AGL lock */ + osc_lock_lvb_update(env, osc, dlmlock, NULL); + + unlock_res_and_lock(dlmlock); + LDLM_LOCK_PUT(dlmlock); + +out: + cl_object_put(env, osc2cl(osc)); + cl_env_nested_put(&nest, env); + return ldlm_error2errno(errcode); } -/** - * Core of osc_dlm_blocking_ast() logic. - */ -static void osc_lock_blocking(const struct lu_env *env, - struct ldlm_lock *dlmlock, - struct osc_lock *olck, int blocking) +static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end, + enum cl_lock_mode mode, int discard) { - struct cl_lock *lock = olck->ols_cl.cls_lock; + struct lu_env *env; + struct cl_env_nest nest; + int rc = 0; + int rc2 = 0; - LASSERT(olck->ols_lock == dlmlock); - CLASSERT(OLS_BLOCKED < OLS_CANCELLED); - LASSERT(!osc_lock_is_lockless(olck)); + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) + return PTR_ERR(env); + + if (mode == CLM_WRITE) { + rc = osc_cache_writeback_range(env, obj, start, end, 1, + discard); + CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n", + obj, start, end, rc, + discard ? "discarded" : "written back"); + if (rc > 0) + rc = 0; + } - /* - * Lock might be still addref-ed here, if e.g., blocking ast - * is sent for a failed lock. - */ - osc_lock_unhold(olck); + rc2 = osc_lock_discard_pages(env, obj, start, end, mode); + if (rc == 0 && rc2 < 0) + rc = rc2; - if (blocking && olck->ols_state < OLS_BLOCKED) - /* - * Move osc_lock into OLS_BLOCKED before canceling the lock, - * because it recursively re-enters osc_lock_blocking(), with - * the state set to OLS_CANCELLED. - */ - olck->ols_state = OLS_BLOCKED; - /* - * cancel and destroy lock at least once no matter how blocking ast is - * entered (see comment above osc_ldlm_blocking_ast() for use - * cases). cl_lock_cancel() and cl_lock_delete() are idempotent. - */ - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); + cl_env_nested_put(&nest, env); + return rc; } /** @@ -628,65 +416,63 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env, struct ldlm_lock *dlmlock, void *data, int flag) { - struct osc_lock *olck; - struct cl_lock *lock; - int result; - int cancel; - - LASSERT(flag == LDLM_CB_BLOCKING || flag == LDLM_CB_CANCELING); - - cancel = 0; - olck = osc_ast_data_get(dlmlock); - if (olck) { - lock = olck->ols_cl.cls_lock; - cl_lock_mutex_get(env, lock); - LINVRNT(osc_lock_invariant(olck)); - if (olck->ols_ast_wait) { - /* wake up osc_lock_use() */ - cl_lock_signal(env, lock); - olck->ols_ast_wait = 0; - } - /* - * Lock might have been canceled while this thread was - * sleeping for lock mutex, but olck is pinned in memory. - */ - if (olck == dlmlock->l_ast_data) { - /* - * NOTE: DLM sends blocking AST's for failed locks - * (that are still in pre-OLS_GRANTED state) - * too, and they have to be canceled otherwise - * DLM lock is never destroyed and stuck in - * the memory. - * - * Alternatively, ldlm_cli_cancel() can be - * called here directly for osc_locks with - * ols_state < OLS_GRANTED to maintain an - * invariant that ->clo_cancel() is only called - * for locks that were granted. - */ - LASSERT(data == olck); - osc_lock_blocking(env, dlmlock, - olck, flag == LDLM_CB_BLOCKING); - } else - cancel = 1; - cl_lock_mutex_put(env, lock); - osc_ast_data_put(env, olck); - } else - /* - * DLM lock exists, but there is no cl_lock attached to it. - * This is a `normal' race. cl_object and its cl_lock's can be - * removed by memory pressure, together with all pages. + struct cl_object *obj = NULL; + int result = 0; + int discard; + enum cl_lock_mode mode = CLM_READ; + + LASSERT(flag == LDLM_CB_CANCELING); + + lock_res_and_lock(dlmlock); + if (dlmlock->l_granted_mode != dlmlock->l_req_mode) { + dlmlock->l_ast_data = NULL; + unlock_res_and_lock(dlmlock); + return 0; + } + + discard = ldlm_is_discard_data(dlmlock); + if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP)) + mode = CLM_WRITE; + + if (dlmlock->l_ast_data) { + obj = osc2cl(dlmlock->l_ast_data); + dlmlock->l_ast_data = NULL; + + cl_object_get(obj); + } + + unlock_res_and_lock(dlmlock); + + /* if l_ast_data is NULL, the dlmlock was enqueued by AGL or + * the object has been destroyed. + */ + if (obj) { + struct ldlm_extent *extent = &dlmlock->l_policy_data.l_extent; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; + __u64 old_kms; + + /* Destroy pages covered by the extent of the DLM lock */ + result = osc_lock_flush(cl2osc(obj), + cl_index(obj, extent->start), + cl_index(obj, extent->end), + mode, discard); + + /* losing a lock, update kms */ + lock_res_and_lock(dlmlock); + cl_object_attr_lock(obj); + /* Must get the value under the lock to avoid race. */ + old_kms = cl2osc(obj)->oo_oinfo->loi_kms; + /* Update the kms. Need to loop all granted locks. + * Not a problem for the client */ - cancel = (flag == LDLM_CB_BLOCKING); + attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms); - if (cancel) { - struct lustre_handle *lockh; + cl_object_attr_set(env, obj, attr, CAT_KMS); + cl_object_attr_unlock(obj); + unlock_res_and_lock(dlmlock); - lockh = &osc_env_info(env)->oti_handle; - ldlm_lock2handle(dlmlock, lockh); - result = ldlm_cli_cancel(lockh, LCF_ASYNC); - } else - result = 0; + cl_object_put(env, obj); + } return result; } @@ -736,107 +522,52 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock, struct ldlm_lock_desc *new, void *data, int flag) { - struct lu_env *env; - struct cl_env_nest nest; - int result; + int result = 0; - /* - * This can be called in the context of outer IO, e.g., - * - * cl_enqueue()->... - * ->osc_enqueue_base()->... - * ->ldlm_prep_elc_req()->... - * ->ldlm_cancel_callback()->... - * ->osc_ldlm_blocking_ast() - * - * new environment has to be created to not corrupt outer context. - */ - env = cl_env_nested_get(&nest); - if (!IS_ERR(env)) { - result = osc_dlm_blocking_ast0(env, dlmlock, data, flag); - cl_env_nested_put(&nest, env); - } else { - result = PTR_ERR(env); - /* - * XXX This should never happen, as cl_lock is - * stuck. Pre-allocated environment a la vvp_inode_fini_env - * should be used. - */ - LBUG(); - } - if (result != 0) { + switch (flag) { + case LDLM_CB_BLOCKING: { + struct lustre_handle lockh; + + ldlm_lock2handle(dlmlock, &lockh); + result = ldlm_cli_cancel(&lockh, LCF_ASYNC); if (result == -ENODATA) result = 0; - else - CERROR("BAST failed: %d\n", result); + break; } - return result; -} + case LDLM_CB_CANCELING: { + struct lu_env *env; + struct cl_env_nest nest; -static int osc_ldlm_completion_ast(struct ldlm_lock *dlmlock, - __u64 flags, void *data) -{ - struct cl_env_nest nest; - struct lu_env *env; - struct osc_lock *olck; - struct cl_lock *lock; - int result; - int dlmrc; - - /* first, do dlm part of the work */ - dlmrc = ldlm_completion_ast_async(dlmlock, flags, data); - /* then, notify cl_lock */ - env = cl_env_nested_get(&nest); - if (!IS_ERR(env)) { - olck = osc_ast_data_get(dlmlock); - if (olck) { - lock = olck->ols_cl.cls_lock; - cl_lock_mutex_get(env, lock); - /* - * ldlm_handle_cp_callback() copied LVB from request - * to lock->l_lvb_data, store it in osc_lock. - */ - LASSERT(dlmlock->l_lvb_data); - lock_res_and_lock(dlmlock); - olck->ols_lvb = *(struct ost_lvb *)dlmlock->l_lvb_data; - if (!olck->ols_lock) { - /* - * upcall (osc_lock_upcall()) hasn't yet been - * called. Do nothing now, upcall will bind - * olck to dlmlock and signal the waiters. - * - * This maintains an invariant that osc_lock - * and ldlm_lock are always bound when - * osc_lock is in OLS_GRANTED state. - */ - } else if (dlmlock->l_granted_mode == - dlmlock->l_req_mode) { - osc_lock_granted(env, olck, dlmlock, dlmrc); - } - unlock_res_and_lock(dlmlock); + /* + * This can be called in the context of outer IO, e.g., + * + * osc_enqueue_base()->... + * ->ldlm_prep_elc_req()->... + * ->ldlm_cancel_callback()->... + * ->osc_ldlm_blocking_ast() + * + * new environment has to be created to not corrupt outer + * context. + */ + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) { + result = PTR_ERR(env); + break; + } - if (dlmrc != 0) { - CL_LOCK_DEBUG(D_ERROR, env, lock, - "dlmlock returned %d\n", dlmrc); - cl_lock_error(env, lock, dlmrc); - } - cl_lock_mutex_put(env, lock); - osc_ast_data_put(env, olck); - result = 0; - } else - result = -ELDLM_NO_LOCK_DATA; + result = osc_dlm_blocking_ast0(env, dlmlock, data, flag); cl_env_nested_put(&nest, env); - } else - result = PTR_ERR(env); - return dlmrc ?: result; + break; + } + default: + LBUG(); + } + return result; } static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) { struct ptlrpc_request *req = data; - struct osc_lock *olck; - struct cl_lock *lock; - struct cl_object *obj; struct cl_env_nest nest; struct lu_env *env; struct ost_lvb *lvb; @@ -847,14 +578,16 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) env = cl_env_nested_get(&nest); if (!IS_ERR(env)) { - /* osc_ast_data_get() has to go after environment is - * allocated, because osc_ast_data() acquires a - * reference to a lock, and it can only be released in - * environment. - */ - olck = osc_ast_data_get(dlmlock); - if (olck) { - lock = olck->ols_cl.cls_lock; + struct cl_object *obj = NULL; + + lock_res_and_lock(dlmlock); + if (dlmlock->l_ast_data) { + obj = osc2cl(dlmlock->l_ast_data); + cl_object_get(obj); + } + unlock_res_and_lock(dlmlock); + + if (obj) { /* Do not grab the mutex of cl_lock for glimpse. * See LU-1274 for details. * BTW, it's okay for cl_lock to be cancelled during @@ -869,7 +602,6 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) result = req_capsule_server_pack(cap); if (result == 0) { lvb = req_capsule_server_get(cap, &RMF_DLM_LVB); - obj = lock->cll_descr.cld_obj; result = cl_object_glimpse(env, obj, lvb); } if (!exp_connect_lvb_type(req->rq_export)) @@ -877,7 +609,7 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) &RMF_DLM_LVB, sizeof(struct ost_lvb_v1), RCL_SERVER); - osc_ast_data_put(env, olck); + cl_object_put(env, obj); } else { /* * These errors are normal races, so we don't want to @@ -888,44 +620,123 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) result = -ELDLM_NO_LOCK_DATA; } cl_env_nested_put(&nest, env); - } else + } else { result = PTR_ERR(env); + } req->rq_status = result; return result; } -static unsigned long osc_lock_weigh(const struct lu_env *env, - const struct cl_lock_slice *slice) +static int weigh_cb(const struct lu_env *env, struct cl_io *io, + struct osc_page *ops, void *cbdata) { - /* - * don't need to grab coh_page_guard since we don't care the exact # - * of pages.. - */ - return cl_object_header(slice->cls_obj)->coh_pages; + struct cl_page *page = ops->ops_cl.cpl_page; + + if (cl_page_is_vmlocked(env, page) || + PageDirty(page->cp_vmpage) || PageWriteback(page->cp_vmpage) + ) + return CLP_GANG_ABORT; + + *(pgoff_t *)cbdata = osc_index(ops) + 1; + return CLP_GANG_OKAY; } -static void osc_lock_build_einfo(const struct lu_env *env, - const struct cl_lock *clock, - struct osc_lock *lock, - struct ldlm_enqueue_info *einfo) +static unsigned long osc_lock_weight(const struct lu_env *env, + struct osc_object *oscobj, + struct ldlm_extent *extent) +{ + struct cl_io *io = &osc_env_info(env)->oti_io; + struct cl_object *obj = cl_object_top(&oscobj->oo_cl); + pgoff_t page_index; + int result; + + io->ci_obj = obj; + io->ci_ignore_layout = 1; + result = cl_io_init(env, io, CIT_MISC, io->ci_obj); + if (result != 0) + return result; + + page_index = cl_index(obj, extent->start); + do { + result = osc_page_gang_lookup(env, io, oscobj, + page_index, + cl_index(obj, extent->end), + weigh_cb, (void *)&page_index); + if (result == CLP_GANG_ABORT) + break; + if (result == CLP_GANG_RESCHED) + cond_resched(); + } while (result != CLP_GANG_OKAY); + cl_io_fini(env, io); + + return result == CLP_GANG_ABORT ? 1 : 0; +} + +/** + * Get the weight of dlm lock for early cancellation. + */ +unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock) { - enum cl_lock_mode mode; + struct cl_env_nest nest; + struct lu_env *env; + struct osc_object *obj; + struct osc_lock *oscl; + unsigned long weight; + bool found = false; + + might_sleep(); + /* + * osc_ldlm_weigh_ast has a complex context since it might be called + * because of lock canceling, or from user's input. We have to make + * a new environment for it. Probably it is implementation safe to use + * the upper context because cl_lock_put don't modify environment + * variables. But just in case .. + */ + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) + /* Mostly because lack of memory, do not eliminate this lock */ + return 1; - mode = clock->cll_descr.cld_mode; - if (mode == CLM_PHANTOM) + LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT); + obj = dlmlock->l_ast_data; + if (!obj) { + weight = 1; + goto out; + } + + spin_lock(&obj->oo_ol_spin); + list_for_each_entry(oscl, &obj->oo_ol_list, ols_nextlock_oscobj) { + if (oscl->ols_dlmlock && oscl->ols_dlmlock != dlmlock) + continue; + found = true; + } + spin_unlock(&obj->oo_ol_spin); + if (found) { /* - * For now, enqueue all glimpse locks in read mode. In the - * future, client might choose to enqueue LCK_PW lock for - * glimpse on a file opened for write. + * If the lock is being used by an IO, definitely not cancel it. */ - mode = CLM_READ; + weight = 1; + goto out; + } + + weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent); + +out: + cl_env_nested_put(&nest, env); + return weight; +} +static void osc_lock_build_einfo(const struct lu_env *env, + const struct cl_lock *lock, + struct osc_object *osc, + struct ldlm_enqueue_info *einfo) +{ einfo->ei_type = LDLM_EXTENT; - einfo->ei_mode = osc_cl_lock2ldlm(mode); + einfo->ei_mode = osc_cl_lock2ldlm(lock->cll_descr.cld_mode); einfo->ei_cb_bl = osc_ldlm_blocking_ast; - einfo->ei_cb_cp = osc_ldlm_completion_ast; + einfo->ei_cb_cp = ldlm_completion_ast; einfo->ei_cb_gl = osc_ldlm_glimpse_ast; - einfo->ei_cbdata = lock; /* value to be put into ->l_ast_data */ + einfo->ei_cbdata = osc; /* value to be put into ->l_ast_data */ } /** @@ -981,113 +792,100 @@ static void osc_lock_to_lockless(const struct lu_env *env, LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols))); } -static int osc_lock_compatible(const struct osc_lock *qing, - const struct osc_lock *qed) +static bool osc_lock_compatible(const struct osc_lock *qing, + const struct osc_lock *qed) { - enum cl_lock_mode qing_mode; - enum cl_lock_mode qed_mode; + struct cl_lock_descr *qed_descr = &qed->ols_cl.cls_lock->cll_descr; + struct cl_lock_descr *qing_descr = &qing->ols_cl.cls_lock->cll_descr; - qing_mode = qing->ols_cl.cls_lock->cll_descr.cld_mode; - if (qed->ols_glimpse && - (qed->ols_state >= OLS_UPCALL_RECEIVED || qing_mode == CLM_READ)) - return 1; + if (qed->ols_glimpse) + return true; + + if (qing_descr->cld_mode == CLM_READ && qed_descr->cld_mode == CLM_READ) + return true; + + if (qed->ols_state < OLS_GRANTED) + return true; + + if (qed_descr->cld_mode >= qing_descr->cld_mode && + qed_descr->cld_start <= qing_descr->cld_start && + qed_descr->cld_end >= qing_descr->cld_end) + return true; - qed_mode = qed->ols_cl.cls_lock->cll_descr.cld_mode; - return ((qing_mode == CLM_READ) && (qed_mode == CLM_READ)); + return false; } -/** - * Cancel all conflicting locks and wait for them to be destroyed. - * - * This function is used for two purposes: - * - * - early cancel all conflicting locks before starting IO, and - * - * - guarantee that pages added to the page cache by lockless IO are never - * covered by locks other than lockless IO lock, and, hence, are not - * visible to other threads. - */ -static int osc_lock_enqueue_wait(const struct lu_env *env, - const struct osc_lock *olck) +static void osc_lock_wake_waiters(const struct lu_env *env, + struct osc_object *osc, + struct osc_lock *oscl) { - struct cl_lock *lock = olck->ols_cl.cls_lock; - struct cl_lock_descr *descr = &lock->cll_descr; - struct cl_object_header *hdr = cl_object_header(descr->cld_obj); - struct cl_lock *scan; - struct cl_lock *conflict = NULL; - int lockless = osc_lock_is_lockless(olck); - int rc = 0; + spin_lock(&osc->oo_ol_spin); + list_del_init(&oscl->ols_nextlock_oscobj); + spin_unlock(&osc->oo_ol_spin); - LASSERT(cl_lock_is_mutexed(lock)); + spin_lock(&oscl->ols_lock); + while (!list_empty(&oscl->ols_waiting_list)) { + struct osc_lock *scan; - /* make it enqueue anyway for glimpse lock, because we actually - * don't need to cancel any conflicting locks. - */ - if (olck->ols_glimpse) - return 0; + scan = list_entry(oscl->ols_waiting_list.next, struct osc_lock, + ols_wait_entry); + list_del_init(&scan->ols_wait_entry); - spin_lock(&hdr->coh_lock_guard); - list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) { - struct cl_lock_descr *cld = &scan->cll_descr; - const struct osc_lock *scan_ols; + cl_sync_io_note(env, scan->ols_owner, 0); + } + spin_unlock(&oscl->ols_lock); +} + +static void osc_lock_enqueue_wait(const struct lu_env *env, + struct osc_object *obj, + struct osc_lock *oscl) +{ + struct osc_lock *tmp_oscl; + struct cl_lock_descr *need = &oscl->ols_cl.cls_lock->cll_descr; + struct cl_sync_io *waiter = &osc_env_info(env)->oti_anchor; - if (scan == lock) + spin_lock(&obj->oo_ol_spin); + list_add_tail(&oscl->ols_nextlock_oscobj, &obj->oo_ol_list); + +restart: + list_for_each_entry(tmp_oscl, &obj->oo_ol_list, + ols_nextlock_oscobj) { + struct cl_lock_descr *descr; + + if (tmp_oscl == oscl) break; - if (scan->cll_state < CLS_QUEUING || - scan->cll_state == CLS_FREEING || - cld->cld_start > descr->cld_end || - cld->cld_end < descr->cld_start) + descr = &tmp_oscl->ols_cl.cls_lock->cll_descr; + if (descr->cld_start > need->cld_end || + descr->cld_end < need->cld_start) continue; - /* overlapped and living locks. */ + /* We're not supposed to give up group lock */ + if (descr->cld_mode == CLM_GROUP) + break; - /* We're not supposed to give up group lock. */ - if (scan->cll_descr.cld_mode == CLM_GROUP) { - LASSERT(descr->cld_mode != CLM_GROUP || - descr->cld_gid != scan->cll_descr.cld_gid); + if (!osc_lock_is_lockless(oscl) && + osc_lock_compatible(oscl, tmp_oscl)) continue; - } - scan_ols = osc_lock_at(scan); + /* wait for conflicting lock to be canceled */ + cl_sync_io_init(waiter, 1, cl_sync_io_end); + oscl->ols_owner = waiter; - /* We need to cancel the compatible locks if we're enqueuing - * a lockless lock, for example: - * imagine that client has PR lock on [0, 1000], and thread T0 - * is doing lockless IO in [500, 1500] region. Concurrent - * thread T1 can see lockless data in [500, 1000], which is - * wrong, because these data are possibly stale. - */ - if (!lockless && osc_lock_compatible(olck, scan_ols)) - continue; + spin_lock(&tmp_oscl->ols_lock); + /* add oscl into tmp's ols_waiting list */ + list_add_tail(&oscl->ols_wait_entry, + &tmp_oscl->ols_waiting_list); + spin_unlock(&tmp_oscl->ols_lock); - cl_lock_get_trust(scan); - conflict = scan; - break; - } - spin_unlock(&hdr->coh_lock_guard); + spin_unlock(&obj->oo_ol_spin); + (void)cl_sync_io_wait(env, waiter, 0); - if (conflict) { - if (lock->cll_descr.cld_mode == CLM_GROUP) { - /* we want a group lock but a previous lock request - * conflicts, we do not wait but return 0 so the - * request is send to the server - */ - CDEBUG(D_DLMTRACE, "group lock %p is conflicted with %p, no wait, send to server\n", - lock, conflict); - cl_lock_put(env, conflict); - rc = 0; - } else { - CDEBUG(D_DLMTRACE, "lock %p is conflicted with %p, will wait\n", - lock, conflict); - LASSERT(!lock->cll_conflict); - lu_ref_add(&conflict->cll_reference, "cancel-wait", - lock); - lock->cll_conflict = conflict; - rc = CLO_WAIT; - } + spin_lock(&obj->oo_ol_spin); + oscl->ols_owner = NULL; + goto restart; } - return rc; + spin_unlock(&obj->oo_ol_spin); } /** @@ -1106,188 +904,122 @@ static int osc_lock_enqueue_wait(const struct lu_env *env, */ static int osc_lock_enqueue(const struct lu_env *env, const struct cl_lock_slice *slice, - struct cl_io *unused, __u32 enqflags) + struct cl_io *unused, struct cl_sync_io *anchor) { - struct osc_lock *ols = cl2osc_lock(slice); - struct cl_lock *lock = ols->ols_cl.cls_lock; + struct osc_thread_info *info = osc_env_info(env); + struct osc_io *oio = osc_env_io(env); + struct osc_object *osc = cl2osc(slice->cls_obj); + struct osc_lock *oscl = cl2osc_lock(slice); + struct cl_lock *lock = slice->cls_lock; + struct ldlm_res_id *resname = &info->oti_resname; + ldlm_policy_data_t *policy = &info->oti_policy; + osc_enqueue_upcall_f upcall = osc_lock_upcall; + void *cookie = oscl; + bool async = false; int result; - LASSERT(cl_lock_is_mutexed(lock)); - LASSERTF(ols->ols_state == OLS_NEW, - "Impossible state: %d\n", ols->ols_state); - - LASSERTF(ergo(ols->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ), - "lock = %p, ols = %p\n", lock, ols); + LASSERTF(ergo(oscl->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ), + "lock = %p, ols = %p\n", lock, oscl); - result = osc_lock_enqueue_wait(env, ols); - if (result == 0) { - if (!osc_lock_is_lockless(ols)) { - struct osc_object *obj = cl2osc(slice->cls_obj); - struct osc_thread_info *info = osc_env_info(env); - struct ldlm_res_id *resname = &info->oti_resname; - ldlm_policy_data_t *policy = &info->oti_policy; - struct ldlm_enqueue_info *einfo = &ols->ols_einfo; + if (oscl->ols_state == OLS_GRANTED) + return 0; - /* lock will be passed as upcall cookie, - * hold ref to prevent to be released. - */ - cl_lock_hold_add(env, lock, "upcall", lock); - /* a user for lock also */ - cl_lock_user_add(env, lock); - ols->ols_state = OLS_ENQUEUED; + if (oscl->ols_flags & LDLM_FL_TEST_LOCK) + goto enqueue_base; - /* - * XXX: this is possible blocking point as - * ldlm_lock_match(LDLM_FL_LVB_READY) waits for - * LDLM_CP_CALLBACK. - */ - ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname); - osc_lock_build_policy(env, lock, policy); - result = osc_enqueue_base(osc_export(obj), resname, - &ols->ols_flags, policy, - &ols->ols_lvb, - obj->oo_oinfo->loi_kms_valid, - osc_lock_upcall, - ols, einfo, &ols->ols_handle, - PTLRPCD_SET, 1, ols->ols_agl); - if (result != 0) { - cl_lock_user_del(env, lock); - cl_lock_unhold(env, lock, "upcall", lock); - if (unlikely(result == -ECANCELED)) { - ols->ols_state = OLS_NEW; - result = 0; - } - } - } else { - ols->ols_state = OLS_GRANTED; - ols->ols_owner = osc_env_io(env); - } + if (oscl->ols_glimpse) { + LASSERT(equi(oscl->ols_agl, !anchor)); + async = true; + goto enqueue_base; } - LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols))); - return result; -} -static int osc_lock_wait(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *olck = cl2osc_lock(slice); - struct cl_lock *lock = olck->ols_cl.cls_lock; - - LINVRNT(osc_lock_invariant(olck)); - - if (olck->ols_glimpse && olck->ols_state >= OLS_UPCALL_RECEIVED) { - if (olck->ols_flags & LDLM_FL_LVB_READY) { - return 0; - } else if (olck->ols_agl) { - if (lock->cll_flags & CLF_FROM_UPCALL) - /* It is from enqueue RPC reply upcall for - * updating state. Do not re-enqueue. - */ - return -ENAVAIL; - olck->ols_state = OLS_NEW; - } else { - LASSERT(lock->cll_error); - return lock->cll_error; - } + osc_lock_enqueue_wait(env, osc, oscl); + + /* we can grant lockless lock right after all conflicting locks + * are canceled. + */ + if (osc_lock_is_lockless(oscl)) { + oscl->ols_state = OLS_GRANTED; + oio->oi_lockless = 1; + return 0; } - if (olck->ols_state == OLS_NEW) { - int rc; - - LASSERT(olck->ols_agl); - olck->ols_agl = 0; - olck->ols_flags &= ~LDLM_FL_BLOCK_NOWAIT; - rc = osc_lock_enqueue(env, slice, NULL, CEF_ASYNC | CEF_MUST); - if (rc != 0) - return rc; - else - return CLO_REENQUEUED; +enqueue_base: + oscl->ols_state = OLS_ENQUEUED; + if (anchor) { + atomic_inc(&anchor->csi_sync_nr); + oscl->ols_owner = anchor; } - LASSERT(equi(olck->ols_state >= OLS_UPCALL_RECEIVED && - lock->cll_error == 0, olck->ols_lock)); + /** + * DLM lock's ast data must be osc_object; + * if glimpse or AGL lock, async of osc_enqueue_base() must be true, + * DLM's enqueue callback set to osc_lock_upcall() with cookie as + * osc_lock. + */ + ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname); + osc_lock_build_einfo(env, lock, osc, &oscl->ols_einfo); + osc_lock_build_policy(env, lock, policy); + if (oscl->ols_agl) { + oscl->ols_einfo.ei_cbdata = NULL; + /* hold a reference for callback */ + cl_object_get(osc2cl(osc)); + upcall = osc_lock_upcall_agl; + cookie = osc; + } + result = osc_enqueue_base(osc_export(osc), resname, &oscl->ols_flags, + policy, &oscl->ols_lvb, + osc->oo_oinfo->loi_kms_valid, + upcall, cookie, + &oscl->ols_einfo, PTLRPCD_SET, async, + oscl->ols_agl); + if (result != 0) { + oscl->ols_state = OLS_CANCELLED; + osc_lock_wake_waiters(env, osc, oscl); - return lock->cll_error ?: olck->ols_state >= OLS_GRANTED ? 0 : CLO_WAIT; + /* hide error for AGL lock. */ + if (oscl->ols_agl) { + cl_object_put(env, osc2cl(osc)); + result = 0; + } + if (anchor) + cl_sync_io_note(env, anchor, result); + } else { + if (osc_lock_is_lockless(oscl)) { + oio->oi_lockless = 1; + } else if (!async) { + LASSERT(oscl->ols_state == OLS_GRANTED); + LASSERT(oscl->ols_hold); + LASSERT(oscl->ols_dlmlock); + } + } + return result; } /** - * An implementation of cl_lock_operations::clo_use() method that pins cached - * lock. + * Breaks a link between osc_lock and dlm_lock. */ -static int osc_lock_use(const struct lu_env *env, - const struct cl_lock_slice *slice) +static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck) { - struct osc_lock *olck = cl2osc_lock(slice); - int rc; - - LASSERT(!olck->ols_hold); + struct ldlm_lock *dlmlock; - /* - * Atomically check for LDLM_FL_CBPENDING and addref a lock if this - * flag is not set. This protects us from a concurrent blocking ast. - */ - rc = ldlm_lock_addref_try(&olck->ols_handle, olck->ols_einfo.ei_mode); - if (rc == 0) { - olck->ols_hold = 1; - olck->ols_state = OLS_GRANTED; - } else { - struct cl_lock *lock; + dlmlock = olck->ols_dlmlock; + if (!dlmlock) + return; - /* - * Lock is being cancelled somewhere within - * ldlm_handle_bl_callback(): LDLM_FL_CBPENDING is already - * set, but osc_ldlm_blocking_ast() hasn't yet acquired - * cl_lock mutex. - */ - lock = slice->cls_lock; - LASSERT(lock->cll_state == CLS_INTRANSIT); - LASSERT(lock->cll_users > 0); - /* set a flag for osc_dlm_blocking_ast0() to signal the - * lock. - */ - olck->ols_ast_wait = 1; - rc = CLO_WAIT; + if (olck->ols_hold) { + olck->ols_hold = 0; + osc_cancel_base(&olck->ols_handle, olck->ols_einfo.ei_mode); + olck->ols_handle.cookie = 0ULL; } - return rc; -} -static int osc_lock_flush(struct osc_lock *ols, int discard) -{ - struct cl_lock *lock = ols->ols_cl.cls_lock; - struct cl_env_nest nest; - struct lu_env *env; - int result = 0; - - env = cl_env_nested_get(&nest); - if (!IS_ERR(env)) { - struct osc_object *obj = cl2osc(ols->ols_cl.cls_obj); - struct cl_lock_descr *descr = &lock->cll_descr; - int rc = 0; - - if (descr->cld_mode >= CLM_WRITE) { - result = osc_cache_writeback_range(env, obj, - descr->cld_start, - descr->cld_end, - 1, discard); - LDLM_DEBUG(ols->ols_lock, - "lock %p: %d pages were %s.\n", lock, result, - discard ? "discarded" : "written"); - if (result > 0) - result = 0; - } + olck->ols_dlmlock = NULL; - rc = cl_lock_discard_pages(env, lock); - if (result == 0 && rc < 0) - result = rc; - - cl_env_nested_put(&nest, env); - } else - result = PTR_ERR(env); - if (result == 0) { - ols->ols_flush = 1; - LINVRNT(!osc_lock_has_pages(ols)); - } - return result; + /* release a reference taken in osc_lock_upcall(). */ + LASSERT(olck->ols_has_ref); + lu_ref_del(&dlmlock->l_reference, "osc_lock", olck); + LDLM_LOCK_RELEASE(dlmlock); + olck->ols_has_ref = 0; } /** @@ -1307,96 +1039,16 @@ static int osc_lock_flush(struct osc_lock *ols, int discard) static void osc_lock_cancel(const struct lu_env *env, const struct cl_lock_slice *slice) { - struct cl_lock *lock = slice->cls_lock; - struct osc_lock *olck = cl2osc_lock(slice); - struct ldlm_lock *dlmlock = olck->ols_lock; - int result = 0; - int discard; - - LASSERT(cl_lock_is_mutexed(lock)); - LINVRNT(osc_lock_invariant(olck)); - - if (dlmlock) { - int do_cancel; - - discard = !!(dlmlock->l_flags & LDLM_FL_DISCARD_DATA); - if (olck->ols_state >= OLS_GRANTED) - result = osc_lock_flush(olck, discard); - osc_lock_unhold(olck); - - lock_res_and_lock(dlmlock); - /* Now that we're the only user of dlm read/write reference, - * mostly the ->l_readers + ->l_writers should be zero. - * However, there is a corner case. - * See bug 18829 for details. - */ - do_cancel = (dlmlock->l_readers == 0 && - dlmlock->l_writers == 0); - dlmlock->l_flags |= LDLM_FL_CBPENDING; - unlock_res_and_lock(dlmlock); - if (do_cancel) - result = ldlm_cli_cancel(&olck->ols_handle, LCF_ASYNC); - if (result < 0) - CL_LOCK_DEBUG(D_ERROR, env, lock, - "lock %p cancel failure with error(%d)\n", - lock, result); - } - olck->ols_state = OLS_CANCELLED; - olck->ols_flags &= ~LDLM_FL_LVB_READY; - osc_lock_detach(env, olck); -} - -static int osc_lock_has_pages(struct osc_lock *olck) -{ - return 0; -} - -static void osc_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *olck; + struct osc_object *obj = cl2osc(slice->cls_obj); + struct osc_lock *oscl = cl2osc_lock(slice); - olck = cl2osc_lock(slice); - if (olck->ols_glimpse) { - LASSERT(!olck->ols_hold); - LASSERT(!olck->ols_lock); - return; - } + LINVRNT(osc_lock_invariant(oscl)); - LINVRNT(osc_lock_invariant(olck)); - LINVRNT(!osc_lock_has_pages(olck)); + osc_lock_detach(env, oscl); + oscl->ols_state = OLS_CANCELLED; + oscl->ols_flags &= ~LDLM_FL_LVB_READY; - osc_lock_unhold(olck); - osc_lock_detach(env, olck); -} - -/** - * Implements cl_lock_operations::clo_state() method for osc layer. - * - * Maintains osc_lock::ols_owner field. - * - * This assumes that lock always enters CLS_HELD (from some other state) in - * the same IO context as one that requested the lock. This should not be a - * problem, because context is by definition shared by all activity pertaining - * to the same high-level IO. - */ -static void osc_lock_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state) -{ - struct osc_lock *lock = cl2osc_lock(slice); - - /* - * XXX multiple io contexts can use the lock at the same time. - */ - LINVRNT(osc_lock_invariant(lock)); - if (state == CLS_HELD && slice->cls_lock->cll_state != CLS_HELD) { - struct osc_io *oio = osc_env_io(env); - - LASSERT(!lock->ols_owner); - lock->ols_owner = oio; - } else if (state != CLS_HELD) - lock->ols_owner = NULL; + osc_lock_wake_waiters(env, obj, oscl); } static int osc_lock_print(const struct lu_env *env, void *cookie, @@ -1404,221 +1056,162 @@ static int osc_lock_print(const struct lu_env *env, void *cookie, { struct osc_lock *lock = cl2osc_lock(slice); - /* - * XXX print ldlm lock and einfo properly. - */ (*p)(env, cookie, "%p %#16llx %#llx %d %p ", - lock->ols_lock, lock->ols_flags, lock->ols_handle.cookie, + lock->ols_dlmlock, lock->ols_flags, lock->ols_handle.cookie, lock->ols_state, lock->ols_owner); osc_lvb_print(env, cookie, p, &lock->ols_lvb); return 0; } -static int osc_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - struct osc_lock *ols = cl2osc_lock(slice); - - if (need->cld_enq_flags & CEF_NEVER) - return 0; - - if (ols->ols_state >= OLS_CANCELLED) - return 0; - - if (need->cld_mode == CLM_PHANTOM) { - if (ols->ols_agl) - return !(ols->ols_state > OLS_RELEASED); - - /* - * Note: the QUEUED lock can't be matched here, otherwise - * it might cause the deadlocks. - * In read_process, - * P1: enqueued read lock, create sublock1 - * P2: enqueued write lock, create sublock2(conflicted - * with sublock1). - * P1: Grant read lock. - * P1: enqueued glimpse lock(with holding sublock1_read), - * matched with sublock2, waiting sublock2 to be granted. - * But sublock2 can not be granted, because P1 - * will not release sublock1. Bang! - */ - if (ols->ols_state < OLS_GRANTED || - ols->ols_state > OLS_RELEASED) - return 0; - } else if (need->cld_enq_flags & CEF_MUST) { - /* - * If the lock hasn't ever enqueued, it can't be matched - * because enqueue process brings in many information - * which can be used to determine things such as lockless, - * CEF_MUST, etc. - */ - if (ols->ols_state < OLS_UPCALL_RECEIVED && - ols->ols_locklessable) - return 0; - } - return 1; -} - static const struct cl_lock_operations osc_lock_ops = { .clo_fini = osc_lock_fini, .clo_enqueue = osc_lock_enqueue, - .clo_wait = osc_lock_wait, - .clo_unuse = osc_lock_unuse, - .clo_use = osc_lock_use, - .clo_delete = osc_lock_delete, - .clo_state = osc_lock_state, .clo_cancel = osc_lock_cancel, - .clo_weigh = osc_lock_weigh, .clo_print = osc_lock_print, - .clo_fits_into = osc_lock_fits_into, }; -static int osc_lock_lockless_unuse(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *ols = cl2osc_lock(slice); - struct cl_lock *lock = slice->cls_lock; - - LASSERT(ols->ols_state == OLS_GRANTED); - LINVRNT(osc_lock_invariant(ols)); - - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); - return 0; -} - static void osc_lock_lockless_cancel(const struct lu_env *env, const struct cl_lock_slice *slice) { struct osc_lock *ols = cl2osc_lock(slice); + struct osc_object *osc = cl2osc(slice->cls_obj); + struct cl_lock_descr *descr = &slice->cls_lock->cll_descr; int result; - result = osc_lock_flush(ols, 0); + LASSERT(!ols->ols_dlmlock); + result = osc_lock_flush(osc, descr->cld_start, descr->cld_end, + descr->cld_mode, 0); if (result) CERROR("Pages for lockless lock %p were not purged(%d)\n", ols, result); - ols->ols_state = OLS_CANCELLED; -} - -static int osc_lock_lockless_wait(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *olck = cl2osc_lock(slice); - struct cl_lock *lock = olck->ols_cl.cls_lock; - LINVRNT(osc_lock_invariant(olck)); - LASSERT(olck->ols_state >= OLS_UPCALL_RECEIVED); - - return lock->cll_error; + osc_lock_wake_waiters(env, osc, ols); } -static void osc_lock_lockless_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state) -{ - struct osc_lock *lock = cl2osc_lock(slice); +static const struct cl_lock_operations osc_lock_lockless_ops = { + .clo_fini = osc_lock_fini, + .clo_enqueue = osc_lock_enqueue, + .clo_cancel = osc_lock_lockless_cancel, + .clo_print = osc_lock_print +}; - LINVRNT(osc_lock_invariant(lock)); - if (state == CLS_HELD) { - struct osc_io *oio = osc_env_io(env); +static void osc_lock_set_writer(const struct lu_env *env, + const struct cl_io *io, + struct cl_object *obj, struct osc_lock *oscl) +{ + struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr; + pgoff_t io_start; + pgoff_t io_end; - LASSERT(ergo(lock->ols_owner, lock->ols_owner == oio)); - lock->ols_owner = oio; + if (!cl_object_same(io->ci_obj, obj)) + return; - /* set the io to be lockless if this lock is for io's - * host object - */ - if (cl_object_same(oio->oi_cl.cis_obj, slice->cls_obj)) - oio->oi_lockless = 1; + if (likely(io->ci_type == CIT_WRITE)) { + io_start = cl_index(obj, io->u.ci_rw.crw_pos); + io_end = cl_index(obj, io->u.ci_rw.crw_pos + + io->u.ci_rw.crw_count - 1); + if (cl_io_is_append(io)) { + io_start = 0; + io_end = CL_PAGE_EOF; + } + } else { + LASSERT(cl_io_is_mkwrite(io)); + io_start = io->u.ci_fault.ft_index; + io_end = io->u.ci_fault.ft_index; } -} -static int osc_lock_lockless_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - struct osc_lock *lock = cl2osc_lock(slice); - - if (!(need->cld_enq_flags & CEF_NEVER)) - return 0; + if (descr->cld_mode >= CLM_WRITE && + descr->cld_start <= io_start && descr->cld_end >= io_end) { + struct osc_io *oio = osc_env_io(env); - /* lockless lock should only be used by its owning io. b22147 */ - return (lock->ols_owner == osc_env_io(env)); + /* There must be only one lock to match the write region */ + LASSERT(!oio->oi_write_osclock); + oio->oi_write_osclock = oscl; + } } -static const struct cl_lock_operations osc_lock_lockless_ops = { - .clo_fini = osc_lock_fini, - .clo_enqueue = osc_lock_enqueue, - .clo_wait = osc_lock_lockless_wait, - .clo_unuse = osc_lock_lockless_unuse, - .clo_state = osc_lock_lockless_state, - .clo_fits_into = osc_lock_lockless_fits_into, - .clo_cancel = osc_lock_lockless_cancel, - .clo_print = osc_lock_print -}; - int osc_lock_init(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *unused) + const struct cl_io *io) { - struct osc_lock *clk; - int result; - - clk = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS); - if (clk) { - __u32 enqflags = lock->cll_descr.cld_enq_flags; + struct osc_lock *oscl; + __u32 enqflags = lock->cll_descr.cld_enq_flags; + + oscl = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS); + if (!oscl) + return -ENOMEM; + + oscl->ols_state = OLS_NEW; + spin_lock_init(&oscl->ols_lock); + INIT_LIST_HEAD(&oscl->ols_waiting_list); + INIT_LIST_HEAD(&oscl->ols_wait_entry); + INIT_LIST_HEAD(&oscl->ols_nextlock_oscobj); + + oscl->ols_flags = osc_enq2ldlm_flags(enqflags); + oscl->ols_agl = !!(enqflags & CEF_AGL); + if (oscl->ols_agl) + oscl->ols_flags |= LDLM_FL_BLOCK_NOWAIT; + if (oscl->ols_flags & LDLM_FL_HAS_INTENT) { + oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED; + oscl->ols_glimpse = 1; + } - osc_lock_build_einfo(env, lock, clk, &clk->ols_einfo); - atomic_set(&clk->ols_pageref, 0); - clk->ols_state = OLS_NEW; + cl_lock_slice_add(lock, &oscl->ols_cl, obj, &osc_lock_ops); - clk->ols_flags = osc_enq2ldlm_flags(enqflags); - clk->ols_agl = !!(enqflags & CEF_AGL); - if (clk->ols_agl) - clk->ols_flags |= LDLM_FL_BLOCK_NOWAIT; - if (clk->ols_flags & LDLM_FL_HAS_INTENT) - clk->ols_glimpse = 1; + if (!(enqflags & CEF_MUST)) + /* try to convert this lock to a lockless lock */ + osc_lock_to_lockless(env, oscl, (enqflags & CEF_NEVER)); + if (oscl->ols_locklessable && !(enqflags & CEF_DISCARD_DATA)) + oscl->ols_flags |= LDLM_FL_DENY_ON_CONTENTION; - cl_lock_slice_add(lock, &clk->ols_cl, obj, &osc_lock_ops); + if (io->ci_type == CIT_WRITE || cl_io_is_mkwrite(io)) + osc_lock_set_writer(env, io, obj, oscl); - if (!(enqflags & CEF_MUST)) - /* try to convert this lock to a lockless lock */ - osc_lock_to_lockless(env, clk, (enqflags & CEF_NEVER)); - if (clk->ols_locklessable && !(enqflags & CEF_DISCARD_DATA)) - clk->ols_flags |= LDLM_FL_DENY_ON_CONTENTION; - LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx", - lock, clk, clk->ols_flags); + LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx", + lock, oscl, oscl->ols_flags); - result = 0; - } else - result = -ENOMEM; - return result; + return 0; } -int osc_dlm_lock_pageref(struct ldlm_lock *dlm) +/** + * Finds an existing lock covering given index and optionally different from a + * given \a except lock. + */ +struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, + struct osc_object *obj, pgoff_t index, + int pending, int canceling) { - struct osc_lock *olock; - int rc = 0; - - spin_lock(&osc_ast_guard); - olock = dlm->l_ast_data; + struct osc_thread_info *info = osc_env_info(env); + struct ldlm_res_id *resname = &info->oti_resname; + ldlm_policy_data_t *policy = &info->oti_policy; + struct lustre_handle lockh; + struct ldlm_lock *lock = NULL; + enum ldlm_mode mode; + __u64 flags; + + ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname); + osc_index2policy(policy, osc2cl(obj), index, index); + policy->l_extent.gid = LDLM_GID_ANY; + + flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; + if (pending) + flags |= LDLM_FL_CBPENDING; /* - * there's a very rare race with osc_page_addref_lock(), but that - * doesn't matter because in the worst case we don't cancel a lock - * which we actually can, that's no harm. + * It is fine to match any group lock since there could be only one + * with a uniq gid and it conflicts with all other lock modes too */ - if (olock && - atomic_add_return(_PAGEREF_MAGIC, - &olock->ols_pageref) != _PAGEREF_MAGIC) { - atomic_sub(_PAGEREF_MAGIC, &olock->ols_pageref); - rc = 1; +again: + mode = ldlm_lock_match(osc_export(obj)->exp_obd->obd_namespace, + flags, resname, LDLM_EXTENT, policy, + LCK_PR | LCK_PW | LCK_GROUP, &lockh, canceling); + if (mode != 0) { + lock = ldlm_handle2lock(&lockh); + /* RACE: the lock is cancelled so let's try again */ + if (unlikely(!lock)) + goto again; } - spin_unlock(&osc_ast_guard); - return rc; + return lock; } /** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c index 9d474fcdd9a7..d211d1905e83 100644 --- a/drivers/staging/lustre/lustre/osc/osc_object.c +++ b/drivers/staging/lustre/lustre/osc/osc_object.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -36,6 +32,7 @@ * Implementation of cl_object for OSC layer. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ #define DEBUG_SUBSYSTEM S_OSC @@ -94,6 +91,9 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj, atomic_set(&osc->oo_nr_reads, 0); atomic_set(&osc->oo_nr_writes, 0); spin_lock_init(&osc->oo_lock); + spin_lock_init(&osc->oo_tree_lock); + spin_lock_init(&osc->oo_ol_spin); + INIT_LIST_HEAD(&osc->oo_ol_list); cl_object_page_init(lu2cl(obj), sizeof(struct osc_page)); @@ -120,6 +120,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj) LASSERT(list_empty(&osc->oo_reading_exts)); LASSERT(atomic_read(&osc->oo_nr_reads) == 0); LASSERT(atomic_read(&osc->oo_nr_writes) == 0); + LASSERT(list_empty(&osc->oo_ol_list)); lu_object_fini(obj); kmem_cache_free(osc_object_kmem, osc); @@ -192,6 +193,32 @@ static int osc_object_glimpse(const struct lu_env *env, return 0; } +static int osc_object_ast_clear(struct ldlm_lock *lock, void *data) +{ + LASSERT(lock->l_granted_mode == lock->l_req_mode); + if (lock->l_ast_data == data) + lock->l_ast_data = NULL; + return LDLM_ITER_CONTINUE; +} + +static int osc_object_prune(const struct lu_env *env, struct cl_object *obj) +{ + struct osc_object *osc = cl2osc(obj); + struct ldlm_res_id *resname = &osc_env_info(env)->oti_resname; + + LASSERTF(osc->oo_npages == 0, + DFID "still have %lu pages, obj: %p, osc: %p\n", + PFID(lu_object_fid(&obj->co_lu)), osc->oo_npages, obj, osc); + + /* DLM locks don't hold a reference of osc_object so we have to + * clear it before the object is being destroyed. + */ + ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname); + ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname, + osc_object_ast_clear, osc); + return 0; +} + void osc_object_set_contended(struct osc_object *obj) { obj->oo_contention_time = cfs_time_current(); @@ -236,12 +263,12 @@ static const struct cl_object_operations osc_ops = { .coo_io_init = osc_io_init, .coo_attr_get = osc_attr_get, .coo_attr_set = osc_attr_set, - .coo_glimpse = osc_object_glimpse + .coo_glimpse = osc_object_glimpse, + .coo_prune = osc_object_prune }; static const struct lu_object_operations osc_lu_obj_ops = { .loo_object_init = osc_object_init, - .loo_object_delete = NULL, .loo_object_release = NULL, .loo_object_free = osc_object_free, .loo_object_print = osc_object_print, @@ -261,8 +288,9 @@ struct lu_object *osc_object_alloc(const struct lu_env *env, lu_object_init(obj, NULL, dev); osc->oo_cl.co_ops = &osc_ops; obj->lo_ops = &osc_lu_obj_ops; - } else + } else { obj = NULL; + } return obj; } diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c index ce9ddd515f64..355f496a2093 100644 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ b/drivers/staging/lustre/lustre/osc/osc_page.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -36,14 +32,15 @@ * Implementation of cl_page for OSC layer. * * Author: Nikita Danilov <nikita.danilov@sun.com> + * Author: Jinshan Xiong <jinshan.xiong@intel.com> */ #define DEBUG_SUBSYSTEM S_OSC #include "osc_cl_internal.h" -static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del); -static void osc_lru_add(struct client_obd *cli, struct osc_page *opg); +static void osc_lru_del(struct client_obd *cli, struct osc_page *opg); +static void osc_lru_use(struct client_obd *cli, struct osc_page *opg); static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, struct osc_page *opg); @@ -51,30 +48,14 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, * @{ */ -static int osc_page_protected(const struct lu_env *env, - const struct osc_page *opg, - enum cl_lock_mode mode, int unref) -{ - return 1; -} - /***************************************************************************** * * Page operations. * */ -static void osc_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ - struct osc_page *opg = cl2osc_page(slice); - - CDEBUG(D_TRACE, "%p\n", opg); - LASSERT(!opg->ops_lock); -} - static void osc_page_transfer_get(struct osc_page *opg, const char *label) { - struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page); + struct cl_page *page = opg->ops_cl.cpl_page; LASSERT(!opg->ops_transfer_pinned); cl_page_get(page); @@ -85,11 +66,11 @@ static void osc_page_transfer_get(struct osc_page *opg, const char *label) static void osc_page_transfer_put(const struct lu_env *env, struct osc_page *opg) { - struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page); + struct cl_page *page = opg->ops_cl.cpl_page; if (opg->ops_transfer_pinned) { - lu_ref_del(&page->cp_reference, "transfer", page); opg->ops_transfer_pinned = 0; + lu_ref_del(&page->cp_reference, "transfer", page); cl_page_put(env, page); } } @@ -104,10 +85,7 @@ static void osc_page_transfer_add(const struct lu_env *env, { struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); - /* ops_lru and ops_inflight share the same field, so take it from LRU - * first and then use it as inflight. - */ - osc_lru_del(osc_cli(obj), opg, false); + osc_lru_use(osc_cli(obj), opg); spin_lock(&obj->oo_seatbelt); list_add(&opg->ops_inflight, &obj->oo_inflight[crt]); @@ -115,16 +93,12 @@ static void osc_page_transfer_add(const struct lu_env *env, spin_unlock(&obj->oo_seatbelt); } -static int osc_page_cache_add(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) +int osc_page_cache_add(const struct lu_env *env, + const struct cl_page_slice *slice, struct cl_io *io) { - struct osc_io *oio = osc_env_io(env); struct osc_page *opg = cl2osc_page(slice); int result; - LINVRNT(osc_page_protected(env, opg, CLM_WRITE, 0)); - osc_page_transfer_get(opg, "transfer\0cache"); result = osc_queue_async_io(env, io, opg); if (result != 0) @@ -132,17 +106,6 @@ static int osc_page_cache_add(const struct lu_env *env, else osc_page_transfer_add(env, opg, CRT_WRITE); - /* for sync write, kernel will wait for this page to be flushed before - * osc_io_end() is called, so release it earlier. - * for mkwrite(), it's known there is no further pages. - */ - if (cl_io_is_sync_write(io) || cl_io_is_mkwrite(io)) { - if (oio->oi_active) { - osc_extent_release(env, oio->oi_active); - oio->oi_active = NULL; - } - } - return result; } @@ -154,102 +117,25 @@ void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj, policy->l_extent.end = cl_offset(obj, end + 1) - 1; } -static int osc_page_addref_lock(const struct lu_env *env, - struct osc_page *opg, - struct cl_lock *lock) -{ - struct osc_lock *olock; - int rc; - - LASSERT(!opg->ops_lock); - - olock = osc_lock_at(lock); - if (atomic_inc_return(&olock->ols_pageref) <= 0) { - atomic_dec(&olock->ols_pageref); - rc = -ENODATA; - } else { - cl_lock_get(lock); - opg->ops_lock = lock; - rc = 0; - } - return rc; -} - -static void osc_page_putref_lock(const struct lu_env *env, - struct osc_page *opg) -{ - struct cl_lock *lock = opg->ops_lock; - struct osc_lock *olock; - - LASSERT(lock); - olock = osc_lock_at(lock); - - atomic_dec(&olock->ols_pageref); - opg->ops_lock = NULL; - - cl_lock_put(env, lock); -} - static int osc_page_is_under_lock(const struct lu_env *env, const struct cl_page_slice *slice, - struct cl_io *unused) + struct cl_io *unused, pgoff_t *max_index) { - struct cl_lock *lock; + struct osc_page *opg = cl2osc_page(slice); + struct ldlm_lock *dlmlock; int result = -ENODATA; - lock = cl_lock_at_page(env, slice->cpl_obj, slice->cpl_page, - NULL, 1, 0); - if (lock) { - if (osc_page_addref_lock(env, cl2osc_page(slice), lock) == 0) - result = -EBUSY; - cl_lock_put(env, lock); + dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj), + osc_index(opg), 1, 0); + if (dlmlock) { + *max_index = cl_index(slice->cpl_obj, + dlmlock->l_policy_data.l_extent.end); + LDLM_LOCK_PUT(dlmlock); + result = 0; } return result; } -static void osc_page_disown(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) -{ - struct osc_page *opg = cl2osc_page(slice); - - if (unlikely(opg->ops_lock)) - osc_page_putref_lock(env, opg); -} - -static void osc_page_completion_read(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret) -{ - struct osc_page *opg = cl2osc_page(slice); - struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); - - if (likely(opg->ops_lock)) - osc_page_putref_lock(env, opg); - osc_lru_add(osc_cli(obj), opg); -} - -static void osc_page_completion_write(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret) -{ - struct osc_page *opg = cl2osc_page(slice); - struct osc_object *obj = cl2osc(slice->cpl_obj); - - osc_lru_add(osc_cli(obj), opg); -} - -static int osc_page_fail(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - /* - * Cached read? - */ - LBUG(); - return 0; -} - static const char *osc_list(struct list_head *head) { return list_empty(head) ? "-" : "+"; @@ -272,8 +158,8 @@ static int osc_page_print(const struct lu_env *env, struct osc_object *obj = cl2osc(slice->cpl_obj); struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli; - return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n", - opg, + return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n", + opg, osc_index(opg), /* 1 */ oap->oap_magic, oap->oap_cmd, oap->oap_interrupted, @@ -315,13 +201,11 @@ static void osc_page_delete(const struct lu_env *env, struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); int rc; - LINVRNT(opg->ops_temp || osc_page_protected(env, opg, CLM_READ, 1)); - CDEBUG(D_TRACE, "%p\n", opg); osc_page_transfer_put(env, opg); rc = osc_teardown_async_page(env, obj, opg); if (rc) { - CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(slice->cpl_page), + CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page, "Trying to teardown failed: %d\n", rc); LASSERT(0); } @@ -334,7 +218,19 @@ static void osc_page_delete(const struct lu_env *env, } spin_unlock(&obj->oo_seatbelt); - osc_lru_del(osc_cli(obj), opg, true); + osc_lru_del(osc_cli(obj), opg); + + if (slice->cpl_page->cp_type == CPT_CACHEABLE) { + void *value; + + spin_lock(&obj->oo_tree_lock); + value = radix_tree_delete(&obj->oo_tree, osc_index(opg)); + if (value) + --obj->oo_npages; + spin_unlock(&obj->oo_tree_lock); + + LASSERT(ergo(value, value == opg)); + } } static void osc_page_clip(const struct lu_env *env, @@ -343,8 +239,6 @@ static void osc_page_clip(const struct lu_env *env, struct osc_page *opg = cl2osc_page(slice); struct osc_async_page *oap = &opg->ops_oap; - LINVRNT(osc_page_protected(env, opg, CLM_READ, 0)); - opg->ops_from = from; opg->ops_to = to; spin_lock(&oap->oap_lock); @@ -358,8 +252,6 @@ static int osc_page_cancel(const struct lu_env *env, struct osc_page *opg = cl2osc_page(slice); int rc = 0; - LINVRNT(osc_page_protected(env, opg, CLM_READ, 0)); - /* Check if the transferring against this page * is completed, or not even queued. */ @@ -382,28 +274,16 @@ static int osc_page_flush(const struct lu_env *env, } static const struct cl_page_operations osc_page_ops = { - .cpo_fini = osc_page_fini, .cpo_print = osc_page_print, .cpo_delete = osc_page_delete, .cpo_is_under_lock = osc_page_is_under_lock, - .cpo_disown = osc_page_disown, - .io = { - [CRT_READ] = { - .cpo_cache_add = osc_page_fail, - .cpo_completion = osc_page_completion_read - }, - [CRT_WRITE] = { - .cpo_cache_add = osc_page_cache_add, - .cpo_completion = osc_page_completion_write - } - }, .cpo_clip = osc_page_clip, .cpo_cancel = osc_page_cancel, .cpo_flush = osc_page_flush }; int osc_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { struct osc_object *osc = cl2osc(obj); struct osc_page *opg = cl_object_page_slice(obj, page); @@ -412,18 +292,15 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj, opg->ops_from = 0; opg->ops_to = PAGE_SIZE; - result = osc_prep_async_page(osc, opg, vmpage, - cl_offset(obj, page->cp_index)); + result = osc_prep_async_page(osc, opg, page->cp_vmpage, + cl_offset(obj, index)); if (result == 0) { struct osc_io *oio = osc_env_io(env); opg->ops_srvlock = osc_io_srvlock(oio); - cl_page_slice_add(page, &opg->ops_cl, obj, &osc_page_ops); + cl_page_slice_add(page, &opg->ops_cl, obj, index, + &osc_page_ops); } - /* - * Cannot assert osc_page_protected() here as read-ahead - * creates temporary pages outside of a lock. - */ /* ops_inflight and ops_lru are the same field, but it doesn't * hurt to initialize it twice :-) */ @@ -431,12 +308,47 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj, INIT_LIST_HEAD(&opg->ops_lru); /* reserve an LRU space for this page */ - if (page->cp_type == CPT_CACHEABLE && result == 0) + if (page->cp_type == CPT_CACHEABLE && result == 0) { result = osc_lru_reserve(env, osc, opg); + if (result == 0) { + spin_lock(&osc->oo_tree_lock); + result = radix_tree_insert(&osc->oo_tree, index, opg); + if (result == 0) + ++osc->oo_npages; + spin_unlock(&osc->oo_tree_lock); + LASSERT(result == 0); + } + } return result; } +int osc_over_unstable_soft_limit(struct client_obd *cli) +{ + long obd_upages, obd_dpages, osc_upages; + + /* Can't check cli->cl_unstable_count, therefore, no soft limit */ + if (!cli) + return 0; + + obd_upages = atomic_read(&obd_unstable_pages); + obd_dpages = atomic_read(&obd_dirty_pages); + + osc_upages = atomic_read(&cli->cl_unstable_count); + + /* + * obd_max_dirty_pages is the max number of (dirty + unstable) + * pages allowed at any given time. To simulate an unstable page + * only limit, we subtract the current number of dirty pages + * from this max. This difference is roughly the amount of pages + * currently available for unstable pages. Thus, the soft limit + * is half of that difference. Check osc_upages to ensure we don't + * set SOFT_SYNC for OSCs without any outstanding unstable pages. + */ + return osc_upages && + obd_upages >= (obd_max_dirty_pages - obd_dpages) / 2; +} + /** * Helper function called by osc_io_submit() for every page in an immediate * transfer (i.e., transferred synchronously). @@ -445,10 +357,6 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, enum cl_req_type crt, int brw_flags) { struct osc_async_page *oap = &opg->ops_oap; - struct osc_object *obj = oap->oap_obj; - - LINVRNT(osc_page_protected(env, opg, - crt == CRT_WRITE ? CLM_WRITE : CLM_READ, 1)); LASSERTF(oap->oap_magic == OAP_MAGIC, "Bad oap magic: oap %p, magic 0x%x\n", oap, oap->oap_magic); @@ -460,8 +368,10 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, oap->oap_count = opg->ops_to - opg->ops_from; oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC; - if (!client_is_remote(osc_export(obj)) && - capable(CFS_CAP_SYS_RESOURCE)) { + if (osc_over_unstable_soft_limit(oap->oap_cli)) + oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC; + + if (capable(CFS_CAP_SYS_RESOURCE)) { oap->oap_brw_flags |= OBD_BRW_NOQUOTA; oap->oap_cmd |= OBD_BRW_NOQUOTA; } @@ -483,13 +393,12 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, */ static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq); -static atomic_t osc_lru_waiters = ATOMIC_INIT(0); /* LRU pages are freed in batch mode. OSC should at least free this * number of pages to avoid running out of LRU budget, and.. */ static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT); /* 2M */ /* free this number at most otherwise it will take too long time to finish. */ -static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */ +static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */ /* Check if we can free LRU slots from this OSC. If there exists LRU waiters, * we should free slots aggressively. In this way, slots are freed in a steady @@ -500,65 +409,142 @@ static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */ static int osc_cache_too_much(struct client_obd *cli) { struct cl_client_cache *cache = cli->cl_cache; - int pages = atomic_read(&cli->cl_lru_in_list) >> 1; + int pages = atomic_read(&cli->cl_lru_in_list); + unsigned long budget; - if (atomic_read(&osc_lru_waiters) > 0 && - atomic_read(cli->cl_lru_left) < lru_shrink_max) - /* drop lru pages aggressively */ - return min(pages, lru_shrink_max); + budget = cache->ccc_lru_max / (atomic_read(&cache->ccc_users) - 2); /* if it's going to run out LRU slots, we should free some, but not * too much to maintain fairness among OSCs. */ if (atomic_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) { - unsigned long tmp; + if (pages >= budget) + return lru_shrink_max; + else if (pages >= budget / 2) + return lru_shrink_min; + } else if (pages >= budget * 2) { + return lru_shrink_min; + } + return 0; +} - tmp = cache->ccc_lru_max / atomic_read(&cache->ccc_users); - if (pages > tmp) - return min(pages, lru_shrink_max); +int lru_queue_work(const struct lu_env *env, void *data) +{ + struct client_obd *cli = data; - return pages > lru_shrink_min ? lru_shrink_min : 0; - } + CDEBUG(D_CACHE, "Run LRU work for client obd %p.\n", cli); + + if (osc_cache_too_much(cli)) + osc_lru_shrink(env, cli, lru_shrink_max, true); return 0; } -/* Return how many pages are not discarded in @pvec. */ -static int discard_pagevec(const struct lu_env *env, struct cl_io *io, - struct cl_page **pvec, int max_index) +void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist) +{ + LIST_HEAD(lru); + struct osc_async_page *oap; + int npages = 0; + + list_for_each_entry(oap, plist, oap_pending_item) { + struct osc_page *opg = oap2osc_page(oap); + + if (!opg->ops_in_lru) + continue; + + ++npages; + LASSERT(list_empty(&opg->ops_lru)); + list_add(&opg->ops_lru, &lru); + } + + if (npages > 0) { + spin_lock(&cli->cl_lru_list_lock); + list_splice_tail(&lru, &cli->cl_lru_list); + atomic_sub(npages, &cli->cl_lru_busy); + atomic_add(npages, &cli->cl_lru_in_list); + spin_unlock(&cli->cl_lru_list_lock); + + /* XXX: May set force to be true for better performance */ + if (osc_cache_too_much(cli)) + (void)ptlrpcd_queue_work(cli->cl_lru_work); + } +} + +static void __osc_lru_del(struct client_obd *cli, struct osc_page *opg) +{ + LASSERT(atomic_read(&cli->cl_lru_in_list) > 0); + list_del_init(&opg->ops_lru); + atomic_dec(&cli->cl_lru_in_list); +} + +/** + * Page is being destroyed. The page may be not in LRU list, if the transfer + * has never finished(error occurred). + */ +static void osc_lru_del(struct client_obd *cli, struct osc_page *opg) +{ + if (opg->ops_in_lru) { + spin_lock(&cli->cl_lru_list_lock); + if (!list_empty(&opg->ops_lru)) { + __osc_lru_del(cli, opg); + } else { + LASSERT(atomic_read(&cli->cl_lru_busy) > 0); + atomic_dec(&cli->cl_lru_busy); + } + spin_unlock(&cli->cl_lru_list_lock); + + atomic_inc(cli->cl_lru_left); + /* this is a great place to release more LRU pages if + * this osc occupies too many LRU pages and kernel is + * stealing one of them. + */ + if (!memory_pressure_get()) + (void)ptlrpcd_queue_work(cli->cl_lru_work); + wake_up(&osc_lru_waitq); + } else { + LASSERT(list_empty(&opg->ops_lru)); + } +} + +/** + * Delete page from LRUlist for redirty. + */ +static void osc_lru_use(struct client_obd *cli, struct osc_page *opg) +{ + /* If page is being transferred for the first time, + * ops_lru should be empty + */ + if (opg->ops_in_lru && !list_empty(&opg->ops_lru)) { + spin_lock(&cli->cl_lru_list_lock); + __osc_lru_del(cli, opg); + spin_unlock(&cli->cl_lru_list_lock); + atomic_inc(&cli->cl_lru_busy); + } +} + +static void discard_pagevec(const struct lu_env *env, struct cl_io *io, + struct cl_page **pvec, int max_index) { - int count; int i; - for (count = 0, i = 0; i < max_index; i++) { + for (i = 0; i < max_index; i++) { struct cl_page *page = pvec[i]; - if (cl_page_own_try(env, io, page) == 0) { - /* free LRU page only if nobody is using it. - * This check is necessary to avoid freeing the pages - * having already been removed from LRU and pinned - * for IO. - */ - if (!cl_page_in_use(page)) { - cl_page_unmap(env, io, page); - cl_page_discard(env, io, page); - ++count; - } - cl_page_disown(env, io, page); - } + LASSERT(cl_page_is_owned(page, io)); + cl_page_discard(env, io, page); + cl_page_disown(env, io, page); cl_page_put(env, page); + pvec[i] = NULL; } - return max_index - count; } /** * Drop @target of pages from LRU at most. */ -int osc_lru_shrink(struct client_obd *cli, int target) +int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, + int target, bool force) { - struct cl_env_nest nest; - struct lu_env *env; struct cl_io *io; struct cl_object *clobj = NULL; struct cl_page **pvec; @@ -573,23 +559,31 @@ int osc_lru_shrink(struct client_obd *cli, int target) if (atomic_read(&cli->cl_lru_in_list) == 0 || target <= 0) return 0; - env = cl_env_nested_get(&nest); - if (IS_ERR(env)) - return PTR_ERR(env); + if (!force) { + if (atomic_read(&cli->cl_lru_shrinkers) > 0) + return -EBUSY; + + if (atomic_inc_return(&cli->cl_lru_shrinkers) > 1) { + atomic_dec(&cli->cl_lru_shrinkers); + return -EBUSY; + } + } else { + atomic_inc(&cli->cl_lru_shrinkers); + } - pvec = osc_env_info(env)->oti_pvec; + pvec = (struct cl_page **)osc_env_info(env)->oti_pvec; io = &osc_env_info(env)->oti_io; - client_obd_list_lock(&cli->cl_lru_list_lock); - atomic_inc(&cli->cl_lru_shrinkers); + spin_lock(&cli->cl_lru_list_lock); maxscan = min(target << 1, atomic_read(&cli->cl_lru_in_list)); list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) { struct cl_page *page; + bool will_free = false; if (--maxscan < 0) break; - page = cl_page_top(opg->ops_cl.cpl_page); + page = opg->ops_cl.cpl_page; if (cl_page_in_use_noref(page)) { list_move_tail(&opg->ops_lru, &cli->cl_lru_list); continue; @@ -600,10 +594,10 @@ int osc_lru_shrink(struct client_obd *cli, int target) struct cl_object *tmp = page->cp_obj; cl_object_get(tmp); - client_obd_list_unlock(&cli->cl_lru_list_lock); + spin_unlock(&cli->cl_lru_list_lock); if (clobj) { - count -= discard_pagevec(env, io, pvec, index); + discard_pagevec(env, io, pvec, index); index = 0; cl_io_fini(env, io); @@ -616,7 +610,7 @@ int osc_lru_shrink(struct client_obd *cli, int target) io->ci_ignore_layout = 1; rc = cl_io_init(env, io, CIT_MISC, clobj); - client_obd_list_lock(&cli->cl_lru_list_lock); + spin_lock(&cli->cl_lru_list_lock); if (rc != 0) break; @@ -625,98 +619,54 @@ int osc_lru_shrink(struct client_obd *cli, int target) continue; } - /* move this page to the end of list as it will be discarded - * soon. The page will be finally removed from LRU list in - * osc_page_delete(). - */ - list_move_tail(&opg->ops_lru, &cli->cl_lru_list); + if (cl_page_own_try(env, io, page) == 0) { + if (!cl_page_in_use_noref(page)) { + /* remove it from lru list earlier to avoid + * lock contention + */ + __osc_lru_del(cli, opg); + opg->ops_in_lru = 0; /* will be discarded */ + + cl_page_get(page); + will_free = true; + } else { + cl_page_disown(env, io, page); + } + } - /* it's okay to grab a refcount here w/o holding lock because - * it has to grab cl_lru_list_lock to delete the page. - */ - cl_page_get(page); - pvec[index++] = page; - if (++count >= target) - break; + if (!will_free) { + list_move_tail(&opg->ops_lru, &cli->cl_lru_list); + continue; + } + /* Don't discard and free the page with cl_lru_list held */ + pvec[index++] = page; if (unlikely(index == OTI_PVEC_SIZE)) { - client_obd_list_unlock(&cli->cl_lru_list_lock); - count -= discard_pagevec(env, io, pvec, index); + spin_unlock(&cli->cl_lru_list_lock); + discard_pagevec(env, io, pvec, index); index = 0; - client_obd_list_lock(&cli->cl_lru_list_lock); + spin_lock(&cli->cl_lru_list_lock); } + + if (++count >= target) + break; } - client_obd_list_unlock(&cli->cl_lru_list_lock); + spin_unlock(&cli->cl_lru_list_lock); if (clobj) { - count -= discard_pagevec(env, io, pvec, index); + discard_pagevec(env, io, pvec, index); cl_io_fini(env, io); cl_object_put(env, clobj); } - cl_env_nested_put(&nest, env); atomic_dec(&cli->cl_lru_shrinkers); - return count > 0 ? count : rc; -} - -static void osc_lru_add(struct client_obd *cli, struct osc_page *opg) -{ - bool wakeup = false; - - if (!opg->ops_in_lru) - return; - - atomic_dec(&cli->cl_lru_busy); - client_obd_list_lock(&cli->cl_lru_list_lock); - if (list_empty(&opg->ops_lru)) { - list_move_tail(&opg->ops_lru, &cli->cl_lru_list); - atomic_inc_return(&cli->cl_lru_in_list); - wakeup = atomic_read(&osc_lru_waiters) > 0; - } - client_obd_list_unlock(&cli->cl_lru_list_lock); - - if (wakeup) { - osc_lru_shrink(cli, osc_cache_too_much(cli)); + if (count > 0) { + atomic_add(count, cli->cl_lru_left); wake_up_all(&osc_lru_waitq); } -} - -/* delete page from LRUlist. The page can be deleted from LRUlist for two - * reasons: redirtied or deleted from page cache. - */ -static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del) -{ - if (opg->ops_in_lru) { - client_obd_list_lock(&cli->cl_lru_list_lock); - if (!list_empty(&opg->ops_lru)) { - LASSERT(atomic_read(&cli->cl_lru_in_list) > 0); - list_del_init(&opg->ops_lru); - atomic_dec(&cli->cl_lru_in_list); - if (!del) - atomic_inc(&cli->cl_lru_busy); - } else if (del) { - LASSERT(atomic_read(&cli->cl_lru_busy) > 0); - atomic_dec(&cli->cl_lru_busy); - } - client_obd_list_unlock(&cli->cl_lru_list_lock); - if (del) { - atomic_inc(cli->cl_lru_left); - /* this is a great place to release more LRU pages if - * this osc occupies too many LRU pages and kernel is - * stealing one of them. - * cl_lru_shrinkers is to avoid recursive call in case - * we're already in the context of osc_lru_shrink(). - */ - if (atomic_read(&cli->cl_lru_shrinkers) == 0 && - !memory_pressure_get()) - osc_lru_shrink(cli, osc_cache_too_much(cli)); - wake_up(&osc_lru_waitq); - } - } else { - LASSERT(list_empty(&opg->ops_lru)); - } + return count > 0 ? count : rc; } static inline int max_to_shrink(struct client_obd *cli) @@ -724,19 +674,28 @@ static inline int max_to_shrink(struct client_obd *cli) return min(atomic_read(&cli->cl_lru_in_list) >> 1, lru_shrink_max); } -static int osc_lru_reclaim(struct client_obd *cli) +int osc_lru_reclaim(struct client_obd *cli) { + struct cl_env_nest nest; + struct lu_env *env; struct cl_client_cache *cache = cli->cl_cache; int max_scans; - int rc; + int rc = 0; LASSERT(cache); - rc = osc_lru_shrink(cli, lru_shrink_min); + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) + return 0; + + rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), false); if (rc != 0) { + if (rc == -EBUSY) + rc = 0; + CDEBUG(D_CACHE, "%s: Free %d pages from own LRU: %p.\n", cli->cl_import->imp_obd->obd_name, rc, cli); - return rc; + goto out; } CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %d, busy: %d.\n", @@ -753,7 +712,7 @@ static int osc_lru_reclaim(struct client_obd *cli) cache->ccc_lru_shrinkers++; list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru); - max_scans = atomic_read(&cache->ccc_users); + max_scans = atomic_read(&cache->ccc_users) - 2; while (--max_scans > 0 && !list_empty(&cache->ccc_lru)) { cli = list_entry(cache->ccc_lru.next, struct client_obd, cl_lru_osc); @@ -764,10 +723,11 @@ static int osc_lru_reclaim(struct client_obd *cli) atomic_read(&cli->cl_lru_busy)); list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru); - if (atomic_read(&cli->cl_lru_in_list) > 0) { + if (osc_cache_too_much(cli) > 0) { spin_unlock(&cache->ccc_lru_lock); - rc = osc_lru_shrink(cli, max_to_shrink(cli)); + rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), + true); spin_lock(&cache->ccc_lru_lock); if (rc != 0) break; @@ -775,6 +735,8 @@ static int osc_lru_reclaim(struct client_obd *cli) } spin_unlock(&cache->ccc_lru_lock); +out: + cl_env_nested_put(&nest, env); CDEBUG(D_CACHE, "%s: cli %p freed %d pages.\n", cli->cl_import->imp_obd->obd_name, cli, rc); return rc; @@ -784,16 +746,20 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, struct osc_page *opg) { struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + struct osc_io *oio = osc_env_io(env); struct client_obd *cli = osc_cli(obj); int rc = 0; if (!cli->cl_cache) /* shall not be in LRU */ return 0; + if (oio->oi_lru_reserved > 0) { + --oio->oi_lru_reserved; + goto out; + } + LASSERT(atomic_read(cli->cl_lru_left) >= 0); while (!atomic_add_unless(cli->cl_lru_left, -1, 0)) { - int gen; - /* run out of LRU spaces, try to drop some by itself */ rc = osc_lru_reclaim(cli); if (rc < 0) @@ -803,23 +769,15 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, cond_resched(); - /* slowest case, all of caching pages are busy, notifying - * other OSCs that we're lack of LRU slots. - */ - atomic_inc(&osc_lru_waiters); - - gen = atomic_read(&cli->cl_lru_in_list); rc = l_wait_event(osc_lru_waitq, - atomic_read(cli->cl_lru_left) > 0 || - (atomic_read(&cli->cl_lru_in_list) > 0 && - gen != atomic_read(&cli->cl_lru_in_list)), + atomic_read(cli->cl_lru_left) > 0, &lwi); - atomic_dec(&osc_lru_waiters); if (rc < 0) break; } +out: if (rc >= 0) { atomic_inc(&cli->cl_lru_busy); opg->ops_in_lru = 1; diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c index 30526ebcad04..536b868ff776 100644 --- a/drivers/staging/lustre/lustre/osc/osc_request.c +++ b/drivers/staging/lustre/lustre/osc/osc_request.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -92,12 +88,13 @@ struct osc_fsync_args { struct osc_enqueue_args { struct obd_export *oa_exp; + enum ldlm_type oa_type; + enum ldlm_mode oa_mode; __u64 *oa_flags; - obd_enqueue_update_f oa_upcall; + osc_enqueue_upcall_f oa_upcall; void *oa_cookie; struct ost_lvb *oa_lvb; - struct lustre_handle *oa_lockh; - struct ldlm_enqueue_info *oa_ei; + struct lustre_handle oa_lockh; unsigned int oa_agl:1; }; @@ -473,7 +470,8 @@ static int osc_real_create(struct obd_export *exp, struct obdo *oa, DEBUG_REQ(D_HA, req, "delorphan from OST integration"); /* Don't resend the delorphan req */ - req->rq_no_resend = req->rq_no_delay = 1; + req->rq_no_resend = 1; + req->rq_no_delay = 1; } rc = ptlrpc_queue_wait(req); @@ -801,21 +799,24 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, LASSERT(!(oa->o_valid & bits)); oa->o_valid |= bits; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); oa->o_dirty = cli->cl_dirty; if (unlikely(cli->cl_dirty - cli->cl_dirty_transit > cli->cl_dirty_max)) { CERROR("dirty %lu - %lu > dirty_max %lu\n", cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max); oa->o_undirty = 0; - } else if (unlikely(atomic_read(&obd_dirty_pages) - + } else if (unlikely(atomic_read(&obd_unstable_pages) + + atomic_read(&obd_dirty_pages) - atomic_read(&obd_dirty_transit_pages) > (long)(obd_max_dirty_pages + 1))) { /* The atomic_read() allowing the atomic_inc() are * not covered by a lock thus they may safely race and trip * this CERROR() unless we add in a small fudge factor (+1). */ - CERROR("dirty %d - %d > system dirty_max %d\n", + CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n", + cli->cl_import->imp_obd->obd_name, + atomic_read(&obd_unstable_pages), atomic_read(&obd_dirty_pages), atomic_read(&obd_dirty_transit_pages), obd_max_dirty_pages); @@ -833,10 +834,9 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant; oa->o_dropped = cli->cl_lost_grant; cli->cl_lost_grant = 0; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "dirty: %llu undirty: %u dropped %u grant: %llu\n", oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant); - } void osc_update_next_shrink(struct client_obd *cli) @@ -849,9 +849,9 @@ void osc_update_next_shrink(struct client_obd *cli) static void __osc_update_grant(struct client_obd *cli, u64 grant) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_avail_grant += grant; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } static void osc_update_grant(struct client_obd *cli, struct ost_body *body) @@ -889,10 +889,10 @@ out: static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); oa->o_grant = cli->cl_avail_grant / 4; cli->cl_avail_grant -= oa->o_grant; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); if (!(oa->o_valid & OBD_MD_FLFLAGS)) { oa->o_valid |= OBD_MD_FLFLAGS; oa->o_flags = 0; @@ -911,10 +911,10 @@ static int osc_shrink_grant(struct client_obd *cli) __u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) * (cli->cl_max_pages_per_rpc << PAGE_SHIFT); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (cli->cl_avail_grant <= target_bytes) target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return osc_shrink_grant_to_target(cli, target_bytes); } @@ -924,7 +924,7 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes) int rc = 0; struct ost_body *body; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); /* Don't shrink if we are already above or below the desired limit * We don't want to shrink below a single RPC, as that will negatively * impact block allocation and long-term performance. @@ -933,10 +933,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes) target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT; if (target_bytes >= cli->cl_avail_grant) { - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return 0; } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); body = kzalloc(sizeof(*body), GFP_NOFS); if (!body) @@ -944,10 +944,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes) osc_announce_cached(cli, &body->oa, 0); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); body->oa.o_grant = cli->cl_avail_grant - target_bytes; cli->cl_avail_grant = target_bytes; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) { body->oa.o_valid |= OBD_MD_FLFLAGS; body->oa.o_flags = 0; @@ -1035,7 +1035,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) * race is tolerable here: if we're evicted, but imp_state already * left EVICTED state, then cl_dirty must be 0 already. */ - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED) cli->cl_avail_grant = ocd->ocd_grant; else @@ -1053,7 +1053,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) /* determine the appropriate chunk size used by osc_extent. */ cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n", cli->cl_import->imp_obd->obd_name, @@ -1082,7 +1082,7 @@ static void handle_short_read(int nob_read, u32 page_count, if (pga[i]->count > nob_read) { /* EOF inside this page */ ptr = kmap(pga[i]->pg) + - (pga[i]->off & ~CFS_PAGE_MASK); + (pga[i]->off & ~PAGE_MASK); memset(ptr + nob_read, 0, pga[i]->count - nob_read); kunmap(pga[i]->pg); page_count--; @@ -1097,7 +1097,7 @@ static void handle_short_read(int nob_read, u32 page_count, /* zero remaining pages */ while (page_count-- > 0) { - ptr = kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK); + ptr = kmap(pga[i]->pg) + (pga[i]->off & ~PAGE_MASK); memset(ptr, 0, pga[i]->count); kunmap(pga[i]->pg); i++; @@ -1144,7 +1144,8 @@ static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2) { if (p1->flag != p2->flag) { unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE | - OBD_BRW_SYNC | OBD_BRW_ASYNC|OBD_BRW_NOQUOTA); + OBD_BRW_SYNC | OBD_BRW_ASYNC | + OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC); /* warn if we try to combine flags that we don't know to be * safe to combine @@ -1188,32 +1189,29 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count, if (i == 0 && opc == OST_READ && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) { unsigned char *ptr = kmap(pga[i]->pg); - int off = pga[i]->off & ~CFS_PAGE_MASK; + int off = pga[i]->off & ~PAGE_MASK; memcpy(ptr + off, "bad1", min(4, nob)); kunmap(pga[i]->pg); } cfs_crypto_hash_update_page(hdesc, pga[i]->pg, - pga[i]->off & ~CFS_PAGE_MASK, + pga[i]->off & ~PAGE_MASK, count); CDEBUG(D_PAGE, "page %p map %p index %lu flags %lx count %u priv %0lx: off %d\n", pga[i]->pg, pga[i]->pg->mapping, pga[i]->pg->index, (long)pga[i]->pg->flags, page_count(pga[i]->pg), page_private(pga[i]->pg), - (int)(pga[i]->off & ~CFS_PAGE_MASK)); + (int)(pga[i]->off & ~PAGE_MASK)); nob -= pga[i]->count; pg_count--; i++; } - bufsize = 4; + bufsize = sizeof(cksum); err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize); - if (err) - cfs_crypto_hash_final(hdesc, NULL, NULL); - /* For sending we only compute the wrong checksum instead * of corrupting the data so it is still correct on a redo */ @@ -1312,7 +1310,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli, pg_prev = pga[0]; for (requested_nob = i = 0; i < page_count; i++, niobuf++) { struct brw_page *pg = pga[i]; - int poff = pg->off & ~CFS_PAGE_MASK; + int poff = pg->off & ~PAGE_MASK; LASSERT(pg->count > 0); /* make sure there is no gap in the middle of page array */ @@ -1658,6 +1656,7 @@ static int osc_brw_redo_request(struct ptlrpc_request *request, aa->aa_resends++; new_req->rq_interpret_reply = request->rq_interpret_reply; new_req->rq_async_args = request->rq_async_args; + new_req->rq_commit_cb = request->rq_commit_cb; /* cap resend delay to the current request timeout, this is similar to * what ptlrpc does (see after_reply()) */ @@ -1737,7 +1736,6 @@ static int brw_interpret(const struct lu_env *env, struct osc_brw_async_args *aa = data; struct osc_extent *ext; struct osc_extent *tmp; - struct cl_object *obj = NULL; struct client_obd *cli = aa->aa_cli; rc = osc_brw_fini_request(req, rc); @@ -1766,24 +1764,17 @@ static int brw_interpret(const struct lu_env *env, rc = -EIO; } - list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) { - if (!obj && rc == 0) { - obj = osc2cl(ext->oe_obj); - cl_object_get(obj); - } - - list_del_init(&ext->oe_link); - osc_extent_finish(env, ext, 1, rc); - } - LASSERT(list_empty(&aa->aa_exts)); - LASSERT(list_empty(&aa->aa_oaps)); - - if (obj) { + if (rc == 0) { struct obdo *oa = aa->aa_oa; struct cl_attr *attr = &osc_env_info(env)->oti_attr; unsigned long valid = 0; + struct cl_object *obj; + struct osc_async_page *last; - LASSERT(rc == 0); + last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]); + obj = osc2cl(last->oap_obj); + + cl_object_attr_lock(obj); if (oa->o_valid & OBD_MD_FLBLOCKS) { attr->cat_blocks = oa->o_blocks; valid |= CAT_BLOCKS; @@ -1800,21 +1791,45 @@ static int brw_interpret(const struct lu_env *env, attr->cat_ctime = oa->o_ctime; valid |= CAT_CTIME; } - if (valid != 0) { - cl_object_attr_lock(obj); - cl_object_attr_set(env, obj, attr, valid); - cl_object_attr_unlock(obj); + + if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) { + struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; + loff_t last_off = last->oap_count + last->oap_obj_off; + + /* Change file size if this is an out of quota or + * direct IO write and it extends the file size + */ + if (loi->loi_lvb.lvb_size < last_off) { + attr->cat_size = last_off; + valid |= CAT_SIZE; + } + /* Extend KMS if it's not a lockless write */ + if (loi->loi_kms < last_off && + oap2osc_page(last)->ops_srvlock == 0) { + attr->cat_kms = last_off; + valid |= CAT_KMS; + } } - cl_object_put(env, obj); + + if (valid != 0) + cl_object_attr_set(env, obj, attr, valid); + cl_object_attr_unlock(obj); } kmem_cache_free(obdo_cachep, aa->aa_oa); + list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) { + list_del_init(&ext->oe_link); + osc_extent_finish(env, ext, 1, rc); + } + LASSERT(list_empty(&aa->aa_exts)); + LASSERT(list_empty(&aa->aa_oaps)); + cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc : req->rq_bulk->bd_nob_transferred); osc_release_ppga(aa->aa_ppga, aa->aa_page_count); ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters * is called so we know whether to go to sync BRWs or wait for more * RPCs to complete @@ -1824,12 +1839,31 @@ static int brw_interpret(const struct lu_env *env, else cli->cl_r_in_flight--; osc_wake_cache_waiters(cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); osc_io_unplug(env, cli, NULL); return rc; } +static void brw_commit(struct ptlrpc_request *req) +{ + spin_lock(&req->rq_lock); + /* + * If osc_inc_unstable_pages (via osc_extent_finish) races with + * this called via the rq_commit_cb, I need to ensure + * osc_dec_unstable_pages is still called. Otherwise unstable + * pages may be leaked. + */ + if (req->rq_unstable) { + spin_unlock(&req->rq_lock); + osc_dec_unstable_pages(req); + spin_lock(&req->rq_lock); + } else { + req->rq_committed = 1; + } + spin_unlock(&req->rq_lock); +} + /** * Build an RPC by the list of extent @ext_list. The caller must ensure * that the total pages in this list are NOT over max pages per RPC. @@ -1920,7 +1954,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, pga[i] = &oap->oap_brw_page; pga[i]->off = oap->oap_obj_off + oap->oap_page_off; CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", - pga[i]->pg, page_index(oap->oap_page), oap, + pga[i]->pg, oap->oap_page->index, oap, pga[i]->flag); i++; cl_req_page_add(env, clerq, page); @@ -1949,6 +1983,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, goto out; } + req->rq_commit_cb = brw_commit; req->rq_interpret_reply = brw_interpret; if (mem_tight != 0) @@ -1992,7 +2027,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, if (tmp) tmp->oap_request = ptlrpc_request_addref(req); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); starting_offset >>= PAGE_SHIFT; if (cmd == OBD_BRW_READ) { cli->cl_r_in_flight++; @@ -2007,7 +2042,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, lprocfs_oh_tally_log2(&cli->cl_write_offset_hist, starting_offset + 1); } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight", page_count, aa, cli->cl_r_in_flight, @@ -2055,14 +2090,12 @@ static int osc_set_lock_data_with_check(struct ldlm_lock *lock, LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl); lock_res_and_lock(lock); - spin_lock(&osc_ast_guard); if (!lock->l_ast_data) lock->l_ast_data = data; if (lock->l_ast_data == data) set = 1; - spin_unlock(&osc_ast_guard); unlock_res_and_lock(lock); return set; @@ -2104,36 +2137,38 @@ static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm, return rc; } -static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb, - obd_enqueue_update_f upcall, void *cookie, - __u64 *flags, int agl, int rc) +static int osc_enqueue_fini(struct ptlrpc_request *req, + osc_enqueue_upcall_f upcall, void *cookie, + struct lustre_handle *lockh, enum ldlm_mode mode, + __u64 *flags, int agl, int errcode) { - int intent = *flags & LDLM_FL_HAS_INTENT; - - if (intent) { - /* The request was created before ldlm_cli_enqueue call. */ - if (rc == ELDLM_LOCK_ABORTED) { - struct ldlm_reply *rep; + bool intent = *flags & LDLM_FL_HAS_INTENT; + int rc; - rep = req_capsule_server_get(&req->rq_pill, - &RMF_DLM_REP); + /* The request was created before ldlm_cli_enqueue call. */ + if (intent && errcode == ELDLM_LOCK_ABORTED) { + struct ldlm_reply *rep; - rep->lock_policy_res1 = - ptlrpc_status_ntoh(rep->lock_policy_res1); - if (rep->lock_policy_res1) - rc = rep->lock_policy_res1; - } - } + rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); - if ((intent != 0 && rc == ELDLM_LOCK_ABORTED && agl == 0) || - (rc == 0)) { + rep->lock_policy_res1 = + ptlrpc_status_ntoh(rep->lock_policy_res1); + if (rep->lock_policy_res1) + errcode = rep->lock_policy_res1; + if (!agl) + *flags |= LDLM_FL_LVB_READY; + } else if (errcode == ELDLM_OK) { *flags |= LDLM_FL_LVB_READY; - CDEBUG(D_INODE, "got kms %llu blocks %llu mtime %llu\n", - lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_mtime); } /* Call the update callback. */ - rc = (*upcall)(cookie, rc); + rc = (*upcall)(cookie, lockh, errcode); + /* release the reference taken in ldlm_cli_enqueue() */ + if (errcode == ELDLM_LOCK_MATCHED) + errcode = ELDLM_OK; + if (errcode == ELDLM_OK && lustre_handle_is_used(lockh)) + ldlm_lock_decref(lockh, mode); + return rc; } @@ -2142,62 +2177,50 @@ static int osc_enqueue_interpret(const struct lu_env *env, struct osc_enqueue_args *aa, int rc) { struct ldlm_lock *lock; - struct lustre_handle handle; - __u32 mode; - struct ost_lvb *lvb; - __u32 lvb_len; - __u64 *flags = aa->oa_flags; - - /* Make a local copy of a lock handle and a mode, because aa->oa_* - * might be freed anytime after lock upcall has been called. - */ - lustre_handle_copy(&handle, aa->oa_lockh); - mode = aa->oa_ei->ei_mode; + struct lustre_handle *lockh = &aa->oa_lockh; + enum ldlm_mode mode = aa->oa_mode; + struct ost_lvb *lvb = aa->oa_lvb; + __u32 lvb_len = sizeof(*lvb); + __u64 flags = 0; + /* ldlm_cli_enqueue is holding a reference on the lock, so it must * be valid. */ - lock = ldlm_handle2lock(&handle); + lock = ldlm_handle2lock(lockh); + LASSERTF(lock, "lockh %llx, req %p, aa %p - client evicted?\n", + lockh->cookie, req, aa); /* Take an additional reference so that a blocking AST that * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed * to arrive after an upcall has been executed by * osc_enqueue_fini(). */ - ldlm_lock_addref(&handle, mode); + ldlm_lock_addref(lockh, mode); + + /* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */ + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2); /* Let CP AST to grant the lock first. */ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1); - if (aa->oa_agl && rc == ELDLM_LOCK_ABORTED) { - lvb = NULL; - lvb_len = 0; - } else { - lvb = aa->oa_lvb; - lvb_len = sizeof(*aa->oa_lvb); + if (aa->oa_agl) { + LASSERT(!aa->oa_lvb); + LASSERT(!aa->oa_flags); + aa->oa_flags = &flags; } /* Complete obtaining the lock procedure. */ - rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_ei->ei_type, 1, - mode, flags, lvb, lvb_len, &handle, rc); + rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1, + aa->oa_mode, aa->oa_flags, lvb, lvb_len, + lockh, rc); /* Complete osc stuff. */ - rc = osc_enqueue_fini(req, aa->oa_lvb, aa->oa_upcall, aa->oa_cookie, - flags, aa->oa_agl, rc); + rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode, + aa->oa_flags, aa->oa_agl, rc); OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10); - /* Release the lock for async request. */ - if (lustre_handle_is_used(&handle) && rc == ELDLM_OK) - /* - * Releases a reference taken by ldlm_cli_enqueue(), if it is - * not already released by - * ldlm_cli_enqueue_fini()->failed_lock_cleanup() - */ - ldlm_lock_decref(&handle, mode); - - LASSERTF(lock, "lockh %p, req %p, aa %p - client evicted?\n", - aa->oa_lockh, req, aa); - ldlm_lock_decref(&handle, mode); + ldlm_lock_decref(lockh, mode); LDLM_LOCK_PUT(lock); return rc; } @@ -2209,29 +2232,29 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1; * other synchronous requests, however keeping some locks and trying to obtain * others may take a considerable amount of time in a case of ost failure; and * when other sync requests do not get released lock from a client, the client - * is excluded from the cluster -- such scenarious make the life difficult, so + * is evicted from the cluster -- such scenaries make the life difficult, so * release locks just after they are obtained. */ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, __u64 *flags, ldlm_policy_data_t *policy, struct ost_lvb *lvb, int kms_valid, - obd_enqueue_update_f upcall, void *cookie, + osc_enqueue_upcall_f upcall, void *cookie, struct ldlm_enqueue_info *einfo, - struct lustre_handle *lockh, struct ptlrpc_request_set *rqset, int async, int agl) { struct obd_device *obd = exp->exp_obd; + struct lustre_handle lockh = { 0 }; struct ptlrpc_request *req = NULL; int intent = *flags & LDLM_FL_HAS_INTENT; - __u64 match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY); + __u64 match_flags = *flags; enum ldlm_mode mode; int rc; /* Filesystem lock extents are extended to page boundaries so that * dealing with the page cache is a little smoother. */ - policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK; - policy->l_extent.end |= ~CFS_PAGE_MASK; + policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK; + policy->l_extent.end |= ~PAGE_MASK; /* * kms is not valid when either object is completely fresh (so that no @@ -2258,65 +2281,51 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, mode = einfo->ei_mode; if (einfo->ei_mode == LCK_PR) mode |= LCK_PW; - mode = ldlm_lock_match(obd->obd_namespace, *flags | match_lvb, res_id, - einfo->ei_type, policy, mode, lockh, 0); + if (agl == 0) + match_flags |= LDLM_FL_LVB_READY; + if (intent != 0) + match_flags |= LDLM_FL_BLOCK_GRANTED; + mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id, + einfo->ei_type, policy, mode, &lockh, 0); if (mode) { - struct ldlm_lock *matched = ldlm_handle2lock(lockh); + struct ldlm_lock *matched; - if ((agl != 0) && !(matched->l_flags & LDLM_FL_LVB_READY)) { - /* For AGL, if enqueue RPC is sent but the lock is not - * granted, then skip to process this strpe. - * Return -ECANCELED to tell the caller. + if (*flags & LDLM_FL_TEST_LOCK) + return ELDLM_OK; + + matched = ldlm_handle2lock(&lockh); + if (agl) { + /* AGL enqueues DLM locks speculatively. Therefore if + * it already exists a DLM lock, it wll just inform the + * caller to cancel the AGL process for this stripe. */ - ldlm_lock_decref(lockh, mode); + ldlm_lock_decref(&lockh, mode); LDLM_LOCK_PUT(matched); return -ECANCELED; - } - - if (osc_set_lock_data_with_check(matched, einfo)) { + } else if (osc_set_lock_data_with_check(matched, einfo)) { *flags |= LDLM_FL_LVB_READY; - /* addref the lock only if not async requests and PW - * lock is matched whereas we asked for PR. - */ - if (!rqset && einfo->ei_mode != mode) - ldlm_lock_addref(lockh, LCK_PR); - if (intent) { - /* I would like to be able to ASSERT here that - * rss <= kms, but I can't, for reasons which - * are explained in lov_enqueue() - */ - } - - /* We already have a lock, and it's referenced. - * - * At this point, the cl_lock::cll_state is CLS_QUEUING, - * AGL upcall may change it to CLS_HELD directly. - */ - (*upcall)(cookie, ELDLM_OK); + /* We already have a lock, and it's referenced. */ + (*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED); - if (einfo->ei_mode != mode) - ldlm_lock_decref(lockh, LCK_PW); - else if (rqset) - /* For async requests, decref the lock. */ - ldlm_lock_decref(lockh, einfo->ei_mode); + ldlm_lock_decref(&lockh, mode); LDLM_LOCK_PUT(matched); return ELDLM_OK; + } else { + ldlm_lock_decref(&lockh, mode); + LDLM_LOCK_PUT(matched); } - - ldlm_lock_decref(lockh, mode); - LDLM_LOCK_PUT(matched); } - no_match: +no_match: + if (*flags & LDLM_FL_TEST_LOCK) + return -ENOLCK; if (intent) { - LIST_HEAD(cancels); - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE_LVB); if (!req) return -ENOMEM; - rc = ldlm_prep_enqueue_req(exp, req, &cancels, 0); + rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); if (rc) { ptlrpc_request_free(req); return rc; @@ -2331,21 +2340,31 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, *flags &= ~LDLM_FL_BLOCK_GRANTED; rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb, - sizeof(*lvb), LVB_T_OST, lockh, async); - if (rqset) { + sizeof(*lvb), LVB_T_OST, &lockh, async); + if (async) { if (!rc) { struct osc_enqueue_args *aa; - CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args)); + CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); - aa->oa_ei = einfo; aa->oa_exp = exp; - aa->oa_flags = flags; + aa->oa_mode = einfo->ei_mode; + aa->oa_type = einfo->ei_type; + lustre_handle_copy(&aa->oa_lockh, &lockh); aa->oa_upcall = upcall; aa->oa_cookie = cookie; - aa->oa_lvb = lvb; - aa->oa_lockh = lockh; aa->oa_agl = !!agl; + if (!agl) { + aa->oa_flags = flags; + aa->oa_lvb = lvb; + } else { + /* AGL is essentially to enqueue an DLM lock + * in advance, so we don't care about the + * result of AGL enqueue. + */ + aa->oa_lvb = NULL; + aa->oa_flags = NULL; + } req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_enqueue_interpret; @@ -2359,7 +2378,8 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, return rc; } - rc = osc_enqueue_fini(req, lvb, upcall, cookie, flags, agl, rc); + rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode, + flags, agl, rc); if (intent) ptlrpc_req_finished(req); @@ -2381,8 +2401,8 @@ int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, /* Filesystem lock extents are extended to page boundaries so that * dealing with the page cache is a little smoother */ - policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK; - policy->l_extent.end |= ~CFS_PAGE_MASK; + policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK; + policy->l_extent.end |= ~PAGE_MASK; /* Next, search for already existing extent locks that will cover us */ /* If we're trying to read, we also search for an existing PW lock. The @@ -2493,7 +2513,7 @@ static int osc_statfs_async(struct obd_export *exp, } req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret; - CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args)); + CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); aa->aa_oi = oinfo; @@ -2756,7 +2776,8 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp, tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY); memcpy(tmp, key, keylen); - req->rq_no_delay = req->rq_no_resend = 1; + req->rq_no_delay = 1; + req->rq_no_resend = 1; ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); if (rc) @@ -2787,7 +2808,7 @@ out: goto skip_locking; policy.l_extent.start = fm_key->fiemap.fm_start & - CFS_PAGE_MASK; + PAGE_MASK; if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <= fm_key->fiemap.fm_start + PAGE_SIZE - 1) @@ -2795,7 +2816,7 @@ out: else policy.l_extent.end = (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length + - PAGE_SIZE - 1) & CFS_PAGE_MASK; + PAGE_SIZE - 1) & PAGE_MASK; ostid_build_res_name(&fm_key->oa.o_oi, &res_id); mode = ldlm_lock_match(exp->exp_obd->obd_namespace, @@ -2896,7 +2917,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, LASSERT(!cli->cl_cache); /* only once */ cli->cl_cache = val; - atomic_inc(&cli->cl_cache->ccc_users); + cl_cache_incref(cli->cl_cache); cli->cl_lru_left = &cli->cl_cache->ccc_lru_left; /* add this osc into entity list */ @@ -2913,7 +2934,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, int nr = atomic_read(&cli->cl_lru_in_list) >> 1; int target = *(int *)val; - nr = osc_lru_shrink(cli, min(nr, target)); + nr = osc_lru_shrink(env, cli, min(nr, target), true); *(int *)val -= nr; return 0; } @@ -2992,12 +3013,12 @@ static int osc_reconnect(const struct lu_env *env, if (data && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) { long lost_grant; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?: 2 * cli_brw_size(obd); lost_grant = cli->cl_lost_grant; cli->cl_lost_grant = 0; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d, lost: %ld.\n", data->ocd_connect_flags, @@ -3047,10 +3068,10 @@ static int osc_import_event(struct obd_device *obd, switch (event) { case IMP_EVENT_DISCON: { cli = &obd->u.cli; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_avail_grant = 0; cli->cl_lost_grant = 0; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); break; } case IMP_EVENT_INACTIVE: { @@ -3073,8 +3094,9 @@ static int osc_import_event(struct obd_device *obd, ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); cl_env_put(env, &refcheck); - } else + } else { rc = PTR_ERR(env); + } break; } case IMP_EVENT_ACTIVE: { @@ -3116,20 +3138,14 @@ static int osc_import_event(struct obd_device *obd, * \retval zero the lock can't be canceled * \retval other ok to cancel */ -static int osc_cancel_for_recovery(struct ldlm_lock *lock) +static int osc_cancel_weight(struct ldlm_lock *lock) { - check_res_locked(lock->l_resource); - /* - * Cancel all unused extent lock in granted mode LCK_PR or LCK_CR. - * - * XXX as a future improvement, we can also cancel unused write lock - * if it doesn't have dirty data and active mmaps. + * Cancel all unused and granted extent lock. */ if (lock->l_resource->lr_type == LDLM_EXTENT && - (lock->l_granted_mode == LCK_PR || - lock->l_granted_mode == LCK_CR) && - (osc_dlm_lock_pageref(lock) == 0)) + lock->l_granted_mode == lock->l_req_mode && + osc_ldlm_weigh_ast(lock) == 0) return 1; return 0; @@ -3170,6 +3186,14 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) } cli->cl_writeback_work = handler; + handler = ptlrpcd_alloc_work(cli->cl_import, lru_queue_work, cli); + if (IS_ERR(handler)) { + rc = PTR_ERR(handler); + goto out_ptlrpcd_work; + } + + cli->cl_lru_work = handler; + rc = osc_quota_setup(obd); if (rc) goto out_ptlrpcd_work; @@ -3198,11 +3222,18 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) } INIT_LIST_HEAD(&cli->cl_grant_shrink_list); - ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery); + ns_register_cancel(obd->obd_namespace, osc_cancel_weight); return rc; out_ptlrpcd_work: - ptlrpcd_destroy_work(handler); + if (cli->cl_writeback_work) { + ptlrpcd_destroy_work(cli->cl_writeback_work); + cli->cl_writeback_work = NULL; + } + if (cli->cl_lru_work) { + ptlrpcd_destroy_work(cli->cl_lru_work); + cli->cl_lru_work = NULL; + } out_client_setup: client_obd_cleanup(obd); out_ptlrpcd: @@ -3241,6 +3272,10 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) ptlrpcd_destroy_work(cli->cl_writeback_work); cli->cl_writeback_work = NULL; } + if (cli->cl_lru_work) { + ptlrpcd_destroy_work(cli->cl_lru_work); + cli->cl_lru_work = NULL; + } obd_cleanup_client_import(obd); ptlrpc_lprocfs_unregister_obd(obd); lprocfs_obd_cleanup(obd); @@ -3262,7 +3297,7 @@ static int osc_cleanup(struct obd_device *obd) list_del_init(&cli->cl_lru_osc); spin_unlock(&cli->cl_cache->ccc_lru_lock); cli->cl_lru_left = NULL; - atomic_dec(&cli->cl_cache->ccc_users); + cl_cache_decref(cli->cl_cache); cli->cl_cache = NULL; } @@ -3330,7 +3365,6 @@ static struct obd_ops osc_obd_ops = { }; extern struct lu_kmem_descr osc_caches[]; -extern spinlock_t osc_ast_guard; extern struct lock_class_key osc_ast_guard_class; static int __init osc_init(void) @@ -3357,9 +3391,6 @@ static int __init osc_init(void) if (rc) goto out_kmem; - spin_lock_init(&osc_ast_guard); - lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class); - /* This is obviously too much memory, only prevent overflow here */ if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) { rc = -EINVAL; diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c index cf3ac8eee9ee..d4463d7c81d2 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/client.c +++ b/drivers/staging/lustre/lustre/ptlrpc/client.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -587,35 +583,36 @@ static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request) spin_unlock(&pool->prp_lock); } -static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request, - __u32 version, int opcode, - int count, __u32 *lengths, char **bufs, - struct ptlrpc_cli_ctx *ctx) +int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, + __u32 version, int opcode, char **bufs, + struct ptlrpc_cli_ctx *ctx) { - struct obd_import *imp = request->rq_import; + int count; + struct obd_import *imp; + __u32 *lengths; int rc; - if (unlikely(ctx)) + count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT); + imp = request->rq_import; + lengths = request->rq_pill.rc_area[RCL_CLIENT]; + + if (unlikely(ctx)) { request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx); - else { + } else { rc = sptlrpc_req_get_ctx(request); if (rc) goto out_free; } - sptlrpc_req_set_flavor(request, opcode); rc = lustre_pack_request(request, imp->imp_msg_magic, count, lengths, bufs); - if (rc) { - LASSERT(!request->rq_pool); + if (rc) goto out_ctx; - } lustre_msg_add_version(request->rq_reqmsg, version); request->rq_send_state = LUSTRE_IMP_FULL; request->rq_type = PTL_RPC_MSG_REQUEST; - request->rq_export = NULL; request->rq_req_cbid.cbid_fn = request_out_callback; request->rq_req_cbid.cbid_arg = request; @@ -624,6 +621,8 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request, request->rq_reply_cbid.cbid_arg = request; request->rq_reply_deadline = 0; + request->rq_bulk_deadline = 0; + request->rq_req_deadline = 0; request->rq_phase = RQ_PHASE_NEW; request->rq_next_phase = RQ_PHASE_UNDEFINED; @@ -632,40 +631,49 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request, ptlrpc_at_set_req_timeout(request); - spin_lock_init(&request->rq_lock); - INIT_LIST_HEAD(&request->rq_list); - INIT_LIST_HEAD(&request->rq_timed_list); - INIT_LIST_HEAD(&request->rq_replay_list); - INIT_LIST_HEAD(&request->rq_ctx_chain); - INIT_LIST_HEAD(&request->rq_set_chain); - INIT_LIST_HEAD(&request->rq_history_list); - INIT_LIST_HEAD(&request->rq_exp_list); - init_waitqueue_head(&request->rq_reply_waitq); - init_waitqueue_head(&request->rq_set_waitq); request->rq_xid = ptlrpc_next_xid(); - atomic_set(&request->rq_refcount, 1); - lustre_msg_set_opc(request->rq_reqmsg, opcode); + /* Let's setup deadline for req/reply/bulk unlink for opcode. */ + if (cfs_fail_val == opcode) { + time_t *fail_t = NULL, *fail2_t = NULL; + + if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) { + fail_t = &request->rq_bulk_deadline; + } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) { + fail_t = &request->rq_reply_deadline; + } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) { + fail_t = &request->rq_req_deadline; + } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) { + fail_t = &request->rq_reply_deadline; + fail2_t = &request->rq_bulk_deadline; + } + + if (fail_t) { + *fail_t = ktime_get_real_seconds() + LONG_UNLINK; + + if (fail2_t) + *fail2_t = ktime_get_real_seconds() + + LONG_UNLINK; + + /* The RPC is infected, let the test change the + * fail_loc + */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(2)); + set_current_state(TASK_RUNNING); + } + } + return 0; + out_ctx: + LASSERT(!request->rq_pool); sptlrpc_cli_ctx_put(request->rq_cli_ctx, 1); out_free: class_import_put(imp); return rc; } - -int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, - __u32 version, int opcode, char **bufs, - struct ptlrpc_cli_ctx *ctx) -{ - int count; - - count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT); - return __ptlrpc_request_bufs_pack(request, version, opcode, count, - request->rq_pill.rc_area[RCL_CLIENT], - bufs, ctx); -} EXPORT_SYMBOL(ptlrpc_request_bufs_pack); /** @@ -722,7 +730,9 @@ struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp, request = ptlrpc_prep_req_from_pool(pool); if (request) { - LASSERTF((unsigned long)imp > 0x1000, "%p\n", imp); + ptlrpc_cli_req_init(request); + + LASSERTF((unsigned long)imp > 0x1000, "%p", imp); LASSERT(imp != LP_POISON); LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p\n", imp->imp_client); @@ -1082,7 +1092,6 @@ static int ptlrpc_console_allow(struct ptlrpc_request *req) */ if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) && (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) { - /* Suppress timed out reconnect requests */ if (req->rq_timedout) return 0; @@ -1164,9 +1173,9 @@ static int after_reply(struct ptlrpc_request *req) LASSERT(obd); /* repbuf must be unlinked */ - LASSERT(!req->rq_receiving_reply && !req->rq_reply_unlink); + LASSERT(!req->rq_receiving_reply && req->rq_reply_unlinked); - if (req->rq_reply_truncate) { + if (req->rq_reply_truncated) { if (ptlrpc_no_resend(req)) { DEBUG_REQ(D_ERROR, req, "reply buffer overflow, expected: %d, actual size: %d", req->rq_nob_received, req->rq_repbuf_len); @@ -1240,8 +1249,9 @@ static int after_reply(struct ptlrpc_request *req) } ktime_get_real_ts64(&work_start); - timediff = (work_start.tv_sec - req->rq_arrival_time.tv_sec) * USEC_PER_SEC + - (work_start.tv_nsec - req->rq_arrival_time.tv_nsec) / NSEC_PER_USEC; + timediff = (work_start.tv_sec - req->rq_sent_tv.tv_sec) * USEC_PER_SEC + + (work_start.tv_nsec - req->rq_sent_tv.tv_nsec) / + NSEC_PER_USEC; if (obd->obd_svc_stats) { lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, timediff); @@ -1504,16 +1514,28 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) if (!(req->rq_phase == RQ_PHASE_RPC || req->rq_phase == RQ_PHASE_BULK || req->rq_phase == RQ_PHASE_INTERPRET || - req->rq_phase == RQ_PHASE_UNREGISTERING || + req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK || req->rq_phase == RQ_PHASE_COMPLETE)) { DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase); LBUG(); } - if (req->rq_phase == RQ_PHASE_UNREGISTERING) { + if (req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK) { LASSERT(req->rq_next_phase != req->rq_phase); LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED); + if (req->rq_req_deadline && + !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) + req->rq_req_deadline = 0; + if (req->rq_reply_deadline && + !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) + req->rq_reply_deadline = 0; + if (req->rq_bulk_deadline && + !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) + req->rq_bulk_deadline = 0; + /* * Skip processing until reply is unlinked. We * can't return to pool before that and we can't @@ -1521,7 +1543,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) * sure that all rdma transfers finished and will * not corrupt any data. */ - if (ptlrpc_client_recv_or_unlink(req) || + if (req->rq_phase == RQ_PHASE_UNREG_RPC && + ptlrpc_client_recv_or_unlink(req)) + continue; + if (req->rq_phase == RQ_PHASE_UNREG_BULK && ptlrpc_client_bulk_active(req)) continue; @@ -1999,7 +2024,7 @@ void ptlrpc_interrupted_set(void *data) list_entry(tmp, struct ptlrpc_request, rq_set_chain); if (req->rq_phase != RQ_PHASE_RPC && - req->rq_phase != RQ_PHASE_UNREGISTERING) + req->rq_phase != RQ_PHASE_UNREG_RPC) continue; ptlrpc_mark_interrupted(req); @@ -2087,7 +2112,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n", set, timeout); - if (timeout == 0 && !cfs_signal_pending()) + if (timeout == 0 && !signal_pending(current)) /* * No requests are in-flight (ether timed out * or delayed), so we can allow interrupts. @@ -2114,7 +2139,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) * it being ignored forever */ if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr && - cfs_signal_pending()) { + signal_pending(current)) { sigset_t blocked_sigs = cfs_block_sigsinv(LUSTRE_FATAL_SIGS); @@ -2124,7 +2149,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) * important signals since ptlrpc set is not easily * reentrant from userspace again */ - if (cfs_signal_pending()) + if (signal_pending(current)) ptlrpc_interrupted_set(set); cfs_restore_sigs(blocked_sigs); } @@ -2196,11 +2221,11 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) { if (!request) return; + LASSERT(!request->rq_srv_req); + LASSERT(!request->rq_export); LASSERTF(!request->rq_receiving_reply, "req %p\n", request); - LASSERTF(!request->rq_rqbd, "req %p\n", request);/* client-side */ LASSERTF(list_empty(&request->rq_list), "req %p\n", request); LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request); - LASSERTF(list_empty(&request->rq_exp_list), "req %p\n", request); LASSERTF(!request->rq_replay, "req %p\n", request); req_capsule_fini(&request->rq_pill); @@ -2226,10 +2251,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) if (request->rq_repbuf) sptlrpc_cli_free_repbuf(request); - if (request->rq_export) { - class_export_put(request->rq_export); - request->rq_export = NULL; - } + if (request->rq_import) { class_import_put(request->rq_import); request->rq_import = NULL; @@ -2314,8 +2336,9 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) /* Let's setup deadline for reply unlink. */ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - async && request->rq_reply_deadline == 0) - request->rq_reply_deadline = ktime_get_real_seconds()+LONG_UNLINK; + async && request->rq_reply_deadline == 0 && cfs_fail_val == 0) + request->rq_reply_deadline = + ktime_get_real_seconds() + LONG_UNLINK; /* Nothing left to do. */ if (!ptlrpc_client_recv_or_unlink(request)) @@ -2328,7 +2351,7 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) return 1; /* Move to "Unregistering" phase as reply was not unlinked yet. */ - ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING); + ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC); /* Do not wait for unlink to finish. */ if (async) @@ -2360,9 +2383,10 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) LASSERT(rc == -ETIMEDOUT); DEBUG_REQ(D_WARNING, request, - "Unexpectedly long timeout rvcng=%d unlnk=%d/%d", + "Unexpectedly long timeout receiving_reply=%d req_ulinked=%d reply_unlinked=%d", request->rq_receiving_reply, - request->rq_req_unlink, request->rq_reply_unlink); + request->rq_req_unlinked, + request->rq_reply_unlinked); } return 0; } @@ -2619,11 +2643,6 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) } EXPORT_SYMBOL(ptlrpc_queue_wait); -struct ptlrpc_replay_async_args { - int praa_old_state; - int praa_old_status; -}; - /** * Callback used for replayed requests reply processing. * In case of successful reply calls registered request replay callback. @@ -2962,7 +2981,6 @@ static void ptlrpcd_add_work_req(struct ptlrpc_request *req) req->rq_timeout = obd_timeout; req->rq_sent = ktime_get_real_seconds(); req->rq_deadline = req->rq_sent + req->rq_timeout; - req->rq_reply_deadline = req->rq_deadline; req->rq_phase = RQ_PHASE_INTERPRET; req->rq_next_phase = RQ_PHASE_COMPLETE; req->rq_xid = ptlrpc_next_xid(); @@ -3018,27 +3036,17 @@ void *ptlrpcd_alloc_work(struct obd_import *imp, return ERR_PTR(-ENOMEM); } + ptlrpc_cli_req_init(req); + req->rq_send_state = LUSTRE_IMP_FULL; req->rq_type = PTL_RPC_MSG_REQUEST; req->rq_import = class_import_get(imp); - req->rq_export = NULL; req->rq_interpret_reply = work_interpreter; /* don't want reply */ - req->rq_receiving_reply = 0; - req->rq_req_unlink = req->rq_reply_unlink = 0; - req->rq_no_delay = req->rq_no_resend = 1; + req->rq_no_delay = 1; + req->rq_no_resend = 1; req->rq_pill.rc_fmt = (void *)&worker_format; - spin_lock_init(&req->rq_lock); - INIT_LIST_HEAD(&req->rq_list); - INIT_LIST_HEAD(&req->rq_replay_list); - INIT_LIST_HEAD(&req->rq_set_chain); - INIT_LIST_HEAD(&req->rq_history_list); - INIT_LIST_HEAD(&req->rq_exp_list); - init_waitqueue_head(&req->rq_reply_waitq); - init_waitqueue_head(&req->rq_set_waitq); - atomic_set(&req->rq_refcount, 1); - CLASSERT(sizeof(*args) <= sizeof(req->rq_async_args)); args = ptlrpc_req_async_args(req); args->cb = cb; diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c index a14daff3fca0..177a379da9fa 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/connection.c +++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c index 47be21ac9f10..b1ce72511509 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/events.c +++ b/drivers/staging/lustre/lustre/ptlrpc/events.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -55,28 +51,33 @@ void request_out_callback(lnet_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->md.user_ptr; struct ptlrpc_request *req = cbid->cbid_arg; + bool wakeup = false; - LASSERT(ev->type == LNET_EVENT_SEND || - ev->type == LNET_EVENT_UNLINK); + LASSERT(ev->type == LNET_EVENT_SEND || ev->type == LNET_EVENT_UNLINK); LASSERT(ev->unlinked); DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status); sptlrpc_request_out_callback(req); + spin_lock(&req->rq_lock); req->rq_real_sent = ktime_get_real_seconds(); - if (ev->unlinked) - req->rq_req_unlink = 0; + req->rq_req_unlinked = 1; + /* reply_in_callback happened before request_out_callback? */ + if (req->rq_reply_unlinked) + wakeup = true; if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { - /* Failed send: make it seem like the reply timed out, just * like failing sends in client.c does currently... */ - req->rq_net_err = 1; - ptlrpc_client_wake_req(req); + wakeup = true; } + + if (wakeup) + ptlrpc_client_wake_req(req); + spin_unlock(&req->rq_lock); ptlrpc_req_finished(req); @@ -105,7 +106,7 @@ void reply_in_callback(lnet_event_t *ev) req->rq_receiving_reply = 0; req->rq_early = 0; if (ev->unlinked) - req->rq_reply_unlink = 0; + req->rq_reply_unlinked = 1; if (ev->status) goto out_wake; @@ -119,7 +120,7 @@ void reply_in_callback(lnet_event_t *ev) if (ev->mlength < ev->rlength) { CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req, req->rq_replen, ev->rlength, ev->offset); - req->rq_reply_truncate = 1; + req->rq_reply_truncated = 1; req->rq_replied = 1; req->rq_status = -EOVERFLOW; req->rq_nob_received = ev->rlength + ev->offset; @@ -136,7 +137,8 @@ void reply_in_callback(lnet_event_t *ev) req->rq_early_count++; /* number received, client side */ - if (req->rq_replied) /* already got the real reply */ + /* already got the real reply or buffers are already unlinked */ + if (req->rq_replied || req->rq_reply_unlinked == 1) goto out_wake; req->rq_early = 1; @@ -329,6 +331,7 @@ void request_in_callback(lnet_event_t *ev) } } + ptlrpc_srv_req_init(req); /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL, * flags are reset and scalars are zero. We only set the message * size to non-zero if this was a successful receive. @@ -342,10 +345,6 @@ void request_in_callback(lnet_event_t *ev) req->rq_self = ev->target.nid; req->rq_rqbd = rqbd; req->rq_phase = RQ_PHASE_NEW; - spin_lock_init(&req->rq_lock); - INIT_LIST_HEAD(&req->rq_timed_list); - INIT_LIST_HEAD(&req->rq_exp_list); - atomic_set(&req->rq_refcount, 1); if (ev->type == LNET_EVENT_PUT) CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n", req, req->rq_xid, ev->mlength); diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c index cd94fed0ffdf..3292e6ea0102 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/import.c +++ b/drivers/staging/lustre/lustre/ptlrpc/import.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -360,9 +356,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp) "still on delayed list"); } - CERROR("%s: RPCs in \"%s\" phase found (%d). Network is sluggish? Waiting them to error out.\n", + CERROR("%s: Unregistering RPCs found (%d). Network is sluggish? Waiting them to error out.\n", cli_tgt, - ptlrpc_phase2str(RQ_PHASE_UNREGISTERING), atomic_read(&imp-> imp_unregistering)); } @@ -698,7 +693,8 @@ int ptlrpc_connect_import(struct obd_import *imp) lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_NEXT_VER); - request->rq_no_resend = request->rq_no_delay = 1; + request->rq_no_resend = 1; + request->rq_no_delay = 1; request->rq_send_state = LUSTRE_IMP_CONNECTING; /* Allow a slightly larger reply for future growth compatibility */ req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER, @@ -1001,6 +997,7 @@ finish: return 0; } } else { + static bool warned; spin_lock(&imp->imp_lock); list_del(&imp->imp_conn_current->oic_item); @@ -1021,7 +1018,7 @@ finish: goto out; } - if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && + if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && (ocd->ocd_version > LUSTRE_VERSION_CODE + LUSTRE_VERSION_OFFSET_WARN || ocd->ocd_version < LUSTRE_VERSION_CODE - @@ -1029,10 +1026,8 @@ finish: /* Sigh, some compilers do not like #ifdef in the middle * of macro arguments */ - const char *older = "older. Consider upgrading server or downgrading client" - ; - const char *newer = "newer than client version. Consider upgrading client" - ; + const char *older = "older than client. Consider upgrading server"; + const char *newer = "newer than client. Consider recompiling application"; LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n", obd2cli_tgt(imp->imp_obd), @@ -1042,6 +1037,7 @@ finish: OBD_OCD_VERSION_FIX(ocd->ocd_version), ocd->ocd_version > LUSTRE_VERSION_CODE ? newer : older, LUSTRE_VERSION_STRING); + warned = true; } #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0) @@ -1370,7 +1366,6 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) if (rc) goto out; } - } if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) { @@ -1453,7 +1448,6 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL); rc = l_wait_event(imp->imp_recovery_waitq, !ptlrpc_import_in_recovery(imp), &lwi); - } spin_lock(&imp->imp_lock); diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c index 5b06901e5729..ab5d85174245 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/layout.c +++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -160,6 +156,16 @@ static const struct req_msg_field *fld_query_server[] = { &RMF_FLD_MDFLD }; +static const struct req_msg_field *fld_read_client[] = { + &RMF_PTLRPC_BODY, + &RMF_FLD_MDFLD +}; + +static const struct req_msg_field *fld_read_server[] = { + &RMF_PTLRPC_BODY, + &RMF_GENERIC_DATA +}; + static const struct req_msg_field *mds_getattr_name_client[] = { &RMF_PTLRPC_BODY, &RMF_MDT_BODY, @@ -188,7 +194,7 @@ static const struct req_msg_field *mds_reint_create_slave_client[] = { &RMF_DLM_REQ }; -static const struct req_msg_field *mds_reint_create_rmt_acl_client[] = { +static const struct req_msg_field *mds_reint_create_acl_client[] = { &RMF_PTLRPC_BODY, &RMF_REC_REINT, &RMF_CAPA1, @@ -566,7 +572,7 @@ static const struct req_msg_field *ost_get_info_generic_server[] = { static const struct req_msg_field *ost_get_info_generic_client[] = { &RMF_PTLRPC_BODY, - &RMF_SETINFO_KEY + &RMF_GETINFO_KEY }; static const struct req_msg_field *ost_get_last_id_server[] = { @@ -574,6 +580,12 @@ static const struct req_msg_field *ost_get_last_id_server[] = { &RMF_OBD_ID }; +static const struct req_msg_field *ost_get_last_fid_client[] = { + &RMF_PTLRPC_BODY, + &RMF_GETINFO_KEY, + &RMF_FID, +}; + static const struct req_msg_field *ost_get_last_fid_server[] = { &RMF_PTLRPC_BODY, &RMF_FID, @@ -643,6 +655,7 @@ static struct req_format *req_formats[] = { &RQF_MGS_CONFIG_READ, &RQF_SEQ_QUERY, &RQF_FLD_QUERY, + &RQF_FLD_READ, &RQF_MDS_CONNECT, &RQF_MDS_DISCONNECT, &RQF_MDS_GET_INFO, @@ -662,7 +675,7 @@ static struct req_format *req_formats[] = { &RQF_MDS_DONE_WRITING, &RQF_MDS_REINT, &RQF_MDS_REINT_CREATE, - &RQF_MDS_REINT_CREATE_RMT_ACL, + &RQF_MDS_REINT_CREATE_ACL, &RQF_MDS_REINT_CREATE_SLAVE, &RQF_MDS_REINT_CREATE_SYM, &RQF_MDS_REINT_OPEN, @@ -696,7 +709,7 @@ static struct req_format *req_formats[] = { &RQF_OST_BRW_WRITE, &RQF_OST_STATFS, &RQF_OST_SET_GRANT_INFO, - &RQF_OST_GET_INFO_GENERIC, + &RQF_OST_GET_INFO, &RQF_OST_GET_INFO_LAST_ID, &RQF_OST_GET_INFO_LAST_FID, &RQF_OST_SET_INFO_LAST_FID, @@ -1162,6 +1175,10 @@ struct req_format RQF_FLD_QUERY = DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server); EXPORT_SYMBOL(RQF_FLD_QUERY); +struct req_format RQF_FLD_READ = + DEFINE_REQ_FMT0("FLD_READ", fld_read_client, fld_read_server); +EXPORT_SYMBOL(RQF_FLD_READ); + struct req_format RQF_LOG_CANCEL = DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty); EXPORT_SYMBOL(RQF_LOG_CANCEL); @@ -1221,10 +1238,10 @@ struct req_format RQF_MDS_REINT_CREATE = mds_reint_create_client, mdt_body_capa); EXPORT_SYMBOL(RQF_MDS_REINT_CREATE); -struct req_format RQF_MDS_REINT_CREATE_RMT_ACL = - DEFINE_REQ_FMT0("MDS_REINT_CREATE_RMT_ACL", - mds_reint_create_rmt_acl_client, mdt_body_capa); -EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_RMT_ACL); +struct req_format RQF_MDS_REINT_CREATE_ACL = + DEFINE_REQ_FMT0("MDS_REINT_CREATE_ACL", + mds_reint_create_acl_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_ACL); struct req_format RQF_MDS_REINT_CREATE_SLAVE = DEFINE_REQ_FMT0("MDS_REINT_CREATE_EA", @@ -1519,10 +1536,10 @@ struct req_format RQF_OST_SET_GRANT_INFO = ost_body_only); EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO); -struct req_format RQF_OST_GET_INFO_GENERIC = +struct req_format RQF_OST_GET_INFO = DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client, ost_get_info_generic_server); -EXPORT_SYMBOL(RQF_OST_GET_INFO_GENERIC); +EXPORT_SYMBOL(RQF_OST_GET_INFO); struct req_format RQF_OST_GET_INFO_LAST_ID = DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client, @@ -1530,7 +1547,7 @@ struct req_format RQF_OST_GET_INFO_LAST_ID = EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID); struct req_format RQF_OST_GET_INFO_LAST_FID = - DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", obd_set_info_client, + DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", ost_get_last_fid_client, ost_get_last_fid_server); EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID); diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c index a23ac5f9ae96..0f55c01feba8 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c +++ b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c index fbccb62213b5..bccdace7e51f 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c +++ b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c index c95a91ce26c9..bc93b75744e1 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c +++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -131,6 +127,7 @@ static struct ll_rpc_opcode { { SEC_CTX_INIT_CONT, "sec_ctx_init_cont" }, { SEC_CTX_FINI, "sec_ctx_fini" }, { FLD_QUERY, "fld_query" }, + { FLD_READ, "fld_read" }, }; static struct ll_eopcode { @@ -679,11 +676,11 @@ static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file, /** * The second token is either NULL, or an optional [reg|hp] string */ - if (strcmp(cmd, "reg") == 0) + if (strcmp(cmd, "reg") == 0) { queue = PTLRPC_NRS_QUEUE_REG; - else if (strcmp(cmd, "hp") == 0) + } else if (strcmp(cmd, "hp") == 0) { queue = PTLRPC_NRS_QUEUE_HP; - else { + } else { rc = -EINVAL; goto out; } @@ -693,8 +690,9 @@ default_queue: if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) { rc = -ENODEV; goto out; - } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) + } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) { queue = PTLRPC_NRS_QUEUE_REG; + } /** * Serialize NRS core lprocfs operations with policy registration/ @@ -870,7 +868,8 @@ ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s, if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */ srhi->srhi_req = NULL; - seq = srhi->srhi_seq = 0; + seq = 0; + srhi->srhi_seq = 0; } else { /* the next sequence */ seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits); } @@ -1159,7 +1158,6 @@ void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes) lprocfs_counter_add(svc_stats, idx, bytes); } - EXPORT_SYMBOL(ptlrpc_lprocfs_brw); void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) @@ -1320,6 +1318,5 @@ int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer, up_read(&obd->u.cli.cl_sem); return count; - } EXPORT_SYMBOL(lprocfs_wr_pinger_recov); diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c index 10b8fe82a342..11ec82545347 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c +++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -251,7 +247,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) /* Let's setup deadline for reply unlink. */ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && - async && req->rq_bulk_deadline == 0) + async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0) req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK; if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ @@ -270,7 +266,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) return 1; /* never registered */ /* Move to "Unregistering" phase as bulk was not unlinked yet. */ - ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING); + ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK); /* Do not wait for unlink to finish. */ if (async) @@ -581,19 +577,18 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) } spin_lock(&request->rq_lock); - /* If the MD attach succeeds, there _will_ be a reply_in callback */ - request->rq_receiving_reply = !noreply; - request->rq_req_unlink = 1; /* We are responsible for unlinking the reply buffer */ - request->rq_reply_unlink = !noreply; + request->rq_reply_unlinked = noreply; + request->rq_receiving_reply = !noreply; /* Clear any flags that may be present from previous sends. */ + request->rq_req_unlinked = 0; request->rq_replied = 0; request->rq_err = 0; request->rq_timedout = 0; request->rq_net_err = 0; request->rq_resend = 0; request->rq_restart = 0; - request->rq_reply_truncate = 0; + request->rq_reply_truncated = 0; spin_unlock(&request->rq_lock); if (!noreply) { @@ -608,7 +603,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) reply_md.user_ptr = &request->rq_reply_cbid; reply_md.eq_handle = ptlrpc_eq_h; - /* We must see the unlink callback to unset rq_reply_unlink, + /* We must see the unlink callback to set rq_reply_unlinked, * so we can't auto-unlink */ rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN, @@ -637,7 +632,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5); - ktime_get_real_ts64(&request->rq_arrival_time); + ktime_get_real_ts64(&request->rq_sent_tv); request->rq_sent = ktime_get_real_seconds(); /* We give the server rq_timeout secs to process the req, and * add the network latency for our local timeout. @@ -655,9 +650,10 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) connection, request->rq_request_portal, request->rq_xid, 0); - if (rc == 0) + if (likely(rc == 0)) goto out; + request->rq_req_unlinked = 1; ptlrpc_req_finished(request); if (noreply) goto out; diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c index 710fb806f122..d88faf61e740 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/nrs.c +++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c @@ -769,7 +769,7 @@ static int nrs_policy_register(struct ptlrpc_nrs *nrs, spin_unlock(&nrs->nrs_lock); if (rc != 0) - (void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name); + (void)nrs_policy_unregister(nrs, policy->pol_desc->pd_name); return rc; } @@ -975,7 +975,11 @@ static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt) LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); again: - nrs = nrs_svcpt2nrs(svcpt, hp); + /* scp_nrs_hp could be NULL due to short of memory. */ + nrs = hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg; + /* check the nrs_svcpt to see if nrs is initialized. */ + if (!nrs || !nrs->nrs_svcpt) + return; nrs->nrs_stopping = 1; list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, pol_list) { @@ -1038,7 +1042,6 @@ static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc) LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex)); list_for_each_entry(svc, &ptlrpc_all_services, srv_list) { - if (!nrs_policy_compatible(svc, desc) || unlikely(svc->srv_is_stopping)) continue; diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c index 492d63fad6f9..b514f18fae50 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c +++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -1160,7 +1156,6 @@ __u32 lustre_msg_get_timeout(struct lustre_msg *msg) if (!pb) { CERROR("invalid msg %p: no ptlrpc body!\n", msg); return 0; - } return pb->pb_timeout; } @@ -1179,7 +1174,6 @@ __u32 lustre_msg_get_service_time(struct lustre_msg *msg) if (!pb) { CERROR("invalid msg %p: no ptlrpc body!\n", msg); return 0; - } return pb->pb_service_time; } @@ -1572,7 +1566,6 @@ static void lustre_swab_obdo(struct obdo *o) CLASSERT(offsetof(typeof(*o), o_padding_4) != 0); CLASSERT(offsetof(typeof(*o), o_padding_5) != 0); CLASSERT(offsetof(typeof(*o), o_padding_6) != 0); - } void lustre_swab_obd_statfs(struct obd_statfs *os) @@ -1809,19 +1802,6 @@ void lustre_swab_obd_quotactl(struct obd_quotactl *q) } EXPORT_SYMBOL(lustre_swab_obd_quotactl); -void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p) -{ - __swab32s(&p->rp_uid); - __swab32s(&p->rp_gid); - __swab32s(&p->rp_fsuid); - __swab32s(&p->rp_fsuid_h); - __swab32s(&p->rp_fsgid); - __swab32s(&p->rp_fsgid_h); - __swab32s(&p->rp_access_perm); - __swab32s(&p->rp_padding); -}; -EXPORT_SYMBOL(lustre_swab_mdt_remote_perm); - void lustre_swab_fid2path(struct getinfo_fid2path *gf) { lustre_swab_lu_fid(&gf->gf_fid); diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c index ec3af109a1d7..6c820e944171 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/pers.c +++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c index 8a869315c258..c0529d808d81 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c +++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -57,7 +53,8 @@ ptlrpc_prep_ping(struct obd_import *imp) LUSTRE_OBD_VERSION, OBD_PING); if (req) { ptlrpc_request_set_replen(req); - req->rq_no_resend = req->rq_no_delay = 1; + req->rq_no_resend = 1; + req->rq_no_delay = 1; } return req; } diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h index 6ca26c98de1b..a9831fab80f3 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -292,4 +288,47 @@ static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set) if (atomic_dec_and_test(&set->set_refcount)) kfree(set); } + +/** initialise ptlrpc common fields */ +static inline void ptlrpc_req_comm_init(struct ptlrpc_request *req) +{ + spin_lock_init(&req->rq_lock); + atomic_set(&req->rq_refcount, 1); + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_replay_list); +} + +/** initialise client side ptlrpc request */ +static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_req *cr = &req->rq_cli; + + ptlrpc_req_comm_init(req); + + req->rq_receiving_reply = 0; + req->rq_req_unlinked = 1; + req->rq_reply_unlinked = 1; + + req->rq_receiving_reply = 0; + req->rq_req_unlinked = 1; + req->rq_reply_unlinked = 1; + + INIT_LIST_HEAD(&cr->cr_set_chain); + INIT_LIST_HEAD(&cr->cr_ctx_chain); + init_waitqueue_head(&cr->cr_reply_waitq); + init_waitqueue_head(&cr->cr_set_waitq); +} + +/** initialise server side ptlrpc request */ +static inline void ptlrpc_srv_req_init(struct ptlrpc_request *req) +{ + struct ptlrpc_srv_req *sr = &req->rq_srv; + + ptlrpc_req_comm_init(req); + req->rq_srv_req = 1; + INIT_LIST_HEAD(&sr->sr_exp_list); + INIT_LIST_HEAD(&sr->sr_timed_list); + INIT_LIST_HEAD(&sr->sr_hist_list); +} + #endif /* PTLRPC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c index a8ec0e9d7b2e..a70d5843f30e 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c index db003f5da09e..0a374b6c2f71 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -161,9 +157,9 @@ static int ptlrpcd_users; void ptlrpcd_wake(struct ptlrpc_request *req) { - struct ptlrpc_request_set *rq_set = req->rq_set; + struct ptlrpc_request_set *set = req->rq_set; - wake_up(&rq_set->set_waitq); + wake_up(&set->set_waitq); } EXPORT_SYMBOL(ptlrpcd_wake); @@ -387,7 +383,8 @@ static int ptlrpcd(void *arg) { struct ptlrpcd_ctl *pc = arg; struct ptlrpc_request_set *set; - struct lu_env env = { .le_ses = NULL }; + struct lu_context ses = { 0 }; + struct lu_env env = { .le_ses = &ses }; int rc = 0; int exit = 0; @@ -416,6 +413,13 @@ static int ptlrpcd(void *arg) */ rc = lu_context_init(&env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF); + if (rc == 0) { + rc = lu_context_init(env.le_ses, + LCT_SESSION | LCT_REMEMBER | LCT_NOREF); + if (rc != 0) + lu_context_fini(&env.le_ctx); + } + if (rc != 0) goto failed; @@ -436,9 +440,10 @@ static int ptlrpcd(void *arg) ptlrpc_expired_set, set); lu_context_enter(&env.le_ctx); - l_wait_event(set->set_waitq, - ptlrpcd_check(&env, pc), &lwi); + lu_context_enter(env.le_ses); + l_wait_event(set->set_waitq, ptlrpcd_check(&env, pc), &lwi); lu_context_exit(&env.le_ctx); + lu_context_exit(env.le_ses); /* * Abort inflight rpcs for forced stop case. @@ -461,6 +466,7 @@ static int ptlrpcd(void *arg) if (!list_empty(&set->set_requests)) ptlrpc_set_wait(set); lu_context_fini(&env.le_ctx); + lu_context_fini(env.le_ses); complete(&pc->pc_finishing); @@ -899,8 +905,11 @@ int ptlrpcd_addref(void) int rc = 0; mutex_lock(&ptlrpcd_mutex); - if (++ptlrpcd_users == 1) + if (++ptlrpcd_users == 1) { rc = ptlrpcd_init(); + if (rc < 0) + ptlrpcd_users--; + } mutex_unlock(&ptlrpcd_mutex); return rc; } diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c index 30d9a164e52d..718b3a8d61c6 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/recover.c +++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c index 187fd1d6898c..dbd819fa6b75 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -867,11 +863,9 @@ int sptlrpc_import_check_ctx(struct obd_import *imp) if (!req) return -ENOMEM; - spin_lock_init(&req->rq_lock); + ptlrpc_cli_req_init(req); atomic_set(&req->rq_refcount, 10000); - INIT_LIST_HEAD(&req->rq_ctx_chain); - init_waitqueue_head(&req->rq_reply_waitq); - init_waitqueue_head(&req->rq_set_waitq); + req->rq_import = imp; req->rq_flvr = sec->ps_flvr; req->rq_cli_ctx = ctx; @@ -1051,6 +1045,8 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, if (!early_req) return -ENOMEM; + ptlrpc_cli_req_init(early_req); + early_size = req->rq_nob_received; early_bufsz = size_roundup_power2(early_size); early_buf = libcfs_kvzalloc(early_bufsz, GFP_NOFS); @@ -1099,12 +1095,11 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, memcpy(early_buf, req->rq_repbuf, early_size); spin_unlock(&req->rq_lock); - spin_lock_init(&early_req->rq_lock); early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx); early_req->rq_flvr = req->rq_flvr; early_req->rq_repbuf = early_buf; early_req->rq_repbuf_len = early_bufsz; - early_req->rq_repdata = (struct lustre_msg *) early_buf; + early_req->rq_repdata = (struct lustre_msg *)early_buf; early_req->rq_repdata_len = early_size; early_req->rq_early = 1; early_req->rq_reqmsg = req->rq_reqmsg; @@ -1556,7 +1551,7 @@ void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg, /* move from segment + 1 to end segment */ LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2); oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); - movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg); + movesize = oldmsg_size - ((unsigned long)src - (unsigned long)msg); LASSERT(movesize >= 0); if (movesize) @@ -2196,6 +2191,9 @@ int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset) pud = lustre_msg_buf(msg, offset, 0); + if (!pud) + return -EINVAL; + pud->pud_uid = from_kuid(&init_user_ns, current_uid()); pud->pud_gid = from_kgid(&init_user_ns, current_gid()); pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid()); diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index d3872b8c9a6e..5f4d79718589 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -41,7 +37,6 @@ #define DEBUG_SUBSYSTEM S_SEC #include "../../include/linux/libcfs/libcfs.h" -#include <linux/crypto.h> #include "../include/obd.h" #include "../include/obd_cksum.h" @@ -274,7 +269,7 @@ static unsigned long enc_pools_shrink_scan(struct shrinker *s, static inline int npages_to_npools(unsigned long npages) { - return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL); + return (int)((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL); } /* @@ -511,7 +506,6 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, { struct cfs_crypto_hash_desc *hdesc; int hashsize; - char hashbuf[64]; unsigned int bufsize; int i, err; @@ -529,21 +523,23 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, for (i = 0; i < desc->bd_iov_count; i++) { cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page, - desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK, + desc->bd_iov[i].kiov_offset & ~PAGE_MASK, desc->bd_iov[i].kiov_len); } + if (hashsize > buflen) { + unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX]; + bufsize = sizeof(hashbuf); - err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf, - &bufsize); + LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n", + bufsize, hashsize); + err = cfs_crypto_hash_final(hdesc, hashbuf, &bufsize); memcpy(buf, hashbuf, buflen); } else { bufsize = buflen; err = cfs_crypto_hash_final(hdesc, buf, &bufsize); } - if (err) - cfs_crypto_hash_final(hdesc, NULL, NULL); return err; } EXPORT_SYMBOL(sptlrpc_get_bulk_checksum); diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c index a51b18bbfd34..c14035479c5f 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -648,7 +644,7 @@ static int logname2fsname(const char *logname, char *buf, int buflen) return -EINVAL; } - len = min((int) (ptr - logname), buflen - 1); + len = min((int)(ptr - logname), buflen - 1); memcpy(buf, logname, len); buf[len] = '\0'; @@ -819,7 +815,7 @@ void sptlrpc_conf_client_adapt(struct obd_device *obd) CDEBUG(D_SEC, "obd %s\n", obd->u.cli.cl_target_uuid.uuid); /* serialize with connect/disconnect import */ - down_read(&obd->u.cli.cl_sem); + down_read_nested(&obd->u.cli.cl_sem, OBD_CLI_SEM_MDCOSC); imp = obd->u.cli.cl_import; if (imp) { diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c index 9082da06b28a..9b9801ece582 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c index e610a8ddd223..07273f577969 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c index 40e5349de38c..70a61e12bb7b 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -60,7 +56,7 @@ static struct ptlrpc_svc_ctx null_svc_ctx; static inline void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp) { - msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 24; + msg->lm_secflvr |= (((__u32)sp) & 0xFF) << 24; } static inline @@ -265,7 +261,8 @@ int null_enlarge_reqbuf(struct ptlrpc_sec *sec, memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen); kvfree(req->rq_reqbuf); - req->rq_reqbuf = req->rq_reqmsg = newbuf; + req->rq_reqbuf = newbuf; + req->rq_reqmsg = newbuf; req->rq_reqbuf_len = alloc_size; if (req->rq_import) @@ -329,7 +326,7 @@ int null_alloc_rs(struct ptlrpc_request *req, int msgsize) rs->rs_svc_ctx = req->rq_svc_ctx; atomic_inc(&req->rq_svc_ctx->sc_refcount); - rs->rs_repbuf = (struct lustre_msg *) (rs + 1); + rs->rs_repbuf = (struct lustre_msg *)(rs + 1); rs->rs_repbuf_len = rs_size - sizeof(*rs); rs->rs_msg = rs->rs_repbuf; diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c index 6276bf59c3aa..5c4590b0c521 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -162,7 +158,7 @@ static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc) continue; ptr = kmap(desc->bd_iov[i].kiov_page); - off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; + off = desc->bd_iov[i].kiov_offset & ~PAGE_MASK; ptr[off] ^= 0x1; kunmap(desc->bd_iov[i].kiov_page); return; @@ -298,7 +294,7 @@ int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx, LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS); bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0); - token = (struct plain_bulk_token *) bsd->bsd_data; + token = (struct plain_bulk_token *)bsd->bsd_data; bsd->bsd_version = 0; bsd->bsd_flags = 0; @@ -343,7 +339,7 @@ int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx, LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS); bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0); - tokenv = (struct plain_bulk_token *) bsdv->bsd_data; + tokenv = (struct plain_bulk_token *)bsdv->bsd_data; if (req->rq_bulk_write) { if (bsdv->bsd_flags & BSD_FL_ERR) @@ -574,8 +570,12 @@ int plain_alloc_reqbuf(struct ptlrpc_sec *sec, lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL); req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0); - if (req->rq_pack_udesc) - sptlrpc_pack_user_desc(req->rq_reqbuf, PLAIN_PACK_USER_OFF); + if (req->rq_pack_udesc) { + int rc = sptlrpc_pack_user_desc(req->rq_reqbuf, + PLAIN_PACK_USER_OFF); + if (rc < 0) + return rc; + } return 0; } @@ -811,7 +811,7 @@ int plain_alloc_rs(struct ptlrpc_request *req, int msgsize) rs->rs_svc_ctx = req->rq_svc_ctx; atomic_inc(&req->rq_svc_ctx->sc_refcount); - rs->rs_repbuf = (struct lustre_msg *) (rs + 1); + rs->rs_repbuf = (struct lustre_msg *)(rs + 1); rs->rs_repbuf_len = rs_size - sizeof(*rs); lustre_init_msg_v2(rs->rs_repbuf, PLAIN_PACK_SEGMENTS, buflens, NULL); @@ -891,7 +891,7 @@ int plain_svc_unwrap_bulk(struct ptlrpc_request *req, LASSERT(req->rq_pack_bulk); bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0); - tokenr = (struct plain_bulk_token *) bsdr->bsd_data; + tokenr = (struct plain_bulk_token *)bsdr->bsd_data; bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0); bsdv->bsd_version = 0; @@ -926,7 +926,7 @@ int plain_svc_wrap_bulk(struct ptlrpc_request *req, bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0); bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0); - tokenv = (struct plain_bulk_token *) bsdv->bsd_data; + tokenv = (struct plain_bulk_token *)bsdv->bsd_data; bsdv->bsd_version = 0; bsdv->bsd_type = SPTLRPC_BULK_DEFAULT; diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c index 1bbd1d39ccf8..4788c4940c2a 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/service.c +++ b/drivers/staging/lustre/lustre/ptlrpc/service.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -838,6 +834,11 @@ static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt, { ptlrpc_server_hpreq_fini(req); + if (req->rq_session.lc_thread) { + lu_context_exit(&req->rq_session); + lu_context_fini(&req->rq_session); + } + ptlrpc_server_drop_request(req); } @@ -1579,6 +1580,21 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, } req->rq_svc_thread = thread; + if (thread) { + /* initialize request session, it is needed for request + * processing by target + */ + rc = lu_context_init(&req->rq_session, + LCT_SERVER_SESSION | LCT_NOREF); + if (rc) { + CERROR("%s: failure to initialize session: rc = %d\n", + thread->t_name, rc); + goto err_req; + } + req->rq_session.lc_thread = thread; + lu_context_enter(&req->rq_session); + req->rq_svc_thread->t_env->le_ses = &req->rq_session; + } ptlrpc_at_add_timed(req); @@ -1612,7 +1628,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, struct timespec64 arrived; unsigned long timediff_usecs; unsigned long arrived_usecs; - int rc; int fail_opc = 0; request = ptlrpc_server_request_get(svcpt, false); @@ -1649,21 +1664,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, at_get(&svcpt->scp_at_estimate)); } - rc = lu_context_init(&request->rq_session, LCT_SESSION | LCT_NOREF); - if (rc) { - CERROR("Failure to initialize session: %d\n", rc); - goto out_req; - } - request->rq_session.lc_thread = thread; - request->rq_session.lc_cookie = 0x5; - lu_context_enter(&request->rq_session); - - CDEBUG(D_NET, "got req %llu\n", request->rq_xid); - - request->rq_svc_thread = thread; - if (thread) - request->rq_svc_thread->t_env->le_ses = &request->rq_session; - if (likely(request->rq_export)) { if (unlikely(ptlrpc_check_req(request))) goto put_conn; @@ -1695,14 +1695,21 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING) CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val); - rc = svc->srv_ops.so_req_handler(request); + CDEBUG(D_NET, "got req %llu\n", request->rq_xid); + + /* re-assign request and sesson thread to the current one */ + request->rq_svc_thread = thread; + if (thread) { + LASSERT(request->rq_session.lc_thread); + request->rq_session.lc_thread = thread; + request->rq_session.lc_cookie = 0x55; + thread->t_env->le_ses = &request->rq_session; + } + svc->srv_ops.so_req_handler(request); ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE); put_conn: - lu_context_exit(&request->rq_session); - lu_context_fini(&request->rq_session); - if (unlikely(ktime_get_real_seconds() > request->rq_deadline)) { DEBUG_REQ(D_WARNING, request, "Request took longer than estimated (%lld:%llds); " @@ -1756,7 +1763,6 @@ put_conn: request->rq_arrival_time.tv_sec); } -out_req: ptlrpc_server_finish_active_request(svcpt, request); return 1; diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c index 3ffd2d91f274..6cc2b2edf3fc 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c +++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -276,7 +272,9 @@ void lustre_assert_wire_constants(void) (long long)FLD_QUERY); LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n", (long long)FLD_FIRST_OPC); - LASSERTF(FLD_LAST_OPC == 901, "found %lld\n", + LASSERTF(FLD_READ == 901, "found %lld\n", + (long long)FLD_READ); + LASSERTF(FLD_LAST_OPC == 902, "found %lld\n", (long long)FLD_LAST_OPC); LASSERTF(SEQ_QUERY == 700, "found %lld\n", (long long)SEQ_QUERY); @@ -1069,6 +1067,8 @@ void lustre_assert_wire_constants(void) OBD_CONNECT_PINGLESS); LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD); + LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL, + "found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID); LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n", (unsigned)OBD_CKSUM_CRC32); LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n", @@ -1265,8 +1265,6 @@ void lustre_assert_wire_constants(void) OBD_MD_FLXATTRRM); LASSERTF(OBD_MD_FLACL == (0x0000008000000000ULL), "found 0x%.16llxULL\n", OBD_MD_FLACL); - LASSERTF(OBD_MD_FLRMTPERM == (0x0000010000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLRMTPERM); LASSERTF(OBD_MD_FLMDSCAPA == (0x0000020000000000ULL), "found 0x%.16llxULL\n", OBD_MD_FLMDSCAPA); LASSERTF(OBD_MD_FLOSSCAPA == (0x0000040000000000ULL), "found 0x%.16llxULL\n", @@ -1277,14 +1275,6 @@ void lustre_assert_wire_constants(void) OBD_MD_FLCROSSREF); LASSERTF(OBD_MD_FLGETATTRLOCK == (0x0000200000000000ULL), "found 0x%.16llxULL\n", OBD_MD_FLGETATTRLOCK); - LASSERTF(OBD_MD_FLRMTLSETFACL == (0x0001000000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLRMTLSETFACL); - LASSERTF(OBD_MD_FLRMTLGETFACL == (0x0002000000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLRMTLGETFACL); - LASSERTF(OBD_MD_FLRMTRSETFACL == (0x0004000000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLRMTRSETFACL); - LASSERTF(OBD_MD_FLRMTRGETFACL == (0x0008000000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLRMTRGETFACL); LASSERTF(OBD_MD_FLDATAVERSION == (0x0010000000000000ULL), "found 0x%.16llxULL\n", OBD_MD_FLDATAVERSION); CLASSERT(OBD_FL_INLINEDATA == 0x00000001); @@ -1639,6 +1629,12 @@ void lustre_assert_wire_constants(void) OBD_BRW_ASYNC); LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n", OBD_BRW_MEMALLOC); + LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n", + OBD_BRW_OVER_USRQUOTA); + LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n", + OBD_BRW_OVER_GRPQUOTA); + LASSERTF(OBD_BRW_SOFT_SYNC == 0x4000, "found 0x%.8x\n", + OBD_BRW_SOFT_SYNC); /* Checks for struct ost_body */ LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n", @@ -1885,44 +1881,6 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->padding) == 4, "found %lld\n", (long long)(int)sizeof(((struct mdt_ioepoch *)0)->padding)); - /* Checks for struct mdt_remote_perm */ - LASSERTF((int)sizeof(struct mdt_remote_perm) == 32, "found %lld\n", - (long long)(int)sizeof(struct mdt_remote_perm)); - LASSERTF((int)offsetof(struct mdt_remote_perm, rp_uid) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_remote_perm, rp_uid)); - LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_uid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_uid)); - LASSERTF((int)offsetof(struct mdt_remote_perm, rp_gid) == 4, "found %lld\n", - (long long)(int)offsetof(struct mdt_remote_perm, rp_gid)); - LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_gid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_gid)); - LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsuid) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_remote_perm, rp_fsuid)); - LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid)); - LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsgid) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_remote_perm, rp_fsgid)); - LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid)); - LASSERTF((int)offsetof(struct mdt_remote_perm, rp_access_perm) == 24, "found %lld\n", - (long long)(int)offsetof(struct mdt_remote_perm, rp_access_perm)); - LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm)); - LASSERTF((int)offsetof(struct mdt_remote_perm, rp_padding) == 28, "found %lld\n", - (long long)(int)offsetof(struct mdt_remote_perm, rp_padding)); - LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_padding)); - LASSERTF(CFS_SETUID_PERM == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)CFS_SETUID_PERM); - LASSERTF(CFS_SETGID_PERM == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned)CFS_SETGID_PERM); - LASSERTF(CFS_SETGRP_PERM == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)CFS_SETGRP_PERM); - LASSERTF(CFS_RMTACL_PERM == 0x00000008UL, "found 0x%.8xUL\n", - (unsigned)CFS_RMTACL_PERM); - LASSERTF(CFS_RMTOWN_PERM == 0x00000010UL, "found 0x%.8xUL\n", - (unsigned)CFS_RMTOWN_PERM); - /* Checks for struct mdt_rec_setattr */ LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n", (long long)(int)sizeof(struct mdt_rec_setattr)); |