aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lustre
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lustre')
-rw-r--r--drivers/staging/lustre/lustre/Kconfig6
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_internal.h6
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_lib.c6
-rw-r--r--drivers/staging/lustre/lustre/fid/fid_request.c24
-rw-r--r--drivers/staging/lustre/lustre/fid/lproc_fid.c6
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_cache.c9
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_internal.h15
-rw-r--r--drivers/staging/lustre/lustre/fld/fld_request.c100
-rw-r--r--drivers/staging/lustre/lustre/fld/lproc_fld.c6
-rw-r--r--drivers/staging/lustre/lustre/include/cl_object.h992
-rw-r--r--drivers/staging/lustre/lustre/include/interval_tree.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lclient.h408
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_compat25.h6
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_lite.h6
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h6
-rw-r--r--drivers/staging/lustre/lustre/include/linux/lustre_user.h6
-rw-r--r--drivers/staging/lustre/lustre/include/linux/obd.h125
-rw-r--r--drivers/staging/lustre/lustre/include/lprocfs_status.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lu_object.h83
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustre_idl.h162
-rw-r--r--drivers/staging/lustre/lustre/include/lustre/lustre_user.h73
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_acl.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_cfg.h8
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_debug.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_disk.h8
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_dlm.h38
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_dlm_flags.h120
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_eacl.h17
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_export.h19
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_fid.h30
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_fld.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_ha.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_handles.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_import.h8
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_intent.h36
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_lib.h66
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_lite.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_log.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_mdc.h27
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_mds.h6
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_net.h425
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_param.h7
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_req_layout.h11
-rw-r--r--drivers/staging/lustre/lustre/include/lustre_sec.h12
-rw-r--r--drivers/staging/lustre/lustre/include/obd.h94
-rw-r--r--drivers/staging/lustre/lustre/include/obd_cksum.h7
-rw-r--r--drivers/staging/lustre/lustre/include/obd_class.h21
-rw-r--r--drivers/staging/lustre/lustre/include/obd_support.h13
-rw-r--r--drivers/staging/lustre/lustre/lclient/lcommon_cl.c1203
-rw-r--r--drivers/staging/lustre/lustre/ldlm/interval_tree.c6
-rw-r--r--drivers/staging/lustre/lustre/ldlm/l_lock.c10
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_extent.c10
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_flock.c45
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c6
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_internal.h25
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_lib.c23
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_lock.c137
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c47
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_plain.c6
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_pool.c6
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_request.c175
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_resource.c27
-rw-r--r--drivers/staging/lustre/lustre/llite/Makefile11
-rw-r--r--drivers/staging/lustre/lustre/llite/dcache.c64
-rw-r--r--drivers/staging/lustre/lustre/llite/dir.c136
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c372
-rw-r--r--drivers/staging/lustre/lustre/llite/glimpse.c (renamed from drivers/staging/lustre/lustre/lclient/glimpse.c)93
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_cl.c323
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_misc.c (renamed from drivers/staging/lustre/lustre/lclient/lcommon_misc.c)59
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_close.c77
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h398
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_lib.c287
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_mmap.c66
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_nfs.c53
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_rmtacl.c299
-rw-r--r--drivers/staging/lustre/lustre/llite/lloop.c882
-rw-r--r--drivers/staging/lustre/lustre/llite/lproc_llite.c49
-rw-r--r--drivers/staging/lustre/lustre/llite/namei.c220
-rw-r--r--drivers/staging/lustre/lustre/llite/remote_perm.c324
-rw-r--r--drivers/staging/lustre/lustre/llite/rw.c492
-rw-r--r--drivers/staging/lustre/lustre/llite/rw26.c325
-rw-r--r--drivers/staging/lustre/lustre/llite/statahead.c45
-rw-r--r--drivers/staging/lustre/lustre/llite/super25.c39
-rw-r--r--drivers/staging/lustre/lustre/llite/symlink.c16
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_dev.c278
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_internal.h338
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c935
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_lock.c59
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_object.c147
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_page.c217
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_req.c121
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr.c142
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr_cache.c17
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_fld.c6
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_intent.c32
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_internal.h9
-rw-r--r--drivers/staging/lustre/lustre/lmv/lmv_obd.c221
-rw-r--r--drivers/staging/lustre/lustre/lmv/lproc_lmv.c6
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_cl_internal.h111
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_dev.c21
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_ea.c11
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_internal.h40
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_io.c252
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_lock.c1002
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_merge.c17
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_obd.c48
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_object.c64
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_offset.c18
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_pack.c14
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_page.c189
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_pool.c70
-rw-r--r--drivers/staging/lustre/lustre/lov/lov_request.c17
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_dev.c15
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_io.c6
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_lock.c392
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_object.c13
-rw-r--r--drivers/staging/lustre/lustre/lov/lovsub_page.c10
-rw-r--r--drivers/staging/lustre/lustre/lov/lproc_lov.c6
-rw-r--r--drivers/staging/lustre/lustre/mdc/lproc_mdc.c14
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_internal.h6
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_lib.c42
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_locks.c125
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_reint.c8
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_request.c108
-rw-r--r--drivers/staging/lustre/lustre/mgc/lproc_mgc.c6
-rw-r--r--drivers/staging/lustre/lustre/mgc/mgc_internal.h6
-rw-r--r--drivers/staging/lustre/lustre/mgc/mgc_request.c26
-rw-r--r--drivers/staging/lustre/lustre/obdclass/Makefile3
-rw-r--r--drivers/staging/lustre/lustre/obdclass/acl.c415
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_internal.h6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_io.c436
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_lock.c2092
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_object.c315
-rw-r--r--drivers/staging/lustre/lustre/obdclass/cl_page.c699
-rw-r--r--drivers/staging/lustre/lustre/obdclass/class_obd.c11
-rw-r--r--drivers/staging/lustre/lustre/obdclass/debug.c10
-rw-r--r--drivers/staging/lustre/lustre/obdclass/genops.c7
-rw-r--r--drivers/staging/lustre/lustre/obdclass/kernelcomm.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linux/linux-module.c10
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog.c13
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_cat.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_internal.h6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_obd.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/llog_swab.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lprocfs_status.c78
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lu_object.c15
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lu_ref.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lustre_handles.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lustre_peer.c9
-rw-r--r--drivers/staging/lustre/lustre/obdclass/obd_config.c38
-rw-r--r--drivers/staging/lustre/lustre/obdclass/obd_mount.c23
-rw-r--r--drivers/staging/lustre/lustre/obdclass/obdo.c9
-rw-r--r--drivers/staging/lustre/lustre/obdclass/statfs_pack.c6
-rw-r--r--drivers/staging/lustre/lustre/obdclass/uuid.c6
-rw-r--r--drivers/staging/lustre/lustre/obdecho/echo_client.c179
-rw-r--r--drivers/staging/lustre/lustre/osc/lproc_osc.c74
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cache.c548
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_cl_internal.h172
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_dev.c6
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_internal.h33
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_io.c299
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_lock.c1705
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_object.c44
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_page.c578
-rw-r--r--drivers/staging/lustre/lustre/osc/osc_request.c445
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/client.c178
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/connection.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/events.c37
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/import.c24
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/layout.c49
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/llog_client.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/llog_net.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c21
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/niobuf.c26
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/nrs.c9
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pack_generic.c22
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pers.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pinger.c9
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h49
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c31
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/recover.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec.c22
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c22
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_config.c10
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_gc.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c6
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_null.c13
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_plain.c26
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/service.c58
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/wiretest.c66
194 files changed, 8022 insertions, 15655 deletions
diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig
index 8ac7cd4d6fdb..9f5d75f166e7 100644
--- a/drivers/staging/lustre/lustre/Kconfig
+++ b/drivers/staging/lustre/lustre/Kconfig
@@ -54,9 +54,3 @@ config LUSTRE_TRANSLATE_ERRNOS
bool
depends on LUSTRE_FS && !X86
default y
-
-config LUSTRE_LLITE_LLOOP
- tristate "Lustre virtual block device"
- depends on LUSTRE_FS && BLOCK
- depends on !PPC_64K_PAGES && !ARM64_64K_PAGES && !MICROBLAZE_64K_PAGES && !PAGE_SIZE_64KB && !IA64_PAGE_SIZE_64KB && !PARISC_PAGE_SIZE_64KB
- default m
diff --git a/drivers/staging/lustre/lustre/fid/fid_internal.h b/drivers/staging/lustre/lustre/fid/fid_internal.h
index b79a813977cf..5c53773ecc5a 100644
--- a/drivers/staging/lustre/lustre/fid/fid_internal.h
+++ b/drivers/staging/lustre/lustre/fid/fid_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/fid/fid_lib.c b/drivers/staging/lustre/lustre/fid/fid_lib.c
index dd65159ebb38..99ae7eb6720e 100644
--- a/drivers/staging/lustre/lustre/fid/fid_lib.c
+++ b/drivers/staging/lustre/lustre/fid/fid_lib.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
index 39269c3c56a6..454744d25956 100644
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -66,6 +62,7 @@ static int seq_client_rpc(struct lu_client_seq *seq,
unsigned int debug_mask;
int rc;
+ LASSERT(exp && !IS_ERR(exp));
req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY,
LUSTRE_MDS_VERSION, SEQ_QUERY);
if (!req)
@@ -97,23 +94,28 @@ static int seq_client_rpc(struct lu_client_seq *seq,
* request here, otherwise if MDT0 is failed(umounted),
* it can not release the export of MDT0
*/
- if (seq->lcs_type == LUSTRE_SEQ_DATA)
- req->rq_no_delay = req->rq_no_resend = 1;
+ if (seq->lcs_type == LUSTRE_SEQ_DATA) {
+ req->rq_no_delay = 1;
+ req->rq_no_resend = 1;
+ }
debug_mask = D_CONSOLE;
} else {
- if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+ if (seq->lcs_type == LUSTRE_SEQ_METADATA) {
+ req->rq_reply_portal = MDC_REPLY_PORTAL;
req->rq_request_portal = SEQ_METADATA_PORTAL;
- else
+ } else {
+ req->rq_reply_portal = OSC_REPLY_PORTAL;
req->rq_request_portal = SEQ_DATA_PORTAL;
+ }
debug_mask = D_INFO;
}
ptlrpc_at_set_req_timeout(req);
- if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+ if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
- if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+ if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
if (rc)
goto out_req;
diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c
index 1f0e78686278..81b7ca9ea2fd 100644
--- a/drivers/staging/lustre/lustre/fid/lproc_fid.c
+++ b/drivers/staging/lustre/lustre/fid/lproc_fid.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c
index 062f388cf38a..0100a935f4ff 100644
--- a/drivers/staging/lustre/lustre/fld/fld_cache.c
+++ b/drivers/staging/lustre/lustre/fld/fld_cache.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -178,8 +174,9 @@ restart_fixup:
if (n_range->lsr_end <= c_range->lsr_end) {
*n_range = *c_range;
fld_cache_entry_delete(cache, f_curr);
- } else
+ } else {
n_range->lsr_start = c_range->lsr_end;
+ }
}
/* we could have overlap over next
diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h
index e8a3caf20c9b..f0efe5b9fbec 100644
--- a/drivers/staging/lustre/lustre/fld/fld_internal.h
+++ b/drivers/staging/lustre/lustre/fld/fld_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -101,12 +97,6 @@ struct fld_cache {
unsigned int fci_no_shrink:1;
};
-enum fld_op {
- FLD_CREATE = 0,
- FLD_DELETE = 1,
- FLD_LOOKUP = 2
-};
-
enum {
/* 4M of FLD cache will not hurt client a lot. */
FLD_SERVER_CACHE_SIZE = (4 * 0x100000),
@@ -126,7 +116,8 @@ enum {
extern struct lu_fld_hash fld_hash[];
int fld_client_rpc(struct obd_export *exp,
- struct lu_seq_range *range, __u32 fld_op);
+ struct lu_seq_range *range, __u32 fld_op,
+ struct ptlrpc_request **reqp);
extern struct lprocfs_vars fld_client_debugfs_list[];
diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c
index a3d122d85c8d..e59d626a1548 100644
--- a/drivers/staging/lustre/lustre/fld/fld_request.c
+++ b/drivers/staging/lustre/lustre/fld/fld_request.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -64,9 +60,9 @@ static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
{
int rc;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
rc = list_empty(&mcw->mcw_entry);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return rc;
};
@@ -75,15 +71,15 @@ static void fld_enter_request(struct client_obd *cli)
struct mdc_cache_waiter mcw;
struct l_wait_info lwi = { 0 };
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
init_waitqueue_head(&mcw.mcw_waitq);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi);
} else {
cli->cl_r_in_flight++;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
}
}
@@ -92,10 +88,9 @@ static void fld_exit_request(struct client_obd *cli)
struct list_head *l, *tmp;
struct mdc_cache_waiter *mcw;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
cli->cl_r_in_flight--;
list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
-
if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
/* No free request slots anymore */
break;
@@ -106,7 +101,7 @@ static void fld_exit_request(struct client_obd *cli)
cli->cl_r_in_flight++;
wake_up(&mcw->mcw_waitq);
}
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
}
static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq)
@@ -392,55 +387,82 @@ void fld_client_fini(struct lu_client_fld *fld)
EXPORT_SYMBOL(fld_client_fini);
int fld_client_rpc(struct obd_export *exp,
- struct lu_seq_range *range, __u32 fld_op)
+ struct lu_seq_range *range, __u32 fld_op,
+ struct ptlrpc_request **reqp)
{
- struct ptlrpc_request *req;
+ struct ptlrpc_request *req = NULL;
struct lu_seq_range *prange;
__u32 *op;
- int rc;
+ int rc = 0;
struct obd_import *imp;
LASSERT(exp);
imp = class_exp2cliimp(exp);
- req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, LUSTRE_MDS_VERSION,
- FLD_QUERY);
- if (!req)
- return -ENOMEM;
-
- op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC);
- *op = fld_op;
+ switch (fld_op) {
+ case FLD_QUERY:
+ req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY,
+ LUSTRE_MDS_VERSION, FLD_QUERY);
+ if (!req)
+ return -ENOMEM;
+
+ /*
+ * XXX: only needed when talking to old server(< 2.6), it should
+ * be removed when < 2.6 server is not supported
+ */
+ op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC);
+ *op = FLD_LOOKUP;
+
+ if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS)
+ req->rq_allow_replay = 1;
+ break;
+ case FLD_READ:
+ req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_READ,
+ LUSTRE_MDS_VERSION, FLD_READ);
+ if (!req)
+ return -ENOMEM;
+
+ req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA,
+ RCL_SERVER, PAGE_SIZE);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+ if (rc)
+ return rc;
prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD);
*prange = *range;
-
ptlrpc_request_set_replen(req);
req->rq_request_portal = FLD_REQUEST_PORTAL;
req->rq_reply_portal = MDC_REPLY_PORTAL;
ptlrpc_at_set_req_timeout(req);
- if (fld_op == FLD_LOOKUP &&
- imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS)
- req->rq_allow_replay = 1;
-
- if (fld_op != FLD_LOOKUP)
- mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
fld_enter_request(&exp->exp_obd->u.cli);
rc = ptlrpc_queue_wait(req);
fld_exit_request(&exp->exp_obd->u.cli);
- if (fld_op != FLD_LOOKUP)
- mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
if (rc)
goto out_req;
- prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD);
- if (!prange) {
- rc = -EFAULT;
- goto out_req;
+ if (fld_op == FLD_QUERY) {
+ prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD);
+ if (!prange) {
+ rc = -EFAULT;
+ goto out_req;
+ }
+ *range = *prange;
}
- *range = *prange;
+
out_req:
- ptlrpc_req_finished(req);
+ if (rc || !reqp) {
+ ptlrpc_req_finished(req);
+ req = NULL;
+ }
+
+ if (reqp)
+ *reqp = req;
+
return rc;
}
@@ -468,7 +490,7 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
res.lsr_start = seq;
fld_range_set_type(&res, flags);
- rc = fld_client_rpc(target->ft_exp, &res, FLD_LOOKUP);
+ rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL);
if (rc == 0) {
*mds = res.lsr_index;
diff --git a/drivers/staging/lustre/lustre/fld/lproc_fld.c b/drivers/staging/lustre/lustre/fld/lproc_fld.c
index ca898befeba6..61ac420798af 100644
--- a/drivers/staging/lustre/lustre/fld/lproc_fld.c
+++ b/drivers/staging/lustre/lustre/fld/lproc_fld.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
index fb971ded5a1b..3cd4a2577d90 100644
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -82,7 +78,6 @@
* - i_mutex
* - PG_locked
* - cl_object_header::coh_page_guard
- * - cl_object_header::coh_lock_guard
* - lu_site::ls_guard
*
* See the top comment in cl_object.c for the description of overall locking and
@@ -98,9 +93,12 @@
* super-class definitions.
*/
#include "lu_object.h"
+#include <linux/atomic.h>
#include "linux/lustre_compat25.h"
#include <linux/mutex.h>
#include <linux/radix-tree.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
struct inode;
@@ -138,7 +136,7 @@ struct cl_device_operations {
* cl_req_slice_add().
*
* \see osc_req_init(), lov_req_init(), lovsub_req_init()
- * \see ccc_req_init()
+ * \see vvp_req_init()
*/
int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev,
struct cl_req *req);
@@ -147,7 +145,7 @@ struct cl_device_operations {
/**
* Device in the client stack.
*
- * \see ccc_device, lov_device, lovsub_device, osc_device
+ * \see vvp_device, lov_device, lovsub_device, osc_device
*/
struct cl_device {
/** Super-class. */
@@ -243,7 +241,7 @@ enum cl_attr_valid {
* be discarded from the memory, all its sub-objects are torn-down and
* destroyed too.
*
- * \see ccc_object, lov_object, lovsub_object, osc_object
+ * \see vvp_object, lov_object, lovsub_object, osc_object
*/
struct cl_object {
/** super class */
@@ -322,7 +320,7 @@ struct cl_object_operations {
* to be used instead of newly created.
*/
int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage);
+ struct cl_page *page, pgoff_t index);
/**
* Initialize lock slice for this layer. Called top-to-bottom through
* every object layer when a new cl_lock is instantiated. Layer
@@ -383,11 +381,17 @@ struct cl_object_operations {
* object. Layers are supposed to fill parts of \a lvb that will be
* shipped to the glimpse originator as a glimpse result.
*
- * \see ccc_object_glimpse(), lovsub_object_glimpse(),
+ * \see vvp_object_glimpse(), lovsub_object_glimpse(),
* \see osc_object_glimpse()
*/
int (*coo_glimpse)(const struct lu_env *env,
const struct cl_object *obj, struct ost_lvb *lvb);
+ /**
+ * Object prune method. Called when the layout is going to change on
+ * this object, therefore each layer has to clean up their cache,
+ * mainly pages and locks.
+ */
+ int (*coo_prune)(const struct lu_env *env, struct cl_object *obj);
};
/**
@@ -398,22 +402,6 @@ struct cl_object_header {
* here.
*/
struct lu_object_header coh_lu;
- /** \name locks
- * \todo XXX move locks below to the separate cache-lines, they are
- * mostly useless otherwise.
- */
- /** @{ */
- /** Lock protecting page tree. */
- spinlock_t coh_page_guard;
- /** Lock protecting lock list. */
- spinlock_t coh_lock_guard;
- /** @} locks */
- /** Radix tree of cl_page's, cached for this object. */
- struct radix_tree_root coh_tree;
- /** # of pages in radix tree. */
- unsigned long coh_pages;
- /** List of cl_lock's granted for this object. */
- struct list_head coh_locks;
/**
* Parent object. It is assumed that an object has a well-defined
@@ -460,10 +448,6 @@ struct cl_object_header {
co_lu.lo_linkage)
/** @} cl_object */
-#ifndef pgoff_t
-#define pgoff_t unsigned long
-#endif
-
#define CL_PAGE_EOF ((pgoff_t)~0ull)
/** \addtogroup cl_page cl_page
@@ -727,16 +711,10 @@ struct cl_page {
atomic_t cp_ref;
/** An object this page is a part of. Immutable after creation. */
struct cl_object *cp_obj;
- /** Logical page index within the object. Immutable after creation. */
- pgoff_t cp_index;
/** List of slices. Immutable after creation. */
struct list_head cp_layers;
- /** Parent page, NULL for top-level page. Immutable after creation. */
- struct cl_page *cp_parent;
- /** Lower-layer page. NULL for bottommost page. Immutable after
- * creation.
- */
- struct cl_page *cp_child;
+ /** vmpage */
+ struct page *cp_vmpage;
/**
* Page state. This field is const to avoid accidental update, it is
* modified only internally within cl_page.c. Protected by a VM lock.
@@ -787,10 +765,11 @@ struct cl_page {
/**
* Per-layer part of cl_page.
*
- * \see ccc_page, lov_page, osc_page
+ * \see vvp_page, lov_page, osc_page
*/
struct cl_page_slice {
struct cl_page *cpl_page;
+ pgoff_t cpl_index;
/**
* Object slice corresponding to this page slice. Immutable after
* creation.
@@ -804,16 +783,9 @@ struct cl_page_slice {
/**
* Lock mode. For the client extent locks.
*
- * \warning: cl_lock_mode_match() assumes particular ordering here.
* \ingroup cl_lock
*/
enum cl_lock_mode {
- /**
- * Mode of a lock that protects no data, and exists only as a
- * placeholder. This is used for `glimpse' requests. A phantom lock
- * might get promoted to real lock at some point.
- */
- CLM_PHANTOM,
CLM_READ,
CLM_WRITE,
CLM_GROUP
@@ -846,11 +818,6 @@ struct cl_page_operations {
*/
/**
- * \return the underlying VM page. Optional.
- */
- struct page *(*cpo_vmpage)(const struct lu_env *env,
- const struct cl_page_slice *slice);
- /**
* Called when \a io acquires this page into the exclusive
* ownership. When this method returns, it is guaranteed that the is
* not owned by other io, and no transfer is going on against
@@ -897,14 +864,6 @@ struct cl_page_operations {
void (*cpo_export)(const struct lu_env *env,
const struct cl_page_slice *slice, int uptodate);
/**
- * Unmaps page from the user space (if it is mapped).
- *
- * \see cl_page_unmap()
- * \see vvp_page_unmap()
- */
- int (*cpo_unmap)(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io);
- /**
* Checks whether underlying VM page is locked (in the suitable
* sense). Used for assertions.
*
@@ -957,7 +916,7 @@ struct cl_page_operations {
*/
int (*cpo_is_under_lock)(const struct lu_env *env,
const struct cl_page_slice *slice,
- struct cl_io *io);
+ struct cl_io *io, pgoff_t *max);
/**
* Optional debugging helper. Prints given page slice.
@@ -1027,26 +986,6 @@ struct cl_page_operations {
*/
int (*cpo_make_ready)(const struct lu_env *env,
const struct cl_page_slice *slice);
- /**
- * Announce that this page is to be written out
- * opportunistically, that is, page is dirty, it is not
- * necessary to start write-out transfer right now, but
- * eventually page has to be written out.
- *
- * Main caller of this is the write path (see
- * vvp_io_commit_write()), using this method to build a
- * "transfer cache" from which large transfers are then
- * constructed by the req-formation engine.
- *
- * \todo XXX it would make sense to add page-age tracking
- * semantics here, and to oblige the req-formation engine to
- * send the page out not later than it is too old.
- *
- * \see cl_page_cache_add()
- */
- int (*cpo_cache_add)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io);
} io[CRT_NR];
/**
* Tell transfer engine that only [to, from] part of a page should be
@@ -1098,9 +1037,8 @@ struct cl_page_operations {
*/
#define CL_PAGE_DEBUG(mask, env, page, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
cl_page_print(env, &msgdata, lu_cdebug_printer, page); \
CDEBUG(mask, format, ## __VA_ARGS__); \
} \
@@ -1111,9 +1049,8 @@ do { \
*/
#define CL_PAGE_HEADER(mask, env, page, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \
CDEBUG(mask, format, ## __VA_ARGS__); \
} \
@@ -1130,6 +1067,12 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
#define cl_page_in_use(pg) __page_in_use(pg, 1)
#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+static inline struct page *cl_page_vmpage(struct cl_page *page)
+{
+ LASSERT(page->cp_vmpage);
+ return page->cp_vmpage;
+}
+
/** @} cl_page */
/** \addtogroup cl_lock cl_lock
@@ -1150,12 +1093,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
* (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
* cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
*
- * All locks for a given object are linked into cl_object_header::coh_locks
- * list (protected by cl_object_header::coh_lock_guard spin-lock) through
- * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can
- * sort it in starting lock offset, or use altogether different data structure
- * like a tree.
- *
* Typical cl_lock consists of the two layers:
*
* - vvp_lock (vvp specific data), and
@@ -1177,111 +1114,29 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
*
* LIFE CYCLE
*
- * cl_lock is reference counted. When reference counter drops to 0, lock is
- * placed in the cache, except when lock is in CLS_FREEING state. CLS_FREEING
- * lock is destroyed when last reference is released. Referencing between
- * top-lock and its sub-locks is described in the lov documentation module.
- *
- * STATE MACHINE
- *
- * Also, cl_lock is a state machine. This requires some clarification. One of
- * the goals of client IO re-write was to make IO path non-blocking, or at
- * least to make it easier to make it non-blocking in the future. Here
- * `non-blocking' means that when a system call (read, write, truncate)
- * reaches a situation where it has to wait for a communication with the
- * server, it should --instead of waiting-- remember its current state and
- * switch to some other work. E.g,. instead of waiting for a lock enqueue,
- * client should proceed doing IO on the next stripe, etc. Obviously this is
- * rather radical redesign, and it is not planned to be fully implemented at
- * this time, instead we are putting some infrastructure in place, that would
- * make it easier to do asynchronous non-blocking IO easier in the
- * future. Specifically, where old locking code goes to sleep (waiting for
- * enqueue, for example), new code returns cl_lock_transition::CLO_WAIT. When
- * enqueue reply comes, its completion handler signals that lock state-machine
- * is ready to transit to the next state. There is some generic code in
- * cl_lock.c that sleeps, waiting for these signals. As a result, for users of
- * this cl_lock.c code, it looks like locking is done in normal blocking
- * fashion, and it the same time it is possible to switch to the non-blocking
- * locking (simply by returning cl_lock_transition::CLO_WAIT from cl_lock.c
- * functions).
- *
- * For a description of state machine states and transitions see enum
- * cl_lock_state.
- *
- * There are two ways to restrict a set of states which lock might move to:
- *
- * - placing a "hold" on a lock guarantees that lock will not be moved
- * into cl_lock_state::CLS_FREEING state until hold is released. Hold
- * can be only acquired on a lock that is not in
- * cl_lock_state::CLS_FREEING. All holds on a lock are counted in
- * cl_lock::cll_holds. Hold protects lock from cancellation and
- * destruction. Requests to cancel and destroy a lock on hold will be
- * recorded, but only honored when last hold on a lock is released;
- *
- * - placing a "user" on a lock guarantees that lock will not leave
- * cl_lock_state::CLS_NEW, cl_lock_state::CLS_QUEUING,
- * cl_lock_state::CLS_ENQUEUED and cl_lock_state::CLS_HELD set of
- * states, once it enters this set. That is, if a user is added onto a
- * lock in a state not from this set, it doesn't immediately enforce
- * lock to move to this set, but once lock enters this set it will
- * remain there until all users are removed. Lock users are counted in
- * cl_lock::cll_users.
- *
- * User is used to assure that lock is not canceled or destroyed while
- * it is being enqueued, or actively used by some IO.
- *
- * Currently, a user always comes with a hold (cl_lock_invariant()
- * checks that a number of holds is not less than a number of users).
- *
- * CONCURRENCY
- *
- * This is how lock state-machine operates. struct cl_lock contains a mutex
- * cl_lock::cll_guard that protects struct fields.
- *
- * - mutex is taken, and cl_lock::cll_state is examined.
- *
- * - for every state there are possible target states where lock can move
- * into. They are tried in order. Attempts to move into next state are
- * done by _try() functions in cl_lock.c:cl_{enqueue,unlock,wait}_try().
- *
- * - if the transition can be performed immediately, state is changed,
- * and mutex is released.
- *
- * - if the transition requires blocking, _try() function returns
- * cl_lock_transition::CLO_WAIT. Caller unlocks mutex and goes to
- * sleep, waiting for possibility of lock state change. It is woken
- * up when some event occurs, that makes lock state change possible
- * (e.g., the reception of the reply from the server), and repeats
- * the loop.
- *
- * Top-lock and sub-lock has separate mutexes and the latter has to be taken
- * first to avoid dead-lock.
- *
- * To see an example of interaction of all these issues, take a look at the
- * lov_cl.c:lov_lock_enqueue() function. It is called as a part of
- * cl_enqueue_try(), and tries to advance top-lock to ENQUEUED state, by
- * advancing state-machines of its sub-locks (lov_lock_enqueue_one()). Note
- * also, that it uses trylock to grab sub-lock mutex to avoid dead-lock. It
- * also has to handle CEF_ASYNC enqueue, when sub-locks enqueues have to be
- * done in parallel, rather than one after another (this is used for glimpse
- * locks, that cannot dead-lock).
+ * cl_lock is a cacheless data container for the requirements of locks to
+ * complete the IO. cl_lock is created before I/O starts and destroyed when the
+ * I/O is complete.
+ *
+ * cl_lock depends on LDLM lock to fulfill lock semantics. LDLM lock is attached
+ * to cl_lock at OSC layer. LDLM lock is still cacheable.
*
* INTERFACE AND USAGE
*
- * struct cl_lock_operations provide a number of call-backs that are invoked
- * when events of interest occurs. Layers can intercept and handle glimpse,
- * blocking, cancel ASTs and a reception of the reply from the server.
+ * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel. A
+ * cl_lock is enqueued by cl_lock_request(), which will call clo_enqueue()
+ * methods for each layer to enqueue the lock. At the LOV layer, if a cl_lock
+ * consists of multiple sub cl_locks, each sub-lock will be enqueued
+ * correspondingly. At OSC layer, the lock enqueue request will tend to reuse
+ * cached LDLM lock; otherwise a new LDLM lock will have to be requested from
+ * OST side.
*
- * One important difference with the old client locking model is that new
- * client has a representation for the top-lock, whereas in the old code only
- * sub-locks existed as real data structures and file-level locks are
- * represented by "request sets" that are created and destroyed on each and
- * every lock creation.
+ * cl_lock_cancel() must be called to release a cl_lock after use. clo_cancel()
+ * method will be called for each layer to release the resource held by this
+ * lock. At OSC layer, the reference count of LDLM lock, which is held at
+ * clo_enqueue time, is released.
*
- * Top-locks are cached, and can be found in the cache by the system calls. It
- * is possible that top-lock is in cache, but some of its sub-locks were
- * canceled and destroyed. In that case top-lock has to be enqueued again
- * before it can be used.
+ * LDLM lock can only be canceled if there is no cl_lock using it.
*
* Overall process of the locking during IO operation is as following:
*
@@ -1294,7 +1149,7 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
*
* - when all locks are acquired, IO is performed;
*
- * - locks are released into cache.
+ * - locks are released after IO is complete.
*
* Striping introduces major additional complexity into locking. The
* fundamental problem is that it is generally unsafe to actively use (hold)
@@ -1316,16 +1171,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
* buf is a part of memory mapped Lustre file, a lock or locks protecting buf
* has to be held together with the usual lock on [offset, offset + count].
*
- * As multi-stripe locks have to be allowed, it makes sense to cache them, so
- * that, for example, a sequence of O_APPEND writes can proceed quickly
- * without going down to the individual stripes to do lock matching. On the
- * other hand, multi-stripe locks shouldn't be used by normal read/write
- * calls. To achieve this, every layer can implement ->clo_fits_into() method,
- * that is called by lock matching code (cl_lock_lookup()), and that can be
- * used to selectively disable matching of certain locks for certain IOs. For
- * example, lov layer implements lov_lock_fits_into() that allow multi-stripe
- * locks to be matched only for truncates and O_APPEND writes.
- *
* Interaction with DLM
*
* In the expected setup, cl_lock is ultimately backed up by a collection of
@@ -1356,295 +1201,27 @@ struct cl_lock_descr {
__u32 cld_enq_flags;
};
-#define DDESCR "%s(%d):[%lu, %lu]"
+#define DDESCR "%s(%d):[%lu, %lu]:%x"
#define PDESCR(descr) \
cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \
- (descr)->cld_start, (descr)->cld_end
+ (descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags
const char *cl_lock_mode_name(const enum cl_lock_mode mode);
/**
- * Lock state-machine states.
- *
- * \htmlonly
- * <pre>
- *
- * Possible state transitions:
- *
- * +------------------>NEW
- * | |
- * | | cl_enqueue_try()
- * | |
- * | cl_unuse_try() V
- * | +--------------QUEUING (*)
- * | | |
- * | | | cl_enqueue_try()
- * | | |
- * | | cl_unuse_try() V
- * sub-lock | +-------------ENQUEUED (*)
- * canceled | | |
- * | | | cl_wait_try()
- * | | |
- * | | (R)
- * | | |
- * | | V
- * | | HELD<---------+
- * | | | |
- * | | | | cl_use_try()
- * | | cl_unuse_try() | |
- * | | | |
- * | | V ---+
- * | +------------>INTRANSIT (D) <--+
- * | | |
- * | cl_unuse_try() | | cached lock found
- * | | | cl_use_try()
- * | | |
- * | V |
- * +------------------CACHED---------+
- * |
- * (C)
- * |
- * V
- * FREEING
- *
- * Legend:
- *
- * In states marked with (*) transition to the same state (i.e., a loop
- * in the diagram) is possible.
- *
- * (R) is the point where Receive call-back is invoked: it allows layers
- * to handle arrival of lock reply.
- *
- * (C) is the point where Cancellation call-back is invoked.
- *
- * (D) is the transit state which means the lock is changing.
- *
- * Transition to FREEING state is possible from any other state in the
- * diagram in case of unrecoverable error.
- * </pre>
- * \endhtmlonly
- *
- * These states are for individual cl_lock object. Top-lock and its sub-locks
- * can be in the different states. Another way to say this is that we have
- * nested state-machines.
- *
- * Separate QUEUING and ENQUEUED states are needed to support non-blocking
- * operation for locks with multiple sub-locks. Imagine lock on a file F, that
- * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send
- * enqueue to S0, wait for its completion, then send enqueue for S1, wait for
- * its completion and at last enqueue lock for S2, and wait for its
- * completion. In that case, top-lock is in QUEUING state while S0, S1 are
- * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note
- * that in this case, sub-locks move from state to state, and top-lock remains
- * in the same state).
- */
-enum cl_lock_state {
- /**
- * Lock that wasn't yet enqueued
- */
- CLS_NEW,
- /**
- * Enqueue is in progress, blocking for some intermediate interaction
- * with the other side.
- */
- CLS_QUEUING,
- /**
- * Lock is fully enqueued, waiting for server to reply when it is
- * granted.
- */
- CLS_ENQUEUED,
- /**
- * Lock granted, actively used by some IO.
- */
- CLS_HELD,
- /**
- * This state is used to mark the lock is being used, or unused.
- * We need this state because the lock may have several sublocks,
- * so it's impossible to have an atomic way to bring all sublocks
- * into CLS_HELD state at use case, or all sublocks to CLS_CACHED
- * at unuse case.
- * If a thread is referring to a lock, and it sees the lock is in this
- * state, it must wait for the lock.
- * See state diagram for details.
- */
- CLS_INTRANSIT,
- /**
- * Lock granted, not used.
- */
- CLS_CACHED,
- /**
- * Lock is being destroyed.
- */
- CLS_FREEING,
- CLS_NR
-};
-
-enum cl_lock_flags {
- /**
- * lock has been cancelled. This flag is never cleared once set (by
- * cl_lock_cancel0()).
- */
- CLF_CANCELLED = 1 << 0,
- /** cancellation is pending for this lock. */
- CLF_CANCELPEND = 1 << 1,
- /** destruction is pending for this lock. */
- CLF_DOOMED = 1 << 2,
- /** from enqueue RPC reply upcall. */
- CLF_FROM_UPCALL = 1 << 3,
-};
-
-/**
- * Lock closure.
- *
- * Lock closure is a collection of locks (both top-locks and sub-locks) that
- * might be updated in a result of an operation on a certain lock (which lock
- * this is a closure of).
- *
- * Closures are needed to guarantee dead-lock freedom in the presence of
- *
- * - nested state-machines (top-lock state-machine composed of sub-lock
- * state-machines), and
- *
- * - shared sub-locks.
- *
- * Specifically, many operations, such as lock enqueue, wait, unlock,
- * etc. start from a top-lock, and then operate on a sub-locks of this
- * top-lock, holding a top-lock mutex. When sub-lock state changes as a result
- * of such operation, this change has to be propagated to all top-locks that
- * share this sub-lock. Obviously, no natural lock ordering (e.g.,
- * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has
- * to be used. Lock closure systematizes this try-and-repeat logic.
- */
-struct cl_lock_closure {
- /**
- * Lock that is mutexed when closure construction is started. When
- * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on
- * origin is released before waiting.
- */
- struct cl_lock *clc_origin;
- /**
- * List of enclosed locks, so far. Locks are linked here through
- * cl_lock::cll_inclosure.
- */
- struct list_head clc_list;
- /**
- * True iff closure is in a `wait' mode. This determines what
- * cl_lock_enclosure() does when a lock L to be added to the closure
- * is currently mutexed by some other thread.
- *
- * If cl_lock_closure::clc_wait is not set, then closure construction
- * fails with CLO_REPEAT immediately.
- *
- * In wait mode, cl_lock_enclosure() waits until next attempt to build
- * a closure might succeed. To this end it releases an origin mutex
- * (cl_lock_closure::clc_origin), that has to be the only lock mutex
- * owned by the current thread, and then waits on L mutex (by grabbing
- * it and immediately releasing), before returning CLO_REPEAT to the
- * caller.
- */
- int clc_wait;
- /** Number of locks in the closure. */
- int clc_nr;
-};
-
-/**
* Layered client lock.
*/
struct cl_lock {
- /** Reference counter. */
- atomic_t cll_ref;
/** List of slices. Immutable after creation. */
struct list_head cll_layers;
- /**
- * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected
- * by cl_lock::cll_descr::cld_obj::coh_lock_guard.
- */
- struct list_head cll_linkage;
- /**
- * Parameters of this lock. Protected by
- * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within
- * cl_lock::cll_guard. Modified only on lock creation and in
- * cl_lock_modify().
- */
+ /** lock attribute, extent, cl_object, etc. */
struct cl_lock_descr cll_descr;
- /** Protected by cl_lock::cll_guard. */
- enum cl_lock_state cll_state;
- /** signals state changes. */
- wait_queue_head_t cll_wq;
- /**
- * Recursive lock, most fields in cl_lock{} are protected by this.
- *
- * Locking rules: this mutex is never held across network
- * communication, except when lock is being canceled.
- *
- * Lock ordering: a mutex of a sub-lock is taken first, then a mutex
- * on a top-lock. Other direction is implemented through a
- * try-lock-repeat loop. Mutices of unrelated locks can be taken only
- * by try-locking.
- *
- * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait().
- */
- struct mutex cll_guard;
- struct task_struct *cll_guarder;
- int cll_depth;
-
- /**
- * the owner for INTRANSIT state
- */
- struct task_struct *cll_intransit_owner;
- int cll_error;
- /**
- * Number of holds on a lock. A hold prevents a lock from being
- * canceled and destroyed. Protected by cl_lock::cll_guard.
- *
- * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release()
- */
- int cll_holds;
- /**
- * Number of lock users. Valid in cl_lock_state::CLS_HELD state
- * only. Lock user pins lock in CLS_HELD state. Protected by
- * cl_lock::cll_guard.
- *
- * \see cl_wait(), cl_unuse().
- */
- int cll_users;
- /**
- * Flag bit-mask. Values from enum cl_lock_flags. Updates are
- * protected by cl_lock::cll_guard.
- */
- unsigned long cll_flags;
- /**
- * A linkage into a list of locks in a closure.
- *
- * \see cl_lock_closure
- */
- struct list_head cll_inclosure;
- /**
- * Confict lock at queuing time.
- */
- struct cl_lock *cll_conflict;
- /**
- * A list of references to this lock, for debugging.
- */
- struct lu_ref cll_reference;
- /**
- * A list of holds on this lock, for debugging.
- */
- struct lu_ref cll_holders;
- /**
- * A reference for cl_lock::cll_descr::cld_obj. For debugging.
- */
- struct lu_ref_link cll_obj_ref;
-#ifdef CONFIG_LOCKDEP
- /* "dep_map" name is assumed by lockdep.h macros. */
- struct lockdep_map dep_map;
-#endif
};
/**
* Per-layer part of cl_lock
*
- * \see ccc_lock, lov_lock, lovsub_lock, osc_lock
+ * \see vvp_lock, lov_lock, lovsub_lock, osc_lock
*/
struct cl_lock_slice {
struct cl_lock *cls_lock;
@@ -1658,174 +1235,36 @@ struct cl_lock_slice {
};
/**
- * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}().
- *
- * NOTE: lov_subresult() depends on ordering here.
- */
-enum cl_lock_transition {
- /** operation cannot be completed immediately. Wait for state change. */
- CLO_WAIT = 1,
- /** operation had to release lock mutex, restart. */
- CLO_REPEAT = 2,
- /** lower layer re-enqueued. */
- CLO_REENQUEUED = 3,
-};
-
-/**
*
* \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
*/
struct cl_lock_operations {
- /**
- * \name statemachine
- *
- * State machine transitions. These 3 methods are called to transfer
- * lock from one state to another, as described in the commentary
- * above enum #cl_lock_state.
- *
- * \retval 0 this layer has nothing more to do to before
- * transition to the target state happens;
- *
- * \retval CLO_REPEAT method had to release and re-acquire cl_lock
- * mutex, repeat invocation of transition method
- * across all layers;
- *
- * \retval CLO_WAIT this layer cannot move to the target state
- * immediately, as it has to wait for certain event
- * (e.g., the communication with the server). It
- * is guaranteed, that when the state transfer
- * becomes possible, cl_lock::cll_wq wait-queue
- * is signaled. Caller can wait for this event by
- * calling cl_lock_state_wait();
- *
- * \retval -ve failure, abort state transition, move the lock
- * into cl_lock_state::CLS_FREEING state, and set
- * cl_lock::cll_error.
- *
- * Once all layers voted to agree to transition (by returning 0), lock
- * is moved into corresponding target state. All state transition
- * methods are optional.
- */
/** @{ */
/**
* Attempts to enqueue the lock. Called top-to-bottom.
*
- * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
+ * \retval 0 this layer has enqueued the lock successfully
+ * \retval >0 this layer has enqueued the lock, but needs to wait on
+ * @anchor for resources
+ * \retval -ve failure
+ *
+ * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
* \see osc_lock_enqueue()
*/
int (*clo_enqueue)(const struct lu_env *env,
const struct cl_lock_slice *slice,
- struct cl_io *io, __u32 enqflags);
- /**
- * Attempts to wait for enqueue result. Called top-to-bottom.
- *
- * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait()
- */
- int (*clo_wait)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
+ struct cl_io *io, struct cl_sync_io *anchor);
/**
- * Attempts to unlock the lock. Called bottom-to-top. In addition to
- * usual return values of lock state-machine methods, this can return
- * -ESTALE to indicate that lock cannot be returned to the cache, and
- * has to be re-initialized.
- * unuse is a one-shot operation, so it must NOT return CLO_WAIT.
- *
- * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse()
- */
- int (*clo_unuse)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
- /**
- * Notifies layer that cached lock is started being used.
- *
- * \pre lock->cll_state == CLS_CACHED
- *
- * \see lov_lock_use(), osc_lock_use()
- */
- int (*clo_use)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
- /** @} statemachine */
- /**
- * A method invoked when lock state is changed (as a result of state
- * transition). This is used, for example, to track when the state of
- * a sub-lock changes, to propagate this change to the corresponding
- * top-lock. Optional
- *
- * \see lovsub_lock_state()
- */
- void (*clo_state)(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- enum cl_lock_state st);
- /**
- * Returns true, iff given lock is suitable for the given io, idea
- * being, that there are certain "unsafe" locks, e.g., ones acquired
- * for O_APPEND writes, that we don't want to re-use for a normal
- * write, to avoid the danger of cascading evictions. Optional. Runs
- * under cl_object_header::coh_lock_guard.
- *
- * XXX this should take more information about lock needed by
- * io. Probably lock description or something similar.
- *
- * \see lov_fits_into()
- */
- int (*clo_fits_into)(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *need,
- const struct cl_io *io);
- /**
- * \name ast
- * Asynchronous System Traps. All of then are optional, all are
- * executed bottom-to-top.
- */
- /** @{ */
-
- /**
- * Cancellation callback. Cancel a lock voluntarily, or under
- * the request of server.
+ * Cancel a lock and release its DLM lock reference, without cancelling
+ * the DLM lock itself.
*/
void (*clo_cancel)(const struct lu_env *env,
const struct cl_lock_slice *slice);
- /**
- * Lock weighting ast. Executed to estimate how precious this lock
- * is. The sum of results across all layers is used to determine
- * whether lock worth keeping in cache given present memory usage.
- *
- * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh().
- */
- unsigned long (*clo_weigh)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
- /** @} ast */
-
- /**
- * \see lovsub_lock_closure()
- */
- int (*clo_closure)(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_lock_closure *closure);
- /**
- * Executed bottom-to-top when lock description changes (e.g., as a
- * result of server granting more generous lock than was requested).
- *
- * \see lovsub_lock_modify()
- */
- int (*clo_modify)(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *updated);
- /**
- * Notifies layers (bottom-to-top) that lock is going to be
- * destroyed. Responsibility of layers is to prevent new references on
- * this lock from being acquired once this method returns.
- *
- * This can be called multiple times due to the races.
- *
- * \see cl_lock_delete()
- * \see osc_lock_delete(), lovsub_lock_delete()
- */
- void (*clo_delete)(const struct lu_env *env,
- const struct cl_lock_slice *slice);
+ /** @} */
/**
* Destructor. Frees resources and the slice.
*
- * \see ccc_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
+ * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
* \see osc_lock_fini()
*/
void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice);
@@ -2016,7 +1455,7 @@ enum cl_io_state {
* This is usually embedded into layer session data, rather than allocated
* dynamically.
*
- * \see vvp_io, lov_io, osc_io, ccc_io
+ * \see vvp_io, lov_io, osc_io
*/
struct cl_io_slice {
struct cl_io *cis_io;
@@ -2031,6 +1470,8 @@ struct cl_io_slice {
struct list_head cis_linkage;
};
+typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
+ struct cl_page *);
/**
* Per-layer io operations.
* \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
@@ -2114,7 +1555,7 @@ struct cl_io_operations {
void (*cio_fini)(const struct lu_env *env,
const struct cl_io_slice *slice);
} op[CIT_OP_NR];
- struct {
+
/**
* Submit pages from \a queue->c2_qin for IO, and move
* successfully submitted pages into \a queue->c2_qout. Return
@@ -2127,7 +1568,15 @@ struct cl_io_operations {
const struct cl_io_slice *slice,
enum cl_req_type crt,
struct cl_2queue *queue);
- } req_op[CRT_NR];
+ /**
+ * Queue async page for write.
+ * The difference between cio_submit and cio_commit_async is that
+ * cio_submit is for urgent requests.
+ */
+ int (*cio_commit_async)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ struct cl_page_list *queue, int from, int to,
+ cl_commit_cbt cb);
/**
* Read missing page.
*
@@ -2140,31 +1589,6 @@ struct cl_io_operations {
const struct cl_io_slice *slice,
const struct cl_page_slice *page);
/**
- * Prepare write of a \a page. Called bottom-to-top by a top-level
- * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for
- * get data from user-level buffer.
- *
- * \pre io->ci_type == CIT_WRITE
- *
- * \see vvp_io_prepare_write(), lov_io_prepare_write(),
- * osc_io_prepare_write().
- */
- int (*cio_prepare_write)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- const struct cl_page_slice *page,
- unsigned from, unsigned to);
- /**
- *
- * \pre io->ci_type == CIT_WRITE
- *
- * \see vvp_io_commit_write(), lov_io_commit_write(),
- * osc_io_commit_write().
- */
- int (*cio_commit_write)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- const struct cl_page_slice *page,
- unsigned from, unsigned to);
- /**
* Optional debugging helper. Print given io slice.
*/
int (*cio_print)(const struct lu_env *env, void *cookie,
@@ -2216,9 +1640,13 @@ enum cl_enq_flags {
*/
CEF_AGL = 0x00000020,
/**
+ * enqueue a lock to test DLM lock existence.
+ */
+ CEF_PEEK = 0x00000040,
+ /**
* mask of enq_flags.
*/
- CEF_MASK = 0x0000003f,
+ CEF_MASK = 0x0000007f,
};
/**
@@ -2228,12 +1656,12 @@ enum cl_enq_flags {
struct cl_io_lock_link {
/** linkage into one of cl_lockset lists. */
struct list_head cill_linkage;
- struct cl_lock_descr cill_descr;
- struct cl_lock *cill_lock;
+ struct cl_lock cill_lock;
/** optional destructor */
void (*cill_fini)(const struct lu_env *env,
struct cl_io_lock_link *link);
};
+#define cill_descr cill_lock.cll_descr
/**
* Lock-set represents a collection of locks, that io needs at a
@@ -2267,8 +1695,6 @@ struct cl_io_lock_link {
struct cl_lockset {
/** locks to be acquired. */
struct list_head cls_todo;
- /** locks currently being processed. */
- struct list_head cls_curr;
/** locks acquired. */
struct list_head cls_done;
};
@@ -2632,9 +2058,7 @@ struct cl_site {
* and top-locks (and top-pages) are accounted here.
*/
struct cache_stats cs_pages;
- struct cache_stats cs_locks;
atomic_t cs_pages_state[CPS_NR];
- atomic_t cs_locks_state[CLS_NR];
};
int cl_site_init(struct cl_site *s, struct cl_device *top);
@@ -2725,7 +2149,7 @@ static inline void cl_device_fini(struct cl_device *d)
}
void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
- struct cl_object *obj,
+ struct cl_object *obj, pgoff_t index,
const struct cl_page_operations *ops);
void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
struct cl_object *obj,
@@ -2758,7 +2182,7 @@ int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
struct ost_lvb *lvb);
int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
const struct cl_object_conf *conf);
-void cl_object_prune(const struct lu_env *env, struct cl_object *obj);
+int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
/**
@@ -2772,7 +2196,7 @@ static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1)
static inline void cl_object_page_init(struct cl_object *clob, int size)
{
clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
- cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8);
+ cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size);
}
static inline void *cl_object_page_slice(struct cl_object *clob,
@@ -2781,6 +2205,16 @@ static inline void *cl_object_page_slice(struct cl_object *clob,
return (void *)((char *)page + clob->co_slice_off);
}
+/**
+ * Return refcount of cl_object.
+ */
+static inline int cl_object_refc(struct cl_object *clob)
+{
+ struct lu_object_header *header = clob->co_lu.lo_header;
+
+ return atomic_read(&header->loh_ref);
+}
+
/** @} cl_object */
/** \defgroup cl_page cl_page
@@ -2794,28 +2228,20 @@ enum {
};
/* callback of cl_page_gang_lookup() */
-typedef int (*cl_page_gang_cb_t) (const struct lu_env *, struct cl_io *,
- struct cl_page *, void *);
-int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, pgoff_t start, pgoff_t end,
- cl_page_gang_cb_t cb, void *cbdata);
-struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index);
struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj,
pgoff_t idx, struct page *vmpage,
enum cl_page_type type);
-struct cl_page *cl_page_find_sub(const struct lu_env *env,
- struct cl_object *obj,
- pgoff_t idx, struct page *vmpage,
- struct cl_page *parent);
+struct cl_page *cl_page_alloc(const struct lu_env *env,
+ struct cl_object *o, pgoff_t ind,
+ struct page *vmpage,
+ enum cl_page_type type);
void cl_page_get(struct cl_page *page);
void cl_page_put(const struct lu_env *env, struct cl_page *page);
void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer,
const struct cl_page *pg);
void cl_page_header_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_page *pg);
-struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page);
struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj);
-struct cl_page *cl_page_top(struct cl_page *page);
const struct cl_page_slice *cl_page_at(const struct cl_page *page,
const struct lu_device_type *dtype);
@@ -2872,12 +2298,10 @@ int cl_page_flush(const struct lu_env *env, struct cl_io *io,
void cl_page_discard(const struct lu_env *env, struct cl_io *io,
struct cl_page *pg);
void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
-int cl_page_unmap(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg);
int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg);
void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate);
int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page);
+ struct cl_page *page, pgoff_t *max_index);
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
int cl_page_size(const struct cl_object *obj);
@@ -2890,138 +2314,74 @@ void cl_lock_descr_print(const struct lu_env *env, void *cookie,
const struct cl_lock_descr *descr);
/* @} helper */
-/** @} cl_page */
-
-/** \defgroup cl_lock cl_lock
- * @{
+/**
+ * Data structure managing a client's cached pages. A count of
+ * "unstable" pages is maintained, and an LRU of clean pages is
+ * maintained. "unstable" pages are pages pinned by the ptlrpc
+ * layer for recovery purposes.
*/
+struct cl_client_cache {
+ /**
+ * Reference count of the client cache:
+ * # of users (OSCs) + 2 (held by llite and lov)
+ */
+ atomic_t ccc_users;
+ /**
+ * # of threads currently doing shrinking
+ */
+ unsigned int ccc_lru_shrinkers;
+ /**
+ * # of LRU entries available
+ */
+ atomic_t ccc_lru_left;
+ /**
+ * List of entities (OSCs) for this LRU cache
+ */
+ struct list_head ccc_lru;
+ /**
+ * Max # of LRU entries
+ */
+ unsigned long ccc_lru_max;
+ /**
+ * Lock to protect ccc_lru list
+ */
+ spinlock_t ccc_lru_lock;
+ /**
+ * # of unstable pages for this mount point
+ */
+ atomic_t ccc_unstable_nr;
+ /**
+ * Waitq for waiting until the number of unstable pages reaches zero.
+ * Used at unmount time and signaled on BRW commit
+ */
+ wait_queue_head_t ccc_unstable_waitq;
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source);
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source);
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source);
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
- struct cl_object *obj, pgoff_t index,
- struct cl_lock *except, int pending,
- int canceld);
-static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env,
- struct cl_object *obj,
- struct cl_page *page,
- struct cl_lock *except,
- int pending, int canceld)
-{
- LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj));
- return cl_lock_at_pgoff(env, obj, page->cp_index, except,
- pending, canceld);
-}
-
-const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
- const struct lu_device_type *dtype);
-
-void cl_lock_get(struct cl_lock *lock);
-void cl_lock_get_trust(struct cl_lock *lock);
-void cl_lock_put(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source);
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source);
-void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source);
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source);
-void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock);
+};
-int cl_lock_is_intransit(struct cl_lock *lock);
+/**
+ * cl_cache functions
+ */
+struct cl_client_cache *cl_cache_init(unsigned long lru_page_max);
+void cl_cache_incref(struct cl_client_cache *cache);
+void cl_cache_decref(struct cl_client_cache *cache);
-int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock,
- int keep_mutex);
+/** @} cl_page */
-/** \name statemachine statemachine
- * Interface to lock state machine consists of 3 parts:
- *
- * - "try" functions that attempt to effect a state transition. If state
- * transition is not possible right now (e.g., if it has to wait for some
- * asynchronous event to occur), these functions return
- * cl_lock_transition::CLO_WAIT.
- *
- * - "non-try" functions that implement synchronous blocking interface on
- * top of non-blocking "try" functions. These functions repeatedly call
- * corresponding "try" versions, and if state transition is not possible
- * immediately, wait for lock state change.
- *
- * - methods from cl_lock_operations, called by "try" functions. Lock can
- * be advanced to the target state only when all layers voted that they
- * are ready for this transition. "Try" functions call methods under lock
- * mutex. If a layer had to release a mutex, it re-acquires it and returns
- * cl_lock_transition::CLO_REPEAT, causing "try" function to call all
- * layers again.
- *
- * TRY NON-TRY METHOD FINAL STATE
- *
- * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED
- *
- * cl_wait_try() cl_wait() cl_lock_operations::clo_wait() CLS_HELD
- *
- * cl_unuse_try() cl_unuse() cl_lock_operations::clo_unuse() CLS_CACHED
- *
- * cl_use_try() NONE cl_lock_operations::clo_use() CLS_HELD
- *
+/** \defgroup cl_lock cl_lock
* @{
*/
-int cl_wait(const struct lu_env *env, struct cl_lock *lock);
-void cl_unuse(const struct lu_env *env, struct cl_lock *lock);
-int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
- struct cl_io *io, __u32 flags);
-int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock);
-int cl_wait_try(const struct lu_env *env, struct cl_lock *lock);
-int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic);
-
-/** @} statemachine */
-
-void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
- enum cl_lock_state state);
-int cl_queue_match(const struct list_head *queue,
- const struct cl_lock_descr *need);
-
-void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_is_mutexed(struct cl_lock *lock);
-int cl_lock_nr_mutexed(const struct lu_env *env);
-int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_ext_match(const struct cl_lock_descr *has,
- const struct cl_lock_descr *need);
-int cl_lock_descr_match(const struct cl_lock_descr *has,
- const struct cl_lock_descr *need);
-int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need);
-int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
- const struct cl_lock_descr *desc);
-
-void cl_lock_closure_init(const struct lu_env *env,
- struct cl_lock_closure *closure,
- struct cl_lock *origin, int wait);
-void cl_lock_closure_fini(struct cl_lock_closure *closure);
-int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
- struct cl_lock_closure *closure);
-void cl_lock_disclosure(const struct lu_env *env,
- struct cl_lock_closure *closure);
-int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
- struct cl_lock_closure *closure);
-
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock *lock);
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+ const struct cl_io *io);
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock);
+const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
+ const struct lu_device_type *dtype);
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock);
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock *lock, struct cl_sync_io *anchor);
void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error);
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait);
-
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock);
/** @} cl_lock */
@@ -3050,15 +2410,14 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
struct cl_lock_descr *descr);
int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
struct cl_page *page);
-int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, unsigned from, unsigned to);
-int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, unsigned from, unsigned to);
int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
enum cl_req_type iot, struct cl_2queue *queue);
int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
enum cl_req_type iot, struct cl_2queue *queue,
long timeout);
+int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue, int from, int to,
+ cl_commit_cbt cb);
int cl_io_is_going(const struct lu_env *env);
/**
@@ -3114,6 +2473,12 @@ static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist)
return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch);
}
+static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist)
+{
+ LASSERT(plist->pl_nr > 0);
+ return list_entry(plist->pl_pages.next, struct cl_page, cp_batch);
+}
+
/**
* Iterate over pages in a page list.
*/
@@ -3130,9 +2495,14 @@ void cl_page_list_init(struct cl_page_list *plist);
void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page);
void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
struct cl_page *page);
+void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
+ struct cl_page *page);
void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head);
+void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
+ struct cl_page *page);
void cl_page_list_disown(const struct lu_env *env,
struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist);
void cl_2queue_init(struct cl_2queue *queue);
void cl_2queue_disown(const struct lu_env *env,
@@ -3177,13 +2547,18 @@ struct cl_sync_io {
atomic_t csi_barrier;
/** completion to be signaled when transfer is complete. */
wait_queue_head_t csi_waitq;
+ /** callback to invoke when this IO is finished */
+ void (*csi_end_io)(const struct lu_env *,
+ struct cl_sync_io *);
};
-void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages);
-int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct cl_sync_io *anchor,
+void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
+ void (*end)(const struct lu_env *, struct cl_sync_io *));
+int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
long timeout);
-void cl_sync_io_note(struct cl_sync_io *anchor, int ioret);
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+ int ioret);
+void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
/** @} cl_sync_io */
@@ -3241,6 +2616,9 @@ void *cl_env_reenter(void);
void cl_env_reexit(void *cookie);
void cl_env_implant(struct lu_env *env, int *refcheck);
void cl_env_unplant(struct lu_env *env, int *refcheck);
+unsigned int cl_env_cache_purge(unsigned int nr);
+struct lu_env *cl_env_percpu_get(void);
+void cl_env_percpu_put(struct lu_env *env);
/** @} cl_env */
diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h
index f6df3f33e770..4a15228b5570 100644
--- a/drivers/staging/lustre/lustre/include/interval_tree.h
+++ b/drivers/staging/lustre/lustre/include/interval_tree.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
deleted file mode 100644
index 5d839a9f789f..000000000000
--- a/drivers/staging/lustre/lustre/include/lclient.h
+++ /dev/null
@@ -1,408 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Definitions shared between vvp and liblustre, and other clients in the
- * future.
- *
- * Author: Oleg Drokin <oleg.drokin@sun.com>
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#ifndef LCLIENT_H
-#define LCLIENT_H
-
-blkcnt_t dirty_cnt(struct inode *inode);
-
-int cl_glimpse_size0(struct inode *inode, int agl);
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
- struct inode *inode, struct cl_object *clob, int agl);
-
-static inline int cl_glimpse_size(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 0);
-}
-
-static inline int cl_agl(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 1);
-}
-
-/**
- * Locking policy for setattr.
- */
-enum ccc_setattr_lock_type {
- /** Locking is done by server */
- SETATTR_NOLOCK,
- /** Extent lock is enqueued */
- SETATTR_EXTENT_LOCK,
- /** Existing local extent lock is used */
- SETATTR_MATCH_LOCK
-};
-
-/**
- * IO state private to vvp or slp layers.
- */
-struct ccc_io {
- /** super class */
- struct cl_io_slice cui_cl;
- struct cl_io_lock_link cui_link;
- /**
- * I/O vector information to or from which read/write is going.
- */
- struct iov_iter *cui_iter;
- /**
- * Total size for the left IO.
- */
- size_t cui_tot_count;
-
- union {
- struct {
- enum ccc_setattr_lock_type cui_local_lock;
- } setattr;
- } u;
- /**
- * True iff io is processing glimpse right now.
- */
- int cui_glimpse;
- /**
- * Layout version when this IO is initialized
- */
- __u32 cui_layout_gen;
- /**
- * File descriptor against which IO is done.
- */
- struct ll_file_data *cui_fd;
- struct kiocb *cui_iocb;
-};
-
-/**
- * True, if \a io is a normal io, False for splice_{read,write}.
- * must be implemented in arch specific code.
- */
-int cl_is_normalio(const struct lu_env *env, const struct cl_io *io);
-
-extern struct lu_context_key ccc_key;
-extern struct lu_context_key ccc_session_key;
-
-struct ccc_thread_info {
- struct cl_lock_descr cti_descr;
- struct cl_io cti_io;
- struct cl_attr cti_attr;
-};
-
-static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env)
-{
- struct ccc_thread_info *info;
-
- info = lu_context_key_get(&env->le_ctx, &ccc_key);
- LASSERT(info);
- return info;
-}
-
-static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env)
-{
- struct cl_attr *attr = &ccc_env_info(env)->cti_attr;
-
- memset(attr, 0, sizeof(*attr));
- return attr;
-}
-
-static inline struct cl_io *ccc_env_thread_io(const struct lu_env *env)
-{
- struct cl_io *io = &ccc_env_info(env)->cti_io;
-
- memset(io, 0, sizeof(*io));
- return io;
-}
-
-struct ccc_session {
- struct ccc_io cs_ios;
-};
-
-static inline struct ccc_session *ccc_env_session(const struct lu_env *env)
-{
- struct ccc_session *ses;
-
- ses = lu_context_key_get(env->le_ses, &ccc_session_key);
- LASSERT(ses);
- return ses;
-}
-
-static inline struct ccc_io *ccc_env_io(const struct lu_env *env)
-{
- return &ccc_env_session(env)->cs_ios;
-}
-
-/**
- * ccc-private object state.
- */
-struct ccc_object {
- struct cl_object_header cob_header;
- struct cl_object cob_cl;
- struct inode *cob_inode;
-
- /**
- * A list of dirty pages pending IO in the cache. Used by
- * SOM. Protected by ll_inode_info::lli_lock.
- *
- * \see ccc_page::cpg_pending_linkage
- */
- struct list_head cob_pending_list;
-
- /**
- * Access this counter is protected by inode->i_sem. Now that
- * the lifetime of transient pages must be covered by inode sem,
- * we don't need to hold any lock..
- */
- int cob_transient_pages;
- /**
- * Number of outstanding mmaps on this file.
- *
- * \see ll_vm_open(), ll_vm_close().
- */
- atomic_t cob_mmap_cnt;
-
- /**
- * various flags
- * cob_discard_page_warned
- * if pages belonging to this object are discarded when a client
- * is evicted, some debug info will be printed, this flag will be set
- * during processing the first discarded page, then avoid flooding
- * debug message for lots of discarded pages.
- *
- * \see ll_dirty_page_discard_warn.
- */
- unsigned int cob_discard_page_warned:1;
-};
-
-/**
- * ccc-private page state.
- */
-struct ccc_page {
- struct cl_page_slice cpg_cl;
- int cpg_defer_uptodate;
- int cpg_ra_used;
- int cpg_write_queued;
- /**
- * Non-empty iff this page is already counted in
- * ccc_object::cob_pending_list. Protected by
- * ccc_object::cob_pending_guard. This list is only used as a flag,
- * that is, never iterated through, only checked for list_empty(), but
- * having a list is useful for debugging.
- */
- struct list_head cpg_pending_linkage;
- /** VM page */
- struct page *cpg_page;
-};
-
-static inline struct ccc_page *cl2ccc_page(const struct cl_page_slice *slice)
-{
- return container_of(slice, struct ccc_page, cpg_cl);
-}
-
-struct ccc_device {
- struct cl_device cdv_cl;
- struct super_block *cdv_sb;
- struct cl_device *cdv_next;
-};
-
-struct ccc_lock {
- struct cl_lock_slice clk_cl;
-};
-
-struct ccc_req {
- struct cl_req_slice crq_cl;
-};
-
-void *ccc_key_init (const struct lu_context *ctx,
- struct lu_context_key *key);
-void ccc_key_fini (const struct lu_context *ctx,
- struct lu_context_key *key, void *data);
-void *ccc_session_key_init(const struct lu_context *ctx,
- struct lu_context_key *key);
-void ccc_session_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data);
-
-int ccc_device_init (const struct lu_env *env,
- struct lu_device *d,
- const char *name, struct lu_device *next);
-struct lu_device *ccc_device_fini (const struct lu_env *env,
- struct lu_device *d);
-struct lu_device *ccc_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg,
- const struct lu_device_operations *luops,
- const struct cl_device_operations *clops);
-struct lu_device *ccc_device_free (const struct lu_env *env,
- struct lu_device *d);
-struct lu_object *ccc_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *dev,
- const struct cl_object_operations *clops,
- const struct lu_object_operations *luops);
-
-int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req);
-void ccc_umount(const struct lu_env *env, struct cl_device *dev);
-int ccc_global_init(struct lu_device_type *device_type);
-void ccc_global_fini(struct lu_device_type *device_type);
-int ccc_object_init0(const struct lu_env *env, struct ccc_object *vob,
- const struct cl_object_conf *conf);
-int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf);
-void ccc_object_free(const struct lu_env *env, struct lu_object *obj);
-int ccc_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io,
- const struct cl_lock_operations *lkops);
-int ccc_object_glimpse(const struct lu_env *env,
- const struct cl_object *obj, struct ost_lvb *lvb);
-struct page *ccc_page_vmpage(const struct lu_env *env,
- const struct cl_page_slice *slice);
-int ccc_page_is_under_lock(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io);
-int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice);
-int ccc_transient_page_prep(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io);
-void ccc_lock_delete(const struct lu_env *env,
- const struct cl_lock_slice *slice);
-void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice);
-int ccc_lock_enqueue(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_io *io, __u32 enqflags);
-int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_fits_into(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *need,
- const struct cl_io *io);
-void ccc_lock_state(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- enum cl_lock_state state);
-
-int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- pgoff_t start, pgoff_t end);
-int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- loff_t start, loff_t end);
-void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios);
-void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios,
- size_t nob);
-void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio,
- struct cl_io *io);
-int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, loff_t start, size_t count, int *exceed);
-void ccc_req_completion(const struct lu_env *env,
- const struct cl_req_slice *slice, int ioret);
-void ccc_req_attr_set(const struct lu_env *env,
- const struct cl_req_slice *slice,
- const struct cl_object *obj,
- struct cl_req_attr *oa, u64 flags);
-
-struct lu_device *ccc2lu_dev (struct ccc_device *vdv);
-struct lu_object *ccc2lu (struct ccc_object *vob);
-struct ccc_device *lu2ccc_dev (const struct lu_device *d);
-struct ccc_device *cl2ccc_dev (const struct cl_device *d);
-struct ccc_object *lu2ccc (const struct lu_object *obj);
-struct ccc_object *cl2ccc (const struct cl_object *obj);
-struct ccc_lock *cl2ccc_lock (const struct cl_lock_slice *slice);
-struct ccc_io *cl2ccc_io (const struct lu_env *env,
- const struct cl_io_slice *slice);
-struct ccc_req *cl2ccc_req (const struct cl_req_slice *slice);
-struct page *cl2vm_page (const struct cl_page_slice *slice);
-struct inode *ccc_object_inode(const struct cl_object *obj);
-struct ccc_object *cl_inode2ccc (struct inode *inode);
-
-int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
-
-int ccc_object_invariant(const struct cl_object *obj);
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-int cl_local_size(struct inode *inode);
-
-__u16 ll_dirent_type_get(struct lu_dirent *ent);
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
-__u32 cl_fid_build_gen(const struct lu_fid *fid);
-
-# define CLOBINVRNT(env, clob, expr) \
- ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
-
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
-int cl_ocd_update(struct obd_device *host,
- struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data);
-
-struct ccc_grouplock {
- struct lu_env *cg_env;
- struct cl_io *cg_io;
- struct cl_lock *cg_lock;
- unsigned long cg_gid;
-};
-
-int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
- struct ccc_grouplock *cg);
-void cl_put_grouplock(struct ccc_grouplock *cg);
-
-/**
- * New interfaces to get and put lov_stripe_md from lov layer. This violates
- * layering because lov_stripe_md is supposed to be a private data in lov.
- *
- * NB: If you find you have to use these interfaces for your new code, please
- * think about it again. These interfaces may be removed in the future for
- * better layering.
- */
-struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
-void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
-int lov_read_and_clear_async_rc(struct cl_object *clob);
-
-struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
-void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
-
-/**
- * Data structure managing a client's cached clean pages. An LRU of
- * pages is maintained, along with other statistics.
- */
-struct cl_client_cache {
- atomic_t ccc_users; /* # of users (OSCs) of this data */
- struct list_head ccc_lru; /* LRU list of cached clean pages */
- spinlock_t ccc_lru_lock; /* lock for list */
- atomic_t ccc_lru_left; /* # of LRU entries available */
- unsigned long ccc_lru_max; /* Max # of LRU entries possible */
- unsigned int ccc_lru_shrinkers; /* # of threads reclaiming */
-};
-
-#endif /*LCLIENT_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index 79d8f93075d1..1eb64ec4bed4 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
index 3420cfd1278d..d18e8a76bb25 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
index c6c7f54637fb..5842cb18b49e 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_user.h b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
index 9cc2849f3f85..e967950e8536 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/linux/obd.h b/drivers/staging/lustre/lustre/include/linux/obd.h
deleted file mode 100644
index 3907bf4ce07c..000000000000
--- a/drivers/staging/lustre/lustre/include/linux/obd.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LINUX_OBD_H
-#define __LINUX_OBD_H
-
-#ifndef __OBD_H
-#error Do not #include this file directly. #include <obd.h> instead
-#endif
-
-#include "../obd_support.h"
-
-#include <linux/fs.h>
-#include <linux/list.h>
-#include <linux/sched.h> /* for struct task_struct, for current.h */
-#include <linux/mount.h>
-
-#include "../lustre_intent.h"
-
-struct ll_iattr {
- struct iattr iattr;
- unsigned int ia_attr_flags;
-};
-
-#define CLIENT_OBD_LIST_LOCK_DEBUG 1
-
-struct client_obd_lock {
- spinlock_t lock;
-
- unsigned long time;
- struct task_struct *task;
- const char *func;
- int line;
-};
-
-static inline void __client_obd_list_lock(struct client_obd_lock *lock,
- const char *func, int line)
-{
- unsigned long cur = jiffies;
-
- while (1) {
- if (spin_trylock(&lock->lock)) {
- LASSERT(!lock->task);
- lock->task = current;
- lock->func = func;
- lock->line = line;
- lock->time = jiffies;
- break;
- }
-
- if (time_before(cur + 5 * HZ, jiffies) &&
- time_before(lock->time + 5 * HZ, jiffies)) {
- struct task_struct *task = lock->task;
-
- if (!task)
- continue;
-
- LCONSOLE_WARN("%s:%d: lock %p was acquired by <%s:%d:%s:%d> for %lu seconds.\n",
- current->comm, current->pid,
- lock, task->comm, task->pid,
- lock->func, lock->line,
- (jiffies - lock->time) / HZ);
- LCONSOLE_WARN("====== for current process =====\n");
- dump_stack();
- LCONSOLE_WARN("====== end =======\n");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(1000 * HZ);
- }
- cpu_relax();
- }
-}
-
-#define client_obd_list_lock(lock) \
- __client_obd_list_lock(lock, __func__, __LINE__)
-
-static inline void client_obd_list_unlock(struct client_obd_lock *lock)
-{
- LASSERT(lock->task);
- lock->task = NULL;
- lock->time = jiffies;
- spin_unlock(&lock->lock);
-}
-
-static inline void client_obd_list_lock_init(struct client_obd_lock *lock)
-{
- spin_lock_init(&lock->lock);
-}
-
-static inline void client_obd_list_lock_done(struct client_obd_lock *lock)
-{}
-
-#endif /* __LINUX_OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
index 4146c9c3999f..d68e60e7fef7 100644
--- a/drivers/staging/lustre/lustre/include/lprocfs_status.h
+++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
index 242bb1ef6245..6e25c1bb6aa3 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -198,7 +194,6 @@ typedef int (*lu_printer_t)(const struct lu_env *env,
* Operations specific for particular lu_object.
*/
struct lu_object_operations {
-
/**
* Allocate lower-layer parts of the object by calling
* lu_device_operations::ldo_object_alloc() of the corresponding
@@ -656,21 +651,21 @@ static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
* @{
*/
-int lu_site_init (struct lu_site *s, struct lu_device *d);
-void lu_site_fini (struct lu_site *s);
-int lu_site_init_finish (struct lu_site *s);
-void lu_stack_fini (const struct lu_env *env, struct lu_device *top);
-void lu_device_get (struct lu_device *d);
-void lu_device_put (struct lu_device *d);
-int lu_device_init (struct lu_device *d, struct lu_device_type *t);
-void lu_device_fini (struct lu_device *d);
-int lu_object_header_init(struct lu_object_header *h);
+int lu_site_init(struct lu_site *s, struct lu_device *d);
+void lu_site_fini(struct lu_site *s);
+int lu_site_init_finish(struct lu_site *s);
+void lu_stack_fini(const struct lu_env *env, struct lu_device *top);
+void lu_device_get(struct lu_device *d);
+void lu_device_put(struct lu_device *d);
+int lu_device_init(struct lu_device *d, struct lu_device_type *t);
+void lu_device_fini(struct lu_device *d);
+int lu_object_header_init(struct lu_object_header *h);
void lu_object_header_fini(struct lu_object_header *h);
-int lu_object_init (struct lu_object *o,
- struct lu_object_header *h, struct lu_device *d);
-void lu_object_fini (struct lu_object *o);
-void lu_object_add_top (struct lu_object_header *h, struct lu_object *o);
-void lu_object_add (struct lu_object *before, struct lu_object *o);
+int lu_object_init(struct lu_object *o,
+ struct lu_object_header *h, struct lu_device *d);
+void lu_object_fini(struct lu_object *o);
+void lu_object_add_top(struct lu_object_header *h, struct lu_object *o);
+void lu_object_add(struct lu_object *before, struct lu_object *o);
/**
* Helpers to initialize and finalize device types.
@@ -781,11 +776,10 @@ int lu_cdebug_printer(const struct lu_env *env,
*/
#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
- CDEBUG(mask, format, ## __VA_ARGS__); \
+ CDEBUG(mask, format "\n", ## __VA_ARGS__); \
} \
} while (0)
@@ -794,9 +788,8 @@ do { \
*/
#define LU_OBJECT_HEADER(mask, env, object, format, ...) \
do { \
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
- \
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \
lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
(object)->lo_header); \
lu_cdebug_printer(env, &msgdata, "\n"); \
@@ -1007,6 +1000,10 @@ enum lu_context_tag {
*/
LCT_LOCAL = 1 << 7,
/**
+ * session for server thread
+ **/
+ LCT_SERVER_SESSION = BIT(8),
+ /**
* Set when at least one of keys, having values in this context has
* non-NULL lu_context_key::lct_exit() method. This is used to
* optimize lu_context_exit() call.
@@ -1118,7 +1115,7 @@ struct lu_context_key {
{ \
type *value; \
\
- CLASSERT(PAGE_SIZE >= sizeof (*value)); \
+ CLASSERT(PAGE_SIZE >= sizeof(*value)); \
\
value = kzalloc(sizeof(*value), GFP_NOFS); \
if (!value) \
@@ -1154,12 +1151,12 @@ do { \
(key)->lct_owner = THIS_MODULE; \
} while (0)
-int lu_context_key_register(struct lu_context_key *key);
-void lu_context_key_degister(struct lu_context_key *key);
-void *lu_context_key_get (const struct lu_context *ctx,
- const struct lu_context_key *key);
-void lu_context_key_quiesce (struct lu_context_key *key);
-void lu_context_key_revive (struct lu_context_key *key);
+int lu_context_key_register(struct lu_context_key *key);
+void lu_context_key_degister(struct lu_context_key *key);
+void *lu_context_key_get(const struct lu_context *ctx,
+ const struct lu_context_key *key);
+void lu_context_key_quiesce(struct lu_context_key *key);
+void lu_context_key_revive(struct lu_context_key *key);
/*
* LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an
@@ -1216,21 +1213,21 @@ void lu_context_key_revive (struct lu_context_key *key);
LU_TYPE_START(mod, __VA_ARGS__); \
LU_TYPE_STOP(mod, __VA_ARGS__)
-int lu_context_init (struct lu_context *ctx, __u32 tags);
-void lu_context_fini (struct lu_context *ctx);
-void lu_context_enter (struct lu_context *ctx);
-void lu_context_exit (struct lu_context *ctx);
-int lu_context_refill(struct lu_context *ctx);
+int lu_context_init(struct lu_context *ctx, __u32 tags);
+void lu_context_fini(struct lu_context *ctx);
+void lu_context_enter(struct lu_context *ctx);
+void lu_context_exit(struct lu_context *ctx);
+int lu_context_refill(struct lu_context *ctx);
/*
* Helper functions to operate on multiple keys. These are used by the default
* device type operations, defined by LU_TYPE_INIT_FINI().
*/
-int lu_context_key_register_many(struct lu_context_key *k, ...);
+int lu_context_key_register_many(struct lu_context_key *k, ...);
void lu_context_key_degister_many(struct lu_context_key *k, ...);
-void lu_context_key_revive_many (struct lu_context_key *k, ...);
-void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
+void lu_context_key_revive_many(struct lu_context_key *k, ...);
+void lu_context_key_quiesce_many(struct lu_context_key *k, ...);
/**
* Environment.
@@ -1246,9 +1243,9 @@ struct lu_env {
struct lu_context *le_ses;
};
-int lu_env_init (struct lu_env *env, __u32 tags);
-void lu_env_fini (struct lu_env *env);
-int lu_env_refill(struct lu_env *env);
+int lu_env_init(struct lu_env *env, __u32 tags);
+void lu_env_fini(struct lu_env *env);
+int lu_env_refill(struct lu_env *env);
/** @} lu_context */
diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
index 07d45de69dd9..c2340d643e84 100644
--- a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
+++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 5aae1d06a5fa..051864c23b5b 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -183,6 +179,12 @@ struct lu_seq_range {
__u32 lsr_flags;
};
+struct lu_seq_range_array {
+ __u32 lsra_count;
+ __u32 lsra_padding;
+ struct lu_seq_range lsra_lsr[0];
+};
+
#define LU_SEQ_RANGE_MDT 0x0
#define LU_SEQ_RANGE_OST 0x1
#define LU_SEQ_RANGE_ANY 0x3
@@ -380,7 +382,7 @@ static inline __u64 fid_ver_oid(const struct lu_fid *fid)
* used for other purposes and not risk collisions with existing inodes.
*
* Different FID Format
- * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0
+ * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
*/
enum fid_seq {
FID_SEQ_OST_MDT0 = 0,
@@ -578,7 +580,7 @@ static inline __u64 ostid_seq(const struct ost_id *ostid)
if (fid_seq_is_mdt0(ostid->oi.oi_seq))
return FID_SEQ_OST_MDT0;
- if (fid_seq_is_default(ostid->oi.oi_seq))
+ if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
return FID_SEQ_LOV_DEFAULT;
if (fid_is_idif(&ostid->oi_fid))
@@ -590,9 +592,12 @@ static inline __u64 ostid_seq(const struct ost_id *ostid)
/* extract OST objid from a wire ost_id (id/seq) pair */
static inline __u64 ostid_id(const struct ost_id *ostid)
{
- if (fid_seq_is_mdt0(ostid_seq(ostid)))
+ if (fid_seq_is_mdt0(ostid->oi.oi_seq))
return ostid->oi.oi_id & IDIF_OID_MASK;
+ if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
+ return ostid->oi.oi_id;
+
if (fid_is_idif(&ostid->oi_fid))
return fid_idif_id(fid_seq(&ostid->oi_fid),
fid_oid(&ostid->oi_fid), 0);
@@ -636,12 +641,22 @@ static inline void ostid_set_seq_llog(struct ost_id *oi)
*/
static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
{
- if (fid_seq_is_mdt0(ostid_seq(oi))) {
+ if (fid_seq_is_mdt0(oi->oi.oi_seq)) {
if (oid >= IDIF_MAX_OID) {
CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
return;
}
oi->oi.oi_id = oid;
+ } else if (fid_is_idif(&oi->oi_fid)) {
+ if (oid >= IDIF_MAX_OID) {
+ CERROR("Bad %llu to set "DOSTID"\n",
+ oid, POSTID(oi));
+ return;
+ }
+ oi->oi_fid.f_seq = fid_idif_seq(oid,
+ fid_idif_ost_idx(&oi->oi_fid));
+ oi->oi_fid.f_oid = oid;
+ oi->oi_fid.f_ver = oid >> 48;
} else {
if (oid > OBIF_MAX_OID) {
CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
@@ -651,25 +666,31 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
}
}
-static inline void ostid_inc_id(struct ost_id *oi)
+static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
{
- if (fid_seq_is_mdt0(ostid_seq(oi))) {
- if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) {
- CERROR("Bad inc "DOSTID"\n", POSTID(oi));
- return;
+ if (unlikely(fid_seq_is_igif(fid->f_seq))) {
+ CERROR("bad IGIF, "DFID"\n", PFID(fid));
+ return -EBADF;
+ }
+
+ if (fid_is_idif(fid)) {
+ if (oid >= IDIF_MAX_OID) {
+ CERROR("Too large OID %#llx to set IDIF "DFID"\n",
+ (unsigned long long)oid, PFID(fid));
+ return -EBADF;
}
- oi->oi.oi_id++;
+ fid->f_seq = fid_idif_seq(oid, fid_idif_ost_idx(fid));
+ fid->f_oid = oid;
+ fid->f_ver = oid >> 48;
} else {
- oi->oi_fid.f_oid++;
+ if (oid > OBIF_MAX_OID) {
+ CERROR("Too large OID %#llx to set REG "DFID"\n",
+ (unsigned long long)oid, PFID(fid));
+ return -EBADF;
+ }
+ fid->f_oid = oid;
}
-}
-
-static inline void ostid_dec_id(struct ost_id *oi)
-{
- if (fid_seq_is_mdt0(ostid_seq(oi)))
- oi->oi.oi_id--;
- else
- oi->oi_fid.f_oid--;
+ return 0;
}
/**
@@ -679,35 +700,39 @@ static inline void ostid_dec_id(struct ost_id *oi)
* be passed through unchanged. Only legacy OST objects in "group 0"
* will be mapped into the IDIF namespace so that they can fit into the
* struct lu_fid fields without loss. For reference see:
- * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs
+ * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
*/
static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid,
__u32 ost_idx)
{
+ __u64 seq = ostid_seq(ostid);
+
if (ost_idx > 0xffff) {
CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid),
ost_idx);
return -EBADF;
}
- if (fid_seq_is_mdt0(ostid_seq(ostid))) {
+ if (fid_seq_is_mdt0(seq)) {
+ __u64 oid = ostid_id(ostid);
+
/* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
* that we map into the IDIF namespace. It allows up to 2^48
* objects per OST, as this is the object namespace that has
* been in production for years. This can handle create rates
* of 1M objects/s/OST for 9 years, or combinations thereof.
*/
- if (ostid_id(ostid) >= IDIF_MAX_OID) {
+ if (oid >= IDIF_MAX_OID) {
CERROR("bad MDT0 id, " DOSTID " ost_idx:%u\n",
POSTID(ostid), ost_idx);
return -EBADF;
}
- fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx);
+ fid->f_seq = fid_idif_seq(oid, ost_idx);
/* truncate to 32 bits by assignment */
- fid->f_oid = ostid_id(ostid);
+ fid->f_oid = oid;
/* in theory, not currently used */
- fid->f_ver = ostid_id(ostid) >> 48;
- } else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ {
+ fid->f_ver = oid >> 48;
+ } else if (likely(!fid_seq_is_default(seq))) {
/* This is either an IDIF object, which identifies objects across
* all OSTs, or a regular FID. The IDIF namespace maps legacy
* OST objects into the FID namespace. In both cases, we just
@@ -1001,8 +1026,9 @@ static inline int lu_dirent_calc_size(int namelen, __u16 attr)
size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
size += sizeof(struct luda_type);
- } else
+ } else {
size = sizeof(struct lu_dirent) + namelen;
+ }
return (size + 7) & ~7;
}
@@ -1211,8 +1237,16 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
*/
#define OBD_CONNECT_ATTRFID 0x4000ULL /*Server can GetAttr By Fid*/
#define OBD_CONNECT_NODEVOH 0x8000ULL /*No open hndl on specl nodes*/
-#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /*Remote client */
-#define OBD_CONNECT_RMT_CLIENT_FORCE 0x20000ULL /*Remote client by force */
+#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /* Remote client, never used
+ * in production. Removed in
+ * 2.9. Keep this flag to
+ * avoid reuse.
+ */
+#define OBD_CONNECT_RMT_CLIENT_FORCE 0x20000ULL /* Remote client by force,
+ * never used in production.
+ * Removed in 2.9. Keep this
+ * flag to avoid reuse
+ */
#define OBD_CONNECT_BRW_SIZE 0x40000ULL /*Max bytes per rpc */
#define OBD_CONNECT_QUOTA64 0x80000ULL /*Not used since 2.4 */
#define OBD_CONNECT_MDS_CAPA 0x100000ULL /*MDS capability */
@@ -1256,6 +1290,9 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
#define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */
#define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* flock deadlock detection */
#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/
+#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack
+ * name in request
+ */
/* XXX README XXX:
* Please DO NOT add flag values here before first ensuring that this same
@@ -1428,6 +1465,8 @@ enum obdo_flags {
*/
OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */
+ OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */
+ OBD_FL_SHORT_IO = 0x00400000, /* short io request */
/* Note that while these checksum values are currently separate bits,
* in 2.x we can actually allow all values from 1-31 if we wanted.
@@ -1525,6 +1564,11 @@ static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
oi->oi.oi_seq = seq;
}
+static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
+{
+ oi->oi.oi_id = oid;
+}
+
static inline __u64 lmm_oi_id(struct ost_id *oi)
{
return oi->oi.oi_id;
@@ -1663,7 +1707,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
#define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */
#define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */
#define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */
-#define OBD_MD_FLRMTPERM (0x0000010000000000ULL) /* remote permission */
+/* OBD_MD_FLRMTPERM (0x0000010000000000ULL) remote perm, obsolete */
#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */
#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */
#define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */
@@ -1675,10 +1719,10 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
*/
#define OBD_MD_FLOBJCOUNT (0x0000400000000000ULL) /* for multiple destroy */
-#define OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) /* lfs lsetfacl case */
-#define OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) /* lfs lgetfacl case */
-#define OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) /* lfs rsetfacl case */
-#define OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) /* lfs rgetfacl case */
+/* OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) lfs lsetfacl, obsolete */
+/* OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) lfs lgetfacl, obsolete */
+/* OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) lfs rsetfacl, obsolete */
+/* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
#define OBD_MD_FLRELEASED (0x0020000000000000ULL) /* file released */
@@ -1732,6 +1776,11 @@ void lustre_swab_obd_statfs(struct obd_statfs *os);
#define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */
#define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */
#define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */
+#define OBD_BRW_SOFT_SYNC 0x4000 /* This flag notifies the server
+ * that the client is running low on
+ * space for unstable pages; asking
+ * it to sync quickly
+ */
#define OBD_OBJECT_EOF 0xffffffffffffffffULL
@@ -2114,26 +2163,8 @@ enum {
CFS_SETUID_PERM = 0x01,
CFS_SETGID_PERM = 0x02,
CFS_SETGRP_PERM = 0x04,
- CFS_RMTACL_PERM = 0x08,
- CFS_RMTOWN_PERM = 0x10
-};
-
-/* inode access permission for remote user, the inode info are omitted,
- * for client knows them.
- */
-struct mdt_remote_perm {
- __u32 rp_uid;
- __u32 rp_gid;
- __u32 rp_fsuid;
- __u32 rp_fsuid_h;
- __u32 rp_fsgid;
- __u32 rp_fsgid_h;
- __u32 rp_access_perm; /* MAY_READ/WRITE/EXEC */
- __u32 rp_padding;
};
-void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p);
-
struct mdt_rec_setattr {
__u32 sa_opcode;
__u32 sa_cap;
@@ -2436,6 +2467,7 @@ struct mdt_rec_reint {
void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
+/* lmv structures */
struct lmv_desc {
__u32 ld_tgt_count; /* how many MDS's */
__u32 ld_active_tgt_count; /* how many active */
@@ -2460,7 +2492,6 @@ struct lmv_stripe_md {
struct lu_fid mea_ids[0];
};
-/* lmv structures */
#define MEA_MAGIC_LAST_CHAR 0xb2221ca1
#define MEA_MAGIC_ALL_CHARS 0xb222a11c
#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
@@ -2470,9 +2501,10 @@ struct lmv_stripe_md {
#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
enum fld_rpc_opc {
- FLD_QUERY = 900,
+ FLD_QUERY = 900,
+ FLD_READ = 901,
FLD_LAST_OPC,
- FLD_FIRST_OPC = FLD_QUERY
+ FLD_FIRST_OPC = FLD_QUERY
};
enum seq_rpc_opc {
@@ -2486,6 +2518,12 @@ enum seq_op {
SEQ_ALLOC_META = 1
};
+enum fld_op {
+ FLD_CREATE = 0,
+ FLD_DELETE = 1,
+ FLD_LOOKUP = 2,
+};
+
/*
* LOV data structures
*/
@@ -2582,6 +2620,8 @@ struct ldlm_extent {
__u64 gid;
};
+#define LDLM_GID_ANY ((__u64)-1)
+
static inline int ldlm_extent_overlap(struct ldlm_extent *ex1,
struct ldlm_extent *ex2)
{
@@ -3304,7 +3344,7 @@ struct getinfo_fid2path {
char gf_path[0];
} __packed;
-void lustre_swab_fid2path (struct getinfo_fid2path *gf);
+void lustre_swab_fid2path(struct getinfo_fid2path *gf);
enum {
LAYOUT_INTENT_ACCESS = 0,
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
index 276906e646f5..ef6f38ff359e 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -193,37 +189,37 @@ struct ost_id {
* *INFO - set/get lov_user_mds_data
*/
/* see <lustre_lib.h> for ioctl numberss 101-150 */
-#define LL_IOC_GETFLAGS _IOR ('f', 151, long)
-#define LL_IOC_SETFLAGS _IOW ('f', 152, long)
-#define LL_IOC_CLRFLAGS _IOW ('f', 153, long)
+#define LL_IOC_GETFLAGS _IOR('f', 151, long)
+#define LL_IOC_SETFLAGS _IOW('f', 152, long)
+#define LL_IOC_CLRFLAGS _IOW('f', 153, long)
/* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */
-#define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long)
+#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long)
/* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */
-#define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long)
+#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long)
/* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */
-#define LL_IOC_LOV_SETEA _IOW ('f', 156, long)
-#define LL_IOC_RECREATE_OBJ _IOW ('f', 157, long)
-#define LL_IOC_RECREATE_FID _IOW ('f', 157, struct lu_fid)
-#define LL_IOC_GROUP_LOCK _IOW ('f', 158, long)
-#define LL_IOC_GROUP_UNLOCK _IOW ('f', 159, long)
+#define LL_IOC_LOV_SETEA _IOW('f', 156, long)
+#define LL_IOC_RECREATE_OBJ _IOW('f', 157, long)
+#define LL_IOC_RECREATE_FID _IOW('f', 157, struct lu_fid)
+#define LL_IOC_GROUP_LOCK _IOW('f', 158, long)
+#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long)
/* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */
-#define LL_IOC_QUOTACHECK _IOW ('f', 160, int)
+#define LL_IOC_QUOTACHECK _IOW('f', 160, int)
/* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */
-#define LL_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *)
+#define LL_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *)
/* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */
#define LL_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
#define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *)
-#define LL_IOC_FLUSHCTX _IOW ('f', 166, long)
-#define LL_IOC_RMTACL _IOW ('f', 167, long)
-#define LL_IOC_GETOBDCOUNT _IOR ('f', 168, long)
+#define LL_IOC_FLUSHCTX _IOW('f', 166, long)
+/* LL_IOC_RMTACL 167 obsolete */
+#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long)
#define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long)
#define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long)
#define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid)
#define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long)
-#define LL_IOC_PATH2FID _IOR ('f', 173, long)
+#define LL_IOC_PATH2FID _IOR('f', 173, long)
#define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *)
-#define LL_IOC_GET_MDTIDX _IOR ('f', 175, int)
+#define LL_IOC_GET_MDTIDX _IOR('f', 175, int)
/* see <lustre_lib.h> for ioctl numbers 177-210 */
@@ -542,19 +538,6 @@ struct identity_downcall_data {
__u32 idd_groups[0];
};
-/* for non-mapped uid/gid */
-#define NOBODY_UID 99
-#define NOBODY_GID 99
-
-#define INVALID_ID (-1)
-
-enum {
- RMT_LSETFACL = 1,
- RMT_LGETFACL = 2,
- RMT_RSETFACL = 3,
- RMT_RGETFACL = 4
-};
-
/* lustre volatile file support
* file name header: .^L^S^T^R:volatile"
*/
@@ -676,7 +659,12 @@ static inline const char *changelog_type2str(int type)
#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
/* HSM cleaning needed */
/* Flags for rename */
-#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of target */
+#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of
+ * target
+ */
+#define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target
+ * has an archive in backend
+ */
/* Flags for HSM */
/* 12b used (from high weight to low weight):
@@ -833,9 +821,8 @@ struct ioc_data_version {
__u64 idv_flags; /* See LL_DV_xxx */
};
-#define LL_DV_NOFLUSH 0x01 /* Do not take READ EXTENT LOCK before sampling
- * version. Dirty caches are left unchanged.
- */
+#define LL_DV_RD_FLUSH BIT(0) /* Flush dirty pages from clients */
+#define LL_DV_WR_FLUSH BIT(1) /* Flush all caching pages from clients */
#ifndef offsetof
# define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
@@ -1095,12 +1082,12 @@ struct hsm_action_list {
__u32 padding1;
char hal_fsname[0]; /* null-terminated */
/* struct hsm_action_item[hal_count] follows, aligned on 8-byte
- * boundaries. See hai_zero
+ * boundaries. See hai_first
*/
} __packed;
#ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round (int val)
+static inline int cfs_size_round(int val)
{
return (val + 7) & (~0x7);
}
@@ -1109,7 +1096,7 @@ static inline int cfs_size_round (int val)
#endif
/* Return pointer to first hai in action list */
-static inline struct hsm_action_item *hai_zero(struct hsm_action_list *hal)
+static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
{
return (struct hsm_action_item *)(hal->hal_fsname +
cfs_size_round(strlen(hal-> \
@@ -1131,7 +1118,7 @@ static inline int hal_size(struct hsm_action_list *hal)
struct hsm_action_item *hai;
sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1);
- hai = hai_zero(hal);
+ hai = hai_first(hal);
for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
sz += cfs_size_round(hai->hai_len);
diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h b/drivers/staging/lustre/lustre/include/lustre_acl.h
index aa4cfa7b749d..fecabe139b1f 100644
--- a/drivers/staging/lustre/lustre/include/lustre_acl.h
+++ b/drivers/staging/lustre/lustre/include/lustre_acl.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h
index bb16ae980b98..95a0be13c0fb 100644
--- a/drivers/staging/lustre/lustre/include/lustre_cfg.h
+++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -161,7 +157,7 @@ static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
int offset;
int bufcount;
- LASSERT (index >= 0);
+ LASSERT(index >= 0);
bufcount = lcfg->lcfg_bufcount;
if (index >= bufcount)
diff --git a/drivers/staging/lustre/lustre/include/lustre_debug.h b/drivers/staging/lustre/lustre/include/lustre_debug.h
index 8a089413c92e..93c1bdaf71a4 100644
--- a/drivers/staging/lustre/lustre/include/lustre_debug.h
+++ b/drivers/staging/lustre/lustre/include/lustre_debug.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h
index 95fd36063f55..8886458748c1 100644
--- a/drivers/staging/lustre/lustre/include/lustre_disk.h
+++ b/drivers/staging/lustre/lustre/include/lustre_disk.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -130,7 +126,6 @@ struct lustre_sb_info {
struct lustre_mount_data *lsi_lmd; /* mount command info */
struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */
struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/
- struct vfsmount *lsi_srv_mnt; /* the one server mount */
atomic_t lsi_mounts; /* references to the srv_mnt */
char lsi_svname[MTI_NAME_MAXLEN];
char lsi_osd_obdname[64];
@@ -158,7 +153,6 @@ struct lustre_sb_info {
struct lustre_mount_info {
char *lmi_name;
struct super_block *lmi_sb;
- struct vfsmount *lmi_mnt;
struct list_head lmi_list_chain;
};
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index 8b0364f71129..60051a5cfe20 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -71,6 +67,7 @@ struct obd_device;
*/
enum ldlm_error {
ELDLM_OK = 0,
+ ELDLM_LOCK_MATCHED = 1,
ELDLM_LOCK_CHANGED = 300,
ELDLM_LOCK_ABORTED = 301,
@@ -269,7 +266,7 @@ struct ldlm_pool {
struct completion pl_kobj_unregister;
};
-typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock);
+typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock);
/**
* LVB operations.
@@ -446,8 +443,11 @@ struct ldlm_namespace {
/** Limit of parallel AST RPC count. */
unsigned ns_max_parallel_ast;
- /** Callback to cancel locks before replaying it during recovery. */
- ldlm_cancel_for_recovery ns_cancel_for_recovery;
+ /**
+ * Callback to check if a lock is good to be canceled by ELC or
+ * during recovery.
+ */
+ ldlm_cancel_cbt ns_cancel;
/** LDLM lock stats */
struct lprocfs_stats *ns_stats;
@@ -479,9 +479,9 @@ static inline int ns_connect_lru_resize(struct ldlm_namespace *ns)
}
static inline void ns_register_cancel(struct ldlm_namespace *ns,
- ldlm_cancel_for_recovery arg)
+ ldlm_cancel_cbt arg)
{
- ns->ns_cancel_for_recovery = arg;
+ ns->ns_cancel = arg;
}
struct ldlm_lock;
@@ -1073,7 +1073,7 @@ void ldlm_lock2handle(const struct ldlm_lock *lock,
struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, __u64 flags);
void ldlm_cancel_callback(struct ldlm_lock *);
int ldlm_lock_remove_from_lru(struct ldlm_lock *);
-int ldlm_lock_set_data(struct lustre_handle *, void *);
+int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data);
/**
* Obtain a lock reference by its handle.
@@ -1162,10 +1162,10 @@ do { \
struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
void ldlm_lock_put(struct ldlm_lock *lock);
void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
-void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode);
-int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode);
+int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode);
void ldlm_lock_fail_match_locked(struct ldlm_lock *lock);
void ldlm_lock_allow_match(struct ldlm_lock *lock);
void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
@@ -1174,10 +1174,10 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
enum ldlm_type type, ldlm_policy_data_t *,
enum ldlm_mode mode, struct lustre_handle *,
int unref);
-enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
+enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
__u64 *bits);
void ldlm_lock_cancel(struct ldlm_lock *lock);
-void ldlm_lock_dump_handle(int level, struct lustre_handle *);
+void ldlm_lock_dump_handle(int level, const struct lustre_handle *);
void ldlm_unlink_lock_skiplist(struct ldlm_lock *req);
/* resource.c */
@@ -1251,9 +1251,9 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
enum ldlm_type type, __u8 with_policy,
enum ldlm_mode mode,
__u64 *flags, void *lvb, __u32 lvb_len,
- struct lustre_handle *lockh, int rc);
+ const struct lustre_handle *lockh, int rc);
int ldlm_cli_update_pool(struct ptlrpc_request *req);
-int ldlm_cli_cancel(struct lustre_handle *lockh,
+int ldlm_cli_cancel(const struct lustre_handle *lockh,
enum ldlm_cancel_flags cancel_flags);
int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *,
enum ldlm_cancel_flags flags, void *opaque);
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
index 7f2ba2ffe0eb..e7e0c21a9b40 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
@@ -37,17 +37,11 @@
/** l_flags bits marked as "gone" bits */
#define LDLM_FL_GONE_MASK 0x0006004000000000ULL
-/** l_flags bits marked as "hide_lock" bits */
-#define LDLM_FL_HIDE_LOCK_MASK 0x0000206400000000ULL
-
/** l_flags bits marked as "inherit" bits */
#define LDLM_FL_INHERIT_MASK 0x0000000000800000ULL
-/** l_flags bits marked as "local_only" bits */
-#define LDLM_FL_LOCAL_ONLY_MASK 0x00FFFFFF00000000ULL
-
-/** l_flags bits marked as "on_wire" bits */
-#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F932FULL
+/** l_flags bits marked as "off_wire" bits */
+#define LDLM_FL_OFF_WIRE_MASK 0x00FFFFFF00000000ULL
/** extent, mode, or resource changed */
#define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL /* bit 0 */
@@ -204,7 +198,7 @@
#define ldlm_set_cancel(_l) LDLM_SET_FLAG((_l), 1ULL << 36)
#define ldlm_clear_cancel(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 36)
-/** whatever it might mean */
+/**
+ * Purpose undocumented. Bit 37 lies within LDLM_FL_OFF_WIRE_MASK, i.e. it
+ * is marked as an off-wire flag.
+ */
#define LDLM_FL_LOCAL_ONLY 0x0000002000000000ULL /* bit 37 */
#define ldlm_is_local_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 37)
#define ldlm_set_local_only(_l) LDLM_SET_FLAG((_l), 1ULL << 37)
@@ -287,18 +281,18 @@
* has canceled this lock and is waiting for rpc_lock which is taken by
* the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in
* the lock to prevent the Early Lock Cancel (ELC) code from cancelling it.
- *
- * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock cache is
- * dropped to let ldlm_callback_handler() return EINVAL to the server. It
- * is used when ELC RPC is already prepared and is waiting for rpc_lock,
- * too late to send a separate CANCEL RPC.
*/
#define LDLM_FL_BL_AST 0x0000400000000000ULL /* bit 46 */
#define ldlm_is_bl_ast(_l) LDLM_TEST_FLAG((_l), 1ULL << 46)
#define ldlm_set_bl_ast(_l) LDLM_SET_FLAG((_l), 1ULL << 46)
#define ldlm_clear_bl_ast(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 46)
-/** whatever it might mean */
+/**
+ * Set by ldlm_cancel_callback() when lock cache is dropped to let
+ * ldlm_callback_handler() return EINVAL to the server. It is used when
+ * ELC RPC is already prepared and is waiting for rpc_lock, too late to
+ * send a separate CANCEL RPC.
+ */
#define LDLM_FL_BL_DONE 0x0000800000000000ULL /* bit 47 */
#define ldlm_is_bl_done(_l) LDLM_TEST_FLAG((_l), 1ULL << 47)
#define ldlm_set_bl_done(_l) LDLM_SET_FLAG((_l), 1ULL << 47)
@@ -381,104 +375,16 @@
/** test for ldlm_lock flag bit set */
#define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0)
+/**
+ * multi-bit test: are any of mask bits set?
+ *
+ * The result is normalized to 0 or 1, like LDLM_TEST_FLAG(). The masks are
+ * 64-bit (ULL) constants, so returning the raw masked value would be
+ * truncated to 0 when stored in an int if only high bits are set.
+ */
+#define LDLM_HAVE_MASK(_l, _m) (((_l)->l_flags & LDLM_FL_##_m##_MASK) != 0)
+
/** set a ldlm_lock flag bit */
#define LDLM_SET_FLAG(_l, _b) ((_l)->l_flags |= (_b))
/** clear a ldlm_lock flag bit */
#define LDLM_CLEAR_FLAG(_l, _b) ((_l)->l_flags &= ~(_b))
-/** Mask of flags inherited from parent lock when doing intents. */
-#define LDLM_INHERIT_FLAGS LDLM_FL_INHERIT_MASK
-
-/** Mask of Flags sent in AST lock_flags to map into the receiving lock. */
-#define LDLM_AST_FLAGS LDLM_FL_AST_MASK
-
/** @} subgroup */
/** @} group */
-#ifdef WIRESHARK_COMPILE
-static int hf_lustre_ldlm_fl_lock_changed = -1;
-static int hf_lustre_ldlm_fl_block_granted = -1;
-static int hf_lustre_ldlm_fl_block_conv = -1;
-static int hf_lustre_ldlm_fl_block_wait = -1;
-static int hf_lustre_ldlm_fl_ast_sent = -1;
-static int hf_lustre_ldlm_fl_replay = -1;
-static int hf_lustre_ldlm_fl_intent_only = -1;
-static int hf_lustre_ldlm_fl_has_intent = -1;
-static int hf_lustre_ldlm_fl_flock_deadlock = -1;
-static int hf_lustre_ldlm_fl_discard_data = -1;
-static int hf_lustre_ldlm_fl_no_timeout = -1;
-static int hf_lustre_ldlm_fl_block_nowait = -1;
-static int hf_lustre_ldlm_fl_test_lock = -1;
-static int hf_lustre_ldlm_fl_cancel_on_block = -1;
-static int hf_lustre_ldlm_fl_deny_on_contention = -1;
-static int hf_lustre_ldlm_fl_ast_discard_data = -1;
-static int hf_lustre_ldlm_fl_fail_loc = -1;
-static int hf_lustre_ldlm_fl_skipped = -1;
-static int hf_lustre_ldlm_fl_cbpending = -1;
-static int hf_lustre_ldlm_fl_wait_noreproc = -1;
-static int hf_lustre_ldlm_fl_cancel = -1;
-static int hf_lustre_ldlm_fl_local_only = -1;
-static int hf_lustre_ldlm_fl_failed = -1;
-static int hf_lustre_ldlm_fl_canceling = -1;
-static int hf_lustre_ldlm_fl_local = -1;
-static int hf_lustre_ldlm_fl_lvb_ready = -1;
-static int hf_lustre_ldlm_fl_kms_ignore = -1;
-static int hf_lustre_ldlm_fl_cp_reqd = -1;
-static int hf_lustre_ldlm_fl_cleaned = -1;
-static int hf_lustre_ldlm_fl_atomic_cb = -1;
-static int hf_lustre_ldlm_fl_bl_ast = -1;
-static int hf_lustre_ldlm_fl_bl_done = -1;
-static int hf_lustre_ldlm_fl_no_lru = -1;
-static int hf_lustre_ldlm_fl_fail_notified = -1;
-static int hf_lustre_ldlm_fl_destroyed = -1;
-static int hf_lustre_ldlm_fl_server_lock = -1;
-static int hf_lustre_ldlm_fl_res_locked = -1;
-static int hf_lustre_ldlm_fl_waited = -1;
-static int hf_lustre_ldlm_fl_ns_srv = -1;
-static int hf_lustre_ldlm_fl_excl = -1;
-
-const value_string lustre_ldlm_flags_vals[] = {
- {LDLM_FL_LOCK_CHANGED, "LDLM_FL_LOCK_CHANGED"},
- {LDLM_FL_BLOCK_GRANTED, "LDLM_FL_BLOCK_GRANTED"},
- {LDLM_FL_BLOCK_CONV, "LDLM_FL_BLOCK_CONV"},
- {LDLM_FL_BLOCK_WAIT, "LDLM_FL_BLOCK_WAIT"},
- {LDLM_FL_AST_SENT, "LDLM_FL_AST_SENT"},
- {LDLM_FL_REPLAY, "LDLM_FL_REPLAY"},
- {LDLM_FL_INTENT_ONLY, "LDLM_FL_INTENT_ONLY"},
- {LDLM_FL_HAS_INTENT, "LDLM_FL_HAS_INTENT"},
- {LDLM_FL_FLOCK_DEADLOCK, "LDLM_FL_FLOCK_DEADLOCK"},
- {LDLM_FL_DISCARD_DATA, "LDLM_FL_DISCARD_DATA"},
- {LDLM_FL_NO_TIMEOUT, "LDLM_FL_NO_TIMEOUT"},
- {LDLM_FL_BLOCK_NOWAIT, "LDLM_FL_BLOCK_NOWAIT"},
- {LDLM_FL_TEST_LOCK, "LDLM_FL_TEST_LOCK"},
- {LDLM_FL_CANCEL_ON_BLOCK, "LDLM_FL_CANCEL_ON_BLOCK"},
- {LDLM_FL_DENY_ON_CONTENTION, "LDLM_FL_DENY_ON_CONTENTION"},
- {LDLM_FL_AST_DISCARD_DATA, "LDLM_FL_AST_DISCARD_DATA"},
- {LDLM_FL_FAIL_LOC, "LDLM_FL_FAIL_LOC"},
- {LDLM_FL_SKIPPED, "LDLM_FL_SKIPPED"},
- {LDLM_FL_CBPENDING, "LDLM_FL_CBPENDING"},
- {LDLM_FL_WAIT_NOREPROC, "LDLM_FL_WAIT_NOREPROC"},
- {LDLM_FL_CANCEL, "LDLM_FL_CANCEL"},
- {LDLM_FL_LOCAL_ONLY, "LDLM_FL_LOCAL_ONLY"},
- {LDLM_FL_FAILED, "LDLM_FL_FAILED"},
- {LDLM_FL_CANCELING, "LDLM_FL_CANCELING"},
- {LDLM_FL_LOCAL, "LDLM_FL_LOCAL"},
- {LDLM_FL_LVB_READY, "LDLM_FL_LVB_READY"},
- {LDLM_FL_KMS_IGNORE, "LDLM_FL_KMS_IGNORE"},
- {LDLM_FL_CP_REQD, "LDLM_FL_CP_REQD"},
- {LDLM_FL_CLEANED, "LDLM_FL_CLEANED"},
- {LDLM_FL_ATOMIC_CB, "LDLM_FL_ATOMIC_CB"},
- {LDLM_FL_BL_AST, "LDLM_FL_BL_AST"},
- {LDLM_FL_BL_DONE, "LDLM_FL_BL_DONE"},
- {LDLM_FL_NO_LRU, "LDLM_FL_NO_LRU"},
- {LDLM_FL_FAIL_NOTIFIED, "LDLM_FL_FAIL_NOTIFIED"},
- {LDLM_FL_DESTROYED, "LDLM_FL_DESTROYED"},
- {LDLM_FL_SERVER_LOCK, "LDLM_FL_SERVER_LOCK"},
- {LDLM_FL_RES_LOCKED, "LDLM_FL_RES_LOCKED"},
- {LDLM_FL_WAITED, "LDLM_FL_WAITED"},
- {LDLM_FL_NS_SRV, "LDLM_FL_NS_SRV"},
- {LDLM_FL_EXCL, "LDLM_FL_EXCL"},
- { 0, NULL }
-};
-#endif /* WIRESHARK_COMPILE */
+
#endif /* LDLM_ALL_FLAGS_MASK */
diff --git a/drivers/staging/lustre/lustre/include/lustre_eacl.h b/drivers/staging/lustre/lustre/include/lustre_eacl.h
index 0b66593a9526..d1039e1ff70d 100644
--- a/drivers/staging/lustre/lustre/include/lustre_eacl.h
+++ b/drivers/staging/lustre/lustre/include/lustre_eacl.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -70,17 +66,6 @@ typedef struct {
#define CFS_ACL_XATTR_COUNT(size, prefix) \
(((size) - sizeof(prefix ## _header)) / sizeof(prefix ## _entry))
-extern ext_acl_xattr_header *
-lustre_posix_acl_xattr_2ext(posix_acl_xattr_header *header, int size);
-extern int
-lustre_posix_acl_xattr_filter(posix_acl_xattr_header *header, size_t size,
- posix_acl_xattr_header **out);
-extern void
-lustre_ext_acl_xattr_free(ext_acl_xattr_header *header);
-extern ext_acl_xattr_header *
-lustre_acl_xattr_merge2ext(posix_acl_xattr_header *posix_header, int size,
- ext_acl_xattr_header *ext_header);
-
#endif /* CONFIG_FS_POSIX_ACL */
/** @} eacl */
diff --git a/drivers/staging/lustre/lustre/include/lustre_export.h b/drivers/staging/lustre/lustre/include/lustre_export.h
index 3014d27e6dc2..6e7cc4689fb8 100644
--- a/drivers/staging/lustre/lustre/include/lustre_export.h
+++ b/drivers/staging/lustre/lustre/include/lustre_export.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -180,19 +176,6 @@ static inline int exp_connect_lru_resize(struct obd_export *exp)
return !!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE);
}
-static inline int exp_connect_rmtclient(struct obd_export *exp)
-{
- return !!(exp_connect_flags(exp) & OBD_CONNECT_RMT_CLIENT);
-}
-
-static inline int client_is_remote(struct obd_export *exp)
-{
- struct obd_import *imp = class_exp2cliimp(exp);
-
- return !!(imp->imp_connect_data.ocd_connect_flags &
- OBD_CONNECT_RMT_CLIENT);
-}
-
static inline int exp_connect_vbr(struct obd_export *exp)
{
return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR);
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
index ab4a92390a43..743671a547ef 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -45,7 +41,7 @@
*
* @{
*
- * http://wiki.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
+ * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs
* describes the FID namespace and interoperability requirements for FIDs.
* The important parts of that document are included here for reference.
*
@@ -308,10 +304,10 @@ static inline int fid_seq_in_fldb(__u64 seq)
fid_seq_is_root(seq) || fid_seq_is_dot(seq);
}
-static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq)
+static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx)
{
if (fid_seq_is_mdt0(seq)) {
- fid->f_seq = fid_idif_seq(0, 0);
+ fid->f_seq = fid_idif_seq(0, ost_idx);
} else {
LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) ||
fid_seq_is_idif(seq), "%#llx\n", seq);
@@ -498,19 +494,6 @@ static inline void ostid_build_res_name(struct ost_id *oi,
}
}
-static inline void ostid_res_name_to_id(struct ost_id *oi,
- struct ldlm_res_id *name)
-{
- if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_SEQ_OFF])) {
- /* old resid */
- ostid_set_seq(oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
- ostid_set_id(oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
- } else {
- /* new resid */
- fid_extract_from_res_name(&oi->oi_fid, name);
- }
-}
-
/**
* Return true if the resource is for the object identified by this id & group.
*/
@@ -546,7 +529,8 @@ static inline void ost_fid_build_resid(const struct lu_fid *fid,
}
static inline void ost_fid_from_resid(struct lu_fid *fid,
- const struct ldlm_res_id *name)
+ const struct ldlm_res_id *name,
+ int ost_idx)
{
if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) {
/* old resid */
@@ -554,7 +538,7 @@ static inline void ost_fid_from_resid(struct lu_fid *fid,
ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
- ostid_to_fid(fid, &oi, 0);
+ ostid_to_fid(fid, &oi, ost_idx);
} else {
/* new resid */
fid_extract_from_res_name(fid, name);
diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h
index 4cf2b0e61672..932410d3e3cc 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fld.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fld.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h
index 5488a698dabd..cde7ed702c86 100644
--- a/drivers/staging/lustre/lustre/include/lustre_ha.h
+++ b/drivers/staging/lustre/lustre/include/lustre_ha.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h
index 27f169d2ed34..1a63a6b9e116 100644
--- a/drivers/staging/lustre/lustre/include/lustre_handles.h
+++ b/drivers/staging/lustre/lustre/include/lustre_handles.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
index dac2d84d8266..4445be7a59dd 100644
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -109,7 +105,7 @@ static inline char *ptlrpc_import_state_name(enum lustre_imp_state state)
"RECOVER", "FULL", "EVICTED",
};
- LASSERT (state <= LUSTRE_IMP_EVICTED);
+ LASSERT(state <= LUSTRE_IMP_EVICTED);
return import_state_names[state];
}
diff --git a/drivers/staging/lustre/lustre/include/lustre_intent.h b/drivers/staging/lustre/lustre/include/lustre_intent.h
index c491d52d86a2..ed2b6c674109 100644
--- a/drivers/staging/lustre/lustre/include/lustre_intent.h
+++ b/drivers/staging/lustre/lustre/include/lustre_intent.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -38,7 +34,11 @@
#define LUSTRE_INTENT_H
/* intent IT_XXX are defined in lustre/include/obd.h */
-struct lustre_intent_data {
+
+struct lookup_intent {
+ int it_op;
+ int it_create_mode;
+ __u64 it_flags;
int it_disposition;
int it_status;
__u64 it_lock_handle;
@@ -46,17 +46,23 @@ struct lustre_intent_data {
int it_lock_mode;
int it_remote_lock_mode;
__u64 it_remote_lock_handle;
- void *it_data;
+ struct ptlrpc_request *it_request;
unsigned int it_lock_set:1;
};
-struct lookup_intent {
- int it_op;
- int it_create_mode;
- __u64 it_flags;
- union {
- struct lustre_intent_data lustre;
- } d;
-};
+static inline int it_disposition(struct lookup_intent *it, int flag)
+{
+ return it->it_disposition & flag;
+}
+
+static inline void it_set_disposition(struct lookup_intent *it, int flag)
+{
+ it->it_disposition |= flag;
+}
+
+static inline void it_clear_disposition(struct lookup_intent *it, int flag)
+{
+ it->it_disposition &= ~flag;
+}
#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index f2223d55850a..06958f217fc8 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -280,16 +276,16 @@ static inline void obd_ioctl_freedata(char *buf, int len)
#define OBD_IOC_DATA_TYPE long
#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DESTROY _IOW ('f', 104, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE)
#define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SETATTR _IOW ('f', 107, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE)
#define OBD_IOC_GETATTR _IOWR ('f', 108, OBD_IOC_DATA_TYPE)
#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE)
#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE)
#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SYNC _IOW ('f', 114, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE)
#define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE)
#define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE)
#define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE)
@@ -308,13 +304,13 @@ static inline void obd_ioctl_freedata(char *buf, int len)
#define OBD_IOC_GETDTNAME OBD_IOC_GETNAME
#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PING_TARGET _IOW ('f', 136, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE)
#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139)
-#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE)
#define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE)
@@ -324,27 +320,27 @@ static inline void obd_ioctl_freedata(char *buf, int len)
#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CHANGELOG_SEND _IOW ('f', 148, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE)
#define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
#define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE)
/* see also <lustre/lustre_user.h> for ioctls 151-153 */
/* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */
-#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_SETSTRIPE _IOW('f', 154, OBD_IOC_DATA_TYPE)
/* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */
-#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_GETSTRIPE _IOW('f', 155, OBD_IOC_DATA_TYPE)
/* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */
-#define OBD_IOC_LOV_SETEA _IOW ('f', 156, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_SETEA _IOW('f', 156, OBD_IOC_DATA_TYPE)
/* see <lustre/lustre_user.h> for ioctls 157-159 */
/* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */
-#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int)
+#define OBD_IOC_QUOTACHECK _IOW('f', 160, int)
/* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */
-#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *)
+#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *)
/* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */
#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
/* see also <lustre/lustre_user.h> for ioctls 163-176 */
-#define OBD_IOC_CHANGELOG_REG _IOW ('f', 177, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_DEREG _IOW ('f', 178, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_CLEAR _IOW ('f', 179, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data)
#define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE)
#define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE)
#define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE)
@@ -352,7 +348,7 @@ static inline void obd_ioctl_freedata(char *buf, int len)
#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE)
#define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE)
#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE)
#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE)
#define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE)
@@ -522,6 +518,28 @@ struct l_wait_info {
sigmask(SIGTERM) | sigmask(SIGQUIT) | \
sigmask(SIGALRM))
+/**
+ * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * waiting threads, which is not always desirable because all threads will
+ * be woken up again and again, even if the user only needs a few of them
+ * to be active most of the time. This is bad for performance because the
+ * cache can be polluted by different threads.
+ *
+ * A LIFO list resolves this problem because we always wake up the most
+ * recently active thread by default.
+ *
+ * NB: please don't call non-exclusive & exclusive wait on the same
+ * waitq if add_wait_queue_exclusive_head is used.
+ *
+ * The body is wrapped in do { } while (0) so the macro expands to a single
+ * statement and stays correct in an unbraced if/else. The local is named
+ * __flags so it cannot capture an identifier used in the macro arguments.
+ */
+#define add_wait_queue_exclusive_head(waitq, link) \
+do { \
+ unsigned long __flags; \
+ \
+ spin_lock_irqsave(&((waitq)->lock), __flags); \
+ __add_wait_queue_exclusive(waitq, link); \
+ spin_unlock_irqrestore(&((waitq)->lock), __flags); \
+} while (0)
+
/*
* wait for @condition to become true, but no longer than timeout, specified
* by @info.
@@ -578,7 +596,7 @@ do { \
\
if (condition) \
break; \
- if (cfs_signal_pending()) { \
+ if (signal_pending(current)) { \
if (info->lwi_on_signal && \
(__timeout == 0 || __allow_intr)) { \
if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \
diff --git a/drivers/staging/lustre/lustre/include/lustre_lite.h b/drivers/staging/lustre/lustre/include/lustre_lite.h
index fcc5ebbceed8..b16897702559 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lite.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lite.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
index 49618e186824..b96e02317bfc 100644
--- a/drivers/staging/lustre/lustre/include/lustre_log.h
+++ b/drivers/staging/lustre/lustre/include/lustre_log.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index af77eb359c43..fa62b95d351f 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -64,9 +60,27 @@ struct obd_export;
struct ptlrpc_request;
struct obd_device;
+/**
+ * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
+ *
+ * This mutex is used to implement execute-once semantics on the MDT.
+ * The MDT stores the last transaction ID and result for every client in
+ * its last_rcvd file. If the client doesn't get a reply, it can safely
+ * resend the request and the MDT will reconstruct the reply being aware
+ * that the request has already been executed. Without this lock,
+ * execution status of concurrent in-flight requests would be
+ * overwritten.
+ *
+ * This design limits the extent to which we can keep a full pipeline of
+ * in-flight requests from a single client. This limitation could be
+ * overcome by allowing multiple slots per client in the last_rcvd file.
+ */
struct mdc_rpc_lock {
+ /** Lock protecting in-flight RPC concurrency. */
struct mutex rpcl_mutex;
+ /** Intent associated with currently executing request. */
struct lookup_intent *rpcl_it;
+ /** Used for MDS/RPC load testing purposes. */
int rpcl_fakes;
};
@@ -171,9 +185,6 @@ struct mdc_cache_waiter {
};
/* mdc/mdc_locks.c */
-int it_disposition(struct lookup_intent *it, int flag);
-void it_clear_disposition(struct lookup_intent *it, int flag);
-void it_set_disposition(struct lookup_intent *it, int flag);
int it_open_error(int phase, struct lookup_intent *it);
static inline bool cl_is_lov_delay_create(unsigned int flags)
diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h
index 95d27ddecfb3..4104bd9bd5c4 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mds.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mds.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index 69586a522eb7..d5debd615fdf 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -270,6 +266,11 @@
/* Macro to hide a typecast. */
#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
+struct ptlrpc_replay_async_args {
+ int praa_old_state;
+ int praa_old_status;
+};
+
/**
* Structure to single define portal connection.
*/
@@ -479,8 +480,9 @@ enum rq_phase {
RQ_PHASE_BULK = 0xebc0de02,
RQ_PHASE_INTERPRET = 0xebc0de03,
RQ_PHASE_COMPLETE = 0xebc0de04,
- RQ_PHASE_UNREGISTERING = 0xebc0de05,
- RQ_PHASE_UNDEFINED = 0xebc0de06
+ RQ_PHASE_UNREG_RPC = 0xebc0de05,
+ RQ_PHASE_UNREG_BULK = 0xebc0de06,
+ RQ_PHASE_UNDEFINED = 0xebc0de07
};
/** Type of request interpreter call-back */
@@ -1247,22 +1249,103 @@ struct ptlrpc_hpreq_ops {
void (*hpreq_fini)(struct ptlrpc_request *);
};
-/**
- * Represents remote procedure call.
- *
- * This is a staple structure used by everybody wanting to send a request
- * in Lustre.
- */
-struct ptlrpc_request {
- /* Request type: one of PTL_RPC_MSG_* */
- int rq_type;
- /** Result of request processing */
- int rq_status;
+struct ptlrpc_cli_req {
+ /** For bulk requests on client only: bulk descriptor */
+ struct ptlrpc_bulk_desc *cr_bulk;
+ /** optional time limit for send attempts */
+ long cr_delay_limit;
+ /** time request was first queued */
+ time_t cr_queued_time;
+ /** request sent timeval */
+ struct timespec64 cr_sent_tv;
+ /** time for request really sent out */
+ time_t cr_sent_out;
+ /** when req reply unlink must finish. */
+ time_t cr_reply_deadline;
+ /** when req bulk unlink must finish. */
+ time_t cr_bulk_deadline;
+ /** when req unlink must finish. */
+ time_t cr_req_deadline;
+ /** Portal to which this request would be sent */
+ short cr_req_ptl;
+ /** Portal where to wait for reply and where reply would be sent */
+ short cr_rep_ptl;
+ /** request resending number */
+ unsigned int cr_resend_nr;
+ /** What was import generation when this request was sent */
+ int cr_imp_gen;
+ enum lustre_imp_state cr_send_state;
+ /** Per-request waitq introduced by bug 21938 for recovery waiting */
+ wait_queue_head_t cr_set_waitq;
+ /** Link item for request set lists */
+ struct list_head cr_set_chain;
+ /** link to waited ctx */
+ struct list_head cr_ctx_chain;
+
+ /** client's half ctx */
+ struct ptlrpc_cli_ctx *cr_cli_ctx;
+ /** Link back to the request set */
+ struct ptlrpc_request_set *cr_set;
+ /** outgoing request MD handle */
+ lnet_handle_md_t cr_req_md_h;
+ /** request-out callback parameter */
+ struct ptlrpc_cb_id cr_req_cbid;
+ /** incoming reply MD handle */
+ lnet_handle_md_t cr_reply_md_h;
+ wait_queue_head_t cr_reply_waitq;
+ /** reply callback parameter */
+ struct ptlrpc_cb_id cr_reply_cbid;
+ /** Async completion handler, called when reply is received */
+ ptlrpc_interpterer_t cr_reply_interp;
+ /** Async completion context */
+ union ptlrpc_async_args cr_async_args;
+ /** Opaque data for replay and commit callbacks. */
+ void *cr_cb_data;
/**
- * Linkage item through which this request is included into
- * sending/delayed lists on client and into rqbd list on server
+ * Commit callback, called when request is committed and about to be
+ * freed.
*/
- struct list_head rq_list;
+ void (*cr_commit_cb)(struct ptlrpc_request *);
+ /** Replay callback, called after request is replayed at recovery */
+ void (*cr_replay_cb)(struct ptlrpc_request *);
+};
+
+/** client request member alias */
+/* NB: these aliases should NOT be used by any new code; instead they should
+ * be removed step by step to avoid potential abuse
+ */
+#define rq_bulk rq_cli.cr_bulk
+#define rq_delay_limit rq_cli.cr_delay_limit
+#define rq_queued_time rq_cli.cr_queued_time
+#define rq_sent_tv rq_cli.cr_sent_tv
+#define rq_real_sent rq_cli.cr_sent_out
+#define rq_reply_deadline rq_cli.cr_reply_deadline
+#define rq_bulk_deadline rq_cli.cr_bulk_deadline
+#define rq_req_deadline rq_cli.cr_req_deadline
+#define rq_nr_resend rq_cli.cr_resend_nr
+#define rq_request_portal rq_cli.cr_req_ptl
+#define rq_reply_portal rq_cli.cr_rep_ptl
+#define rq_import_generation rq_cli.cr_imp_gen
+#define rq_send_state rq_cli.cr_send_state
+#define rq_set_chain rq_cli.cr_set_chain
+#define rq_ctx_chain rq_cli.cr_ctx_chain
+#define rq_set rq_cli.cr_set
+#define rq_set_waitq rq_cli.cr_set_waitq
+#define rq_cli_ctx rq_cli.cr_cli_ctx
+#define rq_req_md_h rq_cli.cr_req_md_h
+#define rq_req_cbid rq_cli.cr_req_cbid
+#define rq_reply_md_h rq_cli.cr_reply_md_h
+#define rq_reply_waitq rq_cli.cr_reply_waitq
+#define rq_reply_cbid rq_cli.cr_reply_cbid
+#define rq_interpret_reply rq_cli.cr_reply_interp
+#define rq_async_args rq_cli.cr_async_args
+#define rq_cb_data rq_cli.cr_cb_data
+#define rq_commit_cb rq_cli.cr_commit_cb
+#define rq_replay_cb rq_cli.cr_replay_cb
+
+struct ptlrpc_srv_req {
+ /** initial thread servicing this request */
+ struct ptlrpc_thread *sr_svc_thread;
/**
* Server side list of incoming unserved requests sorted by arrival
* time. Traversed from time to time to notice about to expire
@@ -1270,32 +1353,86 @@ struct ptlrpc_request {
* know server is alive and well, just very busy to service their
* requests in time
*/
- struct list_head rq_timed_list;
- /** server-side history, used for debugging purposes. */
- struct list_head rq_history_list;
+ struct list_head sr_timed_list;
/** server-side per-export list */
- struct list_head rq_exp_list;
- /** server-side hp handlers */
- struct ptlrpc_hpreq_ops *rq_ops;
-
- /** initial thread servicing this request */
- struct ptlrpc_thread *rq_svc_thread;
-
+ struct list_head sr_exp_list;
+ /** server-side history, used for debugging purposes. */
+ struct list_head sr_hist_list;
/** history sequence # */
- __u64 rq_history_seq;
+ __u64 sr_hist_seq;
+ /** the index of service's srv_at_array into which request is linked */
+ time_t sr_at_index;
+ /** authed uid */
+ uid_t sr_auth_uid;
+ /** authed uid mapped to */
+ uid_t sr_auth_mapped_uid;
+ /** RPC is generated from what part of Lustre */
+ enum lustre_sec_part sr_sp_from;
+ /** request session context */
+ struct lu_context sr_ses;
/** \addtogroup nrs
* @{
*/
/** stub for NRS request */
- struct ptlrpc_nrs_request rq_nrq;
+ struct ptlrpc_nrs_request sr_nrq;
/** @} nrs */
- /** the index of service's srv_at_array into which request is linked */
- u32 rq_at_index;
+ /** request arrival time */
+ struct timespec64 sr_arrival_time;
+ /** server's half ctx */
+ struct ptlrpc_svc_ctx *sr_svc_ctx;
+ /** (server side), pointed directly into req buffer */
+ struct ptlrpc_user_desc *sr_user_desc;
+ /** separated reply state */
+ struct ptlrpc_reply_state *sr_reply_state;
+ /** server-side hp handlers */
+ struct ptlrpc_hpreq_ops *sr_ops;
+ /** incoming request buffer */
+ struct ptlrpc_request_buffer_desc *sr_rqbd;
+};
+
+/** server request member alias */
+/* NB: these aliases should NOT be used by any new code; instead they should
+ * be removed step by step to avoid potential abuse
+ */
+#define rq_svc_thread rq_srv.sr_svc_thread
+#define rq_timed_list rq_srv.sr_timed_list
+#define rq_exp_list rq_srv.sr_exp_list
+#define rq_history_list rq_srv.sr_hist_list
+#define rq_history_seq rq_srv.sr_hist_seq
+#define rq_at_index rq_srv.sr_at_index
+#define rq_auth_uid rq_srv.sr_auth_uid
+#define rq_auth_mapped_uid rq_srv.sr_auth_mapped_uid
+#define rq_sp_from rq_srv.sr_sp_from
+#define rq_session rq_srv.sr_ses
+#define rq_nrq rq_srv.sr_nrq
+#define rq_arrival_time rq_srv.sr_arrival_time
+#define rq_reply_state rq_srv.sr_reply_state
+#define rq_svc_ctx rq_srv.sr_svc_ctx
+#define rq_user_desc rq_srv.sr_user_desc
+#define rq_ops rq_srv.sr_ops
+#define rq_rqbd rq_srv.sr_rqbd
+
+/**
+ * Represents remote procedure call.
+ *
+ * This is a staple structure used by everybody wanting to send a request
+ * in Lustre.
+ */
+struct ptlrpc_request {
+ /* Request type: one of PTL_RPC_MSG_* */
+ int rq_type;
+ /** Result of request processing */
+ int rq_status;
+ /**
+ * Linkage item through which this request is included into
+ * sending/delayed lists on client and into rqbd list on server
+ */
+ struct list_head rq_list;
/** Lock to protect request flags and some other important bits, like
* rq_list
*/
spinlock_t rq_lock;
- /** client-side flags are serialized by rq_lock */
+ /** client-side flags are serialized by rq_lock @{ */
unsigned int rq_intr:1, rq_replied:1, rq_err:1,
rq_timedout:1, rq_resend:1, rq_restart:1,
/**
@@ -1311,37 +1448,40 @@ struct ptlrpc_request {
rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
rq_early:1,
- rq_req_unlink:1, rq_reply_unlink:1,
+ rq_req_unlinked:1, /* unlinked request buffer from lnet */
+ rq_reply_unlinked:1, /* unlinked reply buffer from lnet */
rq_memalloc:1, /* req originated from "kswapd" */
- /* server-side flags */
- rq_packed_final:1, /* packed final reply */
- rq_hp:1, /* high priority RPC */
- rq_at_linked:1, /* link into service's srv_at_array */
- rq_reply_truncate:1,
rq_committed:1,
- /* whether the "rq_set" is a valid one */
+ rq_reply_truncated:1,
+ /** whether the "rq_set" is a valid one */
rq_invalid_rqset:1,
rq_generation_set:1,
- /* do not resend request on -EINPROGRESS */
+ /** do not resend request on -EINPROGRESS */
rq_no_retry_einprogress:1,
/* allow the req to be sent if the import is in recovery
* status
*/
- rq_allow_replay:1;
-
- unsigned int rq_nr_resend;
-
- enum rq_phase rq_phase; /* one of RQ_PHASE_* */
- enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */
- atomic_t rq_refcount; /* client-side refcount for SENT race,
- * server-side refcount for multiple replies
- */
+ rq_allow_replay:1,
+ /* bulk request, sent to server, but uncommitted */
+ rq_unstable:1;
+ /** @} */
- /** Portal to which this request would be sent */
- short rq_request_portal; /* XXX FIXME bug 249 */
- /** Portal where to wait for reply and where reply would be sent */
- short rq_reply_portal; /* XXX FIXME bug 249 */
+ /** server-side flags @{ */
+ unsigned int
+ rq_hp:1, /**< high priority RPC */
+ rq_at_linked:1, /**< link into service's srv_at_array */
+ rq_packed_final:1; /**< packed final reply */
+ /** @} */
+ /** one of RQ_PHASE_* */
+ enum rq_phase rq_phase;
+ /** one of RQ_PHASE_* to be used next */
+ enum rq_phase rq_next_phase;
+ /**
+ * client-side refcount for SENT race, server-side refcount
+ * for multiple replies
+ */
+ atomic_t rq_refcount;
/**
* client-side:
* !rq_truncate : # reply bytes actually received,
@@ -1352,6 +1492,8 @@ struct ptlrpc_request {
int rq_reqlen;
/** Reply length */
int rq_replen;
+ /** Pool if request is from preallocated list */
+ struct ptlrpc_request_pool *rq_pool;
/** Request message - what client sent */
struct lustre_msg *rq_reqmsg;
/** Reply message - server response */
@@ -1364,19 +1506,20 @@ struct ptlrpc_request {
* List item to for replay list. Not yet committed requests get linked
* there.
* Also see \a rq_replay comment above.
+ * It's also link chain on obd_export::exp_req_replay_queue
*/
struct list_head rq_replay_list;
-
+ /** non-shared members for client & server request */
+ union {
+ struct ptlrpc_cli_req rq_cli;
+ struct ptlrpc_srv_req rq_srv;
+ };
/**
* security and encryption data
* @{
*/
- struct ptlrpc_cli_ctx *rq_cli_ctx; /**< client's half ctx */
- struct ptlrpc_svc_ctx *rq_svc_ctx; /**< server's half ctx */
- struct list_head rq_ctx_chain; /**< link to waited ctx */
-
- struct sptlrpc_flavor rq_flvr; /**< for client & server */
- enum lustre_sec_part rq_sp_from;
+ /** description of flavors for client & server */
+ struct sptlrpc_flavor rq_flvr;
/* client/server security flags */
unsigned int
@@ -1386,7 +1529,6 @@ struct ptlrpc_request {
rq_bulk_write:1, /* request bulk write */
/* server authentication flags */
rq_auth_gss:1, /* authenticated by gss */
- rq_auth_remote:1, /* authed as remote user */
rq_auth_usr_root:1, /* authed as root */
rq_auth_usr_mdt:1, /* authed as mdt */
rq_auth_usr_ost:1, /* authed as ost */
@@ -1395,19 +1537,15 @@ struct ptlrpc_request {
rq_pack_bulk:1,
/* doesn't expect reply FIXME */
rq_no_reply:1,
- rq_pill_init:1; /* pill initialized */
-
- uid_t rq_auth_uid; /* authed uid */
- uid_t rq_auth_mapped_uid; /* authed uid mapped to */
-
- /* (server side), pointed directly into req buffer */
- struct ptlrpc_user_desc *rq_user_desc;
-
- /* various buffer pointers */
- struct lustre_msg *rq_reqbuf; /* req wrapper */
- char *rq_repbuf; /* rep buffer */
- struct lustre_msg *rq_repdata; /* rep wrapper msg */
- struct lustre_msg *rq_clrbuf; /* only in priv mode */
+ rq_pill_init:1, /* pill initialized */
+ rq_srv_req:1; /* server request */
+
+ /** various buffer pointers */
+ struct lustre_msg *rq_reqbuf; /**< req wrapper */
+ char *rq_repbuf; /**< rep buffer */
+ struct lustre_msg *rq_repdata; /**< rep wrapper msg */
+ /** only in priv mode */
+ struct lustre_msg *rq_clrbuf;
int rq_reqbuf_len; /* req wrapper buf len */
int rq_reqdata_len; /* req wrapper msg len */
int rq_repbuf_len; /* rep buffer len */
@@ -1424,97 +1562,28 @@ struct ptlrpc_request {
__u32 rq_req_swab_mask;
__u32 rq_rep_swab_mask;
- /** What was import generation when this request was sent */
- int rq_import_generation;
- enum lustre_imp_state rq_send_state;
-
/** how many early replies (for stats) */
int rq_early_count;
- /** client+server request */
- lnet_handle_md_t rq_req_md_h;
- struct ptlrpc_cb_id rq_req_cbid;
- /** optional time limit for send attempts */
- long rq_delay_limit;
- /** time request was first queued */
- unsigned long rq_queued_time;
-
- /* server-side... */
- /** request arrival time */
- struct timespec64 rq_arrival_time;
- /** separated reply state */
- struct ptlrpc_reply_state *rq_reply_state;
- /** incoming request buffer */
- struct ptlrpc_request_buffer_desc *rq_rqbd;
-
- /** client-only incoming reply */
- lnet_handle_md_t rq_reply_md_h;
- wait_queue_head_t rq_reply_waitq;
- struct ptlrpc_cb_id rq_reply_cbid;
-
+ /** Server-side, export on which request was received */
+ struct obd_export *rq_export;
+ /** import where request is being sent */
+ struct obd_import *rq_import;
/** our LNet NID */
lnet_nid_t rq_self;
/** Peer description (the other side) */
lnet_process_id_t rq_peer;
- /** Server-side, export on which request was received */
- struct obd_export *rq_export;
- /** Client side, import where request is being sent */
- struct obd_import *rq_import;
-
- /** Replay callback, called after request is replayed at recovery */
- void (*rq_replay_cb)(struct ptlrpc_request *);
/**
- * Commit callback, called when request is committed and about to be
- * freed.
+ * service time estimate (secs)
+ * If the request is not served by this time, it is marked as timed out.
*/
- void (*rq_commit_cb)(struct ptlrpc_request *);
- /** Opaq data for replay and commit callbacks. */
- void *rq_cb_data;
-
- /** For bulk requests on client only: bulk descriptor */
- struct ptlrpc_bulk_desc *rq_bulk;
-
- /** client outgoing req */
+ int rq_timeout;
/**
* when request/reply sent (secs), or time when request should be sent
*/
time64_t rq_sent;
- /** time for request really sent out */
- time64_t rq_real_sent;
-
- /** when request must finish. volatile
- * so that servers' early reply updates to the deadline aren't
- * kept in per-cpu cache
- */
- volatile time64_t rq_deadline;
- /** when req reply unlink must finish. */
- time64_t rq_reply_deadline;
- /** when req bulk unlink must finish. */
- time64_t rq_bulk_deadline;
- /**
- * service time estimate (secs)
- * If the requestsis not served by this time, it is marked as timed out.
- */
- int rq_timeout;
-
- /** Multi-rpc bits */
- /** Per-request waitq introduced by bug 21938 for recovery waiting */
- wait_queue_head_t rq_set_waitq;
- /** Link item for request set lists */
- struct list_head rq_set_chain;
- /** Link back to the request set */
- struct ptlrpc_request_set *rq_set;
- /** Async completion handler, called when reply is received */
- ptlrpc_interpterer_t rq_interpret_reply;
- /** Async completion context */
- union ptlrpc_async_args rq_async_args;
-
- /** Pool if request is from preallocated list */
- struct ptlrpc_request_pool *rq_pool;
-
- struct lu_context rq_session;
- struct lu_context rq_recov_session;
-
+ /** when request must finish. */
+ time64_t rq_deadline;
/** request format description */
struct req_capsule rq_pill;
};
@@ -1627,8 +1696,10 @@ ptlrpc_phase2str(enum rq_phase phase)
return "Interpret";
case RQ_PHASE_COMPLETE:
return "Complete";
- case RQ_PHASE_UNREGISTERING:
- return "Unregistering";
+ case RQ_PHASE_UNREG_RPC:
+ return "UnregRPC";
+ case RQ_PHASE_UNREG_BULK:
+ return "UnregBULK";
default:
return "?Phase?";
}
@@ -1655,7 +1726,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
#define DEBUG_REQ_FLAGS(req) \
ptlrpc_rqphase2str(req), \
FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \
- FLAG(req->rq_err, "E"), \
+ FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \
FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \
FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \
FLAG(req->rq_no_resend, "N"), \
@@ -1663,7 +1734,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \
FLAG(req->rq_committed, "M")
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s"
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s"
void _debug_req(struct ptlrpc_request *req,
struct libcfs_debug_msg_data *data, const char *fmt, ...)
@@ -2314,8 +2385,7 @@ static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
desc = req->rq_bulk;
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
- req->rq_bulk_deadline > ktime_get_real_seconds())
+ if (req->rq_bulk_deadline > ktime_get_real_seconds())
return 1;
if (!desc)
@@ -2662,13 +2732,20 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
if (req->rq_phase == new_phase)
return;
- if (new_phase == RQ_PHASE_UNREGISTERING) {
+ if (new_phase == RQ_PHASE_UNREG_RPC ||
+ new_phase == RQ_PHASE_UNREG_BULK) {
+ /* No embedded unregistering phases */
+ if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+ req->rq_phase == RQ_PHASE_UNREG_BULK)
+ return;
+
req->rq_next_phase = req->rq_phase;
if (req->rq_import)
atomic_inc(&req->rq_import->imp_unregistering);
}
- if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+ if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+ req->rq_phase == RQ_PHASE_UNREG_BULK) {
if (req->rq_import)
atomic_dec(&req->rq_import->imp_unregistering);
}
@@ -2685,9 +2762,6 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
static inline int
ptlrpc_client_early(struct ptlrpc_request *req)
{
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
- req->rq_reply_deadline > ktime_get_real_seconds())
- return 0;
return req->rq_early;
}
@@ -2697,8 +2771,7 @@ ptlrpc_client_early(struct ptlrpc_request *req)
static inline int
ptlrpc_client_replied(struct ptlrpc_request *req)
{
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
- req->rq_reply_deadline > ktime_get_real_seconds())
+ if (req->rq_reply_deadline > ktime_get_real_seconds())
return 0;
return req->rq_replied;
}
@@ -2707,8 +2780,7 @@ ptlrpc_client_replied(struct ptlrpc_request *req)
static inline int
ptlrpc_client_recv(struct ptlrpc_request *req)
{
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
- req->rq_reply_deadline > ktime_get_real_seconds())
+ if (req->rq_reply_deadline > ktime_get_real_seconds())
return 1;
return req->rq_receiving_reply;
}
@@ -2719,13 +2791,16 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
int rc;
spin_lock(&req->rq_lock);
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
- req->rq_reply_deadline > ktime_get_real_seconds()) {
+ if (req->rq_reply_deadline > ktime_get_real_seconds()) {
+ spin_unlock(&req->rq_lock);
+ return 1;
+ }
+ if (req->rq_req_deadline > ktime_get_real_seconds()) {
spin_unlock(&req->rq_lock);
return 1;
}
- rc = req->rq_receiving_reply;
- rc = rc || req->rq_req_unlink || req->rq_reply_unlink;
+ rc = !req->rq_req_unlinked || !req->rq_reply_unlinked ||
+ req->rq_receiving_reply;
spin_unlock(&req->rq_lock);
return rc;
}
diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h
index 383fe6febe4b..82aadd32c2b8 100644
--- a/drivers/staging/lustre/lustre/include/lustre_param.h
+++ b/drivers/staging/lustre/lustre/include/lustre_param.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -89,6 +85,7 @@ int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh);
/* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */
#define PARAM_OST "ost."
+#define PARAM_OSD "osd."
#define PARAM_OSC "osc."
#define PARAM_MDT "mdt."
#define PARAM_MDD "mdd."
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
index b2e67fcf9ef1..544a43c862b9 100644
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -137,6 +133,7 @@ extern struct req_format RQF_MGS_CONFIG_READ;
/* fid/fld req_format */
extern struct req_format RQF_SEQ_QUERY;
extern struct req_format RQF_FLD_QUERY;
+extern struct req_format RQF_FLD_READ;
/* MDS req_format */
extern struct req_format RQF_MDS_CONNECT;
extern struct req_format RQF_MDS_DISCONNECT;
@@ -163,7 +160,7 @@ extern struct req_format RQF_MDS_IS_SUBDIR;
extern struct req_format RQF_MDS_DONE_WRITING;
extern struct req_format RQF_MDS_REINT;
extern struct req_format RQF_MDS_REINT_CREATE;
-extern struct req_format RQF_MDS_REINT_CREATE_RMT_ACL;
+extern struct req_format RQF_MDS_REINT_CREATE_ACL;
extern struct req_format RQF_MDS_REINT_CREATE_SLAVE;
extern struct req_format RQF_MDS_REINT_CREATE_SYM;
extern struct req_format RQF_MDS_REINT_OPEN;
@@ -199,7 +196,7 @@ extern struct req_format RQF_OST_BRW_READ;
extern struct req_format RQF_OST_BRW_WRITE;
extern struct req_format RQF_OST_STATFS;
extern struct req_format RQF_OST_SET_GRANT_INFO;
-extern struct req_format RQF_OST_GET_INFO_GENERIC;
+extern struct req_format RQF_OST_GET_INFO;
extern struct req_format RQF_OST_GET_INFO_LAST_ID;
extern struct req_format RQF_OST_GET_INFO_LAST_FID;
extern struct req_format RQF_OST_SET_INFO_LAST_FID;
diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h
index 01b4e6726a68..90c183424802 100644
--- a/drivers/staging/lustre/lustre/include/lustre_sec.h
+++ b/drivers/staging/lustre/lustre/include/lustre_sec.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -221,13 +217,13 @@ enum sptlrpc_bulk_service {
#define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL
-#define SPTLRPC_FLVR_INVALID ((__u32) 0xFFFFFFFF)
-#define SPTLRPC_FLVR_ANY ((__u32) 0xFFF00000)
+#define SPTLRPC_FLVR_INVALID ((__u32)0xFFFFFFFF)
+#define SPTLRPC_FLVR_ANY ((__u32)0xFFF00000)
/**
* extract the useful part from wire flavor
*/
-#define WIRE_FLVR(wflvr) (((__u32) (wflvr)) & 0x000FFFFF)
+#define WIRE_FLVR(wflvr) (((__u32)(wflvr)) & 0x000FFFFF)
/** @} flavor */
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index 4264d97650ec..a1bc2c478ff9 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -37,7 +33,7 @@
#ifndef __OBD_H
#define __OBD_H
-#include "linux/obd.h"
+#include <linux/spinlock.h>
#define IOC_OSC_TYPE 'h'
#define IOC_OSC_MIN_NR 20
@@ -54,6 +50,7 @@
#include "lustre_export.h"
#include "lustre_fid.h"
#include "lustre_fld.h"
+#include "lustre_intent.h"
#define MAX_OBD_DEVICES 8192
@@ -165,9 +162,6 @@ struct obd_info {
obd_enqueue_update_f oi_cb_up;
};
-void lov_stripe_lock(struct lov_stripe_md *md);
-void lov_stripe_unlock(struct lov_stripe_md *md);
-
struct obd_type {
struct list_head typ_chain;
struct obd_ops *typ_dt_ops;
@@ -234,6 +228,12 @@ enum {
#define MDC_MAX_RIF_DEFAULT 8
#define MDC_MAX_RIF_MAX 512
+enum obd_cl_sem_lock_class {
+ OBD_CLI_SEM_NORMAL,
+ OBD_CLI_SEM_MGC,
+ OBD_CLI_SEM_MDCOSC,
+};
+
struct mdc_rpc_lock;
struct obd_import;
struct client_obd {
@@ -293,14 +293,10 @@ struct client_obd {
* blocking everywhere, but we don't want to slow down fast-path of
* our main platform.)
*
- * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together
- * with client_obd_list_{un,}lock() and
- * client_obd_list_lock_{init,done}() functions.
- *
* NB by Jinshan: though field names are still _loi_, but actually
* osc_object{}s are in the list.
*/
- struct client_obd_lock cl_loi_list_lock;
+ spinlock_t cl_loi_list_lock;
struct list_head cl_loi_ready_list;
struct list_head cl_loi_hp_ready_list;
struct list_head cl_loi_write_list;
@@ -327,7 +323,8 @@ struct client_obd {
atomic_t cl_lru_shrinkers;
atomic_t cl_lru_in_list;
struct list_head cl_lru_list; /* lru page list */
- struct client_obd_lock cl_lru_list_lock; /* page list protector */
+ spinlock_t cl_lru_list_lock; /* page list protector */
+ atomic_t cl_unstable_count;
/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
atomic_t cl_destroy_in_flight;
@@ -364,6 +361,7 @@ struct client_obd {
/* ptlrpc work for writeback in ptlrpcd context */
void *cl_writeback_work;
+ void *cl_lru_work;
/* hash tables for osc_quota_info */
struct cfs_hash *cl_quota_hash[MAXQUOTAS];
};
@@ -391,45 +389,9 @@ struct ost_pool {
struct rw_semaphore op_rw_sem; /* to protect ost_pool use */
};
-/* Round-robin allocator data */
-struct lov_qos_rr {
- __u32 lqr_start_idx; /* start index of new inode */
- __u32 lqr_offset_idx; /* aliasing for start_idx */
- int lqr_start_count; /* reseed counter */
- struct ost_pool lqr_pool; /* round-robin optimized list */
- unsigned long lqr_dirty:1; /* recalc round-robin list */
-};
-
/* allow statfs data caching for 1 second */
#define OBD_STATFS_CACHE_SECONDS 1
-struct lov_statfs_data {
- struct obd_info lsd_oi;
- struct obd_statfs lsd_statfs;
-};
-
-/* Stripe placement optimization */
-struct lov_qos {
- struct list_head lq_oss_list; /* list of OSSs that targets use */
- struct rw_semaphore lq_rw_sem;
- __u32 lq_active_oss_count;
- unsigned int lq_prio_free; /* priority for free space */
- unsigned int lq_threshold_rr;/* priority for rr */
- struct lov_qos_rr lq_rr; /* round robin qos data */
- unsigned long lq_dirty:1, /* recalc qos data */
- lq_same_space:1,/* the ost's all have approx.
- * the same space avail
- */
- lq_reset:1, /* zero current penalties */
- lq_statfs_in_progress:1; /* statfs op in
- progress */
- /* qos statfs data */
- struct lov_statfs_data *lq_statfs_data;
- wait_queue_head_t lq_statfs_waitq; /* waitqueue to notify statfs
- * requests completion
- */
-};
-
struct lov_tgt_desc {
struct list_head ltd_kill;
struct obd_uuid ltd_uuid;
@@ -442,25 +404,6 @@ struct lov_tgt_desc {
ltd_reap:1; /* should this target be deleted */
};
-/* Pool metadata */
-#define pool_tgt_size(_p) _p->pool_obds.op_size
-#define pool_tgt_count(_p) _p->pool_obds.op_count
-#define pool_tgt_array(_p) _p->pool_obds.op_array
-#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem
-
-struct pool_desc {
- char pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */
- struct ost_pool pool_obds; /* pool members */
- atomic_t pool_refcount; /* pool ref. counter */
- struct lov_qos_rr pool_rr; /* round robin qos */
- struct hlist_node pool_hash; /* access by poolname */
- struct list_head pool_list; /* serial access */
- struct dentry *pool_debugfs_entry; /* file in debugfs */
- struct obd_device *pool_lobd; /* obd of the lov/lod to which
- * this pool belongs
- */
-};
-
struct lov_obd {
struct lov_desc desc;
struct lov_tgt_desc **lov_tgts; /* sparse array */
@@ -468,8 +411,6 @@ struct lov_obd {
struct mutex lov_lock;
struct obd_connect_data lov_ocd;
atomic_t lov_refcount;
- __u32 lov_tgt_count; /* how many OBD's */
- __u32 lov_active_tgt_count; /* how many active */
__u32 lov_death_row;/* tgts scheduled to be deleted */
__u32 lov_tgt_size; /* size of tgts array */
int lov_connects;
@@ -479,8 +420,8 @@ struct lov_obd {
struct dentry *lov_pool_debugfs_entry;
enum lustre_sec_part lov_sp_me;
- /* Cached LRU pages from upper layer */
- void *lov_cache;
+ /* Cached LRU and unstable data from upper layer */
+ struct cl_client_cache *lov_cache;
struct rw_semaphore lov_notify_lock;
@@ -511,7 +452,7 @@ struct lmv_obd {
struct obd_uuid cluuid;
struct obd_export *exp;
- struct mutex init_mutex;
+ struct mutex lmv_init_mutex;
int connected;
int max_easize;
int max_def_easize;
@@ -1180,9 +1121,6 @@ struct md_ops {
ldlm_policy_data_t *, enum ldlm_mode,
enum ldlm_cancel_flags flags, void *opaque);
- int (*get_remote_perm)(struct obd_export *, const struct lu_fid *,
- __u32, struct ptlrpc_request **);
-
int (*intent_getattr_async)(struct obd_export *,
struct md_enqueue_info *,
struct ldlm_enqueue_info *);
diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h
index 637fa22110a4..a8a81e662a56 100644
--- a/drivers/staging/lustre/lustre/include/obd_cksum.h
+++ b/drivers/staging/lustre/lustre/include/obd_cksum.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -35,6 +31,7 @@
#ifndef __OBD_CKSUM
#define __OBD_CKSUM
#include "../../include/linux/libcfs/libcfs.h"
+#include "../../include/linux/libcfs/libcfs_crypto.h"
#include "lustre/lustre_idl.h"
static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type)
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
index 706869f8c98f..6482a937000b 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -477,7 +473,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
struct lu_context session_ctx;
struct lu_env env;
- lu_context_init(&session_ctx, LCT_SESSION);
+ lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION);
session_ctx.lc_thread = NULL;
lu_context_enter(&session_ctx);
@@ -490,8 +486,9 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
obd->obd_lu_dev = d;
d->ld_obd = obd;
rc = 0;
- } else
+ } else {
rc = PTR_ERR(d);
+ }
}
lu_context_exit(&session_ctx);
lu_context_fini(&session_ctx);
@@ -1653,16 +1650,6 @@ static inline int md_init_ea_size(struct obd_export *exp, int easize,
cookiesize, def_cookiesize);
}
-static inline int md_get_remote_perm(struct obd_export *exp,
- const struct lu_fid *fid, __u32 suppgid,
- struct ptlrpc_request **request)
-{
- EXP_CHECK_MD_OP(exp, get_remote_perm);
- EXP_MD_COUNTER_INCREMENT(exp, get_remote_perm);
- return MDP(exp->exp_obd, get_remote_perm)(exp, fid, suppgid,
- request);
-}
-
static inline int md_intent_getattr_async(struct obd_export *exp,
struct md_enqueue_info *minfo,
struct ldlm_enqueue_info *einfo)
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index f8ee3a3254ba..845e64a56c21 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -58,6 +54,7 @@ extern int at_early_margin;
extern int at_extra;
extern unsigned int obd_sync_filter;
extern unsigned int obd_max_dirty_pages;
+extern atomic_t obd_unstable_pages;
extern atomic_t obd_dirty_pages;
extern atomic_t obd_dirty_transit_pages;
extern char obd_jobid_var[];
@@ -289,6 +286,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_OST_ENOINO 0x229
#define OBD_FAIL_OST_DQACQ_NET 0x230
#define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231
+#define OBD_FAIL_OST_SET_INFO_NET 0x232
#define OBD_FAIL_LDLM 0x300
#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
@@ -319,6 +317,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LDLM_AGL_DELAY 0x31a
#define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b
#define OBD_FAIL_LDLM_OST_LVB 0x31c
+#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d
/* LOCKLESS IO */
#define OBD_FAIL_LDLM_SET_CONTENTION 0x385
@@ -365,6 +364,9 @@ extern char obd_jobid_var[];
#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515
#define OBD_FAIL_PTLRPC_DELAY_IMP_FULL 0x516
#define OBD_FAIL_PTLRPC_CANCEL_RESEND 0x517
+#define OBD_FAIL_PTLRPC_DROP_BULK 0x51a
+#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
+#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
#define OBD_FAIL_OBD_PING_NET 0x600
#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601
@@ -426,6 +428,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_FLD 0x1100
#define OBD_FAIL_FLD_QUERY_NET 0x1101
+#define OBD_FAIL_FLD_READ_NET 0x1102
#define OBD_FAIL_SEC_CTX 0x1200
#define OBD_FAIL_SEC_CTX_INIT_NET 0x1201
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
deleted file mode 100644
index 96141d17d07f..000000000000
--- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
+++ /dev/null
@@ -1,1203 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "../../include/linux/libcfs/libcfs.h"
-# include <linux/fs.h>
-# include <linux/sched.h>
-# include <linux/mm.h>
-# include <linux/quotaops.h>
-# include <linux/highmem.h>
-# include <linux/pagemap.h>
-# include <linux/rbtree.h>
-
-#include "../include/obd.h"
-#include "../include/obd_support.h"
-#include "../include/lustre_fid.h"
-#include "../include/lustre_lite.h"
-#include "../include/lustre_dlm.h"
-#include "../include/lustre_ver.h"
-#include "../include/lustre_mdc.h"
-#include "../include/cl_object.h"
-
-#include "../include/lclient.h"
-
-#include "../llite/llite_internal.h"
-
-static const struct cl_req_operations ccc_req_ops;
-
-/*
- * ccc_ prefix stands for "Common Client Code".
- */
-
-static struct kmem_cache *ccc_lock_kmem;
-static struct kmem_cache *ccc_object_kmem;
-static struct kmem_cache *ccc_thread_kmem;
-static struct kmem_cache *ccc_session_kmem;
-static struct kmem_cache *ccc_req_kmem;
-
-static struct lu_kmem_descr ccc_caches[] = {
- {
- .ckd_cache = &ccc_lock_kmem,
- .ckd_name = "ccc_lock_kmem",
- .ckd_size = sizeof(struct ccc_lock)
- },
- {
- .ckd_cache = &ccc_object_kmem,
- .ckd_name = "ccc_object_kmem",
- .ckd_size = sizeof(struct ccc_object)
- },
- {
- .ckd_cache = &ccc_thread_kmem,
- .ckd_name = "ccc_thread_kmem",
- .ckd_size = sizeof(struct ccc_thread_info),
- },
- {
- .ckd_cache = &ccc_session_kmem,
- .ckd_name = "ccc_session_kmem",
- .ckd_size = sizeof(struct ccc_session)
- },
- {
- .ckd_cache = &ccc_req_kmem,
- .ckd_name = "ccc_req_kmem",
- .ckd_size = sizeof(struct ccc_req)
- },
- {
- .ckd_cache = NULL
- }
-};
-
-/*****************************************************************************
- *
- * Vvp device and device type functions.
- *
- */
-
-void *ccc_key_init(const struct lu_context *ctx, struct lu_context_key *key)
-{
- struct ccc_thread_info *info;
-
- info = kmem_cache_zalloc(ccc_thread_kmem, GFP_NOFS);
- if (!info)
- info = ERR_PTR(-ENOMEM);
- return info;
-}
-
-void ccc_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct ccc_thread_info *info = data;
-
- kmem_cache_free(ccc_thread_kmem, info);
-}
-
-void *ccc_session_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct ccc_session *session;
-
- session = kmem_cache_zalloc(ccc_session_kmem, GFP_NOFS);
- if (!session)
- session = ERR_PTR(-ENOMEM);
- return session;
-}
-
-void ccc_session_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct ccc_session *session = data;
-
- kmem_cache_free(ccc_session_kmem, session);
-}
-
-struct lu_context_key ccc_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = ccc_key_init,
- .lct_fini = ccc_key_fini
-};
-
-struct lu_context_key ccc_session_key = {
- .lct_tags = LCT_SESSION,
- .lct_init = ccc_session_key_init,
- .lct_fini = ccc_session_key_fini
-};
-
-/* type constructor/destructor: ccc_type_{init,fini,start,stop}(). */
-/* LU_TYPE_INIT_FINI(ccc, &ccc_key, &ccc_session_key); */
-
-int ccc_device_init(const struct lu_env *env, struct lu_device *d,
- const char *name, struct lu_device *next)
-{
- struct ccc_device *vdv;
- int rc;
-
- vdv = lu2ccc_dev(d);
- vdv->cdv_next = lu2cl_dev(next);
-
- LASSERT(d->ld_site && next->ld_type);
- next->ld_site = d->ld_site;
- rc = next->ld_type->ldt_ops->ldto_device_init(
- env, next, next->ld_type->ldt_name, NULL);
- if (rc == 0) {
- lu_device_get(next);
- lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
- }
- return rc;
-}
-
-struct lu_device *ccc_device_fini(const struct lu_env *env,
- struct lu_device *d)
-{
- return cl2lu_dev(lu2ccc_dev(d)->cdv_next);
-}
-
-struct lu_device *ccc_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg,
- const struct lu_device_operations *luops,
- const struct cl_device_operations *clops)
-{
- struct ccc_device *vdv;
- struct lu_device *lud;
- struct cl_site *site;
- int rc;
-
- vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
- if (!vdv)
- return ERR_PTR(-ENOMEM);
-
- lud = &vdv->cdv_cl.cd_lu_dev;
- cl_device_init(&vdv->cdv_cl, t);
- ccc2lu_dev(vdv)->ld_ops = luops;
- vdv->cdv_cl.cd_ops = clops;
-
- site = kzalloc(sizeof(*site), GFP_NOFS);
- if (site) {
- rc = cl_site_init(site, &vdv->cdv_cl);
- if (rc == 0)
- rc = lu_site_init_finish(&site->cs_lu);
- else {
- LASSERT(!lud->ld_site);
- CERROR("Cannot init lu_site, rc %d.\n", rc);
- kfree(site);
- }
- } else
- rc = -ENOMEM;
- if (rc != 0) {
- ccc_device_free(env, lud);
- lud = ERR_PTR(rc);
- }
- return lud;
-}
-
-struct lu_device *ccc_device_free(const struct lu_env *env,
- struct lu_device *d)
-{
- struct ccc_device *vdv = lu2ccc_dev(d);
- struct cl_site *site = lu2cl_site(d->ld_site);
- struct lu_device *next = cl2lu_dev(vdv->cdv_next);
-
- if (d->ld_site) {
- cl_site_fini(site);
- kfree(site);
- }
- cl_device_fini(lu2cl_dev(d));
- kfree(vdv);
- return next;
-}
-
-int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req)
-{
- struct ccc_req *vrq;
- int result;
-
- vrq = kmem_cache_zalloc(ccc_req_kmem, GFP_NOFS);
- if (vrq) {
- cl_req_slice_add(req, &vrq->crq_cl, dev, &ccc_req_ops);
- result = 0;
- } else
- result = -ENOMEM;
- return result;
-}
-
-/**
- * An `emergency' environment used by ccc_inode_fini() when cl_env_get()
- * fails. Access to this environment is serialized by ccc_inode_fini_guard
- * mutex.
- */
-static struct lu_env *ccc_inode_fini_env;
-
-/**
- * A mutex serializing calls to slp_inode_fini() under extreme memory
- * pressure, when environments cannot be allocated.
- */
-static DEFINE_MUTEX(ccc_inode_fini_guard);
-static int dummy_refcheck;
-
-int ccc_global_init(struct lu_device_type *device_type)
-{
- int result;
-
- result = lu_kmem_init(ccc_caches);
- if (result)
- return result;
-
- result = lu_device_type_init(device_type);
- if (result)
- goto out_kmem;
-
- ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck,
- LCT_REMEMBER|LCT_NOREF);
- if (IS_ERR(ccc_inode_fini_env)) {
- result = PTR_ERR(ccc_inode_fini_env);
- goto out_device;
- }
-
- ccc_inode_fini_env->le_ctx.lc_cookie = 0x4;
- return 0;
-out_device:
- lu_device_type_fini(device_type);
-out_kmem:
- lu_kmem_fini(ccc_caches);
- return result;
-}
-
-void ccc_global_fini(struct lu_device_type *device_type)
-{
- if (ccc_inode_fini_env) {
- cl_env_put(ccc_inode_fini_env, &dummy_refcheck);
- ccc_inode_fini_env = NULL;
- }
- lu_device_type_fini(device_type);
- lu_kmem_fini(ccc_caches);
-}
-
-/*****************************************************************************
- *
- * Object operations.
- *
- */
-
-struct lu_object *ccc_object_alloc(const struct lu_env *env,
- const struct lu_object_header *unused,
- struct lu_device *dev,
- const struct cl_object_operations *clops,
- const struct lu_object_operations *luops)
-{
- struct ccc_object *vob;
- struct lu_object *obj;
-
- vob = kmem_cache_zalloc(ccc_object_kmem, GFP_NOFS);
- if (vob) {
- struct cl_object_header *hdr;
-
- obj = ccc2lu(vob);
- hdr = &vob->cob_header;
- cl_object_header_init(hdr);
- lu_object_init(obj, &hdr->coh_lu, dev);
- lu_object_add_top(&hdr->coh_lu, obj);
-
- vob->cob_cl.co_ops = clops;
- obj->lo_ops = luops;
- } else
- obj = NULL;
- return obj;
-}
-
-int ccc_object_init0(const struct lu_env *env,
- struct ccc_object *vob,
- const struct cl_object_conf *conf)
-{
- vob->cob_inode = conf->coc_inode;
- vob->cob_transient_pages = 0;
- cl_object_page_init(&vob->cob_cl, sizeof(struct ccc_page));
- return 0;
-}
-
-int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf)
-{
- struct ccc_device *dev = lu2ccc_dev(obj->lo_dev);
- struct ccc_object *vob = lu2ccc(obj);
- struct lu_object *below;
- struct lu_device *under;
- int result;
-
- under = &dev->cdv_next->cd_lu_dev;
- below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
- if (below) {
- const struct cl_object_conf *cconf;
-
- cconf = lu2cl_conf(conf);
- INIT_LIST_HEAD(&vob->cob_pending_list);
- lu_object_add(obj, below);
- result = ccc_object_init0(env, vob, cconf);
- } else
- result = -ENOMEM;
- return result;
-}
-
-void ccc_object_free(const struct lu_env *env, struct lu_object *obj)
-{
- struct ccc_object *vob = lu2ccc(obj);
-
- lu_object_fini(obj);
- lu_object_header_fini(obj->lo_header);
- kmem_cache_free(ccc_object_kmem, vob);
-}
-
-int ccc_lock_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_lock *lock,
- const struct cl_io *unused,
- const struct cl_lock_operations *lkops)
-{
- struct ccc_lock *clk;
- int result;
-
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
- clk = kmem_cache_zalloc(ccc_lock_kmem, GFP_NOFS);
- if (clk) {
- cl_lock_slice_add(lock, &clk->clk_cl, obj, lkops);
- result = 0;
- } else
- result = -ENOMEM;
- return result;
-}
-
-int ccc_object_glimpse(const struct lu_env *env,
- const struct cl_object *obj, struct ost_lvb *lvb)
-{
- struct inode *inode = ccc_object_inode(obj);
-
- lvb->lvb_mtime = cl_inode_mtime(inode);
- lvb->lvb_atime = cl_inode_atime(inode);
- lvb->lvb_ctime = cl_inode_ctime(inode);
- /*
- * LU-417: Add dirty pages block count lest i_blocks reports 0, some
- * "cp" or "tar" on remote node may think it's a completely sparse file
- * and skip it.
- */
- if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
- lvb->lvb_blocks = dirty_cnt(inode);
- return 0;
-}
-
-static void ccc_object_size_lock(struct cl_object *obj)
-{
- struct inode *inode = ccc_object_inode(obj);
-
- ll_inode_size_lock(inode);
- cl_object_attr_lock(obj);
-}
-
-static void ccc_object_size_unlock(struct cl_object *obj)
-{
- struct inode *inode = ccc_object_inode(obj);
-
- cl_object_attr_unlock(obj);
- ll_inode_size_unlock(inode);
-}
-
-/*****************************************************************************
- *
- * Page operations.
- *
- */
-
-struct page *ccc_page_vmpage(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- return cl2vm_page(slice);
-}
-
-int ccc_page_is_under_lock(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io)
-{
- struct ccc_io *cio = ccc_env_io(env);
- struct cl_lock_descr *desc = &ccc_env_info(env)->cti_descr;
- struct cl_page *page = slice->cpl_page;
-
- int result;
-
- if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
- io->ci_type == CIT_FAULT) {
- if (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)
- result = -EBUSY;
- else {
- desc->cld_start = page->cp_index;
- desc->cld_end = page->cp_index;
- desc->cld_obj = page->cp_obj;
- desc->cld_mode = CLM_READ;
- result = cl_queue_match(&io->ci_lockset.cls_done,
- desc) ? -EBUSY : 0;
- }
- } else
- result = 0;
- return result;
-}
-
-int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice)
-{
- /*
- * Cached read?
- */
- LBUG();
- return 0;
-}
-
-int ccc_transient_page_prep(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- /* transient page should always be sent. */
- return 0;
-}
-
-/*****************************************************************************
- *
- * Lock operations.
- *
- */
-
-void ccc_lock_delete(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-}
-
-void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
-{
- struct ccc_lock *clk = cl2ccc_lock(slice);
-
- kmem_cache_free(ccc_lock_kmem, clk);
-}
-
-int ccc_lock_enqueue(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_io *unused, __u32 enqflags)
-{
- CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
- return 0;
-}
-
-int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
- CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
- return 0;
-}
-
-int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
- CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
- return 0;
-}
-
-int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
- CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
- return 0;
-}
-
-/**
- * Implementation of cl_lock_operations::clo_fits_into() methods for ccc
- * layer. This function is executed every time io finds an existing lock in
- * the lock cache while creating new lock. This function has to decide whether
- * cached lock "fits" into io.
- *
- * \param slice lock to be checked
- * \param io IO that wants a lock.
- *
- * \see lov_lock_fits_into().
- */
-int ccc_lock_fits_into(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *need,
- const struct cl_io *io)
-{
- const struct cl_lock *lock = slice->cls_lock;
- const struct cl_lock_descr *descr = &lock->cll_descr;
- const struct ccc_io *cio = ccc_env_io(env);
- int result;
-
- /*
- * Work around DLM peculiarity: it assumes that glimpse
- * (LDLM_FL_HAS_INTENT) lock is always LCK_PR, and returns reads lock
- * when asked for LCK_PW lock with LDLM_FL_HAS_INTENT flag set. Make
- * sure that glimpse doesn't get CLM_WRITE top-lock, so that it
- * doesn't enqueue CLM_WRITE sub-locks.
- */
- if (cio->cui_glimpse)
- result = descr->cld_mode != CLM_WRITE;
-
- /*
- * Also, don't match incomplete write locks for read, otherwise read
- * would enqueue missing sub-locks in the write mode.
- */
- else if (need->cld_mode != descr->cld_mode)
- result = lock->cll_state >= CLS_ENQUEUED;
- else
- result = 1;
- return result;
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for ccc layer, invoked
- * whenever lock state changes. Transfers object attributes, that might be
- * updated as a result of lock acquiring into inode.
- */
-void ccc_lock_state(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- enum cl_lock_state state)
-{
- struct cl_lock *lock = slice->cls_lock;
-
- /*
- * Refresh inode attributes when the lock is moving into CLS_HELD
- * state, and only when this is a result of real enqueue, rather than
- * of finding lock in the cache.
- */
- if (state == CLS_HELD && lock->cll_state < CLS_HELD) {
- struct cl_object *obj;
- struct inode *inode;
-
- obj = slice->cls_obj;
- inode = ccc_object_inode(obj);
-
- /* vmtruncate() sets the i_size
- * under both a DLM lock and the
- * ll_inode_size_lock(). If we don't get the
- * ll_inode_size_lock() here we can match the DLM lock and
- * reset i_size. generic_file_write can then trust the
- * stale i_size when doing appending writes and effectively
- * cancel the result of the truncate. Getting the
- * ll_inode_size_lock() after the enqueue maintains the DLM
- * -> ll_inode_size_lock() acquiring order.
- */
- if (lock->cll_descr.cld_start == 0 &&
- lock->cll_descr.cld_end == CL_PAGE_EOF)
- cl_merge_lvb(env, inode);
- }
-}
-
-/*****************************************************************************
- *
- * io operations.
- *
- */
-
-int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- pgoff_t start, pgoff_t end)
-{
- struct ccc_io *cio = ccc_env_io(env);
- struct cl_lock_descr *descr = &cio->cui_link.cill_descr;
- struct cl_object *obj = io->ci_obj;
-
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
-
- memset(&cio->cui_link, 0, sizeof(cio->cui_link));
-
- if (cio->cui_fd && (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- descr->cld_mode = CLM_GROUP;
- descr->cld_gid = cio->cui_fd->fd_grouplock.cg_gid;
- } else {
- descr->cld_mode = mode;
- }
- descr->cld_obj = obj;
- descr->cld_start = start;
- descr->cld_end = end;
- descr->cld_enq_flags = enqflags;
-
- cl_io_lock_add(env, io, &cio->cui_link);
- return 0;
-}
-
-void ccc_io_update_iov(const struct lu_env *env,
- struct ccc_io *cio, struct cl_io *io)
-{
- size_t size = io->u.ci_rw.crw_count;
-
- if (!cl_is_normalio(env, io) || !cio->cui_iter)
- return;
-
- iov_iter_truncate(cio->cui_iter, size);
-}
-
-int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- loff_t start, loff_t end)
-{
- struct cl_object *obj = io->ci_obj;
-
- return ccc_io_one_lock_index(env, io, enqflags, mode,
- cl_index(obj, start), cl_index(obj, end));
-}
-
-void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- CLOBINVRNT(env, ios->cis_io->ci_obj,
- ccc_object_invariant(ios->cis_io->ci_obj));
-}
-
-void ccc_io_advance(const struct lu_env *env,
- const struct cl_io_slice *ios,
- size_t nob)
-{
- struct ccc_io *cio = cl2ccc_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = ios->cis_io->ci_obj;
-
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
- if (!cl_is_normalio(env, io))
- return;
-
- iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count -= nob);
-}
-
-/**
- * Helper function that if necessary adjusts file size (inode->i_size), when
- * position at the offset \a pos is accessed. File size can be arbitrary stale
- * on a Lustre client, but client at least knows KMS. If accessed area is
- * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
- *
- * Locking: cl_isize_lock is used to serialize changes to inode size and to
- * protect consistency between inode size and cl_object
- * attributes. cl_object_size_lock() protects consistency between cl_attr's of
- * top-object and sub-objects.
- */
-int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, loff_t start, size_t count, int *exceed)
-{
- struct cl_attr *attr = ccc_env_thread_attr(env);
- struct inode *inode = ccc_object_inode(obj);
- loff_t pos = start + count - 1;
- loff_t kms;
- int result;
-
- /*
- * Consistency guarantees: following possibilities exist for the
- * relation between region being accessed and real file size at this
- * moment:
- *
- * (A): the region is completely inside of the file;
- *
- * (B-x): x bytes of region are inside of the file, the rest is
- * outside;
- *
- * (C): the region is completely outside of the file.
- *
- * This classification is stable under DLM lock already acquired by
- * the caller, because to change the class, other client has to take
- * DLM lock conflicting with our lock. Also, any updates to ->i_size
- * by other threads on this client are serialized by
- * ll_inode_size_lock(). This guarantees that short reads are handled
- * correctly in the face of concurrent writes and truncates.
- */
- ccc_object_size_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- if (result == 0) {
- kms = attr->cat_kms;
- if (pos > kms) {
- /*
- * A glimpse is necessary to determine whether we
- * return a short read (B) or some zeroes at the end
- * of the buffer (C)
- */
- ccc_object_size_unlock(obj);
- result = cl_glimpse_lock(env, io, inode, obj, 0);
- if (result == 0 && exceed) {
- /* If objective page index exceed end-of-file
- * page index, return directly. Do not expect
- * kernel will check such case correctly.
- * linux-2.6.18-128.1.1 miss to do that.
- * --bug 17336
- */
- loff_t size = cl_isize_read(inode);
- loff_t cur_index = start >> PAGE_SHIFT;
- loff_t size_index = (size - 1) >>
- PAGE_SHIFT;
-
- if ((size == 0 && cur_index != 0) ||
- size_index < cur_index)
- *exceed = 1;
- }
- return result;
- }
- /*
- * region is within kms and, hence, within real file
- * size (A). We need to increase i_size to cover the
- * read region so that generic_file_read() will do its
- * job, but that doesn't mean the kms size is
- * _correct_, it is only the _minimum_ size. If
- * someone does a stat they will get the correct size
- * which will always be >= the kms value here.
- * b=11081
- */
- if (cl_isize_read(inode) < kms) {
- cl_isize_write_nolock(inode, kms);
- CDEBUG(D_VFSTRACE,
- DFID" updating i_size %llu\n",
- PFID(lu_object_fid(&obj->co_lu)),
- (__u64)cl_isize_read(inode));
-
- }
- }
- ccc_object_size_unlock(obj);
- return result;
-}
-
-/*****************************************************************************
- *
- * Transfer operations.
- *
- */
-
-void ccc_req_completion(const struct lu_env *env,
- const struct cl_req_slice *slice, int ioret)
-{
- struct ccc_req *vrq;
-
- if (ioret > 0)
- cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret);
-
- vrq = cl2ccc_req(slice);
- kmem_cache_free(ccc_req_kmem, vrq);
-}
-
-/**
- * Implementation of struct cl_req_operations::cro_attr_set() for ccc
- * layer. ccc is responsible for
- *
- * - o_[mac]time
- *
- * - o_mode
- *
- * - o_parent_seq
- *
- * - o_[ug]id
- *
- * - o_parent_oid
- *
- * - o_parent_ver
- *
- * - o_ioepoch,
- *
- */
-void ccc_req_attr_set(const struct lu_env *env,
- const struct cl_req_slice *slice,
- const struct cl_object *obj,
- struct cl_req_attr *attr, u64 flags)
-{
- struct inode *inode;
- struct obdo *oa;
- u32 valid_flags;
-
- oa = attr->cra_oa;
- inode = ccc_object_inode(obj);
- valid_flags = OBD_MD_FLTYPE;
-
- if (slice->crs_req->crq_type == CRT_WRITE) {
- if (flags & OBD_MD_FLEPOCH) {
- oa->o_valid |= OBD_MD_FLEPOCH;
- oa->o_ioepoch = cl_i2info(inode)->lli_ioepoch;
- valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
- OBD_MD_FLUID | OBD_MD_FLGID;
- }
- }
- obdo_from_inode(oa, inode, valid_flags & flags);
- obdo_set_parent_fid(oa, &cl_i2info(inode)->lli_fid);
- memcpy(attr->cra_jobid, cl_i2info(inode)->lli_jobid,
- JOBSTATS_JOBID_SIZE);
-}
-
-static const struct cl_req_operations ccc_req_ops = {
- .cro_attr_set = ccc_req_attr_set,
- .cro_completion = ccc_req_completion
-};
-
-int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
-{
- struct lu_env *env;
- struct cl_io *io;
- int result;
- int refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = ccc_env_thread_io(env);
- io->ci_obj = cl_i2info(inode)->lli_clob;
-
- io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
- io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
- io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
- io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
- io->u.ci_setattr.sa_valid = attr->ia_valid;
-
-again:
- if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
- struct ccc_io *cio = ccc_env_io(env);
-
- if (attr->ia_valid & ATTR_FILE)
- /* populate the file descriptor for ftruncate to honor
- * group lock - see LU-787
- */
- cio->cui_fd = cl_iattr2fd(inode, attr);
-
- result = cl_io_loop(env, io);
- } else {
- result = io->ci_result;
- }
- cl_io_fini(env, io);
- if (unlikely(io->ci_need_restart))
- goto again;
- /* HSM import case: file is released, cannot be restored
- * no need to fail except if restore registration failed
- * with -ENODATA
- */
- if (result == -ENODATA && io->ci_restore_needed &&
- io->ci_result != -ENODATA)
- result = 0;
- cl_env_put(env, &refcheck);
- return result;
-}
-
-/*****************************************************************************
- *
- * Type conversions.
- *
- */
-
-struct lu_device *ccc2lu_dev(struct ccc_device *vdv)
-{
- return &vdv->cdv_cl.cd_lu_dev;
-}
-
-struct ccc_device *lu2ccc_dev(const struct lu_device *d)
-{
- return container_of0(d, struct ccc_device, cdv_cl.cd_lu_dev);
-}
-
-struct ccc_device *cl2ccc_dev(const struct cl_device *d)
-{
- return container_of0(d, struct ccc_device, cdv_cl);
-}
-
-struct lu_object *ccc2lu(struct ccc_object *vob)
-{
- return &vob->cob_cl.co_lu;
-}
-
-struct ccc_object *lu2ccc(const struct lu_object *obj)
-{
- return container_of0(obj, struct ccc_object, cob_cl.co_lu);
-}
-
-struct ccc_object *cl2ccc(const struct cl_object *obj)
-{
- return container_of0(obj, struct ccc_object, cob_cl);
-}
-
-struct ccc_lock *cl2ccc_lock(const struct cl_lock_slice *slice)
-{
- return container_of(slice, struct ccc_lock, clk_cl);
-}
-
-struct ccc_io *cl2ccc_io(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct ccc_io *cio;
-
- cio = container_of(slice, struct ccc_io, cui_cl);
- LASSERT(cio == ccc_env_io(env));
- return cio;
-}
-
-struct ccc_req *cl2ccc_req(const struct cl_req_slice *slice)
-{
- return container_of0(slice, struct ccc_req, crq_cl);
-}
-
-struct page *cl2vm_page(const struct cl_page_slice *slice)
-{
- return cl2ccc_page(slice)->cpg_page;
-}
-
-/*****************************************************************************
- *
- * Accessors.
- *
- */
-int ccc_object_invariant(const struct cl_object *obj)
-{
- struct inode *inode = ccc_object_inode(obj);
- struct cl_inode_info *lli = cl_i2info(inode);
-
- return (S_ISREG(cl_inode_mode(inode)) ||
- /* i_mode of unlinked inode is zeroed. */
- cl_inode_mode(inode) == 0) && lli->lli_clob == obj;
-}
-
-struct inode *ccc_object_inode(const struct cl_object *obj)
-{
- return cl2ccc(obj)->cob_inode;
-}
-
-/**
- * Initialize or update CLIO structures for regular files when new
- * meta-data arrives from the server.
- *
- * \param inode regular file inode
- * \param md new file metadata from MDS
- * - allocates cl_object if necessary,
- * - updated layout, if object was already here.
- */
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
-{
- struct lu_env *env;
- struct cl_inode_info *lli;
- struct cl_object *clob;
- struct lu_site *site;
- struct lu_fid *fid;
- struct cl_object_conf conf = {
- .coc_inode = inode,
- .u = {
- .coc_md = md
- }
- };
- int result = 0;
- int refcheck;
-
- LASSERT(md->body->valid & OBD_MD_FLID);
- LASSERT(S_ISREG(cl_inode_mode(inode)));
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- site = cl_i2sbi(inode)->ll_site;
- lli = cl_i2info(inode);
- fid = &lli->lli_fid;
- LASSERT(fid_is_sane(fid));
-
- if (!lli->lli_clob) {
- /* clob is slave of inode, empty lli_clob means for new inode,
- * there is no clob in cache with the given fid, so it is
- * unnecessary to perform lookup-alloc-lookup-insert, just
- * alloc and insert directly.
- */
- LASSERT(inode->i_state & I_NEW);
- conf.coc_lu.loc_flags = LOC_F_NEW;
- clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
- fid, &conf);
- if (!IS_ERR(clob)) {
- /*
- * No locking is necessary, as new inode is
- * locked by I_NEW bit.
- */
- lli->lli_clob = clob;
- lli->lli_has_smd = lsm_has_objects(md->lsm);
- lu_object_ref_add(&clob->co_lu, "inode", inode);
- } else
- result = PTR_ERR(clob);
- } else {
- result = cl_conf_set(env, lli->lli_clob, &conf);
- }
-
- cl_env_put(env, &refcheck);
-
- if (result != 0)
- CERROR("Failure to initialize cl object "DFID": %d\n",
- PFID(fid), result);
- return result;
-}
-
-/**
- * Wait for others drop their references of the object at first, then we drop
- * the last one, which will lead to the object be destroyed immediately.
- * Must be called after cl_object_kill() against this object.
- *
- * The reason we want to do this is: destroying top object will wait for sub
- * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs)
- * to initiate top object destroying which may deadlock. See bz22520.
- */
-static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
-{
- struct lu_object_header *header = obj->co_lu.lo_header;
- wait_queue_t waiter;
-
- if (unlikely(atomic_read(&header->loh_ref) != 1)) {
- struct lu_site *site = obj->co_lu.lo_dev->ld_site;
- struct lu_site_bkt_data *bkt;
-
- bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
-
- init_waitqueue_entry(&waiter, current);
- add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-
- while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (atomic_read(&header->loh_ref) == 1)
- break;
- schedule();
- }
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
- }
-
- cl_object_put(env, obj);
-}
-
-void cl_inode_fini(struct inode *inode)
-{
- struct lu_env *env;
- struct cl_inode_info *lli = cl_i2info(inode);
- struct cl_object *clob = lli->lli_clob;
- int refcheck;
- int emergency;
-
- if (clob) {
- void *cookie;
-
- cookie = cl_env_reenter();
- env = cl_env_get(&refcheck);
- emergency = IS_ERR(env);
- if (emergency) {
- mutex_lock(&ccc_inode_fini_guard);
- LASSERT(ccc_inode_fini_env);
- cl_env_implant(ccc_inode_fini_env, &refcheck);
- env = ccc_inode_fini_env;
- }
- /*
- * cl_object cache is a slave to inode cache (which, in turn
- * is a slave to dentry cache), don't keep cl_object in memory
- * when its master is evicted.
- */
- cl_object_kill(env, clob);
- lu_object_ref_del(&clob->co_lu, "inode", inode);
- cl_object_put_last(env, clob);
- lli->lli_clob = NULL;
- if (emergency) {
- cl_env_unplant(ccc_inode_fini_env, &refcheck);
- mutex_unlock(&ccc_inode_fini_guard);
- } else
- cl_env_put(env, &refcheck);
- cl_env_reexit(cookie);
- }
-}
-
-/**
- * return IF_* type for given lu_dirent entry.
- * IF_* flag shld be converted to particular OS file type in
- * platform llite module.
- */
-__u16 ll_dirent_type_get(struct lu_dirent *ent)
-{
- __u16 type = 0;
- struct luda_type *lt;
- int len = 0;
-
- if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
- const unsigned align = sizeof(struct luda_type) - 1;
-
- len = le16_to_cpu(ent->lde_namelen);
- len = (len + align) & ~align;
- lt = (void *)ent->lde_name + len;
- type = IFTODT(le16_to_cpu(lt->lt_type));
- }
- return type;
-}
-
-/**
- * build inode number from passed @fid
- */
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
-{
- if (BITS_PER_LONG == 32 || api32)
- return fid_flatten32(fid);
- else
- return fid_flatten(fid);
-}
-
-/**
- * build inode generation from passed @fid. If our FID overflows the 32-bit
- * inode number then return a non-zero generation to distinguish them.
- */
-__u32 cl_fid_build_gen(const struct lu_fid *fid)
-{
- __u32 gen;
-
- if (fid_is_igif(fid)) {
- gen = lu_igif_gen(fid);
- return gen;
- }
-
- gen = fid_flatten(fid) >> 32;
- return gen;
-}
-
-/* lsm is unreliable after hsm implementation as layout can be changed at
- * any time. This is only to support old, non-clio-ized interfaces. It will
- * cause deadlock if clio operations are called with this extra layout refcount
- * because in case the layout changed during the IO, ll_layout_refresh() will
- * have to wait for the refcount to become zero to destroy the older layout.
- *
- * Notice that the lsm returned by this function may not be valid unless called
- * inside layout lock - MDS_INODELOCK_LAYOUT.
- */
-struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
-{
- return lov_lsm_get(cl_i2info(inode)->lli_clob);
-}
-
-inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
-{
- lov_lsm_put(cl_i2info(inode)->lli_clob, lsm);
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
index 323060626fdf..f4a70ebddeaf 100644
--- a/drivers/staging/lustre/lustre/ldlm/interval_tree.c
+++ b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c
index e5d1344e817a..ea8840cb9056 100644
--- a/drivers/staging/lustre/lustre/ldlm/l_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/l_lock.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -54,7 +50,7 @@ struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock)
lock_res(lock->l_resource);
- lock->l_flags |= LDLM_FL_RES_LOCKED;
+ ldlm_set_res_locked(lock);
return lock->l_resource;
}
EXPORT_SYMBOL(lock_res_and_lock);
@@ -65,7 +61,7 @@ EXPORT_SYMBOL(lock_res_and_lock);
void unlock_res_and_lock(struct ldlm_lock *lock)
{
/* on server-side resource of lock doesn't change */
- lock->l_flags &= ~LDLM_FL_RES_LOCKED;
+ ldlm_clear_res_locked(lock);
unlock_res(lock->l_resource);
spin_unlock(&lock->l_lock);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
index a803e200f206..f5023d9b78f5 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -75,12 +71,12 @@ __u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms)
* just after we finish and take our lock into account in its
* calculation of the kms
*/
- lock->l_flags |= LDLM_FL_KMS_IGNORE;
+ ldlm_set_kms_ignore(lock);
list_for_each(tmp, &res->lr_granted) {
lck = list_entry(tmp, struct ldlm_lock, l_res_link);
- if (lck->l_flags & LDLM_FL_KMS_IGNORE)
+ if (ldlm_is_kms_ignore(lck))
continue;
if (lck->l_policy_data.l_extent.end >= old_kms)
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
index b88b78606aee..d6b61bc39135 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -101,8 +97,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode, __u64 flags)
LASSERT(hlist_unhashed(&lock->l_exp_flock_hash));
list_del_init(&lock->l_res_link);
- if (flags == LDLM_FL_WAIT_NOREPROC &&
- !(lock->l_flags & LDLM_FL_FAILED)) {
+ if (flags == LDLM_FL_WAIT_NOREPROC && !ldlm_is_failed(lock)) {
/* client side - set a flag to prevent sending a CANCEL */
lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;
@@ -260,14 +255,13 @@ reprocess:
* overflow and underflow.
*/
if ((new->l_policy_data.l_flock.start >
- (lock->l_policy_data.l_flock.end + 1))
- && (lock->l_policy_data.l_flock.end !=
- OBD_OBJECT_EOF))
+ (lock->l_policy_data.l_flock.end + 1)) &&
+ (lock->l_policy_data.l_flock.end != OBD_OBJECT_EOF))
continue;
if ((new->l_policy_data.l_flock.end <
- (lock->l_policy_data.l_flock.start - 1))
- && (lock->l_policy_data.l_flock.start != 0))
+ (lock->l_policy_data.l_flock.start - 1)) &&
+ (lock->l_policy_data.l_flock.start != 0))
break;
if (new->l_policy_data.l_flock.start <
@@ -436,7 +430,7 @@ ldlm_flock_interrupted_wait(void *data)
lock_res_and_lock(lock);
/* client side - set flag to prevent lock from being put on LRU list */
- lock->l_flags |= LDLM_FL_CBPENDING;
+ ldlm_set_cbpending(lock);
unlock_res_and_lock(lock);
}
@@ -520,30 +514,29 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
granted:
OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
- if (lock->l_flags & LDLM_FL_DESTROYED) {
- LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
- return 0;
- }
-
- if (lock->l_flags & LDLM_FL_FAILED) {
+ if (ldlm_is_failed(lock)) {
LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
return -EIO;
}
- if (rc) {
- LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
- rc);
- return rc;
- }
-
LDLM_DEBUG(lock, "client-side enqueue granted");
lock_res_and_lock(lock);
+ /*
+ * Protect against race where lock could have been just destroyed
+ * due to overlap in ldlm_process_flock_lock().
+ */
+ if (ldlm_is_destroyed(lock)) {
+ unlock_res_and_lock(lock);
+ LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
+ return 0;
+ }
+
/* ldlm_lock_enqueue() has already placed lock on the granted list. */
list_del_init(&lock->l_res_link);
- if (lock->l_flags & LDLM_FL_FLOCK_DEADLOCK) {
+ if (ldlm_is_flock_deadlock(lock)) {
LDLM_DEBUG(lock, "client-side enqueue deadlock received");
rc = -EDEADLK;
} else if (flags & LDLM_FL_TEST_LOCK) {
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
index b1bed1e17d32..79f4e6fa193e 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
index e21373e7306f..e4cf65d2d3b1 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -95,9 +91,10 @@ enum {
LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */
LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */
LDLM_CANCEL_LRUR = 1 << 3, /* Cancel locks from lru resize. */
- LDLM_CANCEL_NO_WAIT = 1 << 4 /* Cancel locks w/o blocking (neither
- * sending nor waiting for any rpcs)
- */
+ LDLM_CANCEL_NO_WAIT = 1 << 4, /* Cancel locks w/o blocking (neither
+ * sending nor waiting for any rpcs)
+ */
+ LDLM_CANCEL_LRUR_NO_WAIT = 1 << 5, /* LRUR + NO_WAIT */
};
int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
@@ -145,7 +142,8 @@ void ldlm_lock_decref_internal(struct ldlm_lock *, __u32 mode);
void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, __u32 mode);
int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
enum ldlm_desc_ast_t ast_type);
-int ldlm_lock_remove_from_lru(struct ldlm_lock *lock);
+int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use);
+#define ldlm_lock_remove_from_lru(lock) ldlm_lock_remove_from_lru_check(lock, 0)
int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock);
void ldlm_lock_destroy_nolock(struct ldlm_lock *lock);
@@ -216,8 +214,6 @@ enum ldlm_policy_res {
LDLM_POLICY_SKIP_LOCK
};
-typedef enum ldlm_policy_res ldlm_policy_res_t;
-
#define LDLM_POOL_SYSFS_PRINT_int(v) sprintf(buf, "%d\n", v)
#define LDLM_POOL_SYSFS_SET_int(a, b) { a = b; }
#define LDLM_POOL_SYSFS_PRINT_u64(v) sprintf(buf, "%lld\n", v)
@@ -305,9 +301,10 @@ static inline int is_granted_or_cancelled(struct ldlm_lock *lock)
int ret = 0;
lock_res_and_lock(lock);
- if (((lock->l_req_mode == lock->l_granted_mode) &&
- !(lock->l_flags & LDLM_FL_CP_REQD)) ||
- (lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCEL)))
+ if ((lock->l_req_mode == lock->l_granted_mode) &&
+ !ldlm_is_cp_reqd(lock))
+ ret = 1;
+ else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock))
ret = 1;
unlock_res_and_lock(lock);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
index 7dd7df59aa1f..7c832aae7d5e 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -314,7 +310,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
INIT_LIST_HEAD(&cli->cl_loi_write_list);
INIT_LIST_HEAD(&cli->cl_loi_read_list);
- client_obd_list_lock_init(&cli->cl_loi_list_lock);
+ spin_lock_init(&cli->cl_loi_list_lock);
atomic_set(&cli->cl_pending_w_pages, 0);
atomic_set(&cli->cl_pending_r_pages, 0);
cli->cl_r_in_flight = 0;
@@ -333,7 +329,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
atomic_set(&cli->cl_lru_busy, 0);
atomic_set(&cli->cl_lru_in_list, 0);
INIT_LIST_HEAD(&cli->cl_lru_list);
- client_obd_list_lock_init(&cli->cl_lru_list_lock);
+ spin_lock_init(&cli->cl_lru_list_lock);
+ atomic_set(&cli->cl_unstable_count, 0);
init_waitqueue_head(&cli->cl_destroy_waitq);
atomic_set(&cli->cl_destroy_in_flight, 0);
@@ -344,7 +341,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
* Set cl_chksum* to CRC32 for now to avoid returning screwed info
* through procfs.
*/
- cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
+ cli->cl_cksum_type = OBD_CKSUM_CRC32;
+ cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
/* This value may be reduced at connect time in
@@ -355,6 +353,12 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES,
LNET_MTU >> PAGE_SHIFT);
+ /*
+ * Set the cl_chunkbits default value to PAGE_SHIFT;
+ * it will be updated at OSC connection time.
+ */
+ cli->cl_chunkbits = PAGE_SHIFT;
+
if (!strcmp(name, LUSTRE_MDC_NAME)) {
cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) {
@@ -429,7 +433,6 @@ err_ldlm:
ldlm_put_ref();
err:
return rc;
-
}
EXPORT_SYMBOL(client_obd_setup);
@@ -438,6 +441,7 @@ int client_obd_cleanup(struct obd_device *obddev)
ldlm_namespace_free_post(obddev->obd_namespace);
obddev->obd_namespace = NULL;
+ obd_cleanup_client_import(obddev);
LASSERT(!obddev->u.cli.cl_import);
ldlm_put_ref();
@@ -748,6 +752,7 @@ int ldlm_error2errno(enum ldlm_error error)
switch (error) {
case ELDLM_OK:
+ case ELDLM_LOCK_MATCHED:
result = 0;
break;
case ELDLM_LOCK_CHANGED:
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
index ecd65a7a3dc9..a5993f745ebe 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -185,7 +181,7 @@ void ldlm_lock_put(struct ldlm_lock *lock)
"final lock_put on destroyed lock, freeing it.");
res = lock->l_resource;
- LASSERT(lock->l_flags & LDLM_FL_DESTROYED);
+ LASSERT(ldlm_is_destroyed(lock));
LASSERT(list_empty(&lock->l_res_link));
LASSERT(list_empty(&lock->l_pending_chain));
@@ -229,15 +225,25 @@ int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
/**
* Removes LDLM lock \a lock from LRU. Obtains the LRU lock first.
+ *
+ * If \a last_use is non-zero, it will remove the lock from LRU only if
+ * it matches lock's l_last_used.
+ *
+ * \retval 0 the lock was not removed: either \a last_use is set and
+ * doesn't match lock's l_last_used, or the lock was not in
+ * the LRU list.
+ * \retval 1 the lock was in LRU list and removed.
*/
-int ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
+int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use)
{
struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
- int rc;
+ int rc = 0;
spin_lock(&ns->ns_lock);
- rc = ldlm_lock_remove_from_lru_nolock(lock);
+ if (last_use == 0 || last_use == lock->l_last_used)
+ rc = ldlm_lock_remove_from_lru_nolock(lock);
spin_unlock(&ns->ns_lock);
+
return rc;
}
@@ -252,8 +258,7 @@ static void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock)
LASSERT(list_empty(&lock->l_lru));
LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
list_add_tail(&lock->l_lru, &ns->ns_unused_list);
- if (lock->l_flags & LDLM_FL_SKIPPED)
- lock->l_flags &= ~LDLM_FL_SKIPPED;
+ ldlm_clear_skipped(lock);
LASSERT(ns->ns_nr_unused >= 0);
ns->ns_nr_unused++;
}
@@ -318,11 +323,11 @@ static int ldlm_lock_destroy_internal(struct ldlm_lock *lock)
LBUG();
}
- if (lock->l_flags & LDLM_FL_DESTROYED) {
+ if (ldlm_is_destroyed(lock)) {
LASSERT(list_empty(&lock->l_lru));
return 0;
}
- lock->l_flags |= LDLM_FL_DESTROYED;
+ ldlm_set_destroyed(lock);
if (lock->l_export && lock->l_export->exp_lock_hash) {
/* NB: it's safe to call cfs_hash_del() even lock isn't
@@ -544,7 +549,7 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
/* It's unlikely but possible that someone marked the lock as
* destroyed after we did handle2object on it
*/
- if (flags == 0 && ((lock->l_flags & LDLM_FL_DESTROYED) == 0)) {
+ if (flags == 0 && !ldlm_is_destroyed(lock)) {
lu_ref_add(&lock->l_reference, "handle", current);
return lock;
}
@@ -554,21 +559,22 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
LASSERT(lock->l_resource);
lu_ref_add_atomic(&lock->l_reference, "handle", current);
- if (unlikely(lock->l_flags & LDLM_FL_DESTROYED)) {
+ if (unlikely(ldlm_is_destroyed(lock))) {
unlock_res_and_lock(lock);
CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
LDLM_LOCK_PUT(lock);
return NULL;
}
- if (flags && (lock->l_flags & flags)) {
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- return NULL;
- }
+ if (flags) {
+ if (lock->l_flags & flags) {
+ unlock_res_and_lock(lock);
+ LDLM_LOCK_PUT(lock);
+ return NULL;
+ }
- if (flags)
lock->l_flags |= flags;
+ }
unlock_res_and_lock(lock);
return lock;
@@ -599,14 +605,14 @@ EXPORT_SYMBOL(ldlm_lock2desc);
static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
struct list_head *work_list)
{
- if ((lock->l_flags & LDLM_FL_AST_SENT) == 0) {
+ if (!ldlm_is_ast_sent(lock)) {
LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
- lock->l_flags |= LDLM_FL_AST_SENT;
+ ldlm_set_ast_sent(lock);
/* If the enqueuing client said so, tell the AST recipient to
* discard dirty data, rather than writing back.
*/
- if (new->l_flags & LDLM_FL_AST_DISCARD_DATA)
- lock->l_flags |= LDLM_FL_DISCARD_DATA;
+ if (ldlm_is_ast_discard_data(new))
+ ldlm_set_discard_data(lock);
LASSERT(list_empty(&lock->l_bl_ast));
list_add(&lock->l_bl_ast, work_list);
LDLM_LOCK_GET(lock);
@@ -621,8 +627,8 @@ static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
static void ldlm_add_cp_work_item(struct ldlm_lock *lock,
struct list_head *work_list)
{
- if ((lock->l_flags & LDLM_FL_CP_REQD) == 0) {
- lock->l_flags |= LDLM_FL_CP_REQD;
+ if (!ldlm_is_cp_reqd(lock)) {
+ ldlm_set_cp_reqd(lock);
LDLM_DEBUG(lock, "lock granted; sending completion AST.");
LASSERT(list_empty(&lock->l_cp_ast));
list_add(&lock->l_cp_ast, work_list);
@@ -652,12 +658,12 @@ static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
* r/w reference type is determined by \a mode
* Calls ldlm_lock_addref_internal.
*/
-void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode)
{
struct ldlm_lock *lock;
lock = ldlm_handle2lock(lockh);
- LASSERT(lock);
+ LASSERTF(lock, "Non-existing lock: %llx\n", lockh->cookie);
ldlm_lock_addref_internal(lock, mode);
LDLM_LOCK_PUT(lock);
}
@@ -694,7 +700,7 @@ void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
*
* \retval -EAGAIN lock is being canceled.
*/
-int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode)
+int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode)
{
struct ldlm_lock *lock;
int result;
@@ -704,7 +710,7 @@ int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode)
if (lock) {
lock_res_and_lock(lock);
if (lock->l_readers != 0 || lock->l_writers != 0 ||
- !(lock->l_flags & LDLM_FL_CBPENDING)) {
+ !ldlm_is_cbpending(lock)) {
ldlm_lock_addref_internal_nolock(lock, mode);
result = 0;
}
@@ -770,17 +776,17 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
ldlm_lock_decref_internal_nolock(lock, mode);
- if (lock->l_flags & LDLM_FL_LOCAL &&
+ if (ldlm_is_local(lock) &&
!lock->l_readers && !lock->l_writers) {
/* If this is a local lock on a server namespace and this was
* the last reference, cancel the lock.
*/
CDEBUG(D_INFO, "forcing cancel of local lock\n");
- lock->l_flags |= LDLM_FL_CBPENDING;
+ ldlm_set_cbpending(lock);
}
if (!lock->l_readers && !lock->l_writers &&
- (lock->l_flags & LDLM_FL_CBPENDING)) {
+ ldlm_is_cbpending(lock)) {
/* If we received a blocked AST and this was the last reference,
* run the callback.
*/
@@ -791,16 +797,14 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
ldlm_lock_remove_from_lru(lock);
unlock_res_and_lock(lock);
- if (lock->l_flags & LDLM_FL_FAIL_LOC)
+ if (ldlm_is_fail_loc(lock))
OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
- if ((lock->l_flags & LDLM_FL_ATOMIC_CB) ||
+ if (ldlm_is_atomic_cb(lock) ||
ldlm_bl_to_thread_lock(ns, NULL, lock) != 0)
ldlm_handle_bl_callback(ns, NULL, lock);
} else if (!lock->l_readers && !lock->l_writers &&
- !(lock->l_flags & LDLM_FL_NO_LRU) &&
- !(lock->l_flags & LDLM_FL_BL_AST)) {
-
+ !ldlm_is_no_lru(lock) && !ldlm_is_bl_ast(lock)) {
LDLM_DEBUG(lock, "add lock into lru list");
/* If this is a client-side namespace and this was the last
@@ -809,7 +813,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
ldlm_lock_add_to_lru(lock);
unlock_res_and_lock(lock);
- if (lock->l_flags & LDLM_FL_FAIL_LOC)
+ if (ldlm_is_fail_loc(lock))
OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
/* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE
@@ -828,7 +832,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
/**
* Decrease reader/writer refcount for LDLM lock with handle \a lockh
*/
-void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode)
{
struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
@@ -845,7 +849,7 @@ EXPORT_SYMBOL(ldlm_lock_decref);
*
* Typical usage is for GROUP locks which we cannot allow to be cached.
*/
-void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode)
{
struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
@@ -853,7 +857,7 @@ void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
lock_res_and_lock(lock);
- lock->l_flags |= LDLM_FL_CBPENDING;
+ ldlm_set_cbpending(lock);
unlock_res_and_lock(lock);
ldlm_lock_decref_internal(lock, mode);
LDLM_LOCK_PUT(lock);
@@ -971,7 +975,7 @@ static void ldlm_granted_list_add_lock(struct ldlm_lock *lock,
ldlm_resource_dump(D_INFO, res);
LDLM_DEBUG(lock, "About to add lock:");
- if (lock->l_flags & LDLM_FL_DESTROYED) {
+ if (ldlm_is_destroyed(lock)) {
CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
return;
}
@@ -1073,10 +1077,9 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
* whose parents already hold a lock so forward progress
* can still happen.
*/
- if (lock->l_flags & LDLM_FL_CBPENDING &&
- !(flags & LDLM_FL_CBPENDING))
+ if (ldlm_is_cbpending(lock) && !(flags & LDLM_FL_CBPENDING))
continue;
- if (!unref && lock->l_flags & LDLM_FL_CBPENDING &&
+ if (!unref && ldlm_is_cbpending(lock) &&
lock->l_readers == 0 && lock->l_writers == 0)
continue;
@@ -1092,6 +1095,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
if (unlikely(match == LCK_GROUP) &&
lock->l_resource->lr_type == LDLM_EXTENT &&
+ policy->l_extent.gid != LDLM_GID_ANY &&
lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
continue;
@@ -1104,11 +1108,10 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
policy->l_inodebits.bits))
continue;
- if (!unref && (lock->l_flags & LDLM_FL_GONE_MASK))
+ if (!unref && LDLM_HAVE_MASK(lock, GONE))
continue;
- if ((flags & LDLM_FL_LOCAL_ONLY) &&
- !(lock->l_flags & LDLM_FL_LOCAL))
+ if ((flags & LDLM_FL_LOCAL_ONLY) && !ldlm_is_local(lock))
continue;
if (flags & LDLM_FL_TEST_LOCK) {
@@ -1142,7 +1145,7 @@ EXPORT_SYMBOL(ldlm_lock_fail_match_locked);
*/
void ldlm_lock_allow_match_locked(struct ldlm_lock *lock)
{
- lock->l_flags |= LDLM_FL_LVB_READY;
+ ldlm_set_lvb_ready(lock);
wake_up_all(&lock->l_waitq);
}
EXPORT_SYMBOL(ldlm_lock_allow_match_locked);
@@ -1243,8 +1246,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
if (lock) {
ldlm_lock2handle(lock, lockh);
- if ((flags & LDLM_FL_LVB_READY) &&
- (!(lock->l_flags & LDLM_FL_LVB_READY))) {
+ if ((flags & LDLM_FL_LVB_READY) && !ldlm_is_lvb_ready(lock)) {
__u64 wait_flags = LDLM_FL_LVB_READY |
LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED;
struct l_wait_info lwi;
@@ -1271,7 +1273,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
l_wait_event(lock->l_waitq,
lock->l_flags & wait_flags,
&lwi);
- if (!(lock->l_flags & LDLM_FL_LVB_READY)) {
+ if (!ldlm_is_lvb_ready(lock)) {
if (flags & LDLM_FL_TEST_LOCK)
LDLM_LOCK_RELEASE(lock);
else
@@ -1316,7 +1318,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
}
EXPORT_SYMBOL(ldlm_lock_match);
-enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
+enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
__u64 *bits)
{
struct ldlm_lock *lock;
@@ -1325,10 +1327,10 @@ enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
lock = ldlm_handle2lock(lockh);
if (lock) {
lock_res_and_lock(lock);
- if (lock->l_flags & LDLM_FL_GONE_MASK)
+ if (LDLM_HAVE_MASK(lock, GONE))
goto out;
- if (lock->l_flags & LDLM_FL_CBPENDING &&
+ if (ldlm_is_cbpending(lock) &&
lock->l_readers == 0 && lock->l_writers == 0)
goto out;
@@ -1438,7 +1440,7 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
memcpy(data, lvb, size);
break;
default:
- LDLM_ERROR(lock, "Unknown LVB type: %d\n", lock->l_lvb_type);
+ LDLM_ERROR(lock, "Unknown LVB type: %d", lock->l_lvb_type);
dump_stack();
return -EINVAL;
}
@@ -1542,7 +1544,8 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
/* Some flags from the enqueue want to make it into the AST, via the
* lock's l_flags.
*/
- lock->l_flags |= *flags & LDLM_FL_AST_DISCARD_DATA;
+ if (*flags & LDLM_FL_AST_DISCARD_DATA)
+ ldlm_set_ast_discard_data(lock);
/*
* This distinction between local lock trees is very important; a client
@@ -1581,7 +1584,7 @@ ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
lock_res_and_lock(lock);
list_del_init(&lock->l_bl_ast);
- LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
+ LASSERT(ldlm_is_ast_sent(lock));
LASSERT(lock->l_bl_ast_run == 0);
LASSERT(lock->l_blocking_lock);
lock->l_bl_ast_run++;
@@ -1628,12 +1631,12 @@ ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
/* nobody should touch l_cp_ast */
lock_res_and_lock(lock);
list_del_init(&lock->l_cp_ast);
- LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
+ LASSERT(ldlm_is_cp_reqd(lock));
/* save l_completion_ast since it can be changed by
* mds_intent_policy(), see bug 14225
*/
completion_callback = lock->l_completion_ast;
- lock->l_flags &= ~LDLM_FL_CP_REQD;
+ ldlm_clear_cp_reqd(lock);
unlock_res_and_lock(lock);
if (completion_callback)
@@ -1778,8 +1781,8 @@ out:
void ldlm_cancel_callback(struct ldlm_lock *lock)
{
check_res_locked(lock->l_resource);
- if (!(lock->l_flags & LDLM_FL_CANCEL)) {
- lock->l_flags |= LDLM_FL_CANCEL;
+ if (!ldlm_is_cancel(lock)) {
+ ldlm_set_cancel(lock);
if (lock->l_blocking_ast) {
unlock_res_and_lock(lock);
lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
@@ -1789,7 +1792,7 @@ void ldlm_cancel_callback(struct ldlm_lock *lock)
LDLM_DEBUG(lock, "no blocking ast");
}
}
- lock->l_flags |= LDLM_FL_BL_DONE;
+ ldlm_set_bl_done(lock);
}
/**
@@ -1846,7 +1849,7 @@ EXPORT_SYMBOL(ldlm_lock_cancel);
/**
* Set opaque data into the lock that only makes sense to upper layer.
*/
-int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
+int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data)
{
struct ldlm_lock *lock = ldlm_handle2lock(lockh);
int rc = -EINVAL;
@@ -1872,7 +1875,7 @@ struct export_cl_data {
*
* Used when printing all locks on a resource for debug purposes.
*/
-void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
+void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh)
{
struct ldlm_lock *lock;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index ebe9042adb25..821939ff2e6b 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -124,10 +120,10 @@ void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
LDLM_DEBUG(lock, "client blocking AST callback handler");
lock_res_and_lock(lock);
- lock->l_flags |= LDLM_FL_CBPENDING;
+ ldlm_set_cbpending(lock);
- if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)
- lock->l_flags |= LDLM_FL_CANCEL;
+ if (ldlm_is_cancel_on_block(lock))
+ ldlm_set_cancel(lock);
do_ast = !lock->l_readers && !lock->l_writers;
unlock_res_and_lock(lock);
@@ -172,7 +168,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(to);
if (lock->l_granted_mode == lock->l_req_mode ||
- lock->l_flags & LDLM_FL_DESTROYED)
+ ldlm_is_destroyed(lock))
break;
}
}
@@ -215,7 +211,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
}
lock_res_and_lock(lock);
- if ((lock->l_flags & LDLM_FL_DESTROYED) ||
+ if (ldlm_is_destroyed(lock) ||
lock->l_granted_mode == lock->l_req_mode) {
/* bug 11300: the lock has already been granted */
unlock_res_and_lock(lock);
@@ -291,7 +287,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
out:
if (rc < 0) {
lock_res_and_lock(lock);
- lock->l_flags |= LDLM_FL_FAILED;
+ ldlm_set_failed(lock);
unlock_res_and_lock(lock);
wake_up(&lock->l_waitq);
}
@@ -360,8 +356,7 @@ static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi,
struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
spin_lock(&blp->blp_lock);
- if (blwi->blwi_lock &&
- blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) {
+ if (blwi->blwi_lock && ldlm_is_discard_data(blwi->blwi_lock)) {
/* add LDLM_FL_DISCARD_DATA requests to the priority list */
list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list);
} else {
@@ -504,7 +499,7 @@ static int ldlm_handle_setinfo(struct ptlrpc_request *req)
static inline void ldlm_callback_errmsg(struct ptlrpc_request *req,
const char *msg, int rc,
- struct lustre_handle *handle)
+ const struct lustre_handle *handle)
{
DEBUG_REQ((req->rq_no_reply || rc) ? D_WARNING : D_DLMTRACE, req,
"%s: [nid %s] [rc %d] [lock %#llx]",
@@ -626,24 +621,24 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
return 0;
}
- if ((lock->l_flags & LDLM_FL_FAIL_LOC) &&
+ if (ldlm_is_fail_loc(lock) &&
lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
/* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */
lock_res_and_lock(lock);
lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
- LDLM_AST_FLAGS);
+ LDLM_FL_AST_MASK);
if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
/* If somebody cancels lock and cache is already dropped,
* or lock is failed before cp_ast received on client,
* we can tell the server we have no lock. Otherwise, we
* should send cancel after dropping the cache.
*/
- if (((lock->l_flags & LDLM_FL_CANCELING) &&
- (lock->l_flags & LDLM_FL_BL_DONE)) ||
- (lock->l_flags & LDLM_FL_FAILED)) {
- LDLM_DEBUG(lock, "callback on lock %#llx - lock disappeared\n",
+ if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) ||
+ ldlm_is_failed(lock)) {
+ LDLM_DEBUG(lock,
+ "callback on lock %#llx - lock disappeared",
dlm_req->lock_handle[0].cookie);
unlock_res_and_lock(lock);
LDLM_LOCK_RELEASE(lock);
@@ -656,7 +651,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
* Let ldlm_cancel_lru() be fast.
*/
ldlm_lock_remove_from_lru(lock);
- lock->l_flags |= LDLM_FL_BL_AST;
+ ldlm_set_bl_ast(lock);
}
unlock_res_and_lock(lock);
@@ -674,7 +669,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
case LDLM_BL_CALLBACK:
CDEBUG(D_INODE, "blocking ast\n");
req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK);
- if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) {
+ if (!ldlm_is_cancel_on_block(lock)) {
rc = ldlm_callback_reply(req, 0);
if (req->rq_no_reply || rc)
ldlm_callback_errmsg(req, "Normal process", rc,
@@ -1013,9 +1008,11 @@ static int ldlm_setup(void)
blp->blp_min_threads = LDLM_NTHRS_INIT;
blp->blp_max_threads = LDLM_NTHRS_MAX;
} else {
- blp->blp_min_threads = blp->blp_max_threads =
- min_t(int, LDLM_NTHRS_MAX, max_t(int, LDLM_NTHRS_INIT,
- ldlm_num_threads));
+ blp->blp_min_threads = min_t(int, LDLM_NTHRS_MAX,
+ max_t(int, LDLM_NTHRS_INIT,
+ ldlm_num_threads));
+
+ blp->blp_max_threads = blp->blp_min_threads;
}
for (i = 0; i < blp->blp_min_threads; i++) {
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
index 0c1965ddabb9..0aed39c46154 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index b913ba9cf97c..657ed4012776 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
index 74e193e52cd6..af487f9937f4 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -153,7 +149,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock)
long delay;
int result;
- if (lock->l_flags & (LDLM_FL_DESTROYED | LDLM_FL_FAILED)) {
+ if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) {
LDLM_DEBUG(lock, "client-side enqueue: destroyed");
result = -EIO;
} else {
@@ -252,7 +248,7 @@ noreproc:
lwd.lwd_lock = lock;
- if (lock->l_flags & LDLM_FL_NO_TIMEOUT) {
+ if (ldlm_is_no_timeout(lock)) {
LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
lwi = LWI_INTR(interrupted_completion_wait, &lwd);
} else {
@@ -269,7 +265,7 @@ noreproc:
if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST,
OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) {
- lock->l_flags |= LDLM_FL_FAIL_LOC;
+ ldlm_set_fail_loc(lock);
rc = -EINTR;
} else {
/* Go to sleep until the lock is granted or cancelled. */
@@ -296,7 +292,7 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
lock_res_and_lock(lock);
/* Check that lock is not granted or failed, we might race. */
if ((lock->l_req_mode != lock->l_granted_mode) &&
- !(lock->l_flags & LDLM_FL_FAILED)) {
+ !ldlm_is_failed(lock)) {
/* Make sure that this lock will not be found by raced
* bl_ast and -EINVAL reply is sent to server anyways.
* bug 17645
@@ -340,14 +336,13 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
enum ldlm_type type, __u8 with_policy,
enum ldlm_mode mode,
__u64 *flags, void *lvb, __u32 lvb_len,
- struct lustre_handle *lockh, int rc)
+ const struct lustre_handle *lockh, int rc)
{
struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
int is_replay = *flags & LDLM_FL_REPLAY;
struct ldlm_lock *lock;
struct ldlm_reply *reply;
int cleanup_phase = 1;
- int size = 0;
lock = ldlm_handle2lock(lockh);
/* ldlm_cli_enqueue is holding a reference on this lock. */
@@ -375,8 +370,8 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
goto cleanup;
}
- if (lvb_len != 0) {
- LASSERT(lvb);
+ if (lvb_len > 0) {
+ int size = 0;
size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB,
RCL_SERVER);
@@ -390,12 +385,13 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
rc = -EINVAL;
goto cleanup;
}
+ lvb_len = size;
}
if (rc == ELDLM_LOCK_ABORTED) {
- if (lvb_len != 0)
+ if (lvb_len > 0 && lvb)
rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
- lvb, size);
+ lvb, lvb_len);
if (rc == 0)
rc = ELDLM_LOCK_ABORTED;
goto cleanup;
@@ -421,7 +417,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
*flags = ldlm_flags_from_wire(reply->lock_flags);
lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
- LDLM_INHERIT_FLAGS);
+ LDLM_FL_INHERIT_MASK);
/* move NO_TIMEOUT flag to the lock to force ldlm_lock_match()
* to wait with no timeout as well
*/
@@ -489,7 +485,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
/* If the lock has already been granted by a completion AST, don't
* clobber the LVB with an older one.
*/
- if (lvb_len != 0) {
+ if (lvb_len > 0) {
/* We must lock or a racing completion might update lvb without
* letting us know and we'll clobber the correct value.
* Cannot unlock after the check either, as that still leaves
@@ -498,7 +494,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
lock_res_and_lock(lock);
if (lock->l_req_mode != lock->l_granted_mode)
rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
- lock->l_lvb_data, size);
+ lock->l_lvb_data, lvb_len);
unlock_res_and_lock(lock);
if (rc < 0) {
cleanup_phase = 1;
@@ -518,7 +514,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
}
}
- if (lvb_len && lvb) {
+ if (lvb_len > 0 && lvb) {
/* Copy the LVB here, and not earlier, because the completion
* AST (if any) can override what we got in the reply
*/
@@ -601,7 +597,7 @@ int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req,
avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff);
flags = ns_connect_lru_resize(ns) ?
- LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED;
+ LDLM_CANCEL_LRUR_NO_WAIT : LDLM_CANCEL_AGED;
to_free = !ns_connect_lru_resize(ns) &&
opc == LDLM_ENQUEUE ? 1 : 0;
@@ -715,7 +711,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
lock->l_req_extent = policy->l_extent;
}
- LDLM_DEBUG(lock, "client-side enqueue START, flags %llx\n",
+ LDLM_DEBUG(lock, "client-side enqueue START, flags %llx",
*flags);
}
@@ -821,12 +817,11 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
LDLM_DEBUG(lock, "client-side cancel");
/* Set this flag to prevent others from getting new references*/
lock_res_and_lock(lock);
- lock->l_flags |= LDLM_FL_CBPENDING;
+ ldlm_set_cbpending(lock);
local_only = !!(lock->l_flags &
(LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK));
ldlm_cancel_callback(lock);
- rc = (lock->l_flags & LDLM_FL_BL_AST) ?
- LDLM_FL_BL_AST : LDLM_FL_CANCELING;
+ rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING;
unlock_res_and_lock(lock);
if (local_only) {
@@ -1028,7 +1023,7 @@ EXPORT_SYMBOL(ldlm_cli_update_pool);
*
* Lock must not have any readers or writers by this time.
*/
-int ldlm_cli_cancel(struct lustre_handle *lockh,
+int ldlm_cli_cancel(const struct lustre_handle *lockh,
enum ldlm_cancel_flags cancel_flags)
{
struct obd_export *exp;
@@ -1131,31 +1126,30 @@ EXPORT_SYMBOL(ldlm_cli_cancel_list_local);
* dirty data, to close a file, ...) or waiting for any RPCs in-flight (e.g.
* readahead requests, ...)
*/
-static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
+static enum ldlm_policy_res
+ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+ int unused, int added, int count)
{
- ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK;
- ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery;
-
- lock_res_and_lock(lock);
+ enum ldlm_policy_res result = LDLM_POLICY_CANCEL_LOCK;
/* don't check added & count since we want to process all locks
- * from unused list
+ * from unused list.
+ * It's fine to not take lock to access lock->l_resource since
+ * the lock has already been granted so it won't change.
*/
switch (lock->l_resource->lr_type) {
case LDLM_EXTENT:
case LDLM_IBITS:
- if (cb && cb(lock))
+ if (ns->ns_cancel && ns->ns_cancel(lock) != 0)
break;
default:
result = LDLM_POLICY_SKIP_LOCK;
- lock->l_flags |= LDLM_FL_SKIPPED;
+ lock_res_and_lock(lock);
+ ldlm_set_skipped(lock);
+ unlock_res_and_lock(lock);
break;
}
- unlock_res_and_lock(lock);
return result;
}
@@ -1168,10 +1162,10 @@ static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
*
* \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
*/
-static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
+static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
{
unsigned long cur = cfs_time_current();
struct ldlm_pool *pl = &ns->ns_pool;
@@ -1196,8 +1190,13 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
/* Stop when SLV is not yet come from server or lv is smaller than
* it is.
*/
- return (slv == 0 || lv < slv) ?
- LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+ if (slv == 0 || lv < slv)
+ return LDLM_POLICY_KEEP_LOCK;
+
+ if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
+ return LDLM_POLICY_KEEP_LOCK;
+
+ return LDLM_POLICY_CANCEL_LOCK;
}
/**
@@ -1209,10 +1208,10 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
*
* \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
*/
-static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
+static enum ldlm_policy_res ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
{
/* Stop LRU processing when we reach past @count or have checked all
* locks in LRU.
@@ -1230,16 +1229,35 @@ static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
*
* \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
*/
-static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
+static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
{
- /* Stop LRU processing if young lock is found and we reach past count */
- return ((added >= count) &&
- time_before(cfs_time_current(),
- cfs_time_add(lock->l_last_used, ns->ns_max_age))) ?
- LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+ if ((added >= count) &&
+ time_before(cfs_time_current(),
+ cfs_time_add(lock->l_last_used, ns->ns_max_age)))
+ return LDLM_POLICY_KEEP_LOCK;
+
+ if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
+ return LDLM_POLICY_KEEP_LOCK;
+
+ return LDLM_POLICY_CANCEL_LOCK;
+}
+
+static enum ldlm_policy_res
+ldlm_cancel_lrur_no_wait_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
+{
+ enum ldlm_policy_res result;
+
+ result = ldlm_cancel_lrur_policy(ns, lock, unused, added, count);
+ if (result == LDLM_POLICY_KEEP_LOCK)
+ return result;
+
+ return ldlm_cancel_no_wait_policy(ns, lock, unused, added, count);
}
/**
@@ -1251,10 +1269,9 @@ static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
*
* \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
*/
-static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
+static enum ldlm_policy_res
+ldlm_cancel_default_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+ int unused, int added, int count)
{
/* Stop LRU processing when we reach past count or have checked all
* locks in LRU.
@@ -1263,7 +1280,8 @@ static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns,
LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
}
-typedef ldlm_policy_res_t (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *,
+typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)(
+ struct ldlm_namespace *,
struct ldlm_lock *, int,
int, int);
@@ -1281,6 +1299,8 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags)
return ldlm_cancel_lrur_policy;
else if (flags & LDLM_CANCEL_PASSED)
return ldlm_cancel_passed_policy;
+ else if (flags & LDLM_CANCEL_LRUR_NO_WAIT)
+ return ldlm_cancel_lrur_no_wait_policy;
} else {
if (flags & LDLM_CANCEL_AGED)
return ldlm_cancel_aged_policy;
@@ -1329,6 +1349,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
ldlm_cancel_lru_policy_t pf;
struct ldlm_lock *lock, *next;
int added = 0, unused, remained;
+ int no_wait = flags & (LDLM_CANCEL_NO_WAIT | LDLM_CANCEL_LRUR_NO_WAIT);
spin_lock(&ns->ns_lock);
unused = ns->ns_nr_unused;
@@ -1341,7 +1362,8 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
LASSERT(pf);
while (!list_empty(&ns->ns_unused_list)) {
- ldlm_policy_res_t result;
+ enum ldlm_policy_res result;
+ time_t last_use = 0;
/* all unused locks */
if (remained-- <= 0)
@@ -1354,17 +1376,20 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
l_lru) {
/* No locks which got blocking requests. */
- LASSERT(!(lock->l_flags & LDLM_FL_BL_AST));
+ LASSERT(!ldlm_is_bl_ast(lock));
- if (flags & LDLM_CANCEL_NO_WAIT &&
- lock->l_flags & LDLM_FL_SKIPPED)
+ if (no_wait && ldlm_is_skipped(lock))
/* already processed */
continue;
+ last_use = lock->l_last_used;
+ if (last_use == cfs_time_current())
+ continue;
+
/* Somebody is already doing CANCEL. No need for this
* lock in LRU, do not traverse it again.
*/
- if (!(lock->l_flags & LDLM_FL_CANCELING))
+ if (!ldlm_is_canceling(lock))
break;
ldlm_lock_remove_from_lru_nolock(lock);
@@ -1407,12 +1432,14 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
lock_res_and_lock(lock);
/* Check flags again under the lock. */
- if ((lock->l_flags & LDLM_FL_CANCELING) ||
- (ldlm_lock_remove_from_lru(lock) == 0)) {
+ if (ldlm_is_canceling(lock) ||
+ (ldlm_lock_remove_from_lru_check(lock, last_use) == 0)) {
/* Another thread is removing lock from LRU, or
* somebody is already doing CANCEL, or there
* is a blocking request which will send cancel
- * by itself, or the lock is no longer unused.
+ * by itself, or the lock is no longer unused or
+ * the lock has been used since the pf() call and
+ * pages could be put under it.
*/
unlock_res_and_lock(lock);
lu_ref_del(&lock->l_reference,
@@ -1429,7 +1456,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
* where while we are doing cancel here, server is also
* silently cancelling this lock.
*/
- lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK;
+ ldlm_clear_cancel_on_block(lock);
/* Setting the CBPENDING flag is a little misleading,
* but prevents an important race; namely, once
@@ -1526,8 +1553,7 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
/* If somebody is already doing CANCEL, or blocking AST came,
* skip this lock.
*/
- if (lock->l_flags & LDLM_FL_BL_AST ||
- lock->l_flags & LDLM_FL_CANCELING)
+ if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock))
continue;
if (lockmode_compat(lock->l_granted_mode, mode))
@@ -1771,7 +1797,6 @@ static void ldlm_namespace_foreach(struct ldlm_namespace *ns,
cfs_hash_for_each_nolock(ns->ns_rs_hash,
ldlm_res_iter_helper, &helper);
-
}
/* non-blocking function to manipulate a lock whose cb_data is being put away.
@@ -1887,7 +1912,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
int flags;
/* Bug 11974: Do not replay a lock which is actively being canceled */
- if (lock->l_flags & LDLM_FL_CANCELING) {
+ if (ldlm_is_canceling(lock)) {
LDLM_DEBUG(lock, "Not replaying canceled lock:");
return 0;
}
@@ -1896,7 +1921,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
* server might have long dropped it, but notification of that event was
* lost by network. (and server granted conflicting lock already)
*/
- if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
+ if (ldlm_is_cancel_on_block(lock)) {
LDLM_DEBUG(lock, "Not replaying reply-less lock:");
ldlm_lock_cancel(lock);
return 0;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
index 9dede87ad0a3..51a28d96af39 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -124,9 +120,15 @@ int ldlm_debugfs_setup(void)
}
rc = ldebugfs_add_vars(ldlm_debugfs_dir, ldlm_debugfs_list, NULL);
+ if (rc) {
+ CERROR("LProcFS failed in ldlm-init\n");
+ goto err_svc;
+ }
return 0;
+err_svc:
+ ldebugfs_remove(&ldlm_svc_debugfs_dir);
err_ns:
ldebugfs_remove(&ldlm_ns_debugfs_dir);
err_type:
@@ -758,12 +760,12 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
list_for_each(tmp, q) {
lock = list_entry(tmp, struct ldlm_lock,
l_res_link);
- if (lock->l_flags & LDLM_FL_CLEANED) {
+ if (ldlm_is_cleaned(lock)) {
lock = NULL;
continue;
}
LDLM_LOCK_GET(lock);
- lock->l_flags |= LDLM_FL_CLEANED;
+ ldlm_set_cleaned(lock);
break;
}
@@ -775,13 +777,13 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
/* Set CBPENDING so nothing in the cancellation path
* can match this lock.
*/
- lock->l_flags |= LDLM_FL_CBPENDING;
- lock->l_flags |= LDLM_FL_FAILED;
+ ldlm_set_cbpending(lock);
+ ldlm_set_failed(lock);
lock->l_flags |= flags;
/* ... without sending a CANCEL message for local_only. */
if (local_only)
- lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+ ldlm_set_local_only(lock);
if (local_only && (lock->l_readers || lock->l_writers)) {
/* This is a little bit gross, but much better than the
@@ -1273,9 +1275,9 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
{
check_res_locked(res);
- LDLM_DEBUG(lock, "About to add this lock:\n");
+ LDLM_DEBUG(lock, "About to add this lock:");
- if (lock->l_flags & LDLM_FL_DESTROYED) {
+ if (ldlm_is_destroyed(lock)) {
CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
return;
}
@@ -1400,3 +1402,4 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res)
LDLM_DEBUG_LIMIT(level, lock, "###");
}
}
+EXPORT_SYMBOL(ldlm_resource_dump);
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
index 9ac29e718da3..2cbb1b80bd41 100644
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -1,10 +1,7 @@
obj-$(CONFIG_LUSTRE_FS) += lustre.o
-obj-$(CONFIG_LUSTRE_LLITE_LLOOP) += llite_lloop.o
lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \
rw.o namei.o symlink.o llite_mmap.o \
- xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o \
- rw26.o super25.o statahead.o \
- ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \
- vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o lproc_llite.o
-
-llite_lloop-y := lloop.o
+ xattr.o xattr_cache.o rw26.o super25.o statahead.o \
+ glimpse.o lcommon_cl.o lcommon_misc.o \
+ vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \
+ lproc_llite.o
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index dd1c827013b9..463b1a360733 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -82,7 +78,7 @@ static void ll_release(struct dentry *de)
* INVALID) so d_lookup() matches it, but we have no lock on it (so
* lock_match() fails) and we spin around real_lookup().
*/
-static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry,
+static int ll_dcompare(const struct dentry *dentry,
unsigned int len, const char *str,
const struct qstr *name)
{
@@ -108,11 +104,8 @@ static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry,
static inline int return_if_equal(struct ldlm_lock *lock, void *data)
{
- if ((lock->l_flags &
- (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) ==
- (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA))
- return LDLM_ITER_CONTINUE;
- return LDLM_ITER_STOP;
+ return (ldlm_is_canceling(lock) && ldlm_is_discard_data(lock)) ?
+ LDLM_ITER_CONTINUE : LDLM_ITER_STOP;
}
/* find any ldlm lock of the inode in mdc and lov
@@ -209,27 +202,27 @@ int ll_d_init(struct dentry *de)
void ll_intent_drop_lock(struct lookup_intent *it)
{
- if (it->it_op && it->d.lustre.it_lock_mode) {
+ if (it->it_op && it->it_lock_mode) {
struct lustre_handle handle;
- handle.cookie = it->d.lustre.it_lock_handle;
+ handle.cookie = it->it_lock_handle;
CDEBUG(D_DLMTRACE, "releasing lock with cookie %#llx from it %p\n",
handle.cookie, it);
- ldlm_lock_decref(&handle, it->d.lustre.it_lock_mode);
+ ldlm_lock_decref(&handle, it->it_lock_mode);
/* bug 494: intent_release may be called multiple times, from
* this thread and we don't want to double-decref this lock
*/
- it->d.lustre.it_lock_mode = 0;
- if (it->d.lustre.it_remote_lock_mode != 0) {
- handle.cookie = it->d.lustre.it_remote_lock_handle;
+ it->it_lock_mode = 0;
+ if (it->it_remote_lock_mode != 0) {
+ handle.cookie = it->it_remote_lock_handle;
CDEBUG(D_DLMTRACE, "releasing remote lock with cookie%#llx from it %p\n",
handle.cookie, it);
ldlm_lock_decref(&handle,
- it->d.lustre.it_remote_lock_mode);
- it->d.lustre.it_remote_lock_mode = 0;
+ it->it_remote_lock_mode);
+ it->it_remote_lock_mode = 0;
}
}
}
@@ -240,23 +233,23 @@ void ll_intent_release(struct lookup_intent *it)
ll_intent_drop_lock(it);
/* We are still holding extra reference on a request, need to free it */
if (it_disposition(it, DISP_ENQ_OPEN_REF))
- ptlrpc_req_finished(it->d.lustre.it_data); /* ll_file_open */
+ ptlrpc_req_finished(it->it_request); /* ll_file_open */
if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
- ptlrpc_req_finished(it->d.lustre.it_data);
+ ptlrpc_req_finished(it->it_request);
- it->d.lustre.it_disposition = 0;
- it->d.lustre.it_data = NULL;
+ it->it_disposition = 0;
+ it->it_request = NULL;
}
void ll_invalidate_aliases(struct inode *inode)
{
struct dentry *dentry;
- CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
- inode->i_ino, inode->i_generation, inode);
+ CDEBUG(D_INODE, "marking dentries for ino "DFID"(%p) invalid\n",
+ PFID(ll_inode2fid(inode)), inode);
- ll_lock_dcache(inode);
+ spin_lock(&inode->i_lock);
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
CDEBUG(D_DENTRY, "dentry in drop %pd (%p) parent %p inode %p flags %d\n",
dentry, dentry, dentry->d_parent,
@@ -264,7 +257,7 @@ void ll_invalidate_aliases(struct inode *inode)
d_lustre_invalidate(dentry, 0);
}
- ll_unlock_dcache(inode);
+ spin_unlock(&inode->i_lock);
}
int ll_revalidate_it_finish(struct ptlrpc_request *request,
@@ -286,11 +279,11 @@ int ll_revalidate_it_finish(struct ptlrpc_request *request,
void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
{
- if (it->d.lustre.it_lock_mode && inode) {
+ if (it->it_lock_mode && inode) {
struct ll_sb_info *sbi = ll_i2sbi(inode);
- CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
- inode, inode->i_ino, inode->i_generation);
+ CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
}
@@ -309,6 +302,17 @@ static int ll_revalidate_dentry(struct dentry *dentry,
{
struct inode *dir = d_inode(dentry->d_parent);
+ /* If this is intermediate component path lookup and we were able to get
+ * to this dentry, then its lock has not been revoked and the
+ * path component is valid.
+ */
+ if (lookup_flags & LOOKUP_PARENT)
+ return 1;
+
+ /* Symlink - always valid as long as the dentry was found */
+ if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))
+ return 1;
+
/*
* if open&create is set, talk to MDS to make sure file is created if
* necessary, because we can't do this in ->open() later since that's
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index e4c82883e580..5b381779c827 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -158,11 +154,16 @@ static int ll_dir_filler(void *_hash, struct page *page0)
int i;
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash %llu\n",
- inode->i_ino, inode->i_generation, inode, hash);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n",
+ PFID(ll_inode2fid(inode)), inode, hash);
LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ return PTR_ERR(op_data);
+
page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
if (page_pool) {
page_pool[0] = page0;
@@ -177,8 +178,6 @@ static int ll_dir_filler(void *_hash, struct page *page0)
page_pool[npages] = page;
}
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
op_data->op_npages = npages;
op_data->op_offset = hash;
rc = md_readpage(exp, op_data, page_pool, &request);
@@ -190,7 +189,7 @@ static int ll_dir_filler(void *_hash, struct page *page0)
body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
/* Checked by mdc_readpage() */
if (body->valid & OBD_MD_FLSIZE)
- cl_isize_write(inode, body->size);
+ i_size_write(inode, body->size);
nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1)
>> PAGE_SHIFT;
@@ -363,7 +362,7 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
ll_finish_md_op_data(op_data);
- request = (struct ptlrpc_request *)it.d.lustre.it_data;
+ request = (struct ptlrpc_request *)it.it_request;
if (request)
ptlrpc_req_finished(request);
if (rc < 0) {
@@ -372,10 +371,10 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
return ERR_PTR(rc);
}
- CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n",
- dir, dir->i_ino, dir->i_generation);
+ CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n",
+ PFID(ll_inode2fid(dir)), dir);
md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
- &it.d.lustre.it_lock_handle, dir, NULL);
+ &it.it_lock_handle, dir, NULL);
} else {
/* for cross-ref object, l_ast_data of the lock may not be set,
* we reset it here
@@ -468,6 +467,28 @@ fail:
goto out_unlock;
}
+/**
+ * return IF_* type for given lu_dirent entry.
+ * IF_* flag should be converted to particular OS file type in
+ * platform llite module.
+ */
+static __u16 ll_dirent_type_get(struct lu_dirent *ent)
+{
+ __u16 type = 0;
+ struct luda_type *lt;
+ int len = 0;
+
+ if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
+ const unsigned int align = sizeof(struct luda_type) - 1;
+
+ len = le16_to_cpu(ent->lde_namelen);
+ len = (len + align) & ~align;
+ lt = (void *)ent->lde_name + len;
+ type = IFTODT(le16_to_cpu(lt->lt_type));
+ }
+ return type;
+}
+
int ll_dir_read(struct inode *inode, struct dir_context *ctx)
{
struct ll_inode_info *info = ll_i2info(inode);
@@ -589,15 +610,16 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
struct inode *inode = file_inode(filp);
struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp);
struct ll_sb_info *sbi = ll_i2sbi(inode);
+ __u64 pos = lfd ? lfd->lfd_pos : 0;
int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
int api32 = ll_need_32bit_api(sbi);
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
- inode->i_ino, inode->i_generation,
- inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos %lu/%llu 32bit_api %d\n",
+ PFID(ll_inode2fid(inode)), inode, (unsigned long)pos,
+ i_size_read(inode), api32);
- if (lfd->lfd_pos == MDS_DIR_END_OFF) {
+ if (pos == MDS_DIR_END_OFF) {
/*
* end-of-file.
*/
@@ -605,9 +627,10 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
goto out;
}
- ctx->pos = lfd->lfd_pos;
+ ctx->pos = pos;
rc = ll_dir_read(inode, ctx);
- lfd->lfd_pos = ctx->pos;
+ if (lfd)
+ lfd->lfd_pos = ctx->pos;
if (ctx->pos == MDS_DIR_END_OFF) {
if (api32)
ctx->pos = LL_DIR_END_OFF_32BIT;
@@ -804,9 +827,8 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
rc = md_getattr(sbi->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr failed on inode %lu/%u: rc %d\n",
- inode->i_ino,
- inode->i_generation, rc);
+ CDEBUG(D_INFO, "md_getattr failed on inode "DFID": rc %d\n",
+ PFID(ll_inode2fid(inode)), rc);
goto out;
}
@@ -916,7 +938,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
}
/* Read current file data version */
- rc = ll_data_version(inode, &data_version, 1);
+ rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
iput(inode);
if (rc != 0) {
CDEBUG(D_HSM, "Could not read file data version of "
@@ -936,6 +958,9 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
}
progress:
+ /* On error, the request should be considered as completed */
+ if (hpk.hpk_errval > 0)
+ hpk.hpk_flags |= HP_FLAG_COMPLETED;
rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
&hpk, NULL);
@@ -997,8 +1022,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
goto progress;
}
- rc = ll_data_version(inode, &data_version,
- copy->hc_hai.hai_action == HSMA_ARCHIVE);
+ rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
iput(inode);
if (rc) {
CDEBUG(D_HSM, "Could not read file data version. Request could not be confirmed.\n");
@@ -1033,7 +1057,6 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
/* hpk_errval must be >= 0 */
hpk.hpk_errval = EBUSY;
}
-
}
progress:
@@ -1049,17 +1072,11 @@ static int copy_and_ioctl(int cmd, struct obd_export *exp,
void *copy;
int rc;
- copy = kzalloc(size, GFP_NOFS);
- if (!copy)
- return -ENOMEM;
-
- if (copy_from_user(copy, data, size)) {
- rc = -EFAULT;
- goto out;
- }
+ copy = memdup_user(data, size);
+ if (IS_ERR(copy))
+ return PTR_ERR(copy);
rc = obd_iocontrol(cmd, exp, size, copy, NULL);
-out:
kfree(copy);
return rc;
@@ -1080,8 +1097,7 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
case Q_QUOTAOFF:
case Q_SETQUOTA:
case Q_SETINFO:
- if (!capable(CFS_CAP_SYS_ADMIN) ||
- sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ if (!capable(CFS_CAP_SYS_ADMIN))
return -EPERM;
break;
case Q_GETQUOTA:
@@ -1089,8 +1105,7 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
!uid_eq(current_euid(), make_kuid(&init_user_ns, id))) ||
(type == GRPQUOTA &&
!in_egroup_p(make_kgid(&init_user_ns, id)))) &&
- (!capable(CFS_CAP_SYS_ADMIN) ||
- sbi->ll_flags & LL_SBI_RMT_CLIENT))
+ !capable(CFS_CAP_SYS_ADMIN))
return -EPERM;
break;
case Q_GETINFO:
@@ -1101,9 +1116,6 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
}
if (valid != QC_GENERAL) {
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
- return -EOPNOTSUPP;
-
if (cmd == Q_GETINFO)
qctl->qc_cmd = Q_GETOINFO;
else if (cmd == Q_GETQUOTA)
@@ -1242,8 +1254,8 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct obd_ioctl_data *data;
int rc = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
- inode->i_ino, inode->i_generation, inode, cmd);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%#x\n",
+ PFID(ll_inode2fid(inode)), inode, cmd);
/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
@@ -1362,7 +1374,6 @@ out_free:
lmv_out_free:
obd_ioctl_freedata(buf, len);
return rc;
-
}
case LL_IOC_LOV_SETSTRIPE: {
struct lov_user_md_v3 lumv3;
@@ -1474,8 +1485,9 @@ free_lmv:
cmd == LL_IOC_MDC_GETINFO)) {
rc = 0;
goto skip_lmm;
- } else
+ } else {
goto out_req;
+ }
}
if (cmd == IOC_MDC_GETFILESTRIPE ||
@@ -1511,7 +1523,9 @@ skip_lmm:
st.st_atime = body->atime;
st.st_mtime = body->mtime;
st.st_ctime = body->ctime;
- st.st_ino = inode->i_ino;
+ st.st_ino = cl_fid_build_ino(&body->fid1,
+ sbi->ll_flags &
+ LL_SBI_32BIT_API);
lmdp = (struct lov_user_mds_data __user *)arg;
if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) {
@@ -1604,8 +1618,7 @@ free_lmm:
struct obd_quotactl *oqctl;
int error = 0;
- if (!capable(CFS_CAP_SYS_ADMIN) ||
- sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ if (!capable(CFS_CAP_SYS_ADMIN))
return -EPERM;
oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
@@ -1628,8 +1641,7 @@ free_lmm:
case OBD_IOC_POLL_QUOTACHECK: {
struct if_quotacheck *check;
- if (!capable(CFS_CAP_SYS_ADMIN) ||
- sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ if (!capable(CFS_CAP_SYS_ADMIN))
return -EPERM;
check = kzalloc(sizeof(*check), GFP_NOFS);
@@ -1686,19 +1698,6 @@ out_quotactl:
return ll_get_obd_name(inode, cmd, arg);
case LL_IOC_FLUSHCTX:
return ll_flush_ctx(inode);
-#ifdef CONFIG_FS_POSIX_ACL
- case LL_IOC_RMTACL: {
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
- rc = rct_add(&sbi->ll_rct, current_pid(), arg);
- if (!rc)
- fd->fd_flags |= LL_FILE_RMTACL;
- return rc;
- } else
- return 0;
- }
-#endif
case LL_IOC_GETOBDCOUNT: {
int count, vallen;
struct obd_export *exp;
@@ -1817,6 +1816,9 @@ out_quotactl:
return rc;
}
case LL_IOC_HSM_CT_START:
+ if (!capable(CFS_CAP_SYS_ADMIN))
+ return -EPERM;
+
rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
sizeof(struct lustre_kernelcomm));
return rc;
@@ -1865,7 +1867,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
int api32 = ll_need_32bit_api(sbi);
loff_t ret = -EINVAL;
- inode_lock(inode);
switch (origin) {
case SEEK_SET:
break;
@@ -1903,7 +1904,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
goto out;
out:
- inode_unlock(inode);
return ret;
}
@@ -1922,7 +1922,7 @@ const struct file_operations ll_dir_operations = {
.open = ll_dir_open,
.release = ll_dir_release,
.read = generic_read_dir,
- .iterate = ll_readdir,
+ .iterate_shared = ll_readdir,
.unlocked_ioctl = ll_dir_ioctl,
.fsync = ll_fsync,
};
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index cf619af3caf5..57281b9e31ff 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -45,6 +41,7 @@
#include "../include/lustre_lite.h"
#include <linux/pagemap.h>
#include <linux/file.h>
+#include <linux/mount.h>
#include "llite_internal.h"
#include "../include/lustre/ll_fiemap.h"
@@ -87,8 +84,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
op_data->op_attr.ia_ctime = inode->i_ctime;
op_data->op_attr.ia_size = i_size_read(inode);
op_data->op_attr_blocks = inode->i_blocks;
- ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
- ll_inode_to_ext_flags(inode->i_flags);
+ op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
if (fh)
op_data->op_handle = *fh;
@@ -170,13 +166,15 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
*/
rc = ll_som_update(inode, op_data);
if (rc) {
- CERROR("inode %lu mdc Size-on-MDS update failed: rc = %d\n",
- inode->i_ino, rc);
+ CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n",
+ ll_i2mdexp(inode)->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)), rc);
rc = 0;
}
} else if (rc) {
- CERROR("inode %lu mdc close failed: rc = %d\n",
- inode->i_ino, rc);
+ CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
+ ll_i2mdexp(inode)->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)), rc);
}
/* DATA_MODIFIED flag was successfully sent on close, cancel data
@@ -278,7 +276,7 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
/* clear group lock, if present */
if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
- ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
+ ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
if (fd->fd_lease_och) {
bool lease_broken;
@@ -343,20 +341,8 @@ int ll_file_release(struct inode *inode, struct file *file)
struct ll_inode_info *lli = ll_i2info(inode);
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
-
-#ifdef CONFIG_FS_POSIX_ACL
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
- if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
- fd->fd_flags &= ~LL_FILE_RMTACL;
- rct_del(&sbi->ll_rct, current_pid());
- et_search_free(&sbi->ll_et, current_pid());
- }
- }
-#endif
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
if (!is_root_inode(inode))
ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
@@ -413,7 +399,19 @@ static int ll_intent_file_open(struct dentry *dentry, void *lmm,
* parameters. No need for the open lock
*/
if (!lmm && lmmsize == 0) {
- itp->it_flags |= MDS_OPEN_LOCK;
+ struct ll_dentry_data *ldd = ll_d2d(dentry);
+ /*
+ * If we came via ll_iget_for_nfs, then we need to request
+ * the open lock (MDS_OPEN_LOCK).
+ *
+ * NB: when ldd is NULL, it must have come via normal
+ * lookup path only, since ll_iget_for_nfs always calls
+ * ll_d_init().
+ */
+ if (ldd && ldd->lld_nfs_dentry) {
+ ldd->lld_nfs_dentry = 0;
+ itp->it_flags |= MDS_OPEN_LOCK;
+ }
if (itp->it_flags & FMODE_WRITE)
opc = LUSTRE_OPC_CREATE;
}
@@ -451,7 +449,7 @@ static int ll_intent_file_open(struct dentry *dentry, void *lmm,
}
rc = ll_prep_inode(&inode, req, NULL, itp);
- if (!rc && itp->d.lustre.it_lock_mode)
+ if (!rc && itp->it_lock_mode)
ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL);
out:
@@ -478,13 +476,12 @@ void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
struct obd_client_handle *och)
{
- struct ptlrpc_request *req = it->d.lustre.it_data;
struct mdt_body *body;
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
och->och_fh = body->handle;
och->och_fid = body->fid1;
- och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
+ och->och_lease_handle.cookie = it->it_lock_handle;
och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
och->och_flags = it->it_flags;
@@ -502,7 +499,6 @@ static int ll_local_open(struct file *file, struct lookup_intent *it,
LASSERT(fd);
if (och) {
- struct ptlrpc_request *req = it->d.lustre.it_data;
struct mdt_body *body;
int rc;
@@ -510,13 +506,19 @@ static int ll_local_open(struct file *file, struct lookup_intent *it,
if (rc != 0)
return rc;
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ body = req_capsule_server_get(&it->it_request->rq_pill,
+ &RMF_MDT_BODY);
ll_ioepoch_open(lli, body->ioepoch);
}
LUSTRE_FPRIVATE(file) = fd;
ll_readahead_init(inode, &fd->fd_ras);
fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
+
+ /* ll_cl_context initialize */
+ rwlock_init(&fd->fd_lock);
+ INIT_LIST_HEAD(&fd->fd_lccs);
+
return 0;
}
@@ -543,8 +545,8 @@ int ll_file_open(struct inode *inode, struct file *file)
struct ll_file_data *fd;
int rc = 0, opendir_set = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
- inode->i_generation, inode, file->f_flags);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
+ PFID(ll_inode2fid(inode)), inode, file->f_flags);
it = file->private_data; /* XXX: compat macro */
file->private_data = NULL; /* prevent ll_local_open assertion */
@@ -572,7 +574,7 @@ int ll_file_open(struct inode *inode, struct file *file)
return 0;
}
- if (!it || !it->d.lustre.it_disposition) {
+ if (!it || !it->it_disposition) {
/* Convert f_flags into access mode. We cannot use file->f_mode,
* because everything but O_ACCMODE mask was stripped from
* there
@@ -642,7 +644,7 @@ restart:
}
} else {
LASSERT(*och_usecount == 0);
- if (!it->d.lustre.it_disposition) {
+ if (!it->it_disposition) {
/* We cannot just request lock handle now, new ELC code
* means that one of other OPEN locks for this file
* could be cancelled, and since blocking ast handler
@@ -677,7 +679,9 @@ restart:
if (rc)
goto out_och_free;
- LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
+ LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
+ "inode %p: disposition %x, status %d\n", inode,
+ it_disposition(it, ~0), it->it_status);
rc = ll_local_open(file, it, fd, *och_p);
if (rc)
@@ -720,7 +724,7 @@ out_openerr:
}
if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
- ptlrpc_req_finished(it->d.lustre.it_data);
+ ptlrpc_req_finished(it->it_request);
it_clear_disposition(it, DISP_ENQ_OPEN_REF);
}
@@ -861,12 +865,12 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
/* already get lease, handle lease lock */
ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
- if (it.d.lustre.it_lock_mode == 0 ||
- it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
+ if (it.it_lock_mode == 0 ||
+ it.it_lock_bits != MDS_INODELOCK_OPEN) {
/* open lock must return for lease */
CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
- PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
- it.d.lustre.it_lock_bits);
+ PFID(ll_inode2fid(inode)), it.it_lock_mode,
+ it.it_lock_bits);
rc = -EPROTO;
goto out_close;
}
@@ -875,16 +879,19 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
return och;
out_close:
- rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
- if (rc2)
- CERROR("Close openhandle returned %d\n", rc2);
-
- /* cancel open lock */
- if (it.d.lustre.it_lock_mode != 0) {
+ /* Cancel open lock */
+ if (it.it_lock_mode != 0) {
ldlm_lock_decref_and_cancel(&och->och_lease_handle,
- it.d.lustre.it_lock_mode);
- it.d.lustre.it_lock_mode = 0;
+ it.it_lock_mode);
+ it.it_lock_mode = 0;
+ och->och_lease_handle.cookie = 0ULL;
}
+ rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
+ if (rc2 < 0)
+ CERROR("%s: error closing file "DFID": %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(&ll_i2info(inode)->lli_fid), rc2);
+ och = NULL; /* och has been freed in ll_close_inode_openhandle() */
out_release_it:
ll_intent_release(&it);
out:
@@ -908,7 +915,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
lock_res_and_lock(lock);
cancelled = ldlm_is_cancel(lock);
unlock_res_and_lock(lock);
- ldlm_lock_put(lock);
+ LDLM_LOCK_PUT(lock);
}
CDEBUG(D_INODE, "lease for " DFID " broken? %d\n",
@@ -926,7 +933,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
/* Fills the obdo with the attributes for the lsm */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
- struct obdo *obdo, __u64 ioepoch, int sync)
+ struct obdo *obdo, __u64 ioepoch, int dv_flags)
{
struct ptlrpc_request_set *set;
struct obd_info oinfo = { };
@@ -945,9 +952,11 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
OBD_MD_FLMTIME | OBD_MD_FLCTIME |
OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
OBD_MD_FLDATAVERSION;
- if (sync) {
+ if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
+ if (dv_flags & LL_DV_WR_FLUSH)
+ oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
}
set = ptlrpc_prep_set();
@@ -960,11 +969,16 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
rc = ptlrpc_set_wait(set);
ptlrpc_set_destroy(set);
}
- if (rc == 0)
+ if (rc == 0) {
oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
OBD_MD_FLATIME | OBD_MD_FLMTIME |
OBD_MD_FLCTIME | OBD_MD_FLSIZE |
- OBD_MD_FLDATAVERSION);
+ OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
+ if (dv_flags & LL_DV_WR_FLUSH &&
+ !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
+ oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
+ return -ENOTSUPP;
+ }
return rc;
}
@@ -980,7 +994,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
lsm = ccc_inode_lsm_get(inode);
rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
- obdo, ioepoch, sync);
+ obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
if (rc == 0) {
struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
@@ -994,50 +1008,57 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
return rc;
}
-int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
+int ll_merge_attr(const struct lu_env *env, struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct cl_object *obj = lli->lli_clob;
- struct cl_attr *attr = ccc_env_thread_attr(env);
- struct ost_lvb lvb;
+ struct cl_attr *attr = vvp_env_thread_attr(env);
+ s64 atime;
+ s64 mtime;
+ s64 ctime;
int rc = 0;
ll_inode_size_lock(inode);
+
/* merge timestamps the most recently obtained from mds with
* timestamps obtained from osts
*/
- LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
- LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
- LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
+ LTIME_S(inode->i_atime) = lli->lli_atime;
+ LTIME_S(inode->i_mtime) = lli->lli_mtime;
+ LTIME_S(inode->i_ctime) = lli->lli_ctime;
- lvb.lvb_size = i_size_read(inode);
- lvb.lvb_blocks = inode->i_blocks;
- lvb.lvb_mtime = LTIME_S(inode->i_mtime);
- lvb.lvb_atime = LTIME_S(inode->i_atime);
- lvb.lvb_ctime = LTIME_S(inode->i_ctime);
+ mtime = LTIME_S(inode->i_mtime);
+ atime = LTIME_S(inode->i_atime);
+ ctime = LTIME_S(inode->i_ctime);
cl_object_attr_lock(obj);
rc = cl_object_attr_get(env, obj, attr);
cl_object_attr_unlock(obj);
- if (rc == 0) {
- if (lvb.lvb_atime < attr->cat_atime)
- lvb.lvb_atime = attr->cat_atime;
- if (lvb.lvb_ctime < attr->cat_ctime)
- lvb.lvb_ctime = attr->cat_ctime;
- if (lvb.lvb_mtime < attr->cat_mtime)
- lvb.lvb_mtime = attr->cat_mtime;
+ if (rc != 0)
+ goto out_size_unlock;
- CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
- PFID(&lli->lli_fid), attr->cat_size);
- cl_isize_write_nolock(inode, attr->cat_size);
+ if (atime < attr->cat_atime)
+ atime = attr->cat_atime;
- inode->i_blocks = attr->cat_blocks;
+ if (ctime < attr->cat_ctime)
+ ctime = attr->cat_ctime;
- LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
- LTIME_S(inode->i_atime) = lvb.lvb_atime;
- LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
- }
+ if (mtime < attr->cat_mtime)
+ mtime = attr->cat_mtime;
+
+ CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
+ PFID(&lli->lli_fid), attr->cat_size);
+
+ i_size_write(inode, attr->cat_size);
+
+ inode->i_blocks = attr->cat_blocks;
+
+ LTIME_S(inode->i_mtime) = mtime;
+ LTIME_S(inode->i_atime) = atime;
+ LTIME_S(inode->i_ctime) = ctime;
+
+out_size_unlock:
ll_inode_size_unlock(inode);
return rc;
@@ -1120,47 +1141,50 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
struct cl_io *io;
ssize_t result;
+ CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n",
+ file->f_path.dentry->d_name.name, iot, *ppos, count);
+
restart:
- io = ccc_env_thread_io(env);
+ io = vvp_env_thread_io(env);
ll_io_init(io, file, iot == CIT_WRITE);
if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
struct vvp_io *vio = vvp_env_io(env);
- struct ccc_io *cio = ccc_env_io(env);
int write_mutex_locked = 0;
- cio->cui_fd = LUSTRE_FPRIVATE(file);
- vio->cui_io_subtype = args->via_io_subtype;
+ vio->vui_fd = LUSTRE_FPRIVATE(file);
+ vio->vui_io_subtype = args->via_io_subtype;
- switch (vio->cui_io_subtype) {
+ switch (vio->vui_io_subtype) {
case IO_NORMAL:
- cio->cui_iter = args->u.normal.via_iter;
- cio->cui_iocb = args->u.normal.via_iocb;
+ vio->vui_iter = args->u.normal.via_iter;
+ vio->vui_iocb = args->u.normal.via_iocb;
if ((iot == CIT_WRITE) &&
- !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+ !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
if (mutex_lock_interruptible(&lli->
lli_write_mutex)) {
result = -ERESTARTSYS;
goto out;
}
write_mutex_locked = 1;
- } else if (iot == CIT_READ) {
- down_read(&lli->lli_trunc_sem);
}
+ down_read(&lli->lli_trunc_sem);
break;
case IO_SPLICE:
- vio->u.splice.cui_pipe = args->u.splice.via_pipe;
- vio->u.splice.cui_flags = args->u.splice.via_flags;
+ vio->u.splice.vui_pipe = args->u.splice.via_pipe;
+ vio->u.splice.vui_flags = args->u.splice.via_flags;
break;
default:
- CERROR("Unknown IO type - %u\n", vio->cui_io_subtype);
+ CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
LBUG();
}
+ ll_cl_add(file, env, io);
result = cl_io_loop(env, io);
+ ll_cl_remove(file, env);
+ if (args->via_io_subtype == IO_NORMAL)
+ up_read(&lli->lli_trunc_sem);
if (write_mutex_locked)
mutex_unlock(&lli->lli_write_mutex);
- else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
- up_read(&lli->lli_trunc_sem);
} else {
/* cl_io_rw_init() handled IO */
result = io->ci_result;
@@ -1197,6 +1221,7 @@ out:
fd->fd_write_failed = true;
}
}
+ CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
return result;
}
@@ -1212,7 +1237,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (IS_ERR(env))
return PTR_ERR(env);
- args = vvp_env_args(env, IO_NORMAL);
+ args = ll_env_args(env, IO_NORMAL);
args->u.normal.via_iter = to;
args->u.normal.via_iocb = iocb;
@@ -1236,7 +1261,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (IS_ERR(env))
return PTR_ERR(env);
- args = vvp_env_args(env, IO_NORMAL);
+ args = ll_env_args(env, IO_NORMAL);
args->u.normal.via_iter = from;
args->u.normal.via_iocb = iocb;
@@ -1262,7 +1287,7 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
if (IS_ERR(env))
return PTR_ERR(env);
- args = vvp_env_args(env, IO_SPLICE);
+ args = ll_env_args(env, IO_SPLICE);
args->u.splice.via_pipe = pipe;
args->u.splice.via_flags = flags;
@@ -1354,7 +1379,8 @@ static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
}
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- int flags, struct lov_user_md *lum, int lum_size)
+ __u64 flags, struct lov_user_md *lum,
+ int lum_size)
{
struct lov_stripe_md *lsm = NULL;
struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
@@ -1363,8 +1389,8 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
lsm = ccc_inode_lsm_get(inode);
if (lsm) {
ccc_inode_lsm_put(inode, lsm);
- CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
- inode->i_ino);
+ CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
+ PFID(ll_inode2fid(inode)));
rc = -EEXIST;
goto out;
}
@@ -1373,7 +1399,7 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
if (rc)
goto out_unlock;
- rc = oit.d.lustre.it_status;
+ rc = oit.it_status;
if (rc < 0)
goto out_req_free;
@@ -1386,7 +1412,7 @@ out_unlock:
out:
return rc;
out_req_free:
- ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
+ ptlrpc_req_finished((struct ptlrpc_request *)oit.it_request);
goto out;
}
@@ -1478,7 +1504,7 @@ out:
static int ll_lov_setea(struct inode *inode, struct file *file,
unsigned long arg)
{
- int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
+ __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
struct lov_user_md *lump;
int lum_size = sizeof(struct lov_user_md) +
sizeof(struct lov_user_ost_data);
@@ -1512,7 +1538,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
int lum_size, rc;
- int flags = FMODE_WRITE;
+ __u64 flags = FMODE_WRITE;
/* first try with v1 which is smaller than v3 */
lum_size = sizeof(struct lov_user_md_v1);
@@ -1561,7 +1587,7 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ccc_grouplock grouplock;
+ struct ll_grouplock grouplock;
int rc;
if (arg == 0) {
@@ -1575,14 +1601,14 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
spin_lock(&lli->lli_lock);
if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
CWARN("group lock already existed with gid %lu\n",
- fd->fd_grouplock.cg_gid);
+ fd->fd_grouplock.lg_gid);
spin_unlock(&lli->lli_lock);
return -EINVAL;
}
- LASSERT(!fd->fd_grouplock.cg_lock);
+ LASSERT(!fd->fd_grouplock.lg_lock);
spin_unlock(&lli->lli_lock);
- rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
+ rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
arg, (file->f_flags & O_NONBLOCK), &grouplock);
if (rc)
return rc;
@@ -1608,7 +1634,7 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
{
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ccc_grouplock grouplock;
+ struct ll_grouplock grouplock;
spin_lock(&lli->lli_lock);
if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
@@ -1616,11 +1642,11 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
CWARN("no group lock held\n");
return -EINVAL;
}
- LASSERT(fd->fd_grouplock.cg_lock);
+ LASSERT(fd->fd_grouplock.lg_lock);
- if (fd->fd_grouplock.cg_gid != arg) {
+ if (fd->fd_grouplock.lg_gid != arg) {
CWARN("group lock %lu doesn't match current id %lu\n",
- arg, fd->fd_grouplock.cg_gid);
+ arg, fd->fd_grouplock.lg_gid);
spin_unlock(&lli->lli_lock);
return -EINVAL;
}
@@ -1674,7 +1700,7 @@ int ll_release_openhandle(struct inode *inode, struct lookup_intent *it)
out:
/* this one is in place of ll_file_open */
if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
- ptlrpc_req_finished(it->d.lustre.it_data);
+ ptlrpc_req_finished(it->it_request);
it_clear_disposition(it, DISP_ENQ_OPEN_REF);
}
return rc;
@@ -1861,11 +1887,12 @@ error:
* This value is computed using stripe object version on OST.
* Version is computed using server side locking.
*
- * @param extent_lock Take extent lock. Not needed if a process is already
- * holding the OST object group locks.
+ * @param flags whether to sync on the OST side;
+ * 0: no sync
+ * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
+ * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
*/
-int ll_data_version(struct inode *inode, __u64 *data_version,
- int extent_lock)
+int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
{
struct lov_stripe_md *lsm = NULL;
struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -1887,7 +1914,7 @@ int ll_data_version(struct inode *inode, __u64 *data_version,
goto out;
}
- rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, extent_lock);
+ rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags);
if (rc == 0) {
if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
rc = -EOPNOTSUPP;
@@ -1923,7 +1950,7 @@ int ll_hsm_release(struct inode *inode)
}
/* Grab latest data_version and [am]time values */
- rc = ll_data_version(inode, &data_version, 1);
+ rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
if (rc != 0)
goto out;
@@ -1933,7 +1960,7 @@ int ll_hsm_release(struct inode *inode)
goto out;
}
- ll_merge_lvb(env, inode);
+ ll_merge_attr(env, inode);
cl_env_nested_put(&nest, env);
/* Release the file.
@@ -2227,8 +2254,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
int flags, rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
- inode->i_generation, inode, cmd);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),cmd=%x\n",
+ PFID(ll_inode2fid(inode)), inode, cmd);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
@@ -2331,9 +2358,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
return -EFAULT;
- rc = ll_data_version(inode, &idv.idv_version,
- !(idv.idv_flags & LL_DV_NOFLUSH));
-
+ idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
+ rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
if (rc == 0 && copy_to_user((char __user *)arg, &idv,
sizeof(idv)))
return -EFAULT;
@@ -2499,7 +2525,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
rc = och->och_flags &
(FMODE_READ | FMODE_WRITE);
unlock_res_and_lock(lock);
- ldlm_lock_put(lock);
+ LDLM_LOCK_PUT(lock);
}
}
mutex_unlock(&lli->lli_och_mutex);
@@ -2537,9 +2563,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
(origin == SEEK_CUR) ? file->f_pos : 0);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
- inode->i_ino, inode->i_generation, inode, retval, retval,
- origin);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
+ PFID(ll_inode2fid(inode)), inode, retval, retval, origin);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
@@ -2603,8 +2628,8 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
if (IS_ERR(env))
return PTR_ERR(env);
- io = ccc_env_thread_io(env);
- io->ci_obj = cl_i2info(inode)->lli_clob;
+ io = vvp_env_thread_io(env);
+ io->ci_obj = ll_i2info(inode)->lli_clob;
io->ci_ignore_layout = ignore_layout;
/* initialize parameters for sync */
@@ -2634,8 +2659,8 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct ptlrpc_request *req;
int rc, err;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -2693,8 +2718,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
int rc;
int rc2 = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
- inode->i_ino, file_lock);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
+ PFID(ll_inode2fid(inode)), file_lock);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
@@ -2777,9 +2802,9 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
- inode->i_ino, flock.l_flock.pid, flags, einfo.ei_mode,
- flock.l_flock.start, flock.l_flock.end);
+ CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
+ PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
+ einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
op_data, &lockh, &flock, 0, NULL /* req */, flags);
@@ -2901,8 +2926,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
struct obd_export *exp;
int rc = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%pd\n",
- inode->i_ino, inode->i_generation, inode, dentry);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%pd\n",
+ PFID(ll_inode2fid(inode)), inode, dentry);
exp = ll_i2mdexp(inode);
@@ -2949,8 +2974,11 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
* here to preserve get_cwd functionality on 2.6.
* Bug 10503
*/
- if (!d_inode(dentry)->i_nlink)
+ if (!d_inode(dentry)->i_nlink) {
+ spin_lock(&inode->i_lock);
d_lustre_invalidate(dentry, 0);
+ spin_unlock(&inode->i_lock);
+ }
ll_lookup_finish_locks(&oit, inode);
} else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
@@ -2998,9 +3026,9 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
/* if object isn't regular file, don't validate size */
if (!S_ISREG(inode->i_mode)) {
- LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
- LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
- LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
+ LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
+ LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
+ LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
} else {
/* In case of restore, the MDT has the right size and has
* already send it back without granting the layout lock,
@@ -3101,6 +3129,9 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type)
spin_lock(&lli->lli_lock);
/* VFS' acl_permission_check->check_acl will release the refcount */
acl = posix_acl_dup(lli->lli_posix_acl);
+#ifdef CONFIG_FS_POSIX_ACL
+ forget_cached_acl(inode, type);
+#endif
spin_unlock(&lli->lli_lock);
return acl;
@@ -3124,11 +3155,8 @@ int ll_inode_permission(struct inode *inode, int mask)
return rc;
}
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
- inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
-
- if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
- return lustre_check_remote_perm(inode, mask);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
+ PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
rc = generic_permission(inode, mask);
@@ -3335,10 +3363,10 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
int rc;
CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
- PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
+ PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
lock->l_lvb_data, lock->l_lvb_len);
- if (lock->l_lvb_data && (lock->l_flags & LDLM_FL_LVB_READY))
+ if (lock->l_lvb_data && ldlm_is_lvb_ready(lock))
return 0;
/* if layout lock was granted right away, the layout is returned
@@ -3415,14 +3443,14 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
LASSERT(lock);
LASSERT(ldlm_has_layout(lock));
- LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d",
- inode, PFID(&lli->lli_fid), reconf);
+ LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d",
+ PFID(&lli->lli_fid), inode, reconf);
/* in case this is a caching lock and reinstate with new inode */
md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
lock_res_and_lock(lock);
- lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
+ lvb_ready = ldlm_is_lvb_ready(lock);
unlock_res_and_lock(lock);
/* checking lvb_ready is racy but this is okay. The worst case is
* that multi processes may configure the file on the same time.
@@ -3487,9 +3515,9 @@ out:
/* wait for IO to complete if it's still being used. */
if (wait_layout) {
- CDEBUG(D_INODE, "%s: %p/" DFID " wait for layout reconf.\n",
+ CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
ll_get_fsname(inode->i_sb, NULL, 0),
- inode, PFID(&lli->lli_fid));
+ PFID(&lli->lli_fid), inode);
memset(&conf, 0, sizeof(conf));
conf.coc_opc = OBJECT_CONF_WAIT;
@@ -3498,7 +3526,8 @@ out:
if (rc == 0)
rc = -EAGAIN;
- CDEBUG(D_INODE, "file: " DFID " waiting layout return: %d.\n",
+ CDEBUG(D_INODE, "%s: file="DFID" waiting layout return: %d.\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
PFID(&lli->lli_fid), rc);
}
return rc;
@@ -3571,19 +3600,19 @@ again:
it.it_op = IT_LAYOUT;
lockh.cookie = 0ULL;
- LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/" DFID "",
- ll_get_fsname(inode->i_sb, NULL, 0), inode,
- PFID(&lli->lli_fid));
+ LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(&lli->lli_fid), inode);
rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
NULL, 0, NULL, 0);
- ptlrpc_req_finished(it.d.lustre.it_data);
- it.d.lustre.it_data = NULL;
+ ptlrpc_req_finished(it.it_request);
+ it.it_request = NULL;
ll_finish_md_op_data(op_data);
- mode = it.d.lustre.it_lock_mode;
- it.d.lustre.it_lock_mode = 0;
+ mode = it.it_lock_mode;
+ it.it_lock_mode = 0;
ll_intent_drop_lock(&it);
if (rc == 0) {
@@ -3601,7 +3630,7 @@ again:
/**
* This function send a restore request to the MDT
*/
-int ll_layout_restore(struct inode *inode)
+int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
{
struct hsm_user_request *hur;
int len, rc;
@@ -3617,9 +3646,10 @@ int ll_layout_restore(struct inode *inode)
hur->hur_request.hr_flags = 0;
memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
sizeof(hur->hur_user_item[0].hui_fid));
- hur->hur_user_item[0].hui_extent.length = -1;
+ hur->hur_user_item[0].hui_extent.offset = offset;
+ hur->hur_user_item[0].hui_extent.length = length;
hur->hur_request.hr_itemcount = 1;
- rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
+ rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
len, hur, NULL);
kfree(hur);
return rc;
diff --git a/drivers/staging/lustre/lustre/lclient/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
index c4e8a0878ac8..92004a05f9ee 100644
--- a/drivers/staging/lustre/lustre/lclient/glimpse.c
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -52,7 +48,6 @@
#include <linux/file.h>
#include "../include/cl_object.h"
-#include "../include/lclient.h"
#include "../llite/llite_internal.h"
static const struct cl_lock_descr whole_file = {
@@ -70,14 +65,14 @@ static const struct cl_lock_descr whole_file = {
blkcnt_t dirty_cnt(struct inode *inode)
{
blkcnt_t cnt = 0;
- struct ccc_object *vob = cl_inode2ccc(inode);
+ struct vvp_object *vob = cl_inode2vvp(inode);
void *results[1];
if (inode->i_mapping)
cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
results, 0, 1,
PAGECACHE_TAG_DIRTY);
- if (cnt == 0 && atomic_read(&vob->cob_mmap_cnt) > 0)
+ if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
cnt = 1;
return (cnt > 0) ? 1 : 0;
@@ -86,17 +81,17 @@ blkcnt_t dirty_cnt(struct inode *inode)
int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
struct inode *inode, struct cl_object *clob, int agl)
{
- struct cl_lock_descr *descr = &ccc_env_info(env)->cti_descr;
- struct cl_inode_info *lli = cl_i2info(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
const struct lu_fid *fid = lu_object_fid(&clob->co_lu);
- struct ccc_io *cio = ccc_env_io(env);
- struct cl_lock *lock;
int result;
result = 0;
if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) {
- CDEBUG(D_DLMTRACE, "Glimpsing inode "DFID"\n", PFID(fid));
+ CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
if (lli->lli_has_smd) {
+ struct cl_lock *lock = vvp_env_lock(env);
+ struct cl_lock_descr *descr = &lock->cll_descr;
+
/* NOTE: this looks like DLM lock request, but it may
* not be one. Due to CEF_ASYNC flag (translated
* to LDLM_FL_HAS_INTENT by osc), this is
@@ -113,11 +108,10 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
*/
*descr = whole_file;
descr->cld_obj = clob;
- descr->cld_mode = CLM_PHANTOM;
+ descr->cld_mode = CLM_READ;
descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
if (agl)
descr->cld_enq_flags |= CEF_AGL;
- cio->cui_glimpse = 1;
/*
* CEF_ASYNC is used because glimpse sub-locks cannot
* deadlock (because they never conflict with other
@@ -126,21 +120,13 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
* CEF_MUST protects glimpse lock from conversion into
* a lockless mode.
*/
- lock = cl_lock_request(env, io, descr, "glimpse",
- current);
- cio->cui_glimpse = 0;
-
- if (!lock)
- return 0;
+ result = cl_lock_request(env, io, lock);
+ if (result < 0)
+ return result;
- if (IS_ERR(lock))
- return PTR_ERR(lock);
-
- LASSERT(agl == 0);
- result = cl_wait(env, lock);
- if (result == 0) {
- cl_merge_lvb(env, inode);
- if (cl_isize_read(inode) > 0 &&
+ if (!agl) {
+ ll_merge_attr(env, inode);
+ if (i_size_read(inode) > 0 &&
inode->i_blocks == 0) {
/*
* LU-417: Add dirty pages block count
@@ -150,12 +136,11 @@ int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
*/
inode->i_blocks = dirty_cnt(inode);
}
- cl_unuse(env, lock);
}
- cl_lock_release(env, lock, "glimpse", current);
+ cl_lock_release(env, lock);
} else {
CDEBUG(D_DLMTRACE, "No objects for inode\n");
- cl_merge_lvb(env, inode);
+ ll_merge_attr(env, inode);
}
}
@@ -167,22 +152,24 @@ static int cl_io_get(struct inode *inode, struct lu_env **envout,
{
struct lu_env *env;
struct cl_io *io;
- struct cl_inode_info *lli = cl_i2info(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
struct cl_object *clob = lli->lli_clob;
int result;
- if (S_ISREG(cl_inode_mode(inode))) {
+ if (S_ISREG(inode->i_mode)) {
env = cl_env_get(refcheck);
if (!IS_ERR(env)) {
- io = ccc_env_thread_io(env);
+ io = vvp_env_thread_io(env);
io->ci_obj = clob;
*envout = env;
*ioout = io;
result = 1;
- } else
+ } else {
result = PTR_ERR(env);
- } else
+ }
+ } else {
result = 0;
+ }
return result;
}
@@ -231,14 +218,11 @@ int cl_local_size(struct inode *inode)
{
struct lu_env *env = NULL;
struct cl_io *io = NULL;
- struct ccc_thread_info *cti;
struct cl_object *clob;
- struct cl_lock_descr *descr;
- struct cl_lock *lock;
int result;
int refcheck;
- if (!cl_i2info(inode)->lli_has_smd)
+ if (!ll_i2info(inode)->lli_has_smd)
return 0;
result = cl_io_get(inode, &env, &io, &refcheck);
@@ -247,22 +231,19 @@ int cl_local_size(struct inode *inode)
clob = io->ci_obj;
result = cl_io_init(env, io, CIT_MISC, clob);
- if (result > 0)
+ if (result > 0) {
result = io->ci_result;
- else if (result == 0) {
- cti = ccc_env_info(env);
- descr = &cti->cti_descr;
-
- *descr = whole_file;
- descr->cld_obj = clob;
- lock = cl_lock_peek(env, io, descr, "localsize", current);
- if (lock) {
- cl_merge_lvb(env, inode);
- cl_unuse(env, lock);
- cl_lock_release(env, lock, "localsize", current);
- result = 0;
- } else
- result = -ENODATA;
+ } else if (result == 0) {
+ struct cl_lock *lock = vvp_env_lock(env);
+
+ lock->cll_descr = whole_file;
+ lock->cll_descr.cld_enq_flags = CEF_PEEK;
+ lock->cll_descr.cld_obj = clob;
+ result = cl_lock_request(env, io, lock);
+ if (result == 0) {
+ ll_merge_attr(env, inode);
+ cl_lock_release(env, lock);
+ }
}
cl_io_fini(env, io);
cl_env_put(env, &refcheck);
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
new file mode 100644
index 000000000000..396e4e4f0715
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -0,0 +1,323 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl code shared between vvp and liblustre (and other Lustre clients in the
+ * future).
+ *
+ * Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "../../include/linux/libcfs/libcfs.h"
+# include <linux/fs.h>
+# include <linux/sched.h>
+# include <linux/mm.h>
+# include <linux/quotaops.h>
+# include <linux/highmem.h>
+# include <linux/pagemap.h>
+# include <linux/rbtree.h>
+
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_fid.h"
+#include "../include/lustre_lite.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_ver.h"
+#include "../include/lustre_mdc.h"
+#include "../include/cl_object.h"
+
+#include "../llite/llite_internal.h"
+
+/*
+ * ccc_ prefix stands for "Common Client Code".
+ */
+
+/*****************************************************************************
+ *
+ * Vvp device and device type functions.
+ *
+ */
+
+/**
+ * An `emergency' environment used by cl_inode_fini() when cl_env_get()
+ * fails. Access to this environment is serialized by cl_inode_fini_guard
+ * mutex.
+ */
+struct lu_env *cl_inode_fini_env;
+int cl_inode_fini_refcheck;
+
+/**
+ * A mutex serializing calls to cl_inode_fini() under extreme memory
+ * pressure, when environments cannot be allocated.
+ */
+static DEFINE_MUTEX(cl_inode_fini_guard);
+
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
+{
+ struct lu_env *env;
+ struct cl_io *io;
+ int result;
+ int refcheck;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
+
+ io = vvp_env_thread_io(env);
+ io->ci_obj = ll_i2info(inode)->lli_clob;
+
+ io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
+ io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
+ io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
+ io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
+ io->u.ci_setattr.sa_valid = attr->ia_valid;
+
+again:
+ if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
+ struct vvp_io *vio = vvp_env_io(env);
+
+ if (attr->ia_valid & ATTR_FILE)
+ /* populate the file descriptor for ftruncate to honor
+ * group lock - see LU-787
+ *
+ * attr->ia_file is only read when ATTR_FILE is set in
+ * ia_valid; otherwise vui_fd is left as it was.
+ */
+ vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file);
+
+ result = cl_io_loop(env, io);
+ } else {
+ result = io->ci_result;
+ }
+ cl_io_fini(env, io);
+ if (unlikely(io->ci_need_restart))
+ goto again;
+ /* HSM import case: the file is released and cannot be restored.
+ * A -ENODATA result is turned into success when
+ * io->ci_restore_needed is set, except when io->ci_result is
+ * itself -ENODATA, i.e. the restore registration failed.
+ *
+ * This is evaluated only once the restart loop above has
+ * finished: the io is re-initialized and re-run for as long as
+ * ci_need_restart is set after cl_io_fini().
+ */
+ if (result == -ENODATA && io->ci_restore_needed &&
+ io->ci_result != -ENODATA)
+ result = 0;
+ cl_env_put(env, &refcheck);
+ return result;
+}
+
+/**
+ * Initialize or update CLIO structures for regular files when new
+ * meta-data arrives from the server.
+ *
+ * - If the inode has no cl_object yet (lli_clob is NULL), one is created
+ *   with cl_object_find() using LOC_F_NEW and stored in lli_clob; the
+ *   inode is asserted to be in I_NEW state on this path.
+ * - Otherwise the new metadata is handed to the existing object through
+ *   cl_conf_set().
+ *
+ * \param inode regular file inode (asserted with S_ISREG)
+ * \param md new file metadata from MDS; md->body->valid is asserted to
+ *           have OBD_MD_FLID set
+ *
+ * \retval 0 on success
+ * \retval error code from cl_env_get(), cl_object_find() or cl_conf_set();
+ *         a failure of the latter two is also reported with CERROR()
+ */
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
+{
+ struct lu_env *env;
+ struct ll_inode_info *lli;
+ struct cl_object *clob;
+ struct lu_site *site;
+ struct lu_fid *fid;
+ struct cl_object_conf conf = {
+ .coc_inode = inode,
+ .u = {
+ .coc_md = md
+ }
+ };
+ int result = 0;
+ int refcheck;
+
+ LASSERT(md->body->valid & OBD_MD_FLID);
+ LASSERT(S_ISREG(inode->i_mode));
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
+
+ site = ll_i2sbi(inode)->ll_site;
+ lli = ll_i2info(inode);
+ fid = &lli->lli_fid;
+ LASSERT(fid_is_sane(fid));
+
+ if (!lli->lli_clob) {
+ /* clob is slave of inode, empty lli_clob means for new inode,
+ * there is no clob in cache with the given fid, so it is
+ * unnecessary to perform lookup-alloc-lookup-insert, just
+ * alloc and insert directly.
+ */
+ LASSERT(inode->i_state & I_NEW);
+ conf.coc_lu.loc_flags = LOC_F_NEW;
+ clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
+ fid, &conf);
+ if (!IS_ERR(clob)) {
+ /*
+ * No locking is necessary, as new inode is
+ * locked by I_NEW bit.
+ */
+ lli->lli_clob = clob;
+ lli->lli_has_smd = lsm_has_objects(md->lsm);
+ lu_object_ref_add(&clob->co_lu, "inode", inode);
+ } else {
+ result = PTR_ERR(clob);
+ }
+ } else {
+ result = cl_conf_set(env, lli->lli_clob, &conf);
+ }
+
+ cl_env_put(env, &refcheck);
+
+ if (result != 0)
+ CERROR("Failure to initialize cl object " DFID ": %d\n",
+ PFID(fid), result);
+ return result;
+}
+
+/**
+ * Wait for all other holders to drop their references on the object first,
+ * then drop the last one, which leads to the object being destroyed
+ * immediately.
+ * Must be called after cl_object_kill() against this object.
+ *
+ * The reason we want to do this is: destroying the top object will wait for
+ * sub objects being destroyed first, so we can't let the bottom layer (e.g.
+ * from ASTs) initiate top object destruction, which may deadlock. See bz22520.
+ *
+ * If loh_ref is not already 1, the caller sleeps in TASK_UNINTERRUPTIBLE
+ * state on the lsb_marche_funebre wait queue of the site bucket looked up
+ * from the object's fid, re-checking loh_ref after every wakeup until it
+ * reads 1. Only then is cl_object_put() called.
+ */
+static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
+{
+ struct lu_object_header *header = obj->co_lu.lo_header;
+ wait_queue_t waiter;
+
+ if (unlikely(atomic_read(&header->loh_ref) != 1)) {
+ struct lu_site *site = obj->co_lu.lo_dev->ld_site;
+ struct lu_site_bkt_data *bkt;
+
+ bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
+
+ init_waitqueue_entry(&waiter, current);
+ add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&header->loh_ref) == 1)
+ break;
+ schedule();
+ }
+
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+ }
+
+ cl_object_put(env, obj);
+}
+
+void cl_inode_fini(struct inode *inode)
+{
+ struct lu_env *env;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct cl_object *clob = lli->lli_clob;
+ int refcheck;
+ int emergency;
+
+ if (clob) {
+ void *cookie;
+
+ cookie = cl_env_reenter();
+ env = cl_env_get(&refcheck);
+ emergency = IS_ERR(env);
+ if (emergency) {
+ mutex_lock(&cl_inode_fini_guard);
+ LASSERT(cl_inode_fini_env);
+ cl_env_implant(cl_inode_fini_env, &refcheck);
+ env = cl_inode_fini_env;
+ }
+ /*
+ * cl_object cache is a slave to inode cache (which, in turn
+ * is a slave to dentry cache), don't keep cl_object in memory
+ * when its master is evicted.
+ *
+ * At this point env is either the environment returned by
+ * cl_env_get() or, if that failed, the shared
+ * cl_inode_fini_env implanted while holding
+ * cl_inode_fini_guard.
+ *
+ * Teardown order: kill the object, remove the "inode"
+ * reference tag, wait for and drop the last reference via
+ * cl_object_put_last(), then clear lli_clob. The environment
+ * is released afterwards in the way that matches how it was
+ * obtained (unplant + mutex_unlock, or cl_env_put).
+ */
+ cl_object_kill(env, clob);
+ lu_object_ref_del(&clob->co_lu, "inode", inode);
+ cl_object_put_last(env, clob);
+ lli->lli_clob = NULL;
+ if (emergency) {
+ cl_env_unplant(cl_inode_fini_env, &refcheck);
+ mutex_unlock(&cl_inode_fini_guard);
+ } else {
+ cl_env_put(env, &refcheck);
+ }
+ cl_env_reexit(cookie);
+ }
+}
+
+/**
+ * build inode number from passed @fid
+ *
+ * \param fid   fid to convert
+ * \param api32 non-zero to request the 32-bit flattening
+ *
+ * \retval fid_flatten32(fid) when BITS_PER_LONG == 32 or \a api32 is set
+ * \retval fid_flatten(fid) otherwise
+ */
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
+{
+ if (BITS_PER_LONG == 32 || api32)
+ return fid_flatten32(fid);
+ else
+ return fid_flatten(fid);
+}
+
+/**
+ * build inode generation from passed @fid. If our FID overflows the 32-bit
+ * inode number then return a non-zero generation to distinguish them.
+ *
+ * For an IGIF fid (fid_is_igif() is true) the generation is
+ * lu_igif_gen(fid). For any other fid it is fid_flatten(fid) shifted right
+ * by 32 bits and stored in a __u32.
+ */
+__u32 cl_fid_build_gen(const struct lu_fid *fid)
+{
+ __u32 gen;
+
+ if (fid_is_igif(fid)) {
+ gen = lu_igif_gen(fid);
+ return gen;
+ }
+
+ gen = fid_flatten(fid) >> 32;
+ return gen;
+}
+
+/* lsm is unreliable after hsm implementation as layout can be changed at
+ * any time. This is only to support old, non-clio-ized interfaces. It will
+ * cause deadlock if clio operations are called with this extra layout refcount
+ * because in case the layout changed during the IO, ll_layout_refresh() will
+ * have to wait for the refcount to become zero to destroy the older layout.
+ *
+ * Notice that the lsm returned by this function may not be valid unless called
+ * inside layout lock - MDS_INODELOCK_LAYOUT.
+ *
+ * ccc_inode_lsm_get() forwards to lov_lsm_get() on the inode's lli_clob and
+ * ccc_inode_lsm_put() forwards to lov_lsm_put() on the same object.
+ * NOTE(review): presumably every get must be balanced by a put to release
+ * the extra layout refcount mentioned above - confirm against
+ * lov_lsm_get()/lov_lsm_put().
+ */
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
+{
+ return lov_lsm_get(ll_i2info(inode)->lli_clob);
+}
+
+inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
+{
+ lov_lsm_put(ll_i2info(inode)->lli_clob, lsm);
+}
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
index d80bcedd78d1..f6be105eeef7 100644
--- a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -41,9 +37,9 @@
#include "../include/obd_support.h"
#include "../include/obd.h"
#include "../include/cl_object.h"
-#include "../include/lclient.h"
#include "../include/lustre_lite.h"
+#include "llite_internal.h"
/* Initialize the default and maximum LOV EA and cookie sizes. This allows
* us to make MDS RPCs with large enough reply buffers to hold the
@@ -100,7 +96,8 @@ int cl_ocd_update(struct obd_device *host,
__u64 flags;
int result;
- if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+ if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) &&
+ watched->obd_set_up && !watched->obd_stopping) {
cli = &watched->u.cli;
lco = owner;
flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
@@ -115,9 +112,10 @@ int cl_ocd_update(struct obd_device *host,
mutex_unlock(&lco->lco_lock);
result = 0;
} else {
- CERROR("unexpected notification from %s %s!\n",
+ CERROR("unexpected notification from %s %s (setup:%d,stopping:%d)!\n",
watched->obd_type->typ_name,
- watched->obd_name);
+ watched->obd_name, watched->obd_set_up,
+ watched->obd_stopping);
result = -EINVAL;
}
return result;
@@ -126,7 +124,7 @@ int cl_ocd_update(struct obd_device *host,
#define GROUPLOCK_SCOPE "grouplock"
int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
- struct ccc_grouplock *cg)
+ struct ll_grouplock *cg)
{
struct lu_env *env;
struct cl_io *io;
@@ -140,20 +138,22 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
if (IS_ERR(env))
return PTR_ERR(env);
- io = ccc_env_thread_io(env);
+ io = vvp_env_thread_io(env);
io->ci_obj = obj;
io->ci_ignore_layout = 1;
rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (rc) {
+ if (rc != 0) {
+ cl_io_fini(env, io);
+ cl_env_put(env, &refcheck);
/* Does not make sense to take GL for released layout */
if (rc > 0)
rc = -ENOTSUPP;
- cl_env_put(env, &refcheck);
return rc;
}
- descr = &ccc_env_info(env)->cti_descr;
+ lock = vvp_env_lock(env);
+ descr = &lock->cll_descr;
descr->cld_obj = obj;
descr->cld_start = 0;
descr->cld_end = CL_PAGE_EOF;
@@ -163,38 +163,37 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
descr->cld_enq_flags = enqflags;
- lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, current);
- if (IS_ERR(lock)) {
+ rc = cl_lock_request(env, io, lock);
+ if (rc < 0) {
cl_io_fini(env, io);
cl_env_put(env, &refcheck);
- return PTR_ERR(lock);
+ return rc;
}
- cg->cg_env = cl_env_get(&refcheck);
- cg->cg_io = io;
- cg->cg_lock = lock;
- cg->cg_gid = gid;
- LASSERT(cg->cg_env == env);
+ cg->lg_env = cl_env_get(&refcheck);
+ cg->lg_io = io;
+ cg->lg_lock = lock;
+ cg->lg_gid = gid;
+ LASSERT(cg->lg_env == env);
cl_env_unplant(env, &refcheck);
return 0;
}
-void cl_put_grouplock(struct ccc_grouplock *cg)
+void cl_put_grouplock(struct ll_grouplock *cg)
{
- struct lu_env *env = cg->cg_env;
- struct cl_io *io = cg->cg_io;
- struct cl_lock *lock = cg->cg_lock;
+ struct lu_env *env = cg->lg_env;
+ struct cl_io *io = cg->lg_io;
+ struct cl_lock *lock = cg->lg_lock;
int refcheck;
- LASSERT(cg->cg_env);
- LASSERT(cg->cg_gid);
+ LASSERT(cg->lg_env);
+ LASSERT(cg->lg_gid);
cl_env_implant(env, &refcheck);
cl_env_put(env, &refcheck);
- cl_unuse(env, lock);
- cl_lock_release(env, lock, GROUPLOCK_SCOPE, current);
+ cl_lock_release(env, lock);
cl_io_fini(env, io);
cl_env_put(env, NULL);
}
diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c
index a55ac4dccd90..2326b40a0870 100644
--- a/drivers/staging/lustre/lustre/llite/llite_close.c
+++ b/drivers/staging/lustre/lustre/llite/llite_close.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -46,31 +42,31 @@
#include "llite_internal.h"
/** records that a write is in flight */
-void vvp_write_pending(struct ccc_object *club, struct ccc_page *page)
+void vvp_write_pending(struct vvp_object *club, struct vvp_page *page)
{
- struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+ struct ll_inode_info *lli = ll_i2info(club->vob_inode);
spin_lock(&lli->lli_lock);
lli->lli_flags |= LLIF_SOM_DIRTY;
- if (page && list_empty(&page->cpg_pending_linkage))
- list_add(&page->cpg_pending_linkage, &club->cob_pending_list);
+ if (page && list_empty(&page->vpg_pending_linkage))
+ list_add(&page->vpg_pending_linkage, &club->vob_pending_list);
spin_unlock(&lli->lli_lock);
}
/** records that a write has completed */
-void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
+void vvp_write_complete(struct vvp_object *club, struct vvp_page *page)
{
- struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+ struct ll_inode_info *lli = ll_i2info(club->vob_inode);
int rc = 0;
spin_lock(&lli->lli_lock);
- if (page && !list_empty(&page->cpg_pending_linkage)) {
- list_del_init(&page->cpg_pending_linkage);
+ if (page && !list_empty(&page->vpg_pending_linkage)) {
+ list_del_init(&page->vpg_pending_linkage);
rc = 1;
}
spin_unlock(&lli->lli_lock);
if (rc)
- ll_queue_done_writing(club->cob_inode, 0);
+ ll_queue_done_writing(club->vob_inode, 0);
}
/** Queues DONE_WRITING if
@@ -80,25 +76,25 @@ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
void ll_queue_done_writing(struct inode *inode, unsigned long flags)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+ struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
spin_lock(&lli->lli_lock);
lli->lli_flags |= flags;
if ((lli->lli_flags & LLIF_DONE_WRITING) &&
- list_empty(&club->cob_pending_list)) {
+ list_empty(&club->vob_pending_list)) {
struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;
if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CWARN("ino %lu/%u(flags %u) som valid it just after recovery\n",
- inode->i_ino, inode->i_generation,
- lli->lli_flags);
+ CWARN("%s: file "DFID"(flags %u) Size-on-MDS valid, done writing allowed and no diry pages\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), lli->lli_flags);
/* DONE_WRITING is allowed and inode has no dirty page. */
spin_lock(&lcq->lcq_lock);
LASSERT(list_empty(&lli->lli_close_list));
- CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
- inode->i_ino, inode->i_generation);
+ CDEBUG(D_INODE, "adding inode "DFID" to close list\n",
+ PFID(ll_inode2fid(inode)));
list_add_tail(&lli->lli_close_list, &lcq->lcq_head);
/* Avoid a concurrent insertion into the close thread queue:
@@ -124,9 +120,9 @@ void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data)
op_data->op_flags |= MF_SOM_CHANGE;
/* Check if Size-on-MDS attributes are valid. */
if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n",
- inode->i_ino, inode->i_generation,
- lli->lli_flags);
+ CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), lli->lli_flags);
if (!cl_local_size(inode)) {
/* Send Size-on-MDS Attributes if valid. */
@@ -140,10 +136,10 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
struct obd_client_handle **och, unsigned long flags)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+ struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
spin_lock(&lli->lli_lock);
- if (!(list_empty(&club->cob_pending_list))) {
+ if (!(list_empty(&club->vob_pending_list))) {
if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) {
LASSERT(*och);
LASSERT(!lli->lli_pending_och);
@@ -198,7 +194,7 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
}
}
- LASSERT(list_empty(&club->cob_pending_list));
+ LASSERT(list_empty(&club->vob_pending_list));
lli->lli_flags &= ~LLIF_SOM_DIRTY;
spin_unlock(&lli->lli_lock);
ll_done_writing_attr(inode, op_data);
@@ -221,9 +217,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data)
LASSERT(op_data);
if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n",
- inode->i_ino, inode->i_generation,
- lli->lli_flags);
+ CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), lli->lli_flags);
oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
if (!oa) {
@@ -241,9 +237,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data)
if (rc) {
oa->o_valid = 0;
if (rc != -ENOENT)
- CERROR("inode_getattr failed (%d): unable to send a Size-on-MDS attribute update for inode %lu/%u\n",
- rc, inode->i_ino,
- inode->i_generation);
+ CERROR("%s: inode_getattr failed - unable to send a Size-on-MDS attribute update for inode "DFID": rc = %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), rc);
} else {
CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n",
PFID(&lli->lli_fid));
@@ -302,9 +298,11 @@ static void ll_done_writing(struct inode *inode)
* OSTs and send setattr to back to MDS.
*/
rc = ll_som_update(inode, op_data);
- else if (rc)
- CERROR("inode %lu mdc done_writing failed: rc = %d\n",
- inode->i_ino, rc);
+ else if (rc) {
+ CERROR("%s: inode "DFID" mdc done_writing failed: rc = %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), rc);
+ }
out:
ll_finish_md_op_data(op_data);
if (och) {
@@ -323,8 +321,9 @@ static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq)
lli = list_entry(lcq->lcq_head.next, struct ll_inode_info,
lli_close_list);
list_del_init(&lli->lli_close_list);
- } else if (atomic_read(&lcq->lcq_stop))
+ } else if (atomic_read(&lcq->lcq_stop)) {
lli = ERR_PTR(-EALREADY);
+ }
spin_unlock(&lcq->lcq_lock);
return lli;
@@ -348,8 +347,8 @@ static int ll_close_thread(void *arg)
break;
inode = ll_info2i(lli);
- CDEBUG(D_INFO, "done_writing for inode %lu/%u\n",
- inode->i_ino, inode->i_generation);
+ CDEBUG(D_INFO, "done_writing for inode "DFID"\n",
+ PFID(ll_inode2fid(inode)));
ll_done_writing(inode);
iput(inode);
}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index e3c0f1dd4d31..4d6d589a1677 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -43,11 +39,11 @@
/* for struct cl_lock_descr and struct cl_io */
#include "../include/cl_object.h"
-#include "../include/lclient.h"
#include "../include/lustre_mdc.h"
#include "../include/lustre_intent.h"
#include <linux/compat.h>
#include <linux/posix_acl_xattr.h>
+#include "vvp_internal.h"
#ifndef FMODE_EXEC
#define FMODE_EXEC 0
@@ -68,6 +64,7 @@ struct ll_dentry_data {
struct lookup_intent *lld_it;
unsigned int lld_sa_generation;
unsigned int lld_invalid:1;
+ unsigned int lld_nfs_dentry:1;
struct rcu_head lld_rcu_head;
};
@@ -76,9 +73,6 @@ struct ll_dentry_data {
#define LLI_INODE_MAGIC 0x111d0de5
#define LLI_INODE_DEAD 0xdeadd00d
-/* remote client permission cache */
-#define REMOTE_PERM_HASHSIZE 16
-
struct ll_getname_data {
struct dir_context ctx;
char *lgd_name; /* points to a buffer with NAME_MAX+1 size */
@@ -86,17 +80,11 @@ struct ll_getname_data {
int lgd_found; /* inode matched? */
};
-/* llite setxid/access permission for user on remote client */
-struct ll_remote_perm {
- struct hlist_node lrp_list;
- uid_t lrp_uid;
- gid_t lrp_gid;
- uid_t lrp_fsuid;
- gid_t lrp_fsgid;
- int lrp_access_perm; /* MAY_READ/WRITE/EXEC, this
- * is access permission with
- * lrp_fsuid/lrp_fsgid.
- */
+struct ll_grouplock {
+ struct lu_env *lg_env;
+ struct cl_io *lg_io;
+ struct cl_lock *lg_lock;
+ unsigned long lg_gid;
};
enum lli_flags {
@@ -126,9 +114,6 @@ struct ll_inode_info {
spinlock_t lli_lock;
struct posix_acl *lli_posix_acl;
- struct hlist_head *lli_remote_perms;
- struct mutex lli_rmtperm_mutex;
-
/* identifying fields for both metadata and data stacks. */
struct lu_fid lli_fid;
/* Parent fid for accessing default stripe data on parent directory
@@ -138,8 +123,6 @@ struct ll_inode_info {
struct list_head lli_close_list;
- unsigned long lli_rmtperm_time;
-
/* handle is to be sent to MDS later on done_writing and setattr.
* Open handle data are needed for the recovery to reconstruct
* the inode state on the MDS. XXX: recovery is not ready yet.
@@ -161,7 +144,9 @@ struct ll_inode_info {
struct inode lli_vfs_inode;
/* the most recent timestamps obtained from mds */
- struct ost_lvb lli_lvb;
+ s64 lli_atime;
+ s64 lli_mtime;
+ s64 lli_ctime;
spinlock_t lli_agl_lock;
/* Try to make the d::member and f::member are aligned. Before using
@@ -328,6 +313,7 @@ enum ra_stat {
RA_STAT_EOF,
RA_STAT_MAX_IN_FLIGHT,
RA_STAT_WRONG_GRAB_PAGE,
+ RA_STAT_FAILED_REACH_END,
_NR_RA_STAT,
};
@@ -401,7 +387,7 @@ enum stats_track_type {
#define LL_SBI_FLOCK 0x04
#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
#define LL_SBI_ACL 0x10 /* support ACL */
-#define LL_SBI_RMT_CLIENT 0x40 /* remote client */
+/* LL_SBI_RMT_CLIENT 0x40 remote client */
#define LL_SBI_MDS_CAPA 0x80 /* support mds capa, obsolete */
#define LL_SBI_OSS_CAPA 0x100 /* support oss capa, obsolete */
#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
@@ -423,7 +409,7 @@ enum stats_track_type {
"xattr", \
"acl", \
"???", \
- "rmt_client", \
+ "???", \
"mds_capa", \
"oss_capa", \
"flock", \
@@ -439,26 +425,6 @@ enum stats_track_type {
"xattr", \
}
-#define RCE_HASHES 32
-
-struct rmtacl_ctl_entry {
- struct list_head rce_list;
- pid_t rce_key; /* hash key */
- int rce_ops; /* acl operation type */
-};
-
-struct rmtacl_ctl_table {
- spinlock_t rct_lock;
- struct list_head rct_entries[RCE_HASHES];
-};
-
-#define EE_HASHES 32
-
-struct eacl_table {
- spinlock_t et_lock;
- struct list_head et_entries[EE_HASHES];
-};
-
struct ll_sb_info {
/* this protects pglist and ra_info. It isn't safe to
* grab from interrupt contexts
@@ -481,7 +447,13 @@ struct ll_sb_info {
struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
- struct cl_client_cache ll_cache;
+ /*
+ * Used to track "unstable" pages on a client, and maintain a
+ * LRU list of clean pages. An "unstable" page is defined as
+ * any page which is sent to a server as part of a bulk request,
+ * but is uncommitted to stable storage.
+ */
+ struct cl_client_cache *ll_cache;
struct lprocfs_stats *ll_ra_stats;
@@ -517,21 +489,12 @@ struct ll_sb_info {
dev_t ll_sdev_orig; /* save s_dev before assign for
* clustered nfs
*/
- struct rmtacl_ctl_table ll_rct;
- struct eacl_table ll_et;
__kernel_fsid_t ll_fsid;
struct kobject ll_kobj; /* sysfs object */
struct super_block *ll_sb; /* struct super_block (for sysfs code)*/
struct completion ll_kobj_unregister;
};
-struct ll_ra_read {
- pgoff_t lrr_start;
- pgoff_t lrr_count;
- struct task_struct *lrr_reader;
- struct list_head lrr_linkage;
-};
-
/*
* per file-descriptor read-ahead data.
*/
@@ -590,12 +553,6 @@ struct ll_readahead_state {
*/
unsigned long ras_request_index;
/*
- * list of struct ll_ra_read's one per read(2) call current in
- * progress against this file descriptor. Used by read-ahead code,
- * protected by ->ras_lock.
- */
- struct list_head ras_read_beads;
- /*
* The following 3 items are used for detecting the stride I/O
* mode.
* In stride I/O mode,
@@ -622,7 +579,7 @@ extern struct kmem_cache *ll_file_data_slab;
struct lustre_handle;
struct ll_file_data {
struct ll_readahead_state fd_ras;
- struct ccc_grouplock fd_grouplock;
+ struct ll_grouplock fd_grouplock;
__u64 lfd_pos;
__u32 fd_flags;
fmode_t fd_omode;
@@ -637,6 +594,8 @@ struct ll_file_data {
* false: unknown failure, should report.
*/
bool fd_write_failed;
+ rwlock_t fd_lock; /* protect lcc list */
+ struct list_head fd_lccs; /* list of ll_cl_context */
};
struct lov_stripe_md;
@@ -663,8 +622,16 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
#endif
}
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
-void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
+void ll_ras_enter(struct file *f);
+
+/* llite/lcommon_misc.c */
+int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
+int cl_ocd_update(struct obd_device *host,
+ struct obd_device *watched,
+ enum obd_notify_event ev, void *owner, void *data);
+int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
+ struct ll_grouplock *cg);
+void cl_put_grouplock(struct ll_grouplock *cg);
/* llite/lproc_llite.c */
int ldebugfs_register_mountpoint(struct dentry *parent,
@@ -697,15 +664,16 @@ int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
/* llite/rw.c */
-int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
-int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_writepage(struct page *page, struct writeback_control *wbc);
int ll_writepages(struct address_space *, struct writeback_control *wbc);
int ll_readpage(struct file *file, struct page *page);
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct ll_readahead_state *ras, struct address_space *mapping,
- struct cl_page_list *queue, int flags);
+ struct cl_page_list *queue, struct ll_readahead_state *ras,
+ bool hit);
+struct ll_cl_context *ll_cl_find(struct file *file);
+void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io);
+void ll_cl_remove(struct file *file, const struct lu_env *env);
extern const struct address_space_operations ll_aops;
@@ -740,7 +708,7 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type);
int ll_inode_permission(struct inode *inode, int mask);
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- int flags, struct lov_user_md *lum,
+ __u64 flags, struct lov_user_md *lum,
int lum_size);
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
struct lov_mds_md **lmm, int *lmm_size,
@@ -750,9 +718,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
int *lmm_size, struct ptlrpc_request **request);
int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
-int ll_merge_lvb(const struct lu_env *env, struct inode *inode);
+int ll_merge_attr(const struct lu_env *env, struct inode *inode);
int ll_fid2path(struct inode *inode, void __user *arg);
-int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock);
+int ll_data_version(struct inode *inode, __u64 *data_version, int flags);
int ll_hsm_release(struct inode *inode);
/* llite/dcache.c */
@@ -824,65 +792,8 @@ struct ll_close_queue {
atomic_t lcq_stop;
};
-struct ccc_object *cl_inode2ccc(struct inode *inode);
-
-void vvp_write_pending (struct ccc_object *club, struct ccc_page *page);
-void vvp_write_complete(struct ccc_object *club, struct ccc_page *page);
-
-/* specific architecture can implement only part of this list */
-enum vvp_io_subtype {
- /** normal IO */
- IO_NORMAL,
- /** io started from splice_{read|write} */
- IO_SPLICE
-};
-
-/* IO subtypes */
-struct vvp_io {
- /** io subtype */
- enum vvp_io_subtype cui_io_subtype;
-
- union {
- struct {
- struct pipe_inode_info *cui_pipe;
- unsigned int cui_flags;
- } splice;
- struct vvp_fault_io {
- /**
- * Inode modification time that is checked across DLM
- * lock request.
- */
- time64_t ft_mtime;
- struct vm_area_struct *ft_vma;
- /**
- * locked page returned from vvp_io
- */
- struct page *ft_vmpage;
- struct vm_fault_api {
- /**
- * kernel fault info
- */
- struct vm_fault *ft_vmf;
- /**
- * fault API used bitflags for return code.
- */
- unsigned int ft_flags;
- /**
- * check that flags are from filemap_fault
- */
- bool ft_flags_valid;
- } fault;
- } fault;
- } u;
- /**
- * Read-ahead state used by read and page-fault IO contexts.
- */
- struct ll_ra_read cui_bead;
- /**
- * Set when cui_bead has been initialized.
- */
- int cui_ra_window_set;
-};
+void vvp_write_pending(struct vvp_object *club, struct vvp_page *page);
+void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);
/**
* IO arguments for various VFS I/O interfaces.
@@ -904,61 +815,39 @@ struct vvp_io_args {
};
struct ll_cl_context {
+ struct list_head lcc_list;
void *lcc_cookie;
+ const struct lu_env *lcc_env;
struct cl_io *lcc_io;
struct cl_page *lcc_page;
- struct lu_env *lcc_env;
- int lcc_refcheck;
};
-struct vvp_thread_info {
- struct vvp_io_args vti_args;
- struct ra_io_arg vti_ria;
- struct ll_cl_context vti_io_ctx;
+struct ll_thread_info {
+ struct vvp_io_args lti_args;
+ struct ra_io_arg lti_ria;
+ struct ll_cl_context lti_io_ctx;
};
-static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
+extern struct lu_context_key ll_thread_key;
+static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
{
- extern struct lu_context_key vvp_key;
- struct vvp_thread_info *info;
+ struct ll_thread_info *lti;
- info = lu_context_key_get(&env->le_ctx, &vvp_key);
- LASSERT(info);
- return info;
+ lti = lu_context_key_get(&env->le_ctx, &ll_thread_key);
+ LASSERT(lti);
+ return lti;
}
-static inline struct vvp_io_args *vvp_env_args(const struct lu_env *env,
- enum vvp_io_subtype type)
+static inline struct vvp_io_args *ll_env_args(const struct lu_env *env,
+ enum vvp_io_subtype type)
{
- struct vvp_io_args *ret = &vvp_env_info(env)->vti_args;
+ struct vvp_io_args *via = &ll_env_info(env)->lti_args;
- ret->via_io_subtype = type;
+ via->via_io_subtype = type;
- return ret;
+ return via;
}
-struct vvp_session {
- struct vvp_io vs_ios;
-};
-
-static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
-{
- extern struct lu_context_key vvp_session_key;
- struct vvp_session *ses;
-
- ses = lu_context_key_get(env->le_ses, &vvp_session_key);
- LASSERT(ses);
- return ses;
-}
-
-static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
-{
- return &vvp_env_session(env)->vs_ios;
-}
-
-int vvp_global_init(void);
-void vvp_global_fini(void);
-
void ll_queue_done_writing(struct inode *inode, unsigned long flags);
void ll_close_thread_shutdown(struct ll_close_queue *lcq);
int ll_close_thread_start(struct ll_close_queue **lcq_ret);
@@ -981,6 +870,10 @@ static inline void ll_invalidate_page(struct page *vmpage)
if (!mapping)
return;
+ /*
+ * truncate_complete_page() calls
+ * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
+ */
ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE);
truncate_complete_page(mapping, vmpage);
}
@@ -1040,24 +933,13 @@ static inline __u64 ll_file_maxbytes(struct inode *inode)
}
/* llite/xattr.c */
-int ll_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags);
-ssize_t ll_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size);
+int ll_setxattr(struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size, int flags);
+ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size);
ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
int ll_removexattr(struct dentry *dentry, const char *name);
-/* llite/remote_perm.c */
-extern struct kmem_cache *ll_remote_perm_cachep;
-extern struct kmem_cache *ll_rmtperm_hash_cachep;
-
-void free_rmtperm_hash(struct hlist_head *hash);
-int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm);
-int lustre_check_remote_perm(struct inode *inode, int mask);
-
-/* llite/llite_cl.c */
-extern struct lu_device_type vvp_device_type;
-
/**
* Common IO arguments for various VFS I/O interfaces.
*/
@@ -1069,42 +951,9 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
struct ll_readahead_state *ras, unsigned long index,
unsigned hit);
void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
-void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which);
-
-/* llite/llite_rmtacl.c */
-#ifdef CONFIG_FS_POSIX_ACL
-struct eacl_entry {
- struct list_head ee_list;
- pid_t ee_key; /* hash key */
- struct lu_fid ee_fid;
- int ee_type; /* ACL type for ACCESS or DEFAULT */
- ext_acl_xattr_header *ee_acl;
-};
-
-u64 rce_ops2valid(int ops);
-struct rmtacl_ctl_entry *rct_search(struct rmtacl_ctl_table *rct, pid_t key);
-int rct_add(struct rmtacl_ctl_table *rct, pid_t key, int ops);
-int rct_del(struct rmtacl_ctl_table *rct, pid_t key);
-void rct_init(struct rmtacl_ctl_table *rct);
-void rct_fini(struct rmtacl_ctl_table *rct);
-
-void ee_free(struct eacl_entry *ee);
-int ee_add(struct eacl_table *et, pid_t key, struct lu_fid *fid, int type,
- ext_acl_xattr_header *header);
-struct eacl_entry *et_search_del(struct eacl_table *et, pid_t key,
- struct lu_fid *fid, int type);
-void et_search_free(struct eacl_table *et, pid_t key);
-void et_init(struct eacl_table *et);
-void et_fini(struct eacl_table *et);
-#else
-static inline u64 rce_ops2valid(int ops)
-{
- return 0;
-}
-#endif
+void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
/* statahead.c */
-
#define LL_SA_RPC_MIN 2
#define LL_SA_RPC_DEF 32
#define LL_SA_RPC_MAX 8192
@@ -1163,6 +1012,22 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentry,
int only_unplug);
void ll_stop_statahead(struct inode *dir, void *key);
+blkcnt_t dirty_cnt(struct inode *inode);
+
+int cl_glimpse_size0(struct inode *inode, int agl);
+int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
+ struct inode *inode, struct cl_object *clob, int agl);
+
+static inline int cl_glimpse_size(struct inode *inode)
+{
+ return cl_glimpse_size0(inode, 0);
+}
+
+static inline int cl_agl(struct inode *inode)
+{
+ return cl_glimpse_size0(inode, 1);
+}
+
static inline int ll_glimpse_size(struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
@@ -1285,43 +1150,6 @@ typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
void ll_iocontrol_unregister(void *magic);
-/* lclient compat stuff */
-#define cl_inode_info ll_inode_info
-#define cl_i2info(info) ll_i2info(info)
-#define cl_inode_mode(inode) ((inode)->i_mode)
-#define cl_i2sbi ll_i2sbi
-
-static inline struct ll_file_data *cl_iattr2fd(struct inode *inode,
- const struct iattr *attr)
-{
- LASSERT(attr->ia_valid & ATTR_FILE);
- return LUSTRE_FPRIVATE(attr->ia_file);
-}
-
-static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms)
-{
- LASSERT(mutex_is_locked(&ll_i2info(inode)->lli_size_mutex));
- i_size_write(inode, kms);
-}
-
-static inline void cl_isize_write(struct inode *inode, loff_t kms)
-{
- ll_inode_size_lock(inode);
- i_size_write(inode, kms);
- ll_inode_size_unlock(inode);
-}
-
-#define cl_isize_read(inode) i_size_read(inode)
-
-static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode)
-{
- return ll_merge_lvb(env, inode);
-}
-
-#define cl_inode_atime(inode) LTIME_S((inode)->i_atime)
-#define cl_inode_ctime(inode) LTIME_S((inode)->i_ctime)
-#define cl_inode_mtime(inode) LTIME_S((inode)->i_mtime)
-
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
enum cl_fsync_mode mode, int ignore_layout);
@@ -1350,7 +1178,7 @@ static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
LPROC_LL_OSC_WRITE;
- ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc);
+ ll_stats_ops_tally(ll_s2sbi(cl2vvp_dev(dev)->vdv_sb), opc, rc);
}
ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
@@ -1369,7 +1197,7 @@ static inline int ll_file_nolock(const struct file *file)
static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
struct lookup_intent *it, __u64 *bits)
{
- if (!it->d.lustre.it_lock_set) {
+ if (!it->it_lock_set) {
struct lustre_handle handle;
/* If this inode is a remote object, it will get two
@@ -1380,38 +1208,26 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
* LOOKUP and PERM locks, so revoking either locks will
* case the dcache being cleared
*/
- if (it->d.lustre.it_remote_lock_mode) {
- handle.cookie = it->d.lustre.it_remote_lock_handle;
- CDEBUG(D_DLMTRACE, "setting l_data to inode %p(%lu/%u) for remote lock %#llx\n",
- inode,
- inode->i_ino, inode->i_generation,
+ if (it->it_remote_lock_mode) {
+ handle.cookie = it->it_remote_lock_handle;
+ CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for remote lock %#llx\n",
+ PFID(ll_inode2fid(inode)), inode,
handle.cookie);
md_set_lock_data(exp, &handle.cookie, inode, NULL);
}
- handle.cookie = it->d.lustre.it_lock_handle;
+ handle.cookie = it->it_lock_handle;
- CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u) for lock %#llx\n",
- inode, inode->i_ino,
- inode->i_generation, handle.cookie);
+ CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for lock %#llx\n",
+ PFID(ll_inode2fid(inode)), inode, handle.cookie);
md_set_lock_data(exp, &handle.cookie, inode,
- &it->d.lustre.it_lock_bits);
- it->d.lustre.it_lock_set = 1;
+ &it->it_lock_bits);
+ it->it_lock_set = 1;
}
if (bits)
- *bits = it->d.lustre.it_lock_bits;
-}
-
-static inline void ll_lock_dcache(struct inode *inode)
-{
- spin_lock(&inode->i_lock);
-}
-
-static inline void ll_unlock_dcache(struct inode *inode)
-{
- spin_unlock(&inode->i_lock);
+ *bits = it->it_lock_bits;
}
static inline int d_lustre_invalid(const struct dentry *dentry)
@@ -1471,9 +1287,25 @@ enum {
int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
int ll_layout_refresh(struct inode *inode, __u32 *gen);
-int ll_layout_restore(struct inode *inode);
+int ll_layout_restore(struct inode *inode, loff_t start, __u64 length);
int ll_xattr_init(void);
void ll_xattr_fini(void);
+int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, enum cl_req_type crt);
+
+/* lcommon_cl.c */
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
+
+extern struct lu_env *cl_inode_fini_env;
+extern int cl_inode_fini_refcheck;
+
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
+void cl_inode_fini(struct inode *inode);
+int cl_local_size(struct inode *inode);
+
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
+__u32 cl_fid_build_gen(const struct lu_fid *fid);
+
#endif /* LLITE_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index b57a992688a8..546063e728db 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -85,17 +81,13 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
si_meminfo(&si);
pages = si.totalram - si.totalhigh;
- if (pages >> (20 - PAGE_SHIFT) < 512)
- lru_page_max = pages / 2;
- else
- lru_page_max = (pages / 4) * 3;
+ lru_page_max = pages / 2;
- /* initialize lru data */
- atomic_set(&sbi->ll_cache.ccc_users, 0);
- sbi->ll_cache.ccc_lru_max = lru_page_max;
- atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
- spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
- INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
+ sbi->ll_cache = cl_cache_init(lru_page_max);
+ if (!sbi->ll_cache) {
+ kfree(sbi);
+ return NULL;
+ }
sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
SBI_DEFAULT_READAHEAD_MAX);
@@ -135,6 +127,11 @@ static void ll_free_sbi(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
+ if (sbi->ll_cache) {
+ cl_cache_decref(sbi->ll_cache);
+ sbi->ll_cache = NULL;
+ }
+
kfree(sbi);
}
@@ -169,20 +166,14 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
return -ENOMEM;
}
- if (llite_root) {
- err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
- if (err < 0)
- CERROR("could not register mount in <debugfs>/lustre/llite\n");
- }
-
/* indicate the features supported by this client */
data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
OBD_CONNECT_ATTRFID |
OBD_CONNECT_VERSION | OBD_CONNECT_BRW_SIZE |
OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 |
- OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_VBR |
- OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH|
+ OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
+ OBD_CONNECT_64BITHASH |
OBD_CONNECT_EINPROGRESS |
OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
OBD_CONNECT_LAYOUTLOCK |
@@ -223,8 +214,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
/* real client */
data->ocd_connect_flags |= OBD_CONNECT_REAL;
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
- data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
data->ocd_brw_size = MD_MAX_BRW_SIZE;
@@ -317,18 +306,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
sbi->ll_flags &= ~LL_SBI_ACL;
}
- if (data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) {
- if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
- sbi->ll_flags |= LL_SBI_RMT_CLIENT;
- LCONSOLE_INFO("client is set as remote by default.\n");
- }
- } else {
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
- sbi->ll_flags &= ~LL_SBI_RMT_CLIENT;
- LCONSOLE_INFO("client claims to be remote, but server rejected, forced to be local.\n");
- }
- }
-
if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
sbi->ll_flags |= LL_SBI_64BIT_HASH;
@@ -337,10 +314,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
else
sbi->ll_md_brw_size = PAGE_SIZE;
- if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) {
- LCONSOLE_INFO("Layout lock feature supported.\n");
+ if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
- }
if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
@@ -364,10 +339,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK|
- OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT |
- OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR|
- OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH |
- OBD_CONNECT_MAXBYTES |
+ OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA |
+ OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
+ OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES |
OBD_CONNECT_EINPROGRESS |
OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS;
@@ -390,8 +364,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
}
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
- data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n",
data->ocd_connect_flags,
@@ -453,10 +425,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
/* make root inode
* XXX: move this to after cbd setup?
*/
- valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS;
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
- valid |= OBD_MD_FLRMTPERM;
- else if (sbi->ll_flags & LL_SBI_ACL)
+ valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
+ if (sbi->ll_flags & LL_SBI_ACL)
valid |= OBD_MD_FLACL;
op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
@@ -512,13 +482,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
goto out_root;
}
-#ifdef CONFIG_FS_POSIX_ACL
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
- rct_init(&sbi->ll_rct);
- et_init(&sbi->ll_et);
- }
-#endif
-
checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
KEY_CHECKSUM, sizeof(checksum), &checksum,
@@ -526,8 +489,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
cl_sb_init(sb);
err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
- KEY_CACHE_SET, sizeof(sbi->ll_cache),
- &sbi->ll_cache, NULL);
+ KEY_CACHE_SET, sizeof(*sbi->ll_cache),
+ sbi->ll_cache, NULL);
sb->s_root = d_make_root(root);
if (!sb->s_root) {
@@ -555,6 +518,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
kfree(data);
kfree(osfs);
+ if (llite_root) {
+ err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
+ if (err < 0) {
+ CERROR("%s: could not register mount in debugfs: "
+ "rc = %d\n", ll_get_fsname(sb, NULL, 0), err);
+ err = 0;
+ }
+ }
+
return err;
out_root:
iput(root);
@@ -563,8 +535,6 @@ out_lock_cn_cb:
out_dt:
obd_disconnect(sbi->ll_dt_exp);
sbi->ll_dt_exp = NULL;
- /* Make sure all OScs are gone, since cl_cache is accessing sbi. */
- obd_zombie_barrier();
out_md_fid:
obd_fid_fini(sbi->ll_md_exp->exp_obd);
out_md:
@@ -573,7 +543,6 @@ out_md:
out:
kfree(data);
kfree(osfs);
- ldebugfs_unregister_mountpoint(sbi);
return err;
}
@@ -608,13 +577,6 @@ static void client_common_put_super(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
-#ifdef CONFIG_FS_POSIX_ACL
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
- et_fini(&sbi->ll_et);
- rct_fini(&sbi->ll_rct);
- }
-#endif
-
ll_close_thread_shutdown(sbi->ll_lcq);
cl_sb_fini(sb);
@@ -622,10 +584,6 @@ static void client_common_put_super(struct super_block *sb)
obd_fid_fini(sbi->ll_dt_exp->exp_obd);
obd_disconnect(sbi->ll_dt_exp);
sbi->ll_dt_exp = NULL;
- /* wait till all OSCs are gone, since cl_cache is accessing sbi.
- * see LU-2543.
- */
- obd_zombie_barrier();
ldebugfs_unregister_mountpoint(sbi);
@@ -704,11 +662,6 @@ static int ll_options(char *options, int *flags)
*flags &= ~tmp;
goto next;
}
- tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
if (tmp) {
*flags |= tmp;
@@ -792,12 +745,9 @@ void ll_lli_init(struct ll_inode_info *lli)
lli->lli_maxbytes = MAX_LFS_FILESIZE;
spin_lock_init(&lli->lli_lock);
lli->lli_posix_acl = NULL;
- lli->lli_remote_perms = NULL;
- mutex_init(&lli->lli_rmtperm_mutex);
/* Do not set lli_fid, it has been initialized already. */
fid_zero(&lli->lli_pfid);
INIT_LIST_HEAD(&lli->lli_close_list);
- lli->lli_rmtperm_time = 0;
lli->lli_pending_och = NULL;
lli->lli_mds_read_och = NULL;
lli->lli_mds_write_och = NULL;
@@ -864,7 +814,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
try_module_get(THIS_MODULE);
/* client additional sb info */
- lsi->lsi_llsbi = sbi = ll_init_sbi(sb);
+ sbi = ll_init_sbi(sb);
+ lsi->lsi_llsbi = sbi;
if (!sbi) {
module_put(THIS_MODULE);
kfree(cfg);
@@ -897,10 +848,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
cfg->cfg_callback = class_config_llog_handler;
/* set up client obds */
err = lustre_process_log(sb, profilenm, cfg);
- if (err < 0) {
- CERROR("Unable to process log: %d\n", err);
+ if (err < 0)
goto out_free;
- }
/* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
lprof = class_get_profile(profilenm);
@@ -947,7 +896,7 @@ void ll_put_super(struct super_block *sb)
struct lustre_sb_info *lsi = s2lsi(sb);
struct ll_sb_info *sbi = ll_s2sbi(sb);
char *profilenm = get_profile_name(sb);
- int next, force = 1;
+ int ccc_count, next, force = 1, rc = 0;
CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
@@ -963,6 +912,19 @@ void ll_put_super(struct super_block *sb)
force = obd->obd_force;
}
+ /* Wait for unstable pages to be committed to stable storage */
+ if (!force) {
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+
+ rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq,
+ !atomic_read(&sbi->ll_cache->ccc_unstable_nr),
+ &lwi);
+ }
+
+ ccc_count = atomic_read(&sbi->ll_cache->ccc_unstable_nr);
+ if (!force && rc != -EINTR)
+ LASSERTF(!ccc_count, "count: %i\n", ccc_count);
+
/* We need to set force before the lov_disconnect in
* lustre_common_put_super, since l_d cleans up osc's as well.
*/
@@ -999,6 +961,8 @@ void ll_put_super(struct super_block *sb)
lustre_common_put_super(sb);
+ cl_env_cache_purge(~0);
+
module_put(THIS_MODULE);
} /* client_put_super */
@@ -1032,8 +996,8 @@ void ll_clear_inode(struct inode *inode)
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_sb_info *sbi = ll_i2sbi(inode);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
if (S_ISDIR(inode->i_mode)) {
/* these should have been cleared in ll_file_release */
@@ -1065,17 +1029,9 @@ void ll_clear_inode(struct inode *inode)
ll_xattr_cache_destroy(inode);
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
- LASSERT(!lli->lli_posix_acl);
- if (lli->lli_remote_perms) {
- free_rmtperm_hash(lli->lli_remote_perms);
- lli->lli_remote_perms = NULL;
- }
- }
#ifdef CONFIG_FS_POSIX_ACL
- else if (lli->lli_posix_acl) {
+ if (lli->lli_posix_acl) {
LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
- LASSERT(!lli->lli_remote_perms);
posix_acl_release(lli->lli_posix_acl);
lli->lli_posix_acl = NULL;
}
@@ -1180,9 +1136,11 @@ static int ll_setattr_done_writing(struct inode *inode,
* from OSTs and send setattr to back to MDS.
*/
rc = ll_som_update(inode, op_data);
- else if (rc)
- CERROR("inode %lu mdc truncate failed: rc = %d\n",
- inode->i_ino, rc);
+ else if (rc) {
+ CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n",
+ ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)), rc);
+ }
return rc;
}
@@ -1210,12 +1168,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
bool file_is_released = false;
int rc = 0, rc1 = 0;
- CDEBUG(D_VFSTRACE,
- "%s: setattr inode %p/fid:" DFID
- " from %llu to %llu, valid %x, hsm_import %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), inode,
- PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size,
- attr->ia_valid, hsm_import);
+ CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode,
+ i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import);
if (attr->ia_valid & ATTR_SIZE) {
/* Check new size against VFS/VM file size limit and rlimit */
@@ -1265,14 +1220,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
(s64)ktime_get_real_seconds());
- /* If we are changing file size, file content is modified, flag it. */
- if (attr->ia_valid & ATTR_SIZE) {
- attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= LLIF_DATA_MODIFIED;
- spin_unlock(&lli->lli_lock);
- }
-
/* We always do an MDS RPC, even if we're only changing the size;
* only the MDS knows whether truncate() should fail with -ETXTBUSY
*/
@@ -1284,13 +1231,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
if (!S_ISDIR(inode->i_mode))
inode_unlock(inode);
- memcpy(&op_data->op_attr, attr, sizeof(*attr));
-
- /* Open epoch for truncate. */
- if (exp_connect_som(ll_i2mdexp(inode)) &&
- (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
- op_data->op_flags = MF_EPOCH_OPEN;
-
/* truncate on a released file must failed with -ENODATA,
* so size must not be set on MDS for released file
* but other attributes must be set
@@ -1304,29 +1244,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED)
file_is_released = true;
ccc_inode_lsm_put(inode, lsm);
+
+ if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
+ if (file_is_released) {
+ rc = ll_layout_restore(inode, 0, attr->ia_size);
+ if (rc < 0)
+ goto out;
+
+ file_is_released = false;
+ ll_layout_refresh(inode, &gen);
+ }
+
+ /*
+ * If we are changing file size, file content is
+ * modified, flag it.
+ */
+ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags |= LLIF_DATA_MODIFIED;
+ spin_unlock(&lli->lli_lock);
+ op_data->op_bias |= MDS_DATA_MODIFIED;
+ }
}
- /* if not in HSM import mode, clear size attr for released file
- * we clear the attribute send to MDT in op_data, not the original
- * received from caller in attr which is used later to
- * decide return code
- */
- if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import)
- op_data->op_attr.ia_valid &= ~ATTR_SIZE;
+ memcpy(&op_data->op_attr, attr, sizeof(*attr));
+
+ /* Open epoch for truncate. */
+ if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import &&
+ (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
+ op_data->op_flags = MF_EPOCH_OPEN;
rc = ll_md_setattr(dentry, op_data, &mod);
if (rc)
goto out;
- /* truncate failed (only when non HSM import), others succeed */
- if (file_is_released) {
- if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
- rc = -ENODATA;
- else
- rc = 0;
- goto out;
- }
-
/* RPC to MDT is sent, cancel data modification flag */
if (op_data->op_bias & MDS_DATA_MODIFIED) {
spin_lock(&lli->lli_lock);
@@ -1335,7 +1286,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
}
ll_ioepoch_open(lli, op_data->op_ioepoch);
- if (!S_ISREG(inode->i_mode)) {
+ if (!S_ISREG(inode->i_mode) || file_is_released) {
rc = 0;
goto out;
}
@@ -1532,12 +1483,8 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
lli->lli_maxbytes = MAX_LFS_FILESIZE;
}
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
- if (body->valid & OBD_MD_FLRMTPERM)
- ll_update_remote_perm(inode, md->remote_perm);
- }
#ifdef CONFIG_FS_POSIX_ACL
- else if (body->valid & OBD_MD_FLACL) {
+ if (body->valid & OBD_MD_FLACL) {
spin_lock(&lli->lli_lock);
if (lli->lli_posix_acl)
posix_acl_release(lli->lli_posix_acl);
@@ -1552,7 +1499,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
if (body->valid & OBD_MD_FLATIME) {
if (body->atime > LTIME_S(inode->i_atime))
LTIME_S(inode->i_atime) = body->atime;
- lli->lli_lvb.lvb_atime = body->atime;
+ lli->lli_atime = body->atime;
}
if (body->valid & OBD_MD_FLMTIME) {
if (body->mtime > LTIME_S(inode->i_mtime)) {
@@ -1561,12 +1508,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
body->mtime);
LTIME_S(inode->i_mtime) = body->mtime;
}
- lli->lli_lvb.lvb_mtime = body->mtime;
+ lli->lli_mtime = body->mtime;
}
if (body->valid & OBD_MD_FLCTIME) {
if (body->ctime > LTIME_S(inode->i_ctime))
LTIME_S(inode->i_ctime) = body->ctime;
- lli->lli_lvb.lvb_ctime = body->ctime;
+ lli->lli_ctime = body->ctime;
}
if (body->valid & OBD_MD_FLMODE)
inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
@@ -1593,12 +1540,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
/* FID shouldn't be changed! */
if (fid_is_sane(&lli->lli_fid)) {
LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
- "Trying to change FID "DFID
- " to the "DFID", inode %lu/%u(%p)\n",
+ "Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n",
PFID(&lli->lli_fid), PFID(&body->fid1),
- inode->i_ino, inode->i_generation, inode);
- } else
+ PFID(ll_inode2fid(inode)), inode);
+ } else {
lli->lli_fid = body->fid1;
+ }
}
LASSERT(fid_seq(&lli->lli_fid) != 0);
@@ -1622,8 +1569,10 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
if (lli->lli_flags & (LLIF_DONE_WRITING |
LLIF_EPOCH_PENDING |
LLIF_SOM_DIRTY)) {
- CERROR("ino %lu flags %u still has size authority! do not trust the size got from MDS\n",
- inode->i_ino, lli->lli_flags);
+ CERROR("%s: inode "DFID" flags %u still has size authority! do not trust the size got from MDS\n",
+ sbi->ll_md_exp->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)),
+ lli->lli_flags);
} else {
/* Use old size assignment to avoid
* deadlock bz14138 & bz14326
@@ -1699,7 +1648,7 @@ void ll_read_inode2(struct inode *inode, void *opaque)
void ll_delete_inode(struct inode *inode)
{
- struct cl_inode_info *lli = cl_i2info(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
if (S_ISREG(inode->i_mode) && lli->lli_clob)
/* discard all dirty pages before truncating them, required by
@@ -1715,8 +1664,8 @@ void ll_delete_inode(struct inode *inode)
spin_lock_irq(&inode->i_data.tree_lock);
spin_unlock_irq(&inode->i_data.tree_lock);
LASSERTF(inode->i_data.nrpages == 0,
- "inode=%lu/%u(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
- inode->i_ino, inode->i_generation, inode,
+ "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
+ PFID(ll_inode2fid(inode)), inode,
inode->i_data.nrpages);
}
/* Workaround end */
@@ -1747,7 +1696,9 @@ int ll_iocontrol(struct inode *inode, struct file *file,
rc = md_getattr(sbi->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
if (rc) {
- CERROR("failure %d inode %lu\n", rc, inode->i_ino);
+ CERROR("%s: failure inode "DFID": rc = %d\n",
+ sbi->ll_md_exp->exp_obd->obd_name,
+ PFID(ll_inode2fid(inode)), rc);
return -abs(rc);
}
@@ -1772,7 +1723,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
+ op_data->op_attr_flags = flags;
op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
rc = md_setattr(sbi->ll_md_exp, op_data,
NULL, 0, NULL, 0, &req, NULL);
@@ -1967,7 +1918,13 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
* At this point server returns to client's same fid as client
* generated for creating. So using ->fid1 is okay here.
*/
- LASSERT(fid_is_sane(&md.body->fid1));
+ if (!fid_is_sane(&md.body->fid1)) {
+ CERROR("%s: Fid is insane " DFID "\n",
+ ll_get_fsname(sb, NULL, 0),
+ PFID(&md.body->fid1));
+ rc = -EINVAL;
+ goto out;
+ }
*inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1,
sbi->ll_flags & LL_SBI_32BIT_API),
@@ -1994,11 +1951,11 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
* 3. proc2: refresh layout and layout lock granted
* 4. proc1: to apply a stale layout
*/
- if (it && it->d.lustre.it_lock_mode != 0) {
+ if (it && it->it_lock_mode != 0) {
struct lustre_handle lockh;
struct ldlm_lock *lock;
- lockh.cookie = it->d.lustre.it_lock_handle;
+ lockh.cookie = it->it_lock_handle;
lock = ldlm_handle2lock(&lockh);
LASSERT(lock);
if (ldlm_has_layout(lock)) {
@@ -2066,11 +2023,11 @@ int ll_obd_statfs(struct inode *inode, void __user *arg)
}
memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
- if (type & LL_STATFS_LMV)
+ if (type & LL_STATFS_LMV) {
exp = sbi->ll_md_exp;
- else if (type & LL_STATFS_LOV)
+ } else if (type & LL_STATFS_LOV) {
exp = sbi->ll_dt_exp;
- else {
+ } else {
rc = -ENODEV;
goto out_statfs;
}
@@ -2271,7 +2228,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
{
char *buf, *path = NULL;
struct dentry *dentry = NULL;
- struct ccc_object *obj = cl_inode2ccc(page->mapping->host);
+ struct vvp_object *obj = cl_inode2vvp(page->mapping->host);
/* this can be called inside spin lock so use GFP_ATOMIC. */
buf = (char *)__get_free_page(GFP_ATOMIC);
@@ -2285,7 +2242,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
"%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n",
ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
- PFID(&obj->cob_header.coh_lu.loh_fid),
+ PFID(&obj->vob_header.coh_lu.loh_fid),
(path && !IS_ERR(path)) ? path : "", ioret);
if (dentry)
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 5b484e62ffd0..66ee5db5fce8 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -57,10 +53,10 @@ void policy_from_vma(ldlm_policy_data_t *policy,
struct vm_area_struct *vma, unsigned long addr,
size_t count)
{
- policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
+ policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) +
(vma->vm_pgoff << PAGE_SHIFT);
policy->l_extent.end = (policy->l_extent.start + count - 1) |
- ~CFS_PAGE_MASK;
+ ~PAGE_MASK;
}
struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
@@ -123,7 +119,8 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
*env_ret = env;
- io = ccc_env_thread_io(env);
+restart:
+ io = vvp_env_thread_io(env);
io->ci_obj = ll_i2info(inode)->lli_clob;
LASSERT(io->ci_obj);
@@ -146,17 +143,20 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
if (rc == 0) {
- struct ccc_io *cio = ccc_env_io(env);
+ struct vvp_io *vio = vvp_env_io(env);
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- LASSERT(cio->cui_cl.cis_io == io);
+ LASSERT(vio->vui_cl.cis_io == io);
/* mmap lock must be MANDATORY it has to cache pages. */
io->ci_lockreq = CILR_MANDATORY;
- cio->cui_fd = fd;
+ vio->vui_fd = fd;
} else {
LASSERT(rc < 0);
cl_io_fini(env, io);
+ if (io->ci_need_restart)
+ goto restart;
+
cl_env_nested_put(nest, env);
io = ERR_PTR(rc);
}
@@ -196,18 +196,11 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));
- /* we grab lli_trunc_sem to exclude truncate case.
- * Otherwise, we could add dirty pages into osc cache
- * while truncate is on-going.
- */
- inode = ccc_object_inode(io->ci_obj);
+ inode = vvp_object_inode(io->ci_obj);
lli = ll_i2info(inode);
- down_read(&lli->lli_trunc_sem);
result = cl_io_loop(env, io);
- up_read(&lli->lli_trunc_sem);
-
cfs_restore_sigs(set);
if (result == 0) {
@@ -307,17 +300,22 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
vio = vvp_env_io(env);
vio->u.fault.ft_vma = vma;
vio->u.fault.ft_vmpage = NULL;
- vio->u.fault.fault.ft_vmf = vmf;
- vio->u.fault.fault.ft_flags = 0;
- vio->u.fault.fault.ft_flags_valid = false;
+ vio->u.fault.ft_vmf = vmf;
+ vio->u.fault.ft_flags = 0;
+ vio->u.fault.ft_flags_valid = false;
+
+ /* May call ll_readpage() */
+ ll_cl_add(vma->vm_file, env, io);
result = cl_io_loop(env, io);
+ ll_cl_remove(vma->vm_file, env);
+
/* ft_flags are only valid if we reached
* the call to filemap_fault
*/
- if (vio->u.fault.fault.ft_flags_valid)
- fault_ret = vio->u.fault.fault.ft_flags;
+ if (vio->u.fault.ft_flags_valid)
+ fault_ret = vio->u.fault.ft_flags;
vmpage = vio->u.fault.ft_vmpage;
if (result != 0 && vmpage) {
@@ -390,9 +388,11 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
result = ll_page_mkwrite0(vma, vmf->page, &retry);
if (!printed && ++count > 16) {
- CWARN("app(%s): the page %lu of file %lu is under heavy contention.\n",
+ const struct dentry *de = vma->vm_file->f_path.dentry;
+
+ CWARN("app(%s): the page %lu of file "DFID" is under heavy contention\n",
current->comm, vmf->pgoff,
- file_inode(vma->vm_file)->i_ino);
+ PFID(ll_inode2fid(de->d_inode)));
printed = true;
}
} while (retry);
@@ -422,16 +422,16 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
/**
* To avoid cancel the locks covering mmapped region for lock cache pressure,
- * we track the mapped vma count in ccc_object::cob_mmap_cnt.
+ * we track the mapped vma count in vvp_object::vob_mmap_cnt.
*/
static void ll_vm_open(struct vm_area_struct *vma)
{
struct inode *inode = file_inode(vma->vm_file);
- struct ccc_object *vob = cl_inode2ccc(inode);
+ struct vvp_object *vob = cl_inode2vvp(inode);
LASSERT(vma->vm_file);
- LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
- atomic_inc(&vob->cob_mmap_cnt);
+ LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
+ atomic_inc(&vob->vob_mmap_cnt);
}
/**
@@ -440,11 +440,11 @@ static void ll_vm_open(struct vm_area_struct *vma)
static void ll_vm_close(struct vm_area_struct *vma)
{
struct inode *inode = file_inode(vma->vm_file);
- struct ccc_object *vob = cl_inode2ccc(inode);
+ struct vvp_object *vob = cl_inode2vvp(inode);
LASSERT(vma->vm_file);
- atomic_dec(&vob->cob_mmap_cnt);
- LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
+ atomic_dec(&vob->vob_mmap_cnt);
+ LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
}
/* XXX put nice comment here. talk about __free_pte -> dirty pages and
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
index 193aab879709..65972c892731 100644
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -119,7 +115,7 @@ struct inode *search_inode_for_lustre(struct super_block *sb,
rc = md_getattr(sbi->ll_md_exp, op_data, &req);
kfree(op_data);
if (rc) {
- CERROR("can't get object attrs, fid "DFID", rc %d\n",
+ CDEBUG(D_INFO, "can't get object attrs, fid "DFID", rc %d\n",
PFID(fid), rc);
return ERR_PTR(rc);
}
@@ -172,6 +168,24 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren
/* N.B. d_obtain_alias() drops inode ref on error */
result = d_obtain_alias(inode);
+ if (!IS_ERR(result)) {
+ int rc;
+
+ rc = ll_d_init(result);
+ if (rc < 0) {
+ dput(result);
+ result = ERR_PTR(rc);
+ } else {
+ struct ll_dentry_data *ldd = ll_d2d(result);
+
+ /*
+ * Signal to ll_intent_file_open() that this dentry
+ * came from NFS, so opencache needs to be enabled
+ * for it.
+ */
+ ldd->lld_nfs_dentry = 1;
+ }
+ }
return result;
}
@@ -191,8 +205,9 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
int fileid_len = sizeof(struct lustre_nfs_fid) / 4;
struct lustre_nfs_fid *nfs_fid = (void *)fh;
- CDEBUG(D_INFO, "encoding for (%lu," DFID ") maxlen=%d minlen=%d\n",
- inode->i_ino, PFID(ll_inode2fid(inode)), *plen, fileid_len);
+ CDEBUG(D_INFO, "%s: encoding for ("DFID") maxlen=%d minlen=%d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), *plen, fileid_len);
if (*plen < fileid_len) {
*plen = fileid_len;
@@ -298,8 +313,9 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
sbi = ll_s2sbi(dir->i_sb);
- CDEBUG(D_INFO, "getting parent for (%lu," DFID ")\n",
- dir->i_ino, PFID(ll_inode2fid(dir)));
+ CDEBUG(D_INFO, "%s: getting parent for ("DFID")\n",
+ ll_get_fsname(dir->i_sb, NULL, 0),
+ PFID(ll_inode2fid(dir)));
rc = ll_get_default_mdsize(sbi, &lmmsize);
if (rc != 0)
@@ -314,15 +330,20 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
if (rc) {
- CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
+ CERROR("%s: failure inode "DFID" get parent: rc = %d\n",
+ ll_get_fsname(dir->i_sb, NULL, 0),
+ PFID(ll_inode2fid(dir)), rc);
return ERR_PTR(rc);
}
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- LASSERT(body->valid & OBD_MD_FLID);
-
- CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
- PFID(ll_inode2fid(dir)), PFID(&body->fid1));
-
+ /*
+ * LU-3952: MDT may lose the FID of its parent; we should not crash
+ * the NFS server, as ll_iget_for_nfs() will handle the error.
+ */
+ if (body->valid & OBD_MD_FLID) {
+ CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
+ PFID(ll_inode2fid(dir)), PFID(&body->fid1));
+ }
result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL);
ptlrpc_req_finished(req);
diff --git a/drivers/staging/lustre/lustre/llite/llite_rmtacl.c b/drivers/staging/lustre/lustre/llite/llite_rmtacl.c
deleted file mode 100644
index 8509b07cb5c7..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_rmtacl.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/llite_rmtacl.c
- *
- * Lustre Remote User Access Control List.
- *
- * Author: Fan Yong <fanyong@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#ifdef CONFIG_FS_POSIX_ACL
-
-#include "../include/lustre_lite.h"
-#include "../include/lustre_eacl.h"
-#include "llite_internal.h"
-
-static inline __u32 rce_hashfunc(uid_t id)
-{
- return id & (RCE_HASHES - 1);
-}
-
-static inline __u32 ee_hashfunc(uid_t id)
-{
- return id & (EE_HASHES - 1);
-}
-
-u64 rce_ops2valid(int ops)
-{
- switch (ops) {
- case RMT_LSETFACL:
- return OBD_MD_FLRMTLSETFACL;
- case RMT_LGETFACL:
- return OBD_MD_FLRMTLGETFACL;
- case RMT_RSETFACL:
- return OBD_MD_FLRMTRSETFACL;
- case RMT_RGETFACL:
- return OBD_MD_FLRMTRGETFACL;
- default:
- return 0;
- }
-}
-
-static struct rmtacl_ctl_entry *rce_alloc(pid_t key, int ops)
-{
- struct rmtacl_ctl_entry *rce;
-
- rce = kzalloc(sizeof(*rce), GFP_NOFS);
- if (!rce)
- return NULL;
-
- INIT_LIST_HEAD(&rce->rce_list);
- rce->rce_key = key;
- rce->rce_ops = ops;
-
- return rce;
-}
-
-static void rce_free(struct rmtacl_ctl_entry *rce)
-{
- if (!list_empty(&rce->rce_list))
- list_del(&rce->rce_list);
-
- kfree(rce);
-}
-
-static struct rmtacl_ctl_entry *__rct_search(struct rmtacl_ctl_table *rct,
- pid_t key)
-{
- struct rmtacl_ctl_entry *rce;
- struct list_head *head = &rct->rct_entries[rce_hashfunc(key)];
-
- list_for_each_entry(rce, head, rce_list)
- if (rce->rce_key == key)
- return rce;
-
- return NULL;
-}
-
-struct rmtacl_ctl_entry *rct_search(struct rmtacl_ctl_table *rct, pid_t key)
-{
- struct rmtacl_ctl_entry *rce;
-
- spin_lock(&rct->rct_lock);
- rce = __rct_search(rct, key);
- spin_unlock(&rct->rct_lock);
- return rce;
-}
-
-int rct_add(struct rmtacl_ctl_table *rct, pid_t key, int ops)
-{
- struct rmtacl_ctl_entry *rce, *e;
-
- rce = rce_alloc(key, ops);
- if (!rce)
- return -ENOMEM;
-
- spin_lock(&rct->rct_lock);
- e = __rct_search(rct, key);
- if (unlikely(e)) {
- CWARN("Unexpected stale rmtacl_entry found: [key: %d] [ops: %d]\n",
- (int)key, ops);
- rce_free(e);
- }
- list_add_tail(&rce->rce_list, &rct->rct_entries[rce_hashfunc(key)]);
- spin_unlock(&rct->rct_lock);
-
- return 0;
-}
-
-int rct_del(struct rmtacl_ctl_table *rct, pid_t key)
-{
- struct rmtacl_ctl_entry *rce;
-
- spin_lock(&rct->rct_lock);
- rce = __rct_search(rct, key);
- if (rce)
- rce_free(rce);
- spin_unlock(&rct->rct_lock);
-
- return rce ? 0 : -ENOENT;
-}
-
-void rct_init(struct rmtacl_ctl_table *rct)
-{
- int i;
-
- spin_lock_init(&rct->rct_lock);
- for (i = 0; i < RCE_HASHES; i++)
- INIT_LIST_HEAD(&rct->rct_entries[i]);
-}
-
-void rct_fini(struct rmtacl_ctl_table *rct)
-{
- struct rmtacl_ctl_entry *rce;
- int i;
-
- spin_lock(&rct->rct_lock);
- for (i = 0; i < RCE_HASHES; i++)
- while (!list_empty(&rct->rct_entries[i])) {
- rce = list_entry(rct->rct_entries[i].next,
- struct rmtacl_ctl_entry, rce_list);
- rce_free(rce);
- }
- spin_unlock(&rct->rct_lock);
-}
-
-static struct eacl_entry *ee_alloc(pid_t key, struct lu_fid *fid, int type,
- ext_acl_xattr_header *header)
-{
- struct eacl_entry *ee;
-
- ee = kzalloc(sizeof(*ee), GFP_NOFS);
- if (!ee)
- return NULL;
-
- INIT_LIST_HEAD(&ee->ee_list);
- ee->ee_key = key;
- ee->ee_fid = *fid;
- ee->ee_type = type;
- ee->ee_acl = header;
-
- return ee;
-}
-
-void ee_free(struct eacl_entry *ee)
-{
- if (!list_empty(&ee->ee_list))
- list_del(&ee->ee_list);
-
- if (ee->ee_acl)
- lustre_ext_acl_xattr_free(ee->ee_acl);
-
- kfree(ee);
-}
-
-static struct eacl_entry *__et_search_del(struct eacl_table *et, pid_t key,
- struct lu_fid *fid, int type)
-{
- struct eacl_entry *ee;
- struct list_head *head = &et->et_entries[ee_hashfunc(key)];
-
- LASSERT(fid);
- list_for_each_entry(ee, head, ee_list)
- if (ee->ee_key == key) {
- if (lu_fid_eq(&ee->ee_fid, fid) &&
- ee->ee_type == type) {
- list_del_init(&ee->ee_list);
- return ee;
- }
- }
-
- return NULL;
-}
-
-struct eacl_entry *et_search_del(struct eacl_table *et, pid_t key,
- struct lu_fid *fid, int type)
-{
- struct eacl_entry *ee;
-
- spin_lock(&et->et_lock);
- ee = __et_search_del(et, key, fid, type);
- spin_unlock(&et->et_lock);
- return ee;
-}
-
-void et_search_free(struct eacl_table *et, pid_t key)
-{
- struct eacl_entry *ee, *next;
- struct list_head *head = &et->et_entries[ee_hashfunc(key)];
-
- spin_lock(&et->et_lock);
- list_for_each_entry_safe(ee, next, head, ee_list)
- if (ee->ee_key == key)
- ee_free(ee);
-
- spin_unlock(&et->et_lock);
-}
-
-int ee_add(struct eacl_table *et, pid_t key, struct lu_fid *fid, int type,
- ext_acl_xattr_header *header)
-{
- struct eacl_entry *ee, *e;
-
- ee = ee_alloc(key, fid, type, header);
- if (!ee)
- return -ENOMEM;
-
- spin_lock(&et->et_lock);
- e = __et_search_del(et, key, fid, type);
- if (unlikely(e)) {
- CWARN("Unexpected stale eacl_entry found: [key: %d] [fid: " DFID "] [type: %d]\n",
- (int)key, PFID(fid), type);
- ee_free(e);
- }
- list_add_tail(&ee->ee_list, &et->et_entries[ee_hashfunc(key)]);
- spin_unlock(&et->et_lock);
-
- return 0;
-}
-
-void et_init(struct eacl_table *et)
-{
- int i;
-
- spin_lock_init(&et->et_lock);
- for (i = 0; i < EE_HASHES; i++)
- INIT_LIST_HEAD(&et->et_entries[i]);
-}
-
-void et_fini(struct eacl_table *et)
-{
- struct eacl_entry *ee;
- int i;
-
- spin_lock(&et->et_lock);
- for (i = 0; i < EE_HASHES; i++)
- while (!list_empty(&et->et_entries[i])) {
- ee = list_entry(et->et_entries[i].next,
- struct eacl_entry, ee_list);
- ee_free(ee);
- }
- spin_unlock(&et->et_lock);
-}
-
-#endif
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
deleted file mode 100644
index f169c0db63b4..000000000000
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ /dev/null
@@ -1,882 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-/*
- * linux/drivers/block/loop.c
- *
- * Written by Theodore Ts'o, 3/29/93
- *
- * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
- * permitted under the GNU General Public License.
- *
- * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
- * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
- *
- * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
- *
- * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
- *
- * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
- *
- * Loadable modules and other fixes by AK, 1998
- *
- * Maximum number of loop devices now dynamic via max_loop module parameter.
- * Russell Kroll <rkroll@exploits.org> 19990701
- *
- * Maximum number of loop devices when compiled-in now selectable by passing
- * max_loop=<1-255> to the kernel on boot.
- * Erik I. Bols?, <eriki@himolde.no>, Oct 31, 1999
- *
- * Completely rewrite request handling to be make_request_fn style and
- * non blocking, pushing work to a helper thread. Lots of fixes from
- * Al Viro too.
- * Jens Axboe <axboe@suse.de>, Nov 2000
- *
- * Support up to 256 loop devices
- * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
- *
- * Support for falling back on the write file operation when the address space
- * operations prepare_write and/or commit_write are not available on the
- * backing filesystem.
- * Anton Altaparmakov, 16 Feb 2005
- *
- * Still To Fix:
- * - Advisory locking is ignored here.
- * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/major.h>
-#include <linux/wait.h>
-#include <linux/blkdev.h>
-#include <linux/blkpg.h>
-#include <linux/init.h>
-#include <linux/swap.h>
-#include <linux/slab.h>
-#include <linux/suspend.h>
-#include <linux/writeback.h>
-#include <linux/buffer_head.h> /* for invalidate_bdev() */
-#include <linux/completion.h>
-#include <linux/highmem.h>
-#include <linux/gfp.h>
-#include <linux/pagevec.h>
-#include <linux/uaccess.h>
-
-#include "../include/lustre_lib.h"
-#include "../include/lustre_lite.h"
-#include "llite_internal.h"
-
-#define LLOOP_MAX_SEGMENTS LNET_MAX_IOV
-
-/* Possible states of device */
-enum {
- LLOOP_UNBOUND,
- LLOOP_BOUND,
- LLOOP_RUNDOWN,
-};
-
-struct lloop_device {
- int lo_number;
- int lo_refcnt;
- loff_t lo_offset;
- loff_t lo_sizelimit;
- int lo_flags;
- struct file *lo_backing_file;
- struct block_device *lo_device;
- unsigned lo_blocksize;
-
- gfp_t old_gfp_mask;
-
- spinlock_t lo_lock;
- struct bio *lo_bio;
- struct bio *lo_biotail;
- int lo_state;
- struct semaphore lo_sem;
- struct mutex lo_ctl_mutex;
- atomic_t lo_pending;
- wait_queue_head_t lo_bh_wait;
-
- struct request_queue *lo_queue;
-
- const struct lu_env *lo_env;
- struct cl_io lo_io;
- struct ll_dio_pages lo_pvec;
-
- /* data to handle bio for lustre. */
- struct lo_request_data {
- struct page *lrd_pages[LLOOP_MAX_SEGMENTS];
- loff_t lrd_offsets[LLOOP_MAX_SEGMENTS];
- } lo_requests[1];
-};
-
-/*
- * Loop flags
- */
-enum {
- LO_FLAGS_READ_ONLY = 1,
-};
-
-static int lloop_major;
-#define MAX_LOOP_DEFAULT 16
-static int max_loop = MAX_LOOP_DEFAULT;
-static struct lloop_device *loop_dev;
-static struct gendisk **disks;
-static struct mutex lloop_mutex;
-static void *ll_iocontrol_magic;
-
-static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
-{
- loff_t size, offset, loopsize;
-
- /* Compute loopsize in bytes */
- size = i_size_read(file->f_mapping->host);
- offset = lo->lo_offset;
- loopsize = size - offset;
- if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
- loopsize = lo->lo_sizelimit;
-
- /*
- * Unfortunately, if we want to do I/O on the device,
- * the number of 512-byte sectors has to fit into a sector_t.
- */
- return loopsize >> 9;
-}
-
-static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
-{
- const struct lu_env *env = lo->lo_env;
- struct cl_io *io = &lo->lo_io;
- struct inode *inode = file_inode(lo->lo_backing_file);
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
- pgoff_t offset;
- int ret;
- int rw;
- u32 page_count = 0;
- struct bio_vec bvec;
- struct bvec_iter iter;
- struct bio *bio;
- ssize_t bytes;
-
- struct ll_dio_pages *pvec = &lo->lo_pvec;
- struct page **pages = pvec->ldp_pages;
- loff_t *offsets = pvec->ldp_offsets;
-
- truncate_inode_pages(inode->i_mapping, 0);
-
- /* initialize the IO */
- memset(io, 0, sizeof(*io));
- io->ci_obj = obj;
- ret = cl_io_init(env, io, CIT_MISC, obj);
- if (ret)
- return io->ci_result;
- io->ci_lockreq = CILR_NEVER;
-
- rw = head->bi_rw;
- for (bio = head; bio ; bio = bio->bi_next) {
- LASSERT(rw == bio->bi_rw);
-
- offset = (pgoff_t)(bio->bi_iter.bi_sector << 9) + lo->lo_offset;
- bio_for_each_segment(bvec, bio, iter) {
- BUG_ON(bvec.bv_offset != 0);
- BUG_ON(bvec.bv_len != PAGE_SIZE);
-
- pages[page_count] = bvec.bv_page;
- offsets[page_count] = offset;
- page_count++;
- offset += bvec.bv_len;
- }
- LASSERT(page_count <= LLOOP_MAX_SEGMENTS);
- }
-
- ll_stats_ops_tally(ll_i2sbi(inode),
- (rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ,
- page_count);
-
- pvec->ldp_size = page_count << PAGE_SHIFT;
- pvec->ldp_nr = page_count;
-
- /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
- * write those pages into OST. Even worse case is that more pages
- * would be asked to write out to swap space, and then finally get here
- * again.
- * Unfortunately this is NOT easy to fix.
- * Thoughts on solution:
- * 0. Define a reserved pool for cl_pages, which could be a list of
- * pre-allocated cl_pages;
- * 1. Define a new operation in cl_object_operations{}, says clo_depth,
- * which measures how many layers for this lustre object. Generally
- * speaking, the depth would be 2, one for llite, and one for lovsub.
- * However, for SNS, there will be more since we need additional page
- * to store parity;
- * 2. Reserve the # of (page_count * depth) cl_pages from the reserved
- * pool. Afterwards, the clio would allocate the pages from reserved
- * pool, this guarantees we needn't allocate the cl_pages from
- * generic cl_page slab cache.
- * Of course, if there is NOT enough pages in the pool, we might
- * be asked to write less pages once, this purely depends on
- * implementation. Anyway, we should be careful to avoid deadlocking.
- */
- inode_lock(inode);
- bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
- inode_unlock(inode);
- cl_io_fini(env, io);
- return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
-}
-
-/*
- * Add bio to back of pending list
- */
-static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&lo->lo_lock, flags);
- if (lo->lo_biotail) {
- lo->lo_biotail->bi_next = bio;
- lo->lo_biotail = bio;
- } else
- lo->lo_bio = lo->lo_biotail = bio;
- spin_unlock_irqrestore(&lo->lo_lock, flags);
-
- atomic_inc(&lo->lo_pending);
- if (waitqueue_active(&lo->lo_bh_wait))
- wake_up(&lo->lo_bh_wait);
-}
-
-/*
- * Grab first pending buffer
- */
-static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
-{
- struct bio *first;
- struct bio **bio;
- unsigned int count = 0;
- unsigned int page_count = 0;
- int rw;
-
- spin_lock_irq(&lo->lo_lock);
- first = lo->lo_bio;
- if (unlikely(!first)) {
- spin_unlock_irq(&lo->lo_lock);
- return 0;
- }
-
- /* TODO: need to split the bio, too bad. */
- LASSERT(first->bi_vcnt <= LLOOP_MAX_SEGMENTS);
-
- rw = first->bi_rw;
- bio = &lo->lo_bio;
- while (*bio && (*bio)->bi_rw == rw) {
- CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u\n",
- (unsigned long long)(*bio)->bi_iter.bi_sector,
- (*bio)->bi_iter.bi_size,
- page_count, (*bio)->bi_vcnt);
- if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS)
- break;
-
- page_count += (*bio)->bi_vcnt;
- count++;
- bio = &(*bio)->bi_next;
- }
- if (*bio) {
- /* Some of bios can't be mergeable. */
- lo->lo_bio = *bio;
- *bio = NULL;
- } else {
- /* Hit the end of queue */
- lo->lo_biotail = NULL;
- lo->lo_bio = NULL;
- }
- *req = first;
- spin_unlock_irq(&lo->lo_lock);
- return count;
-}
-
-static blk_qc_t loop_make_request(struct request_queue *q, struct bio *old_bio)
-{
- struct lloop_device *lo = q->queuedata;
- int rw = bio_rw(old_bio);
- int inactive;
-
- blk_queue_split(q, &old_bio, q->bio_split);
-
- if (!lo)
- goto err;
-
- CDEBUG(D_INFO, "submit bio sector %llu size %u\n",
- (unsigned long long)old_bio->bi_iter.bi_sector,
- old_bio->bi_iter.bi_size);
-
- spin_lock_irq(&lo->lo_lock);
- inactive = lo->lo_state != LLOOP_BOUND;
- spin_unlock_irq(&lo->lo_lock);
- if (inactive)
- goto err;
-
- if (rw == WRITE) {
- if (lo->lo_flags & LO_FLAGS_READ_ONLY)
- goto err;
- } else if (rw == READA) {
- rw = READ;
- } else if (rw != READ) {
- CERROR("lloop: unknown command (%x)\n", rw);
- goto err;
- }
- loop_add_bio(lo, old_bio);
- return BLK_QC_T_NONE;
-err:
- bio_io_error(old_bio);
- return BLK_QC_T_NONE;
-}
-
-static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
-{
- int ret;
-
- ret = do_bio_lustrebacked(lo, bio);
- while (bio) {
- struct bio *tmp = bio->bi_next;
-
- bio->bi_next = NULL;
- bio->bi_error = ret;
- bio_endio(bio);
- bio = tmp;
- }
-}
-
-static inline int loop_active(struct lloop_device *lo)
-{
- return atomic_read(&lo->lo_pending) ||
- (lo->lo_state == LLOOP_RUNDOWN);
-}
-
-/*
- * worker thread that handles reads/writes to file backed loop devices,
- * to avoid blocking in our make_request_fn.
- */
-static int loop_thread(void *data)
-{
- struct lloop_device *lo = data;
- struct bio *bio;
- unsigned int count;
- unsigned long times = 0;
- unsigned long total_count = 0;
-
- struct lu_env *env;
- int refcheck;
- int ret = 0;
-
- set_user_nice(current, MIN_NICE);
-
- lo->lo_state = LLOOP_BOUND;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- ret = PTR_ERR(env);
- goto out;
- }
-
- lo->lo_env = env;
- memset(&lo->lo_pvec, 0, sizeof(lo->lo_pvec));
- lo->lo_pvec.ldp_pages = lo->lo_requests[0].lrd_pages;
- lo->lo_pvec.ldp_offsets = lo->lo_requests[0].lrd_offsets;
-
- /*
- * up sem, we are running
- */
- up(&lo->lo_sem);
-
- for (;;) {
- wait_event(lo->lo_bh_wait, loop_active(lo));
- if (!atomic_read(&lo->lo_pending)) {
- int exiting = 0;
-
- spin_lock_irq(&lo->lo_lock);
- exiting = (lo->lo_state == LLOOP_RUNDOWN);
- spin_unlock_irq(&lo->lo_lock);
- if (exiting)
- break;
- }
-
- bio = NULL;
- count = loop_get_bio(lo, &bio);
- if (!count) {
- CWARN("lloop(minor: %d): missing bio\n", lo->lo_number);
- continue;
- }
-
- total_count += count;
- if (total_count < count) { /* overflow */
- total_count = count;
- times = 1;
- } else {
- times++;
- }
- if ((times & 127) == 0) {
- CDEBUG(D_INFO, "total: %lu, count: %lu, avg: %lu\n",
- total_count, times, total_count / times);
- }
-
- LASSERT(bio);
- LASSERT(count <= atomic_read(&lo->lo_pending));
- loop_handle_bio(lo, bio);
- atomic_sub(count, &lo->lo_pending);
- }
- cl_env_put(env, &refcheck);
-
-out:
- up(&lo->lo_sem);
- return ret;
-}
-
-static int loop_set_fd(struct lloop_device *lo, struct file *unused,
- struct block_device *bdev, struct file *file)
-{
- struct inode *inode;
- struct address_space *mapping;
- int lo_flags = 0;
- int error;
- loff_t size;
-
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
-
- error = -EBUSY;
- if (lo->lo_state != LLOOP_UNBOUND)
- goto out;
-
- mapping = file->f_mapping;
- inode = mapping->host;
-
- error = -EINVAL;
- if (!S_ISREG(inode->i_mode) || inode->i_sb->s_magic != LL_SUPER_MAGIC)
- goto out;
-
- if (!(file->f_mode & FMODE_WRITE))
- lo_flags |= LO_FLAGS_READ_ONLY;
-
- size = get_loop_size(lo, file);
-
- if ((loff_t)(sector_t)size != size) {
- error = -EFBIG;
- goto out;
- }
-
- /* remove all pages in cache so as dirty pages not to be existent. */
- truncate_inode_pages(mapping, 0);
-
- set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
-
- lo->lo_blocksize = PAGE_SIZE;
- lo->lo_device = bdev;
- lo->lo_flags = lo_flags;
- lo->lo_backing_file = file;
- lo->lo_sizelimit = 0;
- lo->old_gfp_mask = mapping_gfp_mask(mapping);
- mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
-
- lo->lo_bio = lo->lo_biotail = NULL;
-
- /*
- * set queue make_request_fn, and add limits based on lower level
- * device
- */
- blk_queue_make_request(lo->lo_queue, loop_make_request);
- lo->lo_queue->queuedata = lo;
-
- /* queue parameters */
- CLASSERT(PAGE_SIZE < (1 << (sizeof(unsigned short) * 8)));
- blk_queue_logical_block_size(lo->lo_queue,
- (unsigned short)PAGE_SIZE);
- blk_queue_max_hw_sectors(lo->lo_queue,
- LLOOP_MAX_SEGMENTS << (PAGE_SHIFT - 9));
- blk_queue_max_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
-
- set_capacity(disks[lo->lo_number], size);
- bd_set_size(bdev, size << 9);
-
- set_blocksize(bdev, lo->lo_blocksize);
-
- kthread_run(loop_thread, lo, "lloop%d", lo->lo_number);
- down(&lo->lo_sem);
- return 0;
-
-out:
- /* This is safe: open() is still holding a reference. */
- module_put(THIS_MODULE);
- return error;
-}
-
-static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
- int count)
-{
- struct file *filp = lo->lo_backing_file;
- gfp_t gfp = lo->old_gfp_mask;
-
- if (lo->lo_state != LLOOP_BOUND)
- return -ENXIO;
-
- if (lo->lo_refcnt > count) /* we needed one fd for the ioctl */
- return -EBUSY;
-
- if (!filp)
- return -EINVAL;
-
- spin_lock_irq(&lo->lo_lock);
- lo->lo_state = LLOOP_RUNDOWN;
- spin_unlock_irq(&lo->lo_lock);
- wake_up(&lo->lo_bh_wait);
-
- down(&lo->lo_sem);
- lo->lo_backing_file = NULL;
- lo->lo_device = NULL;
- lo->lo_offset = 0;
- lo->lo_sizelimit = 0;
- lo->lo_flags = 0;
- invalidate_bdev(bdev);
- set_capacity(disks[lo->lo_number], 0);
- bd_set_size(bdev, 0);
- mapping_set_gfp_mask(filp->f_mapping, gfp);
- lo->lo_state = LLOOP_UNBOUND;
- fput(filp);
- /* This is safe: open() is still holding a reference. */
- module_put(THIS_MODULE);
- return 0;
-}
-
-static int lo_open(struct block_device *bdev, fmode_t mode)
-{
- struct lloop_device *lo = bdev->bd_disk->private_data;
-
- mutex_lock(&lo->lo_ctl_mutex);
- lo->lo_refcnt++;
- mutex_unlock(&lo->lo_ctl_mutex);
-
- return 0;
-}
-
-static void lo_release(struct gendisk *disk, fmode_t mode)
-{
- struct lloop_device *lo = disk->private_data;
-
- mutex_lock(&lo->lo_ctl_mutex);
- --lo->lo_refcnt;
- mutex_unlock(&lo->lo_ctl_mutex);
-}
-
-/* lloop device node's ioctl function. */
-static int lo_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- struct lloop_device *lo = bdev->bd_disk->private_data;
- struct inode *inode = NULL;
- int err = 0;
-
- mutex_lock(&lloop_mutex);
- switch (cmd) {
- case LL_IOC_LLOOP_DETACH: {
- err = loop_clr_fd(lo, bdev, 2);
- if (err == 0)
- blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */
- break;
- }
-
- case LL_IOC_LLOOP_INFO: {
- struct lu_fid fid;
-
- if (!lo->lo_backing_file) {
- err = -ENOENT;
- break;
- }
- if (!inode)
- inode = file_inode(lo->lo_backing_file);
- if (lo->lo_state == LLOOP_BOUND)
- fid = ll_i2info(inode)->lli_fid;
- else
- fid_zero(&fid);
-
- if (copy_to_user((void __user *)arg, &fid, sizeof(fid)))
- err = -EFAULT;
- break;
- }
-
- default:
- err = -EINVAL;
- break;
- }
- mutex_unlock(&lloop_mutex);
-
- return err;
-}
-
-static struct block_device_operations lo_fops = {
- .owner = THIS_MODULE,
- .open = lo_open,
- .release = lo_release,
- .ioctl = lo_ioctl,
-};
-
-/* dynamic iocontrol callback.
- * This callback is registered in lloop_init and will be called by
- * ll_iocontrol_call.
- *
- * This is a llite regular file ioctl function. It takes the responsibility
- * of attaching or detaching a file by a lloop's device number.
- */
-static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
- unsigned int cmd, unsigned long arg,
- void *magic, int *rcp)
-{
- struct lloop_device *lo = NULL;
- struct block_device *bdev = NULL;
- int err = 0;
- dev_t dev;
-
- if (magic != ll_iocontrol_magic)
- return LLIOC_CONT;
-
- if (!disks) {
- err = -ENODEV;
- goto out1;
- }
-
- CWARN("Enter llop_ioctl\n");
-
- mutex_lock(&lloop_mutex);
- switch (cmd) {
- case LL_IOC_LLOOP_ATTACH: {
- struct lloop_device *lo_free = NULL;
- int i;
-
- for (i = 0; i < max_loop; i++, lo = NULL) {
- lo = &loop_dev[i];
- if (lo->lo_state == LLOOP_UNBOUND) {
- if (!lo_free)
- lo_free = lo;
- continue;
- }
- if (file_inode(lo->lo_backing_file) == file_inode(file))
- break;
- }
- if (lo || !lo_free) {
- err = -EBUSY;
- goto out;
- }
-
- lo = lo_free;
- dev = MKDEV(lloop_major, lo->lo_number);
-
- /* quit if the used pointer is writable */
- if (put_user((long)old_encode_dev(dev), (long __user *)arg)) {
- err = -EFAULT;
- goto out;
- }
-
- bdev = blkdev_get_by_dev(dev, file->f_mode, NULL);
- if (IS_ERR(bdev)) {
- err = PTR_ERR(bdev);
- goto out;
- }
-
- get_file(file);
- err = loop_set_fd(lo, NULL, bdev, file);
- if (err) {
- fput(file);
- blkdev_put(bdev, 0);
- }
-
- break;
- }
-
- case LL_IOC_LLOOP_DETACH_BYDEV: {
- int minor;
-
- dev = old_decode_dev(arg);
- if (MAJOR(dev) != lloop_major) {
- err = -EINVAL;
- goto out;
- }
-
- minor = MINOR(dev);
- if (minor > max_loop - 1) {
- err = -EINVAL;
- goto out;
- }
-
- lo = &loop_dev[minor];
- if (lo->lo_state != LLOOP_BOUND) {
- err = -EINVAL;
- goto out;
- }
-
- bdev = lo->lo_device;
- err = loop_clr_fd(lo, bdev, 1);
- if (err == 0)
- blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */
-
- break;
- }
-
- default:
- err = -EINVAL;
- break;
- }
-
-out:
- mutex_unlock(&lloop_mutex);
-out1:
- if (rcp)
- *rcp = err;
- return LLIOC_STOP;
-}
-
-static int __init lloop_init(void)
-{
- int i;
- unsigned int cmdlist[] = {
- LL_IOC_LLOOP_ATTACH,
- LL_IOC_LLOOP_DETACH_BYDEV,
- };
-
- if (max_loop < 1 || max_loop > 256) {
- max_loop = MAX_LOOP_DEFAULT;
- CWARN("lloop: invalid max_loop (must be between 1 and 256), using default (%u)\n",
- max_loop);
- }
-
- lloop_major = register_blkdev(0, "lloop");
- if (lloop_major < 0)
- return -EIO;
-
- CDEBUG(D_CONFIG, "registered lloop major %d with %u minors\n",
- lloop_major, max_loop);
-
- ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist);
- if (!ll_iocontrol_magic)
- goto out_mem1;
-
- loop_dev = kcalloc(max_loop, sizeof(*loop_dev), GFP_KERNEL);
- if (!loop_dev)
- goto out_mem1;
-
- disks = kcalloc(max_loop, sizeof(*disks), GFP_KERNEL);
- if (!disks)
- goto out_mem2;
-
- for (i = 0; i < max_loop; i++) {
- disks[i] = alloc_disk(1);
- if (!disks[i])
- goto out_mem3;
- }
-
- mutex_init(&lloop_mutex);
-
- for (i = 0; i < max_loop; i++) {
- struct lloop_device *lo = &loop_dev[i];
- struct gendisk *disk = disks[i];
-
- lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
- if (!lo->lo_queue)
- goto out_mem4;
-
- mutex_init(&lo->lo_ctl_mutex);
- sema_init(&lo->lo_sem, 0);
- init_waitqueue_head(&lo->lo_bh_wait);
- lo->lo_number = i;
- spin_lock_init(&lo->lo_lock);
- disk->major = lloop_major;
- disk->first_minor = i;
- disk->fops = &lo_fops;
- sprintf(disk->disk_name, "lloop%d", i);
- disk->private_data = lo;
- disk->queue = lo->lo_queue;
- }
-
- /* We cannot fail after we call this, so another loop!*/
- for (i = 0; i < max_loop; i++)
- add_disk(disks[i]);
- return 0;
-
-out_mem4:
- while (i--)
- blk_cleanup_queue(loop_dev[i].lo_queue);
- i = max_loop;
-out_mem3:
- while (i--)
- put_disk(disks[i]);
- kfree(disks);
-out_mem2:
- kfree(loop_dev);
-out_mem1:
- unregister_blkdev(lloop_major, "lloop");
- ll_iocontrol_unregister(ll_iocontrol_magic);
- CERROR("lloop: ran out of memory\n");
- return -ENOMEM;
-}
-
-static void lloop_exit(void)
-{
- int i;
-
- ll_iocontrol_unregister(ll_iocontrol_magic);
- for (i = 0; i < max_loop; i++) {
- del_gendisk(disks[i]);
- blk_cleanup_queue(loop_dev[i].lo_queue);
- put_disk(disks[i]);
- }
-
- unregister_blkdev(lloop_major, "lloop");
-
- kfree(disks);
- kfree(loop_dev);
-}
-
-module_param(max_loop, int, 0444);
-MODULE_PARM_DESC(max_loop, "maximum of lloop_device");
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre virtual block device");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(lloop_init);
-module_exit(lloop_exit);
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index 27ab1261400e..e86bf3c53be3 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -180,11 +176,7 @@ LUSTRE_RO_ATTR(filesfree);
static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%s client\n",
- sbi->ll_flags & LL_SBI_RMT_CLIENT ? "remote" : "local");
+ return sprintf(buf, "local client\n");
}
LUSTRE_RO_ATTR(client_type);
@@ -254,7 +246,6 @@ static ssize_t max_read_ahead_mb_store(struct kobject *kobj,
pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
if (pages_number > totalram_pages / 2) {
-
CERROR("can't set file readahead more than %lu MB\n",
totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/
return -ERANGE;
@@ -365,7 +356,7 @@ static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
{
struct super_block *sb = m->private;
struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = &sbi->ll_cache;
+ struct cl_client_cache *cache = sbi->ll_cache;
int shift = 20 - PAGE_SHIFT;
int max_cached_mb;
int unused_mb;
@@ -392,7 +383,9 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
{
struct super_block *sb = ((struct seq_file *)file->private_data)->private;
struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = &sbi->ll_cache;
+ struct cl_client_cache *cache = sbi->ll_cache;
+ struct lu_env *env;
+ int refcheck;
int mult, rc, pages_number;
int diff = 0;
int nrpages = 0;
@@ -430,6 +423,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
goto out;
}
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return 0;
+
diff = -diff;
while (diff > 0) {
int tmp;
@@ -455,19 +452,20 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
break;
if (!sbi->ll_dt_exp) { /* being initialized */
- rc = -ENODEV;
- break;
+ rc = 0;
+ goto out;
}
/* difficult - have to ask OSCs to drop LRU slots. */
tmp = diff << 1;
- rc = obd_set_info_async(NULL, sbi->ll_dt_exp,
+ rc = obd_set_info_async(env, sbi->ll_dt_exp,
sizeof(KEY_CACHE_LRU_SHRINK),
KEY_CACHE_LRU_SHRINK,
sizeof(tmp), &tmp, NULL);
if (rc < 0)
break;
}
+ cl_env_put(env, &refcheck);
out:
if (rc >= 0) {
@@ -818,6 +816,23 @@ static ssize_t xattr_cache_store(struct kobject *kobj,
}
LUSTRE_RW_ATTR(xattr_cache);
+static ssize_t unstable_stats_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kobj);
+ struct cl_client_cache *cache = sbi->ll_cache;
+ int pages, mb;
+
+ pages = atomic_read(&cache->ccc_unstable_nr);
+ mb = (pages * PAGE_SIZE) >> 20;
+
+ return sprintf(buf, "unstable_pages: %8d\n"
+ "unstable_mb: %8d\n", pages, mb);
+}
+LUSTRE_RO_ATTR(unstable_stats);
+
static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
/* { "mntpt_path", ll_rd_path, 0, 0 }, */
{ "site", &ll_site_stats_fops, NULL, 0 },
@@ -853,6 +868,7 @@ static struct attribute *llite_attrs[] = {
&lustre_attr_max_easize.attr,
&lustre_attr_default_easize.attr,
&lustre_attr_xattr_cache.attr,
+ &lustre_attr_unstable_stats.attr,
NULL,
};
@@ -953,6 +969,7 @@ static const char *ra_stat_string[] = {
[RA_STAT_EOF] = "read-ahead to EOF",
[RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
[RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
+ [RA_STAT_FAILED_REACH_END] = "failed to reach end"
};
int ldebugfs_register_mountpoint(struct dentry *parent,
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index f8f98e4e8258..2c4dc69731e8 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -128,12 +124,14 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
if (rc != 0) {
iget_failed(inode);
inode = NULL;
- } else
+ } else {
unlock_new_inode(inode);
- } else if (!(inode->i_state & (I_FREEING | I_CLEAR)))
+ }
+ } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
ll_update_inode(inode, md);
- CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n",
- inode, PFID(&md->body->fid1));
+ CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p)\n",
+ PFID(&md->body->fid1), inode);
+ }
}
return inode;
}
@@ -142,7 +140,7 @@ static void ll_invalidate_negative_children(struct inode *dir)
{
struct dentry *dentry, *tmp_subdir;
- ll_lock_dcache(dir);
+ spin_lock(&dir->i_lock);
hlist_for_each_entry(dentry, &dir->i_dentry, d_u.d_alias) {
spin_lock(&dentry->d_lock);
if (!list_empty(&dentry->d_subdirs)) {
@@ -157,7 +155,7 @@ static void ll_invalidate_negative_children(struct inode *dir)
}
spin_unlock(&dentry->d_lock);
}
- ll_unlock_dcache(dir);
+ spin_unlock(&dir->i_lock);
}
int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
@@ -188,7 +186,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
break;
/* Invalidate all dentries associated with this inode */
- LASSERT(lock->l_flags & LDLM_FL_CANCELING);
+ LASSERT(ldlm_is_canceling(lock));
if (!fid_res_name_eq(ll_inode2fid(inode),
&lock->l_resource->lr_name)) {
@@ -255,8 +253,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
}
if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
- CDEBUG(D_INODE, "invalidating inode %lu\n",
- inode->i_ino);
+ CDEBUG(D_INODE, "invalidating inode "DFID"\n",
+ PFID(ll_inode2fid(inode)));
truncate_inode_pages(inode->i_mapping, 0);
ll_invalidate_negative_children(inode);
}
@@ -316,9 +314,10 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
if (hlist_empty(&inode->i_dentry))
return NULL;
- discon_alias = invalid_alias = NULL;
+ discon_alias = NULL;
+ invalid_alias = NULL;
- ll_lock_dcache(inode);
+ spin_lock(&inode->i_lock);
hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
LASSERT(alias != dentry);
@@ -343,7 +342,7 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
dget_dlock(alias);
spin_unlock(&alias->d_lock);
}
- ll_unlock_dcache(inode);
+ spin_unlock(&inode->i_lock);
return alias;
}
@@ -389,12 +388,13 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
struct inode *inode = NULL;
__u64 bits = 0;
int rc = 0;
+ struct dentry *alias;
/* NB 1 request reference will be taken away by ll_intent_lock()
* when I return
*/
CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it,
- it->d.lustre.it_disposition);
+ it->it_disposition);
if (!it_disposition(it, DISP_LOOKUP_NEG)) {
rc = ll_prep_inode(&inode, request, (*de)->d_sb, it);
if (rc)
@@ -413,26 +413,12 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
*/
}
- /* Only hash *de if it is unhashed (new dentry).
- * Atoimc_open may passing hashed dentries for open.
- */
- if (d_unhashed(*de)) {
- struct dentry *alias;
-
- alias = ll_splice_alias(inode, *de);
- if (IS_ERR(alias)) {
- rc = PTR_ERR(alias);
- goto out;
- }
- *de = alias;
- } else if (!it_disposition(it, DISP_LOOKUP_NEG) &&
- !it_disposition(it, DISP_OPEN_CREATE)) {
- /* With DISP_OPEN_CREATE dentry will be
- * instantiated in ll_create_it.
- */
- LASSERT(!d_inode(*de));
- d_instantiate(*de, inode);
+ alias = ll_splice_alias(inode, *de);
+ if (IS_ERR(alias)) {
+ rc = PTR_ERR(alias);
+ goto out;
}
+ *de = alias;
if (!it_disposition(it, DISP_LOOKUP_NEG)) {
/* we have lookup look - unhide dentry */
@@ -446,7 +432,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
/* Check that parent has UPDATE lock. */
struct lookup_intent parent_it = {
.it_op = IT_GETATTR,
- .d.lustre.it_lock_handle = 0 };
+ .it_lock_handle = 0 };
if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it,
&ll_i2info(parent)->lli_fid, NULL)) {
@@ -476,9 +462,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
return ERR_PTR(-ENAMETOOLONG);
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n",
- dentry, parent->i_ino,
- parent->i_generation, parent, LL_IT2STR(it));
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),intent=%s\n",
+ dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it));
if (d_mountpoint(dentry))
CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
@@ -553,9 +538,8 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
struct dentry *de;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u\n",
- dentry, parent->i_ino,
- parent->i_generation, parent, flags);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),flags=%u\n",
+ dentry, PFID(ll_inode2fid(parent)), parent, flags);
/* Optimize away (CREATE && !OPEN). Let .create handle the race. */
if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN))
@@ -586,10 +570,27 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
long long lookup_flags = LOOKUP_OPEN;
int rc = 0;
- CDEBUG(D_VFSTRACE,
- "VFS Op:name=%pd,dir=%lu/%u(%p),file %p,open_flags %x,mode %x opened %d\n",
- dentry, dir->i_ino,
- dir->i_generation, dir, file, open_flags, mode, *opened);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),file %p,open_flags %x,mode %x opened %d\n",
+ dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
+ *opened);
+
+ /* Only negative dentries enter here */
+ LASSERT(!d_inode(dentry));
+
+ if (!d_in_lookup(dentry)) {
+ /* A valid negative dentry that just passed revalidation,
+ * there's little point to try and open it server-side,
+ * even though there's a minuscle chance it might succeed.
+ * Either way it's a valid race to just return -ENOENT here.
+ */
+ if (!(open_flags & O_CREAT))
+ return -ENOENT;
+
+ /* Otherwise we just unhash it to be rehashed afresh via
+ * lookup if necessary
+ */
+ d_drop(dentry);
+ }
it = kzalloc(sizeof(*it), GFP_NOFS);
if (!it)
@@ -626,13 +627,10 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
if (d_really_is_positive(dentry) && it_disposition(it, DISP_OPEN_OPEN)) {
/* Open dentry. */
if (S_ISFIFO(d_inode(dentry)->i_mode)) {
- /* We cannot call open here as it would
- * deadlock.
+ /* We cannot call open here as it might
+ * deadlock. This case is unreachable in
+ * practice because of OBD_CONNECT_NODEVOH.
*/
- if (it_disposition(it, DISP_ENQ_OPEN_REF))
- ptlrpc_req_finished(
- (struct ptlrpc_request *)
- it->d.lustre.it_data);
rc = finish_no_open(file, de);
} else {
file->private_data = it;
@@ -663,10 +661,10 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
struct ll_sb_info *sbi = ll_i2sbi(dir);
int rc;
- LASSERT(it && it->d.lustre.it_disposition);
+ LASSERT(it && it->it_disposition);
LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF));
- request = it->d.lustre.it_data;
+ request = it->it_request;
it_clear_disposition(it, DISP_ENQ_CREATE_REF);
rc = ll_prep_inode(&inode, request, dir->i_sb, it);
if (rc) {
@@ -680,8 +678,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
* lock on the inode. Since we finally have an inode pointer,
* stuff it in the lock.
*/
- CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n",
- inode, inode->i_ino, inode->i_generation);
+ CDEBUG(D_DLMTRACE, "setting l_ast_data to inode "DFID"(%p)\n",
+ PFID(ll_inode2fid(dir)), inode);
ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
out:
ptlrpc_req_finished(request);
@@ -708,9 +706,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
struct inode *inode;
int rc = 0;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n",
- dentry, dir->i_ino,
- dir->i_generation, dir, LL_IT2STR(it));
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), intent=%s\n",
+ dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it));
rc = it_open_error(DISP_OPEN_CREATE, it);
if (rc)
@@ -733,8 +730,9 @@ static void ll_update_times(struct ptlrpc_request *request,
LASSERT(body);
if (body->valid & OBD_MD_FLMTIME &&
body->mtime > LTIME_S(inode->i_mtime)) {
- CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n",
- inode->i_ino, LTIME_S(inode->i_mtime), body->mtime);
+ CDEBUG(D_INODE, "setting fid "DFID" mtime from %lu to %llu\n",
+ PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime),
+ body->mtime);
LTIME_S(inode->i_mtime) = body->mtime;
}
if (body->valid & OBD_MD_FLCTIME &&
@@ -791,9 +789,9 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild,
{
int err;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p) mode %o dev %x\n",
- dchild, dir->i_ino, dir->i_generation, dir,
- mode, old_encode_dev(rdev));
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n",
+ dchild, PFID(ll_inode2fid(dir)), dir, mode,
+ old_encode_dev(rdev));
if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
mode &= ~current_umask();
@@ -831,9 +829,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
{
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u, excl=%d\n",
- dentry, dir->i_ino,
- dir->i_generation, dir, mode, want_excl);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u, excl=%d\n",
+ dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl);
rc = ll_mknod(dir, dentry, mode, 0);
@@ -845,12 +842,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
return rc;
}
-static inline void ll_get_child_fid(struct dentry *child, struct lu_fid *fid)
-{
- if (d_really_is_positive(child))
- *fid = *ll_inode2fid(d_inode(child));
-}
-
int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
{
struct mdt_body *body;
@@ -927,23 +918,25 @@ out:
* is any lock existing. They will recycle dentries and inodes based upon locks
* too. b=20433
*/
-static int ll_unlink(struct inode *dir, struct dentry *dentry)
+static int ll_unlink(struct inode *dir, struct dentry *dchild)
{
struct ptlrpc_request *request = NULL;
struct md_op_data *op_data;
int rc;
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
- dentry, dir->i_ino, dir->i_generation, dir);
+ dchild, dir->i_ino, dir->i_generation, dir);
op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dentry->d_name.name,
- dentry->d_name.len,
+ dchild->d_name.name,
+ dchild->d_name.len,
0, LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- ll_get_child_fid(dentry, &op_data->op_fid3);
+ if (dchild && dchild->d_inode)
+ op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
+
op_data->op_fid2 = op_data->op_fid3;
rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
ll_finish_md_op_data(op_data);
@@ -963,8 +956,8 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
int err;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
- dentry, dir->i_ino, dir->i_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir"DFID"(%p)\n",
+ dentry, PFID(ll_inode2fid(dir)), dir);
if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
mode &= ~current_umask();
@@ -977,23 +970,25 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return err;
}
-static int ll_rmdir(struct inode *dir, struct dentry *dentry)
+static int ll_rmdir(struct inode *dir, struct dentry *dchild)
{
struct ptlrpc_request *request = NULL;
struct md_op_data *op_data;
int rc;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
- dentry, dir->i_ino, dir->i_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
+ dchild, PFID(ll_inode2fid(dir)), dir);
op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dentry->d_name.name,
- dentry->d_name.len,
+ dchild->d_name.name,
+ dchild->d_name.len,
S_IFDIR, LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- ll_get_child_fid(dentry, &op_data->op_fid3);
+ if (dchild && dchild->d_inode)
+ op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
+
op_data->op_fid2 = op_data->op_fid3;
rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
ll_finish_md_op_data(op_data);
@@ -1011,9 +1006,8 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry,
{
int err;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),target=%.*s\n",
- dentry, dir->i_ino, dir->i_generation,
- dir, 3000, oldname);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),target=%.*s\n",
+ dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname);
err = ll_new_node(dir, dentry, oldname, S_IFLNK | S_IRWXUGO,
0, LUSTRE_OPC_SYMLINK);
@@ -1033,10 +1027,9 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir,
struct md_op_data *op_data;
int err;
- CDEBUG(D_VFSTRACE,
- "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%pd\n",
- src->i_ino, src->i_generation, src, dir->i_ino,
- dir->i_generation, dir, new_dentry);
+ CDEBUG(D_VFSTRACE, "VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n",
+ PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir,
+ new_dentry);
op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name,
new_dentry->d_name.len,
@@ -1056,42 +1049,45 @@ out:
return err;
}
-static int ll_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+static int ll_rename(struct inode *src, struct dentry *src_dchild,
+ struct inode *tgt, struct dentry *tgt_dchild)
{
struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(old_dir);
+ struct ll_sb_info *sbi = ll_i2sbi(src);
struct md_op_data *op_data;
int err;
CDEBUG(D_VFSTRACE,
- "VFS Op:oldname=%pd,src_dir=%lu/%u(%p),newname=%pd,tgt_dir=%lu/%u(%p)\n",
- old_dentry, old_dir->i_ino, old_dir->i_generation, old_dir,
- new_dentry, new_dir->i_ino, new_dir->i_generation, new_dir);
+ "VFS Op:oldname=%pd, src_dir="DFID"(%p), newname=%pd, tgt_dir="DFID"(%p)\n",
+ src_dchild, PFID(ll_inode2fid(src)), src,
+ tgt_dchild, PFID(ll_inode2fid(tgt)), tgt);
- op_data = ll_prep_md_op_data(NULL, old_dir, new_dir, NULL, 0, 0,
+ op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0,
LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- ll_get_child_fid(old_dentry, &op_data->op_fid3);
- ll_get_child_fid(new_dentry, &op_data->op_fid4);
+ if (src_dchild && src_dchild->d_inode)
+ op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);
+ if (tgt_dchild && tgt_dchild->d_inode)
+ op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
+
err = md_rename(sbi->ll_md_exp, op_data,
- old_dentry->d_name.name,
- old_dentry->d_name.len,
- new_dentry->d_name.name,
- new_dentry->d_name.len, &request);
+ src_dchild->d_name.name,
+ src_dchild->d_name.len,
+ tgt_dchild->d_name.name,
+ tgt_dchild->d_name.len, &request);
ll_finish_md_op_data(op_data);
if (!err) {
- ll_update_times(request, old_dir);
- ll_update_times(request, new_dir);
+ ll_update_times(request, src);
+ ll_update_times(request, tgt);
ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
- err = ll_objects_destroy(request, old_dir);
+ err = ll_objects_destroy(request, src);
}
ptlrpc_req_finished(request);
if (!err)
- d_move(old_dentry, new_dentry);
+ d_move(src_dchild, tgt_dchild);
return err;
}
diff --git a/drivers/staging/lustre/lustre/llite/remote_perm.c b/drivers/staging/lustre/lustre/llite/remote_perm.c
deleted file mode 100644
index e9d25317cd28..000000000000
--- a/drivers/staging/lustre/lustre/llite/remote_perm.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/remote_perm.c
- *
- * Lustre Permission Cache for Remote Client
- *
- * Author: Lai Siyao <lsy@clusterfs.com>
- * Author: Fan Yong <fanyong@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/module.h>
-#include <linux/types.h>
-
-#include "../include/lustre_lite.h"
-#include "../include/lustre_ha.h"
-#include "../include/lustre_dlm.h"
-#include "../include/lprocfs_status.h"
-#include "../include/lustre_disk.h"
-#include "../include/lustre_param.h"
-#include "llite_internal.h"
-
-struct kmem_cache *ll_remote_perm_cachep;
-struct kmem_cache *ll_rmtperm_hash_cachep;
-
-static inline struct ll_remote_perm *alloc_ll_remote_perm(void)
-{
- struct ll_remote_perm *lrp;
-
- lrp = kmem_cache_zalloc(ll_remote_perm_cachep, GFP_KERNEL);
- if (lrp)
- INIT_HLIST_NODE(&lrp->lrp_list);
- return lrp;
-}
-
-static inline void free_ll_remote_perm(struct ll_remote_perm *lrp)
-{
- if (!lrp)
- return;
-
- if (!hlist_unhashed(&lrp->lrp_list))
- hlist_del(&lrp->lrp_list);
- kmem_cache_free(ll_remote_perm_cachep, lrp);
-}
-
-static struct hlist_head *alloc_rmtperm_hash(void)
-{
- struct hlist_head *hash;
- int i;
-
- hash = kmem_cache_zalloc(ll_rmtperm_hash_cachep, GFP_NOFS);
- if (!hash)
- return NULL;
-
- for (i = 0; i < REMOTE_PERM_HASHSIZE; i++)
- INIT_HLIST_HEAD(hash + i);
-
- return hash;
-}
-
-void free_rmtperm_hash(struct hlist_head *hash)
-{
- int i;
- struct ll_remote_perm *lrp;
- struct hlist_node *next;
-
- if (!hash)
- return;
-
- for (i = 0; i < REMOTE_PERM_HASHSIZE; i++)
- hlist_for_each_entry_safe(lrp, next, hash + i, lrp_list)
- free_ll_remote_perm(lrp);
- kmem_cache_free(ll_rmtperm_hash_cachep, hash);
-}
-
-static inline int remote_perm_hashfunc(uid_t uid)
-{
- return uid & (REMOTE_PERM_HASHSIZE - 1);
-}
-
-/* NB: setxid permission is not checked here, instead it's done on
- * MDT when client get remote permission.
- */
-static int do_check_remote_perm(struct ll_inode_info *lli, int mask)
-{
- struct hlist_head *head;
- struct ll_remote_perm *lrp;
- int found = 0, rc;
-
- if (!lli->lli_remote_perms)
- return -ENOENT;
-
- head = lli->lli_remote_perms +
- remote_perm_hashfunc(from_kuid(&init_user_ns, current_uid()));
-
- spin_lock(&lli->lli_lock);
- hlist_for_each_entry(lrp, head, lrp_list) {
- if (lrp->lrp_uid != from_kuid(&init_user_ns, current_uid()))
- continue;
- if (lrp->lrp_gid != from_kgid(&init_user_ns, current_gid()))
- continue;
- if (lrp->lrp_fsuid != from_kuid(&init_user_ns, current_fsuid()))
- continue;
- if (lrp->lrp_fsgid != from_kgid(&init_user_ns, current_fsgid()))
- continue;
- found = 1;
- break;
- }
-
- if (!found) {
- rc = -ENOENT;
- goto out;
- }
-
- CDEBUG(D_SEC, "found remote perm: %u/%u/%u/%u - %#x\n",
- lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid,
- lrp->lrp_access_perm);
- rc = ((lrp->lrp_access_perm & mask) == mask) ? 0 : -EACCES;
-
-out:
- spin_unlock(&lli->lli_lock);
- return rc;
-}
-
-int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_remote_perm *lrp = NULL, *tmp = NULL;
- struct hlist_head *head, *perm_hash = NULL;
-
- LASSERT(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT);
-
-#if 0
- if (perm->rp_uid != current->uid ||
- perm->rp_gid != current->gid ||
- perm->rp_fsuid != current->fsuid ||
- perm->rp_fsgid != current->fsgid) {
- /* user might setxid in this small period */
- CDEBUG(D_SEC,
- "remote perm user %u/%u/%u/%u != current %u/%u/%u/%u\n",
- perm->rp_uid, perm->rp_gid, perm->rp_fsuid,
- perm->rp_fsgid, current->uid, current->gid,
- current->fsuid, current->fsgid);
- return -EAGAIN;
- }
-#endif
-
- if (!lli->lli_remote_perms) {
- perm_hash = alloc_rmtperm_hash();
- if (!perm_hash) {
- CERROR("alloc lli_remote_perms failed!\n");
- return -ENOMEM;
- }
- }
-
- spin_lock(&lli->lli_lock);
-
- if (!lli->lli_remote_perms)
- lli->lli_remote_perms = perm_hash;
- else
- free_rmtperm_hash(perm_hash);
-
- head = lli->lli_remote_perms + remote_perm_hashfunc(perm->rp_uid);
-
-again:
- hlist_for_each_entry(tmp, head, lrp_list) {
- if (tmp->lrp_uid != perm->rp_uid)
- continue;
- if (tmp->lrp_gid != perm->rp_gid)
- continue;
- if (tmp->lrp_fsuid != perm->rp_fsuid)
- continue;
- if (tmp->lrp_fsgid != perm->rp_fsgid)
- continue;
- free_ll_remote_perm(lrp);
- lrp = tmp;
- break;
- }
-
- if (!lrp) {
- spin_unlock(&lli->lli_lock);
- lrp = alloc_ll_remote_perm();
- if (!lrp) {
- CERROR("alloc memory for ll_remote_perm failed!\n");
- return -ENOMEM;
- }
- spin_lock(&lli->lli_lock);
- goto again;
- }
-
- lrp->lrp_access_perm = perm->rp_access_perm;
- if (lrp != tmp) {
- lrp->lrp_uid = perm->rp_uid;
- lrp->lrp_gid = perm->rp_gid;
- lrp->lrp_fsuid = perm->rp_fsuid;
- lrp->lrp_fsgid = perm->rp_fsgid;
- hlist_add_head(&lrp->lrp_list, head);
- }
- lli->lli_rmtperm_time = cfs_time_current();
- spin_unlock(&lli->lli_lock);
-
- CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n",
- lrp, lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid,
- lrp->lrp_access_perm);
-
- return 0;
-}
-
-int lustre_check_remote_perm(struct inode *inode, int mask)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- struct mdt_remote_perm *perm;
- unsigned long save;
- int i = 0, rc;
-
- do {
- save = lli->lli_rmtperm_time;
- rc = do_check_remote_perm(lli, mask);
- if (!rc || (rc != -ENOENT && i))
- break;
-
- might_sleep();
-
- mutex_lock(&lli->lli_rmtperm_mutex);
- /* check again */
- if (save != lli->lli_rmtperm_time) {
- rc = do_check_remote_perm(lli, mask);
- if (!rc || (rc != -ENOENT && i)) {
- mutex_unlock(&lli->lli_rmtperm_mutex);
- break;
- }
- }
-
- if (i++ > 5) {
- CERROR("check remote perm falls in dead loop!\n");
- LBUG();
- }
-
- rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode),
- ll_i2suppgid(inode), &req);
- if (rc) {
- mutex_unlock(&lli->lli_rmtperm_mutex);
- break;
- }
-
- perm = req_capsule_server_swab_get(&req->rq_pill, &RMF_ACL,
- lustre_swab_mdt_remote_perm);
- if (unlikely(!perm)) {
- mutex_unlock(&lli->lli_rmtperm_mutex);
- rc = -EPROTO;
- break;
- }
-
- rc = ll_update_remote_perm(inode, perm);
- mutex_unlock(&lli->lli_rmtperm_mutex);
- if (rc == -ENOMEM)
- break;
-
- ptlrpc_req_finished(req);
- req = NULL;
- } while (1);
- ptlrpc_req_finished(req);
- return rc;
-}
-
-#if 0 /* NB: remote perms can't be freed in ll_mdc_blocking_ast of UPDATE lock,
- * because it will fail sanity test 48.
- */
-void ll_free_remote_perms(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct hlist_head *hash = lli->lli_remote_perms;
- struct ll_remote_perm *lrp;
- struct hlist_node *node, *next;
- int i;
-
- LASSERT(hash);
-
- spin_lock(&lli->lli_lock);
-
- for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) {
- hlist_for_each_entry_safe(lrp, node, next, hash + i, lrp_list)
- free_ll_remote_perm(lrp);
- }
-
- spin_unlock(&lli->lli_lock);
-}
-#endif
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index edab6c5b7e50..87393c4bd51e 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -59,225 +55,6 @@
#include "llite_internal.h"
#include "../include/linux/lustre_compat25.h"
-/**
- * Finalizes cl-data before exiting typical address_space operation. Dual to
- * ll_cl_init().
- */
-static void ll_cl_fini(struct ll_cl_context *lcc)
-{
- struct lu_env *env = lcc->lcc_env;
- struct cl_io *io = lcc->lcc_io;
- struct cl_page *page = lcc->lcc_page;
-
- LASSERT(lcc->lcc_cookie == current);
- LASSERT(env);
-
- if (page) {
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
-
- cl_env_put(env, &lcc->lcc_refcheck);
-}
-
-/**
- * Initializes common cl-data at the typical address_space operation entry
- * point.
- */
-static struct ll_cl_context *ll_cl_init(struct file *file,
- struct page *vmpage, int create)
-{
- struct ll_cl_context *lcc;
- struct lu_env *env;
- struct cl_io *io;
- struct cl_object *clob;
- struct ccc_io *cio;
-
- int refcheck;
- int result = 0;
-
- clob = ll_i2info(vmpage->mapping->host)->lli_clob;
- LASSERT(clob);
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return ERR_CAST(env);
-
- lcc = &vvp_env_info(env)->vti_io_ctx;
- memset(lcc, 0, sizeof(*lcc));
- lcc->lcc_env = env;
- lcc->lcc_refcheck = refcheck;
- lcc->lcc_cookie = current;
-
- cio = ccc_env_io(env);
- io = cio->cui_cl.cis_io;
- if (!io && create) {
- struct inode *inode = vmpage->mapping->host;
- loff_t pos;
-
- if (inode_trylock(inode)) {
- inode_unlock((inode));
-
- /* this is too bad. Someone is trying to write the
- * page w/o holding inode mutex. This means we can
- * add dirty pages into cache during truncate
- */
- CERROR("Proc %s is dirtying page w/o inode lock, this will break truncate\n",
- current->comm);
- dump_stack();
- LBUG();
- return ERR_PTR(-EIO);
- }
-
- /*
- * Loop-back driver calls ->prepare_write().
- * methods directly, bypassing file system ->write() operation,
- * so cl_io has to be created here.
- */
- io = ccc_env_thread_io(env);
- ll_io_init(io, file, 1);
-
- /* No lock at all for this kind of IO - we can't do it because
- * we have held page lock, it would cause deadlock.
- * XXX: This causes poor performance to loop device - One page
- * per RPC.
- * In order to get better performance, users should use
- * lloop driver instead.
- */
- io->ci_lockreq = CILR_NEVER;
-
- pos = vmpage->index << PAGE_SHIFT;
-
- /* Create a temp IO to serve write. */
- result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_SIZE);
- if (result == 0) {
- cio->cui_fd = LUSTRE_FPRIVATE(file);
- cio->cui_iter = NULL;
- result = cl_io_iter_init(env, io);
- if (result == 0) {
- result = cl_io_lock(env, io);
- if (result == 0)
- result = cl_io_start(env, io);
- }
- } else
- result = io->ci_result;
- }
-
- lcc->lcc_io = io;
- if (!io)
- result = -EIO;
- if (result == 0) {
- struct cl_page *page;
-
- LASSERT(io->ci_state == CIS_IO_GOING);
- LASSERT(cio->cui_fd == LUSTRE_FPRIVATE(file));
- page = cl_page_find(env, clob, vmpage->index, vmpage,
- CPT_CACHEABLE);
- if (!IS_ERR(page)) {
- lcc->lcc_page = page;
- lu_ref_add(&page->cp_reference, "cl_io", io);
- result = 0;
- } else
- result = PTR_ERR(page);
- }
- if (result) {
- ll_cl_fini(lcc);
- lcc = ERR_PTR(result);
- }
-
- CDEBUG(D_VFSTRACE, "%lu@"DFID" -> %d %p %p\n",
- vmpage->index, PFID(lu_object_fid(&clob->co_lu)), result,
- env, io);
- return lcc;
-}
-
-static struct ll_cl_context *ll_cl_get(void)
-{
- struct ll_cl_context *lcc;
- struct lu_env *env;
- int refcheck;
-
- env = cl_env_get(&refcheck);
- LASSERT(!IS_ERR(env));
- lcc = &vvp_env_info(env)->vti_io_ctx;
- LASSERT(env == lcc->lcc_env);
- LASSERT(current == lcc->lcc_cookie);
- cl_env_put(env, &refcheck);
-
- /* env has got in ll_cl_init, so it is still usable. */
- return lcc;
-}
-
-/**
- * ->prepare_write() address space operation called by generic_file_write()
- * for every page during write.
- */
-int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from,
- unsigned to)
-{
- struct ll_cl_context *lcc;
- int result;
-
- lcc = ll_cl_init(file, vmpage, 1);
- if (!IS_ERR(lcc)) {
- struct lu_env *env = lcc->lcc_env;
- struct cl_io *io = lcc->lcc_io;
- struct cl_page *page = lcc->lcc_page;
-
- cl_page_assume(env, io, page);
-
- result = cl_io_prepare_write(env, io, page, from, to);
- if (result == 0) {
- /*
- * Add a reference, so that page is not evicted from
- * the cache until ->commit_write() is called.
- */
- cl_page_get(page);
- lu_ref_add(&page->cp_reference, "prepare_write",
- current);
- } else {
- cl_page_unassume(env, io, page);
- ll_cl_fini(lcc);
- }
- /* returning 0 in prepare assumes commit must be called
- * afterwards
- */
- } else {
- result = PTR_ERR(lcc);
- }
- return result;
-}
-
-int ll_commit_write(struct file *file, struct page *vmpage, unsigned from,
- unsigned to)
-{
- struct ll_cl_context *lcc;
- struct lu_env *env;
- struct cl_io *io;
- struct cl_page *page;
- int result = 0;
-
- lcc = ll_cl_get();
- env = lcc->lcc_env;
- page = lcc->lcc_page;
- io = lcc->lcc_io;
-
- LASSERT(cl_page_is_owned(page, io));
- LASSERT(from <= to);
- if (from != to) /* handle short write case. */
- result = cl_io_commit_write(env, io, page, from, to);
- if (cl_page_is_owned(page, io))
- cl_page_unassume(env, io, page);
-
- /*
- * Release reference acquired by ll_prepare_write().
- */
- lu_ref_del(&page->cp_reference, "prepare_write", current);
- cl_page_put(env, page);
- ll_cl_fini(lcc);
- return result;
-}
-
static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
/**
@@ -301,7 +78,7 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
*/
static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
struct ra_io_arg *ria,
- unsigned long pages)
+ unsigned long pages, unsigned long min)
{
struct ll_ra_info *ra = &sbi->ll_ra_info;
long ret;
@@ -341,6 +118,11 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
}
out:
+ if (ret < min) {
+ /* override ra limit for maximum performance */
+ atomic_add(min - ret, &ra->ra_cur_pages);
+ ret = min;
+ }
return ret;
}
@@ -357,9 +139,9 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
lprocfs_counter_incr(sbi->ll_ra_stats, which);
}
-void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
+void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
{
- struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
ll_ra_stats_inc_sbi(sbi, which);
}
@@ -388,61 +170,42 @@ static int index_in_window(unsigned long index, unsigned long point,
return start <= index && index <= end;
}
-static struct ll_readahead_state *ll_ras_get(struct file *f)
+void ll_ras_enter(struct file *f)
{
- struct ll_file_data *fd;
-
- fd = LUSTRE_FPRIVATE(f);
- return &fd->fd_ras;
-}
-
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
-{
- struct ll_readahead_state *ras;
-
- ras = ll_ras_get(f);
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(f);
+ struct ll_readahead_state *ras = &fd->fd_ras;
spin_lock(&ras->ras_lock);
ras->ras_requests++;
ras->ras_request_index = 0;
ras->ras_consecutive_requests++;
- rar->lrr_reader = current;
-
- list_add(&rar->lrr_linkage, &ras->ras_read_beads);
- spin_unlock(&ras->ras_lock);
-}
-
-void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
-{
- struct ll_readahead_state *ras;
-
- ras = ll_ras_get(f);
-
- spin_lock(&ras->ras_lock);
- list_del_init(&rar->lrr_linkage);
spin_unlock(&ras->ras_lock);
}
static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue, struct cl_page *page,
- struct page *vmpage)
+ struct cl_object *clob, pgoff_t *max_index)
{
- struct ccc_page *cp;
+ struct page *vmpage = page->cp_vmpage;
+ struct vvp_page *vpg;
int rc;
rc = 0;
cl_page_assume(env, io, page);
lu_ref_add(&page->cp_reference, "ra", current);
- cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
- if (!cp->cpg_defer_uptodate && !PageUptodate(vmpage)) {
- rc = cl_page_is_under_lock(env, io, page);
- if (rc == -EBUSY) {
- cp->cpg_defer_uptodate = 1;
- cp->cpg_ra_used = 0;
+ vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+ if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
+ CDEBUG(D_READA, "page index %lu, max_index: %lu\n",
+ vvp_index(vpg), *max_index);
+ if (*max_index == 0 || vvp_index(vpg) > *max_index)
+ rc = cl_page_is_under_lock(env, io, page, max_index);
+ if (rc == 0) {
+ vpg->vpg_defer_uptodate = 1;
+ vpg->vpg_ra_used = 0;
cl_page_list_add(queue, page);
rc = 1;
} else {
- cl_page_delete(env, page);
+ cl_page_discard(env, io, page);
rc = -ENOLCK;
}
} else {
@@ -466,24 +229,25 @@ static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
*/
static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue,
- pgoff_t index, struct address_space *mapping)
+ pgoff_t index, pgoff_t *max_index)
{
+ struct cl_object *clob = io->ci_obj;
+ struct inode *inode = vvp_object_inode(clob);
struct page *vmpage;
- struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
struct cl_page *page;
enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
int rc = 0;
const char *msg = NULL;
- vmpage = grab_cache_page_nowait(mapping, index);
+ vmpage = grab_cache_page_nowait(inode->i_mapping, index);
if (vmpage) {
/* Check if vmpage was truncated or reclaimed */
- if (vmpage->mapping == mapping) {
+ if (vmpage->mapping == inode->i_mapping) {
page = cl_page_find(env, clob, vmpage->index,
vmpage, CPT_CACHEABLE);
if (!IS_ERR(page)) {
rc = cl_read_ahead_page(env, io, queue,
- page, vmpage);
+ page, clob, max_index);
if (rc == -ENOLCK) {
which = RA_STAT_FAILED_MATCH;
msg = "lock match failed";
@@ -504,7 +268,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
msg = "g_c_p_n failed";
}
if (msg) {
- ll_ra_stats_inc(mapping, which);
+ ll_ra_stats_inc(inode, which);
CDEBUG(D_READA, "%s\n", msg);
}
return rc;
@@ -616,11 +380,12 @@ static int ll_read_ahead_pages(const struct lu_env *env,
struct cl_io *io, struct cl_page_list *queue,
struct ra_io_arg *ria,
unsigned long *reserved_pages,
- struct address_space *mapping,
unsigned long *ra_end)
{
- int rc, count = 0, stride_ria;
- unsigned long page_idx;
+ int rc, count = 0;
+ bool stride_ria;
+ pgoff_t page_idx;
+ pgoff_t max_index = 0;
LASSERT(ria);
RIA_DEBUG(ria);
@@ -631,12 +396,13 @@ static int ll_read_ahead_pages(const struct lu_env *env,
if (ras_inside_ra_window(page_idx, ria)) {
/* If the page is inside the read-ahead window*/
rc = ll_read_ahead_page(env, io, queue,
- page_idx, mapping);
+ page_idx, &max_index);
if (rc == 1) {
(*reserved_pages)--;
count++;
- } else if (rc == -ENOLCK)
+ } else if (rc == -ENOLCK) {
break;
+ }
} else if (stride_ria) {
/* If it is not in the read-ahead window, and it is
* read-ahead mode, then check whether it should skip
@@ -666,25 +432,22 @@ static int ll_read_ahead_pages(const struct lu_env *env,
}
int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct ll_readahead_state *ras, struct address_space *mapping,
- struct cl_page_list *queue, int flags)
+ struct cl_page_list *queue, struct ll_readahead_state *ras,
+ bool hit)
{
struct vvp_io *vio = vvp_env_io(env);
- struct vvp_thread_info *vti = vvp_env_info(env);
- struct cl_attr *attr = ccc_env_thread_attr(env);
+ struct ll_thread_info *lti = ll_env_info(env);
+ struct cl_attr *attr = vvp_env_thread_attr(env);
unsigned long start = 0, end = 0, reserved;
- unsigned long ra_end, len;
+ unsigned long ra_end, len, mlen = 0;
struct inode *inode;
- struct ll_ra_read *bead;
- struct ra_io_arg *ria = &vti->vti_ria;
- struct ll_inode_info *lli;
+ struct ra_io_arg *ria = &lti->lti_ria;
struct cl_object *clob;
int ret = 0;
__u64 kms;
- inode = mapping->host;
- lli = ll_i2info(inode);
- clob = lli->lli_clob;
+ clob = io->ci_obj;
+ inode = vvp_object_inode(clob);
memset(ria, 0, sizeof(*ria));
@@ -696,22 +459,20 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
return ret;
kms = attr->cat_kms;
if (kms == 0) {
- ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
+ ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN);
return 0;
}
spin_lock(&ras->ras_lock);
- if (vio->cui_ra_window_set)
- bead = &vio->cui_bead;
- else
- bead = NULL;
/* Enlarge the RA window to encompass the full read */
- if (bead && ras->ras_window_start + ras->ras_window_len <
- bead->lrr_start + bead->lrr_count) {
- ras->ras_window_len = bead->lrr_start + bead->lrr_count -
+ if (vio->vui_ra_valid &&
+ ras->ras_window_start + ras->ras_window_len <
+ vio->vui_ra_start + vio->vui_ra_count) {
+ ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count -
ras->ras_window_start;
}
+
/* Reserve a part of the read-ahead window that we'll be issuing */
if (ras->ras_window_len) {
start = ras->ras_next_readahead;
@@ -755,29 +516,48 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
spin_unlock(&ras->ras_lock);
if (end == 0) {
- ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
+ ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
return 0;
}
len = ria_page_count(ria);
- if (len == 0)
+ if (len == 0) {
+ ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
return 0;
+ }
+
+ CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
+ PFID(lu_object_fid(&clob->co_lu)),
+ ria->ria_start, ria->ria_end,
+ vio->vui_ra_valid ? vio->vui_ra_start : 0,
+ vio->vui_ra_valid ? vio->vui_ra_count : 0,
+ hit);
+
+ /* at least to extend the readahead window to cover current read */
+ if (!hit && vio->vui_ra_valid &&
+ vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
+ /* to the end of current read window. */
+ mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
+ /* trim to RPC boundary */
+ start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1);
+ mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start);
+ }
- reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len);
+ reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
if (reserved < len)
- ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
+ ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
- CDEBUG(D_READA, "reserved page %lu ra_cur %d ra_max %lu\n", reserved,
+ CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
+ reserved, len, mlen,
atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
- ret = ll_read_ahead_pages(env, io, queue,
- ria, &reserved, mapping, &ra_end);
+ ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end);
if (reserved != 0)
ll_ra_count_put(ll_i2sbi(inode), reserved);
if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT))
- ll_ra_stats_inc(mapping, RA_STAT_EOF);
+ ll_ra_stats_inc(inode, RA_STAT_EOF);
/* if we didn't get to the end of the region we reserved from
* the ras we need to go back and update the ras so that the
@@ -789,6 +569,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
ra_end, end, ria->ria_end);
if (ra_end != end + 1) {
+ ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
spin_lock(&ras->ras_lock);
if (ra_end < ras->ras_next_readahead &&
index_in_window(ra_end, ras->ras_window_start, 0,
@@ -836,7 +617,6 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
spin_lock_init(&ras->ras_lock);
ras_reset(inode, ras, 0);
ras->ras_requests = 0;
- INIT_LIST_HEAD(&ras->ras_read_beads);
}
/*
@@ -1059,15 +839,18 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
ras->ras_last_readpage = index;
ras_set_start(inode, ras, index);
- if (stride_io_mode(ras))
+ if (stride_io_mode(ras)) {
/* Since stride readahead is sensitive to the offset
* of read-ahead, so we use original offset here,
* instead of ras_window_start, which is RPC aligned
*/
ras->ras_next_readahead = max(index, ras->ras_next_readahead);
- else
- ras->ras_next_readahead = max(ras->ras_window_start,
- ras->ras_next_readahead);
+ } else {
+ if (ras->ras_next_readahead < ras->ras_window_start)
+ ras->ras_next_readahead = ras->ras_window_start;
+ if (!hit)
+ ras->ras_next_readahead = index + 1;
+ }
RAS_CDEBUG(ras);
/* Trigger RA in the mmap case where ras_consecutive_requests
@@ -1129,7 +912,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
clob = ll_i2info(inode)->lli_clob;
LASSERT(clob);
- io = ccc_env_thread_io(env);
+ io = vvp_env_thread_io(env);
io->ci_obj = clob;
io->ci_ignore_layout = 1;
result = cl_io_init(env, io, CIT_MISC, clob);
@@ -1240,23 +1023,77 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
if (end == OBD_OBJECT_EOF)
- end = i_size_read(inode);
- mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
+ mapping->writeback_index = 0;
+ else
+ mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
}
return result;
}
+struct ll_cl_context *ll_cl_find(struct file *file)
+{
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct ll_cl_context *lcc;
+ struct ll_cl_context *found = NULL;
+
+ read_lock(&fd->fd_lock);
+ list_for_each_entry(lcc, &fd->fd_lccs, lcc_list) {
+ if (lcc->lcc_cookie == current) {
+ found = lcc;
+ break;
+ }
+ }
+ read_unlock(&fd->fd_lock);
+
+ return found;
+}
+
+void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io)
+{
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
+
+ memset(lcc, 0, sizeof(*lcc));
+ INIT_LIST_HEAD(&lcc->lcc_list);
+ lcc->lcc_cookie = current;
+ lcc->lcc_env = env;
+ lcc->lcc_io = io;
+
+ write_lock(&fd->fd_lock);
+ list_add(&lcc->lcc_list, &fd->fd_lccs);
+ write_unlock(&fd->fd_lock);
+}
+
+void ll_cl_remove(struct file *file, const struct lu_env *env)
+{
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
+
+ write_lock(&fd->fd_lock);
+ list_del_init(&lcc->lcc_list);
+ write_unlock(&fd->fd_lock);
+}
+
int ll_readpage(struct file *file, struct page *vmpage)
{
+ struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob;
struct ll_cl_context *lcc;
+ const struct lu_env *env;
+ struct cl_io *io;
+ struct cl_page *page;
int result;
- lcc = ll_cl_init(file, vmpage, 0);
- if (!IS_ERR(lcc)) {
- struct lu_env *env = lcc->lcc_env;
- struct cl_io *io = lcc->lcc_io;
- struct cl_page *page = lcc->lcc_page;
+ lcc = ll_cl_find(file);
+ if (!lcc) {
+ unlock_page(vmpage);
+ return -EIO;
+ }
+ env = lcc->lcc_env;
+ io = lcc->lcc_io;
+ LASSERT(io->ci_state == CIS_IO_GOING);
+ page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
+ if (!IS_ERR(page)) {
LASSERT(page->cp_type == CPT_CACHEABLE);
if (likely(!PageUptodate(vmpage))) {
cl_page_assume(env, io, page);
@@ -1266,10 +1103,35 @@ int ll_readpage(struct file *file, struct page *vmpage)
unlock_page(vmpage);
result = 0;
}
- ll_cl_fini(lcc);
+ cl_page_put(env, page);
} else {
unlock_page(vmpage);
- result = PTR_ERR(lcc);
+ result = PTR_ERR(page);
}
return result;
}
+
+int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, enum cl_req_type crt)
+{
+ struct cl_2queue *queue;
+ int result;
+
+ LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
+
+ queue = &io->ci_queue;
+ cl_2queue_init_page(queue, page);
+
+ result = cl_io_submit_sync(env, io, crt, queue, 0);
+ LASSERT(cl_page_is_owned(page, io));
+
+ if (crt == CRT_READ)
+ /*
+ * in CRT_WRITE case page is left locked even in case of
+ * error.
+ */
+ cl_page_list_disown(env, io, &queue->c2_qin);
+ cl_2queue_fini(env, queue);
+
+ return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 69aa15e8e3ef..d98c7acc0832 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -95,15 +91,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
if (obj) {
page = cl_vmpage_page(vmpage, obj);
if (page) {
- lu_ref_add(&page->cp_reference,
- "delete", vmpage);
cl_page_delete(env, page);
- lu_ref_del(&page->cp_reference,
- "delete", vmpage);
cl_page_put(env, page);
}
- } else
+ } else {
LASSERT(vmpage->private == 0);
+ }
cl_env_put(env, &refcheck);
}
}
@@ -111,12 +104,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
{
- struct cl_env_nest nest;
struct lu_env *env;
+ void *cookie;
struct cl_object *obj;
struct cl_page *page;
struct address_space *mapping;
- int result;
+ int result = 0;
LASSERT(PageLocked(vmpage));
if (PageWriteback(vmpage) || PageDirty(vmpage))
@@ -130,53 +123,42 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
if (!obj)
return 1;
- /* 1 for page allocator, 1 for cl_page and 1 for page cache */
+ /* 1 for caller, 1 for cl_page and 1 for page cache */
if (page_count(vmpage) > 3)
return 0;
- /* TODO: determine what gfp should be used by @gfp_mask. */
- env = cl_env_nested_get(&nest);
- if (IS_ERR(env))
- /* If we can't allocate an env we won't call cl_page_put()
- * later on which further means it's impossible to drop
- * page refcount by cl_page, so ask kernel to not free
- * this page.
- */
- return 0;
-
page = cl_vmpage_page(vmpage, obj);
- result = !page;
- if (page) {
- if (!cl_page_in_use(page)) {
- result = 1;
- cl_page_delete(env, page);
- }
- cl_page_put(env, page);
- }
- cl_env_nested_put(&nest, env);
- return result;
-}
+ if (!page)
+ return 1;
-static int ll_set_page_dirty(struct page *vmpage)
-{
-#if 0
- struct cl_page *page = vvp_vmpage_page_transient(vmpage);
- struct vvp_object *obj = cl_inode2vvp(vmpage->mapping->host);
- struct vvp_page *cpg;
+ cookie = cl_env_reenter();
+ env = cl_env_percpu_get();
+ LASSERT(!IS_ERR(env));
- /*
- * XXX should page method be called here?
- */
- LASSERT(&obj->co_cl == page->cp_obj);
- cpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
- /*
- * XXX cannot do much here, because page is possibly not locked:
- * sys_munmap()->...
- * ->unmap_page_range()->zap_pte_range()->set_page_dirty().
+ if (!cl_page_in_use(page)) {
+ result = 1;
+ cl_page_delete(env, page);
+ }
+
+ /* To use percpu env array, the call path can not be rescheduled;
+ * otherwise percpu array will be messed if ll_releaspage() called
+ * again on the same CPU.
+ *
+ * If this page holds the last refc of cl_object, the following
+ * call path may cause reschedule:
+ * cl_page_put -> cl_page_free -> cl_object_put ->
+ * lu_object_put -> lu_object_free -> lov_delete_raid0.
+ *
+ * However, the kernel can't get rid of this inode until all pages have
+ * been cleaned up. Now that we hold page lock here, it's pretty safe
+ * that we won't get into object delete path.
*/
- vvp_write_pending(obj, cpg);
-#endif
- return __set_page_dirty_nobuffers(vmpage);
+ LASSERT(cl_object_refc(obj) > 1);
+ cl_page_put(env, page);
+
+ cl_env_percpu_put(env);
+ cl_env_reexit(cookie);
+ return result;
}
#define MAX_DIRECTIO_SIZE (2*1024*1024*1024UL)
@@ -266,7 +248,7 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
* write directly
*/
if (clp->cp_type == CPT_CACHEABLE) {
- struct page *vmpage = cl_page_vmpage(env, clp);
+ struct page *vmpage = cl_page_vmpage(clp);
struct page *src_page;
struct page *dst_page;
void *src;
@@ -358,14 +340,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
*/
#define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \
PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
-static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
- loff_t file_offset)
+static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
{
struct lu_env *env;
struct cl_io *io;
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- struct ccc_object *obj = cl_inode2ccc(inode);
+ struct vvp_object *obj = cl_inode2vvp(inode);
+ loff_t file_offset = iocb->ki_pos;
ssize_t count = iov_iter_count(iter);
ssize_t tot_bytes = 0, result = 0;
struct ll_inode_info *lli = ll_i2info(inode);
@@ -376,22 +358,21 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
return -EBADF;
/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
- if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK))
+ if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
return -EINVAL;
- CDEBUG(D_VFSTRACE,
- "VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
- inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE,
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
+ PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
file_offset, file_offset, count >> PAGE_SHIFT,
MAX_DIO_SIZE >> PAGE_SHIFT);
/* Check that all user buffers are aligned as well */
- if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
+ if (iov_iter_alignment(iter) & ~PAGE_MASK)
return -EINVAL;
env = cl_env_get(&refcheck);
LASSERT(!IS_ERR(env));
- io = ccc_env_io(env)->cui_cl.cis_io;
+ io = vvp_env_io(env)->vui_cl.cis_io;
LASSERT(io);
/* 0. Need locking between buffered and direct access. and race with
@@ -401,7 +382,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
if (iov_iter_rw(iter) == READ)
inode_lock(inode);
- LASSERT(obj->cob_transient_pages == 0);
+ LASSERT(obj->vob_transient_pages == 0);
while (iov_iter_count(iter)) {
struct page **pages;
size_t offs;
@@ -435,8 +416,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
size > (PAGE_SIZE / sizeof(*pages)) *
PAGE_SIZE) {
size = ((((size / 2) - 1) |
- ~CFS_PAGE_MASK) + 1) &
- CFS_PAGE_MASK;
+ ~PAGE_MASK) + 1) &
+ PAGE_MASK;
CDEBUG(D_VFSTRACE, "DIO size now %lu\n",
size);
continue;
@@ -449,62 +430,214 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
file_offset += result;
}
out:
- LASSERT(obj->cob_transient_pages == 0);
+ LASSERT(obj->vob_transient_pages == 0);
if (iov_iter_rw(iter) == READ)
inode_unlock(inode);
if (tot_bytes > 0) {
- if (iov_iter_rw(iter) == WRITE) {
- struct lov_stripe_md *lsm;
-
- lsm = ccc_inode_lsm_get(inode);
- LASSERT(lsm);
- lov_stripe_lock(lsm);
- obd_adjust_kms(ll_i2dtexp(inode), lsm, file_offset, 0);
- lov_stripe_unlock(lsm);
- ccc_inode_lsm_put(inode, lsm);
- }
+ struct vvp_io *vio = vvp_env_io(env);
+
+ /* no commit async for direct IO */
+ vio->u.write.vui_written += tot_bytes;
}
cl_env_put(env, &refcheck);
- return tot_bytes ? : result;
+ return tot_bytes ? tot_bytes : result;
+}
+
+/**
+ * Prepare partially written-to page for a write.
+ */
+static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg)
+{
+ struct cl_attr *attr = vvp_env_thread_attr(env);
+ struct cl_object *obj = io->ci_obj;
+ struct vvp_page *vpg = cl_object_page_slice(obj, pg);
+ loff_t offset = cl_offset(obj, vvp_index(vpg));
+ int result;
+
+ cl_object_attr_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ cl_object_attr_unlock(obj);
+ if (result == 0) {
+ /*
+ * If we are writing to a new page, there is no need to read old data.
+ * The extent locking will have updated the KMS, and for our
+ * purposes here we can treat it like i_size.
+ */
+ if (attr->cat_kms <= offset) {
+ char *kaddr = kmap_atomic(vpg->vpg_page);
+
+ memset(kaddr, 0, cl_page_size(obj));
+ kunmap_atomic(kaddr);
+ } else if (vpg->vpg_defer_uptodate) {
+ vpg->vpg_ra_used = 1;
+ } else {
+ result = ll_page_sync_io(env, io, pg, CRT_READ);
+ }
+ }
+ return result;
}
static int ll_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ struct ll_cl_context *lcc;
+ const struct lu_env *env;
+ struct cl_io *io;
+ struct cl_page *page;
+ struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
pgoff_t index = pos >> PAGE_SHIFT;
- struct page *page;
- int rc;
- unsigned from = pos & (PAGE_SIZE - 1);
+ struct page *vmpage = NULL;
+ unsigned int from = pos & (PAGE_SIZE - 1);
+ unsigned int to = from + len;
+ int result = 0;
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
+ CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);
- *pagep = page;
+ lcc = ll_cl_find(file);
+ if (!lcc) {
+ result = -EIO;
+ goto out;
+ }
+
+ env = lcc->lcc_env;
+ io = lcc->lcc_io;
+
+ /* To avoid deadlock, try to lock page first. */
+ vmpage = grab_cache_page_nowait(mapping, index);
+ if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) {
+ struct vvp_io *vio = vvp_env_io(env);
+ struct cl_page_list *plist = &vio->u.write.vui_queue;
+
+ /* if the page is already in dirty cache, we have to commit
+ * the pages right now; otherwise, it may cause deadlock
+ * because it holds page lock of a dirty page and request for
+ * more grants. It's okay for the dirty page to be the first
+ * one in commit page list, though.
+ */
+ if (vmpage && plist->pl_nr > 0) {
+ unlock_page(vmpage);
+ put_page(vmpage);
+ vmpage = NULL;
+ }
+
+ /* commit pages and then wait for page lock */
+ result = vvp_io_write_commit(env, io);
+ if (result < 0)
+ goto out;
+
+ if (!vmpage) {
+ vmpage = grab_cache_page_write_begin(mapping, index,
+ flags);
+ if (!vmpage) {
+ result = -ENOMEM;
+ goto out;
+ }
+ }
+ }
- rc = ll_prepare_write(file, page, from, from + len);
- if (rc) {
- unlock_page(page);
- put_page(page);
+ page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
+ if (IS_ERR(page)) {
+ result = PTR_ERR(page);
+ goto out;
}
- return rc;
+
+ lcc->lcc_page = page;
+ lu_ref_add(&page->cp_reference, "cl_io", io);
+
+ cl_page_assume(env, io, page);
+ if (!PageUptodate(vmpage)) {
+ /*
+ * We're completely overwriting an existing page,
+ * so _don't_ set it up to date until commit_write
+ */
+ if (from == 0 && to == PAGE_SIZE) {
+ CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
+ POISON_PAGE(vmpage, 0x11);
+ } else {
+ /* TODO: can be optimized at OSC layer to check if it
+ * is a lockless IO. In that case, it's not necessary
+ * to read the data.
+ */
+ result = ll_prepare_partial_page(env, io, page);
+ if (result == 0)
+ SetPageUptodate(vmpage);
+ }
+ }
+ if (result < 0)
+ cl_page_unassume(env, io, page);
+out:
+ if (result < 0) {
+ if (vmpage) {
+ unlock_page(vmpage);
+ put_page(vmpage);
+ }
+ } else {
+ *pagep = vmpage;
+ *fsdata = lcc;
+ }
+ return result;
}
static int ll_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct page *vmpage, void *fsdata)
{
+ struct ll_cl_context *lcc = fsdata;
+ const struct lu_env *env;
+ struct cl_io *io;
+ struct vvp_io *vio;
+ struct cl_page *page;
unsigned from = pos & (PAGE_SIZE - 1);
- int rc;
+ bool unplug = false;
+ int result = 0;
+
+ put_page(vmpage);
+
+ env = lcc->lcc_env;
+ page = lcc->lcc_page;
+ io = lcc->lcc_io;
+ vio = vvp_env_io(env);
+
+ LASSERT(cl_page_is_owned(page, io));
+ if (copied > 0) {
+ struct cl_page_list *plist = &vio->u.write.vui_queue;
+
+ lcc->lcc_page = NULL; /* page will be queued */
+
+ /* Add it into write queue */
+ cl_page_list_add(plist, page);
+ if (plist->pl_nr == 1) /* first page */
+ vio->u.write.vui_from = from;
+ else
+ LASSERT(from == 0);
+ vio->u.write.vui_to = from + copied;
+
+ /* We may have one full RPC, commit it soon */
+ if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
+ unplug = true;
+
+ CL_PAGE_DEBUG(D_VFSTRACE, env, page,
+ "queued page: %d.\n", plist->pl_nr);
+ } else {
+ cl_page_disown(env, io, page);
+
+ lcc->lcc_page = NULL;
+ lu_ref_del(&page->cp_reference, "cl_io", io);
+ cl_page_put(env, page);
+
+ /* page list is not contiguous now, commit it now */
+ unplug = true;
+ }
- rc = ll_commit_write(file, page, from, from + copied);
- unlock_page(page);
- put_page(page);
+ if (unplug ||
+ file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
+ result = vvp_io_write_commit(env, io);
- return rc ?: copied;
+ return result >= 0 ? copied : result;
}
#ifdef CONFIG_MIGRATION
@@ -523,7 +656,7 @@ const struct address_space_operations ll_aops = {
.direct_IO = ll_direct_IO_26,
.writepage = ll_writepage,
.writepages = ll_writepages,
- .set_page_dirty = ll_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_nobuffers,
.write_begin = ll_write_begin,
.write_end = ll_write_end,
.invalidatepage = ll_invalidatepage,
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
index 99ffd1589df8..c1cb6b19e724 100644
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -174,7 +170,8 @@ static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
* Insert it into sai_entries tail when init.
*/
static struct ll_sa_entry *
-ll_sa_entry_alloc(struct ll_statahead_info *sai, __u64 index,
+ll_sa_entry_alloc(struct dentry *parent,
+ struct ll_statahead_info *sai, __u64 index,
const char *name, int len)
{
struct ll_inode_info *lli;
@@ -221,7 +218,8 @@ ll_sa_entry_alloc(struct ll_statahead_info *sai, __u64 index,
dname = (char *)entry + sizeof(struct ll_sa_entry);
memcpy(dname, name, len);
dname[len] = 0;
- entry->se_qstr.hash = full_name_hash(name, len);
+
+ entry->se_qstr.hash = full_name_hash(parent, name, len);
entry->se_qstr.len = len;
entry->se_qstr.name = dname;
@@ -650,7 +648,7 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
}
}
- it->d.lustre.it_lock_handle = entry->se_handle;
+ it->it_lock_handle = entry->se_handle;
rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL);
if (rc != 1) {
rc = -EAGAIN;
@@ -661,8 +659,9 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
if (rc)
goto out;
- CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
- child, child->i_ino, child->i_generation);
+ CDEBUG(D_DLMTRACE, "%s: setting l_data to inode "DFID"%p\n",
+ ll_get_fsname(child->i_sb, NULL, 0),
+ PFID(ll_inode2fid(child)), child);
ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
entry->se_inode = child;
@@ -703,7 +702,7 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
* process enqueues lock on child with parent lock held, eg.
* unlink.
*/
- handle = it->d.lustre.it_lock_handle;
+ handle = it->it_lock_handle;
ll_intent_drop_lock(it);
}
@@ -782,7 +781,7 @@ static int sa_args_init(struct inode *dir, struct inode *child,
struct ll_sa_entry *entry, struct md_enqueue_info **pmi,
struct ldlm_enqueue_info **pei)
{
- struct qstr *qstr = &entry->se_qstr;
+ const struct qstr *qstr = &entry->se_qstr;
struct ll_inode_info *lli = ll_i2info(dir);
struct md_enqueue_info *minfo;
struct ldlm_enqueue_info *einfo;
@@ -853,7 +852,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
{
struct inode *inode = d_inode(dentry);
struct lookup_intent it = { .it_op = IT_GETATTR,
- .d.lustre.it_lock_handle = 0 };
+ .it_lock_handle = 0 };
struct md_enqueue_info *minfo;
struct ldlm_enqueue_info *einfo;
int rc;
@@ -868,7 +867,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode),
NULL);
if (rc == 1) {
- entry->se_handle = it.d.lustre.it_lock_handle;
+ entry->se_handle = it.it_lock_handle;
ll_intent_release(&it);
return 1;
}
@@ -901,7 +900,7 @@ static void ll_statahead_one(struct dentry *parent, const char *entry_name,
int rc;
int rc1;
- entry = ll_sa_entry_alloc(sai, sai->sai_index, entry_name,
+ entry = ll_sa_entry_alloc(parent, sai, sai->sai_index, entry_name,
entry_name_len);
if (IS_ERR(entry))
return;
@@ -1341,7 +1340,7 @@ enum {
static int is_first_dirent(struct inode *dir, struct dentry *dentry)
{
struct ll_dir_chain chain;
- struct qstr *target = &dentry->d_name;
+ const struct qstr *target = &dentry->d_name;
struct page *page;
__u64 pos = 0;
int dot_de;
@@ -1572,7 +1571,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
if (entry->se_stat == SA_ENTRY_SUCC && entry->se_inode) {
struct inode *inode = entry->se_inode;
struct lookup_intent it = { .it_op = IT_GETATTR,
- .d.lustre.it_lock_handle =
+ .it_lock_handle =
entry->se_handle };
__u64 bits;
@@ -1591,13 +1590,11 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
*dentryp = alias;
} else if (d_inode(*dentryp) != inode) {
/* revalidate, but inode is recreated */
- CDEBUG(D_READA,
- "stale dentry %pd inode %lu/%u, statahead inode %lu/%u\n",
- *dentryp,
- d_inode(*dentryp)->i_ino,
- d_inode(*dentryp)->i_generation,
- inode->i_ino,
- inode->i_generation);
+ CDEBUG(D_READA, "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n",
+ ll_get_fsname(d_inode(*dentryp)->i_sb, NULL, 0),
+ *dentryp,
+ PFID(ll_inode2fid(d_inode(*dentryp))),
+ PFID(ll_inode2fid(inode)));
ll_sai_unplug(sai, entry);
return -ESTALE;
} else {
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
index 61856d37afc5..3dd7e0eb0b54 100644
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -118,19 +114,6 @@ static int __init lustre_init(void)
if (!ll_file_data_slab)
goto out_cache;
- ll_remote_perm_cachep = kmem_cache_create("ll_remote_perm_cache",
- sizeof(struct ll_remote_perm),
- 0, 0, NULL);
- if (!ll_remote_perm_cachep)
- goto out_cache;
-
- ll_rmtperm_hash_cachep = kmem_cache_create("ll_rmtperm_hash_cache",
- REMOTE_PERM_HASHSIZE *
- sizeof(struct list_head),
- 0, 0, NULL);
- if (!ll_rmtperm_hash_cachep)
- goto out_cache;
-
llite_root = debugfs_create_dir("llite", debugfs_lustre_root);
if (IS_ERR_OR_NULL(llite_root)) {
rc = llite_root ? PTR_ERR(llite_root) : -ENOMEM;
@@ -164,9 +147,18 @@ static int __init lustre_init(void)
if (rc != 0)
goto out_sysfs;
+ cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck,
+ LCT_REMEMBER | LCT_NOREF);
+ if (IS_ERR(cl_inode_fini_env)) {
+ rc = PTR_ERR(cl_inode_fini_env);
+ goto out_vvp;
+ }
+
+ cl_inode_fini_env->le_ctx.lc_cookie = 0x4;
+
rc = ll_xattr_init();
if (rc != 0)
- goto out_vvp;
+ goto out_inode_fini_env;
lustre_register_client_fill_super(ll_fill_super);
lustre_register_kill_super_cb(ll_kill_super);
@@ -174,6 +166,8 @@ static int __init lustre_init(void)
return 0;
+out_inode_fini_env:
+ cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
out_vvp:
vvp_global_fini();
out_sysfs:
@@ -183,8 +177,6 @@ out_debugfs:
out_cache:
kmem_cache_destroy(ll_inode_cachep);
kmem_cache_destroy(ll_file_data_slab);
- kmem_cache_destroy(ll_remote_perm_cachep);
- kmem_cache_destroy(ll_rmtperm_hash_cachep);
return rc;
}
@@ -198,13 +190,10 @@ static void __exit lustre_exit(void)
kset_unregister(llite_kset);
ll_xattr_fini();
+ cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
vvp_global_fini();
kmem_cache_destroy(ll_inode_cachep);
- kmem_cache_destroy(ll_rmtperm_hash_cachep);
-
- kmem_cache_destroy(ll_remote_perm_cachep);
-
kmem_cache_destroy(ll_file_data_slab);
}
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index 46d03ea48352..8c8bdfe1ad71 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -77,7 +73,9 @@ static int ll_readlink_internal(struct inode *inode,
ll_finish_md_op_data(op_data);
if (rc) {
if (rc != -ENOENT)
- CERROR("inode %lu: rc = %d\n", inode->i_ino, rc);
+ CERROR("%s: inode "DFID": rc = %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), rc);
goto failed;
}
@@ -90,8 +88,10 @@ static int ll_readlink_internal(struct inode *inode,
LASSERT(symlen != 0);
if (body->eadatasize != symlen) {
- CERROR("inode %lu: symlink length %d not expected %d\n",
- inode->i_ino, body->eadatasize - 1, symlen - 1);
+ CERROR("%s: inode "DFID": symlink length %d not expected %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), body->eadatasize - 1,
+ symlen - 1);
rc = -EPROTO;
goto failed;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index 282b70b776da..e623216e962d 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -36,6 +32,7 @@
* cl_device and cl_device_type implementation for VVP layer.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_LLITE
@@ -56,13 +53,33 @@
* "llite_" (var. "ll_") prefix.
*/
-static struct kmem_cache *vvp_thread_kmem;
+static struct kmem_cache *ll_thread_kmem;
+struct kmem_cache *vvp_lock_kmem;
+struct kmem_cache *vvp_object_kmem;
+struct kmem_cache *vvp_req_kmem;
static struct kmem_cache *vvp_session_kmem;
+static struct kmem_cache *vvp_thread_kmem;
+
static struct lu_kmem_descr vvp_caches[] = {
{
- .ckd_cache = &vvp_thread_kmem,
- .ckd_name = "vvp_thread_kmem",
- .ckd_size = sizeof(struct vvp_thread_info),
+ .ckd_cache = &ll_thread_kmem,
+ .ckd_name = "ll_thread_kmem",
+ .ckd_size = sizeof(struct ll_thread_info),
+ },
+ {
+ .ckd_cache = &vvp_lock_kmem,
+ .ckd_name = "vvp_lock_kmem",
+ .ckd_size = sizeof(struct vvp_lock),
+ },
+ {
+ .ckd_cache = &vvp_object_kmem,
+ .ckd_name = "vvp_object_kmem",
+ .ckd_size = sizeof(struct vvp_object),
+ },
+ {
+ .ckd_cache = &vvp_req_kmem,
+ .ckd_name = "vvp_req_kmem",
+ .ckd_size = sizeof(struct vvp_req),
},
{
.ckd_cache = &vvp_session_kmem,
@@ -70,29 +87,40 @@ static struct lu_kmem_descr vvp_caches[] = {
.ckd_size = sizeof(struct vvp_session)
},
{
+ .ckd_cache = &vvp_thread_kmem,
+ .ckd_name = "vvp_thread_kmem",
+ .ckd_size = sizeof(struct vvp_thread_info),
+ },
+ {
.ckd_cache = NULL
}
};
-static void *vvp_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
+static void *ll_thread_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
{
struct vvp_thread_info *info;
- info = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
+ info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS);
if (!info)
info = ERR_PTR(-ENOMEM);
return info;
}
-static void vvp_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
+static void ll_thread_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
{
struct vvp_thread_info *info = data;
- kmem_cache_free(vvp_thread_kmem, info);
+ kmem_cache_free(ll_thread_kmem, info);
}
+struct lu_context_key ll_thread_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = ll_thread_key_init,
+ .lct_fini = ll_thread_key_fini
+};
+
static void *vvp_session_key_init(const struct lu_context *ctx,
struct lu_context_key *key)
{
@@ -112,34 +140,127 @@ static void vvp_session_key_fini(const struct lu_context *ctx,
kmem_cache_free(vvp_session_kmem, session);
}
-struct lu_context_key vvp_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = vvp_key_init,
- .lct_fini = vvp_key_fini
-};
-
struct lu_context_key vvp_session_key = {
.lct_tags = LCT_SESSION,
.lct_init = vvp_session_key_init,
.lct_fini = vvp_session_key_fini
};
+static void *vvp_thread_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key)
+{
+ struct vvp_thread_info *vti;
+
+ vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
+ if (!vti)
+ vti = ERR_PTR(-ENOMEM);
+ return vti;
+}
+
+static void vvp_thread_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data)
+{
+ struct vvp_thread_info *vti = data;
+
+ kmem_cache_free(vvp_thread_kmem, vti);
+}
+
+struct lu_context_key vvp_thread_key = {
+ .lct_tags = LCT_CL_THREAD,
+ .lct_init = vvp_thread_key_init,
+ .lct_fini = vvp_thread_key_fini
+};
+
/* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */
-LU_TYPE_INIT_FINI(vvp, &ccc_key, &ccc_session_key, &vvp_key, &vvp_session_key);
+LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key);
static const struct lu_device_operations vvp_lu_ops = {
.ldo_object_alloc = vvp_object_alloc
};
static const struct cl_device_operations vvp_cl_ops = {
- .cdo_req_init = ccc_req_init
+ .cdo_req_init = vvp_req_init
};
+static struct lu_device *vvp_device_free(const struct lu_env *env,
+ struct lu_device *d)
+{
+ struct vvp_device *vdv = lu2vvp_dev(d);
+ struct cl_site *site = lu2cl_site(d->ld_site);
+ struct lu_device *next = cl2lu_dev(vdv->vdv_next);
+
+ if (d->ld_site) {
+ cl_site_fini(site);
+ kfree(site);
+ }
+ cl_device_fini(lu2cl_dev(d));
+ kfree(vdv);
+ return next;
+}
+
static struct lu_device *vvp_device_alloc(const struct lu_env *env,
struct lu_device_type *t,
struct lustre_cfg *cfg)
{
- return ccc_device_alloc(env, t, cfg, &vvp_lu_ops, &vvp_cl_ops);
+ struct vvp_device *vdv;
+ struct lu_device *lud;
+ struct cl_site *site;
+ int rc;
+
+ vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
+ if (!vdv)
+ return ERR_PTR(-ENOMEM);
+
+ lud = &vdv->vdv_cl.cd_lu_dev;
+ cl_device_init(&vdv->vdv_cl, t);
+ vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
+ vdv->vdv_cl.cd_ops = &vvp_cl_ops;
+
+ site = kzalloc(sizeof(*site), GFP_NOFS);
+ if (site) {
+ rc = cl_site_init(site, &vdv->vdv_cl);
+ if (rc == 0) {
+ rc = lu_site_init_finish(&site->cs_lu);
+ } else {
+ LASSERT(!lud->ld_site);
+ CERROR("Cannot init lu_site, rc %d.\n", rc);
+ kfree(site);
+ }
+ } else {
+ rc = -ENOMEM;
+ }
+ if (rc != 0) {
+ vvp_device_free(env, lud);
+ lud = ERR_PTR(rc);
+ }
+ return lud;
+}
+
+static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
+ const char *name, struct lu_device *next)
+{
+ struct vvp_device *vdv;
+ int rc;
+
+ vdv = lu2vvp_dev(d);
+ vdv->vdv_next = lu2cl_dev(next);
+
+ LASSERT(d->ld_site && next->ld_type);
+ next->ld_site = d->ld_site;
+ rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
+ next->ld_type->ldt_name,
+ NULL);
+ if (rc == 0) {
+ lu_device_get(next);
+ lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
+ }
+ return rc;
+}
+
+static struct lu_device *vvp_device_fini(const struct lu_env *env,
+ struct lu_device *d)
+{
+ return cl2lu_dev(lu2vvp_dev(d)->vdv_next);
}
static const struct lu_device_type_operations vvp_device_type_ops = {
@@ -150,9 +271,9 @@ static const struct lu_device_type_operations vvp_device_type_ops = {
.ldto_stop = vvp_type_stop,
.ldto_device_alloc = vvp_device_alloc,
- .ldto_device_free = ccc_device_free,
- .ldto_device_init = ccc_device_init,
- .ldto_device_fini = ccc_device_fini
+ .ldto_device_free = vvp_device_free,
+ .ldto_device_init = vvp_device_init,
+ .ldto_device_fini = vvp_device_fini,
};
struct lu_device_type vvp_device_type = {
@@ -168,20 +289,27 @@ struct lu_device_type vvp_device_type = {
*/
int vvp_global_init(void)
{
- int result;
+ int rc;
- result = lu_kmem_init(vvp_caches);
- if (result == 0) {
- result = ccc_global_init(&vvp_device_type);
- if (result != 0)
- lu_kmem_fini(vvp_caches);
- }
- return result;
+ rc = lu_kmem_init(vvp_caches);
+ if (rc != 0)
+ return rc;
+
+ rc = lu_device_type_init(&vvp_device_type);
+ if (rc != 0)
+ goto out_kmem;
+
+ return 0;
+
+out_kmem:
+ lu_kmem_fini(vvp_caches);
+
+ return rc;
}
void vvp_global_fini(void)
{
- ccc_global_fini(&vvp_device_type);
+ lu_device_type_fini(&vvp_device_type);
lu_kmem_fini(vvp_caches);
}
@@ -205,13 +333,14 @@ int cl_sb_init(struct super_block *sb)
cl = cl_type_setup(env, NULL, &vvp_device_type,
sbi->ll_dt_exp->exp_obd->obd_lu_dev);
if (!IS_ERR(cl)) {
- cl2ccc_dev(cl)->cdv_sb = sb;
+ cl2vvp_dev(cl)->vdv_sb = sb;
sbi->ll_cl = cl;
sbi->ll_site = cl2lu_dev(cl)->ld_site;
}
cl_env_put(env, &refcheck);
- } else
+ } else {
rc = PTR_ERR(env);
+ }
return rc;
}
@@ -356,23 +485,18 @@ static loff_t vvp_pgcache_find(const struct lu_env *env,
return ~0ULL;
clob = vvp_pgcache_obj(env, dev, &id);
if (clob) {
- struct cl_object_header *hdr;
- int nr;
- struct cl_page *pg;
-
- /* got an object. Find next page. */
- hdr = cl_object_header(clob);
+ struct inode *inode = vvp_object_inode(clob);
+ struct page *vmpage;
+ int nr;
- spin_lock(&hdr->coh_page_guard);
- nr = radix_tree_gang_lookup(&hdr->coh_tree,
- (void **)&pg,
- id.vpi_index, 1);
+ nr = find_get_pages_contig(inode->i_mapping,
+ id.vpi_index, 1, &vmpage);
if (nr > 0) {
- id.vpi_index = pg->cp_index;
+ id.vpi_index = vmpage->index;
/* Cant support over 16T file */
- nr = !(pg->cp_index > 0xffffffff);
+ nr = !(vmpage->index > 0xffffffff);
+ put_page(vmpage);
}
- spin_unlock(&hdr->coh_page_guard);
lu_object_ref_del(&clob->co_lu, "dump", current);
cl_object_put(env, clob);
@@ -398,21 +522,20 @@ static loff_t vvp_pgcache_find(const struct lu_env *env,
static void vvp_pgcache_page_show(const struct lu_env *env,
struct seq_file *seq, struct cl_page *page)
{
- struct ccc_page *cpg;
+ struct vvp_page *vpg;
struct page *vmpage;
int has_flags;
- cpg = cl2ccc_page(cl_page_at(page, &vvp_device_type));
- vmpage = cpg->cpg_page;
- seq_printf(seq, " %5i | %p %p %s %s %s %s | %p %lu/%u(%p) %lu %u [",
+ vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
+ vmpage = vpg->vpg_page;
+ seq_printf(seq, " %5i | %p %p %s %s %s %s | %p "DFID"(%p) %lu %u [",
0 /* gen */,
- cpg, page,
+ vpg, page,
"none",
- cpg->cpg_write_queued ? "wq" : "- ",
- cpg->cpg_defer_uptodate ? "du" : "- ",
+ vpg->vpg_write_queued ? "wq" : "- ",
+ vpg->vpg_defer_uptodate ? "du" : "- ",
PageWriteback(vmpage) ? "wb" : "-",
- vmpage, vmpage->mapping->host->i_ino,
- vmpage->mapping->host->i_generation,
+ vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
vmpage->mapping->host, vmpage->index,
page_count(vmpage));
has_flags = 0;
@@ -431,40 +554,49 @@ static int vvp_pgcache_show(struct seq_file *f, void *v)
struct ll_sb_info *sbi;
struct cl_object *clob;
struct lu_env *env;
- struct cl_page *page;
- struct cl_object_header *hdr;
struct vvp_pgcache_id id;
int refcheck;
int result;
env = cl_env_get(&refcheck);
if (!IS_ERR(env)) {
- pos = *(loff_t *) v;
+ pos = *(loff_t *)v;
vvp_pgcache_id_unpack(pos, &id);
sbi = f->private;
clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
if (clob) {
- hdr = cl_object_header(clob);
-
- spin_lock(&hdr->coh_page_guard);
- page = cl_page_lookup(hdr, id.vpi_index);
- spin_unlock(&hdr->coh_page_guard);
+ struct inode *inode = vvp_object_inode(clob);
+ struct cl_page *page = NULL;
+ struct page *vmpage;
+
+ result = find_get_pages_contig(inode->i_mapping,
+ id.vpi_index, 1,
+ &vmpage);
+ if (result > 0) {
+ lock_page(vmpage);
+ page = cl_vmpage_page(vmpage, clob);
+ unlock_page(vmpage);
+ put_page(vmpage);
+ }
- seq_printf(f, "%8x@"DFID": ",
- id.vpi_index, PFID(&hdr->coh_lu.loh_fid));
+ seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
+ PFID(lu_object_fid(&clob->co_lu)));
if (page) {
vvp_pgcache_page_show(env, f, page);
cl_page_put(env, page);
- } else
+ } else {
seq_puts(f, "missing\n");
+ }
lu_object_ref_del(&clob->co_lu, "dump", current);
cl_object_put(env, clob);
- } else
+ } else {
seq_printf(f, "%llx missing\n", pos);
+ }
cl_env_put(env, &refcheck);
result = 0;
- } else
+ } else {
result = PTR_ERR(env);
+ }
return result;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
index bb393378c9bb..79fc428461ed 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -41,21 +37,337 @@
#ifndef VVP_INTERNAL_H
#define VVP_INTERNAL_H
+#include "../include/lustre/lustre_idl.h"
#include "../include/cl_object.h"
-#include "llite_internal.h"
-int vvp_io_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_io *io);
-int vvp_lock_init(const struct lu_env *env,
- struct cl_object *obj, struct cl_lock *lock,
- const struct cl_io *io);
+enum obd_notify_event;
+struct inode;
+struct lov_stripe_md;
+struct lustre_md;
+struct obd_capa;
+struct obd_device;
+struct obd_export;
+struct page;
+
+/* specific architecture can implement only part of this list */
+enum vvp_io_subtype {
+ /** normal IO */
+ IO_NORMAL,
+ /** io started from splice_{read|write} */
+ IO_SPLICE
+};
+
+/**
+ * IO state private to VVP layer.
+ */
+struct vvp_io {
+ /** super class */
+ struct cl_io_slice vui_cl;
+ struct cl_io_lock_link vui_link;
+ /**
+ * I/O vector information to or from which read/write is going.
+ */
+ struct iov_iter *vui_iter;
+ /**
+ * Total size for the left IO.
+ */
+ size_t vui_tot_count;
+
+ union {
+ struct vvp_fault_io {
+ /**
+ * Inode modification time that is checked across DLM
+ * lock request.
+ */
+ time64_t ft_mtime;
+ struct vm_area_struct *ft_vma;
+ /**
+ * locked page returned from vvp_io
+ */
+ struct page *ft_vmpage;
+ /**
+ * kernel fault info
+ */
+ struct vm_fault *ft_vmf;
+ /**
+ * fault API used bitflags for return code.
+ */
+ unsigned int ft_flags;
+ /**
+ * check that flags are from filemap_fault
+ */
+ bool ft_flags_valid;
+ } fault;
+ struct {
+ struct pipe_inode_info *vui_pipe;
+ unsigned int vui_flags;
+ } splice;
+ struct {
+ struct cl_page_list vui_queue;
+ unsigned long vui_written;
+ int vui_from;
+ int vui_to;
+ } write;
+ } u;
+
+ enum vvp_io_subtype vui_io_subtype;
+
+ /**
+ * Layout version when this IO is initialized
+ */
+ __u32 vui_layout_gen;
+ /**
+ * File descriptor against which IO is done.
+ */
+ struct ll_file_data *vui_fd;
+ struct kiocb *vui_iocb;
+
+ /* Readahead state. */
+ pgoff_t vui_ra_start;
+ pgoff_t vui_ra_count;
+ /* Set when vui_ra_{start,count} have been initialized. */
+ bool vui_ra_valid;
+};
+
+extern struct lu_device_type vvp_device_type;
+
+extern struct lu_context_key vvp_session_key;
+extern struct lu_context_key vvp_thread_key;
+
+extern struct kmem_cache *vvp_lock_kmem;
+extern struct kmem_cache *vvp_object_kmem;
+extern struct kmem_cache *vvp_req_kmem;
+
+struct vvp_thread_info {
+ struct cl_lock vti_lock;
+ struct cl_lock_descr vti_descr;
+ struct cl_io vti_io;
+ struct cl_attr vti_attr;
+};
+
+static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
+{
+ struct vvp_thread_info *vti;
+
+ vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key);
+ LASSERT(vti);
+
+ return vti;
+}
+
+static inline struct cl_lock *vvp_env_lock(const struct lu_env *env)
+{
+ struct cl_lock *lock = &vvp_env_info(env)->vti_lock;
+
+ memset(lock, 0, sizeof(*lock));
+ return lock;
+}
+
+static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env)
+{
+ struct cl_attr *attr = &vvp_env_info(env)->vti_attr;
+
+ memset(attr, 0, sizeof(*attr));
+
+ return attr;
+}
+
+static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env)
+{
+ struct cl_io *io = &vvp_env_info(env)->vti_io;
+
+ memset(io, 0, sizeof(*io));
+
+ return io;
+}
+
+struct vvp_session {
+ struct vvp_io cs_ios;
+};
+
+static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
+{
+ struct vvp_session *ses;
+
+ ses = lu_context_key_get(env->le_ses, &vvp_session_key);
+ LASSERT(ses);
+
+ return ses;
+}
+
+static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
+{
+ return &vvp_env_session(env)->cs_ios;
+}
+
+/**
+ * VVP-private object state.
+ */
+struct vvp_object {
+ struct cl_object_header vob_header;
+ struct cl_object vob_cl;
+ struct inode *vob_inode;
+
+ /**
+ * A list of dirty pages pending IO in the cache. Used by
+ * SOM. Protected by ll_inode_info::lli_lock.
+ *
+ * \see vvp_page::vpg_pending_linkage
+ */
+ struct list_head vob_pending_list;
+
+ /**
+ * Access to this counter is protected by inode->i_sem. Now that
+ * the lifetime of transient pages must be covered by inode sem,
+ * we don't need to hold any lock.
+ */
+ int vob_transient_pages;
+ /**
+ * Number of outstanding mmaps on this file.
+ *
+ * \see ll_vm_open(), ll_vm_close().
+ */
+ atomic_t vob_mmap_cnt;
+
+ /**
+ * various flags
+ * vob_discard_page_warned
+ * if pages belonging to this object are discarded when a client
+ * is evicted, some debug info will be printed, this flag will be set
+ * during processing the first discarded page, then avoid flooding
+ * debug message for lots of discarded pages.
+ *
+ * \see ll_dirty_page_discard_warn.
+ */
+ unsigned int vob_discard_page_warned:1;
+};
+
+/**
+ * VVP-private page state.
+ */
+struct vvp_page {
+ struct cl_page_slice vpg_cl;
+ int vpg_defer_uptodate;
+ int vpg_ra_used;
+ int vpg_write_queued;
+ /**
+ * Non-empty iff this page is already counted in
+ * vvp_object::vob_pending_list. This list is only used as a flag,
+ * that is, never iterated through, only checked for list_empty(), but
+ * having a list is useful for debugging.
+ */
+ struct list_head vpg_pending_linkage;
+ /** VM page */
+ struct page *vpg_page;
+};
+
+static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice)
+{
+ return container_of(slice, struct vvp_page, vpg_cl);
+}
+
+static inline pgoff_t vvp_index(struct vvp_page *vvp)
+{
+ return vvp->vpg_cl.cpl_index;
+}
+
+struct vvp_device {
+ struct cl_device vdv_cl;
+ struct super_block *vdv_sb;
+ struct cl_device *vdv_next;
+};
+
+struct vvp_lock {
+ struct cl_lock_slice vlk_cl;
+};
+
+struct vvp_req {
+ struct cl_req_slice vrq_cl;
+};
+
+void *ccc_key_init(const struct lu_context *ctx,
+ struct lu_context_key *key);
+void ccc_key_fini(const struct lu_context *ctx,
+ struct lu_context_key *key, void *data);
+
+void ccc_umount(const struct lu_env *env, struct cl_device *dev);
+
+static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv)
+{
+ return &vdv->vdv_cl.cd_lu_dev;
+}
+
+static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d)
+{
+ return container_of0(d, struct vvp_device, vdv_cl.cd_lu_dev);
+}
+
+static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d)
+{
+ return container_of0(d, struct vvp_device, vdv_cl);
+}
+
+static inline struct vvp_object *cl2vvp(const struct cl_object *obj)
+{
+ return container_of0(obj, struct vvp_object, vob_cl);
+}
+
+static inline struct vvp_object *lu2vvp(const struct lu_object *obj)
+{
+ return container_of0(obj, struct vvp_object, vob_cl.co_lu);
+}
+
+static inline struct inode *vvp_object_inode(const struct cl_object *obj)
+{
+ return cl2vvp(obj)->vob_inode;
+}
+
+int vvp_object_invariant(const struct cl_object *obj);
+struct vvp_object *cl_inode2vvp(struct inode *inode);
+
+static inline struct page *cl2vm_page(const struct cl_page_slice *slice)
+{
+ return cl2vvp_page(slice)->vpg_page;
+}
+
+static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice)
+{
+ return container_of(slice, struct vvp_lock, vlk_cl);
+}
+
+# define CLOBINVRNT(env, clob, expr) \
+ ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
+
+/**
+ * New interfaces to get and put lov_stripe_md from lov layer. This violates
+ * layering because lov_stripe_md is supposed to be a private data in lov.
+ *
+ * NB: If you find you have to use these interfaces for your new code, please
+ * think about it again. These interfaces may be removed in the future for
+ * better layering.
+ */
+struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
+void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
+int lov_read_and_clear_async_rc(struct cl_object *clob);
+
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
+void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
+
+int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io);
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
+int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
+ struct cl_lock *lock, const struct cl_io *io);
int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage);
+ struct cl_page *page, pgoff_t index);
+int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req);
struct lu_object *vvp_object_alloc(const struct lu_env *env,
const struct lu_object_header *hdr,
struct lu_device *dev);
-struct ccc_object *cl_inode2ccc(struct inode *inode);
+int vvp_global_init(void);
+void vvp_global_fini(void);
extern const struct file_operations vvp_dump_pgcache_file_ops;
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 85a835976174..94916dcc6caa 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -44,21 +40,30 @@
#include "../include/obd.h"
#include "../include/lustre_lite.h"
+#include "llite_internal.h"
#include "vvp_internal.h"
static struct vvp_io *cl2vvp_io(const struct lu_env *env,
- const struct cl_io_slice *slice);
+ const struct cl_io_slice *slice)
+{
+ struct vvp_io *vio;
+
+ vio = container_of(slice, struct vvp_io, vui_cl);
+ LASSERT(vio == vvp_env_io(env));
+
+ return vio;
+}
/**
* True, if \a io is a normal io, False for splice_{read,write}
*/
-int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
+static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
{
struct vvp_io *vio = vvp_env_io(env);
LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
- return vio->cui_io_subtype == IO_NORMAL;
+ return vio->vui_io_subtype == IO_NORMAL;
}
/**
@@ -71,7 +76,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct ccc_io *cio = ccc_env_io(env);
+ struct vvp_io *vio = vvp_env_io(env);
bool rc = true;
switch (io->ci_type) {
@@ -80,7 +85,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
/* don't need lock here to check lli_layout_gen as we have held
* extent lock and GROUP lock has to hold to swap layout
*/
- if (ll_layout_version_get(lli) != cio->cui_layout_gen) {
+ if (ll_layout_version_get(lli) != vio->vui_layout_gen) {
io->ci_need_restart = 1;
/* this will return application a short read/write */
io->ci_continue = 0;
@@ -95,20 +100,187 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
return rc;
}
+static void vvp_object_size_lock(struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+
+ ll_inode_size_lock(inode);
+ cl_object_attr_lock(obj);
+}
+
+static void vvp_object_size_unlock(struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+
+ cl_object_attr_unlock(obj);
+ ll_inode_size_unlock(inode);
+}
+
+/**
+ * Helper function that if necessary adjusts file size (inode->i_size), when
+ * position at the offset \a pos is accessed. File size can be arbitrary stale
+ * on a Lustre client, but client at least knows KMS. If accessed area is
+ * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
+ *
+ * Locking: cl_isize_lock is used to serialize changes to inode size and to
+ * protect consistency between inode size and cl_object
+ * attributes. cl_object_size_lock() protects consistency between cl_attr's of
+ * top-object and sub-objects.
+ */
+static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io, loff_t start, size_t count,
+ int *exceed)
+{
+ struct cl_attr *attr = vvp_env_thread_attr(env);
+ struct inode *inode = vvp_object_inode(obj);
+ loff_t pos = start + count - 1;
+ loff_t kms;
+ int result;
+
+ /*
+ * Consistency guarantees: following possibilities exist for the
+ * relation between region being accessed and real file size at this
+ * moment:
+ *
+ * (A): the region is completely inside of the file;
+ *
+ * (B-x): x bytes of region are inside of the file, the rest is
+ * outside;
+ *
+ * (C): the region is completely outside of the file.
+ *
+ * This classification is stable under DLM lock already acquired by
+ * the caller, because to change the class, other client has to take
+ * DLM lock conflicting with our lock. Also, any updates to ->i_size
+ * by other threads on this client are serialized by
+ * ll_inode_size_lock(). This guarantees that short reads are handled
+ * correctly in the face of concurrent writes and truncates.
+ */
+ vvp_object_size_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ if (result == 0) {
+ kms = attr->cat_kms;
+ if (pos > kms) {
+ /*
+ * A glimpse is necessary to determine whether we
+ * return a short read (B) or some zeroes at the end
+ * of the buffer (C)
+ */
+ vvp_object_size_unlock(obj);
+ result = cl_glimpse_lock(env, io, inode, obj, 0);
+ if (result == 0 && exceed) {
+ /* If the target page index exceeds the
+ * end-of-file page index, return directly.
+ * Do not rely on the kernel to check this
+ * case; linux-2.6.18-128.1.1 fails to do so.
+ * --bug 17336
+ */
+ loff_t size = i_size_read(inode);
+ loff_t cur_index = start >> PAGE_SHIFT;
+ loff_t size_index = (size - 1) >> PAGE_SHIFT;
+
+ if ((size == 0 && cur_index != 0) ||
+ size_index < cur_index)
+ *exceed = 1;
+ }
+ return result;
+ }
+ /*
+ * region is within kms and, hence, within real file
+ * size (A). We need to increase i_size to cover the
+ * read region so that generic_file_read() will do its
+ * job, but that doesn't mean the kms size is
+ * _correct_, it is only the _minimum_ size. If
+ * someone does a stat they will get the correct size
+ * which will always be >= the kms value here.
+ * b=11081
+ */
+ if (i_size_read(inode) < kms) {
+ i_size_write(inode, kms);
+ CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ (__u64)i_size_read(inode));
+ }
+ }
+
+ vvp_object_size_unlock(obj);
+
+ return result;
+}
+
/*****************************************************************************
*
* io operations.
*
*/
+static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
+ __u32 enqflags, enum cl_lock_mode mode,
+ pgoff_t start, pgoff_t end)
+{
+ struct vvp_io *vio = vvp_env_io(env);
+ struct cl_lock_descr *descr = &vio->vui_link.cill_descr;
+ struct cl_object *obj = io->ci_obj;
+
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+ CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
+
+ memset(&vio->vui_link, 0, sizeof(vio->vui_link));
+
+ if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+ descr->cld_mode = CLM_GROUP;
+ descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid;
+ } else {
+ descr->cld_mode = mode;
+ }
+ descr->cld_obj = obj;
+ descr->cld_start = start;
+ descr->cld_end = end;
+ descr->cld_enq_flags = enqflags;
+
+ cl_io_lock_add(env, io, &vio->vui_link);
+ return 0;
+}
+
+static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io,
+ __u32 enqflags, enum cl_lock_mode mode,
+ loff_t start, loff_t end)
+{
+ struct cl_object *obj = io->ci_obj;
+
+ return vvp_io_one_lock_index(env, io, enqflags, mode,
+ cl_index(obj, start), cl_index(obj, end));
+}
+
+static int vvp_io_write_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+
+ cl_page_list_init(&vio->u.write.vui_queue);
+ vio->u.write.vui_written = 0;
+ vio->u.write.vui_from = 0;
+ vio->u.write.vui_to = PAGE_SIZE;
+
+ return 0;
+}
+
+static void vvp_io_write_iter_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+
+ LASSERT(vio->u.write.vui_queue.pl_nr == 0);
+}
+
static int vvp_io_fault_iter_init(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct vvp_io *vio = cl2vvp_io(env, ios);
- struct inode *inode = ccc_object_inode(ios->cis_obj);
+ struct inode *inode = vvp_object_inode(ios->cis_obj);
- LASSERT(inode ==
- file_inode(cl2ccc_io(env, ios)->cui_fd->fd_file));
+ LASSERT(inode == file_inode(vio->vui_fd->fd_file));
vio->u.fault.ft_mtime = inode->i_mtime.tv_sec;
return 0;
}
@@ -117,15 +289,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
{
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct ccc_io *cio = cl2ccc_io(env, ios);
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+ struct inode *inode = vvp_object_inode(obj);
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
CDEBUG(D_VFSTRACE, DFID
" ignore/verify layout %d/%d, layout version %d restore needed %d\n",
PFID(lu_object_fid(&obj->co_lu)),
io->ci_ignore_layout, io->ci_verify_layout,
- cio->cui_layout_gen, io->ci_restore_needed);
+ vio->vui_layout_gen, io->ci_restore_needed);
if (io->ci_restore_needed == 1) {
int rc;
@@ -133,7 +306,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
/* file was detected release, we need to restore it
* before finishing the io
*/
- rc = ll_layout_restore(ccc_object_inode(obj));
+ rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF);
/* if restore registration failed, no restart,
* we will return -ENODATA
*/
@@ -159,16 +332,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
__u32 gen = 0;
/* check layout version */
- ll_layout_refresh(ccc_object_inode(obj), &gen);
- io->ci_need_restart = cio->cui_layout_gen != gen;
+ ll_layout_refresh(inode, &gen);
+ io->ci_need_restart = vio->vui_layout_gen != gen;
if (io->ci_need_restart) {
CDEBUG(D_VFSTRACE,
DFID" layout changed from %d to %d.\n",
PFID(lu_object_fid(&obj->co_lu)),
- cio->cui_layout_gen, gen);
+ vio->vui_layout_gen, gen);
/* today successful restore is the only possible case */
/* restore was done, clear restoring state */
- ll_i2info(ccc_object_inode(obj))->lli_flags &=
+ ll_i2info(vvp_object_inode(obj))->lli_flags &=
~LLIF_FILE_RESTORING;
}
}
@@ -180,7 +353,7 @@ static void vvp_io_fault_fini(const struct lu_env *env,
struct cl_io *io = ios->cis_io;
struct cl_page *page = io->u.ci_fault.ft_page;
- CLOBINVRNT(env, io->ci_obj, ccc_object_invariant(io->ci_obj));
+ CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj));
if (page) {
lu_ref_del(&page->cp_reference, "fault", io);
@@ -203,16 +376,16 @@ static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma)
}
static int vvp_mmap_locks(const struct lu_env *env,
- struct ccc_io *vio, struct cl_io *io)
+ struct vvp_io *vio, struct cl_io *io)
{
- struct ccc_thread_info *cti = ccc_env_info(env);
+ struct vvp_thread_info *cti = vvp_env_info(env);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- struct cl_lock_descr *descr = &cti->cti_descr;
+ struct cl_lock_descr *descr = &cti->vti_descr;
ldlm_policy_data_t policy;
unsigned long addr;
ssize_t count;
- int result;
+ int result = 0;
struct iov_iter i;
struct iovec iov;
@@ -221,21 +394,21 @@ static int vvp_mmap_locks(const struct lu_env *env,
if (!cl_is_normalio(env, io))
return 0;
- if (!vio->cui_iter) /* nfs or loop back device write */
+ if (!vio->vui_iter) /* nfs or loop back device write */
return 0;
/* No MM (e.g. NFS)? No vmas too. */
if (!mm)
return 0;
- iov_for_each(iov, i, *(vio->cui_iter)) {
+ iov_for_each(iov, i, *vio->vui_iter) {
addr = (unsigned long)iov.iov_base;
count = iov.iov_len;
if (count == 0)
continue;
- count += addr & (~CFS_PAGE_MASK);
- addr &= CFS_PAGE_MASK;
+ count += addr & (~PAGE_MASK);
+ addr &= PAGE_MASK;
down_read(&mm->mmap_sem);
while ((vma = our_vma(mm, addr, count)) != NULL) {
@@ -244,10 +417,10 @@ static int vvp_mmap_locks(const struct lu_env *env,
if (ll_file_nolock(vma->vm_file)) {
/*
- * For no lock case, a lockless lock will be
- * generated.
+ * The no-lock case is not allowed for mmap
*/
- flags = CEF_NEVER;
+ result = -EINVAL;
+ break;
}
/*
@@ -269,10 +442,8 @@ static int vvp_mmap_locks(const struct lu_env *env,
descr->cld_mode, descr->cld_start,
descr->cld_end);
- if (result < 0) {
- up_read(&mm->mmap_sem);
- return result;
- }
+ if (result < 0)
+ break;
if (vma->vm_end - addr >= count)
break;
@@ -281,26 +452,55 @@ static int vvp_mmap_locks(const struct lu_env *env,
addr = vma->vm_end;
}
up_read(&mm->mmap_sem);
+ if (result < 0)
+ break;
}
- return 0;
+ return result;
+}
+
+static void vvp_io_advance(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ size_t nob)
+{
+ struct vvp_io *vio = cl2vvp_io(env, ios);
+ struct cl_io *io = ios->cis_io;
+ struct cl_object *obj = ios->cis_io->ci_obj;
+
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+ if (!cl_is_normalio(env, io))
+ return;
+
+ iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob);
+}
+
+static void vvp_io_update_iov(const struct lu_env *env,
+ struct vvp_io *vio, struct cl_io *io)
+{
+ size_t size = io->u.ci_rw.crw_count;
+
+ if (!cl_is_normalio(env, io) || !vio->vui_iter)
+ return;
+
+ iov_iter_truncate(vio->vui_iter, size);
}
static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
enum cl_lock_mode mode, loff_t start, loff_t end)
{
- struct ccc_io *cio = ccc_env_io(env);
+ struct vvp_io *vio = vvp_env_io(env);
int result;
int ast_flags = 0;
LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
- ccc_io_update_iov(env, cio, io);
+ vvp_io_update_iov(env, vio, io);
if (io->u.ci_rw.crw_nonblock)
ast_flags |= CEF_NONBLOCK;
- result = vvp_mmap_locks(env, cio, io);
+ result = vvp_mmap_locks(env, vio, io);
if (result == 0)
- result = ccc_io_one_lock(env, io, ast_flags, mode, start, end);
+ result = vvp_io_one_lock(env, io, ast_flags, mode, start, end);
return result;
}
@@ -325,9 +525,11 @@ static int vvp_io_fault_lock(const struct lu_env *env,
/*
* XXX LDLM_FL_CBPENDING
*/
- return ccc_io_one_lock_index
- (env, io, 0, vvp_mode_from_vma(vio->u.fault.ft_vma),
- io->u.ci_fault.ft_index, io->u.ci_fault.ft_index);
+ return vvp_io_one_lock_index(env,
+ io, 0,
+ vvp_mode_from_vma(vio->u.fault.ft_vma),
+ io->u.ci_fault.ft_index,
+ io->u.ci_fault.ft_index);
}
static int vvp_io_write_lock(const struct lu_env *env,
@@ -354,14 +556,13 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env,
}
/**
- * Implementation of cl_io_operations::cio_lock() method for CIT_SETATTR io.
+ * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io.
*
* Handles "lockless io" mode when extent locking is done by server.
*/
static int vvp_io_setattr_lock(const struct lu_env *env,
const struct cl_io_slice *ios)
{
- struct ccc_io *cio = ccc_env_io(env);
struct cl_io *io = ios->cis_io;
__u64 new_size;
__u32 enqflags = 0;
@@ -378,8 +579,8 @@ static int vvp_io_setattr_lock(const struct lu_env *env,
return 0;
new_size = 0;
}
- cio->u.setattr.cui_local_lock = SETATTR_EXTENT_LOCK;
- return ccc_io_one_lock(env, io, enqflags, CLM_WRITE,
+
+ return vvp_io_one_lock(env, io, enqflags, CLM_WRITE,
new_size, OBD_OBJECT_EOF);
}
@@ -413,7 +614,7 @@ static int vvp_io_setattr_time(const struct lu_env *env,
{
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct cl_attr *attr = ccc_env_thread_attr(env);
+ struct cl_attr *attr = vvp_env_thread_attr(env);
int result;
unsigned valid = CAT_CTIME;
@@ -437,7 +638,7 @@ static int vvp_io_setattr_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct cl_io *io = ios->cis_io;
- struct inode *inode = ccc_object_inode(io->ci_obj);
+ struct inode *inode = vvp_object_inode(io->ci_obj);
int result = 0;
inode_lock(inode);
@@ -453,7 +654,7 @@ static void vvp_io_setattr_end(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct cl_io *io = ios->cis_io;
- struct inode *inode = ccc_object_inode(io->ci_obj);
+ struct inode *inode = vvp_object_inode(io->ci_obj);
if (cl_io_is_trunc(io))
/* Truncate in memory pages - they must be clean pages
@@ -474,27 +675,25 @@ static int vvp_io_read_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct vvp_io *vio = cl2vvp_io(env, ios);
- struct ccc_io *cio = cl2ccc_io(env, ios);
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct inode *inode = ccc_object_inode(obj);
- struct ll_ra_read *bead = &vio->cui_bead;
- struct file *file = cio->cui_fd->fd_file;
+ struct inode *inode = vvp_object_inode(obj);
+ struct file *file = vio->vui_fd->fd_file;
int result;
loff_t pos = io->u.ci_rd.rd.crw_pos;
long cnt = io->u.ci_rd.rd.crw_count;
- long tot = cio->cui_tot_count;
+ long tot = vio->vui_tot_count;
int exceed = 0;
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
if (!can_populate_pages(env, io, inode))
return 0;
- result = ccc_prep_size(env, obj, io, pos, tot, &exceed);
+ result = vvp_prep_size(env, obj, io, pos, tot, &exceed);
if (result != 0)
return result;
else if (exceed != 0)
@@ -505,30 +704,27 @@ static int vvp_io_read_start(const struct lu_env *env,
inode->i_ino, cnt, pos, i_size_read(inode));
/* turn off the kernel's read-ahead */
- cio->cui_fd->fd_file->f_ra.ra_pages = 0;
+ vio->vui_fd->fd_file->f_ra.ra_pages = 0;
/* initialize read-ahead window once per syscall */
- if (!vio->cui_ra_window_set) {
- vio->cui_ra_window_set = 1;
- bead->lrr_start = cl_index(obj, pos);
- /*
- * XXX: explicit PAGE_SIZE
- */
- bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1);
- ll_ra_read_in(file, bead);
+ if (!vio->vui_ra_valid) {
+ vio->vui_ra_valid = true;
+ vio->vui_ra_start = cl_index(obj, pos);
+ vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
+ ll_ras_enter(file);
}
/* BUG: 5972 */
file_accessed(file);
- switch (vio->cui_io_subtype) {
+ switch (vio->vui_io_subtype) {
case IO_NORMAL:
- LASSERT(cio->cui_iocb->ki_pos == pos);
- result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter);
+ LASSERT(vio->vui_iocb->ki_pos == pos);
+ result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
break;
case IO_SPLICE:
result = generic_file_splice_read(file, &pos,
- vio->u.splice.cui_pipe, cnt,
- vio->u.splice.cui_flags);
+ vio->u.splice.vui_pipe, cnt,
+ vio->u.splice.vui_flags);
/* LU-1109: do splice read stripe by stripe otherwise if it
* may make nfsd stuck if this read occupied all internal pipe
* buffers.
@@ -536,7 +732,7 @@ static int vvp_io_read_start(const struct lu_env *env,
io->ci_continue = 0;
break;
default:
- CERROR("Wrong IO type %u\n", vio->cui_io_subtype);
+ CERROR("Wrong IO type %u\n", vio->vui_io_subtype);
LBUG();
}
@@ -546,30 +742,201 @@ out:
io->ci_continue = 0;
io->ci_nob += result;
ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- cio->cui_fd, pos, result, READ);
+ vio->vui_fd, pos, result, READ);
result = 0;
}
return result;
}
-static void vvp_io_read_fini(const struct lu_env *env, const struct cl_io_slice *ios)
+static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *plist, int from, int to)
{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct ccc_io *cio = cl2ccc_io(env, ios);
+ struct cl_2queue *queue = &io->ci_queue;
+ struct cl_page *page;
+ unsigned int bytes = 0;
+ int rc = 0;
- if (vio->cui_ra_window_set)
- ll_ra_read_ex(cio->cui_fd->fd_file, &vio->cui_bead);
+ if (plist->pl_nr == 0)
+ return 0;
- vvp_io_fini(env, ios);
+ if (from > 0 || to != PAGE_SIZE) {
+ page = cl_page_list_first(plist);
+ if (plist->pl_nr == 1) {
+ cl_page_clip(env, page, from, to);
+ } else {
+ if (from > 0)
+ cl_page_clip(env, page, from, PAGE_SIZE);
+ if (to != PAGE_SIZE) {
+ page = cl_page_list_last(plist);
+ cl_page_clip(env, page, 0, to);
+ }
+ }
+ }
+
+ cl_2queue_init(queue);
+ cl_page_list_splice(plist, &queue->c2_qin);
+ rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0);
+
+ /* plist is not sorted any more */
+ cl_page_list_splice(&queue->c2_qin, plist);
+ cl_page_list_splice(&queue->c2_qout, plist);
+ cl_2queue_fini(env, queue);
+
+ if (rc == 0) {
+ /* calculate bytes */
+ bytes = plist->pl_nr << PAGE_SHIFT;
+ bytes -= from + PAGE_SIZE - to;
+
+ while (plist->pl_nr > 0) {
+ page = cl_page_list_first(plist);
+ cl_page_list_del(env, plist, page);
+
+ cl_page_clip(env, page, 0, PAGE_SIZE);
+
+ SetPageUptodate(cl_page_vmpage(page));
+ cl_page_disown(env, io, page);
+
+ /* held in ll_cl_init() */
+ lu_ref_del(&page->cp_reference, "cl_io", io);
+ cl_page_put(env, page);
+ }
+ }
+
+ return bytes > 0 ? bytes : rc;
+}
+
+static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page)
+{
+ struct vvp_page *vpg;
+ struct page *vmpage = page->cp_vmpage;
+ struct cl_object *clob = cl_io_top(io)->ci_obj;
+
+ SetPageUptodate(vmpage);
+ set_page_dirty(vmpage);
+
+ vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+ vvp_write_pending(cl2vvp(clob), vpg);
+
+ cl_page_disown(env, io, page);
+
+ /* held in ll_cl_init() */
+ lu_ref_del(&page->cp_reference, "cl_io", io);
+ cl_page_put(env, page);
+}
+
+/* make sure the page list is contiguous */
+static bool page_list_sanity_check(struct cl_object *obj,
+ struct cl_page_list *plist)
+{
+ struct cl_page *page;
+ pgoff_t index = CL_PAGE_EOF;
+
+ cl_page_list_for_each(page, plist) {
+ struct vvp_page *vpg = cl_object_page_slice(obj, page);
+
+ if (index == CL_PAGE_EOF) {
+ index = vvp_index(vpg);
+ continue;
+ }
+
+ ++index;
+ if (index == vvp_index(vpg))
+ continue;
+
+ return false;
+ }
+ return true;
+}
+
+/* Return how many bytes have queued or written */
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io)
+{
+ struct cl_object *obj = io->ci_obj;
+ struct inode *inode = vvp_object_inode(obj);
+ struct vvp_io *vio = vvp_env_io(env);
+ struct cl_page_list *queue = &vio->u.write.vui_queue;
+ struct cl_page *page;
+ int rc = 0;
+ int bytes = 0;
+ unsigned int npages = vio->u.write.vui_queue.pl_nr;
+
+ if (npages == 0)
+ return 0;
+
+ CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n",
+ npages, vio->u.write.vui_from, vio->u.write.vui_to);
+
+ LASSERT(page_list_sanity_check(obj, queue));
+
+ /* submit IO with async write */
+ rc = cl_io_commit_async(env, io, queue,
+ vio->u.write.vui_from, vio->u.write.vui_to,
+ write_commit_callback);
+ npages -= queue->pl_nr; /* already committed pages */
+ if (npages > 0) {
+ /* calculate how many bytes were written */
+ bytes = npages << PAGE_SHIFT;
+
+ /* first page */
+ bytes -= vio->u.write.vui_from;
+ if (queue->pl_nr == 0) /* last page */
+ bytes -= PAGE_SIZE - vio->u.write.vui_to;
+ LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages);
+
+ vio->u.write.vui_written += bytes;
+
+ CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n",
+ npages, bytes, vio->u.write.vui_written);
+
+ /* the first page must have been written. */
+ vio->u.write.vui_from = 0;
+ }
+ LASSERT(page_list_sanity_check(obj, queue));
+ LASSERT(ergo(rc == 0, queue->pl_nr == 0));
+
+ /* out of quota, try sync write */
+ if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) {
+ rc = vvp_io_commit_sync(env, io, queue,
+ vio->u.write.vui_from,
+ vio->u.write.vui_to);
+ if (rc > 0) {
+ vio->u.write.vui_written += rc;
+ rc = 0;
+ }
+ }
+
+ /* update inode size */
+ ll_merge_attr(env, inode);
+
+ /* The pages still in the queue failed to commit; discard them
+ * unless they were dirtied before.
+ */
+ while (queue->pl_nr > 0) {
+ page = cl_page_list_first(queue);
+ cl_page_list_del(env, queue, page);
+
+ if (!PageDirty(cl_page_vmpage(page)))
+ cl_page_discard(env, io, page);
+
+ cl_page_disown(env, io, page);
+
+ /* held in ll_cl_init() */
+ lu_ref_del(&page->cp_reference, "cl_io", io);
+ cl_page_put(env, page);
+ }
+ cl_page_list_fini(env, queue);
+
+ return rc;
}
static int vvp_io_write_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
- struct ccc_io *cio = cl2ccc_io(env, ios);
+ struct vvp_io *vio = cl2vvp_io(env, ios);
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
ssize_t result = 0;
loff_t pos = io->u.ci_wr.wr.crw_pos;
size_t cnt = io->u.ci_wr.wr.crw_count;
@@ -582,25 +949,42 @@ static int vvp_io_write_start(const struct lu_env *env,
* PARALLEL IO This has to be changed for parallel IO doing
* out-of-order writes.
*/
- pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode);
- cio->cui_iocb->ki_pos = pos;
+ ll_merge_attr(env, inode);
+ pos = i_size_read(inode);
+ io->u.ci_wr.wr.crw_pos = pos;
+ vio->vui_iocb->ki_pos = pos;
} else {
- LASSERT(cio->cui_iocb->ki_pos == pos);
+ LASSERT(vio->vui_iocb->ki_pos == pos);
}
CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
- if (!cio->cui_iter) /* from a temp io in ll_cl_init(). */
+ if (!vio->vui_iter) /* from a temp io in ll_cl_init(). */
result = 0;
else
- result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter);
+ result = generic_file_write_iter(vio->vui_iocb, vio->vui_iter);
+
+ if (result > 0) {
+ result = vvp_io_write_commit(env, io);
+ if (vio->u.write.vui_written > 0) {
+ result = vio->u.write.vui_written;
+ io->ci_nob += result;
+ CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n",
+ io->ci_nob, result);
+ }
+ }
if (result > 0) {
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ spin_lock(&lli->lli_lock);
+ lli->lli_flags |= LLIF_DATA_MODIFIED;
+ spin_unlock(&lli->lli_lock);
+
if (result < cnt)
io->ci_continue = 0;
- io->ci_nob += result;
ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- cio->cui_fd, pos, result, WRITE);
+ vio->vui_fd, pos, result, WRITE);
result = 0;
}
return result;
@@ -608,10 +992,10 @@ static int vvp_io_write_start(const struct lu_env *env,
static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
{
- struct vm_fault *vmf = cfio->fault.ft_vmf;
+ struct vm_fault *vmf = cfio->ft_vmf;
- cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf);
- cfio->fault.ft_flags_valid = 1;
+ cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf);
+ cfio->ft_flags_valid = 1;
if (vmf->page) {
CDEBUG(D_PAGE,
@@ -619,39 +1003,51 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
vmf->page, vmf->page->mapping, vmf->page->index,
(long)vmf->page->flags, page_count(vmf->page),
page_private(vmf->page), vmf->virtual_address);
- if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) {
+ if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
lock_page(vmf->page);
- cfio->fault.ft_flags |= VM_FAULT_LOCKED;
+ cfio->ft_flags |= VM_FAULT_LOCKED;
}
cfio->ft_vmpage = vmf->page;
return 0;
}
- if (cfio->fault.ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
+ if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address);
return -EFAULT;
}
- if (cfio->fault.ft_flags & VM_FAULT_OOM) {
+ if (cfio->ft_flags & VM_FAULT_OOM) {
CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address);
return -ENOMEM;
}
- if (cfio->fault.ft_flags & VM_FAULT_RETRY)
+ if (cfio->ft_flags & VM_FAULT_RETRY)
return -EAGAIN;
- CERROR("Unknown error in page fault %d!\n", cfio->fault.ft_flags);
+ CERROR("Unknown error in page fault %d!\n", cfio->ft_flags);
return -EINVAL;
}
+/*
+ * Per-page commit callback for the mkwrite path: vvp_io_fault_start()
+ * passes this function to cl_io_commit_async().
+ *
+ * It marks the VM page behind @page dirty, then fetches the page's vvp
+ * slice for the object of cl_io_top(io) and hands it to
+ * vvp_write_pending().
+ *
+ * @env is not referenced in the body.
+ */
+static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page)
+{
+ struct vvp_page *vpg;
+ struct cl_object *clob = cl_io_top(io)->ci_obj;
+
+ set_page_dirty(page->cp_vmpage);
+
+ vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+ vvp_write_pending(cl2vvp(clob), vpg);
+}
+
static int vvp_io_fault_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
struct vvp_io *vio = cl2vvp_io(env, ios);
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
struct cl_fault_io *fio = &io->u.ci_fault;
struct vvp_fault_io *cfio = &vio->u.fault;
loff_t offset;
@@ -659,7 +1055,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
struct page *vmpage = NULL;
struct cl_page *page;
loff_t size;
- pgoff_t last; /* last page in a file data region */
+ pgoff_t last_index;
if (fio->ft_executable &&
inode->i_mtime.tv_sec != vio->u.fault.ft_mtime)
@@ -670,7 +1066,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
/* offset of the last byte on the page */
offset = cl_offset(obj, fio->ft_index + 1) - 1;
LASSERT(cl_index(obj, offset) == fio->ft_index);
- result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL);
+ result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL);
if (result != 0)
return result;
@@ -705,15 +1101,15 @@ static int vvp_io_fault_start(const struct lu_env *env,
goto out;
}
+ last_index = cl_index(obj, size - 1);
+
if (fio->ft_mkwrite) {
- pgoff_t last_index;
/*
* Capture the size while holding the lli_trunc_sem from above
* we want to make sure that we complete the mkwrite action
* while holding this lock. We need to make sure that we are
* not past the end of the file.
*/
- last_index = cl_index(obj, size - 1);
if (last_index < fio->ft_index) {
CDEBUG(D_PAGE,
"llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n",
@@ -745,25 +1141,32 @@ static int vvp_io_fault_start(const struct lu_env *env,
*/
if (fio->ft_mkwrite) {
wait_on_page_writeback(vmpage);
- if (set_page_dirty(vmpage)) {
- struct ccc_page *cp;
+ if (!PageDirty(vmpage)) {
+ struct cl_page_list *plist = &io->ci_queue.c2_qin;
+ struct vvp_page *vpg = cl_object_page_slice(obj, page);
+ int to = PAGE_SIZE;
/* vvp_page_assume() calls wait_on_page_writeback(). */
cl_page_assume(env, io, page);
- cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
- vvp_write_pending(cl2ccc(obj), cp);
+ cl_page_list_init(plist);
+ cl_page_list_add(plist, page);
+
+ /* size fixup */
+ if (last_index == vvp_index(vpg))
+ to = size & ~PAGE_MASK;
/* Do not set Dirty bit here so that in case IO is
* started before the page is really made dirty, we
* still have chance to detect it.
*/
- result = cl_page_cache_add(env, io, page, CRT_WRITE);
+ result = cl_io_commit_async(env, io, plist, 0, to,
+ mkwrite_commit_callback);
LASSERT(cl_page_is_owned(page, io));
+ cl_page_list_fini(env, plist);
vmpage = NULL;
if (result < 0) {
- cl_page_unmap(env, io, page);
cl_page_discard(env, io, page);
cl_page_disown(env, io, page);
@@ -773,20 +1176,20 @@ static int vvp_io_fault_start(const struct lu_env *env,
if (result == -EDQUOT)
result = -ENOSPC;
goto out;
- } else
+ } else {
cl_page_disown(env, io, page);
+ }
}
}
- last = cl_index(obj, size - 1);
/*
* The ft_index is only used in the case of
* a mkwrite action. We need to check
* our assertions are correct, since
* we should have caught this above
*/
- LASSERT(!fio->ft_mkwrite || fio->ft_index <= last);
- if (fio->ft_index == last)
+ LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index);
+ if (fio->ft_index == last_index)
/*
* Last page is mapped partially.
*/
@@ -801,7 +1204,9 @@ out:
/* return unlocked vmpage to avoid deadlocking */
if (vmpage)
unlock_page(vmpage);
- cfio->fault.ft_flags &= ~VM_FAULT_LOCKED;
+
+ cfio->ft_flags &= ~VM_FAULT_LOCKED;
+
return result;
}
@@ -820,293 +1225,58 @@ static int vvp_io_read_page(const struct lu_env *env,
const struct cl_page_slice *slice)
{
struct cl_io *io = ios->cis_io;
- struct cl_object *obj = slice->cpl_obj;
- struct ccc_page *cp = cl2ccc_page(slice);
+ struct vvp_page *vpg = cl2vvp_page(slice);
struct cl_page *page = slice->cpl_page;
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(slice->cpl_obj);
struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_file_data *fd = cl2ccc_io(env, ios)->cui_fd;
+ struct ll_file_data *fd = cl2vvp_io(env, ios)->vui_fd;
struct ll_readahead_state *ras = &fd->fd_ras;
- struct page *vmpage = cp->cpg_page;
struct cl_2queue *queue = &io->ci_queue;
- int rc;
-
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
- LASSERT(slice->cpl_obj == obj);
if (sbi->ll_ra_info.ra_max_pages_per_file &&
sbi->ll_ra_info.ra_max_pages)
- ras_update(sbi, inode, ras, page->cp_index,
- cp->cpg_defer_uptodate);
-
- /* Sanity check whether the page is protected by a lock. */
- rc = cl_page_is_under_lock(env, io, page);
- if (rc != -EBUSY) {
- CL_PAGE_HEADER(D_WARNING, env, page, "%s: %d\n",
- rc == -ENODATA ? "without a lock" :
- "match failed", rc);
- if (rc != -ENODATA)
- return rc;
- }
+ ras_update(sbi, inode, ras, vvp_index(vpg),
+ vpg->vpg_defer_uptodate);
- if (cp->cpg_defer_uptodate) {
- cp->cpg_ra_used = 1;
+ if (vpg->vpg_defer_uptodate) {
+ vpg->vpg_ra_used = 1;
cl_page_export(env, page, 1);
}
/*
* Add page into the queue even when it is marked uptodate above.
* this will unlock it automatically as part of cl_page_list_disown().
*/
+
cl_page_list_add(&queue->c2_qin, page);
if (sbi->ll_ra_info.ra_max_pages_per_file &&
sbi->ll_ra_info.ra_max_pages)
- ll_readahead(env, io, ras,
- vmpage->mapping, &queue->c2_qin, fd->fd_flags);
+ ll_readahead(env, io, &queue->c2_qin, ras,
+ vpg->vpg_defer_uptodate);
return 0;
}
-static int vvp_page_sync_io(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, struct ccc_page *cp,
- enum cl_req_type crt)
+static void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
{
- struct cl_2queue *queue;
- int result;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- queue = &io->ci_queue;
- cl_2queue_init_page(queue, page);
-
- result = cl_io_submit_sync(env, io, crt, queue, 0);
- LASSERT(cl_page_is_owned(page, io));
-
- if (crt == CRT_READ)
- /*
- * in CRT_WRITE case page is left locked even in case of
- * error.
- */
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_2queue_fini(env, queue);
-
- return result;
-}
-
-/**
- * Prepare partially written-to page for a write.
- */
-static int vvp_io_prepare_partial(const struct lu_env *env, struct cl_io *io,
- struct cl_object *obj, struct cl_page *pg,
- struct ccc_page *cp,
- unsigned from, unsigned to)
-{
- struct cl_attr *attr = ccc_env_thread_attr(env);
- loff_t offset = cl_offset(obj, pg->cp_index);
- int result;
-
- cl_object_attr_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
- if (result == 0) {
- /*
- * If are writing to a new page, no need to read old data.
- * The extent locking will have updated the KMS, and for our
- * purposes here we can treat it like i_size.
- */
- if (attr->cat_kms <= offset) {
- char *kaddr = kmap_atomic(cp->cpg_page);
-
- memset(kaddr, 0, cl_page_size(obj));
- kunmap_atomic(kaddr);
- } else if (cp->cpg_defer_uptodate)
- cp->cpg_ra_used = 1;
- else
- result = vvp_page_sync_io(env, io, pg, cp, CRT_READ);
- /*
- * In older implementations, obdo_refresh_inode is called here
- * to update the inode because the write might modify the
- * object info at OST. However, this has been proven useless,
- * since LVB functions will be called when user space program
- * tries to retrieve inode attribute. Also, see bug 15909 for
- * details. -jay
- */
- if (result == 0)
- cl_page_export(env, pg, 1);
- }
- return result;
-}
-
-static int vvp_io_prepare_write(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice,
- unsigned from, unsigned to)
-{
- struct cl_object *obj = slice->cpl_obj;
- struct ccc_page *cp = cl2ccc_page(slice);
- struct cl_page *pg = slice->cpl_page;
- struct page *vmpage = cp->cpg_page;
-
- int result;
-
- LINVRNT(cl_page_is_vmlocked(env, pg));
- LASSERT(vmpage->mapping->host == ccc_object_inode(obj));
-
- result = 0;
-
- CL_PAGE_HEADER(D_PAGE, env, pg, "preparing: [%d, %d]\n", from, to);
- if (!PageUptodate(vmpage)) {
- /*
- * We're completely overwriting an existing page, so _don't_
- * set it up to date until commit_write
- */
- if (from == 0 && to == PAGE_SIZE) {
- CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n");
- POISON_PAGE(page, 0x11);
- } else
- result = vvp_io_prepare_partial(env, ios->cis_io, obj,
- pg, cp, from, to);
- } else
- CL_PAGE_HEADER(D_PAGE, env, pg, "uptodate\n");
- return result;
-}
-
-static int vvp_io_commit_write(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice,
- unsigned from, unsigned to)
-{
- struct cl_object *obj = slice->cpl_obj;
- struct cl_io *io = ios->cis_io;
- struct ccc_page *cp = cl2ccc_page(slice);
- struct cl_page *pg = slice->cpl_page;
- struct inode *inode = ccc_object_inode(obj);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct page *vmpage = cp->cpg_page;
-
- int result;
- int tallyop;
- loff_t size;
-
- LINVRNT(cl_page_is_vmlocked(env, pg));
- LASSERT(vmpage->mapping->host == inode);
-
- LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "committing page write\n");
- CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to);
-
- /*
- * queue a write for some time in the future the first time we
- * dirty the page.
- *
- * This is different from what other file systems do: they usually
- * just mark page (and some of its buffers) dirty and rely on
- * balance_dirty_pages() to start a write-back. Lustre wants write-back
- * to be started earlier for the following reasons:
- *
- * (1) with a large number of clients we need to limit the amount
- * of cached data on the clients a lot;
- *
- * (2) large compute jobs generally want compute-only then io-only
- * and the IO should complete as quickly as possible;
- *
- * (3) IO is batched up to the RPC size and is async until the
- * client max cache is hit
- * (/sys/fs/lustre/osc/OSC.../max_dirty_mb)
- *
- */
- if (!PageDirty(vmpage)) {
- tallyop = LPROC_LL_DIRTY_MISSES;
- result = cl_page_cache_add(env, io, pg, CRT_WRITE);
- if (result == 0) {
- /* page was added into cache successfully. */
- set_page_dirty(vmpage);
- vvp_write_pending(cl2ccc(obj), cp);
- } else if (result == -EDQUOT) {
- pgoff_t last_index = i_size_read(inode) >> PAGE_SHIFT;
- bool need_clip = true;
-
- /*
- * Client ran out of disk space grant. Possible
- * strategies are:
- *
- * (a) do a sync write, renewing grant;
- *
- * (b) stop writing on this stripe, switch to the
- * next one.
- *
- * (b) is a part of "parallel io" design that is the
- * ultimate goal. (a) is what "old" client did, and
- * what the new code continues to do for the time
- * being.
- */
- if (last_index > pg->cp_index) {
- to = PAGE_SIZE;
- need_clip = false;
- } else if (last_index == pg->cp_index) {
- int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
-
- if (to < size_to)
- to = size_to;
- }
- if (need_clip)
- cl_page_clip(env, pg, 0, to);
- result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE);
- if (result)
- CERROR("Write page %lu of inode %p failed %d\n",
- pg->cp_index, inode, result);
- }
- } else {
- tallyop = LPROC_LL_DIRTY_HITS;
- result = 0;
- }
- ll_stats_ops_tally(sbi, tallyop, 1);
-
- /* Inode should be marked DIRTY even if no new page was marked DIRTY
- * because page could have been not flushed between 2 modifications.
- * It is important the file is marked DIRTY as soon as the I/O is done
- * Indeed, when cache is flushed, file could be already closed and it
- * is too late to warn the MDT.
- * It is acceptable that file is marked DIRTY even if I/O is dropped
- * for some reasons before being flushed to OST.
- */
- if (result == 0) {
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= LLIF_DATA_MODIFIED;
- spin_unlock(&lli->lli_lock);
- }
-
- size = cl_offset(obj, pg->cp_index) + to;
-
- ll_inode_size_lock(inode);
- if (result == 0) {
- if (size > i_size_read(inode)) {
- cl_isize_write_nolock(inode, size);
- CDEBUG(D_VFSTRACE, DFID" updating i_size %lu\n",
- PFID(lu_object_fid(&obj->co_lu)),
- (unsigned long)size);
- }
- cl_page_export(env, pg, 1);
- } else {
- if (size > i_size_read(inode))
- cl_page_discard(env, io, pg);
- }
- ll_inode_size_unlock(inode);
- return result;
+ CLOBINVRNT(env, ios->cis_io->ci_obj,
+ vvp_object_invariant(ios->cis_io->ci_obj));
}
static const struct cl_io_operations vvp_io_ops = {
.op = {
[CIT_READ] = {
- .cio_fini = vvp_io_read_fini,
+ .cio_fini = vvp_io_fini,
.cio_lock = vvp_io_read_lock,
.cio_start = vvp_io_read_start,
- .cio_advance = ccc_io_advance
+ .cio_advance = vvp_io_advance,
},
[CIT_WRITE] = {
.cio_fini = vvp_io_fini,
+ .cio_iter_init = vvp_io_write_iter_init,
+ .cio_iter_fini = vvp_io_write_iter_fini,
.cio_lock = vvp_io_write_lock,
.cio_start = vvp_io_write_start,
- .cio_advance = ccc_io_advance
+ .cio_advance = vvp_io_advance,
},
[CIT_SETATTR] = {
.cio_fini = vvp_io_setattr_fini,
@@ -1120,7 +1290,7 @@ static const struct cl_io_operations vvp_io_ops = {
.cio_iter_init = vvp_io_fault_iter_init,
.cio_lock = vvp_io_fault_lock,
.cio_start = vvp_io_fault_start,
- .cio_end = ccc_io_end
+ .cio_end = vvp_io_end,
},
[CIT_FSYNC] = {
.cio_start = vvp_io_fsync_start,
@@ -1131,29 +1301,26 @@ static const struct cl_io_operations vvp_io_ops = {
}
},
.cio_read_page = vvp_io_read_page,
- .cio_prepare_write = vvp_io_prepare_write,
- .cio_commit_write = vvp_io_commit_write
};
int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
struct cl_io *io)
{
struct vvp_io *vio = vvp_env_io(env);
- struct ccc_io *cio = ccc_env_io(env);
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
int result;
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
CDEBUG(D_VFSTRACE, DFID
" ignore/verify layout %d/%d, layout version %d restore needed %d\n",
PFID(lu_object_fid(&obj->co_lu)),
io->ci_ignore_layout, io->ci_verify_layout,
- cio->cui_layout_gen, io->ci_restore_needed);
+ vio->vui_layout_gen, io->ci_restore_needed);
- CL_IO_SLICE_CLEAN(cio, cui_cl);
- cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops);
- vio->cui_ra_window_set = 0;
+ CL_IO_SLICE_CLEAN(vio, vui_cl);
+ cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops);
+ vio->vui_ra_valid = false;
result = 0;
if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
size_t count;
@@ -1166,7 +1333,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
if (count == 0)
result = 1;
else
- cio->cui_tot_count = count;
+ vio->vui_tot_count = count;
/* for read/write, we store the jobid in the inode, and
* it'll be fetched by osc when building RPC.
@@ -1192,7 +1359,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
* because it might not grant layout lock in IT_OPEN.
*/
if (result == 0 && !io->ci_ignore_layout) {
- result = ll_layout_refresh(inode, &cio->cui_layout_gen);
+ result = ll_layout_refresh(inode, &vio->vui_layout_gen);
if (result == -ENOENT)
/* If the inode on MDS has been removed, but the objects
* on OSTs haven't been destroyed (async unlink), layout
@@ -1208,11 +1375,3 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
return result;
}
-
-static struct vvp_io *cl2vvp_io(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- /* Calling just for assertion */
- cl2ccc_io(env, slice);
- return vvp_env_io(env);
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
index ff0948043c7a..64be0c9df35b 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_lock.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -40,7 +36,7 @@
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/obd.h"
+#include "../include/obd_support.h"
#include "../include/lustre_lite.h"
#include "vvp_internal.h"
@@ -51,36 +47,41 @@
*
*/
-/**
- * Estimates lock value for the purpose of managing the lock cache during
- * memory shortages.
- *
- * Locks for memory mapped files are almost infinitely precious, others are
- * junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are
- * ordered within themselves by weights assigned from other layers.
- */
-static unsigned long vvp_lock_weigh(const struct lu_env *env,
- const struct cl_lock_slice *slice)
+/*
+ * clo_fini method of vvp_lock_ops: converts @slice back to its vvp_lock
+ * with cl2vvp_lock() and returns that structure to vvp_lock_kmem.
+ */
+static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
{
- struct ccc_object *cob = cl2ccc(slice->cls_obj);
+ struct vvp_lock *vlk = cl2vvp_lock(slice);
- return atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0;
+ kmem_cache_free(vvp_lock_kmem, vlk);
+}
+
+/*
+ * clo_enqueue method of vvp_lock_ops. The body only evaluates the
+ * vvp_object_invariant() check on the slice's object through CLOBINVRNT()
+ * and returns 0; the io and anchor arguments are not used.
+ */
+static int vvp_lock_enqueue(const struct lu_env *env,
+ const struct cl_lock_slice *slice,
+ struct cl_io *unused, struct cl_sync_io *anchor)
+{
+ CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj));
+
+ return 0;
}
+/*
+ * Lock-slice method table that vvp_lock_init() attaches to each lock.
+ * With the ccc_lock_* entries removed, only clo_fini and clo_enqueue are
+ * provided.
+ */
static const struct cl_lock_operations vvp_lock_ops = {
- .clo_delete = ccc_lock_delete,
- .clo_fini = ccc_lock_fini,
- .clo_enqueue = ccc_lock_enqueue,
- .clo_wait = ccc_lock_wait,
- .clo_use = ccc_lock_use,
- .clo_unuse = ccc_lock_unuse,
- .clo_fits_into = ccc_lock_fits_into,
- .clo_state = ccc_lock_state,
- .clo_weigh = vvp_lock_weigh
+ .clo_fini = vvp_lock_fini,
+ .clo_enqueue = vvp_lock_enqueue,
};
+/*
+ * Creates the vvp layer's slice for @lock.
+ *
+ * Allocates a zeroed vvp_lock from vvp_lock_kmem (GFP_NOFS) and adds its
+ * vlk_cl slice to @lock for @obj with vvp_lock_ops. The io argument is
+ * not used.
+ *
+ * Returns 0 on success, -ENOMEM when the allocation fails.
+ */
int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io)
+ struct cl_lock *lock, const struct cl_io *unused)
{
- return ccc_lock_init(env, obj, lock, io, &vvp_lock_ops);
+ struct vvp_lock *vlk;
+ int result;
+
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+ vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS);
+ if (vlk) {
+ cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops);
+ result = 0;
+ } else {
+ result = -ENOMEM;
+ }
+ return result;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
index 03c887d8ed83..2c520b0bf6ca 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_object.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -45,6 +41,7 @@
#include "../include/obd.h"
#include "../include/lustre_lite.h"
+#include "llite_internal.h"
#include "vvp_internal.h"
/*****************************************************************************
@@ -53,16 +50,25 @@
*
*/
+/*
+ * Consistency predicate for a vvp object, used as the condition of the
+ * CLOBINVRNT() checks in this layer.
+ *
+ * Returns non-zero when the object's inode is a regular file or has
+ * i_mode == 0, and the inode's ll_inode_info points back at @obj through
+ * lli_clob; returns 0 otherwise.
+ */
+int vvp_object_invariant(const struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ return (S_ISREG(inode->i_mode) || inode->i_mode == 0) &&
+ lli->lli_clob == obj;
+}
+
static int vvp_object_print(const struct lu_env *env, void *cookie,
lu_printer_t p, const struct lu_object *o)
{
- struct ccc_object *obj = lu2ccc(o);
- struct inode *inode = obj->cob_inode;
+ struct vvp_object *obj = lu2vvp(o);
+ struct inode *inode = obj->vob_inode;
struct ll_inode_info *lli;
(*p)(env, cookie, "(%s %d %d) inode: %p ",
- list_empty(&obj->cob_pending_list) ? "-" : "+",
- obj->cob_transient_pages, atomic_read(&obj->cob_mmap_cnt),
+ list_empty(&obj->vob_pending_list) ? "-" : "+",
+ obj->vob_transient_pages, atomic_read(&obj->vob_mmap_cnt),
inode);
if (inode) {
lli = ll_i2info(inode);
@@ -77,7 +83,7 @@ static int vvp_object_print(const struct lu_env *env, void *cookie,
static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
struct cl_attr *attr)
{
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
/*
* lov overwrites most of these fields in
@@ -99,7 +105,7 @@ static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
const struct cl_attr *attr, unsigned valid)
{
- struct inode *inode = ccc_object_inode(obj);
+ struct inode *inode = vvp_object_inode(obj);
if (valid & CAT_UID)
inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid);
@@ -112,7 +118,7 @@ static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
if (valid & CAT_CTIME)
inode->i_ctime.tv_sec = attr->cat_ctime;
if (0 && valid & CAT_SIZE)
- cl_isize_write_nolock(inode, attr->cat_size);
+ i_size_write(inode, attr->cat_size);
/* not currently necessary */
if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE))
mark_inode_dirty(inode);
@@ -165,6 +171,40 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
return 0;
}
+/*
+ * coo_prune method of vvp_ops.
+ *
+ * Runs cl_sync_file_range() over [0, OBD_OBJECT_EOF] in CL_FSYNC_LOCAL
+ * mode and, unless that fails, truncates the inode's page cache from
+ * offset 0.
+ *
+ * Returns the negative value reported by cl_sync_file_range() (the page
+ * cache is left untouched in that case), otherwise 0; a positive result
+ * from cl_sync_file_range() is not propagated.
+ */
+static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
+{
+ struct inode *inode = vvp_object_inode(obj);
+ int rc;
+
+ rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
+ if (rc < 0) {
+ CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
+ PFID(lu_object_fid(&obj->co_lu)), rc);
+ return rc;
+ }
+
+ truncate_inode_pages(inode->i_mapping, 0);
+ return 0;
+}
+
+/*
+ * coo_glimpse method of vvp_ops.
+ *
+ * Copies the inode's mtime, atime and ctime (via LTIME_S()) into @lvb.
+ * lvb_size and lvb_blocks are only read here, so they are expected to
+ * have been filled in by the caller or another layer (not visible in
+ * this function). Always returns 0.
+ */
+static int vvp_object_glimpse(const struct lu_env *env,
+ const struct cl_object *obj, struct ost_lvb *lvb)
+{
+ struct inode *inode = vvp_object_inode(obj);
+
+ lvb->lvb_mtime = LTIME_S(inode->i_mtime);
+ lvb->lvb_atime = LTIME_S(inode->i_atime);
+ lvb->lvb_ctime = LTIME_S(inode->i_ctime);
+ /*
+ * LU-417: Add dirty pages block count lest i_blocks reports 0, some
+ * "cp" or "tar" on remote node may think it's a completely sparse file
+ * and skip it.
+ */
+ if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
+ lvb->lvb_blocks = dirty_cnt(inode);
+ return 0;
+}
+
static const struct cl_object_operations vvp_ops = {
.coo_page_init = vvp_page_init,
.coo_lock_init = vvp_lock_init,
@@ -172,29 +212,94 @@ static const struct cl_object_operations vvp_ops = {
.coo_attr_get = vvp_attr_get,
.coo_attr_set = vvp_attr_set,
.coo_conf_set = vvp_conf_set,
- .coo_glimpse = ccc_object_glimpse
+ .coo_prune = vvp_prune,
+ .coo_glimpse = vvp_object_glimpse
};
+/*
+ * Fills in the vvp-specific part of a freshly allocated object: records
+ * the inode taken from @conf, resets the transient page counter and calls
+ * cl_object_page_init() with sizeof(struct vvp_page) (presumably to
+ * reserve room for this layer's per-page slice; confirm against
+ * cl_object_page_init()). Always returns 0.
+ */
+static int vvp_object_init0(const struct lu_env *env,
+ struct vvp_object *vob,
+ const struct cl_object_conf *conf)
+{
+ vob->vob_inode = conf->coc_inode;
+ vob->vob_transient_pages = 0;
+ cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page));
+ return 0;
+}
+
+/*
+ * loo_object_init method of vvp_lu_obj_ops.
+ *
+ * Asks the device below this one (dev->vdv_next) to allocate its object
+ * for the same object header. On success it initializes the pending
+ * list, links the lower object under @obj with lu_object_add() and
+ * finishes with vvp_object_init0().
+ *
+ * Returns the result of vvp_object_init0(), or -ENOMEM when the lower
+ * device's ldo_object_alloc() returns NULL.
+ */
+static int vvp_object_init(const struct lu_env *env, struct lu_object *obj,
+ const struct lu_object_conf *conf)
+{
+ struct vvp_device *dev = lu2vvp_dev(obj->lo_dev);
+ struct vvp_object *vob = lu2vvp(obj);
+ struct lu_object *below;
+ struct lu_device *under;
+ int result;
+
+ under = &dev->vdv_next->cd_lu_dev;
+ below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
+ if (below) {
+ const struct cl_object_conf *cconf;
+
+ cconf = lu2cl_conf(conf);
+ INIT_LIST_HEAD(&vob->vob_pending_list);
+ lu_object_add(obj, below);
+ result = vvp_object_init0(env, vob, cconf);
+ } else {
+ result = -ENOMEM;
+ }
+
+ return result;
+}
+
+/*
+ * loo_object_free method of vvp_lu_obj_ops: finalizes the lu_object and
+ * the header it points to, then returns the enclosing vvp_object to
+ * vvp_object_kmem.
+ */
+static void vvp_object_free(const struct lu_env *env, struct lu_object *obj)
+{
+ struct vvp_object *vob = lu2vvp(obj);
+
+ lu_object_fini(obj);
+ lu_object_header_fini(obj->lo_header);
+ kmem_cache_free(vvp_object_kmem, vob);
+}
+
+/*
+ * lu_object method table that vvp_object_alloc() installs in lo_ops of
+ * every object it creates.
+ */
static const struct lu_object_operations vvp_lu_obj_ops = {
- .loo_object_init = ccc_object_init,
- .loo_object_free = ccc_object_free,
- .loo_object_print = vvp_object_print
+ .loo_object_init = vvp_object_init,
+ .loo_object_free = vvp_object_free,
+ .loo_object_print = vvp_object_print,
};
-struct ccc_object *cl_inode2ccc(struct inode *inode)
+/*
+ * Maps an inode to its vvp_object.
+ *
+ * Takes the cl_object stored in the inode's lli_clob, locates the
+ * lu_object that belongs to vvp_device_type in that object's header and
+ * converts it with lu2vvp(). The lookup result is asserted to be
+ * non-NULL. lli_clob is dereferenced without a check, so callers are
+ * assumed to pass an inode whose cl_object is already set up.
+ */
+struct vvp_object *cl_inode2vvp(struct inode *inode)
{
- struct cl_inode_info *lli = cl_i2info(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
struct cl_object *obj = lli->lli_clob;
struct lu_object *lu;
lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type);
LASSERT(lu);
- return lu2ccc(lu);
+ return lu2vvp(lu);
}
+/*
+ * Allocates a vvp_object and returns its embedded lu_object.
+ *
+ * The object comes zeroed from vvp_object_kmem (GFP_NOFS). Its own
+ * cl_object_header (vob_header) is initialized and used as the header of
+ * the new lu_object, which is added at the top of that header's layer
+ * stack; the header argument passed by the caller is not used. vvp_ops
+ * and vvp_lu_obj_ops are installed as the cl_object and lu_object method
+ * tables.
+ *
+ * Returns the new lu_object, or NULL when the allocation fails.
+ */
struct lu_object *vvp_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
+ const struct lu_object_header *unused,
struct lu_device *dev)
{
- return ccc_object_alloc(env, hdr, dev, &vvp_ops, &vvp_lu_obj_ops);
+ struct vvp_object *vob;
+ struct lu_object *obj;
+
+ vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS);
+ if (vob) {
+ struct cl_object_header *hdr;
+
+ obj = &vob->vob_cl.co_lu;
+ hdr = &vob->vob_header;
+ cl_object_header_init(hdr);
+ /* page buffer size starts at the rounded size of struct cl_page */
+ hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
+
+ lu_object_init(obj, &hdr->coh_lu, dev);
+ lu_object_add_top(&hdr->coh_lu, obj);
+
+ vob->vob_cl.co_ops = &vvp_ops;
+ obj->lo_ops = &vvp_lu_obj_ops;
+ } else {
+ obj = NULL;
+ }
+ return obj;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 33ca3eb34965..2e566d90bb94 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -41,9 +37,16 @@
#define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/obd.h"
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/page-flags.h>
+#include <linux/pagemap.h>
+
#include "../include/lustre_lite.h"
+#include "llite_internal.h"
#include "vvp_internal.h"
/*****************************************************************************
@@ -52,9 +55,9 @@
*
*/
-static void vvp_page_fini_common(struct ccc_page *cp)
+static void vvp_page_fini_common(struct vvp_page *vpg)
{
- struct page *vmpage = cp->cpg_page;
+ struct page *vmpage = vpg->vpg_page;
LASSERT(vmpage);
put_page(vmpage);
@@ -63,23 +66,23 @@ static void vvp_page_fini_common(struct ccc_page *cp)
+/*
+ * Finalizer for a vvp page slice: asserts that the VM page's private
+ * field no longer refers to the owning cl_page, then passes the vvp_page
+ * to vvp_page_fini_common().
+ */
static void vvp_page_fini(const struct lu_env *env,
struct cl_page_slice *slice)
{
- struct ccc_page *cp = cl2ccc_page(slice);
- struct page *vmpage = cp->cpg_page;
+ struct vvp_page *vpg = cl2vvp_page(slice);
+ struct page *vmpage = vpg->vpg_page;
/*
* vmpage->private was already cleared when page was moved into
* VPG_FREEING state.
*/
LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
- vvp_page_fini_common(cp);
+ vvp_page_fini_common(vpg);
}
static int vvp_page_own(const struct lu_env *env,
const struct cl_page_slice *slice, struct cl_io *io,
int nonblock)
{
- struct ccc_page *vpg = cl2ccc_page(slice);
- struct page *vmpage = vpg->cpg_page;
+ struct vvp_page *vpg = cl2vvp_page(slice);
+ struct page *vmpage = vpg->vpg_page;
LASSERT(vmpage);
if (nonblock) {
@@ -96,6 +99,7 @@ static int vvp_page_own(const struct lu_env *env,
lock_page(vmpage);
wait_on_page_writeback(vmpage);
+
return 0;
}
@@ -136,41 +140,15 @@ static void vvp_page_discard(const struct lu_env *env,
struct cl_io *unused)
{
struct page *vmpage = cl2vm_page(slice);
- struct address_space *mapping;
- struct ccc_page *cpg = cl2ccc_page(slice);
+ struct vvp_page *vpg = cl2vvp_page(slice);
LASSERT(vmpage);
LASSERT(PageLocked(vmpage));
- mapping = vmpage->mapping;
+ if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used)
+ ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);
- if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used)
- ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);
-
- /*
- * truncate_complete_page() calls
- * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
- */
- truncate_complete_page(mapping, vmpage);
-}
-
-static int vvp_page_unmap(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
- __u64 offset;
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-
- offset = vmpage->index << PAGE_SHIFT;
-
- /*
- * XXX is it safe to call this with the page lock held?
- */
- ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_SIZE);
- return 0;
+ ll_invalidate_page(vmpage);
}
static void vvp_page_delete(const struct lu_env *env,
@@ -179,12 +157,20 @@ static void vvp_page_delete(const struct lu_env *env,
struct page *vmpage = cl2vm_page(slice);
struct inode *inode = vmpage->mapping->host;
struct cl_object *obj = slice->cpl_obj;
+ struct cl_page *page = slice->cpl_page;
+ int refc;
LASSERT(PageLocked(vmpage));
- LASSERT((struct cl_page *)vmpage->private == slice->cpl_page);
- LASSERT(inode == ccc_object_inode(obj));
+ LASSERT((struct cl_page *)vmpage->private == page);
+ LASSERT(inode == vvp_object_inode(obj));
+
+ vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice));
+
+ /* Drop the reference count held in vvp_page_init */
+ refc = atomic_dec_return(&page->cp_ref);
+ LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
- vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice));
+ ClearPageUptodate(vmpage);
ClearPagePrivate(vmpage);
vmpage->private = 0;
/*
@@ -237,7 +223,7 @@ static int vvp_page_prep_write(const struct lu_env *env,
if (!pg->cp_sync_io)
set_page_writeback(vmpage);
- vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));
+ vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
return 0;
}
@@ -250,11 +236,11 @@ static int vvp_page_prep_write(const struct lu_env *env,
*/
static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret)
{
- struct ccc_object *obj = cl_inode2ccc(inode);
+ struct vvp_object *obj = cl_inode2vvp(inode);
if (ioret == 0) {
ClearPageError(vmpage);
- obj->cob_discard_page_warned = 0;
+ obj->vob_discard_page_warned = 0;
} else {
SetPageError(vmpage);
if (ioret == -ENOSPC)
@@ -263,8 +249,8 @@ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret
set_bit(AS_EIO, &inode->i_mapping->flags);
if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
- obj->cob_discard_page_warned == 0) {
- obj->cob_discard_page_warned = 1;
+ obj->vob_discard_page_warned == 0) {
+ obj->vob_discard_page_warned = 1;
ll_dirty_page_discard_warn(vmpage, ioret);
}
}
@@ -274,22 +260,23 @@ static void vvp_page_completion_read(const struct lu_env *env,
const struct cl_page_slice *slice,
int ioret)
{
- struct ccc_page *cp = cl2ccc_page(slice);
- struct page *vmpage = cp->cpg_page;
- struct cl_page *page = cl_page_top(slice->cpl_page);
- struct inode *inode = ccc_object_inode(page->cp_obj);
+ struct vvp_page *vpg = cl2vvp_page(slice);
+ struct page *vmpage = vpg->vpg_page;
+ struct cl_page *page = slice->cpl_page;
+ struct inode *inode = vvp_object_inode(page->cp_obj);
LASSERT(PageLocked(vmpage));
CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);
- if (cp->cpg_defer_uptodate)
+ if (vpg->vpg_defer_uptodate)
ll_ra_count_put(ll_i2sbi(inode), 1);
if (ioret == 0) {
- if (!cp->cpg_defer_uptodate)
+ if (!vpg->vpg_defer_uptodate)
cl_page_export(env, page, 1);
- } else
- cp->cpg_defer_uptodate = 0;
+ } else {
+ vpg->vpg_defer_uptodate = 0;
+ }
if (!page->cp_sync_io)
unlock_page(vmpage);
@@ -299,9 +286,9 @@ static void vvp_page_completion_write(const struct lu_env *env,
const struct cl_page_slice *slice,
int ioret)
{
- struct ccc_page *cp = cl2ccc_page(slice);
+ struct vvp_page *vpg = cl2vvp_page(slice);
struct cl_page *pg = slice->cpl_page;
- struct page *vmpage = cp->cpg_page;
+ struct page *vmpage = vpg->vpg_page;
CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);
@@ -315,8 +302,8 @@ static void vvp_page_completion_write(const struct lu_env *env,
* and then re-add the page into pending transfer queue. -jay
*/
- cp->cpg_write_queued = 0;
- vvp_write_complete(cl2ccc(slice->cpl_obj), cp);
+ vpg->vpg_write_queued = 0;
+ vvp_write_complete(cl2vvp(slice->cpl_obj), vpg);
if (pg->cp_sync_io) {
LASSERT(PageLocked(vmpage));
@@ -327,7 +314,7 @@ static void vvp_page_completion_write(const struct lu_env *env,
* Only mark the page error only when it's an async write
* because applications won't wait for IO to finish.
*/
- vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret);
+ vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret);
end_page_writeback(vmpage);
}
@@ -359,7 +346,7 @@ static int vvp_page_make_ready(const struct lu_env *env,
LASSERT(pg->cp_state == CPS_CACHED);
/* This actually clears the dirty bit in the radix tree. */
set_page_writeback(vmpage);
- vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));
+ vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
} else if (pg->cp_state == CPS_PAGEOUT) {
/* is it possible for osc_flush_async_page() to already
@@ -375,24 +362,51 @@ static int vvp_page_make_ready(const struct lu_env *env,
return result;
}
+/**
+ * VVP implementation of the cpo_is_under_lock page operation (installed in
+ * both vvp_page_ops and vvp_transient_page_ops).
+ *
+ * For read, write and fault IO issued through a file descriptor flagged
+ * LL_FILE_GROUP_LOCKED, CL_PAGE_EOF is stored through max_index. For any
+ * other IO type, or when the flag is clear, max_index is left untouched.
+ * The slice argument is not used.
+ *
+ * Always returns 0.
+ */
+static int vvp_page_is_under_lock(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *io, pgoff_t *max_index)
+{
+ if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
+ io->ci_type == CIT_FAULT) {
+ struct vvp_io *vio = vvp_env_io(env);
+
+ /* group-locked descriptor: report CL_PAGE_EOF as the limit */
+ if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED))
+ *max_index = CL_PAGE_EOF;
+ }
+ return 0;
+}
+
static int vvp_page_print(const struct lu_env *env,
const struct cl_page_slice *slice,
void *cookie, lu_printer_t printer)
{
- struct ccc_page *vp = cl2ccc_page(slice);
- struct page *vmpage = vp->cpg_page;
+ struct vvp_page *vpg = cl2vvp_page(slice);
+ struct page *vmpage = vpg->vpg_page;
(*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ",
- vp, vp->cpg_defer_uptodate, vp->cpg_ra_used,
- vp->cpg_write_queued, vmpage);
+ vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used,
+ vpg->vpg_write_queued, vmpage);
if (vmpage) {
(*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
(long)vmpage->flags, page_count(vmpage),
page_mapcount(vmpage), vmpage->private,
- page_index(vmpage),
+ vmpage->index,
list_empty(&vmpage->lru) ? "not-" : "");
}
+
(*printer)(env, cookie, "\n");
+
+ return 0;
+}
+
+/**
+ * Placeholder page operation that trips LBUG() unconditionally.
+ *
+ * Installed as the CRT_READ cpo_make_ready hook in vvp_page_ops, so reaching
+ * it is treated as a bug. Neither argument is used. The trailing return only
+ * satisfies the signature, assuming LBUG() does not return (TODO confirm).
+ */
+static int vvp_page_fail(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ /*
+ * Cached read?
+ */
+ LBUG();
+
return 0;
}
@@ -401,32 +415,38 @@ static const struct cl_page_operations vvp_page_ops = {
.cpo_assume = vvp_page_assume,
.cpo_unassume = vvp_page_unassume,
.cpo_disown = vvp_page_disown,
- .cpo_vmpage = ccc_page_vmpage,
.cpo_discard = vvp_page_discard,
.cpo_delete = vvp_page_delete,
- .cpo_unmap = vvp_page_unmap,
.cpo_export = vvp_page_export,
.cpo_is_vmlocked = vvp_page_is_vmlocked,
.cpo_fini = vvp_page_fini,
.cpo_print = vvp_page_print,
- .cpo_is_under_lock = ccc_page_is_under_lock,
+ .cpo_is_under_lock = vvp_page_is_under_lock,
.io = {
[CRT_READ] = {
.cpo_prep = vvp_page_prep_read,
.cpo_completion = vvp_page_completion_read,
- .cpo_make_ready = ccc_fail,
+ .cpo_make_ready = vvp_page_fail,
},
[CRT_WRITE] = {
.cpo_prep = vvp_page_prep_write,
.cpo_completion = vvp_page_completion_write,
.cpo_make_ready = vvp_page_make_ready,
- }
- }
+ },
+ },
};
+/**
+ * cpo_prep hook used for both CRT_READ and CRT_WRITE in
+ * vvp_transient_page_ops.
+ *
+ * Does no work and ignores all of its arguments; always returns 0.
+ */
+static int vvp_transient_page_prep(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused)
+{
+ /* transient page should always be sent. */
+ return 0;
+}
+
static void vvp_transient_page_verify(const struct cl_page *page)
{
- struct inode *inode = ccc_object_inode(page->cp_obj);
+ struct inode *inode = vvp_object_inode(page->cp_obj);
LASSERT(!inode_trylock(inode));
}
@@ -477,7 +497,7 @@ static void vvp_transient_page_discard(const struct lu_env *env,
static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
const struct cl_page_slice *slice)
{
- struct inode *inode = ccc_object_inode(slice->cpl_obj);
+ struct inode *inode = vvp_object_inode(slice->cpl_obj);
int locked;
locked = !inode_trylock(inode);
@@ -497,13 +517,13 @@ vvp_transient_page_completion(const struct lu_env *env,
static void vvp_transient_page_fini(const struct lu_env *env,
struct cl_page_slice *slice)
{
- struct ccc_page *cp = cl2ccc_page(slice);
+ struct vvp_page *vpg = cl2vvp_page(slice);
struct cl_page *clp = slice->cpl_page;
- struct ccc_object *clobj = cl2ccc(clp->cp_obj);
+ struct vvp_object *clobj = cl2vvp(clp->cp_obj);
- vvp_page_fini_common(cp);
- LASSERT(!inode_trylock(clobj->cob_inode));
- clobj->cob_transient_pages--;
+ vvp_page_fini_common(vpg);
+ LASSERT(!inode_trylock(clobj->vob_inode));
+ clobj->vob_transient_pages--;
}
static const struct cl_page_operations vvp_transient_page_ops = {
@@ -512,45 +532,48 @@ static const struct cl_page_operations vvp_transient_page_ops = {
.cpo_unassume = vvp_transient_page_unassume,
.cpo_disown = vvp_transient_page_disown,
.cpo_discard = vvp_transient_page_discard,
- .cpo_vmpage = ccc_page_vmpage,
.cpo_fini = vvp_transient_page_fini,
.cpo_is_vmlocked = vvp_transient_page_is_vmlocked,
.cpo_print = vvp_page_print,
- .cpo_is_under_lock = ccc_page_is_under_lock,
+ .cpo_is_under_lock = vvp_page_is_under_lock,
.io = {
[CRT_READ] = {
- .cpo_prep = ccc_transient_page_prep,
+ .cpo_prep = vvp_transient_page_prep,
.cpo_completion = vvp_transient_page_completion,
},
[CRT_WRITE] = {
- .cpo_prep = ccc_transient_page_prep,
+ .cpo_prep = vvp_transient_page_prep,
.cpo_completion = vvp_transient_page_completion,
}
}
};
int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage)
+ struct cl_page *page, pgoff_t index)
{
- struct ccc_page *cpg = cl_object_page_slice(obj, page);
+ struct vvp_page *vpg = cl_object_page_slice(obj, page);
+ struct page *vmpage = page->cp_vmpage;
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CLOBINVRNT(env, obj, vvp_object_invariant(obj));
- cpg->cpg_page = vmpage;
+ vpg->vpg_page = vmpage;
get_page(vmpage);
- INIT_LIST_HEAD(&cpg->cpg_pending_linkage);
+ INIT_LIST_HEAD(&vpg->vpg_pending_linkage);
if (page->cp_type == CPT_CACHEABLE) {
+ /* in cache, decref in vvp_page_delete */
+ atomic_inc(&page->cp_ref);
SetPagePrivate(vmpage);
vmpage->private = (unsigned long)page;
- cl_page_slice_add(page, &cpg->cpg_cl, obj, &vvp_page_ops);
+ cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
+ &vvp_page_ops);
} else {
- struct ccc_object *clobj = cl2ccc(obj);
+ struct vvp_object *clobj = cl2vvp(obj);
- LASSERT(!inode_trylock(clobj->cob_inode));
- cl_page_slice_add(page, &cpg->cpg_cl, obj,
+ LASSERT(!inode_trylock(clobj->vob_inode));
+ cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
&vvp_transient_page_ops);
- clobj->cob_transient_pages++;
+ clobj->vob_transient_pages++;
}
return 0;
}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c
new file mode 100644
index 000000000000..9fe9d6c0a7d1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_req.c
@@ -0,0 +1,121 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "../include/lustre/lustre_idl.h"
+#include "../include/cl_object.h"
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_lite.h"
+#include "llite_internal.h"
+#include "vvp_internal.h"
+
+/**
+ * Map a generic request slice to the struct vvp_req that embeds it as its
+ * vrq_cl member, using container_of0().
+ */
+static inline struct vvp_req *cl2vvp_req(const struct cl_req_slice *slice)
+{
+ return container_of0(slice, struct vvp_req, vrq_cl);
+}
+
+/**
+ * Implementation of struct cl_req_operations::cro_attr_set() for VVP
+ * layer. VVP is responsible for
+ *
+ * - o_[mac]time
+ *
+ * - o_mode
+ *
+ * - o_parent_seq
+ *
+ * - o_[ug]id
+ *
+ * - o_parent_oid
+ *
+ * - o_parent_ver
+ *
+ * - o_ioepoch
+ *
+ * What the body does with the obdo reached through attr->cra_oa:
+ *
+ * - starts from a permitted-attribute mask of OBD_MD_FLTYPE;
+ *
+ * - for a CRT_WRITE request whose flags include OBD_MD_FLEPOCH, marks
+ *   OBD_MD_FLEPOCH valid, copies lli_ioepoch from the inode info and also
+ *   permits mtime, ctime, uid and gid;
+ *
+ * - calls obdo_from_inode() with the permitted mask intersected with the
+ *   caller's flags;
+ *
+ * - sets the parent FID from lli_fid and copies JOBSTATS_JOBID_SIZE bytes
+ *   of lli_jobid into attr->cra_jobid.
+ *
+ * The env argument is not used.
+ */
+static void vvp_req_attr_set(const struct lu_env *env,
+ const struct cl_req_slice *slice,
+ const struct cl_object *obj,
+ struct cl_req_attr *attr, u64 flags)
+{
+ struct inode *inode;
+ struct obdo *oa;
+ u32 valid_flags;
+
+ oa = attr->cra_oa;
+ inode = vvp_object_inode(obj);
+ valid_flags = OBD_MD_FLTYPE;
+
+ if (slice->crs_req->crq_type == CRT_WRITE) {
+ if (flags & OBD_MD_FLEPOCH) {
+ oa->o_valid |= OBD_MD_FLEPOCH;
+ oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch;
+ /* only this path widens the mask beyond OBD_MD_FLTYPE */
+ valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+ OBD_MD_FLUID | OBD_MD_FLGID;
+ }
+ }
+ /* pass only attributes that are both permitted and requested */
+ obdo_from_inode(oa, inode, valid_flags & flags);
+ obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
+ memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid,
+ JOBSTATS_JOBID_SIZE);
+}
+
+/**
+ * cro_completion hook for the VVP request slice.
+ *
+ * A positive ioret is passed to cl_stats_tally() together with the slice's
+ * device and the request type; zero or negative values are not tallied.
+ * The enclosing struct vvp_req is then returned to vvp_req_kmem, so the
+ * slice must not be touched after this call. The env argument is not used.
+ */
+static void vvp_req_completion(const struct lu_env *env,
+ const struct cl_req_slice *slice, int ioret)
+{
+ struct vvp_req *vrq;
+
+ if (ioret > 0)
+ cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret);
+
+ /* releases the object allocated in vvp_req_init() */
+ vrq = cl2vvp_req(slice);
+ kmem_cache_free(vvp_req_kmem, vrq);
+}
+
+/*
+ * Request-slice operations for the VVP layer; attached to each request by
+ * vvp_req_init() through cl_req_slice_add().
+ */
+static const struct cl_req_operations vvp_req_ops = {
+ .cro_attr_set = vvp_req_attr_set,
+ .cro_completion = vvp_req_completion
+};
+
+/**
+ * Allocate the VVP slice for a request and attach it.
+ *
+ * A zeroed struct vvp_req is taken from vvp_req_kmem with GFP_NOFS and its
+ * vrq_cl member is registered on req for dev with vvp_req_ops. The object
+ * is freed again in vvp_req_completion(). The env argument is not used.
+ *
+ * Returns 0 on success, or -ENOMEM if the allocation fails (in which case
+ * nothing is attached to req).
+ */
+int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req)
+{
+ struct vvp_req *vrq;
+ int result;
+
+ vrq = kmem_cache_zalloc(vvp_req_kmem, GFP_NOFS);
+ if (vrq) {
+ cl_req_slice_add(req, &vrq->vrq_cl, dev, &vvp_req_ops);
+ result = 0;
+ } else {
+ result = -ENOMEM;
+ }
+ return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index b68dcc921ca2..98303cf85815 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -111,11 +107,6 @@ int ll_setxattr_common(struct inode *inode, const char *name,
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ptlrpc_request *req = NULL;
int xattr_type, rc;
-#ifdef CONFIG_FS_POSIX_ACL
- struct rmtacl_ctl_entry *rce = NULL;
- posix_acl_xattr_header *new_value = NULL;
- ext_acl_xattr_header *acl = NULL;
-#endif
const char *pv = value;
xattr_type = get_xattr_type(name);
@@ -143,61 +134,9 @@ int ll_setxattr_common(struct inode *inode, const char *name,
strcmp(name, "security.selinux") == 0)
return -EOPNOTSUPP;
-#ifdef CONFIG_FS_POSIX_ACL
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
- (xattr_type == XATTR_ACL_ACCESS_T ||
- xattr_type == XATTR_ACL_DEFAULT_T)) {
- rce = rct_search(&sbi->ll_rct, current_pid());
- if (!rce ||
- (rce->rce_ops != RMT_LSETFACL &&
- rce->rce_ops != RMT_RSETFACL))
- return -EOPNOTSUPP;
-
- if (rce->rce_ops == RMT_LSETFACL) {
- struct eacl_entry *ee;
-
- ee = et_search_del(&sbi->ll_et, current_pid(),
- ll_inode2fid(inode), xattr_type);
- if (valid & OBD_MD_FLXATTR) {
- acl = lustre_acl_xattr_merge2ext(
- (posix_acl_xattr_header *)value,
- size, ee->ee_acl);
- if (IS_ERR(acl)) {
- ee_free(ee);
- return PTR_ERR(acl);
- }
- size = CFS_ACL_XATTR_SIZE(\
- le32_to_cpu(acl->a_count), \
- ext_acl_xattr);
- pv = (const char *)acl;
- }
- ee_free(ee);
- } else if (rce->rce_ops == RMT_RSETFACL) {
- rc = lustre_posix_acl_xattr_filter(
- (posix_acl_xattr_header *)value,
- size, &new_value);
- if (unlikely(rc < 0))
- return rc;
- size = rc;
-
- pv = (const char *)new_value;
- } else
- return -EOPNOTSUPP;
-
- valid |= rce_ops2valid(rce->rce_ops);
- }
-#endif
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
valid, name, pv, size, 0, flags,
ll_i2suppgid(inode), &req);
-#ifdef CONFIG_FS_POSIX_ACL
- /*
- * Release the posix ACL space.
- */
- kfree(new_value);
- if (acl)
- lustre_ext_acl_xattr_free(acl);
-#endif
if (rc) {
if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
@@ -210,16 +149,14 @@ int ll_setxattr_common(struct inode *inode, const char *name,
return 0;
}
-int ll_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
+int ll_setxattr(struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size, int flags)
{
- struct inode *inode = d_inode(dentry);
-
LASSERT(inode);
LASSERT(name);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
- inode->i_ino, inode->i_generation, inode, name);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+ PFID(ll_inode2fid(inode)), inode, name);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
@@ -243,12 +180,12 @@ int ll_setxattr(struct dentry *dentry, const char *name,
lump->lmm_stripe_offset = -1;
if (lump && S_ISREG(inode->i_mode)) {
- int flags = FMODE_WRITE;
+ __u64 it_flags = FMODE_WRITE;
int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
sizeof(*lump) : sizeof(struct lov_user_md_v3);
- rc = ll_lov_setstripe_ea_info(inode, dentry, flags, lump,
- lum_size);
+ rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags,
+ lump, lum_size);
/* b10667: rc always be 0 here for now */
rc = 0;
} else if (S_ISDIR(inode->i_mode)) {
@@ -272,8 +209,8 @@ int ll_removexattr(struct dentry *dentry, const char *name)
LASSERT(inode);
LASSERT(name);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
- inode->i_ino, inode->i_generation, inode, name);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+ PFID(ll_inode2fid(inode)), inode, name);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
return ll_setxattr_common(inode, name, NULL, 0, 0,
@@ -289,11 +226,10 @@ int ll_getxattr_common(struct inode *inode, const char *name,
struct mdt_body *body;
int xattr_type, rc;
void *xdata;
- struct rmtacl_ctl_entry *rce = NULL;
struct ll_inode_info *lli = ll_i2info(inode);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
- inode->i_ino, inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
/* listxattr have slightly different behavior from of ext3:
* without 'user_xattr' ext3 will list all xattr names but
@@ -320,25 +256,11 @@ int ll_getxattr_common(struct inode *inode, const char *name,
return -EOPNOTSUPP;
#ifdef CONFIG_FS_POSIX_ACL
- if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
- (xattr_type == XATTR_ACL_ACCESS_T ||
- xattr_type == XATTR_ACL_DEFAULT_T)) {
- rce = rct_search(&sbi->ll_rct, current_pid());
- if (!rce ||
- (rce->rce_ops != RMT_LSETFACL &&
- rce->rce_ops != RMT_LGETFACL &&
- rce->rce_ops != RMT_RSETFACL &&
- rce->rce_ops != RMT_RGETFACL))
- return -EOPNOTSUPP;
- }
-
/* posix acl is under protection of LOOKUP lock. when calling to this,
* we just have path resolution to the target inode, so we have great
* chance that cached ACL is uptodate.
*/
- if (xattr_type == XATTR_ACL_ACCESS_T &&
- !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
-
+ if (xattr_type == XATTR_ACL_ACCESS_T) {
struct posix_acl *acl;
spin_lock(&lli->lli_lock);
@@ -380,9 +302,7 @@ do_getxattr:
} else {
getxattr_nocache:
rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- valid | (rce ? rce_ops2valid(rce->rce_ops) : 0),
- name, NULL, 0, size, 0, &req);
-
+ valid, name, NULL, 0, size, 0, &req);
if (rc < 0)
goto out_xattr;
@@ -419,26 +339,6 @@ getxattr_nocache:
rc = body->eadatasize;
}
-#ifdef CONFIG_FS_POSIX_ACL
- if (rce && rce->rce_ops == RMT_LSETFACL) {
- ext_acl_xattr_header *acl;
-
- acl = lustre_posix_acl_xattr_2ext(
- (posix_acl_xattr_header *)buffer, rc);
- if (IS_ERR(acl)) {
- rc = PTR_ERR(acl);
- goto out;
- }
-
- rc = ee_add(&sbi->ll_et, current_pid(), ll_inode2fid(inode),
- xattr_type, acl);
- if (unlikely(rc < 0)) {
- lustre_ext_acl_xattr_free(acl);
- goto out;
- }
- }
-#endif
-
out_xattr:
if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
LCONSOLE_INFO(
@@ -451,16 +351,14 @@ out:
return rc;
}
-ssize_t ll_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
+ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
{
- struct inode *inode = d_inode(dentry);
-
LASSERT(inode);
LASSERT(name);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
- inode->i_ino, inode->i_generation, inode, name);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+ PFID(ll_inode2fid(inode)), inode, name);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
@@ -554,8 +452,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
LASSERT(inode);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
- inode->i_ino, inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+ PFID(ll_inode2fid(inode)), inode);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
index 3480ce2bb3cc..8089da8143d9 100644
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -229,7 +229,6 @@ static int ll_xattr_cache_valid(struct ll_inode_info *lli)
*/
static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
{
-
if (!ll_xattr_cache_valid(lli))
return 0;
@@ -289,8 +288,8 @@ static int ll_xattr_find_get_lock(struct inode *inode,
LCK_PR);
if (mode != 0) {
/* fake oit in mdc_revalidate_lock() manner */
- oit->d.lustre.it_lock_handle = lockh.cookie;
- oit->d.lustre.it_lock_mode = mode;
+ oit->it_lock_handle = lockh.cookie;
+ oit->it_lock_mode = mode;
goto out;
}
}
@@ -316,7 +315,7 @@ static int ll_xattr_find_get_lock(struct inode *inode,
return rc;
}
- *req = (struct ptlrpc_request *)oit->d.lustre.it_data;
+ *req = oit->it_request;
out:
down_write(&lli->lli_xattrs_list_rwsem);
mutex_unlock(&lli->lli_xattrs_enq_lock);
@@ -363,10 +362,10 @@ static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
goto out_maybe_drop;
}
- if (oit->d.lustre.it_status < 0) {
+ if (oit->it_status < 0) {
CDEBUG(D_CACHE, "getxattr intent returned %d for fid "DFID"\n",
- oit->d.lustre.it_status, PFID(ll_inode2fid(inode)));
- rc = oit->d.lustre.it_status;
+ oit->it_status, PFID(ll_inode2fid(inode)));
+ rc = oit->it_status;
/* xattr data is so large that we don't want to cache it */
if (rc == -ERANGE)
rc = -EAGAIN;
@@ -449,8 +448,8 @@ out_destroy:
up_write(&lli->lli_xattrs_list_rwsem);
ldlm_lock_decref_and_cancel((struct lustre_handle *)
- &oit->d.lustre.it_lock_handle,
- oit->d.lustre.it_lock_mode);
+ &oit->it_lock_handle,
+ oit->it_lock_mode);
goto out_no_unlock;
}
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_fld.c b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
index 378691b2a062..a3d170aa6fd2 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_fld.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
index e0958eaed054..2f58fdab8d1e 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -84,11 +80,11 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
/*
* We got LOOKUP lock, but we really need attrs.
*/
- pmode = it->d.lustre.it_lock_mode;
+ pmode = it->it_lock_mode;
if (pmode) {
- plock.cookie = it->d.lustre.it_lock_handle;
- it->d.lustre.it_lock_mode = 0;
- it->d.lustre.it_data = NULL;
+ plock.cookie = it->it_lock_handle;
+ it->it_lock_mode = 0;
+ it->it_request = NULL;
}
LASSERT(fid_is_sane(&body->fid1));
@@ -134,14 +130,14 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
* maintain dcache consistency. Thus drop UPDATE|PERM lock here
* and put LOOKUP in request.
*/
- if (it->d.lustre.it_lock_mode != 0) {
- it->d.lustre.it_remote_lock_handle =
- it->d.lustre.it_lock_handle;
- it->d.lustre.it_remote_lock_mode = it->d.lustre.it_lock_mode;
+ if (it->it_lock_mode != 0) {
+ it->it_remote_lock_handle =
+ it->it_lock_handle;
+ it->it_remote_lock_mode = it->it_lock_mode;
}
- it->d.lustre.it_lock_handle = plock.cookie;
- it->d.lustre.it_lock_mode = pmode;
+ it->it_lock_handle = plock.cookie;
+ it->it_lock_mode = pmode;
out_free_op_data:
kfree(op_data);
@@ -201,9 +197,9 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
* Nothing is found, do not access body->fid1 as it is zero and thus
* pointless.
*/
- if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) &&
- !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) &&
- !(it->d.lustre.it_disposition & DISP_OPEN_OPEN))
+ if ((it->it_disposition & DISP_LOOKUP_NEG) &&
+ !(it->it_disposition & DISP_OPEN_CREATE) &&
+ !(it->it_disposition & DISP_OPEN_OPEN))
return rc;
body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
index 8a0087190e23..0beafc49b8d2 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h
+++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -42,9 +38,6 @@
#define LMV_MAX_TGT_COUNT 128
-#define lmv_init_lock(lmv) mutex_lock(&lmv->init_mutex)
-#define lmv_init_unlock(lmv) mutex_unlock(&lmv->init_mutex)
-
#define LL_IT2STR(it) \
((it) ? ldlm_it2str((it)->it_op) : "0")
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 9abb7c2b9231..0e1588a43187 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -132,8 +128,9 @@ static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
static struct obd_uuid *lmv_get_uuid(struct obd_export *exp)
{
struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+ struct lmv_tgt_desc *tgt = lmv->tgts[0];
- return obd_get_uuid(lmv->tgts[0]->ltd_exp);
+ return tgt ? obd_get_uuid(tgt->ltd_exp) : NULL;
}
static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
@@ -249,7 +246,6 @@ static int lmv_connect(const struct lu_env *env,
static void lmv_set_timeouts(struct obd_device *obd)
{
- struct lmv_tgt_desc *tgt;
struct lmv_obd *lmv;
int i;
@@ -261,8 +257,10 @@ static void lmv_set_timeouts(struct obd_device *obd)
return;
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
tgt = lmv->tgts[i];
- if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0)
+ if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
continue;
obd_set_info_async(NULL, tgt->ltd_exp, sizeof(KEY_INTERMDS),
@@ -302,13 +300,14 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize,
return 0;
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp ||
- lmv->tgts[i]->ltd_active == 0) {
+ struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+ if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) {
CWARN("%s: NULL export for %d\n", obd->obd_name, i);
continue;
}
- rc = md_init_ea_size(lmv->tgts[i]->ltd_exp, easize, def_easize,
+ rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize,
cookiesize, def_cookiesize);
if (rc) {
CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n",
@@ -425,7 +424,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index);
- lmv_init_lock(lmv);
+ mutex_lock(&lmv->lmv_init_mutex);
if (lmv->desc.ld_tgt_count == 0) {
struct obd_device *mdc_obd;
@@ -433,7 +432,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME,
&obd->obd_uuid);
if (!mdc_obd) {
- lmv_init_unlock(lmv);
+ mutex_unlock(&lmv->lmv_init_mutex);
CERROR("%s: Target %s not attached: rc = %d\n",
obd->obd_name, uuidp->uuid, -EINVAL);
return -EINVAL;
@@ -445,7 +444,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n",
obd->obd_name,
obd_uuid2str(&tgt->ltd_uuid), index, -EEXIST);
- lmv_init_unlock(lmv);
+ mutex_unlock(&lmv->lmv_init_mutex);
return -EEXIST;
}
@@ -459,7 +458,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
newsize <<= 1;
newtgts = kcalloc(newsize, sizeof(*newtgts), GFP_NOFS);
if (!newtgts) {
- lmv_init_unlock(lmv);
+ mutex_unlock(&lmv->lmv_init_mutex);
return -ENOMEM;
}
@@ -481,7 +480,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
tgt = kzalloc(sizeof(*tgt), GFP_NOFS);
if (!tgt) {
- lmv_init_unlock(lmv);
+ mutex_unlock(&lmv->lmv_init_mutex);
return -ENOMEM;
}
@@ -507,7 +506,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
}
}
- lmv_init_unlock(lmv);
+ mutex_unlock(&lmv->lmv_init_mutex);
return rc;
}
@@ -522,18 +521,27 @@ int lmv_check_connect(struct obd_device *obd)
if (lmv->connected)
return 0;
- lmv_init_lock(lmv);
+ mutex_lock(&lmv->lmv_init_mutex);
if (lmv->connected) {
- lmv_init_unlock(lmv);
+ mutex_unlock(&lmv->lmv_init_mutex);
return 0;
}
if (lmv->desc.ld_tgt_count == 0) {
- lmv_init_unlock(lmv);
+ mutex_unlock(&lmv->lmv_init_mutex);
CERROR("%s: no targets configured.\n", obd->obd_name);
return -EINVAL;
}
+ LASSERT(lmv->tgts);
+
+ if (!lmv->tgts[0]) {
+ mutex_unlock(&lmv->lmv_init_mutex);
+ CERROR("%s: no target configured for index 0.\n",
+ obd->obd_name);
+ return -EINVAL;
+ }
+
CDEBUG(D_CONFIG, "Time to connect %s to %s\n",
lmv->cluuid.uuid, obd->obd_name);
@@ -551,7 +559,7 @@ int lmv_check_connect(struct obd_device *obd)
lmv->connected = 1;
easize = lmv_get_easize(lmv);
lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0);
- lmv_init_unlock(lmv);
+ mutex_unlock(&lmv->lmv_init_mutex);
return 0;
out_disc:
@@ -572,7 +580,7 @@ int lmv_check_connect(struct obd_device *obd)
}
}
class_disconnect(lmv->exp);
- lmv_init_unlock(lmv);
+ mutex_unlock(&lmv->lmv_init_mutex);
return rc;
}
@@ -796,6 +804,11 @@ static int lmv_hsm_ct_unregister(struct lmv_obd *lmv, unsigned int cmd, int len,
/* unregister request (call from llapi_hsm_copytool_fini) */
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+ if (!tgt || !tgt->ltd_exp)
+ continue;
+
/* best effort: try to clean as much as possible
* (continue on error)
*/
@@ -825,20 +838,28 @@ static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len,
* except if it because of inactive target.
*/
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, lk, uarg);
+ struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+ if (!tgt || !tgt->ltd_exp)
+ continue;
+
+ err = obd_iocontrol(cmd, tgt->ltd_exp, len, lk, uarg);
if (err) {
- if (lmv->tgts[i]->ltd_active) {
+ if (tgt->ltd_active) {
/* permanent error */
CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
- lmv->tgts[i]->ltd_uuid.uuid,
- i, cmd, err);
+ tgt->ltd_uuid.uuid, i, cmd, err);
rc = err;
lk->lk_flags |= LK_FLG_STOP;
/* unregister from previous MDS */
- for (j = 0; j < i; j++)
- obd_iocontrol(cmd,
- lmv->tgts[j]->ltd_exp,
- len, lk, uarg);
+ for (j = 0; j < i; j++) {
+ tgt = lmv->tgts[j];
+
+ if (!tgt || !tgt->ltd_exp)
+ continue;
+ obd_iocontrol(cmd, tgt->ltd_exp, len,
+ lk, uarg);
+ }
return rc;
}
/* else: transient error.
@@ -877,6 +898,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
{
struct obd_device *obddev = class_exp2obd(exp);
struct lmv_obd *lmv = &obddev->u.lmv;
+ struct lmv_tgt_desc *tgt = NULL;
int i = 0;
int rc = 0;
int set = 0;
@@ -896,10 +918,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
if (index >= count)
return -ENODEV;
- if (!lmv->tgts[index] || lmv->tgts[index]->ltd_active == 0)
+ tgt = lmv->tgts[index];
+ if (!tgt || !tgt->ltd_active)
return -ENODATA;
- mdc_obd = class_exp2obd(lmv->tgts[index]->ltd_exp);
+ mdc_obd = class_exp2obd(tgt->ltd_exp);
if (!mdc_obd)
return -EINVAL;
@@ -909,7 +932,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
(int)sizeof(struct obd_uuid))))
return -EFAULT;
- rc = obd_statfs(NULL, lmv->tgts[index]->ltd_exp, &stat_buf,
+ rc = obd_statfs(NULL, tgt->ltd_exp, &stat_buf,
cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
0);
if (rc)
@@ -922,11 +945,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
}
case OBD_IOC_QUOTACTL: {
struct if_quotactl *qctl = karg;
- struct lmv_tgt_desc *tgt = NULL;
struct obd_quotactl *oqctl;
if (qctl->qc_valid == QC_MDTIDX) {
- if (qctl->qc_idx < 0 || count <= qctl->qc_idx)
+ if (count <= qctl->qc_idx)
return -EINVAL;
tgt = lmv->tgts[qctl->qc_idx];
@@ -975,18 +997,18 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
if (icc->icc_mdtindex >= count)
return -ENODEV;
- if (!lmv->tgts[icc->icc_mdtindex] ||
- !lmv->tgts[icc->icc_mdtindex]->ltd_exp ||
- lmv->tgts[icc->icc_mdtindex]->ltd_active == 0)
+ tgt = lmv->tgts[icc->icc_mdtindex];
+ if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
return -ENODEV;
- rc = obd_iocontrol(cmd, lmv->tgts[icc->icc_mdtindex]->ltd_exp,
- sizeof(*icc), icc, NULL);
+ rc = obd_iocontrol(cmd, tgt->ltd_exp, sizeof(*icc), icc, NULL);
break;
}
case LL_IOC_GET_CONNECT_FLAGS: {
- if (!lmv->tgts[0])
+ tgt = lmv->tgts[0];
+
+ if (!tgt || !tgt->ltd_exp)
return -ENODATA;
- rc = obd_iocontrol(cmd, lmv->tgts[0]->ltd_exp, len, karg, uarg);
+ rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
break;
}
case OBD_IOC_FID2PATH: {
@@ -997,7 +1019,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
case LL_IOC_HSM_STATE_SET:
case LL_IOC_HSM_ACTION: {
struct md_op_data *op_data = karg;
- struct lmv_tgt_desc *tgt;
tgt = lmv_find_target(lmv, &op_data->op_fid1);
if (IS_ERR(tgt))
@@ -1011,7 +1032,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
}
case LL_IOC_HSM_PROGRESS: {
const struct hsm_progress_kernel *hpk = karg;
- struct lmv_tgt_desc *tgt;
tgt = lmv_find_target(lmv, &hpk->hpk_fid);
if (IS_ERR(tgt))
@@ -1021,7 +1041,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
}
case LL_IOC_HSM_REQUEST: {
struct hsm_user_request *hur = karg;
- struct lmv_tgt_desc *tgt;
unsigned int reqcount = hur->hur_request.hr_itemcount;
if (reqcount == 0)
@@ -1044,7 +1063,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
int rc1;
struct hsm_user_request *req;
- nr = lmv_hsm_req_count(lmv, hur, lmv->tgts[i]);
+ tgt = lmv->tgts[i];
+ if (!tgt || !tgt->ltd_exp)
+ continue;
+
+ nr = lmv_hsm_req_count(lmv, hur, tgt);
if (nr == 0) /* nothing for this MDS */
continue;
@@ -1056,10 +1079,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
if (!req)
return -ENOMEM;
- lmv_hsm_req_build(lmv, hur, lmv->tgts[i], req);
+ lmv_hsm_req_build(lmv, hur, tgt, req);
- rc1 = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp,
- reqlen, req, uarg);
+ rc1 = obd_iocontrol(cmd, tgt->ltd_exp, reqlen,
+ req, uarg);
if (rc1 != 0 && rc == 0)
rc = rc1;
kvfree(req);
@@ -1103,27 +1126,27 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
struct obd_device *mdc_obd;
int err;
- if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp)
+ tgt = lmv->tgts[i];
+ if (!tgt || !tgt->ltd_exp)
continue;
/* ll_umount_begin() sets force flag but for lmv, not
* mdc. Let's pass it through
*/
- mdc_obd = class_exp2obd(lmv->tgts[i]->ltd_exp);
+ mdc_obd = class_exp2obd(tgt->ltd_exp);
mdc_obd->obd_force = obddev->obd_force;
- err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len,
- karg, uarg);
+ err = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) {
return err;
} else if (err) {
- if (lmv->tgts[i]->ltd_active) {
+ if (tgt->ltd_active) {
CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
- lmv->tgts[i]->ltd_uuid.uuid,
- i, cmd, err);
+ tgt->ltd_uuid.uuid, i, cmd, err);
if (!rc)
rc = err;
}
- } else
+ } else {
set = 1;
+ }
}
if (!set && !rc)
rc = -EIO;
@@ -1269,7 +1292,7 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
lmv->lmv_placement = PLACEMENT_CHAR_POLICY;
spin_lock_init(&lmv->lmv_lock);
- mutex_init(&lmv->init_mutex);
+ mutex_init(&lmv->lmv_init_mutex);
lprocfs_lmv_init_vars(&lvars);
@@ -1656,7 +1679,7 @@ lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
struct lustre_handle *lockh, void *lmm, int lmmsize,
__u64 extra_lock_flags)
{
- struct ptlrpc_request *req = it->d.lustre.it_data;
+ struct ptlrpc_request *req = it->it_request;
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
struct lustre_handle plock;
@@ -1678,11 +1701,11 @@ lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
/*
* We got LOOKUP lock, but we really need attrs.
*/
- pmode = it->d.lustre.it_lock_mode;
+ pmode = it->it_lock_mode;
LASSERT(pmode != 0);
memcpy(&plock, lockh, sizeof(plock));
- it->d.lustre.it_lock_mode = 0;
- it->d.lustre.it_data = NULL;
+ it->it_lock_mode = 0;
+ it->it_request = NULL;
fid1 = body->fid1;
ptlrpc_req_finished(req);
@@ -2071,7 +2094,7 @@ static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs)
dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
/* Check if we've reached the end of the CFS_PAGE. */
- if (!((unsigned long)dp & ~CFS_PAGE_MASK))
+ if (!((unsigned long)dp & ~PAGE_MASK))
break;
/* Save the hash and flags of this lu_dirpage. */
@@ -2268,7 +2291,6 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
lmv = &obd->u.lmv;
if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) {
- struct lmv_tgt_desc *tgt;
int i;
rc = lmv_check_connect(obd);
@@ -2277,7 +2299,8 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
LASSERT(*vallen == sizeof(__u32));
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- tgt = lmv->tgts[i];
+ struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
/*
* All tgts should be connected when this gets called.
*/
@@ -2466,12 +2489,13 @@ static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
LASSERT(fid);
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp ||
- lmv->tgts[i]->ltd_active == 0)
+ struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+ if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
continue;
- err = md_cancel_unused(lmv->tgts[i]->ltd_exp, fid,
- policy, mode, flags, opaque);
+ err = md_cancel_unused(tgt->ltd_exp, fid, policy, mode, flags,
+ opaque);
if (!rc)
rc = err;
}
@@ -2482,9 +2506,13 @@ static int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
__u64 *bits)
{
struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+ struct lmv_tgt_desc *tgt = lmv->tgts[0];
int rc;
- rc = md_set_lock_data(lmv->tgts[0]->ltd_exp, lockh, data, bits);
+ if (!tgt || !tgt->ltd_exp)
+ return -EINVAL;
+
+ rc = md_set_lock_data(tgt->ltd_exp, lockh, data, bits);
return rc;
}
@@ -2509,12 +2537,13 @@ static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags,
* one fid was created in.
*/
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp ||
- lmv->tgts[i]->ltd_active == 0)
+ struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+ if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
continue;
- rc = md_lock_match(lmv->tgts[i]->ltd_exp, flags, fid,
- type, policy, mode, lockh);
+ rc = md_lock_match(tgt->ltd_exp, flags, fid, type, policy, mode,
+ lockh);
if (rc)
return rc;
}
@@ -2529,18 +2558,24 @@ static int lmv_get_lustre_md(struct obd_export *exp,
struct lustre_md *md)
{
struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+ struct lmv_tgt_desc *tgt = lmv->tgts[0];
- return md_get_lustre_md(lmv->tgts[0]->ltd_exp, req, dt_exp, md_exp, md);
+ if (!tgt || !tgt->ltd_exp)
+ return -EINVAL;
+ return md_get_lustre_md(tgt->ltd_exp, req, dt_exp, md_exp, md);
}
static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt = lmv->tgts[0];
if (md->mea)
obd_free_memmd(exp, (void *)&md->mea);
- return md_free_lustre_md(lmv->tgts[0]->ltd_exp, md);
+ if (!tgt || !tgt->ltd_exp)
+ return -EINVAL;
+ return md_free_lustre_md(tgt->ltd_exp, md);
}
static int lmv_set_open_replay_data(struct obd_export *exp,
@@ -2572,27 +2607,6 @@ static int lmv_clear_open_replay_data(struct obd_export *exp,
return md_clear_open_replay_data(tgt->ltd_exp, och);
}
-static int lmv_get_remote_perm(struct obd_export *exp,
- const struct lu_fid *fid,
- __u32 suppgid, struct ptlrpc_request **request)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
- int rc;
-
- rc = lmv_check_connect(obd);
- if (rc)
- return rc;
-
- tgt = lmv_find_target(lmv, fid);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- rc = md_get_remote_perm(tgt->ltd_exp, fid, suppgid, request);
- return rc;
-}
-
static int lmv_intent_getattr_async(struct obd_export *exp,
struct md_enqueue_info *minfo,
struct ldlm_enqueue_info *einfo)
@@ -2647,9 +2661,10 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *tgt = lmv->tgts[0];
int rc = 0, i;
- __u64 curspace, curinodes;
+ __u64 curspace = 0, curinodes = 0;
- if (!lmv->desc.ld_tgt_count || !tgt->ltd_active) {
+ if (!tgt || !tgt->ltd_exp || !tgt->ltd_active ||
+ !lmv->desc.ld_tgt_count) {
CERROR("master lmv inactive\n");
return -EIO;
}
@@ -2659,18 +2674,13 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
return rc;
}
- curspace = curinodes = 0;
for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
int err;
tgt = lmv->tgts[i];
- if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0)
- continue;
- if (!tgt->ltd_active) {
- CDEBUG(D_HA, "mdt %d is inactive.\n", i);
+ if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
continue;
- }
err = obd_quotactl(tgt->ltd_exp, oqctl);
if (err) {
@@ -2760,7 +2770,6 @@ static struct md_ops lmv_md_ops = {
.free_lustre_md = lmv_free_lustre_md,
.set_open_replay_data = lmv_set_open_replay_data,
.clear_open_replay_data = lmv_clear_open_replay_data,
- .get_remote_perm = lmv_get_remote_perm,
.intent_getattr_async = lmv_intent_getattr_async,
.revalidate_lock = lmv_revalidate_lock
};
diff --git a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
index b39e364a29ab..c29c361eb0cc 100644
--- a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
+++ b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
index 7dd3162b51e9..9740568d9521 100644
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -73,19 +69,6 @@
* - top-page keeps a reference to its sub-page, and destroys it when it
* is destroyed.
*
- * - sub-lock keep a reference to its top-locks. Top-lock keeps a
- * reference (and a hold, see cl_lock_hold()) on its sub-locks when it
- * actively using them (that is, in cl_lock_state::CLS_QUEUING,
- * cl_lock_state::CLS_ENQUEUED, cl_lock_state::CLS_HELD states). When
- * moving into cl_lock_state::CLS_CACHED state, top-lock releases a
- * hold. From this moment top-lock has only a 'weak' reference to its
- * sub-locks. This reference is protected by top-lock
- * cl_lock::cll_guard, and will be automatically cleared by the sub-lock
- * when the latter is destroyed. When a sub-lock is canceled, a
- * reference to it is removed from the top-lock array, and top-lock is
- * moved into CLS_NEW state. It is guaranteed that all sub-locks exist
- * while their top-lock is in CLS_HELD or CLS_CACHED states.
- *
* - IO's are not reference counted.
*
* To implement a connection between top and sub entities, lov layer is split
@@ -281,24 +264,17 @@ struct lov_object {
};
/**
- * Flags that top-lock can set on each of its sub-locks.
- */
-enum lov_sub_flags {
- /** Top-lock acquired a hold (cl_lock_hold()) on a sub-lock. */
- LSF_HELD = 1 << 0
-};
-
-/**
* State lov_lock keeps for each sub-lock.
*/
struct lov_lock_sub {
/** sub-lock itself */
- struct lovsub_lock *sub_lock;
- /** An array of per-sub-lock flags, taken from enum lov_sub_flags */
- unsigned sub_flags;
+ struct cl_lock sub_lock;
+ /** Set if the sublock has ever been enqueued, meaning it may
+ * hold resources of underlying layers
+ */
+ unsigned int sub_is_enqueued:1,
+ sub_initialized:1;
int sub_stripe;
- struct cl_lock_descr sub_descr;
- struct cl_lock_descr sub_got;
};
/**
@@ -308,59 +284,8 @@ struct lov_lock {
struct cl_lock_slice lls_cl;
/** Number of sub-locks in this lock */
int lls_nr;
- /**
- * Number of existing sub-locks.
- */
- unsigned lls_nr_filled;
- /**
- * Set when sub-lock was canceled, while top-lock was being
- * used, or unused.
- */
- unsigned int lls_cancel_race:1;
- /**
- * An array of sub-locks
- *
- * There are two issues with managing sub-locks:
- *
- * - sub-locks are concurrently canceled, and
- *
- * - sub-locks are shared with other top-locks.
- *
- * To manage cancellation, top-lock acquires a hold on a sublock
- * (lov_sublock_adopt()) when the latter is inserted into
- * lov_lock::lls_sub[]. This hold is released (lov_sublock_release())
- * when top-lock is going into CLS_CACHED state or destroyed. Hold
- * prevents sub-lock from cancellation.
- *
- * Sub-lock sharing means, among other things, that top-lock that is
- * in the process of creation (i.e., not yet inserted into lock list)
- * is already accessible to other threads once at least one of its
- * sub-locks is created, see lov_lock_sub_init().
- *
- * Sub-lock can be in one of the following states:
- *
- * - doesn't exist, lov_lock::lls_sub[]::sub_lock == NULL. Such
- * sub-lock was either never created (top-lock is in CLS_NEW
- * state), or it was created, then canceled, then destroyed
- * (lov_lock_unlink() cleared sub-lock pointer in the top-lock).
- *
- * - sub-lock exists and is on
- * hold. (lov_lock::lls_sub[]::sub_flags & LSF_HELD). This is a
- * normal state of a sub-lock in CLS_HELD and CLS_CACHED states
- * of a top-lock.
- *
- * - sub-lock exists, but is not held by the top-lock. This
- * happens after top-lock released a hold on sub-locks before
- * going into cache (lov_lock_unuse()).
- *
- * \todo To support wide-striping, array has to be replaced with a set
- * of queues to avoid scanning.
- */
- struct lov_lock_sub *lls_sub;
- /**
- * Original description with which lock was enqueued.
- */
- struct cl_lock_descr lls_orig;
+ /** sublock array */
+ struct lov_lock_sub lls_sub[0];
};
struct lov_page {
@@ -444,8 +369,9 @@ struct lov_thread_info {
struct cl_lock_descr lti_ldescr;
struct ost_lvb lti_lvb;
struct cl_2queue lti_cl2q;
- struct cl_lock_closure lti_closure;
+ struct cl_page_list lti_plist;
wait_queue_t lti_waiter;
+ struct cl_attr lti_attr;
};
/**
@@ -611,14 +537,13 @@ int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
const struct cl_lock_descr *d, int idx);
int lov_page_init(const struct lu_env *env, struct cl_object *ob,
- struct cl_page *page, struct page *vmpage);
+ struct cl_page *page, pgoff_t index);
int lovsub_page_init(const struct lu_env *env, struct cl_object *ob,
- struct cl_page *page, struct page *vmpage);
-
+ struct cl_page *page, pgoff_t index);
int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage);
+ struct cl_page *page, pgoff_t index);
int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage);
+ struct cl_page *page, pgoff_t index);
struct lu_object *lov_object_alloc(const struct lu_env *env,
const struct lu_object_header *hdr,
struct lu_device *dev);
@@ -631,6 +556,7 @@ struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
struct lovsub_lock *sub);
struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
const struct cl_page_slice *slice);
+int lov_page_stripe(const struct cl_page *page);
#define lov_foreach_target(lov, var) \
for (var = 0; var < lov_targets_nr(lov); ++var)
@@ -789,11 +715,6 @@ static inline struct lovsub_req *cl2lovsub_req(const struct cl_req_slice *slice)
return container_of0(slice, struct lovsub_req, lsrq_cl);
}
-static inline struct cl_page *lov_sub_page(const struct cl_page_slice *slice)
-{
- return slice->cpl_page->cp_child;
-}
-
static inline struct lov_io *cl2lov_io(const struct lu_env *env,
const struct cl_io_slice *ios)
{
diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c
index 532ef87dfb44..b1f260d43bc7 100644
--- a/drivers/staging/lustre/lustre/lov/lov_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lov_dev.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -143,9 +139,7 @@ static void *lov_key_init(const struct lu_context *ctx,
struct lov_thread_info *info;
info = kmem_cache_zalloc(lov_thread_kmem, GFP_NOFS);
- if (info)
- INIT_LIST_HEAD(&info->lti_closure.clc_list);
- else
+ if (!info)
info = ERR_PTR(-ENOMEM);
return info;
}
@@ -155,7 +149,6 @@ static void lov_key_fini(const struct lu_context *ctx,
{
struct lov_thread_info *info = data;
- LINVRNT(list_empty(&info->lti_closure.clc_list));
kmem_cache_free(lov_thread_kmem, info);
}
@@ -265,8 +258,9 @@ static int lov_req_init(const struct lu_env *env, struct cl_device *dev,
if (lr) {
cl_req_slice_add(req, &lr->lr_cl, dev, &lov_req_ops);
result = 0;
- } else
+ } else {
result = -ENOMEM;
+ }
return result;
}
@@ -335,14 +329,15 @@ static struct lov_device_emerg **lov_emerg_alloc(int nr)
cl_page_list_init(&em->emrg_page_list);
em->emrg_env = cl_env_alloc(&em->emrg_refcheck,
LCT_REMEMBER | LCT_NOREF);
- if (!IS_ERR(em->emrg_env))
+ if (!IS_ERR(em->emrg_env)) {
em->emrg_env->le_ctx.lc_cookie = 0x2;
- else {
+ } else {
result = PTR_ERR(em->emrg_env);
em->emrg_env = NULL;
}
- } else
+ } else {
result = -ENOMEM;
+ }
}
if (result != 0) {
lov_emerg_free(emerg, nr);
diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c
index b6529401c713..5053dead17bb 100644
--- a/drivers/staging/lustre/lustre/lov/lov_ea.c
+++ b/drivers/staging/lustre/lustre/lov/lov_ea.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -48,11 +44,6 @@
#include "lov_internal.h"
-struct lovea_unpack_args {
- struct lov_stripe_md *lsm;
- int cursor;
-};
-
static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
__u16 stripe_count)
{
diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h
index 590f9326af37..12bd511e8988 100644
--- a/drivers/staging/lustre/lustre/lov/lov_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -72,6 +68,21 @@
})
#endif
+#define pool_tgt_size(p) ((p)->pool_obds.op_size)
+#define pool_tgt_count(p) ((p)->pool_obds.op_count)
+#define pool_tgt_array(p) ((p)->pool_obds.op_array)
+#define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem)
+
+struct pool_desc {
+ char pool_name[LOV_MAXPOOLNAME + 1];
+ struct ost_pool pool_obds;
+ atomic_t pool_refcount;
+ struct hlist_node pool_hash; /* access by poolname */
+ struct list_head pool_list; /* serial access */
+ struct dentry *pool_debugfs_entry; /* file in debugfs */
+ struct obd_device *pool_lobd; /* owner */
+};
+
struct lov_request {
struct obd_info rq_oi;
struct lov_request_set *rq_rqset;
@@ -88,7 +99,6 @@ struct lov_request {
};
struct lov_request_set {
- struct ldlm_enqueue_info *set_ei;
struct obd_info *set_oi;
atomic_t set_refcount;
struct obd_export *set_exp;
@@ -102,10 +112,8 @@ struct lov_request_set {
atomic_t set_finish_checked;
struct llog_cookie *set_cookies;
int set_cookie_sent;
- struct obd_trans_info *set_oti;
struct list_head set_list;
wait_queue_head_t set_waitq;
- spinlock_t set_lock;
};
extern struct kmem_cache *lov_oinfo_slab;
@@ -114,12 +122,6 @@ extern struct lu_kmem_descr lov_caches[];
void lov_finish_set(struct lov_request_set *set);
-static inline void lov_get_reqset(struct lov_request_set *set)
-{
- LASSERT(atomic_read(&set->set_refcount) > 0);
- atomic_inc(&set->set_refcount);
-}
-
static inline void lov_put_reqset(struct lov_request_set *set)
{
if (atomic_dec_and_test(&set->set_refcount))
@@ -146,10 +148,8 @@ int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
u64 start, u64 end,
u64 *obd_start, u64 *obd_end);
int lov_stripe_number(struct lov_stripe_md *lsm, u64 lov_off);
-
-/* lov_qos.c */
-#define LOV_USES_ASSIGNED_STRIPE 0
-#define LOV_USES_DEFAULT_STRIPE 1
+pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
+ int stripe);
/* lov_request.c */
int lov_update_common_set(struct lov_request_set *set,
@@ -176,6 +176,8 @@ int lov_fini_statfs_set(struct lov_request_set *set);
int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc);
/* lov_obd.c */
+void lov_stripe_lock(struct lov_stripe_md *md);
+void lov_stripe_unlock(struct lov_stripe_md *md);
void lov_fix_desc(struct lov_desc *desc);
void lov_fix_desc_stripe_size(__u64 *val);
void lov_fix_desc_stripe_count(__u32 *val);
@@ -231,8 +233,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname);
int lov_pool_del(struct obd_device *obd, char *poolname);
int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname);
int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
-struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname);
-int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool);
void lov_pool_putref(struct pool_desc *pool);
static inline struct lov_stripe_md *lsm_addref(struct lov_stripe_md *lsm)
diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c
index 4296aacd84fc..84032a510254 100644
--- a/drivers/staging/lustre/lustre/lov/lov_io.c
+++ b/drivers/staging/lustre/lustre/lov/lov_io.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -225,8 +221,9 @@ struct lov_io_sub *lov_sub_get(const struct lu_env *env,
if (!sub->sub_io_initialized) {
sub->sub_stripe = stripe;
rc = lov_io_sub_init(env, lio, sub);
- } else
+ } else {
rc = 0;
+ }
if (rc == 0)
lov_sub_enter(sub);
else
@@ -245,13 +242,15 @@ void lov_sub_put(struct lov_io_sub *sub)
*
*/
-static int lov_page_stripe(const struct cl_page *page)
+int lov_page_stripe(const struct cl_page *page)
{
struct lovsub_object *subobj;
+ const struct cl_page_slice *slice;
- subobj = lu2lovsub(
- lu_object_locate(page->cp_child->cp_obj->co_lu.lo_header,
- &lovsub_device_type));
+ slice = cl_page_at(page, &lovsub_device_type);
+ LASSERT(slice->cpl_obj);
+
+ subobj = cl2lovsub(slice->cpl_obj);
return subobj->lso_index;
}
@@ -274,10 +273,11 @@ struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
struct cl_io *io)
{
- struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
+ struct lov_stripe_md *lsm;
int result;
LASSERT(lio->lis_object);
+ lsm = lio->lis_object->lo_lsm;
/*
* Need to be optimized, we can't afford to allocate a piece of memory
@@ -292,8 +292,9 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
lio->lis_single_subio_index = -1;
lio->lis_active_subios = 0;
result = 0;
- } else
+ } else {
result = -ENOMEM;
+ }
return result;
}
@@ -411,8 +412,9 @@ static int lov_io_iter_init(const struct lu_env *env,
lov_sub_put(sub);
CDEBUG(D_VFSTRACE, "shrink: %d [%llu, %llu)\n",
stripe, start, end);
- } else
+ } else {
rc = PTR_ERR(sub);
+ }
if (!rc)
list_add_tail(&sub->sub_linkage, &lio->lis_active);
@@ -436,7 +438,6 @@ static int lov_io_rw_iter_init(const struct lu_env *env,
/* fast path for common case. */
if (lio->lis_nr_subios != 1 && !cl_io_is_append(io)) {
-
lov_do_div64(start, ssize);
next = (start + 1) * ssize;
if (next <= start * ssize)
@@ -543,13 +544,6 @@ static void lov_io_unlock(const struct lu_env *env,
LASSERT(rc == 0);
}
-static struct cl_page_list *lov_io_submit_qin(struct lov_device *ld,
- struct cl_page_list *qin,
- int idx, int alloc)
-{
- return alloc ? &qin[idx] : &ld->ld_emrg[idx]->emrg_page_list;
-}
-
/**
* lov implementation of cl_operations::cio_submit() method. It takes a list
* of pages in \a queue, splits it into per-stripe sub-lists, invokes
@@ -569,25 +563,17 @@ static int lov_io_submit(const struct lu_env *env,
const struct cl_io_slice *ios,
enum cl_req_type crt, struct cl_2queue *queue)
{
- struct lov_io *lio = cl2lov_io(env, ios);
- struct lov_object *obj = lio->lis_object;
- struct lov_device *ld = lu2lov_dev(lov2cl(obj)->co_lu.lo_dev);
- struct cl_page_list *qin = &queue->c2_qin;
- struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q;
- struct cl_page_list *stripes_qin = NULL;
+ struct cl_page_list *qin = &queue->c2_qin;
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct lov_io_sub *sub;
+ struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
struct cl_page *page;
- struct cl_page *tmp;
int stripe;
-#define QIN(stripe) lov_io_submit_qin(ld, stripes_qin, stripe, alloc)
-
int rc = 0;
- int alloc =
- !(current->flags & PF_MEMALLOC);
if (lio->lis_active_subios == 1) {
int idx = lio->lis_single_subio_index;
- struct lov_io_sub *sub;
LASSERT(idx < lio->lis_nr_subios);
sub = lov_sub_get(env, lio, idx);
@@ -600,119 +586,120 @@ static int lov_io_submit(const struct lu_env *env,
}
LASSERT(lio->lis_subs);
- if (alloc) {
- stripes_qin =
- libcfs_kvzalloc(sizeof(*stripes_qin) *
- lio->lis_nr_subios,
- GFP_NOFS);
- if (!stripes_qin)
- return -ENOMEM;
-
- for (stripe = 0; stripe < lio->lis_nr_subios; stripe++)
- cl_page_list_init(&stripes_qin[stripe]);
- } else {
- /*
- * If we get here, it means pageout & swap doesn't help.
- * In order to not make things worse, even don't try to
- * allocate the memory with __GFP_NOWARN. -jay
- */
- mutex_lock(&ld->ld_mutex);
- lio->lis_mem_frozen = 1;
- }
- cl_2queue_init(cl2q);
- cl_page_list_for_each_safe(page, tmp, qin) {
- stripe = lov_page_stripe(page);
- cl_page_list_move(QIN(stripe), qin, page);
- }
+ cl_page_list_init(plist);
+ while (qin->pl_nr > 0) {
+ struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q;
- for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) {
- struct lov_io_sub *sub;
- struct cl_page_list *sub_qin = QIN(stripe);
+ cl_2queue_init(cl2q);
- if (list_empty(&sub_qin->pl_pages))
- continue;
+ page = cl_page_list_first(qin);
+ cl_page_list_move(&cl2q->c2_qin, qin, page);
+
+ stripe = lov_page_stripe(page);
+ while (qin->pl_nr > 0) {
+ page = cl_page_list_first(qin);
+ if (stripe != lov_page_stripe(page))
+ break;
+
+ cl_page_list_move(&cl2q->c2_qin, qin, page);
+ }
- cl_page_list_splice(sub_qin, &cl2q->c2_qin);
sub = lov_sub_get(env, lio, stripe);
if (!IS_ERR(sub)) {
rc = cl_io_submit_rw(sub->sub_env, sub->sub_io,
crt, cl2q);
lov_sub_put(sub);
- } else
+ } else {
rc = PTR_ERR(sub);
- cl_page_list_splice(&cl2q->c2_qin, &queue->c2_qin);
+ }
+
+ cl_page_list_splice(&cl2q->c2_qin, plist);
cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout);
+ cl_2queue_fini(env, cl2q);
+
if (rc != 0)
break;
}
- for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) {
- struct cl_page_list *sub_qin = QIN(stripe);
+ cl_page_list_splice(plist, qin);
+ cl_page_list_fini(env, plist);
- if (list_empty(&sub_qin->pl_pages))
- continue;
+ return rc;
+}
+
+static int lov_io_commit_async(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ struct cl_page_list *queue, int from, int to,
+ cl_commit_cbt cb)
+{
+ struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct lov_io_sub *sub;
+ struct cl_page *page;
+ int rc = 0;
- cl_page_list_splice(sub_qin, qin);
+ if (lio->lis_active_subios == 1) {
+ int idx = lio->lis_single_subio_index;
+
+ LASSERT(idx < lio->lis_nr_subios);
+ sub = lov_sub_get(env, lio, idx);
+ LASSERT(!IS_ERR(sub));
+ LASSERT(sub->sub_io == &lio->lis_single_subio);
+ rc = cl_io_commit_async(sub->sub_env, sub->sub_io, queue,
+ from, to, cb);
+ lov_sub_put(sub);
+ return rc;
}
- if (alloc) {
- kvfree(stripes_qin);
- } else {
- int i;
+ LASSERT(lio->lis_subs);
- for (i = 0; i < lio->lis_nr_subios; i++) {
- struct cl_io *cio = lio->lis_subs[i].sub_io;
+ cl_page_list_init(plist);
+ while (queue->pl_nr > 0) {
+ int stripe_to = to;
+ int stripe;
- if (cio && cio == &ld->ld_emrg[i]->emrg_subio)
- lov_io_sub_fini(env, lio, &lio->lis_subs[i]);
+ LASSERT(plist->pl_nr == 0);
+ page = cl_page_list_first(queue);
+ cl_page_list_move(plist, queue, page);
+
+ stripe = lov_page_stripe(page);
+ while (queue->pl_nr > 0) {
+ page = cl_page_list_first(queue);
+ if (stripe != lov_page_stripe(page))
+ break;
+
+ cl_page_list_move(plist, queue, page);
}
- lio->lis_mem_frozen = 0;
- mutex_unlock(&ld->ld_mutex);
- }
- return rc;
-#undef QIN
-}
+ if (queue->pl_nr > 0) /* still has more pages */
+ stripe_to = PAGE_SIZE;
-static int lov_io_prepare_write(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice,
- unsigned from, unsigned to)
-{
- struct lov_io *lio = cl2lov_io(env, ios);
- struct cl_page *sub_page = lov_sub_page(slice);
- struct lov_io_sub *sub;
- int result;
+ sub = lov_sub_get(env, lio, stripe);
+ if (!IS_ERR(sub)) {
+ rc = cl_io_commit_async(sub->sub_env, sub->sub_io,
+ plist, from, stripe_to, cb);
+ lov_sub_put(sub);
+ } else {
+ rc = PTR_ERR(sub);
+ break;
+ }
- sub = lov_page_subio(env, lio, slice);
- if (!IS_ERR(sub)) {
- result = cl_io_prepare_write(sub->sub_env, sub->sub_io,
- sub_page, from, to);
- lov_sub_put(sub);
- } else
- result = PTR_ERR(sub);
- return result;
-}
+ if (plist->pl_nr > 0) /* short write */
+ break;
-static int lov_io_commit_write(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice,
- unsigned from, unsigned to)
-{
- struct lov_io *lio = cl2lov_io(env, ios);
- struct cl_page *sub_page = lov_sub_page(slice);
- struct lov_io_sub *sub;
- int result;
+ from = 0;
+ }
- sub = lov_page_subio(env, lio, slice);
- if (!IS_ERR(sub)) {
- result = cl_io_commit_write(sub->sub_env, sub->sub_io,
- sub_page, from, to);
- lov_sub_put(sub);
- } else
- result = PTR_ERR(sub);
- return result;
+ /* for error case, add the page back into the qin list */
+ LASSERT(ergo(rc == 0, plist->pl_nr == 0));
+ while (plist->pl_nr > 0) {
+ /* error occurred, add the uncommitted pages back into queue */
+ page = cl_page_list_last(plist);
+ cl_page_list_move_head(queue, plist, page);
+ }
+
+ return rc;
}
static int lov_io_fault_start(const struct lu_env *env,
@@ -803,16 +790,8 @@ static const struct cl_io_operations lov_io_ops = {
.cio_fini = lov_io_fini
}
},
- .req_op = {
- [CRT_READ] = {
- .cio_submit = lov_io_submit
- },
- [CRT_WRITE] = {
- .cio_submit = lov_io_submit
- }
- },
- .cio_prepare_write = lov_io_prepare_write,
- .cio_commit_write = lov_io_commit_write
+ .cio_submit = lov_io_submit,
+ .cio_commit_async = lov_io_commit_async,
};
/*****************************************************************************
@@ -880,15 +859,8 @@ static const struct cl_io_operations lov_empty_io_ops = {
.cio_fini = lov_empty_io_fini
}
},
- .req_op = {
- [CRT_READ] = {
- .cio_submit = LOV_EMPTY_IMPOSSIBLE
- },
- [CRT_WRITE] = {
- .cio_submit = LOV_EMPTY_IMPOSSIBLE
- }
- },
- .cio_commit_write = LOV_EMPTY_IMPOSSIBLE
+ .cio_submit = LOV_EMPTY_IMPOSSIBLE,
+ .cio_commit_async = LOV_EMPTY_IMPOSSIBLE
};
int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj,
@@ -943,7 +915,7 @@ int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj,
}
io->ci_result = result < 0 ? result : 0;
- return result != 0;
+ return result;
}
int lov_io_init_released(const struct lu_env *env, struct cl_object *obj,
@@ -986,7 +958,7 @@ int lov_io_init_released(const struct lu_env *env, struct cl_object *obj,
}
io->ci_result = result < 0 ? result : 0;
- return result != 0;
+ return result;
}
/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_lock.c b/drivers/staging/lustre/lustre/lov/lov_lock.c
index ae854bc25dbe..f3a0583f28f5 100644
--- a/drivers/staging/lustre/lustre/lov/lov_lock.c
+++ b/drivers/staging/lustre/lustre/lov/lov_lock.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -46,11 +42,6 @@
* @{
*/
-static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
- struct cl_lock *parent);
-
-static int lov_lock_unuse(const struct lu_env *env,
- const struct cl_lock_slice *slice);
/*****************************************************************************
*
* Lov lock operations.
@@ -58,7 +49,7 @@ static int lov_lock_unuse(const struct lu_env *env,
*/
static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env,
- struct cl_lock *parent,
+ const struct cl_lock *parent,
struct lov_lock_sub *lls)
{
struct lov_sublock_env *subenv;
@@ -100,185 +91,26 @@ static void lov_sublock_env_put(struct lov_sublock_env *subenv)
lov_sub_put(subenv->lse_sub);
}
-static void lov_sublock_adopt(const struct lu_env *env, struct lov_lock *lck,
- struct cl_lock *sublock, int idx,
- struct lov_lock_link *link)
+static int lov_sublock_init(const struct lu_env *env,
+ const struct cl_lock *parent,
+ struct lov_lock_sub *lls)
{
- struct lovsub_lock *lsl;
- struct cl_lock *parent = lck->lls_cl.cls_lock;
- int rc;
-
- LASSERT(cl_lock_is_mutexed(parent));
- LASSERT(cl_lock_is_mutexed(sublock));
-
- lsl = cl2sub_lock(sublock);
- /*
- * check that sub-lock doesn't have lock link to this top-lock.
- */
- LASSERT(!lov_lock_link_find(env, lck, lsl));
- LASSERT(idx < lck->lls_nr);
-
- lck->lls_sub[idx].sub_lock = lsl;
- lck->lls_nr_filled++;
- LASSERT(lck->lls_nr_filled <= lck->lls_nr);
- list_add_tail(&link->lll_list, &lsl->lss_parents);
- link->lll_idx = idx;
- link->lll_super = lck;
- cl_lock_get(parent);
- lu_ref_add(&parent->cll_reference, "lov-child", sublock);
- lck->lls_sub[idx].sub_flags |= LSF_HELD;
- cl_lock_user_add(env, sublock);
-
- rc = lov_sublock_modify(env, lck, lsl, &sublock->cll_descr, idx);
- LASSERT(rc == 0); /* there is no way this can fail, currently */
-}
-
-static struct cl_lock *lov_sublock_alloc(const struct lu_env *env,
- const struct cl_io *io,
- struct lov_lock *lck,
- int idx, struct lov_lock_link **out)
-{
- struct cl_lock *sublock;
- struct cl_lock *parent;
- struct lov_lock_link *link;
-
- LASSERT(idx < lck->lls_nr);
-
- link = kmem_cache_zalloc(lov_lock_link_kmem, GFP_NOFS);
- if (link) {
- struct lov_sublock_env *subenv;
- struct lov_lock_sub *lls;
- struct cl_lock_descr *descr;
-
- parent = lck->lls_cl.cls_lock;
- lls = &lck->lls_sub[idx];
- descr = &lls->sub_got;
-
- subenv = lov_sublock_env_get(env, parent, lls);
- if (!IS_ERR(subenv)) {
- /* CAVEAT: Don't try to add a field in lov_lock_sub
- * to remember the subio. This is because lock is able
- * to be cached, but this is not true for IO. This
- * further means a sublock might be referenced in
- * different io context. -jay
- */
-
- sublock = cl_lock_hold(subenv->lse_env, subenv->lse_io,
- descr, "lov-parent", parent);
- lov_sublock_env_put(subenv);
- } else {
- /* error occurs. */
- sublock = (void *)subenv;
- }
-
- if (!IS_ERR(sublock))
- *out = link;
- else
- kmem_cache_free(lov_lock_link_kmem, link);
- } else
- sublock = ERR_PTR(-ENOMEM);
- return sublock;
-}
-
-static void lov_sublock_unlock(const struct lu_env *env,
- struct lovsub_lock *lsl,
- struct cl_lock_closure *closure,
- struct lov_sublock_env *subenv)
-{
- lov_sublock_env_put(subenv);
- lsl->lss_active = NULL;
- cl_lock_disclosure(env, closure);
-}
-
-static int lov_sublock_lock(const struct lu_env *env,
- struct lov_lock *lck,
- struct lov_lock_sub *lls,
- struct cl_lock_closure *closure,
- struct lov_sublock_env **lsep)
-{
- struct lovsub_lock *sublock;
- struct cl_lock *child;
- int result = 0;
-
- LASSERT(list_empty(&closure->clc_list));
-
- sublock = lls->sub_lock;
- child = sublock->lss_cl.cls_lock;
- result = cl_lock_closure_build(env, child, closure);
- if (result == 0) {
- struct cl_lock *parent = closure->clc_origin;
-
- LASSERT(cl_lock_is_mutexed(child));
- sublock->lss_active = parent;
-
- if (unlikely((child->cll_state == CLS_FREEING) ||
- (child->cll_flags & CLF_CANCELLED))) {
- struct lov_lock_link *link;
- /*
- * we could race with lock deletion which temporarily
- * put the lock in freeing state, bug 19080.
- */
- LASSERT(!(lls->sub_flags & LSF_HELD));
-
- link = lov_lock_link_find(env, lck, sublock);
- LASSERT(link);
- lov_lock_unlink(env, link, sublock);
- lov_sublock_unlock(env, sublock, closure, NULL);
- lck->lls_cancel_race = 1;
- result = CLO_REPEAT;
- } else if (lsep) {
- struct lov_sublock_env *subenv;
+ struct lov_sublock_env *subenv;
+ int result;
- subenv = lov_sublock_env_get(env, parent, lls);
- if (IS_ERR(subenv)) {
- lov_sublock_unlock(env, sublock,
- closure, NULL);
- result = PTR_ERR(subenv);
- } else {
- *lsep = subenv;
- }
- }
+ subenv = lov_sublock_env_get(env, parent, lls);
+ if (!IS_ERR(subenv)) {
+ result = cl_lock_init(subenv->lse_env, &lls->sub_lock,
+ subenv->lse_io);
+ lov_sublock_env_put(subenv);
+ } else {
+ /* error occurs. */
+ result = PTR_ERR(subenv);
}
return result;
}
/**
- * Updates the result of a top-lock operation from a result of sub-lock
- * sub-operations. Top-operations like lov_lock_{enqueue,use,unuse}() iterate
- * over sub-locks and lov_subresult() is used to calculate return value of a
- * top-operation. To this end, possible return values of sub-operations are
- * ordered as
- *
- * - 0 success
- * - CLO_WAIT wait for event
- * - CLO_REPEAT repeat top-operation
- * - -ne fundamental error
- *
- * Top-level return code can only go down through this list. CLO_REPEAT
- * overwrites CLO_WAIT, because lock mutex was released and sleeping condition
- * has to be rechecked by the upper layer.
- */
-static int lov_subresult(int result, int rc)
-{
- int result_rank;
- int rc_rank;
-
- LASSERTF(result <= 0 || result == CLO_REPEAT || result == CLO_WAIT,
- "result = %d\n", result);
- LASSERTF(rc <= 0 || rc == CLO_REPEAT || rc == CLO_WAIT,
- "rc = %d\n", rc);
- CLASSERT(CLO_WAIT < CLO_REPEAT);
-
- /* calculate ranks in the ordering above */
- result_rank = result < 0 ? 1 + CLO_REPEAT : result;
- rc_rank = rc < 0 ? 1 + CLO_REPEAT : rc;
-
- if (result_rank < rc_rank)
- result = rc;
- return result;
-}
-
-/**
* Creates sub-locks for a given lov_lock for the first time.
*
* Goes through all sub-objects of top-object, and creates sub-locks on every
@@ -286,8 +118,9 @@ static int lov_subresult(int result, int rc)
* fact that top-lock (that is being created) can be accessed concurrently
* through already created sub-locks (possibly shared with other top-locks).
*/
-static int lov_lock_sub_init(const struct lu_env *env,
- struct lov_lock *lck, const struct cl_io *io)
+static struct lov_lock *lov_lock_sub_init(const struct lu_env *env,
+ const struct cl_object *obj,
+ struct cl_lock *lock)
{
int result = 0;
int i;
@@ -297,241 +130,86 @@ static int lov_lock_sub_init(const struct lu_env *env,
u64 file_start;
u64 file_end;
- struct lov_object *loo = cl2lov(lck->lls_cl.cls_obj);
+ struct lov_object *loo = cl2lov(obj);
struct lov_layout_raid0 *r0 = lov_r0(loo);
- struct cl_lock *parent = lck->lls_cl.cls_lock;
+ struct lov_lock *lovlck;
- lck->lls_orig = parent->cll_descr;
- file_start = cl_offset(lov2cl(loo), parent->cll_descr.cld_start);
- file_end = cl_offset(lov2cl(loo), parent->cll_descr.cld_end + 1) - 1;
+ file_start = cl_offset(lov2cl(loo), lock->cll_descr.cld_start);
+ file_end = cl_offset(lov2cl(loo), lock->cll_descr.cld_end + 1) - 1;
for (i = 0, nr = 0; i < r0->lo_nr; i++) {
/*
* XXX for wide striping smarter algorithm is desirable,
* breaking out of the loop, early.
*/
- if (likely(r0->lo_sub[i]) &&
+ if (likely(r0->lo_sub[i]) && /* spare layout */
lov_stripe_intersects(loo->lo_lsm, i,
file_start, file_end, &start, &end))
nr++;
}
LASSERT(nr > 0);
- lck->lls_sub = libcfs_kvzalloc(nr * sizeof(lck->lls_sub[0]), GFP_NOFS);
- if (!lck->lls_sub)
- return -ENOMEM;
+ lovlck = libcfs_kvzalloc(offsetof(struct lov_lock, lls_sub[nr]),
+ GFP_NOFS);
+ if (!lovlck)
+ return ERR_PTR(-ENOMEM);
- lck->lls_nr = nr;
- /*
- * First, fill in sub-lock descriptions in
- * lck->lls_sub[].sub_descr. They are used by lov_sublock_alloc()
- * (called below in this function, and by lov_lock_enqueue()) to
- * create sub-locks. At this moment, no other thread can access
- * top-lock.
- */
+ lovlck->lls_nr = nr;
for (i = 0, nr = 0; i < r0->lo_nr; ++i) {
if (likely(r0->lo_sub[i]) &&
lov_stripe_intersects(loo->lo_lsm, i,
file_start, file_end, &start, &end)) {
+ struct lov_lock_sub *lls = &lovlck->lls_sub[nr];
struct cl_lock_descr *descr;
- descr = &lck->lls_sub[nr].sub_descr;
+ descr = &lls->sub_lock.cll_descr;
LASSERT(!descr->cld_obj);
descr->cld_obj = lovsub2cl(r0->lo_sub[i]);
descr->cld_start = cl_index(descr->cld_obj, start);
descr->cld_end = cl_index(descr->cld_obj, end);
- descr->cld_mode = parent->cll_descr.cld_mode;
- descr->cld_gid = parent->cll_descr.cld_gid;
- descr->cld_enq_flags = parent->cll_descr.cld_enq_flags;
- /* XXX has no effect */
- lck->lls_sub[nr].sub_got = *descr;
- lck->lls_sub[nr].sub_stripe = i;
+ descr->cld_mode = lock->cll_descr.cld_mode;
+ descr->cld_gid = lock->cll_descr.cld_gid;
+ descr->cld_enq_flags = lock->cll_descr.cld_enq_flags;
+ lls->sub_stripe = i;
+
+ /* initialize sub lock */
+ result = lov_sublock_init(env, lock, lls);
+ if (result < 0)
+ break;
+
+ lls->sub_initialized = 1;
nr++;
}
}
- LASSERT(nr == lck->lls_nr);
-
- /*
- * Some sub-locks can be missing at this point. This is not a problem,
- * because enqueue will create them anyway. Main duty of this function
- * is to fill in sub-lock descriptions in a race free manner.
- */
- return result;
-}
+ LASSERT(ergo(result == 0, nr == lovlck->lls_nr));
-static int lov_sublock_release(const struct lu_env *env, struct lov_lock *lck,
- int i, int deluser, int rc)
-{
- struct cl_lock *parent = lck->lls_cl.cls_lock;
-
- LASSERT(cl_lock_is_mutexed(parent));
-
- if (lck->lls_sub[i].sub_flags & LSF_HELD) {
- struct cl_lock *sublock;
- int dying;
-
- sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
- LASSERT(cl_lock_is_mutexed(sublock));
+ if (result != 0) {
+ for (i = 0; i < nr; ++i) {
+ if (!lovlck->lls_sub[i].sub_initialized)
+ break;
- lck->lls_sub[i].sub_flags &= ~LSF_HELD;
- if (deluser)
- cl_lock_user_del(env, sublock);
- /*
- * If the last hold is released, and cancellation is pending
- * for a sub-lock, release parent mutex, to avoid keeping it
- * while sub-lock is being paged out.
- */
- dying = (sublock->cll_descr.cld_mode == CLM_PHANTOM ||
- sublock->cll_descr.cld_mode == CLM_GROUP ||
- (sublock->cll_flags & (CLF_CANCELPEND|CLF_DOOMED))) &&
- sublock->cll_holds == 1;
- if (dying)
- cl_lock_mutex_put(env, parent);
- cl_lock_unhold(env, sublock, "lov-parent", parent);
- if (dying) {
- cl_lock_mutex_get(env, parent);
- rc = lov_subresult(rc, CLO_REPEAT);
+ cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
}
- /*
- * From now on lck->lls_sub[i].sub_lock is a "weak" pointer,
- * not backed by a reference on a
- * sub-lock. lovsub_lock_delete() will clear
- * lck->lls_sub[i].sub_lock under semaphores, just before
- * sub-lock is destroyed.
- */
+ kvfree(lovlck);
+ lovlck = ERR_PTR(result);
}
- return rc;
-}
-
-static void lov_sublock_hold(const struct lu_env *env, struct lov_lock *lck,
- int i)
-{
- struct cl_lock *parent = lck->lls_cl.cls_lock;
-
- LASSERT(cl_lock_is_mutexed(parent));
-
- if (!(lck->lls_sub[i].sub_flags & LSF_HELD)) {
- struct cl_lock *sublock;
-
- sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
- LASSERT(cl_lock_is_mutexed(sublock));
- LASSERT(sublock->cll_state != CLS_FREEING);
- lck->lls_sub[i].sub_flags |= LSF_HELD;
-
- cl_lock_get_trust(sublock);
- cl_lock_hold_add(env, sublock, "lov-parent", parent);
- cl_lock_user_add(env, sublock);
- cl_lock_put(env, sublock);
- }
+ return lovlck;
}
static void lov_lock_fini(const struct lu_env *env,
struct cl_lock_slice *slice)
{
- struct lov_lock *lck;
+ struct lov_lock *lovlck;
int i;
- lck = cl2lov_lock(slice);
- LASSERT(lck->lls_nr_filled == 0);
- if (lck->lls_sub) {
- for (i = 0; i < lck->lls_nr; ++i)
- /*
- * No sub-locks exists at this point, as sub-lock has
- * a reference on its parent.
- */
- LASSERT(!lck->lls_sub[i].sub_lock);
- kvfree(lck->lls_sub);
+ lovlck = cl2lov_lock(slice);
+ for (i = 0; i < lovlck->lls_nr; ++i) {
+ LASSERT(!lovlck->lls_sub[i].sub_is_enqueued);
+ if (lovlck->lls_sub[i].sub_initialized)
+ cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
}
- kmem_cache_free(lov_lock_kmem, lck);
-}
-
-static int lov_lock_enqueue_wait(const struct lu_env *env,
- struct lov_lock *lck,
- struct cl_lock *sublock)
-{
- struct cl_lock *lock = lck->lls_cl.cls_lock;
- int result;
-
- LASSERT(cl_lock_is_mutexed(lock));
-
- cl_lock_mutex_put(env, lock);
- result = cl_lock_enqueue_wait(env, sublock, 0);
- cl_lock_mutex_get(env, lock);
- return result ?: CLO_REPEAT;
-}
-
-/**
- * Tries to advance a state machine of a given sub-lock toward enqueuing of
- * the top-lock.
- *
- * \retval 0 if state-transition can proceed
- * \retval -ve otherwise.
- */
-static int lov_lock_enqueue_one(const struct lu_env *env, struct lov_lock *lck,
- struct cl_lock *sublock,
- struct cl_io *io, __u32 enqflags, int last)
-{
- int result;
-
- /* first, try to enqueue a sub-lock ... */
- result = cl_enqueue_try(env, sublock, io, enqflags);
- if ((sublock->cll_state == CLS_ENQUEUED) && !(enqflags & CEF_AGL)) {
- /* if it is enqueued, try to `wait' on it---maybe it's already
- * granted
- */
- result = cl_wait_try(env, sublock);
- if (result == CLO_REENQUEUED)
- result = CLO_WAIT;
- }
- /*
- * If CEF_ASYNC flag is set, then all sub-locks can be enqueued in
- * parallel, otherwise---enqueue has to wait until sub-lock is granted
- * before proceeding to the next one.
- */
- if ((result == CLO_WAIT) && (sublock->cll_state <= CLS_HELD) &&
- (enqflags & CEF_ASYNC) && (!last || (enqflags & CEF_AGL)))
- result = 0;
- return result;
-}
-
-/**
- * Helper function for lov_lock_enqueue() that creates missing sub-lock.
- */
-static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
- struct cl_io *io, struct lov_lock *lck, int idx)
-{
- struct lov_lock_link *link = NULL;
- struct cl_lock *sublock;
- int result;
-
- LASSERT(parent->cll_depth == 1);
- cl_lock_mutex_put(env, parent);
- sublock = lov_sublock_alloc(env, io, lck, idx, &link);
- if (!IS_ERR(sublock))
- cl_lock_mutex_get(env, sublock);
- cl_lock_mutex_get(env, parent);
-
- if (!IS_ERR(sublock)) {
- cl_lock_get_trust(sublock);
- if (parent->cll_state == CLS_QUEUING &&
- !lck->lls_sub[idx].sub_lock) {
- lov_sublock_adopt(env, lck, sublock, idx, link);
- } else {
- kmem_cache_free(lov_lock_link_kmem, link);
- /* other thread allocated sub-lock, or enqueue is no
- * longer going on
- */
- cl_lock_mutex_put(env, parent);
- cl_lock_unhold(env, sublock, "lov-parent", parent);
- cl_lock_mutex_get(env, parent);
- }
- cl_lock_mutex_put(env, sublock);
- cl_lock_put(env, sublock);
- result = CLO_REPEAT;
- } else
- result = PTR_ERR(sublock);
- return result;
+ kvfree(lovlck);
}
/**
@@ -543,529 +221,59 @@ static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
*/
static int lov_lock_enqueue(const struct lu_env *env,
const struct cl_lock_slice *slice,
- struct cl_io *io, __u32 enqflags)
+ struct cl_io *io, struct cl_sync_io *anchor)
{
- struct cl_lock *lock = slice->cls_lock;
- struct lov_lock *lck = cl2lov_lock(slice);
- struct cl_lock_closure *closure = lov_closure_get(env, lock);
+ struct cl_lock *lock = slice->cls_lock;
+ struct lov_lock *lovlck = cl2lov_lock(slice);
int i;
- int result;
- enum cl_lock_state minstate;
+ int rc = 0;
- for (result = 0, minstate = CLS_FREEING, i = 0; i < lck->lls_nr; ++i) {
- int rc;
- struct lovsub_lock *sub;
- struct lov_lock_sub *lls;
- struct cl_lock *sublock;
+ for (i = 0; i < lovlck->lls_nr; ++i) {
+ struct lov_lock_sub *lls = &lovlck->lls_sub[i];
struct lov_sublock_env *subenv;
- if (lock->cll_state != CLS_QUEUING) {
- /*
- * Lock might have left QUEUING state if previous
- * iteration released its mutex. Stop enqueing in this
- * case and let the upper layer to decide what to do.
- */
- LASSERT(i > 0 && result != 0);
- break;
- }
-
- lls = &lck->lls_sub[i];
- sub = lls->sub_lock;
- /*
- * Sub-lock might have been canceled, while top-lock was
- * cached.
- */
- if (!sub) {
- result = lov_sublock_fill(env, lock, io, lck, i);
- /* lov_sublock_fill() released @lock mutex,
- * restart.
- */
+ subenv = lov_sublock_env_get(env, lock, lls);
+ if (IS_ERR(subenv)) {
+ rc = PTR_ERR(subenv);
break;
}
- sublock = sub->lss_cl.cls_lock;
- rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
- if (rc == 0) {
- lov_sublock_hold(env, lck, i);
- rc = lov_lock_enqueue_one(subenv->lse_env, lck, sublock,
- subenv->lse_io, enqflags,
- i == lck->lls_nr - 1);
- minstate = min(minstate, sublock->cll_state);
- if (rc == CLO_WAIT) {
- switch (sublock->cll_state) {
- case CLS_QUEUING:
- /* take recursive mutex, the lock is
- * released in lov_lock_enqueue_wait.
- */
- cl_lock_mutex_get(env, sublock);
- lov_sublock_unlock(env, sub, closure,
- subenv);
- rc = lov_lock_enqueue_wait(env, lck,
- sublock);
- break;
- case CLS_CACHED:
- cl_lock_get(sublock);
- /* take recursive mutex of sublock */
- cl_lock_mutex_get(env, sublock);
- /* need to release all locks in closure
- * otherwise it may deadlock. LU-2683.
- */
- lov_sublock_unlock(env, sub, closure,
- subenv);
- /* sublock and parent are held. */
- rc = lov_sublock_release(env, lck, i,
- 1, rc);
- cl_lock_mutex_put(env, sublock);
- cl_lock_put(env, sublock);
- break;
- default:
- lov_sublock_unlock(env, sub, closure,
- subenv);
- break;
- }
- } else {
- LASSERT(!sublock->cll_conflict);
- lov_sublock_unlock(env, sub, closure, subenv);
- }
- }
- result = lov_subresult(result, rc);
- if (result != 0)
+ rc = cl_lock_enqueue(subenv->lse_env, subenv->lse_io,
+ &lls->sub_lock, anchor);
+ lov_sublock_env_put(subenv);
+ if (rc != 0)
break;
- }
- cl_lock_closure_fini(closure);
- return result ?: minstate >= CLS_ENQUEUED ? 0 : CLO_WAIT;
-}
-
-static int lov_lock_unuse(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct lov_lock *lck = cl2lov_lock(slice);
- struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
- int i;
- int result;
-
- for (result = 0, i = 0; i < lck->lls_nr; ++i) {
- int rc;
- struct lovsub_lock *sub;
- struct cl_lock *sublock;
- struct lov_lock_sub *lls;
- struct lov_sublock_env *subenv;
- /* top-lock state cannot change concurrently, because single
- * thread (one that released the last hold) carries unlocking
- * to the completion.
- */
- LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
- lls = &lck->lls_sub[i];
- sub = lls->sub_lock;
- if (!sub)
- continue;
-
- sublock = sub->lss_cl.cls_lock;
- rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
- if (rc == 0) {
- if (lls->sub_flags & LSF_HELD) {
- LASSERT(sublock->cll_state == CLS_HELD ||
- sublock->cll_state == CLS_ENQUEUED);
- rc = cl_unuse_try(subenv->lse_env, sublock);
- rc = lov_sublock_release(env, lck, i, 0, rc);
- }
- lov_sublock_unlock(env, sub, closure, subenv);
- }
- result = lov_subresult(result, rc);
+ lls->sub_is_enqueued = 1;
}
-
- if (result == 0 && lck->lls_cancel_race) {
- lck->lls_cancel_race = 0;
- result = -ESTALE;
- }
- cl_lock_closure_fini(closure);
- return result;
+ return rc;
}
static void lov_lock_cancel(const struct lu_env *env,
const struct cl_lock_slice *slice)
{
- struct lov_lock *lck = cl2lov_lock(slice);
- struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
+ struct cl_lock *lock = slice->cls_lock;
+ struct lov_lock *lovlck = cl2lov_lock(slice);
int i;
- int result;
- for (result = 0, i = 0; i < lck->lls_nr; ++i) {
- int rc;
- struct lovsub_lock *sub;
- struct cl_lock *sublock;
- struct lov_lock_sub *lls;
+ for (i = 0; i < lovlck->lls_nr; ++i) {
+ struct lov_lock_sub *lls = &lovlck->lls_sub[i];
+ struct cl_lock *sublock = &lls->sub_lock;
struct lov_sublock_env *subenv;
- /* top-lock state cannot change concurrently, because single
- * thread (one that released the last hold) carries unlocking
- * to the completion.
- */
- lls = &lck->lls_sub[i];
- sub = lls->sub_lock;
- if (!sub)
- continue;
-
- sublock = sub->lss_cl.cls_lock;
- rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
- if (rc == 0) {
- if (!(lls->sub_flags & LSF_HELD)) {
- lov_sublock_unlock(env, sub, closure, subenv);
- continue;
- }
-
- switch (sublock->cll_state) {
- case CLS_HELD:
- rc = cl_unuse_try(subenv->lse_env, sublock);
- lov_sublock_release(env, lck, i, 0, 0);
- break;
- default:
- lov_sublock_release(env, lck, i, 1, 0);
- break;
- }
- lov_sublock_unlock(env, sub, closure, subenv);
- }
-
- if (rc == CLO_REPEAT) {
- --i;
- continue;
- }
-
- result = lov_subresult(result, rc);
- }
-
- if (result)
- CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
- "lov_lock_cancel fails with %d.\n", result);
-
- cl_lock_closure_fini(closure);
-}
-
-static int lov_lock_wait(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct lov_lock *lck = cl2lov_lock(slice);
- struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
- enum cl_lock_state minstate;
- int reenqueued;
- int result;
- int i;
-
-again:
- for (result = 0, minstate = CLS_FREEING, i = 0, reenqueued = 0;
- i < lck->lls_nr; ++i) {
- int rc;
- struct lovsub_lock *sub;
- struct cl_lock *sublock;
- struct lov_lock_sub *lls;
- struct lov_sublock_env *subenv;
-
- lls = &lck->lls_sub[i];
- sub = lls->sub_lock;
- sublock = sub->lss_cl.cls_lock;
- rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
- if (rc == 0) {
- LASSERT(sublock->cll_state >= CLS_ENQUEUED);
- if (sublock->cll_state < CLS_HELD)
- rc = cl_wait_try(env, sublock);
-
- minstate = min(minstate, sublock->cll_state);
- lov_sublock_unlock(env, sub, closure, subenv);
- }
- if (rc == CLO_REENQUEUED) {
- reenqueued++;
- rc = 0;
- }
- result = lov_subresult(result, rc);
- if (result != 0)
- break;
- }
- /* Each sublock only can be reenqueued once, so will not loop
- * forever.
- */
- if (result == 0 && reenqueued != 0)
- goto again;
- cl_lock_closure_fini(closure);
- return result ?: minstate >= CLS_HELD ? 0 : CLO_WAIT;
-}
-
-static int lov_lock_use(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct lov_lock *lck = cl2lov_lock(slice);
- struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
- int result;
- int i;
-
- LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-
- for (result = 0, i = 0; i < lck->lls_nr; ++i) {
- int rc;
- struct lovsub_lock *sub;
- struct cl_lock *sublock;
- struct lov_lock_sub *lls;
- struct lov_sublock_env *subenv;
-
- LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-
- lls = &lck->lls_sub[i];
- sub = lls->sub_lock;
- if (!sub) {
- /*
- * Sub-lock might have been canceled, while top-lock was
- * cached.
- */
- result = -ESTALE;
- break;
- }
-
- sublock = sub->lss_cl.cls_lock;
- rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
- if (rc == 0) {
- LASSERT(sublock->cll_state != CLS_FREEING);
- lov_sublock_hold(env, lck, i);
- if (sublock->cll_state == CLS_CACHED) {
- rc = cl_use_try(subenv->lse_env, sublock, 0);
- if (rc != 0)
- rc = lov_sublock_release(env, lck,
- i, 1, rc);
- } else if (sublock->cll_state == CLS_NEW) {
- /* Sub-lock might have been canceled, while
- * top-lock was cached.
- */
- result = -ESTALE;
- lov_sublock_release(env, lck, i, 1, result);
- }
- lov_sublock_unlock(env, sub, closure, subenv);
- }
- result = lov_subresult(result, rc);
- if (result != 0)
- break;
- }
-
- if (lck->lls_cancel_race) {
- /*
- * If there is unlocking happened at the same time, then
- * sublock_lock state should be FREEING, and lov_sublock_lock
- * should return CLO_REPEAT. In this case, it should return
- * ESTALE, and up layer should reset the lock state to be NEW.
- */
- lck->lls_cancel_race = 0;
- LASSERT(result != 0);
- result = -ESTALE;
- }
- cl_lock_closure_fini(closure);
- return result;
-}
-
-/**
- * Check if the extent region \a descr is covered by \a child against the
- * specific \a stripe.
- */
-static int lov_lock_stripe_is_matching(const struct lu_env *env,
- struct lov_object *lov, int stripe,
- const struct cl_lock_descr *child,
- const struct cl_lock_descr *descr)
-{
- struct lov_stripe_md *lsm = lov->lo_lsm;
- u64 start;
- u64 end;
- int result;
-
- if (lov_r0(lov)->lo_nr == 1)
- return cl_lock_ext_match(child, descr);
-
- /*
- * For a multi-stripes object:
- * - make sure the descr only covers child's stripe, and
- * - check if extent is matching.
- */
- start = cl_offset(&lov->lo_cl, descr->cld_start);
- end = cl_offset(&lov->lo_cl, descr->cld_end + 1) - 1;
- result = 0;
- /* glimpse should work on the object with LOV EA hole. */
- if (end - start <= lsm->lsm_stripe_size) {
- int idx;
-
- idx = lov_stripe_number(lsm, start);
- if (idx == stripe ||
- unlikely(!lov_r0(lov)->lo_sub[idx])) {
- idx = lov_stripe_number(lsm, end);
- if (idx == stripe ||
- unlikely(!lov_r0(lov)->lo_sub[idx]))
- result = 1;
- }
- }
-
- if (result != 0) {
- struct cl_lock_descr *subd = &lov_env_info(env)->lti_ldescr;
- u64 sub_start;
- u64 sub_end;
-
- subd->cld_obj = NULL; /* don't need sub object at all */
- subd->cld_mode = descr->cld_mode;
- subd->cld_gid = descr->cld_gid;
- result = lov_stripe_intersects(lsm, stripe, start, end,
- &sub_start, &sub_end);
- LASSERT(result);
- subd->cld_start = cl_index(child->cld_obj, sub_start);
- subd->cld_end = cl_index(child->cld_obj, sub_end);
- result = cl_lock_ext_match(child, subd);
- }
- return result;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_fits_into() method.
- *
- * Checks whether a lock (given by \a slice) is suitable for \a
- * io. Multi-stripe locks can be used only for "quick" io, like truncate, or
- * O_APPEND write.
- *
- * \see ccc_lock_fits_into().
- */
-static int lov_lock_fits_into(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *need,
- const struct cl_io *io)
-{
- struct lov_lock *lov = cl2lov_lock(slice);
- struct lov_object *obj = cl2lov(slice->cls_obj);
- int result;
-
- LASSERT(cl_object_same(need->cld_obj, slice->cls_obj));
- LASSERT(lov->lls_nr > 0);
-
- /* for top lock, it's necessary to match enq flags otherwise it will
- * run into problem if a sublock is missing and reenqueue.
- */
- if (need->cld_enq_flags != lov->lls_orig.cld_enq_flags)
- return 0;
-
- if (need->cld_mode == CLM_GROUP)
- /*
- * always allow to match group lock.
- */
- result = cl_lock_ext_match(&lov->lls_orig, need);
- else if (lov->lls_nr == 1) {
- struct cl_lock_descr *got = &lov->lls_sub[0].sub_got;
-
- result = lov_lock_stripe_is_matching(env,
- cl2lov(slice->cls_obj),
- lov->lls_sub[0].sub_stripe,
- got, need);
- } else if (io->ci_type != CIT_SETATTR && io->ci_type != CIT_MISC &&
- !cl_io_is_append(io) && need->cld_mode != CLM_PHANTOM)
- /*
- * Multi-stripe locks are only suitable for `quick' IO and for
- * glimpse.
- */
- result = 0;
- else
- /*
- * Most general case: multi-stripe existing lock, and
- * (potentially) multi-stripe @need lock. Check that @need is
- * covered by @lov's sub-locks.
- *
- * For now, ignore lock expansions made by the server, and
- * match against original lock extent.
- */
- result = cl_lock_ext_match(&lov->lls_orig, need);
- CDEBUG(D_DLMTRACE, DDESCR"/"DDESCR" %d %d/%d: %d\n",
- PDESCR(&lov->lls_orig), PDESCR(&lov->lls_sub[0].sub_got),
- lov->lls_sub[0].sub_stripe, lov->lls_nr, lov_r0(obj)->lo_nr,
- result);
- return result;
-}
-
-void lov_lock_unlink(const struct lu_env *env,
- struct lov_lock_link *link, struct lovsub_lock *sub)
-{
- struct lov_lock *lck = link->lll_super;
- struct cl_lock *parent = lck->lls_cl.cls_lock;
-
- LASSERT(cl_lock_is_mutexed(parent));
- LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
-
- list_del_init(&link->lll_list);
- LASSERT(lck->lls_sub[link->lll_idx].sub_lock == sub);
- /* yank this sub-lock from parent's array */
- lck->lls_sub[link->lll_idx].sub_lock = NULL;
- LASSERT(lck->lls_nr_filled > 0);
- lck->lls_nr_filled--;
- lu_ref_del(&parent->cll_reference, "lov-child", sub->lss_cl.cls_lock);
- cl_lock_put(env, parent);
- kmem_cache_free(lov_lock_link_kmem, link);
-}
-
-struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
- struct lov_lock *lck,
- struct lovsub_lock *sub)
-{
- struct lov_lock_link *scan;
-
- LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
-
- list_for_each_entry(scan, &sub->lss_parents, lll_list) {
- if (scan->lll_super == lck)
- return scan;
- }
- return NULL;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked for "top-to-bottom" delete, when lock destruction starts from the
- * top-lock, e.g., as a result of inode destruction.
- *
- * Unlinks top-lock from all its sub-locks. Sub-locks are not deleted there:
- * this is done separately elsewhere:
- *
- * - for inode destruction, lov_object_delete() calls cl_object_kill() for
- * each sub-object, purging its locks;
- *
- * - in other cases (e.g., a fatal error with a top-lock) sub-locks are
- * left in the cache.
- */
-static void lov_lock_delete(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct lov_lock *lck = cl2lov_lock(slice);
- struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
- struct lov_lock_link *link;
- int rc;
- int i;
-
- LASSERT(slice->cls_lock->cll_state == CLS_FREEING);
-
- for (i = 0; i < lck->lls_nr; ++i) {
- struct lov_lock_sub *lls = &lck->lls_sub[i];
- struct lovsub_lock *lsl = lls->sub_lock;
-
- if (!lsl) /* already removed */
+ if (!lls->sub_is_enqueued)
continue;
- rc = lov_sublock_lock(env, lck, lls, closure, NULL);
- if (rc == CLO_REPEAT) {
- --i;
- continue;
+ lls->sub_is_enqueued = 0;
+ subenv = lov_sublock_env_get(env, lock, lls);
+ if (!IS_ERR(subenv)) {
+ cl_lock_cancel(subenv->lse_env, sublock);
+ lov_sublock_env_put(subenv);
+ } else {
+ CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
+ "lov_lock_cancel fails with %ld.\n",
+ PTR_ERR(subenv));
}
-
- LASSERT(rc == 0);
- LASSERT(lsl->lss_cl.cls_lock->cll_state < CLS_FREEING);
-
- if (lls->sub_flags & LSF_HELD)
- lov_sublock_release(env, lck, i, 1, 0);
-
- link = lov_lock_link_find(env, lck, lsl);
- LASSERT(link);
- lov_lock_unlink(env, link, lsl);
- LASSERT(!lck->lls_sub[i].sub_lock);
-
- lov_sublock_unlock(env, lsl, closure, NULL);
}
-
- cl_lock_closure_fini(closure);
}
static int lov_lock_print(const struct lu_env *env, void *cookie,
@@ -1079,12 +287,8 @@ static int lov_lock_print(const struct lu_env *env, void *cookie,
struct lov_lock_sub *sub;
sub = &lck->lls_sub[i];
- (*p)(env, cookie, " %d %x: ", i, sub->sub_flags);
- if (sub->sub_lock)
- cl_lock_print(env, cookie, p,
- sub->sub_lock->lss_cl.cls_lock);
- else
- (*p)(env, cookie, "---\n");
+ (*p)(env, cookie, " %d %x: ", i, sub->sub_is_enqueued);
+ cl_lock_print(env, cookie, p, &sub->sub_lock);
}
return 0;
}
@@ -1092,12 +296,7 @@ static int lov_lock_print(const struct lu_env *env, void *cookie,
static const struct cl_lock_operations lov_lock_ops = {
.clo_fini = lov_lock_fini,
.clo_enqueue = lov_lock_enqueue,
- .clo_wait = lov_lock_wait,
- .clo_use = lov_lock_use,
- .clo_unuse = lov_lock_unuse,
.clo_cancel = lov_lock_cancel,
- .clo_fits_into = lov_lock_fits_into,
- .clo_delete = lov_lock_delete,
.clo_print = lov_lock_print
};
@@ -1105,14 +304,13 @@ int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj,
struct cl_lock *lock, const struct cl_io *io)
{
struct lov_lock *lck;
- int result;
+ int result = 0;
- lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS);
- if (lck) {
+ lck = lov_lock_sub_init(env, obj, lock);
+ if (!IS_ERR(lck))
cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops);
- result = lov_lock_sub_init(env, lck, io);
- } else
- result = -ENOMEM;
+ else
+ result = PTR_ERR(lck);
return result;
}
@@ -1147,21 +345,9 @@ int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj,
lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS);
if (lck) {
cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops);
- lck->lls_orig = lock->cll_descr;
result = 0;
}
return result;
}
-static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
- struct cl_lock *parent)
-{
- struct cl_lock_closure *closure;
-
- closure = &lov_env_info(env)->lti_closure;
- LASSERT(list_empty(&closure->clc_list));
- cl_lock_closure_init(env, closure, parent, 1);
- return closure;
-}
-
/** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c
index 029cd4d62796..b9c90865fdfc 100644
--- a/drivers/staging/lustre/lustre/lov/lov_merge.c
+++ b/drivers/staging/lustre/lustre/lov/lov_merge.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -154,6 +150,7 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
valid &= src->o_valid;
if (*set) {
+ tgt->o_valid &= valid;
if (valid & OBD_MD_FLSIZE) {
/* this handles sparse files properly */
u64 lov_size;
@@ -172,12 +169,22 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
tgt->o_mtime = src->o_mtime;
if (valid & OBD_MD_FLDATAVERSION)
tgt->o_data_version += src->o_data_version;
+
+ /* handle flags */
+ if (valid & OBD_MD_FLFLAGS)
+ tgt->o_flags &= src->o_flags;
+ else
+ tgt->o_flags = 0;
} else {
memcpy(tgt, src, sizeof(*tgt));
tgt->o_oi = lsm->lsm_oi;
+ tgt->o_valid = valid;
if (valid & OBD_MD_FLSIZE)
tgt->o_size = lov_stripe_size(lsm, src->o_size,
stripeno);
+ tgt->o_flags = 0;
+ if (valid & OBD_MD_FLFLAGS)
+ tgt->o_flags = src->o_flags;
}
/* data_version needs to be valid on all stripes to be correct! */
diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c
index 5daa7faf4dda..9b92d5522edb 100644
--- a/drivers/staging/lustre/lustre/lov/lov_obd.c
+++ b/drivers/staging/lustre/lustre/lov/lov_obd.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -54,7 +50,6 @@
#include "../include/lprocfs_status.h"
#include "../include/lustre_param.h"
#include "../include/cl_object.h"
-#include "../include/lclient.h" /* for cl_client_lru */
#include "../include/lustre/ll_fiemap.h"
#include "../include/lustre_fid.h"
@@ -124,7 +119,6 @@ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid,
static int lov_notify(struct obd_device *obd, struct obd_device *watched,
enum obd_notify_event ev, void *data);
-#define MAX_STRING_SIZE 128
int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
struct obd_connect_data *data)
{
@@ -898,6 +892,12 @@ static int lov_cleanup(struct obd_device *obd)
kfree(lov->lov_tgts);
lov->lov_tgt_size = 0;
}
+
+ if (lov->lov_cache) {
+ cl_cache_decref(lov->lov_cache);
+ lov->lov_cache = NULL;
+ }
+
return 0;
}
@@ -965,7 +965,6 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
CERROR("Unknown command: %d\n", lcfg->lcfg_command);
rc = -EINVAL;
goto out;
-
}
}
out:
@@ -1734,6 +1733,27 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
if (!lsm_has_objects(lsm)) {
+ if (lsm && lsm_is_released(lsm) && (fm_key->fiemap.fm_start <
+ fm_key->oa.o_size)) {
+ /*
+ * released file, return a minimal FIEMAP if
+ * request fits in file-size.
+ */
+ fiemap->fm_mapped_extents = 1;
+ fiemap->fm_extents[0].fe_logical =
+ fm_key->fiemap.fm_start;
+ if (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length <
+ fm_key->oa.o_size) {
+ fiemap->fm_extents[0].fe_length =
+ fm_key->fiemap.fm_length;
+ } else {
+ fiemap->fm_extents[0].fe_length =
+ fm_key->oa.o_size - fm_key->fiemap.fm_start;
+ fiemap->fm_extents[0].fe_flags |=
+ (FIEMAP_EXTENT_UNKNOWN |
+ FIEMAP_EXTENT_LAST);
+ }
+ }
rc = 0;
goto out;
}
@@ -1754,7 +1774,8 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
fm_start = fiemap->fm_start;
fm_length = fiemap->fm_length;
/* Calculate start stripe, last stripe and length of mapping */
- actual_start_stripe = start_stripe = lov_stripe_number(lsm, fm_start);
+ start_stripe = lov_stripe_number(lsm, fm_start);
+ actual_start_stripe = start_stripe;
fm_end = (fm_length == ~0ULL ? fm_key->oa.o_size :
fm_start + fm_length - 1);
/* If fm_length != ~0ULL but fm_start+fm_length-1 exceeds file size */
@@ -2077,11 +2098,9 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
u32 count;
int i, rc = 0, err;
struct lov_tgt_desc *tgt;
- unsigned incr, check_uuid,
- do_inactive, no_set;
- unsigned next_id = 0, mds_con = 0;
+ unsigned int incr = 0, check_uuid = 0, do_inactive = 0, no_set = 0;
+ unsigned int next_id = 0, mds_con = 0;
- incr = check_uuid = do_inactive = no_set = 0;
if (!set) {
no_set = 1;
set = ptlrpc_prep_set();
@@ -2108,6 +2127,7 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
LASSERT(!lov->lov_cache);
lov->lov_cache = val;
do_inactive = 1;
+ cl_cache_incref(lov->lov_cache);
}
for (i = 0; i < count; i++, val = (char *)val + incr) {
@@ -2173,7 +2193,6 @@ void lov_stripe_lock(struct lov_stripe_md *md)
LASSERT(md->lsm_lock_owner == 0);
md->lsm_lock_owner = current_pid();
}
-EXPORT_SYMBOL(lov_stripe_lock);
void lov_stripe_unlock(struct lov_stripe_md *md)
__releases(&md->lsm_lock)
@@ -2182,7 +2201,6 @@ void lov_stripe_unlock(struct lov_stripe_md *md)
md->lsm_lock_owner = 0;
spin_unlock(&md->lsm_lock);
}
-EXPORT_SYMBOL(lov_stripe_unlock);
static int lov_quotactl(struct obd_device *obd, struct obd_export *exp,
struct obd_quotactl *oqctl)
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index 1f8ed95a6d89..f9621b0fd469 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -67,7 +63,7 @@ struct lov_layout_operations {
int (*llo_print)(const struct lu_env *env, void *cookie,
lu_printer_t p, const struct lu_object *o);
int (*llo_page_init)(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage);
+ struct cl_page *page, pgoff_t index);
int (*llo_lock_init)(const struct lu_env *env,
struct cl_object *obj, struct cl_lock *lock,
const struct cl_io *io);
@@ -185,14 +181,26 @@ static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
}
LU_OBJECT_DEBUG(mask, env, &stripe->co_lu,
- "stripe %d is already owned.\n", idx);
- LU_OBJECT_DEBUG(mask, env, old_obj, "owned.\n");
+ "stripe %d is already owned.", idx);
+ LU_OBJECT_DEBUG(mask, env, old_obj, "owned.");
LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n");
cl_object_put(env, stripe);
}
return result;
}
+static int lov_page_slice_fixup(struct lov_object *lov,
+ struct cl_object *stripe)
+{
+ struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
+ struct cl_object *o;
+
+ cl_object_for_each(o, stripe)
+ o->co_slice_off += hdr->coh_page_bufsize;
+
+ return cl_object_header(stripe)->coh_page_bufsize;
+}
+
static int lov_init_raid0(const struct lu_env *env,
struct lov_device *dev, struct lov_object *lov,
const struct cl_object_conf *conf,
@@ -222,6 +230,8 @@ static int lov_init_raid0(const struct lu_env *env,
r0->lo_sub = libcfs_kvzalloc(r0->lo_nr * sizeof(r0->lo_sub[0]),
GFP_NOFS);
if (r0->lo_sub) {
+ int psz = 0;
+
result = 0;
subconf->coc_inode = conf->coc_inode;
spin_lock_init(&r0->lo_sub_lock);
@@ -254,13 +264,24 @@ static int lov_init_raid0(const struct lu_env *env,
if (result == -EAGAIN) { /* try again */
--i;
result = 0;
+ continue;
}
} else {
result = PTR_ERR(stripe);
}
+
+ if (result == 0) {
+ int sz = lov_page_slice_fixup(lov, stripe);
+
+ LASSERT(ergo(psz > 0, psz == sz));
+ psz = sz;
+ }
}
- } else
+ if (result == 0)
+ cl_object_header(&lov->lo_cl)->coh_page_bufsize += psz;
+ } else {
result = -ENOMEM;
+ }
out:
return result;
}
@@ -286,8 +307,6 @@ static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED);
lov_layout_wait(env, lov);
-
- cl_object_prune(env, &lov->lo_cl);
return 0;
}
@@ -355,7 +374,7 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
struct lovsub_object *los = r0->lo_sub[i];
if (los) {
- cl_locks_prune(env, &los->lso_cl, 1);
+ cl_object_prune(env, &los->lso_cl);
/*
* If top-level object is to be evicted from
* the cache, so are its sub-objects.
@@ -364,7 +383,6 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
}
}
}
- cl_object_prune(env, &lov->lo_cl);
return 0;
}
@@ -666,7 +684,6 @@ static int lov_layout_change(const struct lu_env *unused,
const struct lov_layout_operations *old_ops;
const struct lov_layout_operations *new_ops;
- struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
void *cookie;
struct lu_env *env;
int refcheck;
@@ -691,13 +708,15 @@ static int lov_layout_change(const struct lu_env *unused,
old_ops = &lov_dispatch[lov->lo_type];
new_ops = &lov_dispatch[llt];
+ result = cl_object_prune(env, &lov->lo_cl);
+ if (result != 0)
+ goto out;
+
result = old_ops->llo_delete(env, lov, &lov->u);
if (result == 0) {
old_ops->llo_fini(env, lov, &lov->u);
LASSERT(atomic_read(&lov->lo_active_ios) == 0);
- LASSERT(!hdr->coh_tree.rnode);
- LASSERT(hdr->coh_pages == 0);
lov->lo_type = LLT_EMPTY;
result = new_ops->llo_init(env,
@@ -713,6 +732,7 @@ static int lov_layout_change(const struct lu_env *unused,
}
}
+out:
cl_env_put(env, &refcheck);
cl_env_reexit(cookie);
return result;
@@ -793,7 +813,8 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
goto out;
}
- lov->lo_layout_invalid = lov_layout_change(env, lov, conf);
+ result = lov_layout_change(env, lov, conf);
+ lov->lo_layout_invalid = result != 0;
out:
lov_conf_unlock(lov);
@@ -825,10 +846,10 @@ static int lov_object_print(const struct lu_env *env, void *cookie,
}
int lov_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage)
+ struct cl_page *page, pgoff_t index)
{
- return LOV_2DISPATCH_NOLOCK(cl2lov(obj),
- llo_page_init, env, obj, page, vmpage);
+ return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_page_init, env, obj, page,
+ index);
}
/**
@@ -911,8 +932,9 @@ struct lu_object *lov_object_alloc(const struct lu_env *env,
* for object with different layouts.
*/
obj->lo_ops = &lov_lu_obj_ops;
- } else
+ } else {
obj = NULL;
+ }
return obj;
}
diff --git a/drivers/staging/lustre/lustre/lov/lov_offset.c b/drivers/staging/lustre/lustre/lov/lov_offset.c
index ae83eb0f6f36..ecca74fbff00 100644
--- a/drivers/staging/lustre/lustre/lov/lov_offset.c
+++ b/drivers/staging/lustre/lustre/lov/lov_offset.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -66,6 +62,18 @@ u64 lov_stripe_size(struct lov_stripe_md *lsm, u64 ost_size, int stripeno)
return lov_size;
}
+/**
+ * Compute the file-level page index from a stripe-level page index
+ */
+pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
+ int stripe)
+{
+ loff_t offset;
+
+ offset = lov_stripe_size(lsm, (stripe_index << PAGE_SHIFT) + 1, stripe);
+ return offset >> PAGE_SHIFT;
+}
+
/* we have an offset in file backed by an lov and want to find out where
* that offset lands in our given stripe of the file. for the easy
* case where the offset is within the stripe, we just have to scale the
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
index 3925633a99ec..869ef41b13ca 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pack.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pack.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -136,7 +132,6 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
return -EINVAL;
-
}
if (lsm) {
@@ -444,8 +439,7 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
if (lum.lmm_magic == LOV_USER_MAGIC) {
/* User request for v1, we need skip lmm_pool_name */
if (lmmk->lmm_magic == LOV_MAGIC_V3) {
- memmove((char *)(&lmmk->lmm_stripe_count) +
- sizeof(lmmk->lmm_stripe_count),
+ memmove(((struct lov_mds_md_v1 *)lmmk)->lmm_objects,
((struct lov_mds_md_v3 *)lmmk)->lmm_objects,
lmmk->lmm_stripe_count *
sizeof(struct lov_ost_data_v1));
@@ -457,9 +451,9 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
}
/* User wasn't expecting this many OST entries */
- if (lum.lmm_stripe_count == 0)
+ if (lum.lmm_stripe_count == 0) {
lmm_size = lum_size;
- else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
+ } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
rc = -EOVERFLOW;
goto out_set;
}
diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c
index fdcaf8047ad8..c17026f14896 100644
--- a/drivers/staging/lustre/lustre/lov/lov_page.c
+++ b/drivers/staging/lustre/lustre/lov/lov_page.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -36,6 +32,7 @@
* Implementation of cl_page for LOV layer.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_LOV
@@ -52,116 +49,66 @@
*
*/
-static int lov_page_invariant(const struct cl_page_slice *slice)
+/**
+ * Adjust the stripe index by layout of raid0. @max_index is the maximum
+ * page index covered by an underlying DLM lock.
+ * This function converts max_index from stripe level to file level and
+ * makes sure it does not extend beyond the end of the current stripe.
+ */
+static int lov_raid0_page_is_under_lock(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ struct cl_io *unused,
+ pgoff_t *max_index)
{
- const struct cl_page *page = slice->cpl_page;
- const struct cl_page *sub = lov_sub_page(slice);
+ struct lov_object *loo = cl2lov(slice->cpl_obj);
+ struct lov_layout_raid0 *r0 = lov_r0(loo);
+ pgoff_t index = *max_index;
+ unsigned int pps; /* pages per stripe */
- return ergo(sub,
- page->cp_child == sub &&
- sub->cp_parent == page &&
- page->cp_state == sub->cp_state);
-}
+ CDEBUG(D_READA, "*max_index = %lu, nr = %d\n", index, r0->lo_nr);
+ if (index == 0) /* the page is not covered by any lock */
+ return 0;
-static void lov_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct cl_page *sub = lov_sub_page(slice);
+ if (r0->lo_nr == 1) /* single stripe file */
+ return 0;
- LINVRNT(lov_page_invariant(slice));
-
- if (sub) {
- LASSERT(sub->cp_state == CPS_FREEING);
- lu_ref_del(&sub->cp_reference, "lov", sub->cp_parent);
- sub->cp_parent = NULL;
- slice->cpl_page->cp_child = NULL;
- cl_page_put(env, sub);
+ /* max_index is stripe level, convert it into file level */
+ if (index != CL_PAGE_EOF) {
+ int stripeno = lov_page_stripe(slice->cpl_page);
+ *max_index = lov_stripe_pgoff(loo->lo_lsm, index, stripeno);
}
-}
-
-static int lov_page_own(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io,
- int nonblock)
-{
- struct lov_io *lio = lov_env_io(env);
- struct lov_io_sub *sub;
- LINVRNT(lov_page_invariant(slice));
- LINVRNT(!cl2lov_page(slice)->lps_invalid);
+ /* calculate the end of current stripe */
+ pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT;
+ index = ((slice->cpl_index + pps) & ~(pps - 1)) - 1;
- sub = lov_page_subio(env, lio, slice);
- if (!IS_ERR(sub)) {
- lov_sub_page(slice)->cp_owner = sub->sub_io;
- lov_sub_put(sub);
- } else
- LBUG(); /* Arrgh */
+ /* never exceed the end of the stripe */
+ *max_index = min_t(pgoff_t, *max_index, index);
return 0;
}
-static void lov_page_assume(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io)
-{
- lov_page_own(env, slice, io, 0);
-}
-
-static int lov_page_cache_add(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io)
-{
- struct lov_io *lio = lov_env_io(env);
- struct lov_io_sub *sub;
- int rc = 0;
-
- LINVRNT(lov_page_invariant(slice));
- LINVRNT(!cl2lov_page(slice)->lps_invalid);
-
- sub = lov_page_subio(env, lio, slice);
- if (!IS_ERR(sub)) {
- rc = cl_page_cache_add(sub->sub_env, sub->sub_io,
- slice->cpl_page->cp_child, CRT_WRITE);
- lov_sub_put(sub);
- } else {
- rc = PTR_ERR(sub);
- CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page, "rc = %d\n", rc);
- }
- return rc;
-}
-
-static int lov_page_print(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t printer)
+static int lov_raid0_page_print(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ void *cookie, lu_printer_t printer)
{
struct lov_page *lp = cl2lov_page(slice);
- return (*printer)(env, cookie, LUSTRE_LOV_NAME"-page@%p\n", lp);
+ return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, raid0\n", lp);
}
-static const struct cl_page_operations lov_page_ops = {
- .cpo_fini = lov_page_fini,
- .cpo_own = lov_page_own,
- .cpo_assume = lov_page_assume,
- .io = {
- [CRT_WRITE] = {
- .cpo_cache_add = lov_page_cache_add
- }
- },
- .cpo_print = lov_page_print
+static const struct cl_page_operations lov_raid0_page_ops = {
+ .cpo_is_under_lock = lov_raid0_page_is_under_lock,
+ .cpo_print = lov_raid0_page_print
};
-static void lov_empty_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- LASSERT(!slice->cpl_page->cp_child);
-}
-
int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage)
+ struct cl_page *page, pgoff_t index)
{
struct lov_object *loo = cl2lov(obj);
struct lov_layout_raid0 *r0 = lov_r0(loo);
struct lov_io *lio = lov_env_io(env);
- struct cl_page *subpage;
struct cl_object *subobj;
+ struct cl_object *o;
struct lov_io_sub *sub;
struct lov_page *lpg = cl_object_page_slice(obj, page);
loff_t offset;
@@ -169,59 +116,57 @@ int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
int stripe;
int rc;
- offset = cl_offset(obj, page->cp_index);
+ offset = cl_offset(obj, index);
stripe = lov_stripe_number(loo->lo_lsm, offset);
LASSERT(stripe < r0->lo_nr);
rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff);
LASSERT(rc == 0);
- lpg->lps_invalid = 1;
- cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_page_ops);
+ cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_raid0_page_ops);
sub = lov_sub_get(env, lio, stripe);
- if (IS_ERR(sub)) {
- rc = PTR_ERR(sub);
- goto out;
- }
+ if (IS_ERR(sub))
+ return PTR_ERR(sub);
subobj = lovsub2cl(r0->lo_sub[stripe]);
- subpage = cl_page_find_sub(sub->sub_env, subobj,
- cl_index(subobj, suboff), vmpage, page);
- lov_sub_put(sub);
- if (IS_ERR(subpage)) {
- rc = PTR_ERR(subpage);
- goto out;
- }
-
- if (likely(subpage->cp_parent == page)) {
- lu_ref_add(&subpage->cp_reference, "lov", page);
- lpg->lps_invalid = 0;
- rc = 0;
- } else {
- CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n");
- CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n");
- LASSERT(0);
+ list_for_each_entry(o, &subobj->co_lu.lo_header->loh_layers,
+ co_lu.lo_linkage) {
+ if (o->co_ops->coo_page_init) {
+ rc = o->co_ops->coo_page_init(sub->sub_env, o, page,
+ cl_index(subobj, suboff));
+ if (rc != 0)
+ break;
+ }
}
+ lov_sub_put(sub);
-out:
return rc;
}
+static int lov_empty_page_print(const struct lu_env *env,
+ const struct cl_page_slice *slice,
+ void *cookie, lu_printer_t printer)
+{
+ struct lov_page *lp = cl2lov_page(slice);
+
+ return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, empty.\n",
+ lp);
+}
+
static const struct cl_page_operations lov_empty_page_ops = {
- .cpo_fini = lov_empty_page_fini,
- .cpo_print = lov_page_print
+ .cpo_print = lov_empty_page_print
};
int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage)
+ struct cl_page *page, pgoff_t index)
{
struct lov_page *lpg = cl_object_page_slice(obj, page);
void *addr;
- cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_empty_page_ops);
- addr = kmap(vmpage);
+ cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_empty_page_ops);
+ addr = kmap(page->cp_vmpage);
memset(addr, 0, cl_page_size(obj));
- kunmap(vmpage);
+ kunmap(page->cp_vmpage);
cl_page_export(env, page, 1);
return 0;
}
diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c
index 9ae1d6f42d6e..4c2d21729589 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pool.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pool.c
@@ -14,12 +14,8 @@
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see [sun.com URL with a
- * copy of GPLv2].
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -65,7 +61,6 @@ void lov_pool_putref(struct pool_desc *pool)
LASSERT(hlist_unhashed(&pool->pool_hash));
LASSERT(list_empty(&pool->pool_list));
LASSERT(!pool->pool_debugfs_entry);
- lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
lov_ost_pool_free(&(pool->pool_obds));
kfree(pool);
}
@@ -424,11 +419,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
if (rc)
goto out_err;
- memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
- rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
- if (rc)
- goto out_free_pool_obds;
-
INIT_HLIST_NODE(&new_pool->pool_hash);
/* get ref for debugfs file */
@@ -469,13 +459,10 @@ out_err:
list_del_init(&new_pool->pool_list);
lov->lov_pool_count--;
spin_unlock(&obd->obd_dev_lock);
-
ldebugfs_remove(&new_pool->pool_debugfs_entry);
-
- lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
-out_free_pool_obds:
lov_ost_pool_free(&new_pool->pool_obds);
kfree(new_pool);
+
return rc;
}
@@ -543,8 +530,6 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
if (rc)
goto out;
- pool->pool_rr.lqr_dirty = 1;
-
CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
ostname, poolname, pool_tgt_count(pool));
@@ -589,8 +574,6 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
lov_ost_pool_remove(&pool->pool_obds, lov_idx);
- pool->pool_rr.lqr_dirty = 1;
-
CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
poolname);
@@ -599,50 +582,3 @@ out:
lov_pool_putref(pool);
return rc;
}
-
-int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
-{
- int i, rc;
-
- /* caller may no have a ref on pool if it got the pool
- * without calling lov_find_pool() (e.g. go through the lov pool
- * list)
- */
- lov_pool_getref(pool);
-
- down_read(&pool_tgt_rw_sem(pool));
-
- for (i = 0; i < pool_tgt_count(pool); i++) {
- if (pool_tgt_array(pool)[i] == idx) {
- rc = 0;
- goto out;
- }
- }
- rc = -ENOENT;
-out:
- up_read(&pool_tgt_rw_sem(pool));
-
- lov_pool_putref(pool);
- return rc;
-}
-
-struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
-{
- struct pool_desc *pool;
-
- pool = NULL;
- if (poolname[0] != '\0') {
- pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
- if (!pool)
- CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n",
- poolname);
- if (pool && (pool_tgt_count(pool) == 0)) {
- CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
- poolname);
- /* pool is ignored, so we remove ref on it */
- lov_pool_putref(pool);
- pool = NULL;
- }
- }
- return pool;
-}
diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c
index 7178a02d6267..4099b51f826e 100644
--- a/drivers/staging/lustre/lustre/lov/lov_request.c
+++ b/drivers/staging/lustre/lustre/lov/lov_request.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -52,7 +48,6 @@ static void lov_init_set(struct lov_request_set *set)
INIT_LIST_HEAD(&set->set_list);
atomic_set(&set->set_refcount, 1);
init_waitqueue_head(&set->set_waitq);
- spin_lock_init(&set->set_lock);
}
void lov_finish_set(struct lov_request_set *set)
@@ -235,7 +230,6 @@ out:
if (tmp_oa)
kmem_cache_free(obdo_cachep, tmp_oa);
return rc;
-
}
int lov_fini_getattr_set(struct lov_request_set *set)
@@ -363,7 +357,6 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
set->set_oi = oinfo;
set->set_oi->oi_md = lsm;
set->set_oi->oi_oa = src_oa;
- set->set_oti = oti;
if (oti && src_oa->o_valid & OBD_MD_FLCOOKIE)
set->set_cookies = oti->oti_logcookies;
@@ -480,7 +473,6 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
lov_init_set(set);
set->set_exp = exp;
- set->set_oti = oti;
set->set_oi = oinfo;
if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
set->set_cookies = oti->oti_logcookies;
@@ -716,12 +708,15 @@ int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
struct lov_request *req;
if (!lov->lov_tgts[i] ||
- (!lov_check_and_wait_active(lov, i) &&
- (oinfo->oi_flags & OBD_STATFS_NODELAY))) {
+ (oinfo->oi_flags & OBD_STATFS_NODELAY &&
+ !lov->lov_tgts[i]->ltd_active)) {
CDEBUG(D_HA, "lov idx %d inactive\n", i);
continue;
}
+ if (!lov->lov_tgts[i]->ltd_active)
+ lov_check_and_wait_active(lov, i);
+
/* skip targets that have been explicitly disabled by the
* administrator
*/
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
index c335c020f4f4..b519a1940e1e 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -151,8 +147,9 @@ static int lovsub_req_init(const struct lu_env *env, struct cl_device *dev,
if (lsr) {
cl_req_slice_add(req, &lsr->lsrq_cl, dev, &lovsub_req_ops);
result = 0;
- } else
+ } else {
result = -ENOMEM;
+ }
return result;
}
@@ -182,10 +179,12 @@ static struct lu_device *lovsub_device_alloc(const struct lu_env *env,
d = lovsub2lu_dev(lsd);
d->ld_ops = &lovsub_lu_ops;
lsd->acid_cl.cd_ops = &lovsub_cl_ops;
- } else
+ } else {
d = ERR_PTR(result);
- } else
+ }
+ } else {
d = ERR_PTR(-ENOMEM);
+ }
return d;
}
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_io.c b/drivers/staging/lustre/lustre/lov/lovsub_io.c
index 783ec687a4e7..6a9820218a3e 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_io.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_io.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_lock.c b/drivers/staging/lustre/lustre/lov/lovsub_lock.c
index 3bb0c9068a90..38f9b735c241 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_lock.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_lock.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -62,391 +58,8 @@ static void lovsub_lock_fini(const struct lu_env *env,
kmem_cache_free(lovsub_lock_kmem, lsl);
}
-static void lovsub_parent_lock(const struct lu_env *env, struct lov_lock *lov)
-{
- struct cl_lock *parent;
-
- parent = lov->lls_cl.cls_lock;
- cl_lock_get(parent);
- lu_ref_add(&parent->cll_reference, "lovsub-parent", current);
- cl_lock_mutex_get(env, parent);
-}
-
-static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
-{
- struct cl_lock *parent;
-
- parent = lov->lls_cl.cls_lock;
- cl_lock_mutex_put(env, lov->lls_cl.cls_lock);
- lu_ref_del(&parent->cll_reference, "lovsub-parent", current);
- cl_lock_put(env, parent);
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for lovsub layer, which
- * method is called whenever sub-lock state changes. Propagates state change
- * to the top-locks.
- */
-static void lovsub_lock_state(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- enum cl_lock_state state)
-{
- struct lovsub_lock *sub = cl2lovsub_lock(slice);
- struct lov_lock_link *scan;
-
- LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
- list_for_each_entry(scan, &sub->lss_parents, lll_list) {
- struct lov_lock *lov = scan->lll_super;
- struct cl_lock *parent = lov->lls_cl.cls_lock;
-
- if (sub->lss_active != parent) {
- lovsub_parent_lock(env, lov);
- cl_lock_signal(env, parent);
- lovsub_parent_unlock(env, lov);
- }
- }
-}
-
-/**
- * Implementation of cl_lock_operation::clo_weigh() estimating lock weight by
- * asking parent lock.
- */
-static unsigned long lovsub_lock_weigh(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct lovsub_lock *lock = cl2lovsub_lock(slice);
- struct lov_lock *lov;
- unsigned long dumbbell;
-
- LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
- if (!list_empty(&lock->lss_parents)) {
- /*
- * It is not clear whether all parents have to be asked and
- * their estimations summed, or it is enough to ask one. For
- * the current usages, one is always enough.
- */
- lov = container_of(lock->lss_parents.next,
- struct lov_lock_link, lll_list)->lll_super;
-
- lovsub_parent_lock(env, lov);
- dumbbell = cl_lock_weigh(env, lov->lls_cl.cls_lock);
- lovsub_parent_unlock(env, lov);
- } else
- dumbbell = 0;
-
- return dumbbell;
-}
-
-/**
- * Maps start/end offsets within a stripe, to offsets within a file.
- */
-static void lovsub_lock_descr_map(const struct cl_lock_descr *in,
- struct lov_object *lov,
- int stripe, struct cl_lock_descr *out)
-{
- pgoff_t size; /* stripe size in pages */
- pgoff_t skip; /* how many pages in every stripe are occupied by
- * "other" stripes
- */
- pgoff_t start;
- pgoff_t end;
-
- start = in->cld_start;
- end = in->cld_end;
-
- if (lov->lo_lsm->lsm_stripe_count > 1) {
- size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size);
- skip = (lov->lo_lsm->lsm_stripe_count - 1) * size;
-
- /* XXX overflow check here? */
- start += start/size * skip + stripe * size;
-
- if (end != CL_PAGE_EOF) {
- end += end/size * skip + stripe * size;
- /*
- * And check for overflow...
- */
- if (end < in->cld_end)
- end = CL_PAGE_EOF;
- }
- }
- out->cld_start = start;
- out->cld_end = end;
-}
-
-/**
- * Adjusts parent lock extent when a sub-lock is attached to a parent. This is
- * called in two ways:
- *
- * - as part of receive call-back, when server returns granted extent to
- * the client, and
- *
- * - when top-lock finds existing sub-lock in the cache.
- *
- * Note, that lock mode is not propagated to the parent: i.e., if CLM_READ
- * top-lock matches CLM_WRITE sub-lock, top-lock is still CLM_READ.
- */
-int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
- struct lovsub_lock *sublock,
- const struct cl_lock_descr *d, int idx)
-{
- struct cl_lock *parent;
- struct lovsub_object *subobj;
- struct cl_lock_descr *pd;
- struct cl_lock_descr *parent_descr;
- int result;
-
- parent = lov->lls_cl.cls_lock;
- parent_descr = &parent->cll_descr;
- LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode));
-
- subobj = cl2lovsub(sublock->lss_cl.cls_obj);
- pd = &lov_env_info(env)->lti_ldescr;
-
- pd->cld_obj = parent_descr->cld_obj;
- pd->cld_mode = parent_descr->cld_mode;
- pd->cld_gid = parent_descr->cld_gid;
- lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd);
- lov->lls_sub[idx].sub_got = *d;
- /*
- * Notify top-lock about modification, if lock description changes
- * materially.
- */
- if (!cl_lock_ext_match(parent_descr, pd))
- result = cl_lock_modify(env, parent, pd);
- else
- result = 0;
- return result;
-}
-
-static int lovsub_lock_modify(const struct lu_env *env,
- const struct cl_lock_slice *s,
- const struct cl_lock_descr *d)
-{
- struct lovsub_lock *lock = cl2lovsub_lock(s);
- struct lov_lock_link *scan;
- struct lov_lock *lov;
- int result = 0;
-
- LASSERT(cl_lock_mode_match(d->cld_mode,
- s->cls_lock->cll_descr.cld_mode));
- list_for_each_entry(scan, &lock->lss_parents, lll_list) {
- int rc;
-
- lov = scan->lll_super;
- lovsub_parent_lock(env, lov);
- rc = lov_sublock_modify(env, lov, lock, d, scan->lll_idx);
- lovsub_parent_unlock(env, lov);
- result = result ?: rc;
- }
- return result;
-}
-
-static int lovsub_lock_closure(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_lock_closure *closure)
-{
- struct lovsub_lock *sub;
- struct cl_lock *parent;
- struct lov_lock_link *scan;
- int result;
-
- LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
- sub = cl2lovsub_lock(slice);
- result = 0;
-
- list_for_each_entry(scan, &sub->lss_parents, lll_list) {
- parent = scan->lll_super->lls_cl.cls_lock;
- result = cl_lock_closure_build(env, parent, closure);
- if (result != 0)
- break;
- }
- return result;
-}
-
-/**
- * A helper function for lovsub_lock_delete() that deals with a given parent
- * top-lock.
- */
-static int lovsub_lock_delete_one(const struct lu_env *env,
- struct cl_lock *child, struct lov_lock *lov)
-{
- struct cl_lock *parent;
- int result;
-
- parent = lov->lls_cl.cls_lock;
- if (parent->cll_error)
- return 0;
-
- result = 0;
- switch (parent->cll_state) {
- case CLS_ENQUEUED:
- /* See LU-1355 for the case that a glimpse lock is
- * interrupted by signal
- */
- LASSERT(parent->cll_flags & CLF_CANCELLED);
- break;
- case CLS_QUEUING:
- case CLS_FREEING:
- cl_lock_signal(env, parent);
- break;
- case CLS_INTRANSIT:
- /*
- * Here lies a problem: a sub-lock is canceled while top-lock
- * is being unlocked. Top-lock cannot be moved into CLS_NEW
- * state, because unlocking has to succeed eventually by
- * placing lock into CLS_CACHED (or failing it), see
- * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED
- * state, because lov maintains an invariant that all
- * sub-locks exist in CLS_CACHED (this allows cached top-lock
- * to be reused immediately). Nor can we wait for top-lock
- * state to change, because this can be synchronous to the
- * current thread.
- *
- * We know for sure that lov_lock_unuse() will be called at
- * least one more time to finish un-using, so leave a mark on
- * the top-lock, that will be seen by the next call to
- * lov_lock_unuse().
- */
- if (cl_lock_is_intransit(parent))
- lov->lls_cancel_race = 1;
- break;
- case CLS_CACHED:
- /*
- * if a sub-lock is canceled move its top-lock into CLS_NEW
- * state to preserve an invariant that a top-lock in
- * CLS_CACHED is immediately ready for re-use (i.e., has all
- * sub-locks), and so that next attempt to re-use the top-lock
- * enqueues missing sub-lock.
- */
- cl_lock_state_set(env, parent, CLS_NEW);
- /* fall through */
- case CLS_NEW:
- /*
- * if last sub-lock is canceled, destroy the top-lock (which
- * is now `empty') proactively.
- */
- if (lov->lls_nr_filled == 0) {
- /* ... but unfortunately, this cannot be done easily,
- * as cancellation of a top-lock might acquire mutices
- * of its other sub-locks, violating lock ordering,
- * see cl_lock_{cancel,delete}() preconditions.
- *
- * To work around this, the mutex of this sub-lock is
- * released, top-lock is destroyed, and sub-lock mutex
- * acquired again. The list of parents has to be
- * re-scanned from the beginning after this.
- *
- * Only do this if no mutices other than on @child and
- * @parent are held by the current thread.
- *
- * TODO: The lock modal here is too complex, because
- * the lock may be canceled and deleted by voluntarily:
- * cl_lock_request
- * -> osc_lock_enqueue_wait
- * -> osc_lock_cancel_wait
- * -> cl_lock_delete
- * -> lovsub_lock_delete
- * -> cl_lock_cancel/delete
- * -> ...
- *
- * The better choice is to spawn a kernel thread for
- * this purpose. -jay
- */
- if (cl_lock_nr_mutexed(env) == 2) {
- cl_lock_mutex_put(env, child);
- cl_lock_cancel(env, parent);
- cl_lock_delete(env, parent);
- result = 1;
- }
- }
- break;
- case CLS_HELD:
- CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n");
- default:
- CERROR("Impossible state: %d\n", parent->cll_state);
- LBUG();
- break;
- }
-
- return result;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked in "bottom-to-top" delete, when lock destruction starts from the
- * sub-lock (e.g, as a result of ldlm lock LRU policy).
- */
-static void lovsub_lock_delete(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct cl_lock *child = slice->cls_lock;
- struct lovsub_lock *sub = cl2lovsub_lock(slice);
- int restart;
-
- LASSERT(cl_lock_is_mutexed(child));
-
- /*
- * Destruction of a sub-lock might take multiple iterations, because
- * when the last sub-lock of a given top-lock is deleted, top-lock is
- * canceled proactively, and this requires to release sub-lock
- * mutex. Once sub-lock mutex has been released, list of its parents
- * has to be re-scanned from the beginning.
- */
- do {
- struct lov_lock *lov;
- struct lov_lock_link *scan;
- struct lov_lock_link *temp;
- struct lov_lock_sub *subdata;
-
- restart = 0;
- list_for_each_entry_safe(scan, temp,
- &sub->lss_parents, lll_list) {
- lov = scan->lll_super;
- subdata = &lov->lls_sub[scan->lll_idx];
- lovsub_parent_lock(env, lov);
- subdata->sub_got = subdata->sub_descr;
- lov_lock_unlink(env, scan, sub);
- restart = lovsub_lock_delete_one(env, child, lov);
- lovsub_parent_unlock(env, lov);
-
- if (restart) {
- cl_lock_mutex_get(env, child);
- break;
- }
- }
- } while (restart);
-}
-
-static int lovsub_lock_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct cl_lock_slice *slice)
-{
- struct lovsub_lock *sub = cl2lovsub_lock(slice);
- struct lov_lock *lov;
- struct lov_lock_link *scan;
-
- list_for_each_entry(scan, &sub->lss_parents, lll_list) {
- lov = scan->lll_super;
- (*p)(env, cookie, "[%d %p ", scan->lll_idx, lov);
- if (lov)
- cl_lock_descr_print(env, cookie, p,
- &lov->lls_cl.cls_lock->cll_descr);
- (*p)(env, cookie, "] ");
- }
- return 0;
-}
-
static const struct cl_lock_operations lovsub_lock_ops = {
.clo_fini = lovsub_lock_fini,
- .clo_state = lovsub_lock_state,
- .clo_delete = lovsub_lock_delete,
- .clo_modify = lovsub_lock_modify,
- .clo_closure = lovsub_lock_closure,
- .clo_weigh = lovsub_lock_weigh,
- .clo_print = lovsub_lock_print
};
int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
@@ -460,8 +73,9 @@ int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
INIT_LIST_HEAD(&lsk->lss_parents);
cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops);
result = 0;
- } else
+ } else {
result = -ENOMEM;
+ }
return result;
}
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c
index 6c5430d938d0..fb2f2660b3e9 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_object.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -67,10 +63,10 @@ int lovsub_object_init(const struct lu_env *env, struct lu_object *obj,
lu_object_add(obj, below);
cl_object_page_init(lu2cl(obj), sizeof(struct lovsub_page));
result = 0;
- } else
+ } else {
result = -ENOMEM;
+ }
return result;
-
}
static void lovsub_object_free(const struct lu_env *env, struct lu_object *obj)
@@ -154,8 +150,9 @@ struct lu_object *lovsub_object_alloc(const struct lu_env *env,
lu_object_add_top(&hdr->coh_lu, obj);
los->lso_cl.co_ops = &lovsub_ops;
obj->lo_ops = &lovsub_lu_obj_ops;
- } else
+ } else {
obj = NULL;
+ }
return obj;
}
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_page.c b/drivers/staging/lustre/lustre/lov/lovsub_page.c
index 2d945532b78e..b2e68c3e820d 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_page.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_page.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -60,11 +56,11 @@ static const struct cl_page_operations lovsub_page_ops = {
};
int lovsub_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *unused)
+ struct cl_page *page, pgoff_t index)
{
struct lovsub_page *lsb = cl_object_page_slice(obj, page);
- cl_page_slice_add(page, &lsb->lsb_cl, obj, &lovsub_page_ops);
+ cl_page_slice_add(page, &lsb->lsb_cl, obj, index, &lovsub_page_ops);
return 0;
}
diff --git a/drivers/staging/lustre/lustre/lov/lproc_lov.c b/drivers/staging/lustre/lustre/lov/lproc_lov.c
index 0dcb6b6a7782..eb6d30d34e3a 100644
--- a/drivers/staging/lustre/lustre/lov/lproc_lov.c
+++ b/drivers/staging/lustre/lustre/lov/lproc_lov.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
index 38f267a60f59..98d15fb247bc 100644
--- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
+++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -49,9 +45,9 @@ static ssize_t max_rpcs_in_flight_show(struct kobject *kobj,
obd_kobj);
struct client_obd *cli = &dev->u.cli;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
len = sprintf(buf, "%u\n", cli->cl_max_rpcs_in_flight);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return len;
}
@@ -74,9 +70,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
if (val < 1 || val > MDC_MAX_RIF_MAX)
return -ERANGE;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
cli->cl_max_rpcs_in_flight = val;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return count;
}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
index c5519aeb0d8a..58f2841cabe4 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h
+++ b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
index b3bfdcb73670..143bd7628572 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -279,8 +275,7 @@ static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec,
rec->sa_atime = LTIME_S(op_data->op_attr.ia_atime);
rec->sa_mtime = LTIME_S(op_data->op_attr.ia_mtime);
rec->sa_ctime = LTIME_S(op_data->op_attr.ia_ctime);
- rec->sa_attr_flags =
- ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags;
+ rec->sa_attr_flags = op_data->op_attr_flags;
if ((op_data->op_attr.ia_valid & ATTR_GID) &&
in_group_p(op_data->op_attr.ia_gid))
rec->sa_suppgid =
@@ -439,7 +434,6 @@ void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags,
char *tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
LOGL0(op_data->op_name, op_data->op_namelen, tmp);
-
}
}
@@ -455,7 +449,7 @@ static void mdc_hsm_release_pack(struct ptlrpc_request *req,
lock = ldlm_handle2lock(&op_data->op_lease_handle);
if (lock) {
data->cd_handle = lock->l_remote_handle;
- ldlm_lock_put(lock);
+ LDLM_LOCK_PUT(lock);
}
ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
@@ -473,6 +467,18 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
mdc_setattr_pack_rec(rec, op_data);
+ /*
+ * The client will zero out local timestamps when losing the IBITS lock
+ * so any new RPC timestamps will update the client inode's timestamps.
+ * There was a defect on the server side which allowed the atime to be
+ * overwritten by a zeroed-out atime packed into the close RPC.
+ *
+ * Proactively clear the MDS_ATTR_ATIME flag in the RPC in this case
+ * to avoid zeroing the atime on old unpatched servers. See LU-8041.
+ */
+ if (rec->sa_atime == 0)
+ rec->sa_valid &= ~MDS_ATTR_ATIME;
+
mdc_ioepoch_pack(epoch, op_data);
mdc_hsm_release_pack(req, op_data);
}
@@ -481,9 +487,9 @@ static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
{
int rc;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
rc = list_empty(&mcw->mcw_entry);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return rc;
};
@@ -497,23 +503,23 @@ int mdc_enter_request(struct client_obd *cli)
struct mdc_cache_waiter mcw;
struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
init_waitqueue_head(&mcw.mcw_waitq);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
rc = l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw),
&lwi);
if (rc) {
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
if (list_empty(&mcw.mcw_entry))
cli->cl_r_in_flight--;
list_del_init(&mcw.mcw_entry);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
}
} else {
cli->cl_r_in_flight++;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
}
return rc;
}
@@ -523,7 +529,7 @@ void mdc_exit_request(struct client_obd *cli)
struct list_head *l, *tmp;
struct mdc_cache_waiter *mcw;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
cli->cl_r_in_flight--;
list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
@@ -538,5 +544,5 @@ void mdc_exit_request(struct client_obd *cli)
}
/* Empty waiting list? Decrease reqs in-flight number */
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index 958a164f620d..f48b58423307 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -54,61 +50,43 @@ struct mdc_getattr_args {
struct ldlm_enqueue_info *ga_einfo;
};
-int it_disposition(struct lookup_intent *it, int flag)
-{
- return it->d.lustre.it_disposition & flag;
-}
-EXPORT_SYMBOL(it_disposition);
-
-void it_set_disposition(struct lookup_intent *it, int flag)
-{
- it->d.lustre.it_disposition |= flag;
-}
-EXPORT_SYMBOL(it_set_disposition);
-
-void it_clear_disposition(struct lookup_intent *it, int flag)
-{
- it->d.lustre.it_disposition &= ~flag;
-}
-EXPORT_SYMBOL(it_clear_disposition);
-
int it_open_error(int phase, struct lookup_intent *it)
{
if (it_disposition(it, DISP_OPEN_LEASE)) {
if (phase >= DISP_OPEN_LEASE)
- return it->d.lustre.it_status;
+ return it->it_status;
else
return 0;
}
if (it_disposition(it, DISP_OPEN_OPEN)) {
if (phase >= DISP_OPEN_OPEN)
- return it->d.lustre.it_status;
+ return it->it_status;
else
return 0;
}
if (it_disposition(it, DISP_OPEN_CREATE)) {
if (phase >= DISP_OPEN_CREATE)
- return it->d.lustre.it_status;
+ return it->it_status;
else
return 0;
}
if (it_disposition(it, DISP_LOOKUP_EXECD)) {
if (phase >= DISP_LOOKUP_EXECD)
- return it->d.lustre.it_status;
+ return it->it_status;
else
return 0;
}
if (it_disposition(it, DISP_IT_EXECD)) {
if (phase >= DISP_IT_EXECD)
- return it->d.lustre.it_status;
+ return it->it_status;
else
return 0;
}
- CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition,
- it->d.lustre.it_status);
+ CERROR("it disp: %X, status: %d\n", it->it_disposition,
+ it->it_status);
LBUG();
return 0;
}
@@ -347,10 +325,6 @@ static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
mdc_open_pack(req, op_data, it->it_create_mode, 0, it->it_flags, lmm,
lmmsize);
- /* for remote client, fetch remote perm for current user */
- if (client_is_remote(exp))
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- sizeof(struct mdt_remote_perm));
ptlrpc_request_set_replen(req);
return req;
}
@@ -444,9 +418,7 @@ static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp,
struct obd_device *obddev = class_exp2obd(exp);
u64 valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE |
OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA |
- OBD_MD_MEA |
- (client_is_remote(exp) ?
- OBD_MD_FLRMTPERM : OBD_MD_FLACL);
+ OBD_MD_MEA | OBD_MD_FLACL;
struct ldlm_intent *lit;
int rc;
int easize;
@@ -478,9 +450,6 @@ static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp,
mdc_getattr_pack(req, valid, it->it_flags, op_data, easize);
req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, easize);
- if (client_is_remote(exp))
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- sizeof(struct mdt_remote_perm));
ptlrpc_request_set_replen(req);
return req;
}
@@ -555,7 +524,6 @@ static int mdc_finish_enqueue(struct obd_export *exp,
struct req_capsule *pill = &req->rq_pill;
struct ldlm_request *lockreq;
struct ldlm_reply *lockrep;
- struct lustre_intent_data *intent = &it->d.lustre;
struct ldlm_lock *lock;
void *lvb_data = NULL;
int lvb_len = 0;
@@ -589,17 +557,17 @@ static int mdc_finish_enqueue(struct obd_export *exp,
lockrep = req_capsule_server_get(pill, &RMF_DLM_REP);
- intent->it_disposition = (int)lockrep->lock_policy_res1;
- intent->it_status = (int)lockrep->lock_policy_res2;
- intent->it_lock_mode = einfo->ei_mode;
- intent->it_lock_handle = lockh->cookie;
- intent->it_data = req;
+ it->it_disposition = (int)lockrep->lock_policy_res1;
+ it->it_status = (int)lockrep->lock_policy_res2;
+ it->it_lock_mode = einfo->ei_mode;
+ it->it_lock_handle = lockh->cookie;
+ it->it_request = req;
/* Technically speaking rq_transno must already be zero if
* it_status is in error, so the check is a bit redundant
*/
- if ((!req->rq_transno || intent->it_status < 0) && req->rq_replay)
- mdc_clear_replay_flag(req, intent->it_status);
+ if ((!req->rq_transno || it->it_status < 0) && req->rq_replay)
+ mdc_clear_replay_flag(req, it->it_status);
/* If we're doing an IT_OPEN which did not result in an actual
* successful open, then we need to remove the bit which saves
@@ -610,11 +578,11 @@ static int mdc_finish_enqueue(struct obd_export *exp,
* (bug 3440)
*/
if (it->it_op & IT_OPEN && req->rq_replay &&
- (!it_disposition(it, DISP_OPEN_OPEN) || intent->it_status != 0))
- mdc_clear_replay_flag(req, intent->it_status);
+ (!it_disposition(it, DISP_OPEN_OPEN) || it->it_status != 0))
+ mdc_clear_replay_flag(req, it->it_status);
DEBUG_REQ(D_RPCTRACE, req, "op: %d disposition: %x, status: %d",
- it->it_op, intent->it_disposition, intent->it_status);
+ it->it_op, it->it_disposition, it->it_status);
/* We know what to expect, so we do any byte flipping required here */
if (it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR)) {
@@ -687,16 +655,6 @@ static int mdc_finish_enqueue(struct obd_export *exp,
memcpy(lmm, eadata, body->eadatasize);
}
}
-
- if (body->valid & OBD_MD_FLRMTPERM) {
- struct mdt_remote_perm *perm;
-
- LASSERT(client_is_remote(exp));
- perm = req_capsule_server_swab_get(pill, &RMF_ACL,
- lustre_swab_mdt_remote_perm);
- if (!perm)
- return -EPROTO;
- }
} else if (it->it_op & IT_LAYOUT) {
/* maybe the lock was granted right away and layout
* is packed into RMF_DLM_LVB of req
@@ -715,7 +673,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
if (lock && ldlm_has_layout(lock) && lvb_data) {
void *lmm;
- LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d\n",
+ LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d",
ldlm_it2str(it->it_op), lvb_len);
lmm = libcfs_kvzalloc(lvb_len, GFP_NOFS);
@@ -869,7 +827,9 @@ resend:
* (explicits or automatically generated by Kernel to clean
* current FLocks upon exit) that can't be trashed
*/
- if ((rc == -EINTR) || (rc == -ETIMEDOUT))
+ if (((rc == -EINTR) || (rc == -ETIMEDOUT)) &&
+ (einfo->ei_type == LDLM_FLOCK) &&
+ (einfo->ei_mode == LCK_NL))
goto resend;
return rc;
}
@@ -921,9 +881,9 @@ resend:
}
ptlrpc_req_finished(req);
- it->d.lustre.it_lock_handle = 0;
- it->d.lustre.it_lock_mode = 0;
- it->d.lustre.it_data = NULL;
+ it->it_lock_handle = 0;
+ it->it_lock_mode = 0;
+ it->it_request = NULL;
}
return rc;
@@ -947,8 +907,8 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
/* The server failed before it even started executing the
* intent, i.e. because it couldn't unpack the request.
*/
- LASSERT(it->d.lustre.it_status != 0);
- return it->d.lustre.it_status;
+ LASSERT(it->it_status != 0);
+ return it->it_status;
}
rc = it_open_error(DISP_IT_EXECD, it);
if (rc)
@@ -963,7 +923,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
if (fid_is_sane(&op_data->op_fid2) &&
it->it_create_mode & M_CHECK_STALE &&
it->it_op != IT_GETATTR) {
-
/* Also: did we find the same inode? */
/* sever can return one of two fids:
* op_fid2 - new allocated fid - if file is created.
@@ -1032,15 +991,15 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
LDLM_IBITS, &policy, LCK_NL,
&old_lock, 0)) {
ldlm_lock_decref_and_cancel(lockh,
- it->d.lustre.it_lock_mode);
+ it->it_lock_mode);
memcpy(lockh, &old_lock, sizeof(old_lock));
- it->d.lustre.it_lock_handle = lockh->cookie;
+ it->it_lock_handle = lockh->cookie;
}
}
CDEBUG(D_DENTRY,
"D_IT dentry %.*s intent: %s status %d disp %x rc %d\n",
op_data->op_namelen, op_data->op_name, ldlm_it2str(it->it_op),
- it->d.lustre.it_status, it->d.lustre.it_disposition, rc);
+ it->it_status, it->it_disposition, rc);
return rc;
}
@@ -1056,8 +1015,8 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
ldlm_policy_data_t policy;
enum ldlm_mode mode;
- if (it->d.lustre.it_lock_handle) {
- lockh.cookie = it->d.lustre.it_lock_handle;
+ if (it->it_lock_handle) {
+ lockh.cookie = it->it_lock_handle;
mode = ldlm_revalidate_lock_handle(&lockh, bits);
} else {
fid_build_reg_res_name(fid, &res_id);
@@ -1098,11 +1057,11 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
}
if (mode) {
- it->d.lustre.it_lock_handle = lockh.cookie;
- it->d.lustre.it_lock_mode = mode;
+ it->it_lock_handle = lockh.cookie;
+ it->it_lock_mode = mode;
} else {
- it->d.lustre.it_lock_handle = 0;
- it->d.lustre.it_lock_mode = 0;
+ it->it_lock_handle = 0;
+ it->it_lock_mode = 0;
}
return !!mode;
@@ -1124,15 +1083,15 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
* ll_create/ll_open gets called.
*
* The server will return to us, in it_disposition, an indication of
- * exactly what d.lustre.it_status refers to.
+ * exactly what it_status refers to.
*
- * If DISP_OPEN_OPEN is set, then d.lustre.it_status refers to the open() call,
+ * If DISP_OPEN_OPEN is set, then it_status refers to the open() call,
* otherwise if DISP_OPEN_CREATE is set, then it status is the
* creation failure mode. In either case, one of DISP_LOOKUP_NEG or
* DISP_LOOKUP_POS will be set, indicating whether the child lookup
* was successful.
*
- * Else, if DISP_LOOKUP_EXECD then d.lustre.it_status is the rc of the
+ * Else, if DISP_LOOKUP_EXECD then it_status is the rc of the
* child lookup.
*/
int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
@@ -1165,7 +1124,7 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
* be called in revalidate_it if we already have a lock, let's
* verify that.
*/
- it->d.lustre.it_lock_handle = 0;
+ it->it_lock_handle = 0;
rc = mdc_revalidate_lock(exp, it, &op_data->op_fid2, NULL);
/* Only return failure if it was not GETATTR by cfid
* (from inode_revalidate)
@@ -1187,7 +1146,7 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
if (rc < 0)
return rc;
- *reqp = it->d.lustre.it_data;
+ *reqp = it->it_request;
rc = mdc_finish_intent_lock(exp, *reqp, op_data, it, &lockh);
return rc;
}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
index 4ef3db147f87..5dba2c813857 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -234,7 +230,7 @@ rebuild:
MDS_INODELOCK_UPDATE);
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_REINT_CREATE_RMT_ACL);
+ &RQF_MDS_REINT_CREATE_ACL);
if (!req) {
ldlm_lock_list_put(&cancels, l_bl_ast, count);
return -ENOMEM;
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index b91d3ff18b02..542801f04b0d 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -142,25 +138,14 @@ static int mdc_getattr_common(struct obd_export *exp,
CDEBUG(D_NET, "mode: %o\n", body->mode);
+ mdc_update_max_ea_from_body(exp, body);
if (body->eadatasize != 0) {
- mdc_update_max_ea_from_body(exp, body);
-
eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
body->eadatasize);
if (!eadata)
return -EPROTO;
}
- if (body->valid & OBD_MD_FLRMTPERM) {
- struct mdt_remote_perm *perm;
-
- LASSERT(client_is_remote(exp));
- perm = req_capsule_server_swab_get(pill, &RMF_ACL,
- lustre_swab_mdt_remote_perm);
- if (!perm)
- return -EPROTO;
- }
-
return 0;
}
@@ -191,11 +176,6 @@ static int mdc_getattr(struct obd_export *exp, struct md_op_data *op_data,
req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
op_data->op_mode);
- if (op_data->op_valid & OBD_MD_FLRMTPERM) {
- LASSERT(client_is_remote(exp));
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- sizeof(struct mdt_remote_perm));
- }
ptlrpc_request_set_replen(req);
rc = mdc_getattr_common(exp, req);
@@ -435,7 +415,7 @@ static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md)
return rc;
}
- rc = posix_acl_valid(acl);
+ rc = posix_acl_valid(&init_user_ns, acl);
if (rc) {
CERROR("validate acl: %d\n", rc);
posix_acl_release(acl);
@@ -540,16 +520,7 @@ static int mdc_get_lustre_md(struct obd_export *exp,
}
rc = 0;
- if (md->body->valid & OBD_MD_FLRMTPERM) {
- /* remote permission */
- LASSERT(client_is_remote(exp));
- md->remote_perm = req_capsule_server_swab_get(pill, &RMF_ACL,
- lustre_swab_mdt_remote_perm);
- if (!md->remote_perm) {
- rc = -EPROTO;
- goto out;
- }
- } else if (md->body->valid & OBD_MD_FLACL) {
+ if (md->body->valid & OBD_MD_FLACL) {
/* for ACL, it's possible that FLACL is set but aclsize is zero.
* only when aclsize != 0 there's an actual segment for ACL
* in reply buffer.
@@ -666,7 +637,7 @@ int mdc_set_open_replay_data(struct obd_export *exp,
struct md_open_data *mod;
struct mdt_rec_create *rec;
struct mdt_body *body;
- struct ptlrpc_request *open_req = it->d.lustre.it_data;
+ struct ptlrpc_request *open_req = it->it_request;
struct obd_import *imp = open_req->rq_import;
if (!open_req->rq_replay)
@@ -1169,7 +1140,7 @@ static int mdc_ioc_hsm_progress(struct obd_export *exp,
goto out;
}
- mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+ mdc_pack_body(req, NULL, 0, 0, -1, 0);
/* Copy hsm_progress struct */
req_hpk = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_PROGRESS);
@@ -1203,7 +1174,7 @@ static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archives)
goto out;
}
- mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+ mdc_pack_body(req, NULL, 0, 0, -1, 0);
/* Copy hsm_progress struct */
archive_mask = req_capsule_client_get(&req->rq_pill,
@@ -1242,7 +1213,7 @@ static int mdc_ioc_hsm_current_action(struct obd_export *exp,
return rc;
}
- mdc_pack_body(req, &op_data->op_fid1, OBD_MD_FLRMTPERM, 0,
+ mdc_pack_body(req, &op_data->op_fid1, 0, 0,
op_data->op_suppgids[0], 0);
ptlrpc_request_set_replen(req);
@@ -1278,7 +1249,7 @@ static int mdc_ioc_hsm_ct_unregister(struct obd_import *imp)
goto out;
}
- mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+ mdc_pack_body(req, NULL, 0, 0, -1, 0);
ptlrpc_request_set_replen(req);
@@ -1307,7 +1278,7 @@ static int mdc_ioc_hsm_state_get(struct obd_export *exp,
return rc;
}
- mdc_pack_body(req, &op_data->op_fid1, OBD_MD_FLRMTPERM, 0,
+ mdc_pack_body(req, &op_data->op_fid1, 0, 0,
op_data->op_suppgids[0], 0);
ptlrpc_request_set_replen(req);
@@ -1348,7 +1319,7 @@ static int mdc_ioc_hsm_state_set(struct obd_export *exp,
return rc;
}
- mdc_pack_body(req, &op_data->op_fid1, OBD_MD_FLRMTPERM, 0,
+ mdc_pack_body(req, &op_data->op_fid1, 0, 0,
op_data->op_suppgids[0], 0);
/* Copy states */
@@ -1395,7 +1366,7 @@ static int mdc_ioc_hsm_request(struct obd_export *exp,
return rc;
}
- mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+ mdc_pack_body(req, NULL, 0, 0, -1, 0);
/* Copy hsm_request struct */
req_hr = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_REQUEST);
@@ -1808,7 +1779,7 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
case IOC_OBD_STATFS: {
struct obd_statfs stat_buf = {0};
- if (*((__u32 *) data->ioc_inlbuf2) != 0) {
+ if (*((__u32 *)data->ioc_inlbuf2) != 0) {
rc = -ENODEV;
goto out;
}
@@ -1952,7 +1923,7 @@ static void lustre_swab_hal(struct hsm_action_list *h)
__swab32s(&h->hal_count);
__swab32s(&h->hal_archive_id);
__swab64s(&h->hal_flags);
- hai = hai_zero(h);
+ hai = hai_first(h);
for (i = 0; i < h->hal_count; i++, hai = hai_next(hai))
lustre_swab_hai(hai);
}
@@ -2002,7 +1973,7 @@ static int mdc_hsm_copytool_send(int len, void *val)
if (len < sizeof(*lh) + sizeof(*hal)) {
CERROR("Short HSM message %d < %d\n", len,
- (int) (sizeof(*lh) + sizeof(*hal)));
+ (int)(sizeof(*lh) + sizeof(*hal)));
return -EPROTO;
}
if (lh->kuc_magic == __swab16(KUC_MAGIC)) {
@@ -2249,7 +2220,7 @@ static struct obd_uuid *mdc_get_uuid(struct obd_export *exp)
* recovery, non zero value will be return if the lock can be canceled,
* or zero returned for not
*/
-static int mdc_cancel_for_recovery(struct ldlm_lock *lock)
+static int mdc_cancel_weight(struct ldlm_lock *lock)
{
if (lock->l_resource->lr_type != LDLM_IBITS)
return 0;
@@ -2314,12 +2285,14 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
return -ENOMEM;
mdc_init_rpc_lock(cli->cl_rpc_lock);
- ptlrpcd_addref();
+ rc = ptlrpcd_addref();
+ if (rc < 0)
+ goto err_rpc_lock;
cli->cl_close_lock = kzalloc(sizeof(*cli->cl_close_lock), GFP_NOFS);
if (!cli->cl_close_lock) {
rc = -ENOMEM;
- goto err_rpc_lock;
+ goto err_ptlrpcd_decref;
}
mdc_init_rpc_lock(cli->cl_close_lock);
@@ -2331,7 +2304,7 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
sptlrpc_lprocfs_cliobd_attach(obd);
ptlrpc_lprocfs_register_obd(obd);
- ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery);
+ ns_register_cancel(obd->obd_namespace, mdc_cancel_weight);
obd->obd_namespace->ns_lvbo = &inode_lvbo;
@@ -2345,9 +2318,10 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
err_close_lock:
kfree(cli->cl_close_lock);
+err_ptlrpcd_decref:
+ ptlrpcd_decref();
err_rpc_lock:
kfree(cli->cl_rpc_lock);
- ptlrpcd_decref();
return rc;
}
@@ -2430,41 +2404,6 @@ static int mdc_process_config(struct obd_device *obd, u32 len, void *buf)
return rc;
}
-/* get remote permission for current user on fid */
-static int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
- __u32 suppgid, struct ptlrpc_request **request)
-{
- struct ptlrpc_request *req;
- int rc;
-
- LASSERT(client_is_remote(exp));
-
- *request = NULL;
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_GETATTR);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- mdc_pack_body(req, fid, OBD_MD_FLRMTPERM, 0, suppgid, 0);
-
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- sizeof(struct mdt_remote_perm));
-
- ptlrpc_request_set_replen(req);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- ptlrpc_req_finished(req);
- else
- *request = req;
- return rc;
-}
-
static struct obd_ops mdc_obd_ops = {
.owner = THIS_MODULE,
.setup = mdc_setup,
@@ -2516,7 +2455,6 @@ static struct md_ops mdc_md_ops = {
.free_lustre_md = mdc_free_lustre_md,
.set_open_replay_data = mdc_set_open_replay_data,
.clear_open_replay_data = mdc_clear_open_replay_data,
- .get_remote_perm = mdc_get_remote_perm,
.intent_getattr_async = mdc_intent_getattr_async,
.revalidate_lock = mdc_revalidate_lock
};
diff --git a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
index 8d5bc5a751a4..0735220b2a18 100644
--- a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
+++ b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_internal.h b/drivers/staging/lustre/lustre/mgc/mgc_internal.h
index 82fb8f46e037..f146f7521c92 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_internal.h
+++ b/drivers/staging/lustre/lustre/mgc/mgc_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index 3924b095bfb0..9d0bd4745865 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -500,10 +496,16 @@ static void do_requeue(struct config_llog_data *cld)
* export which is being disconnected. Take the client
* semaphore to make the check non-racy.
*/
- down_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem);
+ down_read_nested(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem,
+ OBD_CLI_SEM_MGC);
+
if (cld->cld_mgcexp->exp_obd->u.cli.cl_conn_count != 0) {
+ int rc;
+
CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname);
- mgc_process_log(cld->cld_mgcexp->exp_obd, cld);
+ rc = mgc_process_log(cld->cld_mgcexp->exp_obd, cld);
+ if (rc && rc != -ENOENT)
+ CERROR("failed processing log: %d\n", rc);
} else {
CDEBUG(D_MGC, "disconnecting, won't update log %s\n",
cld->cld_logname);
@@ -734,7 +736,9 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
struct task_struct *task;
int rc;
- ptlrpcd_addref();
+ rc = ptlrpcd_addref();
+ if (rc < 0)
+ goto err_noref;
rc = client_obd_setup(obd, lcfg);
if (rc)
@@ -773,6 +777,7 @@ err_cleanup:
client_obd_cleanup(obd);
err_decref:
ptlrpcd_decref();
+err_noref:
return rc;
}
@@ -1027,7 +1032,7 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
rc = sptlrpc_parse_flavor(val, &flvr);
if (rc) {
CERROR("invalid sptlrpc flavor %s to MGS\n",
- (char *) val);
+ (char *)val);
return rc;
}
@@ -1043,7 +1048,7 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
sptlrpc_flavor2name(&cli->cl_flvr_mgc,
str, sizeof(str));
LCONSOLE_ERROR("asking sptlrpc flavor %s to MGS but currently %s is in use\n",
- (char *) val, str);
+ (char *)val, str);
rc = -EPERM;
}
return rc;
@@ -1720,7 +1725,6 @@ static int mgc_process_config(struct obd_device *obd, u32 len, void *buf)
CERROR("Unknown command: %d\n", lcfg->lcfg_command);
rc = -EINVAL;
goto out;
-
}
}
out:
diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile
index c404eb3864ff..df7e47f35a66 100644
--- a/drivers/staging/lustre/lustre/obdclass/Makefile
+++ b/drivers/staging/lustre/lustre/obdclass/Makefile
@@ -5,5 +5,4 @@ obdclass-y := linux/linux-module.o linux/linux-obdo.o linux/linux-sysctl.o \
genops.o uuid.o lprocfs_status.o lprocfs_counters.o \
lustre_handles.o lustre_peer.o statfs_pack.o \
obdo.o obd_config.o obd_mount.o lu_object.o lu_ref.o \
- cl_object.o cl_page.o cl_lock.o cl_io.o \
- acl.o kernelcomm.o
+ cl_object.o cl_page.o cl_lock.o cl_io.o kernelcomm.o
diff --git a/drivers/staging/lustre/lustre/obdclass/acl.c b/drivers/staging/lustre/lustre/obdclass/acl.c
deleted file mode 100644
index 0e02ae97b7ed..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/acl.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/acl.c
- *
- * Lustre Access Control List.
- *
- * Author: Fan Yong <fanyong@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_SEC
-#include "../include/lu_object.h"
-#include "../include/lustre_acl.h"
-#include "../include/lustre_eacl.h"
-#include "../include/obd_support.h"
-
-#ifdef CONFIG_FS_POSIX_ACL
-
-#define CFS_ACL_XATTR_VERSION POSIX_ACL_XATTR_VERSION
-
-enum {
- ES_UNK = 0, /* unknown stat */
- ES_UNC = 1, /* ACL entry is not changed */
- ES_MOD = 2, /* ACL entry is modified */
- ES_ADD = 3, /* ACL entry is added */
- ES_DEL = 4 /* ACL entry is deleted */
-};
-
-static inline void lustre_ext_acl_le_to_cpu(ext_acl_xattr_entry *d,
- ext_acl_xattr_entry *s)
-{
- d->e_tag = le16_to_cpu(s->e_tag);
- d->e_perm = le16_to_cpu(s->e_perm);
- d->e_id = le32_to_cpu(s->e_id);
- d->e_stat = le32_to_cpu(s->e_stat);
-}
-
-static inline void lustre_ext_acl_cpu_to_le(ext_acl_xattr_entry *d,
- ext_acl_xattr_entry *s)
-{
- d->e_tag = cpu_to_le16(s->e_tag);
- d->e_perm = cpu_to_le16(s->e_perm);
- d->e_id = cpu_to_le32(s->e_id);
- d->e_stat = cpu_to_le32(s->e_stat);
-}
-
-static inline void lustre_posix_acl_le_to_cpu(posix_acl_xattr_entry *d,
- posix_acl_xattr_entry *s)
-{
- d->e_tag = le16_to_cpu(s->e_tag);
- d->e_perm = le16_to_cpu(s->e_perm);
- d->e_id = le32_to_cpu(s->e_id);
-}
-
-static inline void lustre_posix_acl_cpu_to_le(posix_acl_xattr_entry *d,
- posix_acl_xattr_entry *s)
-{
- d->e_tag = cpu_to_le16(s->e_tag);
- d->e_perm = cpu_to_le16(s->e_perm);
- d->e_id = cpu_to_le32(s->e_id);
-}
-
-/* if "new_count == 0", then "new = {a_version, NULL}", NOT NULL. */
-static int lustre_posix_acl_xattr_reduce_space(posix_acl_xattr_header **header,
- int old_count, int new_count)
-{
- int old_size = CFS_ACL_XATTR_SIZE(old_count, posix_acl_xattr);
- int new_size = CFS_ACL_XATTR_SIZE(new_count, posix_acl_xattr);
- posix_acl_xattr_header *new;
-
- if (unlikely(old_count <= new_count))
- return old_size;
-
- new = kmemdup(*header, new_size, GFP_NOFS);
- if (unlikely(!new))
- return -ENOMEM;
-
- kfree(*header);
- *header = new;
- return new_size;
-}
-
-/* if "new_count == 0", then "new = {0, NULL}", NOT NULL. */
-static int lustre_ext_acl_xattr_reduce_space(ext_acl_xattr_header **header,
- int old_count)
-{
- int ext_count = le32_to_cpu((*header)->a_count);
- int ext_size = CFS_ACL_XATTR_SIZE(ext_count, ext_acl_xattr);
- ext_acl_xattr_header *new;
-
- if (unlikely(old_count <= ext_count))
- return 0;
-
- new = kmemdup(*header, ext_size, GFP_NOFS);
- if (unlikely(!new))
- return -ENOMEM;
-
- kfree(*header);
- *header = new;
- return 0;
-}
-
-/*
- * Generate new extended ACL based on the posix ACL.
- */
-ext_acl_xattr_header *
-lustre_posix_acl_xattr_2ext(posix_acl_xattr_header *header, int size)
-{
- int count, i, esize;
- ext_acl_xattr_header *new;
-
- if (unlikely(size < 0))
- return ERR_PTR(-EINVAL);
- else if (!size)
- count = 0;
- else
- count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
- esize = CFS_ACL_XATTR_SIZE(count, ext_acl_xattr);
- new = kzalloc(esize, GFP_NOFS);
- if (unlikely(!new))
- return ERR_PTR(-ENOMEM);
-
- new->a_count = cpu_to_le32(count);
- for (i = 0; i < count; i++) {
- new->a_entries[i].e_tag = header->a_entries[i].e_tag;
- new->a_entries[i].e_perm = header->a_entries[i].e_perm;
- new->a_entries[i].e_id = header->a_entries[i].e_id;
- new->a_entries[i].e_stat = cpu_to_le32(ES_UNK);
- }
-
- return new;
-}
-EXPORT_SYMBOL(lustre_posix_acl_xattr_2ext);
-
-/*
- * Filter out the "nobody" entries in the posix ACL.
- */
-int lustre_posix_acl_xattr_filter(posix_acl_xattr_header *header, size_t size,
- posix_acl_xattr_header **out)
-{
- int count, i, j, rc = 0;
- __u32 id;
- posix_acl_xattr_header *new;
-
- if (!size)
- return 0;
- if (size < sizeof(*new))
- return -EINVAL;
-
- new = kzalloc(size, GFP_NOFS);
- if (unlikely(!new))
- return -ENOMEM;
-
- new->a_version = cpu_to_le32(CFS_ACL_XATTR_VERSION);
- count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
- for (i = 0, j = 0; i < count; i++) {
- id = le32_to_cpu(header->a_entries[i].e_id);
- switch (le16_to_cpu(header->a_entries[i].e_tag)) {
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- if (id != ACL_UNDEFINED_ID) {
- rc = -EIO;
- goto _out;
- }
-
- memcpy(&new->a_entries[j++], &header->a_entries[i],
- sizeof(posix_acl_xattr_entry));
- break;
- case ACL_USER:
- if (id != NOBODY_UID)
- memcpy(&new->a_entries[j++],
- &header->a_entries[i],
- sizeof(posix_acl_xattr_entry));
- break;
- case ACL_GROUP:
- if (id != NOBODY_GID)
- memcpy(&new->a_entries[j++],
- &header->a_entries[i],
- sizeof(posix_acl_xattr_entry));
- break;
- default:
- rc = -EIO;
- goto _out;
- }
- }
-
- /* free unused space. */
- rc = lustre_posix_acl_xattr_reduce_space(&new, count, j);
- if (rc >= 0) {
- size = rc;
- *out = new;
- rc = 0;
- }
-
-_out:
- if (rc) {
- kfree(new);
- size = rc;
- }
- return size;
-}
-EXPORT_SYMBOL(lustre_posix_acl_xattr_filter);
-
-/*
- * Release the extended ACL space.
- */
-void lustre_ext_acl_xattr_free(ext_acl_xattr_header *header)
-{
- kfree(header);
-}
-EXPORT_SYMBOL(lustre_ext_acl_xattr_free);
-
-static ext_acl_xattr_entry *
-lustre_ext_acl_xattr_search(ext_acl_xattr_header *header,
- posix_acl_xattr_entry *entry, int *pos)
-{
- int once, start, end, i, j, count = le32_to_cpu(header->a_count);
-
- once = 0;
- start = *pos;
- end = count;
-
-again:
- for (i = start; i < end; i++) {
- if (header->a_entries[i].e_tag == entry->e_tag &&
- header->a_entries[i].e_id == entry->e_id) {
- j = i;
- if (++i >= count)
- i = 0;
- *pos = i;
- return &header->a_entries[j];
- }
- }
-
- if (!once) {
- once = 1;
- start = 0;
- end = *pos;
- goto again;
- }
-
- return NULL;
-}
-
-/*
- * Merge the posix ACL and the extended ACL into new extended ACL.
- */
-ext_acl_xattr_header *
-lustre_acl_xattr_merge2ext(posix_acl_xattr_header *posix_header, int size,
- ext_acl_xattr_header *ext_header)
-{
- int ori_ext_count, posix_count, ext_count, ext_size;
- int i, j, pos = 0, rc = 0;
- posix_acl_xattr_entry pae;
- ext_acl_xattr_header *new;
- ext_acl_xattr_entry *ee, eae;
-
- if (unlikely(size < 0))
- return ERR_PTR(-EINVAL);
- else if (!size)
- posix_count = 0;
- else
- posix_count = CFS_ACL_XATTR_COUNT(size, posix_acl_xattr);
- ori_ext_count = le32_to_cpu(ext_header->a_count);
- ext_count = posix_count + ori_ext_count;
- ext_size = CFS_ACL_XATTR_SIZE(ext_count, ext_acl_xattr);
-
- new = kzalloc(ext_size, GFP_NOFS);
- if (unlikely(!new))
- return ERR_PTR(-ENOMEM);
-
- for (i = 0, j = 0; i < posix_count; i++) {
- lustre_posix_acl_le_to_cpu(&pae, &posix_header->a_entries[i]);
- switch (pae.e_tag) {
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- if (pae.e_id != ACL_UNDEFINED_ID) {
- rc = -EIO;
- goto out;
- }
- case ACL_USER:
- /* ignore "nobody" entry. */
- if (pae.e_id == NOBODY_UID)
- break;
-
- new->a_entries[j].e_tag =
- posix_header->a_entries[i].e_tag;
- new->a_entries[j].e_perm =
- posix_header->a_entries[i].e_perm;
- new->a_entries[j].e_id =
- posix_header->a_entries[i].e_id;
- ee = lustre_ext_acl_xattr_search(ext_header,
- &posix_header->a_entries[i], &pos);
- if (ee) {
- if (posix_header->a_entries[i].e_perm !=
- ee->e_perm)
- /* entry modified. */
- ee->e_stat =
- new->a_entries[j++].e_stat =
- cpu_to_le32(ES_MOD);
- else
- /* entry unchanged. */
- ee->e_stat =
- new->a_entries[j++].e_stat =
- cpu_to_le32(ES_UNC);
- } else {
- /* new entry. */
- new->a_entries[j++].e_stat =
- cpu_to_le32(ES_ADD);
- }
- break;
- case ACL_GROUP:
- /* ignore "nobody" entry. */
- if (pae.e_id == NOBODY_GID)
- break;
- new->a_entries[j].e_tag =
- posix_header->a_entries[i].e_tag;
- new->a_entries[j].e_perm =
- posix_header->a_entries[i].e_perm;
- new->a_entries[j].e_id =
- posix_header->a_entries[i].e_id;
- ee = lustre_ext_acl_xattr_search(ext_header,
- &posix_header->a_entries[i], &pos);
- if (ee) {
- if (posix_header->a_entries[i].e_perm !=
- ee->e_perm)
- /* entry modified. */
- ee->e_stat =
- new->a_entries[j++].e_stat =
- cpu_to_le32(ES_MOD);
- else
- /* entry unchanged. */
- ee->e_stat =
- new->a_entries[j++].e_stat =
- cpu_to_le32(ES_UNC);
- } else {
- /* new entry. */
- new->a_entries[j++].e_stat =
- cpu_to_le32(ES_ADD);
- }
- break;
- default:
- rc = -EIO;
- goto out;
- }
- }
-
- /* process deleted entries. */
- for (i = 0; i < ori_ext_count; i++) {
- lustre_ext_acl_le_to_cpu(&eae, &ext_header->a_entries[i]);
- if (eae.e_stat == ES_UNK) {
- /* ignore "nobody" entry. */
- if ((eae.e_tag == ACL_USER && eae.e_id == NOBODY_UID) ||
- (eae.e_tag == ACL_GROUP && eae.e_id == NOBODY_GID))
- continue;
-
- new->a_entries[j].e_tag =
- ext_header->a_entries[i].e_tag;
- new->a_entries[j].e_perm =
- ext_header->a_entries[i].e_perm;
- new->a_entries[j].e_id = ext_header->a_entries[i].e_id;
- new->a_entries[j++].e_stat = cpu_to_le32(ES_DEL);
- }
- }
-
- new->a_count = cpu_to_le32(j);
- /* free unused space. */
- rc = lustre_ext_acl_xattr_reduce_space(&new, ext_count);
-
-out:
- if (rc) {
- kfree(new);
- new = ERR_PTR(rc);
- }
- return new;
-}
-EXPORT_SYMBOL(lustre_acl_xattr_merge2ext);
-
-#endif
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_internal.h b/drivers/staging/lustre/lustre/obdclass/cl_internal.h
index 7eb0ad7b3644..e866754a42d5 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_internal.h
+++ b/drivers/staging/lustre/lustre/obdclass/cl_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c
index f5128b4f176f..e72f1fc00a13 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_io.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -36,6 +32,7 @@
* Client IO.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_CLASS
@@ -132,6 +129,7 @@ void cl_io_fini(const struct lu_env *env, struct cl_io *io)
case CIT_WRITE:
break;
case CIT_FAULT:
+ break;
case CIT_FSYNC:
LASSERT(!io->ci_need_restart);
break;
@@ -159,7 +157,6 @@ static int cl_io_init0(const struct lu_env *env, struct cl_io *io,
io->ci_type = iot;
INIT_LIST_HEAD(&io->ci_lockset.cls_todo);
- INIT_LIST_HEAD(&io->ci_lockset.cls_curr);
INIT_LIST_HEAD(&io->ci_lockset.cls_done);
INIT_LIST_HEAD(&io->ci_layers);
@@ -241,37 +238,7 @@ static int cl_lock_descr_sort(const struct cl_lock_descr *d0,
const struct cl_lock_descr *d1)
{
return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
- lu_object_fid(&d1->cld_obj->co_lu)) ?:
- __diff_normalize(d0->cld_start, d1->cld_start);
-}
-
-static int cl_lock_descr_cmp(const struct cl_lock_descr *d0,
- const struct cl_lock_descr *d1)
-{
- int ret;
-
- ret = lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
- lu_object_fid(&d1->cld_obj->co_lu));
- if (ret)
- return ret;
- if (d0->cld_end < d1->cld_start)
- return -1;
- if (d0->cld_start > d0->cld_end)
- return 1;
- return 0;
-}
-
-static void cl_lock_descr_merge(struct cl_lock_descr *d0,
- const struct cl_lock_descr *d1)
-{
- d0->cld_start = min(d0->cld_start, d1->cld_start);
- d0->cld_end = max(d0->cld_end, d1->cld_end);
-
- if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
- d0->cld_mode = CLM_WRITE;
-
- if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
- d0->cld_mode = CLM_GROUP;
+ lu_object_fid(&d1->cld_obj->co_lu));
}
/*
@@ -320,33 +287,35 @@ static void cl_io_locks_sort(struct cl_io *io)
} while (!done);
}
-/**
- * Check whether \a queue contains locks matching \a need.
- *
- * \retval +ve there is a matching lock in the \a queue
- * \retval 0 there are no matching locks in the \a queue
- */
-int cl_queue_match(const struct list_head *queue,
- const struct cl_lock_descr *need)
+static void cl_lock_descr_merge(struct cl_lock_descr *d0,
+ const struct cl_lock_descr *d1)
{
- struct cl_io_lock_link *scan;
+ d0->cld_start = min(d0->cld_start, d1->cld_start);
+ d0->cld_end = max(d0->cld_end, d1->cld_end);
- list_for_each_entry(scan, queue, cill_linkage) {
- if (cl_lock_descr_match(&scan->cill_descr, need))
- return 1;
- }
- return 0;
+ if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
+ d0->cld_mode = CLM_WRITE;
+
+ if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
+ d0->cld_mode = CLM_GROUP;
}
-EXPORT_SYMBOL(cl_queue_match);
-static int cl_queue_merge(const struct list_head *queue,
- const struct cl_lock_descr *need)
+static int cl_lockset_merge(const struct cl_lockset *set,
+ const struct cl_lock_descr *need)
{
struct cl_io_lock_link *scan;
- list_for_each_entry(scan, queue, cill_linkage) {
- if (cl_lock_descr_cmp(&scan->cill_descr, need))
+ list_for_each_entry(scan, &set->cls_todo, cill_linkage) {
+ if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj))
continue;
+
+ /* Merge locks for the same object because ldlm lock server
+ * may expand the lock extent, otherwise there is a deadlock
+ * case if two conflicting locks are queued for the same object
+ * and the lock server expands one lock to overlap the other.
+ * The side effect is that it can generate a multi-stripe lock
+ * that may cause cascading problems.
+ */
cl_lock_descr_merge(&scan->cill_descr, need);
CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
scan->cill_descr.cld_mode, scan->cill_descr.cld_start,
@@ -356,87 +325,20 @@ static int cl_queue_merge(const struct list_head *queue,
return 0;
}
-static int cl_lockset_match(const struct cl_lockset *set,
- const struct cl_lock_descr *need)
-{
- return cl_queue_match(&set->cls_curr, need) ||
- cl_queue_match(&set->cls_done, need);
-}
-
-static int cl_lockset_merge(const struct cl_lockset *set,
- const struct cl_lock_descr *need)
-{
- return cl_queue_merge(&set->cls_todo, need) ||
- cl_lockset_match(set, need);
-}
-
-static int cl_lockset_lock_one(const struct lu_env *env,
- struct cl_io *io, struct cl_lockset *set,
- struct cl_io_lock_link *link)
-{
- struct cl_lock *lock;
- int result;
-
- lock = cl_lock_request(env, io, &link->cill_descr, "io", io);
-
- if (!IS_ERR(lock)) {
- link->cill_lock = lock;
- list_move(&link->cill_linkage, &set->cls_curr);
- if (!(link->cill_descr.cld_enq_flags & CEF_ASYNC)) {
- result = cl_wait(env, lock);
- if (result == 0)
- list_move(&link->cill_linkage, &set->cls_done);
- } else
- result = 0;
- } else
- result = PTR_ERR(lock);
- return result;
-}
-
-static void cl_lock_link_fini(const struct lu_env *env, struct cl_io *io,
- struct cl_io_lock_link *link)
-{
- struct cl_lock *lock = link->cill_lock;
-
- list_del_init(&link->cill_linkage);
- if (lock) {
- cl_lock_release(env, lock, "io", io);
- link->cill_lock = NULL;
- }
- if (link->cill_fini)
- link->cill_fini(env, link);
-}
-
static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io,
struct cl_lockset *set)
{
struct cl_io_lock_link *link;
struct cl_io_lock_link *temp;
- struct cl_lock *lock;
int result;
result = 0;
list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
- if (!cl_lockset_match(set, &link->cill_descr)) {
- /* XXX some locking to guarantee that locks aren't
- * expanded in between.
- */
- result = cl_lockset_lock_one(env, io, set, link);
- if (result != 0)
- break;
- } else
- cl_lock_link_fini(env, io, link);
- }
- if (result == 0) {
- list_for_each_entry_safe(link, temp,
- &set->cls_curr, cill_linkage) {
- lock = link->cill_lock;
- result = cl_wait(env, lock);
- if (result == 0)
- list_move(&link->cill_linkage, &set->cls_done);
- else
- break;
- }
+ result = cl_lock_request(env, io, &link->cill_lock);
+ if (result < 0)
+ break;
+
+ list_move(&link->cill_linkage, &set->cls_done);
}
return result;
}
@@ -492,16 +394,19 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io)
set = &io->ci_lockset;
- list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage)
- cl_lock_link_fini(env, io, link);
-
- list_for_each_entry_safe(link, temp, &set->cls_curr, cill_linkage)
- cl_lock_link_fini(env, io, link);
+ list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
+ list_del_init(&link->cill_linkage);
+ if (link->cill_fini)
+ link->cill_fini(env, link);
+ }
list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) {
- cl_unuse(env, link->cill_lock);
- cl_lock_link_fini(env, io, link);
+ list_del_init(&link->cill_linkage);
+ cl_lock_release(env, &link->cill_lock);
+ if (link->cill_fini)
+ link->cill_fini(env, link);
}
+
cl_io_for_each_reverse(scan, io) {
if (scan->cis_iop->op[io->ci_type].cio_unlock)
scan->cis_iop->op[io->ci_type].cio_unlock(env, scan);
@@ -595,9 +500,9 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
{
int result;
- if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr))
+ if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) {
result = 1;
- else {
+ } else {
list_add(&link->cill_linkage, &io->ci_lockset.cls_todo);
result = 0;
}
@@ -627,8 +532,9 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
result = cl_io_lock_add(env, io, link);
if (result) /* lock match */
link->cill_fini(env, link);
- } else
+ } else {
result = -ENOMEM;
+ }
return result;
}
@@ -692,42 +598,6 @@ cl_io_slice_page(const struct cl_io_slice *ios, struct cl_page *page)
}
/**
- * True iff \a page is within \a io range.
- */
-static int cl_page_in_io(const struct cl_page *page, const struct cl_io *io)
-{
- int result = 1;
- loff_t start;
- loff_t end;
- pgoff_t idx;
-
- idx = page->cp_index;
- switch (io->ci_type) {
- case CIT_READ:
- case CIT_WRITE:
- /*
- * check that [start, end) and [pos, pos + count) extents
- * overlap.
- */
- if (!cl_io_is_append(io)) {
- const struct cl_io_rw_common *crw = &(io->u.ci_rw);
-
- start = cl_offset(page->cp_obj, idx);
- end = cl_offset(page->cp_obj, idx + 1);
- result = crw->crw_pos < end &&
- start < crw->crw_pos + crw->crw_count;
- }
- break;
- case CIT_FAULT:
- result = io->u.ci_fault.ft_index == idx;
- break;
- default:
- LBUG();
- }
- return result;
-}
-
-/**
* Called by read io, when page has to be read from the server.
*
* \see cl_io_operations::cio_read_page()
@@ -742,7 +612,6 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT);
LINVRNT(cl_page_is_owned(page, io));
LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
- LINVRNT(cl_page_in_io(page, io));
LINVRNT(cl_io_invariant(io));
queue = &io->ci_queue;
@@ -769,7 +638,7 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
break;
}
}
- if (result == 0)
+ if (result == 0 && queue->c2_qin.pl_nr > 0)
result = cl_io_submit_rw(env, io, CRT_READ, queue);
/*
* Unlock unsent pages in case of error.
@@ -781,77 +650,29 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
EXPORT_SYMBOL(cl_io_read_page);
/**
- * Called by write io to prepare page to receive data from user buffer.
- *
- * \see cl_io_operations::cio_prepare_write()
- */
-int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, unsigned from, unsigned to)
-{
- const struct cl_io_slice *scan;
- int result = 0;
-
- LINVRNT(io->ci_type == CIT_WRITE);
- LINVRNT(cl_page_is_owned(page, io));
- LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
- LINVRNT(cl_io_invariant(io));
- LASSERT(cl_page_in_io(page, io));
-
- cl_io_for_each_reverse(scan, io) {
- if (scan->cis_iop->cio_prepare_write) {
- const struct cl_page_slice *slice;
-
- slice = cl_io_slice_page(scan, page);
- result = scan->cis_iop->cio_prepare_write(env, scan,
- slice,
- from, to);
- if (result != 0)
- break;
- }
- }
- return result;
-}
-EXPORT_SYMBOL(cl_io_prepare_write);
-
-/**
- * Called by write io after user data were copied into a page.
+ * Commit a list of contiguous pages into writeback cache.
*
- * \see cl_io_operations::cio_commit_write()
+ * \returns 0 if all pages committed, or errcode if error occurred.
+ * \see cl_io_operations::cio_commit_async()
*/
-int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, unsigned from, unsigned to)
+int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue, int from, int to,
+ cl_commit_cbt cb)
{
const struct cl_io_slice *scan;
int result = 0;
- LINVRNT(io->ci_type == CIT_WRITE);
- LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
- LINVRNT(cl_io_invariant(io));
- /*
- * XXX Uh... not nice. Top level cl_io_commit_write() call (vvp->lov)
- * already called cl_page_cache_add(), moving page into CPS_CACHED
- * state. Better (and more general) way of dealing with such situation
- * is needed.
- */
- LASSERT(cl_page_is_owned(page, io) || page->cp_parent);
- LASSERT(cl_page_in_io(page, io));
-
cl_io_for_each(scan, io) {
- if (scan->cis_iop->cio_commit_write) {
- const struct cl_page_slice *slice;
-
- slice = cl_io_slice_page(scan, page);
- result = scan->cis_iop->cio_commit_write(env, scan,
- slice,
- from, to);
- if (result != 0)
- break;
- }
+ if (!scan->cis_iop->cio_commit_async)
+ continue;
+ result = scan->cis_iop->cio_commit_async(env, scan, queue,
+ from, to, cb);
+ if (result != 0)
+ break;
}
- LINVRNT(result <= 0);
return result;
}
-EXPORT_SYMBOL(cl_io_commit_write);
+EXPORT_SYMBOL(cl_io_commit_async);
/**
* Submits a list of pages for immediate io.
@@ -869,13 +690,10 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
const struct cl_io_slice *scan;
int result = 0;
- LINVRNT(crt < ARRAY_SIZE(scan->cis_iop->req_op));
-
cl_io_for_each(scan, io) {
- if (!scan->cis_iop->req_op[crt].cio_submit)
+ if (!scan->cis_iop->cio_submit)
continue;
- result = scan->cis_iop->req_op[crt].cio_submit(env, scan, crt,
- queue);
+ result = scan->cis_iop->cio_submit(env, scan, crt, queue);
if (result != 0)
break;
}
@@ -887,6 +705,9 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
}
EXPORT_SYMBOL(cl_io_submit_rw);
+static void cl_page_list_assume(const struct lu_env *env,
+ struct cl_io *io, struct cl_page_list *plist);
+
/**
* Submit a sync_io and wait for the IO to be finished, or error happens.
* If \a timeout is zero, it means to wait for the IO unconditionally.
@@ -904,7 +725,7 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
pg->cp_sync_io = anchor;
}
- cl_sync_io_init(anchor, queue->c2_qin.pl_nr);
+ cl_sync_io_init(anchor, queue->c2_qin.pl_nr, &cl_sync_io_end);
rc = cl_io_submit_rw(env, io, iot, queue);
if (rc == 0) {
/*
@@ -915,12 +736,12 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
*/
cl_page_list_for_each(pg, &queue->c2_qin) {
pg->cp_sync_io = NULL;
- cl_sync_io_note(anchor, 1);
+ cl_sync_io_note(env, anchor, 1);
}
/* wait for the IO to be finished. */
- rc = cl_sync_io_wait(env, io, &queue->c2_qout,
- anchor, timeout);
+ rc = cl_sync_io_wait(env, anchor, timeout);
+ cl_page_list_assume(env, io, &queue->c2_qout);
} else {
LASSERT(list_empty(&queue->c2_qout.pl_pages));
cl_page_list_for_each(pg, &queue->c2_qin)
@@ -931,26 +752,6 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
EXPORT_SYMBOL(cl_io_submit_sync);
/**
- * Cancel an IO which has been submitted by cl_io_submit_rw.
- */
-static int cl_io_cancel(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue)
-{
- struct cl_page *page;
- int result = 0;
-
- CERROR("Canceling ongoing page transmission\n");
- cl_page_list_for_each(page, queue) {
- int rc;
-
- LINVRNT(cl_page_in_io(page, io));
- rc = cl_page_cancel(env, page);
- result = result ?: rc;
- }
- return result;
-}
-
-/**
* Main io loop.
*
* Pumps io through iterations calling
@@ -1072,8 +873,8 @@ EXPORT_SYMBOL(cl_page_list_add);
/**
* Removes a page from a page list.
*/
-static void cl_page_list_del(const struct lu_env *env,
- struct cl_page_list *plist, struct cl_page *page)
+void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
+ struct cl_page *page)
{
LASSERT(plist->pl_nr > 0);
LINVRNT(plist->pl_owner == current);
@@ -1086,6 +887,7 @@ static void cl_page_list_del(const struct lu_env *env,
lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist);
cl_page_put(env, page);
}
+EXPORT_SYMBOL(cl_page_list_del);
/**
* Moves a page from one page list to another.
@@ -1106,6 +908,24 @@ void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
EXPORT_SYMBOL(cl_page_list_move);
/**
+ * Moves a page from one page list to the head of another list.
+ */
+void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
+ struct cl_page *page)
+{
+ LASSERT(src->pl_nr > 0);
+ LINVRNT(dst->pl_owner == current);
+ LINVRNT(src->pl_owner == current);
+
+ list_move(&page->cp_batch, &dst->pl_pages);
+ --src->pl_nr;
+ ++dst->pl_nr;
+ lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue",
+ src, dst);
+}
+EXPORT_SYMBOL(cl_page_list_move_head);
+
+/**
* splice the cl_page_list, just as list head does
*/
void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head)
@@ -1162,8 +982,7 @@ EXPORT_SYMBOL(cl_page_list_disown);
/**
* Releases pages from queue.
*/
-static void cl_page_list_fini(const struct lu_env *env,
- struct cl_page_list *plist)
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist)
{
struct cl_page *page;
struct cl_page *temp;
@@ -1174,6 +993,7 @@ static void cl_page_list_fini(const struct lu_env *env,
cl_page_list_del(env, plist, page);
LASSERT(plist->pl_nr == 0);
}
+EXPORT_SYMBOL(cl_page_list_fini);
/**
* Assumes all pages in a queue.
@@ -1260,7 +1080,7 @@ EXPORT_SYMBOL(cl_2queue_init_page);
/**
* Returns top-level io.
*
- * \see cl_object_top(), cl_page_top().
+ * \see cl_object_top()
*/
struct cl_io *cl_io_top(struct cl_io *io)
{
@@ -1323,19 +1143,14 @@ static int cl_req_init(const struct lu_env *env, struct cl_req *req,
int result;
result = 0;
- page = cl_page_top(page);
- do {
- list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
- dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
- if (dev->cd_ops->cdo_req_init) {
- result = dev->cd_ops->cdo_req_init(env,
- dev, req);
- if (result != 0)
- break;
- }
+ list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+ dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
+ if (dev->cd_ops->cdo_req_init) {
+ result = dev->cd_ops->cdo_req_init(env, dev, req);
+ if (result != 0)
+ break;
}
- page = page->cp_child;
- } while (page && result == 0);
+ }
return result;
}
@@ -1384,14 +1199,16 @@ struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
if (req->crq_o) {
req->crq_nrobjs = nr_objects;
result = cl_req_init(env, req, page);
- } else
+ } else {
result = -ENOMEM;
+ }
if (result != 0) {
cl_req_completion(env, req, result);
req = ERR_PTR(result);
}
- } else
+ } else {
req = ERR_PTR(-ENOMEM);
+ }
return req;
}
EXPORT_SYMBOL(cl_req_alloc);
@@ -1406,8 +1223,6 @@ void cl_req_page_add(const struct lu_env *env,
struct cl_req_obj *rqo;
int i;
- page = cl_page_top(page);
-
LASSERT(list_empty(&page->cp_flight));
LASSERT(!page->cp_req);
@@ -1438,8 +1253,6 @@ void cl_req_page_done(const struct lu_env *env, struct cl_page *page)
{
struct cl_req *req = page->cp_req;
- page = cl_page_top(page);
-
LASSERT(!list_empty(&page->cp_flight));
LASSERT(req->crq_nrpages > 0);
@@ -1511,25 +1324,39 @@ void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
}
EXPORT_SYMBOL(cl_req_attr_set);
+/* cl_sync_io_end() assumes the caller must call cl_sync_io_wait() to
+ * wait for the IO to finish.
+ */
+void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor)
+{
+ wake_up_all(&anchor->csi_waitq);
+
+ /* it's safe to nuke or reuse anchor now */
+ atomic_set(&anchor->csi_barrier, 0);
+}
+EXPORT_SYMBOL(cl_sync_io_end);
/**
- * Initialize synchronous io wait anchor, for transfer of \a nrpages pages.
+ * Initialize synchronous io wait anchor
*/
-void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages)
+void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
+ void (*end)(const struct lu_env *, struct cl_sync_io *))
{
+ memset(anchor, 0, sizeof(*anchor));
init_waitqueue_head(&anchor->csi_waitq);
- atomic_set(&anchor->csi_sync_nr, nrpages);
- atomic_set(&anchor->csi_barrier, nrpages > 0);
+ atomic_set(&anchor->csi_sync_nr, nr);
+ atomic_set(&anchor->csi_barrier, nr > 0);
anchor->csi_sync_rc = 0;
+ anchor->csi_end_io = end;
+ LASSERT(end);
}
EXPORT_SYMBOL(cl_sync_io_init);
/**
- * Wait until all transfer completes. Transfer completion routine has to call
- * cl_sync_io_note() for every page.
+ * Wait until all IO completes. Transfer completion routine has to call
+ * cl_sync_io_note() for every entity.
*/
-int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct cl_sync_io *anchor,
+int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
long timeout)
{
struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout),
@@ -1542,11 +1369,9 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
atomic_read(&anchor->csi_sync_nr) == 0,
&lwi);
if (rc < 0) {
- CERROR("SYNC IO failed with error: %d, try to cancel %d remaining pages\n",
+ CERROR("IO failed: %d, still wait for %d remaining entries\n",
rc, atomic_read(&anchor->csi_sync_nr));
- (void)cl_io_cancel(env, io, queue);
-
lwi = (struct l_wait_info) { 0 };
(void)l_wait_event(anchor->csi_waitq,
atomic_read(&anchor->csi_sync_nr) == 0,
@@ -1555,14 +1380,12 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
rc = anchor->csi_sync_rc;
}
LASSERT(atomic_read(&anchor->csi_sync_nr) == 0);
- cl_page_list_assume(env, io, queue);
/* wait until cl_sync_io_note() has done wakeup */
while (unlikely(atomic_read(&anchor->csi_barrier) != 0)) {
cpu_relax();
}
- POISON(anchor, 0x5a, sizeof(*anchor));
return rc;
}
EXPORT_SYMBOL(cl_sync_io_wait);
@@ -1570,7 +1393,8 @@ EXPORT_SYMBOL(cl_sync_io_wait);
/**
* Indicate that transfer of a single page completed.
*/
-void cl_sync_io_note(struct cl_sync_io *anchor, int ioret)
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+ int ioret)
{
if (anchor->csi_sync_rc == 0 && ioret < 0)
anchor->csi_sync_rc = ioret;
@@ -1581,9 +1405,9 @@ void cl_sync_io_note(struct cl_sync_io *anchor, int ioret)
*/
LASSERT(atomic_read(&anchor->csi_sync_nr) > 0);
if (atomic_dec_and_test(&anchor->csi_sync_nr)) {
- wake_up_all(&anchor->csi_waitq);
- /* it's safe to nuke or reuse anchor now */
- atomic_set(&anchor->csi_barrier, 0);
+ LASSERT(anchor->csi_end_io);
+ anchor->csi_end_io(env, anchor);
+ /* Can't access anchor any more */
}
}
EXPORT_SYMBOL(cl_sync_io_note);
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
index aec644eb4db9..9d7b5939b0fd 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_lock.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -36,6 +32,7 @@
* Client Extent Lock.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_CLASS
@@ -47,138 +44,18 @@
#include "../include/cl_object.h"
#include "cl_internal.h"
-/** Lock class of cl_lock::cll_guard */
-static struct lock_class_key cl_lock_guard_class;
-static struct kmem_cache *cl_lock_kmem;
-
-static struct lu_kmem_descr cl_lock_caches[] = {
- {
- .ckd_cache = &cl_lock_kmem,
- .ckd_name = "cl_lock_kmem",
- .ckd_size = sizeof (struct cl_lock)
- },
- {
- .ckd_cache = NULL
- }
-};
-
-#define CS_LOCK_INC(o, item)
-#define CS_LOCK_DEC(o, item)
-#define CS_LOCKSTATE_INC(o, state)
-#define CS_LOCKSTATE_DEC(o, state)
-
-/**
- * Basic lock invariant that is maintained at all times. Caller either has a
- * reference to \a lock, or somehow assures that \a lock cannot be freed.
- *
- * \see cl_lock_invariant()
- */
-static int cl_lock_invariant_trusted(const struct lu_env *env,
- const struct cl_lock *lock)
-{
- return ergo(lock->cll_state == CLS_FREEING, lock->cll_holds == 0) &&
- atomic_read(&lock->cll_ref) >= lock->cll_holds &&
- lock->cll_holds >= lock->cll_users &&
- lock->cll_holds >= 0 &&
- lock->cll_users >= 0 &&
- lock->cll_depth >= 0;
-}
-
-/**
- * Stronger lock invariant, checking that caller has a reference on a lock.
- *
- * \see cl_lock_invariant_trusted()
- */
-static int cl_lock_invariant(const struct lu_env *env,
- const struct cl_lock *lock)
-{
- int result;
-
- result = atomic_read(&lock->cll_ref) > 0 &&
- cl_lock_invariant_trusted(env, lock);
- if (!result && env)
- CL_LOCK_DEBUG(D_ERROR, env, lock, "invariant broken\n");
- return result;
-}
-
-/**
- * Returns lock "nesting": 0 for a top-lock and 1 for a sub-lock.
- */
-static enum clt_nesting_level cl_lock_nesting(const struct cl_lock *lock)
-{
- return cl_object_header(lock->cll_descr.cld_obj)->coh_nesting;
-}
-
-/**
- * Returns a set of counters for this lock, depending on a lock nesting.
- */
-static struct cl_thread_counters *cl_lock_counters(const struct lu_env *env,
- const struct cl_lock *lock)
-{
- struct cl_thread_info *info;
- enum clt_nesting_level nesting;
-
- info = cl_env_info(env);
- nesting = cl_lock_nesting(lock);
- LASSERT(nesting < ARRAY_SIZE(info->clt_counters));
- return &info->clt_counters[nesting];
-}
-
static void cl_lock_trace0(int level, const struct lu_env *env,
const char *prefix, const struct cl_lock *lock,
const char *func, const int line)
{
struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj);
- CDEBUG(level, "%s: %p@(%d %p %d %d %d %d %d %lx)(%p/%d/%d) at %s():%d\n",
- prefix, lock, atomic_read(&lock->cll_ref),
- lock->cll_guarder, lock->cll_depth,
- lock->cll_state, lock->cll_error, lock->cll_holds,
- lock->cll_users, lock->cll_flags,
- env, h->coh_nesting, cl_lock_nr_mutexed(env),
- func, line);
+ CDEBUG(level, "%s: %p (%p/%d) at %s():%d\n",
+ prefix, lock, env, h->coh_nesting, func, line);
}
-
-#define cl_lock_trace(level, env, prefix, lock) \
+#define cl_lock_trace(level, env, prefix, lock) \
cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__)
-#define RETIP ((unsigned long)__builtin_return_address(0))
-
-#ifdef CONFIG_LOCKDEP
-static struct lock_class_key cl_lock_key;
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{
- lockdep_set_class_and_name(lock, &cl_lock_key, "EXT");
-}
-
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
- struct cl_lock *lock, __u32 enqflags)
-{
- cl_lock_counters(env, lock)->ctc_nr_locks_acquired++;
- lock_map_acquire(&lock->dep_map);
-}
-
-static void cl_lock_lockdep_release(const struct lu_env *env,
- struct cl_lock *lock)
-{
- cl_lock_counters(env, lock)->ctc_nr_locks_acquired--;
- lock_release(&lock->dep_map, 0, RETIP);
-}
-
-#else /* !CONFIG_LOCKDEP */
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{}
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
- struct cl_lock *lock, __u32 enqflags)
-{}
-static void cl_lock_lockdep_release(const struct lu_env *env,
- struct cl_lock *lock)
-{}
-
-#endif /* !CONFIG_LOCKDEP */
-
/**
* Adds lock slice to the compound lock.
*
@@ -199,62 +76,10 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
}
EXPORT_SYMBOL(cl_lock_slice_add);
-/**
- * Returns true iff a lock with the mode \a has provides at least the same
- * guarantees as a lock with the mode \a need.
- */
-int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need)
-{
- LINVRNT(need == CLM_READ || need == CLM_WRITE ||
- need == CLM_PHANTOM || need == CLM_GROUP);
- LINVRNT(has == CLM_READ || has == CLM_WRITE ||
- has == CLM_PHANTOM || has == CLM_GROUP);
- CLASSERT(CLM_PHANTOM < CLM_READ);
- CLASSERT(CLM_READ < CLM_WRITE);
- CLASSERT(CLM_WRITE < CLM_GROUP);
-
- if (has != CLM_GROUP)
- return need <= has;
- else
- return need == has;
-}
-EXPORT_SYMBOL(cl_lock_mode_match);
-
-/**
- * Returns true iff extent portions of lock descriptions match.
- */
-int cl_lock_ext_match(const struct cl_lock_descr *has,
- const struct cl_lock_descr *need)
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock)
{
- return
- has->cld_start <= need->cld_start &&
- has->cld_end >= need->cld_end &&
- cl_lock_mode_match(has->cld_mode, need->cld_mode) &&
- (has->cld_mode != CLM_GROUP || has->cld_gid == need->cld_gid);
-}
-EXPORT_SYMBOL(cl_lock_ext_match);
+ cl_lock_trace(D_DLMTRACE, env, "destroy lock", lock);
-/**
- * Returns true iff a lock with the description \a has provides at least the
- * same guarantees as a lock with the description \a need.
- */
-int cl_lock_descr_match(const struct cl_lock_descr *has,
- const struct cl_lock_descr *need)
-{
- return
- cl_object_same(has->cld_obj, need->cld_obj) &&
- cl_lock_ext_match(has, need);
-}
-EXPORT_SYMBOL(cl_lock_descr_match);
-
-static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_object *obj = lock->cll_descr.cld_obj;
-
- LINVRNT(!cl_lock_is_mutexed(lock));
-
- cl_lock_trace(D_DLMTRACE, env, "free lock", lock);
- might_sleep();
while (!list_empty(&lock->cll_layers)) {
struct cl_lock_slice *slice;
@@ -263,350 +88,36 @@ static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
list_del_init(lock->cll_layers.next);
slice->cls_ops->clo_fini(env, slice);
}
- CS_LOCK_DEC(obj, total);
- CS_LOCKSTATE_DEC(obj, lock->cll_state);
- lu_object_ref_del_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock", lock);
- cl_object_put(env, obj);
- lu_ref_fini(&lock->cll_reference);
- lu_ref_fini(&lock->cll_holders);
- mutex_destroy(&lock->cll_guard);
- kmem_cache_free(cl_lock_kmem, lock);
-}
-
-/**
- * Releases a reference on a lock.
- *
- * When last reference is released, lock is returned to the cache, unless it
- * is in cl_lock_state::CLS_FREEING state, in which case it is destroyed
- * immediately.
- *
- * \see cl_object_put(), cl_page_put()
- */
-void cl_lock_put(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_object *obj;
-
- LINVRNT(cl_lock_invariant(env, lock));
- obj = lock->cll_descr.cld_obj;
- LINVRNT(obj);
-
- CDEBUG(D_TRACE, "releasing reference: %d %p %lu\n",
- atomic_read(&lock->cll_ref), lock, RETIP);
-
- if (atomic_dec_and_test(&lock->cll_ref)) {
- if (lock->cll_state == CLS_FREEING) {
- LASSERT(list_empty(&lock->cll_linkage));
- cl_lock_free(env, lock);
- }
- CS_LOCK_DEC(obj, busy);
- }
-}
-EXPORT_SYMBOL(cl_lock_put);
-
-/**
- * Acquires an additional reference to a lock.
- *
- * This can be called only by caller already possessing a reference to \a
- * lock.
- *
- * \see cl_object_get(), cl_page_get()
- */
-void cl_lock_get(struct cl_lock *lock)
-{
- LINVRNT(cl_lock_invariant(NULL, lock));
- CDEBUG(D_TRACE, "acquiring reference: %d %p %lu\n",
- atomic_read(&lock->cll_ref), lock, RETIP);
- atomic_inc(&lock->cll_ref);
-}
-EXPORT_SYMBOL(cl_lock_get);
-
-/**
- * Acquires a reference to a lock.
- *
- * This is much like cl_lock_get(), except that this function can be used to
- * acquire initial reference to the cached lock. Caller has to deal with all
- * possible races. Use with care!
- *
- * \see cl_page_get_trust()
- */
-void cl_lock_get_trust(struct cl_lock *lock)
-{
- CDEBUG(D_TRACE, "acquiring trusted reference: %d %p %lu\n",
- atomic_read(&lock->cll_ref), lock, RETIP);
- if (atomic_inc_return(&lock->cll_ref) == 1)
- CS_LOCK_INC(lock->cll_descr.cld_obj, busy);
-}
-EXPORT_SYMBOL(cl_lock_get_trust);
-
-/**
- * Helper function destroying the lock that wasn't completely initialized.
- *
- * Other threads can acquire references to the top-lock through its
- * sub-locks. Hence, it cannot be cl_lock_free()-ed immediately.
- */
-static void cl_lock_finish(const struct lu_env *env, struct cl_lock *lock)
-{
- cl_lock_mutex_get(env, lock);
- cl_lock_cancel(env, lock);
- cl_lock_delete(env, lock);
- cl_lock_mutex_put(env, lock);
- cl_lock_put(env, lock);
-}
-
-static struct cl_lock *cl_lock_alloc(const struct lu_env *env,
- struct cl_object *obj,
- const struct cl_io *io,
- const struct cl_lock_descr *descr)
-{
- struct cl_lock *lock;
- struct lu_object_header *head;
-
- lock = kmem_cache_zalloc(cl_lock_kmem, GFP_NOFS);
- if (lock) {
- atomic_set(&lock->cll_ref, 1);
- lock->cll_descr = *descr;
- lock->cll_state = CLS_NEW;
- cl_object_get(obj);
- lu_object_ref_add_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock",
- lock);
- INIT_LIST_HEAD(&lock->cll_layers);
- INIT_LIST_HEAD(&lock->cll_linkage);
- INIT_LIST_HEAD(&lock->cll_inclosure);
- lu_ref_init(&lock->cll_reference);
- lu_ref_init(&lock->cll_holders);
- mutex_init(&lock->cll_guard);
- lockdep_set_class(&lock->cll_guard, &cl_lock_guard_class);
- init_waitqueue_head(&lock->cll_wq);
- head = obj->co_lu.lo_header;
- CS_LOCKSTATE_INC(obj, CLS_NEW);
- CS_LOCK_INC(obj, total);
- CS_LOCK_INC(obj, create);
- cl_lock_lockdep_init(lock);
- list_for_each_entry(obj, &head->loh_layers, co_lu.lo_linkage) {
- int err;
-
- err = obj->co_ops->coo_lock_init(env, obj, lock, io);
- if (err != 0) {
- cl_lock_finish(env, lock);
- lock = ERR_PTR(err);
- break;
- }
- }
- } else
- lock = ERR_PTR(-ENOMEM);
- return lock;
-}
-
-/**
- * Transfer the lock into INTRANSIT state and return the original state.
- *
- * \pre state: CLS_CACHED, CLS_HELD or CLS_ENQUEUED
- * \post state: CLS_INTRANSIT
- * \see CLS_INTRANSIT
- */
-static enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
- struct cl_lock *lock)
-{
- enum cl_lock_state state = lock->cll_state;
-
- LASSERT(cl_lock_is_mutexed(lock));
- LASSERT(state != CLS_INTRANSIT);
- LASSERTF(state >= CLS_ENQUEUED && state <= CLS_CACHED,
- "Malformed lock state %d.\n", state);
-
- cl_lock_state_set(env, lock, CLS_INTRANSIT);
- lock->cll_intransit_owner = current;
- cl_lock_hold_add(env, lock, "intransit", current);
- return state;
-}
-
-/**
- * Exit the intransit state and restore the lock state to the original state
- */
-static void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
- enum cl_lock_state state)
-{
- LASSERT(cl_lock_is_mutexed(lock));
- LASSERT(lock->cll_state == CLS_INTRANSIT);
- LASSERT(state != CLS_INTRANSIT);
- LASSERT(lock->cll_intransit_owner == current);
-
- lock->cll_intransit_owner = NULL;
- cl_lock_state_set(env, lock, state);
- cl_lock_unhold(env, lock, "intransit", current);
-}
-
-/**
- * Checking whether the lock is intransit state
- */
-int cl_lock_is_intransit(struct cl_lock *lock)
-{
- LASSERT(cl_lock_is_mutexed(lock));
- return lock->cll_state == CLS_INTRANSIT &&
- lock->cll_intransit_owner != current;
-}
-EXPORT_SYMBOL(cl_lock_is_intransit);
-/**
- * Returns true iff lock is "suitable" for given io. E.g., locks acquired by
- * truncate and O_APPEND cannot be reused for read/non-append-write, as they
- * cover multiple stripes and can trigger cascading timeouts.
- */
-static int cl_lock_fits_into(const struct lu_env *env,
- const struct cl_lock *lock,
- const struct cl_lock_descr *need,
- const struct cl_io *io)
-{
- const struct cl_lock_slice *slice;
-
- LINVRNT(cl_lock_invariant_trusted(env, lock));
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_fits_into &&
- !slice->cls_ops->clo_fits_into(env, slice, need, io))
- return 0;
- }
- return 1;
+ POISON(lock, 0x5a, sizeof(*lock));
}
+EXPORT_SYMBOL(cl_lock_fini);
-static struct cl_lock *cl_lock_lookup(const struct lu_env *env,
- struct cl_object *obj,
- const struct cl_io *io,
- const struct cl_lock_descr *need)
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+ const struct cl_io *io)
{
- struct cl_lock *lock;
- struct cl_object_header *head;
-
- head = cl_object_header(obj);
- assert_spin_locked(&head->coh_lock_guard);
- CS_LOCK_INC(obj, lookup);
- list_for_each_entry(lock, &head->coh_locks, cll_linkage) {
- int matched;
-
- matched = cl_lock_ext_match(&lock->cll_descr, need) &&
- lock->cll_state < CLS_FREEING &&
- lock->cll_error == 0 &&
- !(lock->cll_flags & CLF_CANCELLED) &&
- cl_lock_fits_into(env, lock, need, io);
- CDEBUG(D_DLMTRACE, "has: "DDESCR"(%d) need: "DDESCR": %d\n",
- PDESCR(&lock->cll_descr), lock->cll_state, PDESCR(need),
- matched);
- if (matched) {
- cl_lock_get_trust(lock);
- CS_LOCK_INC(obj, hit);
- return lock;
- }
- }
- return NULL;
-}
-
-/**
- * Returns a lock matching description \a need.
- *
- * This is the main entry point into the cl_lock caching interface. First, a
- * cache (implemented as a per-object linked list) is consulted. If lock is
- * found there, it is returned immediately. Otherwise new lock is allocated
- * and returned. In any case, additional reference to lock is acquired.
- *
- * \see cl_object_find(), cl_page_find()
- */
-static struct cl_lock *cl_lock_find(const struct lu_env *env,
- const struct cl_io *io,
- const struct cl_lock_descr *need)
-{
- struct cl_object_header *head;
- struct cl_object *obj;
- struct cl_lock *lock;
-
- obj = need->cld_obj;
- head = cl_object_header(obj);
-
- spin_lock(&head->coh_lock_guard);
- lock = cl_lock_lookup(env, obj, io, need);
- spin_unlock(&head->coh_lock_guard);
-
- if (!lock) {
- lock = cl_lock_alloc(env, obj, io, need);
- if (!IS_ERR(lock)) {
- struct cl_lock *ghost;
-
- spin_lock(&head->coh_lock_guard);
- ghost = cl_lock_lookup(env, obj, io, need);
- if (!ghost) {
- cl_lock_get_trust(lock);
- list_add_tail(&lock->cll_linkage,
- &head->coh_locks);
- spin_unlock(&head->coh_lock_guard);
- CS_LOCK_INC(obj, busy);
- } else {
- spin_unlock(&head->coh_lock_guard);
- /*
- * Other threads can acquire references to the
- * top-lock through its sub-locks. Hence, it
- * cannot be cl_lock_free()-ed immediately.
- */
- cl_lock_finish(env, lock);
- lock = ghost;
- }
- }
- }
- return lock;
-}
-
-/**
- * Returns existing lock matching given description. This is similar to
- * cl_lock_find() except that no new lock is created, and returned lock is
- * guaranteed to be in enum cl_lock_state::CLS_HELD state.
- */
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source)
-{
- struct cl_object_header *head;
- struct cl_object *obj;
- struct cl_lock *lock;
-
- obj = need->cld_obj;
- head = cl_object_header(obj);
+ struct cl_object *obj = lock->cll_descr.cld_obj;
+ struct cl_object *scan;
+ int result = 0;
- do {
- spin_lock(&head->coh_lock_guard);
- lock = cl_lock_lookup(env, obj, io, need);
- spin_unlock(&head->coh_lock_guard);
- if (!lock)
- return NULL;
+ /* Make sure cl_lock::cll_descr is initialized. */
+ LASSERT(obj);
- cl_lock_mutex_get(env, lock);
- if (lock->cll_state == CLS_INTRANSIT)
- /* Don't care return value. */
- cl_lock_state_wait(env, lock);
- if (lock->cll_state == CLS_FREEING) {
- cl_lock_mutex_put(env, lock);
- cl_lock_put(env, lock);
- lock = NULL;
+ INIT_LIST_HEAD(&lock->cll_layers);
+ list_for_each_entry(scan, &obj->co_lu.lo_header->loh_layers,
+ co_lu.lo_linkage) {
+ result = scan->co_ops->coo_lock_init(env, scan, lock, io);
+ if (result != 0) {
+ cl_lock_fini(env, lock);
+ break;
}
- } while (!lock);
-
- cl_lock_hold_add(env, lock, scope, source);
- cl_lock_user_add(env, lock);
- if (lock->cll_state == CLS_CACHED)
- cl_use_try(env, lock, 1);
- if (lock->cll_state == CLS_HELD) {
- cl_lock_mutex_put(env, lock);
- cl_lock_lockdep_acquire(env, lock, 0);
- cl_lock_put(env, lock);
- } else {
- cl_unuse_try(env, lock);
- cl_lock_unhold(env, lock, scope, source);
- cl_lock_mutex_put(env, lock);
- cl_lock_put(env, lock);
- lock = NULL;
}
- return lock;
+ return result;
}
-EXPORT_SYMBOL(cl_lock_peek);
+EXPORT_SYMBOL(cl_lock_init);
/**
- * Returns a slice within a lock, corresponding to the given layer in the
+ * Returns a slice with a lock, corresponding to the given layer in the
* device stack.
*
* \see cl_page_at()
@@ -616,8 +127,6 @@ const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
{
const struct cl_lock_slice *slice;
- LINVRNT(cl_lock_invariant_trusted(NULL, lock));
-
list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype)
return slice;
@@ -626,1537 +135,96 @@ const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
}
EXPORT_SYMBOL(cl_lock_at);
-static void cl_lock_mutex_tail(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_thread_counters *counters;
-
- counters = cl_lock_counters(env, lock);
- lock->cll_depth++;
- counters->ctc_nr_locks_locked++;
- lu_ref_add(&counters->ctc_locks_locked, "cll_guard", lock);
- cl_lock_trace(D_TRACE, env, "got mutex", lock);
-}
-
-/**
- * Locks cl_lock object.
- *
- * This is used to manipulate cl_lock fields, and to serialize state
- * transitions in the lock state machine.
- *
- * \post cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_put()
- */
-void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_invariant(env, lock));
-
- if (lock->cll_guarder == current) {
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(lock->cll_depth > 0);
- } else {
- struct cl_object_header *hdr;
- struct cl_thread_info *info;
- int i;
-
- LINVRNT(lock->cll_guarder != current);
- hdr = cl_object_header(lock->cll_descr.cld_obj);
- /*
- * Check that mutices are taken in the bottom-to-top order.
- */
- info = cl_env_info(env);
- for (i = 0; i < hdr->coh_nesting; ++i)
- LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0);
- mutex_lock_nested(&lock->cll_guard, hdr->coh_nesting);
- lock->cll_guarder = current;
- LINVRNT(lock->cll_depth == 0);
- }
- cl_lock_mutex_tail(env, lock);
-}
-EXPORT_SYMBOL(cl_lock_mutex_get);
-
-/**
- * Try-locks cl_lock object.
- *
- * \retval 0 \a lock was successfully locked
- *
- * \retval -EBUSY \a lock cannot be locked right now
- *
- * \post ergo(result == 0, cl_lock_is_mutexed(lock))
- *
- * \see cl_lock_mutex_get()
- */
-static int cl_lock_mutex_try(const struct lu_env *env, struct cl_lock *lock)
-{
- int result;
-
- LINVRNT(cl_lock_invariant_trusted(env, lock));
-
- result = 0;
- if (lock->cll_guarder == current) {
- LINVRNT(lock->cll_depth > 0);
- cl_lock_mutex_tail(env, lock);
- } else if (mutex_trylock(&lock->cll_guard)) {
- LINVRNT(lock->cll_depth == 0);
- lock->cll_guarder = current;
- cl_lock_mutex_tail(env, lock);
- } else
- result = -EBUSY;
- return result;
-}
-
-/**
- * Unlocks cl_lock object.
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_get()
- */
-void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_thread_counters *counters;
-
- LINVRNT(cl_lock_invariant(env, lock));
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(lock->cll_guarder == current);
- LINVRNT(lock->cll_depth > 0);
-
- counters = cl_lock_counters(env, lock);
- LINVRNT(counters->ctc_nr_locks_locked > 0);
-
- cl_lock_trace(D_TRACE, env, "put mutex", lock);
- lu_ref_del(&counters->ctc_locks_locked, "cll_guard", lock);
- counters->ctc_nr_locks_locked--;
- if (--lock->cll_depth == 0) {
- lock->cll_guarder = NULL;
- mutex_unlock(&lock->cll_guard);
- }
-}
-EXPORT_SYMBOL(cl_lock_mutex_put);
-
-/**
- * Returns true iff lock's mutex is owned by the current thread.
- */
-int cl_lock_is_mutexed(struct cl_lock *lock)
-{
- return lock->cll_guarder == current;
-}
-EXPORT_SYMBOL(cl_lock_is_mutexed);
-
-/**
- * Returns number of cl_lock mutices held by the current thread (environment).
- */
-int cl_lock_nr_mutexed(const struct lu_env *env)
-{
- struct cl_thread_info *info;
- int i;
- int locked;
-
- /*
- * NOTE: if summation across all nesting levels (currently 2) proves
- * too expensive, a summary counter can be added to
- * struct cl_thread_info.
- */
- info = cl_env_info(env);
- for (i = 0, locked = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
- locked += info->clt_counters[i].ctc_nr_locks_locked;
- return locked;
-}
-EXPORT_SYMBOL(cl_lock_nr_mutexed);
-
-static void cl_lock_cancel0(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- if (!(lock->cll_flags & CLF_CANCELLED)) {
- const struct cl_lock_slice *slice;
-
- lock->cll_flags |= CLF_CANCELLED;
- list_for_each_entry_reverse(slice, &lock->cll_layers,
- cls_linkage) {
- if (slice->cls_ops->clo_cancel)
- slice->cls_ops->clo_cancel(env, slice);
- }
- }
-}
-
-static void cl_lock_delete0(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_object_header *head;
- const struct cl_lock_slice *slice;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- if (lock->cll_state < CLS_FREEING) {
- bool in_cache;
-
- LASSERT(lock->cll_state != CLS_INTRANSIT);
- cl_lock_state_set(env, lock, CLS_FREEING);
-
- head = cl_object_header(lock->cll_descr.cld_obj);
-
- spin_lock(&head->coh_lock_guard);
- in_cache = !list_empty(&lock->cll_linkage);
- if (in_cache)
- list_del_init(&lock->cll_linkage);
- spin_unlock(&head->coh_lock_guard);
-
- if (in_cache) /* coh_locks cache holds a refcount. */
- cl_lock_put(env, lock);
-
- /*
- * From now on, no new references to this lock can be acquired
- * by cl_lock_lookup().
- */
- list_for_each_entry_reverse(slice, &lock->cll_layers,
- cls_linkage) {
- if (slice->cls_ops->clo_delete)
- slice->cls_ops->clo_delete(env, slice);
- }
- /*
- * From now on, no new references to this lock can be acquired
- * by layer-specific means (like a pointer from struct
- * ldlm_lock in osc, or a pointer from top-lock to sub-lock in
- * lov).
- *
- * Lock will be finally freed in cl_lock_put() when last of
- * existing references goes away.
- */
- }
-}
-
-/**
- * Mod(ifie)s cl_lock::cll_holds counter for a given lock. Also, for a
- * top-lock (nesting == 0) accounts for this modification in the per-thread
- * debugging counters. Sub-lock holds can be released by a thread different
- * from one that acquired it.
- */
-static void cl_lock_hold_mod(const struct lu_env *env, struct cl_lock *lock,
- int delta)
-{
- struct cl_thread_counters *counters;
- enum clt_nesting_level nesting;
-
- lock->cll_holds += delta;
- nesting = cl_lock_nesting(lock);
- if (nesting == CNL_TOP) {
- counters = &cl_env_info(env)->clt_counters[CNL_TOP];
- counters->ctc_nr_held += delta;
- LASSERT(counters->ctc_nr_held >= 0);
- }
-}
-
-/**
- * Mod(ifie)s cl_lock::cll_users counter for a given lock. See
- * cl_lock_hold_mod() for the explanation of the debugging code.
- */
-static void cl_lock_used_mod(const struct lu_env *env, struct cl_lock *lock,
- int delta)
-{
- struct cl_thread_counters *counters;
- enum clt_nesting_level nesting;
-
- lock->cll_users += delta;
- nesting = cl_lock_nesting(lock);
- if (nesting == CNL_TOP) {
- counters = &cl_env_info(env)->clt_counters[CNL_TOP];
- counters->ctc_nr_used += delta;
- LASSERT(counters->ctc_nr_used >= 0);
- }
-}
-
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_holds > 0);
-
- cl_lock_trace(D_DLMTRACE, env, "hold release lock", lock);
- lu_ref_del(&lock->cll_holders, scope, source);
- cl_lock_hold_mod(env, lock, -1);
- if (lock->cll_holds == 0) {
- CL_LOCK_ASSERT(lock->cll_state != CLS_HELD, env, lock);
- if (lock->cll_descr.cld_mode == CLM_PHANTOM ||
- lock->cll_descr.cld_mode == CLM_GROUP ||
- lock->cll_state != CLS_CACHED)
- /*
- * If lock is still phantom or grouplock when user is
- * done with it---destroy the lock.
- */
- lock->cll_flags |= CLF_CANCELPEND|CLF_DOOMED;
- if (lock->cll_flags & CLF_CANCELPEND) {
- lock->cll_flags &= ~CLF_CANCELPEND;
- cl_lock_cancel0(env, lock);
- }
- if (lock->cll_flags & CLF_DOOMED) {
- /* no longer doomed: it's dead... Jim. */
- lock->cll_flags &= ~CLF_DOOMED;
- cl_lock_delete0(env, lock);
- }
- }
-}
-EXPORT_SYMBOL(cl_lock_hold_release);
-
-/**
- * Waits until lock state is changed.
- *
- * This function is called with cl_lock mutex locked, atomically releases
- * mutex and goes to sleep, waiting for a lock state change (signaled by
- * cl_lock_signal()), and re-acquires the mutex before return.
- *
- * This function is used to wait until lock state machine makes some progress
- * and to emulate synchronous operations on top of asynchronous lock
- * interface.
- *
- * \retval -EINTR wait was interrupted
- *
- * \retval 0 wait wasn't interrupted
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_signal()
- */
-int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock)
-{
- wait_queue_t waiter;
- sigset_t blocked;
- int result;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_depth == 1);
- LASSERT(lock->cll_state != CLS_FREEING); /* too late to wait */
-
- cl_lock_trace(D_DLMTRACE, env, "state wait lock", lock);
- result = lock->cll_error;
- if (result == 0) {
- /* To avoid being interrupted by the 'non-fatal' signals
- * (SIGCHLD, for instance), we'd block them temporarily.
- * LU-305
- */
- blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
-
- init_waitqueue_entry(&waiter, current);
- add_wait_queue(&lock->cll_wq, &waiter);
- set_current_state(TASK_INTERRUPTIBLE);
- cl_lock_mutex_put(env, lock);
-
- LASSERT(cl_lock_nr_mutexed(env) == 0);
-
- /* Returning ERESTARTSYS instead of EINTR so syscalls
- * can be restarted if signals are pending here
- */
- result = -ERESTARTSYS;
- if (likely(!OBD_FAIL_CHECK(OBD_FAIL_LOCK_STATE_WAIT_INTR))) {
- schedule();
- if (!cfs_signal_pending())
- result = 0;
- }
-
- cl_lock_mutex_get(env, lock);
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&lock->cll_wq, &waiter);
-
- /* Restore old blocked signals */
- cfs_restore_sigs(blocked);
- }
- return result;
-}
-EXPORT_SYMBOL(cl_lock_state_wait);
-
-static void cl_lock_state_signal(const struct lu_env *env, struct cl_lock *lock,
- enum cl_lock_state state)
-{
- const struct cl_lock_slice *slice;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage)
- if (slice->cls_ops->clo_state)
- slice->cls_ops->clo_state(env, slice, state);
- wake_up_all(&lock->cll_wq);
-}
-
-/**
- * Notifies waiters that lock state changed.
- *
- * Wakes up all waiters sleeping in cl_lock_state_wait(), also notifies all
- * layers about state change by calling cl_lock_operations::clo_state()
- * top-to-bottom.
- */
-void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock)
-{
- cl_lock_trace(D_DLMTRACE, env, "state signal lock", lock);
- cl_lock_state_signal(env, lock, lock->cll_state);
-}
-EXPORT_SYMBOL(cl_lock_signal);
-
-/**
- * Changes lock state.
- *
- * This function is invoked to notify layers that lock state changed, possible
- * as a result of an asynchronous event such as call-back reception.
- *
- * \post lock->cll_state == state
- *
- * \see cl_lock_operations::clo_state()
- */
-void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
- enum cl_lock_state state)
-{
- LASSERT(lock->cll_state <= state ||
- (lock->cll_state == CLS_CACHED &&
- (state == CLS_HELD || /* lock found in cache */
- state == CLS_NEW || /* sub-lock canceled */
- state == CLS_INTRANSIT)) ||
- /* lock is in transit state */
- lock->cll_state == CLS_INTRANSIT);
-
- if (lock->cll_state != state) {
- CS_LOCKSTATE_DEC(lock->cll_descr.cld_obj, lock->cll_state);
- CS_LOCKSTATE_INC(lock->cll_descr.cld_obj, state);
-
- cl_lock_state_signal(env, lock, state);
- lock->cll_state = state;
- }
-}
-EXPORT_SYMBOL(cl_lock_state_set);
-
-static int cl_unuse_try_internal(const struct lu_env *env, struct cl_lock *lock)
-{
- const struct cl_lock_slice *slice;
- int result;
-
- do {
- result = 0;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_state == CLS_INTRANSIT);
-
- result = -ENOSYS;
- list_for_each_entry_reverse(slice, &lock->cll_layers,
- cls_linkage) {
- if (slice->cls_ops->clo_unuse) {
- result = slice->cls_ops->clo_unuse(env, slice);
- if (result != 0)
- break;
- }
- }
- LASSERT(result != -ENOSYS);
- } while (result == CLO_REPEAT);
-
- return result;
-}
-
-/**
- * Yanks lock from the cache (cl_lock_state::CLS_CACHED state) by calling
- * cl_lock_operations::clo_use() top-to-bottom to notify layers.
- * @atomic = 1, it must unuse the lock to recovery the lock to keep the
- * use process atomic
- */
-int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic)
+void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
{
const struct cl_lock_slice *slice;
- int result;
- enum cl_lock_state state;
-
- cl_lock_trace(D_DLMTRACE, env, "use lock", lock);
-
- LASSERT(lock->cll_state == CLS_CACHED);
- if (lock->cll_error)
- return lock->cll_error;
-
- result = -ENOSYS;
- state = cl_lock_intransit(env, lock);
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_use) {
- result = slice->cls_ops->clo_use(env, slice);
- if (result != 0)
- break;
- }
- }
- LASSERT(result != -ENOSYS);
-
- LASSERTF(lock->cll_state == CLS_INTRANSIT, "Wrong state %d.\n",
- lock->cll_state);
-
- if (result == 0) {
- state = CLS_HELD;
- } else {
- if (result == -ESTALE) {
- /*
- * ESTALE means sublock being cancelled
- * at this time, and set lock state to
- * be NEW here and ask the caller to repeat.
- */
- state = CLS_NEW;
- result = CLO_REPEAT;
- }
-
- /* @atomic means back-off-on-failure. */
- if (atomic) {
- int rc;
-
- rc = cl_unuse_try_internal(env, lock);
- /* Vet the results. */
- if (rc < 0 && result > 0)
- result = rc;
- }
+ cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
+ list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
+ if (slice->cls_ops->clo_cancel)
+ slice->cls_ops->clo_cancel(env, slice);
}
- cl_lock_extransit(env, lock, state);
- return result;
}
-EXPORT_SYMBOL(cl_use_try);
+EXPORT_SYMBOL(cl_lock_cancel);
/**
- * Helper for cl_enqueue_try() that calls ->clo_enqueue() across all layers
- * top-to-bottom.
+ * Enqueue a lock.
+ * \param anchor: if we need to wait for resources before getting the lock,
+ * use @anchor for the purpose.
+ * \retval 0 enqueue successfully
+ * \retval <0 error code
*/
-static int cl_enqueue_kick(const struct lu_env *env,
- struct cl_lock *lock,
- struct cl_io *io, __u32 flags)
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock *lock, struct cl_sync_io *anchor)
{
- int result;
const struct cl_lock_slice *slice;
+ int rc = -ENOSYS;
- result = -ENOSYS;
list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_enqueue) {
- result = slice->cls_ops->clo_enqueue(env,
- slice, io, flags);
- if (result != 0)
- break;
- }
- }
- LASSERT(result != -ENOSYS);
- return result;
-}
-
-/**
- * Tries to enqueue a lock.
- *
- * This function is called repeatedly by cl_enqueue() until either lock is
- * enqueued, or error occurs. This function does not block waiting for
- * networking communication to complete.
- *
- * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- * lock->cll_state == CLS_HELD)
- *
- * \see cl_enqueue() cl_lock_operations::clo_enqueue()
- * \see cl_lock_state::CLS_ENQUEUED
- */
-int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
- struct cl_io *io, __u32 flags)
-{
- int result;
-
- cl_lock_trace(D_DLMTRACE, env, "enqueue lock", lock);
- do {
- LINVRNT(cl_lock_is_mutexed(lock));
-
- result = lock->cll_error;
- if (result != 0)
- break;
-
- switch (lock->cll_state) {
- case CLS_NEW:
- cl_lock_state_set(env, lock, CLS_QUEUING);
- /* fall-through */
- case CLS_QUEUING:
- /* kick layers. */
- result = cl_enqueue_kick(env, lock, io, flags);
- /* For AGL case, the cl_lock::cll_state may
- * become CLS_HELD already.
- */
- if (result == 0 && lock->cll_state == CLS_QUEUING)
- cl_lock_state_set(env, lock, CLS_ENQUEUED);
- break;
- case CLS_INTRANSIT:
- LASSERT(cl_lock_is_intransit(lock));
- result = CLO_WAIT;
- break;
- case CLS_CACHED:
- /* yank lock from the cache. */
- result = cl_use_try(env, lock, 0);
- break;
- case CLS_ENQUEUED:
- case CLS_HELD:
- result = 0;
- break;
- default:
- case CLS_FREEING:
- /*
- * impossible, only held locks with increased
- * ->cll_holds can be enqueued, and they cannot be
- * freed.
- */
- LBUG();
- }
- } while (result == CLO_REPEAT);
- return result;
-}
-EXPORT_SYMBOL(cl_enqueue_try);
-
-/**
- * Cancel the conflicting lock found during previous enqueue.
- *
- * \retval 0 conflicting lock has been canceled.
- * \retval -ve error code.
- */
-int cl_lock_enqueue_wait(const struct lu_env *env,
- struct cl_lock *lock,
- int keep_mutex)
-{
- struct cl_lock *conflict;
- int rc = 0;
-
- LASSERT(cl_lock_is_mutexed(lock));
- LASSERT(lock->cll_state == CLS_QUEUING);
- LASSERT(lock->cll_conflict);
+ if (!slice->cls_ops->clo_enqueue)
+ continue;
- conflict = lock->cll_conflict;
- lock->cll_conflict = NULL;
-
- cl_lock_mutex_put(env, lock);
- LASSERT(cl_lock_nr_mutexed(env) == 0);
-
- cl_lock_mutex_get(env, conflict);
- cl_lock_trace(D_DLMTRACE, env, "enqueue wait", conflict);
- cl_lock_cancel(env, conflict);
- cl_lock_delete(env, conflict);
-
- while (conflict->cll_state != CLS_FREEING) {
- rc = cl_lock_state_wait(env, conflict);
+ rc = slice->cls_ops->clo_enqueue(env, slice, io, anchor);
if (rc != 0)
break;
- }
- cl_lock_mutex_put(env, conflict);
- lu_ref_del(&conflict->cll_reference, "cancel-wait", lock);
- cl_lock_put(env, conflict);
-
- if (keep_mutex)
- cl_lock_mutex_get(env, lock);
-
- LASSERT(rc <= 0);
- return rc;
-}
-EXPORT_SYMBOL(cl_lock_enqueue_wait);
-
-static int cl_enqueue_locked(const struct lu_env *env, struct cl_lock *lock,
- struct cl_io *io, __u32 enqflags)
-{
- int result;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_holds > 0);
-
- cl_lock_user_add(env, lock);
- do {
- result = cl_enqueue_try(env, lock, io, enqflags);
- if (result == CLO_WAIT) {
- if (lock->cll_conflict)
- result = cl_lock_enqueue_wait(env, lock, 1);
- else
- result = cl_lock_state_wait(env, lock);
- if (result == 0)
- continue;
- }
- break;
- } while (1);
- if (result != 0)
- cl_unuse_try(env, lock);
- LASSERT(ergo(result == 0 && !(enqflags & CEF_AGL),
- lock->cll_state == CLS_ENQUEUED ||
- lock->cll_state == CLS_HELD));
- return result;
-}
-
-/**
- * Tries to unlock a lock.
- *
- * This function is called to release underlying resource:
- * 1. for top lock, the resource is sublocks it held;
- * 2. for sublock, the resource is the reference to dlmlock.
- *
- * cl_unuse_try is a one-shot operation, so it must NOT return CLO_WAIT.
- *
- * \see cl_unuse() cl_lock_operations::clo_unuse()
- * \see cl_lock_state::CLS_CACHED
- */
-int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock)
-{
- int result;
- enum cl_lock_state state = CLS_NEW;
-
- cl_lock_trace(D_DLMTRACE, env, "unuse lock", lock);
-
- if (lock->cll_users > 1) {
- cl_lock_user_del(env, lock);
- return 0;
- }
-
- /* Only if the lock is in CLS_HELD or CLS_ENQUEUED state, it can hold
- * underlying resources.
- */
- if (!(lock->cll_state == CLS_HELD || lock->cll_state == CLS_ENQUEUED)) {
- cl_lock_user_del(env, lock);
- return 0;
- }
-
- /*
- * New lock users (->cll_users) are not protecting unlocking
- * from proceeding. From this point, lock eventually reaches
- * CLS_CACHED, is reinitialized to CLS_NEW or fails into
- * CLS_FREEING.
- */
- state = cl_lock_intransit(env, lock);
-
- result = cl_unuse_try_internal(env, lock);
- LASSERT(lock->cll_state == CLS_INTRANSIT);
- LASSERT(result != CLO_WAIT);
- cl_lock_user_del(env, lock);
- if (result == 0 || result == -ESTALE) {
- /*
- * Return lock back to the cache. This is the only
- * place where lock is moved into CLS_CACHED state.
- *
- * If one of ->clo_unuse() methods returned -ESTALE, lock
- * cannot be placed into cache and has to be
- * re-initialized. This happens e.g., when a sub-lock was
- * canceled while unlocking was in progress.
- */
- if (state == CLS_HELD && result == 0)
- state = CLS_CACHED;
- else
- state = CLS_NEW;
- cl_lock_extransit(env, lock, state);
-
- /*
- * Hide -ESTALE error.
- * If the lock is a glimpse lock, and it has multiple
- * stripes. Assuming that one of its sublock returned -ENAVAIL,
- * and other sublocks are matched write locks. In this case,
- * we can't set this lock to error because otherwise some of
- * its sublocks may not be canceled. This causes some dirty
- * pages won't be written to OSTs. -jay
- */
- result = 0;
- } else {
- CERROR("result = %d, this is unlikely!\n", result);
- state = CLS_NEW;
- cl_lock_extransit(env, lock, state);
- }
- return result ?: lock->cll_error;
-}
-EXPORT_SYMBOL(cl_unuse_try);
-
-static void cl_unuse_locked(const struct lu_env *env, struct cl_lock *lock)
-{
- int result;
-
- result = cl_unuse_try(env, lock);
- if (result)
- CL_LOCK_DEBUG(D_ERROR, env, lock, "unuse return %d\n", result);
-}
-
-/**
- * Unlocks a lock.
- */
-void cl_unuse(const struct lu_env *env, struct cl_lock *lock)
-{
- cl_lock_mutex_get(env, lock);
- cl_unuse_locked(env, lock);
- cl_lock_mutex_put(env, lock);
- cl_lock_lockdep_release(env, lock);
-}
-EXPORT_SYMBOL(cl_unuse);
-
-/**
- * Tries to wait for a lock.
- *
- * This function is called repeatedly by cl_wait() until either lock is
- * granted, or error occurs. This function does not block waiting for network
- * communication to complete.
- *
- * \see cl_wait() cl_lock_operations::clo_wait()
- * \see cl_lock_state::CLS_HELD
- */
-int cl_wait_try(const struct lu_env *env, struct cl_lock *lock)
-{
- const struct cl_lock_slice *slice;
- int result;
-
- cl_lock_trace(D_DLMTRACE, env, "wait lock try", lock);
- do {
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERTF(lock->cll_state == CLS_QUEUING ||
- lock->cll_state == CLS_ENQUEUED ||
- lock->cll_state == CLS_HELD ||
- lock->cll_state == CLS_INTRANSIT,
- "lock state: %d\n", lock->cll_state);
- LASSERT(lock->cll_users > 0);
- LASSERT(lock->cll_holds > 0);
-
- result = lock->cll_error;
- if (result != 0)
- break;
-
- if (cl_lock_is_intransit(lock)) {
- result = CLO_WAIT;
- break;
- }
-
- if (lock->cll_state == CLS_HELD)
- /* nothing to do */
- break;
-
- result = -ENOSYS;
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_wait) {
- result = slice->cls_ops->clo_wait(env, slice);
- if (result != 0)
- break;
- }
- }
- LASSERT(result != -ENOSYS);
- if (result == 0) {
- LASSERT(lock->cll_state != CLS_INTRANSIT);
- cl_lock_state_set(env, lock, CLS_HELD);
- }
- } while (result == CLO_REPEAT);
- return result;
-}
-EXPORT_SYMBOL(cl_wait_try);
-
-/**
- * Waits until enqueued lock is granted.
- *
- * \pre current thread or io owns a hold on the lock
- * \pre ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- * lock->cll_state == CLS_HELD)
- *
- * \post ergo(result == 0, lock->cll_state == CLS_HELD)
- */
-int cl_wait(const struct lu_env *env, struct cl_lock *lock)
-{
- int result;
-
- cl_lock_mutex_get(env, lock);
-
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERTF(lock->cll_state == CLS_ENQUEUED || lock->cll_state == CLS_HELD,
- "Wrong state %d\n", lock->cll_state);
- LASSERT(lock->cll_holds > 0);
-
- do {
- result = cl_wait_try(env, lock);
- if (result == CLO_WAIT) {
- result = cl_lock_state_wait(env, lock);
- if (result == 0)
- continue;
- }
- break;
- } while (1);
- if (result < 0) {
- cl_unuse_try(env, lock);
- cl_lock_lockdep_release(env, lock);
- }
- cl_lock_trace(D_DLMTRACE, env, "wait lock", lock);
- cl_lock_mutex_put(env, lock);
- LASSERT(ergo(result == 0, lock->cll_state == CLS_HELD));
- return result;
-}
-EXPORT_SYMBOL(cl_wait);
-
-/**
- * Executes cl_lock_operations::clo_weigh(), and sums results to estimate lock
- * value.
- */
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock)
-{
- const struct cl_lock_slice *slice;
- unsigned long pound;
- unsigned long ounce;
-
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- pound = 0;
- list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_weigh) {
- ounce = slice->cls_ops->clo_weigh(env, slice);
- pound += ounce;
- if (pound < ounce) /* over-weight^Wflow */
- pound = ~0UL;
- }
- }
- return pound;
-}
-EXPORT_SYMBOL(cl_lock_weigh);
-
-/**
- * Notifies layers that lock description changed.
- *
- * The server can grant client a lock different from one that was requested
- * (e.g., larger in extent). This method is called when actually granted lock
- * description becomes known to let layers to accommodate for changed lock
- * description.
- *
- * \see cl_lock_operations::clo_modify()
- */
-int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
- const struct cl_lock_descr *desc)
-{
- const struct cl_lock_slice *slice;
- struct cl_object *obj = lock->cll_descr.cld_obj;
- struct cl_object_header *hdr = cl_object_header(obj);
- int result;
-
- cl_lock_trace(D_DLMTRACE, env, "modify lock", lock);
- /* don't allow object to change */
- LASSERT(obj == desc->cld_obj);
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_modify) {
- result = slice->cls_ops->clo_modify(env, slice, desc);
- if (result != 0)
- return result;
}
- }
- CL_LOCK_DEBUG(D_DLMTRACE, env, lock, " -> "DDESCR"@"DFID"\n",
- PDESCR(desc), PFID(lu_object_fid(&desc->cld_obj->co_lu)));
- /*
- * Just replace description in place. Nothing more is needed for
- * now. If locks were indexed according to their extent and/or mode,
- * that index would have to be updated here.
- */
- spin_lock(&hdr->coh_lock_guard);
- lock->cll_descr = *desc;
- spin_unlock(&hdr->coh_lock_guard);
- return 0;
-}
-EXPORT_SYMBOL(cl_lock_modify);
-
-/**
- * Initializes lock closure with a given origin.
- *
- * \see cl_lock_closure
- */
-void cl_lock_closure_init(const struct lu_env *env,
- struct cl_lock_closure *closure,
- struct cl_lock *origin, int wait)
-{
- LINVRNT(cl_lock_is_mutexed(origin));
- LINVRNT(cl_lock_invariant(env, origin));
-
- INIT_LIST_HEAD(&closure->clc_list);
- closure->clc_origin = origin;
- closure->clc_wait = wait;
- closure->clc_nr = 0;
-}
-EXPORT_SYMBOL(cl_lock_closure_init);
-
-/**
- * Builds a closure of \a lock.
- *
- * Building of a closure consists of adding initial lock (\a lock) into it,
- * and calling cl_lock_operations::clo_closure() methods of \a lock. These
- * methods might call cl_lock_closure_build() recursively again, adding more
- * locks to the closure, etc.
- *
- * \see cl_lock_closure
- */
-int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
- struct cl_lock_closure *closure)
-{
- const struct cl_lock_slice *slice;
- int result;
-
- LINVRNT(cl_lock_is_mutexed(closure->clc_origin));
- LINVRNT(cl_lock_invariant(env, closure->clc_origin));
-
- result = cl_lock_enclosure(env, lock, closure);
- if (result == 0) {
- list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
- if (slice->cls_ops->clo_closure) {
- result = slice->cls_ops->clo_closure(env, slice,
- closure);
- if (result != 0)
- break;
- }
- }
- }
- if (result != 0)
- cl_lock_disclosure(env, closure);
- return result;
-}
-EXPORT_SYMBOL(cl_lock_closure_build);
-
-/**
- * Adds new lock to a closure.
- *
- * Try-locks \a lock and if succeeded, adds it to the closure (never more than
- * once). If try-lock failed, returns CLO_REPEAT, after optionally waiting
- * until next try-lock is likely to succeed.
- */
-int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
- struct cl_lock_closure *closure)
-{
- int result = 0;
-
- cl_lock_trace(D_DLMTRACE, env, "enclosure lock", lock);
- if (!cl_lock_mutex_try(env, lock)) {
- /*
- * If lock->cll_inclosure is not empty, lock is already in
- * this closure.
- */
- if (list_empty(&lock->cll_inclosure)) {
- cl_lock_get_trust(lock);
- lu_ref_add(&lock->cll_reference, "closure", closure);
- list_add(&lock->cll_inclosure, &closure->clc_list);
- closure->clc_nr++;
- } else
- cl_lock_mutex_put(env, lock);
- result = 0;
- } else {
- cl_lock_disclosure(env, closure);
- if (closure->clc_wait) {
- cl_lock_get_trust(lock);
- lu_ref_add(&lock->cll_reference, "closure-w", closure);
- cl_lock_mutex_put(env, closure->clc_origin);
-
- LASSERT(cl_lock_nr_mutexed(env) == 0);
- cl_lock_mutex_get(env, lock);
- cl_lock_mutex_put(env, lock);
-
- cl_lock_mutex_get(env, closure->clc_origin);
- lu_ref_del(&lock->cll_reference, "closure-w", closure);
- cl_lock_put(env, lock);
- }
- result = CLO_REPEAT;
- }
- return result;
-}
-EXPORT_SYMBOL(cl_lock_enclosure);
-
-/** Releases mutices of enclosed locks. */
-void cl_lock_disclosure(const struct lu_env *env,
- struct cl_lock_closure *closure)
-{
- struct cl_lock *scan;
- struct cl_lock *temp;
-
- cl_lock_trace(D_DLMTRACE, env, "disclosure lock", closure->clc_origin);
- list_for_each_entry_safe(scan, temp, &closure->clc_list,
- cll_inclosure) {
- list_del_init(&scan->cll_inclosure);
- cl_lock_mutex_put(env, scan);
- lu_ref_del(&scan->cll_reference, "closure", closure);
- cl_lock_put(env, scan);
- closure->clc_nr--;
- }
- LASSERT(closure->clc_nr == 0);
-}
-EXPORT_SYMBOL(cl_lock_disclosure);
-
-/** Finalizes a closure. */
-void cl_lock_closure_fini(struct cl_lock_closure *closure)
-{
- LASSERT(closure->clc_nr == 0);
- LASSERT(list_empty(&closure->clc_list));
-}
-EXPORT_SYMBOL(cl_lock_closure_fini);
-
-/**
- * Destroys this lock. Notifies layers (bottom-to-top) that lock is being
- * destroyed, then destroy the lock. If there are holds on the lock, postpone
- * destruction until all holds are released. This is called when a decision is
- * made to destroy the lock in the future. E.g., when a blocking AST is
- * received on it, or fatal communication error happens.
- *
- * Caller must have a reference on this lock to prevent a situation, when
- * deleted lock lingers in memory for indefinite time, because nobody calls
- * cl_lock_put() to finish it.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
- * cl_lock_nr_mutexed(env) == 1)
- * [i.e., if a top-lock is deleted, mutices of no other locks can be
- * held, as deletion of sub-locks might require releasing a top-lock
- * mutex]
- *
- * \see cl_lock_operations::clo_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP,
- cl_lock_nr_mutexed(env) == 1));
-
- cl_lock_trace(D_DLMTRACE, env, "delete lock", lock);
- if (lock->cll_holds == 0)
- cl_lock_delete0(env, lock);
- else
- lock->cll_flags |= CLF_DOOMED;
-}
-EXPORT_SYMBOL(cl_lock_delete);
-
-/**
- * Mark lock as irrecoverably failed, and mark it for destruction. This
- * happens when, e.g., server fails to grant a lock to us, or networking
- * time-out happens.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- *
- * \see clo_lock_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- if (lock->cll_error == 0 && error != 0) {
- cl_lock_trace(D_DLMTRACE, env, "set lock error", lock);
- lock->cll_error = error;
- cl_lock_signal(env, lock);
- cl_lock_cancel(env, lock);
- cl_lock_delete(env, lock);
- }
-}
-EXPORT_SYMBOL(cl_lock_error);
-
-/**
- * Cancels this lock. Notifies layers
- * (bottom-to-top) that lock is being cancelled, then destroy the lock. If
- * there are holds on the lock, postpone cancellation until
- * all holds are released.
- *
- * Cancellation notification is delivered to layers at most once.
- *
- * \see cl_lock_operations::clo_cancel()
- * \see cl_lock::cll_holds
- */
-void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
- if (lock->cll_holds == 0)
- cl_lock_cancel0(env, lock);
- else
- lock->cll_flags |= CLF_CANCELPEND;
-}
-EXPORT_SYMBOL(cl_lock_cancel);
-
-/**
- * Finds an existing lock covering given index and optionally different from a
- * given \a except lock.
- */
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
- struct cl_object *obj, pgoff_t index,
- struct cl_lock *except,
- int pending, int canceld)
-{
- struct cl_object_header *head;
- struct cl_lock *scan;
- struct cl_lock *lock;
- struct cl_lock_descr *need;
-
- head = cl_object_header(obj);
- need = &cl_env_info(env)->clt_descr;
- lock = NULL;
-
- need->cld_mode = CLM_READ; /* CLM_READ matches both READ & WRITE, but
- * not PHANTOM
- */
- need->cld_start = need->cld_end = index;
- need->cld_enq_flags = 0;
-
- spin_lock(&head->coh_lock_guard);
- /* It is fine to match any group lock since there could be only one
- * with a uniq gid and it conflicts with all other lock modes too
- */
- list_for_each_entry(scan, &head->coh_locks, cll_linkage) {
- if (scan != except &&
- (scan->cll_descr.cld_mode == CLM_GROUP ||
- cl_lock_ext_match(&scan->cll_descr, need)) &&
- scan->cll_state >= CLS_HELD &&
- scan->cll_state < CLS_FREEING &&
- /*
- * This check is racy as the lock can be canceled right
- * after it is done, but this is fine, because page exists
- * already.
- */
- (canceld || !(scan->cll_flags & CLF_CANCELLED)) &&
- (pending || !(scan->cll_flags & CLF_CANCELPEND))) {
- /* Don't increase cs_hit here since this
- * is just a helper function.
- */
- cl_lock_get_trust(scan);
- lock = scan;
- break;
- }
- }
- spin_unlock(&head->coh_lock_guard);
- return lock;
-}
-EXPORT_SYMBOL(cl_lock_at_pgoff);
-
-/**
- * Calculate the page offset at the layer of @lock.
- * At the time of this writing, @page is top page and @lock is sub lock.
- */
-static pgoff_t pgoff_at_lock(struct cl_page *page, struct cl_lock *lock)
-{
- struct lu_device_type *dtype;
- const struct cl_page_slice *slice;
-
- dtype = lock->cll_descr.cld_obj->co_lu.lo_dev->ld_type;
- slice = cl_page_at(page, dtype);
- return slice->cpl_page->cp_index;
+ return rc;
}
+EXPORT_SYMBOL(cl_lock_enqueue);
/**
- * Check if page @page is covered by an extra lock or discard it.
+ * Main high-level entry point of cl_lock interface that finds existing or
+ * enqueues new lock matching given description.
*/
-static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, void *cbdata)
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+ struct cl_lock *lock)
{
- struct cl_thread_info *info = cl_env_info(env);
- struct cl_lock *lock = cbdata;
- pgoff_t index = pgoff_at_lock(page, lock);
+ struct cl_sync_io *anchor = NULL;
+ __u32 enq_flags = lock->cll_descr.cld_enq_flags;
+ int rc;
- if (index >= info->clt_fn_index) {
- struct cl_lock *tmp;
+ rc = cl_lock_init(env, lock, io);
+ if (rc < 0)
+ return rc;
- /* refresh non-overlapped index */
- tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj, index,
- lock, 1, 0);
- if (tmp) {
- /* Cache the first-non-overlapped index so as to skip
- * all pages within [index, clt_fn_index). This
- * is safe because if tmp lock is canceled, it will
- * discard these pages.
- */
- info->clt_fn_index = tmp->cll_descr.cld_end + 1;
- if (tmp->cll_descr.cld_end == CL_PAGE_EOF)
- info->clt_fn_index = CL_PAGE_EOF;
- cl_lock_put(env, tmp);
- } else if (cl_page_own(env, io, page) == 0) {
- /* discard the page */
- cl_page_unmap(env, io, page);
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- } else {
- LASSERT(page->cp_state == CPS_FREEING);
- }
+ if ((enq_flags & CEF_ASYNC) && !(enq_flags & CEF_AGL)) {
+ anchor = &cl_env_info(env)->clt_anchor;
+ cl_sync_io_init(anchor, 1, cl_sync_io_end);
}
- info->clt_next_index = index + 1;
- return CLP_GANG_OKAY;
-}
+ rc = cl_lock_enqueue(env, io, lock, anchor);
-static int discard_cb(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, void *cbdata)
-{
- struct cl_thread_info *info = cl_env_info(env);
- struct cl_lock *lock = cbdata;
+ if (anchor) {
+ int rc2;
- LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
- KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
- !PageWriteback(cl_page_vmpage(env, page))));
- KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
- !PageDirty(cl_page_vmpage(env, page))));
-
- info->clt_next_index = pgoff_at_lock(page, lock) + 1;
- if (cl_page_own(env, io, page) == 0) {
- /* discard the page */
- cl_page_unmap(env, io, page);
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- } else {
- LASSERT(page->cp_state == CPS_FREEING);
+ /* drop the reference count held at initialization time */
+ cl_sync_io_note(env, anchor, 0);
+ rc2 = cl_sync_io_wait(env, anchor, 0);
+ if (rc2 < 0 && rc == 0)
+ rc = rc2;
}
- return CLP_GANG_OKAY;
-}
+ if (rc < 0)
+ cl_lock_release(env, lock);
-/**
- * Discard pages protected by the given lock. This function traverses radix
- * tree to find all covering pages and discard them. If a page is being covered
- * by other locks, it should remain in cache.
- *
- * If error happens on any step, the process continues anyway (the reasoning
- * behind this being that lock cancellation cannot be delayed indefinitely).
- */
-int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock)
-{
- struct cl_thread_info *info = cl_env_info(env);
- struct cl_io *io = &info->clt_io;
- struct cl_lock_descr *descr = &lock->cll_descr;
- cl_page_gang_cb_t cb;
- int res;
- int result;
-
- LINVRNT(cl_lock_invariant(env, lock));
-
- io->ci_obj = cl_object_top(descr->cld_obj);
- io->ci_ignore_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (result != 0)
- goto out;
-
- cb = descr->cld_mode == CLM_READ ? check_and_discard_cb : discard_cb;
- info->clt_fn_index = info->clt_next_index = descr->cld_start;
- do {
- res = cl_page_gang_lookup(env, descr->cld_obj, io,
- info->clt_next_index, descr->cld_end,
- cb, (void *)lock);
- if (info->clt_next_index > descr->cld_end)
- break;
-
- if (res == CLP_GANG_RESCHED)
- cond_resched();
- } while (res != CLP_GANG_OKAY);
-out:
- cl_io_fini(env, io);
- return result;
-}
-EXPORT_SYMBOL(cl_lock_discard_pages);
-
-/**
- * Eliminate all locks for a given object.
- *
- * Caller has to guarantee that no lock is in active use.
- *
- * \param cancel when this is set, cl_locks_prune() cancels locks before
- * destroying.
- */
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int cancel)
-{
- struct cl_object_header *head;
- struct cl_lock *lock;
-
- head = cl_object_header(obj);
- /*
- * If locks are destroyed without cancellation, all pages must be
- * already destroyed (as otherwise they will be left unprotected).
- */
- LASSERT(ergo(!cancel,
- !head->coh_tree.rnode && head->coh_pages == 0));
-
- spin_lock(&head->coh_lock_guard);
- while (!list_empty(&head->coh_locks)) {
- lock = container_of(head->coh_locks.next,
- struct cl_lock, cll_linkage);
- cl_lock_get_trust(lock);
- spin_unlock(&head->coh_lock_guard);
- lu_ref_add(&lock->cll_reference, "prune", current);
-
-again:
- cl_lock_mutex_get(env, lock);
- if (lock->cll_state < CLS_FREEING) {
- LASSERT(lock->cll_users <= 1);
- if (unlikely(lock->cll_users == 1)) {
- struct l_wait_info lwi = { 0 };
-
- cl_lock_mutex_put(env, lock);
- l_wait_event(lock->cll_wq,
- lock->cll_users == 0,
- &lwi);
- goto again;
- }
-
- if (cancel)
- cl_lock_cancel(env, lock);
- cl_lock_delete(env, lock);
- }
- cl_lock_mutex_put(env, lock);
- lu_ref_del(&lock->cll_reference, "prune", current);
- cl_lock_put(env, lock);
- spin_lock(&head->coh_lock_guard);
- }
- spin_unlock(&head->coh_lock_guard);
-}
-EXPORT_SYMBOL(cl_locks_prune);
-
-static struct cl_lock *cl_lock_hold_mutex(const struct lu_env *env,
- const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source)
-{
- struct cl_lock *lock;
-
- while (1) {
- lock = cl_lock_find(env, io, need);
- if (IS_ERR(lock))
- break;
- cl_lock_mutex_get(env, lock);
- if (lock->cll_state < CLS_FREEING &&
- !(lock->cll_flags & CLF_CANCELLED)) {
- cl_lock_hold_mod(env, lock, 1);
- lu_ref_add(&lock->cll_holders, scope, source);
- lu_ref_add(&lock->cll_reference, scope, source);
- break;
- }
- cl_lock_mutex_put(env, lock);
- cl_lock_put(env, lock);
- }
- return lock;
-}
-
-/**
- * Returns a lock matching \a need description with a reference and a hold on
- * it.
- *
- * This is much like cl_lock_find(), except that cl_lock_hold() additionally
- * guarantees that lock is not in the CLS_FREEING state on return.
- */
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source)
-{
- struct cl_lock *lock;
-
- lock = cl_lock_hold_mutex(env, io, need, scope, source);
- if (!IS_ERR(lock))
- cl_lock_mutex_put(env, lock);
- return lock;
-}
-EXPORT_SYMBOL(cl_lock_hold);
-
-/**
- * Main high-level entry point of cl_lock interface that finds existing or
- * enqueues new lock matching given description.
- */
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
- const struct cl_lock_descr *need,
- const char *scope, const void *source)
-{
- struct cl_lock *lock;
- int rc;
- __u32 enqflags = need->cld_enq_flags;
-
- do {
- lock = cl_lock_hold_mutex(env, io, need, scope, source);
- if (IS_ERR(lock))
- break;
-
- rc = cl_enqueue_locked(env, lock, io, enqflags);
- if (rc == 0) {
- if (cl_lock_fits_into(env, lock, need, io)) {
- if (!(enqflags & CEF_AGL)) {
- cl_lock_mutex_put(env, lock);
- cl_lock_lockdep_acquire(env, lock,
- enqflags);
- break;
- }
- rc = 1;
- }
- cl_unuse_locked(env, lock);
- }
- cl_lock_trace(D_DLMTRACE, env,
- rc <= 0 ? "enqueue failed" : "agl succeed", lock);
- cl_lock_hold_release(env, lock, scope, source);
- cl_lock_mutex_put(env, lock);
- lu_ref_del(&lock->cll_reference, scope, source);
- cl_lock_put(env, lock);
- if (rc > 0) {
- LASSERT(enqflags & CEF_AGL);
- lock = NULL;
- } else if (rc != 0) {
- lock = ERR_PTR(rc);
- }
- } while (rc == 0);
- return lock;
+ return rc;
}
EXPORT_SYMBOL(cl_lock_request);
/**
- * Adds a hold to a known lock.
- */
-void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_state != CLS_FREEING);
-
- cl_lock_hold_mod(env, lock, 1);
- cl_lock_get(lock);
- lu_ref_add(&lock->cll_holders, scope, source);
- lu_ref_add(&lock->cll_reference, scope, source);
-}
-EXPORT_SYMBOL(cl_lock_hold_add);
-
-/**
- * Releases a hold and a reference on a lock, on which caller acquired a
- * mutex.
- */
-void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source)
-{
- LINVRNT(cl_lock_invariant(env, lock));
- cl_lock_hold_release(env, lock, scope, source);
- lu_ref_del(&lock->cll_reference, scope, source);
- cl_lock_put(env, lock);
-}
-EXPORT_SYMBOL(cl_lock_unhold);
-
-/**
* Releases a hold and a reference on a lock, obtained by cl_lock_hold().
*/
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
- const char *scope, const void *source)
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock)
{
- LINVRNT(cl_lock_invariant(env, lock));
cl_lock_trace(D_DLMTRACE, env, "release lock", lock);
- cl_lock_mutex_get(env, lock);
- cl_lock_hold_release(env, lock, scope, source);
- cl_lock_mutex_put(env, lock);
- lu_ref_del(&lock->cll_reference, scope, source);
- cl_lock_put(env, lock);
+ cl_lock_cancel(env, lock);
+ cl_lock_fini(env, lock);
}
EXPORT_SYMBOL(cl_lock_release);
-void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
-
- cl_lock_used_mod(env, lock, 1);
-}
-EXPORT_SYMBOL(cl_lock_user_add);
-
-void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock)
-{
- LINVRNT(cl_lock_is_mutexed(lock));
- LINVRNT(cl_lock_invariant(env, lock));
- LASSERT(lock->cll_users > 0);
-
- cl_lock_used_mod(env, lock, -1);
- if (lock->cll_users == 0)
- wake_up_all(&lock->cll_wq);
-}
-EXPORT_SYMBOL(cl_lock_user_del);
-
const char *cl_lock_mode_name(const enum cl_lock_mode mode)
{
static const char *names[] = {
- [CLM_PHANTOM] = "P",
[CLM_READ] = "R",
[CLM_WRITE] = "W",
[CLM_GROUP] = "G"
@@ -2189,10 +257,8 @@ void cl_lock_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_lock *lock)
{
const struct cl_lock_slice *slice;
- (*printer)(env, cookie, "lock@%p[%d %d %d %d %d %08lx] ",
- lock, atomic_read(&lock->cll_ref),
- lock->cll_state, lock->cll_error, lock->cll_holds,
- lock->cll_users, lock->cll_flags);
+
+ (*printer)(env, cookie, "lock@%p", lock);
cl_lock_descr_print(env, cookie, printer, &lock->cll_descr);
(*printer)(env, cookie, " {\n");
@@ -2207,13 +273,3 @@ void cl_lock_print(const struct lu_env *env, void *cookie,
(*printer)(env, cookie, "} lock@%p\n", lock);
}
EXPORT_SYMBOL(cl_lock_print);
-
-int cl_lock_init(void)
-{
- return lu_kmem_init(cl_lock_caches);
-}
-
-void cl_lock_fini(void)
-{
- lu_kmem_fini(cl_lock_caches);
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
index 43e299d4d416..91a5806d0239 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -36,6 +32,7 @@
* Client Lustre Object.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
/*
@@ -43,8 +40,6 @@
*
* i_mutex
* PG_locked
- * ->coh_page_guard
- * ->coh_lock_guard
* ->coh_attr_guard
* ->ls_guard
*/
@@ -63,10 +58,6 @@
static struct kmem_cache *cl_env_kmem;
-/** Lock class of cl_object_header::coh_page_guard */
-static struct lock_class_key cl_page_guard_class;
-/** Lock class of cl_object_header::coh_lock_guard */
-static struct lock_class_key cl_lock_guard_class;
/** Lock class of cl_object_header::coh_attr_guard */
static struct lock_class_key cl_attr_guard_class;
@@ -81,17 +72,9 @@ int cl_object_header_init(struct cl_object_header *h)
result = lu_object_header_init(&h->coh_lu);
if (result == 0) {
- spin_lock_init(&h->coh_page_guard);
- spin_lock_init(&h->coh_lock_guard);
spin_lock_init(&h->coh_attr_guard);
- lockdep_set_class(&h->coh_page_guard, &cl_page_guard_class);
- lockdep_set_class(&h->coh_lock_guard, &cl_lock_guard_class);
lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class);
- h->coh_pages = 0;
- /* XXX hard coded GFP_* mask. */
- INIT_RADIX_TREE(&h->coh_tree, GFP_ATOMIC);
- INIT_LIST_HEAD(&h->coh_locks);
- h->coh_page_bufsize = ALIGN(sizeof(struct cl_page), 8);
+ h->coh_page_bufsize = 0;
}
return result;
}
@@ -145,7 +128,7 @@ EXPORT_SYMBOL(cl_object_get);
/**
* Returns the top-object for a given \a o.
*
- * \see cl_page_top(), cl_io_top()
+ * \see cl_io_top()
*/
struct cl_object *cl_object_top(struct cl_object *o)
{
@@ -315,6 +298,29 @@ int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
EXPORT_SYMBOL(cl_conf_set);
/**
+ * Prunes caches of pages and locks for this object.
+ *
+ * Walks the layers linked on the header of \a obj and calls the optional
+ * cl_object_operations::coo_prune() method of each layer that provides one.
+ *
+ * \retval 0 every coo_prune() that was called returned 0 (or no layer
+ * implements the method)
+ * \retval other first non-zero value returned by a layer; the remaining
+ * layers are not visited
+ */
+int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+ struct lu_object_header *top;
+ struct cl_object *o;
+ int result;
+
+ top = obj->co_lu.lo_header;
+ result = 0;
+ list_for_each_entry(o, &top->loh_layers, co_lu.lo_linkage) {
+ if (o->co_ops->coo_prune) {
+ result = o->co_ops->coo_prune(env, o);
+ if (result != 0)
+ break;
+ }
+ }
+
+ return result;
+}
+EXPORT_SYMBOL(cl_object_prune);
+
+/**
* Helper function removing all object locks, and marking object for
* deletion. All object pages must have been deleted at this point.
*
@@ -323,34 +329,12 @@ EXPORT_SYMBOL(cl_conf_set);
*/
void cl_object_kill(const struct lu_env *env, struct cl_object *obj)
{
- struct cl_object_header *hdr;
-
- hdr = cl_object_header(obj);
- LASSERT(!hdr->coh_tree.rnode);
- LASSERT(hdr->coh_pages == 0);
+ struct cl_object_header *hdr = cl_object_header(obj);
set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags);
- /*
- * Destroy all locks. Object destruction (including cl_inode_fini())
- * cannot cancel the locks, because in the case of a local client,
- * where client and server share the same thread running
- * prune_icache(), this can dead-lock with ldlm_cancel_handler()
- * waiting on __wait_on_freeing_inode().
- */
- cl_locks_prune(env, obj, 0);
}
EXPORT_SYMBOL(cl_object_kill);
-/**
- * Prunes caches of pages and locks for this object.
- */
-void cl_object_prune(const struct lu_env *env, struct cl_object *obj)
-{
- cl_pages_prune(env, obj);
- cl_locks_prune(env, obj, 1);
-}
-EXPORT_SYMBOL(cl_object_prune);
-
void cache_stats_init(struct cache_stats *cs, const char *name)
{
int i;
@@ -383,6 +367,8 @@ static int cache_stats_print(const struct cache_stats *cs,
return 0;
}
+static void cl_env_percpu_refill(void);
+
/**
* Initialize client site.
*
@@ -397,11 +383,9 @@ int cl_site_init(struct cl_site *s, struct cl_device *d)
result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
if (result == 0) {
cache_stats_init(&s->cs_pages, "pages");
- cache_stats_init(&s->cs_locks, "locks");
for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i)
atomic_set(&s->cs_pages_state[0], 0);
- for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i)
- atomic_set(&s->cs_locks_state[i], 0);
+ cl_env_percpu_refill();
}
return result;
}
@@ -435,15 +419,6 @@ int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
[CPS_PAGEIN] = "r",
[CPS_FREEING] = "f"
};
- static const char *lstate[] = {
- [CLS_NEW] = "n",
- [CLS_QUEUING] = "q",
- [CLS_ENQUEUED] = "e",
- [CLS_HELD] = "h",
- [CLS_INTRANSIT] = "t",
- [CLS_CACHED] = "c",
- [CLS_FREEING] = "f"
- };
/*
lookup hit total busy create
pages: ...... ...... ...... ...... ...... [...... ...... ...... ......]
@@ -457,12 +432,6 @@ locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......]
seq_printf(m, "%s: %u ", pstate[i],
atomic_read(&site->cs_pages_state[i]));
seq_printf(m, "]\n");
- cache_stats_print(&site->cs_locks, m, 0);
- seq_printf(m, " [");
- for (i = 0; i < ARRAY_SIZE(site->cs_locks_state); ++i)
- seq_printf(m, "%s: %u ", lstate[i],
- atomic_read(&site->cs_locks_state[i]));
- seq_printf(m, "]\n");
cache_stats_print(&cl_env_stats, m, 0);
seq_printf(m, "\n");
return 0;
@@ -492,6 +461,13 @@ EXPORT_SYMBOL(cl_site_stats_print);
* bz20044, bz22683.
*/
+static LIST_HEAD(cl_envs);
+static unsigned int cl_envs_cached_nr;
+static unsigned int cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit
+ * for now.
+ */
+static DEFINE_SPINLOCK(cl_envs_guard);
+
struct cl_env {
void *ce_magic;
struct lu_env ce_lu;
@@ -597,7 +573,7 @@ static inline struct cl_env *cl_env_fetch(void)
{
struct cl_env *cle;
- cle = cfs_hash_lookup(cl_env_hash, (void *) (long) current->pid);
+ cle = cfs_hash_lookup(cl_env_hash, (void *)(long)current->pid);
LASSERT(ergo(cle, cle->ce_magic == &cl_env_init0));
return cle;
}
@@ -608,7 +584,7 @@ static inline void cl_env_attach(struct cl_env *cle)
int rc;
LASSERT(!cle->ce_owner);
- cle->ce_owner = (void *) (long) current->pid;
+ cle->ce_owner = (void *)(long)current->pid;
rc = cfs_hash_add_unique(cl_env_hash, cle->ce_owner,
&cle->ce_node);
LASSERT(rc == 0);
@@ -619,7 +595,7 @@ static inline void cl_env_do_detach(struct cl_env *cle)
{
void *cookie;
- LASSERT(cle->ce_owner == (void *) (long) current->pid);
+ LASSERT(cle->ce_owner == (void *)(long)current->pid);
cookie = cfs_hash_del(cl_env_hash, cle->ce_owner,
&cle->ce_node);
LASSERT(cookie == cle);
@@ -674,8 +650,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
lu_context_enter(&cle->ce_ses);
env->le_ses = &cle->ce_ses;
cl_env_init0(cle, debug);
- } else
+ } else {
lu_env_fini(env);
+ }
}
if (rc != 0) {
kmem_cache_free(cl_env_kmem, cle);
@@ -684,8 +661,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
CL_ENV_INC(create);
CL_ENV_INC(total);
}
- } else
+ } else {
env = ERR_PTR(-ENOMEM);
+ }
return env;
}
@@ -697,6 +675,39 @@ static void cl_env_fini(struct cl_env *cle)
kmem_cache_free(cl_env_kmem, cle);
}
+/*
+ * Hands out an environment, taking it from the cl_envs cache when possible.
+ *
+ * If the cache is non-empty, the first cached cl_env is unlinked under
+ * cl_envs_guard, refilled with lu_env_refill(), re-initialized with
+ * cl_env_init0() and both of its contexts are entered. If the refill fails
+ * the environment is finalized and ERR_PTR(rc) is returned.
+ *
+ * If the cache is empty, a new environment is allocated by cl_env_new() with
+ * the default context and session tags.
+ *
+ * \a debug is passed through to cl_env_init0() / cl_env_new().
+ */
+static struct lu_env *cl_env_obtain(void *debug)
+{
+ struct cl_env *cle;
+ struct lu_env *env;
+
+ spin_lock(&cl_envs_guard);
+ LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
+ if (cl_envs_cached_nr > 0) {
+ int rc;
+
+ cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+ list_del_init(&cle->ce_linkage);
+ cl_envs_cached_nr--;
+ /* cle is off the list now; the rest runs without the lock */
+ spin_unlock(&cl_envs_guard);
+
+ env = &cle->ce_lu;
+ rc = lu_env_refill(env);
+ if (rc == 0) {
+ cl_env_init0(cle, debug);
+ lu_context_enter(&env->le_ctx);
+ lu_context_enter(&cle->ce_ses);
+ } else {
+ cl_env_fini(cle);
+ env = ERR_PTR(rc);
+ }
+ } else {
+ spin_unlock(&cl_envs_guard);
+ env = cl_env_new(lu_context_tags_default,
+ lu_session_tags_default, debug);
+ }
+ return env;
+}
+
static inline struct cl_env *cl_env_container(struct lu_env *env)
{
return container_of(env, struct cl_env, ce_lu);
@@ -727,6 +738,8 @@ static struct lu_env *cl_env_peek(int *refcheck)
* Returns lu_env: if there already is an environment associated with the
* current thread, it is returned, otherwise, new environment is allocated.
*
+ * Allocations are amortized through the global cache of environments.
+ *
* \param refcheck pointer to a counter used to detect environment leaks. In
* the usual case cl_env_get() and cl_env_put() are called in the same lexical
* scope and pointer to the same integer is passed as \a refcheck. This is
@@ -740,10 +753,7 @@ struct lu_env *cl_env_get(int *refcheck)
env = cl_env_peek(refcheck);
if (!env) {
- env = cl_env_new(lu_context_tags_default,
- lu_session_tags_default,
- __builtin_return_address(0));
-
+ env = cl_env_obtain(__builtin_return_address(0));
if (!IS_ERR(env)) {
struct cl_env *cle;
@@ -787,6 +797,32 @@ static void cl_env_exit(struct cl_env *cle)
}
/**
+ * Finalizes and frees a given number of cached environments. This is done to
+ * (1) free some memory (not currently hooked into VM), or (2) release
+ * references to modules.
+ *
+ * cl_envs_guard is dropped around each cl_env_fini() call and re-taken
+ * before the next list access.
+ *
+ * \param nr maximum number of cached environments to free
+ *
+ * \return the part of \a nr that was not consumed: 0 when \a nr environments
+ * were freed, otherwise the shortfall left when the cache ran empty
+ */
+unsigned int cl_env_cache_purge(unsigned int nr)
+{
+ struct cl_env *cle;
+
+ spin_lock(&cl_envs_guard);
+ for (; !list_empty(&cl_envs) && nr > 0; --nr) {
+ cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+ list_del_init(&cle->ce_linkage);
+ LASSERT(cl_envs_cached_nr > 0);
+ cl_envs_cached_nr--;
+ spin_unlock(&cl_envs_guard);
+
+ cl_env_fini(cle);
+ spin_lock(&cl_envs_guard);
+ }
+ LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
+ spin_unlock(&cl_envs_guard);
+ return nr;
+}
+EXPORT_SYMBOL(cl_env_cache_purge);
+
+/**
* Release an environment.
*
* Decrement \a env reference counter. When counter drops to 0, nothing in
@@ -808,7 +844,22 @@ void cl_env_put(struct lu_env *env, int *refcheck)
cl_env_detach(cle);
cle->ce_debug = NULL;
cl_env_exit(cle);
- cl_env_fini(cle);
+ /*
+ * Don't bother to take a lock here.
+ *
+ * Return environment to the cache only when it was allocated
+ * with the standard tags.
+ */
+ if (cl_envs_cached_nr < cl_envs_cached_max &&
+ (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD &&
+ (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) {
+ spin_lock(&cl_envs_guard);
+ list_add(&cle->ce_linkage, &cl_envs);
+ cl_envs_cached_nr++;
+ spin_unlock(&cl_envs_guard);
+ } else {
+ cl_env_fini(cle);
+ }
}
}
EXPORT_SYMBOL(cl_env_put);
@@ -914,6 +965,104 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
}
EXPORT_SYMBOL(cl_lvb2attr);
+static struct cl_env cl_env_percpu[NR_CPUS];
+
+/*
+ * Sets up the statically allocated per-CPU environments in cl_env_percpu[].
+ *
+ * For every possible CPU the lu_env is initialized with LCT_CL_THREAD tags
+ * and the session context with LCT_SESSION tags (both additionally carrying
+ * LCT_REMEMBER | LCT_NOREF); the session is entered and installed as
+ * env->le_ses.
+ *
+ * Returns 0 on success. On the first failure, the slots initialized so far
+ * are torn down again and the error code of the failing call is returned.
+ */
+static int cl_env_percpu_init(void)
+{
+ struct cl_env *cle;
+ int tags = LCT_REMEMBER | LCT_NOREF;
+ int i, j;
+ int rc = 0;
+
+ for_each_possible_cpu(i) {
+ struct lu_env *env;
+
+ cle = &cl_env_percpu[i];
+ env = &cle->ce_lu;
+
+ INIT_LIST_HEAD(&cle->ce_linkage);
+ cle->ce_magic = &cl_env_init0;
+ rc = lu_env_init(env, LCT_CL_THREAD | tags);
+ if (rc == 0) {
+ rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags);
+ if (rc == 0) {
+ lu_context_enter(&cle->ce_ses);
+ env->le_ses = &cle->ce_ses;
+ } else {
+ lu_env_fini(env);
+ }
+ }
+ if (rc != 0)
+ break;
+ }
+ if (rc != 0) {
+ /* Indices 0 to i (excluding i) were correctly initialized,
+ * thus we must uninitialize up to i, the rest are undefined.
+ * Slot i itself was already cleaned up in the loop above.
+ */
+ for (j = 0; j < i; j++) {
+ /* index by j: unwind each initialized slot, not slot i */
+ cle = &cl_env_percpu[j];
+ lu_context_exit(&cle->ce_ses);
+ lu_context_fini(&cle->ce_ses);
+ lu_env_fini(&cle->ce_lu);
+ }
+ }
+
+ return rc;
+}
+
+/*
+ * Tears down the per-CPU environments: for every possible CPU the session
+ * context is exited and finalized, then the lu_env is finalized.
+ */
+static void cl_env_percpu_fini(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct cl_env *cle = &cl_env_percpu[i];
+
+ lu_context_exit(&cle->ce_ses);
+ lu_context_fini(&cle->ce_ses);
+ lu_env_fini(&cle->ce_lu);
+ }
+}
+
+/*
+ * Calls lu_env_refill() on the environment of every possible CPU.
+ * The return value of lu_env_refill() is not checked here.
+ */
+static void cl_env_percpu_refill(void)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ lu_env_refill(&cl_env_percpu[i].ce_lu);
+}
+
+/*
+ * Releases a per-CPU environment returned by cl_env_percpu_get().
+ *
+ * Asserts that \a env is the slot of the CPU this code is running on and that
+ * dropping the reference brings ce_ref to 0, then detaches the environment
+ * and clears its debug cookie. The final put_cpu() pairs with the get_cpu()
+ * done in cl_env_percpu_get().
+ */
+void cl_env_percpu_put(struct lu_env *env)
+{
+ struct cl_env *cle;
+ int cpu;
+
+ cpu = smp_processor_id();
+ cle = cl_env_container(env);
+ LASSERT(cle == &cl_env_percpu[cpu]);
+
+ cle->ce_ref--;
+ LASSERT(cle->ce_ref == 0);
+
+ CL_ENV_DEC(busy);
+ cl_env_detach(cle);
+ cle->ce_debug = NULL;
+
+ put_cpu();
+}
+EXPORT_SYMBOL(cl_env_percpu_put);
+
+/*
+ * Returns the environment belonging to the current CPU.
+ *
+ * The slot is selected with get_cpu(), re-initialized by cl_env_init0() with
+ * the caller's return address as debug cookie, and attached. The matching
+ * put_cpu() is issued by cl_env_percpu_put(), which must be called to
+ * release the environment.
+ */
+struct lu_env *cl_env_percpu_get(void)
+{
+ struct cl_env *cle;
+
+ cle = &cl_env_percpu[get_cpu()];
+ cl_env_init0(cle, __builtin_return_address(0));
+
+ cl_env_attach(cle);
+ return &cle->ce_lu;
+}
+EXPORT_SYMBOL(cl_env_percpu_get);
+
/*****************************************************************************
*
* Temporary prototype thing: mirror obd-devices into cl devices.
@@ -944,8 +1093,9 @@ struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site,
CERROR("can't init device '%s', %d\n", typename, rc);
d = ERR_PTR(rc);
}
- } else
+ } else {
CERROR("Cannot allocate device: '%s'\n", typename);
+ }
return lu2cl_dev(d);
}
EXPORT_SYMBOL(cl_type_setup);
@@ -959,12 +1109,6 @@ void cl_stack_fini(const struct lu_env *env, struct cl_device *cl)
}
EXPORT_SYMBOL(cl_stack_fini);
-int cl_lock_init(void);
-void cl_lock_fini(void);
-
-int cl_page_init(void);
-void cl_page_fini(void);
-
static struct lu_context_key cl_key;
struct cl_thread_info *cl_env_info(const struct lu_env *env)
@@ -1059,17 +1203,13 @@ int cl_global_init(void)
if (result)
goto out_kmem;
- result = cl_lock_init();
+ result = cl_env_percpu_init();
if (result)
+ /* no cl_env_percpu_fini on error */
goto out_context;
- result = cl_page_init();
- if (result)
- goto out_lock;
-
return 0;
-out_lock:
- cl_lock_fini();
+
out_context:
lu_context_key_degister(&cl_key);
out_kmem:
@@ -1084,8 +1224,7 @@ out_store:
*/
void cl_global_fini(void)
{
- cl_lock_fini();
- cl_page_fini();
+ cl_env_percpu_fini();
lu_context_key_degister(&cl_key);
lu_kmem_fini(cl_object_caches);
cl_env_store_fini();
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
index 394580016638..db2dc6b39073 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_page.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -36,6 +32,7 @@
* Client Lustre Page.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_CLASS
@@ -48,8 +45,7 @@
#include "../include/cl_object.h"
#include "cl_internal.h"
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
- int radix);
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
# define PASSERT(env, page, expr) \
do { \
@@ -63,24 +59,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
/**
- * Internal version of cl_page_top, it should be called if the page is
- * known to be not freed, says with page referenced, or radix tree lock held,
- * or page owned.
- */
-static struct cl_page *cl_page_top_trusted(struct cl_page *page)
-{
- while (page->cp_parent)
- page = page->cp_parent;
- return page;
-}
-
-/**
* Internal version of cl_page_get().
*
* This function can be used to obtain initial reference to previously
* unreferenced cached object. It can be called only if concurrent page
- * reclamation is somehow prevented, e.g., by locking page radix-tree
- * (cl_object_header::hdr->coh_page_guard), or by keeping a lock on a VM page,
+ * reclamation is somehow prevented, e.g., by keeping a lock on a VM page,
* associated with \a page.
*
* Use with care! Not exported.
@@ -103,142 +86,12 @@ cl_page_at_trusted(const struct cl_page *page,
{
const struct cl_page_slice *slice;
- page = cl_page_top_trusted((struct cl_page *)page);
- do {
- list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
- if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
- return slice;
- }
- page = page->cp_child;
- } while (page);
- return NULL;
-}
-
-/**
- * Returns a page with given index in the given object, or NULL if no page is
- * found. Acquires a reference on \a page.
- *
- * Locking: called under cl_object_header::coh_page_guard spin-lock.
- */
-struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
-{
- struct cl_page *page;
-
- assert_spin_locked(&hdr->coh_page_guard);
-
- page = radix_tree_lookup(&hdr->coh_tree, index);
- if (page)
- cl_page_get_trust(page);
- return page;
-}
-EXPORT_SYMBOL(cl_page_lookup);
-
-/**
- * Returns a list of pages by a given [start, end] of \a obj.
- *
- * \param resched If not NULL, then we give up before hogging CPU for too
- * long and set *resched = 1, in that case caller should implement a retry
- * logic.
- *
- * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
- * crucial in the face of [offset, EOF] locks.
- *
- * Return at least one page in @queue unless there is no covered page.
- */
-int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, pgoff_t start, pgoff_t end,
- cl_page_gang_cb_t cb, void *cbdata)
-{
- struct cl_object_header *hdr;
- struct cl_page *page;
- struct cl_page **pvec;
- const struct cl_page_slice *slice;
- const struct lu_device_type *dtype;
- pgoff_t idx;
- unsigned int nr;
- unsigned int i;
- unsigned int j;
- int res = CLP_GANG_OKAY;
- int tree_lock = 1;
-
- idx = start;
- hdr = cl_object_header(obj);
- pvec = cl_env_info(env)->clt_pvec;
- dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type;
- spin_lock(&hdr->coh_page_guard);
- while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
- idx, CLT_PVEC_SIZE)) > 0) {
- int end_of_region = 0;
-
- idx = pvec[nr - 1]->cp_index + 1;
- for (i = 0, j = 0; i < nr; ++i) {
- page = pvec[i];
- pvec[i] = NULL;
-
- LASSERT(page->cp_type == CPT_CACHEABLE);
- if (page->cp_index > end) {
- end_of_region = 1;
- break;
- }
- if (page->cp_state == CPS_FREEING)
- continue;
-
- slice = cl_page_at_trusted(page, dtype);
- /*
- * Pages for lsm-less file has no underneath sub-page
- * for osc, in case of ...
- */
- PASSERT(env, page, slice);
-
- page = slice->cpl_page;
- /*
- * Can safely call cl_page_get_trust() under
- * radix-tree spin-lock.
- *
- * XXX not true, because @page is from object another
- * than @hdr and protected by different tree lock.
- */
- cl_page_get_trust(page);
- lu_ref_add_atomic(&page->cp_reference,
- "gang_lookup", current);
- pvec[j++] = page;
- }
-
- /*
- * Here a delicate locking dance is performed. Current thread
- * holds a reference to a page, but has to own it before it
- * can be placed into queue. Owning implies waiting, so
- * radix-tree lock is to be released. After a wait one has to
- * check that pages weren't truncated (cl_page_own() returns
- * error in the latter case).
- */
- spin_unlock(&hdr->coh_page_guard);
- tree_lock = 0;
-
- for (i = 0; i < j; ++i) {
- page = pvec[i];
- if (res == CLP_GANG_OKAY)
- res = (*cb)(env, io, page, cbdata);
- lu_ref_del(&page->cp_reference,
- "gang_lookup", current);
- cl_page_put(env, page);
- }
- if (nr < CLT_PVEC_SIZE || end_of_region)
- break;
-
- if (res == CLP_GANG_OKAY && need_resched())
- res = CLP_GANG_RESCHED;
- if (res != CLP_GANG_OKAY)
- break;
-
- spin_lock(&hdr->coh_page_guard);
- tree_lock = 1;
+ list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+ if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
+ return slice;
}
- if (tree_lock)
- spin_unlock(&hdr->coh_page_guard);
- return res;
+ return NULL;
}
-EXPORT_SYMBOL(cl_page_gang_lookup);
static void cl_page_free(const struct lu_env *env, struct cl_page *page)
{
@@ -247,17 +100,16 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
PASSERT(env, page, list_empty(&page->cp_batch));
PASSERT(env, page, !page->cp_owner);
PASSERT(env, page, !page->cp_req);
- PASSERT(env, page, !page->cp_parent);
PASSERT(env, page, page->cp_state == CPS_FREEING);
- might_sleep();
while (!list_empty(&page->cp_layers)) {
struct cl_page_slice *slice;
slice = list_entry(page->cp_layers.next,
struct cl_page_slice, cpl_linkage);
list_del_init(page->cp_layers.next);
- slice->cpl_ops->cpo_fini(env, slice);
+ if (unlikely(slice->cpl_ops->cpo_fini))
+ slice->cpl_ops->cpo_fini(env, slice);
}
lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
cl_object_put(env, obj);
@@ -276,10 +128,10 @@ static inline void cl_page_state_set_trust(struct cl_page *page,
*(enum cl_page_state *)&page->cp_state = state;
}
-static struct cl_page *cl_page_alloc(const struct lu_env *env,
- struct cl_object *o, pgoff_t ind,
- struct page *vmpage,
- enum cl_page_type type)
+struct cl_page *cl_page_alloc(const struct lu_env *env,
+ struct cl_object *o, pgoff_t ind,
+ struct page *vmpage,
+ enum cl_page_type type)
{
struct cl_page *page;
struct lu_object_header *head;
@@ -289,13 +141,11 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
int result = 0;
atomic_set(&page->cp_ref, 1);
- if (type == CPT_CACHEABLE) /* for radix tree */
- atomic_inc(&page->cp_ref);
page->cp_obj = o;
cl_object_get(o);
lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
page);
- page->cp_index = ind;
+ page->cp_vmpage = vmpage;
cl_page_state_set_trust(page, CPS_CACHED);
page->cp_type = type;
INIT_LIST_HEAD(&page->cp_layers);
@@ -306,10 +156,10 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
head = o->co_lu.lo_header;
list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
if (o->co_ops->coo_page_init) {
- result = o->co_ops->coo_page_init(env, o,
- page, vmpage);
+ result = o->co_ops->coo_page_init(env, o, page,
+ ind);
if (result != 0) {
- cl_page_delete0(env, page, 0);
+ cl_page_delete0(env, page);
cl_page_free(env, page);
page = ERR_PTR(result);
break;
@@ -321,6 +171,7 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
}
return page;
}
+EXPORT_SYMBOL(cl_page_alloc);
/**
* Returns a cl_page with index \a idx at the object \a o, and associated with
@@ -333,16 +184,13 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
*
* \see cl_object_find(), cl_lock_find()
*/
-static struct cl_page *cl_page_find0(const struct lu_env *env,
- struct cl_object *o,
- pgoff_t idx, struct page *vmpage,
- enum cl_page_type type,
- struct cl_page *parent)
+struct cl_page *cl_page_find(const struct lu_env *env,
+ struct cl_object *o,
+ pgoff_t idx, struct page *vmpage,
+ enum cl_page_type type)
{
struct cl_page *page = NULL;
- struct cl_page *ghost = NULL;
struct cl_object_header *hdr;
- int err;
LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
might_sleep();
@@ -368,120 +216,25 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
* reference on it.
*/
page = cl_vmpage_page(vmpage, o);
- PINVRNT(env, page,
- ergo(page,
- cl_page_vmpage(env, page) == vmpage &&
- (void *)radix_tree_lookup(&hdr->coh_tree,
- idx) == page));
- }
- if (page)
- return page;
+ if (page)
+ return page;
+ }
/* allocate and initialize cl_page */
page = cl_page_alloc(env, o, idx, vmpage, type);
- if (IS_ERR(page))
- return page;
-
- if (type == CPT_TRANSIENT) {
- if (parent) {
- LASSERT(!page->cp_parent);
- page->cp_parent = parent;
- parent->cp_child = page;
- }
- return page;
- }
-
- /*
- * XXX optimization: use radix_tree_preload() here, and change tree
- * gfp mask to GFP_KERNEL in cl_object_header_init().
- */
- spin_lock(&hdr->coh_page_guard);
- err = radix_tree_insert(&hdr->coh_tree, idx, page);
- if (err != 0) {
- ghost = page;
- /*
- * Noted by Jay: a lock on \a vmpage protects cl_page_find()
- * from this race, but
- *
- * 0. it's better to have cl_page interface "locally
- * consistent" so that its correctness can be reasoned
- * about without appealing to the (obscure world of) VM
- * locking.
- *
- * 1. handling this race allows ->coh_tree to remain
- * consistent even when VM locking is somehow busted,
- * which is very useful during diagnosing and debugging.
- */
- page = ERR_PTR(err);
- CL_PAGE_DEBUG(D_ERROR, env, ghost,
- "fail to insert into radix tree: %d\n", err);
- } else {
- if (parent) {
- LASSERT(!page->cp_parent);
- page->cp_parent = parent;
- parent->cp_child = page;
- }
- hdr->coh_pages++;
- }
- spin_unlock(&hdr->coh_page_guard);
-
- if (unlikely(ghost)) {
- cl_page_delete0(env, ghost, 0);
- cl_page_free(env, ghost);
- }
return page;
}
-
-struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o,
- pgoff_t idx, struct page *vmpage,
- enum cl_page_type type)
-{
- return cl_page_find0(env, o, idx, vmpage, type, NULL);
-}
EXPORT_SYMBOL(cl_page_find);
-struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o,
- pgoff_t idx, struct page *vmpage,
- struct cl_page *parent)
-{
- return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent);
-}
-EXPORT_SYMBOL(cl_page_find_sub);
-
static inline int cl_page_invariant(const struct cl_page *pg)
{
- struct cl_object_header *header;
- struct cl_page *parent;
- struct cl_page *child;
- struct cl_io *owner;
-
/*
* Page invariant is protected by a VM lock.
*/
LINVRNT(cl_page_is_vmlocked(NULL, pg));
- header = cl_object_header(pg->cp_obj);
- parent = pg->cp_parent;
- child = pg->cp_child;
- owner = pg->cp_owner;
-
- return cl_page_in_use(pg) &&
- ergo(parent, parent->cp_child == pg) &&
- ergo(child, child->cp_parent == pg) &&
- ergo(child, pg->cp_obj != child->cp_obj) &&
- ergo(parent, pg->cp_obj != parent->cp_obj) &&
- ergo(owner && parent,
- parent->cp_owner == pg->cp_owner->ci_parent) &&
- ergo(owner && child, child->cp_owner->ci_parent == owner) &&
- /*
- * Either page is early in initialization (has neither child
- * nor parent yet), or it is in the object radix tree.
- */
- ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE,
- (void *)radix_tree_lookup(&header->coh_tree,
- pg->cp_index) == pg ||
- (!child && !parent));
+ return cl_page_in_use_noref(pg);
}
static void cl_page_state_set0(const struct lu_env *env,
@@ -534,13 +287,9 @@ static void cl_page_state_set0(const struct lu_env *env,
old = page->cp_state;
PASSERT(env, page, allowed_transitions[old][state]);
CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
- for (; page; page = page->cp_child) {
- PASSERT(env, page, page->cp_state == old);
- PASSERT(env, page,
- equi(state == CPS_OWNED, page->cp_owner));
-
- cl_page_state_set_trust(page, state);
- }
+ PASSERT(env, page, page->cp_state == old);
+ PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
+ cl_page_state_set_trust(page, state);
}
static void cl_page_state_set(const struct lu_env *env,
@@ -574,8 +323,6 @@ EXPORT_SYMBOL(cl_page_get);
*/
void cl_page_put(const struct lu_env *env, struct cl_page *page)
{
- PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent);
-
CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
atomic_read(&page->cp_ref));
@@ -595,34 +342,10 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page)
EXPORT_SYMBOL(cl_page_put);
/**
- * Returns a VM page associated with a given cl_page.
- */
-struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
-{
- const struct cl_page_slice *slice;
-
- /*
- * Find uppermost layer with ->cpo_vmpage() method, and return its
- * result.
- */
- page = cl_page_top(page);
- do {
- list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
- if (slice->cpl_ops->cpo_vmpage)
- return slice->cpl_ops->cpo_vmpage(env, slice);
- }
- page = page->cp_child;
- } while (page);
- LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */
-}
-EXPORT_SYMBOL(cl_page_vmpage);
-
-/**
* Returns a cl_page associated with a VM page, and given cl_object.
*/
struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
{
- struct cl_page *top;
struct cl_page *page;
KLASSERT(PageLocked(vmpage));
@@ -633,36 +356,15 @@ struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
* bottom-to-top pass.
*/
- /*
- * This loop assumes that ->private points to the top-most page. This
- * can be rectified easily.
- */
- top = (struct cl_page *)vmpage->private;
- if (!top)
- return NULL;
-
- for (page = top; page; page = page->cp_child) {
- if (cl_object_same(page->cp_obj, obj)) {
- cl_page_get_trust(page);
- break;
- }
+ page = (struct cl_page *)vmpage->private;
+ if (page) {
+ cl_page_get_trust(page);
+ LASSERT(page->cp_type == CPT_CACHEABLE);
}
- LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
return page;
}
EXPORT_SYMBOL(cl_vmpage_page);
-/**
- * Returns the top-page for a given page.
- *
- * \see cl_object_top(), cl_io_top()
- */
-struct cl_page *cl_page_top(struct cl_page *page)
-{
- return cl_page_top_trusted(page);
-}
-EXPORT_SYMBOL(cl_page_top);
-
const struct cl_page_slice *cl_page_at(const struct cl_page *page,
const struct lu_device_type *dtype)
{
@@ -682,26 +384,43 @@ EXPORT_SYMBOL(cl_page_at);
int (*__method)_proto; \
\
__result = 0; \
- __page = cl_page_top(__page); \
- do { \
- list_for_each_entry(__scan, &__page->cp_layers, \
- cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + \
- __op); \
- if (__method) { \
- __result = (*__method)(__env, __scan, \
- ## __VA_ARGS__); \
- if (__result != 0) \
- break; \
- } \
- } \
- __page = __page->cp_child; \
- } while (__page && __result == 0); \
+ list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + __op); \
+ if (__method) { \
+ __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
+ if (__result != 0) \
+ break; \
+ } \
+ } \
if (__result > 0) \
__result = 0; \
__result; \
})
+#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...) \
+({ \
+ const struct lu_env *__env = (_env); \
+ struct cl_page *__page = (_page); \
+ const struct cl_page_slice *__scan; \
+ int __result; \
+ ptrdiff_t __op = (_op); \
+ int (*__method)_proto; \
+ \
+ __result = 0; \
+ list_for_each_entry_reverse(__scan, &__page->cp_layers, \
+ cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + __op); \
+ if (__method) { \
+ __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
+ if (__result != 0) \
+ break; \
+ } \
+ } \
+ if (__result > 0) \
+ __result = 0; \
+ __result; \
+})
+
#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \
do { \
const struct lu_env *__env = (_env); \
@@ -710,18 +429,11 @@ do { \
ptrdiff_t __op = (_op); \
void (*__method)_proto; \
\
- __page = cl_page_top(__page); \
- do { \
- list_for_each_entry(__scan, &__page->cp_layers, \
- cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + \
- __op); \
- if (__method) \
- (*__method)(__env, __scan, \
- ## __VA_ARGS__); \
- } \
- __page = __page->cp_child; \
- } while (__page); \
+ list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + __op); \
+ if (__method) \
+ (*__method)(__env, __scan, ## __VA_ARGS__); \
+ } \
} while (0)
#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...) \
@@ -732,20 +444,11 @@ do { \
ptrdiff_t __op = (_op); \
void (*__method)_proto; \
\
- /* get to the bottom page. */ \
- while (__page->cp_child) \
- __page = __page->cp_child; \
- do { \
- list_for_each_entry_reverse(__scan, &__page->cp_layers, \
- cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + \
- __op); \
- if (__method) \
- (*__method)(__env, __scan, \
- ## __VA_ARGS__); \
- } \
- __page = __page->cp_parent; \
- } while (__page); \
+ list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
+ __method = *(void **)((char *)__scan->cpl_ops + __op); \
+ if (__method) \
+ (*__method)(__env, __scan, ## __VA_ARGS__); \
+ } \
} while (0)
static int cl_page_invoke(const struct lu_env *env,
@@ -771,20 +474,17 @@ static void cl_page_invoid(const struct lu_env *env,
static void cl_page_owner_clear(struct cl_page *page)
{
- for (page = cl_page_top(page); page; page = page->cp_child) {
- if (page->cp_owner) {
- LASSERT(page->cp_owner->ci_owned_nr > 0);
- page->cp_owner->ci_owned_nr--;
- page->cp_owner = NULL;
- page->cp_task = NULL;
- }
+ if (page->cp_owner) {
+ LASSERT(page->cp_owner->ci_owned_nr > 0);
+ page->cp_owner->ci_owned_nr--;
+ page->cp_owner = NULL;
+ page->cp_task = NULL;
}
}
static void cl_page_owner_set(struct cl_page *page)
{
- for (page = cl_page_top(page); page; page = page->cp_child)
- page->cp_owner->ci_owned_nr++;
+ page->cp_owner->ci_owned_nr++;
}
void cl_page_disown0(const struct lu_env *env,
@@ -794,7 +494,7 @@ void cl_page_disown0(const struct lu_env *env,
state = pg->cp_state;
PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
- PINVRNT(env, pg, cl_page_invariant(pg));
+ PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
cl_page_owner_clear(pg);
if (state == CPS_OWNED)
@@ -815,8 +515,9 @@ void cl_page_disown0(const struct lu_env *env,
*/
int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
{
+ struct cl_io *top = cl_io_top((struct cl_io *)io);
LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
- return pg->cp_state == CPS_OWNED && pg->cp_owner == io;
+ return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
}
EXPORT_SYMBOL(cl_page_is_owned);
@@ -847,7 +548,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
PINVRNT(env, pg, !cl_page_is_owned(pg, io));
- pg = cl_page_top(pg);
io = cl_io_top(io);
if (pg->cp_state == CPS_FREEING) {
@@ -861,7 +561,7 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
if (result == 0) {
PASSERT(env, pg, !pg->cp_owner);
PASSERT(env, pg, !pg->cp_req);
- pg->cp_owner = io;
+ pg->cp_owner = cl_io_top(io);
pg->cp_task = current;
cl_page_owner_set(pg);
if (pg->cp_state != CPS_FREEING) {
@@ -914,12 +614,11 @@ void cl_page_assume(const struct lu_env *env,
{
PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
- pg = cl_page_top(pg);
io = cl_io_top(io);
cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
PASSERT(env, pg, !pg->cp_owner);
- pg->cp_owner = io;
+ pg->cp_owner = cl_io_top(io);
pg->cp_task = current;
cl_page_owner_set(pg);
cl_page_state_set(env, pg, CPS_OWNED);
@@ -943,7 +642,6 @@ void cl_page_unassume(const struct lu_env *env,
PINVRNT(env, pg, cl_page_is_owned(pg, io));
PINVRNT(env, pg, cl_page_invariant(pg));
- pg = cl_page_top(pg);
io = cl_io_top(io);
cl_page_owner_clear(pg);
cl_page_state_set(env, pg, CPS_CACHED);
@@ -968,9 +666,9 @@ EXPORT_SYMBOL(cl_page_unassume);
void cl_page_disown(const struct lu_env *env,
struct cl_io *io, struct cl_page *pg)
{
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
+ pg->cp_state == CPS_FREEING);
- pg = cl_page_top(pg);
io = cl_io_top(io);
cl_page_disown0(env, io, pg);
}
@@ -1001,12 +699,8 @@ EXPORT_SYMBOL(cl_page_discard);
* pages, e.g,. in a error handling cl_page_find()->cl_page_delete0()
* path. Doesn't check page invariant.
*/
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
- int radix)
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
{
- struct cl_page *tmp = pg;
-
- PASSERT(env, pg, pg == cl_page_top(pg));
PASSERT(env, pg, pg->cp_state != CPS_FREEING);
/*
@@ -1014,41 +708,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
*/
cl_page_owner_clear(pg);
- /*
- * unexport the page firstly before freeing it so that
- * the page content is considered to be invalid.
- * We have to do this because a CPS_FREEING cl_page may
- * be NOT under the protection of a cl_lock.
- * Afterwards, if this page is found by other threads, then this
- * page will be forced to reread.
- */
- cl_page_export(env, pg, 0);
cl_page_state_set0(env, pg, CPS_FREEING);
- CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
- (const struct lu_env *, const struct cl_page_slice *));
-
- if (tmp->cp_type == CPT_CACHEABLE) {
- if (!radix)
- /* !radix means that @pg is not yet in the radix tree,
- * skip removing it.
- */
- tmp = pg->cp_child;
- for (; tmp; tmp = tmp->cp_child) {
- void *value;
- struct cl_object_header *hdr;
-
- hdr = cl_object_header(tmp->cp_obj);
- spin_lock(&hdr->coh_page_guard);
- value = radix_tree_delete(&hdr->coh_tree,
- tmp->cp_index);
- PASSERT(env, tmp, value == tmp);
- PASSERT(env, tmp, hdr->coh_pages > 0);
- hdr->coh_pages--;
- spin_unlock(&hdr->coh_page_guard);
- cl_page_put(env, tmp);
- }
- }
+ CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
+ (const struct lu_env *,
+ const struct cl_page_slice *));
}
/**
@@ -1070,7 +734,6 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
* Once page reaches cl_page_state::CPS_FREEING, all remaining references will
* drain after some time, at which point page will be recycled.
*
- * \pre pg == cl_page_top(pg)
* \pre VM page is locked
* \post pg->cp_state == CPS_FREEING
*
@@ -1079,30 +742,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
{
PINVRNT(env, pg, cl_page_invariant(pg));
- cl_page_delete0(env, pg, 1);
+ cl_page_delete0(env, pg);
}
EXPORT_SYMBOL(cl_page_delete);
/**
- * Unmaps page from user virtual memory.
- *
- * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The
- * layer responsible for VM interaction has to unmap page from user space
- * virtual memory.
- *
- * \see cl_page_operations::cpo_unmap()
- */
-int cl_page_unmap(const struct lu_env *env,
- struct cl_io *io, struct cl_page *pg)
-{
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
- PINVRNT(env, pg, cl_page_invariant(pg));
-
- return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap));
-}
-EXPORT_SYMBOL(cl_page_unmap);
-
-/**
* Marks page up-to-date.
*
* Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
@@ -1129,7 +773,6 @@ int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
int result;
const struct cl_page_slice *slice;
- pg = cl_page_top_trusted((struct cl_page *)pg);
slice = container_of(pg->cp_layers.next,
const struct cl_page_slice, cpl_linkage);
PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
@@ -1241,7 +884,7 @@ void cl_page_completion(const struct lu_env *env,
cl_page_put(env, pg);
if (anchor)
- cl_sync_io_note(anchor, ioret);
+ cl_sync_io_note(env, anchor, ioret);
}
EXPORT_SYMBOL(cl_page_completion);
@@ -1276,44 +919,6 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
EXPORT_SYMBOL(cl_page_make_ready);
/**
- * Notify layers that high level io decided to place this page into a cache
- * for future transfer.
- *
- * The layer implementing transfer engine (osc) has to register this page in
- * its queues.
- *
- * \pre cl_page_is_owned(pg, io)
- * \post cl_page_is_owned(pg, io)
- *
- * \see cl_page_operations::cpo_cache_add()
- */
-int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg, enum cl_req_type crt)
-{
- const struct cl_page_slice *scan;
- int result = 0;
-
- PINVRNT(env, pg, crt < CRT_NR);
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
- PINVRNT(env, pg, cl_page_invariant(pg));
-
- if (crt >= CRT_NR)
- return -EINVAL;
-
- list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
- if (!scan->cpl_ops->io[crt].cpo_cache_add)
- continue;
-
- result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
- if (result != 0)
- break;
- }
- CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
- return result;
-}
-EXPORT_SYMBOL(cl_page_cache_add);
-
-/**
* Called if a pge is being written back by kernel's intention.
*
* \pre cl_page_is_owned(pg, io)
@@ -1344,68 +949,21 @@ EXPORT_SYMBOL(cl_page_flush);
* \see cl_page_operations::cpo_is_under_lock()
*/
int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
+ struct cl_page *page, pgoff_t *max_index)
{
int rc;
PINVRNT(env, page, cl_page_invariant(page));
- rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock),
- (const struct lu_env *,
- const struct cl_page_slice *, struct cl_io *),
- io);
- PASSERT(env, page, rc != 0);
+ rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
+ (const struct lu_env *,
+ const struct cl_page_slice *,
+ struct cl_io *, pgoff_t *),
+ io, max_index);
return rc;
}
EXPORT_SYMBOL(cl_page_is_under_lock);
-static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, void *cbdata)
-{
- cl_page_own(env, io, page);
- cl_page_unmap(env, io, page);
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
- return CLP_GANG_OKAY;
-}
-
-/**
- * Purges all cached pages belonging to the object \a obj.
- */
-int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
-{
- struct cl_thread_info *info;
- struct cl_object *obj = cl_object_top(clobj);
- struct cl_io *io;
- int result;
-
- info = cl_env_info(env);
- io = &info->clt_io;
-
- /*
- * initialize the io. This is ugly since we never do IO in this
- * function, we just make cl_page_list functions happy. -jay
- */
- io->ci_obj = obj;
- io->ci_ignore_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, obj);
- if (result != 0) {
- cl_io_fini(env, io);
- return io->ci_result;
- }
-
- do {
- result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
- page_prune_cb, NULL);
- if (result == CLP_GANG_RESCHED)
- cond_resched();
- } while (result != CLP_GANG_OKAY);
-
- cl_io_fini(env, io);
- return result;
-}
-EXPORT_SYMBOL(cl_pages_prune);
-
/**
* Tells transfer engine that only part of a page is to be transmitted.
*
@@ -1431,9 +989,8 @@ void cl_page_header_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_page *pg)
{
(*printer)(env, cookie,
- "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n",
+ "page@%p[%d %p %d %d %d %p %p %#x]\n",
pg, atomic_read(&pg->cp_ref), pg->cp_obj,
- pg->cp_index, pg->cp_parent, pg->cp_child,
pg->cp_state, pg->cp_error, pg->cp_type,
pg->cp_owner, pg->cp_req, pg->cp_flags);
}
@@ -1445,11 +1002,7 @@ EXPORT_SYMBOL(cl_page_header_print);
void cl_page_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_page *pg)
{
- struct cl_page *scan;
-
- for (scan = cl_page_top((struct cl_page *)pg); scan;
- scan = scan->cp_child)
- cl_page_header_print(env, cookie, printer, scan);
+ cl_page_header_print(env, cookie, printer, pg);
CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
(const struct lu_env *env,
const struct cl_page_slice *slice,
@@ -1509,21 +1062,59 @@ EXPORT_SYMBOL(cl_page_size);
* \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
*/
void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
- struct cl_object *obj,
+ struct cl_object *obj, pgoff_t index,
const struct cl_page_operations *ops)
{
list_add_tail(&slice->cpl_linkage, &page->cp_layers);
slice->cpl_obj = obj;
+ slice->cpl_index = index;
slice->cpl_ops = ops;
slice->cpl_page = page;
}
EXPORT_SYMBOL(cl_page_slice_add);
-int cl_page_init(void)
+/**
+ * Allocate and initialize cl_cache, called by ll_init_sbi().
+ */
+struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
{
- return 0;
+ struct cl_client_cache *cache = NULL;
+
+ cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+ if (!cache)
+ return NULL;
+
+ /* Initialize cache data */
+ atomic_set(&cache->ccc_users, 1);
+ cache->ccc_lru_max = lru_page_max;
+ atomic_set(&cache->ccc_lru_left, lru_page_max);
+ spin_lock_init(&cache->ccc_lru_lock);
+ INIT_LIST_HEAD(&cache->ccc_lru);
+
+ atomic_set(&cache->ccc_unstable_nr, 0);
+ init_waitqueue_head(&cache->ccc_unstable_waitq);
+
+ return cache;
+}
+EXPORT_SYMBOL(cl_cache_init);
+
+/**
+ * Increase cl_cache refcount
+ */
+void cl_cache_incref(struct cl_client_cache *cache)
+{
+ atomic_inc(&cache->ccc_users);
}
+EXPORT_SYMBOL(cl_cache_incref);
-void cl_page_fini(void)
+/**
+ * Decrease cl_cache refcount and free the cache if refcount=0.
+ * Since llite, lov and osc all hold cl_cache refcount,
+ * the free will not cause race. (LU-6173)
+ */
+void cl_cache_decref(struct cl_client_cache *cache)
{
+ if (atomic_dec_and_test(&cache->ccc_users))
+ kfree(cache);
}
+EXPORT_SYMBOL(cl_cache_decref);
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index c2cf015962dd..d9d2a1952b8b 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -60,6 +56,8 @@ unsigned int obd_dump_on_eviction;
EXPORT_SYMBOL(obd_dump_on_eviction);
unsigned int obd_max_dirty_pages = 256;
EXPORT_SYMBOL(obd_max_dirty_pages);
+atomic_t obd_unstable_pages;
+EXPORT_SYMBOL(obd_unstable_pages);
atomic_t obd_dirty_pages;
EXPORT_SYMBOL(obd_dirty_pages);
unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */
@@ -335,7 +333,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
err = 0;
goto out;
}
-
}
if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
@@ -461,7 +458,7 @@ static int obd_init_checks(void)
CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
ret = -EINVAL;
}
- if ((u64val & ~CFS_PAGE_MASK) >= PAGE_SIZE) {
+ if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
CWARN("mask failed: u64val %llu >= %llu\n", u64val,
(__u64)PAGE_SIZE);
ret = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c
index 43a7f7a79b35..8acf67239fa8 100644
--- a/drivers/staging/lustre/lustre/obdclass/debug.c
+++ b/drivers/staging/lustre/lustre/obdclass/debug.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -68,8 +64,8 @@ int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id)
LASSERT(addr);
- ne_off = le64_to_cpu (off);
- id = le64_to_cpu (id);
+ ne_off = le64_to_cpu(off);
+ id = le64_to_cpu(id);
if (memcmp(addr, (char *)&ne_off, LPDS)) {
CDEBUG(D_ERROR, "%s: id %#llx offset %llu off: %#llx != %#llx\n",
who, id, off, *(__u64 *)addr, ne_off);
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
index cf97b8f06764..99c2da632b51 100644
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -604,7 +600,6 @@ int obd_init_caches(void)
out:
obd_cleanup_caches();
return -ENOMEM;
-
}
/* map connection to client */
diff --git a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c b/drivers/staging/lustre/lustre/obdclass/kernelcomm.c
index 8405eccdac19..a0f65c470f4d 100644
--- a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c
+++ b/drivers/staging/lustre/lustre/obdclass/kernelcomm.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
index 8eddf206f1ed..33342bfcc90e 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -158,9 +154,7 @@ int obd_ioctl_popdata(void __user *arg, void *data, int len)
{
int err;
- err = copy_to_user(arg, data, len);
- if (err)
- err = -EFAULT;
+ err = copy_to_user(arg, data, len) ? -EFAULT : 0;
return err;
}
EXPORT_SYMBOL(obd_ioctl_popdata);
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
index b41b65e2f021..c6cc6a7666e3 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
index e6bf414a4444..8f70dd2686f9 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c
index 992573eae1b1..1784ca063428 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -123,8 +119,10 @@ static int llog_read_header(const struct lu_env *env,
handle->lgh_last_idx = 0; /* header is record with index 0 */
llh->llh_count = 1; /* for the header record */
llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
- llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
- llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
+ llh->llh_hdr.lrh_len = LLOG_CHUNK_SIZE;
+ llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
+ llh->llh_hdr.lrh_index = 0;
+ llh->llh_tail.lrt_index = 0;
llh->llh_timestamp = ktime_get_real_seconds();
if (uuid)
memcpy(&llh->llh_tgtuuid, uuid,
@@ -265,7 +263,6 @@ repeat:
for (rec = (struct llog_rec_hdr *)buf;
(char *)rec < buf + LLOG_CHUNK_SIZE;
rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)) {
-
CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
rec, rec->lrh_type);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_cat.c b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
index c27d4ec1df9e..a82a2950295a 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_cat.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_internal.h b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
index 7fb48dda355e..f7949525d952 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_internal.h
+++ b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
index 826623f528da..6ace7e097859 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
index 967ba2e1bfcb..f7b9b190350c 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_swab.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
index d93f42fee420..279b625f1afe 100644
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -49,7 +45,7 @@
static const char * const obd_connect_names[] = {
"read_only",
"lov_index",
- "unused",
+ "connect_from_mds",
"write_grant",
"server_lock",
"version",
@@ -122,6 +118,56 @@ int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep)
}
EXPORT_SYMBOL(obd_connect_flags2str);
+/**
+ * Print the connect data of an import into a seq_file.
+ *
+ * Always emits the "connect_data:" header followed by the raw connect flags
+ * and the ocd_instance value, then one line (or group of lines) for each
+ * optional field whose OBD_CONNECT_* bit is set in
+ * \a ocd->ocd_connect_flags.
+ *
+ * \param[in] m		seq_file to print into
+ * \param[in] ocd	connect data to describe; must not be NULL (asserted)
+ */
+static void obd_connect_data_seqprint(struct seq_file *m,
+				      struct obd_connect_data *ocd)
+{
+	/*
+	 * Keep the flags at full width: ocd_connect_flags is printed with
+	 * %llx below, so copying it into an int would drop the upper bits
+	 * and, whenever bit 31 is set, sign-extend into them when tested
+	 * against a 64-bit mask.
+	 * NOTE(review): the OBD_CONNECT_* masks are presumably 64-bit
+	 * constants, some above bit 31 -- confirm against their definitions.
+	 */
+	__u64 flags;
+
+	LASSERT(ocd);
+	flags = ocd->ocd_connect_flags;
+
+	seq_printf(m, " connect_data:\n"
+		   " flags: %llx\n"
+		   " instance: %u\n",
+		   ocd->ocd_connect_flags,
+		   ocd->ocd_instance);
+	if (flags & OBD_CONNECT_VERSION)
+		seq_printf(m, " target_version: %u.%u.%u.%u\n",
+			   OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+			   OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+			   OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+			   OBD_OCD_VERSION_FIX(ocd->ocd_version));
+	if (flags & OBD_CONNECT_MDS)
+		seq_printf(m, " mdt_index: %d\n", ocd->ocd_group);
+	if (flags & OBD_CONNECT_GRANT)
+		seq_printf(m, " initial_grant: %d\n", ocd->ocd_grant);
+	if (flags & OBD_CONNECT_INDEX)
+		seq_printf(m, " target_index: %u\n", ocd->ocd_index);
+	if (flags & OBD_CONNECT_BRW_SIZE)
+		seq_printf(m, " max_brw_size: %d\n", ocd->ocd_brw_size);
+	if (flags & OBD_CONNECT_IBITS)
+		seq_printf(m, " ibits_known: %llx\n",
+			   ocd->ocd_ibits_known);
+	if (flags & OBD_CONNECT_GRANT_PARAM)
+		seq_printf(m, " grant_block_size: %d\n"
+			   " grant_inode_size: %d\n"
+			   " grant_extent_overhead: %d\n",
+			   ocd->ocd_blocksize,
+			   ocd->ocd_inodespace,
+			   ocd->ocd_grant_extent);
+	if (flags & OBD_CONNECT_TRANSNO)
+		seq_printf(m, " first_transno: %llx\n",
+			   ocd->ocd_transno);
+	if (flags & OBD_CONNECT_CKSUM)
+		seq_printf(m, " cksum_types: %#x\n",
+			   ocd->ocd_cksum_types);
+	if (flags & OBD_CONNECT_MAX_EASIZE)
+		seq_printf(m, " max_easize: %d\n", ocd->ocd_max_easize);
+	if (flags & OBD_CONNECT_MAXBYTES)
+		seq_printf(m, " max_object_bytes: %llx\n",
+			   ocd->ocd_maxbytes);
+}
+
int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
int mult)
{
@@ -624,6 +670,7 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
struct obd_device *obd = data;
struct obd_import *imp;
struct obd_import_conn *conn;
+ struct obd_connect_data *ocd;
int j;
int k;
int rw = 0;
@@ -635,9 +682,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
return rc;
imp = obd->u.cli.cl_import;
+ ocd = &imp->imp_connect_data;
- seq_printf(m,
- "import:\n"
+ seq_printf(m, "import:\n"
" name: %s\n"
" target: %s\n"
" state: %s\n"
@@ -649,9 +696,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
imp->imp_connect_data.ocd_instance);
obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags,
", ");
- seq_printf(m,
- " ]\n"
- " import_flags: [ ");
+ seq_printf(m, " ]\n");
+ obd_connect_data_seqprint(m, ocd);
+ seq_printf(m, " import_flags: [ ");
obd_import_flags2str(imp, m);
seq_printf(m,
@@ -694,8 +741,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
do_div(sum, ret.lc_count);
ret.lc_sum = sum;
- } else
+ } else {
ret.lc_sum = 0;
+ }
seq_printf(m,
" rpcs:\n"
" inflight: %u\n"
@@ -1471,10 +1519,10 @@ EXPORT_SYMBOL(lprocfs_oh_tally);
void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
{
- unsigned int val;
+ unsigned int val = 0;
- for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++)
- ;
+ if (likely(value != 0))
+ val = min(fls(value - 1), OBD_HIST_MAX);
lprocfs_oh_tally(oh, val);
}
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 978568ada8e9..9b03059f34d6 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -55,6 +51,7 @@
#include "../include/lustre_disk.h"
#include "../include/lustre_fid.h"
#include "../include/lu_object.h"
+#include "../include/cl_object.h"
#include "../include/lu_ref.h"
#include <linux/list.h>
@@ -103,7 +100,6 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) {
if (lu_object_is_dying(top)) {
-
/*
* somebody may be waiting for this, currently only
* used for cl_object, see cl_object_put_last().
@@ -357,7 +353,6 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
if (count > 0 && --count == 0)
break;
-
}
cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1);
cond_resched();
@@ -715,8 +710,9 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env,
obj = lu_object_locate(top->lo_header, dev->ld_type);
if (!obj)
lu_object_put(env, top);
- } else
+ } else {
obj = top;
+ }
return obj;
}
EXPORT_SYMBOL(lu_object_find_slice);
@@ -935,7 +931,7 @@ static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
* Initialize site \a s, with \a d as the top level device.
*/
#define LU_SITE_BITS_MIN 12
-#define LU_SITE_BITS_MAX 24
+#define LU_SITE_BITS_MAX 19
/**
* total 256 buckets, we don't want too many buckets because:
* - consume too much memory
@@ -1468,6 +1464,7 @@ void lu_context_key_quiesce(struct lu_context_key *key)
/*
* XXX layering violation.
*/
+ cl_env_cache_purge(~0);
key->lct_tags |= LCT_QUIESCENT;
/*
* XXX memory barrier has to go here.
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_ref.c b/drivers/staging/lustre/lustre/obdclass/lu_ref.c
index 993697b660f6..e9f6040d19eb 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_ref.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_ref.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
index 403ceea06186..082f530c527c 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
index 5f812460b3ea..5974a9bf77c0 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -163,8 +159,9 @@ int class_del_uuid(const char *uuid)
break;
}
}
- } else
+ } else {
list_splice_init(&g_uuid_list, &deathrow);
+ }
spin_unlock(&g_uuid_lock);
if (uuid && list_empty(&deathrow)) {
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c
index 5395e994deab..0eab1236501b 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_config.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -606,7 +602,7 @@ static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
return rc;
}
-LIST_HEAD(lustre_profile_list);
+static LIST_HEAD(lustre_profile_list);
struct lustre_profile *class_get_profile(const char *prof)
{
@@ -961,7 +957,6 @@ int class_process_config(struct lustre_cfg *lcfg)
default: {
err = obd_process_config(obd, sizeof(*lcfg), lcfg);
goto out;
-
}
}
out:
@@ -1001,7 +996,13 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
for (i = 1; i < lcfg->lcfg_bufcount; i++) {
key = lustre_cfg_buf(lcfg, i);
/* Strip off prefix */
- class_match_param(key, prefix, &key);
+ if (class_match_param(key, prefix, &key)) {
+ /*
+ * If the prefix doesn't match, return error so we
+ * can pass it down the stack
+ */
+ return -ENOSYS;
+ }
sval = strchr(key, '=');
if (!sval || (*(sval + 1) == 0)) {
CERROR("Can't parse param %s (missing '=')\n", key);
@@ -1016,8 +1017,8 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
/* Search proc entries */
while (lvars[j].name) {
var = &lvars[j];
- if (!class_match_param(key, var->name, NULL)
- && keylen == strlen(var->name)) {
+ if (!class_match_param(key, var->name, NULL) &&
+ keylen == strlen(var->name)) {
matched++;
rc = -EROFS;
if (var->fops && var->fops->write) {
@@ -1034,18 +1035,14 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
j++;
}
if (!matched) {
- /* If the prefix doesn't match, return error so we
- * can pass it down the stack
- */
- if (strnchr(key, keylen, '.'))
- return -ENOSYS;
- CERROR("%s: unknown param %s\n",
+ CERROR("%.*s: %s unknown param %s\n",
+ (int)strlen(prefix) - 1, prefix,
(char *)lustre_cfg_string(lcfg, 0), key);
/* rc = -EINVAL; continue parsing other params */
skip++;
} else if (rc < 0) {
- CERROR("writing proc entry %s err %d\n",
- var->name, rc);
+ CERROR("%s: error writing proc entry '%s': rc = %d\n",
+ prefix, var->name, rc);
rc = 0;
} else {
CDEBUG(D_CONFIG, "%s.%.*s: Set parameter %.*s=%s\n",
@@ -1076,7 +1073,7 @@ int class_config_llog_handler(const struct lu_env *env,
{
struct config_llog_instance *clli = data;
int cfg_len = rec->lrh_len;
- char *cfg_buf = (char *) (rec + 1);
+ char *cfg_buf = (char *)(rec + 1);
int rc = 0;
switch (rec->lrh_type) {
@@ -1350,6 +1347,7 @@ static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf,
lustre_cfg_string(lcfg, i));
}
}
+ ptr += snprintf(ptr, end - ptr, "\n");
/* return consumed bytes */
rc = ptr - buf;
return rc;
@@ -1368,7 +1366,7 @@ int class_config_dump_handler(const struct lu_env *env,
if (rec->lrh_type == OBD_CFG_REC) {
class_config_parse_rec(rec, outstr, 256);
- LCONSOLE(D_WARNING, " %s\n", outstr);
+ LCONSOLE(D_WARNING, " %s", outstr);
} else {
LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type);
rc = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
index d3e28a389ac1..aa84a50e9904 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -102,7 +98,7 @@ int lustre_process_log(struct super_block *sb, char *logname,
LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n",
mgc->obd_name, logname, rc);
- if (rc)
+ else if (rc)
LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
mgc->obd_name, logname,
rc);
@@ -192,7 +188,7 @@ static int lustre_start_simple(char *obdname, char *type, char *uuid,
return rc;
}
-DEFINE_MUTEX(mgc_start_lock);
+static DEFINE_MUTEX(mgc_start_lock);
/** Set up a mgc obd to process startup logs
*
@@ -307,7 +303,8 @@ int lustre_start_mgc(struct super_block *sb)
while (class_parse_nid(ptr, &nid, &ptr) == 0) {
rc = do_lcfg(mgcname, nid,
LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
- i++;
+ if (!rc)
+ i++;
/* Stop at the first failover nid */
if (*ptr == ':')
break;
@@ -345,16 +342,18 @@ int lustre_start_mgc(struct super_block *sb)
sprintf(niduuid, "%s_%x", mgcname, i);
j = 0;
while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
- j++;
- rc = do_lcfg(mgcname, nid,
- LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
+ rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid,
+ NULL, NULL, NULL);
+ if (!rc)
+ ++j;
if (*ptr == ':')
break;
}
if (j > 0) {
rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
niduuid, NULL, NULL, NULL);
- i++;
+ if (!rc)
+ i++;
} else {
/* at ":/fsname" */
break;
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c
index e6436cb4ac62..8583a4a8c206 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -185,8 +181,7 @@ void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid)
op_data->op_attr.ia_valid |= ATTR_BLOCKS;
}
if (valid & OBD_MD_FLFLAGS) {
- ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
- oa->o_flags;
+ op_data->op_attr_flags = oa->o_flags;
op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
}
}
diff --git a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
index fb4e3ae845e0..4bad1fa27d40 100644
--- a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
+++ b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdclass/uuid.c b/drivers/staging/lustre/lustre/obdclass/uuid.c
index b0b0157a6334..abd9b1ae72cd 100644
--- a/drivers/staging/lustre/lustre/obdclass/uuid.c
+++ b/drivers/staging/lustre/lustre/obdclass/uuid.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index 1e83669c204d..5b29c4a44fe5 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -81,7 +77,6 @@ struct echo_object_conf {
struct echo_page {
struct cl_page_slice ep_cl;
struct mutex ep_lock;
- struct page *ep_vmpage;
};
struct echo_lock {
@@ -164,15 +159,13 @@ static int cl_echo_object_put(struct echo_object *eco);
static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
struct page **pages, int npages, int async);
-static struct echo_thread_info *echo_env_info(const struct lu_env *env);
-
struct echo_thread_info {
struct echo_object_conf eti_conf;
struct lustre_md eti_md;
struct cl_2queue eti_queue;
struct cl_io eti_io;
- struct cl_lock_descr eti_descr;
+ struct cl_lock eti_lock;
struct lu_fid eti_fid;
struct lu_fid eti_fid2;
};
@@ -219,12 +212,6 @@ static struct lu_kmem_descr echo_caches[] = {
*
* @{
*/
-static struct page *echo_page_vmpage(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- return cl2echo_page(slice)->ep_vmpage;
-}
-
static int echo_page_own(const struct lu_env *env,
const struct cl_page_slice *slice,
struct cl_io *io, int nonblock)
@@ -273,12 +260,10 @@ static void echo_page_completion(const struct lu_env *env,
static void echo_page_fini(const struct lu_env *env,
struct cl_page_slice *slice)
{
- struct echo_page *ep = cl2echo_page(slice);
struct echo_object *eco = cl2echo_obj(slice->cpl_obj);
- struct page *vmpage = ep->ep_vmpage;
atomic_dec(&eco->eo_npages);
- put_page(vmpage);
+ put_page(slice->cpl_page->cp_vmpage);
}
static int echo_page_prep(const struct lu_env *env,
@@ -295,7 +280,8 @@ static int echo_page_print(const struct lu_env *env,
struct echo_page *ep = cl2echo_page(slice);
(*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n",
- ep, mutex_is_locked(&ep->ep_lock), ep->ep_vmpage);
+ ep, mutex_is_locked(&ep->ep_lock),
+ slice->cpl_page->cp_vmpage);
return 0;
}
@@ -303,7 +289,6 @@ static const struct cl_page_operations echo_page_ops = {
.cpo_own = echo_page_own,
.cpo_disown = echo_page_disown,
.cpo_discard = echo_page_discard,
- .cpo_vmpage = echo_page_vmpage,
.cpo_fini = echo_page_fini,
.cpo_print = echo_page_print,
.cpo_is_vmlocked = echo_page_is_vmlocked,
@@ -336,26 +321,8 @@ static void echo_lock_fini(const struct lu_env *env,
kmem_cache_free(echo_lock_kmem, ecl);
}
-static void echo_lock_delete(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct echo_lock *ecl = cl2echo_lock(slice);
-
- LASSERT(list_empty(&ecl->el_chain));
-}
-
-static int echo_lock_fits_into(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *need,
- const struct cl_io *unused)
-{
- return 1;
-}
-
static struct cl_lock_operations echo_lock_ops = {
.clo_fini = echo_lock_fini,
- .clo_delete = echo_lock_delete,
- .clo_fits_into = echo_lock_fits_into
};
/** @} echo_lock */
@@ -367,15 +334,14 @@ static struct cl_lock_operations echo_lock_ops = {
* @{
*/
static int echo_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage)
+ struct cl_page *page, pgoff_t index)
{
struct echo_page *ep = cl_object_page_slice(obj, page);
struct echo_object *eco = cl2echo_obj(obj);
- ep->ep_vmpage = vmpage;
- get_page(vmpage);
+ get_page(page->cp_vmpage);
mutex_init(&ep->ep_lock);
- cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops);
+ cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops);
atomic_inc(&eco->eo_npages);
return 0;
}
@@ -568,6 +534,8 @@ static struct lu_object *echo_object_alloc(const struct lu_env *env,
obj = &echo_obj2cl(eco)->co_lu;
cl_object_header_init(hdr);
+ hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
+
lu_object_init(obj, &hdr->coh_lu, dev);
lu_object_add_top(&hdr->coh_lu, obj);
@@ -694,8 +662,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
struct obd_device *obd = NULL; /* to keep compiler happy */
struct obd_device *tgt;
const char *tgt_type_name;
- int rc;
- int cleanup = 0;
+ int rc, err;
ed = kzalloc(sizeof(*ed), GFP_NOFS);
if (!ed) {
@@ -703,16 +670,14 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
goto out;
}
- cleanup = 1;
cd = &ed->ed_cl;
rc = cl_device_init(cd, t);
if (rc)
- goto out;
+ goto out_free;
cd->cd_lu_dev.ld_ops = &echo_device_lu_ops;
cd->cd_ops = &echo_device_cl_ops;
- cleanup = 2;
obd = class_name2obd(lustre_cfg_string(cfg, 0));
LASSERT(obd);
LASSERT(env);
@@ -722,28 +687,25 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
CERROR("Can not find tgt device %s\n",
lustre_cfg_string(cfg, 1));
rc = -ENODEV;
- goto out;
+ goto out_device_fini;
}
next = tgt->obd_lu_dev;
if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
CERROR("echo MDT client must be run on server\n");
rc = -EOPNOTSUPP;
- goto out;
+ goto out_device_fini;
}
rc = echo_site_init(env, ed);
if (rc)
- goto out;
-
- cleanup = 3;
+ goto out_device_fini;
rc = echo_client_setup(env, obd, cfg);
if (rc)
- goto out;
+ goto out_site_fini;
ed->ed_ec = &obd->u.echo_client;
- cleanup = 4;
/* if echo client is to be stacked upon ost device, the next is
* NULL since ost is not a clio device so far
@@ -755,7 +717,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
if (next) {
if (next->ld_site) {
rc = -EBUSY;
- goto out;
+ goto out_cleanup;
}
next->ld_site = &ed->ed_site->cs_lu;
@@ -763,7 +725,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
next->ld_type->ldt_name,
NULL);
if (rc)
- goto out;
+ goto out_cleanup;
} else {
LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0);
@@ -771,27 +733,19 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
ed->ed_next = next;
return &cd->cd_lu_dev;
-out:
- switch (cleanup) {
- case 4: {
- int rc2;
-
- rc2 = echo_client_cleanup(obd);
- if (rc2)
- CERROR("Cleanup obd device %s error(%d)\n",
- obd->obd_name, rc2);
- }
- case 3:
- echo_site_fini(env, ed);
- case 2:
- cl_device_fini(&ed->ed_cl);
- case 1:
- kfree(ed);
- case 0:
- default:
- break;
- }
+out_cleanup:
+ err = echo_client_cleanup(obd);
+ if (err)
+ CERROR("Cleanup obd device %s error(%d)\n",
+ obd->obd_name, err);
+out_site_fini:
+ echo_site_fini(env, ed);
+out_device_fini:
+ cl_device_fini(&ed->ed_cl);
+out_free:
+ kfree(ed);
+out:
return ERR_PTR(rc);
}
@@ -819,16 +773,7 @@ static void echo_lock_release(const struct lu_env *env,
{
struct cl_lock *clk = echo_lock2cl(ecl);
- cl_lock_get(clk);
- cl_unuse(env, clk);
- cl_lock_release(env, clk, "ec enqueue", ecl->el_object);
- if (!still_used) {
- cl_lock_mutex_get(env, clk);
- cl_lock_cancel(env, clk);
- cl_lock_delete(env, clk);
- cl_lock_mutex_put(env, clk);
- }
- cl_lock_put(env, clk);
+ cl_lock_release(env, clk);
}
static struct lu_device *echo_device_free(const struct lu_env *env,
@@ -1022,9 +967,11 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
info = echo_env_info(env);
io = &info->eti_io;
- descr = &info->eti_descr;
+ lck = &info->eti_lock;
obj = echo_obj2cl(eco);
+ memset(lck, 0, sizeof(*lck));
+ descr = &lck->cll_descr;
descr->cld_obj = obj;
descr->cld_start = cl_index(obj, start);
descr->cld_end = cl_index(obj, end);
@@ -1032,25 +979,20 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
descr->cld_enq_flags = enqflags;
io->ci_obj = obj;
- lck = cl_lock_request(env, io, descr, "ec enqueue", eco);
- if (lck) {
+ rc = cl_lock_request(env, io, lck);
+ if (rc == 0) {
struct echo_client_obd *ec = eco->eo_dev->ed_ec;
struct echo_lock *el;
- rc = cl_wait(env, lck);
- if (rc == 0) {
- el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
- spin_lock(&ec->ec_lock);
- if (list_empty(&el->el_chain)) {
- list_add(&el->el_chain, &ec->ec_locks);
- el->el_cookie = ++ec->ec_unique;
- }
- atomic_inc(&el->el_refcount);
- *cookie = el->el_cookie;
- spin_unlock(&ec->ec_lock);
- } else {
- cl_lock_release(env, lck, "ec enqueue", current);
+ el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
+ spin_lock(&ec->ec_lock);
+ if (list_empty(&el->el_chain)) {
+ list_add(&el->el_chain, &ec->ec_locks);
+ el->el_cookie = ++ec->ec_unique;
}
+ atomic_inc(&el->el_refcount);
+ *cookie = el->el_cookie;
+ spin_unlock(&ec->ec_lock);
}
return rc;
}
@@ -1085,22 +1027,17 @@ static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed,
return 0;
}
-static int cl_echo_async_brw(const struct lu_env *env, struct cl_io *io,
- enum cl_req_type unused, struct cl_2queue *queue)
+static void echo_commit_callback(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page)
{
- struct cl_page *clp;
- struct cl_page *temp;
- int result = 0;
+ struct echo_thread_info *info;
+ struct cl_2queue *queue;
- cl_page_list_for_each_safe(clp, temp, &queue->c2_qin) {
- int rc;
+ info = echo_env_info(env);
+ LASSERT(io == &info->eti_io);
- rc = cl_page_cache_add(env, io, clp, CRT_WRITE);
- if (rc == 0)
- continue;
- result = result ?: rc;
- }
- return result;
+ queue = &info->eti_queue;
+ cl_page_list_add(&queue->c2_qout, page);
}
static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
@@ -1119,7 +1056,7 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
int rc;
int i;
- LASSERT((offset & ~CFS_PAGE_MASK) == 0);
+ LASSERT((offset & ~PAGE_MASK) == 0);
LASSERT(ed->ed_next);
env = cl_env_get(&refcheck);
if (IS_ERR(env))
@@ -1179,7 +1116,9 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
async = async && (typ == CRT_WRITE);
if (async)
- rc = cl_echo_async_brw(env, io, typ, queue);
+ rc = cl_io_commit_async(env, io, &queue->c2_qin,
+ 0, PAGE_SIZE,
+ echo_commit_callback);
else
rc = cl_io_submit_sync(env, io, typ, queue, 0);
CDEBUG(D_INFO, "echo_client %s write returns %d\n",
@@ -1387,7 +1326,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
if (count <= 0 ||
- (count & (~CFS_PAGE_MASK)) != 0)
+ (count & (~PAGE_MASK)) != 0)
return -EINVAL;
/* XXX think again with misaligned I/O */
@@ -1409,7 +1348,6 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
for (i = 0, pgp = pga, off = offset;
i < npages;
i++, pgp++, off += PAGE_SIZE) {
-
LASSERT(!pgp->pg); /* for cleanup */
rc = -ENOMEM;
@@ -1470,7 +1408,7 @@ static int echo_client_prep_commit(const struct lu_env *env,
u64 npages, tot_pages;
int i, ret = 0, brw_flags = 0;
- if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0)
+ if (count <= 0 || (count & (~PAGE_MASK)) != 0)
return -EINVAL;
npages = batch >> PAGE_SHIFT;
@@ -1886,7 +1824,6 @@ static int __init obdecho_init(void)
static void /*__exit*/ obdecho_exit(void)
{
echo_client_exit();
-
}
MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
index a3358c39b2f1..7e83d395b998 100644
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -121,9 +117,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
atomic_add(added, &osc_pool_req_count);
}
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
cli->cl_max_rpcs_in_flight = val;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return count;
}
@@ -139,9 +135,9 @@ static ssize_t max_dirty_mb_show(struct kobject *kobj,
long val;
int mult;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
val = cli->cl_dirty_max;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
mult = 1 << 20;
return lprocfs_read_frac_helper(buf, PAGE_SIZE, val, mult);
@@ -169,10 +165,10 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj,
pages_number > totalram_pages / 4) /* 1/4 of RAM */
return -ERANGE;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT);
osc_wake_cache_waiters(cli);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return count;
}
@@ -222,8 +218,16 @@ static ssize_t osc_cached_mb_seq_write(struct file *file,
return -ERANGE;
rc = atomic_read(&cli->cl_lru_in_list) - pages_number;
- if (rc > 0)
- (void)osc_lru_shrink(cli, rc);
+ if (rc > 0) {
+ struct lu_env *env;
+ int refcheck;
+
+ env = cl_env_get(&refcheck);
+ if (!IS_ERR(env)) {
+ (void)osc_lru_shrink(env, cli, rc, true);
+ cl_env_put(env, &refcheck);
+ }
+ }
return count;
}
@@ -239,9 +243,9 @@ static ssize_t cur_dirty_bytes_show(struct kobject *kobj,
struct client_obd *cli = &dev->u.cli;
int len;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
len = sprintf(buf, "%lu\n", cli->cl_dirty);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return len;
}
@@ -256,9 +260,9 @@ static ssize_t cur_grant_bytes_show(struct kobject *kobj,
struct client_obd *cli = &dev->u.cli;
int len;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
len = sprintf(buf, "%lu\n", cli->cl_avail_grant);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return len;
}
@@ -279,12 +283,12 @@ static ssize_t cur_grant_bytes_store(struct kobject *kobj,
return rc;
/* this is only for shrinking grant */
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
if (val >= cli->cl_avail_grant) {
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return -EINVAL;
}
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
if (cli->cl_import->imp_state == LUSTRE_IMP_FULL)
rc = osc_shrink_grant_to_target(cli, val);
@@ -303,9 +307,9 @@ static ssize_t cur_lost_grant_bytes_show(struct kobject *kobj,
struct client_obd *cli = &dev->u.cli;
int len;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
len = sprintf(buf, "%lu\n", cli->cl_lost_grant);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return len;
}
@@ -577,14 +581,31 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj,
if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) {
return -ERANGE;
}
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
cli->cl_max_pages_per_rpc = val;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return count;
}
LUSTRE_RW_ATTR(max_pages_per_rpc);
+static ssize_t unstable_stats_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct obd_device *dev = container_of(kobj, struct obd_device,
+ obd_kobj);
+ struct client_obd *cli = &dev->u.cli;
+ int pages, mb;
+
+ pages = atomic_read(&cli->cl_unstable_count);
+ mb = (pages * PAGE_SIZE) >> 20;
+
+ return sprintf(buf, "unstable_pages: %8d\n"
+ "unstable_mb: %8d\n", pages, mb);
+}
+LUSTRE_RO_ATTR(unstable_stats);
+
LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags);
LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid);
LPROC_SEQ_FOPS_RO_TYPE(osc, conn_uuid);
@@ -623,7 +644,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
ktime_get_real_ts64(&now);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
seq_printf(seq, "snapshot_time: %llu.%9lu (secs.usecs)\n",
(s64)now.tv_sec, (unsigned long)now.tv_nsec);
@@ -707,7 +728,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
break;
}
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return 0;
}
@@ -794,6 +815,7 @@ static struct attribute *osc_attrs[] = {
&lustre_attr_max_pages_per_rpc.attr,
&lustre_attr_max_rpcs_in_flight.attr,
&lustre_attr_resend_count.attr,
+ &lustre_attr_unstable_stats.attr,
NULL,
};
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index 5f25bf83dcfc..d011135802d5 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -76,6 +72,8 @@ static inline char *ext_flags(struct osc_extent *ext, char *flags)
*buf++ = ext->oe_rw ? 'r' : 'w';
if (ext->oe_intree)
*buf++ = 'i';
+ if (ext->oe_sync)
+ *buf++ = 'S';
if (ext->oe_srvlock)
*buf++ = 's';
if (ext->oe_hp)
@@ -121,9 +119,13 @@ static const char *oes_strings[] = {
__ext->oe_grants, __ext->oe_nr_pages, \
list_empty_marker(&__ext->oe_pages), \
waitqueue_active(&__ext->oe_waitq) ? '+' : '-', \
- __ext->oe_osclock, __ext->oe_mppr, __ext->oe_owner, \
+ __ext->oe_dlmlock, __ext->oe_mppr, __ext->oe_owner, \
/* ----- part 4 ----- */ \
## __VA_ARGS__); \
+ if (lvl == D_ERROR && __ext->oe_dlmlock) \
+ LDLM_ERROR(__ext->oe_dlmlock, "extent: %p", __ext); \
+ else \
+ LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p", __ext); \
} while (0)
#undef EASSERTF
@@ -240,20 +242,25 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
goto out;
}
- if (!ext->oe_osclock && ext->oe_grants > 0) {
+ if (ext->oe_sync && ext->oe_grants > 0) {
rc = 90;
goto out;
}
- if (ext->oe_osclock) {
- struct cl_lock_descr *descr;
+ if (ext->oe_dlmlock) {
+ struct ldlm_extent *extent;
- descr = &ext->oe_osclock->cll_descr;
- if (!(descr->cld_start <= ext->oe_start &&
- descr->cld_end >= ext->oe_max_end)) {
+ extent = &ext->oe_dlmlock->l_policy_data.l_extent;
+ if (!(extent->start <= cl_offset(osc2cl(obj), ext->oe_start) &&
+ extent->end >= cl_offset(osc2cl(obj), ext->oe_max_end))) {
rc = 100;
goto out;
}
+
+ if (!(ext->oe_dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))) {
+ rc = 102;
+ goto out;
+ }
}
if (ext->oe_nr_pages > ext->oe_mppr) {
@@ -276,7 +283,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
page_count = 0;
list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
- pgoff_t index = oap2cl_page(oap)->cp_index;
+ pgoff_t index = osc_index(oap2osc(oap));
++page_count;
if (index > ext->oe_end || index < ext->oe_start) {
rc = 110;
@@ -359,7 +366,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
ext->oe_state = OES_INV;
INIT_LIST_HEAD(&ext->oe_pages);
init_waitqueue_head(&ext->oe_waitq);
- ext->oe_osclock = NULL;
+ ext->oe_dlmlock = NULL;
return ext;
}
@@ -385,9 +392,11 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext)
LASSERT(ext->oe_state == OES_INV);
LASSERT(!ext->oe_intree);
- if (ext->oe_osclock) {
- cl_lock_put(env, ext->oe_osclock);
- ext->oe_osclock = NULL;
+ if (ext->oe_dlmlock) {
+ /* release the "osc_extent" lu_ref taken together with
+ * LDLM_LOCK_GET() in osc_extent_find(); adding a second
+ * one here would leave the reference tracking unbalanced
+ */
+ lu_ref_del(&ext->oe_dlmlock->l_reference,
+ "osc_extent", ext);
+ LDLM_LOCK_PUT(ext->oe_dlmlock);
+ ext->oe_dlmlock = NULL;
}
osc_extent_free(ext);
}
@@ -543,7 +552,7 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur,
if (cur->oe_max_end != victim->oe_max_end)
return -ERANGE;
- LASSERT(cur->oe_osclock == victim->oe_osclock);
+ LASSERT(cur->oe_dlmlock == victim->oe_dlmlock);
ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT;
chunk_start = cur->oe_start >> ppc_bits;
chunk_end = cur->oe_end >> ppc_bits;
@@ -624,10 +633,10 @@ static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
static struct osc_extent *osc_extent_find(const struct lu_env *env,
struct osc_object *obj, pgoff_t index,
int *grants)
-
{
struct client_obd *cli = osc_cli(obj);
- struct cl_lock *lock;
+ struct osc_lock *olck;
+ struct cl_lock_descr *descr;
struct osc_extent *cur;
struct osc_extent *ext;
struct osc_extent *conflict = NULL;
@@ -644,8 +653,12 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
if (!cur)
return ERR_PTR(-ENOMEM);
- lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0);
- LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
+ olck = osc_env_io(env)->oi_write_osclock;
+ LASSERTF(olck, "page %lu is not covered by lock\n", index);
+ LASSERT(olck->ols_state == OLS_GRANTED);
+
+ descr = &olck->ols_cl.cls_lock->cll_descr;
+ LASSERT(descr->cld_mode >= CLM_WRITE);
LASSERT(cli->cl_chunkbits >= PAGE_SHIFT);
ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
@@ -657,19 +670,23 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
max_pages = cli->cl_max_pages_per_rpc;
LASSERT((max_pages & ~chunk_mask) == 0);
max_end = index - (index % max_pages) + max_pages - 1;
- max_end = min_t(pgoff_t, max_end, lock->cll_descr.cld_end);
+ max_end = min_t(pgoff_t, max_end, descr->cld_end);
/* initialize new extent by parameters so far */
cur->oe_max_end = max_end;
cur->oe_start = index & chunk_mask;
cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1;
- if (cur->oe_start < lock->cll_descr.cld_start)
- cur->oe_start = lock->cll_descr.cld_start;
+ if (cur->oe_start < descr->cld_start)
+ cur->oe_start = descr->cld_start;
if (cur->oe_end > max_end)
cur->oe_end = max_end;
- cur->oe_osclock = lock;
cur->oe_grants = 0;
cur->oe_mppr = max_pages;
+ if (olck->ols_dlmlock) {
+ LASSERT(olck->ols_hold);
+ cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock);
+ lu_ref_add(&olck->ols_dlmlock->l_reference, "osc_extent", cur);
+ }
/* grants has been allocated by caller */
LASSERTF(*grants >= chunksize + cli->cl_extent_tax,
@@ -691,7 +708,7 @@ restart:
break;
/* if covering by different locks, no chance to match */
- if (lock != ext->oe_osclock) {
+ if (olck->ols_dlmlock != ext->oe_dlmlock) {
EASSERTF(!overlapped(ext, cur), ext,
EXTSTR"\n", EXTPARA(cur));
@@ -795,7 +812,7 @@ restart:
if (found) {
LASSERT(!conflict);
if (!IS_ERR(found)) {
- LASSERT(found->oe_osclock == cur->oe_osclock);
+ LASSERT(found->oe_dlmlock == cur->oe_dlmlock);
OSC_EXTENT_DUMP(D_CACHE, found,
"found caching ext for %lu.\n", index);
}
@@ -810,7 +827,7 @@ restart:
found = osc_extent_hold(cur);
osc_extent_insert(obj, cur);
OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n",
- index, lock->cll_descr.cld_end);
+ index, descr->cld_end);
}
osc_object_unlock(obj);
@@ -856,6 +873,8 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
ext->oe_rc = rc ?: ext->oe_nr_pages;
EASSERT(ergo(rc == 0, ext->oe_state == OES_RPC), ext);
+
+ osc_lru_add_batch(cli, &ext->oe_pages);
list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) {
list_del_init(&oap->oap_rpc_item);
list_del_init(&oap->oap_pending_item);
@@ -877,10 +896,9 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
* span a whole chunk on the OST side, or our accounting goes
* wrong. Should match the code in filter_grant_check.
*/
- int offset = oap->oap_page_off & ~CFS_PAGE_MASK;
- int count = oap->oap_count + (offset & (blocksize - 1));
- int end = (offset + oap->oap_count) & (blocksize - 1);
-
+ int offset = last_off & ~PAGE_MASK;
+ int count = last_count + (offset & (blocksize - 1));
+ int end = (offset + last_count) & (blocksize - 1);
if (end)
count += blocksize - end;
@@ -943,7 +961,7 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
"%s: wait ext to %d timedout, recovery in progress?\n",
osc_export(obj)->exp_obd->obd_name, state);
- lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ lwi = LWI_INTR(NULL, NULL);
rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state),
&lwi);
}
@@ -990,19 +1008,19 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
/* discard all pages with index greater then trunc_index */
list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) {
- struct cl_page *sub = oap2cl_page(oap);
- struct cl_page *page = cl_page_top(sub);
+ pgoff_t index = osc_index(oap2osc(oap));
+ struct cl_page *page = oap2cl_page(oap);
LASSERT(list_empty(&oap->oap_rpc_item));
/* only discard the pages with their index greater than
* trunc_index, and ...
*/
- if (sub->cp_index < trunc_index ||
- (sub->cp_index == trunc_index && partial)) {
+ if (index < trunc_index ||
+ (index == trunc_index && partial)) {
/* accounting how many pages remaining in the chunk
* so that we can calculate grants correctly. */
- if (sub->cp_index >> ppc_bits == trunc_chunk)
+ if (index >> ppc_bits == trunc_chunk)
++pages_in_chunk;
continue;
}
@@ -1013,7 +1031,6 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
lu_ref_add(&page->cp_reference, "truncate", current);
if (cl_page_own(env, io, page) == 0) {
- cl_page_unmap(env, io, page);
cl_page_discard(env, io, page);
cl_page_disown(env, io, page);
} else {
@@ -1126,7 +1143,9 @@ static int osc_extent_make_ready(const struct lu_env *env,
last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
LASSERT(last->oap_count > 0);
LASSERT(last->oap_page_off + last->oap_count <= PAGE_SIZE);
+ spin_lock(&last->oap_lock);
last->oap_async_flags |= ASYNC_COUNT_STABLE;
+ spin_unlock(&last->oap_lock);
}
/* for the rest of pages, we don't need to call osf_refresh_count()
@@ -1135,7 +1154,9 @@ static int osc_extent_make_ready(const struct lu_env *env,
list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) {
oap->oap_count = PAGE_SIZE - oap->oap_page_off;
+ /* lock the page whose flags are being updated, not
+ * the last page of the extent
+ */
+ spin_lock(&oap->oap_lock);
oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+ spin_unlock(&oap->oap_lock);
}
}
@@ -1256,7 +1277,7 @@ static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap,
int cmd)
{
struct osc_page *opg = oap2osc_page(oap);
- struct cl_page *page = cl_page_top(oap2cl_page(oap));
+ struct cl_page *page = oap2cl_page(oap);
int result;
LASSERT(cmd == OBD_BRW_WRITE); /* no cached reads */
@@ -1271,7 +1292,7 @@ static int osc_refresh_count(const struct lu_env *env,
struct osc_async_page *oap, int cmd)
{
struct osc_page *opg = oap2osc_page(oap);
- struct cl_page *page = oap2cl_page(oap);
+ pgoff_t index = osc_index(oap2osc(oap));
struct cl_object *obj;
struct cl_attr *attr = &osc_env_info(env)->oti_attr;
@@ -1288,10 +1309,10 @@ static int osc_refresh_count(const struct lu_env *env,
if (result < 0)
return result;
kms = attr->cat_kms;
- if (cl_offset(obj, page->cp_index) >= kms)
+ if (cl_offset(obj, index) >= kms)
/* catch race with truncate */
return 0;
- else if (cl_offset(obj, page->cp_index + 1) > kms)
+ else if (cl_offset(obj, index + 1) > kms)
/* catch sub-page write at end of file */
return kms % PAGE_SIZE;
else
@@ -1302,14 +1323,16 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
int cmd, int rc)
{
struct osc_page *opg = oap2osc_page(oap);
- struct cl_page *page = cl_page_top(oap2cl_page(oap));
+ struct cl_page *page = oap2cl_page(oap);
struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
enum cl_req_type crt;
int srvlock;
cmd &= ~OBD_BRW_NOQUOTA;
- LASSERT(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ));
- LASSERT(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE));
+ LASSERTF(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ),
+ "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
+ LASSERTF(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE),
+ "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
LASSERT(opg->ops_transfer_pinned);
/*
@@ -1358,22 +1381,28 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
return 0;
}
-#define OSC_DUMP_GRANT(cli, fmt, args...) do { \
+#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \
struct client_obd *__tmp = (cli); \
- CDEBUG(D_CACHE, "%s: { dirty: %ld/%ld dirty_pages: %d/%d " \
- "dropped: %ld avail: %ld, reserved: %ld, flight: %d } " fmt, \
+ CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d " \
+ "unstable_pages: %d/%d dropped: %ld avail: %ld, " \
+ "reserved: %ld, flight: %d } lru {in list: %d, " \
+ "left: %d, waiters: %d }" fmt, \
__tmp->cl_import->imp_obd->obd_name, \
__tmp->cl_dirty, __tmp->cl_dirty_max, \
atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \
+ atomic_read(&obd_unstable_pages), obd_max_dirty_pages, \
__tmp->cl_lost_grant, __tmp->cl_avail_grant, \
- __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, ##args); \
+ __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \
+ atomic_read(&__tmp->cl_lru_in_list), \
+ atomic_read(&__tmp->cl_lru_busy), \
+ atomic_read(&__tmp->cl_lru_shrinkers), ##args); \
} while (0)
/* caller must hold loi_list_lock */
static void osc_consume_write_grant(struct client_obd *cli,
struct brw_page *pga)
{
- assert_spin_locked(&cli->cl_loi_list_lock.lock);
+ assert_spin_locked(&cli->cl_loi_list_lock);
LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
atomic_inc(&obd_dirty_pages);
cli->cl_dirty += PAGE_SIZE;
@@ -1389,7 +1418,7 @@ static void osc_consume_write_grant(struct client_obd *cli,
static void osc_release_write_grant(struct client_obd *cli,
struct brw_page *pga)
{
- assert_spin_locked(&cli->cl_loi_list_lock.lock);
+ assert_spin_locked(&cli->cl_loi_list_lock);
if (!(pga->flag & OBD_BRW_FROM_GRANT)) {
return;
}
@@ -1408,7 +1437,7 @@ static void osc_release_write_grant(struct client_obd *cli,
* To avoid sleeping with object lock held, it's good for us allocate enough
* grants before entering into critical section.
*
- * client_obd_list_lock held by caller
+ * cli->cl_loi_list_lock held by caller
*/
static int osc_reserve_grant(struct client_obd *cli, unsigned int bytes)
{
@@ -1442,11 +1471,11 @@ static void __osc_unreserve_grant(struct client_obd *cli,
static void osc_unreserve_grant(struct client_obd *cli,
unsigned int reserved, unsigned int unused)
{
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
__osc_unreserve_grant(cli, reserved, unused);
if (unused > 0)
osc_wake_cache_waiters(cli);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
}
/**
@@ -1467,7 +1496,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
{
int grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
atomic_sub(nr_pages, &obd_dirty_pages);
cli->cl_dirty -= nr_pages << PAGE_SHIFT;
cli->cl_lost_grant += lost_grant;
@@ -1479,7 +1508,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
cli->cl_avail_grant += grant;
}
osc_wake_cache_waiters(cli);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n",
lost_grant, cli->cl_lost_grant,
cli->cl_avail_grant, cli->cl_dirty);
@@ -1491,9 +1520,9 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
*/
static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap)
{
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
osc_release_write_grant(cli, &oap->oap_brw_page);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
}
/**
@@ -1506,14 +1535,15 @@ static int osc_enter_cache_try(struct client_obd *cli,
{
int rc;
- OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+ OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
rc = osc_reserve_grant(cli, bytes);
if (rc < 0)
return 0;
if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
- atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
+ atomic_read(&obd_unstable_pages) + 1 +
+ atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
osc_consume_write_grant(cli, &oap->oap_brw_page);
if (transient) {
cli->cl_dirty_transit += PAGE_SIZE;
@@ -1532,9 +1562,9 @@ static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw)
{
int rc;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
rc = list_empty(&ocw->ocw_entry);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return rc;
}
@@ -1551,12 +1581,13 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
struct osc_object *osc = oap->oap_obj;
struct lov_oinfo *loi = osc->oo_oinfo;
struct osc_cache_waiter ocw;
- struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
+ LWI_ON_SIGNAL_NOOP, NULL);
int rc = -EDQUOT;
- OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+ OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
/* force the caller to try sync io. this can jump the list
* of queued writes and create a discontiguous rpc stream
@@ -1587,7 +1618,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) {
list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
ocw.ocw_rc = 0;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
osc_io_unplug_async(env, cli, NULL);
@@ -1596,10 +1627,17 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
- /* l_wait_event is interrupted by signal */
+ /* l_wait_event is interrupted by signal, or timed out */
if (rc < 0) {
+ if (rc == -ETIMEDOUT) {
+ OSC_DUMP_GRANT(D_ERROR, cli,
+ "try to reserve %d.\n", bytes);
+ osc_extent_tree_dump(D_ERROR, osc);
+ rc = -EDQUOT;
+ }
+
list_del_init(&ocw.ocw_entry);
goto out;
}
@@ -1615,8 +1653,8 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
}
}
out:
- client_obd_list_unlock(&cli->cl_loi_list_lock);
- OSC_DUMP_GRANT(cli, "returned %d.\n", rc);
+ spin_unlock(&cli->cl_loi_list_lock);
+ OSC_DUMP_GRANT(D_CACHE, cli, "returned %d.\n", rc);
return rc;
}
@@ -1633,8 +1671,8 @@ void osc_wake_cache_waiters(struct client_obd *cli)
ocw->ocw_rc = -EDQUOT;
/* we can't dirty more */
if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) ||
- (atomic_read(&obd_dirty_pages) + 1 >
- obd_max_dirty_pages)) {
+ (atomic_read(&obd_unstable_pages) + 1 +
+ atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) {
CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n",
cli->cl_dirty,
cli->cl_dirty_max, obd_max_dirty_pages);
@@ -1776,9 +1814,9 @@ static int osc_list_maint(struct client_obd *cli, struct osc_object *osc)
{
int is_ready;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
is_ready = __osc_list_maint(cli, osc);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return is_ready;
}
@@ -1799,13 +1837,103 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
ar->ar_force_sync = 1;
ar->ar_min_xid = ptlrpc_sample_next_xid();
return;
-
}
if (ar->ar_force_sync && (xid >= ar->ar_min_xid))
ar->ar_force_sync = 0;
}
+/**
+ * Performs "unstable" page accounting. This function balances the
+ * increment operations performed in osc_inc_unstable_pages. It is
+ * registered as the RPC request callback, and is executed when the
+ * bulk RPC is committed on the server. Thus at this point, the pages
+ * involved in the bulk transfer are no longer considered unstable.
+ */
+void osc_dec_unstable_pages(struct ptlrpc_request *req)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ int page_count = desc->bd_iov_count;
+ int i;
+
+ /* No unstable page tracking */
+ if (!cli->cl_cache)
+ return;
+
+ LASSERT(page_count >= 0);
+
+ for (i = 0; i < page_count; i++)
+ dec_node_page_state(desc->bd_iov[i].kiov_page,
+ NR_UNSTABLE_NFS);
+
+ atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
+ LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
+
+ atomic_sub(page_count, &cli->cl_unstable_count);
+ LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
+
+ atomic_sub(page_count, &obd_unstable_pages);
+ LASSERT(atomic_read(&obd_unstable_pages) >= 0);
+
+ spin_lock(&req->rq_lock);
+ req->rq_committed = 1;
+ req->rq_unstable = 0;
+ spin_unlock(&req->rq_lock);
+
+ wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
+}
+
+/* "unstable" page accounting. See: osc_dec_unstable_pages. */
+void osc_inc_unstable_pages(struct ptlrpc_request *req)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ long page_count = desc->bd_iov_count;
+ int i;
+
+ /* No unstable page tracking */
+ if (!cli->cl_cache)
+ return;
+
+ LASSERT(page_count >= 0);
+
+ for (i = 0; i < page_count; i++)
+ inc_node_page_state(desc->bd_iov[i].kiov_page,
+ NR_UNSTABLE_NFS);
+
+ LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
+ atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
+
+ LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
+ atomic_add(page_count, &cli->cl_unstable_count);
+
+ LASSERT(atomic_read(&obd_unstable_pages) >= 0);
+ atomic_add(page_count, &obd_unstable_pages);
+
+ spin_lock(&req->rq_lock);
+
+ /*
+ * If the request has already been committed (i.e. brw_commit
+ * called via rq_commit_cb), we need to undo the unstable page
+ * increments we just performed because rq_commit_cb won't be
+ * called again. Otherwise, just set the commit callback so the
+ * unstable page accounting is properly updated when the request
+ * is committed
+ */
+ if (req->rq_committed) {
+ /* Drop lock before calling osc_dec_unstable_pages */
+ spin_unlock(&req->rq_lock);
+ osc_dec_unstable_pages(req);
+ spin_lock(&req->rq_lock);
+ } else {
+ req->rq_unstable = 1;
+ req->rq_commit_cb = osc_dec_unstable_pages;
+ }
+
+ spin_unlock(&req->rq_lock);
+}
+
/* this must be called holding the loi list lock to give coverage to exit_cache,
* async_flag maintenance, and oap_request
*/
@@ -1817,6 +1945,9 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
__u64 xid = 0;
if (oap->oap_request) {
+ if (!rc)
+ osc_inc_unstable_pages(oap->oap_request);
+
xid = ptlrpc_req_xid(oap->oap_request);
ptlrpc_req_finished(oap->oap_request);
oap->oap_request = NULL;
@@ -1829,10 +1960,10 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
oap->oap_interrupted = 0;
if (oap->oap_cmd & OBD_BRW_WRITE && xid > 0) {
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
osc_process_ar(&cli->cl_ar, xid, rc);
osc_process_ar(&loi->loi_ar, xid, rc);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
}
rc = osc_completion(env, oap, oap->oap_cmd, rc);
@@ -2133,9 +2264,8 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli)
}
cl_object_get(obj);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
- lu_object_ref_add_at(&obj->co_lu, &link, "check",
- current);
+ spin_unlock(&cli->cl_loi_list_lock);
+ lu_object_ref_add_at(&obj->co_lu, &link, "check", current);
/* attempt some read/write balancing by alternating between
* reads and writes in an object. The makes_rpc checks here
@@ -2178,11 +2308,10 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli)
osc_object_unlock(osc);
osc_list_maint(cli, osc);
- lu_object_ref_del_at(&obj->co_lu, &link, "check",
- current);
+ lu_object_ref_del_at(&obj->co_lu, &link, "check", current);
cl_object_put(env, obj);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
}
}
@@ -2199,9 +2328,9 @@ static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
* potential stack overrun problem. LU-2859
*/
atomic_inc(&cli->cl_lru_shrinkers);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
osc_check_rpcs(env, cli);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
atomic_dec(&cli->cl_lru_shrinkers);
} else {
CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli);
@@ -2238,9 +2367,9 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
oap->oap_page = page;
oap->oap_obj_off = offset;
- LASSERT(!(offset & ~CFS_PAGE_MASK));
+ LASSERT(!(offset & ~PAGE_MASK));
- if (!client_is_remote(exp) && capable(CFS_CAP_SYS_RESOURCE))
+ if (capable(CFS_CAP_SYS_RESOURCE))
oap->oap_brw_flags = OBD_BRW_NOQUOTA;
INIT_LIST_HEAD(&oap->oap_pending_item);
@@ -2279,8 +2408,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
/* Set the OBD_BRW_SRVLOCK before the page is queued. */
brw_flags |= ops->ops_srvlock ? OBD_BRW_SRVLOCK : 0;
- if (!client_is_remote(osc_export(osc)) &&
- capable(CFS_CAP_SYS_RESOURCE)) {
+ if (capable(CFS_CAP_SYS_RESOURCE)) {
brw_flags |= OBD_BRW_NOQUOTA;
cmd |= OBD_BRW_NOQUOTA;
}
@@ -2306,16 +2434,23 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
return rc;
}
+ if (osc_over_unstable_soft_limit(cli))
+ brw_flags |= OBD_BRW_SOFT_SYNC;
+
oap->oap_cmd = cmd;
oap->oap_page_off = ops->ops_from;
oap->oap_count = ops->ops_to - ops->ops_from;
+ /*
+ * No need to hold a lock here,
+ * since this page is not in any list yet.
+ */
oap->oap_async_flags = 0;
oap->oap_brw_flags = brw_flags;
OSC_IO_DEBUG(osc, "oap %p page %p added for cmd %d\n",
oap, oap->oap_page, oap->oap_cmd & OBD_BRW_RWMASK);
- index = oap2cl_page(oap)->cp_index;
+ index = osc_index(oap2osc(oap));
/* Add this page into extent by the following steps:
* 1. if there exists an active extent for this IO, mostly this page
@@ -2334,9 +2469,9 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
grants = 0;
/* it doesn't need any grant to dirty this page */
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
rc = osc_enter_cache_try(cli, oap, grants, 0);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
if (rc == 0) { /* try failed */
grants = 0;
need_release = 1;
@@ -2427,21 +2562,21 @@ int osc_teardown_async_page(const struct lu_env *env,
LASSERT(oap->oap_magic == OAP_MAGIC);
CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n",
- oap, ops, oap2cl_page(oap)->cp_index);
+ oap, ops, osc_index(oap2osc(oap)));
osc_object_lock(obj);
if (!list_empty(&oap->oap_rpc_item)) {
CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap);
rc = -EBUSY;
} else if (!list_empty(&oap->oap_pending_item)) {
- ext = osc_extent_lookup(obj, oap2cl_page(oap)->cp_index);
+ ext = osc_extent_lookup(obj, osc_index(oap2osc(oap)));
/* only truncated pages are allowed to be taken out.
* See osc_extent_truncate() and osc_cache_truncate_start()
* for details.
*/
if (ext && ext->oe_state != OES_TRUNC) {
OSC_EXTENT_DUMP(D_ERROR, ext, "trunc at %lu.\n",
- oap2cl_page(oap)->cp_index);
+ osc_index(oap2osc(oap)));
rc = -EBUSY;
}
}
@@ -2464,7 +2599,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
struct osc_extent *ext = NULL;
struct osc_object *obj = cl2osc(ops->ops_cl.cpl_obj);
struct cl_page *cp = ops->ops_cl.cpl_page;
- pgoff_t index = cp->cp_index;
+ pgoff_t index = osc_index(ops);
struct osc_async_page *oap = &ops->ops_oap;
bool unplug = false;
int rc = 0;
@@ -2479,8 +2614,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
switch (ext->oe_state) {
case OES_RPC:
case OES_LOCK_DONE:
- CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(cp),
- "flush an in-rpc page?\n");
+ CL_PAGE_DEBUG(D_ERROR, env, cp, "flush an in-rpc page?\n");
LASSERT(0);
break;
case OES_LOCKING:
@@ -2506,7 +2640,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
break;
}
- rc = cl_page_prep(env, io, cl_page_top(cp), CRT_WRITE);
+ rc = cl_page_prep(env, io, cp, CRT_WRITE);
if (rc)
goto out;
@@ -2550,7 +2684,7 @@ int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops)
struct osc_extent *ext;
struct osc_extent *found = NULL;
struct list_head *plist;
- pgoff_t index = oap2cl_page(oap)->cp_index;
+ pgoff_t index = osc_index(ops);
int rc = -EBUSY;
int cmd;
@@ -2613,12 +2747,12 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
pgoff_t end = 0;
list_for_each_entry(oap, list, oap_pending_item) {
- struct cl_page *cp = oap2cl_page(oap);
+ pgoff_t index = osc_index(oap2osc(oap));
- if (cp->cp_index > end)
- end = cp->cp_index;
- if (cp->cp_index < start)
- start = cp->cp_index;
+ if (index > end)
+ end = index;
+ if (index < start)
+ start = index;
++page_count;
mppr <<= (page_count > mppr);
}
@@ -2633,9 +2767,11 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
}
ext->oe_rw = !!(cmd & OBD_BRW_READ);
+ ext->oe_sync = 1;
ext->oe_urgent = 1;
ext->oe_start = start;
- ext->oe_end = ext->oe_max_end = end;
+ ext->oe_end = end;
+ ext->oe_max_end = end;
ext->oe_obj = obj;
ext->oe_srvlock = !!(brw_flags & OBD_BRW_SRVLOCK);
ext->oe_nr_pages = page_count;
@@ -2988,7 +3124,201 @@ int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
result = rc;
}
- OSC_IO_DEBUG(obj, "cache page out.\n");
+ OSC_IO_DEBUG(obj, "pageout [%lu, %lu], %d.\n", start, end, result);
+ return result;
+}
+
+/**
+ * Calls \a cb on each page of \a osc found in oo_tree within [start, end],
+ * stopping as soon as \a cb returns anything other than CLP_GANG_OKAY.
+ *
+ * Returns CLP_GANG_RESCHED instead of hogging CPU when need_resched() is set
+ * after a batch of pages; in that case caller should implement a retry logic.
+ *
+ * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
+ * crucial in the face of [offset, EOF] locks.
+ *
+ * Pages in CPS_FREEING state are skipped and never passed to \a cb.
+ */
+int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
+ struct osc_object *osc, pgoff_t start, pgoff_t end,
+ osc_page_gang_cbt cb, void *cbdata)
+{
+ struct osc_page *ops;
+ void **pvec;
+ pgoff_t idx;
+ unsigned int nr;
+ unsigned int i;
+ unsigned int j;
+ int res = CLP_GANG_OKAY;
+ bool tree_lock = true;
+
+ idx = start;
+ pvec = osc_env_info(env)->oti_pvec;
+ spin_lock(&osc->oo_tree_lock);
+ while ((nr = radix_tree_gang_lookup(&osc->oo_tree, pvec,
+ idx, OTI_PVEC_SIZE)) > 0) {
+ struct cl_page *page;
+ bool end_of_region = false;
+
+ for (i = 0, j = 0; i < nr; ++i) {
+ ops = pvec[i];
+ pvec[i] = NULL;
+
+ idx = osc_index(ops);
+ if (idx > end) {
+ end_of_region = true;
+ break;
+ }
+
+ page = ops->ops_cl.cpl_page;
+ LASSERT(page->cp_type == CPT_CACHEABLE);
+ if (page->cp_state == CPS_FREEING)
+ continue;
+
+ cl_page_get(page);
+ lu_ref_add_atomic(&page->cp_reference,
+ "gang_lookup", current);
+ pvec[j++] = ops;
+ }
+ ++idx;
+
+ /*
+ * Here a delicate locking dance is performed. Current thread
+ * holds a reference to a page, but has to own it before it
+ * can be placed into queue. Owning implies waiting, so
+ * radix-tree lock is to be released. After a wait one has to
+ * check that pages weren't truncated (cl_page_own() returns
+ * error in the latter case).
+ */
+ spin_unlock(&osc->oo_tree_lock);
+ tree_lock = false;
+
+ for (i = 0; i < j; ++i) {
+ ops = pvec[i];
+ if (res == CLP_GANG_OKAY)
+ res = (*cb)(env, io, ops, cbdata);
+
+ page = ops->ops_cl.cpl_page;
+ lu_ref_del(&page->cp_reference, "gang_lookup", current);
+ cl_page_put(env, page);
+ }
+ if (nr < OTI_PVEC_SIZE || end_of_region)
+ break;
+
+ if (res == CLP_GANG_OKAY && need_resched())
+ res = CLP_GANG_RESCHED;
+ if (res != CLP_GANG_OKAY)
+ break;
+
+ spin_lock(&osc->oo_tree_lock);
+ tree_lock = true;
+ }
+ if (tree_lock)
+ spin_unlock(&osc->oo_tree_lock);
+ return res;
+}
+
+/**
+ * Check if page @page is covered by an extra lock or discard it.
+ */
+static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
+ struct osc_page *ops, void *cbdata)
+{
+ struct osc_thread_info *info = osc_env_info(env);
+ struct osc_object *osc = cbdata;
+ pgoff_t index;
+
+ index = osc_index(ops);
+ if (index >= info->oti_fn_index) {
+ struct ldlm_lock *tmp;
+ struct cl_page *page = ops->ops_cl.cpl_page;
+
+ /* refresh non-overlapped index */
+ tmp = osc_dlmlock_at_pgoff(env, osc, index, 0, 0);
+ if (tmp) {
+ __u64 end = tmp->l_policy_data.l_extent.end;
+ /* Cache the first-non-overlapped index so as to skip
+ * all pages within [index, oti_fn_index). This is safe
+ * because if tmp lock is canceled, it will discard
+ * these pages.
+ */
+ info->oti_fn_index = cl_index(osc2cl(osc), end + 1);
+ if (end == OBD_OBJECT_EOF)
+ info->oti_fn_index = CL_PAGE_EOF;
+ LDLM_LOCK_PUT(tmp);
+ } else if (cl_page_own(env, io, page) == 0) {
+ /* discard the page */
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
+ } else {
+ LASSERT(page->cp_state == CPS_FREEING);
+ }
+ }
+
+ info->oti_next_index = index + 1;
+ return CLP_GANG_OKAY;
+}
+
+static int discard_cb(const struct lu_env *env, struct cl_io *io,
+ struct osc_page *ops, void *cbdata)
+{
+ struct osc_thread_info *info = osc_env_info(env);
+ struct cl_page *page = ops->ops_cl.cpl_page;
+
+ /* page is top page. */
+ info->oti_next_index = osc_index(ops) + 1;
+ if (cl_page_own(env, io, page) == 0) {
+ KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
+ !PageDirty(cl_page_vmpage(page))));
+
+ /* discard the page */
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
+ } else {
+ LASSERT(page->cp_state == CPS_FREEING);
+ }
+
+ return CLP_GANG_OKAY;
+}
+
+/**
+ * Discard pages protected by the given lock. This function traverses radix
+ * tree to find all covering pages and discard them. If a page is being covered
+ * by other locks, it should remain in cache.
+ *
+ * If error happens on any step, the process continues anyway (the reasoning
+ * behind this being that lock cancellation cannot be delayed indefinitely).
+ */
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
+ pgoff_t start, pgoff_t end, enum cl_lock_mode mode)
+{
+ struct osc_thread_info *info = osc_env_info(env);
+ struct cl_io *io = &info->oti_io;
+ osc_page_gang_cbt cb;
+ int res;
+ int result;
+
+ io->ci_obj = cl_object_top(osc2cl(osc));
+ io->ci_ignore_layout = 1;
+ result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+ if (result != 0)
+ goto out;
+
+ cb = mode == CLM_READ ? check_and_discard_cb : discard_cb;
+ info->oti_fn_index = start;
+ info->oti_next_index = start;
+ do {
+ res = osc_page_gang_lookup(env, io, osc,
+ info->oti_next_index, end, cb, osc);
+ if (info->oti_next_index > end)
+ break;
+
+ if (res == CLP_GANG_RESCHED)
+ cond_resched();
+ } while (res != CLP_GANG_OKAY);
+out:
+ cl_io_fini(env, io);
return result;
}
diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
index d55d04d0428b..c8c3f1ca77be 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -51,7 +47,6 @@
#include "../include/obd.h"
/* osc_build_res_name() */
#include "../include/cl_object.h"
-#include "../include/lclient.h"
#include "osc_internal.h"
/** \defgroup osc osc
@@ -67,7 +62,10 @@ struct osc_io {
/** super class */
struct cl_io_slice oi_cl;
/** true if this io is lockless. */
- int oi_lockless;
+ unsigned int oi_lockless;
+ /** how many LRU pages are reserved for this IO */
+ int oi_lru_reserved;
+
/** active extents, we know how many bytes is going to be written,
* so having an active extent will prevent it from being fragmented
*/
@@ -77,6 +75,8 @@ struct osc_io {
*/
struct osc_extent *oi_trunc;
+ /** write osc_lock for this IO, used by osc_extent_find(). */
+ struct osc_lock *oi_write_osclock;
struct obd_info oi_info;
struct obdo oi_oa;
struct osc_async_cbargs {
@@ -100,7 +100,7 @@ struct osc_session {
struct osc_io os_io;
};
-#define OTI_PVEC_SIZE 64
+#define OTI_PVEC_SIZE 256
struct osc_thread_info {
struct ldlm_res_id oti_resname;
ldlm_policy_data_t oti_policy;
@@ -109,7 +109,13 @@ struct osc_thread_info {
struct lustre_handle oti_handle;
struct cl_page_list oti_plist;
struct cl_io oti_io;
- struct cl_page *oti_pvec[OTI_PVEC_SIZE];
+ void *oti_pvec[OTI_PVEC_SIZE];
+ /**
+ * Fields used by osc_lock_discard_pages().
+ */
+ pgoff_t oti_next_index;
+ pgoff_t oti_fn_index; /* first non-overlapped index */
+ struct cl_sync_io oti_anchor;
};
struct osc_object {
@@ -125,7 +131,7 @@ struct osc_object {
*/
struct list_head oo_inflight[CRT_NR];
/**
- * Lock, protecting ccc_object::cob_inflight, because a seat-belt is
+ * Lock, protecting osc_page::ops_inflight, because a seat-belt is
* locked during take-off and landing.
*/
spinlock_t oo_seatbelt;
@@ -159,6 +165,17 @@ struct osc_object {
* oo_{read|write}_pages soon.
*/
spinlock_t oo_lock;
+
+ /**
+ * Radix tree for caching pages
+ */
+ struct radix_tree_root oo_tree;
+ spinlock_t oo_tree_lock;
+ unsigned long oo_npages;
+
+ /* Protect osc_lock this osc_object has */
+ spinlock_t oo_ol_spin;
+ struct list_head oo_ol_list;
};
static inline void osc_object_lock(struct osc_object *obj)
@@ -198,8 +215,6 @@ enum osc_lock_state {
OLS_ENQUEUED,
OLS_UPCALL_RECEIVED,
OLS_GRANTED,
- OLS_RELEASED,
- OLS_BLOCKED,
OLS_CANCELLED
};
@@ -208,10 +223,8 @@ enum osc_lock_state {
*
* Interaction with DLM.
*
- * CLIO enqueues all DLM locks through ptlrpcd (that is, in "async" mode).
- *
* Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in
- * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_lock.
+ * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock.
*
* This pointer is protected through a reference, acquired by
* osc_lock_upcall0(). Also, an additional reference is acquired by
@@ -249,26 +262,27 @@ enum osc_lock_state {
*/
struct osc_lock {
struct cl_lock_slice ols_cl;
+ /** Internal lock to protect states, etc. */
+ spinlock_t ols_lock;
+ /** Owner sleeps on this channel for state change */
+ struct cl_sync_io *ols_owner;
+ /** waiting list for this lock to be cancelled */
+ struct list_head ols_waiting_list;
+ /** wait entry of ols_waiting_list */
+ struct list_head ols_wait_entry;
+ /** list entry for osc_object::oo_ol_list */
+ struct list_head ols_nextlock_oscobj;
+
/** underlying DLM lock */
- struct ldlm_lock *ols_lock;
- /** lock value block */
- struct ost_lvb ols_lvb;
+ struct ldlm_lock *ols_dlmlock;
/** DLM flags with which osc_lock::ols_lock was enqueued */
__u64 ols_flags;
/** osc_lock::ols_lock handle */
struct lustre_handle ols_handle;
struct ldlm_enqueue_info ols_einfo;
enum osc_lock_state ols_state;
-
- /**
- * How many pages are using this lock for io, currently only used by
- * read-ahead. If non-zero, the underlying dlm lock won't be cancelled
- * during recovery to avoid deadlock. see bz16774.
- *
- * \see osc_page::ops_lock
- * \see osc_page_addref_lock(), osc_page_putref_lock()
- */
- atomic_t ols_pageref;
+ /** lock value block */
+ struct ost_lvb ols_lvb;
/**
* true, if ldlm_lock_addref() was called against
@@ -299,16 +313,6 @@ struct osc_lock {
*/
ols_locklessable:1,
/**
- * set by osc_lock_use() to wait until blocking AST enters into
- * osc_ldlm_blocking_ast0(), so that cl_lock mutex can be used for
- * further synchronization.
- */
- ols_ast_wait:1,
- /**
- * If the data of this lock has been flushed to server side.
- */
- ols_flush:1,
- /**
* if set, the osc_lock is a glimpse lock. For glimpse locks, we treat
* the EVAVAIL error as tolerable, this will make upper logic happy
* to wait all glimpse locks to each OSTs to be completed.
@@ -321,15 +325,6 @@ struct osc_lock {
* For async glimpse lock.
*/
ols_agl:1;
- /**
- * IO that owns this lock. This field is used for a dead-lock
- * avoidance by osc_lock_enqueue_wait().
- *
- * XXX: unfortunately, the owner of a osc_lock is not unique,
- * the lock may have multiple users, if the lock is granted and
- * then matched.
- */
- struct osc_io *ols_owner;
};
/**
@@ -357,11 +352,6 @@ struct osc_page {
*/
unsigned ops_transfer_pinned:1,
/**
- * True for a `temporary page' created by read-ahead code, probably
- * outside of any DLM lock.
- */
- ops_temp:1,
- /**
* in LRU?
*/
ops_in_lru:1,
@@ -369,18 +359,15 @@ struct osc_page {
* Set if the page must be transferred with OBD_BRW_SRVLOCK.
*/
ops_srvlock:1;
- union {
- /**
- * lru page list. ops_inflight and ops_lru are exclusive so
- * that they can share the same data.
- */
- struct list_head ops_lru;
- /**
- * Linkage into a per-osc_object list of pages in flight. For
- * debugging.
- */
- struct list_head ops_inflight;
- };
+ /**
+ * lru page list. See osc_lru_{del|use}() in osc_page.c for usage.
+ */
+ struct list_head ops_lru;
+ /**
+ * Linkage into a per-osc_object list of pages in flight. For
+ * debugging.
+ */
+ struct list_head ops_inflight;
/**
* Thread that submitted this page for transfer. For debugging.
*/
@@ -389,16 +376,6 @@ struct osc_page {
* Submit time - the time when the page is starting RPC. For debugging.
*/
unsigned long ops_submit_time;
-
- /**
- * A lock of which we hold a reference covers this page. Only used by
- * read-ahead: for a readahead page, we hold it's covering lock to
- * prevent it from being canceled during recovery.
- *
- * \see osc_lock::ols_pageref
- * \see osc_page_addref_lock(), osc_page_putref_lock().
- */
- struct cl_lock *ops_lock;
};
extern struct kmem_cache *osc_lock_kmem;
@@ -417,21 +394,22 @@ extern struct lu_context_key osc_session_key;
int osc_lock_init(const struct lu_env *env,
struct cl_object *obj, struct cl_lock *lock,
const struct cl_io *io);
-int osc_io_init (const struct lu_env *env,
- struct cl_object *obj, struct cl_io *io);
-int osc_req_init (const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req);
+int osc_io_init(const struct lu_env *env,
+ struct cl_object *obj, struct cl_io *io);
+int osc_req_init(const struct lu_env *env, struct cl_device *dev,
+ struct cl_req *req);
struct lu_object *osc_object_alloc(const struct lu_env *env,
const struct lu_object_header *hdr,
struct lu_device *dev);
int osc_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage);
+ struct cl_page *page, pgoff_t ind);
-void osc_index2policy (ldlm_policy_data_t *policy, const struct cl_object *obj,
- pgoff_t start, pgoff_t end);
-int osc_lvb_print (const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct ost_lvb *lvb);
+void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
+ pgoff_t start, pgoff_t end);
+int osc_lvb_print(const struct lu_env *env, void *cookie,
+ lu_printer_t p, const struct ost_lvb *lvb);
+void osc_lru_add_batch(struct client_obd *cli, struct list_head *list);
void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
enum cl_req_type crt, int brw_flags);
int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops);
@@ -441,6 +419,8 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
struct page *page, loff_t offset);
int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
struct osc_page *ops);
+int osc_page_cache_add(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io);
int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
struct osc_page *ops);
int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
@@ -457,12 +437,13 @@ int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
pgoff_t start, pgoff_t end);
void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
struct osc_object *osc);
+int lru_queue_work(const struct lu_env *env, void *data);
-void osc_object_set_contended (struct osc_object *obj);
+void osc_object_set_contended(struct osc_object *obj);
void osc_object_clear_contended(struct osc_object *obj);
-int osc_object_is_contended (struct osc_object *obj);
+int osc_object_is_contended(struct osc_object *obj);
-int osc_lock_is_lockless (const struct osc_lock *olck);
+int osc_lock_is_lockless(const struct osc_lock *olck);
/*****************************************************************************
*
@@ -558,6 +539,11 @@ static inline struct osc_page *oap2osc(struct osc_async_page *oap)
return container_of0(oap, struct osc_page, ops_oap);
}
+static inline pgoff_t osc_index(struct osc_page *opg)
+{
+ return opg->ops_cl.cpl_index;
+}
+
static inline struct cl_page *oap2cl_page(struct osc_async_page *oap)
{
return oap2osc(oap)->ops_cl.cpl_page;
@@ -608,7 +594,7 @@ enum osc_extent_state {
*
* LOCKING ORDER
* =============
- * page lock -> client_obd_list_lock -> object lock(osc_object::oo_lock)
+ * page lock -> cl_loi_list_lock -> object lock(osc_object::oo_lock)
*/
struct osc_extent {
/** red-black tree node */
@@ -627,6 +613,8 @@ struct osc_extent {
unsigned int oe_intree:1,
/** 0 is write, 1 is read */
oe_rw:1,
+ /** sync extent, queued by osc_queue_sync_pages() */
+ oe_sync:1,
oe_srvlock:1,
oe_memalloc:1,
/** an ACTIVE extent is going to be truncated, so when this extent
@@ -675,7 +663,7 @@ struct osc_extent {
*/
wait_queue_head_t oe_waitq;
/** lock covering this extent */
- struct cl_lock *oe_osclock;
+ struct ldlm_lock *oe_dlmlock;
/** terminator of this extent. Must be true if this extent is in IO. */
struct task_struct *oe_owner;
/** return value of writeback. If somebody is waiting for this extent,
@@ -690,6 +678,14 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
int sent, int rc);
void osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
+ pgoff_t start, pgoff_t end, enum cl_lock_mode mode);
+
+typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
+ struct osc_page *, void *);
+int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
+ struct osc_object *osc, pgoff_t start, pgoff_t end,
+ osc_page_gang_cbt cb, void *cbdata);
/** @} osc */
#endif /* OSC_CL_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_dev.c b/drivers/staging/lustre/lustre/osc/osc_dev.c
index d4fe507f165f..83d30c135ba4 100644
--- a/drivers/staging/lustre/lustre/osc/osc_dev.c
+++ b/drivers/staging/lustre/lustre/osc/osc_dev.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
index ea695c2099ee..7a27f0961955 100644
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -83,6 +79,12 @@ struct osc_async_page {
#define oap_count oap_brw_page.count
#define oap_brw_flags oap_brw_page.flag
+static inline struct osc_async_page *brw_page2oap(struct brw_page *pga)
+{
+ return (struct osc_async_page *)container_of(pga, struct osc_async_page,
+ oap_brw_page);
+}
+
struct osc_cache_waiter {
struct list_head ocw_entry;
wait_queue_head_t ocw_waitq;
@@ -102,12 +104,14 @@ void osc_update_next_shrink(struct client_obd *cli);
extern struct ptlrpc_request_set *PTLRPCD_SET;
+typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh,
+ int rc);
+
int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
__u64 *flags, ldlm_policy_data_t *policy,
struct ost_lvb *lvb, int kms_valid,
- obd_enqueue_update_f upcall,
+ osc_enqueue_upcall_f upcall,
void *cookie, struct ldlm_enqueue_info *einfo,
- struct lustre_handle *lockh,
struct ptlrpc_request_set *rqset, int async, int agl);
int osc_cancel_base(struct lustre_handle *lockh, __u32 mode);
@@ -130,9 +134,11 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
struct list_head *ext_list, int cmd);
-int osc_lru_shrink(struct client_obd *cli, int target);
+int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+ int target, bool force);
+int osc_lru_reclaim(struct client_obd *cli);
-extern spinlock_t osc_ast_guard;
+unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
@@ -173,8 +179,6 @@ static inline struct osc_device *obd2osc_dev(const struct obd_device *d)
return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev);
}
-int osc_dlm_lock_pageref(struct ldlm_lock *dlm);
-
extern struct kmem_cache *osc_quota_kmem;
struct osc_quota_info {
/** linkage for quota hash table */
@@ -192,5 +196,12 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
struct obd_quotactl *oqctl);
int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
+void osc_inc_unstable_pages(struct ptlrpc_request *req);
+void osc_dec_unstable_pages(struct ptlrpc_request *req);
+int osc_over_unstable_soft_limit(struct client_obd *cli);
+
+struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
+ struct osc_object *obj, pgoff_t index,
+ int pending, int canceling);
#endif /* OSC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
index 6bd0a45d8b06..6e3dcd38913f 100644
--- a/drivers/staging/lustre/lustre/osc/osc_io.c
+++ b/drivers/staging/lustre/lustre/osc/osc_io.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -68,11 +64,15 @@ static struct osc_io *cl2osc_io(const struct lu_env *env,
return oio;
}
-static struct osc_page *osc_cl_page_osc(struct cl_page *page)
+static struct osc_page *osc_cl_page_osc(struct cl_page *page,
+ struct osc_object *osc)
{
const struct cl_page_slice *slice;
- slice = cl_page_at(page, &osc_device_type);
+ if (osc)
+ slice = cl_object_page_slice(&osc->oo_cl, page);
+ else
+ slice = cl_page_at(page, &osc_device_type);
LASSERT(slice);
return cl2osc_page(slice);
@@ -137,7 +137,7 @@ static int osc_io_submit(const struct lu_env *env,
io = page->cp_owner;
LASSERT(io);
- opg = osc_cl_page_osc(page);
+ opg = osc_cl_page_osc(page, osc);
oap = &opg->ops_oap;
LASSERT(osc == oap->oap_obj);
@@ -164,8 +164,10 @@ static int osc_io_submit(const struct lu_env *env,
}
cl_page_list_move(qout, qin, page);
+ spin_lock(&oap->oap_lock);
oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY;
oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+ spin_unlock(&oap->oap_lock);
osc_page_submit(env, opg, crt, brw_flags);
list_add_tail(&oap->oap_pending_item, &list);
@@ -185,6 +187,13 @@ static int osc_io_submit(const struct lu_env *env,
return qout->pl_nr > 0 ? 0 : result;
}
+/**
+ * This is called when a page is accessed within file in a way that creates
+ * new page, if one were missing (i.e., if there were a hole at that place in
+ * the file, or accessed page is beyond the current file size).
+ *
+ * Expand stripe KMS if necessary.
+ */
static void osc_page_touch_at(const struct lu_env *env,
struct cl_object *obj, pgoff_t idx, unsigned to)
{
@@ -208,7 +217,9 @@ static void osc_page_touch_at(const struct lu_env *env,
kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms,
loi->loi_lvb.lvb_size);
- valid = 0;
+ attr->cat_ctime = LTIME_S(CURRENT_TIME);
+ attr->cat_mtime = attr->cat_ctime;
+ valid = CAT_MTIME | CAT_CTIME;
if (kms > loi->loi_kms) {
attr->cat_kms = kms;
valid |= CAT_KMS;
@@ -221,91 +232,128 @@ static void osc_page_touch_at(const struct lu_env *env,
cl_object_attr_unlock(obj);
}
-/**
- * This is called when a page is accessed within file in a way that creates
- * new page, if one were missing (i.e., if there were a hole at that place in
- * the file, or accessed page is beyond the current file size). Examples:
- * ->commit_write() and ->nopage() methods.
- *
- * Expand stripe KMS if necessary.
- */
-static void osc_page_touch(const struct lu_env *env,
- struct osc_page *opage, unsigned to)
-{
- struct cl_page *page = opage->ops_cl.cpl_page;
- struct cl_object *obj = opage->ops_cl.cpl_obj;
-
- osc_page_touch_at(env, obj, page->cp_index, to);
-}
-
-/**
- * Implements cl_io_operations::cio_prepare_write() method for osc layer.
- *
- * \retval -EIO transfer initiated against this osc will most likely fail
- * \retval 0 transfer initiated against this osc will most likely succeed.
- *
- * The reason for this check is to immediately return an error to the caller
- * in the case of a deactivated import. Note, that import can be deactivated
- * later, while pages, dirtied by this IO, are still in the cache, but this is
- * irrelevant, because that would still return an error to the application (if
- * it does fsync), but many applications don't do fsync because of performance
- * issues, and we wanted to return an -EIO at write time to notify the
- * application.
- */
-static int osc_io_prepare_write(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice,
- unsigned from, unsigned to)
+static int osc_io_commit_async(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ struct cl_page_list *qin, int from, int to,
+ cl_commit_cbt cb)
{
- struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev);
- struct obd_import *imp = class_exp2cliimp(dev->od_exp);
+ struct cl_io *io = ios->cis_io;
struct osc_io *oio = cl2osc_io(env, ios);
+ struct osc_object *osc = cl2osc(ios->cis_obj);
+ struct cl_page *page;
+ struct cl_page *last_page;
+ struct osc_page *opg;
int result = 0;
- /*
- * This implements OBD_BRW_CHECK logic from old client.
- */
+ LASSERT(qin->pl_nr > 0);
+
+ /* Handle partial page cases */
+ last_page = cl_page_list_last(qin);
+ if (oio->oi_lockless) {
+ page = cl_page_list_first(qin);
+ if (page == last_page) {
+ cl_page_clip(env, page, from, to);
+ } else {
+ if (from != 0)
+ cl_page_clip(env, page, from, PAGE_SIZE);
+ if (to != PAGE_SIZE)
+ cl_page_clip(env, last_page, 0, to);
+ }
+ }
+
+ while (qin->pl_nr > 0) {
+ struct osc_async_page *oap;
- if (!imp || imp->imp_invalid)
- result = -EIO;
- if (result == 0 && oio->oi_lockless)
- /* this page contains `invalid' data, but who cares?
- * nobody can access the invalid data.
- * in osc_io_commit_write(), we're going to write exact
- * [from, to) bytes of this page to OST. -jay
+ page = cl_page_list_first(qin);
+ opg = osc_cl_page_osc(page, osc);
+ oap = &opg->ops_oap;
+
+ if (!list_empty(&oap->oap_rpc_item)) {
+ CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
+ oap, opg);
+ result = -EBUSY;
+ break;
+ }
+
+ /* The page may be already in dirty cache. */
+ if (list_empty(&oap->oap_pending_item)) {
+ result = osc_page_cache_add(env, &opg->ops_cl, io);
+ if (result != 0)
+ break;
+ }
+
+ osc_page_touch_at(env, osc2cl(osc), osc_index(opg),
+ page == last_page ? to : PAGE_SIZE);
+
+ cl_page_list_del(env, qin, page);
+
+ (*cb)(env, io, page);
+ /* Can't access page any more. Page can be in transfer and
+ * complete at any time.
*/
- cl_page_export(env, slice->cpl_page, 1);
+ }
+ /* for sync write, kernel will wait for this page to be flushed before
+ * osc_io_end() is called, so release it earlier.
+ * for mkwrite(), it's known there are no further pages.
+ */
+ if (cl_io_is_sync_write(io) && oio->oi_active) {
+ osc_extent_release(env, oio->oi_active);
+ oio->oi_active = NULL;
+ }
+
+ CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, result);
return result;
}
-static int osc_io_commit_write(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice,
- unsigned from, unsigned to)
+static int osc_io_rw_iter_init(const struct lu_env *env,
+ const struct cl_io_slice *ios)
{
- struct osc_io *oio = cl2osc_io(env, ios);
- struct osc_page *opg = cl2osc_page(slice);
- struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
- struct osc_async_page *oap = &opg->ops_oap;
+ struct cl_io *io = ios->cis_io;
+ struct osc_io *oio = osc_env_io(env);
+ struct osc_object *osc = cl2osc(ios->cis_obj);
+ struct client_obd *cli = osc_cli(osc);
+ unsigned long c;
+ unsigned int npages;
+ unsigned int max_pages;
+
+ if (cl_io_is_append(io))
+ return 0;
+
+ npages = io->u.ci_rw.crw_count >> PAGE_SHIFT;
+ if (io->u.ci_rw.crw_pos & ~PAGE_MASK)
+ ++npages;
+
+ max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
+ if (npages > max_pages)
+ npages = max_pages;
+
+ c = atomic_read(cli->cl_lru_left);
+ if (c < npages && osc_lru_reclaim(cli) > 0)
+ c = atomic_read(cli->cl_lru_left);
+ while (c >= npages) {
+ if (c == atomic_cmpxchg(cli->cl_lru_left, c, c - npages)) {
+ oio->oi_lru_reserved = npages;
+ break;
+ }
+ c = atomic_read(cli->cl_lru_left);
+ }
- LASSERT(to > 0);
- /*
- * XXX instead of calling osc_page_touch() here and in
- * osc_io_fault_start() it might be more logical to introduce
- * cl_page_touch() method, that generic cl_io_commit_write() and page
- * fault code calls.
- */
- osc_page_touch(env, cl2osc_page(slice), to);
- if (!client_is_remote(osc_export(obj)) &&
- capable(CFS_CAP_SYS_RESOURCE))
- oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
+ return 0;
+}
- if (oio->oi_lockless)
- /* see osc_io_prepare_write() for lockless io handling. */
- cl_page_clip(env, slice->cpl_page, from, to);
+static void osc_io_rw_iter_fini(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct osc_io *oio = osc_env_io(env);
+ struct osc_object *osc = cl2osc(ios->cis_obj);
+ struct client_obd *cli = osc_cli(osc);
- return 0;
+ if (oio->oi_lru_reserved > 0) {
+ atomic_add(oio->oi_lru_reserved, cli->cl_lru_left);
+ oio->oi_lru_reserved = 0;
+ }
+ oio->oi_write_osclock = NULL;
}
static int osc_io_fault_start(const struct lu_env *env,
@@ -342,31 +390,21 @@ static int osc_async_upcall(void *a, int rc)
* Checks that there are no pages being written in the extent being truncated.
*/
static int trunc_check_cb(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, void *cbdata)
+ struct osc_page *ops, void *cbdata)
{
- const struct cl_page_slice *slice;
- struct osc_page *ops;
+ struct cl_page *page = ops->ops_cl.cpl_page;
struct osc_async_page *oap;
__u64 start = *(__u64 *)cbdata;
- slice = cl_page_at(page, &osc_device_type);
- LASSERT(slice);
- ops = cl2osc_page(slice);
oap = &ops->ops_oap;
-
if (oap->oap_cmd & OBD_BRW_WRITE &&
!list_empty(&oap->oap_pending_item))
CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n",
start, current->comm);
- {
- struct page *vmpage = cl_page_vmpage(env, page);
-
- if (PageLocked(vmpage))
- CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n",
- ops, page->cp_index,
- (oap->oap_cmd & OBD_BRW_RWMASK));
- }
+ if (PageLocked(page->cp_vmpage))
+ CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n",
+ ops, osc_index(ops), oap->oap_cmd & OBD_BRW_RWMASK);
return CLP_GANG_OKAY;
}
@@ -385,8 +423,9 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io,
/*
* Complain if there are pages in the truncated region.
*/
- cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF,
- trunc_check_cb, (void *)&size);
+ osc_page_gang_lookup(env, io, cl2osc(clob),
+ start + partial, CL_PAGE_EOF,
+ trunc_check_cb, (void *)&size);
}
static int osc_io_setattr_start(const struct lu_env *env,
@@ -416,7 +455,8 @@ static int osc_io_setattr_start(const struct lu_env *env,
unsigned int cl_valid = 0;
if (ia_valid & ATTR_SIZE) {
- attr->cat_size = attr->cat_kms = size;
+ attr->cat_size = size;
+ attr->cat_kms = size;
cl_valid = CAT_SIZE | CAT_KMS;
}
if (ia_valid & ATTR_MTIME_SET) {
@@ -484,7 +524,8 @@ static void osc_io_setattr_end(const struct lu_env *env,
if (cbargs->opc_rpc_sent) {
wait_for_completion(&cbargs->opc_sync);
- result = io->ci_result = cbargs->opc_rc;
+ result = cbargs->opc_rc;
+ io->ci_result = cbargs->opc_rc;
}
if (result == 0) {
if (oio->oi_lockless) {
@@ -533,7 +574,8 @@ static int osc_io_write_start(const struct lu_env *env,
OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_SETTIME, 1);
cl_object_attr_lock(obj);
- attr->cat_mtime = attr->cat_ctime = ktime_get_real_seconds();
+ attr->cat_ctime = ktime_get_real_seconds();
+ attr->cat_mtime = attr->cat_ctime;
rc = cl_object_attr_set(env, obj, attr, CAT_MTIME | CAT_CTIME);
cl_object_attr_unlock(obj);
@@ -650,6 +692,8 @@ static const struct cl_io_operations osc_io_ops = {
.cio_fini = osc_io_fini
},
[CIT_WRITE] = {
+ .cio_iter_init = osc_io_rw_iter_init,
+ .cio_iter_fini = osc_io_rw_iter_fini,
.cio_start = osc_io_write_start,
.cio_end = osc_io_end,
.cio_fini = osc_io_fini
@@ -672,16 +716,8 @@ static const struct cl_io_operations osc_io_ops = {
.cio_fini = osc_io_fini
}
},
- .req_op = {
- [CRT_READ] = {
- .cio_submit = osc_io_submit
- },
- [CRT_WRITE] = {
- .cio_submit = osc_io_submit
- }
- },
- .cio_prepare_write = osc_io_prepare_write,
- .cio_commit_write = osc_io_commit_write
+ .cio_submit = osc_io_submit,
+ .cio_commit_async = osc_io_commit_async
};
/*****************************************************************************
@@ -718,8 +754,7 @@ static void osc_req_attr_set(const struct lu_env *env,
struct lov_oinfo *oinfo;
struct cl_req *clerq;
struct cl_page *apage; /* _some_ page in @clerq */
- struct cl_lock *lock; /* _some_ lock protecting @apage */
- struct osc_lock *olck;
+ struct ldlm_lock *lock; /* _some_ lock protecting @apage */
struct osc_page *opg;
struct obdo *oa;
struct ost_lvb *lvb;
@@ -753,31 +788,32 @@ static void osc_req_attr_set(const struct lu_env *env,
LASSERT(!list_empty(&clerq->crq_pages));
apage = container_of(clerq->crq_pages.next,
struct cl_page, cp_flight);
- opg = osc_cl_page_osc(apage);
- apage = opg->ops_cl.cpl_page; /* now apage is a sub-page */
- lock = cl_lock_at_page(env, apage->cp_obj, apage, NULL, 1, 1);
- if (!lock) {
- struct cl_object_header *head;
- struct cl_lock *scan;
-
- head = cl_object_header(apage->cp_obj);
- list_for_each_entry(scan, &head->coh_locks, cll_linkage)
- CL_LOCK_DEBUG(D_ERROR, env, scan,
- "no cover page!\n");
- CL_PAGE_DEBUG(D_ERROR, env, apage,
- "dump uncover page!\n");
+ opg = osc_cl_page_osc(apage, NULL);
+ lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg),
+ 1, 1);
+ if (!lock && !opg->ops_srvlock) {
+ struct ldlm_resource *res;
+ struct ldlm_res_id *resname;
+
+ CL_PAGE_DEBUG(D_ERROR, env, apage, "uncovered page!\n");
+
+ resname = &osc_env_info(env)->oti_resname;
+ ostid_build_res_name(&oinfo->loi_oi, resname);
+ res = ldlm_resource_get(
+ osc_export(cl2osc(obj))->exp_obd->obd_namespace,
+ NULL, resname, LDLM_EXTENT, 0);
+ ldlm_resource_dump(D_ERROR, res);
+
dump_stack();
LBUG();
}
- olck = osc_lock_at(lock);
- LASSERT(ergo(opg->ops_srvlock, !olck->ols_lock));
/* check for lockless io. */
- if (olck->ols_lock) {
- oa->o_handle = olck->ols_lock->l_remote_handle;
+ if (lock) {
+ oa->o_handle = lock->l_remote_handle;
oa->o_valid |= OBD_MD_FLHANDLE;
+ LDLM_LOCK_PUT(lock);
}
- cl_lock_put(env, lock);
}
}
@@ -807,8 +843,9 @@ int osc_req_init(const struct lu_env *env, struct cl_device *dev,
if (or) {
cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops);
result = 0;
- } else
+ } else {
result = -ENOMEM;
+ }
return result;
}
diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c
index 013df9787f3e..717d3ffb6789 100644
--- a/drivers/staging/lustre/lustre/osc/osc_lock.c
+++ b/drivers/staging/lustre/lustre/osc/osc_lock.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -36,6 +32,7 @@
* Implementation of cl_lock for OSC layer.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_OSC
@@ -50,8 +47,6 @@
* @{
*/
-#define _PAGEREF_MAGIC (-10000000)
-
/*****************************************************************************
*
* Type conversions.
@@ -62,7 +57,6 @@ static const struct cl_lock_operations osc_lock_ops;
static const struct cl_lock_operations osc_lock_lockless_ops;
static void osc_lock_to_lockless(const struct lu_env *env,
struct osc_lock *ols, int force);
-static int osc_lock_has_pages(struct osc_lock *olck);
int osc_lock_is_lockless(const struct osc_lock *olck)
{
@@ -90,11 +84,11 @@ static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle)
static int osc_lock_invariant(struct osc_lock *ols)
{
struct ldlm_lock *lock = osc_handle_ptr(&ols->ols_handle);
- struct ldlm_lock *olock = ols->ols_lock;
+ struct ldlm_lock *olock = ols->ols_dlmlock;
int handle_used = lustre_handle_is_used(&ols->ols_handle);
if (ergo(osc_lock_is_lockless(ols),
- ols->ols_locklessable && !ols->ols_lock))
+ ols->ols_locklessable && !ols->ols_dlmlock))
return 1;
/*
@@ -111,7 +105,7 @@ static int osc_lock_invariant(struct osc_lock *ols)
ergo(!lock, !olock)))
return 0;
/*
- * Check that ->ols_handle and ->ols_lock are consistent, but
+ * Check that ->ols_handle and ->ols_dlmlock are consistent, but
* take into account that they are set at the different time.
*/
if (!ergo(ols->ols_state == OLS_CANCELLED,
@@ -122,7 +116,7 @@ static int osc_lock_invariant(struct osc_lock *ols)
* ast.
*/
if (!ergo(olock && ols->ols_state < OLS_CANCELLED,
- ((olock->l_flags & LDLM_FL_DESTROYED) == 0)))
+ !ldlm_is_destroyed(olock)))
return 0;
if (!ergo(ols->ols_state == OLS_GRANTED,
@@ -138,117 +132,13 @@ static int osc_lock_invariant(struct osc_lock *ols)
*
*/
-/**
- * Breaks a link between osc_lock and dlm_lock.
- */
-static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
-{
- struct ldlm_lock *dlmlock;
-
- spin_lock(&osc_ast_guard);
- dlmlock = olck->ols_lock;
- if (!dlmlock) {
- spin_unlock(&osc_ast_guard);
- return;
- }
-
- olck->ols_lock = NULL;
- /* wb(); --- for all who checks (ols->ols_lock != NULL) before
- * call to osc_lock_detach()
- */
- dlmlock->l_ast_data = NULL;
- olck->ols_handle.cookie = 0ULL;
- spin_unlock(&osc_ast_guard);
-
- lock_res_and_lock(dlmlock);
- if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
- struct cl_object *obj = olck->ols_cl.cls_obj;
- struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- __u64 old_kms;
-
- cl_object_attr_lock(obj);
- /* Must get the value under the lock to avoid possible races. */
- old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
- /* Update the kms. Need to loop all granted locks.
- * Not a problem for the client
- */
- attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
-
- cl_object_attr_set(env, obj, attr, CAT_KMS);
- cl_object_attr_unlock(obj);
- }
- unlock_res_and_lock(dlmlock);
-
- /* release a reference taken in osc_lock_upcall0(). */
- LASSERT(olck->ols_has_ref);
- lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
- LDLM_LOCK_RELEASE(dlmlock);
- olck->ols_has_ref = 0;
-}
-
-static int osc_lock_unhold(struct osc_lock *ols)
-{
- int result = 0;
-
- if (ols->ols_hold) {
- ols->ols_hold = 0;
- result = osc_cancel_base(&ols->ols_handle,
- ols->ols_einfo.ei_mode);
- }
- return result;
-}
-
-static int osc_lock_unuse(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct osc_lock *ols = cl2osc_lock(slice);
-
- LINVRNT(osc_lock_invariant(ols));
-
- switch (ols->ols_state) {
- case OLS_NEW:
- LASSERT(!ols->ols_hold);
- LASSERT(ols->ols_agl);
- return 0;
- case OLS_UPCALL_RECEIVED:
- osc_lock_unhold(ols);
- case OLS_ENQUEUED:
- LASSERT(!ols->ols_hold);
- osc_lock_detach(env, ols);
- ols->ols_state = OLS_NEW;
- return 0;
- case OLS_GRANTED:
- LASSERT(!ols->ols_glimpse);
- LASSERT(ols->ols_hold);
- /*
- * Move lock into OLS_RELEASED state before calling
- * osc_cancel_base() so that possible synchronous cancellation
- * sees that lock is released.
- */
- ols->ols_state = OLS_RELEASED;
- return osc_lock_unhold(ols);
- default:
- CERROR("Impossible state: %d\n", ols->ols_state);
- LBUG();
- }
-}
-
static void osc_lock_fini(const struct lu_env *env,
struct cl_lock_slice *slice)
{
struct osc_lock *ols = cl2osc_lock(slice);
LINVRNT(osc_lock_invariant(ols));
- /*
- * ->ols_hold can still be true at this point if, for example, a
- * thread that requested a lock was killed (and released a reference
- * to the lock), before reply from a server was received. In this case
- * lock is destroyed immediately after upcall.
- */
- osc_lock_unhold(ols);
- LASSERT(!ols->ols_lock);
- LASSERT(atomic_read(&ols->ols_pageref) == 0 ||
- atomic_read(&ols->ols_pageref) == _PAGEREF_MAGIC);
+ LASSERT(!ols->ols_dlmlock);
kmem_cache_free(osc_lock_kmem, ols);
}
@@ -275,55 +165,12 @@ static __u64 osc_enq2ldlm_flags(__u32 enqflags)
result |= LDLM_FL_HAS_INTENT;
if (enqflags & CEF_DISCARD_DATA)
result |= LDLM_FL_AST_DISCARD_DATA;
+ if (enqflags & CEF_PEEK)
+ result |= LDLM_FL_TEST_LOCK;
return result;
}
/**
- * Global spin-lock protecting consistency of ldlm_lock::l_ast_data
- * pointers. Initialized in osc_init().
- */
-spinlock_t osc_ast_guard;
-
-static struct osc_lock *osc_ast_data_get(struct ldlm_lock *dlm_lock)
-{
- struct osc_lock *olck;
-
- lock_res_and_lock(dlm_lock);
- spin_lock(&osc_ast_guard);
- olck = dlm_lock->l_ast_data;
- if (olck) {
- struct cl_lock *lock = olck->ols_cl.cls_lock;
- /*
- * If osc_lock holds a reference on ldlm lock, return it even
- * when cl_lock is in CLS_FREEING state. This way
- *
- * osc_ast_data_get(dlmlock) == NULL
- *
- * guarantees that all osc references on dlmlock were
- * released. osc_dlm_blocking_ast0() relies on that.
- */
- if (lock->cll_state < CLS_FREEING || olck->ols_has_ref) {
- cl_lock_get_trust(lock);
- lu_ref_add_atomic(&lock->cll_reference,
- "ast", current);
- } else
- olck = NULL;
- }
- spin_unlock(&osc_ast_guard);
- unlock_res_and_lock(dlm_lock);
- return olck;
-}
-
-static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
-{
- struct cl_lock *lock;
-
- lock = olck->ols_cl.cls_lock;
- lu_ref_del(&lock->cll_reference, "ast", current);
- cl_lock_put(env, lock);
-}
-
-/**
* Updates object attributes from a lock value block (lvb) received together
* with the DLM lock reply from the server. Copy of osc_update_enqueue()
* logic.
@@ -333,35 +180,30 @@ static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
*
* Called under lock and resource spin-locks.
*/
-static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
- int rc)
+static void osc_lock_lvb_update(const struct lu_env *env,
+ struct osc_object *osc,
+ struct ldlm_lock *dlmlock,
+ struct ost_lvb *lvb)
{
- struct ost_lvb *lvb;
- struct cl_object *obj;
- struct lov_oinfo *oinfo;
- struct cl_attr *attr;
+ struct cl_object *obj = osc2cl(osc);
+ struct lov_oinfo *oinfo = osc->oo_oinfo;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
unsigned valid;
- if (!(olck->ols_flags & LDLM_FL_LVB_READY))
- return;
-
- lvb = &olck->ols_lvb;
- obj = olck->ols_cl.cls_obj;
- oinfo = cl2osc(obj)->oo_oinfo;
- attr = &osc_env_info(env)->oti_attr;
valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE;
+ if (!lvb)
+ lvb = dlmlock->l_lvb_data;
+
cl_lvb2attr(attr, lvb);
cl_object_attr_lock(obj);
- if (rc == 0) {
- struct ldlm_lock *dlmlock;
+ if (dlmlock) {
__u64 size;
- dlmlock = olck->ols_lock;
-
- /* re-grab LVB from a dlm lock under DLM spin-locks. */
- *lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
+ check_res_locked(dlmlock->l_resource);
+ LASSERT(lvb == dlmlock->l_lvb_data);
size = lvb->lvb_size;
+
/* Extend KMS up to the end of this lock and no further
* A lock on [x,y] means a KMS of up to y + 1 bytes!
*/
@@ -378,102 +220,67 @@ static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
dlmlock->l_policy_data.l_extent.end);
}
ldlm_lock_allow_match_locked(dlmlock);
- } else if (rc == -ENAVAIL && olck->ols_glimpse) {
- CDEBUG(D_INODE, "glimpsed, setting rss=%llu; leaving kms=%llu\n",
- lvb->lvb_size, oinfo->loi_kms);
- } else
- valid = 0;
-
- if (valid != 0)
- cl_object_attr_set(env, obj, attr, valid);
+ }
+ cl_object_attr_set(env, obj, attr, valid);
cl_object_attr_unlock(obj);
}
-/**
- * Called when a lock is granted, from an upcall (when server returned a
- * granted lock), or from completion AST, when server returned a blocked lock.
- *
- * Called under lock and resource spin-locks, that are released temporarily
- * here.
- */
-static void osc_lock_granted(const struct lu_env *env, struct osc_lock *olck,
- struct ldlm_lock *dlmlock, int rc)
+static void osc_lock_granted(const struct lu_env *env, struct osc_lock *oscl,
+ struct lustre_handle *lockh, bool lvb_update)
{
- struct ldlm_extent *ext;
- struct cl_lock *lock;
- struct cl_lock_descr *descr;
+ struct ldlm_lock *dlmlock;
- LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
+ dlmlock = ldlm_handle2lock_long(lockh, 0);
+ LASSERT(dlmlock);
- if (olck->ols_state < OLS_GRANTED) {
- lock = olck->ols_cl.cls_lock;
- ext = &dlmlock->l_policy_data.l_extent;
- descr = &osc_env_info(env)->oti_descr;
- descr->cld_obj = lock->cll_descr.cld_obj;
+ /* lock reference taken by ldlm_handle2lock_long() is
+ * owned by osc_lock and released in osc_lock_detach()
+ */
+ lu_ref_add(&dlmlock->l_reference, "osc_lock", oscl);
+ oscl->ols_has_ref = 1;
- /* XXX check that ->l_granted_mode is valid. */
- descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
- descr->cld_start = cl_index(descr->cld_obj, ext->start);
- descr->cld_end = cl_index(descr->cld_obj, ext->end);
- descr->cld_gid = ext->gid;
- /*
- * tell upper layers the extent of the lock that was actually
- * granted
- */
- olck->ols_state = OLS_GRANTED;
- osc_lock_lvb_update(env, olck, rc);
-
- /* release DLM spin-locks to allow cl_lock_{modify,signal}()
- * to take a semaphore on a parent lock. This is safe, because
- * spin-locks are needed to protect consistency of
- * dlmlock->l_*_mode and LVB, and we have finished processing
- * them.
+ LASSERT(!oscl->ols_dlmlock);
+ oscl->ols_dlmlock = dlmlock;
+
+ /* This may be a matched lock for glimpse request, do not hold
+ * lock reference in that case.
+ */
+ if (!oscl->ols_glimpse) {
+ /* hold a reference for a non-glimpse lock, which will
+ * be released in osc_lock_cancel()
*/
- unlock_res_and_lock(dlmlock);
- cl_lock_modify(env, lock, descr);
- cl_lock_signal(env, lock);
- LINVRNT(osc_lock_invariant(olck));
- lock_res_and_lock(dlmlock);
+ lustre_handle_copy(&oscl->ols_handle, lockh);
+ ldlm_lock_addref(lockh, oscl->ols_einfo.ei_mode);
+ oscl->ols_hold = 1;
}
-}
-
-static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
-
-{
- struct ldlm_lock *dlmlock;
-
- dlmlock = ldlm_handle2lock_long(&olck->ols_handle, 0);
- LASSERT(dlmlock);
+ /* Lock must have been granted. */
lock_res_and_lock(dlmlock);
- spin_lock(&osc_ast_guard);
- LASSERT(dlmlock->l_ast_data == olck);
- LASSERT(!olck->ols_lock);
- olck->ols_lock = dlmlock;
- spin_unlock(&osc_ast_guard);
+ if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
+ struct ldlm_extent *ext = &dlmlock->l_policy_data.l_extent;
+ struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
- /*
- * Lock might be not yet granted. In this case, completion ast
- * (osc_ldlm_completion_ast()) comes later and finishes lock
- * granting.
- */
- if (dlmlock->l_granted_mode == dlmlock->l_req_mode)
- osc_lock_granted(env, olck, dlmlock, 0);
- unlock_res_and_lock(dlmlock);
+ /* extend the lock extent, otherwise there will be a problem when
+ * we decide whether to grant a lockless lock.
+ */
+ descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
+ descr->cld_start = cl_index(descr->cld_obj, ext->start);
+ descr->cld_end = cl_index(descr->cld_obj, ext->end);
+ descr->cld_gid = ext->gid;
- /*
- * osc_enqueue_interpret() decrefs asynchronous locks, counter
- * this.
- */
- ldlm_lock_addref(&olck->ols_handle, olck->ols_einfo.ei_mode);
- olck->ols_hold = 1;
+ /* no lvb update for matched lock */
+ if (lvb_update) {
+ LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
+ osc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj),
+ dlmlock, NULL);
+ }
+ LINVRNT(osc_lock_invariant(oscl));
+ }
+ unlock_res_and_lock(dlmlock);
- /* lock reference taken by ldlm_handle2lock_long() is owned by
- * osc_lock and released in osc_lock_detach()
- */
- lu_ref_add(&dlmlock->l_reference, "osc_lock", olck);
- olck->ols_has_ref = 1;
+ LASSERT(oscl->ols_state != OLS_GRANTED);
+ oscl->ols_state = OLS_GRANTED;
}
/**
@@ -481,143 +288,124 @@ static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
* received from a server, or after osc_enqueue_base() matched a local DLM
* lock.
*/
-static int osc_lock_upcall(void *cookie, int errcode)
+static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh,
+ int errcode)
{
- struct osc_lock *olck = cookie;
- struct cl_lock_slice *slice = &olck->ols_cl;
- struct cl_lock *lock = slice->cls_lock;
+ struct osc_lock *oscl = cookie;
+ struct cl_lock_slice *slice = &oscl->ols_cl;
struct lu_env *env;
struct cl_env_nest nest;
+ int rc;
env = cl_env_nested_get(&nest);
- if (!IS_ERR(env)) {
- int rc;
+ /* should never happen, similar to osc_ldlm_blocking_ast(). */
+ LASSERT(!IS_ERR(env));
+
+ rc = ldlm_error2errno(errcode);
+ if (oscl->ols_state == OLS_ENQUEUED) {
+ oscl->ols_state = OLS_UPCALL_RECEIVED;
+ } else if (oscl->ols_state == OLS_CANCELLED) {
+ rc = -EIO;
+ } else {
+ CERROR("Impossible state: %d\n", oscl->ols_state);
+ LBUG();
+ }
- cl_lock_mutex_get(env, lock);
+ if (rc == 0)
+ osc_lock_granted(env, oscl, lockh, errcode == ELDLM_OK);
- LASSERT(lock->cll_state >= CLS_QUEUING);
- if (olck->ols_state == OLS_ENQUEUED) {
- olck->ols_state = OLS_UPCALL_RECEIVED;
- rc = ldlm_error2errno(errcode);
- } else if (olck->ols_state == OLS_CANCELLED) {
- rc = -EIO;
- } else {
- CERROR("Impossible state: %d\n", olck->ols_state);
- LBUG();
- }
- if (rc) {
- struct ldlm_lock *dlmlock;
-
- dlmlock = ldlm_handle2lock(&olck->ols_handle);
- if (dlmlock) {
- lock_res_and_lock(dlmlock);
- spin_lock(&osc_ast_guard);
- LASSERT(!olck->ols_lock);
- dlmlock->l_ast_data = NULL;
- olck->ols_handle.cookie = 0ULL;
- spin_unlock(&osc_ast_guard);
- ldlm_lock_fail_match_locked(dlmlock);
- unlock_res_and_lock(dlmlock);
- LDLM_LOCK_PUT(dlmlock);
- }
- } else {
- if (olck->ols_glimpse)
- olck->ols_glimpse = 0;
- osc_lock_upcall0(env, olck);
- }
+ /* Error handling, some errors are tolerable. */
+ if (oscl->ols_locklessable && rc == -EUSERS) {
+ /* This is a tolerable error, turn this lock into
+ * lockless lock.
+ */
+ osc_object_set_contended(cl2osc(slice->cls_obj));
+ LASSERT(slice->cls_ops == &osc_lock_ops);
+
+ /* Change this lock to ldlmlock-less lock. */
+ osc_lock_to_lockless(env, oscl, 1);
+ oscl->ols_state = OLS_GRANTED;
+ rc = 0;
+ } else if (oscl->ols_glimpse && rc == -ENAVAIL) {
+ LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
+ osc_lock_lvb_update(env, cl2osc(slice->cls_obj),
+ NULL, &oscl->ols_lvb);
+ /* Hide the error. */
+ rc = 0;
+ }
- /* Error handling, some errors are tolerable. */
- if (olck->ols_locklessable && rc == -EUSERS) {
- /* This is a tolerable error, turn this lock into
- * lockless lock.
- */
- osc_object_set_contended(cl2osc(slice->cls_obj));
- LASSERT(slice->cls_ops == &osc_lock_ops);
+ if (oscl->ols_owner)
+ cl_sync_io_note(env, oscl->ols_owner, rc);
+ cl_env_nested_put(&nest, env);
- /* Change this lock to ldlmlock-less lock. */
- osc_lock_to_lockless(env, olck, 1);
- olck->ols_state = OLS_GRANTED;
- rc = 0;
- } else if (olck->ols_glimpse && rc == -ENAVAIL) {
- osc_lock_lvb_update(env, olck, rc);
- cl_lock_delete(env, lock);
- /* Hide the error. */
- rc = 0;
- }
-
- if (rc == 0) {
- /* For AGL case, the RPC sponsor may exits the cl_lock
- * processing without wait() called before related OSC
- * lock upcall(). So update the lock status according
- * to the enqueue result inside AGL upcall().
- */
- if (olck->ols_agl) {
- lock->cll_flags |= CLF_FROM_UPCALL;
- cl_wait_try(env, lock);
- lock->cll_flags &= ~CLF_FROM_UPCALL;
- if (!olck->ols_glimpse)
- olck->ols_agl = 0;
- }
- cl_lock_signal(env, lock);
- /* del user for lock upcall cookie */
- cl_unuse_try(env, lock);
- } else {
- /* del user for lock upcall cookie */
- cl_lock_user_del(env, lock);
- cl_lock_error(env, lock, rc);
- }
+ return rc;
+}
- /* release cookie reference, acquired by osc_lock_enqueue() */
- cl_lock_hold_release(env, lock, "upcall", lock);
- cl_lock_mutex_put(env, lock);
+static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
+ int errcode)
+{
+ struct osc_object *osc = cookie;
+ struct ldlm_lock *dlmlock;
+ struct lu_env *env;
+ struct cl_env_nest nest;
- lu_ref_del(&lock->cll_reference, "upcall", lock);
- /* This maybe the last reference, so must be called after
- * cl_lock_mutex_put().
- */
- cl_lock_put(env, lock);
+ env = cl_env_nested_get(&nest);
+ LASSERT(!IS_ERR(env));
- cl_env_nested_put(&nest, env);
- } else {
- /* should never happen, similar to osc_ldlm_blocking_ast(). */
- LBUG();
+ if (errcode == ELDLM_LOCK_MATCHED) {
+ errcode = ELDLM_OK;
+ goto out;
}
- return errcode;
+
+ if (errcode != ELDLM_OK)
+ goto out;
+
+ dlmlock = ldlm_handle2lock(lockh);
+ LASSERT(dlmlock);
+
+ lock_res_and_lock(dlmlock);
+ LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
+
+ /* there is no osc_lock associated with AGL lock */
+ osc_lock_lvb_update(env, osc, dlmlock, NULL);
+
+ unlock_res_and_lock(dlmlock);
+ LDLM_LOCK_PUT(dlmlock);
+
+out:
+ cl_object_put(env, osc2cl(osc));
+ cl_env_nested_put(&nest, env);
+ return ldlm_error2errno(errcode);
}
-/**
- * Core of osc_dlm_blocking_ast() logic.
- */
-static void osc_lock_blocking(const struct lu_env *env,
- struct ldlm_lock *dlmlock,
- struct osc_lock *olck, int blocking)
+static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end,
+ enum cl_lock_mode mode, int discard)
{
- struct cl_lock *lock = olck->ols_cl.cls_lock;
+ struct lu_env *env;
+ struct cl_env_nest nest;
+ int rc = 0;
+ int rc2 = 0;
- LASSERT(olck->ols_lock == dlmlock);
- CLASSERT(OLS_BLOCKED < OLS_CANCELLED);
- LASSERT(!osc_lock_is_lockless(olck));
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
+
+ if (mode == CLM_WRITE) {
+ rc = osc_cache_writeback_range(env, obj, start, end, 1,
+ discard);
+ CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n",
+ obj, start, end, rc,
+ discard ? "discarded" : "written back");
+ if (rc > 0)
+ rc = 0;
+ }
- /*
- * Lock might be still addref-ed here, if e.g., blocking ast
- * is sent for a failed lock.
- */
- osc_lock_unhold(olck);
+ rc2 = osc_lock_discard_pages(env, obj, start, end, mode);
+ if (rc == 0 && rc2 < 0)
+ rc = rc2;
- if (blocking && olck->ols_state < OLS_BLOCKED)
- /*
- * Move osc_lock into OLS_BLOCKED before canceling the lock,
- * because it recursively re-enters osc_lock_blocking(), with
- * the state set to OLS_CANCELLED.
- */
- olck->ols_state = OLS_BLOCKED;
- /*
- * cancel and destroy lock at least once no matter how blocking ast is
- * entered (see comment above osc_ldlm_blocking_ast() for use
- * cases). cl_lock_cancel() and cl_lock_delete() are idempotent.
- */
- cl_lock_cancel(env, lock);
- cl_lock_delete(env, lock);
+ cl_env_nested_put(&nest, env);
+ return rc;
}
/**
@@ -628,65 +416,63 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env,
struct ldlm_lock *dlmlock,
void *data, int flag)
{
- struct osc_lock *olck;
- struct cl_lock *lock;
- int result;
- int cancel;
-
- LASSERT(flag == LDLM_CB_BLOCKING || flag == LDLM_CB_CANCELING);
-
- cancel = 0;
- olck = osc_ast_data_get(dlmlock);
- if (olck) {
- lock = olck->ols_cl.cls_lock;
- cl_lock_mutex_get(env, lock);
- LINVRNT(osc_lock_invariant(olck));
- if (olck->ols_ast_wait) {
- /* wake up osc_lock_use() */
- cl_lock_signal(env, lock);
- olck->ols_ast_wait = 0;
- }
- /*
- * Lock might have been canceled while this thread was
- * sleeping for lock mutex, but olck is pinned in memory.
- */
- if (olck == dlmlock->l_ast_data) {
- /*
- * NOTE: DLM sends blocking AST's for failed locks
- * (that are still in pre-OLS_GRANTED state)
- * too, and they have to be canceled otherwise
- * DLM lock is never destroyed and stuck in
- * the memory.
- *
- * Alternatively, ldlm_cli_cancel() can be
- * called here directly for osc_locks with
- * ols_state < OLS_GRANTED to maintain an
- * invariant that ->clo_cancel() is only called
- * for locks that were granted.
- */
- LASSERT(data == olck);
- osc_lock_blocking(env, dlmlock,
- olck, flag == LDLM_CB_BLOCKING);
- } else
- cancel = 1;
- cl_lock_mutex_put(env, lock);
- osc_ast_data_put(env, olck);
- } else
- /*
- * DLM lock exists, but there is no cl_lock attached to it.
- * This is a `normal' race. cl_object and its cl_lock's can be
- * removed by memory pressure, together with all pages.
+ struct cl_object *obj = NULL;
+ int result = 0;
+ int discard;
+ enum cl_lock_mode mode = CLM_READ;
+
+ LASSERT(flag == LDLM_CB_CANCELING);
+
+ lock_res_and_lock(dlmlock);
+ if (dlmlock->l_granted_mode != dlmlock->l_req_mode) {
+ dlmlock->l_ast_data = NULL;
+ unlock_res_and_lock(dlmlock);
+ return 0;
+ }
+
+ discard = ldlm_is_discard_data(dlmlock);
+ if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))
+ mode = CLM_WRITE;
+
+ if (dlmlock->l_ast_data) {
+ obj = osc2cl(dlmlock->l_ast_data);
+ dlmlock->l_ast_data = NULL;
+
+ cl_object_get(obj);
+ }
+
+ unlock_res_and_lock(dlmlock);
+
+ /* if l_ast_data is NULL, the dlmlock was enqueued by AGL or
+ * the object has been destroyed.
+ */
+ if (obj) {
+ struct ldlm_extent *extent = &dlmlock->l_policy_data.l_extent;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+ __u64 old_kms;
+
+ /* Destroy pages covered by the extent of the DLM lock */
+ result = osc_lock_flush(cl2osc(obj),
+ cl_index(obj, extent->start),
+ cl_index(obj, extent->end),
+ mode, discard);
+
+ /* losing a lock, update kms */
+ lock_res_and_lock(dlmlock);
+ cl_object_attr_lock(obj);
+ /* Must get the value under the lock to avoid race. */
+ old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
+ /* Update the kms. Need to loop over all granted locks.
+ * Not a problem for the client
*/
- cancel = (flag == LDLM_CB_BLOCKING);
+ attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
- if (cancel) {
- struct lustre_handle *lockh;
+ cl_object_attr_set(env, obj, attr, CAT_KMS);
+ cl_object_attr_unlock(obj);
+ unlock_res_and_lock(dlmlock);
- lockh = &osc_env_info(env)->oti_handle;
- ldlm_lock2handle(dlmlock, lockh);
- result = ldlm_cli_cancel(lockh, LCF_ASYNC);
- } else
- result = 0;
+ cl_object_put(env, obj);
+ }
return result;
}
@@ -736,107 +522,52 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
struct ldlm_lock_desc *new, void *data,
int flag)
{
- struct lu_env *env;
- struct cl_env_nest nest;
- int result;
+ int result = 0;
- /*
- * This can be called in the context of outer IO, e.g.,
- *
- * cl_enqueue()->...
- * ->osc_enqueue_base()->...
- * ->ldlm_prep_elc_req()->...
- * ->ldlm_cancel_callback()->...
- * ->osc_ldlm_blocking_ast()
- *
- * new environment has to be created to not corrupt outer context.
- */
- env = cl_env_nested_get(&nest);
- if (!IS_ERR(env)) {
- result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
- cl_env_nested_put(&nest, env);
- } else {
- result = PTR_ERR(env);
- /*
- * XXX This should never happen, as cl_lock is
- * stuck. Pre-allocated environment a la vvp_inode_fini_env
- * should be used.
- */
- LBUG();
- }
- if (result != 0) {
+ switch (flag) {
+ case LDLM_CB_BLOCKING: {
+ struct lustre_handle lockh;
+
+ ldlm_lock2handle(dlmlock, &lockh);
+ result = ldlm_cli_cancel(&lockh, LCF_ASYNC);
if (result == -ENODATA)
result = 0;
- else
- CERROR("BAST failed: %d\n", result);
+ break;
}
- return result;
-}
+ case LDLM_CB_CANCELING: {
+ struct lu_env *env;
+ struct cl_env_nest nest;
-static int osc_ldlm_completion_ast(struct ldlm_lock *dlmlock,
- __u64 flags, void *data)
-{
- struct cl_env_nest nest;
- struct lu_env *env;
- struct osc_lock *olck;
- struct cl_lock *lock;
- int result;
- int dlmrc;
-
- /* first, do dlm part of the work */
- dlmrc = ldlm_completion_ast_async(dlmlock, flags, data);
- /* then, notify cl_lock */
- env = cl_env_nested_get(&nest);
- if (!IS_ERR(env)) {
- olck = osc_ast_data_get(dlmlock);
- if (olck) {
- lock = olck->ols_cl.cls_lock;
- cl_lock_mutex_get(env, lock);
- /*
- * ldlm_handle_cp_callback() copied LVB from request
- * to lock->l_lvb_data, store it in osc_lock.
- */
- LASSERT(dlmlock->l_lvb_data);
- lock_res_and_lock(dlmlock);
- olck->ols_lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
- if (!olck->ols_lock) {
- /*
- * upcall (osc_lock_upcall()) hasn't yet been
- * called. Do nothing now, upcall will bind
- * olck to dlmlock and signal the waiters.
- *
- * This maintains an invariant that osc_lock
- * and ldlm_lock are always bound when
- * osc_lock is in OLS_GRANTED state.
- */
- } else if (dlmlock->l_granted_mode ==
- dlmlock->l_req_mode) {
- osc_lock_granted(env, olck, dlmlock, dlmrc);
- }
- unlock_res_and_lock(dlmlock);
+ /*
+ * This can be called in the context of outer IO, e.g.,
+ *
+ * osc_enqueue_base()->...
+ * ->ldlm_prep_elc_req()->...
+ * ->ldlm_cancel_callback()->...
+ * ->osc_ldlm_blocking_ast()
+ *
+ * new environment has to be created to not corrupt outer
+ * context.
+ */
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env)) {
+ result = PTR_ERR(env);
+ break;
+ }
- if (dlmrc != 0) {
- CL_LOCK_DEBUG(D_ERROR, env, lock,
- "dlmlock returned %d\n", dlmrc);
- cl_lock_error(env, lock, dlmrc);
- }
- cl_lock_mutex_put(env, lock);
- osc_ast_data_put(env, olck);
- result = 0;
- } else
- result = -ELDLM_NO_LOCK_DATA;
+ result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
cl_env_nested_put(&nest, env);
- } else
- result = PTR_ERR(env);
- return dlmrc ?: result;
+ break;
+ }
+ default:
+ LBUG();
+ }
+ return result;
}
static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
{
struct ptlrpc_request *req = data;
- struct osc_lock *olck;
- struct cl_lock *lock;
- struct cl_object *obj;
struct cl_env_nest nest;
struct lu_env *env;
struct ost_lvb *lvb;
@@ -847,14 +578,16 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
env = cl_env_nested_get(&nest);
if (!IS_ERR(env)) {
- /* osc_ast_data_get() has to go after environment is
- * allocated, because osc_ast_data() acquires a
- * reference to a lock, and it can only be released in
- * environment.
- */
- olck = osc_ast_data_get(dlmlock);
- if (olck) {
- lock = olck->ols_cl.cls_lock;
+ struct cl_object *obj = NULL;
+
+ lock_res_and_lock(dlmlock);
+ if (dlmlock->l_ast_data) {
+ obj = osc2cl(dlmlock->l_ast_data);
+ cl_object_get(obj);
+ }
+ unlock_res_and_lock(dlmlock);
+
+ if (obj) {
/* Do not grab the mutex of cl_lock for glimpse.
* See LU-1274 for details.
* BTW, it's okay for cl_lock to be cancelled during
@@ -869,7 +602,6 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
result = req_capsule_server_pack(cap);
if (result == 0) {
lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
- obj = lock->cll_descr.cld_obj;
result = cl_object_glimpse(env, obj, lvb);
}
if (!exp_connect_lvb_type(req->rq_export))
@@ -877,7 +609,7 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
&RMF_DLM_LVB,
sizeof(struct ost_lvb_v1),
RCL_SERVER);
- osc_ast_data_put(env, olck);
+ cl_object_put(env, obj);
} else {
/*
* These errors are normal races, so we don't want to
@@ -888,44 +620,123 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
result = -ELDLM_NO_LOCK_DATA;
}
cl_env_nested_put(&nest, env);
- } else
+ } else {
result = PTR_ERR(env);
+ }
req->rq_status = result;
return result;
}
-static unsigned long osc_lock_weigh(const struct lu_env *env,
- const struct cl_lock_slice *slice)
+static int weigh_cb(const struct lu_env *env, struct cl_io *io,
+ struct osc_page *ops, void *cbdata)
{
- /*
- * don't need to grab coh_page_guard since we don't care the exact #
- * of pages..
- */
- return cl_object_header(slice->cls_obj)->coh_pages;
+ struct cl_page *page = ops->ops_cl.cpl_page;
+
+ if (cl_page_is_vmlocked(env, page) ||
+ PageDirty(page->cp_vmpage) || PageWriteback(page->cp_vmpage)
+ )
+ return CLP_GANG_ABORT;
+
+ *(pgoff_t *)cbdata = osc_index(ops) + 1;
+ return CLP_GANG_OKAY;
}
-static void osc_lock_build_einfo(const struct lu_env *env,
- const struct cl_lock *clock,
- struct osc_lock *lock,
- struct ldlm_enqueue_info *einfo)
+static unsigned long osc_lock_weight(const struct lu_env *env,
+ struct osc_object *oscobj,
+ struct ldlm_extent *extent)
+{
+ struct cl_io *io = &osc_env_info(env)->oti_io;
+ struct cl_object *obj = cl_object_top(&oscobj->oo_cl);
+ pgoff_t page_index;
+ int result;
+
+ io->ci_obj = obj;
+ io->ci_ignore_layout = 1;
+ result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+ if (result != 0)
+ return result;
+
+ page_index = cl_index(obj, extent->start);
+ do {
+ result = osc_page_gang_lookup(env, io, oscobj,
+ page_index,
+ cl_index(obj, extent->end),
+ weigh_cb, (void *)&page_index);
+ if (result == CLP_GANG_ABORT)
+ break;
+ if (result == CLP_GANG_RESCHED)
+ cond_resched();
+ } while (result != CLP_GANG_OKAY);
+ cl_io_fini(env, io);
+
+ return result == CLP_GANG_ABORT ? 1 : 0;
+}
+
+/**
+ * Get the weight of dlm lock for early cancellation.
+ */
+unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
{
- enum cl_lock_mode mode;
+ struct cl_env_nest nest;
+ struct lu_env *env;
+ struct osc_object *obj;
+ struct osc_lock *oscl;
+ unsigned long weight;
+ bool found = false;
+
+ might_sleep();
+ /*
+ * osc_ldlm_weigh_ast has a complex context since it might be called
+ * because of lock canceling, or from user's input. We have to make
+ * a new environment for it. It is probably safe to use the upper
+ * context because cl_lock_put doesn't modify environment
+ * variables. But just in case ...
+ */
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env))
+ /* Most likely due to lack of memory; do not eliminate this lock */
+ return 1;
- mode = clock->cll_descr.cld_mode;
- if (mode == CLM_PHANTOM)
+ LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT);
+ obj = dlmlock->l_ast_data;
+ if (!obj) {
+ weight = 1;
+ goto out;
+ }
+
+ spin_lock(&obj->oo_ol_spin);
+ list_for_each_entry(oscl, &obj->oo_ol_list, ols_nextlock_oscobj) {
+ if (oscl->ols_dlmlock && oscl->ols_dlmlock != dlmlock)
+ continue;
+ found = true;
+ }
+ spin_unlock(&obj->oo_ol_spin);
+ if (found) {
/*
- * For now, enqueue all glimpse locks in read mode. In the
- * future, client might choose to enqueue LCK_PW lock for
- * glimpse on a file opened for write.
+ * If the lock is being used by an IO, definitely do not cancel it.
*/
- mode = CLM_READ;
+ weight = 1;
+ goto out;
+ }
+
+ weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent);
+
+out:
+ cl_env_nested_put(&nest, env);
+ return weight;
+}
+static void osc_lock_build_einfo(const struct lu_env *env,
+ const struct cl_lock *lock,
+ struct osc_object *osc,
+ struct ldlm_enqueue_info *einfo)
+{
einfo->ei_type = LDLM_EXTENT;
- einfo->ei_mode = osc_cl_lock2ldlm(mode);
+ einfo->ei_mode = osc_cl_lock2ldlm(lock->cll_descr.cld_mode);
einfo->ei_cb_bl = osc_ldlm_blocking_ast;
- einfo->ei_cb_cp = osc_ldlm_completion_ast;
+ einfo->ei_cb_cp = ldlm_completion_ast;
einfo->ei_cb_gl = osc_ldlm_glimpse_ast;
- einfo->ei_cbdata = lock; /* value to be put into ->l_ast_data */
+ einfo->ei_cbdata = osc; /* value to be put into ->l_ast_data */
}
/**
@@ -981,113 +792,100 @@ static void osc_lock_to_lockless(const struct lu_env *env,
LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
}
-static int osc_lock_compatible(const struct osc_lock *qing,
- const struct osc_lock *qed)
+static bool osc_lock_compatible(const struct osc_lock *qing,
+ const struct osc_lock *qed)
{
- enum cl_lock_mode qing_mode;
- enum cl_lock_mode qed_mode;
+ struct cl_lock_descr *qed_descr = &qed->ols_cl.cls_lock->cll_descr;
+ struct cl_lock_descr *qing_descr = &qing->ols_cl.cls_lock->cll_descr;
- qing_mode = qing->ols_cl.cls_lock->cll_descr.cld_mode;
- if (qed->ols_glimpse &&
- (qed->ols_state >= OLS_UPCALL_RECEIVED || qing_mode == CLM_READ))
- return 1;
+ if (qed->ols_glimpse)
+ return true;
+
+ if (qing_descr->cld_mode == CLM_READ && qed_descr->cld_mode == CLM_READ)
+ return true;
+
+ if (qed->ols_state < OLS_GRANTED)
+ return true;
+
+ if (qed_descr->cld_mode >= qing_descr->cld_mode &&
+ qed_descr->cld_start <= qing_descr->cld_start &&
+ qed_descr->cld_end >= qing_descr->cld_end)
+ return true;
- qed_mode = qed->ols_cl.cls_lock->cll_descr.cld_mode;
- return ((qing_mode == CLM_READ) && (qed_mode == CLM_READ));
+ return false;
}
-/**
- * Cancel all conflicting locks and wait for them to be destroyed.
- *
- * This function is used for two purposes:
- *
- * - early cancel all conflicting locks before starting IO, and
- *
- * - guarantee that pages added to the page cache by lockless IO are never
- * covered by locks other than lockless IO lock, and, hence, are not
- * visible to other threads.
- */
-static int osc_lock_enqueue_wait(const struct lu_env *env,
- const struct osc_lock *olck)
+static void osc_lock_wake_waiters(const struct lu_env *env,
+ struct osc_object *osc,
+ struct osc_lock *oscl)
{
- struct cl_lock *lock = olck->ols_cl.cls_lock;
- struct cl_lock_descr *descr = &lock->cll_descr;
- struct cl_object_header *hdr = cl_object_header(descr->cld_obj);
- struct cl_lock *scan;
- struct cl_lock *conflict = NULL;
- int lockless = osc_lock_is_lockless(olck);
- int rc = 0;
+ spin_lock(&osc->oo_ol_spin);
+ list_del_init(&oscl->ols_nextlock_oscobj);
+ spin_unlock(&osc->oo_ol_spin);
- LASSERT(cl_lock_is_mutexed(lock));
+ spin_lock(&oscl->ols_lock);
+ while (!list_empty(&oscl->ols_waiting_list)) {
+ struct osc_lock *scan;
- /* make it enqueue anyway for glimpse lock, because we actually
- * don't need to cancel any conflicting locks.
- */
- if (olck->ols_glimpse)
- return 0;
+ scan = list_entry(oscl->ols_waiting_list.next, struct osc_lock,
+ ols_wait_entry);
+ list_del_init(&scan->ols_wait_entry);
- spin_lock(&hdr->coh_lock_guard);
- list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) {
- struct cl_lock_descr *cld = &scan->cll_descr;
- const struct osc_lock *scan_ols;
+ cl_sync_io_note(env, scan->ols_owner, 0);
+ }
+ spin_unlock(&oscl->ols_lock);
+}
+
+static void osc_lock_enqueue_wait(const struct lu_env *env,
+ struct osc_object *obj,
+ struct osc_lock *oscl)
+{
+ struct osc_lock *tmp_oscl;
+ struct cl_lock_descr *need = &oscl->ols_cl.cls_lock->cll_descr;
+ struct cl_sync_io *waiter = &osc_env_info(env)->oti_anchor;
- if (scan == lock)
+ spin_lock(&obj->oo_ol_spin);
+ list_add_tail(&oscl->ols_nextlock_oscobj, &obj->oo_ol_list);
+
+restart:
+ list_for_each_entry(tmp_oscl, &obj->oo_ol_list,
+ ols_nextlock_oscobj) {
+ struct cl_lock_descr *descr;
+
+ if (tmp_oscl == oscl)
break;
- if (scan->cll_state < CLS_QUEUING ||
- scan->cll_state == CLS_FREEING ||
- cld->cld_start > descr->cld_end ||
- cld->cld_end < descr->cld_start)
+ descr = &tmp_oscl->ols_cl.cls_lock->cll_descr;
+ if (descr->cld_start > need->cld_end ||
+ descr->cld_end < need->cld_start)
continue;
- /* overlapped and living locks. */
+ /* We're not supposed to give up group lock */
+ if (descr->cld_mode == CLM_GROUP)
+ break;
- /* We're not supposed to give up group lock. */
- if (scan->cll_descr.cld_mode == CLM_GROUP) {
- LASSERT(descr->cld_mode != CLM_GROUP ||
- descr->cld_gid != scan->cll_descr.cld_gid);
+ if (!osc_lock_is_lockless(oscl) &&
+ osc_lock_compatible(oscl, tmp_oscl))
continue;
- }
- scan_ols = osc_lock_at(scan);
+ /* wait for conflicting lock to be canceled */
+ cl_sync_io_init(waiter, 1, cl_sync_io_end);
+ oscl->ols_owner = waiter;
- /* We need to cancel the compatible locks if we're enqueuing
- * a lockless lock, for example:
- * imagine that client has PR lock on [0, 1000], and thread T0
- * is doing lockless IO in [500, 1500] region. Concurrent
- * thread T1 can see lockless data in [500, 1000], which is
- * wrong, because these data are possibly stale.
- */
- if (!lockless && osc_lock_compatible(olck, scan_ols))
- continue;
+ spin_lock(&tmp_oscl->ols_lock);
+ /* add oscl into tmp's ols_waiting list */
+ list_add_tail(&oscl->ols_wait_entry,
+ &tmp_oscl->ols_waiting_list);
+ spin_unlock(&tmp_oscl->ols_lock);
- cl_lock_get_trust(scan);
- conflict = scan;
- break;
- }
- spin_unlock(&hdr->coh_lock_guard);
+ spin_unlock(&obj->oo_ol_spin);
+ (void)cl_sync_io_wait(env, waiter, 0);
- if (conflict) {
- if (lock->cll_descr.cld_mode == CLM_GROUP) {
- /* we want a group lock but a previous lock request
- * conflicts, we do not wait but return 0 so the
- * request is send to the server
- */
- CDEBUG(D_DLMTRACE, "group lock %p is conflicted with %p, no wait, send to server\n",
- lock, conflict);
- cl_lock_put(env, conflict);
- rc = 0;
- } else {
- CDEBUG(D_DLMTRACE, "lock %p is conflicted with %p, will wait\n",
- lock, conflict);
- LASSERT(!lock->cll_conflict);
- lu_ref_add(&conflict->cll_reference, "cancel-wait",
- lock);
- lock->cll_conflict = conflict;
- rc = CLO_WAIT;
- }
+ spin_lock(&obj->oo_ol_spin);
+ oscl->ols_owner = NULL;
+ goto restart;
}
- return rc;
+ spin_unlock(&obj->oo_ol_spin);
}
/**
@@ -1106,188 +904,122 @@ static int osc_lock_enqueue_wait(const struct lu_env *env,
*/
static int osc_lock_enqueue(const struct lu_env *env,
const struct cl_lock_slice *slice,
- struct cl_io *unused, __u32 enqflags)
+ struct cl_io *unused, struct cl_sync_io *anchor)
{
- struct osc_lock *ols = cl2osc_lock(slice);
- struct cl_lock *lock = ols->ols_cl.cls_lock;
+ struct osc_thread_info *info = osc_env_info(env);
+ struct osc_io *oio = osc_env_io(env);
+ struct osc_object *osc = cl2osc(slice->cls_obj);
+ struct osc_lock *oscl = cl2osc_lock(slice);
+ struct cl_lock *lock = slice->cls_lock;
+ struct ldlm_res_id *resname = &info->oti_resname;
+ ldlm_policy_data_t *policy = &info->oti_policy;
+ osc_enqueue_upcall_f upcall = osc_lock_upcall;
+ void *cookie = oscl;
+ bool async = false;
int result;
- LASSERT(cl_lock_is_mutexed(lock));
- LASSERTF(ols->ols_state == OLS_NEW,
- "Impossible state: %d\n", ols->ols_state);
-
- LASSERTF(ergo(ols->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
- "lock = %p, ols = %p\n", lock, ols);
+ LASSERTF(ergo(oscl->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
+ "lock = %p, ols = %p\n", lock, oscl);
- result = osc_lock_enqueue_wait(env, ols);
- if (result == 0) {
- if (!osc_lock_is_lockless(ols)) {
- struct osc_object *obj = cl2osc(slice->cls_obj);
- struct osc_thread_info *info = osc_env_info(env);
- struct ldlm_res_id *resname = &info->oti_resname;
- ldlm_policy_data_t *policy = &info->oti_policy;
- struct ldlm_enqueue_info *einfo = &ols->ols_einfo;
+ if (oscl->ols_state == OLS_GRANTED)
+ return 0;
- /* lock will be passed as upcall cookie,
- * hold ref to prevent to be released.
- */
- cl_lock_hold_add(env, lock, "upcall", lock);
- /* a user for lock also */
- cl_lock_user_add(env, lock);
- ols->ols_state = OLS_ENQUEUED;
+ if (oscl->ols_flags & LDLM_FL_TEST_LOCK)
+ goto enqueue_base;
- /*
- * XXX: this is possible blocking point as
- * ldlm_lock_match(LDLM_FL_LVB_READY) waits for
- * LDLM_CP_CALLBACK.
- */
- ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
- osc_lock_build_policy(env, lock, policy);
- result = osc_enqueue_base(osc_export(obj), resname,
- &ols->ols_flags, policy,
- &ols->ols_lvb,
- obj->oo_oinfo->loi_kms_valid,
- osc_lock_upcall,
- ols, einfo, &ols->ols_handle,
- PTLRPCD_SET, 1, ols->ols_agl);
- if (result != 0) {
- cl_lock_user_del(env, lock);
- cl_lock_unhold(env, lock, "upcall", lock);
- if (unlikely(result == -ECANCELED)) {
- ols->ols_state = OLS_NEW;
- result = 0;
- }
- }
- } else {
- ols->ols_state = OLS_GRANTED;
- ols->ols_owner = osc_env_io(env);
- }
+ if (oscl->ols_glimpse) {
+ LASSERT(equi(oscl->ols_agl, !anchor));
+ async = true;
+ goto enqueue_base;
}
- LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
- return result;
-}
-static int osc_lock_wait(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct osc_lock *olck = cl2osc_lock(slice);
- struct cl_lock *lock = olck->ols_cl.cls_lock;
-
- LINVRNT(osc_lock_invariant(olck));
-
- if (olck->ols_glimpse && olck->ols_state >= OLS_UPCALL_RECEIVED) {
- if (olck->ols_flags & LDLM_FL_LVB_READY) {
- return 0;
- } else if (olck->ols_agl) {
- if (lock->cll_flags & CLF_FROM_UPCALL)
- /* It is from enqueue RPC reply upcall for
- * updating state. Do not re-enqueue.
- */
- return -ENAVAIL;
- olck->ols_state = OLS_NEW;
- } else {
- LASSERT(lock->cll_error);
- return lock->cll_error;
- }
+ osc_lock_enqueue_wait(env, osc, oscl);
+
+ /* we can grant lockless lock right after all conflicting locks
+ * are canceled.
+ */
+ if (osc_lock_is_lockless(oscl)) {
+ oscl->ols_state = OLS_GRANTED;
+ oio->oi_lockless = 1;
+ return 0;
}
- if (olck->ols_state == OLS_NEW) {
- int rc;
-
- LASSERT(olck->ols_agl);
- olck->ols_agl = 0;
- olck->ols_flags &= ~LDLM_FL_BLOCK_NOWAIT;
- rc = osc_lock_enqueue(env, slice, NULL, CEF_ASYNC | CEF_MUST);
- if (rc != 0)
- return rc;
- else
- return CLO_REENQUEUED;
+enqueue_base:
+ oscl->ols_state = OLS_ENQUEUED;
+ if (anchor) {
+ atomic_inc(&anchor->csi_sync_nr);
+ oscl->ols_owner = anchor;
}
- LASSERT(equi(olck->ols_state >= OLS_UPCALL_RECEIVED &&
- lock->cll_error == 0, olck->ols_lock));
+ /**
+ * The DLM lock's ast data must be the osc_object;
+ * for a glimpse or AGL lock, async of osc_enqueue_base() must be true;
+ * DLM's enqueue callback is set to osc_lock_upcall() with the
+ * osc_lock as its cookie.
+ */
+ ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
+ osc_lock_build_einfo(env, lock, osc, &oscl->ols_einfo);
+ osc_lock_build_policy(env, lock, policy);
+ if (oscl->ols_agl) {
+ oscl->ols_einfo.ei_cbdata = NULL;
+ /* hold a reference for callback */
+ cl_object_get(osc2cl(osc));
+ upcall = osc_lock_upcall_agl;
+ cookie = osc;
+ }
+ result = osc_enqueue_base(osc_export(osc), resname, &oscl->ols_flags,
+ policy, &oscl->ols_lvb,
+ osc->oo_oinfo->loi_kms_valid,
+ upcall, cookie,
+ &oscl->ols_einfo, PTLRPCD_SET, async,
+ oscl->ols_agl);
+ if (result != 0) {
+ oscl->ols_state = OLS_CANCELLED;
+ osc_lock_wake_waiters(env, osc, oscl);
- return lock->cll_error ?: olck->ols_state >= OLS_GRANTED ? 0 : CLO_WAIT;
+ /* hide error for AGL lock. */
+ if (oscl->ols_agl) {
+ cl_object_put(env, osc2cl(osc));
+ result = 0;
+ }
+ if (anchor)
+ cl_sync_io_note(env, anchor, result);
+ } else {
+ if (osc_lock_is_lockless(oscl)) {
+ oio->oi_lockless = 1;
+ } else if (!async) {
+ LASSERT(oscl->ols_state == OLS_GRANTED);
+ LASSERT(oscl->ols_hold);
+ LASSERT(oscl->ols_dlmlock);
+ }
+ }
+ return result;
}
/**
- * An implementation of cl_lock_operations::clo_use() method that pins cached
- * lock.
+ * Breaks a link between osc_lock and dlm_lock.
*/
-static int osc_lock_use(const struct lu_env *env,
- const struct cl_lock_slice *slice)
+static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
{
- struct osc_lock *olck = cl2osc_lock(slice);
- int rc;
-
- LASSERT(!olck->ols_hold);
+ struct ldlm_lock *dlmlock;
- /*
- * Atomically check for LDLM_FL_CBPENDING and addref a lock if this
- * flag is not set. This protects us from a concurrent blocking ast.
- */
- rc = ldlm_lock_addref_try(&olck->ols_handle, olck->ols_einfo.ei_mode);
- if (rc == 0) {
- olck->ols_hold = 1;
- olck->ols_state = OLS_GRANTED;
- } else {
- struct cl_lock *lock;
+ dlmlock = olck->ols_dlmlock;
+ if (!dlmlock)
+ return;
- /*
- * Lock is being cancelled somewhere within
- * ldlm_handle_bl_callback(): LDLM_FL_CBPENDING is already
- * set, but osc_ldlm_blocking_ast() hasn't yet acquired
- * cl_lock mutex.
- */
- lock = slice->cls_lock;
- LASSERT(lock->cll_state == CLS_INTRANSIT);
- LASSERT(lock->cll_users > 0);
- /* set a flag for osc_dlm_blocking_ast0() to signal the
- * lock.
- */
- olck->ols_ast_wait = 1;
- rc = CLO_WAIT;
+ if (olck->ols_hold) {
+ olck->ols_hold = 0;
+ osc_cancel_base(&olck->ols_handle, olck->ols_einfo.ei_mode);
+ olck->ols_handle.cookie = 0ULL;
}
- return rc;
-}
-static int osc_lock_flush(struct osc_lock *ols, int discard)
-{
- struct cl_lock *lock = ols->ols_cl.cls_lock;
- struct cl_env_nest nest;
- struct lu_env *env;
- int result = 0;
-
- env = cl_env_nested_get(&nest);
- if (!IS_ERR(env)) {
- struct osc_object *obj = cl2osc(ols->ols_cl.cls_obj);
- struct cl_lock_descr *descr = &lock->cll_descr;
- int rc = 0;
-
- if (descr->cld_mode >= CLM_WRITE) {
- result = osc_cache_writeback_range(env, obj,
- descr->cld_start,
- descr->cld_end,
- 1, discard);
- LDLM_DEBUG(ols->ols_lock,
- "lock %p: %d pages were %s.\n", lock, result,
- discard ? "discarded" : "written");
- if (result > 0)
- result = 0;
- }
+ olck->ols_dlmlock = NULL;
- rc = cl_lock_discard_pages(env, lock);
- if (result == 0 && rc < 0)
- result = rc;
-
- cl_env_nested_put(&nest, env);
- } else
- result = PTR_ERR(env);
- if (result == 0) {
- ols->ols_flush = 1;
- LINVRNT(!osc_lock_has_pages(ols));
- }
- return result;
+ /* release a reference taken in osc_lock_upcall(). */
+ LASSERT(olck->ols_has_ref);
+ lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
+ LDLM_LOCK_RELEASE(dlmlock);
+ olck->ols_has_ref = 0;
}
/**
@@ -1307,96 +1039,16 @@ static int osc_lock_flush(struct osc_lock *ols, int discard)
static void osc_lock_cancel(const struct lu_env *env,
const struct cl_lock_slice *slice)
{
- struct cl_lock *lock = slice->cls_lock;
- struct osc_lock *olck = cl2osc_lock(slice);
- struct ldlm_lock *dlmlock = olck->ols_lock;
- int result = 0;
- int discard;
-
- LASSERT(cl_lock_is_mutexed(lock));
- LINVRNT(osc_lock_invariant(olck));
-
- if (dlmlock) {
- int do_cancel;
-
- discard = !!(dlmlock->l_flags & LDLM_FL_DISCARD_DATA);
- if (olck->ols_state >= OLS_GRANTED)
- result = osc_lock_flush(olck, discard);
- osc_lock_unhold(olck);
-
- lock_res_and_lock(dlmlock);
- /* Now that we're the only user of dlm read/write reference,
- * mostly the ->l_readers + ->l_writers should be zero.
- * However, there is a corner case.
- * See bug 18829 for details.
- */
- do_cancel = (dlmlock->l_readers == 0 &&
- dlmlock->l_writers == 0);
- dlmlock->l_flags |= LDLM_FL_CBPENDING;
- unlock_res_and_lock(dlmlock);
- if (do_cancel)
- result = ldlm_cli_cancel(&olck->ols_handle, LCF_ASYNC);
- if (result < 0)
- CL_LOCK_DEBUG(D_ERROR, env, lock,
- "lock %p cancel failure with error(%d)\n",
- lock, result);
- }
- olck->ols_state = OLS_CANCELLED;
- olck->ols_flags &= ~LDLM_FL_LVB_READY;
- osc_lock_detach(env, olck);
-}
-
-static int osc_lock_has_pages(struct osc_lock *olck)
-{
- return 0;
-}
-
-static void osc_lock_delete(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct osc_lock *olck;
+ struct osc_object *obj = cl2osc(slice->cls_obj);
+ struct osc_lock *oscl = cl2osc_lock(slice);
- olck = cl2osc_lock(slice);
- if (olck->ols_glimpse) {
- LASSERT(!olck->ols_hold);
- LASSERT(!olck->ols_lock);
- return;
- }
+ LINVRNT(osc_lock_invariant(oscl));
- LINVRNT(osc_lock_invariant(olck));
- LINVRNT(!osc_lock_has_pages(olck));
+ osc_lock_detach(env, oscl);
+ oscl->ols_state = OLS_CANCELLED;
+ oscl->ols_flags &= ~LDLM_FL_LVB_READY;
- osc_lock_unhold(olck);
- osc_lock_detach(env, olck);
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for osc layer.
- *
- * Maintains osc_lock::ols_owner field.
- *
- * This assumes that lock always enters CLS_HELD (from some other state) in
- * the same IO context as one that requested the lock. This should not be a
- * problem, because context is by definition shared by all activity pertaining
- * to the same high-level IO.
- */
-static void osc_lock_state(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- enum cl_lock_state state)
-{
- struct osc_lock *lock = cl2osc_lock(slice);
-
- /*
- * XXX multiple io contexts can use the lock at the same time.
- */
- LINVRNT(osc_lock_invariant(lock));
- if (state == CLS_HELD && slice->cls_lock->cll_state != CLS_HELD) {
- struct osc_io *oio = osc_env_io(env);
-
- LASSERT(!lock->ols_owner);
- lock->ols_owner = oio;
- } else if (state != CLS_HELD)
- lock->ols_owner = NULL;
+ osc_lock_wake_waiters(env, obj, oscl);
}
static int osc_lock_print(const struct lu_env *env, void *cookie,
@@ -1404,221 +1056,162 @@ static int osc_lock_print(const struct lu_env *env, void *cookie,
{
struct osc_lock *lock = cl2osc_lock(slice);
- /*
- * XXX print ldlm lock and einfo properly.
- */
(*p)(env, cookie, "%p %#16llx %#llx %d %p ",
- lock->ols_lock, lock->ols_flags, lock->ols_handle.cookie,
+ lock->ols_dlmlock, lock->ols_flags, lock->ols_handle.cookie,
lock->ols_state, lock->ols_owner);
osc_lvb_print(env, cookie, p, &lock->ols_lvb);
return 0;
}
-static int osc_lock_fits_into(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *need,
- const struct cl_io *io)
-{
- struct osc_lock *ols = cl2osc_lock(slice);
-
- if (need->cld_enq_flags & CEF_NEVER)
- return 0;
-
- if (ols->ols_state >= OLS_CANCELLED)
- return 0;
-
- if (need->cld_mode == CLM_PHANTOM) {
- if (ols->ols_agl)
- return !(ols->ols_state > OLS_RELEASED);
-
- /*
- * Note: the QUEUED lock can't be matched here, otherwise
- * it might cause the deadlocks.
- * In read_process,
- * P1: enqueued read lock, create sublock1
- * P2: enqueued write lock, create sublock2(conflicted
- * with sublock1).
- * P1: Grant read lock.
- * P1: enqueued glimpse lock(with holding sublock1_read),
- * matched with sublock2, waiting sublock2 to be granted.
- * But sublock2 can not be granted, because P1
- * will not release sublock1. Bang!
- */
- if (ols->ols_state < OLS_GRANTED ||
- ols->ols_state > OLS_RELEASED)
- return 0;
- } else if (need->cld_enq_flags & CEF_MUST) {
- /*
- * If the lock hasn't ever enqueued, it can't be matched
- * because enqueue process brings in many information
- * which can be used to determine things such as lockless,
- * CEF_MUST, etc.
- */
- if (ols->ols_state < OLS_UPCALL_RECEIVED &&
- ols->ols_locklessable)
- return 0;
- }
- return 1;
-}
-
static const struct cl_lock_operations osc_lock_ops = {
.clo_fini = osc_lock_fini,
.clo_enqueue = osc_lock_enqueue,
- .clo_wait = osc_lock_wait,
- .clo_unuse = osc_lock_unuse,
- .clo_use = osc_lock_use,
- .clo_delete = osc_lock_delete,
- .clo_state = osc_lock_state,
.clo_cancel = osc_lock_cancel,
- .clo_weigh = osc_lock_weigh,
.clo_print = osc_lock_print,
- .clo_fits_into = osc_lock_fits_into,
};
-static int osc_lock_lockless_unuse(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct osc_lock *ols = cl2osc_lock(slice);
- struct cl_lock *lock = slice->cls_lock;
-
- LASSERT(ols->ols_state == OLS_GRANTED);
- LINVRNT(osc_lock_invariant(ols));
-
- cl_lock_cancel(env, lock);
- cl_lock_delete(env, lock);
- return 0;
-}
-
static void osc_lock_lockless_cancel(const struct lu_env *env,
const struct cl_lock_slice *slice)
{
struct osc_lock *ols = cl2osc_lock(slice);
+ struct osc_object *osc = cl2osc(slice->cls_obj);
+ struct cl_lock_descr *descr = &slice->cls_lock->cll_descr;
int result;
- result = osc_lock_flush(ols, 0);
+ LASSERT(!ols->ols_dlmlock);
+ result = osc_lock_flush(osc, descr->cld_start, descr->cld_end,
+ descr->cld_mode, 0);
if (result)
CERROR("Pages for lockless lock %p were not purged(%d)\n",
ols, result);
- ols->ols_state = OLS_CANCELLED;
-}
-
-static int osc_lock_lockless_wait(const struct lu_env *env,
- const struct cl_lock_slice *slice)
-{
- struct osc_lock *olck = cl2osc_lock(slice);
- struct cl_lock *lock = olck->ols_cl.cls_lock;
- LINVRNT(osc_lock_invariant(olck));
- LASSERT(olck->ols_state >= OLS_UPCALL_RECEIVED);
-
- return lock->cll_error;
+ osc_lock_wake_waiters(env, osc, ols);
}
-static void osc_lock_lockless_state(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- enum cl_lock_state state)
-{
- struct osc_lock *lock = cl2osc_lock(slice);
+static const struct cl_lock_operations osc_lock_lockless_ops = {
+ .clo_fini = osc_lock_fini,
+ .clo_enqueue = osc_lock_enqueue,
+ .clo_cancel = osc_lock_lockless_cancel,
+ .clo_print = osc_lock_print
+};
- LINVRNT(osc_lock_invariant(lock));
- if (state == CLS_HELD) {
- struct osc_io *oio = osc_env_io(env);
+static void osc_lock_set_writer(const struct lu_env *env,
+ const struct cl_io *io,
+ struct cl_object *obj, struct osc_lock *oscl)
+{
+ struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
+ pgoff_t io_start;
+ pgoff_t io_end;
- LASSERT(ergo(lock->ols_owner, lock->ols_owner == oio));
- lock->ols_owner = oio;
+ if (!cl_object_same(io->ci_obj, obj))
+ return;
- /* set the io to be lockless if this lock is for io's
- * host object
- */
- if (cl_object_same(oio->oi_cl.cis_obj, slice->cls_obj))
- oio->oi_lockless = 1;
+ if (likely(io->ci_type == CIT_WRITE)) {
+ io_start = cl_index(obj, io->u.ci_rw.crw_pos);
+ io_end = cl_index(obj, io->u.ci_rw.crw_pos +
+ io->u.ci_rw.crw_count - 1);
+ if (cl_io_is_append(io)) {
+ io_start = 0;
+ io_end = CL_PAGE_EOF;
+ }
+ } else {
+ LASSERT(cl_io_is_mkwrite(io));
+ io_start = io->u.ci_fault.ft_index;
+ io_end = io->u.ci_fault.ft_index;
}
-}
-static int osc_lock_lockless_fits_into(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- const struct cl_lock_descr *need,
- const struct cl_io *io)
-{
- struct osc_lock *lock = cl2osc_lock(slice);
-
- if (!(need->cld_enq_flags & CEF_NEVER))
- return 0;
+ if (descr->cld_mode >= CLM_WRITE &&
+ descr->cld_start <= io_start && descr->cld_end >= io_end) {
+ struct osc_io *oio = osc_env_io(env);
- /* lockless lock should only be used by its owning io. b22147 */
- return (lock->ols_owner == osc_env_io(env));
+ /* There must be only one lock to match the write region */
+ LASSERT(!oio->oi_write_osclock);
+ oio->oi_write_osclock = oscl;
+ }
}
-static const struct cl_lock_operations osc_lock_lockless_ops = {
- .clo_fini = osc_lock_fini,
- .clo_enqueue = osc_lock_enqueue,
- .clo_wait = osc_lock_lockless_wait,
- .clo_unuse = osc_lock_lockless_unuse,
- .clo_state = osc_lock_lockless_state,
- .clo_fits_into = osc_lock_lockless_fits_into,
- .clo_cancel = osc_lock_lockless_cancel,
- .clo_print = osc_lock_print
-};
-
int osc_lock_init(const struct lu_env *env,
struct cl_object *obj, struct cl_lock *lock,
- const struct cl_io *unused)
+ const struct cl_io *io)
{
- struct osc_lock *clk;
- int result;
-
- clk = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS);
- if (clk) {
- __u32 enqflags = lock->cll_descr.cld_enq_flags;
+ struct osc_lock *oscl;
+ __u32 enqflags = lock->cll_descr.cld_enq_flags;
+
+ oscl = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS);
+ if (!oscl)
+ return -ENOMEM;
+
+ oscl->ols_state = OLS_NEW;
+ spin_lock_init(&oscl->ols_lock);
+ INIT_LIST_HEAD(&oscl->ols_waiting_list);
+ INIT_LIST_HEAD(&oscl->ols_wait_entry);
+ INIT_LIST_HEAD(&oscl->ols_nextlock_oscobj);
+
+ oscl->ols_flags = osc_enq2ldlm_flags(enqflags);
+ oscl->ols_agl = !!(enqflags & CEF_AGL);
+ if (oscl->ols_agl)
+ oscl->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
+ if (oscl->ols_flags & LDLM_FL_HAS_INTENT) {
+ oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED;
+ oscl->ols_glimpse = 1;
+ }
- osc_lock_build_einfo(env, lock, clk, &clk->ols_einfo);
- atomic_set(&clk->ols_pageref, 0);
- clk->ols_state = OLS_NEW;
+ cl_lock_slice_add(lock, &oscl->ols_cl, obj, &osc_lock_ops);
- clk->ols_flags = osc_enq2ldlm_flags(enqflags);
- clk->ols_agl = !!(enqflags & CEF_AGL);
- if (clk->ols_agl)
- clk->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
- if (clk->ols_flags & LDLM_FL_HAS_INTENT)
- clk->ols_glimpse = 1;
+ if (!(enqflags & CEF_MUST))
+ /* try to convert this lock to a lockless lock */
+ osc_lock_to_lockless(env, oscl, (enqflags & CEF_NEVER));
+ if (oscl->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
+ oscl->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
- cl_lock_slice_add(lock, &clk->ols_cl, obj, &osc_lock_ops);
+ if (io->ci_type == CIT_WRITE || cl_io_is_mkwrite(io))
+ osc_lock_set_writer(env, io, obj, oscl);
- if (!(enqflags & CEF_MUST))
- /* try to convert this lock to a lockless lock */
- osc_lock_to_lockless(env, clk, (enqflags & CEF_NEVER));
- if (clk->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
- clk->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
- LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx",
- lock, clk, clk->ols_flags);
+ LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx",
+ lock, oscl, oscl->ols_flags);
- result = 0;
- } else
- result = -ENOMEM;
- return result;
+ return 0;
}
-int osc_dlm_lock_pageref(struct ldlm_lock *dlm)
+/**
+ * Finds an existing lock covering given index and optionally different from a
+ * given \a except lock.
+ */
+struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
+ struct osc_object *obj, pgoff_t index,
+ int pending, int canceling)
{
- struct osc_lock *olock;
- int rc = 0;
-
- spin_lock(&osc_ast_guard);
- olock = dlm->l_ast_data;
+ struct osc_thread_info *info = osc_env_info(env);
+ struct ldlm_res_id *resname = &info->oti_resname;
+ ldlm_policy_data_t *policy = &info->oti_policy;
+ struct lustre_handle lockh;
+ struct ldlm_lock *lock = NULL;
+ enum ldlm_mode mode;
+ __u64 flags;
+
+ ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
+ osc_index2policy(policy, osc2cl(obj), index, index);
+ policy->l_extent.gid = LDLM_GID_ANY;
+
+ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
+ if (pending)
+ flags |= LDLM_FL_CBPENDING;
/*
- * there's a very rare race with osc_page_addref_lock(), but that
- * doesn't matter because in the worst case we don't cancel a lock
- * which we actually can, that's no harm.
+ * It is fine to match any group lock since there could be only one
+ * with a uniq gid and it conflicts with all other lock modes too
*/
- if (olock &&
- atomic_add_return(_PAGEREF_MAGIC,
- &olock->ols_pageref) != _PAGEREF_MAGIC) {
- atomic_sub(_PAGEREF_MAGIC, &olock->ols_pageref);
- rc = 1;
+again:
+ mode = ldlm_lock_match(osc_export(obj)->exp_obd->obd_namespace,
+ flags, resname, LDLM_EXTENT, policy,
+ LCK_PR | LCK_PW | LCK_GROUP, &lockh, canceling);
+ if (mode != 0) {
+ lock = ldlm_handle2lock(&lockh);
+ /* RACE: the lock is cancelled so let's try again */
+ if (unlikely(!lock))
+ goto again;
}
- spin_unlock(&osc_ast_guard);
- return rc;
+ return lock;
}
/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
index 9d474fcdd9a7..d211d1905e83 100644
--- a/drivers/staging/lustre/lustre/osc/osc_object.c
+++ b/drivers/staging/lustre/lustre/osc/osc_object.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -36,6 +32,7 @@
* Implementation of cl_object for OSC layer.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_OSC
@@ -94,6 +91,9 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
atomic_set(&osc->oo_nr_reads, 0);
atomic_set(&osc->oo_nr_writes, 0);
spin_lock_init(&osc->oo_lock);
+ spin_lock_init(&osc->oo_tree_lock);
+ spin_lock_init(&osc->oo_ol_spin);
+ INIT_LIST_HEAD(&osc->oo_ol_list);
cl_object_page_init(lu2cl(obj), sizeof(struct osc_page));
@@ -120,6 +120,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
LASSERT(list_empty(&osc->oo_reading_exts));
LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
+ LASSERT(list_empty(&osc->oo_ol_list));
lu_object_fini(obj);
kmem_cache_free(osc_object_kmem, osc);
@@ -192,6 +193,32 @@ static int osc_object_glimpse(const struct lu_env *env,
return 0;
}
+static int osc_object_ast_clear(struct ldlm_lock *lock, void *data)
+{
+ LASSERT(lock->l_granted_mode == lock->l_req_mode);
+ if (lock->l_ast_data == data)
+ lock->l_ast_data = NULL;
+ return LDLM_ITER_CONTINUE;
+}
+
+static int osc_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+ struct osc_object *osc = cl2osc(obj);
+ struct ldlm_res_id *resname = &osc_env_info(env)->oti_resname;
+
+ LASSERTF(osc->oo_npages == 0,
+ DFID "still have %lu pages, obj: %p, osc: %p\n",
+ PFID(lu_object_fid(&obj->co_lu)), osc->oo_npages, obj, osc);
+
+ /* DLM locks don't hold a reference of osc_object so we have to
+ * clear it before the object is being destroyed.
+ */
+ ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
+ ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname,
+ osc_object_ast_clear, osc);
+ return 0;
+}
+
void osc_object_set_contended(struct osc_object *obj)
{
obj->oo_contention_time = cfs_time_current();
@@ -236,12 +263,12 @@ static const struct cl_object_operations osc_ops = {
.coo_io_init = osc_io_init,
.coo_attr_get = osc_attr_get,
.coo_attr_set = osc_attr_set,
- .coo_glimpse = osc_object_glimpse
+ .coo_glimpse = osc_object_glimpse,
+ .coo_prune = osc_object_prune
};
static const struct lu_object_operations osc_lu_obj_ops = {
.loo_object_init = osc_object_init,
- .loo_object_delete = NULL,
.loo_object_release = NULL,
.loo_object_free = osc_object_free,
.loo_object_print = osc_object_print,
@@ -261,8 +288,9 @@ struct lu_object *osc_object_alloc(const struct lu_env *env,
lu_object_init(obj, NULL, dev);
osc->oo_cl.co_ops = &osc_ops;
obj->lo_ops = &osc_lu_obj_ops;
- } else
+ } else {
obj = NULL;
+ }
return obj;
}
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
index ce9ddd515f64..355f496a2093 100644
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -36,14 +32,15 @@
* Implementation of cl_page for OSC layer.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
+ * Author: Jinshan Xiong <jinshan.xiong@intel.com>
*/
#define DEBUG_SUBSYSTEM S_OSC
#include "osc_cl_internal.h"
-static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del);
-static void osc_lru_add(struct client_obd *cli, struct osc_page *opg);
+static void osc_lru_del(struct client_obd *cli, struct osc_page *opg);
+static void osc_lru_use(struct client_obd *cli, struct osc_page *opg);
static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
struct osc_page *opg);
@@ -51,30 +48,14 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
* @{
*/
-static int osc_page_protected(const struct lu_env *env,
- const struct osc_page *opg,
- enum cl_lock_mode mode, int unref)
-{
- return 1;
-}
-
/*****************************************************************************
*
* Page operations.
*
*/
-static void osc_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct osc_page *opg = cl2osc_page(slice);
-
- CDEBUG(D_TRACE, "%p\n", opg);
- LASSERT(!opg->ops_lock);
-}
-
static void osc_page_transfer_get(struct osc_page *opg, const char *label)
{
- struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page);
+ struct cl_page *page = opg->ops_cl.cpl_page;
LASSERT(!opg->ops_transfer_pinned);
cl_page_get(page);
@@ -85,11 +66,11 @@ static void osc_page_transfer_get(struct osc_page *opg, const char *label)
static void osc_page_transfer_put(const struct lu_env *env,
struct osc_page *opg)
{
- struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page);
+ struct cl_page *page = opg->ops_cl.cpl_page;
if (opg->ops_transfer_pinned) {
- lu_ref_del(&page->cp_reference, "transfer", page);
opg->ops_transfer_pinned = 0;
+ lu_ref_del(&page->cp_reference, "transfer", page);
cl_page_put(env, page);
}
}
@@ -104,10 +85,7 @@ static void osc_page_transfer_add(const struct lu_env *env,
{
struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
- /* ops_lru and ops_inflight share the same field, so take it from LRU
- * first and then use it as inflight.
- */
- osc_lru_del(osc_cli(obj), opg, false);
+ osc_lru_use(osc_cli(obj), opg);
spin_lock(&obj->oo_seatbelt);
list_add(&opg->ops_inflight, &obj->oo_inflight[crt]);
@@ -115,16 +93,12 @@ static void osc_page_transfer_add(const struct lu_env *env,
spin_unlock(&obj->oo_seatbelt);
}
-static int osc_page_cache_add(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io)
+int osc_page_cache_add(const struct lu_env *env,
+ const struct cl_page_slice *slice, struct cl_io *io)
{
- struct osc_io *oio = osc_env_io(env);
struct osc_page *opg = cl2osc_page(slice);
int result;
- LINVRNT(osc_page_protected(env, opg, CLM_WRITE, 0));
-
osc_page_transfer_get(opg, "transfer\0cache");
result = osc_queue_async_io(env, io, opg);
if (result != 0)
@@ -132,17 +106,6 @@ static int osc_page_cache_add(const struct lu_env *env,
else
osc_page_transfer_add(env, opg, CRT_WRITE);
- /* for sync write, kernel will wait for this page to be flushed before
- * osc_io_end() is called, so release it earlier.
- * for mkwrite(), it's known there is no further pages.
- */
- if (cl_io_is_sync_write(io) || cl_io_is_mkwrite(io)) {
- if (oio->oi_active) {
- osc_extent_release(env, oio->oi_active);
- oio->oi_active = NULL;
- }
- }
-
return result;
}
@@ -154,102 +117,25 @@ void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
policy->l_extent.end = cl_offset(obj, end + 1) - 1;
}
-static int osc_page_addref_lock(const struct lu_env *env,
- struct osc_page *opg,
- struct cl_lock *lock)
-{
- struct osc_lock *olock;
- int rc;
-
- LASSERT(!opg->ops_lock);
-
- olock = osc_lock_at(lock);
- if (atomic_inc_return(&olock->ols_pageref) <= 0) {
- atomic_dec(&olock->ols_pageref);
- rc = -ENODATA;
- } else {
- cl_lock_get(lock);
- opg->ops_lock = lock;
- rc = 0;
- }
- return rc;
-}
-
-static void osc_page_putref_lock(const struct lu_env *env,
- struct osc_page *opg)
-{
- struct cl_lock *lock = opg->ops_lock;
- struct osc_lock *olock;
-
- LASSERT(lock);
- olock = osc_lock_at(lock);
-
- atomic_dec(&olock->ols_pageref);
- opg->ops_lock = NULL;
-
- cl_lock_put(env, lock);
-}
-
static int osc_page_is_under_lock(const struct lu_env *env,
const struct cl_page_slice *slice,
- struct cl_io *unused)
+ struct cl_io *unused, pgoff_t *max_index)
{
- struct cl_lock *lock;
+ struct osc_page *opg = cl2osc_page(slice);
+ struct ldlm_lock *dlmlock;
int result = -ENODATA;
- lock = cl_lock_at_page(env, slice->cpl_obj, slice->cpl_page,
- NULL, 1, 0);
- if (lock) {
- if (osc_page_addref_lock(env, cl2osc_page(slice), lock) == 0)
- result = -EBUSY;
- cl_lock_put(env, lock);
+ dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj),
+ osc_index(opg), 1, 0);
+ if (dlmlock) {
+ *max_index = cl_index(slice->cpl_obj,
+ dlmlock->l_policy_data.l_extent.end);
+ LDLM_LOCK_PUT(dlmlock);
+ result = 0;
}
return result;
}
-static void osc_page_disown(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io)
-{
- struct osc_page *opg = cl2osc_page(slice);
-
- if (unlikely(opg->ops_lock))
- osc_page_putref_lock(env, opg);
-}
-
-static void osc_page_completion_read(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
- struct osc_page *opg = cl2osc_page(slice);
- struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
-
- if (likely(opg->ops_lock))
- osc_page_putref_lock(env, opg);
- osc_lru_add(osc_cli(obj), opg);
-}
-
-static void osc_page_completion_write(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
- struct osc_page *opg = cl2osc_page(slice);
- struct osc_object *obj = cl2osc(slice->cpl_obj);
-
- osc_lru_add(osc_cli(obj), opg);
-}
-
-static int osc_page_fail(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- /*
- * Cached read?
- */
- LBUG();
- return 0;
-}
-
static const char *osc_list(struct list_head *head)
{
return list_empty(head) ? "-" : "+";
@@ -272,8 +158,8 @@ static int osc_page_print(const struct lu_env *env,
struct osc_object *obj = cl2osc(slice->cpl_obj);
struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli;
- return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
- opg,
+ return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
+ opg, osc_index(opg),
/* 1 */
oap->oap_magic, oap->oap_cmd,
oap->oap_interrupted,
@@ -315,13 +201,11 @@ static void osc_page_delete(const struct lu_env *env,
struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
int rc;
- LINVRNT(opg->ops_temp || osc_page_protected(env, opg, CLM_READ, 1));
-
CDEBUG(D_TRACE, "%p\n", opg);
osc_page_transfer_put(env, opg);
rc = osc_teardown_async_page(env, obj, opg);
if (rc) {
- CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(slice->cpl_page),
+ CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page,
"Trying to teardown failed: %d\n", rc);
LASSERT(0);
}
@@ -334,7 +218,19 @@ static void osc_page_delete(const struct lu_env *env,
}
spin_unlock(&obj->oo_seatbelt);
- osc_lru_del(osc_cli(obj), opg, true);
+ osc_lru_del(osc_cli(obj), opg);
+
+ if (slice->cpl_page->cp_type == CPT_CACHEABLE) {
+ void *value;
+
+ spin_lock(&obj->oo_tree_lock);
+ value = radix_tree_delete(&obj->oo_tree, osc_index(opg));
+ if (value)
+ --obj->oo_npages;
+ spin_unlock(&obj->oo_tree_lock);
+
+ LASSERT(ergo(value, value == opg));
+ }
}
static void osc_page_clip(const struct lu_env *env,
@@ -343,8 +239,6 @@ static void osc_page_clip(const struct lu_env *env,
struct osc_page *opg = cl2osc_page(slice);
struct osc_async_page *oap = &opg->ops_oap;
- LINVRNT(osc_page_protected(env, opg, CLM_READ, 0));
-
opg->ops_from = from;
opg->ops_to = to;
spin_lock(&oap->oap_lock);
@@ -358,8 +252,6 @@ static int osc_page_cancel(const struct lu_env *env,
struct osc_page *opg = cl2osc_page(slice);
int rc = 0;
- LINVRNT(osc_page_protected(env, opg, CLM_READ, 0));
-
/* Check if the transferring against this page
* is completed, or not even queued.
*/
@@ -382,28 +274,16 @@ static int osc_page_flush(const struct lu_env *env,
}
static const struct cl_page_operations osc_page_ops = {
- .cpo_fini = osc_page_fini,
.cpo_print = osc_page_print,
.cpo_delete = osc_page_delete,
.cpo_is_under_lock = osc_page_is_under_lock,
- .cpo_disown = osc_page_disown,
- .io = {
- [CRT_READ] = {
- .cpo_cache_add = osc_page_fail,
- .cpo_completion = osc_page_completion_read
- },
- [CRT_WRITE] = {
- .cpo_cache_add = osc_page_cache_add,
- .cpo_completion = osc_page_completion_write
- }
- },
.cpo_clip = osc_page_clip,
.cpo_cancel = osc_page_cancel,
.cpo_flush = osc_page_flush
};
int osc_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, struct page *vmpage)
+ struct cl_page *page, pgoff_t index)
{
struct osc_object *osc = cl2osc(obj);
struct osc_page *opg = cl_object_page_slice(obj, page);
@@ -412,18 +292,15 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
opg->ops_from = 0;
opg->ops_to = PAGE_SIZE;
- result = osc_prep_async_page(osc, opg, vmpage,
- cl_offset(obj, page->cp_index));
+ result = osc_prep_async_page(osc, opg, page->cp_vmpage,
+ cl_offset(obj, index));
if (result == 0) {
struct osc_io *oio = osc_env_io(env);
opg->ops_srvlock = osc_io_srvlock(oio);
- cl_page_slice_add(page, &opg->ops_cl, obj, &osc_page_ops);
+ cl_page_slice_add(page, &opg->ops_cl, obj, index,
+ &osc_page_ops);
}
- /*
- * Cannot assert osc_page_protected() here as read-ahead
- * creates temporary pages outside of a lock.
- */
/* ops_inflight and ops_lru are the same field, but it doesn't
* hurt to initialize it twice :-)
*/
@@ -431,12 +308,47 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
INIT_LIST_HEAD(&opg->ops_lru);
/* reserve an LRU space for this page */
- if (page->cp_type == CPT_CACHEABLE && result == 0)
+ if (page->cp_type == CPT_CACHEABLE && result == 0) {
result = osc_lru_reserve(env, osc, opg);
+ if (result == 0) {
+ spin_lock(&osc->oo_tree_lock);
+ result = radix_tree_insert(&osc->oo_tree, index, opg);
+ if (result == 0)
+ ++osc->oo_npages;
+ spin_unlock(&osc->oo_tree_lock);
+ LASSERT(result == 0);
+ }
+ }
return result;
}
+int osc_over_unstable_soft_limit(struct client_obd *cli)
+{
+ long obd_upages, obd_dpages, osc_upages;
+
+ /* Can't check cli->cl_unstable_count, therefore, no soft limit */
+ if (!cli)
+ return 0;
+
+ obd_upages = atomic_read(&obd_unstable_pages);
+ obd_dpages = atomic_read(&obd_dirty_pages);
+
+ osc_upages = atomic_read(&cli->cl_unstable_count);
+
+ /*
+ * obd_max_dirty_pages is the max number of (dirty + unstable)
+ * pages allowed at any given time. To simulate an unstable page
+ * only limit, we subtract the current number of dirty pages
+ * from this max. This difference is roughly the amount of pages
+ * currently available for unstable pages. Thus, the soft limit
+ * is half of that difference. Check osc_upages to ensure we don't
+ * set SOFT_SYNC for OSCs without any outstanding unstable pages.
+ */
+ return osc_upages &&
+ obd_upages >= (obd_max_dirty_pages - obd_dpages) / 2;
+}
+
/**
* Helper function called by osc_io_submit() for every page in an immediate
* transfer (i.e., transferred synchronously).
@@ -445,10 +357,6 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
enum cl_req_type crt, int brw_flags)
{
struct osc_async_page *oap = &opg->ops_oap;
- struct osc_object *obj = oap->oap_obj;
-
- LINVRNT(osc_page_protected(env, opg,
- crt == CRT_WRITE ? CLM_WRITE : CLM_READ, 1));
LASSERTF(oap->oap_magic == OAP_MAGIC, "Bad oap magic: oap %p, magic 0x%x\n",
oap, oap->oap_magic);
@@ -460,8 +368,10 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
oap->oap_count = opg->ops_to - opg->ops_from;
oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC;
- if (!client_is_remote(osc_export(obj)) &&
- capable(CFS_CAP_SYS_RESOURCE)) {
+ if (osc_over_unstable_soft_limit(oap->oap_cli))
+ oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
+
+ if (capable(CFS_CAP_SYS_RESOURCE)) {
oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
oap->oap_cmd |= OBD_BRW_NOQUOTA;
}
@@ -483,13 +393,12 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
*/
static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq);
-static atomic_t osc_lru_waiters = ATOMIC_INIT(0);
/* LRU pages are freed in batch mode. OSC should at least free this
* number of pages to avoid running out of LRU budget, and..
*/
static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT); /* 2M */
/* free this number at most otherwise it will take too long time to finish. */
-static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */
+static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */
/* Check if we can free LRU slots from this OSC. If there exists LRU waiters,
* we should free slots aggressively. In this way, slots are freed in a steady
@@ -500,65 +409,142 @@ static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */
static int osc_cache_too_much(struct client_obd *cli)
{
struct cl_client_cache *cache = cli->cl_cache;
- int pages = atomic_read(&cli->cl_lru_in_list) >> 1;
+ int pages = atomic_read(&cli->cl_lru_in_list);
+ unsigned long budget;
- if (atomic_read(&osc_lru_waiters) > 0 &&
- atomic_read(cli->cl_lru_left) < lru_shrink_max)
- /* drop lru pages aggressively */
- return min(pages, lru_shrink_max);
+ budget = cache->ccc_lru_max / (atomic_read(&cache->ccc_users) - 2);
/* if it's going to run out LRU slots, we should free some, but not
* too much to maintain fairness among OSCs.
*/
if (atomic_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) {
- unsigned long tmp;
+ if (pages >= budget)
+ return lru_shrink_max;
+ else if (pages >= budget / 2)
+ return lru_shrink_min;
+ } else if (pages >= budget * 2) {
+ return lru_shrink_min;
+ }
+ return 0;
+}
- tmp = cache->ccc_lru_max / atomic_read(&cache->ccc_users);
- if (pages > tmp)
- return min(pages, lru_shrink_max);
+int lru_queue_work(const struct lu_env *env, void *data)
+{
+ struct client_obd *cli = data;
- return pages > lru_shrink_min ? lru_shrink_min : 0;
- }
+ CDEBUG(D_CACHE, "Run LRU work for client obd %p.\n", cli);
+
+ if (osc_cache_too_much(cli))
+ osc_lru_shrink(env, cli, lru_shrink_max, true);
return 0;
}
-/* Return how many pages are not discarded in @pvec. */
-static int discard_pagevec(const struct lu_env *env, struct cl_io *io,
- struct cl_page **pvec, int max_index)
+void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
+{
+ LIST_HEAD(lru);
+ struct osc_async_page *oap;
+ int npages = 0;
+
+ list_for_each_entry(oap, plist, oap_pending_item) {
+ struct osc_page *opg = oap2osc_page(oap);
+
+ if (!opg->ops_in_lru)
+ continue;
+
+ ++npages;
+ LASSERT(list_empty(&opg->ops_lru));
+ list_add(&opg->ops_lru, &lru);
+ }
+
+ if (npages > 0) {
+ spin_lock(&cli->cl_lru_list_lock);
+ list_splice_tail(&lru, &cli->cl_lru_list);
+ atomic_sub(npages, &cli->cl_lru_busy);
+ atomic_add(npages, &cli->cl_lru_in_list);
+ spin_unlock(&cli->cl_lru_list_lock);
+
+ /* XXX: May set force to be true for better performance */
+ if (osc_cache_too_much(cli))
+ (void)ptlrpcd_queue_work(cli->cl_lru_work);
+ }
+}
+
+static void __osc_lru_del(struct client_obd *cli, struct osc_page *opg)
+{
+ LASSERT(atomic_read(&cli->cl_lru_in_list) > 0);
+ list_del_init(&opg->ops_lru);
+ atomic_dec(&cli->cl_lru_in_list);
+}
+
+/**
+ * Page is being destroyed. The page may be not in LRU list, if the transfer
+ * has never finished(error occurred).
+ */
+static void osc_lru_del(struct client_obd *cli, struct osc_page *opg)
+{
+ if (opg->ops_in_lru) {
+ spin_lock(&cli->cl_lru_list_lock);
+ if (!list_empty(&opg->ops_lru)) {
+ __osc_lru_del(cli, opg);
+ } else {
+ LASSERT(atomic_read(&cli->cl_lru_busy) > 0);
+ atomic_dec(&cli->cl_lru_busy);
+ }
+ spin_unlock(&cli->cl_lru_list_lock);
+
+ atomic_inc(cli->cl_lru_left);
+ /* this is a great place to release more LRU pages if
+ * this osc occupies too many LRU pages and kernel is
+ * stealing one of them.
+ */
+ if (!memory_pressure_get())
+ (void)ptlrpcd_queue_work(cli->cl_lru_work);
+ wake_up(&osc_lru_waitq);
+ } else {
+ LASSERT(list_empty(&opg->ops_lru));
+ }
+}
+
+/**
+ * Delete page from LRUlist for redirty.
+ */
+static void osc_lru_use(struct client_obd *cli, struct osc_page *opg)
+{
+ /* If page is being transferred for the first time,
+ * ops_lru should be empty
+ */
+ if (opg->ops_in_lru && !list_empty(&opg->ops_lru)) {
+ spin_lock(&cli->cl_lru_list_lock);
+ __osc_lru_del(cli, opg);
+ spin_unlock(&cli->cl_lru_list_lock);
+ atomic_inc(&cli->cl_lru_busy);
+ }
+}
+
+static void discard_pagevec(const struct lu_env *env, struct cl_io *io,
+ struct cl_page **pvec, int max_index)
{
- int count;
int i;
- for (count = 0, i = 0; i < max_index; i++) {
+ for (i = 0; i < max_index; i++) {
struct cl_page *page = pvec[i];
- if (cl_page_own_try(env, io, page) == 0) {
- /* free LRU page only if nobody is using it.
- * This check is necessary to avoid freeing the pages
- * having already been removed from LRU and pinned
- * for IO.
- */
- if (!cl_page_in_use(page)) {
- cl_page_unmap(env, io, page);
- cl_page_discard(env, io, page);
- ++count;
- }
- cl_page_disown(env, io, page);
- }
+ LASSERT(cl_page_is_owned(page, io));
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
cl_page_put(env, page);
+
pvec[i] = NULL;
}
- return max_index - count;
}
/**
* Drop @target of pages from LRU at most.
*/
-int osc_lru_shrink(struct client_obd *cli, int target)
+int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+ int target, bool force)
{
- struct cl_env_nest nest;
- struct lu_env *env;
struct cl_io *io;
struct cl_object *clobj = NULL;
struct cl_page **pvec;
@@ -573,23 +559,31 @@ int osc_lru_shrink(struct client_obd *cli, int target)
if (atomic_read(&cli->cl_lru_in_list) == 0 || target <= 0)
return 0;
- env = cl_env_nested_get(&nest);
- if (IS_ERR(env))
- return PTR_ERR(env);
+ if (!force) {
+ if (atomic_read(&cli->cl_lru_shrinkers) > 0)
+ return -EBUSY;
+
+ if (atomic_inc_return(&cli->cl_lru_shrinkers) > 1) {
+ atomic_dec(&cli->cl_lru_shrinkers);
+ return -EBUSY;
+ }
+ } else {
+ atomic_inc(&cli->cl_lru_shrinkers);
+ }
- pvec = osc_env_info(env)->oti_pvec;
+ pvec = (struct cl_page **)osc_env_info(env)->oti_pvec;
io = &osc_env_info(env)->oti_io;
- client_obd_list_lock(&cli->cl_lru_list_lock);
- atomic_inc(&cli->cl_lru_shrinkers);
+ spin_lock(&cli->cl_lru_list_lock);
maxscan = min(target << 1, atomic_read(&cli->cl_lru_in_list));
list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) {
struct cl_page *page;
+ bool will_free = false;
if (--maxscan < 0)
break;
- page = cl_page_top(opg->ops_cl.cpl_page);
+ page = opg->ops_cl.cpl_page;
if (cl_page_in_use_noref(page)) {
list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
continue;
@@ -600,10 +594,10 @@ int osc_lru_shrink(struct client_obd *cli, int target)
struct cl_object *tmp = page->cp_obj;
cl_object_get(tmp);
- client_obd_list_unlock(&cli->cl_lru_list_lock);
+ spin_unlock(&cli->cl_lru_list_lock);
if (clobj) {
- count -= discard_pagevec(env, io, pvec, index);
+ discard_pagevec(env, io, pvec, index);
index = 0;
cl_io_fini(env, io);
@@ -616,7 +610,7 @@ int osc_lru_shrink(struct client_obd *cli, int target)
io->ci_ignore_layout = 1;
rc = cl_io_init(env, io, CIT_MISC, clobj);
- client_obd_list_lock(&cli->cl_lru_list_lock);
+ spin_lock(&cli->cl_lru_list_lock);
if (rc != 0)
break;
@@ -625,98 +619,54 @@ int osc_lru_shrink(struct client_obd *cli, int target)
continue;
}
- /* move this page to the end of list as it will be discarded
- * soon. The page will be finally removed from LRU list in
- * osc_page_delete().
- */
- list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
+ if (cl_page_own_try(env, io, page) == 0) {
+ if (!cl_page_in_use_noref(page)) {
+ /* remove it from lru list earlier to avoid
+ * lock contention
+ */
+ __osc_lru_del(cli, opg);
+ opg->ops_in_lru = 0; /* will be discarded */
+
+ cl_page_get(page);
+ will_free = true;
+ } else {
+ cl_page_disown(env, io, page);
+ }
+ }
- /* it's okay to grab a refcount here w/o holding lock because
- * it has to grab cl_lru_list_lock to delete the page.
- */
- cl_page_get(page);
- pvec[index++] = page;
- if (++count >= target)
- break;
+ if (!will_free) {
+ list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
+ continue;
+ }
+ /* Don't discard and free the page with cl_lru_list held */
+ pvec[index++] = page;
if (unlikely(index == OTI_PVEC_SIZE)) {
- client_obd_list_unlock(&cli->cl_lru_list_lock);
- count -= discard_pagevec(env, io, pvec, index);
+ spin_unlock(&cli->cl_lru_list_lock);
+ discard_pagevec(env, io, pvec, index);
index = 0;
- client_obd_list_lock(&cli->cl_lru_list_lock);
+ spin_lock(&cli->cl_lru_list_lock);
}
+
+ if (++count >= target)
+ break;
}
- client_obd_list_unlock(&cli->cl_lru_list_lock);
+ spin_unlock(&cli->cl_lru_list_lock);
if (clobj) {
- count -= discard_pagevec(env, io, pvec, index);
+ discard_pagevec(env, io, pvec, index);
cl_io_fini(env, io);
cl_object_put(env, clobj);
}
- cl_env_nested_put(&nest, env);
atomic_dec(&cli->cl_lru_shrinkers);
- return count > 0 ? count : rc;
-}
-
-static void osc_lru_add(struct client_obd *cli, struct osc_page *opg)
-{
- bool wakeup = false;
-
- if (!opg->ops_in_lru)
- return;
-
- atomic_dec(&cli->cl_lru_busy);
- client_obd_list_lock(&cli->cl_lru_list_lock);
- if (list_empty(&opg->ops_lru)) {
- list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
- atomic_inc_return(&cli->cl_lru_in_list);
- wakeup = atomic_read(&osc_lru_waiters) > 0;
- }
- client_obd_list_unlock(&cli->cl_lru_list_lock);
-
- if (wakeup) {
- osc_lru_shrink(cli, osc_cache_too_much(cli));
+ if (count > 0) {
+ atomic_add(count, cli->cl_lru_left);
wake_up_all(&osc_lru_waitq);
}
-}
-
-/* delete page from LRUlist. The page can be deleted from LRUlist for two
- * reasons: redirtied or deleted from page cache.
- */
-static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del)
-{
- if (opg->ops_in_lru) {
- client_obd_list_lock(&cli->cl_lru_list_lock);
- if (!list_empty(&opg->ops_lru)) {
- LASSERT(atomic_read(&cli->cl_lru_in_list) > 0);
- list_del_init(&opg->ops_lru);
- atomic_dec(&cli->cl_lru_in_list);
- if (!del)
- atomic_inc(&cli->cl_lru_busy);
- } else if (del) {
- LASSERT(atomic_read(&cli->cl_lru_busy) > 0);
- atomic_dec(&cli->cl_lru_busy);
- }
- client_obd_list_unlock(&cli->cl_lru_list_lock);
- if (del) {
- atomic_inc(cli->cl_lru_left);
- /* this is a great place to release more LRU pages if
- * this osc occupies too many LRU pages and kernel is
- * stealing one of them.
- * cl_lru_shrinkers is to avoid recursive call in case
- * we're already in the context of osc_lru_shrink().
- */
- if (atomic_read(&cli->cl_lru_shrinkers) == 0 &&
- !memory_pressure_get())
- osc_lru_shrink(cli, osc_cache_too_much(cli));
- wake_up(&osc_lru_waitq);
- }
- } else {
- LASSERT(list_empty(&opg->ops_lru));
- }
+ return count > 0 ? count : rc;
}
static inline int max_to_shrink(struct client_obd *cli)
@@ -724,19 +674,28 @@ static inline int max_to_shrink(struct client_obd *cli)
return min(atomic_read(&cli->cl_lru_in_list) >> 1, lru_shrink_max);
}
-static int osc_lru_reclaim(struct client_obd *cli)
+int osc_lru_reclaim(struct client_obd *cli)
{
+ struct cl_env_nest nest;
+ struct lu_env *env;
struct cl_client_cache *cache = cli->cl_cache;
int max_scans;
- int rc;
+ int rc = 0;
LASSERT(cache);
- rc = osc_lru_shrink(cli, lru_shrink_min);
+ env = cl_env_nested_get(&nest);
+ if (IS_ERR(env))
+ return 0;
+
+ rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), false);
if (rc != 0) {
+ if (rc == -EBUSY)
+ rc = 0;
+
CDEBUG(D_CACHE, "%s: Free %d pages from own LRU: %p.\n",
cli->cl_import->imp_obd->obd_name, rc, cli);
- return rc;
+ goto out;
}
CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %d, busy: %d.\n",
@@ -753,7 +712,7 @@ static int osc_lru_reclaim(struct client_obd *cli)
cache->ccc_lru_shrinkers++;
list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
- max_scans = atomic_read(&cache->ccc_users);
+ max_scans = atomic_read(&cache->ccc_users) - 2;
while (--max_scans > 0 && !list_empty(&cache->ccc_lru)) {
cli = list_entry(cache->ccc_lru.next, struct client_obd,
cl_lru_osc);
@@ -764,10 +723,11 @@ static int osc_lru_reclaim(struct client_obd *cli)
atomic_read(&cli->cl_lru_busy));
list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
- if (atomic_read(&cli->cl_lru_in_list) > 0) {
+ if (osc_cache_too_much(cli) > 0) {
spin_unlock(&cache->ccc_lru_lock);
- rc = osc_lru_shrink(cli, max_to_shrink(cli));
+ rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli),
+ true);
spin_lock(&cache->ccc_lru_lock);
if (rc != 0)
break;
@@ -775,6 +735,8 @@ static int osc_lru_reclaim(struct client_obd *cli)
}
spin_unlock(&cache->ccc_lru_lock);
+out:
+ cl_env_nested_put(&nest, env);
CDEBUG(D_CACHE, "%s: cli %p freed %d pages.\n",
cli->cl_import->imp_obd->obd_name, cli, rc);
return rc;
@@ -784,16 +746,20 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
struct osc_page *opg)
{
struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ struct osc_io *oio = osc_env_io(env);
struct client_obd *cli = osc_cli(obj);
int rc = 0;
if (!cli->cl_cache) /* shall not be in LRU */
return 0;
+ if (oio->oi_lru_reserved > 0) {
+ --oio->oi_lru_reserved;
+ goto out;
+ }
+
LASSERT(atomic_read(cli->cl_lru_left) >= 0);
while (!atomic_add_unless(cli->cl_lru_left, -1, 0)) {
- int gen;
-
/* run out of LRU spaces, try to drop some by itself */
rc = osc_lru_reclaim(cli);
if (rc < 0)
@@ -803,23 +769,15 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
cond_resched();
- /* slowest case, all of caching pages are busy, notifying
- * other OSCs that we're lack of LRU slots.
- */
- atomic_inc(&osc_lru_waiters);
-
- gen = atomic_read(&cli->cl_lru_in_list);
rc = l_wait_event(osc_lru_waitq,
- atomic_read(cli->cl_lru_left) > 0 ||
- (atomic_read(&cli->cl_lru_in_list) > 0 &&
- gen != atomic_read(&cli->cl_lru_in_list)),
+ atomic_read(cli->cl_lru_left) > 0,
&lwi);
- atomic_dec(&osc_lru_waiters);
if (rc < 0)
break;
}
+out:
if (rc >= 0) {
atomic_inc(&cli->cl_lru_busy);
opg->ops_in_lru = 1;
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 30526ebcad04..536b868ff776 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -92,12 +88,13 @@ struct osc_fsync_args {
struct osc_enqueue_args {
struct obd_export *oa_exp;
+ enum ldlm_type oa_type;
+ enum ldlm_mode oa_mode;
__u64 *oa_flags;
- obd_enqueue_update_f oa_upcall;
+ osc_enqueue_upcall_f oa_upcall;
void *oa_cookie;
struct ost_lvb *oa_lvb;
- struct lustre_handle *oa_lockh;
- struct ldlm_enqueue_info *oa_ei;
+ struct lustre_handle oa_lockh;
unsigned int oa_agl:1;
};
@@ -473,7 +470,8 @@ static int osc_real_create(struct obd_export *exp, struct obdo *oa,
DEBUG_REQ(D_HA, req,
"delorphan from OST integration");
/* Don't resend the delorphan req */
- req->rq_no_resend = req->rq_no_delay = 1;
+ req->rq_no_resend = 1;
+ req->rq_no_delay = 1;
}
rc = ptlrpc_queue_wait(req);
@@ -801,21 +799,24 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
LASSERT(!(oa->o_valid & bits));
oa->o_valid |= bits;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
oa->o_dirty = cli->cl_dirty;
if (unlikely(cli->cl_dirty - cli->cl_dirty_transit >
cli->cl_dirty_max)) {
CERROR("dirty %lu - %lu > dirty_max %lu\n",
cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
oa->o_undirty = 0;
- } else if (unlikely(atomic_read(&obd_dirty_pages) -
+ } else if (unlikely(atomic_read(&obd_unstable_pages) +
+ atomic_read(&obd_dirty_pages) -
atomic_read(&obd_dirty_transit_pages) >
(long)(obd_max_dirty_pages + 1))) {
/* The atomic_read() allowing the atomic_inc() are
* not covered by a lock thus they may safely race and trip
* this CERROR() unless we add in a small fudge factor (+1).
*/
- CERROR("dirty %d - %d > system dirty_max %d\n",
+ CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n",
+ cli->cl_import->imp_obd->obd_name,
+ atomic_read(&obd_unstable_pages),
atomic_read(&obd_dirty_pages),
atomic_read(&obd_dirty_transit_pages),
obd_max_dirty_pages);
@@ -833,10 +834,9 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
oa->o_dropped = cli->cl_lost_grant;
cli->cl_lost_grant = 0;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
CDEBUG(D_CACHE, "dirty: %llu undirty: %u dropped %u grant: %llu\n",
oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant);
-
}
void osc_update_next_shrink(struct client_obd *cli)
@@ -849,9 +849,9 @@ void osc_update_next_shrink(struct client_obd *cli)
static void __osc_update_grant(struct client_obd *cli, u64 grant)
{
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
cli->cl_avail_grant += grant;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
}
static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
@@ -889,10 +889,10 @@ out:
static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa)
{
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
oa->o_grant = cli->cl_avail_grant / 4;
cli->cl_avail_grant -= oa->o_grant;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
if (!(oa->o_valid & OBD_MD_FLFLAGS)) {
oa->o_valid |= OBD_MD_FLFLAGS;
oa->o_flags = 0;
@@ -911,10 +911,10 @@ static int osc_shrink_grant(struct client_obd *cli)
__u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) *
(cli->cl_max_pages_per_rpc << PAGE_SHIFT);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
if (cli->cl_avail_grant <= target_bytes)
target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return osc_shrink_grant_to_target(cli, target_bytes);
}
@@ -924,7 +924,7 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
int rc = 0;
struct ost_body *body;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
/* Don't shrink if we are already above or below the desired limit
* We don't want to shrink below a single RPC, as that will negatively
* impact block allocation and long-term performance.
@@ -933,10 +933,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
if (target_bytes >= cli->cl_avail_grant) {
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
return 0;
}
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
body = kzalloc(sizeof(*body), GFP_NOFS);
if (!body)
@@ -944,10 +944,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
osc_announce_cached(cli, &body->oa, 0);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
body->oa.o_grant = cli->cl_avail_grant - target_bytes;
cli->cl_avail_grant = target_bytes;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) {
body->oa.o_valid |= OBD_MD_FLFLAGS;
body->oa.o_flags = 0;
@@ -1035,7 +1035,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
* race is tolerable here: if we're evicted, but imp_state already
* left EVICTED state, then cl_dirty must be 0 already.
*/
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
cli->cl_avail_grant = ocd->ocd_grant;
else
@@ -1053,7 +1053,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
/* determine the appropriate chunk size used by osc_extent. */
cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n",
cli->cl_import->imp_obd->obd_name,
@@ -1082,7 +1082,7 @@ static void handle_short_read(int nob_read, u32 page_count,
if (pga[i]->count > nob_read) {
/* EOF inside this page */
ptr = kmap(pga[i]->pg) +
- (pga[i]->off & ~CFS_PAGE_MASK);
+ (pga[i]->off & ~PAGE_MASK);
memset(ptr + nob_read, 0, pga[i]->count - nob_read);
kunmap(pga[i]->pg);
page_count--;
@@ -1097,7 +1097,7 @@ static void handle_short_read(int nob_read, u32 page_count,
/* zero remaining pages */
while (page_count-- > 0) {
- ptr = kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK);
+ ptr = kmap(pga[i]->pg) + (pga[i]->off & ~PAGE_MASK);
memset(ptr, 0, pga[i]->count);
kunmap(pga[i]->pg);
i++;
@@ -1144,7 +1144,8 @@ static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
{
if (p1->flag != p2->flag) {
unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
- OBD_BRW_SYNC | OBD_BRW_ASYNC|OBD_BRW_NOQUOTA);
+ OBD_BRW_SYNC | OBD_BRW_ASYNC |
+ OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC);
/* warn if we try to combine flags that we don't know to be
* safe to combine
@@ -1188,32 +1189,29 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
if (i == 0 && opc == OST_READ &&
OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
unsigned char *ptr = kmap(pga[i]->pg);
- int off = pga[i]->off & ~CFS_PAGE_MASK;
+ int off = pga[i]->off & ~PAGE_MASK;
memcpy(ptr + off, "bad1", min(4, nob));
kunmap(pga[i]->pg);
}
cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
- pga[i]->off & ~CFS_PAGE_MASK,
+ pga[i]->off & ~PAGE_MASK,
count);
CDEBUG(D_PAGE,
"page %p map %p index %lu flags %lx count %u priv %0lx: off %d\n",
pga[i]->pg, pga[i]->pg->mapping, pga[i]->pg->index,
(long)pga[i]->pg->flags, page_count(pga[i]->pg),
page_private(pga[i]->pg),
- (int)(pga[i]->off & ~CFS_PAGE_MASK));
+ (int)(pga[i]->off & ~PAGE_MASK));
nob -= pga[i]->count;
pg_count--;
i++;
}
- bufsize = 4;
+ bufsize = sizeof(cksum);
err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize);
- if (err)
- cfs_crypto_hash_final(hdesc, NULL, NULL);
-
/* For sending we only compute the wrong checksum instead
* of corrupting the data so it is still correct on a redo
*/
@@ -1312,7 +1310,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
pg_prev = pga[0];
for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
struct brw_page *pg = pga[i];
- int poff = pg->off & ~CFS_PAGE_MASK;
+ int poff = pg->off & ~PAGE_MASK;
LASSERT(pg->count > 0);
/* make sure there is no gap in the middle of page array */
@@ -1658,6 +1656,7 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
aa->aa_resends++;
new_req->rq_interpret_reply = request->rq_interpret_reply;
new_req->rq_async_args = request->rq_async_args;
+ new_req->rq_commit_cb = request->rq_commit_cb;
/* cap resend delay to the current request timeout, this is similar to
* what ptlrpc does (see after_reply())
*/
@@ -1737,7 +1736,6 @@ static int brw_interpret(const struct lu_env *env,
struct osc_brw_async_args *aa = data;
struct osc_extent *ext;
struct osc_extent *tmp;
- struct cl_object *obj = NULL;
struct client_obd *cli = aa->aa_cli;
rc = osc_brw_fini_request(req, rc);
@@ -1766,24 +1764,17 @@ static int brw_interpret(const struct lu_env *env,
rc = -EIO;
}
- list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
- if (!obj && rc == 0) {
- obj = osc2cl(ext->oe_obj);
- cl_object_get(obj);
- }
-
- list_del_init(&ext->oe_link);
- osc_extent_finish(env, ext, 1, rc);
- }
- LASSERT(list_empty(&aa->aa_exts));
- LASSERT(list_empty(&aa->aa_oaps));
-
- if (obj) {
+ if (rc == 0) {
struct obdo *oa = aa->aa_oa;
struct cl_attr *attr = &osc_env_info(env)->oti_attr;
unsigned long valid = 0;
+ struct cl_object *obj;
+ struct osc_async_page *last;
- LASSERT(rc == 0);
+ last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]);
+ obj = osc2cl(last->oap_obj);
+
+ cl_object_attr_lock(obj);
if (oa->o_valid & OBD_MD_FLBLOCKS) {
attr->cat_blocks = oa->o_blocks;
valid |= CAT_BLOCKS;
@@ -1800,21 +1791,45 @@ static int brw_interpret(const struct lu_env *env,
attr->cat_ctime = oa->o_ctime;
valid |= CAT_CTIME;
}
- if (valid != 0) {
- cl_object_attr_lock(obj);
- cl_object_attr_set(env, obj, attr, valid);
- cl_object_attr_unlock(obj);
+
+ if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
+ struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
+ loff_t last_off = last->oap_count + last->oap_obj_off;
+
+ /* Change file size if this is an out of quota or
+ * direct IO write and it extends the file size
+ */
+ if (loi->loi_lvb.lvb_size < last_off) {
+ attr->cat_size = last_off;
+ valid |= CAT_SIZE;
+ }
+ /* Extend KMS if it's not a lockless write */
+ if (loi->loi_kms < last_off &&
+ oap2osc_page(last)->ops_srvlock == 0) {
+ attr->cat_kms = last_off;
+ valid |= CAT_KMS;
+ }
}
- cl_object_put(env, obj);
+
+ if (valid != 0)
+ cl_object_attr_set(env, obj, attr, valid);
+ cl_object_attr_unlock(obj);
}
kmem_cache_free(obdo_cachep, aa->aa_oa);
+ list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
+ list_del_init(&ext->oe_link);
+ osc_extent_finish(env, ext, 1, rc);
+ }
+ LASSERT(list_empty(&aa->aa_exts));
+ LASSERT(list_empty(&aa->aa_oaps));
+
cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
req->rq_bulk->bd_nob_transferred);
osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
/* We need to decrement before osc_ap_completion->osc_wake_cache_waiters
* is called so we know whether to go to sync BRWs or wait for more
* RPCs to complete
@@ -1824,12 +1839,31 @@ static int brw_interpret(const struct lu_env *env,
else
cli->cl_r_in_flight--;
osc_wake_cache_waiters(cli);
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
osc_io_unplug(env, cli, NULL);
return rc;
}
+static void brw_commit(struct ptlrpc_request *req)
+{
+ spin_lock(&req->rq_lock);
+ /*
+ * If osc_inc_unstable_pages (via osc_extent_finish) races with
+ * this function, called via rq_commit_cb, we must ensure that
+ * osc_dec_unstable_pages is still called. Otherwise unstable
+ * pages may be leaked.
+ */
+ if (req->rq_unstable) {
+ spin_unlock(&req->rq_lock);
+ osc_dec_unstable_pages(req);
+ spin_lock(&req->rq_lock);
+ } else {
+ req->rq_committed = 1;
+ }
+ spin_unlock(&req->rq_lock);
+}
+
/**
* Build an RPC by the list of extent @ext_list. The caller must ensure
* that the total pages in this list are NOT over max pages per RPC.
@@ -1920,7 +1954,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
pga[i] = &oap->oap_brw_page;
pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
- pga[i]->pg, page_index(oap->oap_page), oap,
+ pga[i]->pg, oap->oap_page->index, oap,
pga[i]->flag);
i++;
cl_req_page_add(env, clerq, page);
@@ -1949,6 +1983,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
goto out;
}
+ req->rq_commit_cb = brw_commit;
req->rq_interpret_reply = brw_interpret;
if (mem_tight != 0)
@@ -1992,7 +2027,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
if (tmp)
tmp->oap_request = ptlrpc_request_addref(req);
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
starting_offset >>= PAGE_SHIFT;
if (cmd == OBD_BRW_READ) {
cli->cl_r_in_flight++;
@@ -2007,7 +2042,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
lprocfs_oh_tally_log2(&cli->cl_write_offset_hist,
starting_offset + 1);
}
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight",
page_count, aa, cli->cl_r_in_flight,
@@ -2055,14 +2090,12 @@ static int osc_set_lock_data_with_check(struct ldlm_lock *lock,
LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl);
lock_res_and_lock(lock);
- spin_lock(&osc_ast_guard);
if (!lock->l_ast_data)
lock->l_ast_data = data;
if (lock->l_ast_data == data)
set = 1;
- spin_unlock(&osc_ast_guard);
unlock_res_and_lock(lock);
return set;
@@ -2104,36 +2137,38 @@ static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
return rc;
}
-static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
- obd_enqueue_update_f upcall, void *cookie,
- __u64 *flags, int agl, int rc)
+static int osc_enqueue_fini(struct ptlrpc_request *req,
+ osc_enqueue_upcall_f upcall, void *cookie,
+ struct lustre_handle *lockh, enum ldlm_mode mode,
+ __u64 *flags, int agl, int errcode)
{
- int intent = *flags & LDLM_FL_HAS_INTENT;
-
- if (intent) {
- /* The request was created before ldlm_cli_enqueue call. */
- if (rc == ELDLM_LOCK_ABORTED) {
- struct ldlm_reply *rep;
+ bool intent = *flags & LDLM_FL_HAS_INTENT;
+ int rc;
- rep = req_capsule_server_get(&req->rq_pill,
- &RMF_DLM_REP);
+ /* The request was created before ldlm_cli_enqueue call. */
+ if (intent && errcode == ELDLM_LOCK_ABORTED) {
+ struct ldlm_reply *rep;
- rep->lock_policy_res1 =
- ptlrpc_status_ntoh(rep->lock_policy_res1);
- if (rep->lock_policy_res1)
- rc = rep->lock_policy_res1;
- }
- }
+ rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
- if ((intent != 0 && rc == ELDLM_LOCK_ABORTED && agl == 0) ||
- (rc == 0)) {
+ rep->lock_policy_res1 =
+ ptlrpc_status_ntoh(rep->lock_policy_res1);
+ if (rep->lock_policy_res1)
+ errcode = rep->lock_policy_res1;
+ if (!agl)
+ *flags |= LDLM_FL_LVB_READY;
+ } else if (errcode == ELDLM_OK) {
*flags |= LDLM_FL_LVB_READY;
- CDEBUG(D_INODE, "got kms %llu blocks %llu mtime %llu\n",
- lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_mtime);
}
/* Call the update callback. */
- rc = (*upcall)(cookie, rc);
+ rc = (*upcall)(cookie, lockh, errcode);
+ /* release the reference taken in ldlm_cli_enqueue() */
+ if (errcode == ELDLM_LOCK_MATCHED)
+ errcode = ELDLM_OK;
+ if (errcode == ELDLM_OK && lustre_handle_is_used(lockh))
+ ldlm_lock_decref(lockh, mode);
+
return rc;
}
@@ -2142,62 +2177,50 @@ static int osc_enqueue_interpret(const struct lu_env *env,
struct osc_enqueue_args *aa, int rc)
{
struct ldlm_lock *lock;
- struct lustre_handle handle;
- __u32 mode;
- struct ost_lvb *lvb;
- __u32 lvb_len;
- __u64 *flags = aa->oa_flags;
-
- /* Make a local copy of a lock handle and a mode, because aa->oa_*
- * might be freed anytime after lock upcall has been called.
- */
- lustre_handle_copy(&handle, aa->oa_lockh);
- mode = aa->oa_ei->ei_mode;
+ struct lustre_handle *lockh = &aa->oa_lockh;
+ enum ldlm_mode mode = aa->oa_mode;
+ struct ost_lvb *lvb = aa->oa_lvb;
+ __u32 lvb_len = sizeof(*lvb);
+ __u64 flags = 0;
+
/* ldlm_cli_enqueue is holding a reference on the lock, so it must
* be valid.
*/
- lock = ldlm_handle2lock(&handle);
+ lock = ldlm_handle2lock(lockh);
+ LASSERTF(lock, "lockh %llx, req %p, aa %p - client evicted?\n",
+ lockh->cookie, req, aa);
/* Take an additional reference so that a blocking AST that
* ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed
* to arrive after an upcall has been executed by
* osc_enqueue_fini().
*/
- ldlm_lock_addref(&handle, mode);
+ ldlm_lock_addref(lockh, mode);
+
+ /* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2);
/* Let CP AST to grant the lock first. */
OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1);
- if (aa->oa_agl && rc == ELDLM_LOCK_ABORTED) {
- lvb = NULL;
- lvb_len = 0;
- } else {
- lvb = aa->oa_lvb;
- lvb_len = sizeof(*aa->oa_lvb);
+ if (aa->oa_agl) {
+ LASSERT(!aa->oa_lvb);
+ LASSERT(!aa->oa_flags);
+ aa->oa_flags = &flags;
}
/* Complete obtaining the lock procedure. */
- rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_ei->ei_type, 1,
- mode, flags, lvb, lvb_len, &handle, rc);
+ rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1,
+ aa->oa_mode, aa->oa_flags, lvb, lvb_len,
+ lockh, rc);
/* Complete osc stuff. */
- rc = osc_enqueue_fini(req, aa->oa_lvb, aa->oa_upcall, aa->oa_cookie,
- flags, aa->oa_agl, rc);
+ rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode,
+ aa->oa_flags, aa->oa_agl, rc);
OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10);
- /* Release the lock for async request. */
- if (lustre_handle_is_used(&handle) && rc == ELDLM_OK)
- /*
- * Releases a reference taken by ldlm_cli_enqueue(), if it is
- * not already released by
- * ldlm_cli_enqueue_fini()->failed_lock_cleanup()
- */
- ldlm_lock_decref(&handle, mode);
-
- LASSERTF(lock, "lockh %p, req %p, aa %p - client evicted?\n",
- aa->oa_lockh, req, aa);
- ldlm_lock_decref(&handle, mode);
+ ldlm_lock_decref(lockh, mode);
LDLM_LOCK_PUT(lock);
return rc;
}
@@ -2209,29 +2232,29 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
* other synchronous requests, however keeping some locks and trying to obtain
* others may take a considerable amount of time in a case of ost failure; and
* when other sync requests do not get released lock from a client, the client
- * is excluded from the cluster -- such scenarious make the life difficult, so
+ * is evicted from the cluster -- such scenarios make life difficult, so
* release locks just after they are obtained.
*/
int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
__u64 *flags, ldlm_policy_data_t *policy,
struct ost_lvb *lvb, int kms_valid,
- obd_enqueue_update_f upcall, void *cookie,
+ osc_enqueue_upcall_f upcall, void *cookie,
struct ldlm_enqueue_info *einfo,
- struct lustre_handle *lockh,
struct ptlrpc_request_set *rqset, int async, int agl)
{
struct obd_device *obd = exp->exp_obd;
+ struct lustre_handle lockh = { 0 };
struct ptlrpc_request *req = NULL;
int intent = *flags & LDLM_FL_HAS_INTENT;
- __u64 match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY);
+ __u64 match_flags = *flags;
enum ldlm_mode mode;
int rc;
/* Filesystem lock extents are extended to page boundaries so that
* dealing with the page cache is a little smoother.
*/
- policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
- policy->l_extent.end |= ~CFS_PAGE_MASK;
+ policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
+ policy->l_extent.end |= ~PAGE_MASK;
/*
* kms is not valid when either object is completely fresh (so that no
@@ -2258,65 +2281,51 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
mode = einfo->ei_mode;
if (einfo->ei_mode == LCK_PR)
mode |= LCK_PW;
- mode = ldlm_lock_match(obd->obd_namespace, *flags | match_lvb, res_id,
- einfo->ei_type, policy, mode, lockh, 0);
+ if (agl == 0)
+ match_flags |= LDLM_FL_LVB_READY;
+ if (intent != 0)
+ match_flags |= LDLM_FL_BLOCK_GRANTED;
+ mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id,
+ einfo->ei_type, policy, mode, &lockh, 0);
if (mode) {
- struct ldlm_lock *matched = ldlm_handle2lock(lockh);
+ struct ldlm_lock *matched;
- if ((agl != 0) && !(matched->l_flags & LDLM_FL_LVB_READY)) {
- /* For AGL, if enqueue RPC is sent but the lock is not
- * granted, then skip to process this strpe.
- * Return -ECANCELED to tell the caller.
+ if (*flags & LDLM_FL_TEST_LOCK)
+ return ELDLM_OK;
+
+ matched = ldlm_handle2lock(&lockh);
+ if (agl) {
+ /* AGL enqueues DLM locks speculatively. Therefore if
+ * a DLM lock already exists, it will just inform the
+ * caller to cancel the AGL process for this stripe.
*/
- ldlm_lock_decref(lockh, mode);
+ ldlm_lock_decref(&lockh, mode);
LDLM_LOCK_PUT(matched);
return -ECANCELED;
- }
-
- if (osc_set_lock_data_with_check(matched, einfo)) {
+ } else if (osc_set_lock_data_with_check(matched, einfo)) {
*flags |= LDLM_FL_LVB_READY;
- /* addref the lock only if not async requests and PW
- * lock is matched whereas we asked for PR.
- */
- if (!rqset && einfo->ei_mode != mode)
- ldlm_lock_addref(lockh, LCK_PR);
- if (intent) {
- /* I would like to be able to ASSERT here that
- * rss <= kms, but I can't, for reasons which
- * are explained in lov_enqueue()
- */
- }
-
- /* We already have a lock, and it's referenced.
- *
- * At this point, the cl_lock::cll_state is CLS_QUEUING,
- * AGL upcall may change it to CLS_HELD directly.
- */
- (*upcall)(cookie, ELDLM_OK);
+ /* We already have a lock, and it's referenced. */
+ (*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED);
- if (einfo->ei_mode != mode)
- ldlm_lock_decref(lockh, LCK_PW);
- else if (rqset)
- /* For async requests, decref the lock. */
- ldlm_lock_decref(lockh, einfo->ei_mode);
+ ldlm_lock_decref(&lockh, mode);
LDLM_LOCK_PUT(matched);
return ELDLM_OK;
+ } else {
+ ldlm_lock_decref(&lockh, mode);
+ LDLM_LOCK_PUT(matched);
}
-
- ldlm_lock_decref(lockh, mode);
- LDLM_LOCK_PUT(matched);
}
- no_match:
+no_match:
+ if (*flags & LDLM_FL_TEST_LOCK)
+ return -ENOLCK;
if (intent) {
- LIST_HEAD(cancels);
-
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
&RQF_LDLM_ENQUEUE_LVB);
if (!req)
return -ENOMEM;
- rc = ldlm_prep_enqueue_req(exp, req, &cancels, 0);
+ rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
if (rc) {
ptlrpc_request_free(req);
return rc;
@@ -2331,21 +2340,31 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
*flags &= ~LDLM_FL_BLOCK_GRANTED;
rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb,
- sizeof(*lvb), LVB_T_OST, lockh, async);
- if (rqset) {
+ sizeof(*lvb), LVB_T_OST, &lockh, async);
+ if (async) {
if (!rc) {
struct osc_enqueue_args *aa;
- CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
+ CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
aa = ptlrpc_req_async_args(req);
- aa->oa_ei = einfo;
aa->oa_exp = exp;
- aa->oa_flags = flags;
+ aa->oa_mode = einfo->ei_mode;
+ aa->oa_type = einfo->ei_type;
+ lustre_handle_copy(&aa->oa_lockh, &lockh);
aa->oa_upcall = upcall;
aa->oa_cookie = cookie;
- aa->oa_lvb = lvb;
- aa->oa_lockh = lockh;
aa->oa_agl = !!agl;
+ if (!agl) {
+ aa->oa_flags = flags;
+ aa->oa_lvb = lvb;
+ } else {
+ /* AGL is essentially to enqueue a DLM lock
+ * in advance, so we don't care about the
+ * result of AGL enqueue.
+ */
+ aa->oa_lvb = NULL;
+ aa->oa_flags = NULL;
+ }
req->rq_interpret_reply =
(ptlrpc_interpterer_t)osc_enqueue_interpret;
@@ -2359,7 +2378,8 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
return rc;
}
- rc = osc_enqueue_fini(req, lvb, upcall, cookie, flags, agl, rc);
+ rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode,
+ flags, agl, rc);
if (intent)
ptlrpc_req_finished(req);
@@ -2381,8 +2401,8 @@ int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
/* Filesystem lock extents are extended to page boundaries so that
* dealing with the page cache is a little smoother
*/
- policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
- policy->l_extent.end |= ~CFS_PAGE_MASK;
+ policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
+ policy->l_extent.end |= ~PAGE_MASK;
/* Next, search for already existing extent locks that will cover us */
/* If we're trying to read, we also search for an existing PW lock. The
@@ -2493,7 +2513,7 @@ static int osc_statfs_async(struct obd_export *exp,
}
req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret;
- CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
+ CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
aa = ptlrpc_req_async_args(req);
aa->aa_oi = oinfo;
@@ -2756,7 +2776,8 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
memcpy(tmp, key, keylen);
- req->rq_no_delay = req->rq_no_resend = 1;
+ req->rq_no_delay = 1;
+ req->rq_no_resend = 1;
ptlrpc_request_set_replen(req);
rc = ptlrpc_queue_wait(req);
if (rc)
@@ -2787,7 +2808,7 @@ out:
goto skip_locking;
policy.l_extent.start = fm_key->fiemap.fm_start &
- CFS_PAGE_MASK;
+ PAGE_MASK;
if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
fm_key->fiemap.fm_start + PAGE_SIZE - 1)
@@ -2795,7 +2816,7 @@ out:
else
policy.l_extent.end = (fm_key->fiemap.fm_start +
fm_key->fiemap.fm_length +
- PAGE_SIZE - 1) & CFS_PAGE_MASK;
+ PAGE_SIZE - 1) & PAGE_MASK;
ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
@@ -2896,7 +2917,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
LASSERT(!cli->cl_cache); /* only once */
cli->cl_cache = val;
- atomic_inc(&cli->cl_cache->ccc_users);
+ cl_cache_incref(cli->cl_cache);
cli->cl_lru_left = &cli->cl_cache->ccc_lru_left;
/* add this osc into entity list */
@@ -2913,7 +2934,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
int nr = atomic_read(&cli->cl_lru_in_list) >> 1;
int target = *(int *)val;
- nr = osc_lru_shrink(cli, min(nr, target));
+ nr = osc_lru_shrink(env, cli, min(nr, target), true);
*(int *)val -= nr;
return 0;
}
@@ -2992,12 +3013,12 @@ static int osc_reconnect(const struct lu_env *env,
if (data && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
long lost_grant;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?:
2 * cli_brw_size(obd);
lost_grant = cli->cl_lost_grant;
cli->cl_lost_grant = 0;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d, lost: %ld.\n",
data->ocd_connect_flags,
@@ -3047,10 +3068,10 @@ static int osc_import_event(struct obd_device *obd,
switch (event) {
case IMP_EVENT_DISCON: {
cli = &obd->u.cli;
- client_obd_list_lock(&cli->cl_loi_list_lock);
+ spin_lock(&cli->cl_loi_list_lock);
cli->cl_avail_grant = 0;
cli->cl_lost_grant = 0;
- client_obd_list_unlock(&cli->cl_loi_list_lock);
+ spin_unlock(&cli->cl_loi_list_lock);
break;
}
case IMP_EVENT_INACTIVE: {
@@ -3073,8 +3094,9 @@ static int osc_import_event(struct obd_device *obd,
ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
cl_env_put(env, &refcheck);
- } else
+ } else {
rc = PTR_ERR(env);
+ }
break;
}
case IMP_EVENT_ACTIVE: {
@@ -3116,20 +3138,14 @@ static int osc_import_event(struct obd_device *obd,
* \retval zero the lock can't be canceled
* \retval other ok to cancel
*/
-static int osc_cancel_for_recovery(struct ldlm_lock *lock)
+static int osc_cancel_weight(struct ldlm_lock *lock)
{
- check_res_locked(lock->l_resource);
-
/*
- * Cancel all unused extent lock in granted mode LCK_PR or LCK_CR.
- *
- * XXX as a future improvement, we can also cancel unused write lock
- * if it doesn't have dirty data and active mmaps.
+ * Cancel all unused and granted extent locks.
*/
if (lock->l_resource->lr_type == LDLM_EXTENT &&
- (lock->l_granted_mode == LCK_PR ||
- lock->l_granted_mode == LCK_CR) &&
- (osc_dlm_lock_pageref(lock) == 0))
+ lock->l_granted_mode == lock->l_req_mode &&
+ osc_ldlm_weigh_ast(lock) == 0)
return 1;
return 0;
@@ -3170,6 +3186,14 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
}
cli->cl_writeback_work = handler;
+ handler = ptlrpcd_alloc_work(cli->cl_import, lru_queue_work, cli);
+ if (IS_ERR(handler)) {
+ rc = PTR_ERR(handler);
+ goto out_ptlrpcd_work;
+ }
+
+ cli->cl_lru_work = handler;
+
rc = osc_quota_setup(obd);
if (rc)
goto out_ptlrpcd_work;
@@ -3198,11 +3222,18 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
}
INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
- ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
+ ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
return rc;
out_ptlrpcd_work:
- ptlrpcd_destroy_work(handler);
+ if (cli->cl_writeback_work) {
+ ptlrpcd_destroy_work(cli->cl_writeback_work);
+ cli->cl_writeback_work = NULL;
+ }
+ if (cli->cl_lru_work) {
+ ptlrpcd_destroy_work(cli->cl_lru_work);
+ cli->cl_lru_work = NULL;
+ }
out_client_setup:
client_obd_cleanup(obd);
out_ptlrpcd:
@@ -3241,6 +3272,10 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
ptlrpcd_destroy_work(cli->cl_writeback_work);
cli->cl_writeback_work = NULL;
}
+ if (cli->cl_lru_work) {
+ ptlrpcd_destroy_work(cli->cl_lru_work);
+ cli->cl_lru_work = NULL;
+ }
obd_cleanup_client_import(obd);
ptlrpc_lprocfs_unregister_obd(obd);
lprocfs_obd_cleanup(obd);
@@ -3262,7 +3297,7 @@ static int osc_cleanup(struct obd_device *obd)
list_del_init(&cli->cl_lru_osc);
spin_unlock(&cli->cl_cache->ccc_lru_lock);
cli->cl_lru_left = NULL;
- atomic_dec(&cli->cl_cache->ccc_users);
+ cl_cache_decref(cli->cl_cache);
cli->cl_cache = NULL;
}
@@ -3330,7 +3365,6 @@ static struct obd_ops osc_obd_ops = {
};
extern struct lu_kmem_descr osc_caches[];
-extern spinlock_t osc_ast_guard;
extern struct lock_class_key osc_ast_guard_class;
static int __init osc_init(void)
@@ -3357,9 +3391,6 @@ static int __init osc_init(void)
if (rc)
goto out_kmem;
- spin_lock_init(&osc_ast_guard);
- lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
-
/* This is obviously too much memory, only prevent overflow here */
if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
rc = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
index cf3ac8eee9ee..d4463d7c81d2 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -587,35 +583,36 @@ static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
spin_unlock(&pool->prp_lock);
}
-static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
- __u32 version, int opcode,
- int count, __u32 *lengths, char **bufs,
- struct ptlrpc_cli_ctx *ctx)
+int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+ __u32 version, int opcode, char **bufs,
+ struct ptlrpc_cli_ctx *ctx)
{
- struct obd_import *imp = request->rq_import;
+ int count;
+ struct obd_import *imp;
+ __u32 *lengths;
int rc;
- if (unlikely(ctx))
+ count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT);
+ imp = request->rq_import;
+ lengths = request->rq_pill.rc_area[RCL_CLIENT];
+
+ if (unlikely(ctx)) {
request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx);
- else {
+ } else {
rc = sptlrpc_req_get_ctx(request);
if (rc)
goto out_free;
}
-
sptlrpc_req_set_flavor(request, opcode);
rc = lustre_pack_request(request, imp->imp_msg_magic, count,
lengths, bufs);
- if (rc) {
- LASSERT(!request->rq_pool);
+ if (rc)
goto out_ctx;
- }
lustre_msg_add_version(request->rq_reqmsg, version);
request->rq_send_state = LUSTRE_IMP_FULL;
request->rq_type = PTL_RPC_MSG_REQUEST;
- request->rq_export = NULL;
request->rq_req_cbid.cbid_fn = request_out_callback;
request->rq_req_cbid.cbid_arg = request;
@@ -624,6 +621,8 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
request->rq_reply_cbid.cbid_arg = request;
request->rq_reply_deadline = 0;
+ request->rq_bulk_deadline = 0;
+ request->rq_req_deadline = 0;
request->rq_phase = RQ_PHASE_NEW;
request->rq_next_phase = RQ_PHASE_UNDEFINED;
@@ -632,40 +631,49 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
ptlrpc_at_set_req_timeout(request);
- spin_lock_init(&request->rq_lock);
- INIT_LIST_HEAD(&request->rq_list);
- INIT_LIST_HEAD(&request->rq_timed_list);
- INIT_LIST_HEAD(&request->rq_replay_list);
- INIT_LIST_HEAD(&request->rq_ctx_chain);
- INIT_LIST_HEAD(&request->rq_set_chain);
- INIT_LIST_HEAD(&request->rq_history_list);
- INIT_LIST_HEAD(&request->rq_exp_list);
- init_waitqueue_head(&request->rq_reply_waitq);
- init_waitqueue_head(&request->rq_set_waitq);
request->rq_xid = ptlrpc_next_xid();
- atomic_set(&request->rq_refcount, 1);
-
lustre_msg_set_opc(request->rq_reqmsg, opcode);
+ /* Let's setup deadline for req/reply/bulk unlink for opcode. */
+ if (cfs_fail_val == opcode) {
+ time_t *fail_t = NULL, *fail2_t = NULL;
+
+ if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) {
+ fail_t = &request->rq_bulk_deadline;
+ } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+ fail_t = &request->rq_reply_deadline;
+ } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) {
+ fail_t = &request->rq_req_deadline;
+ } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) {
+ fail_t = &request->rq_reply_deadline;
+ fail2_t = &request->rq_bulk_deadline;
+ }
+
+ if (fail_t) {
+ *fail_t = ktime_get_real_seconds() + LONG_UNLINK;
+
+ if (fail2_t)
+ *fail2_t = ktime_get_real_seconds() +
+ LONG_UNLINK;
+
+ /* The RPC is infected, let the test change the
+ * fail_loc
+ */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(cfs_time_seconds(2));
+ set_current_state(TASK_RUNNING);
+ }
+ }
+
return 0;
+
out_ctx:
+ LASSERT(!request->rq_pool);
sptlrpc_cli_ctx_put(request->rq_cli_ctx, 1);
out_free:
class_import_put(imp);
return rc;
}
-
-int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
- __u32 version, int opcode, char **bufs,
- struct ptlrpc_cli_ctx *ctx)
-{
- int count;
-
- count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT);
- return __ptlrpc_request_bufs_pack(request, version, opcode, count,
- request->rq_pill.rc_area[RCL_CLIENT],
- bufs, ctx);
-}
EXPORT_SYMBOL(ptlrpc_request_bufs_pack);
/**
@@ -722,7 +730,9 @@ struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
request = ptlrpc_prep_req_from_pool(pool);
if (request) {
- LASSERTF((unsigned long)imp > 0x1000, "%p\n", imp);
+ ptlrpc_cli_req_init(request);
+
+ LASSERTF((unsigned long)imp > 0x1000, "%p", imp);
LASSERT(imp != LP_POISON);
LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p\n",
imp->imp_client);
@@ -1082,7 +1092,6 @@ static int ptlrpc_console_allow(struct ptlrpc_request *req)
*/
if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) &&
(opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) {
-
/* Suppress timed out reconnect requests */
if (req->rq_timedout)
return 0;
@@ -1164,9 +1173,9 @@ static int after_reply(struct ptlrpc_request *req)
LASSERT(obd);
/* repbuf must be unlinked */
- LASSERT(!req->rq_receiving_reply && !req->rq_reply_unlink);
+ LASSERT(!req->rq_receiving_reply && req->rq_reply_unlinked);
- if (req->rq_reply_truncate) {
+ if (req->rq_reply_truncated) {
if (ptlrpc_no_resend(req)) {
DEBUG_REQ(D_ERROR, req, "reply buffer overflow, expected: %d, actual size: %d",
req->rq_nob_received, req->rq_repbuf_len);
@@ -1240,8 +1249,9 @@ static int after_reply(struct ptlrpc_request *req)
}
ktime_get_real_ts64(&work_start);
- timediff = (work_start.tv_sec - req->rq_arrival_time.tv_sec) * USEC_PER_SEC +
- (work_start.tv_nsec - req->rq_arrival_time.tv_nsec) / NSEC_PER_USEC;
+ timediff = (work_start.tv_sec - req->rq_sent_tv.tv_sec) * USEC_PER_SEC +
+ (work_start.tv_nsec - req->rq_sent_tv.tv_nsec) /
+ NSEC_PER_USEC;
if (obd->obd_svc_stats) {
lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR,
timediff);
@@ -1504,16 +1514,28 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
if (!(req->rq_phase == RQ_PHASE_RPC ||
req->rq_phase == RQ_PHASE_BULK ||
req->rq_phase == RQ_PHASE_INTERPRET ||
- req->rq_phase == RQ_PHASE_UNREGISTERING ||
+ req->rq_phase == RQ_PHASE_UNREG_RPC ||
+ req->rq_phase == RQ_PHASE_UNREG_BULK ||
req->rq_phase == RQ_PHASE_COMPLETE)) {
DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
LBUG();
}
- if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+ if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+ req->rq_phase == RQ_PHASE_UNREG_BULK) {
LASSERT(req->rq_next_phase != req->rq_phase);
LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
+ if (req->rq_req_deadline &&
+ !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK))
+ req->rq_req_deadline = 0;
+ if (req->rq_reply_deadline &&
+ !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK))
+ req->rq_reply_deadline = 0;
+ if (req->rq_bulk_deadline &&
+ !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
+ req->rq_bulk_deadline = 0;
+
/*
* Skip processing until reply is unlinked. We
* can't return to pool before that and we can't
@@ -1521,7 +1543,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
* sure that all rdma transfers finished and will
* not corrupt any data.
*/
- if (ptlrpc_client_recv_or_unlink(req) ||
+ if (req->rq_phase == RQ_PHASE_UNREG_RPC &&
+ ptlrpc_client_recv_or_unlink(req))
+ continue;
+ if (req->rq_phase == RQ_PHASE_UNREG_BULK &&
ptlrpc_client_bulk_active(req))
continue;
@@ -1999,7 +2024,7 @@ void ptlrpc_interrupted_set(void *data)
list_entry(tmp, struct ptlrpc_request, rq_set_chain);
if (req->rq_phase != RQ_PHASE_RPC &&
- req->rq_phase != RQ_PHASE_UNREGISTERING)
+ req->rq_phase != RQ_PHASE_UNREG_RPC)
continue;
ptlrpc_mark_interrupted(req);
@@ -2087,7 +2112,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
set, timeout);
- if (timeout == 0 && !cfs_signal_pending())
+ if (timeout == 0 && !signal_pending(current))
/*
* No requests are in-flight (ether timed out
* or delayed), so we can allow interrupts.
@@ -2114,7 +2139,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
* it being ignored forever
*/
if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr &&
- cfs_signal_pending()) {
+ signal_pending(current)) {
sigset_t blocked_sigs =
cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
@@ -2124,7 +2149,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
* important signals since ptlrpc set is not easily
* reentrant from userspace again
*/
- if (cfs_signal_pending())
+ if (signal_pending(current))
ptlrpc_interrupted_set(set);
cfs_restore_sigs(blocked_sigs);
}
@@ -2196,11 +2221,11 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
{
if (!request)
return;
+ LASSERT(!request->rq_srv_req);
+ LASSERT(!request->rq_export);
LASSERTF(!request->rq_receiving_reply, "req %p\n", request);
- LASSERTF(!request->rq_rqbd, "req %p\n", request);/* client-side */
LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
- LASSERTF(list_empty(&request->rq_exp_list), "req %p\n", request);
LASSERTF(!request->rq_replay, "req %p\n", request);
req_capsule_fini(&request->rq_pill);
@@ -2226,10 +2251,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
if (request->rq_repbuf)
sptlrpc_cli_free_repbuf(request);
- if (request->rq_export) {
- class_export_put(request->rq_export);
- request->rq_export = NULL;
- }
+
if (request->rq_import) {
class_import_put(request->rq_import);
request->rq_import = NULL;
@@ -2314,8 +2336,9 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
/* Let's setup deadline for reply unlink. */
if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
- async && request->rq_reply_deadline == 0)
- request->rq_reply_deadline = ktime_get_real_seconds()+LONG_UNLINK;
+ async && request->rq_reply_deadline == 0 && cfs_fail_val == 0)
+ request->rq_reply_deadline =
+ ktime_get_real_seconds() + LONG_UNLINK;
/* Nothing left to do. */
if (!ptlrpc_client_recv_or_unlink(request))
@@ -2328,7 +2351,7 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
return 1;
/* Move to "Unregistering" phase as reply was not unlinked yet. */
- ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING);
+ ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC);
/* Do not wait for unlink to finish. */
if (async)
@@ -2360,9 +2383,10 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
LASSERT(rc == -ETIMEDOUT);
DEBUG_REQ(D_WARNING, request,
- "Unexpectedly long timeout rvcng=%d unlnk=%d/%d",
+ "Unexpectedly long timeout receiving_reply=%d req_ulinked=%d reply_unlinked=%d",
request->rq_receiving_reply,
- request->rq_req_unlink, request->rq_reply_unlink);
+ request->rq_req_unlinked,
+ request->rq_reply_unlinked);
}
return 0;
}
@@ -2619,11 +2643,6 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
}
EXPORT_SYMBOL(ptlrpc_queue_wait);
-struct ptlrpc_replay_async_args {
- int praa_old_state;
- int praa_old_status;
-};
-
/**
* Callback used for replayed requests reply processing.
* In case of successful reply calls registered request replay callback.
@@ -2962,7 +2981,6 @@ static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
req->rq_timeout = obd_timeout;
req->rq_sent = ktime_get_real_seconds();
req->rq_deadline = req->rq_sent + req->rq_timeout;
- req->rq_reply_deadline = req->rq_deadline;
req->rq_phase = RQ_PHASE_INTERPRET;
req->rq_next_phase = RQ_PHASE_COMPLETE;
req->rq_xid = ptlrpc_next_xid();
@@ -3018,27 +3036,17 @@ void *ptlrpcd_alloc_work(struct obd_import *imp,
return ERR_PTR(-ENOMEM);
}
+ ptlrpc_cli_req_init(req);
+
req->rq_send_state = LUSTRE_IMP_FULL;
req->rq_type = PTL_RPC_MSG_REQUEST;
req->rq_import = class_import_get(imp);
- req->rq_export = NULL;
req->rq_interpret_reply = work_interpreter;
/* don't want reply */
- req->rq_receiving_reply = 0;
- req->rq_req_unlink = req->rq_reply_unlink = 0;
- req->rq_no_delay = req->rq_no_resend = 1;
+ req->rq_no_delay = 1;
+ req->rq_no_resend = 1;
req->rq_pill.rc_fmt = (void *)&worker_format;
- spin_lock_init(&req->rq_lock);
- INIT_LIST_HEAD(&req->rq_list);
- INIT_LIST_HEAD(&req->rq_replay_list);
- INIT_LIST_HEAD(&req->rq_set_chain);
- INIT_LIST_HEAD(&req->rq_history_list);
- INIT_LIST_HEAD(&req->rq_exp_list);
- init_waitqueue_head(&req->rq_reply_waitq);
- init_waitqueue_head(&req->rq_set_waitq);
- atomic_set(&req->rq_refcount, 1);
-
CLASSERT(sizeof(*args) <= sizeof(req->rq_async_args));
args = ptlrpc_req_async_args(req);
args->cb = cb;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c
index a14daff3fca0..177a379da9fa 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/connection.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
index 47be21ac9f10..b1ce72511509 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/events.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -55,28 +51,33 @@ void request_out_callback(lnet_event_t *ev)
{
struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
struct ptlrpc_request *req = cbid->cbid_arg;
+ bool wakeup = false;
- LASSERT(ev->type == LNET_EVENT_SEND ||
- ev->type == LNET_EVENT_UNLINK);
+ LASSERT(ev->type == LNET_EVENT_SEND || ev->type == LNET_EVENT_UNLINK);
LASSERT(ev->unlinked);
DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
sptlrpc_request_out_callback(req);
+
spin_lock(&req->rq_lock);
req->rq_real_sent = ktime_get_real_seconds();
- if (ev->unlinked)
- req->rq_req_unlink = 0;
+ req->rq_req_unlinked = 1;
+ /* reply_in_callback happened before request_out_callback? */
+ if (req->rq_reply_unlinked)
+ wakeup = true;
if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
-
/* Failed send: make it seem like the reply timed out, just
* like failing sends in client.c does currently...
*/
-
req->rq_net_err = 1;
- ptlrpc_client_wake_req(req);
+ wakeup = true;
}
+
+ if (wakeup)
+ ptlrpc_client_wake_req(req);
+
spin_unlock(&req->rq_lock);
ptlrpc_req_finished(req);
@@ -105,7 +106,7 @@ void reply_in_callback(lnet_event_t *ev)
req->rq_receiving_reply = 0;
req->rq_early = 0;
if (ev->unlinked)
- req->rq_reply_unlink = 0;
+ req->rq_reply_unlinked = 1;
if (ev->status)
goto out_wake;
@@ -119,7 +120,7 @@ void reply_in_callback(lnet_event_t *ev)
if (ev->mlength < ev->rlength) {
CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
req->rq_replen, ev->rlength, ev->offset);
- req->rq_reply_truncate = 1;
+ req->rq_reply_truncated = 1;
req->rq_replied = 1;
req->rq_status = -EOVERFLOW;
req->rq_nob_received = ev->rlength + ev->offset;
@@ -136,7 +137,8 @@ void reply_in_callback(lnet_event_t *ev)
req->rq_early_count++; /* number received, client side */
- if (req->rq_replied) /* already got the real reply */
+ /* already got the real reply or buffers are already unlinked */
+ if (req->rq_replied || req->rq_reply_unlinked == 1)
goto out_wake;
req->rq_early = 1;
@@ -329,6 +331,7 @@ void request_in_callback(lnet_event_t *ev)
}
}
+ ptlrpc_srv_req_init(req);
/* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
* flags are reset and scalars are zero. We only set the message
* size to non-zero if this was a successful receive.
@@ -342,10 +345,6 @@ void request_in_callback(lnet_event_t *ev)
req->rq_self = ev->target.nid;
req->rq_rqbd = rqbd;
req->rq_phase = RQ_PHASE_NEW;
- spin_lock_init(&req->rq_lock);
- INIT_LIST_HEAD(&req->rq_timed_list);
- INIT_LIST_HEAD(&req->rq_exp_list);
- atomic_set(&req->rq_refcount, 1);
if (ev->type == LNET_EVENT_PUT)
CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n",
req, req->rq_xid, ev->mlength);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
index cd94fed0ffdf..3292e6ea0102 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/import.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -360,9 +356,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
"still on delayed list");
}
- CERROR("%s: RPCs in \"%s\" phase found (%d). Network is sluggish? Waiting them to error out.\n",
+ CERROR("%s: Unregistering RPCs found (%d). Network is sluggish? Waiting them to error out.\n",
cli_tgt,
- ptlrpc_phase2str(RQ_PHASE_UNREGISTERING),
atomic_read(&imp->
imp_unregistering));
}
@@ -698,7 +693,8 @@ int ptlrpc_connect_import(struct obd_import *imp)
lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_NEXT_VER);
- request->rq_no_resend = request->rq_no_delay = 1;
+ request->rq_no_resend = 1;
+ request->rq_no_delay = 1;
request->rq_send_state = LUSTRE_IMP_CONNECTING;
/* Allow a slightly larger reply for future growth compatibility */
req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
@@ -1001,6 +997,7 @@ finish:
return 0;
}
} else {
+ static bool warned;
spin_lock(&imp->imp_lock);
list_del(&imp->imp_conn_current->oic_item);
@@ -1021,7 +1018,7 @@ finish:
goto out;
}
- if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+ if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
(ocd->ocd_version > LUSTRE_VERSION_CODE +
LUSTRE_VERSION_OFFSET_WARN ||
ocd->ocd_version < LUSTRE_VERSION_CODE -
@@ -1029,10 +1026,8 @@ finish:
/* Sigh, some compilers do not like #ifdef in the middle
* of macro arguments
*/
- const char *older = "older. Consider upgrading server or downgrading client"
- ;
- const char *newer = "newer than client version. Consider upgrading client"
- ;
+ const char *older = "older than client. Consider upgrading server";
+ const char *newer = "newer than client. Consider recompiling application";
LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
obd2cli_tgt(imp->imp_obd),
@@ -1042,6 +1037,7 @@ finish:
OBD_OCD_VERSION_FIX(ocd->ocd_version),
ocd->ocd_version > LUSTRE_VERSION_CODE ?
newer : older, LUSTRE_VERSION_STRING);
+ warned = true;
}
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
@@ -1370,7 +1366,6 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
if (rc)
goto out;
}
-
}
if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) {
@@ -1453,7 +1448,6 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL);
rc = l_wait_event(imp->imp_recovery_waitq,
!ptlrpc_import_in_recovery(imp), &lwi);
-
}
spin_lock(&imp->imp_lock);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
index 5b06901e5729..ab5d85174245 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/layout.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -160,6 +156,16 @@ static const struct req_msg_field *fld_query_server[] = {
&RMF_FLD_MDFLD
};
+static const struct req_msg_field *fld_read_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FLD_MDFLD
+};
+
+static const struct req_msg_field *fld_read_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_GENERIC_DATA
+};
+
static const struct req_msg_field *mds_getattr_name_client[] = {
&RMF_PTLRPC_BODY,
&RMF_MDT_BODY,
@@ -188,7 +194,7 @@ static const struct req_msg_field *mds_reint_create_slave_client[] = {
&RMF_DLM_REQ
};
-static const struct req_msg_field *mds_reint_create_rmt_acl_client[] = {
+static const struct req_msg_field *mds_reint_create_acl_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_REINT,
&RMF_CAPA1,
@@ -566,7 +572,7 @@ static const struct req_msg_field *ost_get_info_generic_server[] = {
static const struct req_msg_field *ost_get_info_generic_client[] = {
&RMF_PTLRPC_BODY,
- &RMF_SETINFO_KEY
+ &RMF_GETINFO_KEY
};
static const struct req_msg_field *ost_get_last_id_server[] = {
@@ -574,6 +580,12 @@ static const struct req_msg_field *ost_get_last_id_server[] = {
&RMF_OBD_ID
};
+static const struct req_msg_field *ost_get_last_fid_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_GETINFO_KEY,
+ &RMF_FID,
+};
+
static const struct req_msg_field *ost_get_last_fid_server[] = {
&RMF_PTLRPC_BODY,
&RMF_FID,
@@ -643,6 +655,7 @@ static struct req_format *req_formats[] = {
&RQF_MGS_CONFIG_READ,
&RQF_SEQ_QUERY,
&RQF_FLD_QUERY,
+ &RQF_FLD_READ,
&RQF_MDS_CONNECT,
&RQF_MDS_DISCONNECT,
&RQF_MDS_GET_INFO,
@@ -662,7 +675,7 @@ static struct req_format *req_formats[] = {
&RQF_MDS_DONE_WRITING,
&RQF_MDS_REINT,
&RQF_MDS_REINT_CREATE,
- &RQF_MDS_REINT_CREATE_RMT_ACL,
+ &RQF_MDS_REINT_CREATE_ACL,
&RQF_MDS_REINT_CREATE_SLAVE,
&RQF_MDS_REINT_CREATE_SYM,
&RQF_MDS_REINT_OPEN,
@@ -696,7 +709,7 @@ static struct req_format *req_formats[] = {
&RQF_OST_BRW_WRITE,
&RQF_OST_STATFS,
&RQF_OST_SET_GRANT_INFO,
- &RQF_OST_GET_INFO_GENERIC,
+ &RQF_OST_GET_INFO,
&RQF_OST_GET_INFO_LAST_ID,
&RQF_OST_GET_INFO_LAST_FID,
&RQF_OST_SET_INFO_LAST_FID,
@@ -1162,6 +1175,10 @@ struct req_format RQF_FLD_QUERY =
DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server);
EXPORT_SYMBOL(RQF_FLD_QUERY);
+struct req_format RQF_FLD_READ =
+ DEFINE_REQ_FMT0("FLD_READ", fld_read_client, fld_read_server);
+EXPORT_SYMBOL(RQF_FLD_READ);
+
struct req_format RQF_LOG_CANCEL =
DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty);
EXPORT_SYMBOL(RQF_LOG_CANCEL);
@@ -1221,10 +1238,10 @@ struct req_format RQF_MDS_REINT_CREATE =
mds_reint_create_client, mdt_body_capa);
EXPORT_SYMBOL(RQF_MDS_REINT_CREATE);
-struct req_format RQF_MDS_REINT_CREATE_RMT_ACL =
- DEFINE_REQ_FMT0("MDS_REINT_CREATE_RMT_ACL",
- mds_reint_create_rmt_acl_client, mdt_body_capa);
-EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_RMT_ACL);
+struct req_format RQF_MDS_REINT_CREATE_ACL =
+ DEFINE_REQ_FMT0("MDS_REINT_CREATE_ACL",
+ mds_reint_create_acl_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_ACL);
struct req_format RQF_MDS_REINT_CREATE_SLAVE =
DEFINE_REQ_FMT0("MDS_REINT_CREATE_EA",
@@ -1519,10 +1536,10 @@ struct req_format RQF_OST_SET_GRANT_INFO =
ost_body_only);
EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO);
-struct req_format RQF_OST_GET_INFO_GENERIC =
+struct req_format RQF_OST_GET_INFO =
DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client,
ost_get_info_generic_server);
-EXPORT_SYMBOL(RQF_OST_GET_INFO_GENERIC);
+EXPORT_SYMBOL(RQF_OST_GET_INFO);
struct req_format RQF_OST_GET_INFO_LAST_ID =
DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client,
@@ -1530,7 +1547,7 @@ struct req_format RQF_OST_GET_INFO_LAST_ID =
EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID);
struct req_format RQF_OST_GET_INFO_LAST_FID =
- DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", obd_set_info_client,
+ DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", ost_get_last_fid_client,
ost_get_last_fid_server);
EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
index a23ac5f9ae96..0f55c01feba8 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
index fbccb62213b5..bccdace7e51f 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
index c95a91ce26c9..bc93b75744e1 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -131,6 +127,7 @@ static struct ll_rpc_opcode {
{ SEC_CTX_INIT_CONT, "sec_ctx_init_cont" },
{ SEC_CTX_FINI, "sec_ctx_fini" },
{ FLD_QUERY, "fld_query" },
+ { FLD_READ, "fld_read" },
};
static struct ll_eopcode {
@@ -679,11 +676,11 @@ static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file,
/**
* The second token is either NULL, or an optional [reg|hp] string
*/
- if (strcmp(cmd, "reg") == 0)
+ if (strcmp(cmd, "reg") == 0) {
queue = PTLRPC_NRS_QUEUE_REG;
- else if (strcmp(cmd, "hp") == 0)
+ } else if (strcmp(cmd, "hp") == 0) {
queue = PTLRPC_NRS_QUEUE_HP;
- else {
+ } else {
rc = -EINVAL;
goto out;
}
@@ -693,8 +690,9 @@ default_queue:
if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) {
rc = -ENODEV;
goto out;
- } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc))
+ } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) {
queue = PTLRPC_NRS_QUEUE_REG;
+ }
/**
* Serialize NRS core lprocfs operations with policy registration/
@@ -870,7 +868,8 @@ ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s,
if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */
srhi->srhi_req = NULL;
- seq = srhi->srhi_seq = 0;
+ seq = 0;
+ srhi->srhi_seq = 0;
} else { /* the next sequence */
seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits);
}
@@ -1159,7 +1158,6 @@ void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes)
lprocfs_counter_add(svc_stats, idx, bytes);
}
-
EXPORT_SYMBOL(ptlrpc_lprocfs_brw);
void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc)
@@ -1320,6 +1318,5 @@ int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer,
up_read(&obd->u.cli.cl_sem);
return count;
-
}
EXPORT_SYMBOL(lprocfs_wr_pinger_recov);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
index 10b8fe82a342..11ec82545347 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -251,7 +247,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
/* Let's setup deadline for reply unlink. */
if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
- async && req->rq_bulk_deadline == 0)
+ async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK;
if (ptlrpc_client_bulk_active(req) == 0) /* completed or */
@@ -270,7 +266,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
return 1; /* never registered */
/* Move to "Unregistering" phase as bulk was not unlinked yet. */
- ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);
+ ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK);
/* Do not wait for unlink to finish. */
if (async)
@@ -581,19 +577,18 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
}
spin_lock(&request->rq_lock);
- /* If the MD attach succeeds, there _will_ be a reply_in callback */
- request->rq_receiving_reply = !noreply;
- request->rq_req_unlink = 1;
/* We are responsible for unlinking the reply buffer */
- request->rq_reply_unlink = !noreply;
+ request->rq_reply_unlinked = noreply;
+ request->rq_receiving_reply = !noreply;
/* Clear any flags that may be present from previous sends. */
+ request->rq_req_unlinked = 0;
request->rq_replied = 0;
request->rq_err = 0;
request->rq_timedout = 0;
request->rq_net_err = 0;
request->rq_resend = 0;
request->rq_restart = 0;
- request->rq_reply_truncate = 0;
+ request->rq_reply_truncated = 0;
spin_unlock(&request->rq_lock);
if (!noreply) {
@@ -608,7 +603,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
reply_md.user_ptr = &request->rq_reply_cbid;
reply_md.eq_handle = ptlrpc_eq_h;
- /* We must see the unlink callback to unset rq_reply_unlink,
+ /* We must see the unlink callback to set rq_reply_unlinked,
* so we can't auto-unlink
*/
rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
@@ -637,7 +632,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
- ktime_get_real_ts64(&request->rq_arrival_time);
+ ktime_get_real_ts64(&request->rq_sent_tv);
request->rq_sent = ktime_get_real_seconds();
/* We give the server rq_timeout secs to process the req, and
* add the network latency for our local timeout.
@@ -655,9 +650,10 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
connection,
request->rq_request_portal,
request->rq_xid, 0);
- if (rc == 0)
+ if (likely(rc == 0))
goto out;
+ request->rq_req_unlinked = 1;
ptlrpc_req_finished(request);
if (noreply)
goto out;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
index 710fb806f122..d88faf61e740 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/nrs.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
@@ -769,7 +769,7 @@ static int nrs_policy_register(struct ptlrpc_nrs *nrs,
spin_unlock(&nrs->nrs_lock);
if (rc != 0)
- (void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
+ (void)nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
return rc;
}
@@ -975,7 +975,11 @@ static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
again:
- nrs = nrs_svcpt2nrs(svcpt, hp);
+ /* scp_nrs_hp could be NULL if its allocation failed for lack of memory. */
+ nrs = hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
+ /* check the nrs_svcpt to see if nrs is initialized. */
+ if (!nrs || !nrs->nrs_svcpt)
+ return;
nrs->nrs_stopping = 1;
list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, pol_list) {
@@ -1038,7 +1042,6 @@ static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
-
if (!nrs_policy_compatible(svc, desc) ||
unlikely(svc->srv_is_stopping))
continue;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
index 492d63fad6f9..b514f18fae50 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -1160,7 +1156,6 @@ __u32 lustre_msg_get_timeout(struct lustre_msg *msg)
if (!pb) {
CERROR("invalid msg %p: no ptlrpc body!\n", msg);
return 0;
-
}
return pb->pb_timeout;
}
@@ -1179,7 +1174,6 @@ __u32 lustre_msg_get_service_time(struct lustre_msg *msg)
if (!pb) {
CERROR("invalid msg %p: no ptlrpc body!\n", msg);
return 0;
-
}
return pb->pb_service_time;
}
@@ -1572,7 +1566,6 @@ static void lustre_swab_obdo(struct obdo *o)
CLASSERT(offsetof(typeof(*o), o_padding_4) != 0);
CLASSERT(offsetof(typeof(*o), o_padding_5) != 0);
CLASSERT(offsetof(typeof(*o), o_padding_6) != 0);
-
}
void lustre_swab_obd_statfs(struct obd_statfs *os)
@@ -1809,19 +1802,6 @@ void lustre_swab_obd_quotactl(struct obd_quotactl *q)
}
EXPORT_SYMBOL(lustre_swab_obd_quotactl);
-void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p)
-{
- __swab32s(&p->rp_uid);
- __swab32s(&p->rp_gid);
- __swab32s(&p->rp_fsuid);
- __swab32s(&p->rp_fsuid_h);
- __swab32s(&p->rp_fsgid);
- __swab32s(&p->rp_fsgid_h);
- __swab32s(&p->rp_access_perm);
- __swab32s(&p->rp_padding);
-};
-EXPORT_SYMBOL(lustre_swab_mdt_remote_perm);
-
void lustre_swab_fid2path(struct getinfo_fid2path *gf)
{
lustre_swab_lu_fid(&gf->gf_fid);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c
index ec3af109a1d7..6c820e944171 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pers.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
index 8a869315c258..c0529d808d81 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -57,7 +53,8 @@ ptlrpc_prep_ping(struct obd_import *imp)
LUSTRE_OBD_VERSION, OBD_PING);
if (req) {
ptlrpc_request_set_replen(req);
- req->rq_no_resend = req->rq_no_delay = 1;
+ req->rq_no_resend = 1;
+ req->rq_no_delay = 1;
}
return req;
}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
index 6ca26c98de1b..a9831fab80f3 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -292,4 +288,53 @@ static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set)
if (atomic_dec_and_test(&set->set_refcount))
kfree(set);
}
+
+/**
+ * Initialise the fields common to client and server ptlrpc requests:
+ * the request lock, a reference count of 1 and empty list heads.
+ */
+static inline void ptlrpc_req_comm_init(struct ptlrpc_request *req)
+{
+ spin_lock_init(&req->rq_lock);
+ atomic_set(&req->rq_refcount, 1);
+ INIT_LIST_HEAD(&req->rq_list);
+ INIT_LIST_HEAD(&req->rq_replay_list);
+}
+
+/**
+ * Initialise a client side ptlrpc request: the common fields, the
+ * reply/unlink state flags, and the client-only lists and wait queues.
+ */
+static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_req *cr = &req->rq_cli;
+
+ ptlrpc_req_comm_init(req);
+
+ /* initial state: not receiving a reply, request and reply marked unlinked */
+ req->rq_receiving_reply = 0;
+ req->rq_req_unlinked = 1;
+ req->rq_reply_unlinked = 1;
+
+ INIT_LIST_HEAD(&cr->cr_set_chain);
+ INIT_LIST_HEAD(&cr->cr_ctx_chain);
+ init_waitqueue_head(&cr->cr_reply_waitq);
+ init_waitqueue_head(&cr->cr_set_waitq);
+}
+
+/**
+ * Initialise a server side ptlrpc request: the common fields, the
+ * rq_srv_req marker, and the server-only list heads.
+ */
+static inline void ptlrpc_srv_req_init(struct ptlrpc_request *req)
+{
+ struct ptlrpc_srv_req *sr = &req->rq_srv;
+
+ ptlrpc_req_comm_init(req);
+ req->rq_srv_req = 1;
+ INIT_LIST_HEAD(&sr->sr_exp_list);
+ INIT_LIST_HEAD(&sr->sr_timed_list);
+ INIT_LIST_HEAD(&sr->sr_hist_list);
+}
+
#endif /* PTLRPC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
index a8ec0e9d7b2e..a70d5843f30e 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
index db003f5da09e..0a374b6c2f71 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -161,9 +157,9 @@ static int ptlrpcd_users;
void ptlrpcd_wake(struct ptlrpc_request *req)
{
- struct ptlrpc_request_set *rq_set = req->rq_set;
+ struct ptlrpc_request_set *set = req->rq_set;
- wake_up(&rq_set->set_waitq);
+ wake_up(&set->set_waitq);
}
EXPORT_SYMBOL(ptlrpcd_wake);
@@ -387,7 +383,8 @@ static int ptlrpcd(void *arg)
{
struct ptlrpcd_ctl *pc = arg;
struct ptlrpc_request_set *set;
- struct lu_env env = { .le_ses = NULL };
+ struct lu_context ses = { 0 };
+ struct lu_env env = { .le_ses = &ses };
int rc = 0;
int exit = 0;
@@ -416,6 +413,13 @@ static int ptlrpcd(void *arg)
*/
rc = lu_context_init(&env.le_ctx,
LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+ if (rc == 0) {
+ rc = lu_context_init(env.le_ses,
+ LCT_SESSION | LCT_REMEMBER | LCT_NOREF);
+ if (rc != 0)
+ lu_context_fini(&env.le_ctx);
+ }
+
if (rc != 0)
goto failed;
@@ -436,9 +440,10 @@ static int ptlrpcd(void *arg)
ptlrpc_expired_set, set);
lu_context_enter(&env.le_ctx);
- l_wait_event(set->set_waitq,
- ptlrpcd_check(&env, pc), &lwi);
+ lu_context_enter(env.le_ses);
+ l_wait_event(set->set_waitq, ptlrpcd_check(&env, pc), &lwi);
lu_context_exit(&env.le_ctx);
+ lu_context_exit(env.le_ses);
/*
* Abort inflight rpcs for forced stop case.
@@ -461,6 +466,7 @@ static int ptlrpcd(void *arg)
if (!list_empty(&set->set_requests))
ptlrpc_set_wait(set);
lu_context_fini(&env.le_ctx);
+ lu_context_fini(env.le_ses);
complete(&pc->pc_finishing);
@@ -899,8 +905,11 @@ int ptlrpcd_addref(void)
int rc = 0;
mutex_lock(&ptlrpcd_mutex);
- if (++ptlrpcd_users == 1)
+ if (++ptlrpcd_users == 1) {
rc = ptlrpcd_init();
+ if (rc < 0)
+ ptlrpcd_users--;
+ }
mutex_unlock(&ptlrpcd_mutex);
return rc;
}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
index 30d9a164e52d..718b3a8d61c6 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/recover.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c
index 187fd1d6898c..dbd819fa6b75 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -867,11 +863,9 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
if (!req)
return -ENOMEM;
- spin_lock_init(&req->rq_lock);
+ ptlrpc_cli_req_init(req);
atomic_set(&req->rq_refcount, 10000);
- INIT_LIST_HEAD(&req->rq_ctx_chain);
- init_waitqueue_head(&req->rq_reply_waitq);
- init_waitqueue_head(&req->rq_set_waitq);
+
req->rq_import = imp;
req->rq_flvr = sec->ps_flvr;
req->rq_cli_ctx = ctx;
@@ -1051,6 +1045,8 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
if (!early_req)
return -ENOMEM;
+ ptlrpc_cli_req_init(early_req);
+
early_size = req->rq_nob_received;
early_bufsz = size_roundup_power2(early_size);
early_buf = libcfs_kvzalloc(early_bufsz, GFP_NOFS);
@@ -1099,12 +1095,11 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
memcpy(early_buf, req->rq_repbuf, early_size);
spin_unlock(&req->rq_lock);
- spin_lock_init(&early_req->rq_lock);
early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
early_req->rq_flvr = req->rq_flvr;
early_req->rq_repbuf = early_buf;
early_req->rq_repbuf_len = early_bufsz;
- early_req->rq_repdata = (struct lustre_msg *) early_buf;
+ early_req->rq_repdata = (struct lustre_msg *)early_buf;
early_req->rq_repdata_len = early_size;
early_req->rq_early = 1;
early_req->rq_reqmsg = req->rq_reqmsg;
@@ -1556,7 +1551,7 @@ void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
/* move from segment + 1 to end segment */
LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
- movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg);
+ movesize = oldmsg_size - ((unsigned long)src - (unsigned long)msg);
LASSERT(movesize >= 0);
if (movesize)
@@ -2196,6 +2191,9 @@ int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
pud = lustre_msg_buf(msg, offset, 0);
+ if (!pud)
+ return -EINVAL;
+
pud->pud_uid = from_kuid(&init_user_ns, current_uid());
pud->pud_gid = from_kgid(&init_user_ns, current_gid());
pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid());
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index d3872b8c9a6e..5f4d79718589 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -41,7 +37,6 @@
#define DEBUG_SUBSYSTEM S_SEC
#include "../../include/linux/libcfs/libcfs.h"
-#include <linux/crypto.h>
#include "../include/obd.h"
#include "../include/obd_cksum.h"
@@ -274,7 +269,7 @@ static unsigned long enc_pools_shrink_scan(struct shrinker *s,
static inline
int npages_to_npools(unsigned long npages)
{
- return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
+ return (int)((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
}
/*
@@ -511,7 +506,6 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
{
struct cfs_crypto_hash_desc *hdesc;
int hashsize;
- char hashbuf[64];
unsigned int bufsize;
int i, err;
@@ -529,21 +523,23 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
for (i = 0; i < desc->bd_iov_count; i++) {
cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
- desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK,
+ desc->bd_iov[i].kiov_offset & ~PAGE_MASK,
desc->bd_iov[i].kiov_len);
}
+
if (hashsize > buflen) {
+ unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
+
bufsize = sizeof(hashbuf);
- err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf,
- &bufsize);
+ LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n",
+ bufsize, hashsize);
+ err = cfs_crypto_hash_final(hdesc, hashbuf, &bufsize);
memcpy(buf, hashbuf, buflen);
} else {
bufsize = buflen;
err = cfs_crypto_hash_final(hdesc, buf, &bufsize);
}
- if (err)
- cfs_crypto_hash_final(hdesc, NULL, NULL);
return err;
}
EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
index a51b18bbfd34..c14035479c5f 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -648,7 +644,7 @@ static int logname2fsname(const char *logname, char *buf, int buflen)
return -EINVAL;
}
- len = min((int) (ptr - logname), buflen - 1);
+ len = min((int)(ptr - logname), buflen - 1);
memcpy(buf, logname, len);
buf[len] = '\0';
@@ -819,7 +815,7 @@ void sptlrpc_conf_client_adapt(struct obd_device *obd)
CDEBUG(D_SEC, "obd %s\n", obd->u.cli.cl_target_uuid.uuid);
/* serialize with connect/disconnect import */
- down_read(&obd->u.cli.cl_sem);
+ down_read_nested(&obd->u.cli.cl_sem, OBD_CLI_SEM_MDCOSC);
imp = obd->u.cli.cl_import;
if (imp) {
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
index 9082da06b28a..9b9801ece582 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
index e610a8ddd223..07273f577969 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
index 40e5349de38c..70a61e12bb7b 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -60,7 +56,7 @@ static struct ptlrpc_svc_ctx null_svc_ctx;
static inline
void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp)
{
- msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 24;
+ msg->lm_secflvr |= (((__u32)sp) & 0xFF) << 24;
}
static inline
@@ -265,7 +261,8 @@ int null_enlarge_reqbuf(struct ptlrpc_sec *sec,
memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen);
kvfree(req->rq_reqbuf);
- req->rq_reqbuf = req->rq_reqmsg = newbuf;
+ req->rq_reqbuf = newbuf;
+ req->rq_reqmsg = newbuf;
req->rq_reqbuf_len = alloc_size;
if (req->rq_import)
@@ -329,7 +326,7 @@ int null_alloc_rs(struct ptlrpc_request *req, int msgsize)
rs->rs_svc_ctx = req->rq_svc_ctx;
atomic_inc(&req->rq_svc_ctx->sc_refcount);
- rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+ rs->rs_repbuf = (struct lustre_msg *)(rs + 1);
rs->rs_repbuf_len = rs_size - sizeof(*rs);
rs->rs_msg = rs->rs_repbuf;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
index 6276bf59c3aa..5c4590b0c521 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -162,7 +158,7 @@ static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
continue;
ptr = kmap(desc->bd_iov[i].kiov_page);
- off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+ off = desc->bd_iov[i].kiov_offset & ~PAGE_MASK;
ptr[off] ^= 0x1;
kunmap(desc->bd_iov[i].kiov_page);
return;
@@ -298,7 +294,7 @@ int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
- token = (struct plain_bulk_token *) bsd->bsd_data;
+ token = (struct plain_bulk_token *)bsd->bsd_data;
bsd->bsd_version = 0;
bsd->bsd_flags = 0;
@@ -343,7 +339,7 @@ int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS);
bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0);
- tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+ tokenv = (struct plain_bulk_token *)bsdv->bsd_data;
if (req->rq_bulk_write) {
if (bsdv->bsd_flags & BSD_FL_ERR)
@@ -574,8 +570,12 @@ int plain_alloc_reqbuf(struct ptlrpc_sec *sec,
lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0);
- if (req->rq_pack_udesc)
- sptlrpc_pack_user_desc(req->rq_reqbuf, PLAIN_PACK_USER_OFF);
+ if (req->rq_pack_udesc) {
+ int rc = sptlrpc_pack_user_desc(req->rq_reqbuf,
+ PLAIN_PACK_USER_OFF);
+ if (rc < 0)
+ return rc;
+ }
return 0;
}
@@ -811,7 +811,7 @@ int plain_alloc_rs(struct ptlrpc_request *req, int msgsize)
rs->rs_svc_ctx = req->rq_svc_ctx;
atomic_inc(&req->rq_svc_ctx->sc_refcount);
- rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+ rs->rs_repbuf = (struct lustre_msg *)(rs + 1);
rs->rs_repbuf_len = rs_size - sizeof(*rs);
lustre_init_msg_v2(rs->rs_repbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
@@ -891,7 +891,7 @@ int plain_svc_unwrap_bulk(struct ptlrpc_request *req,
LASSERT(req->rq_pack_bulk);
bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
- tokenr = (struct plain_bulk_token *) bsdr->bsd_data;
+ tokenr = (struct plain_bulk_token *)bsdr->bsd_data;
bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
bsdv->bsd_version = 0;
@@ -926,7 +926,7 @@ int plain_svc_wrap_bulk(struct ptlrpc_request *req,
bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
- tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+ tokenv = (struct plain_bulk_token *)bsdv->bsd_data;
bsdv->bsd_version = 0;
bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
index 1bbd1d39ccf8..4788c4940c2a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/service.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -838,6 +834,11 @@ static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt,
{
ptlrpc_server_hpreq_fini(req);
+ if (req->rq_session.lc_thread) {
+ lu_context_exit(&req->rq_session);
+ lu_context_fini(&req->rq_session);
+ }
+
ptlrpc_server_drop_request(req);
}
@@ -1579,6 +1580,21 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
}
req->rq_svc_thread = thread;
+ if (thread) {
+ /* initialize request session, it is needed for request
+ * processing by target
+ */
+ rc = lu_context_init(&req->rq_session,
+ LCT_SERVER_SESSION | LCT_NOREF);
+ if (rc) {
+ CERROR("%s: failure to initialize session: rc = %d\n",
+ thread->t_name, rc);
+ goto err_req;
+ }
+ req->rq_session.lc_thread = thread;
+ lu_context_enter(&req->rq_session);
+ req->rq_svc_thread->t_env->le_ses = &req->rq_session;
+ }
ptlrpc_at_add_timed(req);
@@ -1612,7 +1628,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
struct timespec64 arrived;
unsigned long timediff_usecs;
unsigned long arrived_usecs;
- int rc;
int fail_opc = 0;
request = ptlrpc_server_request_get(svcpt, false);
@@ -1649,21 +1664,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
at_get(&svcpt->scp_at_estimate));
}
- rc = lu_context_init(&request->rq_session, LCT_SESSION | LCT_NOREF);
- if (rc) {
- CERROR("Failure to initialize session: %d\n", rc);
- goto out_req;
- }
- request->rq_session.lc_thread = thread;
- request->rq_session.lc_cookie = 0x5;
- lu_context_enter(&request->rq_session);
-
- CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
-
- request->rq_svc_thread = thread;
- if (thread)
- request->rq_svc_thread->t_env->le_ses = &request->rq_session;
-
if (likely(request->rq_export)) {
if (unlikely(ptlrpc_check_req(request)))
goto put_conn;
@@ -1695,14 +1695,21 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
- rc = svc->srv_ops.so_req_handler(request);
+ CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
+
+ /* re-assign request and session thread to the current one */
+ request->rq_svc_thread = thread;
+ if (thread) {
+ LASSERT(request->rq_session.lc_thread);
+ request->rq_session.lc_thread = thread;
+ request->rq_session.lc_cookie = 0x55;
+ thread->t_env->le_ses = &request->rq_session;
+ }
+ svc->srv_ops.so_req_handler(request);
ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE);
put_conn:
- lu_context_exit(&request->rq_session);
- lu_context_fini(&request->rq_session);
-
if (unlikely(ktime_get_real_seconds() > request->rq_deadline)) {
DEBUG_REQ(D_WARNING, request,
"Request took longer than estimated (%lld:%llds); "
@@ -1756,7 +1763,6 @@ put_conn:
request->rq_arrival_time.tv_sec);
}
-out_req:
ptlrpc_server_finish_active_request(svcpt, request);
return 1;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
index 3ffd2d91f274..6cc2b2edf3fc 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -15,11 +15,7 @@
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
@@ -276,7 +272,9 @@ void lustre_assert_wire_constants(void)
(long long)FLD_QUERY);
LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
(long long)FLD_FIRST_OPC);
- LASSERTF(FLD_LAST_OPC == 901, "found %lld\n",
+ LASSERTF(FLD_READ == 901, "found %lld\n",
+ (long long)FLD_READ);
+ LASSERTF(FLD_LAST_OPC == 902, "found %lld\n",
(long long)FLD_LAST_OPC);
LASSERTF(SEQ_QUERY == 700, "found %lld\n",
(long long)SEQ_QUERY);
@@ -1069,6 +1067,8 @@ void lustre_assert_wire_constants(void)
OBD_CONNECT_PINGLESS);
LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL,
"found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD);
+ LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL,
+ "found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID);
LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
(unsigned)OBD_CKSUM_CRC32);
LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
@@ -1265,8 +1265,6 @@ void lustre_assert_wire_constants(void)
OBD_MD_FLXATTRRM);
LASSERTF(OBD_MD_FLACL == (0x0000008000000000ULL), "found 0x%.16llxULL\n",
OBD_MD_FLACL);
- LASSERTF(OBD_MD_FLRMTPERM == (0x0000010000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLRMTPERM);
LASSERTF(OBD_MD_FLMDSCAPA == (0x0000020000000000ULL), "found 0x%.16llxULL\n",
OBD_MD_FLMDSCAPA);
LASSERTF(OBD_MD_FLOSSCAPA == (0x0000040000000000ULL), "found 0x%.16llxULL\n",
@@ -1277,14 +1275,6 @@ void lustre_assert_wire_constants(void)
OBD_MD_FLCROSSREF);
LASSERTF(OBD_MD_FLGETATTRLOCK == (0x0000200000000000ULL), "found 0x%.16llxULL\n",
OBD_MD_FLGETATTRLOCK);
- LASSERTF(OBD_MD_FLRMTLSETFACL == (0x0001000000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLRMTLSETFACL);
- LASSERTF(OBD_MD_FLRMTLGETFACL == (0x0002000000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLRMTLGETFACL);
- LASSERTF(OBD_MD_FLRMTRSETFACL == (0x0004000000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLRMTRSETFACL);
- LASSERTF(OBD_MD_FLRMTRGETFACL == (0x0008000000000000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLRMTRGETFACL);
LASSERTF(OBD_MD_FLDATAVERSION == (0x0010000000000000ULL), "found 0x%.16llxULL\n",
OBD_MD_FLDATAVERSION);
CLASSERT(OBD_FL_INLINEDATA == 0x00000001);
@@ -1639,6 +1629,12 @@ void lustre_assert_wire_constants(void)
OBD_BRW_ASYNC);
LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
OBD_BRW_MEMALLOC);
+ LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n",
+ OBD_BRW_OVER_USRQUOTA);
+ LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n",
+ OBD_BRW_OVER_GRPQUOTA);
+ LASSERTF(OBD_BRW_SOFT_SYNC == 0x4000, "found 0x%.8x\n",
+ OBD_BRW_SOFT_SYNC);
/* Checks for struct ost_body */
LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n",
@@ -1885,44 +1881,6 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->padding) == 4, "found %lld\n",
(long long)(int)sizeof(((struct mdt_ioepoch *)0)->padding));
- /* Checks for struct mdt_remote_perm */
- LASSERTF((int)sizeof(struct mdt_remote_perm) == 32, "found %lld\n",
- (long long)(int)sizeof(struct mdt_remote_perm));
- LASSERTF((int)offsetof(struct mdt_remote_perm, rp_uid) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_remote_perm, rp_uid));
- LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_uid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_uid));
- LASSERTF((int)offsetof(struct mdt_remote_perm, rp_gid) == 4, "found %lld\n",
- (long long)(int)offsetof(struct mdt_remote_perm, rp_gid));
- LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_gid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_gid));
- LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsuid) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_remote_perm, rp_fsuid));
- LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid));
- LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsgid) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_remote_perm, rp_fsgid));
- LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid));
- LASSERTF((int)offsetof(struct mdt_remote_perm, rp_access_perm) == 24, "found %lld\n",
- (long long)(int)offsetof(struct mdt_remote_perm, rp_access_perm));
- LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm));
- LASSERTF((int)offsetof(struct mdt_remote_perm, rp_padding) == 28, "found %lld\n",
- (long long)(int)offsetof(struct mdt_remote_perm, rp_padding));
- LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_padding));
- LASSERTF(CFS_SETUID_PERM == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)CFS_SETUID_PERM);
- LASSERTF(CFS_SETGID_PERM == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)CFS_SETGID_PERM);
- LASSERTF(CFS_SETGRP_PERM == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)CFS_SETGRP_PERM);
- LASSERTF(CFS_RMTACL_PERM == 0x00000008UL, "found 0x%.8xUL\n",
- (unsigned)CFS_RMTACL_PERM);
- LASSERTF(CFS_RMTOWN_PERM == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)CFS_RMTOWN_PERM);
-
/* Checks for struct mdt_rec_setattr */
LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n",
(long long)(int)sizeof(struct mdt_rec_setattr));