Diffstat (limited to 'drivers/staging/lustre')
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/curproc.h | 20
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs.h | 5
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h | 4
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h | 12
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h | 53
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h | 185
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_private.h | 13
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_string.h | 24
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h | 4
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h | 8
-rw-r--r--  drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h | 8
-rw-r--r--  drivers/staging/lustre/include/linux/lnet/lnetst.h | 8
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 6
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 26
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c | 31
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h | 134
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c | 44
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c | 6
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c | 26
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/debug.c | 36
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/fail.c | 5
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/hash.c | 143
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 2
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_lock.c | 20
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_mem.c | 22
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/libcfs_string.c | 58
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c | 187
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c | 32
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h | 2
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c | 62
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-module.c | 2
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c | 10
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c | 28
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/module.c | 26
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/prng.c | 2
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/tracefile.c | 108
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/tracefile.h | 28
-rw-r--r--  drivers/staging/lustre/lnet/libcfs/workitem.c | 50
-rw-r--r--  drivers/staging/lustre/lnet/lnet/api-ni.c | 8
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-me.c | 18
-rw-r--r--  drivers/staging/lustre/lnet/lnet/lib-move.c | 1
-rw-r--r--  drivers/staging/lustre/lnet/lnet/nidstrings.c | 6
-rw-r--r--  drivers/staging/lustre/lnet/lnet/router.c | 17
-rw-r--r--  drivers/staging/lustre/lnet/selftest/brw_test.c | 73
-rw-r--r--  drivers/staging/lustre/lnet/selftest/conctl.c | 6
-rw-r--r--  drivers/staging/lustre/lnet/selftest/conrpc.c | 36
-rw-r--r--  drivers/staging/lustre/lnet/selftest/conrpc.h | 23
-rw-r--r--  drivers/staging/lustre/lnet/selftest/console.c | 29
-rw-r--r--  drivers/staging/lustre/lnet/selftest/console.h | 25
-rw-r--r--  drivers/staging/lustre/lnet/selftest/framework.c | 25
-rw-r--r--  drivers/staging/lustre/lnet/selftest/ping_test.c | 4
-rw-r--r--  drivers/staging/lustre/lnet/selftest/rpc.c | 22
-rw-r--r--  drivers/staging/lustre/lnet/selftest/rpc.h | 8
-rw-r--r--  drivers/staging/lustre/lnet/selftest/selftest.h | 30
-rw-r--r--  drivers/staging/lustre/lnet/selftest/timer.c | 7
-rw-r--r--  drivers/staging/lustre/lustre/fid/fid_request.c | 18
-rw-r--r--  drivers/staging/lustre/lustre/fid/lproc_fid.c | 2
-rw-r--r--  drivers/staging/lustre/lustre/fld/fld_cache.c | 6
-rw-r--r--  drivers/staging/lustre/lustre/fld/fld_internal.h | 5
-rw-r--r--  drivers/staging/lustre/lustre/fld/fld_request.c | 8
-rw-r--r--  drivers/staging/lustre/lustre/include/cl_object.h | 379
-rw-r--r--  drivers/staging/lustre/lustre/include/llog_swab.h | 65
-rw-r--r--  drivers/staging/lustre/lustre/include/lprocfs_status.h | 9
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h | 75
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/lustre_idl.h | 438
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h | 4
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre/lustre_user.h | 44
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_compat.h | 2
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_dlm.h | 50
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_fid.h | 1
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_fld.h | 2
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_ha.h | 1
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_import.h | 9
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_lib.h | 2
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_lmv.h | 13
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_log.h | 6
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_mdc.h | 44
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_net.h | 898
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_nrs.h | 717
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h | 70
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_req_layout.h | 7
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_sec.h | 3
-rw-r--r--  drivers/staging/lustre/lustre/include/lustre_swab.h | 102
-rw-r--r--  drivers/staging/lustre/lustre/include/obd.h | 291
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_class.h | 264
-rw-r--r--  drivers/staging/lustre/lustre/include/obd_support.h | 15
-rw-r--r--  drivers/staging/lustre/lustre/include/seq_range.h | 199
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_extent.c | 28
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_flock.c | 22
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c | 8
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_internal.h | 88
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_lib.c | 45
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_lock.c | 316
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c | 26
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_plain.c | 8
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_pool.c | 36
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_request.c | 132
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_resource.c | 42
-rw-r--r--  drivers/staging/lustre/lustre/llite/Makefile | 10
-rw-r--r--  drivers/staging/lustre/lustre/llite/dir.c | 195
-rw-r--r--  drivers/staging/lustre/lustre/llite/file.c | 968
-rw-r--r--  drivers/staging/lustre/lustre/llite/glimpse.c | 139
-rw-r--r--  drivers/staging/lustre/lustre/llite/lcommon_cl.c | 41
-rw-r--r--  drivers/staging/lustre/lustre/llite/lcommon_misc.c | 47
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_close.c | 395
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_internal.h | 121
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_lib.c | 342
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_mmap.c | 69
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_nfs.c | 2
-rw-r--r--  drivers/staging/lustre/lustre/llite/lproc_llite.c | 4
-rw-r--r--  drivers/staging/lustre/lustre/llite/namei.c | 68
-rw-r--r--  drivers/staging/lustre/lustre/llite/rw.c | 302
-rw-r--r--  drivers/staging/lustre/lustre/llite/rw26.c | 65
-rw-r--r--  drivers/staging/lustre/lustre/llite/statahead.c | 20
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_dev.c | 15
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_internal.h | 41
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_io.c | 185
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_object.c | 48
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_page.c | 41
-rw-r--r--  drivers/staging/lustre/lustre/llite/vvp_req.c | 122
-rw-r--r--  drivers/staging/lustre/lustre/llite/xattr.c | 360
-rw-r--r--  drivers/staging/lustre/lustre/llite/xattr_cache.c | 17
-rw-r--r--  drivers/staging/lustre/lustre/llite/xattr_security.c | 88
-rw-r--r--  drivers/staging/lustre/lustre/lmv/lmv_intent.c | 11
-rw-r--r--  drivers/staging/lustre/lustre/lmv/lmv_internal.h | 3
-rw-r--r--  drivers/staging/lustre/lustre/lmv/lmv_obd.c | 407
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_cl_internal.h | 29
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_dev.c | 52
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_ea.c | 208
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_internal.h | 100
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_io.c | 116
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_merge.c | 50
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_obd.c | 720
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_object.c | 698
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_pack.c | 293
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_page.c | 46
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_pool.c | 3
-rw-r--r--  drivers/staging/lustre/lustre/lov/lov_request.c | 292
-rw-r--r--  drivers/staging/lustre/lustre/lov/lovsub_dev.c | 61
-rw-r--r--  drivers/staging/lustre/lustre/lov/lovsub_object.c | 22
-rw-r--r--  drivers/staging/lustre/lustre/mdc/lproc_mdc.c | 98
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_internal.h | 12
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_lib.c | 80
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_locks.c | 60
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_reint.c | 107
-rw-r--r--  drivers/staging/lustre/lustre/mdc/mdc_request.c | 317
-rw-r--r--  drivers/staging/lustre/lustre/mgc/mgc_request.c | 167
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/Makefile | 2
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_internal.h | 23
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_io.c | 290
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_object.c | 452
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/cl_page.c | 68
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/genops.c | 228
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/linux/linux-module.c | 8
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c | 80
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/llog.c | 139
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/llog_obd.c | 1
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/llog_swab.c | 9
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lprocfs_status.c | 27
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lu_object.c | 84
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/obd_config.c | 70
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/obd_mount.c | 7
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/obdo.c | 65
-rw-r--r--  drivers/staging/lustre/lustre/obdecho/echo_client.c | 140
-rw-r--r--  drivers/staging/lustre/lustre/osc/lproc_osc.c | 11
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_cache.c | 65
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_cl_internal.h | 50
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_dev.c | 15
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_internal.h | 43
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_io.c | 330
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_lock.c | 143
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_object.c | 171
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_page.c | 186
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_quota.c | 52
-rw-r--r--  drivers/staging/lustre/lustre/osc/osc_request.c | 669
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/client.c | 338
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/connection.c | 4
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/events.c | 4
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/import.c | 95
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/layout.c | 66
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/llog_client.c | 20
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c | 4
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/niobuf.c | 98
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/nrs.c | 15
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/pack_generic.c | 106
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/pers.c | 31
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h | 34
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/recover.c | 26
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec.c | 9
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c | 48
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/sec_plain.c | 20
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/service.c | 9
-rw-r--r--  drivers/staging/lustre/lustre/ptlrpc/wiretest.c | 410
-rw-r--r--  drivers/staging/lustre/sysfs-fs-lustre | 2
194 files changed, 8607 insertions, 10095 deletions
diff --git a/drivers/staging/lustre/include/linux/libcfs/curproc.h b/drivers/staging/lustre/include/linux/libcfs/curproc.h
index be0675d8ff5e..1ea27c9e3708 100644
--- a/drivers/staging/lustre/include/linux/libcfs/curproc.h
+++ b/drivers/staging/lustre/include/linux/libcfs/curproc.h
@@ -53,7 +53,7 @@
#define current_pid() (current->pid)
#define current_comm() (current->comm)
-typedef __u32 cfs_cap_t;
+typedef u32 cfs_cap_t;
#define CFS_CAP_CHOWN 0
#define CFS_CAP_DAC_OVERRIDE 1
@@ -65,15 +65,15 @@ typedef __u32 cfs_cap_t;
#define CFS_CAP_SYS_BOOT 23
#define CFS_CAP_SYS_RESOURCE 24
-#define CFS_CAP_FS_MASK ((1 << CFS_CAP_CHOWN) | \
- (1 << CFS_CAP_DAC_OVERRIDE) | \
- (1 << CFS_CAP_DAC_READ_SEARCH) | \
- (1 << CFS_CAP_FOWNER) | \
- (1 << CFS_CAP_FSETID) | \
- (1 << CFS_CAP_LINUX_IMMUTABLE) | \
- (1 << CFS_CAP_SYS_ADMIN) | \
- (1 << CFS_CAP_SYS_BOOT) | \
- (1 << CFS_CAP_SYS_RESOURCE))
+#define CFS_CAP_FS_MASK (BIT(CFS_CAP_CHOWN) | \
+ BIT(CFS_CAP_DAC_OVERRIDE) | \
+ BIT(CFS_CAP_DAC_READ_SEARCH) | \
+ BIT(CFS_CAP_FOWNER) | \
+ BIT(CFS_CAP_FSETID) | \
+ BIT(CFS_CAP_LINUX_IMMUTABLE) | \
+ BIT(CFS_CAP_SYS_ADMIN) | \
+ BIT(CFS_CAP_SYS_BOOT) | \
+ BIT(CFS_CAP_SYS_RESOURCE))
void cfs_cap_raise(cfs_cap_t cap);
void cfs_cap_lower(cfs_cap_t cap);
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
index 3b92d38d37e2..cc2c0e97bb7e 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
@@ -61,7 +61,7 @@
sigset_t cfs_block_allsigs(void);
sigset_t cfs_block_sigs(unsigned long sigs);
sigset_t cfs_block_sigsinv(unsigned long sigs);
-void cfs_restore_sigs(sigset_t);
+void cfs_restore_sigs(sigset_t sigset);
void cfs_clear_sigpending(void);
/*
@@ -71,7 +71,7 @@ void cfs_clear_sigpending(void);
/* returns a random 32-bit integer */
unsigned int cfs_rand(void);
/* seed the generator */
-void cfs_srand(unsigned int, unsigned int);
+void cfs_srand(unsigned int seed1, unsigned int seed2);
void cfs_get_random_bytes(void *buf, int size);
#include "libcfs_debug.h"
@@ -125,7 +125,6 @@ extern struct miscdevice libcfs_dev;
/**
* The path of debug log dump upcall script.
*/
-extern char lnet_upcall[1024];
extern char lnet_debug_log_upcall[1024];
extern struct cfs_wi_sched *cfs_sched_rehash;
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 81d8079e3b5e..6d8752a368fa 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -92,7 +92,7 @@ struct cfs_cpt_table {
/* node mask */
nodemask_t ctb_nodemask;
/* version */
- __u64 ctb_version;
+ u64 ctb_version;
};
static inline cpumask_t *
@@ -211,7 +211,7 @@ int cfs_cpu_ht_nsiblings(int cpu);
*/
void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
/*
- * destory per-cpu-partition variable
+ * destroy per-cpu-partition variable
*/
void cfs_percpt_free(void *vars);
int cfs_percpt_number(void *vars);
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
index 02be7d7608a5..8f34c5ddc63e 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
@@ -29,10 +29,12 @@
#define _LIBCFS_CRYPTO_H
struct cfs_crypto_hash_type {
- char *cht_name; /**< hash algorithm name, equal to
- * format name for crypto api */
- unsigned int cht_key; /**< init key by default (valid for
- * 4 bytes context like crc32, adler */
+ char *cht_name; /*< hash algorithm name, equal to
+ * format name for crypto api
+ */
+ unsigned int cht_key; /*< init key by default (valid for
+ * 4 bytes context like crc32, adler
+ */
unsigned int cht_size; /**< hash digest size */
};
@@ -135,7 +137,7 @@ static inline unsigned char cfs_crypto_hash_alg(const char *algname)
enum cfs_crypto_hash_alg hash_alg;
for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
- if (strcmp(hash_types[hash_alg].cht_name, algname) == 0)
+ if (!strcmp(hash_types[hash_alg].cht_name, algname))
return hash_alg;
return CFS_HASH_ALG_UNKNOWN;
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
index bdbbe934584c..fedb46dff696 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
@@ -39,8 +39,8 @@ extern int cfs_fail_err;
extern wait_queue_head_t cfs_race_waitq;
extern int cfs_race_state;
-int __cfs_fail_check_set(__u32 id, __u32 value, int set);
-int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set);
+int __cfs_fail_check_set(u32 id, u32 value, int set);
+int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set);
enum {
CFS_FAIL_LOC_NOSET = 0,
@@ -55,11 +55,11 @@ enum {
#define CFS_FAILED_BIT 30
/* CFS_FAILED is 0x40000000 */
-#define CFS_FAILED (1 << CFS_FAILED_BIT)
+#define CFS_FAILED BIT(CFS_FAILED_BIT)
#define CFS_FAIL_ONCE_BIT 31
/* CFS_FAIL_ONCE is 0x80000000 */
-#define CFS_FAIL_ONCE (1 << CFS_FAIL_ONCE_BIT)
+#define CFS_FAIL_ONCE BIT(CFS_FAIL_ONCE_BIT)
/* The following flags aren't made to be combined */
#define CFS_FAIL_SKIP 0x20000000 /* skip N times then fail */
@@ -69,14 +69,14 @@ enum {
#define CFS_FAULT 0x02000000 /* match any CFS_FAULT_CHECK */
-static inline bool CFS_FAIL_PRECHECK(__u32 id)
+static inline bool CFS_FAIL_PRECHECK(u32 id)
{
- return cfs_fail_loc != 0 &&
+ return cfs_fail_loc &&
((cfs_fail_loc & CFS_FAIL_MASK_LOC) == (id & CFS_FAIL_MASK_LOC) ||
- (cfs_fail_loc & id & CFS_FAULT));
+ (cfs_fail_loc & id & CFS_FAULT));
}
-static inline int cfs_fail_check_set(__u32 id, __u32 value,
+static inline int cfs_fail_check_set(u32 id, u32 value,
int set, int quiet)
{
int ret = 0;
@@ -103,28 +103,34 @@ static inline int cfs_fail_check_set(__u32 id, __u32 value,
#define CFS_FAIL_CHECK_QUIET(id) \
cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1)
-/* If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1,
- * otherwise return 0 */
+/*
+ * If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1,
+ * otherwise return 0
+ */
#define CFS_FAIL_CHECK_VALUE(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0)
#define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1)
-/* If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1,
- * otherwise return 0 */
+/*
+ * If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1,
+ * otherwise return 0
+ */
#define CFS_FAIL_CHECK_ORSET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0)
#define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1)
-/* If id hit cfs_fail_loc, cfs_fail_loc = value and return 1,
- * otherwise return 0 */
+/*
+ * If id hit cfs_fail_loc, cfs_fail_loc = value and return 1,
+ * otherwise return 0
+ */
#define CFS_FAIL_CHECK_RESET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0)
#define CFS_FAIL_CHECK_RESET_QUIET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1)
-static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
+static inline int cfs_fail_timeout_set(u32 id, u32 value, int ms, int set)
{
if (unlikely(CFS_FAIL_PRECHECK(id)))
return __cfs_fail_timeout_set(id, value, ms, set);
@@ -138,8 +144,10 @@ static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
#define CFS_FAIL_TIMEOUT_MS(id, ms) \
cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET)
-/* If id hit cfs_fail_loc, cfs_fail_loc |= value and
- * sleep seconds or milliseconds */
+/*
+ * If id hit cfs_fail_loc, cfs_fail_loc |= value and
+ * sleep seconds or milliseconds
+ */
#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \
cfs_fail_timeout_set(id, value, secs * 1000, CFS_FAIL_LOC_ORSET)
@@ -152,13 +160,14 @@ static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
#define CFS_FAULT_CHECK(id) \
CFS_FAIL_CHECK(CFS_FAULT | (id))
-/* The idea here is to synchronise two threads to force a race. The
+/*
+ * The idea here is to synchronise two threads to force a race. The
* first thread that calls this with a matching fail_loc is put to
* sleep. The next thread that calls with the same fail_loc wakes up
- * the first and continues. */
-static inline void cfs_race(__u32 id)
+ * the first and continues.
+ */
+static inline void cfs_race(u32 id)
{
-
if (CFS_FAIL_PRECHECK(id)) {
if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) {
int rc;
@@ -166,7 +175,7 @@ static inline void cfs_race(__u32 id)
cfs_race_state = 0;
CERROR("cfs_race id %x sleeping\n", id);
rc = wait_event_interruptible(cfs_race_waitq,
- cfs_race_state != 0);
+ !!cfs_race_state);
CERROR("cfs_fail_race id %x awake, rc=%d\n", id, rc);
} else {
CERROR("cfs_fail_race id %x waking\n", id);
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
index 6949a1846635..0cc2fc465c1a 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
@@ -57,8 +57,10 @@
/** disable debug */
#define CFS_HASH_DEBUG_NONE 0
-/** record hash depth and output to console when it's too deep,
- * computing overhead is low but consume more memory */
+/*
+ * record hash depth and output to console when it's too deep,
+ * computing overhead is low but consume more memory
+ */
#define CFS_HASH_DEBUG_1 1
/** expensive, check key validation */
#define CFS_HASH_DEBUG_2 2
@@ -87,8 +89,8 @@ union cfs_hash_lock {
*/
struct cfs_hash_bucket {
union cfs_hash_lock hsb_lock; /**< bucket lock */
- __u32 hsb_count; /**< current entries */
- __u32 hsb_version; /**< change version */
+ u32 hsb_count; /**< current entries */
+ u32 hsb_version; /**< change version */
unsigned int hsb_index; /**< index of bucket */
int hsb_depmax; /**< max depth on bucket */
long hsb_head[0]; /**< hash-head array */
@@ -123,38 +125,40 @@ enum cfs_hash_tag {
* . Some functions will be disabled with this flag, i.e:
* cfs_hash_for_each_empty, cfs_hash_rehash
*/
- CFS_HASH_NO_LOCK = 1 << 0,
+ CFS_HASH_NO_LOCK = BIT(0),
/** no bucket lock, use one spinlock to protect the whole hash */
- CFS_HASH_NO_BKTLOCK = 1 << 1,
+ CFS_HASH_NO_BKTLOCK = BIT(1),
/** rwlock to protect bucket */
- CFS_HASH_RW_BKTLOCK = 1 << 2,
+ CFS_HASH_RW_BKTLOCK = BIT(2),
/** spinlock to protect bucket */
- CFS_HASH_SPIN_BKTLOCK = 1 << 3,
+ CFS_HASH_SPIN_BKTLOCK = BIT(3),
/** always add new item to tail */
- CFS_HASH_ADD_TAIL = 1 << 4,
+ CFS_HASH_ADD_TAIL = BIT(4),
/** hash-table doesn't have refcount on item */
- CFS_HASH_NO_ITEMREF = 1 << 5,
+ CFS_HASH_NO_ITEMREF = BIT(5),
/** big name for param-tree */
- CFS_HASH_BIGNAME = 1 << 6,
+ CFS_HASH_BIGNAME = BIT(6),
/** track global count */
- CFS_HASH_COUNTER = 1 << 7,
+ CFS_HASH_COUNTER = BIT(7),
/** rehash item by new key */
- CFS_HASH_REHASH_KEY = 1 << 8,
+ CFS_HASH_REHASH_KEY = BIT(8),
/** Enable dynamic hash resizing */
- CFS_HASH_REHASH = 1 << 9,
+ CFS_HASH_REHASH = BIT(9),
/** can shrink hash-size */
- CFS_HASH_SHRINK = 1 << 10,
+ CFS_HASH_SHRINK = BIT(10),
/** assert hash is empty on exit */
- CFS_HASH_ASSERT_EMPTY = 1 << 11,
+ CFS_HASH_ASSERT_EMPTY = BIT(11),
/** record hlist depth */
- CFS_HASH_DEPTH = 1 << 12,
+ CFS_HASH_DEPTH = BIT(12),
/**
* rehash is always scheduled in a different thread, so current
* change on hash table is non-blocking
*/
- CFS_HASH_NBLK_CHANGE = 1 << 13,
- /** NB, we typed hs_flags as __u16, please change it
- * if you need to extend >=16 flags */
+ CFS_HASH_NBLK_CHANGE = BIT(13),
+ /**
+ * NB, we typed hs_flags as u16, please change it
+ * if you need to extend >=16 flags
+ */
};
/** most used attributes */
@@ -201,8 +205,10 @@ enum cfs_hash_tag {
*/
struct cfs_hash {
- /** serialize with rehash, or serialize all operations if
- * the hash-table has CFS_HASH_NO_BKTLOCK */
+ /**
+ * serialize with rehash, or serialize all operations if
+ * the hash-table has CFS_HASH_NO_BKTLOCK
+ */
union cfs_hash_lock hs_lock;
/** hash operations */
struct cfs_hash_ops *hs_ops;
@@ -215,31 +221,31 @@ struct cfs_hash {
/** total number of items on this hash-table */
atomic_t hs_count;
/** hash flags, see cfs_hash_tag for detail */
- __u16 hs_flags;
+ u16 hs_flags;
/** # of extra-bytes for bucket, for user saving extended attributes */
- __u16 hs_extra_bytes;
+ u16 hs_extra_bytes;
/** wants to iterate */
- __u8 hs_iterating;
+ u8 hs_iterating;
/** hash-table is dying */
- __u8 hs_exiting;
+ u8 hs_exiting;
/** current hash bits */
- __u8 hs_cur_bits;
+ u8 hs_cur_bits;
/** min hash bits */
- __u8 hs_min_bits;
+ u8 hs_min_bits;
/** max hash bits */
- __u8 hs_max_bits;
+ u8 hs_max_bits;
/** bits for rehash */
- __u8 hs_rehash_bits;
+ u8 hs_rehash_bits;
/** bits for each bucket */
- __u8 hs_bkt_bits;
+ u8 hs_bkt_bits;
/** resize min threshold */
- __u16 hs_min_theta;
+ u16 hs_min_theta;
/** resize max threshold */
- __u16 hs_max_theta;
+ u16 hs_max_theta;
/** resize count */
- __u32 hs_rehash_count;
+ u32 hs_rehash_count;
/** # of iterators (caller of cfs_hash_for_each_*) */
- __u32 hs_iterators;
+ u32 hs_iterators;
/** rehash workitem */
struct cfs_workitem hs_rehash_wi;
/** refcount on this hash table */
@@ -291,8 +297,8 @@ struct cfs_hash_hlist_ops {
struct cfs_hash_ops {
/** return hashed value from @key */
- unsigned (*hs_hash)(struct cfs_hash *hs, const void *key,
- unsigned mask);
+ unsigned int (*hs_hash)(struct cfs_hash *hs, const void *key,
+ unsigned int mask);
/** return key address of @hnode */
void * (*hs_key)(struct hlist_node *hnode);
/** copy key from @hnode to @key */
@@ -317,110 +323,112 @@ struct cfs_hash_ops {
/** total number of buckets in @hs */
#define CFS_HASH_NBKT(hs) \
- (1U << ((hs)->hs_cur_bits - (hs)->hs_bkt_bits))
+ BIT((hs)->hs_cur_bits - (hs)->hs_bkt_bits)
/** total number of buckets in @hs while rehashing */
#define CFS_HASH_RH_NBKT(hs) \
- (1U << ((hs)->hs_rehash_bits - (hs)->hs_bkt_bits))
+ BIT((hs)->hs_rehash_bits - (hs)->hs_bkt_bits)
/** number of hlist for in bucket */
-#define CFS_HASH_BKT_NHLIST(hs) (1U << (hs)->hs_bkt_bits)
+#define CFS_HASH_BKT_NHLIST(hs) BIT((hs)->hs_bkt_bits)
/** total number of hlist in @hs */
-#define CFS_HASH_NHLIST(hs) (1U << (hs)->hs_cur_bits)
+#define CFS_HASH_NHLIST(hs) BIT((hs)->hs_cur_bits)
/** total number of hlist in @hs while rehashing */
-#define CFS_HASH_RH_NHLIST(hs) (1U << (hs)->hs_rehash_bits)
+#define CFS_HASH_RH_NHLIST(hs) BIT((hs)->hs_rehash_bits)
static inline int
cfs_hash_with_no_lock(struct cfs_hash *hs)
{
/* caller will serialize all operations for this hash-table */
- return (hs->hs_flags & CFS_HASH_NO_LOCK) != 0;
+ return hs->hs_flags & CFS_HASH_NO_LOCK;
}
static inline int
cfs_hash_with_no_bktlock(struct cfs_hash *hs)
{
/* no bucket lock, one single lock to protect the hash-table */
- return (hs->hs_flags & CFS_HASH_NO_BKTLOCK) != 0;
+ return hs->hs_flags & CFS_HASH_NO_BKTLOCK;
}
static inline int
cfs_hash_with_rw_bktlock(struct cfs_hash *hs)
{
/* rwlock to protect hash bucket */
- return (hs->hs_flags & CFS_HASH_RW_BKTLOCK) != 0;
+ return hs->hs_flags & CFS_HASH_RW_BKTLOCK;
}
static inline int
cfs_hash_with_spin_bktlock(struct cfs_hash *hs)
{
/* spinlock to protect hash bucket */
- return (hs->hs_flags & CFS_HASH_SPIN_BKTLOCK) != 0;
+ return hs->hs_flags & CFS_HASH_SPIN_BKTLOCK;
}
static inline int
cfs_hash_with_add_tail(struct cfs_hash *hs)
{
- return (hs->hs_flags & CFS_HASH_ADD_TAIL) != 0;
+ return hs->hs_flags & CFS_HASH_ADD_TAIL;
}
static inline int
cfs_hash_with_no_itemref(struct cfs_hash *hs)
{
- /* hash-table doesn't keep refcount on item,
+ /*
+ * hash-table doesn't keep refcount on item,
* item can't be removed from hash unless it's
- * ZERO refcount */
- return (hs->hs_flags & CFS_HASH_NO_ITEMREF) != 0;
+ * ZERO refcount
+ */
+ return hs->hs_flags & CFS_HASH_NO_ITEMREF;
}
static inline int
cfs_hash_with_bigname(struct cfs_hash *hs)
{
- return (hs->hs_flags & CFS_HASH_BIGNAME) != 0;
+ return hs->hs_flags & CFS_HASH_BIGNAME;
}
static inline int
cfs_hash_with_counter(struct cfs_hash *hs)
{
- return (hs->hs_flags & CFS_HASH_COUNTER) != 0;
+ return hs->hs_flags & CFS_HASH_COUNTER;
}
static inline int
cfs_hash_with_rehash(struct cfs_hash *hs)
{
- return (hs->hs_flags & CFS_HASH_REHASH) != 0;
+ return hs->hs_flags & CFS_HASH_REHASH;
}
static inline int
cfs_hash_with_rehash_key(struct cfs_hash *hs)
{
- return (hs->hs_flags & CFS_HASH_REHASH_KEY) != 0;
+ return hs->hs_flags & CFS_HASH_REHASH_KEY;
}
static inline int
cfs_hash_with_shrink(struct cfs_hash *hs)
{
- return (hs->hs_flags & CFS_HASH_SHRINK) != 0;
+ return hs->hs_flags & CFS_HASH_SHRINK;
}
static inline int
cfs_hash_with_assert_empty(struct cfs_hash *hs)
{
- return (hs->hs_flags & CFS_HASH_ASSERT_EMPTY) != 0;
+ return hs->hs_flags & CFS_HASH_ASSERT_EMPTY;
}
static inline int
cfs_hash_with_depth(struct cfs_hash *hs)
{
- return (hs->hs_flags & CFS_HASH_DEPTH) != 0;
+ return hs->hs_flags & CFS_HASH_DEPTH;
}
static inline int
cfs_hash_with_nblk_change(struct cfs_hash *hs)
{
- return (hs->hs_flags & CFS_HASH_NBLK_CHANGE) != 0;
+ return hs->hs_flags & CFS_HASH_NBLK_CHANGE;
}
static inline int
@@ -434,14 +442,14 @@ static inline int
cfs_hash_is_rehashing(struct cfs_hash *hs)
{
/* rehash is launched */
- return hs->hs_rehash_bits != 0;
+ return !!hs->hs_rehash_bits;
}
static inline int
cfs_hash_is_iterating(struct cfs_hash *hs)
{
/* someone is calling cfs_hash_for_each_* */
- return hs->hs_iterating || hs->hs_iterators != 0;
+ return hs->hs_iterating || hs->hs_iterators;
}
static inline int
@@ -453,7 +461,7 @@ cfs_hash_bkt_size(struct cfs_hash *hs)
}
static inline unsigned
-cfs_hash_id(struct cfs_hash *hs, const void *key, unsigned mask)
+cfs_hash_id(struct cfs_hash *hs, const void *key, unsigned int mask)
{
return hs->hs_ops->hs_hash(hs, key, mask);
}
@@ -562,7 +570,7 @@ cfs_hash_bd_index_get(struct cfs_hash *hs, struct cfs_hash_bd *bd)
}
static inline void
-cfs_hash_bd_index_set(struct cfs_hash *hs, unsigned index,
+cfs_hash_bd_index_set(struct cfs_hash *hs, unsigned int index,
struct cfs_hash_bd *bd)
{
bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits];
@@ -576,14 +584,14 @@ cfs_hash_bd_extra_get(struct cfs_hash *hs, struct cfs_hash_bd *bd)
cfs_hash_bkt_size(hs) - hs->hs_extra_bytes;
}
-static inline __u32
+static inline u32
cfs_hash_bd_version_get(struct cfs_hash_bd *bd)
{
/* need hold cfs_hash_bd_lock */
return bd->bd_bucket->hsb_version;
}
-static inline __u32
+static inline u32
cfs_hash_bd_count_get(struct cfs_hash_bd *bd)
{
/* need hold cfs_hash_bd_lock */
@@ -669,10 +677,10 @@ cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
/* Hash init/cleanup functions */
struct cfs_hash *
-cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
- unsigned bkt_bits, unsigned extra_bytes,
- unsigned min_theta, unsigned max_theta,
- struct cfs_hash_ops *ops, unsigned flags);
+cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits,
+ unsigned int bkt_bits, unsigned int extra_bytes,
+ unsigned int min_theta, unsigned int max_theta,
+ struct cfs_hash_ops *ops, unsigned int flags);
struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs);
void cfs_hash_putref(struct cfs_hash *hs);
@@ -700,27 +708,28 @@ typedef int (*cfs_hash_for_each_cb_t)(struct cfs_hash *hs,
void *
cfs_hash_lookup(struct cfs_hash *hs, const void *key);
void
-cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t, void *data);
+cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, void *data);
void
-cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t, void *data);
+cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
+ void *data);
int
-cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t,
- void *data);
+cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
+ void *data, int start);
int
-cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t,
+cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb,
void *data);
void
cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
- cfs_hash_for_each_cb_t, void *data);
+ cfs_hash_for_each_cb_t cb, void *data);
typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data);
void
-cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t, void *data);
+cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t cb, void *data);
void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex,
- cfs_hash_for_each_cb_t, void *data);
+cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex,
+ cfs_hash_for_each_cb_t cb, void *data);
int cfs_hash_is_empty(struct cfs_hash *hs);
-__u64 cfs_hash_size_get(struct cfs_hash *hs);
+u64 cfs_hash_size_get(struct cfs_hash *hs);
/*
* Rehash - Theta is calculated to be the average chained
@@ -766,8 +775,8 @@ cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd,
#endif /* CFS_HASH_DEBUG_LEVEL */
#define CFS_HASH_THETA_BITS 10
-#define CFS_HASH_MIN_THETA (1U << (CFS_HASH_THETA_BITS - 1))
-#define CFS_HASH_MAX_THETA (1U << (CFS_HASH_THETA_BITS + 1))
+#define CFS_HASH_MIN_THETA BIT(CFS_HASH_THETA_BITS - 1)
+#define CFS_HASH_MAX_THETA BIT(CFS_HASH_THETA_BITS + 1)
/* Return integer component of theta */
static inline int __cfs_hash_theta_int(int theta)
@@ -792,8 +801,8 @@ static inline void
__cfs_hash_set_theta(struct cfs_hash *hs, int min, int max)
{
LASSERT(min < max);
- hs->hs_min_theta = (__u16)min;
- hs->hs_max_theta = (__u16)max;
+ hs->hs_min_theta = (u16)min;
+ hs->hs_max_theta = (u16)max;
}
/* Generic debug formatting routines mainly for proc handler */
@@ -805,11 +814,11 @@ void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m);
* Generic djb2 hash algorithm for character arrays.
*/
static inline unsigned
-cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask)
+cfs_hash_djb2_hash(const void *key, size_t size, unsigned int mask)
{
- unsigned i, hash = 5381;
+ unsigned int i, hash = 5381;
- LASSERT(key != NULL);
+ LASSERT(key);
for (i = 0; i < size; i++)
hash = hash * 33 + ((char *)key)[i];
@@ -821,7 +830,7 @@ cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask)
* Generic u32 hash algorithm.
*/
static inline unsigned
-cfs_hash_u32_hash(const __u32 key, unsigned mask)
+cfs_hash_u32_hash(const u32 key, unsigned int mask)
{
return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask);
}
@@ -830,9 +839,9 @@ cfs_hash_u32_hash(const __u32 key, unsigned mask)
* Generic u64 hash algorithm.
*/
static inline unsigned
-cfs_hash_u64_hash(const __u64 key, unsigned mask)
+cfs_hash_u64_hash(const u64 key, unsigned int mask)
{
- return ((unsigned)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
+ return ((unsigned int)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
}
/** iterate over all buckets in @bds (array of struct cfs_hash_bd) */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index e0e1a5d0949d..aab15d8112a4 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -75,7 +75,7 @@ do { \
#define KLASSERT(e) LASSERT(e)
-void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *);
+void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msg);
#define LBUG() \
do { \
@@ -96,7 +96,7 @@ do { \
#define LIBCFS_ALLOC_POST(ptr, size) \
do { \
- if (unlikely((ptr) == NULL)) { \
+ if (unlikely(!(ptr))) { \
CERROR("LNET: out of memory at %s:%d (tried to alloc '" \
#ptr "' = %d)\n", __FILE__, __LINE__, (int)(size)); \
} else { \
@@ -147,7 +147,7 @@ do { \
#define LIBCFS_FREE(ptr, size) \
do { \
- if (unlikely((ptr) == NULL)) { \
+ if (unlikely(!(ptr))) { \
CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \
"%s:%d\n", (int)(size), __FILE__, __LINE__); \
break; \
@@ -169,8 +169,6 @@ do { \
#define ntohs(x) ___ntohs(x)
#endif
-void libcfs_run_upcall(char **argv);
-void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *);
void libcfs_debug_dumplog(void);
int libcfs_debug_init(unsigned long bufsize);
int libcfs_debug_cleanup(void);
@@ -280,7 +278,7 @@ do { \
#define CFS_FREE_PTR(ptr) LIBCFS_FREE(ptr, sizeof(*(ptr)))
/** Compile-time assertion.
-
+ *
* Check an invariant described by a constant expression at compile time by
* forcing a compiler error if it does not hold. \a cond must be a constant
* expression as defined by the ISO C Standard:
@@ -306,7 +304,8 @@ do { \
/* --------------------------------------------------------------------
* Light-weight trace
* Support for temporary event tracing with minimal Heisenberg effect.
- * -------------------------------------------------------------------- */
+ * --------------------------------------------------------------------
+ */
#define MKSTR(ptr) ((ptr)) ? (ptr) : ""
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h
index 0ee60ff336f2..41795d9b3b9b 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h
@@ -62,9 +62,9 @@ struct cfs_range_expr {
* Link to cfs_expr_list::el_exprs.
*/
struct list_head re_link;
- __u32 re_lo;
- __u32 re_hi;
- __u32 re_stride;
+ u32 re_lo;
+ u32 re_hi;
+ u32 re_stride;
};
struct cfs_expr_list {
@@ -74,24 +74,26 @@ struct cfs_expr_list {
char *cfs_trimwhite(char *str);
int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res);
-int cfs_str2num_check(char *str, int nob, unsigned *num,
- unsigned min, unsigned max);
-int cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list);
+int cfs_str2num_check(char *str, int nob, unsigned int *num,
+ unsigned int min, unsigned int max);
+int cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list);
int cfs_expr_list_print(char *buffer, int count,
struct cfs_expr_list *expr_list);
int cfs_expr_list_values(struct cfs_expr_list *expr_list,
- int max, __u32 **values);
+ int max, u32 **values);
static inline void
-cfs_expr_list_values_free(__u32 *values, int num)
+cfs_expr_list_values_free(u32 *values, int num)
{
- /* This array is allocated by LIBCFS_ALLOC(), so it shouldn't be freed
+ /*
+ * This array is allocated by LIBCFS_ALLOC(), so it shouldn't be freed
* by OBD_FREE() if it's called by module other than libcfs & LNet,
- * otherwise we will see fake memory leak */
+ * otherwise we will see fake memory leak
+ */
LIBCFS_FREE(values, num * sizeof(values[0]));
}
void cfs_expr_list_free(struct cfs_expr_list *expr_list);
-int cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
+int cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max,
struct cfs_expr_list **elpp);
void cfs_expr_list_free_list(struct list_head *list);
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h
index a7e1340e69a1..2accd9a85472 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h
@@ -62,9 +62,9 @@
struct cfs_wi_sched;
-void cfs_wi_sched_destroy(struct cfs_wi_sched *);
+void cfs_wi_sched_destroy(struct cfs_wi_sched *sched);
int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt,
- int nthrs, struct cfs_wi_sched **);
+ int nthrs, struct cfs_wi_sched **sched_pp);
struct cfs_workitem;
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
index f63cb47bc309..dd0cd0442b86 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
@@ -52,17 +52,17 @@ struct cfs_cpu_partition {
/* nodes mask for this partition */
nodemask_t *cpt_nodemask;
/* spread rotor for NUMA allocator */
- unsigned cpt_spread_rotor;
+ unsigned int cpt_spread_rotor;
};
/** descriptor for CPU partitions */
struct cfs_cpt_table {
/* version, reserved for hotplug */
- unsigned ctb_version;
+ unsigned int ctb_version;
/* spread rotor for NUMA allocator */
- unsigned ctb_spread_rotor;
+ unsigned int ctb_spread_rotor;
/* # of CPU partitions */
- unsigned ctb_nparts;
+ unsigned int ctb_nparts;
/* partitions tables */
struct cfs_cpu_partition *ctb_parts;
/* shadow HW CPU to CPU partition ID */
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h
index b646acd1f7e7..709e1ce98d8d 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h
@@ -76,23 +76,23 @@ static inline long cfs_duration_sec(long d)
#define cfs_time_current_64 get_jiffies_64
-static inline __u64 cfs_time_add_64(__u64 t, __u64 d)
+static inline u64 cfs_time_add_64(u64 t, u64 d)
{
return t + d;
}
-static inline __u64 cfs_time_shift_64(int seconds)
+static inline u64 cfs_time_shift_64(int seconds)
{
return cfs_time_add_64(cfs_time_current_64(),
cfs_time_seconds(seconds));
}
-static inline int cfs_time_before_64(__u64 t1, __u64 t2)
+static inline int cfs_time_before_64(u64 t1, u64 t2)
{
return (__s64)t2 - (__s64)t1 > 0;
}
-static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2)
+static inline int cfs_time_beforeq_64(u64 t1, u64 t2)
{
return (__s64)t2 - (__s64)t1 >= 0;
}
diff --git a/drivers/staging/lustre/include/linux/lnet/lnetst.h b/drivers/staging/lustre/include/linux/lnet/lnetst.h
index 417044552d3f..8a84888635ff 100644
--- a/drivers/staging/lustre/include/linux/lnet/lnetst.h
+++ b/drivers/staging/lustre/include/linux/lnet/lnetst.h
@@ -244,7 +244,7 @@ typedef struct {
int lstio_ses_timeout; /* IN: session timeout */
int lstio_ses_force; /* IN: force create ? */
/** IN: session features */
- unsigned lstio_ses_feats;
+ unsigned int lstio_ses_feats;
lst_sid_t __user *lstio_ses_idp; /* OUT: session id */
int lstio_ses_nmlen; /* IN: name length */
char __user *lstio_ses_namep; /* IN: session name */
@@ -255,7 +255,7 @@ typedef struct {
lst_sid_t __user *lstio_ses_idp; /* OUT: session id */
int __user *lstio_ses_keyp; /* OUT: local key */
/** OUT: session features */
- unsigned __user *lstio_ses_featp;
+ unsigned int __user *lstio_ses_featp;
lstcon_ndlist_ent_t __user *lstio_ses_ndinfo; /* OUT: */
int lstio_ses_nmlen; /* IN: name length */
char __user *lstio_ses_namep; /* OUT: session name */
@@ -328,7 +328,7 @@ typedef struct {
char __user *lstio_grp_namep; /* IN: group name */
int lstio_grp_count; /* IN: # of nodes */
/** OUT: session features */
- unsigned __user *lstio_grp_featp;
+ unsigned int __user *lstio_grp_featp;
lnet_process_id_t __user *lstio_grp_idsp; /* IN: nodes */
struct list_head __user *lstio_grp_resultp; /* OUT: list head of
result buffer */
@@ -490,6 +490,8 @@ typedef struct {
int blk_size; /* size (bytes) */
int blk_time; /* time of running the test*/
int blk_flags; /* reserved flags */
+ int blk_cli_off; /* bulk offset on client */
+ int blk_srv_off; /* reserved: bulk offset on server */
} lst_test_bulk_param_t;
typedef struct {
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 9e8802181452..7f761b327166 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -1489,7 +1489,7 @@ out_fpo:
static void kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps,
struct list_head *zombies)
{
- if (!fps->fps_net) /* intialized? */
+ if (!fps->fps_net) /* initialized? */
return;
spin_lock(&fps->fps_lock);
@@ -1637,7 +1637,7 @@ int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx,
{
__u64 *pages = tx->tx_pages;
bool is_rx = (rd != tx->tx_rd);
- bool tx_pages_mapped = 0;
+ bool tx_pages_mapped = false;
struct kib_fmr_pool *fpo;
int npages = 0;
__u64 version;
@@ -1812,7 +1812,7 @@ static void kiblnd_destroy_pool_list(struct list_head *head)
static void kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies)
{
- if (!ps->ps_net) /* intialized? */
+ if (!ps->ps_net) /* initialized? */
return;
spin_lock(&ps->ps_lock);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index b27de8888149..c7917abf9944 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -1912,12 +1912,12 @@ kiblnd_close_conn_locked(struct kib_conn *conn, int error)
libcfs_nid2str(peer->ibp_nid));
} else {
CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
- libcfs_nid2str(peer->ibp_nid), error,
- list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
- list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
- list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
- list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
- list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
+ libcfs_nid2str(peer->ibp_nid), error,
+ list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
+ list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
+ list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
+ list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
+ list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
}
dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev;
@@ -2643,7 +2643,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
if (incarnation)
peer->ibp_incarnation = incarnation;
out:
- write_unlock_irqrestore(glock, flags);
+ write_unlock_irqrestore(glock, flags);
CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n",
libcfs_nid2str(peer->ibp_nid),
@@ -2651,7 +2651,7 @@ out:
reason, IBLND_MSG_VERSION, version, msg_size,
conn->ibc_queue_depth, queue_dep,
conn->ibc_max_frags, frag_num);
- /**
+ /**
* if conn::ibc_reconnect is TRUE, connd will reconnect to the peer
* while destroying the zombie
*/
@@ -2976,7 +2976,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
case RDMA_CM_EVENT_ADDR_ERROR:
peer = (struct kib_peer *)cmid->context;
CNETERR("%s: ADDR ERROR %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
+ libcfs_nid2str(peer->ibp_nid), event->status);
kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH);
kiblnd_peer_decref(peer);
return -EHOSTUNREACH; /* rc destroys cmid */
@@ -3021,7 +3021,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
return kiblnd_active_connect(cmid);
CNETERR("Can't resolve route for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), event->status);
+ libcfs_nid2str(peer->ibp_nid), event->status);
kiblnd_peer_connect_failed(peer, 1, event->status);
kiblnd_peer_decref(peer);
return event->status; /* rc destroys cmid */
@@ -3031,7 +3031,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT ||
conn->ibc_state == IBLND_CONN_PASSIVE_WAIT);
CNETERR("%s: UNREACHABLE %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status);
kiblnd_connreq_done(conn, -ENETDOWN);
kiblnd_conn_decref(conn);
return 0;
@@ -3269,14 +3269,14 @@ kiblnd_disconnect_conn(struct kib_conn *conn)
#define KIB_RECONN_HIGH_RACE 10
/**
* Allow connd to take a break and handle other things after consecutive
- * reconnection attemps.
+ * reconnection attempts.
*/
#define KIB_RECONN_BREAK 100
int
kiblnd_connd(void *arg)
{
- spinlock_t *lock= &kiblnd_data.kib_connd_lock;
+ spinlock_t *lock = &kiblnd_data.kib_connd_lock;
wait_queue_t wait;
unsigned long flags;
struct kib_conn *conn;
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index cbc9a9c5385f..b74cf635faee 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -96,7 +96,8 @@ ksocknal_destroy_route(struct ksock_route *route)
}
static int
-ksocknal_create_peer(struct ksock_peer **peerp, lnet_ni_t *ni, lnet_process_id_t id)
+ksocknal_create_peer(struct ksock_peer **peerp, lnet_ni_t *ni,
+ lnet_process_id_t id)
{
int cpt = lnet_cpt_of_nid(id.nid);
struct ksock_net *net = ni->ni_data;
@@ -319,7 +320,8 @@ ksocknal_get_peer_info(lnet_ni_t *ni, int index,
}
static void
-ksocknal_associate_route_conn_locked(struct ksock_route *route, struct ksock_conn *conn)
+ksocknal_associate_route_conn_locked(struct ksock_route *route,
+ struct ksock_conn *conn)
{
struct ksock_peer *peer = route->ksnr_peer;
int type = conn->ksnc_type;
@@ -821,7 +823,8 @@ ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips)
if (k < peer->ksnp_n_passive_ips) /* using it already */
continue;
- k = ksocknal_match_peerip(iface, peerips, n_peerips);
+ k = ksocknal_match_peerip(iface, peerips,
+ n_peerips);
xor = ip ^ peerips[k];
this_netmatch = !(xor & iface->ksni_netmask) ? 1 : 0;
@@ -1302,8 +1305,11 @@ ksocknal_create_conn(lnet_ni_t *ni, struct ksock_route *route,
/* Take packets blocking for this connection. */
list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
- if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) == SOCKNAL_MATCH_NO)
- continue;
+ int match = conn->ksnc_proto->pro_match_tx(conn, tx,
+ tx->tx_nonblk);
+
+ if (match == SOCKNAL_MATCH_NO)
+ continue;
list_del(&tx->tx_list);
ksocknal_queue_tx_locked(tx, conn);
@@ -1493,8 +1499,8 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
}
- peer->ksnp_proto = NULL; /* renegotiate protocol version */
- peer->ksnp_error = error; /* stash last conn close reason */
+ peer->ksnp_proto = NULL; /* renegotiate protocol version */
+ peer->ksnp_error = error; /* stash last conn close reason */
if (list_empty(&peer->ksnp_routes)) {
/*
@@ -1786,7 +1792,8 @@ ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr)
(id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
continue;
- count += ksocknal_close_peer_conns_locked(peer, ipaddr, 0);
+ count += ksocknal_close_peer_conns_locked(peer, ipaddr,
+ 0);
}
}
@@ -2026,7 +2033,10 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
}
rc = 0;
- /* NB only new connections will pay attention to the new interface! */
+ /*
+ * NB only new connections will pay attention to the
+ * new interface!
+ */
}
write_unlock_bh(&ksocknal_data.ksnd_global_lock);
@@ -2200,8 +2210,9 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
int txmem;
int rxmem;
int nagle;
- struct ksock_conn *conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
+ struct ksock_conn *conn;
+ conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
if (!conn)
return -ENOENT;
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
index e6ca0cf52691..842c45393b38 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
@@ -84,7 +84,8 @@ struct ksock_sched { /* per scheduler state */
struct list_head kss_zombie_noop_txs; /* zombie noop tx list */
wait_queue_head_t kss_waitq; /* where scheduler sleeps */
int kss_nconns; /* # connections assigned to
- * this scheduler */
+ * this scheduler
+ */
struct ksock_sched_info *kss_info; /* owner of it */
};
@@ -110,15 +111,19 @@ struct ksock_interface { /* in-use interface */
struct ksock_tunables {
int *ksnd_timeout; /* "stuck" socket timeout
- * (seconds) */
+ * (seconds)
+ */
int *ksnd_nscheds; /* # scheduler threads in each
- * pool while starting */
+ * pool while starting
+ */
int *ksnd_nconnds; /* # connection daemons */
int *ksnd_nconnds_max; /* max # connection daemons */
int *ksnd_min_reconnectms; /* first connection retry after
- * (ms)... */
+ * (ms)...
+ */
int *ksnd_max_reconnectms; /* ...exponentially increasing to
- * this */
+ * this
+ */
int *ksnd_eager_ack; /* make TCP ack eagerly? */
int *ksnd_typed_conns; /* drive sockets by type? */
int *ksnd_min_bulk; /* smallest "large" message */
@@ -126,9 +131,11 @@ struct ksock_tunables {
int *ksnd_rx_buffer_size; /* socket rx buffer size */
int *ksnd_nagle; /* enable NAGLE? */
int *ksnd_round_robin; /* round robin for multiple
- * interfaces */
+ * interfaces
+ */
int *ksnd_keepalive; /* # secs for sending keepalive
- * NOOP */
+ * NOOP
+ */
int *ksnd_keepalive_idle; /* # idle secs before 1st probe
*/
int *ksnd_keepalive_count; /* # probes */
@@ -137,20 +144,26 @@ struct ksock_tunables {
int *ksnd_peertxcredits; /* # concurrent sends to 1 peer
*/
int *ksnd_peerrtrcredits; /* # per-peer router buffer
- * credits */
+ * credits
+ */
int *ksnd_peertimeout; /* seconds to consider peer dead
*/
int *ksnd_enable_csum; /* enable check sum */
int *ksnd_inject_csum_error; /* set non-zero to inject
- * checksum error */
+ * checksum error
+ */
int *ksnd_nonblk_zcack; /* always send zc-ack on
- * non-blocking connection */
+ * non-blocking connection
+ */
unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload
- * size */
+ * size
+ */
int *ksnd_zc_recv; /* enable ZC receive (for
- * Chelsio TOE) */
+ * Chelsio TOE)
+ */
int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to
- * enable ZC receive */
+ * enable ZC receive
+ */
};
struct ksock_net {
@@ -174,9 +187,11 @@ struct ksock_nal_data {
int ksnd_nnets; /* # networks set up */
struct list_head ksnd_nets; /* list of nets */
rwlock_t ksnd_global_lock; /* stabilize peer/conn
- * ops */
+ * ops
+ */
struct list_head *ksnd_peers; /* hash table of all my
- * known peers */
+ * known peers
+ */
int ksnd_peer_hash_size; /* size of ksnd_peers */
int ksnd_nthreads; /* # live threads */
@@ -187,11 +202,14 @@ struct ksock_nal_data {
atomic_t ksnd_nactive_txs; /* #active txs */
struct list_head ksnd_deathrow_conns; /* conns to close:
- * reaper_lock*/
+ * reaper_lock
+ */
struct list_head ksnd_zombie_conns; /* conns to free:
- * reaper_lock */
+ * reaper_lock
+ */
struct list_head ksnd_enomem_conns; /* conns to retry:
- * reaper_lock*/
+ * reaper_lock
+ */
wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
unsigned long ksnd_reaper_waketime; /* when reaper will wake
*/
@@ -201,30 +219,34 @@ struct ksock_nal_data {
int ksnd_stall_tx; /* test sluggish sender
*/
int ksnd_stall_rx; /* test sluggish
- * receiver */
-
+ * receiver
+ */
struct list_head ksnd_connd_connreqs; /* incoming connection
- * requests */
+ * requests
+ */
struct list_head ksnd_connd_routes; /* routes waiting to be
- * connected */
+ * connected
+ */
wait_queue_head_t ksnd_connd_waitq; /* connds sleep here */
int ksnd_connd_connecting; /* # connds connecting
*/
time64_t ksnd_connd_failed_stamp;/* time stamp of the
* last failed
- * connecting attempt */
+ * connecting attempt
+ */
time64_t ksnd_connd_starting_stamp;/* time stamp of the
* last starting connd
*/
- unsigned ksnd_connd_starting; /* # starting connd */
- unsigned ksnd_connd_running; /* # running connd */
+ unsigned int ksnd_connd_starting; /* # starting connd */
+ unsigned int ksnd_connd_running; /* # running connd */
spinlock_t ksnd_connd_lock; /* serialise */
struct list_head ksnd_idle_noop_txs; /* list head for freed
- * noop tx */
+ * noop tx
+ */
spinlock_t ksnd_tx_lock; /* serialise, g_lock
- * unsafe */
-
+ * unsafe
+ */
};
#define SOCKNAL_INIT_NOTHING 0
@@ -304,18 +326,21 @@ struct ksock_conn {
struct list_head ksnc_list; /* stash on peer's conn list */
struct socket *ksnc_sock; /* actual socket */
void *ksnc_saved_data_ready; /* socket's original
- * data_ready() callback */
+ * data_ready() callback
+ */
void *ksnc_saved_write_space; /* socket's original
- * write_space() callback */
+ * write_space() callback
+ */
atomic_t ksnc_conn_refcount;/* conn refcount */
atomic_t ksnc_sock_refcount;/* sock refcount */
struct ksock_sched *ksnc_scheduler; /* who schedules this connection
- */
+ */
__u32 ksnc_myipaddr; /* my IP */
__u32 ksnc_ipaddr; /* peer's IP */
int ksnc_port; /* peer's port */
signed int ksnc_type:3; /* type of connection, should be
- * signed value */
+ * signed value
+ */
unsigned int ksnc_closing:1; /* being shut down */
unsigned int ksnc_flip:1; /* flip or not, only for V2.x */
unsigned int ksnc_zc_capable:1; /* enable to ZC */
@@ -323,9 +348,11 @@ struct ksock_conn {
/* reader */
struct list_head ksnc_rx_list; /* where I enq waiting input or a
- * forwarding descriptor */
+ * forwarding descriptor
+ */
unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times
- * out */
+ * out
+ */
__u8 ksnc_rx_started; /* started receiving a message */
__u8 ksnc_rx_ready; /* data ready to read */
__u8 ksnc_rx_scheduled; /* being progressed */
@@ -338,7 +365,8 @@ struct ksock_conn {
lnet_kiov_t *ksnc_rx_kiov; /* the page frags */
union ksock_rxiovspace ksnc_rx_iov_space; /* space for frag descriptors */
__u32 ksnc_rx_csum; /* partial checksum for incoming
- * data */
+ * data
+ */
void *ksnc_cookie; /* rx lnet_finalize passthru arg
*/
ksock_msg_t ksnc_msg; /* incoming message buffer:
@@ -346,14 +374,16 @@ struct ksock_conn {
* whole struct
* V1.x message is a bare
* lnet_hdr_t, it's stored in
- * ksnc_msg.ksm_u.lnetmsg */
-
+ * ksnc_msg.ksm_u.lnetmsg
+ */
/* WRITER */
struct list_head ksnc_tx_list; /* where I enq waiting for output
- * space */
+ * space
+ */
struct list_head ksnc_tx_queue; /* packets waiting to be sent */
- struct ksock_tx *ksnc_tx_carrier; /* next TX that can carry a LNet
- * message or ZC-ACK */
+ struct ksock_tx *ksnc_tx_carrier; /* next TX that can carry a LNet
+ * message or ZC-ACK
+ */
unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out
*/
int ksnc_tx_bufnob; /* send buffer marker */
@@ -361,7 +391,8 @@ struct ksock_conn {
int ksnc_tx_ready; /* write space */
int ksnc_tx_scheduled; /* being progressed */
unsigned long ksnc_tx_last_post; /* time stamp of the last posted
- * TX */
+ * TX
+ */
};
struct ksock_route {
@@ -370,20 +401,24 @@ struct ksock_route {
struct ksock_peer *ksnr_peer; /* owning peer */
atomic_t ksnr_refcount; /* # users */
unsigned long ksnr_timeout; /* when (in jiffies) reconnection
- * can happen next */
+ * can happen next
+ */
long ksnr_retry_interval; /* how long between retries */
__u32 ksnr_myipaddr; /* my IP */
__u32 ksnr_ipaddr; /* IP address to connect to */
int ksnr_port; /* port to connect to */
unsigned int ksnr_scheduled:1; /* scheduled for attention */
unsigned int ksnr_connecting:1; /* connection establishment in
- * progress */
+ * progress
+ */
unsigned int ksnr_connected:4; /* connections established by
- * type */
+ * type
+ */
unsigned int ksnr_deleted:1; /* been removed from peer? */
unsigned int ksnr_share_count; /* created explicitly? */
int ksnr_conn_count; /* # conns established by this
- * route */
+ * route
+ */
};
#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */
@@ -391,7 +426,8 @@ struct ksock_route {
struct ksock_peer {
struct list_head ksnp_list; /* stash on global peer list */
unsigned long ksnp_last_alive; /* when (in jiffies) I was last
- * alive */
+ * alive
+ */
lnet_process_id_t ksnp_id; /* who's on the other end(s) */
atomic_t ksnp_refcount; /* # users */
int ksnp_sharecount; /* lconf usage counter */
@@ -408,7 +444,8 @@ struct ksock_peer {
struct list_head ksnp_tx_queue; /* waiting packets */
spinlock_t ksnp_lock; /* serialize, g_lock unsafe */
struct list_head ksnp_zc_req_list; /* zero copy requests wait for
- * ACK */
+ * ACK
+ */
unsigned long ksnp_send_keepalive; /* time to send keepalive */
lnet_ni_t *ksnp_ni; /* which network */
int ksnp_n_passive_ips; /* # of... */
@@ -429,7 +466,8 @@ extern struct ksock_tunables ksocknal_tunables;
#define SOCKNAL_MATCH_NO 0 /* TX can't match type of connection */
#define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */
#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not
- * preferred */
+ * preferred
+ */
struct ksock_proto {
/* version number of protocol */
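The comment reflow running through these socklnd.h hunks applies the kernel's multi-line comment convention, in which the closing */ gets its own line, and spells bare unsigned as unsigned int. A minimal before/after sketch with a hypothetical field:

    /* before: the terminator shares the last text line */
    unsigned      foo;   /* counts something
                          * interesting */

    /* after: the checkpatch-clean form */
    unsigned int  foo;   /* counts something
                          * interesting
                          */

Nothing behavioural changes in these hunks; only the layout does.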
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
index c1c6f604e6ad..972f6094be75 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -620,7 +620,8 @@ ksocknal_launch_all_connections_locked(struct ksock_peer *peer)
}
struct ksock_conn *
-ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx, int nonblk)
+ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx,
+ int nonblk)
{
struct list_head *tmp;
struct ksock_conn *conn;
@@ -630,10 +631,12 @@ ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx, int nonb
int fnob = 0;
list_for_each(tmp, &peer->ksnp_conns) {
- struct ksock_conn *c = list_entry(tmp, struct ksock_conn, ksnc_list);
- int nob = atomic_read(&c->ksnc_tx_nob) +
- c->ksnc_sock->sk->sk_wmem_queued;
- int rc;
+ struct ksock_conn *c;
+ int nob, rc;
+
+ c = list_entry(tmp, struct ksock_conn, ksnc_list);
+ nob = atomic_read(&c->ksnc_tx_nob) +
+ c->ksnc_sock->sk->sk_wmem_queued;
LASSERT(!c->ksnc_closing);
LASSERT(c->ksnc_proto &&
@@ -752,9 +755,9 @@ ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn)
LASSERT(msg->ksm_zc_cookies[1]);
LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
+ /* ZC ACK piggybacked on ztx release tx later */
if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0))
- ztx = tx; /* ZC ACK piggybacked on ztx release tx later */
-
+ ztx = tx;
} else {
/*
* It's a normal packet - can it piggyback a noop zc-ack that
@@ -796,7 +799,8 @@ ksocknal_find_connectable_route_locked(struct ksock_peer *peer)
LASSERT(!route->ksnr_connecting || route->ksnr_scheduled);
- if (route->ksnr_scheduled) /* connections being established */
+ /* connections being established */
+ if (route->ksnr_scheduled)
continue;
/* all route types connected ? */
@@ -1514,7 +1518,10 @@ int ksocknal_scheduler(void *arg)
rc = ksocknal_process_transmit(conn, tx);
if (rc == -ENOMEM || rc == -EAGAIN) {
- /* Incomplete send: replace tx on HEAD of tx_queue */
+ /*
+ * Incomplete send: replace tx on HEAD of
+ * tx_queue
+ */
spin_lock_bh(&sched->kss_lock);
list_add(&tx->tx_list, &conn->ksnc_tx_queue);
} else {
@@ -1724,7 +1731,8 @@ ksocknal_recv_hello(lnet_ni_t *ni, struct ksock_conn *conn,
timeout = active ? *ksocknal_tunables.ksnd_timeout :
lnet_acceptor_timeout();
- rc = lnet_sock_read(sock, &hello->kshm_magic, sizeof(hello->kshm_magic), timeout);
+ rc = lnet_sock_read(sock, &hello->kshm_magic,
+ sizeof(hello->kshm_magic), timeout);
if (rc) {
CERROR("Error %d reading HELLO from %pI4h\n",
rc, &conn->ksnc_ipaddr);
@@ -1798,7 +1806,8 @@ ksocknal_recv_hello(lnet_ni_t *ni, struct ksock_conn *conn,
conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
/* Userspace NAL assigns peer process ID from socket */
recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
- recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), conn->ksnc_ipaddr);
+ recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
+ conn->ksnc_ipaddr);
} else {
recv_id.nid = hello->kshm_src_nid;
recv_id.pid = hello->kshm_src_pid;
@@ -1882,7 +1891,8 @@ ksocknal_connect(struct ksock_route *route)
if (peer->ksnp_accepting > 0) {
CDEBUG(D_NET,
"peer %s(%d) already connecting to me, retry later.\n",
- libcfs_nid2str(peer->ksnp_id.nid), peer->ksnp_accepting);
+ libcfs_nid2str(peer->ksnp_id.nid),
+ peer->ksnp_accepting);
retry_later = 1;
}
@@ -2241,7 +2251,8 @@ ksocknal_connd(void *arg)
/* Nothing to do for 'timeout' */
set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq, &wait);
+ add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq,
+ &wait);
spin_unlock_bh(connd_lock);
nloops = 0;
@@ -2371,7 +2382,8 @@ ksocknal_send_keepalive_locked(struct ksock_peer *peer)
struct ksock_conn *conn;
struct ksock_tx *tx;
- if (list_empty(&peer->ksnp_conns)) /* last_alive will be updated by create_conn */
+ /* last_alive will be updated by create_conn */
+ if (list_empty(&peer->ksnp_conns))
return 0;
if (peer->ksnp_proto != &ksocknal_protocol_v3x)
@@ -2473,8 +2485,8 @@ ksocknal_check_peer_timeouts(int idx)
* holding only shared lock
*/
if (!list_empty(&peer->ksnp_tx_queue)) {
- struct ksock_tx *tx = list_entry(peer->ksnp_tx_queue.next,
- struct ksock_tx, tx_list);
+ tx = list_entry(peer->ksnp_tx_queue.next,
+ struct ksock_tx, tx_list);
if (cfs_time_aftereq(cfs_time_current(),
tx->tx_deadline)) {
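Most socklnd_cb.c hunks are line-length fixes: long argument lists are wrapped at 80 columns, declarations are split from their initializers, and trailing comments move onto their own line above the condition they annotate. The shape of the comment move, taken from the route-scan hunk above:

    /* before */
    if (route->ksnr_scheduled)    /* connections being established */
            continue;

    /* after */
    /* connections being established */
    if (route->ksnr_scheduled)
            continue;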
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
index 6c95e989ca12..4bcab4bcc2de 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
@@ -202,7 +202,8 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn)
fragnob = sum;
conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
- iov[i].iov_base, fragnob);
+ iov[i].iov_base,
+ fragnob);
}
conn->ksnc_msg.ksm_csum = saved_csum;
}
@@ -291,7 +292,8 @@ ksocknal_lib_csum_tx(struct ksock_tx *tx)
}
int
-ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, int *rxmem, int *nagle)
+ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
+ int *rxmem, int *nagle)
{
struct socket *sock = conn->ksnc_sock;
int len;
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
index 82e174f6d9fe..8f0ff6ca1f39 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
@@ -194,7 +194,10 @@ ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
}
if (!tx->tx_msg.ksm_zc_cookies[0]) {
- /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */
+ /*
+ * NOOP tx has only one ZC-ACK cookie,
+ * can carry at least one more
+ */
if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
tx->tx_msg.ksm_zc_cookies[1] = cookie;
@@ -203,7 +206,10 @@ ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
}
if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
- /* not likely to carry more ACKs, skip it to simplify logic */
+ /*
+ * not likely to carry more ACKs, skip it
+ * to simplify logic
+ */
ksocknal_next_tx_carrier(conn);
}
@@ -237,7 +243,10 @@ ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
}
} else {
- /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */
+ /*
+ * ksm_zc_cookies[0] < ksm_zc_cookies[1],
+ * it is a range of cookies
+ */
if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
CWARN("%s: duplicated ZC cookie: %llu\n",
@@ -425,7 +434,8 @@ ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2)
tx_zc_list) {
__u64 c = tx->tx_msg.ksm_zc_cookies[0];
- if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
+ if (c == cookie1 || c == cookie2 ||
+ (cookie1 < c && c < cookie2)) {
tx->tx_msg.ksm_zc_cookies[0] = 0;
list_del(&tx->tx_zc_list);
list_add(&tx->tx_zc_list, &zlist);
@@ -639,7 +649,8 @@ out:
}
static int
-ksocknal_recv_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello, int timeout)
+ksocknal_recv_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello,
+ int timeout)
{
struct socket *sock = conn->ksnc_sock;
int rc;
@@ -737,7 +748,10 @@ ksocknal_pack_msg_v2(struct ksock_tx *tx)
tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
tx->tx_resid = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
}
- /* Don't checksum before start sending, because packet can be piggybacked with ACK */
+ /*
+ * Don't checksum before we start sending, because the packet
+ * can be piggybacked with an ACK
+ */
}
static void
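The ZC-ACK hunks above rely on a two-slot cookie encoding in ksm_zc_cookies[]. A sketch of the matching rule they imply; the helper name is hypothetical and not part of the patch:

    /*
     * cookies[0] == 0          -> a single cookie, held in cookies[1]
     * cookies[0] >  cookies[1] -> two individual cookies
     * cookies[0] <  cookies[1] -> the inclusive range [c0, c1]
     */
    static bool zcack_carries(__u64 *cookies, __u64 c)
    {
            if (!cookies[0])
                    return c == cookies[1];
            if (cookies[0] > cookies[1])
                    return c == cookies[0] || c == cookies[1];
            return c >= cookies[0] && c <= cookies[1];
    }

This encoding is why the range branch can report a duplicated cookie with a simple bounds check.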
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
index 23b36b890964..a38db2322225 100644
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c
@@ -57,7 +57,7 @@ static int libcfs_param_debug_mb_set(const char *val,
const struct kernel_param *kp)
{
int rc;
- unsigned num;
+ unsigned int num;
rc = kstrtouint(val, 0, &num);
if (rc < 0)
@@ -228,7 +228,8 @@ int libcfs_panic_in_progress;
static const char *
libcfs_debug_subsys2str(int subsys)
{
- static const char *libcfs_debug_subsystems[] = LIBCFS_DEBUG_SUBSYS_NAMES;
+ static const char * const libcfs_debug_subsystems[] =
+ LIBCFS_DEBUG_SUBSYS_NAMES;
if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems))
return NULL;
@@ -240,7 +241,8 @@ libcfs_debug_subsys2str(int subsys)
static const char *
libcfs_debug_dbg2str(int debug)
{
- static const char *libcfs_debug_masks[] = LIBCFS_DEBUG_MASKS_NAMES;
+ static const char * const libcfs_debug_masks[] =
+ LIBCFS_DEBUG_MASKS_NAMES;
if (debug >= ARRAY_SIZE(libcfs_debug_masks))
return NULL;
@@ -253,17 +255,17 @@ libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
{
const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
libcfs_debug_dbg2str;
- int len = 0;
- const char *token;
- int i;
+ int len = 0;
+ const char *token;
+ int i;
- if (mask == 0) { /* "0" */
+ if (!mask) { /* "0" */
if (size > 0)
str[0] = '0';
len = 1;
} else { /* space-separated tokens */
for (i = 0; i < 32; i++) {
- if ((mask & (1 << i)) == 0)
+ if (!(mask & (1 << i)))
continue;
token = fn(i);
@@ -276,7 +278,7 @@ libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
len++;
}
- while (*token != 0) {
+ while (*token) {
if (len < size)
str[len] = *token;
token++;
@@ -299,10 +301,10 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
{
const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
libcfs_debug_dbg2str;
- int m = 0;
- int matched;
- int n;
- int t;
+ int m = 0;
+ int matched;
+ int n;
+ int t;
/* Allow a number for backwards compatibility */
@@ -313,7 +315,7 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
t = sscanf(str, "%i%n", &m, &matched);
if (t >= 1 && matched == n) {
/* don't print warning for lctl set_param debug=0 or -1 */
- if (m != 0 && m != -1)
+ if (m && m != -1)
CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n");
*mask = m;
return 0;
@@ -387,8 +389,8 @@ EXPORT_SYMBOL(libcfs_debug_dumplog);
int libcfs_debug_init(unsigned long bufsize)
{
- int rc = 0;
unsigned int max = libcfs_debug_mb;
+ int rc = 0;
init_waitqueue_head(&debug_ctlwq);
@@ -414,9 +416,9 @@ int libcfs_debug_init(unsigned long bufsize)
max = max / num_possible_cpus();
max <<= (20 - PAGE_SHIFT);
}
- rc = cfs_tracefile_init(max);
- if (rc == 0) {
+ rc = cfs_tracefile_init(max);
+ if (!rc) {
libcfs_register_panic_notifier();
libcfs_debug_mb = cfs_trace_get_debug_mb();
}
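The subsystem and mask name tables above gain a second const. The first const protects the strings; the added one makes the pointer slots immutable as well, so the table is eligible for read-only data:

    /* before: strings const, array slots still writable */
    static const char *names[] = { "a", "b" };

    /* after: slots const too; the compiler may place this in .rodata */
    static const char * const names[] = { "a", "b" };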
diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c
index e4b1a0a86eae..12dd50ad4efb 100644
--- a/drivers/staging/lustre/lnet/libcfs/fail.c
+++ b/drivers/staging/lustre/lnet/libcfs/fail.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL(cfs_race_waitq);
int cfs_race_state;
EXPORT_SYMBOL(cfs_race_state);
-int __cfs_fail_check_set(__u32 id, __u32 value, int set)
+int __cfs_fail_check_set(u32 id, u32 value, int set)
{
static atomic_t cfs_fail_count = ATOMIC_INIT(0);
@@ -113,6 +113,7 @@ int __cfs_fail_check_set(__u32 id, __u32 value, int set)
break;
case CFS_FAIL_LOC_RESET:
cfs_fail_loc = value;
+ atomic_set(&cfs_fail_count, 0);
break;
default:
LASSERTF(0, "called with bad set %u\n", set);
@@ -123,7 +124,7 @@ int __cfs_fail_check_set(__u32 id, __u32 value, int set)
}
EXPORT_SYMBOL(__cfs_fail_check_set);
-int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
+int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set)
{
int ret;
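The one functional change in fail.c: CFS_FAIL_LOC_RESET now also clears the shared counter, so count-based checks (the CFS_FAIL_SKIP / CFS_FAIL_SOME style) restart from zero when a new fail_loc is installed. The new reset case, annotated:

    case CFS_FAIL_LOC_RESET:
            cfs_fail_loc = value;
            /* rearm count-based triggers for the new fault site */
            atomic_set(&cfs_fail_count, 0);
            break;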
diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c
index 23283b6e09ab..c93c59d8fe6c 100644
--- a/drivers/staging/lustre/lnet/libcfs/hash.c
+++ b/drivers/staging/lustre/lnet/libcfs/hash.c
@@ -289,7 +289,7 @@ cfs_hash_hd_hhead_size(struct cfs_hash *hs)
static struct hlist_head *
cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
{
- struct cfs_hash_head_dep *head;
+ struct cfs_hash_head_dep *head;
head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0];
return &head[bd->bd_offset].hd_head;
@@ -492,7 +492,7 @@ cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd)
cfs_hash_bd_from_key(hs, hs->hs_buckets,
hs->hs_cur_bits, key, bd);
} else {
- LASSERT(hs->hs_rehash_bits != 0);
+ LASSERT(hs->hs_rehash_bits);
cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
hs->hs_rehash_bits, key, bd);
}
@@ -507,14 +507,14 @@ cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur)
bd->bd_bucket->hsb_depmax = dep_cur;
# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
- if (likely(warn_on_depth == 0 ||
+ if (likely(!warn_on_depth ||
max(warn_on_depth, hs->hs_dep_max) >= dep_cur))
return;
spin_lock(&hs->hs_dep_lock);
- hs->hs_dep_max = dep_cur;
- hs->hs_dep_bkt = bd->bd_bucket->hsb_index;
- hs->hs_dep_off = bd->bd_offset;
+ hs->hs_dep_max = dep_cur;
+ hs->hs_dep_bkt = bd->bd_bucket->hsb_index;
+ hs->hs_dep_off = bd->bd_offset;
hs->hs_dep_bits = hs->hs_cur_bits;
spin_unlock(&hs->hs_dep_lock);
@@ -531,7 +531,7 @@ cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode);
cfs_hash_bd_dep_record(hs, bd, rc);
bd->bd_bucket->hsb_version++;
- if (unlikely(bd->bd_bucket->hsb_version == 0))
+ if (unlikely(!bd->bd_bucket->hsb_version))
bd->bd_bucket->hsb_version++;
bd->bd_bucket->hsb_count++;
@@ -551,7 +551,7 @@ cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
LASSERT(bd->bd_bucket->hsb_count > 0);
bd->bd_bucket->hsb_count--;
bd->bd_bucket->hsb_version++;
- if (unlikely(bd->bd_bucket->hsb_version == 0))
+ if (unlikely(!bd->bd_bucket->hsb_version))
bd->bd_bucket->hsb_version++;
if (cfs_hash_with_counter(hs)) {
@@ -571,7 +571,7 @@ cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
struct cfs_hash_bucket *nbkt = bd_new->bd_bucket;
int rc;
- if (cfs_hash_bd_compare(bd_old, bd_new) == 0)
+ if (!cfs_hash_bd_compare(bd_old, bd_new))
return;
/* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops
@@ -584,11 +584,11 @@ cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
LASSERT(obkt->hsb_count > 0);
obkt->hsb_count--;
obkt->hsb_version++;
- if (unlikely(obkt->hsb_version == 0))
+ if (unlikely(!obkt->hsb_version))
obkt->hsb_version++;
nbkt->hsb_count++;
nbkt->hsb_version++;
- if (unlikely(nbkt->hsb_version == 0))
+ if (unlikely(!nbkt->hsb_version))
nbkt->hsb_version++;
}
@@ -629,7 +629,7 @@ cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd,
struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd);
struct hlist_node *ehnode;
struct hlist_node *match;
- int intent_add = (intent & CFS_HS_LOOKUP_MASK_ADD) != 0;
+ int intent_add = intent & CFS_HS_LOOKUP_MASK_ADD;
/* with this function, we can avoid a lot of useless refcount ops,
* which are expensive atomic operations most time.
@@ -643,13 +643,13 @@ cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd,
continue;
/* match and ... */
- if ((intent & CFS_HS_LOOKUP_MASK_DEL) != 0) {
+ if (intent & CFS_HS_LOOKUP_MASK_DEL) {
cfs_hash_bd_del_locked(hs, bd, ehnode);
return ehnode;
}
/* caller wants refcount? */
- if ((intent & CFS_HS_LOOKUP_MASK_REF) != 0)
+ if (intent & CFS_HS_LOOKUP_MASK_REF)
cfs_hash_get(hs, ehnode);
return ehnode;
}
@@ -682,7 +682,7 @@ EXPORT_SYMBOL(cfs_hash_bd_peek_locked);
static void
cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned n, int excl)
+ unsigned int n, int excl)
{
struct cfs_hash_bucket *prev = NULL;
int i;
@@ -704,7 +704,7 @@ cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
static void
cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned n, int excl)
+ unsigned int n, int excl)
{
struct cfs_hash_bucket *prev = NULL;
int i;
@@ -719,10 +719,10 @@ cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
static struct hlist_node *
cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned n, const void *key)
+ unsigned int n, const void *key)
{
struct hlist_node *ehnode;
- unsigned i;
+ unsigned int i;
cfs_hash_for_each_bd(bds, n, i) {
ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL,
@@ -735,12 +735,12 @@ cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
static struct hlist_node *
cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned n, const void *key,
+ unsigned int n, const void *key,
struct hlist_node *hnode, int noref)
{
struct hlist_node *ehnode;
int intent;
- unsigned i;
+ unsigned int i;
LASSERT(hnode);
intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK;
@@ -766,7 +766,7 @@ cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
static struct hlist_node *
cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
- unsigned n, const void *key,
+ unsigned int n, const void *key,
struct hlist_node *hnode)
{
struct hlist_node *ehnode;
@@ -815,7 +815,7 @@ cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
return;
}
- LASSERT(hs->hs_rehash_bits != 0);
+ LASSERT(hs->hs_rehash_bits);
cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
hs->hs_rehash_bits, key, &bds[1]);
@@ -883,7 +883,7 @@ cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
struct cfs_hash_bucket **new_bkts;
int i;
- LASSERT(old_size == 0 || old_bkts);
+ LASSERT(!old_size || old_bkts);
if (old_bkts && old_size == new_size)
return old_bkts;
@@ -908,9 +908,9 @@ cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
return NULL;
}
- new_bkts[i]->hsb_index = i;
- new_bkts[i]->hsb_version = 1; /* shouldn't be zero */
- new_bkts[i]->hsb_depmax = -1; /* unknown */
+ new_bkts[i]->hsb_index = i;
+ new_bkts[i]->hsb_version = 1; /* shouldn't be zero */
+ new_bkts[i]->hsb_depmax = -1; /* unknown */
bd.bd_bucket = new_bkts[i];
cfs_hash_bd_for_each_hlist(hs, &bd, hhead)
INIT_HLIST_HEAD(hhead);
@@ -950,9 +950,9 @@ static int cfs_hash_dep_print(struct cfs_workitem *wi)
int bits;
spin_lock(&hs->hs_dep_lock);
- dep = hs->hs_dep_max;
- bkt = hs->hs_dep_bkt;
- off = hs->hs_dep_off;
+ dep = hs->hs_dep_max;
+ bkt = hs->hs_dep_bkt;
+ off = hs->hs_dep_off;
bits = hs->hs_dep_bits;
spin_unlock(&hs->hs_dep_lock);
@@ -976,7 +976,7 @@ static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs)
return;
spin_lock(&hs->hs_dep_lock);
- while (hs->hs_dep_bits != 0) {
+ while (hs->hs_dep_bits) {
spin_unlock(&hs->hs_dep_lock);
cond_resched();
spin_lock(&hs->hs_dep_lock);
@@ -992,10 +992,10 @@ static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {}
#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */
struct cfs_hash *
-cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
- unsigned bkt_bits, unsigned extra_bytes,
- unsigned min_theta, unsigned max_theta,
- struct cfs_hash_ops *ops, unsigned flags)
+cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits,
+ unsigned int bkt_bits, unsigned int extra_bytes,
+ unsigned int min_theta, unsigned int max_theta,
+ struct cfs_hash_ops *ops, unsigned int flags)
{
struct cfs_hash *hs;
int len;
@@ -1010,18 +1010,17 @@ cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
LASSERT(ops->hs_get);
LASSERT(ops->hs_put_locked);
- if ((flags & CFS_HASH_REHASH) != 0)
+ if (flags & CFS_HASH_REHASH)
flags |= CFS_HASH_COUNTER; /* must have counter */
LASSERT(cur_bits > 0);
LASSERT(cur_bits >= bkt_bits);
LASSERT(max_bits >= cur_bits && max_bits < 31);
- LASSERT(ergo((flags & CFS_HASH_REHASH) == 0, cur_bits == max_bits));
- LASSERT(ergo((flags & CFS_HASH_REHASH) != 0,
- (flags & CFS_HASH_NO_LOCK) == 0));
- LASSERT(ergo((flags & CFS_HASH_REHASH_KEY) != 0, ops->hs_keycpy));
+ LASSERT(ergo(!(flags & CFS_HASH_REHASH), cur_bits == max_bits));
+ LASSERT(ergo(flags & CFS_HASH_REHASH, !(flags & CFS_HASH_NO_LOCK)));
+ LASSERT(ergo(flags & CFS_HASH_REHASH_KEY, ops->hs_keycpy));
- len = (flags & CFS_HASH_BIGNAME) == 0 ?
+ len = !(flags & CFS_HASH_BIGNAME) ?
CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN;
LIBCFS_ALLOC(hs, offsetof(struct cfs_hash, hs_name[len]));
if (!hs)
@@ -1036,12 +1035,12 @@ cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
cfs_hash_lock_setup(hs);
cfs_hash_hlist_setup(hs);
- hs->hs_cur_bits = (__u8)cur_bits;
- hs->hs_min_bits = (__u8)cur_bits;
- hs->hs_max_bits = (__u8)max_bits;
- hs->hs_bkt_bits = (__u8)bkt_bits;
+ hs->hs_cur_bits = (u8)cur_bits;
+ hs->hs_min_bits = (u8)cur_bits;
+ hs->hs_max_bits = (u8)max_bits;
+ hs->hs_bkt_bits = (u8)bkt_bits;
- hs->hs_ops = ops;
+ hs->hs_ops = ops;
hs->hs_extra_bytes = extra_bytes;
hs->hs_rehash_bits = 0;
cfs_wi_init(&hs->hs_rehash_wi, hs, cfs_hash_rehash_worker);
@@ -1107,12 +1106,12 @@ cfs_hash_destroy(struct cfs_hash *hs)
cfs_hash_exit(hs, hnode);
}
}
- LASSERT(bd.bd_bucket->hsb_count == 0);
+ LASSERT(!bd.bd_bucket->hsb_count);
cfs_hash_bd_unlock(hs, &bd, 1);
cond_resched();
}
- LASSERT(atomic_read(&hs->hs_count) == 0);
+ LASSERT(!atomic_read(&hs->hs_count));
cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs),
0, CFS_HASH_NBKT(hs));
@@ -1216,7 +1215,7 @@ cfs_hash_find_or_add(struct cfs_hash *hs, const void *key,
struct cfs_hash_bd bds[2];
int bits = 0;
- LASSERT(hlist_unhashed(hnode));
+ LASSERTF(hlist_unhashed(hnode), "hnode = %p\n", hnode);
cfs_hash_lock(hs, 0);
cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);
@@ -1293,7 +1292,7 @@ cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
}
if (hnode) {
- obj = cfs_hash_object(hs, hnode);
+ obj = cfs_hash_object(hs, hnode);
bits = cfs_hash_rehash_bits(hs);
}
@@ -1388,7 +1387,7 @@ cfs_hash_for_each_exit(struct cfs_hash *hs)
bits = cfs_hash_rehash_bits(hs);
cfs_hash_unlock(hs, 1);
/* NB: it's a race on cfs_hash::hs_iterating, see above */
- if (remained == 0)
+ if (!remained)
hs->hs_iterating = 0;
if (bits > 0) {
cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
@@ -1406,14 +1405,14 @@ cfs_hash_for_each_exit(struct cfs_hash *hs)
* . if @removal_safe is true, use can remove current item by
* cfs_hash_bd_del_locked
*/
-static __u64
+static u64
cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
void *data, int remove_safe)
{
struct hlist_node *hnode;
struct hlist_node *pos;
struct cfs_hash_bd bd;
- __u64 count = 0;
+ u64 count = 0;
int excl = !!remove_safe;
int loop = 0;
int i;
@@ -1526,7 +1525,7 @@ cfs_hash_is_empty(struct cfs_hash *hs)
}
EXPORT_SYMBOL(cfs_hash_is_empty);
-__u64
+u64
cfs_hash_size_get(struct cfs_hash *hs)
{
return cfs_hash_with_counter(hs) ?
@@ -1552,26 +1551,33 @@ EXPORT_SYMBOL(cfs_hash_size_get);
*/
static int
cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data)
+ void *data, int start)
{
struct hlist_node *hnode;
struct hlist_node *tmp;
struct cfs_hash_bd bd;
- __u32 version;
+ u32 version;
int count = 0;
int stop_on_change;
- int rc;
+ int end = -1;
+ int rc = 0;
int i;
stop_on_change = cfs_hash_with_rehash_key(hs) ||
!cfs_hash_with_no_itemref(hs) ||
!hs->hs_ops->hs_put_locked;
cfs_hash_lock(hs, 0);
+again:
LASSERT(!cfs_hash_is_rehashing(hs));
cfs_hash_for_each_bucket(hs, &bd, i) {
struct hlist_head *hhead;
+ if (i < start)
+ continue;
+ else if (end > 0 && i >= end)
+ break;
+
cfs_hash_bd_lock(hs, &bd, 0);
version = cfs_hash_bd_version_get(&bd);
@@ -1611,14 +1617,19 @@ cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
if (rc) /* callback wants to break iteration */
break;
}
- cfs_hash_unlock(hs, 0);
+ if (start > 0 && !rc) {
+ end = start;
+ start = 0;
+ goto again;
+ }
+ cfs_hash_unlock(hs, 0);
return count;
}
int
cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data)
+ void *data, int start)
{
if (cfs_hash_with_no_lock(hs) ||
cfs_hash_with_rehash_key(hs) ||
@@ -1630,7 +1641,7 @@ cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
return -EOPNOTSUPP;
cfs_hash_for_each_enter(hs);
- cfs_hash_for_each_relax(hs, func, data);
+ cfs_hash_for_each_relax(hs, func, data, start);
cfs_hash_for_each_exit(hs);
return 0;
@@ -1652,7 +1663,7 @@ int
cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
void *data)
{
- unsigned i = 0;
+ unsigned int i = 0;
if (cfs_hash_with_no_lock(hs))
return -EOPNOTSUPP;
@@ -1662,7 +1673,7 @@ cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
return -EOPNOTSUPP;
cfs_hash_for_each_enter(hs);
- while (cfs_hash_for_each_relax(hs, func, data)) {
+ while (cfs_hash_for_each_relax(hs, func, data, 0)) {
CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
hs->hs_name, i++);
}
@@ -1672,7 +1683,7 @@ cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
EXPORT_SYMBOL(cfs_hash_for_each_empty);
void
-cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex,
+cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex,
cfs_hash_for_each_cb_t func, void *data)
{
struct hlist_head *hhead;
@@ -1704,7 +1715,7 @@ EXPORT_SYMBOL(cfs_hash_hlist_for_each);
* the passed callback @func and pass to it as an argument each hash
* item and the private @data. During the callback the bucket lock
* is held so the callback must never sleep.
- */
+ */
void
cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
cfs_hash_for_each_cb_t func, void *data)
@@ -1936,7 +1947,7 @@ out:
/* can't refer to @hs anymore because it could be destroyed */
if (bkts)
cfs_hash_buckets_free(bkts, bsize, new_size, old_size);
- if (rc != 0)
+ if (rc)
CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc);
/* return 1 only if cfs_wi_exit is called */
return rc == -ESRCH;
@@ -2005,7 +2016,7 @@ cfs_hash_full_bkts(struct cfs_hash *hs)
if (!hs->hs_rehash_buckets)
return hs->hs_buckets;
- LASSERT(hs->hs_rehash_bits != 0);
+ LASSERT(hs->hs_rehash_bits);
return hs->hs_rehash_bits > hs->hs_cur_bits ?
hs->hs_rehash_buckets : hs->hs_buckets;
}
@@ -2017,7 +2028,7 @@ cfs_hash_full_nbkt(struct cfs_hash *hs)
if (!hs->hs_rehash_buckets)
return CFS_HASH_NBKT(hs);
- LASSERT(hs->hs_rehash_bits != 0);
+ LASSERT(hs->hs_rehash_bits);
return hs->hs_rehash_bits > hs->hs_cur_bits ?
CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs);
}
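The most invasive hash.c change is the start parameter threaded into cfs_hash_for_each_relax() and cfs_hash_for_each_nolock(): iteration can begin at an arbitrary bucket and then wrap around to cover the prefix it skipped. Reduced to a control-flow sketch, with visit() standing in for the real per-item callback:

    int i, rc = 0, end = -1;

again:
    for (i = 0; i < nbkt; i++) {
            if (i < start)
                    continue;
            if (end > 0 && i >= end)
                    break;
            rc = visit(i);          /* hypothetical callback */
            if (rc)                 /* callback wants to stop */
                    break;
    }
    if (start > 0 && !rc) {         /* second pass over the skipped prefix */
            end = start;
            start = 0;
            goto again;
    }

Existing callers keep the old behaviour by passing start = 0, as the cfs_hash_for_each_empty() hunk shows.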
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
index 33352af6c27f..55caa19def51 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
@@ -74,7 +74,7 @@ EXPORT_SYMBOL(cfs_cpt_table_free);
int
cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
{
- int rc;
+ int rc;
rc = snprintf(buf, len, "%d\t: %d\n", 0, 0);
len -= rc;
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
index 83543f928279..1967b97c4afc 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
@@ -52,9 +52,9 @@ struct cfs_percpt_lock *
cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
struct lock_class_key *keys)
{
- struct cfs_percpt_lock *pcl;
- spinlock_t *lock;
- int i;
+ struct cfs_percpt_lock *pcl;
+ spinlock_t *lock;
+ int i;
/* NB: cptab can be NULL, pcl will be for HW CPUs on that case */
LIBCFS_ALLOC(pcl, sizeof(*pcl));
@@ -73,7 +73,7 @@ cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
cfs_percpt_for_each(lock, i, pcl->pcl_locks) {
spin_lock_init(lock);
- if (keys != NULL)
+ if (keys)
lockdep_set_class(lock, &keys[i]);
}
@@ -94,8 +94,8 @@ void
cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index)
__acquires(pcl->pcl_locks)
{
- int ncpt = cfs_cpt_number(pcl->pcl_cptab);
- int i;
+ int ncpt = cfs_cpt_number(pcl->pcl_cptab);
+ int i;
LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt);
@@ -114,7 +114,7 @@ cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index)
/* exclusive lock request */
for (i = 0; i < ncpt; i++) {
spin_lock(pcl->pcl_locks[i]);
- if (i == 0) {
+ if (!i) {
LASSERT(!pcl->pcl_locked);
/* nobody should take private lock after this
* so I wouldn't starve for too long time
@@ -130,8 +130,8 @@ void
cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
__releases(pcl->pcl_locks)
{
- int ncpt = cfs_cpt_number(pcl->pcl_cptab);
- int i;
+ int ncpt = cfs_cpt_number(pcl->pcl_cptab);
+ int i;
index = ncpt == 1 ? 0 : index;
@@ -141,7 +141,7 @@ cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
}
for (i = ncpt - 1; i >= 0; i--) {
- if (i == 0) {
+ if (!i) {
LASSERT(pcl->pcl_locked);
pcl->pcl_locked = 0;
}
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
index d0e81bb41cdc..ef085ba23194 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
@@ -43,8 +43,8 @@ struct cfs_var_array {
void
cfs_percpt_free(void *vars)
{
- struct cfs_var_array *arr;
- int i;
+ struct cfs_var_array *arr;
+ int i;
arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
@@ -72,9 +72,9 @@ EXPORT_SYMBOL(cfs_percpt_free);
void *
cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size)
{
- struct cfs_var_array *arr;
- int count;
- int i;
+ struct cfs_var_array *arr;
+ int count;
+ int i;
count = cfs_cpt_number(cptab);
@@ -120,8 +120,8 @@ EXPORT_SYMBOL(cfs_percpt_number);
void
cfs_array_free(void *vars)
{
- struct cfs_var_array *arr;
- int i;
+ struct cfs_var_array *arr;
+ int i;
arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
@@ -144,15 +144,15 @@ EXPORT_SYMBOL(cfs_array_free);
void *
cfs_array_alloc(int count, unsigned int size)
{
- struct cfs_var_array *arr;
- int i;
+ struct cfs_var_array *arr;
+ int i;
LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count]));
if (!arr)
return NULL;
- arr->va_count = count;
- arr->va_size = size;
+ arr->va_count = count;
+ arr->va_size = size;
for (i = 0; i < count; i++) {
LIBCFS_ALLOC(arr->va_ptrs[i], size);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
index 56a614d7713b..02de1ee720fd 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
@@ -79,7 +79,7 @@ int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
for (i = 0; i < 32; i++) {
debugstr = bit2str(i);
if (debugstr && strlen(debugstr) == len &&
- strncasecmp(str, debugstr, len) == 0) {
+ !strncasecmp(str, debugstr, len)) {
if (op == '-')
newmask &= ~(1 << i);
else
@@ -89,7 +89,7 @@ int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
}
}
if (!found && len == 3 &&
- (strncasecmp(str, "ALL", len) == 0)) {
+ !strncasecmp(str, "ALL", len)) {
if (op == '-')
newmask = minmask;
else
@@ -112,7 +112,7 @@ int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
char *cfs_firststr(char *str, size_t size)
{
size_t i = 0;
- char *end;
+ char *end;
/* trim leading spaces */
while (i < size && *str && isspace(*str)) {
@@ -182,7 +182,7 @@ cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res)
next->ls_len--;
}
- if (next->ls_len == 0) /* whitespaces only */
+ if (!next->ls_len) /* whitespaces only */
return 0;
if (*next->ls_str == delim) {
@@ -222,8 +222,8 @@ EXPORT_SYMBOL(cfs_gettok);
* \retval 0 otherwise
*/
int
-cfs_str2num_check(char *str, int nob, unsigned *num,
- unsigned min, unsigned max)
+cfs_str2num_check(char *str, int nob, unsigned int *num,
+ unsigned int min, unsigned int max)
{
bool all_numbers = true;
char *endp, cache;
@@ -273,11 +273,11 @@ EXPORT_SYMBOL(cfs_str2num_check);
* -ENOMEM will be returned.
*/
static int
-cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max,
+cfs_range_expr_parse(struct cfs_lstr *src, unsigned int min, unsigned int max,
int bracketed, struct cfs_range_expr **expr)
{
- struct cfs_range_expr *re;
- struct cfs_lstr tok;
+ struct cfs_range_expr *re;
+ struct cfs_lstr tok;
LIBCFS_ALLOC(re, sizeof(*re));
if (!re)
@@ -391,7 +391,7 @@ cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list)
i += scnprintf(buffer + i, count - i, "[");
list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
- if (j++ != 0)
+ if (j++)
i += scnprintf(buffer + i, count - i, ",");
i += cfs_range_expr_print(buffer + i, count - i, expr,
numexprs > 1);
@@ -411,13 +411,13 @@ EXPORT_SYMBOL(cfs_expr_list_print);
* \retval 0 otherwise
*/
int
-cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list)
+cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list)
{
- struct cfs_range_expr *expr;
+ struct cfs_range_expr *expr;
list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
if (value >= expr->re_lo && value <= expr->re_hi &&
- ((value - expr->re_lo) % expr->re_stride) == 0)
+ !((value - expr->re_lo) % expr->re_stride))
return 1;
}
@@ -433,21 +433,21 @@ EXPORT_SYMBOL(cfs_expr_list_match);
* \retval < 0 for failure
*/
int
-cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp)
+cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, u32 **valpp)
{
- struct cfs_range_expr *expr;
- __u32 *val;
- int count = 0;
- int i;
+ struct cfs_range_expr *expr;
+ u32 *val;
+ int count = 0;
+ int i;
list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
for (i = expr->re_lo; i <= expr->re_hi; i++) {
- if (((i - expr->re_lo) % expr->re_stride) == 0)
+ if (!((i - expr->re_lo) % expr->re_stride))
count++;
}
}
- if (count == 0) /* empty expression list */
+ if (!count) /* empty expression list */
return 0;
if (count > max) {
@@ -463,7 +463,7 @@ cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp)
count = 0;
list_for_each_entry(expr, &expr_list->el_exprs, re_link) {
for (i = expr->re_lo; i <= expr->re_hi; i++) {
- if (((i - expr->re_lo) % expr->re_stride) == 0)
+ if (!((i - expr->re_lo) % expr->re_stride))
val[count++] = i;
}
}
@@ -501,13 +501,13 @@ EXPORT_SYMBOL(cfs_expr_list_free);
* \retval -errno otherwise
*/
int
-cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
+cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max,
struct cfs_expr_list **elpp)
{
- struct cfs_expr_list *expr_list;
- struct cfs_range_expr *expr;
- struct cfs_lstr src;
- int rc;
+ struct cfs_expr_list *expr_list;
+ struct cfs_range_expr *expr;
+ struct cfs_lstr src;
+ int rc;
LIBCFS_ALLOC(expr_list, sizeof(*expr_list));
if (!expr_list)
@@ -533,18 +533,18 @@ cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
}
rc = cfs_range_expr_parse(&tok, min, max, 1, &expr);
- if (rc != 0)
+ if (rc)
break;
list_add_tail(&expr->re_link, &expr_list->el_exprs);
}
} else {
rc = cfs_range_expr_parse(&src, min, max, 0, &expr);
- if (rc == 0)
+ if (!rc)
list_add_tail(&expr->re_link, &expr_list->el_exprs);
}
- if (rc != 0)
+ if (rc)
cfs_expr_list_free(expr_list);
else
*elpp = expr_list;
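The == 0 conversions in cfs_expr_list_match() and cfs_expr_list_values() leave the matching rule itself untouched: a value matches a range expression (e.g. "0-6/2") when it lies inside [re_lo, re_hi] and sits on the stride. As one expression, using the patch's own field names:

    match = value >= re_lo && value <= re_hi &&
            !((value - re_lo) % re_stride);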
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index e8b1a61420de..6b9cf06e8df2 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -55,6 +55,8 @@ MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
* i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket
* are NUMA node ID, number before bracket is CPU partition ID.
*
+ * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology
+ *
* NB: If user specified cpu_pattern, cpu_npartitions will be ignored
*/
static char *cpu_pattern = "";
@@ -88,7 +90,7 @@ cfs_node_to_cpumask(int node, cpumask_t *mask)
void
cfs_cpt_table_free(struct cfs_cpt_table *cptab)
{
- int i;
+ int i;
if (cptab->ctb_cpu2cpt) {
LIBCFS_FREE(cptab->ctb_cpu2cpt,
@@ -126,7 +128,7 @@ struct cfs_cpt_table *
cfs_cpt_table_alloc(unsigned int ncpt)
{
struct cfs_cpt_table *cptab;
- int i;
+ int i;
LIBCFS_ALLOC(cptab, sizeof(*cptab));
if (!cptab)
@@ -177,10 +179,10 @@ EXPORT_SYMBOL(cfs_cpt_table_alloc);
int
cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
{
- char *tmp = buf;
- int rc = 0;
- int i;
- int j;
+ char *tmp = buf;
+ int rc = 0;
+ int i;
+ int j;
for (i = 0; i < cptab->ctb_nparts; i++) {
if (len > 0) {
@@ -271,7 +273,7 @@ EXPORT_SYMBOL(cfs_cpt_nodemask);
int
cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
{
- int node;
+ int node;
LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
@@ -311,8 +313,8 @@ EXPORT_SYMBOL(cfs_cpt_set_cpu);
void
cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
{
- int node;
- int i;
+ int node;
+ int i;
LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
@@ -371,9 +373,9 @@ EXPORT_SYMBOL(cfs_cpt_unset_cpu);
int
cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
{
- int i;
+ int i;
- if (cpumask_weight(mask) == 0 ||
+ if (!cpumask_weight(mask) ||
cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
cpt);
@@ -392,7 +394,7 @@ EXPORT_SYMBOL(cfs_cpt_set_cpumask);
void
cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
{
- int i;
+ int i;
for_each_cpu(i, mask)
cfs_cpt_unset_cpu(cptab, cpt, i);
@@ -402,8 +404,8 @@ EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
int
cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
{
- cpumask_t *mask;
- int rc;
+ cpumask_t *mask;
+ int rc;
if (node < 0 || node >= MAX_NUMNODES) {
CDEBUG(D_INFO,
@@ -449,7 +451,7 @@ EXPORT_SYMBOL(cfs_cpt_unset_node);
int
cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
{
- int i;
+ int i;
for_each_node_mask(i, *mask) {
if (!cfs_cpt_set_node(cptab, cpt, i))
@@ -463,7 +465,7 @@ EXPORT_SYMBOL(cfs_cpt_set_nodemask);
void
cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
{
- int i;
+ int i;
for_each_node_mask(i, *mask)
cfs_cpt_unset_node(cptab, cpt, i);
@@ -473,8 +475,8 @@ EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
void
cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
{
- int last;
- int i;
+ int last;
+ int i;
if (cpt == CFS_CPT_ANY) {
last = cptab->ctb_nparts - 1;
@@ -493,10 +495,10 @@ EXPORT_SYMBOL(cfs_cpt_clear);
int
cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
{
- nodemask_t *mask;
- int weight;
- int rotor;
- int node;
+ nodemask_t *mask;
+ int weight;
+ int rotor;
+ int node;
/* convert CPU partition ID to HW node id */
@@ -514,7 +516,7 @@ cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
rotor %= weight;
for_each_node_mask(node, *mask) {
- if (rotor-- == 0)
+ if (!rotor--)
return node;
}
@@ -526,8 +528,8 @@ EXPORT_SYMBOL(cfs_cpt_spread_node);
int
cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
{
- int cpu = smp_processor_id();
- int cpt = cptab->ctb_cpu2cpt[cpu];
+ int cpu = smp_processor_id();
+ int cpt = cptab->ctb_cpu2cpt[cpu];
if (cpt < 0) {
if (!remap)
@@ -555,10 +557,10 @@ EXPORT_SYMBOL(cfs_cpt_of_cpu);
int
cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
{
- cpumask_t *cpumask;
- nodemask_t *nodemask;
- int rc;
- int i;
+ cpumask_t *cpumask;
+ nodemask_t *nodemask;
+ int rc;
+ int i;
LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
@@ -582,7 +584,7 @@ cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
rc = set_cpus_allowed_ptr(current, cpumask);
set_mems_allowed(*nodemask);
- if (rc == 0)
+ if (!rc)
schedule(); /* switch to allowed CPU */
return rc;
@@ -601,10 +603,10 @@ static int
cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
cpumask_t *node, int number)
{
- cpumask_t *socket = NULL;
- cpumask_t *core = NULL;
- int rc = 0;
- int cpu;
+ cpumask_t *socket = NULL;
+ cpumask_t *core = NULL;
+ int rc = 0;
+ int cpu;
LASSERT(number > 0);
@@ -638,7 +640,7 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
LASSERT(!cpumask_empty(socket));
while (!cpumask_empty(socket)) {
- int i;
+ int i;
/* get cpumask for hts in the same core */
cpumask_copy(core, topology_sibling_cpumask(cpu));
@@ -656,14 +658,14 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
goto out;
}
- if (--number == 0)
+ if (!--number)
goto out;
}
cpu = cpumask_first(socket);
}
}
- out:
+out:
if (socket)
LIBCFS_FREE(socket, cpumask_size());
if (core)
@@ -676,9 +678,9 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
static unsigned int
cfs_cpt_num_estimate(void)
{
- unsigned nnode = num_online_nodes();
- unsigned ncpu = num_online_cpus();
- unsigned ncpt;
+ unsigned int nnode = num_online_nodes();
+ unsigned int ncpu = num_online_cpus();
+ unsigned int ncpt;
if (ncpu <= CPT_WEIGHT_MIN) {
ncpt = 1;
@@ -703,14 +705,14 @@ cfs_cpt_num_estimate(void)
ncpt = nnode;
- out:
+out:
#if (BITS_PER_LONG == 32)
/* config many CPU partitions on 32-bit system could consume
* too much memory
*/
ncpt = min(2U, ncpt);
#endif
- while (ncpu % ncpt != 0)
+ while (ncpu % ncpt)
ncpt--; /* worst case is 1 */
return ncpt;
@@ -720,11 +722,11 @@ static struct cfs_cpt_table *
cfs_cpt_table_create(int ncpt)
{
struct cfs_cpt_table *cptab = NULL;
- cpumask_t *mask = NULL;
- int cpt = 0;
- int num;
- int rc;
- int i;
+ cpumask_t *mask = NULL;
+ int cpt = 0;
+ int num;
+ int rc;
+ int i;
rc = cfs_cpt_num_estimate();
if (ncpt <= 0)
@@ -735,7 +737,7 @@ cfs_cpt_table_create(int ncpt)
ncpt, rc);
}
- if (num_online_cpus() % ncpt != 0) {
+ if (num_online_cpus() % ncpt) {
CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
(int)num_online_cpus(), ncpt);
goto failed;
@@ -748,7 +750,7 @@ cfs_cpt_table_create(int ncpt)
}
num = num_online_cpus() / ncpt;
- if (num == 0) {
+ if (!num) {
CERROR("CPU changed while setting CPU partition\n");
goto failed;
}
@@ -764,7 +766,7 @@ cfs_cpt_table_create(int ncpt)
while (!cpumask_empty(mask)) {
struct cfs_cpu_partition *part;
- int n;
+ int n;
/*
* Each emulated NUMA node has all allowed CPUs in
@@ -817,27 +819,36 @@ cfs_cpt_table_create(int ncpt)
static struct cfs_cpt_table *
cfs_cpt_table_create_pattern(char *pattern)
{
- struct cfs_cpt_table *cptab;
- char *str = pattern;
- int node = 0;
- int high;
- int ncpt;
- int c;
-
- for (ncpt = 0;; ncpt++) { /* quick scan bracket */
- str = strchr(str, '[');
- if (!str)
- break;
- str++;
- }
+ struct cfs_cpt_table *cptab;
+ char *str;
+ int node = 0;
+ int high;
+ int ncpt = 0;
+ int cpt;
+ int rc;
+ int c;
+ int i;
str = cfs_trimwhite(pattern);
if (*str == 'n' || *str == 'N') {
pattern = str + 1;
- node = 1;
+ if (*pattern != '\0') {
+ node = 1;
+ } else { /* shortcut to create CPT from NUMA & CPU topology */
+ node = -1;
+ ncpt = num_online_nodes();
+ }
+ }
+
+ if (!ncpt) { /* scan for brackets; each bracket marks a partition */
+ for (str = pattern;; str++, ncpt++) {
+ str = strchr(str, '[');
+ if (!str)
+ break;
+ }
}
- if (ncpt == 0 ||
+ if (!ncpt ||
(node && ncpt > num_online_nodes()) ||
(!node && ncpt > num_online_cpus())) {
CERROR("Invalid pattern %s, or too many partitions %d\n",
@@ -845,25 +856,39 @@ cfs_cpt_table_create_pattern(char *pattern)
return NULL;
}
- high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
-
cptab = cfs_cpt_table_alloc(ncpt);
if (!cptab) {
CERROR("Failed to allocate cpu partition table\n");
return NULL;
}
+ if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
+ cpt = 0;
+
+ for_each_online_node(i) {
+ if (cpt >= ncpt) {
+ CERROR("CPU changed while setting CPU partition table, %d/%d\n",
+ cpt, ncpt);
+ goto failed;
+ }
+
+ rc = cfs_cpt_set_node(cptab, cpt++, i);
+ if (!rc)
+ goto failed;
+ }
+ return cptab;
+ }
+
+ high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
+
for (str = cfs_trimwhite(pattern), c = 0;; c++) {
- struct cfs_range_expr *range;
- struct cfs_expr_list *el;
- char *bracket = strchr(str, '[');
- int cpt;
- int rc;
- int i;
- int n;
+ struct cfs_range_expr *range;
+ struct cfs_expr_list *el;
+ char *bracket = strchr(str, '[');
+ int n;
if (!bracket) {
- if (*str != 0) {
+ if (*str) {
CERROR("Invalid pattern %s\n", str);
goto failed;
}
@@ -886,7 +911,7 @@ cfs_cpt_table_create_pattern(char *pattern)
goto failed;
}
- if (cfs_cpt_weight(cptab, cpt) != 0) {
+ if (cfs_cpt_weight(cptab, cpt)) {
CERROR("Partition %d has already been set.\n", cpt);
goto failed;
}
@@ -905,14 +930,14 @@ cfs_cpt_table_create_pattern(char *pattern)
}
if (cfs_expr_list_parse(str, (bracket - str) + 1,
- 0, high, &el) != 0) {
+ 0, high, &el)) {
CERROR("Can't parse number range: %s\n", str);
goto failed;
}
list_for_each_entry(range, &el->el_exprs, re_link) {
for (i = range->re_lo; i <= range->re_hi; i++) {
- if ((i - range->re_lo) % range->re_stride != 0)
+ if ((i - range->re_lo) % range->re_stride)
continue;
rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
@@ -945,8 +970,8 @@ cfs_cpt_table_create_pattern(char *pattern)
static int
cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
- unsigned int cpu = (unsigned long)hcpu;
- bool warn;
+ unsigned int cpu = (unsigned long)hcpu;
+ bool warn;
switch (action) {
case CPU_DEAD:
@@ -1019,7 +1044,7 @@ cfs_cpu_init(void)
register_hotcpu_notifier(&cfs_cpu_notifier);
#endif
- if (*cpu_pattern != 0) {
+ if (*cpu_pattern) {
cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
if (!cfs_cpt_table) {
CERROR("Failed to create cptab from pattern %s\n",
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
index 7f56d2c9dd00..68e34b4a76c9 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
@@ -64,7 +64,7 @@ static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
unsigned int key_len)
{
struct crypto_ahash *tfm;
- int err = 0;
+ int err = 0;
*type = cfs_crypto_hash_type(hash_alg);
@@ -93,12 +93,12 @@ static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
if (key)
err = crypto_ahash_setkey(tfm, key, key_len);
- else if ((*type)->cht_key != 0)
+ else if ((*type)->cht_key)
err = crypto_ahash_setkey(tfm,
(unsigned char *)&((*type)->cht_key),
(*type)->cht_size);
- if (err != 0) {
+ if (err) {
ahash_request_free(*req);
crypto_free_ahash(tfm);
return err;
@@ -147,16 +147,16 @@ int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
unsigned char *key, unsigned int key_len,
unsigned char *hash, unsigned int *hash_len)
{
- struct scatterlist sl;
+ struct scatterlist sl;
struct ahash_request *req;
- int err;
- const struct cfs_crypto_hash_type *type;
+ int err;
+ const struct cfs_crypto_hash_type *type;
- if (!buf || buf_len == 0 || !hash_len)
+ if (!buf || !buf_len || !hash_len)
return -EINVAL;
err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
- if (err != 0)
+ if (err)
return err;
if (!hash || *hash_len < type->cht_size) {
@@ -177,7 +177,7 @@ int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
EXPORT_SYMBOL(cfs_crypto_hash_digest);
/**
- * Allocate and initialize desriptor for hash algorithm.
+ * Allocate and initialize descriptor for hash algorithm.
*
* This should be used to initialize a hash descriptor for multiple calls
* to a single hash function when computing the hash across multiple
@@ -198,8 +198,8 @@ cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
unsigned char *key, unsigned int key_len)
{
struct ahash_request *req;
- int err;
- const struct cfs_crypto_hash_type *type;
+ int err;
+ const struct cfs_crypto_hash_type *type;
err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
@@ -273,7 +273,7 @@ EXPORT_SYMBOL(cfs_crypto_hash_update);
int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc,
unsigned char *hash, unsigned int *hash_len)
{
- int err;
+ int err;
struct ahash_request *req = (void *)hdesc;
int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
@@ -312,8 +312,8 @@ static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg)
{
int buf_len = max(PAGE_SIZE, 1048576UL);
void *buf;
- unsigned long start, end;
- int bcount, err = 0;
+ unsigned long start, end;
+ int bcount, err = 0;
struct page *page;
unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
unsigned int hash_len = sizeof(hash);
@@ -358,7 +358,7 @@ out_err:
CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n",
cfs_crypto_hash_name(hash_alg), err);
} else {
- unsigned long tmp;
+ unsigned long tmp;
tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) *
1000) / (1024 * 1024);
@@ -440,6 +440,6 @@ int cfs_crypto_register(void)
*/
void cfs_crypto_unregister(void)
{
- if (adler32 == 0)
+ if (!adler32)
cfs_crypto_adler32_unregister();
}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h
index 18e8cd4d8758..d0b3aa80cfa6 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h
@@ -1,4 +1,4 @@
- /*
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
index 435b784c52f8..39a72e3f0c18 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c
@@ -57,7 +57,6 @@
#include <linux/kallsyms.h>
-char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall";
char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
/**
@@ -68,11 +67,12 @@ char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
void libcfs_run_debug_log_upcall(char *file)
{
char *argv[3];
- int rc;
- char *envp[] = {
+ int rc;
+ static const char * const envp[] = {
"HOME=/",
"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
- NULL};
+ NULL
+ };
argv[0] = lnet_debug_log_upcall;
@@ -81,7 +81,7 @@ void libcfs_run_debug_log_upcall(char *file)
argv[2] = NULL;
- rc = call_usermodehelper(argv[0], argv, envp, 1);
+ rc = call_usermodehelper(argv[0], argv, (char **)envp, 1);
if (rc < 0 && rc != -ENOENT) {
CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n",
rc, argv[0], argv[1]);
@@ -91,57 +91,6 @@ void libcfs_run_debug_log_upcall(char *file)
}
}
-void libcfs_run_upcall(char **argv)
-{
- int rc;
- int argc;
- char *envp[] = {
- "HOME=/",
- "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
- NULL};
-
- argv[0] = lnet_upcall;
- argc = 1;
- while (argv[argc])
- argc++;
-
- LASSERT(argc >= 2);
-
- rc = call_usermodehelper(argv[0], argv, envp, 1);
- if (rc < 0 && rc != -ENOENT) {
- CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; check /sys/kernel/debug/lnet/upcall\n",
- rc, argv[0], argv[1],
- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
- argc < 6 ? "" : ",...");
- } else {
- CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n",
- argv[0], argv[1],
- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
- argc < 6 ? "" : ",...");
- }
-}
-
-void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *msgdata)
-{
- char *argv[6];
- char buf[32];
-
- snprintf(buf, sizeof(buf), "%d", msgdata->msg_line);
-
- argv[1] = "LBUG";
- argv[2] = (char *)msgdata->msg_file;
- argv[3] = (char *)msgdata->msg_fn;
- argv[4] = buf;
- argv[5] = NULL;
-
- libcfs_run_upcall(argv);
-}
-EXPORT_SYMBOL(libcfs_run_lbug_upcall);
-
/* coverity[+kill] */
void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
{
@@ -156,7 +105,6 @@ void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
dump_stack();
if (!libcfs_panic_on_lbug)
libcfs_debug_dumplog();
- libcfs_run_lbug_upcall(msgdata);
if (libcfs_panic_on_lbug)
panic("LBUG");
set_task_state(current, TASK_UNINTERRUPTIBLE);
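With lnet_upcall and its runners removed, the remaining debug-log upcall keeps the const-correct environment introduced above; call_usermodehelper() still takes plain char **, hence the cast at the call site:

    static const char * const envp[] = {
            "HOME=/",
            "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
            NULL
    };

    /* the helper only reads the strings, so dropping const is safe */
    rc = call_usermodehelper(argv[0], argv, (char **)envp, 1);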
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
index 38308f8b6aae..3f5d58babc2f 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
@@ -83,7 +83,7 @@ static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
return true;
}
- if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
+ if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
CERROR("LIBCFS ioctl: packlen != ioc_len\n");
return true;
}
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c
index 291d286eab48..cf902154f0aa 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c
@@ -45,8 +45,8 @@
sigset_t
cfs_block_allsigs(void)
{
- unsigned long flags;
- sigset_t old;
+ unsigned long flags;
+ sigset_t old;
spin_lock_irqsave(&current->sighand->siglock, flags);
old = current->blocked;
@@ -60,8 +60,8 @@ EXPORT_SYMBOL(cfs_block_allsigs);
sigset_t cfs_block_sigs(unsigned long sigs)
{
- unsigned long flags;
- sigset_t old;
+ unsigned long flags;
+ sigset_t old;
spin_lock_irqsave(&current->sighand->siglock, flags);
old = current->blocked;
@@ -91,7 +91,7 @@ EXPORT_SYMBOL(cfs_block_sigsinv);
void
cfs_restore_sigs(sigset_t old)
{
- unsigned long flags;
+ unsigned long flags;
spin_lock_irqsave(&current->sighand->siglock, flags);
current->blocked = old;
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c
index 8b551d2708ba..75eb84e7f0f8 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c
@@ -49,8 +49,8 @@ static DECLARE_RWSEM(cfs_tracefile_sem);
int cfs_tracefile_init_arch(void)
{
- int i;
- int j;
+ int i;
+ int j;
struct cfs_trace_cpu_data *tcd;
/* initialize trace_data */
@@ -85,14 +85,14 @@ int cfs_tracefile_init_arch(void)
out:
cfs_tracefile_fini_arch();
- printk(KERN_ERR "lnet: Not enough memory\n");
+ pr_err("lnet: Not enough memory\n");
return -ENOMEM;
}
void cfs_tracefile_fini_arch(void)
{
- int i;
- int j;
+ int i;
+ int j;
for (i = 0; i < num_possible_cpus(); i++)
for (j = 0; j < 3; j++) {
@@ -224,26 +224,26 @@ void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
{
char *prefix = "Lustre", *ptype = NULL;
- if ((mask & D_EMERG) != 0) {
+ if (mask & D_EMERG) {
prefix = dbghdr_to_err_string(hdr);
ptype = KERN_EMERG;
- } else if ((mask & D_ERROR) != 0) {
+ } else if (mask & D_ERROR) {
prefix = dbghdr_to_err_string(hdr);
ptype = KERN_ERR;
- } else if ((mask & D_WARNING) != 0) {
+ } else if (mask & D_WARNING) {
prefix = dbghdr_to_info_string(hdr);
ptype = KERN_WARNING;
- } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) {
+ } else if (mask & (D_CONSOLE | libcfs_printk)) {
prefix = dbghdr_to_info_string(hdr);
ptype = KERN_INFO;
}
- if ((mask & D_CONSOLE) != 0) {
- printk("%s%s: %.*s", ptype, prefix, len, buf);
+ if (mask & D_CONSOLE) {
+ pr_info("%s%s: %.*s", ptype, prefix, len, buf);
} else {
- printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
- hdr->ph_pid, hdr->ph_extern_pid, file, hdr->ph_line_num,
- fn, len, buf);
+ pr_info("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix,
+ hdr->ph_pid, hdr->ph_extern_pid, file,
+ hdr->ph_line_num, fn, len, buf);
}
}
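The pr_err()/pr_info() helpers used above are wrappers that prepend a fixed
level to the format string; a simplified sketch (the real macros in
include/linux/printk.h also route the format through pr_fmt()):

#define pr_err(fmt, ...)  printk(KERN_ERR fmt, ##__VA_ARGS__)
#define pr_info(fmt, ...) printk(KERN_INFO fmt, ##__VA_ARGS__)

Note that pr_info() hard-codes KERN_INFO at the start of the format, so the
level selected at runtime through ptype may no longer control the console
level of these messages.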
diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c
index 86b4d25cad46..161e04226521 100644
--- a/drivers/staging/lustre/lnet/libcfs/module.c
+++ b/drivers/staging/lustre/lnet/libcfs/module.c
@@ -183,12 +183,12 @@ EXPORT_SYMBOL(lprocfs_call_handler);
static int __proc_dobitmasks(void *data, int write,
loff_t pos, void __user *buffer, int nob)
{
- const int tmpstrlen = 512;
- char *tmpstr;
- int rc;
+ const int tmpstrlen = 512;
+ char *tmpstr;
+ int rc;
unsigned int *mask = data;
- int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
- int is_printk = (mask == &libcfs_printk) ? 1 : 0;
+ int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
+ int is_printk = (mask == &libcfs_printk) ? 1 : 0;
rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen);
if (rc < 0)
@@ -293,8 +293,8 @@ static int __proc_cpt_table(void *data, int write,
loff_t pos, void __user *buffer, int nob)
{
char *buf = NULL;
- int len = 4096;
- int rc = 0;
+ int len = 4096;
+ int rc = 0;
if (write)
return -EPERM;
@@ -365,14 +365,6 @@ static struct ctl_table lnet_table[] = {
.mode = 0444,
.proc_handler = &proc_cpt_table,
},
-
- {
- .procname = "upcall",
- .data = lnet_upcall,
- .maxlen = sizeof(lnet_upcall),
- .mode = 0644,
- .proc_handler = &proc_dostring,
- },
{
.procname = "debug_log_upcall",
.data = lnet_debug_log_upcall,
@@ -547,7 +539,7 @@ static int libcfs_init(void)
}
rc = cfs_cpu_init();
- if (rc != 0)
+ if (rc)
goto cleanup_debug;
rc = misc_register(&libcfs_dev);
@@ -566,7 +558,7 @@ static int libcfs_init(void)
rc = min(cfs_cpt_weight(cfs_cpt_table, CFS_CPT_ANY), 4);
rc = cfs_wi_sched_create("cfs_rh", cfs_cpt_table, CFS_CPT_ANY,
rc, &cfs_sched_rehash);
- if (rc != 0) {
+ if (rc) {
CERROR("Startup workitem scheduler: error: %d\n", rc);
goto cleanup_deregister;
}
diff --git a/drivers/staging/lustre/lnet/libcfs/prng.c b/drivers/staging/lustre/lnet/libcfs/prng.c
index a9bdb284fd15..21d5a3912c5f 100644
--- a/drivers/staging/lustre/lnet/libcfs/prng.c
+++ b/drivers/staging/lustre/lnet/libcfs/prng.c
@@ -33,7 +33,7 @@
* x(n)=a*x(n-1)+carry mod 2^16 and y(n)=b*y(n-1)+carry mod 2^16,
* number and carry packed within the same 32 bit integer.
* algorithm recommended by Marsaglia
-*/
+ */
#include "../../include/linux/libcfs/libcfs.h"
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c
index 1c7efdfaffcf..d7b29f8997c0 100644
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.c
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c
@@ -59,13 +59,13 @@ struct page_collection {
* ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
* only ->tcd_pages are spilled.
*/
- int pc_want_daemon_pages;
+ int pc_want_daemon_pages;
};
struct tracefiled_ctl {
struct completion tctl_start;
struct completion tctl_stop;
- wait_queue_head_t tctl_waitq;
+ wait_queue_head_t tctl_waitq;
pid_t tctl_pid;
atomic_t tctl_shutdown;
};
@@ -77,24 +77,24 @@ struct cfs_trace_page {
/*
* page itself
*/
- struct page *page;
+ struct page *page;
/*
* linkage into one of the lists in trace_data_union or
* page_collection
*/
- struct list_head linkage;
+ struct list_head linkage;
/*
* number of bytes used within this page
*/
- unsigned int used;
+ unsigned int used;
/*
* cpu that owns this page
*/
- unsigned short cpu;
+ unsigned short cpu;
/*
* type(context) of this page
*/
- unsigned short type;
+ unsigned short type;
};
static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
@@ -108,7 +108,7 @@ cfs_tage_from_list(struct list_head *list)
static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
{
- struct page *page;
+ struct page *page;
struct cfs_trace_page *tage;
/* My caller is trying to free memory */
@@ -236,7 +236,7 @@ static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
INIT_LIST_HEAD(&pc.pc_pages);
list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
- if (pgcount-- == 0)
+ if (!pgcount--)
break;
list_move_tail(&tage->linkage, &pc.pc_pages);
@@ -278,7 +278,7 @@ int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
const char *format, ...)
{
va_list args;
- int rc;
+ int rc;
va_start(args, format);
rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
@@ -293,21 +293,21 @@ int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
const char *format2, ...)
{
struct cfs_trace_cpu_data *tcd = NULL;
- struct ptldebug_header header = {0};
- struct cfs_trace_page *tage;
+ struct ptldebug_header header = { 0 };
+ struct cfs_trace_page *tage;
/* string_buf is used only if tcd != NULL, and is always set then */
- char *string_buf = NULL;
- char *debug_buf;
- int known_size;
- int needed = 85; /* average message length */
- int max_nob;
- va_list ap;
- int depth;
- int i;
- int remain;
- int mask = msgdata->msg_mask;
- const char *file = kbasename(msgdata->msg_file);
- struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;
+ char *string_buf = NULL;
+ char *debug_buf;
+ int known_size;
+ int needed = 85; /* average message length */
+ int max_nob;
+ va_list ap;
+ int depth;
+ int i;
+ int remain;
+ int mask = msgdata->msg_mask;
+ const char *file = kbasename(msgdata->msg_file);
+ struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;
tcd = cfs_trace_get_tcd();
@@ -320,7 +320,7 @@ int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
if (!tcd) /* arch may not log in IRQ context */
goto console;
- if (tcd->tcd_cur_pages == 0)
+ if (!tcd->tcd_cur_pages)
header.ph_flags |= PH_FLAG_FIRST_RECORD;
if (tcd->tcd_shutting_down) {
@@ -423,7 +423,7 @@ int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
__LASSERT(tage->used <= PAGE_SIZE);
console:
- if ((mask & libcfs_printk) == 0) {
+ if (!(mask & libcfs_printk)) {
/* no console output requested */
if (tcd)
cfs_trace_put_tcd(tcd);
@@ -432,7 +432,7 @@ console:
if (cdls) {
if (libcfs_console_ratelimit &&
- cdls->cdls_next != 0 && /* not first time ever */
+ cdls->cdls_next && /* not first time ever */
!cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
/* skipping a console message */
cdls->cdls_count++;
@@ -489,7 +489,7 @@ console:
put_cpu();
}
- if (cdls && cdls->cdls_count != 0) {
+ if (cdls && cdls->cdls_count) {
string_buf = cfs_trace_get_console_buffer();
needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
@@ -535,9 +535,9 @@ panic_collect_pages(struct page_collection *pc)
* CPUs have been stopped during a panic. If this isn't true for some
* arch, this will have to be implemented separately in each arch.
*/
- int i;
- int j;
struct cfs_trace_cpu_data *tcd;
+ int i;
+ int j;
INIT_LIST_HEAD(&pc->pc_pages);
@@ -698,11 +698,11 @@ void cfs_trace_debug_print(void)
int cfs_tracefile_dump_all_pages(char *filename)
{
- struct page_collection pc;
- struct file *filp;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- char *buf;
+ struct page_collection pc;
+ struct file *filp;
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+ char *buf;
mm_segment_t __oldfs;
int rc;
@@ -778,7 +778,7 @@ void cfs_trace_flush_pages(void)
int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
const char __user *usr_buffer, int usr_buffer_nob)
{
- int nob;
+ int nob;
if (usr_buffer_nob > knl_buffer_nob)
return -EOVERFLOW;
@@ -810,7 +810,7 @@ int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
* NB if 'append' != NULL, it's a single character to append to the
* copied out string - usually "\n" or "" (i.e. a terminating zero byte)
*/
- int nob = strlen(knl_buffer);
+ int nob = strlen(knl_buffer);
if (nob > usr_buffer_nob)
nob = usr_buffer_nob;
@@ -843,16 +843,16 @@ int cfs_trace_allocate_string_buffer(char **str, int nob)
int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
{
- char *str;
- int rc;
+ char *str;
+ int rc;
rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc != 0)
+ if (rc)
return rc;
rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
usr_str, usr_str_nob);
- if (rc != 0)
+ if (rc)
goto out;
if (str[0] != '/') {
@@ -867,17 +867,17 @@ out:
int cfs_trace_daemon_command(char *str)
{
- int rc = 0;
+ int rc = 0;
cfs_tracefile_write_lock();
- if (strcmp(str, "stop") == 0) {
+ if (!strcmp(str, "stop")) {
cfs_tracefile_write_unlock();
cfs_trace_stop_thread();
cfs_tracefile_write_lock();
memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
- } else if (strncmp(str, "size=", 5) == 0) {
+ } else if (!strncmp(str, "size=", 5)) {
unsigned long tmp;
rc = kstrtoul(str + 5, 10, &tmp);
@@ -909,15 +909,15 @@ int cfs_trace_daemon_command(char *str)
int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
{
char *str;
- int rc;
+ int rc;
rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc != 0)
+ if (rc)
return rc;
rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
usr_str, usr_str_nob);
- if (rc == 0)
+ if (!rc)
rc = cfs_trace_daemon_command(str);
kfree(str);
@@ -1003,7 +1003,7 @@ static int tracefiled(void *arg)
filp = NULL;
cfs_tracefile_read_lock();
- if (cfs_tracefile[0] != 0) {
+ if (cfs_tracefile[0]) {
filp = filp_open(cfs_tracefile,
O_CREAT | O_RDWR | O_LARGEFILE,
0600);
@@ -1072,7 +1072,7 @@ static int tracefiled(void *arg)
__LASSERT(list_empty(&pc.pc_pages));
end_loop:
if (atomic_read(&tctl->tctl_shutdown)) {
- if (last_loop == 0) {
+ if (!last_loop) {
last_loop = 1;
continue;
} else {
@@ -1135,13 +1135,13 @@ void cfs_trace_stop_thread(void)
int cfs_tracefile_init(int max_pages)
{
struct cfs_trace_cpu_data *tcd;
- int i;
- int j;
- int rc;
- int factor;
+ int i;
+ int j;
+ int rc;
+ int factor;
rc = cfs_tracefile_init_arch();
- if (rc != 0)
+ if (rc)
return rc;
cfs_tcd_for_each(tcd, i, j) {
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h
index d878676bc375..f644cbc5a277 100644
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.h
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.h
@@ -45,7 +45,7 @@ enum cfs_trace_buf_type {
/* trace file lock routines */
#define TRACEFILE_NAME_SIZE 1024
-extern char cfs_tracefile[TRACEFILE_NAME_SIZE];
+extern char cfs_tracefile[TRACEFILE_NAME_SIZE];
extern long long cfs_tracefile_size;
void libcfs_run_debug_log_upcall(char *file);
@@ -80,7 +80,7 @@ int cfs_trace_get_debug_mb(void);
void libcfs_debug_dumplog_internal(void *arg);
void libcfs_register_panic_notifier(void);
void libcfs_unregister_panic_notifier(void);
-extern int libcfs_panic_in_progress;
+extern int libcfs_panic_in_progress;
int cfs_trace_max_debug_mb(void);
#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
@@ -113,14 +113,14 @@ union cfs_trace_data_union {
* tcd_for_each_type_lock
*/
spinlock_t tcd_lock;
- unsigned long tcd_lock_flags;
+ unsigned long tcd_lock_flags;
/*
* pages with trace records not yet processed by tracefiled.
*/
- struct list_head tcd_pages;
+ struct list_head tcd_pages;
/* number of pages on ->tcd_pages */
- unsigned long tcd_cur_pages;
+ unsigned long tcd_cur_pages;
/*
* pages with trace records already processed by
@@ -132,9 +132,9 @@ union cfs_trace_data_union {
* (put_pages_on_daemon_list()). LRU pages from this list are
* discarded when list grows too large.
*/
- struct list_head tcd_daemon_pages;
+ struct list_head tcd_daemon_pages;
/* number of pages on ->tcd_daemon_pages */
- unsigned long tcd_cur_daemon_pages;
+ unsigned long tcd_cur_daemon_pages;
/*
* Maximal number of pages allowed on ->tcd_pages and
@@ -142,7 +142,7 @@ union cfs_trace_data_union {
* Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
* implementation.
*/
- unsigned long tcd_max_pages;
+ unsigned long tcd_max_pages;
/*
* preallocated pages to write trace records into. Pages from
@@ -166,15 +166,15 @@ union cfs_trace_data_union {
* TCD_STOCK_PAGES pagesful are consumed by trace records all
* emitted in non-blocking contexts. Which is quite unlikely.
*/
- struct list_head tcd_stock_pages;
+ struct list_head tcd_stock_pages;
/* number of pages on ->tcd_stock_pages */
- unsigned long tcd_cur_stock_pages;
+ unsigned long tcd_cur_stock_pages;
- unsigned short tcd_shutting_down;
- unsigned short tcd_cpu;
- unsigned short tcd_type;
+ unsigned short tcd_shutting_down;
+ unsigned short tcd_cpu;
+ unsigned short tcd_type;
/* The factors to share debug memory. */
- unsigned short tcd_pages_factor;
+ unsigned short tcd_pages_factor;
} tcd;
char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))];
};
diff --git a/drivers/staging/lustre/lnet/libcfs/workitem.c b/drivers/staging/lustre/lnet/libcfs/workitem.c
index e98c818a14fb..d0512da6bcde 100644
--- a/drivers/staging/lustre/lnet/libcfs/workitem.c
+++ b/drivers/staging/lustre/lnet/libcfs/workitem.c
@@ -45,7 +45,7 @@ struct cfs_wi_sched {
/* chain on global list */
struct list_head ws_list;
/** serialised workitems */
- spinlock_t ws_lock;
+ spinlock_t ws_lock;
/** where schedulers sleep */
wait_queue_head_t ws_waitq;
/** concurrent workitems */
@@ -59,26 +59,26 @@ struct cfs_wi_sched {
*/
struct list_head ws_rerunq;
/** CPT-table for this scheduler */
- struct cfs_cpt_table *ws_cptab;
+ struct cfs_cpt_table *ws_cptab;
/** CPT id for affinity */
- int ws_cpt;
+ int ws_cpt;
/** number of scheduled workitems */
- int ws_nscheduled;
+ int ws_nscheduled;
/** started scheduler thread, protected by cfs_wi_data::wi_glock */
- unsigned int ws_nthreads:30;
+ unsigned int ws_nthreads:30;
/** shutting down, protected by cfs_wi_data::wi_glock */
- unsigned int ws_stopping:1;
+ unsigned int ws_stopping:1;
/** serialize starting thread, protected by cfs_wi_data::wi_glock */
- unsigned int ws_starting:1;
+ unsigned int ws_starting:1;
/** scheduler name */
- char ws_name[CFS_WS_NAME_LEN];
+ char ws_name[CFS_WS_NAME_LEN];
};
static struct cfs_workitem_data {
/** serialize */
spinlock_t wi_glock;
/** list of all schedulers */
- struct list_head wi_scheds;
+ struct list_head wi_scheds;
/** WI module is initialized */
int wi_init;
/** shutting down the whole WI module */
@@ -136,7 +136,7 @@ EXPORT_SYMBOL(cfs_wi_exit);
int
cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
- int rc;
+ int rc;
LASSERT(!in_interrupt()); /* because we use plain spinlock */
LASSERT(!sched->ws_stopping);
@@ -202,13 +202,13 @@ EXPORT_SYMBOL(cfs_wi_schedule);
static int cfs_wi_scheduler(void *arg)
{
- struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;
+ struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;
cfs_block_allsigs();
/* CPT affinity scheduler? */
if (sched->ws_cptab)
- if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
+ if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt))
CWARN("Failed to bind %s on CPT %d\n",
sched->ws_name, sched->ws_cpt);
@@ -223,8 +223,8 @@ static int cfs_wi_scheduler(void *arg)
spin_lock(&sched->ws_lock);
while (!sched->ws_stopping) {
- int nloops = 0;
- int rc;
+ int nloops = 0;
+ int rc;
struct cfs_workitem *wi;
while (!list_empty(&sched->ws_runq) &&
@@ -238,16 +238,16 @@ static int cfs_wi_scheduler(void *arg)
LASSERT(sched->ws_nscheduled > 0);
sched->ws_nscheduled--;
- wi->wi_running = 1;
+ wi->wi_running = 1;
wi->wi_scheduled = 0;
spin_unlock(&sched->ws_lock);
nloops++;
- rc = (*wi->wi_action) (wi);
+ rc = (*wi->wi_action)(wi);
spin_lock(&sched->ws_lock);
- if (rc != 0) /* WI should be dead, even be freed! */
+ if (rc) /* WI should be dead, even be freed! */
continue;
wi->wi_running = 0;
@@ -273,7 +273,7 @@ static int cfs_wi_scheduler(void *arg)
spin_unlock(&sched->ws_lock);
rc = wait_event_interruptible_exclusive(sched->ws_waitq,
- !cfs_wi_sched_cansleep(sched));
+ !cfs_wi_sched_cansleep(sched));
spin_lock(&sched->ws_lock);
}
@@ -289,7 +289,7 @@ static int cfs_wi_scheduler(void *arg)
void
cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
- int i;
+ int i;
LASSERT(cfs_wi_data.wi_init);
LASSERT(!cfs_wi_data.wi_stopping);
@@ -325,7 +325,7 @@ cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
list_del(&sched->ws_list);
spin_unlock(&cfs_wi_data.wi_glock);
- LASSERT(sched->ws_nscheduled == 0);
+ LASSERT(!sched->ws_nscheduled);
LIBCFS_FREE(sched, sizeof(*sched));
}
@@ -335,8 +335,8 @@ int
cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
{
- struct cfs_wi_sched *sched;
- int rc;
+ struct cfs_wi_sched *sched;
+ int rc;
LASSERT(cfs_wi_data.wi_init);
LASSERT(!cfs_wi_data.wi_stopping);
@@ -364,7 +364,7 @@ cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
rc = 0;
while (nthrs > 0) {
- char name[16];
+ char name[16];
struct task_struct *task;
spin_lock(&cfs_wi_data.wi_glock);
@@ -431,7 +431,7 @@ cfs_wi_startup(void)
void
cfs_wi_shutdown(void)
{
- struct cfs_wi_sched *sched;
+ struct cfs_wi_sched *sched;
struct cfs_wi_sched *temp;
spin_lock(&cfs_wi_data.wi_glock);
@@ -447,7 +447,7 @@ cfs_wi_shutdown(void)
list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
spin_lock(&cfs_wi_data.wi_glock);
- while (sched->ws_nthreads != 0) {
+ while (sched->ws_nthreads) {
spin_unlock(&cfs_wi_data.wi_glock);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(cfs_time_seconds(1) / 20);
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 4daf828198c3..b2ba10d59f84 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -1551,16 +1551,16 @@ LNetNIInit(lnet_pid_t requested_pid)
rc = lnet_check_routes();
if (rc)
- goto err_destory_routes;
+ goto err_destroy_routes;
rc = lnet_rtrpools_alloc(im_a_router);
if (rc)
- goto err_destory_routes;
+ goto err_destroy_routes;
}
rc = lnet_acceptor_start();
if (rc)
- goto err_destory_routes;
+ goto err_destroy_routes;
the_lnet.ln_refcount = 1;
/* Now I may use my own API functions... */
@@ -1587,7 +1587,7 @@ err_stop_ping:
err_acceptor_stop:
the_lnet.ln_refcount = 0;
lnet_acceptor_stop();
-err_destory_routes:
+err_destroy_routes:
if (!the_lnet.ln_nis_from_mod_params)
lnet_destroy_routes();
err_shutdown_lndnis:
diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c
index b430046dc294..eb796a86e6ab 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-me.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-me.c
@@ -271,21 +271,3 @@ lnet_me_unlink(lnet_me_t *me)
lnet_res_lh_invalidate(&me->me_lh);
lnet_me_free(me);
}
-
-#if 0
-static void
-lib_me_dump(lnet_me_t *me)
-{
- CWARN("Match Entry %p (%#llx)\n", me,
- me->me_lh.lh_cookie);
-
- CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
- me->me_match_bits, me->me_ignore_bits);
-
- CWARN("\tMD\t= %p\n", me->md);
- CWARN("\tprev\t= %p\n",
- list_entry(me->me_list.prev, lnet_me_t, me_list));
- CWARN("\tnext\t= %p\n",
- list_entry(me->me_list.next, lnet_me_t, me_list));
-}
-#endif
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index 48e6f8f2392f..f3dd6e42f4d4 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -192,6 +192,7 @@ lnet_copy_iov2iter(struct iov_iter *to,
left = siov->iov_len - soffset;
do {
size_t n, copy = left;
+
LASSERT(nsiov > 0);
if (copy > nob)
diff --git a/drivers/staging/lustre/lnet/lnet/nidstrings.c b/drivers/staging/lustre/lnet/lnet/nidstrings.c
index a6d7a6159b8f..a9fe3e69daae 100644
--- a/drivers/staging/lustre/lnet/lnet/nidstrings.c
+++ b/drivers/staging/lustre/lnet/lnet/nidstrings.c
@@ -193,7 +193,7 @@ add_nidrange(const struct cfs_lstr *src,
struct netstrfns *nf;
struct nidrange *nr;
int endlen;
- unsigned netnum;
+ unsigned int netnum;
if (src->ls_len >= LNET_NIDSTR_SIZE)
return NULL;
@@ -247,10 +247,8 @@ parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist)
{
struct cfs_lstr addrrange;
struct cfs_lstr net;
- struct cfs_lstr tmp;
struct nidrange *nr;
- tmp = *src;
if (!cfs_gettok(src, '@', &addrrange))
goto failed;
@@ -1156,7 +1154,7 @@ EXPORT_SYMBOL(libcfs_nid2str_r);
static struct netstrfns *
libcfs_str2net_internal(const char *str, __u32 *net)
{
- struct netstrfns *uninitialized_var(nf);
+ struct netstrfns *nf = NULL;
int nob;
unsigned int netnum;
int i;
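For reference on the uninitialized_var() removal above: the macro was a pure
warning suppressor, roughly the following in the period's compiler headers,
so the explicit NULL trades a silenced warning for defined behavior if the
lookup loop falls through:

/* Roughly (hedged), from compiler-gcc.h of this era: */
#define uninitialized_var(x) x = x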
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index 063ad55ec950..8afa0abf15cd 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -903,6 +903,7 @@ lnet_create_rc_data_locked(lnet_peer_t *gateway)
{
lnet_rc_data_t *rcd = NULL;
lnet_ping_info_t *pi;
+ lnet_md_t md;
int rc;
int i;
@@ -925,15 +926,15 @@ lnet_create_rc_data_locked(lnet_peer_t *gateway)
}
rcd->rcd_pinginfo = pi;
+ md.start = pi;
+ md.user_ptr = rcd;
+ md.length = LNET_PINGINFO_SIZE;
+ md.threshold = LNET_MD_THRESH_INF;
+ md.options = LNET_MD_TRUNCATE;
+ md.eq_handle = the_lnet.ln_rc_eqh;
+
LASSERT(!LNetHandleIsInvalid(the_lnet.ln_rc_eqh));
- rc = LNetMDBind((lnet_md_t){.start = pi,
- .user_ptr = rcd,
- .length = LNET_PINGINFO_SIZE,
- .threshold = LNET_MD_THRESH_INF,
- .options = LNET_MD_TRUNCATE,
- .eq_handle = the_lnet.ln_rc_eqh},
- LNET_UNLINK,
- &rcd->rcd_mdh);
+ rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh);
if (rc < 0) {
CERROR("Can't bind MD: %d\n", rc);
goto out;
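A generic C note on the rewrite above (illustration only; struct desc and
literal_vs_assignment are hypothetical, not the LNet API): a
designated-initializer compound literal zero-fills every member it does not
name, while field-by-field assignment to an automatic variable leaves the
untouched members indeterminate, so the two forms are equivalent only if
every member LNetMDBind() reads is assigned:

struct desc { int a; int b; int c; };

void literal_vs_assignment(void (*use)(struct desc))
{
	struct desc x = { .a = 1 };	/* .b and .c are zeroed */
	struct desc y;			/* automatic storage */

	y.a = 1;			/* .b and .c stay indeterminate */
	use(x);
	use(y);
}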
diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c
index b20c5d394e3b..67b460f41d6e 100644
--- a/drivers/staging/lustre/lnet/selftest/brw_test.c
+++ b/drivers/staging/lustre/lnet/selftest/brw_test.c
@@ -44,6 +44,10 @@ static int brw_inject_errors;
module_param(brw_inject_errors, int, 0644);
MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default");
+#define BRW_POISON 0xbeefbeefbeefbeefULL
+#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL
+#define BRW_MSIZE sizeof(u64)
+
static void
brw_client_fini(struct sfw_test_instance *tsi)
{
@@ -67,6 +71,7 @@ brw_client_init(struct sfw_test_instance *tsi)
{
struct sfw_session *sn = tsi->tsi_batch->bat_session;
int flags;
+ int off;
int npg;
int len;
int opc;
@@ -87,6 +92,7 @@ brw_client_init(struct sfw_test_instance *tsi)
* but we have to keep it for compatibility
*/
len = npg * PAGE_SIZE;
+ off = 0;
} else {
struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
@@ -99,9 +105,13 @@ brw_client_init(struct sfw_test_instance *tsi)
opc = breq->blk_opc;
flags = breq->blk_flags;
len = breq->blk_len;
- npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ off = breq->blk_offset & ~PAGE_MASK;
+ npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
}
+ if (off % BRW_MSIZE)
+ return -EINVAL;
+
if (npg > LNET_MAX_IOV || npg <= 0)
return -EINVAL;
@@ -114,7 +124,7 @@ brw_client_init(struct sfw_test_instance *tsi)
list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
- npg, len, opc == LST_BRW_READ);
+ off, npg, len, opc == LST_BRW_READ);
if (!bulk) {
brw_client_fini(tsi);
return -ENOMEM;
@@ -126,12 +136,7 @@ brw_client_init(struct sfw_test_instance *tsi)
return 0;
}
-#define BRW_POISON 0xbeefbeefbeefbeefULL
-#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL
-#define BRW_MSIZE sizeof(__u64)
-
-static int
-brw_inject_one_error(void)
+int brw_inject_one_error(void)
{
struct timespec64 ts;
@@ -147,12 +152,13 @@ brw_inject_one_error(void)
}
static void
-brw_fill_page(struct page *pg, int pattern, __u64 magic)
+brw_fill_page(struct page *pg, int off, int len, int pattern, __u64 magic)
{
- char *addr = page_address(pg);
+ char *addr = page_address(pg) + off;
int i;
LASSERT(addr);
+ LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
if (pattern == LST_BRW_CHECK_NONE)
return;
@@ -162,14 +168,16 @@ brw_fill_page(struct page *pg, int pattern, __u64 magic)
if (pattern == LST_BRW_CHECK_SIMPLE) {
memcpy(addr, &magic, BRW_MSIZE);
- addr += PAGE_SIZE - BRW_MSIZE;
- memcpy(addr, &magic, BRW_MSIZE);
+ if (len > BRW_MSIZE) {
+ addr += PAGE_SIZE - BRW_MSIZE;
+ memcpy(addr, &magic, BRW_MSIZE);
+ }
return;
}
if (pattern == LST_BRW_CHECK_FULL) {
- for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++)
- memcpy(addr + i * BRW_MSIZE, &magic, BRW_MSIZE);
+ for (i = 0; i < len; i += BRW_MSIZE)
+ memcpy(addr + i, &magic, BRW_MSIZE);
return;
}
@@ -177,13 +185,14 @@ brw_fill_page(struct page *pg, int pattern, __u64 magic)
}
static int
-brw_check_page(struct page *pg, int pattern, __u64 magic)
+brw_check_page(struct page *pg, int off, int len, int pattern, __u64 magic)
{
- char *addr = page_address(pg);
+ char *addr = page_address(pg) + off;
__u64 data = 0; /* make compiler happy */
int i;
LASSERT(addr);
+ LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE));
if (pattern == LST_BRW_CHECK_NONE)
return 0;
@@ -193,21 +202,21 @@ brw_check_page(struct page *pg, int pattern, __u64 magic)
if (data != magic)
goto bad_data;
- addr += PAGE_SIZE - BRW_MSIZE;
- data = *((__u64 *)addr);
- if (data != magic)
- goto bad_data;
-
+ if (len > BRW_MSIZE) {
+ addr += PAGE_SIZE - BRW_MSIZE;
+ data = *((__u64 *)addr);
+ if (data != magic)
+ goto bad_data;
+ }
return 0;
}
if (pattern == LST_BRW_CHECK_FULL) {
- for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++) {
- data = *(((__u64 *)addr) + i);
+ for (i = 0; i < len; i += BRW_MSIZE) {
+ data = *(u64 *)(addr + i);
if (data != magic)
goto bad_data;
}
-
return 0;
}
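A worked illustration of the two check patterns, mirroring the fill side
(assumed values: PAGE_SIZE = 4096, BRW_MSIZE = 8, off = 0, len = 2048):

/* LST_BRW_CHECK_SIMPLE: magic expected at page + 0 and, since
 *                       len > BRW_MSIZE, at page + 4096 - 8 = 4088.
 * LST_BRW_CHECK_FULL:   magic expected every 8 bytes in [off, off + len),
 *                       i.e. at page + 0, 8, ..., 2040: 256 copies.
 */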
@@ -226,8 +235,12 @@ brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
struct page *pg;
for (i = 0; i < bk->bk_niov; i++) {
+ int off, len;
+
pg = bk->bk_iovs[i].bv_page;
- brw_fill_page(pg, pattern, magic);
+ off = bk->bk_iovs[i].bv_offset;
+ len = bk->bk_iovs[i].bv_len;
+ brw_fill_page(pg, off, len, pattern, magic);
}
}
@@ -238,8 +251,12 @@ brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
struct page *pg;
for (i = 0; i < bk->bk_niov; i++) {
+ int off, len;
+
pg = bk->bk_iovs[i].bv_page;
- if (brw_check_page(pg, pattern, magic)) {
+ off = bk->bk_iovs[i].bv_offset;
+ len = bk->bk_iovs[i].bv_len;
+ if (brw_check_page(pg, off, len, pattern, magic)) {
CERROR("Bulk page %p (%d/%d) is corrupted!\n",
pg, i, bk->bk_niov);
return 1;
@@ -276,6 +293,7 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu,
len = npg * PAGE_SIZE;
} else {
struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
+ int off;
/*
* I should never get to this step if it's an unknown feature
@@ -286,7 +304,8 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu,
opc = breq->blk_opc;
flags = breq->blk_flags;
len = breq->blk_len;
- npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ off = breq->blk_offset;
+ npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
}
rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
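A worked pass through the page-count formula above (assumed values;
PAGE_SIZE = 4096, so PAGE_SHIFT = 12):

/* off = 1024, len = 8192:
 *   npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT
 *       = (1024 + 8192 + 4095) >> 12
 *       = 3
 * An aligned transfer of the same length needs only 2 pages; a
 * misaligned start spills the tail into one extra page.
 */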
diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c
index b786f8b4a73d..94383023c1be 100644
--- a/drivers/staging/lustre/lnet/selftest/conctl.c
+++ b/drivers/staging/lustre/lnet/selftest/conctl.c
@@ -315,7 +315,7 @@ lst_group_update_ioctl(lstio_group_update_args_t *args)
static int
lst_nodes_add_ioctl(lstio_group_nodes_args_t *args)
{
- unsigned feats;
+ unsigned int feats;
int rc;
char *name;
@@ -742,6 +742,10 @@ static int lst_test_add_ioctl(lstio_test_args_t *args)
PAGE_SIZE - sizeof(struct lstcon_test)))
return -EINVAL;
+ /* Enforce zero parameter length if there's no parameter */
+ if (!args->lstio_tes_param && args->lstio_tes_param_len)
+ return -EINVAL;
+
LIBCFS_ALLOC(batch_name, args->lstio_tes_bat_nmlen + 1);
if (!batch_name)
return rc;
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
index 55afb53b0743..994422c62487 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c
@@ -86,8 +86,9 @@ lstcon_rpc_done(struct srpc_client_rpc *rpc)
}
static int
-lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned feats,
- int bulk_npg, int bulk_len, int embedded, struct lstcon_rpc *crpc)
+lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned int feats,
+ int bulk_npg, int bulk_len, int embedded,
+ struct lstcon_rpc *crpc)
{
crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service,
feats, bulk_npg, bulk_len,
@@ -111,7 +112,7 @@ lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned feats,
}
static int
-lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned feats,
+lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned int feats,
int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp)
{
struct lstcon_rpc *crpc = NULL;
@@ -292,8 +293,8 @@ lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error)
spin_lock(&rpc->crpc_lock);
- if (!crpc->crp_posted || /* not posted */
- crpc->crp_stamp) { /* rpc done or aborted already */
+ if (!crpc->crp_posted || /* not posted */
+ crpc->crp_stamp) { /* rpc done or aborted already */
if (!crpc->crp_stamp) {
crpc->crp_stamp = cfs_time_current();
crpc->crp_status = -EINTR;
@@ -589,7 +590,7 @@ lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans)
int
lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
- unsigned feats, struct lstcon_rpc **crpc)
+ unsigned int feats, struct lstcon_rpc **crpc)
{
struct srpc_mksn_reqst *msrq;
struct srpc_rmsn_reqst *rsrq;
@@ -627,7 +628,8 @@ lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
}
int
-lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **crpc)
+lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned int feats,
+ struct lstcon_rpc **crpc)
{
struct srpc_debug_reqst *drq;
int rc;
@@ -645,7 +647,7 @@ lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **c
}
int
-lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned feats,
+lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc)
{
struct lstcon_batch *batch;
@@ -678,7 +680,8 @@ lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned feats,
}
int
-lstcon_statrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **crpc)
+lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int feats,
+ struct lstcon_rpc **crpc)
{
struct srpc_stat_reqst *srq;
int rc;
@@ -776,7 +779,8 @@ lstcon_pingrpc_prep(lst_test_ping_param_t *param, struct srpc_test_reqst *req)
}
static int
-lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req)
+lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param,
+ struct srpc_test_reqst *req)
{
struct test_bulk_req *brq = &req->tsr_u.bulk_v0;
@@ -789,20 +793,21 @@ lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req
}
static int
-lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req)
+lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, bool is_client,
+ struct srpc_test_reqst *req)
{
struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1;
brq->blk_opc = param->blk_opc;
brq->blk_flags = param->blk_flags;
brq->blk_len = param->blk_size;
- brq->blk_offset = 0; /* reserved */
+ brq->blk_offset = is_client ? param->blk_cli_off : param->blk_srv_off;
return 0;
}
int
-lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats,
+lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats,
struct lstcon_test *test, struct lstcon_rpc **crpc)
{
struct lstcon_group *sgrp = test->tes_src_grp;
@@ -897,7 +902,8 @@ lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats,
&test->tes_param[0], trq);
} else {
rc = lstcon_bulkrpc_v1_prep((lst_test_bulk_param_t *)
- &test->tes_param[0], trq);
+ &test->tes_param[0],
+ trq->tsr_is_client, trq);
}
break;
@@ -1084,7 +1090,7 @@ lstcon_rpc_trans_ndlist(struct list_head *ndlist,
struct lstcon_ndlink *ndl;
struct lstcon_node *nd;
struct lstcon_rpc *rpc;
- unsigned feats;
+ unsigned int feats;
int rc;
/* Creating session RPC for list of nodes */
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h
index 7ec6fc96959e..e629e87c461c 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.h
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.h
@@ -78,8 +78,8 @@ struct lstcon_rpc_trans {
struct list_head tas_olink; /* link chain on owner list */
struct list_head tas_link; /* link chain on global list */
int tas_opc; /* operation code of transaction */
- unsigned tas_feats_updated; /* features mask is uptodate */
- unsigned tas_features; /* test features mask */
+ unsigned int tas_feats_updated; /* features mask is uptodate */
+ unsigned int tas_features; /* test features mask */
wait_queue_head_t tas_waitq; /* wait queue head */
atomic_t tas_remaining; /* # of un-scheduled rpcs */
struct list_head tas_rpcs_list; /* queued requests */
@@ -106,14 +106,16 @@ typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *,
lstcon_rpc_ent_t __user *);
int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
- unsigned version, struct lstcon_rpc **crpc);
+ unsigned int version, struct lstcon_rpc **crpc);
int lstcon_dbgrpc_prep(struct lstcon_node *nd,
- unsigned version, struct lstcon_rpc **crpc);
-int lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
- struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc);
-int lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
- struct lstcon_test *test, struct lstcon_rpc **crpc);
-int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned version,
+ unsigned int version, struct lstcon_rpc **crpc);
+int lstcon_batrpc_prep(struct lstcon_node *nd, int transop,
+ unsigned int version, struct lstcon_tsb_hdr *tsb,
+ struct lstcon_rpc **crpc);
+int lstcon_testrpc_prep(struct lstcon_node *nd, int transop,
+ unsigned int version, struct lstcon_test *test,
+ struct lstcon_rpc **crpc);
+int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int version,
struct lstcon_rpc **crpc);
void lstcon_rpc_put(struct lstcon_rpc *crpc);
int lstcon_rpc_trans_prep(struct list_head *translist,
@@ -129,7 +131,8 @@ int lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
lstcon_rpc_readent_func_t readent);
void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error);
void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans);
-void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *req);
+void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans,
+ struct lstcon_rpc *req);
int lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout);
int lstcon_rpc_pinger_start(void);
void lstcon_rpc_pinger_stop(void);
diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c
index a0fcbf3bcc95..1456d2395cc9 100644
--- a/drivers/staging/lustre/lnet/selftest/console.c
+++ b/drivers/staging/lustre/lnet/selftest/console.c
@@ -86,7 +86,7 @@ lstcon_node_find(lnet_process_id_t id, struct lstcon_node **ndpp, int create)
if (!create)
return -ENOENT;
- LIBCFS_ALLOC(*ndpp, sizeof(struct lstcon_node) + sizeof(struct lstcon_ndlink));
+ LIBCFS_ALLOC(*ndpp, sizeof(**ndpp) + sizeof(*ndl));
if (!*ndpp)
return -ENOMEM;
@@ -131,12 +131,12 @@ lstcon_node_put(struct lstcon_node *nd)
list_del(&ndl->ndl_link);
list_del(&ndl->ndl_hlink);
- LIBCFS_FREE(nd, sizeof(struct lstcon_node) + sizeof(struct lstcon_ndlink));
+ LIBCFS_FREE(nd, sizeof(*nd) + sizeof(*ndl));
}
static int
-lstcon_ndlink_find(struct list_head *hash,
- lnet_process_id_t id, struct lstcon_ndlink **ndlpp, int create)
+lstcon_ndlink_find(struct list_head *hash, lnet_process_id_t id,
+ struct lstcon_ndlink **ndlpp, int create)
{
unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
struct lstcon_ndlink *ndl;
@@ -230,7 +230,8 @@ lstcon_group_addref(struct lstcon_group *grp)
grp->grp_ref++;
}
-static void lstcon_group_ndlink_release(struct lstcon_group *, struct lstcon_ndlink *);
+static void lstcon_group_ndlink_release(struct lstcon_group *,
+ struct lstcon_ndlink *);
static void
lstcon_group_drain(struct lstcon_group *grp, int keep)
@@ -397,7 +398,8 @@ lstcon_sesrpc_readent(int transop, struct srpc_msg *msg,
static int
lstcon_group_nodes_add(struct lstcon_group *grp,
int count, lnet_process_id_t __user *ids_up,
- unsigned *featp, struct list_head __user *result_up)
+ unsigned int *featp,
+ struct list_head __user *result_up)
{
struct lstcon_rpc_trans *trans;
struct lstcon_ndlink *ndl;
@@ -542,7 +544,8 @@ lstcon_group_add(char *name)
int
lstcon_nodes_add(char *name, int count, lnet_process_id_t __user *ids_up,
- unsigned *featp, struct list_head __user *result_up)
+ unsigned int *featp,
+ struct list_head __user *result_up)
{
struct lstcon_group *grp;
int rc;
@@ -820,7 +823,7 @@ lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gents_p,
lstcon_group_decref(grp);
- return 0;
+ return rc;
}
static int
@@ -1181,7 +1184,8 @@ lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg)
}
static int
-lstcon_test_nodes_add(struct lstcon_test *test, struct list_head __user *result_up)
+lstcon_test_nodes_add(struct lstcon_test *test,
+ struct list_head __user *result_up)
{
struct lstcon_rpc_trans *trans;
struct lstcon_group *grp;
@@ -1364,7 +1368,8 @@ out:
}
static int
-lstcon_test_find(struct lstcon_batch *batch, int idx, struct lstcon_test **testpp)
+lstcon_test_find(struct lstcon_batch *batch, int idx,
+ struct lstcon_test **testpp)
{
struct lstcon_test *test;
@@ -1702,7 +1707,7 @@ lstcon_new_session_id(lst_sid_t *sid)
}
int
-lstcon_session_new(char *name, int key, unsigned feats,
+lstcon_session_new(char *name, int key, unsigned int feats,
int timeout, int force, lst_sid_t __user *sid_up)
{
int rc = 0;
@@ -1868,7 +1873,7 @@ lstcon_session_end(void)
}
int
-lstcon_session_feats_check(unsigned feats)
+lstcon_session_feats_check(unsigned int feats)
{
int rc = 0;
diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h
index 78388a611c22..5dc1de48a10e 100644
--- a/drivers/staging/lustre/lnet/selftest/console.h
+++ b/drivers/staging/lustre/lnet/selftest/console.h
@@ -92,14 +92,16 @@ struct lstcon_batch {
int bat_ntest; /* # of test */
int bat_state; /* state of the batch */
int bat_arg; /* parameter for run|stop, timeout
- * for run, force for stop */
+ * for run, force for stop
+ */
char bat_name[LST_NAME_SIZE];/* name of batch */
struct list_head bat_test_list; /* list head of tests (struct lstcon_test)
*/
struct list_head bat_trans_list; /* list head of transaction */
struct list_head bat_cli_list; /* list head of client nodes
- * (struct lstcon_node) */
+ * (struct lstcon_node)
+ */
struct list_head *bat_cli_hash; /* hash table of client nodes */
struct list_head bat_srv_list; /* list head of server nodes */
struct list_head *bat_srv_hash; /* hash table of server nodes */
@@ -144,13 +146,14 @@ struct lstcon_session {
int ses_timeout; /* timeout in seconds */
time64_t ses_laststamp; /* last operation stamp (seconds)
*/
- unsigned ses_features; /* tests features of the session
+ unsigned int ses_features; /* tests features of the session
*/
- unsigned ses_feats_updated:1; /* features are synced with
- * remote test nodes */
- unsigned ses_force:1; /* force creating */
- unsigned ses_shutdown:1; /* session is shutting down */
- unsigned ses_expired:1; /* console is timedout */
+ unsigned int ses_feats_updated:1; /* features are synced with
+ * remote test nodes
+ */
+ unsigned int ses_force:1; /* force creating */
+ unsigned int ses_shutdown:1; /* session is shutting down */
+ unsigned int ses_expired:1; /* console is timedout */
__u64 ses_id_cookie; /* batch id cookie */
char ses_name[LST_NAME_SIZE];/* session name */
struct lstcon_rpc_trans *ses_ping; /* session pinger */
@@ -188,14 +191,14 @@ int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
int lstcon_console_init(void);
int lstcon_console_fini(void);
int lstcon_session_match(lst_sid_t sid);
-int lstcon_session_new(char *name, int key, unsigned version,
+int lstcon_session_new(char *name, int key, unsigned int version,
int timeout, int flags, lst_sid_t __user *sid_up);
int lstcon_session_info(lst_sid_t __user *sid_up, int __user *key,
unsigned __user *verp, lstcon_ndlist_ent_t __user *entp,
char __user *name_up, int len);
int lstcon_session_end(void);
int lstcon_session_debug(int timeout, struct list_head __user *result_up);
-int lstcon_session_feats_check(unsigned feats);
+int lstcon_session_feats_check(unsigned int feats);
int lstcon_batch_debug(int timeout, char *name,
int client, struct list_head __user *result_up);
int lstcon_group_debug(int timeout, char *name,
@@ -207,7 +210,7 @@ int lstcon_group_del(char *name);
int lstcon_group_clean(char *name, int args);
int lstcon_group_refresh(char *name, struct list_head __user *result_up);
int lstcon_nodes_add(char *name, int nnd, lnet_process_id_t __user *nds_up,
- unsigned *featp, struct list_head __user *result_up);
+ unsigned int *featp, struct list_head __user *result_up);
int lstcon_nodes_remove(char *name, int nnd, lnet_process_id_t __user *nds_up,
struct list_head __user *result_up);
int lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gent_up,
diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c
index abbd6287b4bd..48dcc330dc9b 100644
--- a/drivers/staging/lustre/lnet/selftest/framework.c
+++ b/drivers/staging/lustre/lnet/selftest/framework.c
@@ -131,7 +131,8 @@ sfw_find_test_case(int id)
}
static int
-sfw_register_test(struct srpc_service *service, struct sfw_test_client_ops *cliops)
+sfw_register_test(struct srpc_service *service,
+ struct sfw_test_client_ops *cliops)
{
struct sfw_test_case *tsc;
@@ -254,7 +255,7 @@ sfw_session_expired(void *data)
static inline void
sfw_init_session(struct sfw_session *sn, lst_sid_t sid,
- unsigned features, const char *name)
+ unsigned int features, const char *name)
{
struct stt_timer *timer = &sn->sn_timer;
@@ -469,7 +470,8 @@ sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply)
}
static int
-sfw_remove_session(struct srpc_rmsn_reqst *request, struct srpc_rmsn_reply *reply)
+sfw_remove_session(struct srpc_rmsn_reqst *request,
+ struct srpc_rmsn_reply *reply)
{
struct sfw_session *sn = sfw_data.fw_session;
@@ -501,7 +503,8 @@ sfw_remove_session(struct srpc_rmsn_reqst *request, struct srpc_rmsn_reply *repl
}
static int
-sfw_debug_session(struct srpc_debug_reqst *request, struct srpc_debug_reply *reply)
+sfw_debug_session(struct srpc_debug_reqst *request,
+ struct srpc_debug_reply *reply)
{
struct sfw_session *sn = sfw_data.fw_session;
@@ -897,7 +900,7 @@ sfw_test_rpc_done(struct srpc_client_rpc *rpc)
int
sfw_create_test_rpc(struct sfw_test_unit *tsu, lnet_process_id_t peer,
- unsigned features, int nblk, int blklen,
+ unsigned int features, int nblk, int blklen,
struct srpc_client_rpc **rpcpp)
{
struct srpc_client_rpc *rpc = NULL;
@@ -1064,7 +1067,8 @@ sfw_stop_batch(struct sfw_batch *tsb, int force)
}
static int
-sfw_query_batch(struct sfw_batch *tsb, int testidx, struct srpc_batch_reply *reply)
+sfw_query_batch(struct sfw_batch *tsb, int testidx,
+ struct srpc_batch_reply *reply)
{
struct sfw_test_instance *tsi;
@@ -1101,7 +1105,7 @@ sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
LASSERT(!rpc->srpc_bulk);
LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
- rpc->srpc_bulk = srpc_alloc_bulk(cpt, npages, len, sink);
+ rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink);
if (!rpc->srpc_bulk)
return -ENOMEM;
@@ -1179,7 +1183,8 @@ sfw_add_test(struct srpc_server_rpc *rpc)
}
static int
-sfw_control_batch(struct srpc_batch_reqst *request, struct srpc_batch_reply *reply)
+sfw_control_batch(struct srpc_batch_reqst *request,
+ struct srpc_batch_reply *reply)
{
struct sfw_session *sn = sfw_data.fw_session;
int rc = 0;
@@ -1225,7 +1230,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
struct srpc_service *sv = rpc->srpc_scd->scd_svc;
struct srpc_msg *reply = &rpc->srpc_replymsg;
struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg;
- unsigned features = LST_FEATS_MASK;
+ unsigned int features = LST_FEATS_MASK;
int rc = 0;
LASSERT(!sfw_data.fw_active_srpc);
@@ -1375,7 +1380,7 @@ sfw_bulk_ready(struct srpc_server_rpc *rpc, int status)
struct srpc_client_rpc *
sfw_create_rpc(lnet_process_id_t peer, int service,
- unsigned features, int nbulkiov, int bulklen,
+ unsigned int features, int nbulkiov, int bulklen,
void (*done)(struct srpc_client_rpc *), void *priv)
{
struct srpc_client_rpc *rpc = NULL;
diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c
index 9331ca4e3606..b9601b00a273 100644
--- a/drivers/staging/lustre/lnet/selftest/ping_test.c
+++ b/drivers/staging/lustre/lnet/selftest/ping_test.c
@@ -159,8 +159,8 @@ ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
ktime_get_real_ts64(&ts);
CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq,
- (unsigned)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 +
- (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec)));
+ (unsigned int)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 +
+ (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec)));
}
static int
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
index f5619d8744ef..ce9de8c9be57 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c
@@ -84,14 +84,13 @@ void srpc_set_counters(const srpc_counters_t *cnt)
}
static int
-srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int nob)
+srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off,
+ int nob)
{
- nob = min_t(int, nob, PAGE_SIZE);
+ LASSERT(off < PAGE_SIZE);
+ LASSERT(nob > 0 && nob <= PAGE_SIZE);
- LASSERT(nob > 0);
- LASSERT(i >= 0 && i < bk->bk_niov);
-
- bk->bk_iovs[i].bv_offset = 0;
+ bk->bk_iovs[i].bv_offset = off;
bk->bk_iovs[i].bv_page = pg;
bk->bk_iovs[i].bv_len = nob;
return nob;
@@ -117,7 +116,8 @@ srpc_free_bulk(struct srpc_bulk *bk)
}
struct srpc_bulk *
-srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink)
+srpc_alloc_bulk(int cpt, unsigned int bulk_off, unsigned int bulk_npg,
+ unsigned int bulk_len, int sink)
{
struct srpc_bulk *bk;
int i;
@@ -148,8 +148,11 @@ srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink)
return NULL;
}
- nob = srpc_add_bulk_page(bk, pg, i, bulk_len);
+ nob = min_t(unsigned int, bulk_off + bulk_len, PAGE_SIZE) -
+ bulk_off;
+ srpc_add_bulk_page(bk, pg, i, bulk_off, nob);
bulk_len -= nob;
+ bulk_off = 0;
}
return bk;
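A worked pass through the per-page length computation above (assumed:
PAGE_SIZE = 4096, bulk_off = 1000, bulk_len = 10000):

/* page 0: nob = min(1000 + 10000, 4096) - 1000 = 3096; bulk_off -> 0
 * page 1: nob = min(6904, 4096) = 4096
 * page 2: nob = min(2808, 4096) = 2808
 *
 * 3096 + 4096 + 2808 = 10000; only the first iovec carries an offset,
 * which is why bulk_off is zeroed after the first iteration.
 */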
@@ -693,7 +696,8 @@ srpc_finish_service(struct srpc_service *sv)
/* called with sv->sv_lock held */
static void
-srpc_service_recycle_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf)
+srpc_service_recycle_buffer(struct srpc_service_cd *scd,
+ struct srpc_buffer *buf)
__must_hold(&scd->scd_lock)
{
if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) {
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h
index 4ab2ee264004..f353a634cc8e 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.h
+++ b/drivers/staging/lustre/lnet/selftest/rpc.h
@@ -113,7 +113,8 @@ struct srpc_join_reply {
__u32 join_status; /* returned status */
lst_sid_t join_sid; /* session id */
__u32 join_timeout; /* # seconds' inactivity to
- * expire */
+ * expire
+ */
char join_session[LST_NAME_SIZE]; /* session name */
} WIRE_ATTR;
@@ -175,7 +176,7 @@ struct test_bulk_req_v1 {
__u16 blk_opc; /* bulk operation code */
__u16 blk_flags; /* data check flags */
__u32 blk_len; /* data length */
- __u32 blk_offset; /* reserved: offset */
+ __u32 blk_offset; /* offset */
} WIRE_ATTR;
struct test_ping_req {
@@ -190,7 +191,8 @@ struct srpc_test_reqst {
lst_bid_t tsr_bid; /* batch id */
__u32 tsr_service; /* test type: bulk|ping|... */
__u32 tsr_loop; /* test client loop count or
- * # server buffers needed */
+ * # server buffers needed
+ */
__u32 tsr_concur; /* concurrency of test */
__u8 tsr_is_client; /* is test client or not */
__u8 tsr_stop_onerr; /* stop on error */
diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h
index d033ac03d953..c8833a016b6d 100644
--- a/drivers/staging/lustre/lnet/selftest/selftest.h
+++ b/drivers/staging/lustre/lnet/selftest/selftest.h
@@ -131,7 +131,8 @@ srpc_service2reply(int service)
enum srpc_event_type {
SRPC_BULK_REQ_RCVD = 1, /* passive bulk request(PUT sink/GET source)
- * received */
+ * received
+ */
SRPC_BULK_PUT_SENT = 2, /* active bulk PUT sent (source) */
SRPC_BULK_GET_RPLD = 3, /* active bulk GET replied (sink) */
SRPC_REPLY_RCVD = 4, /* incoming reply received */
@@ -295,7 +296,8 @@ struct srpc_service_cd {
#define SFW_TEST_WI_MIN 256
#define SFW_TEST_WI_MAX 2048
/* extra buffers for tolerating buggy peers, or unbalanced number
- * of peers between partitions */
+ * of peers between partitions
+ */
#define SFW_TEST_WI_EXTRA 64
/* number of server workitems (mini-thread) for framework service */
@@ -347,9 +349,11 @@ struct sfw_batch {
struct sfw_test_client_ops {
int (*tso_init)(struct sfw_test_instance *tsi); /* initialize test
- * client */
+ * client
+ */
void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test
- * client */
+ * client
+ */
int (*tso_prep_rpc)(struct sfw_test_unit *tsu,
lnet_process_id_t dest,
struct srpc_client_rpc **rpc); /* prep a test's rpc */
@@ -374,7 +378,8 @@ struct sfw_test_instance {
spinlock_t tsi_lock; /* serialize */
unsigned int tsi_stopping:1; /* test is stopping */
atomic_t tsi_nactive; /* # of active test
- * unit */
+ * unit
+ */
struct list_head tsi_units; /* test units */
struct list_head tsi_free_rpcs; /* free rpcs */
struct list_head tsi_active_rpcs; /* active rpcs */
@@ -386,8 +391,10 @@ struct sfw_test_instance {
} tsi_u;
};
-/* XXX: trailing (PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at the end of
- * pages are not used */
+/*
+ * XXX: trailing (PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at the end of
+ * pages are not used
+ */
#define SFW_MAX_CONCUR LST_MAX_CONCUR
#define SFW_ID_PER_PAGE (PAGE_SIZE / sizeof(lnet_process_id_packed_t))
#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE)
@@ -410,10 +417,10 @@ struct sfw_test_case {
struct srpc_client_rpc *
sfw_create_rpc(lnet_process_id_t peer, int service,
- unsigned features, int nbulkiov, int bulklen,
+ unsigned int features, int nbulkiov, int bulklen,
void (*done)(struct srpc_client_rpc *), void *priv);
int sfw_create_test_rpc(struct sfw_test_unit *tsu,
- lnet_process_id_t peer, unsigned features,
+ lnet_process_id_t peer, unsigned int features,
int nblk, int blklen, struct srpc_client_rpc **rpc);
void sfw_abort_rpc(struct srpc_client_rpc *rpc);
void sfw_post_rpc(struct srpc_client_rpc *rpc);
@@ -434,8 +441,9 @@ srpc_create_client_rpc(lnet_process_id_t peer, int service,
void srpc_post_rpc(struct srpc_client_rpc *rpc);
void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why);
void srpc_free_bulk(struct srpc_bulk *bk);
-struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned bulk_npg,
- unsigned bulk_len, int sink);
+struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off,
+ unsigned int bulk_npg, unsigned int bulk_len,
+ int sink);
int srpc_send_rpc(struct swi_workitem *wi);
int srpc_send_reply(struct srpc_server_rpc *rpc);
int srpc_add_service(struct srpc_service *sv);
diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c
index dcd22580b1f0..2fe692df19d0 100644
--- a/drivers/staging/lustre/lnet/selftest/timer.c
+++ b/drivers/staging/lustre/lnet/selftest/timer.c
@@ -46,16 +46,17 @@
* to cover a time period of 1024 seconds into the future before wrapping.
*/
#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */
-#define STTIMER_SLOTTIME (1 << STTIMER_MINPOLL)
+#define STTIMER_SLOTTIME BIT(STTIMER_MINPOLL)
#define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1))
-#define STTIMER_NSLOTS (1 << 7)
+#define STTIMER_NSLOTS BIT(7)
#define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \
(STTIMER_NSLOTS - 1))])
static struct st_timer_data {
spinlock_t stt_lock;
unsigned long stt_prev_slot; /* start time of the slot processed
- * previously */
+ * previously
+ */
struct list_head stt_hash[STTIMER_NSLOTS];
int stt_shuttingdown;
wait_queue_head_t stt_waitq;
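On the BIT() conversion above: BIT(n) from linux/bitops.h expands to
(1UL << (n)), so the constants are unchanged:

/* STTIMER_SLOTTIME = BIT(3) = 8 seconds per slot
 * STTIMER_NSLOTS   = BIT(7) = 128 slots
 * 8 s * 128 slots  = the 1024-second horizon described above
 */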
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
index edd72b926f81..999f250ceed0 100644
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -74,7 +74,7 @@ static int seq_client_rpc(struct lu_client_seq *seq,
/* Zero out input range, this is not recovery yet. */
in = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_RANGE);
- range_init(in);
+ lu_seq_range_init(in);
ptlrpc_request_set_replen(req);
@@ -112,25 +112,21 @@ static int seq_client_rpc(struct lu_client_seq *seq,
ptlrpc_at_set_req_timeout(req);
- if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
- mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
- if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
- mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
if (rc)
goto out_req;
out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE);
*output = *out;
- if (!range_is_sane(output)) {
+ if (!lu_seq_range_is_sane(output)) {
CERROR("%s: Invalid range received from server: "
DRANGE "\n", seq->lcs_name, PRANGE(output));
rc = -EINVAL;
goto out_req;
}
- if (range_is_exhausted(output)) {
+ if (lu_seq_range_is_exhausted(output)) {
CERROR("%s: Range received from server is exhausted: "
DRANGE "\n", seq->lcs_name, PRANGE(output));
rc = -EINVAL;
@@ -170,9 +166,9 @@ static int seq_client_alloc_seq(const struct lu_env *env,
{
int rc;
- LASSERT(range_is_sane(&seq->lcs_space));
+ LASSERT(lu_seq_range_is_sane(&seq->lcs_space));
- if (range_is_exhausted(&seq->lcs_space)) {
+ if (lu_seq_range_is_exhausted(&seq->lcs_space)) {
rc = seq_client_alloc_meta(env, seq);
if (rc) {
CERROR("%s: Can't allocate new meta-sequence, rc %d\n",
@@ -185,7 +181,7 @@ static int seq_client_alloc_seq(const struct lu_env *env,
rc = 0;
}
- LASSERT(!range_is_exhausted(&seq->lcs_space));
+ LASSERT(!lu_seq_range_is_exhausted(&seq->lcs_space));
*seqnr = seq->lcs_space.lsr_start;
seq->lcs_space.lsr_start += 1;
@@ -320,7 +316,7 @@ void seq_client_flush(struct lu_client_seq *seq)
seq->lcs_space.lsr_index = -1;
- range_init(&seq->lcs_space);
+ lu_seq_range_init(&seq->lcs_space);
mutex_unlock(&seq->lcs_mutex);
}
EXPORT_SYMBOL(seq_client_flush);
diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c
index 3ed32d77f38b..97d4849c7199 100644
--- a/drivers/staging/lustre/lustre/fid/lproc_fid.c
+++ b/drivers/staging/lustre/lustre/fid/lproc_fid.c
@@ -83,7 +83,7 @@ ldebugfs_fid_write_common(const char __user *buffer, size_t count,
(unsigned long long *)&tmp.lsr_end);
if (rc != 2)
return -EINVAL;
- if (!range_is_sane(&tmp) || range_is_zero(&tmp) ||
+ if (!lu_seq_range_is_sane(&tmp) || lu_seq_range_is_zero(&tmp) ||
tmp.lsr_start < range->lsr_start || tmp.lsr_end > range->lsr_end)
return -EINVAL;
*range = tmp;
diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c
index 0100a935f4ff..11f697496180 100644
--- a/drivers/staging/lustre/lustre/fld/fld_cache.c
+++ b/drivers/staging/lustre/lustre/fld/fld_cache.c
@@ -143,7 +143,7 @@ restart_fixup:
c_range = &f_curr->fce_range;
n_range = &f_next->fce_range;
- LASSERT(range_is_sane(c_range));
+ LASSERT(lu_seq_range_is_sane(c_range));
if (&f_next->fce_list == head)
break;
@@ -358,7 +358,7 @@ struct fld_cache_entry
{
struct fld_cache_entry *f_new;
- LASSERT(range_is_sane(range));
+ LASSERT(lu_seq_range_is_sane(range));
f_new = kzalloc(sizeof(*f_new), GFP_NOFS);
if (!f_new)
@@ -503,7 +503,7 @@ int fld_cache_lookup(struct fld_cache *cache,
}
prev = flde;
- if (range_within(&flde->fce_range, seq)) {
+ if (lu_seq_range_within(&flde->fce_range, seq)) {
*range = flde->fce_range;
cache->fci_stat.fst_cache++;
diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h
index 08eaec735d6f..4a7f0b71c48d 100644
--- a/drivers/staging/lustre/lustre/fld/fld_internal.h
+++ b/drivers/staging/lustre/lustre/fld/fld_internal.h
@@ -62,11 +62,6 @@
#include "../include/lustre_req_layout.h"
#include "../include/lustre_fld.h"
-enum {
- LUSTRE_FLD_INIT = 1 << 0,
- LUSTRE_FLD_RUN = 1 << 1
-};
-
struct fld_stats {
__u64 fst_count;
__u64 fst_cache;
diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c
index 0de72b717ce5..4cade7a16800 100644
--- a/drivers/staging/lustre/lustre/fld/fld_request.c
+++ b/drivers/staging/lustre/lustre/fld/fld_request.c
@@ -159,11 +159,6 @@ int fld_client_add_target(struct lu_client_fld *fld,
LASSERT(name);
LASSERT(tar->ft_srv || tar->ft_exp);
- if (fld->lcf_flags != LUSTRE_FLD_INIT) {
- CERROR("%s: Attempt to add target %s (idx %llu) on fly - skip it\n",
- fld->lcf_name, name, tar->ft_idx);
- return 0;
- }
CDEBUG(D_INFO, "%s: Adding target %s (idx %llu)\n",
fld->lcf_name, name, tar->ft_idx);
@@ -282,7 +277,6 @@ int fld_client_init(struct lu_client_fld *fld,
fld->lcf_count = 0;
spin_lock_init(&fld->lcf_lock);
fld->lcf_hash = &fld_hash[hash];
- fld->lcf_flags = LUSTRE_FLD_INIT;
INIT_LIST_HEAD(&fld->lcf_targets);
cache_size = FLD_CLIENT_CACHE_SIZE /
@@ -421,8 +415,6 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
struct lu_fld_target *target;
int rc;
- fld->lcf_flags |= LUSTRE_FLD_RUN;
-
rc = fld_cache_lookup(fld->lcf_cache, seq, &res);
if (rc == 0) {
*mds = res.lsr_index;
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
index 89292c93dcd5..dc685610c4c4 100644
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -59,10 +59,6 @@
* read/write system call it is associated with the single user
* thread that issued the system call).
*
- * - cl_req represents a collection of pages for a transfer. cl_req is
- * constructed by req-forming engine that tries to saturate
- * transport with large and continuous transfers.
- *
* Terminology
*
* - to avoid confusion high-level I/O operation like read or write system
@@ -103,11 +99,8 @@
struct inode;
struct cl_device;
-struct cl_device_operations;
struct cl_object;
-struct cl_object_page_operations;
-struct cl_object_lock_operations;
struct cl_page;
struct cl_page_slice;
@@ -120,27 +113,7 @@ struct cl_page_operations;
struct cl_io;
struct cl_io_slice;
-struct cl_req;
-struct cl_req_slice;
-
-/**
- * Operations for each data device in the client stack.
- *
- * \see vvp_cl_ops, lov_cl_ops, lovsub_cl_ops, osc_cl_ops
- */
-struct cl_device_operations {
- /**
- * Initialize cl_req. This method is called top-to-bottom on all
- * devices in the stack to get them a chance to allocate layer-private
- * data, and to attach them to the cl_req by calling
- * cl_req_slice_add().
- *
- * \see osc_req_init(), lov_req_init(), lovsub_req_init()
- * \see vvp_req_init()
- */
- int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req);
-};
+struct cl_req_attr;
/**
* Device in the client stack.
@@ -150,8 +123,6 @@ struct cl_device_operations {
struct cl_device {
/** Super-class. */
struct lu_device cd_lu_dev;
- /** Per-layer operation vector. */
- const struct cl_device_operations *cd_ops;
};
/** \addtogroup cl_object cl_object
@@ -267,7 +238,7 @@ struct cl_object_conf {
/**
* Object layout. This is consumed by lov.
*/
- struct lustre_md *coc_md;
+ struct lu_buf coc_layout;
/**
* Description of particular stripe location in the
* cluster. This is consumed by osc.
@@ -301,6 +272,26 @@ enum {
OBJECT_CONF_WAIT = 2
};
+enum {
+ CL_LAYOUT_GEN_NONE = (u32)-2, /* layout lock was cancelled */
+ CL_LAYOUT_GEN_EMPTY = (u32)-1, /* for empty layout */
+};
+
+struct cl_layout {
+ /** the buffer to return the layout in lov_mds_md format. */
+ struct lu_buf cl_buf;
+ /** size of layout in lov_mds_md format. */
+ size_t cl_size;
+ /** Layout generation. */
+ u32 cl_layout_gen;
+ /**
+ * True if this is a released file.
+ * Temporarily added for released file truncate in ll_setattr_raw().
+ * It will be removed later. -Jinshan
+ */
+ bool cl_is_released;
+};
+
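+
The new struct above is consumed through cl_object_layout_get(), declared later in this patch; a hedged sketch of a caller that only wants the layout generation, assuming an already-initialized env and the standard lb_buf/lb_len layout of struct lu_buf:

    /* Illustrative caller of cl_object_layout_get(); an empty lu_buf is
     * passed because only the generation is wanted here.
     */
    static int example_layout_gen(const struct lu_env *env,
                                  struct cl_object *obj, u32 *gen)
    {
            struct cl_layout cl = { .cl_buf = { NULL, 0 } };
            int rc;

            rc = cl_object_layout_get(env, obj, &cl);
            if (rc < 0)
                    return rc;
            *gen = cl.cl_layout_gen;
            return 0;
    }
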
/**
* Operations implemented for each cl object layer.
*
@@ -400,6 +391,27 @@ struct cl_object_operations {
*/
int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj,
struct lov_user_md __user *lum);
+ /**
+ * Get FIEMAP mapping from the object.
+ */
+ int (*coo_fiemap)(const struct lu_env *env, struct cl_object *obj,
+ struct ll_fiemap_info_key *fmkey,
+ struct fiemap *fiemap, size_t *buflen);
+ /**
+ * Get layout and generation of the object.
+ */
+ int (*coo_layout_get)(const struct lu_env *env, struct cl_object *obj,
+ struct cl_layout *layout);
+ /**
+ * Get maximum size of the object.
+ */
+ loff_t (*coo_maxbytes)(struct cl_object *obj);
+ /**
+ * Set request attributes.
+ */
+ void (*coo_req_attr_set)(const struct lu_env *env,
+ struct cl_object *obj,
+ struct cl_req_attr *attr);
};
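
Layers opt into the new methods by filling the corresponding cl_object_operations slots; a hedged sketch with a trivial coo_maxbytes (the example_* names are placeholders, not functions this patch adds):

    /* Placeholder implementation: a real layer derives the limit from its
     * backing target(s), e.g. the minimum across stripes.
     */
    static loff_t example_maxbytes(struct cl_object *obj)
    {
            return MAX_LFS_FILESIZE;        /* from linux/fs.h */
    }

    static const struct cl_object_operations example_obj_ops = {
            .coo_maxbytes = example_maxbytes,
    };
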
/**
@@ -591,7 +603,7 @@ enum cl_page_state {
*
* - [cl_page_state::CPS_PAGEOUT] page is dirty, the
* req-formation engine decides that it wants to include this page
- * into an cl_req being constructed, and yanks it from the cache;
+ * into an RPC being constructed, and yanks it from the cache;
*
* - [cl_page_state::CPS_FREEING] VM callback is executed to
* evict the page from memory;
@@ -660,7 +672,7 @@ enum cl_page_state {
* Page is being read in, as a part of a transfer. This is quite
* similar to the cl_page_state::CPS_PAGEOUT state, except that
* read-in is always "immediate"---there is no such thing as a sudden
- * construction of read cl_req from cached, presumably not up to date,
+ * construction of a read request from cached, presumably not up-to-date,
* pages.
*
* Underlying VM page is locked for the duration of transfer.
@@ -714,8 +726,6 @@ struct cl_page {
struct list_head cp_batch;
/** List of slices. Immutable after creation. */
struct list_head cp_layers;
- /** Linkage of pages within cl_req. */
- struct list_head cp_flight;
/**
* Page state. This field is const to avoid accidental update, it is
* modified only internally within cl_page.c. Protected by a VM lock.
@@ -732,12 +742,6 @@ struct cl_page {
* by sub-io. Protected by a VM lock.
*/
struct cl_io *cp_owner;
- /**
- * Owning IO request in cl_page_state::CPS_PAGEOUT and
- * cl_page_state::CPS_PAGEIN states. This field is maintained only in
- * the top-level pages. Protected by a VM lock.
- */
- struct cl_req *cp_req;
/** List of references to this page, for debugging. */
struct lu_ref cp_reference;
/** Link to an object, for debugging. */
@@ -779,7 +783,6 @@ enum cl_lock_mode {
/**
* Requested transfer type.
- * \ingroup cl_req
*/
enum cl_req_type {
CRT_READ,
@@ -884,26 +887,6 @@ struct cl_page_operations {
/** Destructor. Frees resources and slice itself. */
void (*cpo_fini)(const struct lu_env *env,
struct cl_page_slice *slice);
-
- /**
- * Checks whether the page is protected by a cl_lock. This is a
- * per-layer method, because certain layers have ways to check for the
- * lock much more efficiently than through the generic locks scan, or
- * implement locking mechanisms separate from cl_lock, e.g.,
- * LL_FILE_GROUP_LOCKED in vvp. If \a pending is true, check for locks
- * being canceled, or scheduled for cancellation as soon as the last
- * user goes away, too.
- *
- * \retval -EBUSY: page is protected by a lock of a given mode;
- * \retval -ENODATA: page is not protected by a lock;
- * \retval 0: this layer cannot decide.
- *
- * \see cl_page_is_under_lock()
- */
- int (*cpo_is_under_lock)(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io, pgoff_t *max);
-
/**
* Optional debugging helper. Prints given page slice.
*
@@ -915,8 +898,7 @@ struct cl_page_operations {
/**
* \name transfer
*
- * Transfer methods. See comment on cl_req for a description of
- * transfer formation and life-cycle.
+ * Transfer methods.
*
* @{
*/
@@ -962,7 +944,7 @@ struct cl_page_operations {
int ioret);
/**
* Called when cached page is about to be added to the
- * cl_req as a part of req formation.
+ * ptlrpc request as a part of req formation.
*
* \return 0 : proceed with this page;
* \return -EAGAIN : skip this page;
@@ -1365,7 +1347,6 @@ struct cl_2queue {
* (3) sort all locks to avoid dead-locks, and acquire them
*
* (4) process the chunk: call per-page methods
- * (cl_io_operations::cio_read_page() for read,
* cl_io_operations::cio_prepare_write(),
* cl_io_operations::cio_commit_write() for write)
*
@@ -1388,6 +1369,8 @@ enum cl_io_type {
CIT_WRITE,
/** truncate, utime system calls */
CIT_SETATTR,
+ /** get data version */
+ CIT_DATA_VERSION,
/**
* page fault handling
*/
@@ -1467,6 +1450,31 @@ struct cl_io_slice {
typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
struct cl_page *);
+
+struct cl_read_ahead {
+ /*
+ * Maximum page index at which the readahead window ends.
+ * This is determined by DLM lock coverage, RPC size and stripe boundary.
+ * cra_end is inclusive.
+ */
+ pgoff_t cra_end;
+ /*
+ * Release routine. If the readahead holds resources underneath, this
+ * function should be called to release them.
+ */
+ void (*cra_release)(const struct lu_env *env, void *cbdata);
+ /* Callback data for cra_release routine */
+ void *cra_cbdata;
+};
+
+static inline void cl_read_ahead_release(const struct lu_env *env,
+ struct cl_read_ahead *ra)
+{
+ if (ra->cra_release)
+ ra->cra_release(env, ra->cra_cbdata);
+ memset(ra, 0, sizeof(*ra));
+}
+
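+
Providers of read-ahead information fill cra_end and, when they pin a resource (typically a DLM lock reference), a matching release callback; every successful query must then be paired with cl_read_ahead_release(). A hedged sketch with a made-up lock type (the cl_io_slice argument of the real cio_read_ahead method is dropped for brevity):

    /* Illustrative provider; struct example_lock and the find/put helpers
     * are assumptions for the sketch, not part of this patch.
     */
    struct example_lock {
            pgoff_t el_last_index;  /* last page index the lock covers */
    };

    extern struct example_lock *example_lock_find(pgoff_t index);
    extern void example_lock_put(struct example_lock *lock);

    static void example_ra_release(const struct lu_env *env, void *cbdata)
    {
            example_lock_put(cbdata);       /* drop the reference taken below */
    }

    static int example_read_ahead(const struct lu_env *env, pgoff_t start,
                                  struct cl_read_ahead *ra)
    {
            struct example_lock *lock = example_lock_find(start);

            if (!lock)
                    return -ENODATA;
            ra->cra_end = lock->el_last_index;      /* inclusive */
            ra->cra_release = example_ra_release;
            ra->cra_cbdata = lock;
            return 0;
    }
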
/**
* Per-layer io operations.
* \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
@@ -1573,16 +1581,13 @@ struct cl_io_operations {
struct cl_page_list *queue, int from, int to,
cl_commit_cbt cb);
/**
- * Read missing page.
- *
- * Called by a top-level cl_io_operations::op[CIT_READ]::cio_start()
- * method, when it hits not-up-to-date page in the range. Optional.
+ * Decide the maximum read-ahead extent for the given start index.
*
* \pre io->ci_type == CIT_READ
*/
- int (*cio_read_page)(const struct lu_env *env,
- const struct cl_io_slice *slice,
- const struct cl_page_slice *page);
+ int (*cio_read_ahead)(const struct lu_env *env,
+ const struct cl_io_slice *slice,
+ pgoff_t start, struct cl_read_ahead *ra);
/**
* Optional debugging helper. Print given io slice.
*/
@@ -1765,10 +1770,15 @@ struct cl_io {
struct cl_io_rw_common ci_rw;
struct cl_setattr_io {
struct ost_lvb sa_attr;
+ unsigned int sa_attr_flags;
unsigned int sa_valid;
int sa_stripe_index;
- struct lu_fid *sa_parent_fid;
+ const struct lu_fid *sa_parent_fid;
} ci_setattr;
+ struct cl_data_version_io {
+ u64 dv_data_version;
+ int dv_flags;
+ } ci_data_version;
struct cl_fault_io {
/** page index within file. */
pgoff_t ft_index;
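
The result of the new io type travels back through the ci_data_version member of the cl_io union; a hedged sketch of driving a CIT_DATA_VERSION io, assuming the usual cl_io_init()/cl_io_loop()/cl_io_fini() cycle and a caller-provided cl_io:

    /* Hedged sketch: fetch the data version of an object. Flag values and
     * error handling are abbreviated.
     */
    static int example_data_version(const struct lu_env *env,
                                    struct cl_io *io, struct cl_object *obj,
                                    u64 *version)
    {
            int rc;

            io->ci_obj = obj;
            io->u.ci_data_version.dv_flags = 0;

            rc = cl_io_init(env, io, CIT_DATA_VERSION, obj);
            if (rc == 0)
                    rc = cl_io_loop(env, io);
            if (rc == 0)
                    *version = io->u.ci_data_version.dv_data_version;
            cl_io_fini(env, io);
            return rc;
    }
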
@@ -1836,179 +1846,20 @@ struct cl_io {
/** @} cl_io */
-/** \addtogroup cl_req cl_req
- * @{
- */
-/** \struct cl_req
- * Transfer.
- *
- * There are two possible modes of transfer initiation on the client:
- *
- * - immediate transfer: this is started when a high level io wants a page
- * or a collection of pages to be transferred right away. Examples:
- * read-ahead, synchronous read in the case of non-page aligned write,
- * page write-out as a part of extent lock cancellation, page write-out
- * as a part of memory cleansing. Immediate transfer can be both
- * cl_req_type::CRT_READ and cl_req_type::CRT_WRITE;
- *
- * - opportunistic transfer (cl_req_type::CRT_WRITE only), that happens
- * when io wants to transfer a page to the server some time later, when
- * it can be done efficiently. Example: pages dirtied by the write(2)
- * path.
- *
- * In any case, transfer takes place in the form of a cl_req, which is a
- * representation for a network RPC.
- *
- * Pages queued for an opportunistic transfer are cached until it is decided
- * that efficient RPC can be composed of them. This decision is made by "a
- * req-formation engine", currently implemented as a part of osc
- * layer. Req-formation depends on many factors: the size of the resulting
- * RPC, whether or not multi-object RPCs are supported by the server,
- * max-rpc-in-flight limitations, size of the dirty cache, etc.
- *
- * For the immediate transfer io submits a cl_page_list, that req-formation
- * engine slices into cl_req's, possibly adding cached pages to some of
- * the resulting req's.
- *
- * Whenever a page from cl_page_list is added to a newly constructed req, its
- * cl_page_operations::cpo_prep() layer methods are called. At that moment,
- * page state is atomically changed from cl_page_state::CPS_OWNED to
- * cl_page_state::CPS_PAGEOUT or cl_page_state::CPS_PAGEIN, cl_page::cp_owner
- * is zeroed, and cl_page::cp_req is set to the
- * req. cl_page_operations::cpo_prep() method at the particular layer might
- * return -EALREADY to indicate that it does not need to submit this page
- * at all. This is possible, for example, if page, submitted for read,
- * became up-to-date in the meantime; and for write, the page don't have
- * dirty bit marked. \see cl_io_submit_rw()
- *
- * Whenever a cached page is added to a newly constructed req, its
- * cl_page_operations::cpo_make_ready() layer methods are called. At that
- * moment, page state is atomically changed from cl_page_state::CPS_CACHED to
- * cl_page_state::CPS_PAGEOUT, and cl_page::cp_req is set to
- * req. cl_page_operations::cpo_make_ready() method at the particular layer
- * might return -EAGAIN to indicate that this page is not eligible for the
- * transfer right now.
- *
- * FUTURE
- *
- * Plan is to divide transfers into "priority bands" (indicated when
- * submitting cl_page_list, and queuing a page for the opportunistic transfer)
- * and allow glueing of cached pages to immediate transfers only within single
- * band. This would make high priority transfers (like lock cancellation or
- * memory pressure induced write-out) really high priority.
- *
- */
-
/**
* Per-transfer attributes.
*/
struct cl_req_attr {
+ enum cl_req_type cra_type;
+ u64 cra_flags;
+ struct cl_page *cra_page;
+
/** Generic attributes for the server consumption. */
struct obdo *cra_oa;
/** Jobid */
char cra_jobid[LUSTRE_JOBID_SIZE];
};
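
With cl_req gone, per-RPC attributes are now requested directly from the object; a hedged sketch of how an RPC-forming layer might stamp an obdo through the new single-object entry point cl_req_attr_set(), declared later in this patch (the flag set chosen is an assumption for the sketch):

    static void example_stamp_oa(const struct lu_env *env,
                                 struct cl_object *obj, struct cl_page *page,
                                 struct obdo *oa)
    {
            struct cl_req_attr attr = {
                    .cra_type       = CRT_WRITE,
                    .cra_flags      = OBD_MD_FLMTIME | OBD_MD_FLCTIME,
                    .cra_page       = page,
                    .cra_oa         = oa,
            };

            /* each layer of obj fills the fields it is responsible for */
            cl_req_attr_set(env, obj, &attr);
    }
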
-/**
- * Transfer request operations definable at every layer.
- *
- * Concurrency: transfer formation engine synchronizes calls to all transfer
- * methods.
- */
-struct cl_req_operations {
- /**
- * Invoked top-to-bottom by cl_req_prep() when transfer formation is
- * complete (all pages are added).
- *
- * \see osc_req_prep()
- */
- int (*cro_prep)(const struct lu_env *env,
- const struct cl_req_slice *slice);
- /**
- * Called top-to-bottom to fill in \a oa fields. This is called twice
- * with different flags, see bug 10150 and osc_build_req().
- *
- * \param obj an object from cl_req which attributes are to be set in
- * \a oa.
- *
- * \param oa struct obdo where attributes are placed
- *
- * \param flags \a oa fields to be filled.
- */
- void (*cro_attr_set)(const struct lu_env *env,
- const struct cl_req_slice *slice,
- const struct cl_object *obj,
- struct cl_req_attr *attr, u64 flags);
- /**
- * Called top-to-bottom from cl_req_completion() to notify layers that
- * transfer completed. Has to free all state allocated by
- * cl_device_operations::cdo_req_init().
- */
- void (*cro_completion)(const struct lu_env *env,
- const struct cl_req_slice *slice, int ioret);
-};
-
-/**
- * A per-object state that (potentially multi-object) transfer request keeps.
- */
-struct cl_req_obj {
- /** object itself */
- struct cl_object *ro_obj;
- /** reference to cl_req_obj::ro_obj. For debugging. */
- struct lu_ref_link ro_obj_ref;
- /* something else? Number of pages for a given object? */
-};
-
-/**
- * Transfer request.
- *
- * Transfer requests are not reference counted, because IO sub-system owns
- * them exclusively and knows when to free them.
- *
- * Life cycle.
- *
- * cl_req is created by cl_req_alloc() that calls
- * cl_device_operations::cdo_req_init() device methods to allocate per-req
- * state in every layer.
- *
- * Then pages are added (cl_req_page_add()), req keeps track of all objects it
- * contains pages for.
- *
- * Once all pages were collected, cl_page_operations::cpo_prep() method is
- * called top-to-bottom. At that point layers can modify req, let it pass, or
- * deny it completely. This is to support things like SNS that have transfer
- * ordering requirements invisible to the individual req-formation engine.
- *
- * On transfer completion (or transfer timeout, or failure to initiate the
- * transfer of an allocated req), cl_req_operations::cro_completion() method
- * is called, after execution of cl_page_operations::cpo_completion() of all
- * req's pages.
- */
-struct cl_req {
- enum cl_req_type crq_type;
- /** A list of pages being transferred */
- struct list_head crq_pages;
- /** Number of pages in cl_req::crq_pages */
- unsigned crq_nrpages;
- /** An array of objects which pages are in ->crq_pages */
- struct cl_req_obj *crq_o;
- /** Number of elements in cl_req::crq_objs[] */
- unsigned crq_nrobjs;
- struct list_head crq_layers;
-};
-
-/**
- * Per-layer state for request.
- */
-struct cl_req_slice {
- struct cl_req *crs_req;
- struct cl_device *crs_dev;
- struct list_head crs_linkage;
- const struct cl_req_operations *crs_ops;
-};
-
-/* @} cl_req */
-
enum cache_stats_item {
/** how many cache lookups were performed */
CS_lookup = 0,
@@ -2153,9 +2004,6 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
const struct cl_lock_operations *ops);
void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice,
struct cl_object *obj, const struct cl_io_operations *ops);
-void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice,
- struct cl_device *dev,
- const struct cl_req_operations *ops);
/** @} helpers */
/** \defgroup cl_object cl_object
@@ -2183,6 +2031,12 @@ int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
struct lov_user_md __user *lum);
+int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
+ struct ll_fiemap_info_key *fmkey, struct fiemap *fiemap,
+ size_t *buflen);
+int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
+ struct cl_layout *cl);
+loff_t cl_object_maxbytes(struct cl_object *obj);
/**
* Returns true, iff \a o0 and \a o1 are slices of the same object.
@@ -2302,8 +2156,6 @@ void cl_page_discard(const struct lu_env *env, struct cl_io *io,
void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg);
void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate);
-int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, pgoff_t *max_index);
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
size_t cl_page_size(const struct cl_object *obj);
@@ -2414,8 +2266,6 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
struct cl_io_lock_link *link);
int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
struct cl_lock_descr *descr);
-int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page);
int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
enum cl_req_type iot, struct cl_2queue *queue);
int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
@@ -2424,6 +2274,8 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue, int from, int to,
cl_commit_cbt cb);
+int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io,
+ pgoff_t start, struct cl_read_ahead *ra);
int cl_io_is_going(const struct lu_env *env);
/**
@@ -2520,19 +2372,8 @@ void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page);
/** @} cl_page_list */
-/** \defgroup cl_req cl_req
- * @{
- */
-struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
- enum cl_req_type crt, int nr_objects);
-
-void cl_req_page_add(const struct lu_env *env, struct cl_req *req,
- struct cl_page *page);
-void cl_req_page_done(const struct lu_env *env, struct cl_page *page);
-int cl_req_prep(const struct lu_env *env, struct cl_req *req);
-void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
- struct cl_req_attr *attr, u64 flags);
-void cl_req_completion(const struct lu_env *env, struct cl_req *req, int ioret);
+void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj,
+ struct cl_req_attr *attr);
/** \defgroup cl_sync_io cl_sync_io
* @{
@@ -2568,8 +2409,6 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
/** @} cl_sync_io */
-/** @} cl_req */
-
/** \defgroup cl_env cl_env
*
* lu_env handling for a client.
@@ -2593,35 +2432,13 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
* - allocation and destruction of environment is amortized by caching no
* longer used environments instead of destroying them;
*
- * - there is a notion of "current" environment, attached to the kernel
- * data structure representing current thread Top-level lustre code
- * allocates an environment and makes it current, then calls into
- * non-lustre code, that in turn calls lustre back. Low-level lustre
- * code thus called can fetch environment created by the top-level code
- * and reuse it, avoiding additional environment allocation.
- * Right now, three interfaces can attach the cl_env to running thread:
- * - cl_env_get
- * - cl_env_implant
- * - cl_env_reexit(cl_env_reenter had to be called priorly)
- *
* \see lu_env, lu_context, lu_context_key
* @{
*/
-struct cl_env_nest {
- int cen_refcheck;
- void *cen_cookie;
-};
-
struct lu_env *cl_env_get(int *refcheck);
struct lu_env *cl_env_alloc(int *refcheck, __u32 tags);
-struct lu_env *cl_env_nested_get(struct cl_env_nest *nest);
void cl_env_put(struct lu_env *env, int *refcheck);
-void cl_env_nested_put(struct cl_env_nest *nest, struct lu_env *env);
-void *cl_env_reenter(void);
-void cl_env_reexit(void *cookie);
-void cl_env_implant(struct lu_env *env, int *refcheck);
-void cl_env_unplant(struct lu_env *env, int *refcheck);
unsigned int cl_env_cache_purge(unsigned int nr);
struct lu_env *cl_env_percpu_get(void);
void cl_env_percpu_put(struct lu_env *env);
diff --git a/drivers/staging/lustre/lustre/include/llog_swab.h b/drivers/staging/lustre/lustre/include/llog_swab.h
new file mode 100644
index 000000000000..fd7ffb154ad1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/llog_swab.h
@@ -0,0 +1,65 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format. The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed()).
+ *
+ * Each type has a corresponding 'lustre_swab_xxxtypexxx()' routine; these
+ * are implemented in ptlrpc/pack_generic.c. The 'swabbers' convert the
+ * type from the "other" endianness, in place in the message buffer.
+ *
+ * A swabber takes a single pointer argument. The caller must already have
+ * verified that the length of the message buffer >= sizeof (type).
+ *
+ * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
+ * may be defined that swabs just the variable part, after the caller has
+ * verified that the message buffer is large enough.
+ */
+
+#ifndef _LLOG_SWAB_H_
+#define _LLOG_SWAB_H_
+
+#include "lustre/lustre_idl.h"
+struct lustre_cfg;
+
+void lustre_swab_lu_fid(struct lu_fid *fid);
+void lustre_swab_ost_id(struct ost_id *oid);
+void lustre_swab_llogd_body(struct llogd_body *d);
+void lustre_swab_llog_hdr(struct llog_log_hdr *h);
+void lustre_swab_llogd_conn_body(struct llogd_conn_body *d);
+void lustre_swab_llog_rec(struct llog_rec_hdr *rec);
+void lustre_swab_lu_seq_range(struct lu_seq_range *range);
+void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
+void lustre_swab_cfg_marker(struct cfg_marker *marker,
+ int swab, int size);
+
+#endif
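
Per the comment in the new header, a swabber converts one wire structure in place; a minimal sketch of the shape such a routine takes (the real lustre_swab_lu_fid() lives in ptlrpc/pack_generic.c; the example_ name is a placeholder):

    #include <linux/swab.h>

    /* In-place endianness conversion of the lu_fid triple:
     * one __u64 followed by two __u32 fields.
     */
    static void example_swab_lu_fid(struct lu_fid *fid)
    {
            fid->f_seq = __swab64(fid->f_seq);
            fid->f_oid = __swab32(fid->f_oid);
            fid->f_ver = __swab32(fid->f_ver);
    }
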
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
index cc0713ef8ae5..62753dae0bfa 100644
--- a/drivers/staging/lustre/lustre/include/lprocfs_status.h
+++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h
@@ -43,6 +43,8 @@
#include <linux/spinlock.h>
#include <linux/types.h>
+#include "../../include/linux/libcfs/libcfs.h"
+#include "lustre_cfg.h"
#include "lustre/lustre_idl.h"
struct lprocfs_vars {
@@ -540,7 +542,8 @@ lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags);
void lprocfs_clear_stats(struct lprocfs_stats *stats);
void lprocfs_free_stats(struct lprocfs_stats **stats);
void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
- unsigned conf, const char *name, const char *units);
+ unsigned int conf, const char *name,
+ const char *units);
struct obd_export;
int lprocfs_exp_cleanup(struct obd_export *exp);
struct dentry *ldebugfs_add_simple(struct dentry *root,
@@ -701,9 +704,9 @@ static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store)
extern const struct sysfs_ops lustre_sysfs_ops;
struct root_squash_info;
-int lprocfs_wr_root_squash(const char *buffer, unsigned long count,
+int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count,
struct root_squash_info *squash, char *name);
-int lprocfs_wr_nosquash_nids(const char *buffer, unsigned long count,
+int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count,
struct root_squash_info *squash, char *name);
/* all quota proc functions */
diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
index c2340d643e84..b8ad5559a3b9 100644
--- a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
+++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h
@@ -41,79 +41,24 @@
#ifndef _LUSTRE_FIEMAP_H
#define _LUSTRE_FIEMAP_H
-struct ll_fiemap_extent {
- __u64 fe_logical; /* logical offset in bytes for the start of
- * the extent from the beginning of the file
- */
- __u64 fe_physical; /* physical offset in bytes for the start
- * of the extent from the beginning of the disk
- */
- __u64 fe_length; /* length in bytes for this extent */
- __u64 fe_reserved64[2];
- __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
- __u32 fe_device; /* device number for this extent */
- __u32 fe_reserved[2];
-};
-
-struct ll_user_fiemap {
- __u64 fm_start; /* logical offset (inclusive) at
- * which to start mapping (in)
- */
- __u64 fm_length; /* logical length of mapping which
- * userspace wants (in)
- */
- __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
- __u32 fm_mapped_extents;/* number of extents that were mapped (out) */
- __u32 fm_extent_count; /* size of fm_extents array (in) */
- __u32 fm_reserved;
- struct ll_fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
-};
-
-#define FIEMAP_MAX_OFFSET (~0ULL)
+#ifndef __KERNEL__
+#include <stddef.h>
+#include <fiemap.h>
+#endif
-#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before
- * map
- */
-#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute
- * tree
- */
-#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
-#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
-#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
- * Sets EXTENT_UNKNOWN.
- */
-#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read
- * while fs is unmounted
- */
-#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs.
- * Sets EXTENT_NO_DIRECT.
- */
-#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be
- * block aligned.
- */
-#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata.
- * Sets EXTENT_NOT_ALIGNED.*/
-#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block.
- * Sets EXTENT_NOT_ALIGNED.
- */
-#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but
- * no data (i.e. zero).
- */
-#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively
- * support extents. Result
- * merged for efficiency.
- */
+/* XXX: We use fiemap_extent::fe_reserved[0] */
+#define fe_device fe_reserved[0]
static inline size_t fiemap_count_to_size(size_t extent_count)
{
- return (sizeof(struct ll_user_fiemap) + extent_count *
- sizeof(struct ll_fiemap_extent));
+ return sizeof(struct fiemap) + extent_count *
+ sizeof(struct fiemap_extent);
}
static inline unsigned fiemap_size_to_count(size_t array_size)
{
- return ((array_size - sizeof(struct ll_user_fiemap)) /
- sizeof(struct ll_fiemap_extent));
+ return (array_size - sizeof(struct fiemap)) /
+ sizeof(struct fiemap_extent);
}
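
As a quick sanity check of the two helpers (sizes from linux/fiemap.h on a 64-bit build: sizeof(struct fiemap) == 32, sizeof(struct fiemap_extent) == 56):

    size_t bytes = fiemap_count_to_size(4);        /* 32 + 4 * 56 = 256 */
    unsigned count = fiemap_size_to_count(bytes);  /* 256 -> 4 again */
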
#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 72eaee95c6b8..65ce503ad595 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -48,8 +48,7 @@
* that the Lustre wire protocol is not influenced by external dependencies.
*
* The only other acceptable items in this file are VERY SIMPLE accessor
- * functions to avoid callers grubbing inside the structures, and the
- * prototypes of the swabber functions for each struct. Nothing that
+ * functions to avoid callers grubbing inside the structures. Nothing that
* depends on external functions or definitions should be in here.
*
* Structs must be properly aligned to put 64-bit values on an 8-byte
@@ -64,23 +63,6 @@
* in the code to ensure that new/old clients that see this larger struct
* do not fail, otherwise you need to implement protocol compatibility).
*
- * We assume all nodes are either little-endian or big-endian, and we
- * always send messages in the sender's native format. The receiver
- * detects the message format by checking the 'magic' field of the message
- * (see lustre_msg_swabbed() below).
- *
- * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines,
- * implemented either here, inline (trivial implementations) or in
- * ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other"
- * endian, in-place in the message buffer.
- *
- * A swabber takes a single pointer argument. The caller must already have
- * verified that the length of the message buffer >= sizeof (type).
- *
- * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
- * may be defined that swabs just the variable part, after the caller has
- * verified that the message buffer is large enough.
- *
* @{
*/
@@ -192,113 +174,6 @@ struct lu_seq_range_array {
#define LU_SEQ_RANGE_MASK 0x3
-static inline unsigned fld_range_type(const struct lu_seq_range *range)
-{
- return range->lsr_flags & LU_SEQ_RANGE_MASK;
-}
-
-static inline bool fld_range_is_ost(const struct lu_seq_range *range)
-{
- return fld_range_type(range) == LU_SEQ_RANGE_OST;
-}
-
-static inline bool fld_range_is_mdt(const struct lu_seq_range *range)
-{
- return fld_range_type(range) == LU_SEQ_RANGE_MDT;
-}
-
-/**
- * This all range is only being used when fld client sends fld query request,
- * but it does not know whether the seq is MDT or OST, so it will send req
- * with ALL type, which means either seq type gotten from lookup can be
- * expected.
- */
-static inline unsigned fld_range_is_any(const struct lu_seq_range *range)
-{
- return fld_range_type(range) == LU_SEQ_RANGE_ANY;
-}
-
-static inline void fld_range_set_type(struct lu_seq_range *range,
- unsigned flags)
-{
- range->lsr_flags |= flags;
-}
-
-static inline void fld_range_set_mdt(struct lu_seq_range *range)
-{
- fld_range_set_type(range, LU_SEQ_RANGE_MDT);
-}
-
-static inline void fld_range_set_ost(struct lu_seq_range *range)
-{
- fld_range_set_type(range, LU_SEQ_RANGE_OST);
-}
-
-static inline void fld_range_set_any(struct lu_seq_range *range)
-{
- fld_range_set_type(range, LU_SEQ_RANGE_ANY);
-}
-
-/**
- * returns width of given range \a r
- */
-
-static inline __u64 range_space(const struct lu_seq_range *range)
-{
- return range->lsr_end - range->lsr_start;
-}
-
-/**
- * initialize range to zero
- */
-
-static inline void range_init(struct lu_seq_range *range)
-{
- memset(range, 0, sizeof(*range));
-}
-
-/**
- * check if given seq id \a s is within given range \a r
- */
-
-static inline bool range_within(const struct lu_seq_range *range,
- __u64 s)
-{
- return s >= range->lsr_start && s < range->lsr_end;
-}
-
-static inline bool range_is_sane(const struct lu_seq_range *range)
-{
- return (range->lsr_end >= range->lsr_start);
-}
-
-static inline bool range_is_zero(const struct lu_seq_range *range)
-{
- return (range->lsr_start == 0 && range->lsr_end == 0);
-}
-
-static inline bool range_is_exhausted(const struct lu_seq_range *range)
-
-{
- return range_space(range) == 0;
-}
-
-/* return 0 if two range have the same location */
-static inline int range_compare_loc(const struct lu_seq_range *r1,
- const struct lu_seq_range *r2)
-{
- return r1->lsr_index != r2->lsr_index ||
- r1->lsr_flags != r2->lsr_flags;
-}
-
-#define DRANGE "[%#16.16Lx-%#16.16Lx):%x:%s"
-
-#define PRANGE(range) \
- (range)->lsr_start, \
- (range)->lsr_end, \
- (range)->lsr_index, \
- fld_range_is_mdt(range) ? "mdt" : "ost"
-
/** \defgroup lu_fid lu_fid
* @{
*/
@@ -310,7 +185,7 @@ static inline int range_compare_loc(const struct lu_seq_range *r1,
*/
enum lma_compat {
LMAC_HSM = 0x00000001,
- LMAC_SOM = 0x00000002,
+/* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */
LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */
LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
* under /O/<seq>/d<x>.
@@ -644,13 +519,14 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
{
if (fid_seq_is_mdt0(oi->oi.oi_seq)) {
if (oid >= IDIF_MAX_OID) {
- CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
+ CERROR("Too large OID %#llx to set MDT0 " DOSTID "\n",
+ oid, POSTID(oi));
return;
}
oi->oi.oi_id = oid;
} else if (fid_is_idif(&oi->oi_fid)) {
if (oid >= IDIF_MAX_OID) {
- CERROR("Bad %llu to set "DOSTID"\n",
+ CERROR("Too large OID %#llx to set IDIF " DOSTID "\n",
oid, POSTID(oi));
return;
}
@@ -676,7 +552,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
if (fid_is_idif(fid)) {
if (oid >= IDIF_MAX_OID) {
- CERROR("Too large OID %#llx to set IDIF "DFID"\n",
+ CERROR("Too large OID %#llx to set IDIF " DFID "\n",
(unsigned long long)oid, PFID(fid));
return -EBADF;
}
@@ -685,7 +561,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
fid->f_ver = oid >> 48;
} else {
if (oid >= OBIF_MAX_OID) {
- CERROR("Too large OID %#llx to set REG "DFID"\n",
+ CERROR("Too large OID %#llx to set REG " DFID "\n",
(unsigned long long)oid, PFID(fid));
return -EBADF;
}
@@ -785,8 +661,6 @@ static inline ino_t lu_igif_ino(const struct lu_fid *fid)
return fid_seq(fid);
}
-void lustre_swab_ost_id(struct ost_id *oid);
-
/**
* Get inode generation from a igif.
* \param fid a igif to get inode generation from.
@@ -847,9 +721,6 @@ static inline bool fid_is_sane(const struct lu_fid *fid)
fid_seq_is_rsvd(fid_seq(fid)));
}
-void lustre_swab_lu_fid(struct lu_fid *fid);
-void lustre_swab_lu_seq_range(struct lu_seq_range *range);
-
static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
{
return memcmp(f0, f1, sizeof(*f0)) == 0;
@@ -1099,8 +970,10 @@ struct ptlrpc_body_v3 {
__u32 pb_version;
__u32 pb_opc;
__u32 pb_status;
- __u64 pb_last_xid;
- __u64 pb_last_seen;
+ __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
+ __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */
+ __u16 pb_padding0;
+ __u32 pb_padding1;
__u64 pb_last_committed;
__u64 pb_transno;
__u32 pb_flags;
@@ -1112,8 +985,11 @@ struct ptlrpc_body_v3 {
__u64 pb_slv;
/* VBR: pre-versions */
__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+ __u64 pb_mbits; /**< match bits for bulk request */
/* padding for future needs */
- __u64 pb_padding[4];
+ __u64 pb_padding64_0;
+ __u64 pb_padding64_1;
+ __u64 pb_padding64_2;
char pb_jobid[LUSTRE_JOBID_SIZE];
};
@@ -1125,8 +1001,10 @@ struct ptlrpc_body_v2 {
__u32 pb_version;
__u32 pb_opc;
__u32 pb_status;
- __u64 pb_last_xid;
- __u64 pb_last_seen;
+ __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
+ __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */
+ __u16 pb_padding0;
+ __u32 pb_padding1;
__u64 pb_last_committed;
__u64 pb_transno;
__u32 pb_flags;
@@ -1140,12 +1018,13 @@ struct ptlrpc_body_v2 {
__u64 pb_slv;
/* VBR: pre-versions */
__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+ __u64 pb_mbits; /**< unused in V2 */
/* padding for future needs */
- __u64 pb_padding[4];
+ __u64 pb_padding64_0;
+ __u64 pb_padding64_1;
+ __u64 pb_padding64_2;
};
-void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
-
/* message body offset for lustre_msg_v2 */
/* ptlrpc body offset in all request/reply messages */
#define MSG_PTLRPC_BODY_OFF 0
@@ -1282,7 +1161,16 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
*/
#define OBD_CONNECT_LFSCK 0x40000000000000ULL/* support online LFSCK */
#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */
+#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify
+ * RPCs in parallel
+ */
#define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL/* striped DNE dir */
+#define OBD_CONNECT_SUBTREE 0x800000000000000ULL /* fileset mount */
+#define OBD_CONNECT_LOCK_AHEAD 0x1000000000000000ULL /* lock ahead */
+/** bulk matchbits is sent within ptlrpc_body */
+#define OBD_CONNECT_BULK_MBITS 0x2000000000000000ULL
+#define OBD_CONNECT_OBDOPACK 0x4000000000000000ULL /* compact OUT obdo */
+#define OBD_CONNECT_FLAGS2 0x8000000000000000ULL /* second flags word */
/* XXX README XXX:
* Please DO NOT add flag values here before first ensuring that this same
@@ -1313,25 +1201,6 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
* If we eventually have separate connect data for different types, which we
* almost certainly will, then perhaps we stick a union in here.
*/
-struct obd_connect_data_v1 {
- __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
- __u32 ocd_version; /* lustre release version number */
- __u32 ocd_grant; /* initial cache grant amount (bytes) */
- __u32 ocd_index; /* LOV index to connect to */
- __u32 ocd_brw_size; /* Maximum BRW size in bytes, must be 2^n */
- __u64 ocd_ibits_known; /* inode bits this client understands */
- __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */
- __u8 ocd_inodespace; /* log2 of the per-inode space consumption */
- __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */
- __u32 ocd_unused; /* also fix lustre_swab_connect */
- __u64 ocd_transno; /* first transno from client to be replayed */
- __u32 ocd_group; /* MDS group on OST */
- __u32 ocd_cksum_types; /* supported checksum algorithms */
- __u32 ocd_max_easize; /* How big LOV EA can be on MDS */
- __u32 ocd_instance; /* also fix lustre_swab_connect */
- __u64 ocd_maxbytes; /* Maximum stripe size in bytes */
-};
-
struct obd_connect_data {
__u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
__u32 ocd_version; /* lustre release version number */
@@ -1354,8 +1223,10 @@ struct obd_connect_data {
* any field after ocd_maxbytes on the receiver without a valid flag
* may result in out-of-bound memory access and kernel oops.
*/
- __u64 padding1; /* added 2.1.0. also fix lustre_swab_connect */
- __u64 padding2; /* added 2.1.0. also fix lustre_swab_connect */
+ __u16 ocd_maxmodrpcs; /* Maximum modify RPCs in parallel */
+ __u16 padding0; /* added 2.1.0. also fix lustre_swab_connect */
+ __u32 padding1; /* added 2.1.0. also fix lustre_swab_connect */
+ __u64 ocd_connect_flags2;
__u64 padding3; /* added 2.1.0. also fix lustre_swab_connect */
__u64 padding4; /* added 2.1.0. also fix lustre_swab_connect */
__u64 padding5; /* added 2.1.0. also fix lustre_swab_connect */
@@ -1380,8 +1251,6 @@ struct obd_connect_data {
* reserve the flag for future use.
*/
-void lustre_swab_connect(struct obd_connect_data *ocd);
-
/*
* Supported checksum algorithms. Up to 32 checksum types are supported.
* (32-bit mask stored in obd_connect_data::ocd_cksum_types)
@@ -1416,7 +1285,7 @@ enum ost_cmd {
OST_STATFS = 13,
OST_SYNC = 16,
OST_SET_INFO = 17,
- OST_QUOTACHECK = 18,
+ OST_QUOTACHECK = 18, /* not used since 2.4 */
OST_QUOTACTL = 19,
OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
OST_LAST_OPC
@@ -1580,8 +1449,6 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
}
-/* extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); */
-
#define MAX_MD_SIZE \
(sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
#define MIN_MD_SIZE \
@@ -1674,7 +1541,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
#define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */
#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */
/*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */
-#define OBD_MD_FLCOOKIE (0x00800000ULL) /* log cancellation cookie */
+/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */
#define OBD_MD_FLGROUP (0x01000000ULL) /* group */
#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */
#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */
@@ -1713,7 +1580,9 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
/* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
-#define OBD_MD_FLRELEASED (0x0020000000000000ULL) /* file released */
+#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent
+ * executed
+ */
#define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */
@@ -1742,11 +1611,6 @@ struct hsm_state_set {
__u64 hss_clearmask;
};
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_state_set(struct hsm_state_set *hss);
-
-void lustre_swab_obd_statfs(struct obd_statfs *os);
-
/* ost_body.data values for OST_BRW */
#define OBD_BRW_READ 0x01
@@ -1786,14 +1650,16 @@ struct obd_ioobj {
__u32 ioo_bufcnt; /* number of niobufs for this object */
};
+/*
+ * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in
+ * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS.
+ * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits.
+ */
#define IOOBJ_MAX_BRW_BITS 16
-#define IOOBJ_TYPE_MASK ((1U << IOOBJ_MAX_BRW_BITS) - 1)
#define ioobj_max_brw_get(ioo) (((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1)
#define ioobj_max_brw_set(ioo, num) \
do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0)
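
The two macros above round-trip: the set side stores num - 1 in the high 16 bits of ioo_max_brw, and the get side shifts it back down and re-adds 1. A short sketch:

    struct obd_ioobj ioo;

    ioobj_max_brw_set(&ioo, 16);    /* ioo_max_brw = 15 << 16 */
    /* ioobj_max_brw_get(&ioo) == ((15 << 16) >> 16) + 1 == 16 */
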
-void lustre_swab_obd_ioobj(struct obd_ioobj *ioo);
-
/* multiple of 8 bytes => can array */
struct niobuf_remote {
__u64 rnb_offset;
@@ -1801,8 +1667,6 @@ struct niobuf_remote {
__u32 rnb_flags;
};
-void lustre_swab_niobuf_remote(struct niobuf_remote *nbr);
-
/* lock value block communicated between the filter and llite */
/* OST_LVB_ERR_INIT is needed because the return code in rc is
@@ -1824,8 +1688,6 @@ struct ost_lvb_v1 {
__u64 lvb_blocks;
};
-void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb);
-
struct ost_lvb {
__u64 lvb_size;
__s64 lvb_mtime;
@@ -1838,8 +1700,6 @@ struct ost_lvb {
__u32 lvb_padding;
};
-void lustre_swab_ost_lvb(struct ost_lvb *lvb);
-
/*
* lquota data structures
*/
@@ -1866,8 +1726,6 @@ struct obd_quotactl {
struct obd_dqblk qc_dqblk;
};
-void lustre_swab_obd_quotactl(struct obd_quotactl *q);
-
#define Q_COPY(out, in, member) (out)->member = (in)->member
#define QCTL_COPY(out, in) \
@@ -1905,8 +1763,6 @@ struct lquota_lvb {
__u64 lvb_pad1;
};
-void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
-
/* op codes */
enum quota_cmd {
QUOTA_DQACQ = 601,
@@ -1933,9 +1789,9 @@ enum mds_cmd {
MDS_PIN = 42, /* obsolete, never used in a release */
MDS_UNPIN = 43, /* obsolete, never used in a release */
MDS_SYNC = 44,
- MDS_DONE_WRITING = 45,
+ MDS_DONE_WRITING = 45, /* obsolete since 2.8.0 */
MDS_SET_INFO = 46,
- MDS_QUOTACHECK = 47,
+ MDS_QUOTACHECK = 47, /* not used since 2.4 */
MDS_QUOTACTL = 48,
MDS_GETXATTR = 49,
MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */
@@ -1972,8 +1828,6 @@ enum mdt_reint_cmd {
REINT_MAX
};
-void lustre_swab_generic_32s(__u32 *val);
-
/* the disposition of the intent outlines what was executed */
#define DISP_IT_EXECD 0x00000001
#define DISP_LOOKUP_EXECD 0x00000002
@@ -2031,36 +1885,19 @@ enum {
#define MDS_STATUS_CONN 1
#define MDS_STATUS_LOV 2
-/* mdt_thread_info.mti_flags. */
-enum md_op_flags {
- /* The flag indicates Size-on-MDS attributes are changed. */
- MF_SOM_CHANGE = (1 << 0),
- /* Flags indicates an epoch opens or closes. */
- MF_EPOCH_OPEN = (1 << 1),
- MF_EPOCH_CLOSE = (1 << 2),
- MF_MDC_CANCEL_FID1 = (1 << 3),
- MF_MDC_CANCEL_FID2 = (1 << 4),
- MF_MDC_CANCEL_FID3 = (1 << 5),
- MF_MDC_CANCEL_FID4 = (1 << 6),
- /* There is a pending attribute update. */
- MF_SOM_AU = (1 << 7),
- /* Cancel OST locks while getattr OST attributes. */
- MF_GETATTR_LOCK = (1 << 8),
- MF_GET_MDT_IDX = (1 << 9),
-};
-
-#define MF_SOM_LOCAL_FLAGS (MF_SOM_CHANGE | MF_EPOCH_OPEN | MF_EPOCH_CLOSE)
-
-#define LUSTRE_BFLAG_UNCOMMITTED_WRITES 0x1
-
/* these should be identical to their EXT4_*_FL counterparts, they are
* redefined here only to avoid dragging in fs/ext4/ext4.h
*/
#define LUSTRE_SYNC_FL 0x00000008 /* Synchronous updates */
#define LUSTRE_IMMUTABLE_FL 0x00000010 /* Immutable file */
#define LUSTRE_APPEND_FL 0x00000020 /* writes to file may only append */
+#define LUSTRE_NODUMP_FL 0x00000040 /* do not dump file */
#define LUSTRE_NOATIME_FL 0x00000080 /* do not update atime */
+#define LUSTRE_INDEX_FL 0x00001000 /* hash-indexed directory */
#define LUSTRE_DIRSYNC_FL 0x00010000 /* dirsync behaviour (dir only) */
+#define LUSTRE_TOPDIR_FL 0x00020000 /* Top of directory hierarchies */
+#define LUSTRE_DIRECTIO_FL 0x00100000 /* Use direct i/o */
+#define LUSTRE_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
* for the client inode i_flags. The LUSTRE_*_FL are the Lustre wire
@@ -2113,7 +1950,7 @@ struct mdt_body {
__u32 mbo_mode;
__u32 mbo_uid;
__u32 mbo_gid;
- __u32 mbo_flags;
+ __u32 mbo_flags; /* LUSTRE_*_FL file attributes */
__u32 mbo_rdev;
__u32 mbo_nlink; /* #bytes to read in the case of MDS_READPAGE */
__u32 mbo_unused2; /* was "generation" until 2.4.0 */
@@ -2121,7 +1958,7 @@ struct mdt_body {
__u32 mbo_eadatasize;
__u32 mbo_aclsize;
__u32 mbo_max_mdsize;
- __u32 mbo_max_cookiesize;
+ __u32 mbo_unused3; /* was max_cookiesize until 2.8 */
__u32 mbo_uid_h; /* high 32-bits of uid, for FUID */
__u32 mbo_gid_h; /* high 32-bits of gid, for FUID */
__u32 mbo_padding_5; /* also fix lustre_swab_mdt_body */
@@ -2132,17 +1969,13 @@ struct mdt_body {
__u64 mbo_padding_10;
}; /* 216 */
-void lustre_swab_mdt_body(struct mdt_body *b);
-
struct mdt_ioepoch {
- struct lustre_handle handle;
- __u64 ioepoch;
- __u32 flags;
- __u32 padding;
+ struct lustre_handle mio_handle;
+ __u64 mio_unused1; /* was ioepoch */
+ __u32 mio_unused2; /* was flags */
+ __u32 mio_padding;
};
-void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b);
-
/* permissions for md_perm.mp_perm */
enum {
CFS_SETUID_PERM = 0x01,
@@ -2178,8 +2011,6 @@ struct mdt_rec_setattr {
__u32 sa_padding_5;
};
-void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
-
/*
* Attribute flags used in mdt_rec_setattr::sa_valid.
* The kernel's #defines for ATTR_* should not be used over the network
@@ -2207,12 +2038,9 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
#define MDS_FMODE_CLOSED 00000000
#define MDS_FMODE_EXEC 00000004
-/* IO Epoch is opened on a closed file. */
-#define MDS_FMODE_EPOCH 01000000
-/* IO Epoch is opened on a file truncate. */
-#define MDS_FMODE_TRUNC 02000000
-/* Size-on-MDS Attribute Update is pending. */
-#define MDS_FMODE_SOM 04000000
+/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */
+/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */
+/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */
#define MDS_OPEN_CREATED 00000010
#define MDS_OPEN_CROSS 00000020
@@ -2258,7 +2086,7 @@ enum mds_op_bias {
MDS_CROSS_REF = 1 << 1,
MDS_VTX_BYPASS = 1 << 2,
MDS_PERM_BYPASS = 1 << 3,
- MDS_SOM = 1 << 4,
+/* MDS_SOM = 1 << 4, obsolete since 2.8.0 */
MDS_QUOTA_IGNORE = 1 << 5,
MDS_CLOSE_CLEANUP = 1 << 6,
MDS_KEEP_ORPHAN = 1 << 7,
@@ -2268,6 +2096,7 @@ enum mds_op_bias {
MDS_OWNEROVERRIDE = 1 << 11,
MDS_HSM_RELEASE = 1 << 12,
MDS_RENAME_MIGRATE = BIT(13),
+ MDS_CLOSE_LAYOUT_SWAP = BIT(14),
};
/* instance of mdt_reint_rec */
@@ -2456,8 +2285,6 @@ struct mdt_rec_reint {
__u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */
};
-void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
-
/* lmv structures */
struct lmv_desc {
__u32 ld_tgt_count; /* how many MDS's */
@@ -2547,8 +2374,6 @@ union lmv_mds_md {
struct lmv_user_md lmv_user_md;
};
-void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
-
static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic)
{
ssize_t len = -EINVAL;
@@ -2652,8 +2477,6 @@ struct lov_desc {
#define ld_magic ld_active_tgt_count /* for swabbing from llogs */
-void lustre_swab_lov_desc(struct lov_desc *ld);
-
/*
* LDLM requests:
*/
@@ -2749,24 +2572,38 @@ struct ldlm_flock_wire {
* on the resource type.
*/
-typedef union {
+union ldlm_wire_policy_data {
struct ldlm_extent l_extent;
struct ldlm_flock_wire l_flock;
struct ldlm_inodebits l_inodebits;
-} ldlm_wire_policy_data_t;
+};
union ldlm_gl_desc {
struct ldlm_gl_lquota_desc lquota_desc;
};
-void lustre_swab_gl_desc(union ldlm_gl_desc *);
+enum ldlm_intent_flags {
+ IT_OPEN = BIT(0),
+ IT_CREAT = BIT(1),
+ IT_OPEN_CREAT = BIT(1) | BIT(0),
+ IT_READDIR = BIT(2),
+ IT_GETATTR = BIT(3),
+ IT_LOOKUP = BIT(4),
+ IT_UNLINK = BIT(5),
+ IT_TRUNC = BIT(6),
+ IT_GETXATTR = BIT(7),
+ IT_EXEC = BIT(8),
+ IT_PIN = BIT(9),
+ IT_LAYOUT = BIT(10),
+ IT_QUOTA_DQACQ = BIT(11),
+ IT_QUOTA_CONN = BIT(12),
+ IT_SETXATTR = BIT(13),
+};
struct ldlm_intent {
__u64 opc;
};
-void lustre_swab_ldlm_intent(struct ldlm_intent *i);
-
struct ldlm_resource_desc {
enum ldlm_type lr_type;
__u32 lr_padding; /* also fix lustre_swab_ldlm_resource_desc */
@@ -2777,7 +2614,7 @@ struct ldlm_lock_desc {
struct ldlm_resource_desc l_resource;
enum ldlm_mode l_req_mode;
enum ldlm_mode l_granted_mode;
- ldlm_wire_policy_data_t l_policy_data;
+ union ldlm_wire_policy_data l_policy_data;
};
#define LDLM_LOCKREQ_HANDLES 2
@@ -2790,8 +2627,6 @@ struct ldlm_request {
struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
};
-void lustre_swab_ldlm_request(struct ldlm_request *rq);
-
/* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available.
* Otherwise, 2 are available.
*/
@@ -2813,8 +2648,6 @@ struct ldlm_reply {
__u64 lock_policy_res2;
};
-void lustre_swab_ldlm_reply(struct ldlm_reply *r);
-
#define ldlm_flags_to_wire(flags) ((__u32)(flags))
#define ldlm_flags_from_wire(flags) ((__u64)(flags))
@@ -2858,8 +2691,6 @@ struct mgs_target_info {
char mti_params[MTI_PARAM_MAXLEN];
};
-void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
-
struct mgs_nidtbl_entry {
__u64 mne_version; /* table version of this entry */
__u32 mne_instance; /* target instance # */
@@ -2874,8 +2705,6 @@ struct mgs_nidtbl_entry {
} u;
};
-void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
-
struct mgs_config_body {
char mcb_name[MTI_NAME_MAXLEN]; /* logname */
__u64 mcb_offset; /* next index of config log to request */
@@ -2885,15 +2714,11 @@ struct mgs_config_body {
__u32 mcb_units; /* # of units for bulk transfer */
};
-void lustre_swab_mgs_config_body(struct mgs_config_body *body);
-
struct mgs_config_res {
__u64 mcr_offset; /* index of last config log */
__u64 mcr_size; /* size of the log */
};
-void lustre_swab_mgs_config_res(struct mgs_config_res *body);
-
/* Config marker flags (in config log) */
#define CM_START 0x01
#define CM_END 0x02
@@ -2913,8 +2738,6 @@ struct cfg_marker {
char cm_comment[MTI_NAME_MAXLEN];
};
-void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size);
-
/*
* Opcodes for multiple servers.
*/
@@ -2922,7 +2745,7 @@ void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size);
enum obd_cmd {
OBD_PING = 400,
OBD_LOG_CANCEL,
- OBD_QC_CALLBACK,
+ OBD_QC_CALLBACK, /* not used since 2.4 */
OBD_IDX_READ,
OBD_LAST_OPC
};
@@ -3155,23 +2978,32 @@ struct llog_gen_rec {
struct llog_rec_tail lgr_tail;
};
-/* On-disk header structure of each log object, stored in little endian order */
-#define LLOG_CHUNK_SIZE 8192
-#define LLOG_HEADER_SIZE (96)
-#define LLOG_BITMAP_BYTES (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE)
-
-#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */
-
/* flags for the logs */
enum llog_flag {
LLOG_F_ZAP_WHEN_EMPTY = 0x1,
LLOG_F_IS_CAT = 0x2,
LLOG_F_IS_PLAIN = 0x4,
LLOG_F_EXT_JOBID = BIT(3),
+ LLOG_F_IS_FIXSIZE = BIT(4),
+ /*
+ * Note: Flags covered by LLOG_F_EXT_MASK will be inherited from
+ * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here,
+ * because catlog records are usually fixed-size, while their plain
+ * log records can be variable-sized.
+ */
LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID,
};
+/* On-disk header structure of each log object, stored in little endian order */
+#define LLOG_MIN_CHUNK_SIZE 8192
+#define LLOG_HEADER_SIZE (96) /* sizeof (llog_log_hdr) +
+ * sizeof(llh_tail) - sizeof(llh_bitmap)
+ */
+#define LLOG_BITMAP_BYTES (LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE)
+#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */
+
struct llog_log_hdr {
struct llog_rec_hdr llh_hdr;
__s64 llh_timestamp;
@@ -3183,13 +3015,30 @@ struct llog_log_hdr {
/* for a catalog the first plain slot is next to it */
struct obd_uuid llh_tgtuuid;
__u32 llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23];
+ /* These fields must always be at the end of the llog_log_hdr.
+ * Note: llh_bitmap size is variable because llog chunk size could be
+ * bigger than LLOG_MIN_CHUNK_SIZE, i.e. sizeof(llog_log_hdr) > 8192
+ * bytes, and the real size is stored in llh_hdr.lrh_len, which means
+ * llh_tail should only be referred by LLOG_HDR_TAIL().
+ * But this structure is also used by client/server llog interface
+ * (see llog_client.c), it will be kept in its original way to avoid
+ * compatibility issue.
+ */
__u32 llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)];
struct llog_rec_tail llh_tail;
} __packed;
-#define LLOG_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \
- llh->llh_bitmap_offset - \
- sizeof(llh->llh_tail)) * 8)
+#undef LLOG_HEADER_SIZE
+#undef LLOG_BITMAP_BYTES
+
+#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \
+ llh->llh_bitmap_offset - \
+ sizeof(llh->llh_tail)) * 8)
+#define LLOG_HDR_BITMAP(llh) (__u32 *)((char *)(llh) + \
+ (llh)->llh_bitmap_offset)
+#define LLOG_HDR_TAIL(llh) ((struct llog_rec_tail *)((char *)llh + \
+ llh->llh_hdr.lrh_len - \
+ sizeof(llh->llh_tail)))
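
With a variable-sized header, the tail can no longer be reached through llh->llh_tail directly; a hedged sketch of validating a header via the new accessor, assuming llh_hdr.lrh_len holds the real chunk size:

    /* Illustrative check only: the tail must mirror the record header's
     * length and index for the on-disk header to be self-consistent.
     */
    static bool example_llog_hdr_ok(struct llog_log_hdr *llh)
    {
            struct llog_rec_tail *tail = LLOG_HDR_TAIL(llh);

            return tail->lrt_len == llh->llh_hdr.lrh_len &&
                   tail->lrt_index == llh->llh_hdr.lrh_index;
    }
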
/** log cookies are used to reference a specific log file and a record
* therein
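A minimal sketch of why the accessor macros above matter, assuming the llog_log_hdr fields shown in this hunk; llog_rec_is_set is an illustrative helper, not part of the patch:

static inline bool llog_rec_is_set(struct llog_log_hdr *llh, int index)
{
	/* The bitmap and tail float with the real chunk size stored in
	 * llh_hdr.lrh_len, so headers larger than LLOG_MIN_CHUNK_SIZE must
	 * not touch llh->llh_bitmap or llh->llh_tail directly.
	 */
	__u32 *bitmap = LLOG_HDR_BITMAP(llh);

	return (bitmap[index / 32] >> (index % 32)) & 1;
}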
@@ -3259,7 +3108,8 @@ struct obdo {
__u32 o_parent_ver;
struct lustre_handle o_handle; /* brw: lock handle to prolong locks
*/
- struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS
+ struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS,
+ * obsolete in 2.8, reused in OSP
*/
__u32 o_uid_h;
__u32 o_gid_h;
@@ -3333,30 +3183,11 @@ struct ost_body {
/* Key for FIEMAP to be used in get_info calls */
struct ll_fiemap_info_key {
- char name[8];
- struct obdo oa;
- struct ll_user_fiemap fiemap;
+ char lfik_name[8];
+ struct obdo lfik_oa;
+ struct fiemap lfik_fiemap;
};
-void lustre_swab_ost_body(struct ost_body *b);
-void lustre_swab_ost_last_id(__u64 *id);
-void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
-
-void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
-void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
-void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
- int stripe_count);
-void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
-
-/* llog_swab.c */
-void lustre_swab_llogd_body(struct llogd_body *d);
-void lustre_swab_llog_hdr(struct llog_log_hdr *h);
-void lustre_swab_llogd_conn_body(struct llogd_conn_body *d);
-void lustre_swab_llog_rec(struct llog_rec_hdr *rec);
-
-struct lustre_cfg;
-void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
-
/* Functions for dumping PTLRPC fields */
void dump_rniobuf(struct niobuf_remote *rnb);
void dump_ioo(struct obd_ioobj *nb);
@@ -3394,8 +3225,6 @@ struct lustre_capa {
__u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */
} __packed;
-void lustre_swab_lustre_capa(struct lustre_capa *c);
-
/** lustre_capa::lc_opc */
enum {
CAPA_OPC_BODY_WRITE = 1 << 0, /**< write object data */
@@ -3458,8 +3287,6 @@ struct getinfo_fid2path {
char gf_path[0];
} __packed;
-void lustre_swab_fid2path(struct getinfo_fid2path *gf);
-
/** path2parent request/reply structures */
struct getparent {
struct lu_fid gp_fid; /**< parent FID */
@@ -3486,8 +3313,6 @@ struct layout_intent {
__u64 li_end;
};
-void lustre_swab_layout_intent(struct layout_intent *li);
-
/**
* On the wire version of hsm_progress structure.
*
@@ -3506,13 +3331,6 @@ struct hsm_progress_kernel {
__u64 hpk_padding2;
} __packed;
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_current_action(struct hsm_current_action *action);
-void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk);
-void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
-void lustre_swab_hsm_user_item(struct hsm_user_item *hui);
-void lustre_swab_hsm_request(struct hsm_request *hr);
-
/** layout swap request structure
* fid1 and fid2 are in mdt_body
*/
@@ -3520,8 +3338,6 @@ struct mdc_swap_layouts {
__u64 msl_flags;
} __packed;
-void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl);
-
struct close_data {
struct lustre_handle cd_handle;
struct lu_fid cd_fid;
@@ -3529,7 +3345,5 @@ struct close_data {
__u64 cd_reserved[8];
};
-void lustre_swab_close_data(struct close_data *data);
-
#endif
/** @} lustreidl */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
index f3d7c94c3b50..eb08df33b2db 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
@@ -363,8 +363,8 @@ obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, int max_len)
/* OBD_IOC_LOV_GETSTRIPE 155 LL_IOC_LOV_GETSTRIPE */
/* OBD_IOC_LOV_SETEA 156 LL_IOC_LOV_SETEA */
/* lustre/lustre_user.h 157-159 */
-#define OBD_IOC_QUOTACHECK _IOW('f', 160, int)
-#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *)
+/* OBD_IOC_QUOTACHECK _IOW('f', 160, int) */
+/* OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) */
#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl)
/* lustre/lustre_user.h 163-176 */
#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data)
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
index 6fc985571cba..3301ad652db1 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -63,9 +63,13 @@
#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
typedef struct stat64 lstat_t;
#define lstat_f lstat64
+#define fstat_f fstat64
+#define fstatat_f fstatat64
#else
typedef struct stat lstat_t;
#define lstat_f lstat
+#define fstat_f fstat
+#define fstatat_f fstatat
#endif
#define HAVE_LOV_USER_MDS_DATA
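A small sketch of what the new aliases buy, assuming this header plus <sys/stat.h>; file_size is an illustrative helper. The same source now compiles against struct stat64 (32-bit, or __ARCH_WANT_STAT64) and struct stat (64-bit):

static int file_size(const char *path, long long *size)
{
	lstat_t st;	/* struct stat or struct stat64, per the #if above */

	if (lstat_f(path, &st) != 0)
		return -1;
	*size = (long long)st.st_size;
	return 0;
}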
@@ -82,7 +86,6 @@ typedef struct stat lstat_t;
#define FSFILT_IOC_SETVERSION _IOW('f', 4, long)
#define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long)
#define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long)
-#define FSFILT_IOC_FIEMAP _IOWR('f', 11, struct ll_user_fiemap)
#endif
/* FIEMAP flags supported by Lustre */
@@ -235,7 +238,7 @@ struct ost_id {
/* #define LL_IOC_POLL_QUOTACHECK 161 OBD_IOC_POLL_QUOTACHECK */
/* #define LL_IOC_QUOTACTL 162 OBD_IOC_QUOTACTL */
#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
-#define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *)
+/* IOC_LOV_GETINFO 165 obsolete */
#define LL_IOC_FLUSHCTX _IOW('f', 166, long)
/* LL_IOC_RMTACL 167 obsolete */
#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long)
@@ -343,6 +346,9 @@ enum ll_lease_type {
#define LOV_ALL_STRIPES 0xffff /* only valid for directories */
#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
+#define XATTR_LUSTRE_PREFIX "lustre."
+#define XATTR_LUSTRE_LOV "lustre.lov"
+
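A hedged userspace sketch of the new xattr names: reading a file's striping via getxattr(2) with XATTR_LUSTRE_LOV, assuming the lov_user_md_v1 layout from this header (print_stripe_count and the buffer size are illustrative):

#include <stdio.h>
#include <sys/xattr.h>

static int print_stripe_count(const char *path)
{
	char buf[4096];	/* room for lov_user_md plus per-stripe objects */
	struct lov_user_md_v1 *lum = (struct lov_user_md_v1 *)buf;

	if (getxattr(path, XATTR_LUSTRE_LOV, buf, sizeof(buf)) < 0)
		return -1;
	printf("stripe count: %u\n", (unsigned int)lum->lmm_stripe_count);
	return 0;
}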
#define lov_user_ost_data lov_user_ost_data_v1
struct lov_user_ost_data_v1 { /* per-stripe data structure */
struct ost_id l_ost_oi; /* OST object ID */
@@ -451,8 +457,6 @@ static inline int lmv_user_md_size(int stripes, int lmm_magic)
stripes * sizeof(struct lmv_user_mds_data);
}
-void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
-
struct ll_recreate_obj {
__u64 lrc_id;
__u32 lrc_ost_idx;
@@ -522,25 +526,20 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
}
/* printf display format
- * e.g. printf("file FID is "DFID"\n", PFID(fid));
+ * usage: printf("file FID is "DFID"\n", PFID(fid));
*/
#define FID_NOBRACE_LEN 40
#define FID_LEN (FID_NOBRACE_LEN + 2)
#define DFID_NOBRACE "%#llx:0x%x:0x%x"
#define DFID "["DFID_NOBRACE"]"
-#define PFID(fid) \
- (fid)->f_seq, \
- (fid)->f_oid, \
- (fid)->f_ver
+#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver
-/* scanf input parse format -- strip '[' first.
- * e.g. sscanf(fidstr, SFID, RFID(&fid));
+/* scanf input parse format for FIDs in DFID_NOBRACE form.
+ * The caller must strip '[' from a DFID string first, or scan "["SFID"]".
+ * usage: sscanf(fidstr, SFID, RFID(&fid));
*/
#define SFID "0x%llx:0x%x:0x%x"
-#define RFID(fid) \
- &((fid)->f_seq), \
- &((fid)->f_oid), \
- &((fid)->f_ver)
+#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver)
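A short sketch of the usage lines in the comments above, assuming <stdio.h> and a struct lu_fid with f_seq/f_oid/f_ver; fid_format_demo is illustrative:

static void fid_format_demo(struct lu_fid *fid)
{
	char buf[FID_LEN + 1];
	struct lu_fid parsed;

	/* DFID adds the brackets: e.g. "[0x200000401:0x1:0x0]" */
	snprintf(buf, sizeof(buf), DFID, PFID(fid));

	/* SFID parses the bracket-less form, so skip the leading '[' */
	if (sscanf(buf + 1, SFID, RFID(&parsed)) != 3)
		return;	/* parse error */
}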
/********* Quotas **********/
@@ -551,23 +550,18 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
#define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */
/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
-#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */
-#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */
+#define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */
+#define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */
#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */
#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */
#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */
#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */
/* lustre-specific control commands */
-#define LUSTRE_Q_INVALIDATE 0x80000b /* invalidate quota data */
-#define LUSTRE_Q_FINVALIDATE 0x80000c /* invalidate filter quota data */
+#define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */
+#define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */
#define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */
-struct if_quotacheck {
- char obd_type[16];
- struct obd_uuid obd_uuid;
-};
-
#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
/* permission */
@@ -649,6 +643,7 @@ struct if_quotactl {
#define SWAP_LAYOUTS_CHECK_DV2 (1 << 1)
#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2)
#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3)
+#define SWAP_LAYOUTS_CLOSE BIT(4)
/* Swap XATTR_NAME_HSM as well, only on the MDT so far */
#define SWAP_LAYOUTS_MDS_HSM (1 << 31)
@@ -999,6 +994,7 @@ struct ioc_data_version {
* See HSM_FLAGS below.
*/
enum hsm_states {
+ HS_NONE = 0x00000000,
HS_EXISTS = 0x00000001,
HS_DIRTY = 0x00000002,
HS_RELEASED = 0x00000004,
diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h
index 567c438e93cb..300e96fb032a 100644
--- a/drivers/staging/lustre/lustre/include/lustre_compat.h
+++ b/drivers/staging/lustre/lustre/include/lustre_compat.h
@@ -74,4 +74,6 @@
# define ext2_find_next_zero_bit find_next_zero_bit_le
#endif
+#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+
#endif /* _LUSTRE_COMPAT_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index d03534432624..b7e61d082e55 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -59,7 +59,7 @@ struct obd_device;
#define OBD_LDLM_DEVICENAME "ldlm"
#define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus())
-#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(36000))
+#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(3900)) /* 65 min */
#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
/**
@@ -86,10 +86,10 @@ enum ldlm_error {
* decisions about lack of conflicts or do any autonomous lock granting without
* first speaking to a server.
*/
-typedef enum {
+enum ldlm_side {
LDLM_NAMESPACE_SERVER = 1 << 0,
LDLM_NAMESPACE_CLIENT = 1 << 1
-} ldlm_side_t;
+};
/**
* The blocking callback is overloaded to perform two functions. These flags
@@ -359,7 +359,7 @@ struct ldlm_namespace {
struct obd_device *ns_obd;
/** Flag indicating if namespace is on client instead of server */
- ldlm_side_t ns_client;
+ enum ldlm_side ns_client;
/** Resource hash table for namespace. */
struct cfs_hash *ns_rs_hash;
@@ -550,20 +550,18 @@ struct ldlm_flock {
__u64 owner;
__u64 blocking_owner;
struct obd_export *blocking_export;
- /* Protected by the hash lock */
- __u32 blocking_refs;
__u32 pid;
};
-typedef union {
+union ldlm_policy_data {
struct ldlm_extent l_extent;
struct ldlm_flock l_flock;
struct ldlm_inodebits l_inodebits;
-} ldlm_policy_data_t;
+};
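A one-line sketch of the renamed union in use, covering a whole-file extent; demo_whole_file_policy is illustrative and OBD_OBJECT_EOF is assumed from the obd headers:

static const union ldlm_policy_data demo_whole_file_policy = {
	/* extent lock over the entire object */
	.l_extent = { .start = 0, .end = OBD_OBJECT_EOF },
};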
void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type,
- const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
+ const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy);
enum lvb_type {
LVB_T_NONE = 0,
@@ -692,7 +690,7 @@ struct ldlm_lock {
* Representation of private data specific for a lock type.
* Examples are: extent range for extent lock or bitmask for ibits locks
*/
- ldlm_policy_data_t l_policy_data;
+ union ldlm_policy_data l_policy_data;
/**
* Lock state flags. Protected by lr_lock.
@@ -967,8 +965,8 @@ struct ldlm_ast_work {
* Common ldlm_enqueue parameters
*/
struct ldlm_enqueue_info {
- __u32 ei_type; /** Type of the lock being enqueued. */
- __u32 ei_mode; /** Mode of the lock being enqueued. */
+ enum ldlm_type ei_type; /** Type of the lock being enqueued. */
+ enum ldlm_mode ei_mode; /** Mode of the lock being enqueued. */
void *ei_cb_bl; /** blocking lock callback */
void *ei_cb_cp; /** lock completion callback */
void *ei_cb_gl; /** lock glimpse callback */
@@ -979,7 +977,7 @@ struct ldlm_enqueue_info {
extern struct obd_ops ldlm_obd_ops;
extern char *ldlm_lockname[];
-char *ldlm_it2str(int it);
+const char *ldlm_it2str(enum ldlm_intent_flags it);
/**
* Just a fancy CDEBUG call with log level preset to LDLM_DEBUG.
@@ -1168,16 +1166,18 @@ do { \
struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
void ldlm_lock_put(struct ldlm_lock *lock);
void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
-void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode);
-int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode);
+int ldlm_lock_addref_try(const struct lustre_handle *lockh,
+ enum ldlm_mode mode);
+void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode);
+void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh,
+ enum ldlm_mode mode);
void ldlm_lock_fail_match_locked(struct ldlm_lock *lock);
void ldlm_lock_allow_match(struct ldlm_lock *lock);
void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
const struct ldlm_res_id *,
- enum ldlm_type type, ldlm_policy_data_t *,
+ enum ldlm_type type, union ldlm_policy_data *,
enum ldlm_mode mode, struct lustre_handle *,
int unref);
enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
@@ -1189,7 +1189,7 @@ void ldlm_unlink_lock_skiplist(struct ldlm_lock *req);
/* resource.c */
struct ldlm_namespace *
ldlm_namespace_new(struct obd_device *obd, char *name,
- ldlm_side_t client, enum ldlm_appetite apt,
+ enum ldlm_side client, enum ldlm_appetite apt,
enum ldlm_ns_type ns_type);
int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags);
void ldlm_namespace_get(struct ldlm_namespace *ns);
@@ -1208,7 +1208,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res,
struct ldlm_lock *lock);
void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
-void ldlm_dump_all_namespaces(ldlm_side_t client, int level);
+void ldlm_dump_all_namespaces(enum ldlm_side client, int level);
void ldlm_namespace_dump(int level, struct ldlm_namespace *);
void ldlm_resource_dump(int level, struct ldlm_resource *);
int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
@@ -1241,7 +1241,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
struct ldlm_enqueue_info *einfo,
const struct ldlm_res_id *res_id,
- ldlm_policy_data_t const *policy, __u64 *flags,
+ union ldlm_policy_data const *policy, __u64 *flags,
void *lvb, __u32 lvb_len, enum lvb_type lvb_type,
struct lustre_handle *lockh, int async);
int ldlm_prep_enqueue_req(struct obd_export *exp,
@@ -1265,13 +1265,13 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *,
enum ldlm_cancel_flags flags, void *opaque);
int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
const struct ldlm_res_id *res_id,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
enum ldlm_cancel_flags flags,
void *opaque);
int ldlm_cancel_resource_local(struct ldlm_resource *res,
struct list_head *cancels,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode, __u64 lock_flags,
enum ldlm_cancel_flags cancel_flags,
void *opaque);
@@ -1333,7 +1333,7 @@ int ldlm_pools_init(void);
void ldlm_pools_fini(void);
int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
- int idx, ldlm_side_t client);
+ int idx, enum ldlm_side client);
void ldlm_pool_fini(struct ldlm_pool *pl);
void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock);
void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock);
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
index 316780693193..b5a1aadbcb93 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -150,6 +150,7 @@
#include "../../include/linux/libcfs/libcfs.h"
#include "lustre/lustre_idl.h"
+#include "seq_range.h"
struct lu_env;
struct lu_site;
diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h
index 932410d3e3cc..6ef1b03cb986 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fld.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fld.h
@@ -103,8 +103,6 @@ struct lu_client_fld {
/** Client fld debugfs entry name. */
char lcf_name[LUSTRE_MDT_MAXNAMELEN];
-
- int lcf_flags;
};
/* Client methods */
diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h
index cde7ed702c86..dec1e99d594d 100644
--- a/drivers/staging/lustre/lustre/include/lustre_ha.h
+++ b/drivers/staging/lustre/lustre/include/lustre_ha.h
@@ -53,6 +53,7 @@ void ptlrpc_activate_import(struct obd_import *imp);
void ptlrpc_deactivate_import(struct obd_import *imp);
void ptlrpc_invalidate_import(struct obd_import *imp);
void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt);
+void ptlrpc_pinger_force(struct obd_import *imp);
/** @} ha */
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
index 5461ba33d90c..f0c931ce1a67 100644
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -185,6 +185,11 @@ struct obd_import {
struct list_head *imp_replay_cursor;
/** @} */
+ /** List of not replied requests */
+ struct list_head imp_unreplied_list;
+ /** Known maximal replied XID */
+ __u64 imp_known_replied_xid;
+
/** obd device for this import */
struct obd_device *imp_obd;
@@ -294,7 +299,9 @@ struct obd_import {
*/
imp_force_reconnect:1,
/* import has tried to connect with server */
- imp_connect_tried:1;
+ imp_connect_tried:1,
+ /* connected but not FULL yet */
+ imp_connected:1;
__u32 imp_connect_op;
struct obd_connect_data imp_connect_data;
__u64 imp_connect_flags_orig;
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index 6b231913ba2e..27f3148c4344 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -350,8 +350,6 @@ do { \
l_wait_event_exclusive_head(wq, condition, &lwi); \
})
-#define LIBLUSTRE_CLIENT (0)
-
/** @} lib */
#endif /* _LUSTRE_LIB_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h
index d7f7afa8dfa7..5aa3645e64dc 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lmv.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lmv.h
@@ -76,18 +76,7 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
union lmv_mds_md;
-int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
- const union lmv_mds_md *lmm, int stripe_count);
-
-static inline int lmv_alloc_memmd(struct lmv_stripe_md **lsmp, int stripe_count)
-{
- return lmv_unpack_md(NULL, lsmp, NULL, stripe_count);
-}
-
-static inline void lmv_free_memmd(struct lmv_stripe_md *lsm)
-{
- lmv_unpack_md(NULL, &lsm, NULL, 0);
-}
+void lmv_free_memmd(struct lmv_stripe_md *lsm);
static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
const struct lmv_mds_md_v1 *lmv_src)
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
index 995b266932e3..35e37eb1bc2c 100644
--- a/drivers/staging/lustre/lustre/include/lustre_log.h
+++ b/drivers/staging/lustre/lustre/include/lustre_log.h
@@ -214,6 +214,7 @@ struct llog_handle {
spinlock_t lgh_hdr_lock; /* protect lgh_hdr data */
struct llog_logid lgh_id; /* id of this log */
struct llog_log_hdr *lgh_hdr;
+ size_t lgh_hdr_size;
int lgh_last_idx;
int lgh_cur_idx; /* used during llog_process */
__u64 lgh_cur_offset; /* used during llog_process */
@@ -244,6 +245,11 @@ struct llog_ctxt {
struct mutex loc_mutex; /* protect loc_imp */
atomic_t loc_refcount;
long loc_flags; /* flags, see above defines */
+ /*
+	 * llog chunk size; an llog record cannot be bigger than
+ * loc_chunk_size
+ */
+ __u32 loc_chunk_size;
};
#define LLOG_PROC_BREAK 0x0001
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index 8fc2d3f2dfd6..198ceb0c66f9 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -156,16 +156,39 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
mutex_unlock(&lck->rpcl_mutex);
}
+static inline void mdc_get_mod_rpc_slot(struct ptlrpc_request *req,
+ struct lookup_intent *it)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ u32 opc;
+ u16 tag;
+
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+ tag = obd_get_mod_rpc_slot(cli, opc, it);
+ lustre_msg_set_tag(req->rq_reqmsg, tag);
+}
+
+static inline void mdc_put_mod_rpc_slot(struct ptlrpc_request *req,
+ struct lookup_intent *it)
+{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ u32 opc;
+ u16 tag;
+
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+ tag = lustre_msg_get_tag(req->rq_reqmsg);
+ obd_put_mod_rpc_slot(cli, opc, it, tag);
+}
+
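A hedged sketch of how the two helpers above are meant to pair around a modifying MDC request; the surrounding send logic is illustrative only:

static int demo_send_modifying_rpc(struct ptlrpc_request *req,
				   struct lookup_intent *it)
{
	int rc;

	mdc_get_mod_rpc_slot(req, it);	/* reserve a slot, tag the request */
	rc = ptlrpc_queue_wait(req);	/* send and wait for the reply */
	mdc_put_mod_rpc_slot(req, it);	/* release the slot for that tag */
	return rc;
}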
/**
- * Update the maximum possible easize and cookiesize.
+ * Update the maximum possible easize.
*
- * The values are learned from ptlrpc replies sent by the MDT. The
- * default easize and cookiesize is initialized to the minimum value but
- * allowed to grow up to a single page in size if required to handle the
+ * This value is learned from ptlrpc replies sent by the MDT. The
+ * default easize is initialized to the minimum value but allowed
+ * to grow up to a single page in size if required to handle the
* common case.
*
- * \see client_obd::cl_default_mds_easize and
- * client_obd::cl_default_mds_cookiesize
+ * \see client_obd::cl_default_mds_easize
*
* \param[in] exp export for MDC device
* \param[in] body body of ptlrpc reply from MDT
@@ -176,7 +199,7 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
{
if (body->mbo_valid & OBD_MD_FLMODEASIZE) {
struct client_obd *cli = &exp->exp_obd->u.cli;
- u32 def_cookiesize, def_easize;
+ u32 def_easize;
if (cli->cl_max_mds_easize < body->mbo_max_mdsize)
cli->cl_max_mds_easize = body->mbo_max_mdsize;
@@ -184,13 +207,6 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
def_easize = min_t(__u32, body->mbo_max_mdsize,
OBD_MAX_DEFAULT_EA_SIZE);
cli->cl_default_mds_easize = def_easize;
-
- if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize)
- cli->cl_max_mds_cookiesize = body->mbo_max_cookiesize;
-
- def_cookiesize = min_t(__u32, body->mbo_max_cookiesize,
- OBD_MAX_DEFAULT_COOKIE_SIZE);
- cli->cl_default_mds_cookiesize = def_cookiesize;
}
}
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index e9aba99ee52a..411eb0dc7f38 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -50,6 +50,7 @@
* @{
*/
+#include <linux/uio.h>
#include "../../include/linux/libcfs/libcfs.h"
#include "../../include/linux/lnet/nidstr.h"
#include "../../include/linux/lnet/api.h"
@@ -68,13 +69,17 @@
#define PTLRPC_MD_OPTIONS 0
/**
- * Max # of bulk operations in one request.
+ * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ...
* In order for the client and server to properly negotiate the maximum
* possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two
* value. The client is free to limit the actual RPC size for any bulk
* transfer via cl_max_pages_per_rpc to some non-power-of-two value.
+ * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS.
*/
-#define PTLRPC_BULK_OPS_BITS 2
+#define PTLRPC_BULK_OPS_BITS 4
+#if PTLRPC_BULK_OPS_BITS > 16
+#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS."
+#endif
#define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS)
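A hedged arithmetic check of the comment above, assuming the conventional 1MB brw size (the DEMO_* names are illustrative):

#define DEMO_BRW_SIZE		(1U << 20)	/* assumed 1MB per bulk op */
#define DEMO_MAX_RPC_SIZE	(PTLRPC_BULK_OPS_COUNT * DEMO_BRW_SIZE)
/* PTLRPC_BULK_OPS_BITS: 2 -> 4 ops = 4MB/RPC (old), 4 -> 16 ops = 16MB/RPC
 * (new); the 16-bit ceiling gives 65536 ops = 64GB/RPC.
 */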
/**
* PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and
@@ -437,6 +442,10 @@ struct ptlrpc_reply_state {
unsigned long rs_committed:1;/* the transaction was committed
* and the rs was dispatched
*/
+ atomic_t rs_refcount; /* number of users */
+ /** Number of locks awaiting client ACK */
+ int rs_nlocks;
+
/** Size of the state */
int rs_size;
/** opcode */
@@ -449,7 +458,6 @@ struct ptlrpc_reply_state {
struct ptlrpc_service_part *rs_svcpt;
/** Lnet metadata handle for the reply */
lnet_handle_md_t rs_md_h;
- atomic_t rs_refcount;
/** Context for the service thread */
struct ptlrpc_svc_ctx *rs_svc_ctx;
@@ -466,8 +474,6 @@ struct ptlrpc_reply_state {
*/
struct lustre_msg *rs_msg; /* reply message */
- /** Number of locks awaiting client ACK */
- int rs_nlocks;
/** Handles of locks awaiting client reply ACK */
struct lustre_handle rs_locks[RS_MAX_LOCKS];
/** Lock modes of locks in \a rs_locks */
@@ -515,717 +521,7 @@ struct lu_env;
struct ldlm_lock;
-/**
- * \defgroup nrs Network Request Scheduler
- * @{
- */
-struct ptlrpc_nrs_policy;
-struct ptlrpc_nrs_resource;
-struct ptlrpc_nrs_request;
-
-/**
- * NRS control operations.
- *
- * These are common for all policies.
- */
-enum ptlrpc_nrs_ctl {
- /**
- * Not a valid opcode.
- */
- PTLRPC_NRS_CTL_INVALID,
- /**
- * Activate the policy.
- */
- PTLRPC_NRS_CTL_START,
- /**
- * Reserved for multiple primary policies, which may be a possibility
- * in the future.
- */
- PTLRPC_NRS_CTL_STOP,
- /**
- * Policies can start using opcodes from this value and onwards for
- * their own purposes; the assigned value itself is arbitrary.
- */
- PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
-};
-
-/**
- * ORR policy operations
- */
-enum nrs_ctl_orr {
- NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
- NRS_CTL_ORR_WR_QUANTUM,
- NRS_CTL_ORR_RD_OFF_TYPE,
- NRS_CTL_ORR_WR_OFF_TYPE,
- NRS_CTL_ORR_RD_SUPP_REQ,
- NRS_CTL_ORR_WR_SUPP_REQ,
-};
-
-/**
- * NRS policy operations.
- *
- * These determine the behaviour of a policy, and are called in response to
- * NRS core events.
- */
-struct ptlrpc_nrs_pol_ops {
- /**
- * Called during policy registration; this operation is optional.
- *
- * \param[in,out] policy The policy being initialized
- */
- int (*op_policy_init)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called during policy unregistration; this operation is optional.
- *
- * \param[in,out] policy The policy being unregistered/finalized
- */
- void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called when activating a policy via lprocfs; policies allocate and
- * initialize their resources here; this operation is optional.
- *
- * \param[in,out] policy The policy being started
- *
- * \see nrs_policy_start_locked()
- */
- int (*op_policy_start)(struct ptlrpc_nrs_policy *policy);
- /**
- * Called when deactivating a policy via lprocfs; policies deallocate
- * their resources here; this operation is optional
- *
- * \param[in,out] policy The policy being stopped
- *
- * \see nrs_policy_stop0()
- */
- void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
- /**
- * Used for policy-specific operations; i.e. not generic ones like
- * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
- * to an ioctl; this operation is optional.
- *
- * \param[in,out] policy The policy carrying out operation \a opc
- * \param[in] opc The command operation being carried out
- * \param[in,out] arg An generic buffer for communication between the
- * user and the control operation
- *
- * \retval -ve error
- * \retval 0 success
- *
- * \see ptlrpc_nrs_policy_control()
- */
- int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
- enum ptlrpc_nrs_ctl opc, void *arg);
-
- /**
- * Called when obtaining references to the resources of the resource
- * hierarchy for a request that has arrived for handling at the PTLRPC
- * service. Policies should return -ve for requests they do not wish
- * to handle. This operation is mandatory.
- *
- * \param[in,out] policy The policy we're getting resources for.
- * \param[in,out] nrq The request we are getting resources for.
- * \param[in] parent The parent resource of the resource being
- * requested; set to NULL if none.
- * \param[out] resp The resource is to be returned here; the
- * fallback policy in an NRS head should
- * \e always return a non-NULL pointer value.
- * \param[in] moving_req When set, signifies that this is an attempt
- * to obtain resources for a request being moved
- * to the high-priority NRS head by
- * ldlm_lock_reorder_req().
- * This implies two things:
- * 1. We are under obd_export::exp_rpc_lock and
- * so should not sleep.
- * 2. We should not perform non-idempotent or can
- * skip performing idempotent operations that
- * were carried out when resources were first
- * taken for the request when it was initialized
- * in ptlrpc_nrs_req_initialize().
- *
- * \retval 0, +ve The level of the returned resource in the resource
- * hierarchy; currently only 0 (for a non-leaf resource)
- * and 1 (for a leaf resource) are supported by the
- * framework.
- * \retval -ve error
- *
- * \see ptlrpc_nrs_req_initialize()
- * \see ptlrpc_nrs_hpreq_add_nolock()
- */
- int (*op_res_get)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq,
- const struct ptlrpc_nrs_resource *parent,
- struct ptlrpc_nrs_resource **resp,
- bool moving_req);
- /**
- * Called when releasing references taken for resources in the resource
- * hierarchy for the request; this operation is optional.
- *
- * \param[in,out] policy The policy the resource belongs to
- * \param[in] res The resource to be freed
- *
- * \see ptlrpc_nrs_req_finalize()
- * \see ptlrpc_nrs_hpreq_add_nolock()
- */
- void (*op_res_put)(struct ptlrpc_nrs_policy *policy,
- const struct ptlrpc_nrs_resource *res);
-
- /**
- * Obtains a request for handling from the policy, and optionally
- * removes the request from the policy; this operation is mandatory.
- *
- * \param[in,out] policy The policy to poll
- * \param[in] peek When set, signifies that we just want to
- * examine the request, and not handle it, so the
- * request is not removed from the policy.
- * \param[in] force When set, it will force a policy to return a
- * request if it has one queued.
- *
- * \retval NULL No request available for handling
- * \retval valid-pointer The request polled for handling
- *
- * \see ptlrpc_nrs_req_get_nolock()
- */
- struct ptlrpc_nrs_request *
- (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
- bool force);
- /**
- * Called when attempting to add a request to a policy for later
- * handling; this operation is mandatory.
- *
- * \param[in,out] policy The policy on which to enqueue \a nrq
- * \param[in,out] nrq The request to enqueue
- *
- * \retval 0 success
- * \retval != 0 error
- *
- * \see ptlrpc_nrs_req_add_nolock()
- */
- int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Removes a request from the policy's set of pending requests. Normally
- * called after a request has been polled successfully from the policy
- * for handling; this operation is mandatory.
- *
- * \param[in,out] policy The policy the request \a nrq belongs to
- * \param[in,out] nrq The request to dequeue
- */
- void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Called after the request being carried out. Could be used for
- * job/resource control; this operation is optional.
- *
- * \param[in,out] policy The policy which is stopping to handle request
- * \a nrq
- * \param[in,out] nrq The request
- *
- * \pre assert_spin_locked(&svcpt->scp_req_lock)
- *
- * \see ptlrpc_nrs_req_stop_nolock()
- */
- void (*op_req_stop)(struct ptlrpc_nrs_policy *policy,
- struct ptlrpc_nrs_request *nrq);
- /**
- * Registers the policy's lprocfs interface with a PTLRPC service.
- *
- * \param[in] svc The service
- *
- * \retval 0 success
- * \retval != 0 error
- */
- int (*op_lprocfs_init)(struct ptlrpc_service *svc);
- /**
- * Unegisters the policy's lprocfs interface with a PTLRPC service.
- *
- * In cases of failed policy registration in
- * \e ptlrpc_nrs_policy_register(), this function may be called for a
- * service which has not registered the policy successfully, so
- * implementations of this method should make sure their operations are
- * safe in such cases.
- *
- * \param[in] svc The service
- */
- void (*op_lprocfs_fini)(struct ptlrpc_service *svc);
-};
-
-/**
- * Policy flags
- */
-enum nrs_policy_flags {
- /**
- * Fallback policy, use this flag only on a single supported policy per
- * service. The flag cannot be used on policies that use
- * \e PTLRPC_NRS_FL_REG_EXTERN
- */
- PTLRPC_NRS_FL_FALLBACK = (1 << 0),
- /**
- * Start policy immediately after registering.
- */
- PTLRPC_NRS_FL_REG_START = (1 << 1),
- /**
- * This is a policy registering from a module different to the one NRS
- * core ships in (currently ptlrpc).
- */
- PTLRPC_NRS_FL_REG_EXTERN = (1 << 2),
-};
-
-/**
- * NRS queue type.
- *
- * Denotes whether an NRS instance is for handling normal or high-priority
- * RPCs, or whether an operation pertains to one or both of the NRS instances
- * in a service.
- */
-enum ptlrpc_nrs_queue_type {
- PTLRPC_NRS_QUEUE_REG = (1 << 0),
- PTLRPC_NRS_QUEUE_HP = (1 << 1),
- PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
-};
-
-/**
- * NRS head
- *
- * A PTLRPC service has at least one NRS head instance for handling normal
- * priority RPCs, and may optionally have a second NRS head instance for
- * handling high-priority RPCs. Each NRS head maintains a list of available
- * policies, of which one and only one policy is acting as the fallback policy,
- * and optionally a different policy may be acting as the primary policy. For
- * all RPCs handled by this NRS head instance, NRS core will first attempt to
- * enqueue the RPC using the primary policy (if any). The fallback policy is
- * used in the following cases:
- * - when there was no primary policy in the
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
- * was initialized.
- * - when the primary policy that was at the
- * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
- * RPC was initialized, denoted it did not wish, or for some other reason was
- * not able to handle the request, by returning a non-valid NRS resource
- * reference.
- * - when the primary policy that was at the
- * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the
- * RPC was initialized, fails later during the request enqueueing stage.
- *
- * \see nrs_resource_get_safe()
- * \see nrs_request_enqueue()
- */
-struct ptlrpc_nrs {
- spinlock_t nrs_lock;
- /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
- /**
- * List of registered policies
- */
- struct list_head nrs_policy_list;
- /**
- * List of policies with queued requests. Policies that have any
- * outstanding requests are queued here, and this list is queried
- * in a round-robin manner from NRS core when obtaining a request
- * for handling. This ensures that requests from policies that at some
- * point transition away from the
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
- */
- struct list_head nrs_policy_queued;
- /**
- * Service partition for this NRS head
- */
- struct ptlrpc_service_part *nrs_svcpt;
- /**
- * Primary policy, which is the preferred policy for handling RPCs
- */
- struct ptlrpc_nrs_policy *nrs_policy_primary;
- /**
- * Fallback policy, which is the backup policy for handling RPCs
- */
- struct ptlrpc_nrs_policy *nrs_policy_fallback;
- /**
- * This NRS head handles either HP or regular requests
- */
- enum ptlrpc_nrs_queue_type nrs_queue_type;
- /**
- * # queued requests from all policies in this NRS head
- */
- unsigned long nrs_req_queued;
- /**
- * # scheduled requests from all policies in this NRS head
- */
- unsigned long nrs_req_started;
- /**
- * # policies on this NRS
- */
- unsigned nrs_num_pols;
- /**
- * This NRS head is in progress of starting a policy
- */
- unsigned nrs_policy_starting:1;
- /**
- * In progress of shutting down the whole NRS head; used during
- * unregistration
- */
- unsigned nrs_stopping:1;
-};
-
-#define NRS_POL_NAME_MAX 16
-
-struct ptlrpc_nrs_pol_desc;
-
-/**
- * Service compatibility predicate; this determines whether a policy is adequate
- * for handling RPCs of a particular PTLRPC service.
- *
- * XXX:This should give the same result during policy registration and
- * unregistration, and for all partitions of a service; so the result should not
- * depend on temporal service or other properties, that may influence the
- * result.
- */
-typedef bool (*nrs_pol_desc_compat_t) (const struct ptlrpc_service *svc,
- const struct ptlrpc_nrs_pol_desc *desc);
-
-struct ptlrpc_nrs_pol_conf {
- /**
- * Human-readable policy name
- */
- char nc_name[NRS_POL_NAME_MAX];
- /**
- * NRS operations for this policy
- */
- const struct ptlrpc_nrs_pol_ops *nc_ops;
- /**
- * Service compatibility predicate
- */
- nrs_pol_desc_compat_t nc_compat;
- /**
- * Set for policies that support a single ptlrpc service, i.e. ones that
- * have \a pd_compat set to nrs_policy_compat_one(). The variable value
- * depicts the name of the single service that such policies are
- * compatible with.
- */
- const char *nc_compat_svc_name;
- /**
- * Owner module for this policy descriptor; policies registering from a
- * different module to the one the NRS framework is held within
- * (currently ptlrpc), should set this field to THIS_MODULE.
- */
- struct module *nc_owner;
- /**
- * Policy registration flags; a bitmask of \e nrs_policy_flags
- */
- unsigned nc_flags;
-};
-
-/**
- * NRS policy registering descriptor
- *
- * Is used to hold a description of a policy that can be passed to NRS core in
- * order to register the policy with NRS heads in different PTLRPC services.
- */
-struct ptlrpc_nrs_pol_desc {
- /**
- * Human-readable policy name
- */
- char pd_name[NRS_POL_NAME_MAX];
- /**
- * Link into nrs_core::nrs_policies
- */
- struct list_head pd_list;
- /**
- * NRS operations for this policy
- */
- const struct ptlrpc_nrs_pol_ops *pd_ops;
- /**
- * Service compatibility predicate
- */
- nrs_pol_desc_compat_t pd_compat;
- /**
- * Set for policies that are compatible with only one PTLRPC service.
- *
- * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
- */
- const char *pd_compat_svc_name;
- /**
- * Owner module for this policy descriptor.
- *
- * We need to hold a reference to the module whenever we might make use
- * of any of the module's contents, i.e.
- * - If one or more instances of the policy are at a state where they
- * might be handling a request, i.e.
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
- * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
- * is taken on the module when
- * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
- * becomes 0, so that we hold only one reference to the module maximum
- * at any time.
- *
- * We do not need to hold a reference to the module, even though we
- * might use code and data from the module, in the following cases:
- * - During external policy registration, because this should happen in
- * the module's init() function, in which case the module is safe from
- * removal because a reference is being held on the module by the
- * kernel, and iirc kmod (and I guess module-init-tools also) will
- * serialize any racing processes properly anyway.
- * - During external policy unregistration, because this should happen
- * in a module's exit() function, and any attempts to start a policy
- * instance would need to take a reference on the module, and this is
- * not possible once we have reached the point where the exit()
- * handler is called.
- * - During service registration and unregistration, as service setup
- * and cleanup, and policy registration, unregistration and policy
- * instance starting, are serialized by \e nrs_core::nrs_mutex, so
- * as long as users adhere to the convention of registering policies
- * in init() and unregistering them in module exit() functions, there
- * should not be a race between these operations.
- * - During any policy-specific lprocfs operations, because a reference
- * is held by the kernel on a proc entry that has been entered by a
- * syscall, so as long as proc entries are removed during unregistration time,
- * then unregistration and lprocfs operations will be properly
- * serialized.
- */
- struct module *pd_owner;
- /**
- * Bitmask of \e nrs_policy_flags
- */
- unsigned pd_flags;
- /**
- * # of references on this descriptor
- */
- atomic_t pd_refs;
-};
-
-/**
- * NRS policy state
- *
- * Policies transition from one state to the other during their lifetime
- */
-enum ptlrpc_nrs_pol_state {
- /**
- * Not a valid policy state.
- */
- NRS_POL_STATE_INVALID,
- /**
- * Policies are at this state either at the start of their life, or
- * transition here when the user selects a different policy to act
- * as the primary one.
- */
- NRS_POL_STATE_STOPPED,
- /**
- * Policy is progress of stopping
- */
- NRS_POL_STATE_STOPPING,
- /**
- * Policy is in progress of starting
- */
- NRS_POL_STATE_STARTING,
- /**
- * A policy is in this state in two cases:
- * - it is the fallback policy, which is always in this state.
- * - it has been activated by the user; i.e. it is the primary policy,
- */
- NRS_POL_STATE_STARTED,
-};
-
-/**
- * NRS policy information
- *
- * Used for obtaining information for the status of a policy via lprocfs
- */
-struct ptlrpc_nrs_pol_info {
- /**
- * Policy name
- */
- char pi_name[NRS_POL_NAME_MAX];
- /**
- * Current policy state
- */
- enum ptlrpc_nrs_pol_state pi_state;
- /**
- * # RPCs enqueued for later dispatching by the policy
- */
- long pi_req_queued;
- /**
- * # RPCs started for dispatch by the policy
- */
- long pi_req_started;
- /**
- * Is this a fallback policy?
- */
- unsigned pi_fallback:1;
-};
-
-/**
- * NRS policy
- *
- * There is one instance of this for each policy in each NRS head of each
- * PTLRPC service partition.
- */
-struct ptlrpc_nrs_policy {
- /**
- * Linkage into the NRS head's list of policies,
- * ptlrpc_nrs:nrs_policy_list
- */
- struct list_head pol_list;
- /**
- * Linkage into the NRS head's list of policies with enqueued
- * requests ptlrpc_nrs:nrs_policy_queued
- */
- struct list_head pol_list_queued;
- /**
- * Current state of this policy
- */
- enum ptlrpc_nrs_pol_state pol_state;
- /**
- * Bitmask of nrs_policy_flags
- */
- unsigned pol_flags;
- /**
- * # RPCs enqueued for later dispatching by the policy
- */
- long pol_req_queued;
- /**
- * # RPCs started for dispatch by the policy
- */
- long pol_req_started;
- /**
- * Usage Reference count taken on the policy instance
- */
- long pol_ref;
- /**
- * The NRS head this policy has been created at
- */
- struct ptlrpc_nrs *pol_nrs;
- /**
- * Private policy data; varies by policy type
- */
- void *pol_private;
- /**
- * Policy descriptor for this policy instance.
- */
- struct ptlrpc_nrs_pol_desc *pol_desc;
-};
-
-/**
- * NRS resource
- *
- * Resources are embedded into two types of NRS entities:
- * - Inside NRS policies, in the policy's private data in
- * ptlrpc_nrs_policy::pol_private
- * - In objects that act as prime-level scheduling entities in different NRS
- * policies; e.g. on a policy that performs round robin or similar order
- * scheduling across client NIDs, there would be one NRS resource per unique
- * client NID. On a policy which performs round robin scheduling across
- * backend filesystem objects, there would be one resource associated with
- * each of the backend filesystem objects partaking in the scheduling
- * performed by the policy.
- *
- * NRS resources share a parent-child relationship, in which resources embedded
- * in policy instances are the parent entities, with all scheduling entities
- * a policy schedules across being the children, thus forming a simple resource
- * hierarchy. This hierarchy may be extended with one or more levels in the
- * future if the ability to have more than one primary policy is added.
- *
- * Upon request initialization, references to the then active NRS policies are
- * taken and used to later handle the dispatching of the request with one of
- * these policies.
- *
- * \see nrs_resource_get_safe()
- * \see ptlrpc_nrs_req_add()
- */
-struct ptlrpc_nrs_resource {
- /**
- * This NRS resource's parent; is NULL for resources embedded in NRS
- * policy instances; i.e. those are top-level ones.
- */
- struct ptlrpc_nrs_resource *res_parent;
- /**
- * The policy associated with this resource.
- */
- struct ptlrpc_nrs_policy *res_policy;
-};
-
-enum {
- NRS_RES_FALLBACK,
- NRS_RES_PRIMARY,
- NRS_RES_MAX
-};
-
-/* \name fifo
- *
- * FIFO policy
- *
- * This policy is a logical wrapper around previous, non-NRS functionality.
- * It dispatches RPCs in the same order as they arrive from the network. This
- * policy is currently used as the fallback policy, and the only enabled policy
- * on all NRS heads of all PTLRPC service partitions.
- * @{
- */
-
-/**
- * Private data structure for the FIFO policy
- */
-struct nrs_fifo_head {
- /**
- * Resource object for policy instance.
- */
- struct ptlrpc_nrs_resource fh_res;
- /**
- * List of queued requests.
- */
- struct list_head fh_list;
- /**
- * For debugging purposes.
- */
- __u64 fh_sequence;
-};
-
-struct nrs_fifo_req {
- struct list_head fr_list;
- __u64 fr_sequence;
-};
-
-/** @} fifo */
-
-/**
- * NRS request
- *
- * Instances of this object exist embedded within ptlrpc_request; the main
- * purpose of this object is to hold references to the request's resources
- * for the lifetime of the request, and to hold properties that policies use
- * use for determining the request's scheduling priority.
- */
-struct ptlrpc_nrs_request {
- /**
- * The request's resource hierarchy.
- */
- struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX];
- /**
- * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
- * policy that was used to enqueue the request.
- *
- * \see nrs_request_enqueue()
- */
- unsigned nr_res_idx;
- unsigned nr_initialized:1;
- unsigned nr_enqueued:1;
- unsigned nr_started:1;
- unsigned nr_finalized:1;
-
- /**
- * Policy-specific fields, used for determining a request's scheduling
- * priority, and other supporting functionality.
- */
- union {
- /**
- * Fields for the FIFO policy
- */
- struct nrs_fifo_req fifo;
- } nr_u;
- /**
- * Externally-registering policies may want to use this to allocate
- * their own request properties.
- */
- void *ext;
-};
-
-/** @} nrs */
+#include "lustre_nrs.h"
/**
* Basic request prioritization operations structure.
@@ -1304,6 +600,8 @@ struct ptlrpc_cli_req {
union ptlrpc_async_args cr_async_args;
/** Opaq data for replay and commit callbacks. */
void *cr_cb_data;
+ /** Link to the imp->imp_unreplied_list */
+ struct list_head cr_unreplied_list;
/**
* Commit callback, called when request is committed and about to be
* freed.
@@ -1343,6 +641,7 @@ struct ptlrpc_cli_req {
#define rq_interpret_reply rq_cli.cr_reply_interp
#define rq_async_args rq_cli.cr_async_args
#define rq_cb_data rq_cli.cr_cb_data
+#define rq_unreplied_list rq_cli.cr_unreplied_list
#define rq_commit_cb rq_cli.cr_commit_cb
#define rq_replay_cb rq_cli.cr_replay_cb
@@ -1505,6 +804,8 @@ struct ptlrpc_request {
__u64 rq_transno;
/** xid */
__u64 rq_xid;
+ /** bulk match bits */
+ u64 rq_mbits;
/**
* List item to for replay list. Not yet committed requests get linked
* there.
@@ -1793,10 +1094,93 @@ struct ptlrpc_bulk_page {
struct page *bp_page;
};
-#define BULK_GET_SOURCE 0
-#define BULK_PUT_SINK 1
-#define BULK_GET_SINK 2
-#define BULK_PUT_SOURCE 3
+enum ptlrpc_bulk_op_type {
+ PTLRPC_BULK_OP_ACTIVE = 0x00000001,
+ PTLRPC_BULK_OP_PASSIVE = 0x00000002,
+ PTLRPC_BULK_OP_PUT = 0x00000004,
+ PTLRPC_BULK_OP_GET = 0x00000008,
+ PTLRPC_BULK_BUF_KVEC = 0x00000010,
+ PTLRPC_BULK_BUF_KIOV = 0x00000020,
+ PTLRPC_BULK_GET_SOURCE = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET,
+ PTLRPC_BULK_PUT_SINK = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT,
+ PTLRPC_BULK_GET_SINK = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET,
+ PTLRPC_BULK_PUT_SOURCE = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT,
+};
+
+static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET;
+}
+
+static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE;
+}
+
+static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK;
+}
+
+static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_GET_SINK) == PTLRPC_BULK_GET_SINK;
+}
+
+static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type)
+{
+ return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE;
+}
+
+static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
+ == PTLRPC_BULK_BUF_KVEC;
+}
+
+static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV))
+ == PTLRPC_BULK_BUF_KIOV;
+}
+
+static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_OP_ACTIVE) |
+ (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE;
+}
+
+static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type)
+{
+ return ((type & PTLRPC_BULK_OP_ACTIVE) |
+ (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE;
+}
+
+struct ptlrpc_bulk_frag_ops {
+ /**
+ * Add a page \a page to the bulk descriptor \a desc
+ * Data to transfer in the page starts at offset \a pageoffset and
+ * amount of data to transfer from the page is \a len
+ */
+ void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc,
+ struct page *page, int pageoffset, int len);
+
+	/**
+ * Add a \a fragment to the bulk descriptor \a desc.
+ * Data to transfer in the fragment is pointed to by \a frag
+ * The size of the fragment is \a len
+ */
+ int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len);
+
+ /**
+ * Uninitialize and free bulk descriptor \a desc.
+ * Works on bulk descriptors both from server and client side.
+ */
+ void (*release_frags)(struct ptlrpc_bulk_desc *desc);
+};
+
+extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops;
+extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops;
/**
* Definition of bulk descriptor.
@@ -1811,14 +1195,14 @@ struct ptlrpc_bulk_page {
struct ptlrpc_bulk_desc {
/** completed with failure */
unsigned long bd_failure:1;
- /** {put,get}{source,sink} */
- unsigned long bd_type:2;
/** client side */
unsigned long bd_registered:1;
/** For serialization with callback */
spinlock_t bd_lock;
/** Import generation when request for this bulk was sent */
int bd_import_generation;
+ /** {put,get}{source,sink}{kvec,kiov} */
+ enum ptlrpc_bulk_op_type bd_type;
/** LNet portal for this bulk */
__u32 bd_portal;
/** Server side - export this bulk created for */
@@ -1827,13 +1211,14 @@ struct ptlrpc_bulk_desc {
struct obd_import *bd_import;
/** Back pointer to the request */
struct ptlrpc_request *bd_req;
+ struct ptlrpc_bulk_frag_ops *bd_frag_ops;
wait_queue_head_t bd_waitq; /* server side only WQ */
int bd_iov_count; /* # entries in bd_iov */
int bd_max_iov; /* allocated size of bd_iov */
int bd_nob; /* # bytes covered */
int bd_nob_transferred; /* # bytes GOT/PUT */
- __u64 bd_last_xid;
+ u64 bd_last_mbits;
struct ptlrpc_cb_id bd_cbid; /* network callback info */
lnet_nid_t bd_sender; /* stash event::sender */
@@ -1842,14 +1227,31 @@ struct ptlrpc_bulk_desc {
/** array of associated MDs */
lnet_handle_md_t bd_mds[PTLRPC_BULK_OPS_COUNT];
- /*
- * encrypt iov, size is either 0 or bd_iov_count.
- */
- lnet_kiov_t *bd_enc_iov;
-
- lnet_kiov_t bd_iov[0];
+ union {
+ struct {
+ /*
+ * encrypt iov, size is either 0 or bd_iov_count.
+ */
+ struct bio_vec *bd_enc_vec;
+ struct bio_vec *bd_vec; /* Array of bio_vecs */
+ } bd_kiov;
+
+ struct {
+ struct kvec *bd_enc_kvec;
+ struct kvec *bd_kvec; /* Array of kvecs */
+ } bd_kvec;
+ } bd_u;
};
+#define GET_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_vec)
+#define BD_GET_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_vec[i])
+#define GET_ENC_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_enc_vec)
+#define BD_GET_ENC_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_enc_vec[i])
+#define GET_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_kvec)
+#define BD_GET_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_kvec[i])
+#define GET_ENC_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_enc_kvec)
+#define BD_GET_ENC_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_enc_kvec[i])
+
enum {
SVC_STOPPED = 1 << 0,
SVC_STOPPING = 1 << 1,
@@ -2464,21 +1866,17 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
void ptlrpc_req_finished(struct ptlrpc_request *request);
struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
- unsigned npages, unsigned max_brw,
- unsigned type, unsigned portal);
-void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin);
-static inline void ptlrpc_free_bulk_pin(struct ptlrpc_bulk_desc *bulk)
-{
- __ptlrpc_free_bulk(bulk, 1);
-}
-
-static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk)
-{
- __ptlrpc_free_bulk(bulk, 0);
-}
-
+ unsigned int nfrags,
+ unsigned int max_brw,
+ unsigned int type,
+ unsigned int portal,
+ const struct ptlrpc_bulk_frag_ops *ops);
+
+int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc,
+ void *frag, int len);
void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
- struct page *page, int pageoffset, int len, int);
+ struct page *page, int pageoffset, int len,
+ int pin);
static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc,
struct page *page, int pageoffset,
int len)
@@ -2493,6 +1891,16 @@ static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
__ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
}
+void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
+
+static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc)
+{
+ int i;
+
+ for (i = 0; i < desc->bd_iov_count ; i++)
+ put_page(BD_GET_KIOV(desc, i).bv_page);
+}
+
void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
struct obd_import *imp);
__u64 ptlrpc_next_xid(void);
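A hedged client-side sketch of the reworked bulk API above: the descriptor now carries a fragment-ops vtable chosen at prep time, and pages are added through it. OST_BULK_PORTAL and demo_prep_read_bulk are assumptions for illustration:

static struct ptlrpc_bulk_desc *
demo_prep_read_bulk(struct ptlrpc_request *req, struct page **pages,
		    int npages)
{
	struct ptlrpc_bulk_desc *desc;
	int i;

	/* server will PUT data into our pages; buffers are bio_vec based */
	desc = ptlrpc_prep_bulk_imp(req, npages, 1,
				    PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV,
				    OST_BULK_PORTAL,
				    &ptlrpc_bulk_kiov_pin_ops);
	if (!desc)
		return NULL;

	for (i = 0; i < npages; i++)	/* whole pages at offset 0 */
		desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE);

	return desc;
}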
@@ -2652,6 +2060,7 @@ struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg);
__u32 lustre_msg_get_type(struct lustre_msg *msg);
void lustre_msg_add_version(struct lustre_msg *msg, u32 version);
__u32 lustre_msg_get_opc(struct lustre_msg *msg);
+__u16 lustre_msg_get_tag(struct lustre_msg *msg);
__u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
__u64 *lustre_msg_get_versions(struct lustre_msg *msg);
__u64 lustre_msg_get_transno(struct lustre_msg *msg);
@@ -2670,6 +2079,8 @@ void lustre_msg_set_handle(struct lustre_msg *msg,
struct lustre_handle *handle);
void lustre_msg_set_type(struct lustre_msg *msg, __u32 type);
void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc);
+void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid);
+void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag);
void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions);
void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno);
void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
@@ -2679,6 +2090,7 @@ void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout);
void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time);
void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid);
void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum);
+void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits);
static inline void
lustre_shrink_reply(struct ptlrpc_request *req, int segment,
diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs.h b/drivers/staging/lustre/lustre/include/lustre_nrs.h
new file mode 100644
index 000000000000..a5028aaa19cd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_nrs.h
@@ -0,0 +1,717 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ *
+ * Network Request Scheduler (NRS)
+ *
+ */
+
+#ifndef _LUSTRE_NRS_H
+#define _LUSTRE_NRS_H
+
+/**
+ * \defgroup nrs Network Request Scheduler
+ * @{
+ */
+struct ptlrpc_nrs_policy;
+struct ptlrpc_nrs_resource;
+struct ptlrpc_nrs_request;
+
+/**
+ * NRS control operations.
+ *
+ * These are common for all policies.
+ */
+enum ptlrpc_nrs_ctl {
+ /**
+ * Not a valid opcode.
+ */
+ PTLRPC_NRS_CTL_INVALID,
+ /**
+ * Activate the policy.
+ */
+ PTLRPC_NRS_CTL_START,
+ /**
+ * Reserved for multiple primary policies, which may be a possibility
+ * in the future.
+ */
+ PTLRPC_NRS_CTL_STOP,
+ /**
+ * Policies can start using opcodes from this value and onwards for
+ * their own purposes; the assigned value itself is arbitrary.
+ */
+ PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
+};
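As a sketch of the convention described for PTLRPC_NRS_CTL_1ST_POL_SPEC, a policy needing private control opcodes might number them upwards from that value; the names below are hypothetical:

enum nrs_ctl_demo {
	/* hypothetical policy-private opcodes, numbered upwards from
	 * the first policy-specific value reserved by NRS core
	 */
	NRS_CTL_DEMO_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
	NRS_CTL_DEMO_WR_QUANTUM,
};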
+
+/**
+ * NRS policy operations.
+ *
+ * These determine the behaviour of a policy, and are called in response to
+ * NRS core events.
+ */
+struct ptlrpc_nrs_pol_ops {
+ /**
+ * Called during policy registration; this operation is optional.
+ *
+ * \param[in,out] policy The policy being initialized
+ */
+ int (*op_policy_init)(struct ptlrpc_nrs_policy *policy);
+ /**
+ * Called during policy unregistration; this operation is optional.
+ *
+ * \param[in,out] policy The policy being unregistered/finalized
+ */
+ void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
+ /**
+ * Called when activating a policy via lprocfs; policies allocate and
+ * initialize their resources here; this operation is optional.
+ *
+ * \param[in,out] policy The policy being started
+ *
+ * \see nrs_policy_start_locked()
+ */
+ int (*op_policy_start)(struct ptlrpc_nrs_policy *policy);
+ /**
+ * Called when deactivating a policy via lprocfs; policies deallocate
+	 * their resources here; this operation is optional.
+ *
+ * \param[in,out] policy The policy being stopped
+ *
+ * \see nrs_policy_stop0()
+ */
+ void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
+ /**
+ * Used for policy-specific operations; i.e. not generic ones like
+	 * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_STOP; analogous
+ * to an ioctl; this operation is optional.
+ *
+ * \param[in,out] policy The policy carrying out operation \a opc
+ * \param[in] opc The command operation being carried out
+	 * \param[in,out] arg   A generic buffer for communication between the
+ * user and the control operation
+ *
+ * \retval -ve error
+ * \retval 0 success
+ *
+ * \see ptlrpc_nrs_policy_control()
+ */
+ int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
+ enum ptlrpc_nrs_ctl opc, void *arg);
+
+ /**
+ * Called when obtaining references to the resources of the resource
+ * hierarchy for a request that has arrived for handling at the PTLRPC
+ * service. Policies should return -ve for requests they do not wish
+ * to handle. This operation is mandatory.
+ *
+ * \param[in,out] policy The policy we're getting resources for.
+ * \param[in,out] nrq The request we are getting resources for.
+ * \param[in] parent The parent resource of the resource being
+ * requested; set to NULL if none.
+ * \param[out] resp The resource is to be returned here; the
+ * fallback policy in an NRS head should
+ * \e always return a non-NULL pointer value.
+ * \param[in] moving_req When set, signifies that this is an attempt
+ * to obtain resources for a request being moved
+ * to the high-priority NRS head by
+ * ldlm_lock_reorder_req().
+ * This implies two things:
+ * 1. We are under obd_export::exp_rpc_lock and
+ * so should not sleep.
+	 *			  2. We should not perform non-idempotent
+	 *			  operations, and may skip idempotent ones
+	 *			  that were already carried out when resources
+	 *			  were first taken for the request, when it was
+	 *			  initialized in ptlrpc_nrs_req_initialize().
+ *
+ * \retval 0, +ve The level of the returned resource in the resource
+ * hierarchy; currently only 0 (for a non-leaf resource)
+ * and 1 (for a leaf resource) are supported by the
+ * framework.
+ * \retval -ve error
+ *
+ * \see ptlrpc_nrs_req_initialize()
+ * \see ptlrpc_nrs_hpreq_add_nolock()
+ * \see ptlrpc_nrs_req_hp_move()
+ */
+ int (*op_res_get)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq,
+ const struct ptlrpc_nrs_resource *parent,
+ struct ptlrpc_nrs_resource **resp,
+ bool moving_req);
+ /**
+ * Called when releasing references taken for resources in the resource
+ * hierarchy for the request; this operation is optional.
+ *
+ * \param[in,out] policy The policy the resource belongs to
+ * \param[in] res The resource to be freed
+ *
+ * \see ptlrpc_nrs_req_finalize()
+ * \see ptlrpc_nrs_hpreq_add_nolock()
+ * \see ptlrpc_nrs_req_hp_move()
+ */
+ void (*op_res_put)(struct ptlrpc_nrs_policy *policy,
+ const struct ptlrpc_nrs_resource *res);
+
+ /**
+ * Obtains a request for handling from the policy, and optionally
+ * removes the request from the policy; this operation is mandatory.
+ *
+ * \param[in,out] policy The policy to poll
+ * \param[in] peek When set, signifies that we just want to
+ * examine the request, and not handle it, so the
+ * request is not removed from the policy.
+ * \param[in] force When set, it will force a policy to return a
+ * request if it has one queued.
+ *
+ * \retval NULL No request available for handling
+ * \retval valid-pointer The request polled for handling
+ *
+ * \see ptlrpc_nrs_req_get_nolock()
+ */
+ struct ptlrpc_nrs_request *
+ (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
+ bool force);
+ /**
+ * Called when attempting to add a request to a policy for later
+ * handling; this operation is mandatory.
+ *
+ * \param[in,out] policy The policy on which to enqueue \a nrq
+ * \param[in,out] nrq The request to enqueue
+ *
+ * \retval 0 success
+ * \retval != 0 error
+ *
+ * \see ptlrpc_nrs_req_add_nolock()
+ */
+ int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
+ /**
+ * Removes a request from the policy's set of pending requests. Normally
+ * called after a request has been polled successfully from the policy
+ * for handling; this operation is mandatory.
+ *
+ * \param[in,out] policy The policy the request \a nrq belongs to
+ * \param[in,out] nrq The request to dequeue
+ *
+ * \see ptlrpc_nrs_req_del_nolock()
+ */
+ void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
+ /**
+ * Called after the request being carried out. Could be used for
+ * job/resource control; this operation is optional.
+ *
+	 * \param[in,out] policy The policy that is finishing handling request
+ * \a nrq
+ * \param[in,out] nrq The request
+ *
+ * \pre assert_spin_locked(&svcpt->scp_req_lock)
+ *
+ * \see ptlrpc_nrs_req_stop_nolock()
+ */
+ void (*op_req_stop)(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq);
+ /**
+ * Registers the policy's lprocfs interface with a PTLRPC service.
+ *
+ * \param[in] svc The service
+ *
+ * \retval 0 success
+ * \retval != 0 error
+ */
+ int (*op_lprocfs_init)(struct ptlrpc_service *svc);
+ /**
+	 * Unregisters the policy's lprocfs interface from a PTLRPC service.
+ *
+ * In cases of failed policy registration in
+ * \e ptlrpc_nrs_policy_register(), this function may be called for a
+ * service which has not registered the policy successfully, so
+ * implementations of this method should make sure their operations are
+ * safe in such cases.
+ *
+ * \param[in] svc The service
+ */
+ void (*op_lprocfs_fini)(struct ptlrpc_service *svc);
+};
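Of the handlers above, only op_res_get, op_req_get, op_req_enqueue and op_req_dequeue are mandatory, so a minimal policy could wire up just those; a sketch, with hypothetical nrs_demo_* handlers:

static const struct ptlrpc_nrs_pol_ops nrs_demo_ops = {
	.op_res_get	= nrs_demo_res_get,
	.op_req_get	= nrs_demo_req_get,
	.op_req_enqueue	= nrs_demo_req_enqueue,
	.op_req_dequeue	= nrs_demo_req_dequeue,
};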
+
+/**
+ * Policy flags
+ */
+enum nrs_policy_flags {
+ /**
+	 * Fallback policy; use this flag on only a single supported policy per
+ * service. The flag cannot be used on policies that use
+ * \e PTLRPC_NRS_FL_REG_EXTERN
+ */
+ PTLRPC_NRS_FL_FALLBACK = BIT(0),
+ /**
+ * Start policy immediately after registering.
+ */
+ PTLRPC_NRS_FL_REG_START = BIT(1),
+ /**
+	 * This is a policy registering from a module different from the one
+	 * NRS core ships in (currently ptlrpc).
+ */
+ PTLRPC_NRS_FL_REG_EXTERN = BIT(2),
+};
+
+/**
+ * NRS queue type.
+ *
+ * Denotes whether an NRS instance is for handling normal or high-priority
+ * RPCs, or whether an operation pertains to one or both of the NRS instances
+ * in a service.
+ */
+enum ptlrpc_nrs_queue_type {
+ PTLRPC_NRS_QUEUE_REG = BIT(0),
+ PTLRPC_NRS_QUEUE_HP = BIT(1),
+ PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
+};
+
+/**
+ * NRS head
+ *
+ * A PTLRPC service has at least one NRS head instance for handling normal
+ * priority RPCs, and may optionally have a second NRS head instance for
+ * handling high-priority RPCs. Each NRS head maintains a list of available
+ * policies, of which one and only one policy is acting as the fallback policy,
+ * and optionally a different policy may be acting as the primary policy. For
+ * all RPCs handled by this NRS head instance, NRS core will first attempt to
+ * enqueue the RPC using the primary policy (if any). The fallback policy is
+ * used in the following cases:
+ * - when there was no primary policy in the
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
+ * was initialized.
+ * - when the primary policy that was in the
+ *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the RPC
+ *   was initialized indicated, by returning a non-valid NRS resource
+ *   reference, that it did not wish or was unable to handle the request.
+ * - when the primary policy that was in the
+ *   ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the RPC
+ *   was initialized fails later, during the request enqueueing stage.
+ *
+ * \see nrs_resource_get_safe()
+ * \see nrs_request_enqueue()
+ */
+struct ptlrpc_nrs {
+ spinlock_t nrs_lock;
+ /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
+ /**
+ * List of registered policies
+ */
+ struct list_head nrs_policy_list;
+ /**
+ * List of policies with queued requests. Policies that have any
+ * outstanding requests are queued here, and this list is queried
+ * in a round-robin manner from NRS core when obtaining a request
+ * for handling. This ensures that requests from policies that at some
+ * point transition away from the
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
+ */
+ struct list_head nrs_policy_queued;
+ /**
+ * Service partition for this NRS head
+ */
+ struct ptlrpc_service_part *nrs_svcpt;
+ /**
+ * Primary policy, which is the preferred policy for handling RPCs
+ */
+ struct ptlrpc_nrs_policy *nrs_policy_primary;
+ /**
+ * Fallback policy, which is the backup policy for handling RPCs
+ */
+ struct ptlrpc_nrs_policy *nrs_policy_fallback;
+ /**
+ * This NRS head handles either HP or regular requests
+ */
+ enum ptlrpc_nrs_queue_type nrs_queue_type;
+ /**
+ * # queued requests from all policies in this NRS head
+ */
+ unsigned long nrs_req_queued;
+ /**
+ * # scheduled requests from all policies in this NRS head
+ */
+ unsigned long nrs_req_started;
+ /**
+ * # policies on this NRS
+ */
+ unsigned int nrs_num_pols;
+ /**
+	 * This NRS head is in the process of starting a policy
+ */
+ unsigned int nrs_policy_starting:1;
+ /**
+	 * The whole NRS head is in the process of shutting down; used
+	 * during unregistration
+ */
+ unsigned int nrs_stopping:1;
+ /**
+	 * An NRS policy is throttling requests
+ */
+ unsigned int nrs_throttling:1;
+};
+
+#define NRS_POL_NAME_MAX 16
+#define NRS_POL_ARG_MAX 16
+
+struct ptlrpc_nrs_pol_desc;
+
+/**
+ * Service compatibility predicate; this determines whether a policy is adequate
+ * for handling RPCs of a particular PTLRPC service.
+ *
+ * XXX: This should give the same result during policy registration and
+ * unregistration, and for all partitions of a service; the result should
+ * therefore not depend on transient service state or other properties
+ * that may change over time.
+ */
+typedef bool (*nrs_pol_desc_compat_t)(const struct ptlrpc_service *svc,
+ const struct ptlrpc_nrs_pol_desc *desc);
+
+struct ptlrpc_nrs_pol_conf {
+ /**
+ * Human-readable policy name
+ */
+ char nc_name[NRS_POL_NAME_MAX];
+ /**
+ * NRS operations for this policy
+ */
+ const struct ptlrpc_nrs_pol_ops *nc_ops;
+ /**
+ * Service compatibility predicate
+ */
+ nrs_pol_desc_compat_t nc_compat;
+ /**
+ * Set for policies that support a single ptlrpc service, i.e. ones that
+ * have \a pd_compat set to nrs_policy_compat_one(). The variable value
+	 * gives the name of the single service that such policies are
+ * compatible with.
+ */
+ const char *nc_compat_svc_name;
+ /**
+	 * Owner module for this policy descriptor; policies registering from a
+	 * module other than the one the NRS framework is held within
+	 * (currently ptlrpc) should set this field to THIS_MODULE.
+ */
+ struct module *nc_owner;
+ /**
+ * Policy registration flags; a bitmask of \e nrs_policy_flags
+ */
+ unsigned int nc_flags;
+};
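Putting the pieces together, an external module might describe and register its policy roughly as follows. This is a sketch: nrs_demo_ops is the hypothetical ops table shown earlier, and nrs_demo_compat stands in for a compatibility predicate of type nrs_pol_desc_compat_t.

static struct ptlrpc_nrs_pol_conf nrs_demo_conf = {
	.nc_name	= "demo",
	.nc_ops		= &nrs_demo_ops,
	.nc_compat	= nrs_demo_compat,	/* assumed predicate */
	.nc_owner	= THIS_MODULE,
	.nc_flags	= PTLRPC_NRS_FL_REG_EXTERN,
};

/* typically called from the registering module's init() function */
rc = ptlrpc_nrs_policy_register(&nrs_demo_conf);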
+
+/**
+ * NRS policy registering descriptor
+ *
+ * Used to hold a description of a policy that can be passed to NRS core in
+ * order to register the policy with NRS heads in different PTLRPC services.
+ */
+struct ptlrpc_nrs_pol_desc {
+ /**
+ * Human-readable policy name
+ */
+ char pd_name[NRS_POL_NAME_MAX];
+ /**
+ * Link into nrs_core::nrs_policies
+ */
+ struct list_head pd_list;
+ /**
+ * NRS operations for this policy
+ */
+ const struct ptlrpc_nrs_pol_ops *pd_ops;
+ /**
+ * Service compatibility predicate
+ */
+ nrs_pol_desc_compat_t pd_compat;
+ /**
+ * Set for policies that are compatible with only one PTLRPC service.
+ *
+ * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
+ */
+ const char *pd_compat_svc_name;
+ /**
+ * Owner module for this policy descriptor.
+ *
+ * We need to hold a reference to the module whenever we might make use
+ * of any of the module's contents, i.e.
+ * - If one or more instances of the policy are at a state where they
+ * might be handling a request, i.e.
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
+ * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
+ * is taken on the module when
+ * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
+	 *   becomes 0, so that we hold at most one reference to the module
+	 *   at any time.
+ *
+ * We do not need to hold a reference to the module, even though we
+ * might use code and data from the module, in the following cases:
+ * - During external policy registration, because this should happen in
+ * the module's init() function, in which case the module is safe from
+ * removal because a reference is being held on the module by the
+	 *   kernel, and kmod (and module-init-tools) will serialize any
+	 *   racing processes anyway.
+ * - During external policy unregistration, because this should happen
+ * in a module's exit() function, and any attempts to start a policy
+ * instance would need to take a reference on the module, and this is
+ * not possible once we have reached the point where the exit()
+ * handler is called.
+ * - During service registration and unregistration, as service setup
+ * and cleanup, and policy registration, unregistration and policy
+ * instance starting, are serialized by \e nrs_core::nrs_mutex, so
+ * as long as users adhere to the convention of registering policies
+ * in init() and unregistering them in module exit() functions, there
+ * should not be a race between these operations.
+ * - During any policy-specific lprocfs operations, because a reference
+ * is held by the kernel on a proc entry that has been entered by a
+ * syscall, so as long as proc entries are removed during
+ * unregistration time, then unregistration and lprocfs operations
+ * will be properly serialized.
+ */
+ struct module *pd_owner;
+ /**
+ * Bitmask of \e nrs_policy_flags
+ */
+ unsigned int pd_flags;
+ /**
+ * # of references on this descriptor
+ */
+ atomic_t pd_refs;
+};
+
+/**
+ * NRS policy state
+ *
+ * Policies transition from one state to the other during their lifetime
+ */
+enum ptlrpc_nrs_pol_state {
+ /**
+ * Not a valid policy state.
+ */
+ NRS_POL_STATE_INVALID,
+ /**
+ * Policies are at this state either at the start of their life, or
+ * transition here when the user selects a different policy to act
+ * as the primary one.
+ */
+ NRS_POL_STATE_STOPPED,
+ /**
+	 * Policy is in the process of stopping
+ */
+ NRS_POL_STATE_STOPPING,
+ /**
+	 * Policy is in the process of starting
+ */
+ NRS_POL_STATE_STARTING,
+ /**
+ * A policy is in this state in two cases:
+ * - it is the fallback policy, which is always in this state.
+	 * - it has been activated by the user; i.e. it is the primary policy.
+ */
+ NRS_POL_STATE_STARTED,
+};
+
+/**
+ * NRS policy information
+ *
+ * Used for obtaining status information about a policy via lprocfs
+ */
+struct ptlrpc_nrs_pol_info {
+ /**
+ * Policy name
+ */
+ char pi_name[NRS_POL_NAME_MAX];
+ /**
+ * Policy argument
+ */
+ char pi_arg[NRS_POL_ARG_MAX];
+ /**
+ * Current policy state
+ */
+ enum ptlrpc_nrs_pol_state pi_state;
+ /**
+ * # RPCs enqueued for later dispatching by the policy
+ */
+ long pi_req_queued;
+ /**
+ * # RPCs started for dispatch by the policy
+ */
+ long pi_req_started;
+ /**
+ * Is this a fallback policy?
+ */
+ unsigned pi_fallback:1;
+};
+
+/**
+ * NRS policy
+ *
+ * There is one instance of this for each policy in each NRS head of each
+ * PTLRPC service partition.
+ */
+struct ptlrpc_nrs_policy {
+ /**
+ * Linkage into the NRS head's list of policies,
+ * ptlrpc_nrs:nrs_policy_list
+ */
+ struct list_head pol_list;
+ /**
+ * Linkage into the NRS head's list of policies with enqueued
+ * requests ptlrpc_nrs:nrs_policy_queued
+ */
+ struct list_head pol_list_queued;
+ /**
+ * Current state of this policy
+ */
+ enum ptlrpc_nrs_pol_state pol_state;
+ /**
+ * Bitmask of nrs_policy_flags
+ */
+ unsigned int pol_flags;
+ /**
+ * # RPCs enqueued for later dispatching by the policy
+ */
+ long pol_req_queued;
+ /**
+ * # RPCs started for dispatch by the policy
+ */
+ long pol_req_started;
+ /**
+	 * Usage reference count taken on the policy instance
+ */
+ long pol_ref;
+ /**
+ * Human-readable policy argument
+ */
+ char pol_arg[NRS_POL_ARG_MAX];
+ /**
+ * The NRS head this policy has been created at
+ */
+ struct ptlrpc_nrs *pol_nrs;
+ /**
+ * Private policy data; varies by policy type
+ */
+ void *pol_private;
+ /**
+ * Policy descriptor for this policy instance.
+ */
+ struct ptlrpc_nrs_pol_desc *pol_desc;
+};
+
+/**
+ * NRS resource
+ *
+ * Resources are embedded into two types of NRS entities:
+ * - Inside NRS policies, in the policy's private data in
+ * ptlrpc_nrs_policy::pol_private
+ * - In objects that act as prime-level scheduling entities in different NRS
+ * policies; e.g. on a policy that performs round robin or similar order
+ * scheduling across client NIDs, there would be one NRS resource per unique
+ * client NID. On a policy which performs round robin scheduling across
+ * backend filesystem objects, there would be one resource associated with
+ * each of the backend filesystem objects partaking in the scheduling
+ * performed by the policy.
+ *
+ * NRS resources share a parent-child relationship, in which resources embedded
+ * in policy instances are the parent entities, with all scheduling entities
+ * a policy schedules across being the children, thus forming a simple resource
+ * hierarchy. This hierarchy may be extended with one or more levels in the
+ * future if the ability to have more than one primary policy is added.
+ *
+ * Upon request initialization, references to the then active NRS policies are
+ * taken and used to later handle the dispatching of the request with one of
+ * these policies.
+ *
+ * \see nrs_resource_get_safe()
+ * \see ptlrpc_nrs_req_add()
+ */
+struct ptlrpc_nrs_resource {
+ /**
+	 * This NRS resource's parent; NULL for resources embedded in NRS
+	 * policy instances, i.e. the top-level ones.
+ */
+ struct ptlrpc_nrs_resource *res_parent;
+ /**
+ * The policy associated with this resource.
+ */
+ struct ptlrpc_nrs_policy *res_policy;
+};
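For a single-level policy, op_res_get can simply hand back the resource embedded in its private data; a sketch modelled on the FIFO policy (see lustre_nrs_fifo.h below), where returning 1 marks the resource as a leaf:

static int nrs_demo_res_get(struct ptlrpc_nrs_policy *policy,
			    struct ptlrpc_nrs_request *nrq,
			    const struct ptlrpc_nrs_resource *parent,
			    struct ptlrpc_nrs_resource **resp,
			    bool moving_req)
{
	/* private data with an embedded resource, as in nrs_fifo_head;
	 * parent and moving_req are unused in this single-level sketch
	 */
	struct nrs_fifo_head *head = policy->pol_private;

	*resp = &head->fh_res;
	return 1;	/* leaf resource */
}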
+
+enum {
+ NRS_RES_FALLBACK,
+ NRS_RES_PRIMARY,
+ NRS_RES_MAX
+};
+
+#include "lustre_nrs_fifo.h"
+
+/**
+ * NRS request
+ *
+ * Instances of this object exist embedded within ptlrpc_request; the main
+ * purpose of this object is to hold references to the request's resources
+ * for the lifetime of the request, and to hold properties that policies
+ * use for determining the request's scheduling priority.
+ */
+struct ptlrpc_nrs_request {
+ /**
+ * The request's resource hierarchy.
+ */
+ struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX];
+ /**
+ * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
+ * policy that was used to enqueue the request.
+ *
+ * \see nrs_request_enqueue()
+ */
+ unsigned int nr_res_idx;
+ unsigned int nr_initialized:1;
+ unsigned int nr_enqueued:1;
+ unsigned int nr_started:1;
+ unsigned int nr_finalized:1;
+
+ /**
+ * Policy-specific fields, used for determining a request's scheduling
+ * priority, and other supporting functionality.
+ */
+ union {
+ /**
+ * Fields for the FIFO policy
+ */
+ struct nrs_fifo_req fifo;
+ } nr_u;
+ /**
+ * Externally-registering policies may want to use this to allocate
+ * their own request properties.
+ */
+ void *ext;
+};
+
+/** @} nrs */
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h
new file mode 100644
index 000000000000..3b5418eac6c4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h
@@ -0,0 +1,70 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ *
+ * Network Request Scheduler (NRS) First-in First-out (FIFO) policy
+ *
+ */
+
+#ifndef _LUSTRE_NRS_FIFO_H
+#define _LUSTRE_NRS_FIFO_H
+
+/* \name fifo
+ *
+ * FIFO policy
+ *
+ * This policy is a logical wrapper around previous, non-NRS functionality.
+ * It dispatches RPCs in the same order as they arrive from the network. This
+ * policy is currently used as the fallback policy, and the only enabled policy
+ * on all NRS heads of all PTLRPC service partitions.
+ * @{
+ */
+
+/**
+ * Private data structure for the FIFO policy
+ */
+struct nrs_fifo_head {
+ /**
+ * Resource object for policy instance.
+ */
+ struct ptlrpc_nrs_resource fh_res;
+ /**
+ * List of queued requests.
+ */
+ struct list_head fh_list;
+ /**
+ * For debugging purposes.
+ */
+ __u64 fh_sequence;
+};
+
+struct nrs_fifo_req {
+ struct list_head fr_list;
+ __u64 fr_sequence;
+};
+
+/** @} fifo */
+#endif
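As an illustration of how nrs_fifo_head and nrs_fifo_req cooperate, a sketch of a FIFO-style enqueue handler, assuming pol_private was pointed at an nrs_fifo_head in op_policy_start:

static int nrs_demo_fifo_req_add(struct ptlrpc_nrs_policy *policy,
				 struct ptlrpc_nrs_request *nrq)
{
	struct nrs_fifo_head *head = policy->pol_private;

	/* stamp the request for debugging, then queue it in arrival order */
	nrq->nr_u.fifo.fr_sequence = head->fh_sequence++;
	list_add_tail(&nrq->nr_u.fifo.fr_list, &head->fh_list);
	return 0;
}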
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
index a13558e53274..fbcd39572cd0 100644
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -148,13 +148,12 @@ extern struct req_format RQF_MDS_GETATTR;
*/
extern struct req_format RQF_MDS_GETATTR_NAME;
extern struct req_format RQF_MDS_CLOSE;
-extern struct req_format RQF_MDS_RELEASE_CLOSE;
+extern struct req_format RQF_MDS_INTENT_CLOSE;
extern struct req_format RQF_MDS_CONNECT;
extern struct req_format RQF_MDS_DISCONNECT;
extern struct req_format RQF_MDS_GET_INFO;
extern struct req_format RQF_MDS_READPAGE;
extern struct req_format RQF_MDS_WRITEPAGE;
-extern struct req_format RQF_MDS_DONE_WRITING;
extern struct req_format RQF_MDS_REINT;
extern struct req_format RQF_MDS_REINT_CREATE;
extern struct req_format RQF_MDS_REINT_CREATE_ACL;
@@ -166,10 +165,9 @@ extern struct req_format RQF_MDS_REINT_LINK;
extern struct req_format RQF_MDS_REINT_RENAME;
extern struct req_format RQF_MDS_REINT_SETATTR;
extern struct req_format RQF_MDS_REINT_SETXATTR;
-extern struct req_format RQF_MDS_QUOTACHECK;
extern struct req_format RQF_MDS_QUOTACTL;
-extern struct req_format RQF_QC_CALLBACK;
extern struct req_format RQF_MDS_SWAP_LAYOUTS;
+extern struct req_format RQF_MDS_REINT_MIGRATE;
/* MDS hsm formats */
extern struct req_format RQF_MDS_HSM_STATE_GET;
extern struct req_format RQF_MDS_HSM_STATE_SET;
@@ -181,7 +179,6 @@ extern struct req_format RQF_MDS_HSM_REQUEST;
/* OST req_format */
extern struct req_format RQF_OST_CONNECT;
extern struct req_format RQF_OST_DISCONNECT;
-extern struct req_format RQF_OST_QUOTACHECK;
extern struct req_format RQF_OST_QUOTACTL;
extern struct req_format RQF_OST_GETATTR;
extern struct req_format RQF_OST_SETATTR;
diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h
index 90c183424802..03a970bcac55 100644
--- a/drivers/staging/lustre/lustre/include/lustre_sec.h
+++ b/drivers/staging/lustre/lustre/include/lustre_sec.h
@@ -50,6 +50,7 @@ struct brw_page;
/* Linux specific */
struct key;
struct seq_file;
+struct lustre_cfg;
/*
* forward declaration
@@ -1029,6 +1030,8 @@ int sptlrpc_target_export_check(struct obd_export *exp,
/* bulk security api */
void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc);
+int get_free_pages_in_pool(void);
+int pool_is_at_full_capacity(void);
int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
diff --git a/drivers/staging/lustre/lustre/include/lustre_swab.h b/drivers/staging/lustre/lustre/include/lustre_swab.h
new file mode 100644
index 000000000000..26d01c2d6633
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_swab.h
@@ -0,0 +1,102 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format. The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed() below).
+ *
+ * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines,
+ * implemented in ptlrpc/lustre_swab.c. These 'swabbers' convert the
+ * type from "other" endian, in-place in the message buffer.
+ *
+ * A swabber takes a single pointer argument. The caller must already have
+ * verified that the length of the message buffer >= sizeof (type).
+ *
+ * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
+ * may be defined that swabs just the variable part, after the caller has
+ * verified that the message buffer is large enough.
+ */
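To make the convention concrete, a sketch of a swabber for a hypothetical fixed-size wire type; __swab32s() and __swab64s() are the kernel's in-place byte-swap helpers, and the struct layout here is purely illustrative:

struct demo_body {			/* hypothetical wire type */
	__u64	db_xid;
	__u32	db_flags;
	__u32	db_padding;
};

void lustre_swab_demo_body(struct demo_body *b)
{
	__swab64s(&b->db_xid);
	__swab32s(&b->db_flags);
	/* db_padding carries no payload, so it is left alone */
}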
+
+#ifndef _LUSTRE_SWAB_H_
+#define _LUSTRE_SWAB_H_
+
+#include "lustre/lustre_idl.h"
+
+void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
+void lustre_swab_connect(struct obd_connect_data *ocd);
+void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
+void lustre_swab_hsm_state_set(struct hsm_state_set *hss);
+void lustre_swab_obd_statfs(struct obd_statfs *os);
+void lustre_swab_obd_ioobj(struct obd_ioobj *ioo);
+void lustre_swab_niobuf_remote(struct niobuf_remote *nbr);
+void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb);
+void lustre_swab_ost_lvb(struct ost_lvb *lvb);
+void lustre_swab_obd_quotactl(struct obd_quotactl *q);
+void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
+void lustre_swab_generic_32s(__u32 *val);
+void lustre_swab_mdt_body(struct mdt_body *b);
+void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b);
+void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
+void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
+void lustre_swab_lmv_desc(struct lmv_desc *ld);
+void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
+void lustre_swab_lov_desc(struct lov_desc *ld);
+void lustre_swab_gl_desc(union ldlm_gl_desc *desc);
+void lustre_swab_ldlm_intent(struct ldlm_intent *i);
+void lustre_swab_ldlm_request(struct ldlm_request *rq);
+void lustre_swab_ldlm_reply(struct ldlm_reply *r);
+void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
+void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
+void lustre_swab_mgs_config_body(struct mgs_config_body *body);
+void lustre_swab_mgs_config_res(struct mgs_config_res *body);
+void lustre_swab_ost_body(struct ost_body *b);
+void lustre_swab_ost_last_id(__u64 *id);
+void lustre_swab_fiemap(struct fiemap *fiemap);
+void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
+void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
+void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+ int stripe_count);
+void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
+void lustre_swab_lustre_capa(struct lustre_capa *c);
+void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
+void lustre_swab_fid2path(struct getinfo_fid2path *gf);
+void lustre_swab_layout_intent(struct layout_intent *li);
+void lustre_swab_hsm_current_action(struct hsm_current_action *action);
+void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk);
+void lustre_swab_hsm_user_item(struct hsm_user_item *hui);
+void lustre_swab_hsm_request(struct hsm_request *hr);
+void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl);
+void lustre_swab_close_data(struct close_data *data);
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index f6fc4dd05bd6..0f48e9c3d9e3 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -73,70 +73,17 @@ static inline void loi_init(struct lov_oinfo *loi)
{
}
-/*
- * If we are unable to get the maximum object size from the OST in
- * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using
- * the old maximum object size from ext3.
- */
-#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL
-
-struct lov_stripe_md {
- atomic_t lsm_refc;
- spinlock_t lsm_lock;
- pid_t lsm_lock_owner; /* debugging */
-
- /* maximum possible file size, might change as OSTs status changes,
- * e.g. disconnected, deactivated
- */
- __u64 lsm_maxbytes;
- struct ost_id lsm_oi;
- __u32 lsm_magic;
- __u32 lsm_stripe_size;
- __u32 lsm_pattern; /* striping pattern (RAID0, RAID1) */
- __u16 lsm_stripe_count;
- __u16 lsm_layout_gen;
- char lsm_pool_name[LOV_MAXPOOLNAME + 1];
- struct lov_oinfo *lsm_oinfo[0];
-};
-
-static inline bool lsm_is_released(struct lov_stripe_md *lsm)
-{
- return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED);
-}
-
-static inline bool lsm_has_objects(struct lov_stripe_md *lsm)
-{
- if (!lsm)
- return false;
- if (lsm_is_released(lsm))
- return false;
- return true;
-}
-
-static inline int lov_stripe_md_size(unsigned int stripe_count)
-{
- struct lov_stripe_md lsm;
-
- return sizeof(lsm) + stripe_count * sizeof(lsm.lsm_oinfo[0]);
-}
-
+struct lov_stripe_md;
struct obd_info;
typedef int (*obd_enqueue_update_f)(void *cookie, int rc);
/* obd info for a particular level (lov, osc). */
struct obd_info {
- /* Flags used for set request specific flags:
- - while lock handling, the flags obtained on the enqueue
- request are set here.
- - while stats, the flags used for control delay/resend.
- - while setattr, the flags used for distinguish punch operation
- */
+ /* OBD_STATFS_* flags */
__u64 oi_flags;
/* lsm data specific for every OSC. */
struct lov_stripe_md *oi_md;
- /* obdo data specific for every OSC, if needed at all. */
- struct obdo *oi_oa;
/* statfs data specific for every OSC, if needed at all. */
struct obd_statfs *oi_osfs;
/* An update callback which is called to update some data on upper
@@ -204,7 +151,6 @@ enum obd_cl_sem_lock_class {
* on the MDS.
*/
#define OBD_MAX_DEFAULT_EA_SIZE 4096
-#define OBD_MAX_DEFAULT_COOKIE_SIZE 4096
struct mdc_rpc_lock;
struct obd_import;
@@ -214,7 +160,7 @@ struct client_obd {
struct obd_import *cl_import; /* ptlrpc connection state */
size_t cl_conn_count;
/*
- * Cache maximum and default values for easize and cookiesize. This is
+ * Cache maximum and default values for easize. This is
* strictly a performance optimization to minimize calls to
* obd_size_diskmd(). The default values are used to calculate the
* initial size of a request buffer. The ptlrpc layer will resize the
@@ -235,18 +181,6 @@ struct client_obd {
* run-time if a larger observed size is advertised by the MDT.
*/
u32 cl_max_mds_easize;
- /* Default cookie size for llog cookies (see struct llog_cookie). It is
- * initialized to zero at mount-time, then it tracks the largest
- * observed cookie size advertised by the MDT, up to a maximum value of
- * OBD_MAX_DEFAULT_COOKIE_SIZE. Note that llog_cookies are not
- * used by clients communicating with MDS versions 2.4.0 and later.
- */
- u32 cl_default_mds_cookiesize;
- /* Maximum possible cookie size computed at mount-time based on
- * the number of OSTs in the filesystem. May be increased at
- * run-time if a larger observed size is advertised by the MDT.
- */
- u32 cl_max_mds_cookiesize;
enum lustre_sec_part cl_sp_me;
enum lustre_sec_part cl_sp_to;
@@ -313,15 +247,42 @@ struct client_obd {
struct obd_histogram cl_read_offset_hist;
struct obd_histogram cl_write_offset_hist;
- /* lru for osc caching pages */
+ /* LRU for osc caching pages */
struct cl_client_cache *cl_cache;
- struct list_head cl_lru_osc; /* member of cl_cache->ccc_lru */
+ /** member of cl_cache->ccc_lru */
+ struct list_head cl_lru_osc;
+ /** # of available LRU slots left in the per-OSC cache.
+ * Available LRU slots are shared by all OSCs of the same file system,
+ * therefore this is a pointer to cl_client_cache::ccc_lru_left.
+ */
atomic_long_t *cl_lru_left;
+	/** # of busy LRU pages. A page is considered busy if it's in the
+	 * writeback queue or in transfer. Busy pages can't be discarded, so
+	 * they are not in the LRU cache.
+ */
atomic_long_t cl_lru_busy;
+ /** # of LRU pages in the cache for this client_obd */
atomic_long_t cl_lru_in_list;
+	/** # of threads shrinking the LRU cache. To avoid contention,
+	 * multiple threads are not allowed to shrink the LRU cache at once.
+ */
atomic_t cl_lru_shrinkers;
- struct list_head cl_lru_list; /* lru page list */
- spinlock_t cl_lru_list_lock; /* page list protector */
+ /** The time when this LRU cache was last used. */
+ time64_t cl_lru_last_used;
+ /** stats: how many reclaims have happened for this client_obd.
+	 * Reclaim is distinct from shrink: shrink is async, voluntary
+	 * rebalancing; reclaim is sync, initiated by an IO thread when LRU
+	 * slots are in short supply.
+ */
+ u64 cl_lru_reclaim;
+ /** List of LRU pages for this client_obd */
+ struct list_head cl_lru_list;
+ /** Lock for LRU page list */
+ spinlock_t cl_lru_list_lock;
+ /** # of unstable pages in this client_obd.
+	 * An unstable page is one whose WRITE RPC has finished but whose
+	 * transaction has NOT yet committed.
+ */
atomic_long_t cl_unstable_count;
/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
@@ -329,7 +290,17 @@ struct client_obd {
wait_queue_head_t cl_destroy_waitq;
struct mdc_rpc_lock *cl_rpc_lock;
- struct mdc_rpc_lock *cl_close_lock;
+
+ /* modify rpcs in flight
+ * currently used for metadata only
+ */
+ spinlock_t cl_mod_rpcs_lock;
+ u16 cl_max_mod_rpcs_in_flight;
+ u16 cl_mod_rpcs_in_flight;
+ u16 cl_close_rpcs_in_flight;
+ wait_queue_head_t cl_mod_rpcs_waitq;
+ unsigned long *cl_mod_tag_bitmap;
+ struct obd_histogram cl_mod_rpcs_hist;
/* mgc datastruct */
atomic_t cl_mgc_refcount;
@@ -345,13 +316,6 @@ struct client_obd {
/* also protected by the poorly named _loi_list_lock lock above */
struct osc_async_rc cl_ar;
- /* used by quotacheck when the servers are older than 2.4 */
- int cl_qchk_stat; /* quotacheck stat of the peer */
-#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */
-#if OBD_OCD_VERSION(2, 7, 53, 0) < LUSTRE_VERSION_CODE
-#warning "please consider removing quotacheck compatibility code"
-#endif
-
/* sequence manager */
struct lu_client_seq *cl_seq;
@@ -454,8 +418,6 @@ struct lmv_obd {
int connected;
int max_easize;
int max_def_easize;
- int max_cookiesize;
- int max_def_cookiesize;
u32 tgts_size; /* size of tgts array */
struct lmv_tgt_desc **tgts;
@@ -469,9 +431,9 @@ struct niobuf_local {
__u32 lnb_page_offset;
__u32 lnb_len;
__u32 lnb_flags;
+ int lnb_rc;
struct page *lnb_page;
void *lnb_data;
- int lnb_rc;
};
#define LUSTRE_FLD_NAME "fld"
@@ -512,21 +474,6 @@ struct niobuf_local {
/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
#define N_LOCAL_TEMP_PAGE 0x10000000
-struct obd_trans_info {
- __u64 oti_xid;
- /* Only used on the server side for tracking acks. */
- struct oti_req_ack_lock {
- struct lustre_handle lock;
- __u32 mode;
- } oti_ack_locks[4];
- void *oti_handle;
- struct llog_cookie oti_onecookie;
- struct llog_cookie *oti_logcookies;
-
- /** VBR: versions */
- __u64 oti_pre_version;
-};
-
/*
* Events signalled through obd_notify() upcall-chain.
*/
@@ -587,15 +534,14 @@ struct lvfs_run_ctxt {
struct obd_device {
struct obd_type *obd_type;
- __u32 obd_magic;
+ u32 obd_magic; /* OBD_DEVICE_MAGIC */
+ int obd_minor; /* device number: lctl dl */
+ struct lu_device *obd_lu_dev;
/* common and UUID name of this device */
- char obd_name[MAX_OBD_NAME];
- struct obd_uuid obd_uuid;
-
- struct lu_device *obd_lu_dev;
+ struct obd_uuid obd_uuid;
+ char obd_name[MAX_OBD_NAME];
- int obd_minor;
/* bitfield modification is protected by obd_dev_lock */
unsigned long obd_attached:1, /* finished attach */
obd_set_up:1, /* finished setup */
@@ -619,22 +565,22 @@ struct obd_device {
unsigned long obd_recovery_expired:1;
/* uuid-export hash body */
struct cfs_hash *obd_uuid_hash;
- atomic_t obd_refcount;
wait_queue_head_t obd_refcount_waitq;
struct list_head obd_exports;
struct list_head obd_unlinked_exports;
struct list_head obd_delayed_exports;
+ atomic_t obd_refcount;
int obd_num_exports;
spinlock_t obd_nid_lock;
struct ldlm_namespace *obd_namespace;
struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */
/* a spinlock is OK for what we do now, may need a semaphore later */
spinlock_t obd_dev_lock; /* protect OBD bitfield above */
- struct mutex obd_dev_mutex;
- __u64 obd_last_committed;
spinlock_t obd_osfs_lock;
struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */
__u64 obd_osfs_age;
+ u64 obd_last_committed;
+ struct mutex obd_dev_mutex;
struct lvfs_run_ctxt obd_lvfs_ctxt;
struct obd_llog_group obd_olg; /* default llog group */
struct obd_device *obd_observer;
@@ -648,12 +594,13 @@ struct obd_device {
struct lov_obd lov;
struct lmv_obd lmv;
} u;
+
/* Fields used by LProcFS */
- unsigned int obd_cntr_base;
- struct lprocfs_stats *obd_stats;
+ struct lprocfs_stats *obd_stats;
+ unsigned int obd_cntr_base;
- unsigned int md_cntr_base;
- struct lprocfs_stats *md_stats;
+ struct lprocfs_stats *md_stats;
+ unsigned int md_cntr_base;
struct dentry *obd_debugfs_entry;
struct dentry *obd_svc_debugfs_entry;
@@ -665,9 +612,11 @@ struct obd_device {
/**
* Ldlm pool part. Save last calculated SLV and Limit.
*/
- rwlock_t obd_pool_lock;
- int obd_pool_limit;
- __u64 obd_pool_slv;
+ rwlock_t obd_pool_lock;
+ u64 obd_pool_slv;
+ int obd_pool_limit;
+
+ int obd_conn_inprogress;
/**
* A list of outstanding class_incref()'s against this obd. For
@@ -675,19 +624,10 @@ struct obd_device {
*/
struct lu_ref obd_reference;
- int obd_conn_inprogress;
-
struct kobject obd_kobj; /* sysfs object */
struct completion obd_kobj_unregister;
};
-enum obd_cleanup_stage {
-/* Special case hack for MDS LOVs */
- OBD_CLEANUP_EARLY,
-/* can be directly mapped to .ldto_device_fini() */
- OBD_CLEANUP_EXPORTS,
-};
-
/* get/set_info keys */
#define KEY_ASYNC "async"
#define KEY_CHANGELOG_CLEAR "changelog_clear"
@@ -704,7 +644,6 @@ enum obd_cleanup_stage {
#define KEY_INTERMDS "inter_mds"
#define KEY_LAST_ID "last_id"
#define KEY_LAST_FID "last_fid"
-#define KEY_LOVDESC "lovdesc"
#define KEY_MAX_EASIZE "max_easize"
#define KEY_DEFAULT_EASIZE "default_easize"
#define KEY_MGSSEC "mgssec"
@@ -720,22 +659,6 @@ enum obd_cleanup_stage {
struct lu_context;
-/* /!\ must be coherent with include/linux/namei.h on patched kernel */
-#define IT_OPEN (1 << 0)
-#define IT_CREAT (1 << 1)
-#define IT_READDIR (1 << 2)
-#define IT_GETATTR (1 << 3)
-#define IT_LOOKUP (1 << 4)
-#define IT_UNLINK (1 << 5)
-#define IT_TRUNC (1 << 6)
-#define IT_GETXATTR (1 << 7)
-#define IT_EXEC (1 << 8)
-#define IT_PIN (1 << 9)
-#define IT_LAYOUT (1 << 10)
-#define IT_QUOTA_DQACQ (1 << 11)
-#define IT_QUOTA_CONN (1 << 12)
-#define IT_SETXATTR (1 << 13)
-
static inline int it_to_lock_mode(struct lookup_intent *it)
{
/* CREAT needs to be tested before open (both could be set) */
@@ -755,6 +678,14 @@ static inline int it_to_lock_mode(struct lookup_intent *it)
return -EINVAL;
}
+enum md_op_flags {
+ MF_MDC_CANCEL_FID1 = BIT(0),
+ MF_MDC_CANCEL_FID2 = BIT(1),
+ MF_MDC_CANCEL_FID3 = BIT(2),
+ MF_MDC_CANCEL_FID4 = BIT(3),
+ MF_GET_MDT_IDX = BIT(4),
+};
+
enum md_cli_flags {
CLI_SET_MEA = BIT(0),
CLI_RM_ENTRY = BIT(1),
@@ -789,8 +720,6 @@ struct md_op_data {
__u64 op_valid;
loff_t op_attr_blocks;
- /* Size-on-MDS epoch and flags. */
- __u64 op_ioepoch;
__u32 op_flags;
/* Various operation flags. */
@@ -839,15 +768,13 @@ struct obd_ops {
int (*iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
void *karg, void __user *uarg);
int (*get_info)(const struct lu_env *env, struct obd_export *,
- __u32 keylen, void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *lsm);
+ __u32 keylen, void *key, __u32 *vallen, void *val);
int (*set_info_async)(const struct lu_env *, struct obd_export *,
__u32 keylen, void *key,
__u32 vallen, void *val,
struct ptlrpc_request_set *set);
int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg);
- int (*precleanup)(struct obd_device *dev,
- enum obd_cleanup_stage cleanup_stage);
+ int (*precleanup)(struct obd_device *dev);
int (*cleanup)(struct obd_device *dev);
int (*process_config)(struct obd_device *dev, u32 len, void *data);
int (*postrecov)(struct obd_device *dev);
@@ -887,35 +814,23 @@ struct obd_ops {
struct obd_statfs *osfs, __u64 max_age, __u32 flags);
int (*statfs_async)(struct obd_export *exp, struct obd_info *oinfo,
__u64 max_age, struct ptlrpc_request_set *set);
- int (*packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt,
- struct lov_stripe_md *mem_src);
- int (*unpackmd)(struct obd_export *exp,
- struct lov_stripe_md **mem_tgt,
- struct lov_mds_md *disk_src, int disk_len);
int (*create)(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct obd_trans_info *oti);
+ struct obdo *oa);
int (*destroy)(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct obd_trans_info *oti);
+ struct obdo *oa);
int (*setattr)(const struct lu_env *, struct obd_export *exp,
- struct obd_info *oinfo, struct obd_trans_info *oti);
- int (*setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- struct ptlrpc_request_set *rqset);
+ struct obdo *oa);
int (*getattr)(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo);
- int (*getattr_async)(struct obd_export *exp, struct obd_info *oinfo,
- struct ptlrpc_request_set *set);
+ struct obdo *oa);
int (*preprw)(const struct lu_env *env, int cmd,
struct obd_export *exp, struct obdo *oa, int objcount,
struct obd_ioobj *obj, struct niobuf_remote *remote,
- int *nr_pages, struct niobuf_local *local,
- struct obd_trans_info *oti);
+ int *nr_pages, struct niobuf_local *local);
int (*commitrw)(const struct lu_env *env, int cmd,
struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
struct niobuf_remote *remote, int pages,
- struct niobuf_local *local,
- struct obd_trans_info *oti, int rc);
+ struct niobuf_local *local, int rc);
int (*init_export)(struct obd_export *exp);
int (*destroy_export)(struct obd_export *exp);
@@ -930,8 +845,6 @@ struct obd_ops {
struct obd_uuid *(*get_uuid)(struct obd_export *exp);
/* quota methods */
- int (*quotacheck)(struct obd_device *, struct obd_export *,
- struct obd_quotactl *);
int (*quotactl)(struct obd_device *, struct obd_export *,
struct obd_quotactl *);
@@ -954,7 +867,7 @@ struct obd_ops {
/* lmv structures */
struct lustre_md {
struct mdt_body *body;
- struct lov_stripe_md *lsm;
+ struct lu_buf layout;
struct lmv_stripe_md *lmv;
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *posix_acl;
@@ -992,10 +905,8 @@ struct md_ops {
int (*create)(struct obd_export *, struct md_op_data *,
const void *, size_t, umode_t, uid_t, gid_t,
cfs_cap_t, __u64, struct ptlrpc_request **);
- int (*done_writing)(struct obd_export *, struct md_op_data *,
- struct md_open_data *);
int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
- const ldlm_policy_data_t *,
+ const union ldlm_policy_data *,
struct lookup_intent *, struct md_op_data *,
struct lustre_handle *, __u64);
int (*getattr)(struct obd_export *, struct md_op_data *,
@@ -1012,8 +923,7 @@ struct md_ops {
const char *, size_t, const char *, size_t,
struct ptlrpc_request **);
int (*setattr)(struct obd_export *, struct md_op_data *, void *,
- size_t, void *, size_t, struct ptlrpc_request **,
- struct md_open_data **mod);
+ size_t, struct ptlrpc_request **);
int (*sync)(struct obd_export *, const struct lu_fid *,
struct ptlrpc_request **);
int (*read_page)(struct obd_export *, struct md_op_data *,
@@ -1030,7 +940,7 @@ struct md_ops {
u64, const char *, const char *, int, int, int,
struct ptlrpc_request **);
- int (*init_ea_size)(struct obd_export *, u32, u32, u32, u32);
+ int (*init_ea_size)(struct obd_export *, u32, u32);
int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
struct obd_export *, struct obd_export *,
@@ -1052,11 +962,11 @@ struct md_ops {
enum ldlm_mode (*lock_match)(struct obd_export *, __u64,
const struct lu_fid *, enum ldlm_type,
- ldlm_policy_data_t *, enum ldlm_mode,
+ union ldlm_policy_data *, enum ldlm_mode,
struct lustre_handle *);
int (*cancel_unused)(struct obd_export *, const struct lu_fid *,
- ldlm_policy_data_t *, enum ldlm_mode,
+ union ldlm_policy_data *, enum ldlm_mode,
enum ldlm_cancel_flags flags, void *opaque);
int (*get_fid_from_lsm)(struct obd_export *,
@@ -1071,6 +981,8 @@ struct md_ops {
int (*revalidate_lock)(struct obd_export *, struct lookup_intent *,
struct lu_fid *, __u64 *bits);
+ int (*unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm,
+ const union lmv_mds_md *lmv, size_t lmv_size);
/*
* NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
* lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
@@ -1078,33 +990,6 @@ struct md_ops {
*/
};
-struct lsm_operations {
- void (*lsm_free)(struct lov_stripe_md *);
- void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, u64 *,
- u64 *);
- void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, u64 *,
- u64 *);
- int (*lsm_lmm_verify)(struct lov_mds_md *lmm, int lmm_bytes,
- __u16 *stripe_count);
- int (*lsm_unpackmd)(struct lov_obd *lov, struct lov_stripe_md *lsm,
- struct lov_mds_md *lmm);
-};
-
-extern const struct lsm_operations lsm_v1_ops;
-extern const struct lsm_operations lsm_v3_ops;
-static inline const struct lsm_operations *lsm_op_find(int magic)
-{
- switch (magic) {
- case LOV_MAGIC_V1:
- return &lsm_v1_ops;
- case LOV_MAGIC_V3:
- return &lsm_v3_ops;
- default:
- CERROR("Cannot recognize lsm_magic %08x\n", magic);
- return NULL;
- }
-}
-
static inline struct md_open_data *obd_mod_alloc(void)
{
struct md_open_data *mod;
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
index 16094dbec08b..7ec25202cd22 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -100,6 +100,13 @@ int obd_get_request_slot(struct client_obd *cli);
void obd_put_request_slot(struct client_obd *cli);
__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli);
int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max);
+int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, u16 max);
+int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq);
+
+u16 obd_get_mod_rpc_slot(struct client_obd *cli, u32 opc,
+ struct lookup_intent *it);
+void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc,
+ struct lookup_intent *it, u16 tag);
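A sketch of how a caller is expected to use the modify-RPC slot API together with the new per-message tag; cli, opc, it and req are assumed to have been set up elsewhere:

static int demo_send_modify_rpc(struct client_obd *cli, u32 opc,
				struct lookup_intent *it,
				struct ptlrpc_request *req)
{
	u16 tag;
	int rc;

	/* blocks until one of cl_max_mod_rpcs_in_flight slots is free */
	tag = obd_get_mod_rpc_slot(cli, opc, it);
	lustre_msg_set_tag(req->rq_reqmsg, tag);

	rc = ptlrpc_queue_wait(req);

	/* release the slot and wake any waiter on cl_mod_rpcs_waitq */
	obd_put_mod_rpc_slot(cli, opc, it, tag);
	return rc;
}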
struct llog_handle;
struct llog_rec_hdr;
@@ -175,10 +182,13 @@ struct lustre_profile {
char *lp_profile;
char *lp_dt;
char *lp_md;
+ int lp_refs;
+ bool lp_list_deleted;
};
struct lustre_profile *class_get_profile(const char *prof);
void class_del_profile(const char *prof);
+void class_put_profile(struct lustre_profile *lprof);
void class_del_profiles(void);
#if LUSTRE_TRACKS_LOCK_EXP_REFS
@@ -269,10 +279,8 @@ static inline int lprocfs_climp_check(struct obd_device *obd)
struct inode;
struct lu_attr;
struct obdo;
-void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid);
void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj);
-void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid);
#define OBT(dev) (dev)->obd_type
#define OBP(dev, op) (dev)->obd_type->typ_dt_ops->op
@@ -417,16 +425,14 @@ static inline int class_devno_max(void)
static inline int obd_get_info(const struct lu_env *env,
struct obd_export *exp, __u32 keylen,
- void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *lsm)
+ void *key, __u32 *vallen, void *val)
{
int rc;
EXP_CHECK_DT_OP(exp, get_info);
EXP_COUNTER_INCREMENT(exp, get_info);
- rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val,
- lsm);
+ rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val);
return rc;
}
@@ -505,8 +511,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
return rc;
}
-static inline int obd_precleanup(struct obd_device *obd,
- enum obd_cleanup_stage cleanup_stage)
+static inline int obd_precleanup(struct obd_device *obd)
{
int rc;
DECLARE_LU_VARS(ldt, d);
@@ -517,20 +522,18 @@ static inline int obd_precleanup(struct obd_device *obd,
ldt = obd->obd_type->typ_lu;
d = obd->obd_lu_dev;
if (ldt && d) {
- if (cleanup_stage == OBD_CLEANUP_EXPORTS) {
- struct lu_env env;
+ struct lu_env env;
- rc = lu_env_init(&env, ldt->ldt_ctx_tags);
- if (rc == 0) {
- ldt->ldt_ops->ldto_device_fini(&env, d);
- lu_env_fini(&env);
- }
+ rc = lu_env_init(&env, ldt->ldt_ctx_tags);
+ if (!rc) {
+ ldt->ldt_ops->ldto_device_fini(&env, d);
+ lu_env_fini(&env);
}
}
OBD_CHECK_DT_OP(obd, precleanup, 0);
OBD_COUNTER_INCREMENT(obd, precleanup);
- rc = OBP(obd, precleanup)(obd, cleanup_stage);
+ rc = OBP(obd, precleanup)(obd);
return rc;
}
@@ -612,181 +615,51 @@ obd_process_config(struct obd_device *obd, int datalen, void *data)
return rc;
}
-/* Pack an in-memory MD struct for storage on disk.
- * Returns +ve size of packed MD (0 for free), or -ve error.
- *
- * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL).
- * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed.
- * If @*disk_tgt == NULL, it will be allocated
- */
-static inline int obd_packmd(struct obd_export *exp,
- struct lov_mds_md **disk_tgt,
- struct lov_stripe_md *mem_src)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, packmd);
- EXP_COUNTER_INCREMENT(exp, packmd);
-
- rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src);
- return rc;
-}
-
-static inline int obd_size_diskmd(struct obd_export *exp,
- struct lov_stripe_md *mem_src)
-{
- return obd_packmd(exp, NULL, mem_src);
-}
-
-static inline int obd_free_diskmd(struct obd_export *exp,
- struct lov_mds_md **disk_tgt)
-{
- LASSERT(disk_tgt);
- LASSERT(*disk_tgt);
- /*
- * LU-2590, for caller's convenience, *disk_tgt could be host
- * endianness, it needs swab to LE if necessary, while just
- * lov_mds_md header needs it for figuring out how much memory
- * needs to be freed.
- */
- if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
- (((*disk_tgt)->lmm_magic == LOV_MAGIC_V1) ||
- ((*disk_tgt)->lmm_magic == LOV_MAGIC_V3)))
- lustre_swab_lov_mds_md(*disk_tgt);
- return obd_packmd(exp, disk_tgt, NULL);
-}
-
-/* Unpack an MD struct from disk to in-memory format.
- * Returns +ve size of unpacked MD (0 for free), or -ve error.
- *
- * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL).
- * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed.
- * If @*mem_tgt == NULL, it will be allocated
- */
-static inline int obd_unpackmd(struct obd_export *exp,
- struct lov_stripe_md **mem_tgt,
- struct lov_mds_md *disk_src,
- int disk_len)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, unpackmd);
- EXP_COUNTER_INCREMENT(exp, unpackmd);
-
- rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len);
- return rc;
-}
-
-static inline int obd_free_memmd(struct obd_export *exp,
- struct lov_stripe_md **mem_tgt)
-{
- int rc;
-
- LASSERT(mem_tgt);
- LASSERT(*mem_tgt);
- rc = obd_unpackmd(exp, mem_tgt, NULL, 0);
- *mem_tgt = NULL;
- return rc;
-}
-
static inline int obd_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *obdo, struct obd_trans_info *oti)
+ struct obdo *obdo)
{
int rc;
EXP_CHECK_DT_OP(exp, create);
EXP_COUNTER_INCREMENT(exp, create);
- rc = OBP(exp->exp_obd, create)(env, exp, obdo, oti);
+ rc = OBP(exp->exp_obd, create)(env, exp, obdo);
return rc;
}
static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp,
- struct obdo *obdo, struct obd_trans_info *oti)
+ struct obdo *obdo)
{
int rc;
EXP_CHECK_DT_OP(exp, destroy);
EXP_COUNTER_INCREMENT(exp, destroy);
- rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, oti);
+ rc = OBP(exp->exp_obd, destroy)(env, exp, obdo);
return rc;
}
static inline int obd_getattr(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo)
+ struct obdo *oa)
{
int rc;
EXP_CHECK_DT_OP(exp, getattr);
EXP_COUNTER_INCREMENT(exp, getattr);
- rc = OBP(exp->exp_obd, getattr)(env, exp, oinfo);
- return rc;
-}
-
-static inline int obd_getattr_async(struct obd_export *exp,
- struct obd_info *oinfo,
- struct ptlrpc_request_set *set)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, getattr_async);
- EXP_COUNTER_INCREMENT(exp, getattr_async);
-
- rc = OBP(exp->exp_obd, getattr_async)(exp, oinfo, set);
+ rc = OBP(exp->exp_obd, getattr)(env, exp, oa);
return rc;
}
static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo,
- struct obd_trans_info *oti)
+ struct obdo *oa)
{
int rc;
EXP_CHECK_DT_OP(exp, setattr);
EXP_COUNTER_INCREMENT(exp, setattr);
- rc = OBP(exp->exp_obd, setattr)(env, exp, oinfo, oti);
- return rc;
-}
-
-/* This performs all the requests set init/wait/destroy actions. */
-static inline int obd_setattr_rqset(struct obd_export *exp,
- struct obd_info *oinfo,
- struct obd_trans_info *oti)
-{
- struct ptlrpc_request_set *set = NULL;
- int rc;
-
- EXP_CHECK_DT_OP(exp, setattr_async);
- EXP_COUNTER_INCREMENT(exp, setattr_async);
-
- set = ptlrpc_prep_set();
- if (!set)
- return -ENOMEM;
-
- rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
- if (rc == 0)
- rc = ptlrpc_set_wait(set);
- ptlrpc_set_destroy(set);
- return rc;
-}
-
-/* This adds all the requests into @set if @set != NULL, otherwise
- * all requests are sent asynchronously without waiting for response.
- */
-static inline int obd_setattr_async(struct obd_export *exp,
- struct obd_info *oinfo,
- struct obd_trans_info *oti,
- struct ptlrpc_request_set *set)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, setattr_async);
- EXP_COUNTER_INCREMENT(exp, setattr_async);
-
- rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set);
+ rc = OBP(exp->exp_obd, setattr)(env, exp, oa);
return rc;
}
@@ -1053,15 +926,16 @@ static inline int obd_statfs_rqset(struct obd_export *exp,
__u32 flags)
{
struct ptlrpc_request_set *set = NULL;
- struct obd_info oinfo = { };
+ struct obd_info oinfo = {
+ .oi_osfs = osfs,
+ .oi_flags = flags,
+ };
int rc = 0;
- set = ptlrpc_prep_set();
+ set = ptlrpc_prep_set();
if (!set)
return -ENOMEM;
- oinfo.oi_osfs = osfs;
- oinfo.oi_flags = flags;
rc = obd_statfs_async(exp, &oinfo, max_age, set);
if (rc == 0)
rc = ptlrpc_set_wait(set);
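A side note on the conversion above: it is behaviour-preserving because C99 designated initializers zero every member they do not name, so folding the two assignments into the initializer changes nothing. A minimal sketch (hypothetical demo function, illustration only):

static void oinfo_init_demo(struct obd_statfs *osfs, __u32 flags)
{
	struct obd_info a = {
		.oi_osfs = osfs,
		.oi_flags = flags,
	};
	struct obd_info b = { };

	/* the old form: zero-init, then assign */
	b.oi_osfs = osfs;
	b.oi_flags = flags;
	/* a and b are now member-for-member identical */
}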
@@ -1112,8 +986,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd,
struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
struct niobuf_remote *remote, int *pages,
- struct niobuf_local *local,
- struct obd_trans_info *oti)
+ struct niobuf_local *local)
{
int rc;
@@ -1121,7 +994,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd,
EXP_COUNTER_INCREMENT(exp, preprw);
rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote,
- pages, local, oti);
+ pages, local);
return rc;
}
@@ -1129,14 +1002,13 @@ static inline int obd_commitrw(const struct lu_env *env, int cmd,
struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
struct niobuf_remote *rnb, int pages,
- struct niobuf_local *local,
- struct obd_trans_info *oti, int rc)
+ struct niobuf_local *local, int rc)
{
EXP_CHECK_DT_OP(exp, commitrw);
EXP_COUNTER_INCREMENT(exp, commitrw);
rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj,
- rnb, pages, local, oti, rc);
+ rnb, pages, local, rc);
return rc;
}
@@ -1219,18 +1091,6 @@ static inline int obd_notify_observer(struct obd_device *observer,
return rc1 ? rc1 : rc2;
}
-static inline int obd_quotacheck(struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, quotacheck);
- EXP_COUNTER_INCREMENT(exp, quotacheck);
-
- rc = OBP(exp->exp_obd, quotacheck)(exp->exp_obd, exp, oqctl);
- return rc;
-}
-
static inline int obd_quotactl(struct obd_export *exp,
struct obd_quotactl *oqctl)
{
@@ -1346,21 +1206,9 @@ static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
-static inline int md_done_writing(struct obd_export *exp,
- struct md_op_data *op_data,
- struct md_open_data *mod)
-{
- int rc;
-
- EXP_CHECK_MD_OP(exp, done_writing);
- EXP_MD_COUNTER_INCREMENT(exp, done_writing);
- rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod);
- return rc;
-}
-
static inline int md_enqueue(struct obd_export *exp,
struct ldlm_enqueue_info *einfo,
- const ldlm_policy_data_t *policy,
+ const union ldlm_policy_data *policy,
struct lookup_intent *it,
struct md_op_data *op_data,
struct lustre_handle *lockh,
@@ -1428,16 +1276,14 @@ static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
}
static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, size_t ealen, void *ea2, size_t ea2len,
- struct ptlrpc_request **request,
- struct md_open_data **mod)
+ void *ea, size_t ealen,
+ struct ptlrpc_request **request)
{
int rc;
EXP_CHECK_MD_OP(exp, setattr);
EXP_MD_COUNTER_INCREMENT(exp, setattr);
- rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen,
- ea2, ea2len, request, mod);
+ rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, request);
return rc;
}
@@ -1561,7 +1407,7 @@ static inline int md_set_lock_data(struct obd_export *exp,
static inline int md_cancel_unused(struct obd_export *exp,
const struct lu_fid *fid,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
enum ldlm_cancel_flags flags,
void *opaque)
@@ -1579,7 +1425,7 @@ static inline int md_cancel_unused(struct obd_export *exp,
static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags,
const struct lu_fid *fid,
enum ldlm_type type,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
struct lustre_handle *lockh)
{
@@ -1589,14 +1435,12 @@ static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags,
policy, mode, lockh);
}
-static inline int md_init_ea_size(struct obd_export *exp, int easize,
- int def_asize, int cookiesize,
- int def_cookiesize)
+static inline int md_init_ea_size(struct obd_export *exp, u32 easize,
+ u32 def_asize)
{
EXP_CHECK_MD_OP(exp, init_ea_size);
EXP_MD_COUNTER_INCREMENT(exp, init_ea_size);
- return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize,
- cookiesize, def_cookiesize);
+ return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize);
}
static inline int md_intent_getattr_async(struct obd_export *exp,
@@ -1636,6 +1480,24 @@ static inline int md_get_fid_from_lsm(struct obd_export *exp,
return rc;
}
+/* Unpack an MD struct from disk to in-memory format.
+ * Returns +ve size of unpacked MD (0 for free), or -ve error.
+ *
+ * If *plsm != NULL and lmm == NULL then *plsm will be freed.
+ * If *plsm == NULL then it will be allocated.
+ */
+static inline int md_unpackmd(struct obd_export *exp,
+ struct lmv_stripe_md **plsm,
+ const union lmv_mds_md *lmm, size_t lmm_size)
+{
+ int rc;
+
+ EXP_CHECK_MD_OP(exp, unpackmd);
+ EXP_MD_COUNTER_INCREMENT(exp, unpackmd);
+ rc = MDP(exp->exp_obd, unpackmd)(exp, plsm, lmm, lmm_size);
+ return rc;
+}
+
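With obd_free_memmd() gone from the data path, a metadata caller that wants the old free-by-unpack convenience can build it on md_unpackmd() directly; a minimal sketch, assuming only the freeing convention documented above (md_free_lsm is hypothetical):

static inline int md_free_lsm(struct obd_export *exp,
			      struct lmv_stripe_md **plsm)
{
	int rc;

	/* a NULL lmm asks the layer below to free *plsm, per the
	 * comment on md_unpackmd() above
	 */
	rc = md_unpackmd(exp, plsm, NULL, 0);
	*plsm = NULL;
	return rc;
}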
/* OBD Metadata Support */
int obd_init_caches(void);
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index b346a7f10aa4..aaedec7d793c 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -172,14 +172,14 @@ extern char obd_jobid_var[];
#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123
#define OBD_FAIL_MDS_SYNC_NET 0x124
#define OBD_FAIL_MDS_SYNC_PACK 0x125
-#define OBD_FAIL_MDS_DONE_WRITING_NET 0x126
-#define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127
+/* OBD_FAIL_MDS_DONE_WRITING_NET 0x126 obsolete since 2.8.0 */
+/* OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 obsolete since 2.8.0 */
#define OBD_FAIL_MDS_ALLOC_OBDO 0x128
#define OBD_FAIL_MDS_PAUSE_OPEN 0x129
#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a
#define OBD_FAIL_MDS_OPEN_CREATE 0x12b
#define OBD_FAIL_MDS_OST_SETATTR 0x12c
-#define OBD_FAIL_MDS_QUOTACHECK_NET 0x12d
+/* OBD_FAIL_MDS_QUOTACHECK_NET 0x12d obsolete since 2.4 */
#define OBD_FAIL_MDS_QUOTACTL_NET 0x12e
#define OBD_FAIL_MDS_CLIENT_ADD 0x12f
#define OBD_FAIL_MDS_GETXATTR_NET 0x130
@@ -264,7 +264,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_OST_ENOSPC 0x215
#define OBD_FAIL_OST_EROFS 0x216
#define OBD_FAIL_OST_ENOENT 0x217
-#define OBD_FAIL_OST_QUOTACHECK_NET 0x218
+/* OBD_FAIL_OST_QUOTACHECK_NET 0x218 obsolete since 2.4 */
#define OBD_FAIL_OST_QUOTACTL_NET 0x219
#define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a
#define OBD_FAIL_OST_CHECKSUM_SEND 0x21b
@@ -321,6 +321,8 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322
#define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323
+#define OBD_FAIL_LDLM_GRANT_CHECK 0x32a
+
/* LOCKLESS IO */
#define OBD_FAIL_LDLM_SET_CONTENTION 0x385
@@ -343,6 +345,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_OSC_CP_ENQ_RACE 0x410
#define OBD_FAIL_OSC_NO_GRANT 0x411
#define OBD_FAIL_OSC_DELAY_SETTIME 0x412
+#define OBD_FAIL_OSC_DELAY_IO 0x414
#define OBD_FAIL_PTLRPC 0x500
#define OBD_FAIL_PTLRPC_ACK 0x501
@@ -373,7 +376,7 @@ extern char obd_jobid_var[];
#define OBD_FAIL_OBD_PING_NET 0x600
#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601
#define OBD_FAIL_OBD_LOGD_NET 0x602
-#define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603
+/* OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 obsolete since 2.4 */
#define OBD_FAIL_OBD_DQACQ 0x604
#define OBD_FAIL_OBD_LLOG_SETUP 0x605
#define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606
@@ -458,6 +461,8 @@ extern char obd_jobid_var[];
#define OBD_FAIL_LOV_INIT 0x1403
#define OBD_FAIL_GLIMPSE_DELAY 0x1404
#define OBD_FAIL_LLITE_XATTR_ENOMEM 0x1405
+#define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406
+#define OBD_FAIL_LLITE_LOST_LAYOUT 0x1407
#define OBD_FAIL_GETATTR_DELAY 0x1409
#define OBD_FAIL_FID_INDIR 0x1501
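Note the pattern in this hunk: retired fail codes are kept as comments so their numeric values are never recycled. The codes are consumed through OBD_FAIL_CHECK(), which stays false unless the node's fail_loc has been armed with the matching value (typically via something like "lctl set_param fail_loc=0x414"; the exact knob is an assumption here). A hypothetical call site:

static int osc_io_fault_demo(void)
{
	/* dead branch unless fail_loc == OBD_FAIL_OSC_DELAY_IO */
	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_DELAY_IO))
		return -EINPROGRESS;	/* injected failure, demo only */
	return 0;
}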
diff --git a/drivers/staging/lustre/lustre/include/seq_range.h b/drivers/staging/lustre/lustre/include/seq_range.h
new file mode 100644
index 000000000000..30c4dd66d5c4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/seq_range.h
@@ -0,0 +1,199 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * Define lu_seq_range associated functions
+ */
+
+#ifndef _SEQ_RANGE_H_
+#define _SEQ_RANGE_H_
+
+#include "lustre/lustre_idl.h"
+
+/**
+ * computes the sequence range type \a range
+ */
+
+static inline unsigned int fld_range_type(const struct lu_seq_range *range)
+{
+ return range->lsr_flags & LU_SEQ_RANGE_MASK;
+}
+
+/**
+ * Is this sequence range an OST? \a range
+ */
+
+static inline bool fld_range_is_ost(const struct lu_seq_range *range)
+{
+ return fld_range_type(range) == LU_SEQ_RANGE_OST;
+}
+
+/**
+ * Is this sequence range an MDT? \a range
+ */
+
+static inline bool fld_range_is_mdt(const struct lu_seq_range *range)
+{
+ return fld_range_type(range) == LU_SEQ_RANGE_MDT;
+}
+
+/**
+ * ANY range is only used when the fld client sends a fld query request,
+ * but it does not know whether the seq is an MDT or OST, so it will send the
+ * request with ANY type, which means any seq type from the lookup can be
+ * expected. \a range
+ */
+static inline unsigned int fld_range_is_any(const struct lu_seq_range *range)
+{
+ return fld_range_type(range) == LU_SEQ_RANGE_ANY;
+}
+
+/**
+ * Apply flags to range \a range \a flags
+ */
+
+static inline void fld_range_set_type(struct lu_seq_range *range,
+ unsigned int flags)
+{
+ range->lsr_flags |= flags;
+}
+
+/**
+ * Add MDT to range type \a range
+ */
+
+static inline void fld_range_set_mdt(struct lu_seq_range *range)
+{
+ fld_range_set_type(range, LU_SEQ_RANGE_MDT);
+}
+
+/**
+ * Add OST to range type \a range
+ */
+
+static inline void fld_range_set_ost(struct lu_seq_range *range)
+{
+ fld_range_set_type(range, LU_SEQ_RANGE_OST);
+}
+
+/**
+ * Add ANY to range type \a range
+ */
+
+static inline void fld_range_set_any(struct lu_seq_range *range)
+{
+ fld_range_set_type(range, LU_SEQ_RANGE_ANY);
+}
+
+/**
+ * computes width of given sequence range \a range
+ */
+
+static inline u64 lu_seq_range_space(const struct lu_seq_range *range)
+{
+ return range->lsr_end - range->lsr_start;
+}
+
+/**
+ * initialize range to zero \a range
+ */
+
+static inline void lu_seq_range_init(struct lu_seq_range *range)
+{
+ memset(range, 0, sizeof(*range));
+}
+
+/**
+ * check if given seq id \a seq is within given range \a range
+ */
+
+static inline bool lu_seq_range_within(const struct lu_seq_range *range,
+ u64 seq)
+{
+ return seq >= range->lsr_start && seq < range->lsr_end;
+}
+
+/**
+ * Is the range sane? Is the end after the beginning? \a range
+ */
+
+static inline bool lu_seq_range_is_sane(const struct lu_seq_range *range)
+{
+ return range->lsr_end >= range->lsr_start;
+}
+
+/**
+ * Is the range 0? \a range
+ */
+
+static inline bool lu_seq_range_is_zero(const struct lu_seq_range *range)
+{
+ return range->lsr_start == 0 && range->lsr_end == 0;
+}
+
+/**
+ * Is the range out of space? \a range
+ */
+
+static inline bool lu_seq_range_is_exhausted(const struct lu_seq_range *range)
+{
+ return lu_seq_range_space(range) == 0;
+}
+
+/**
+ * return 0 if two ranges have the same location, nonzero if they are
+ * different \a r1 \a r2
+ */
+
+static inline int lu_seq_range_compare_loc(const struct lu_seq_range *r1,
+ const struct lu_seq_range *r2)
+{
+ return r1->lsr_index != r2->lsr_index ||
+ r1->lsr_flags != r2->lsr_flags;
+}
+
+#if !defined(__REQ_LAYOUT_USER__)
+/**
+ * byte swap range structure \a range
+ */
+
+void lustre_swab_lu_seq_range(struct lu_seq_range *range);
+#endif
+/**
+ * printf string and argument list for sequence range
+ */
+#define DRANGE "[%#16.16llx-%#16.16llx]:%x:%s"
+
+#define PRANGE(range) \
+ (range)->lsr_start, \
+ (range)->lsr_end, \
+ (range)->lsr_index, \
+ fld_range_is_mdt(range) ? "mdt" : "ost"
+
+#endif
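A quick illustration of how the new header's helpers and printf macros compose (hypothetical debug routine; CDEBUG and D_INFO are the usual libcfs debugging interfaces):

static void seq_range_debug(const struct lu_seq_range *range, u64 seq)
{
	if (!lu_seq_range_is_sane(range))
		return;

	/* DRANGE/PRANGE expand to start, end, index and the mdt/ost tag */
	CDEBUG(D_INFO, "seq %#llx is %s range " DRANGE ", %llu wide\n",
	       seq, lu_seq_range_within(range, seq) ? "inside" : "outside",
	       PRANGE(range), lu_seq_range_space(range));
}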
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
index ecf472e4813d..32b73ee62639 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
@@ -193,6 +193,26 @@ void ldlm_extent_add_lock(struct ldlm_resource *res,
* add the locks into grant list, for debug purpose, ..
*/
ldlm_resource_add_lock(res, &res->lr_granted, lock);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GRANT_CHECK)) {
+ struct ldlm_lock *lck;
+
+ list_for_each_entry_reverse(lck, &res->lr_granted,
+ l_res_link) {
+ if (lck == lock)
+ continue;
+ if (lockmode_compat(lck->l_granted_mode,
+ lock->l_granted_mode))
+ continue;
+ if (ldlm_extent_overlap(&lck->l_req_extent,
+ &lock->l_req_extent)) {
+ CDEBUG(D_ERROR, "granting conflicting lock %p %p\n",
+ lck, lock);
+ ldlm_resource_dump(D_ERROR, res);
+ LBUG();
+ }
+ }
+ }
}
/** Remove cancelled lock from resource interval tree. */
@@ -220,8 +240,8 @@ void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
}
}
-void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy)
+void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy)
{
memset(lpolicy, 0, sizeof(*lpolicy));
lpolicy->l_extent.start = wpolicy->l_extent.start;
@@ -229,8 +249,8 @@ void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
lpolicy->l_extent.gid = wpolicy->l_extent.gid;
}
-void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy)
+void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
+ union ldlm_wire_policy_data *wpolicy)
{
memset(wpolicy, 0, sizeof(*wpolicy));
wpolicy->l_extent.start = lpolicy->l_extent.start;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
index 861f36f039b5..722160784f83 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
@@ -612,22 +612,8 @@ granted:
}
EXPORT_SYMBOL(ldlm_flock_completion_ast);
-void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy)
-{
- memset(lpolicy, 0, sizeof(*lpolicy));
- lpolicy->l_flock.start = wpolicy->l_flock.lfw_start;
- lpolicy->l_flock.end = wpolicy->l_flock.lfw_end;
- lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid;
- /* Compat code, old clients had no idea about owner field and
- * relied solely on pid for ownership. Introduced in LU-104, 2.1,
- * April 2011
- */
- lpolicy->l_flock.owner = wpolicy->l_flock.lfw_pid;
-}
-
-void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy)
+void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy)
{
memset(lpolicy, 0, sizeof(*lpolicy));
lpolicy->l_flock.start = wpolicy->l_flock.lfw_start;
@@ -636,8 +622,8 @@ void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy,
lpolicy->l_flock.owner = wpolicy->l_flock.lfw_owner;
}
-void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy)
+void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
+ union ldlm_wire_policy_data *wpolicy)
{
memset(wpolicy, 0, sizeof(*wpolicy));
wpolicy->l_flock.lfw_start = lpolicy->l_flock.start;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
index 79f4e6fa193e..8e1709dc073c 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c
@@ -54,15 +54,15 @@
#include "../include/lustre_lib.h"
#include "ldlm_internal.h"
-void ldlm_ibits_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy)
+void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy)
{
memset(lpolicy, 0, sizeof(*lpolicy));
lpolicy->l_inodebits.bits = wpolicy->l_inodebits.bits;
}
-void ldlm_ibits_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy)
+void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
+ union ldlm_wire_policy_data *wpolicy)
{
memset(wpolicy, 0, sizeof(*wpolicy));
wpolicy->l_inodebits.bits = lpolicy->l_inodebits.bits;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
index 5e82cfc245b2..5c02501d0560 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
@@ -39,13 +39,13 @@ extern struct list_head ldlm_srv_namespace_list;
extern struct mutex ldlm_cli_namespace_lock;
extern struct list_head ldlm_cli_active_namespace_list;
-static inline int ldlm_namespace_nr_read(ldlm_side_t client)
+static inline int ldlm_namespace_nr_read(enum ldlm_side client)
{
return client == LDLM_NAMESPACE_SERVER ?
ldlm_srv_namespace_nr : ldlm_cli_namespace_nr;
}
-static inline void ldlm_namespace_nr_inc(ldlm_side_t client)
+static inline void ldlm_namespace_nr_inc(enum ldlm_side client)
{
if (client == LDLM_NAMESPACE_SERVER)
ldlm_srv_namespace_nr++;
@@ -53,7 +53,7 @@ static inline void ldlm_namespace_nr_inc(ldlm_side_t client)
ldlm_cli_namespace_nr++;
}
-static inline void ldlm_namespace_nr_dec(ldlm_side_t client)
+static inline void ldlm_namespace_nr_dec(enum ldlm_side client)
{
if (client == LDLM_NAMESPACE_SERVER)
ldlm_srv_namespace_nr--;
@@ -61,13 +61,13 @@ static inline void ldlm_namespace_nr_dec(ldlm_side_t client)
ldlm_cli_namespace_nr--;
}
-static inline struct list_head *ldlm_namespace_list(ldlm_side_t client)
+static inline struct list_head *ldlm_namespace_list(enum ldlm_side client)
{
return client == LDLM_NAMESPACE_SERVER ?
&ldlm_srv_namespace_list : &ldlm_cli_active_namespace_list;
}
-static inline struct mutex *ldlm_namespace_lock(ldlm_side_t client)
+static inline struct mutex *ldlm_namespace_lock(enum ldlm_side client)
{
return client == LDLM_NAMESPACE_SERVER ?
&ldlm_srv_namespace_lock : &ldlm_cli_namespace_lock;
@@ -79,22 +79,23 @@ static inline int ldlm_ns_empty(struct ldlm_namespace *ns)
return atomic_read(&ns->ns_bref) == 0;
}
-void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *, ldlm_side_t);
+void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *,
+ enum ldlm_side);
void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *,
- ldlm_side_t);
-struct ldlm_namespace *ldlm_namespace_first_locked(ldlm_side_t);
+ enum ldlm_side);
+struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side);
/* ldlm_request.c */
/* Cancel lru flag, it indicates we cancel aged locks. */
enum {
- LDLM_CANCEL_AGED = 1 << 0, /* Cancel aged locks (non lru resize). */
- LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */
- LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */
- LDLM_CANCEL_LRUR = 1 << 3, /* Cancel locks from lru resize. */
- LDLM_CANCEL_NO_WAIT = 1 << 4, /* Cancel locks w/o blocking (neither
- * sending nor waiting for any rpcs)
- */
- LDLM_CANCEL_LRUR_NO_WAIT = 1 << 5, /* LRUR + NO_WAIT */
+ LDLM_LRU_FLAG_AGED = BIT(0), /* Cancel aged locks (non lru resize). */
+ LDLM_LRU_FLAG_PASSED = BIT(1), /* Cancel passed number of locks. */
+ LDLM_LRU_FLAG_SHRINK = BIT(2), /* Cancel locks from shrinker. */
+ LDLM_LRU_FLAG_LRUR = BIT(3), /* Cancel locks from lru resize. */
+ LDLM_LRU_FLAG_NO_WAIT = BIT(4), /* Cancel locks w/o blocking (neither
+ * sending nor waiting for any rpcs)
+ */
+ LDLM_LRU_FLAG_LRUR_NO_WAIT = BIT(5), /* LRUR + NO_WAIT */
};
int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
@@ -137,10 +138,10 @@ ldlm_lock_create(struct ldlm_namespace *ns, const struct ldlm_res_id *,
void *data, __u32 lvb_len, enum lvb_type lvb_type);
enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *, struct ldlm_lock **,
void *cookie, __u64 *flags);
-void ldlm_lock_addref_internal(struct ldlm_lock *, __u32 mode);
-void ldlm_lock_addref_internal_nolock(struct ldlm_lock *, __u32 mode);
-void ldlm_lock_decref_internal(struct ldlm_lock *, __u32 mode);
-void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, __u32 mode);
+void ldlm_lock_addref_internal(struct ldlm_lock *, enum ldlm_mode mode);
+void ldlm_lock_addref_internal_nolock(struct ldlm_lock *, enum ldlm_mode mode);
+void ldlm_lock_decref_internal(struct ldlm_lock *, enum ldlm_mode mode);
+void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, enum ldlm_mode mode);
int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
enum ldlm_desc_ast_t ast_type);
int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use);
@@ -311,28 +312,25 @@ static inline int is_granted_or_cancelled(struct ldlm_lock *lock)
return ret;
}
-typedef void (*ldlm_policy_wire_to_local_t)(const ldlm_wire_policy_data_t *,
- ldlm_policy_data_t *);
-
-typedef void (*ldlm_policy_local_to_wire_t)(const ldlm_policy_data_t *,
- ldlm_wire_policy_data_t *);
-
-void ldlm_plain_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
-void ldlm_plain_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy);
-void ldlm_ibits_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
-void ldlm_ibits_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy);
-void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
-void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy);
-void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
-void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy);
-
-void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy);
+typedef void (*ldlm_policy_wire_to_local_t)(const union ldlm_wire_policy_data *,
+ union ldlm_policy_data *);
+
+typedef void (*ldlm_policy_local_to_wire_t)(const union ldlm_policy_data *,
+ union ldlm_wire_policy_data *);
+
+void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy);
+void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
+ union ldlm_wire_policy_data *wpolicy);
+void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy);
+void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
+ union ldlm_wire_policy_data *wpolicy);
+void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy);
+void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
+ union ldlm_wire_policy_data *wpolicy);
+void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy);
+void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
+ union ldlm_wire_policy_data *wpolicy);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
index 153e990c494e..9be01426c955 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
@@ -170,6 +170,9 @@ int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
ptlrpc_connection_put(dlmexp->exp_connection);
dlmexp->exp_connection = NULL;
}
+
+ if (dlmexp)
+ class_export_put(dlmexp);
}
list_del(&imp_conn->oic_item);
@@ -372,6 +375,25 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
} else {
cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
}
+
+ spin_lock_init(&cli->cl_mod_rpcs_lock);
+ spin_lock_init(&cli->cl_mod_rpcs_hist.oh_lock);
+ cli->cl_max_mod_rpcs_in_flight = 0;
+ cli->cl_mod_rpcs_in_flight = 0;
+ cli->cl_close_rpcs_in_flight = 0;
+ init_waitqueue_head(&cli->cl_mod_rpcs_waitq);
+ cli->cl_mod_tag_bitmap = NULL;
+
+ if (connect_op == MDS_CONNECT) {
+ cli->cl_max_mod_rpcs_in_flight = cli->cl_max_rpcs_in_flight - 1;
+ cli->cl_mod_tag_bitmap = kcalloc(BITS_TO_LONGS(OBD_MAX_RIF_MAX),
+ sizeof(long), GFP_NOFS);
+ if (!cli->cl_mod_tag_bitmap) {
+ rc = -ENOMEM;
+ goto err;
+ }
+ }
+
rc = ldlm_get_ref();
if (rc) {
CERROR("ldlm_get_ref failed: %d\n", rc);
@@ -399,9 +421,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
}
cli->cl_import = imp;
- /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */
+ /* cli->cl_max_mds_easize updated by mdc_init_ea_size() */
cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3);
- cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) {
@@ -425,8 +446,6 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
goto err_import;
}
- cli->cl_qchk_stat = CL_NOT_QUOTACHECKED;
-
return rc;
err_import:
@@ -434,12 +453,16 @@ err_import:
err_ldlm:
ldlm_put_ref();
err:
+ kfree(cli->cl_mod_tag_bitmap);
+ cli->cl_mod_tag_bitmap = NULL;
return rc;
}
EXPORT_SYMBOL(client_obd_setup);
int client_obd_cleanup(struct obd_device *obddev)
{
+ struct client_obd *cli = &obddev->u.cli;
+
ldlm_namespace_free_post(obddev->obd_namespace);
obddev->obd_namespace = NULL;
@@ -447,6 +470,10 @@ int client_obd_cleanup(struct obd_device *obddev)
LASSERT(!obddev->u.cli.cl_import);
ldlm_put_ref();
+
+ kfree(cli->cl_mod_tag_bitmap);
+ cli->cl_mod_tag_bitmap = NULL;
+
return 0;
}
EXPORT_SYMBOL(client_obd_cleanup);
@@ -461,6 +488,7 @@ int client_connect_import(const struct lu_env *env,
struct obd_import *imp = cli->cl_import;
struct obd_connect_data *ocd;
struct lustre_handle conn = { 0 };
+ bool is_mdc = false;
int rc;
*exp = NULL;
@@ -487,6 +515,10 @@ int client_connect_import(const struct lu_env *env,
ocd = &imp->imp_connect_data;
if (data) {
*ocd = *data;
+ is_mdc = !strncmp(imp->imp_obd->obd_type->typ_name,
+ LUSTRE_MDC_NAME, 3);
+ if (is_mdc)
+ data->ocd_connect_flags |= OBD_CONNECT_MULTIMODRPCS;
imp->imp_connect_flags_orig = data->ocd_connect_flags;
}
@@ -502,6 +534,11 @@ int client_connect_import(const struct lu_env *env,
ocd->ocd_connect_flags, "old %#llx, new %#llx\n",
data->ocd_connect_flags, ocd->ocd_connect_flags);
data->ocd_connect_flags = ocd->ocd_connect_flags;
+ /* clear the flag: it was set here, not by the upper
+ * layers, which do not know about it
+ */
+ if (is_mdc)
+ data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS;
}
ptlrpc_pinger_add_import(imp);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
index 3c48b4fb96f1..a4a291acb659 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
@@ -39,6 +39,7 @@
#include "../../include/linux/libcfs/libcfs.h"
#include "../include/lustre_intent.h"
+#include "../include/lustre_swab.h"
#include "../include/obd_class.h"
#include "ldlm_internal.h"
@@ -63,17 +64,10 @@ static char *ldlm_typename[] = {
[LDLM_IBITS] = "IBT",
};
-static ldlm_policy_wire_to_local_t ldlm_policy_wire18_to_local[] = {
+static ldlm_policy_wire_to_local_t ldlm_policy_wire_to_local[] = {
[LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_wire_to_local,
[LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_wire_to_local,
- [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_wire18_to_local,
- [LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_wire_to_local,
-};
-
-static ldlm_policy_wire_to_local_t ldlm_policy_wire21_to_local[] = {
- [LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_wire_to_local,
- [LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_wire_to_local,
- [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_wire21_to_local,
+ [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_wire_to_local,
[LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_wire_to_local,
};
@@ -88,8 +82,8 @@ static ldlm_policy_local_to_wire_t ldlm_policy_local_to_wire[] = {
* Converts lock policy from local format to on the wire lock_desc format
*/
static void ldlm_convert_policy_to_wire(enum ldlm_type type,
- const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy)
+ const union ldlm_policy_data *lpolicy,
+ union ldlm_wire_policy_data *wpolicy)
{
ldlm_policy_local_to_wire_t convert;
@@ -102,23 +96,17 @@ static void ldlm_convert_policy_to_wire(enum ldlm_type type,
* Converts lock policy from on the wire lock_desc format to local format
*/
void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type,
- const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy)
+ const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy)
{
ldlm_policy_wire_to_local_t convert;
- int new_client;
- /** some badness for 2.0.0 clients, but 2.0.0 isn't supported */
- new_client = (exp_connect_flags(exp) & OBD_CONNECT_FULL20) != 0;
- if (new_client)
- convert = ldlm_policy_wire21_to_local[type - LDLM_MIN_TYPE];
- else
- convert = ldlm_policy_wire18_to_local[type - LDLM_MIN_TYPE];
+ convert = ldlm_policy_wire_to_local[type - LDLM_MIN_TYPE];
convert(wpolicy, lpolicy);
}
-char *ldlm_it2str(int it)
+const char *ldlm_it2str(enum ldlm_intent_flags it)
{
switch (it) {
case IT_OPEN:
@@ -140,7 +128,7 @@ char *ldlm_it2str(int it)
case IT_LAYOUT:
return "layout";
default:
- CERROR("Unknown intent %d\n", it);
+ CERROR("Unknown intent 0x%08x\n", it);
return "UNKNOWN";
}
}
@@ -512,7 +500,6 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
return 0;
}
-EXPORT_SYMBOL(ldlm_lock_change_resource);
/** \defgroup ldlm_handles LDLM HANDLES
* Ways to get hold of locks without any addresses.
@@ -595,7 +582,6 @@ void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
&lock->l_policy_data,
&desc->l_policy_data);
}
-EXPORT_SYMBOL(ldlm_lock2desc);
/**
* Add a lock to list of conflicting locks to send AST to.
@@ -658,7 +644,7 @@ static void ldlm_add_ast_work_item(struct ldlm_lock *lock,
* r/w reference type is determined by \a mode
* Calls ldlm_lock_addref_internal.
*/
-void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode)
{
struct ldlm_lock *lock;
@@ -676,7 +662,8 @@ EXPORT_SYMBOL(ldlm_lock_addref);
* Removes lock from LRU if it is there.
* Assumes the LDLM lock is already locked.
*/
-void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
+void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock,
+ enum ldlm_mode mode)
{
ldlm_lock_remove_from_lru(lock);
if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
@@ -700,7 +687,7 @@ void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
*
* \retval -EAGAIN lock is being canceled.
*/
-int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode)
+int ldlm_lock_addref_try(const struct lustre_handle *lockh, enum ldlm_mode mode)
{
struct ldlm_lock *lock;
int result;
@@ -726,7 +713,7 @@ EXPORT_SYMBOL(ldlm_lock_addref_try);
* Locks LDLM lock and calls ldlm_lock_addref_internal_nolock to do the work.
* Only called for local locks.
*/
-void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
+void ldlm_lock_addref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
{
lock_res_and_lock(lock);
ldlm_lock_addref_internal_nolock(lock, mode);
@@ -740,7 +727,8 @@ void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
* Does NOT add lock to LRU if no r/w references left to accommodate flock locks
* that cannot be placed in LRU.
*/
-void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
+void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock,
+ enum ldlm_mode mode)
{
LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
@@ -766,7 +754,7 @@ void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
* on the namespace.
* For blocked LDLM locks if r/w count drops to zero, blocking_ast is called.
*/
-void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
+void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
{
struct ldlm_namespace *ns;
@@ -786,11 +774,16 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
}
if (!lock->l_readers && !lock->l_writers &&
- ldlm_is_cbpending(lock)) {
+ (ldlm_is_cbpending(lock) || lock->l_req_mode == LCK_GROUP)) {
/* If we received a blocked AST and this was the last reference,
* run the callback.
+ * Group locks are special:
+ * They must not go in LRU, but they are not called back
+ * like non-group locks, instead they are manually released.
+ * They have an l_writers reference which they keep until
+ * they are manually released, so we remove them when they have
+ * no more reader or writer references. - LU-6368
*/
-
LDLM_DEBUG(lock, "final decref done on cbpending lock");
LDLM_LOCK_GET(lock); /* dropped by bl thread */
@@ -832,7 +825,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
/**
* Decrease reader/writer refcount for LDLM lock with handle \a lockh
*/
-void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode)
{
struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
@@ -846,10 +839,9 @@ EXPORT_SYMBOL(ldlm_lock_decref);
* Decrease reader/writer refcount for LDLM lock with handle
* \a lockh and mark it for subsequent cancellation once r/w refcount
* drops to zero instead of putting into LRU.
- *
- * Typical usage is for GROUP locks which we cannot allow to be cached.
*/
-void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode)
+void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh,
+ enum ldlm_mode mode)
{
struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
@@ -1055,88 +1047,173 @@ void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list)
}
/**
- * Search for a lock with given properties in a queue.
+ * Describe the overlap between two locks. itree_overlap_cb data.
+ */
+struct lock_match_data {
+ struct ldlm_lock *lmd_old;
+ struct ldlm_lock *lmd_lock;
+ enum ldlm_mode *lmd_mode;
+ union ldlm_policy_data *lmd_policy;
+ __u64 lmd_flags;
+ int lmd_unref;
+};
+
+/**
+ * Check if the given \a lock meets the criteria for a match.
+ * A reference on the lock is taken if matched.
*
- * \retval a referenced lock or NULL. See the flag descriptions below, in the
- * comment above ldlm_lock_match
+ * \param lock the lock to test against
+ * \param data parameters
*/
-static struct ldlm_lock *search_queue(struct list_head *queue,
- enum ldlm_mode *mode,
- ldlm_policy_data_t *policy,
- struct ldlm_lock *old_lock,
- __u64 flags, int unref)
+static int lock_matches(struct ldlm_lock *lock, struct lock_match_data *data)
{
- struct ldlm_lock *lock;
- struct list_head *tmp;
+ union ldlm_policy_data *lpol = &lock->l_policy_data;
+ enum ldlm_mode match;
- list_for_each(tmp, queue) {
- enum ldlm_mode match;
+ if (lock == data->lmd_old)
+ return INTERVAL_ITER_STOP;
- lock = list_entry(tmp, struct ldlm_lock, l_res_link);
-
- if (lock == old_lock)
- break;
+ /*
+ * Check if this lock can be matched.
+ * Used by LU-2919 (exclusive open) for the open lease lock
+ */
+ if (ldlm_is_excl(lock))
+ return INTERVAL_ITER_CONT;
- /* Check if this lock can be matched.
- * Used by LU-2919(exclusive open) for open lease lock
- */
- if (ldlm_is_excl(lock))
- continue;
+ /*
+ * llite sometimes wants to match locks that will be
+ * canceled when their users drop them, but we allow it to match
+ * if it passes in CBPENDING and the lock still has users.
+ * This is generally only going to be used by children
+ * whose parents already hold a lock so forward progress
+ * can still happen.
+ */
+ if (ldlm_is_cbpending(lock) &&
+ !(data->lmd_flags & LDLM_FL_CBPENDING))
+ return INTERVAL_ITER_CONT;
- /* llite sometimes wants to match locks that will be
- * canceled when their users drop, but we allow it to match
- * if it passes in CBPENDING and the lock still has users.
- * this is generally only going to be used by children
- * whose parents already hold a lock so forward progress
- * can still happen.
- */
- if (ldlm_is_cbpending(lock) && !(flags & LDLM_FL_CBPENDING))
- continue;
- if (!unref && ldlm_is_cbpending(lock) &&
- lock->l_readers == 0 && lock->l_writers == 0)
- continue;
+ if (!data->lmd_unref && ldlm_is_cbpending(lock) &&
+ !lock->l_readers && !lock->l_writers)
+ return INTERVAL_ITER_CONT;
- if (!(lock->l_req_mode & *mode))
- continue;
- match = lock->l_req_mode;
+ if (!(lock->l_req_mode & *data->lmd_mode))
+ return INTERVAL_ITER_CONT;
+ match = lock->l_req_mode;
- if (lock->l_resource->lr_type == LDLM_EXTENT &&
- (lock->l_policy_data.l_extent.start >
- policy->l_extent.start ||
- lock->l_policy_data.l_extent.end < policy->l_extent.end))
- continue;
+ switch (lock->l_resource->lr_type) {
+ case LDLM_EXTENT:
+ if (lpol->l_extent.start > data->lmd_policy->l_extent.start ||
+ lpol->l_extent.end < data->lmd_policy->l_extent.end)
+ return INTERVAL_ITER_CONT;
if (unlikely(match == LCK_GROUP) &&
- lock->l_resource->lr_type == LDLM_EXTENT &&
- policy->l_extent.gid != LDLM_GID_ANY &&
- lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
- continue;
-
- /* We match if we have existing lock with same or wider set
+ data->lmd_policy->l_extent.gid != LDLM_GID_ANY &&
+ lpol->l_extent.gid != data->lmd_policy->l_extent.gid)
+ return INTERVAL_ITER_CONT;
+ break;
+ case LDLM_IBITS:
+ /*
+ * We match if we have existing lock with same or wider set
* of bits.
*/
- if (lock->l_resource->lr_type == LDLM_IBITS &&
- ((lock->l_policy_data.l_inodebits.bits &
- policy->l_inodebits.bits) !=
- policy->l_inodebits.bits))
- continue;
+ if ((lpol->l_inodebits.bits &
+ data->lmd_policy->l_inodebits.bits) !=
+ data->lmd_policy->l_inodebits.bits)
+ return INTERVAL_ITER_CONT;
+ break;
+ default:
+ break;
+ }
+ /*
+ * Skip locks that are already marked as gone (destroyed or
+ * cancelled), unless the caller asked for unreferenced matches.
+ */
+ if (!data->lmd_unref && LDLM_HAVE_MASK(lock, GONE))
+ return INTERVAL_ITER_CONT;
+
+ if ((data->lmd_flags & LDLM_FL_LOCAL_ONLY) &&
+ !ldlm_is_local(lock))
+ return INTERVAL_ITER_CONT;
+
+ if (data->lmd_flags & LDLM_FL_TEST_LOCK) {
+ LDLM_LOCK_GET(lock);
+ ldlm_lock_touch_in_lru(lock);
+ } else {
+ ldlm_lock_addref_internal_nolock(lock, match);
+ }
+
+ *data->lmd_mode = match;
+ data->lmd_lock = lock;
+
+ return INTERVAL_ITER_STOP;
+}
+
+static unsigned int itree_overlap_cb(struct interval_node *in, void *args)
+{
+ struct ldlm_interval *node = to_ldlm_interval(in);
+ struct lock_match_data *data = args;
+ struct ldlm_lock *lock;
+ int rc;
+
+ list_for_each_entry(lock, &node->li_group, l_sl_policy) {
+ rc = lock_matches(lock, data);
+ if (rc == INTERVAL_ITER_STOP)
+ return INTERVAL_ITER_STOP;
+ }
+ return INTERVAL_ITER_CONT;
+}
+
+/**
+ * Search for a lock with given parameters in interval trees.
+ *
+ * \param res search for a lock in this resource
+ * \param data parameters
+ *
+ * \retval a referenced lock or NULL.
+ */
+static struct ldlm_lock *search_itree(struct ldlm_resource *res,
+ struct lock_match_data *data)
+{
+ struct interval_node_extent ext = {
+ .start = data->lmd_policy->l_extent.start,
+ .end = data->lmd_policy->l_extent.end
+ };
+ int idx;
+
+ for (idx = 0; idx < LCK_MODE_NUM; idx++) {
+ struct ldlm_interval_tree *tree = &res->lr_itree[idx];
- if (!unref && LDLM_HAVE_MASK(lock, GONE))
+ if (!tree->lit_root)
continue;
- if ((flags & LDLM_FL_LOCAL_ONLY) && !ldlm_is_local(lock))
+ if (!(tree->lit_mode & *data->lmd_mode))
continue;
- if (flags & LDLM_FL_TEST_LOCK) {
- LDLM_LOCK_GET(lock);
- ldlm_lock_touch_in_lru(lock);
- } else {
- ldlm_lock_addref_internal_nolock(lock, match);
- }
- *mode = match;
- return lock;
+ interval_search(tree->lit_root, &ext,
+ itree_overlap_cb, data);
}
+ return data->lmd_lock;
+}
+/**
+ * Search for a lock with given properties in a queue.
+ *
+ * \param queue search for a lock in this queue
+ * \param data parameters
+ *
+ * \retval a referenced lock or NULL.
+ */
+static struct ldlm_lock *search_queue(struct list_head *queue,
+ struct lock_match_data *data)
+{
+ struct ldlm_lock *lock;
+ int rc;
+
+ list_for_each_entry(lock, queue, l_res_link) {
+ rc = lock_matches(lock, data);
+ if (rc == INTERVAL_ITER_STOP)
+ return data->lmd_lock;
+ }
return NULL;
}
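Both search paths now funnel into lock_matches(); only the traversal differs. The interval iterator's contract is just the two return codes, as a throwaway callback makes clear (hypothetical, illustration only):

static unsigned int count_overlaps_cb(struct interval_node *in, void *args)
{
	unsigned int *count = args;

	(*count)++;			/* visit every overlap ... */
	return INTERVAL_ITER_CONT;	/* ... and never stop early */
}

It would be driven exactly like itree_overlap_cb() above, e.g. interval_search(tree->lit_root, &ext, count_overlaps_cb, &count).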
@@ -1147,7 +1224,6 @@ void ldlm_lock_fail_match_locked(struct ldlm_lock *lock)
wake_up_all(&lock->l_waitq);
}
}
-EXPORT_SYMBOL(ldlm_lock_fail_match_locked);
/**
* Mark lock as "matchable" by OST.
@@ -1208,35 +1284,45 @@ EXPORT_SYMBOL(ldlm_lock_allow_match);
enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
const struct ldlm_res_id *res_id,
enum ldlm_type type,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
struct lustre_handle *lockh, int unref)
{
+ struct lock_match_data data = {
+ .lmd_old = NULL,
+ .lmd_lock = NULL,
+ .lmd_mode = &mode,
+ .lmd_policy = policy,
+ .lmd_flags = flags,
+ .lmd_unref = unref,
+ };
struct ldlm_resource *res;
- struct ldlm_lock *lock, *old_lock = NULL;
+ struct ldlm_lock *lock;
int rc = 0;
if (!ns) {
- old_lock = ldlm_handle2lock(lockh);
- LASSERT(old_lock);
+ data.lmd_old = ldlm_handle2lock(lockh);
+ LASSERT(data.lmd_old);
- ns = ldlm_lock_to_ns(old_lock);
- res_id = &old_lock->l_resource->lr_name;
- type = old_lock->l_resource->lr_type;
- mode = old_lock->l_req_mode;
+ ns = ldlm_lock_to_ns(data.lmd_old);
+ res_id = &data.lmd_old->l_resource->lr_name;
+ type = data.lmd_old->l_resource->lr_type;
+ *data.lmd_mode = data.lmd_old->l_req_mode;
}
res = ldlm_resource_get(ns, NULL, res_id, type, 0);
if (IS_ERR(res)) {
- LASSERT(!old_lock);
+ LASSERT(!data.lmd_old);
return 0;
}
LDLM_RESOURCE_ADDREF(res);
lock_res(res);
- lock = search_queue(&res->lr_granted, &mode, policy, old_lock,
- flags, unref);
+ if (res->lr_type == LDLM_EXTENT)
+ lock = search_itree(res, &data);
+ else
+ lock = search_queue(&res->lr_granted, &data);
if (lock) {
rc = 1;
goto out;
@@ -1245,14 +1331,12 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
rc = 0;
goto out;
}
- lock = search_queue(&res->lr_waiting, &mode, policy, old_lock,
- flags, unref);
+ lock = search_queue(&res->lr_waiting, &data);
if (lock) {
rc = 1;
goto out;
}
-
- out:
+out:
unlock_res(res);
LDLM_RESOURCE_DELREF(res);
ldlm_resource_putref(res);
@@ -1324,8 +1408,8 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
(type == LDLM_PLAIN || type == LDLM_IBITS) ?
res_id->name[3] : policy->l_extent.end);
}
- if (old_lock)
- LDLM_LOCK_PUT(old_lock);
+ if (data.lmd_old)
+ LDLM_LOCK_PUT(data.lmd_old);
return rc ? mode : 0;
}
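For reference, a typical (hypothetical) caller of the refactored entry point; the interval-tree fast path is chosen automatically for LDLM_EXTENT resources:

static enum ldlm_mode probe_extent_lock(struct ldlm_namespace *ns,
					const struct ldlm_res_id *res_id,
					struct lustre_handle *lockh)
{
	union ldlm_policy_data policy = {
		.l_extent = { .start = 0, .end = OBD_OBJECT_EOF },
	};

	/* LDLM_FL_TEST_LOCK: touch the lock in the LRU but take no
	 * reference for the caller; 0 is returned when nothing matches
	 */
	return ldlm_lock_match(ns, LDLM_FL_TEST_LOCK, res_id, LDLM_EXTENT,
			       &policy, LCK_PR | LCK_PW, lockh, 0);
}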
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index fde697ebaadc..12647af5a336 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -511,23 +511,6 @@ static inline void ldlm_callback_errmsg(struct ptlrpc_request *req,
CWARN("Send reply failed, maybe cause bug 21636.\n");
}
-static int ldlm_handle_qc_callback(struct ptlrpc_request *req)
-{
- struct obd_quotactl *oqctl;
- struct client_obd *cli = &req->rq_export->exp_obd->u.cli;
-
- oqctl = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
- if (!oqctl) {
- CERROR("Can't unpack obd_quotactl\n");
- return -EPROTO;
- }
-
- oqctl->qc_stat = ptlrpc_status_ntoh(oqctl->qc_stat);
-
- cli->cl_qchk_stat = oqctl->qc_stat;
- return 0;
-}
-
/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */
static int ldlm_callback_handler(struct ptlrpc_request *req)
{
@@ -577,13 +560,6 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
rc = ldlm_handle_setinfo(req);
ldlm_callback_reply(req, rc);
return 0;
- case OBD_QC_CALLBACK:
- req_capsule_set(&req->rq_pill, &RQF_QC_CALLBACK);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_QC_CALLBACK_NET))
- return 0;
- rc = ldlm_handle_qc_callback(req);
- ldlm_callback_reply(req, rc);
- return 0;
default:
CERROR("unknown opcode %u\n",
lustre_msg_get_opc(req->rq_reqmsg));
@@ -858,7 +834,6 @@ int ldlm_get_ref(void)
return rc;
}
-EXPORT_SYMBOL(ldlm_get_ref);
void ldlm_put_ref(void)
{
@@ -875,7 +850,6 @@ void ldlm_put_ref(void)
}
mutex_unlock(&ldlm_ref_mutex);
}
-EXPORT_SYMBOL(ldlm_put_ref);
static ssize_t cancel_unused_locks_before_replay_show(struct kobject *kobj,
struct attribute *attr,
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
index 0aed39c46154..862ea0a1dc97 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c
@@ -54,14 +54,14 @@
#include "ldlm_internal.h"
-void ldlm_plain_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
- ldlm_policy_data_t *lpolicy)
+void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
+ union ldlm_policy_data *lpolicy)
{
/* No policy for plain locks */
}
-void ldlm_plain_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
- ldlm_wire_policy_data_t *wpolicy)
+void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
+ union ldlm_wire_policy_data *wpolicy)
{
/* No policy for plain locks */
}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index 9a1136e32dfc..8dfb3c8e6b7a 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -293,7 +293,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
* take into account pl->pl_recalc_time here.
*/
ret = ldlm_cancel_lru(container_of(pl, struct ldlm_namespace, ns_pool),
- 0, LCF_ASYNC, LDLM_CANCEL_LRUR);
+ 0, LCF_ASYNC, LDLM_LRU_FLAG_LRUR);
out:
spin_lock(&pl->pl_lock);
@@ -339,7 +339,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
if (nr == 0)
return (unused / 100) * sysctl_vfs_cache_pressure;
else
- return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK);
+ return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_LRU_FLAG_SHRINK);
}
static const struct ldlm_pool_ops ldlm_cli_pool_ops = {
@@ -356,10 +356,10 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl)
u32 recalc_interval_sec;
int count;
- recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
+ recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
if (recalc_interval_sec > 0) {
spin_lock(&pl->pl_lock);
- recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
+ recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
if (recalc_interval_sec > 0) {
/*
@@ -382,7 +382,7 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl)
count);
}
- recalc_interval_sec = pl->pl_recalc_time - ktime_get_seconds() +
+ recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() +
pl->pl_recalc_period;
if (recalc_interval_sec <= 0) {
/* DEBUG: should be re-removed after LU-4536 is fixed */
@@ -651,13 +651,13 @@ static void ldlm_pool_debugfs_fini(struct ldlm_pool *pl)
}
int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
- int idx, ldlm_side_t client)
+ int idx, enum ldlm_side client)
{
int rc;
spin_lock_init(&pl->pl_lock);
atomic_set(&pl->pl_granted, 0);
- pl->pl_recalc_time = ktime_get_seconds();
+ pl->pl_recalc_time = ktime_get_real_seconds();
atomic_set(&pl->pl_lock_volume_factor, 1);
atomic_set(&pl->pl_grant_rate, 0);
@@ -684,7 +684,6 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
return rc;
}
-EXPORT_SYMBOL(ldlm_pool_init);
void ldlm_pool_fini(struct ldlm_pool *pl)
{
@@ -698,7 +697,6 @@ void ldlm_pool_fini(struct ldlm_pool *pl)
*/
POISON(pl, 0x5a, sizeof(*pl));
}
-EXPORT_SYMBOL(ldlm_pool_fini);
/**
* Add new taken ldlm lock \a lock into pool \a pl accounting.
@@ -724,7 +722,6 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock)
* with too long call paths.
*/
}
-EXPORT_SYMBOL(ldlm_pool_add);
/**
* Remove ldlm lock \a lock from pool \a pl accounting.
@@ -743,7 +740,6 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock)
lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
}
-EXPORT_SYMBOL(ldlm_pool_del);
/**
* Returns current \a pl SLV.
@@ -792,13 +788,12 @@ static struct completion ldlm_pools_comp;
* count locks from all namespaces (if possible). Returns number of
* cached locks.
*/
-static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask)
+static unsigned long ldlm_pools_count(enum ldlm_side client, gfp_t gfp_mask)
{
unsigned long total = 0;
int nr_ns;
struct ldlm_namespace *ns;
struct ldlm_namespace *ns_old = NULL; /* loop detection */
- void *cookie;
if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
return 0;
@@ -806,8 +801,6 @@ static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask)
CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n",
client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
- cookie = cl_env_reenter();
-
/*
* Find out how many resources we may release.
*/
@@ -816,7 +809,6 @@ static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask)
mutex_lock(ldlm_namespace_lock(client));
if (list_empty(ldlm_namespace_list(client))) {
mutex_unlock(ldlm_namespace_lock(client));
- cl_env_reexit(cookie);
return 0;
}
ns = ldlm_namespace_first_locked(client);
@@ -842,22 +834,19 @@ static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask)
ldlm_namespace_put(ns);
}
- cl_env_reexit(cookie);
return total;
}
-static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, gfp_t gfp_mask)
+static unsigned long ldlm_pools_scan(enum ldlm_side client, int nr,
+ gfp_t gfp_mask)
{
unsigned long freed = 0;
int tmp, nr_ns;
struct ldlm_namespace *ns;
- void *cookie;
if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
return -1;
- cookie = cl_env_reenter();
-
/*
* Shrink at least ldlm_namespace_nr_read(client) namespaces.
*/
@@ -887,7 +876,6 @@ static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, gfp_t gfp_mask)
freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
ldlm_namespace_put(ns);
}
- cl_env_reexit(cookie);
/*
* we only decrease the SLV in server pools shrinker, return
* SHRINK_STOP to kernel to avoid needless loop. LU-1128
@@ -908,7 +896,7 @@ static unsigned long ldlm_pools_cli_scan(struct shrinker *s,
sc->gfp_mask);
}
-static int ldlm_pools_recalc(ldlm_side_t client)
+static int ldlm_pools_recalc(enum ldlm_side client)
{
struct ldlm_namespace *ns;
struct ldlm_namespace *ns_old = NULL;
@@ -1095,7 +1083,6 @@ int ldlm_pools_init(void)
return rc;
}
-EXPORT_SYMBOL(ldlm_pools_init);
void ldlm_pools_fini(void)
{
@@ -1104,4 +1091,3 @@ void ldlm_pools_fini(void)
ldlm_pools_thread_stop();
}
-EXPORT_SYMBOL(ldlm_pools_fini);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
index 35ba6f14d95f..c1f8693f94a5 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
@@ -93,11 +93,7 @@ static int ldlm_expired_completion_wait(void *data)
if (!lock->l_conn_export) {
static unsigned long next_dump, last_dump;
- LCONSOLE_WARN("lock timed out (enqueued at %lld, %llds ago)\n",
- (s64)lock->l_last_activity,
- (s64)(ktime_get_real_seconds() -
- lock->l_last_activity));
- LDLM_DEBUG(lock, "lock timed out (enqueued at %lld, %llds ago); not entering recovery in server code, just going back to sleep",
+ LDLM_ERROR(lock, "lock timed out (enqueued at %lld, %llds ago); not entering recovery in server code, just going back to sleep",
(s64)lock->l_last_activity,
(s64)(ktime_get_real_seconds() -
lock->l_last_activity));
@@ -475,12 +471,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
"client-side enqueue, new policy data");
}
- if ((*flags) & LDLM_FL_AST_SENT ||
- /* Cancel extent locks as soon as possible on a liblustre client,
- * because it cannot handle asynchronous ASTs robustly (see
- * bug 7311).
- */
- (LIBLUSTRE_CLIENT && type == LDLM_EXTENT)) {
+ if ((*flags) & LDLM_FL_AST_SENT) {
lock_res_and_lock(lock);
lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
unlock_res_and_lock(lock);
@@ -602,7 +593,7 @@ int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req,
avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff);
flags = ns_connect_lru_resize(ns) ?
- LDLM_CANCEL_LRUR_NO_WAIT : LDLM_CANCEL_AGED;
+ LDLM_LRU_FLAG_LRUR_NO_WAIT : LDLM_LRU_FLAG_AGED;
to_free = !ns_connect_lru_resize(ns) &&
opc == LDLM_ENQUEUE ? 1 : 0;
@@ -657,6 +648,27 @@ int ldlm_prep_enqueue_req(struct obd_export *exp, struct ptlrpc_request *req,
}
EXPORT_SYMBOL(ldlm_prep_enqueue_req);
+static struct ptlrpc_request *ldlm_enqueue_pack(struct obd_export *exp,
+ int lvb_len)
+{
+ struct ptlrpc_request *req;
+ int rc;
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE);
+ if (!req)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
+ if (rc) {
+ ptlrpc_request_free(req);
+ return ERR_PTR(rc);
+ }
+
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len);
+ ptlrpc_request_set_replen(req);
+ return req;
+}
+
/**
* Client-side lock enqueue.
*
@@ -670,7 +682,7 @@ EXPORT_SYMBOL(ldlm_prep_enqueue_req);
int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
struct ldlm_enqueue_info *einfo,
const struct ldlm_res_id *res_id,
- ldlm_policy_data_t const *policy, __u64 *flags,
+ union ldlm_policy_data const *policy, __u64 *flags,
void *lvb, __u32 lvb_len, enum lvb_type lvb_type,
struct lustre_handle *lockh, int async)
{
@@ -727,17 +739,14 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
lock->l_last_activity = ktime_get_real_seconds();
/* lock not sent to server yet */
-
if (!reqp || !*reqp) {
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_LDLM_ENQUEUE,
- LUSTRE_DLM_VERSION,
- LDLM_ENQUEUE);
- if (!req) {
+ req = ldlm_enqueue_pack(exp, lvb_len);
+ if (IS_ERR(req)) {
failed_lock_cleanup(ns, lock, einfo->ei_mode);
LDLM_LOCK_RELEASE(lock);
- return -ENOMEM;
+ return PTR_ERR(req);
}
+
req_passed_in = 0;
if (reqp)
*reqp = req;
@@ -757,24 +766,6 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
body->lock_flags = ldlm_flags_to_wire(*flags);
body->lock_handle[0] = *lockh;
- /* Continue as normal. */
- if (!req_passed_in) {
- if (lvb_len > 0)
- req_capsule_extend(&req->rq_pill,
- &RQF_LDLM_ENQUEUE_LVB);
- req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
- lvb_len);
- ptlrpc_request_set_replen(req);
- }
-
- /*
- * Liblustre client doesn't get extent locks, except for O_APPEND case
- * where [0, OBD_OBJECT_EOF] lock is taken, or truncate, where
- * [i_size, OBD_OBJECT_EOF] lock is taken.
- */
- LASSERT(ergo(LIBLUSTRE_CLIENT, einfo->ei_type != LDLM_EXTENT ||
- policy->l_extent.end == OBD_OBJECT_EOF));
-
if (async) {
LASSERT(reqp);
return 0;
@@ -1022,7 +1013,6 @@ int ldlm_cli_update_pool(struct ptlrpc_request *req)
return 0;
}
-EXPORT_SYMBOL(ldlm_cli_update_pool);
/**
* Client side lock cancel.
@@ -1067,7 +1057,7 @@ int ldlm_cli_cancel(const struct lustre_handle *lockh,
ns = ldlm_lock_to_ns(lock);
flags = ns_connect_lru_resize(ns) ?
- LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED;
+ LDLM_LRU_FLAG_LRUR : LDLM_LRU_FLAG_AGED;
count += ldlm_cancel_lru_local(ns, &cancels, 0, avail - 1,
LCF_BL_AST, flags);
}
@@ -1125,7 +1115,6 @@ int ldlm_cli_cancel_list_local(struct list_head *cancels, int count,
return count;
}
-EXPORT_SYMBOL(ldlm_cli_cancel_list_local);
/**
* Cancel as many locks as possible w/o sending any RPCs (e.g. to write back
@@ -1184,6 +1173,14 @@ static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
if (count && added >= count)
return LDLM_POLICY_KEEP_LOCK;
+	/*
+	 * Regardless of the lock's LV, it makes no sense to keep a lock
+	 * that has been unused for ns_max_age.
+	 */
+ if (cfs_time_after(cfs_time_current(),
+ cfs_time_add(lock->l_last_used, ns->ns_max_age)))
+ return LDLM_POLICY_CANCEL_LOCK;
+
slv = ldlm_pool_get_slv(pl);
lvf = ldlm_pool_get_lvf(pl);
la = cfs_duration_sec(cfs_time_sub(cur, lock->l_last_used));
@@ -1287,21 +1284,21 @@ typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)(
static ldlm_cancel_lru_policy_t
ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags)
{
- if (flags & LDLM_CANCEL_NO_WAIT)
+ if (flags & LDLM_LRU_FLAG_NO_WAIT)
return ldlm_cancel_no_wait_policy;
if (ns_connect_lru_resize(ns)) {
- if (flags & LDLM_CANCEL_SHRINK)
+ if (flags & LDLM_LRU_FLAG_SHRINK)
/* We kill the passed number of old locks. */
return ldlm_cancel_passed_policy;
- else if (flags & LDLM_CANCEL_LRUR)
+ else if (flags & LDLM_LRU_FLAG_LRUR)
return ldlm_cancel_lrur_policy;
- else if (flags & LDLM_CANCEL_PASSED)
+ else if (flags & LDLM_LRU_FLAG_PASSED)
return ldlm_cancel_passed_policy;
- else if (flags & LDLM_CANCEL_LRUR_NO_WAIT)
+ else if (flags & LDLM_LRU_FLAG_LRUR_NO_WAIT)
return ldlm_cancel_lrur_no_wait_policy;
} else {
- if (flags & LDLM_CANCEL_AGED)
+ if (flags & LDLM_LRU_FLAG_AGED)
return ldlm_cancel_aged_policy;
}
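For reference, a hedged sketch of how the renamed flags reach this selector; the count/max values are illustrative, but both call shapes appear verbatim in ldlm_cli_cancel() and ldlm_cancel_unused_locks_for_replay() elsewhere in this patch:

	/* LRU-resize namespace: server-driven SLV policy */
	count += ldlm_cancel_lru_local(ns, &cancels, 0, avail - 1,
				       LCF_BL_AST, LDLM_LRU_FLAG_LRUR);

	/* no LRU resize, pre-replay: the NO_WAIT policy ignores the
	 * count parameter */
	canceled = ldlm_cancel_lru_local(ns, &cancels, ns->ns_nr_unused, 0,
					 LCF_LOCAL, LDLM_LRU_FLAG_NO_WAIT);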
@@ -1325,21 +1322,21 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags)
*
* Calling policies for enabled LRU resize:
* ----------------------------------------
- * flags & LDLM_CANCEL_LRUR - use LRU resize policy (SLV from server) to
- * cancel not more than \a count locks;
+ * flags & LDLM_LRU_FLAG_LRUR - use LRU resize policy (SLV from server) to
+ * cancel not more than \a count locks;
*
- * flags & LDLM_CANCEL_PASSED - cancel \a count number of old locks (located at
- * the beginning of LRU list);
+ * flags & LDLM_LRU_FLAG_PASSED - cancel \a count number of old locks (located at
+ * the beginning of LRU list);
*
- * flags & LDLM_CANCEL_SHRINK - cancel not more than \a count locks according to
- * memory pressure policy function;
+ * flags & LDLM_LRU_FLAG_SHRINK - cancel not more than \a count locks according to
+ * memory pressure policy function;
*
- * flags & LDLM_CANCEL_AGED - cancel \a count locks according to "aged policy".
+ * flags & LDLM_LRU_FLAG_AGED - cancel \a count locks according to "aged policy".
*
- * flags & LDLM_CANCEL_NO_WAIT - cancel as many unused locks as possible
- * (typically before replaying locks) w/o
- * sending any RPCs or waiting for any
- * outstanding RPC to complete.
+ * flags & LDLM_LRU_FLAG_NO_WAIT - cancel as many unused locks as possible
+ * (typically before replaying locks) w/o
+ * sending any RPCs or waiting for any
+ * outstanding RPC to complete.
*/
static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
struct list_head *cancels, int count, int max,
@@ -1348,7 +1345,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
ldlm_cancel_lru_policy_t pf;
struct ldlm_lock *lock, *next;
int added = 0, unused, remained;
- int no_wait = flags & (LDLM_CANCEL_NO_WAIT | LDLM_CANCEL_LRUR_NO_WAIT);
+ int no_wait = flags & (LDLM_LRU_FLAG_NO_WAIT | LDLM_LRU_FLAG_LRUR_NO_WAIT);
spin_lock(&ns->ns_lock);
unused = ns->ns_nr_unused;
@@ -1531,7 +1528,7 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
*/
int ldlm_cancel_resource_local(struct ldlm_resource *res,
struct list_head *cancels,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode, __u64 lock_flags,
enum ldlm_cancel_flags cancel_flags,
void *opaque)
@@ -1648,7 +1645,7 @@ EXPORT_SYMBOL(ldlm_cli_cancel_list);
*/
int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
const struct ldlm_res_id *res_id,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
enum ldlm_cancel_flags flags,
void *opaque)
@@ -1723,7 +1720,7 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
opaque);
} else {
cfs_hash_for_each_nolock(ns->ns_rs_hash,
- ldlm_cli_hash_cancel_unused, &arg);
+ ldlm_cli_hash_cancel_unused, &arg, 0);
return ELDLM_OK;
}
}
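Every cfs_hash_for_each_nolock() call in this patch gains a trailing argument (0 in all cases here); judging by the call sites it selects a start position for the walk, with 0 keeping the old iterate-everything behaviour. A hedged sketch of a walker, using the callback signature visible in ldlm_res_hash_dump() below and a hypothetical my_res_count helper:

	/* hypothetical walker: counts resources in the namespace hash */
	static int my_res_count(struct cfs_hash *hs, struct cfs_hash_bd *bd,
				struct hlist_node *hnode, void *arg)
	{
		int *count = arg;

		(*count)++;
		return 0;	/* assumption: non-zero stops the walk */
	}

	int n = 0;

	cfs_hash_for_each_nolock(ns->ns_rs_hash, my_res_count, &n, 0);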
@@ -1796,7 +1793,7 @@ static void ldlm_namespace_foreach(struct ldlm_namespace *ns,
};
cfs_hash_for_each_nolock(ns->ns_rs_hash,
- ldlm_res_iter_helper, &helper);
+ ldlm_res_iter_helper, &helper, 0);
}
/* non-blocking function to manipulate a lock whose cb_data is being put away.
@@ -1840,7 +1837,7 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
* bug 17614: locks being actively cancelled. Get a reference
* on a lock so that it does not disappear under us (e.g. due to cancel)
*/
- if (!(lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCELING))) {
+ if (!(lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_BL_DONE))) {
list_add(&lock->l_pending_chain, list);
LDLM_LOCK_GET(lock);
}
@@ -1909,7 +1906,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
int flags;
/* Bug 11974: Do not replay a lock which is actively being canceled */
- if (ldlm_is_canceling(lock)) {
+ if (ldlm_is_bl_done(lock)) {
LDLM_DEBUG(lock, "Not replaying canceled lock:");
return 0;
}
@@ -2003,11 +2000,11 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
ldlm_ns_name(ns), ns->ns_nr_unused);
/* We don't need to care whether or not LRU resize is enabled
- * because the LDLM_CANCEL_NO_WAIT policy doesn't use the
+ * because the LDLM_LRU_FLAG_NO_WAIT policy doesn't use the
* count parameter
*/
canceled = ldlm_cancel_lru_local(ns, &cancels, ns->ns_nr_unused, 0,
- LCF_LOCAL, LDLM_CANCEL_NO_WAIT);
+ LCF_LOCAL, LDLM_LRU_FLAG_NO_WAIT);
CDEBUG(D_DLMTRACE, "Canceled %d unused locks from namespace %s\n",
canceled, ldlm_ns_name(ns));
@@ -2048,4 +2045,3 @@ int ldlm_replay_locks(struct obd_import *imp)
return rc;
}
-EXPORT_SYMBOL(ldlm_replay_locks);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
index a09c25aea698..b22f5bae7201 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
@@ -226,7 +226,7 @@ static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr,
/* Try to cancel all @ns_nr_unused locks. */
canceled = ldlm_cancel_lru(ns, unused, 0,
- LDLM_CANCEL_PASSED);
+ LDLM_LRU_FLAG_PASSED);
if (canceled < unused) {
CDEBUG(D_DLMTRACE,
"not all requested locks are canceled, requested: %d, canceled: %d\n",
@@ -237,7 +237,7 @@ static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr,
} else {
tmp = ns->ns_max_unused;
ns->ns_max_unused = 0;
- ldlm_cancel_lru(ns, 0, 0, LDLM_CANCEL_PASSED);
+ ldlm_cancel_lru(ns, 0, 0, LDLM_LRU_FLAG_PASSED);
ns->ns_max_unused = tmp;
}
return count;
@@ -262,7 +262,7 @@ static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr,
"changing namespace %s unused locks from %u to %u\n",
ldlm_ns_name(ns), ns->ns_nr_unused,
(unsigned int)tmp);
- ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_CANCEL_PASSED);
+ ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_LRU_FLAG_PASSED);
if (!lru_resize) {
CDEBUG(D_DLMTRACE,
@@ -276,7 +276,7 @@ static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr,
ldlm_ns_name(ns), ns->ns_max_unused,
(unsigned int)tmp);
ns->ns_max_unused = (unsigned int)tmp;
- ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_CANCEL_PASSED);
+ ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_LRU_FLAG_PASSED);
/* Make sure that LRU resize was originally supported before
* turning it on here.
@@ -445,8 +445,8 @@ static struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res)
return res;
}
-static unsigned ldlm_res_hop_hash(struct cfs_hash *hs,
- const void *key, unsigned mask)
+static unsigned int ldlm_res_hop_hash(struct cfs_hash *hs,
+ const void *key, unsigned int mask)
{
const struct ldlm_res_id *id = key;
unsigned int val = 0;
@@ -457,8 +457,8 @@ static unsigned ldlm_res_hop_hash(struct cfs_hash *hs,
return val & mask;
}
-static unsigned ldlm_res_hop_fid_hash(struct cfs_hash *hs,
- const void *key, unsigned mask)
+static unsigned int ldlm_res_hop_fid_hash(struct cfs_hash *hs,
+ const void *key, unsigned int mask)
{
const struct ldlm_res_id *id = key;
struct lu_fid fid;
@@ -612,7 +612,7 @@ static struct ldlm_ns_hash_def ldlm_ns_hash_defs[] = {
/** Register \a ns in the list of namespaces */
static void ldlm_namespace_register(struct ldlm_namespace *ns,
- ldlm_side_t client)
+ enum ldlm_side client)
{
mutex_lock(ldlm_namespace_lock(client));
LASSERT(list_empty(&ns->ns_list_chain));
@@ -625,7 +625,7 @@ static void ldlm_namespace_register(struct ldlm_namespace *ns,
* Create and initialize new empty namespace.
*/
struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
- ldlm_side_t client,
+ enum ldlm_side client,
enum ldlm_appetite apt,
enum ldlm_ns_type ns_type)
{
@@ -855,8 +855,10 @@ int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags)
return ELDLM_OK;
}
- cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean, &flags);
- cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain, NULL);
+ cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean,
+ &flags, 0);
+ cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain,
+ NULL, 0);
return ELDLM_OK;
}
EXPORT_SYMBOL(ldlm_namespace_cleanup);
@@ -952,7 +954,7 @@ void ldlm_namespace_free_prior(struct ldlm_namespace *ns,
/** Unregister \a ns from the list of namespaces. */
static void ldlm_namespace_unregister(struct ldlm_namespace *ns,
- ldlm_side_t client)
+ enum ldlm_side client)
{
mutex_lock(ldlm_namespace_lock(client));
LASSERT(!list_empty(&ns->ns_list_chain));
@@ -999,7 +1001,6 @@ void ldlm_namespace_get(struct ldlm_namespace *ns)
{
atomic_inc(&ns->ns_bref);
}
-EXPORT_SYMBOL(ldlm_namespace_get);
/* This is only for callers that care about refcount */
static int ldlm_namespace_get_return(struct ldlm_namespace *ns)
@@ -1014,11 +1015,10 @@ void ldlm_namespace_put(struct ldlm_namespace *ns)
spin_unlock(&ns->ns_lock);
}
}
-EXPORT_SYMBOL(ldlm_namespace_put);
/** Should be called with ldlm_namespace_lock(client) taken. */
void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns,
- ldlm_side_t client)
+ enum ldlm_side client)
{
LASSERT(!list_empty(&ns->ns_list_chain));
LASSERT(mutex_is_locked(ldlm_namespace_lock(client)));
@@ -1027,7 +1027,7 @@ void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns,
/** Should be called with ldlm_namespace_lock(client) taken. */
void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns,
- ldlm_side_t client)
+ enum ldlm_side client)
{
LASSERT(!list_empty(&ns->ns_list_chain));
LASSERT(mutex_is_locked(ldlm_namespace_lock(client)));
@@ -1035,7 +1035,7 @@ void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns,
}
/** Should be called with ldlm_namespace_lock(client) taken. */
-struct ldlm_namespace *ldlm_namespace_first_locked(ldlm_side_t client)
+struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side client)
{
LASSERT(mutex_is_locked(ldlm_namespace_lock(client)));
LASSERT(!list_empty(ldlm_namespace_list(client)));
@@ -1305,7 +1305,7 @@ void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc)
* Print information about all locks in all namespaces on this node to debug
* log.
*/
-void ldlm_dump_all_namespaces(ldlm_side_t client, int level)
+void ldlm_dump_all_namespaces(enum ldlm_side client, int level)
{
struct list_head *tmp;
@@ -1323,7 +1323,6 @@ void ldlm_dump_all_namespaces(ldlm_side_t client, int level)
mutex_unlock(ldlm_namespace_lock(client));
}
-EXPORT_SYMBOL(ldlm_dump_all_namespaces);
static int ldlm_res_hash_dump(struct cfs_hash *hs, struct cfs_hash_bd *bd,
struct hlist_node *hnode, void *arg)
@@ -1355,12 +1354,11 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
cfs_hash_for_each_nolock(ns->ns_rs_hash,
ldlm_res_hash_dump,
- (void *)(unsigned long)level);
+ (void *)(unsigned long)level, 0);
spin_lock(&ns->ns_lock);
ns->ns_next_dump = cfs_time_shift(10);
spin_unlock(&ns->ns_lock);
}
-EXPORT_SYMBOL(ldlm_namespace_dump);
/**
* Print information about all locks in this resource to debug log.
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
index 1ac0940bd8df..322d4fa63f5d 100644
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -1,7 +1,7 @@
obj-$(CONFIG_LUSTRE_FS) += lustre.o
-lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \
- rw.o namei.o symlink.o llite_mmap.o range_lock.o \
- xattr.o xattr_cache.o rw26.o super25.o statahead.o \
- glimpse.o lcommon_cl.o lcommon_misc.o \
- vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \
+lustre-y := dcache.o dir.o file.o llite_lib.o llite_nfs.o \
+ rw.o rw26.o namei.o symlink.o llite_mmap.o range_lock.o \
+ xattr.o xattr_cache.o xattr_security.o \
+ super25.o statahead.o glimpse.o lcommon_cl.o lcommon_misc.o \
+ vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o \
lproc_llite.o
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 7f32a539d260..ea5d247a3f70 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -51,6 +51,8 @@
#include "../include/lustre_dlm.h"
#include "../include/lustre_fid.h"
#include "../include/lustre_kernelcomm.h"
+#include "../include/lustre_swab.h"
+
#include "llite_internal.h"
/*
@@ -410,6 +412,8 @@ static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump,
struct ptlrpc_request *request = NULL;
struct md_op_data *op_data;
struct ll_sb_info *sbi = ll_i2sbi(parent);
+ struct inode *inode = NULL;
+ struct dentry dentry;
int err;
if (unlikely(lump->lum_magic != LMV_USER_MAGIC))
@@ -419,6 +423,10 @@ static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump,
PFID(ll_inode2fid(parent)), parent, dirname,
(int)lump->lum_stripe_offset, lump->lum_stripe_count);
+ if (lump->lum_stripe_count > 1 &&
+ !(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_DIR_STRIPE))
+ return -EINVAL;
+
if (lump->lum_magic != cpu_to_le32(LMV_USER_MAGIC))
lustre_swab_lmv_user_md(lump);
@@ -439,8 +447,17 @@ static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump,
from_kgid(&init_user_ns, current_fsgid()),
cfs_curproc_cap_pack(), 0, &request);
ll_finish_md_op_data(op_data);
+
+ err = ll_prep_inode(&inode, request, parent->i_sb, NULL);
if (err)
goto err_exit;
+
+ memset(&dentry, 0, sizeof(dentry));
+ dentry.d_inode = inode;
+
+ err = ll_init_security(&dentry, inode, parent);
+ iput(inode);
+
err_exit:
ptlrpc_req_finished(request);
return err;
@@ -501,8 +518,7 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
return PTR_ERR(op_data);
/* swabbing is done in lov_setstripe() on server side */
- rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size,
- NULL, 0, &req, NULL);
+ rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, &req);
ll_finish_md_op_data(op_data);
ptlrpc_req_finished(req);
if (rc) {
@@ -682,7 +698,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct hsm_progress_kernel hpk;
- int rc;
+ int rc2, rc = 0;
/* Forge an hsm_progress based on data from copy. */
hpk.hpk_fid = copy->hc_hai.hai_fid;
@@ -732,10 +748,10 @@ progress:
/* On error, the request should be considered as completed */
if (hpk.hpk_errval > 0)
hpk.hpk_flags |= HP_FLAG_COMPLETED;
- rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
- &hpk, NULL);
+ rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
+ &hpk, NULL);
- return rc;
+ return rc ? rc : rc2;
}
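This hunk and the next switch ll_ioc_copy_start()/ll_ioc_copy_end() to a two-variable pattern, so the final progress report can neither mask an earlier error nor be lost when the copy itself succeeded. The distilled idiom:

	int rc2, rc = 0;

	/* ... failure paths set rc, e.g. rc = -EBUSY; ... */

	rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp,
			    sizeof(hpk), &hpk, NULL);

	return rc ? rc : rc2;	/* the first error wins */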
/**
@@ -757,7 +773,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct hsm_progress_kernel hpk;
- int rc;
+ int rc2, rc = 0;
/* If you modify the logic here, also check llapi_hsm_copy_end(). */
/* Take care: copy->hc_hai.hai_action, len, gid and data are not
@@ -823,18 +839,18 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
* when the file will not be modified for some tunable
* time
*/
- /* we do not notify caller */
hpk.hpk_flags &= ~HP_FLAG_RETRY;
+ rc = -EBUSY;
/* hpk_errval must be >= 0 */
- hpk.hpk_errval = EBUSY;
+ hpk.hpk_errval = -rc;
}
}
progress:
- rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
- &hpk, NULL);
+ rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
+ &hpk, NULL);
- return rc;
+ return rc ? rc : rc2;
}
static int copy_and_ioctl(int cmd, struct obd_export *exp,
@@ -862,10 +878,6 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
int rc = 0;
switch (cmd) {
- case LUSTRE_Q_INVALIDATE:
- case LUSTRE_Q_FINVALIDATE:
- case Q_QUOTAON:
- case Q_QUOTAOFF:
case Q_SETQUOTA:
case Q_SETINFO:
if (!capable(CFS_CAP_SYS_ADMIN))
@@ -930,10 +942,6 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
QCTL_COPY(oqctl, qctl);
rc = obd_quotactl(sbi->ll_md_exp, oqctl);
if (rc) {
- if (rc != -EALREADY && cmd == Q_QUOTAON) {
- oqctl->qc_cmd = Q_QUOTAOFF;
- obd_quotactl(sbi->ll_md_exp, oqctl);
- }
kfree(oqctl);
return rc;
}
@@ -1077,7 +1085,7 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
goto out_free;
}
- rc = ll_get_fid_by_name(inode, filename, namelen, NULL);
+ rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL);
if (rc < 0) {
CERROR("%s: lookup %.*s failed: rc = %d\n",
ll_get_fsname(inode->i_sb, NULL, 0), namelen,
@@ -1189,6 +1197,7 @@ lmv_out_free:
struct lmv_user_md *tmp = NULL;
union lmv_mds_md *lmm = NULL;
u64 valid = 0;
+ int max_stripe_count;
int stripe_count;
int mdt_index;
int lum_size;
@@ -1200,6 +1209,7 @@ lmv_out_free:
if (copy_from_user(&lum, ulmv, sizeof(*ulmv)))
return -EFAULT;
+ max_stripe_count = lum.lum_stripe_count;
/*
* lum_magic indicates which stripe the ioctl would like
* to get: LMV_MAGIC_V1 is for a normal LMV stripe, LMV_USER_MAGIC
@@ -1219,9 +1229,6 @@ lmv_out_free:
/* Get default LMV EA */
if (lum.lum_magic == LMV_USER_MAGIC) {
- if (rc)
- goto finish_req;
-
if (lmmsize > sizeof(*ulmv)) {
rc = -EINVAL;
goto finish_req;
@@ -1234,6 +1241,16 @@ lmv_out_free:
}
stripe_count = lmv_mds_md_stripe_count_get(lmm);
+ if (max_stripe_count < stripe_count) {
+ lum.lum_stripe_count = stripe_count;
+ if (copy_to_user(ulmv, &lum, sizeof(lum))) {
+ rc = -EFAULT;
+ goto finish_req;
+ }
+ rc = -E2BIG;
+ goto finish_req;
+ }
+
lum_size = lmv_user_md_size(stripe_count, LMV_MAGIC_V1);
tmp = kzalloc(lum_size, GFP_NOFS);
if (!tmp) {
@@ -1370,134 +1387,6 @@ out_req:
ll_putname(filename);
return rc;
}
- case IOC_LOV_GETINFO: {
- struct lov_user_mds_data __user *lumd;
- struct lov_stripe_md *lsm;
- struct lov_user_md __user *lum;
- struct lov_mds_md *lmm;
- int lmmsize;
- lstat_t st;
-
- lumd = (struct lov_user_mds_data __user *)arg;
- lum = &lumd->lmd_lmm;
-
- rc = ll_get_max_mdsize(sbi, &lmmsize);
- if (rc)
- return rc;
-
- lmm = libcfs_kvzalloc(lmmsize, GFP_NOFS);
- if (!lmm)
- return -ENOMEM;
- if (copy_from_user(lmm, lum, lmmsize)) {
- rc = -EFAULT;
- goto free_lmm;
- }
-
- switch (lmm->lmm_magic) {
- case LOV_USER_MAGIC_V1:
- if (cpu_to_le32(LOV_USER_MAGIC_V1) == LOV_USER_MAGIC_V1)
- break;
- /* swab objects first so that stripes num will be sane */
- lustre_swab_lov_user_md_objects(
- ((struct lov_user_md_v1 *)lmm)->lmm_objects,
- ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
- lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
- break;
- case LOV_USER_MAGIC_V3:
- if (cpu_to_le32(LOV_USER_MAGIC_V3) == LOV_USER_MAGIC_V3)
- break;
- /* swab objects first so that stripes num will be sane */
- lustre_swab_lov_user_md_objects(
- ((struct lov_user_md_v3 *)lmm)->lmm_objects,
- ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
- lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
- break;
- default:
- rc = -EINVAL;
- goto free_lmm;
- }
-
- rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
- if (rc < 0) {
- rc = -ENOMEM;
- goto free_lmm;
- }
-
- /* Perform glimpse_size operation. */
- memset(&st, 0, sizeof(st));
-
- rc = ll_glimpse_ioctl(sbi, lsm, &st);
- if (rc)
- goto free_lsm;
-
- if (copy_to_user(&lumd->lmd_st, &st, sizeof(st))) {
- rc = -EFAULT;
- goto free_lsm;
- }
-
-free_lsm:
- obd_free_memmd(sbi->ll_dt_exp, &lsm);
-free_lmm:
- kvfree(lmm);
- return rc;
- }
- case OBD_IOC_QUOTACHECK: {
- struct obd_quotactl *oqctl;
- int error = 0;
-
- if (!capable(CFS_CAP_SYS_ADMIN))
- return -EPERM;
-
- oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
- if (!oqctl)
- return -ENOMEM;
- oqctl->qc_type = arg;
- rc = obd_quotacheck(sbi->ll_md_exp, oqctl);
- if (rc < 0) {
- CDEBUG(D_INFO, "md_quotacheck failed: rc %d\n", rc);
- error = rc;
- }
-
- rc = obd_quotacheck(sbi->ll_dt_exp, oqctl);
- if (rc < 0)
- CDEBUG(D_INFO, "obd_quotacheck failed: rc %d\n", rc);
-
- kfree(oqctl);
- return error ?: rc;
- }
- case OBD_IOC_POLL_QUOTACHECK: {
- struct if_quotacheck *check;
-
- if (!capable(CFS_CAP_SYS_ADMIN))
- return -EPERM;
-
- check = kzalloc(sizeof(*check), GFP_NOFS);
- if (!check)
- return -ENOMEM;
-
- rc = obd_iocontrol(cmd, sbi->ll_md_exp, 0, (void *)check,
- NULL);
- if (rc) {
- CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
- if (copy_to_user((void __user *)arg, check,
- sizeof(*check)))
- CDEBUG(D_QUOTA, "copy_to_user failed\n");
- goto out_poll;
- }
-
- rc = obd_iocontrol(cmd, sbi->ll_dt_exp, 0, (void *)check,
- NULL);
- if (rc) {
- CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
- if (copy_to_user((void __user *)arg, check,
- sizeof(*check)))
- CDEBUG(D_QUOTA, "copy_to_user failed\n");
- goto out_poll;
- }
-out_poll:
- kfree(check);
- return rc;
- }
case OBD_IOC_QUOTACTL: {
struct if_quotactl *qctl;
@@ -1536,7 +1425,7 @@ out_quotactl:
exp = count ? sbi->ll_md_exp : sbi->ll_dt_exp;
vallen = sizeof(count);
rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT),
- KEY_TGT_COUNT, &vallen, &count, NULL);
+ KEY_TGT_COUNT, &vallen, &count);
if (rc) {
CERROR("get target count failed: %d\n", rc);
return rc;
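The LL_IOC_LMV_GETSTRIPE path above now follows the common two-pass buffer negotiation: when the caller's lum_stripe_count is too small, the kernel copies back the required count and returns -E2BIG so userspace can retry with a larger buffer. A hypothetical userspace-side helper body (the ioctl name and lmv_user_md fields come from this hunk; the sizing mirrors lmv_user_md_size() and is an assumption about the uapi struct layout):

	struct lmv_user_md *lum;
	int stripes = 1;

	for (;;) {
		size_t sz = sizeof(*lum) +
			    stripes * sizeof(struct lmv_user_mds_data);

		lum = calloc(1, sz);
		if (!lum)
			return -ENOMEM;
		lum->lum_magic = LMV_MAGIC_V1;
		lum->lum_stripe_count = stripes;
		if (ioctl(fd, LL_IOC_LMV_GETSTRIPE, lum) == 0)
			break;			/* success, lum is filled */
		stripes = lum->lum_stripe_count; /* required count */
		free(lum);
		if (errno != E2BIG)
			return -errno;
	}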
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index e1d784bae064..f634c11216e6 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -44,6 +44,7 @@
#include <linux/mount.h>
#include "../include/lustre/ll_fiemap.h"
#include "../include/lustre/lustre_ioctl.h"
+#include "../include/lustre_swab.h"
#include "../include/cl_object.h"
#include "llite_internal.h"
@@ -75,60 +76,56 @@ static void ll_file_data_put(struct ll_file_data *fd)
kmem_cache_free(ll_file_data_slab, fd);
}
-void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
- struct lustre_handle *fh)
+/**
+ * Packs all the attributes into @op_data for the CLOSE RPC.
+ */
+static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
+ struct obd_client_handle *och)
{
- op_data->op_fid1 = ll_i2info(inode)->lli_fid;
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ ll_prep_md_op_data(op_data, inode, NULL, NULL,
+ 0, 0, LUSTRE_OPC_ANY, NULL);
+
op_data->op_attr.ia_mode = inode->i_mode;
op_data->op_attr.ia_atime = inode->i_atime;
op_data->op_attr.ia_mtime = inode->i_mtime;
op_data->op_attr.ia_ctime = inode->i_ctime;
op_data->op_attr.ia_size = i_size_read(inode);
+ op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
+ ATTR_MTIME | ATTR_MTIME_SET |
+ ATTR_CTIME | ATTR_CTIME_SET;
op_data->op_attr_blocks = inode->i_blocks;
op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
- op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
- if (fh)
- op_data->op_handle = *fh;
+ op_data->op_handle = och->och_fh;
- if (ll_i2info(inode)->lli_flags & LLIF_DATA_MODIFIED)
+ /*
+	 * For HSM: if the inode data has been modified, pack it so that
+	 * the MDT can set the data-dirty flag in the archive.
+ */
+ if (och->och_flags & FMODE_WRITE &&
+ test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags))
op_data->op_bias |= MDS_DATA_MODIFIED;
}
/**
- * Closes the IO epoch and packs all the attributes into @op_data for
- * the CLOSE rpc.
+ * Perform a close, possibly with a bias.
+ * The meaning of "data" depends on the value of "bias".
+ *
+ * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
+ * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
+ * swap layouts with.
*/
-static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
- struct obd_client_handle *och)
-{
- op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
- ATTR_MTIME | ATTR_MTIME_SET |
- ATTR_CTIME | ATTR_CTIME_SET;
-
- if (!(och->och_flags & FMODE_WRITE))
- goto out;
-
- if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
- op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
- else
- ll_ioepoch_close(inode, op_data, &och, 0);
-
-out:
- ll_pack_inode2opdata(inode, op_data, &och->och_fh);
- ll_prep_md_op_data(op_data, inode, NULL, NULL,
- 0, 0, LUSTRE_OPC_ANY, NULL);
-}
-
static int ll_close_inode_openhandle(struct obd_export *md_exp,
- struct inode *inode,
struct obd_client_handle *och,
- const __u64 *data_version)
+ struct inode *inode,
+ enum mds_op_bias bias,
+ void *data)
{
struct obd_export *exp = ll_i2mdexp(inode);
struct md_op_data *op_data;
struct ptlrpc_request *req = NULL;
struct obd_device *obd = class_exp2obd(exp);
- int epoch_close = 1;
int rc;
if (!obd) {
@@ -150,65 +147,51 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
}
ll_prepare_close(inode, op_data, och);
- if (data_version) {
- /* Pass in data_version implies release. */
+ switch (bias) {
+ case MDS_CLOSE_LAYOUT_SWAP:
+ LASSERT(data);
+ op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
+ op_data->op_data_version = 0;
+ op_data->op_lease_handle = och->och_lease_handle;
+ op_data->op_fid2 = *ll_inode2fid(data);
+ break;
+
+ case MDS_HSM_RELEASE:
+ LASSERT(data);
op_data->op_bias |= MDS_HSM_RELEASE;
- op_data->op_data_version = *data_version;
+ op_data->op_data_version = *(__u64 *)data;
op_data->op_lease_handle = och->och_lease_handle;
op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+ break;
+
+ default:
+ LASSERT(!data);
+ break;
}
- epoch_close = op_data->op_flags & MF_EPOCH_CLOSE;
+
rc = md_close(md_exp, op_data, och->och_mod, &req);
- if (rc == -EAGAIN) {
- /* This close must have the epoch closed. */
- LASSERT(epoch_close);
- /* MDS has instructed us to obtain Size-on-MDS attribute from
- * OSTs and send setattr to back to MDS.
- */
- rc = ll_som_update(inode, op_data);
- if (rc) {
- CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n",
- ll_i2mdexp(inode)->exp_obd->obd_name,
- PFID(ll_inode2fid(inode)), rc);
- rc = 0;
- }
- } else if (rc) {
+ if (rc) {
CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
ll_i2mdexp(inode)->exp_obd->obd_name,
PFID(ll_inode2fid(inode)), rc);
}
- /* DATA_MODIFIED flag was successfully sent on close, cancel data
- * modification flag.
- */
- if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- spin_lock(&lli->lli_lock);
- lli->lli_flags &= ~LLIF_DATA_MODIFIED;
- spin_unlock(&lli->lli_lock);
- }
-
- if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
+ if (op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP) &&
+ !rc) {
struct mdt_body *body;
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!(body->mbo_valid & OBD_MD_FLRELEASED))
+ if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
rc = -EBUSY;
}
ll_finish_md_op_data(op_data);
out:
- if (exp_connect_som(exp) && !epoch_close &&
- S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
- ll_queue_done_writing(inode, LLIF_DONE_WRITING);
- } else {
- md_clear_open_replay_data(md_exp, och);
- /* Free @och if it is not waiting for DONE_WRITING. */
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
- kfree(och);
- }
+ md_clear_open_replay_data(md_exp, och);
+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+ kfree(och);
+
if (req) /* This is close request */
ptlrpc_req_finished(req);
return rc;
@@ -252,7 +235,7 @@ int ll_md_real_close(struct inode *inode, fmode_t fmode)
* be closed.
*/
rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
- inode, och, NULL);
+ och, inode, 0, NULL);
}
return rc;
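ll_close_inode_openhandle() now takes an explicit bias plus an opaque data pointer in place of the old data_version argument. The three call shapes used across this patch, collected for reference:

	/* plain close */
	rc = ll_close_inode_openhandle(md_exp, och, inode, 0, NULL);

	/* HSM release: data points at a __u64 data version */
	rc = ll_close_inode_openhandle(md_exp, och, inode,
				       MDS_HSM_RELEASE, &data_version);

	/* close with layout swap: data is the second inode */
	rc = ll_close_inode_openhandle(md_exp, och, inode,
				       MDS_CLOSE_LAYOUT_SWAP, inode2);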
@@ -266,7 +249,9 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
int lockmode;
__u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
struct lustre_handle lockh;
- ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN} };
+ union ldlm_policy_data policy = {
+ .l_inodebits = { MDS_INODELOCK_OPEN }
+ };
int rc = 0;
/* clear group lock, if present */
@@ -288,7 +273,8 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
}
if (fd->fd_och) {
- rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
+ rc = ll_close_inode_openhandle(md_exp, fd->fd_och, inode, 0,
+ NULL);
fd->fd_och = NULL;
goto out;
}
@@ -437,20 +423,6 @@ out:
return rc;
}
-/**
- * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
- * not believe attributes if a few ioepoch holders exist. Attributes for
- * previous ioepoch if new one is opened are also skipped by MDS.
- */
-void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
-{
- if (ioepoch && lli->lli_ioepoch != ioepoch) {
- lli->lli_ioepoch = ioepoch;
- CDEBUG(D_INODE, "Epoch %llu opened on "DFID"\n",
- ioepoch, PFID(&lli->lli_fid));
- }
-}
-
static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
struct obd_client_handle *och)
{
@@ -470,23 +442,17 @@ static int ll_local_open(struct file *file, struct lookup_intent *it,
struct ll_file_data *fd, struct obd_client_handle *och)
{
struct inode *inode = file_inode(file);
- struct ll_inode_info *lli = ll_i2info(inode);
LASSERT(!LUSTRE_FPRIVATE(file));
LASSERT(fd);
if (och) {
- struct mdt_body *body;
int rc;
rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
if (rc != 0)
return rc;
-
- body = req_capsule_server_get(&it->it_request->rq_pill,
- &RMF_MDT_BODY);
- ll_ioepoch_open(lli, body->mbo_ioepoch);
}
LUSTRE_FPRIVATE(file) = fd;
@@ -677,12 +643,6 @@ restart:
if (!S_ISREG(inode->i_mode))
goto out_och_free;
- if (!lli->lli_has_smd &&
- (cl_is_lov_delay_create(file->f_flags) ||
- (file->f_mode & FMODE_WRITE) == 0)) {
- CDEBUG(D_INODE, "object creation was delayed\n");
- goto out_och_free;
- }
cl_lov_delay_create_clear(&file->f_flags);
goto out_och_free;
@@ -867,7 +827,7 @@ out_close:
it.it_lock_mode = 0;
och->och_lease_handle.cookie = 0ULL;
}
- rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
+ rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, och, inode, 0, NULL);
if (rc2 < 0)
CERROR("%s: error closing file "DFID": %d\n",
ll_get_fsname(inode->i_sb, NULL, 0),
@@ -881,6 +841,69 @@ out:
}
/**
+ * Check whether a layout swap can be done between two inodes.
+ *
+ * \param[in] inode1 First inode to check
+ * \param[in] inode2 Second inode to check
+ *
+ * \retval 0 on success, layout swap can be performed between both inodes
+ * \retval negative error code if requirements are not met
+ */
+static int ll_check_swap_layouts_validity(struct inode *inode1,
+ struct inode *inode2)
+{
+ if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
+ return -EINVAL;
+
+ if (inode_permission(inode1, MAY_WRITE) ||
+ inode_permission(inode2, MAY_WRITE))
+ return -EPERM;
+
+ if (inode1->i_sb != inode2->i_sb)
+ return -EXDEV;
+
+ return 0;
+}
+
+static int ll_swap_layouts_close(struct obd_client_handle *och,
+ struct inode *inode, struct inode *inode2)
+{
+ const struct lu_fid *fid1 = ll_inode2fid(inode);
+ const struct lu_fid *fid2;
+ int rc;
+
+ CDEBUG(D_INODE, "%s: biased close of file " DFID "\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
+
+ rc = ll_check_swap_layouts_validity(inode, inode2);
+ if (rc < 0)
+ goto out_free_och;
+
+ /* We now know that inode2 is a lustre inode */
+ fid2 = ll_inode2fid(inode2);
+
+ rc = lu_fid_cmp(fid1, fid2);
+ if (!rc) {
+ rc = -EINVAL;
+ goto out_free_och;
+ }
+
+ /*
+ * Close the file and swap layouts between inode & inode2.
+ * NB: lease lock handle is released in mdc_close_layout_swap_pack()
+ * because we still need it to pack l_remote_handle to MDT.
+ */
+ rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
+ MDS_CLOSE_LAYOUT_SWAP, inode2);
+
+ och = NULL; /* freed in ll_close_inode_openhandle() */
+
+out_free_och:
+ kfree(och);
+ return rc;
+}
+
+/**
* Release lease and close the file.
* It will check if the lease has ever broken.
*/
@@ -907,84 +930,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
*lease_broken = cancelled;
return ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
- inode, och, NULL);
-}
-
-/* Fills the obdo with the attributes for the lsm */
-static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
- struct obdo *obdo, __u64 ioepoch, int dv_flags)
-{
- struct ptlrpc_request_set *set;
- struct obd_info oinfo = { };
- int rc;
-
- LASSERT(lsm);
-
- oinfo.oi_md = lsm;
- oinfo.oi_oa = obdo;
- oinfo.oi_oa->o_oi = lsm->lsm_oi;
- oinfo.oi_oa->o_mode = S_IFREG;
- oinfo.oi_oa->o_ioepoch = ioepoch;
- oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
- OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
- OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
- OBD_MD_FLMTIME | OBD_MD_FLCTIME |
- OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
- OBD_MD_FLDATAVERSION;
- if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
- oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
- oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
- if (dv_flags & LL_DV_WR_FLUSH)
- oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
- }
-
- set = ptlrpc_prep_set();
- if (!set) {
- CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM);
- rc = -ENOMEM;
- } else {
- rc = obd_getattr_async(exp, &oinfo, set);
- if (rc == 0)
- rc = ptlrpc_set_wait(set);
- ptlrpc_set_destroy(set);
- }
- if (rc == 0) {
- oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
- OBD_MD_FLATIME | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME | OBD_MD_FLSIZE |
- OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
- if (dv_flags & LL_DV_WR_FLUSH &&
- !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
- oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
- return -ENOTSUPP;
- }
- return rc;
-}
-
-/**
- * Performs the getattr on the inode and updates its fields.
- * If @sync != 0, perform the getattr under the server-side lock.
- */
-int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
- __u64 ioepoch, int sync)
-{
- struct lov_stripe_md *lsm;
- int rc;
-
- lsm = ccc_inode_lsm_get(inode);
- rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
- obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
- if (rc == 0) {
- struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
-
- obdo_refresh_inode(inode, obdo, obdo->o_valid);
- CDEBUG(D_INODE, "objid " DOSTID " size %llu, blocks %llu, blksize %lu\n",
- POSTID(oi), i_size_read(inode),
- (unsigned long long)inode->i_blocks,
- 1UL << inode->i_blkbits);
- }
- ccc_inode_lsm_put(inode, lsm);
- return rc;
+ och, inode, 0, NULL);
}
int ll_merge_attr(const struct lu_env *env, struct inode *inode)
@@ -1043,23 +989,6 @@ out_size_unlock:
return rc;
}
-int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
- lstat_t *st)
-{
- struct obdo obdo = { 0 };
- int rc;
-
- rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, &obdo, 0, 0);
- if (rc == 0) {
- st->st_size = obdo.o_size;
- st->st_blocks = obdo.o_blocks;
- st->st_mtime = obdo.o_mtime;
- st->st_atime = obdo.o_atime;
- st->st_ctime = obdo.o_ctime;
- }
- return rc;
-}
-
static bool file_is_noatime(const struct file *file)
{
const struct vfsmount *mnt = file->f_path.mnt;
@@ -1117,9 +1046,11 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
{
struct ll_inode_info *lli = ll_i2info(file_inode(file));
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct vvp_io *vio = vvp_env_io(env);
struct range_lock range;
struct cl_io *io;
- ssize_t result;
+ ssize_t result = 0;
+ int rc = 0;
CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n",
file, iot, *ppos, count);
@@ -1151,18 +1082,15 @@ restart:
CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
range.rl_node.in_extent.start,
range.rl_node.in_extent.end);
- result = range_lock(&lli->lli_write_tree,
- &range);
- if (result < 0)
+ rc = range_lock(&lli->lli_write_tree, &range);
+ if (rc < 0)
goto out;
range_locked = true;
}
- down_read(&lli->lli_trunc_sem);
ll_cl_add(file, env, io);
- result = cl_io_loop(env, io);
+ rc = cl_io_loop(env, io);
ll_cl_remove(file, env);
- up_read(&lli->lli_trunc_sem);
if (range_locked) {
CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
range.rl_node.in_extent.start,
@@ -1171,24 +1099,26 @@ restart:
}
} else {
/* cl_io_rw_init() handled IO */
- result = io->ci_result;
+ rc = io->ci_result;
}
if (io->ci_nob > 0) {
result = io->ci_nob;
+ count -= io->ci_nob;
*ppos = io->u.ci_wr.wr.crw_pos;
+
+ /* prepare IO restart */
+ if (count > 0)
+ args->u.normal.via_iter = vio->vui_iter;
}
- goto out;
out:
cl_io_fini(env, io);
- /* If any bit been read/written (result != 0), we just return
- * short read/write instead of restart io.
- */
- if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
- CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zu\n",
+
+ if ((!rc || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
+ CDEBUG(D_VFSTRACE, "%s: restart %s from %lld, count:%zu, result: %zd\n",
+ file_dentry(file)->d_name.name,
iot == CIT_READ ? "read" : "write",
- file, *ppos, count);
- LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
+ *ppos, count, result);
goto restart;
}
@@ -1201,13 +1131,19 @@ out:
ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
LPROC_LL_WRITE_BYTES, result);
fd->fd_write_failed = false;
- } else if (result != -ERESTARTSYS) {
+ } else if (!result && !rc) {
+ rc = io->ci_result;
+ if (rc < 0)
+ fd->fd_write_failed = true;
+ else
+ fd->fd_write_failed = false;
+ } else if (rc != -ERESTARTSYS) {
fd->fd_write_failed = true;
}
}
CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
- return result;
+ return result > 0 ? result : rc;
}
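Condensed control flow of the rewritten ll_file_io_generic() loop: rc carries the cl_io engine status, result the bytes transferred, and a restart is attempted only while count is still positive, resuming from the iterator state the previous pass left in vio->vui_iter (range locking and stats omitted):

	restart:
		/* ... set up io, take range lock ... */
		rc = cl_io_loop(env, io);
		if (io->ci_nob > 0) {
			result = io->ci_nob;
			count -= io->ci_nob;
			*ppos = io->u.ci_wr.wr.crw_pos;
			if (count > 0)	/* prepare IO restart */
				args->u.normal.via_iter = vio->vui_iter;
		}
	out:
		cl_io_fini(env, io);
		if ((!rc || rc == -ENODATA) && count > 0 && io->ci_need_restart)
			goto restart;

		return result > 0 ? result : rc;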
static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -1259,37 +1195,22 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
__u64 flags, struct lov_user_md *lum,
int lum_size)
{
- struct lov_stripe_md *lsm = NULL;
struct lookup_intent oit = {
.it_op = IT_OPEN,
.it_flags = flags | MDS_OPEN_BY_FID,
};
int rc = 0;
- lsm = ccc_inode_lsm_get(inode);
- if (lsm) {
- ccc_inode_lsm_put(inode, lsm);
- CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
- PFID(ll_inode2fid(inode)));
- rc = -EEXIST;
- goto out;
- }
-
ll_inode_size_lock(inode);
rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
if (rc < 0)
goto out_unlock;
- rc = oit.it_status;
- if (rc < 0)
- goto out_unlock;
ll_release_openhandle(inode, &oit);
out_unlock:
ll_inode_size_unlock(inode);
ll_intent_release(&oit);
- ccc_inode_lsm_put(inode, lsm);
-out:
return rc;
}
@@ -1566,7 +1487,7 @@ int ll_release_openhandle(struct inode *inode, struct lookup_intent *it)
ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
- inode, och, NULL);
+ och, inode, 0, NULL);
out:
/* this one is in place of ll_file_open */
if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
@@ -1579,15 +1500,17 @@ out:
/**
* Get size for inode for which FIEMAP mapping is requested.
* Make the FIEMAP get_info call and return the result.
+ *
+ * \param fiemap	kernel buffer to hold the extents
+ * \param num_bytes kernel buffer size
*/
-static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
+static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
size_t num_bytes)
{
- struct obd_export *exp = ll_i2dtexp(inode);
- struct lov_stripe_md *lsm = NULL;
- struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
- __u32 vallen = num_bytes;
- int rc;
+ struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
+ struct lu_env *env;
+ int refcheck;
+ int rc = 0;
/* Checks for fiemap flags */
if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
@@ -1602,21 +1525,9 @@ static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
return rc;
}
- lsm = ccc_inode_lsm_get(inode);
- if (!lsm)
- return -ENOENT;
-
- /* If the stripe_count > 1 and the application does not understand
- * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
- */
- if (lsm->lsm_stripe_count > 1 &&
- !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) {
- rc = -EOPNOTSUPP;
- goto out;
- }
-
- fm_key.oa.o_oi = lsm->lsm_oi;
- fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
if (i_size_read(inode) == 0) {
rc = ll_glimpse_size(inode);
@@ -1624,24 +1535,23 @@ static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
goto out;
}
- obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
- obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
+ fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
+ obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
+ obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
+
/* If filesize is 0, then there would be no objects for mapping */
- if (fm_key.oa.o_size == 0) {
+ if (fmkey.lfik_oa.o_size == 0) {
fiemap->fm_mapped_extents = 0;
rc = 0;
goto out;
}
- memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
-
- rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
- fiemap, lsm);
- if (rc)
- CERROR("obd_get_info failed: rc = %d\n", rc);
+ memcpy(&fmkey.lfik_fiemap, fiemap, sizeof(*fiemap));
+ rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
+ &fmkey, fiemap, &num_bytes);
out:
- ccc_inode_lsm_put(inode, lsm);
+ cl_env_put(env, &refcheck);
return rc;
}
@@ -1689,113 +1599,56 @@ gf_free:
return rc;
}
-static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
-{
- struct ll_user_fiemap *fiemap_s;
- size_t num_bytes, ret_bytes;
- unsigned int extent_count;
- int rc = 0;
-
- /* Get the extent count so we can calculate the size of
- * required fiemap buffer
- */
- if (get_user(extent_count,
- &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
- return -EFAULT;
-
- if (extent_count >=
- (SIZE_MAX - sizeof(*fiemap_s)) / sizeof(struct ll_fiemap_extent))
- return -EINVAL;
- num_bytes = sizeof(*fiemap_s) + (extent_count *
- sizeof(struct ll_fiemap_extent));
-
- fiemap_s = libcfs_kvzalloc(num_bytes, GFP_NOFS);
- if (!fiemap_s)
- return -ENOMEM;
-
- /* get the fiemap value */
- if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
- sizeof(*fiemap_s))) {
- rc = -EFAULT;
- goto error;
- }
-
- /* If fm_extent_count is non-zero, read the first extent since
- * it is used to calculate end_offset and device from previous
- * fiemap call.
- */
- if (extent_count) {
- if (copy_from_user(&fiemap_s->fm_extents[0],
- (char __user *)arg + sizeof(*fiemap_s),
- sizeof(struct ll_fiemap_extent))) {
- rc = -EFAULT;
- goto error;
- }
- }
-
- rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
- if (rc)
- goto error;
-
- ret_bytes = sizeof(struct ll_user_fiemap);
-
- if (extent_count != 0)
- ret_bytes += (fiemap_s->fm_mapped_extents *
- sizeof(struct ll_fiemap_extent));
-
- if (copy_to_user((void __user *)arg, fiemap_s, ret_bytes))
- rc = -EFAULT;
-
-error:
- kvfree(fiemap_s);
- return rc;
-}
-
/*
* Read the data_version for inode.
*
* This value is computed using stripe object version on OST.
* Version is computed using server side locking.
*
- * @param sync if do sync on the OST side;
+ * @param flags	whether to sync on the OST side;
* 0: no sync
* LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
* LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
*/
int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
{
- struct lov_stripe_md *lsm = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obdo *obdo = NULL;
- int rc;
+ struct cl_object *obj = ll_i2info(inode)->lli_clob;
+ struct lu_env *env;
+ struct cl_io *io;
+ int refcheck;
+ int result;
- /* If no stripe, we consider version is 0. */
- lsm = ccc_inode_lsm_get(inode);
- if (!lsm_has_objects(lsm)) {
+	/* If no file object is initialized, we consider its version to be 0. */
+ if (!obj) {
*data_version = 0;
- CDEBUG(D_INODE, "No object for inode\n");
- rc = 0;
- goto out;
+ return 0;
}
- obdo = kzalloc(sizeof(*obdo), GFP_NOFS);
- if (!obdo) {
- rc = -ENOMEM;
- goto out;
- }
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
- rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags);
- if (rc == 0) {
- if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
- rc = -EOPNOTSUPP;
- else
- *data_version = obdo->o_data_version;
- }
+ io = vvp_env_thread_io(env);
+ io->ci_obj = obj;
+ io->u.ci_data_version.dv_data_version = 0;
+ io->u.ci_data_version.dv_flags = flags;
- kfree(obdo);
-out:
- ccc_inode_lsm_put(inode, lsm);
- return rc;
+restart:
+ if (!cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj))
+ result = cl_io_loop(env, io);
+ else
+ result = io->ci_result;
+
+ *data_version = io->u.ci_data_version.dv_data_version;
+
+ cl_io_fini(env, io);
+
+ if (unlikely(io->ci_need_restart))
+ goto restart;
+
+ cl_env_put(env, &refcheck);
+
+ return result;
}
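ll_data_version() is now a compact instance of the clio pattern this patch migrates to throughout: get an env, init an io of the right type, loop, fini, and honour ci_need_restart before putting the env back. The stripped template, using only the symbols visible in this hunk:

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		return PTR_ERR(env);

	io = vvp_env_thread_io(env);
	io->ci_obj = obj;
restart:
	if (!cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj))
		result = cl_io_loop(env, io);
	else
		result = io->ci_result;	/* init failed or nothing to do */
	cl_io_fini(env, io);
	if (unlikely(io->ci_need_restart))
		goto restart;

	cl_env_put(env, &refcheck);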
/*
@@ -1803,11 +1656,11 @@ out:
*/
int ll_hsm_release(struct inode *inode)
{
- struct cl_env_nest nest;
struct lu_env *env;
struct obd_client_handle *och = NULL;
__u64 data_version = 0;
int rc;
+ int refcheck;
CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
ll_get_fsname(inode->i_sb, NULL, 0),
@@ -1824,21 +1677,21 @@ int ll_hsm_release(struct inode *inode)
if (rc != 0)
goto out;
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
if (IS_ERR(env)) {
rc = PTR_ERR(env);
goto out;
}
ll_merge_attr(env, inode);
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
/* Release the file.
* NB: lease lock handle is released in mdc_hsm_release_pack() because
* we still need it to pack l_remote_handle to MDT.
*/
- rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
- &data_version);
+ rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
+ MDS_HSM_RELEASE, &data_version);
och = NULL;
out:
@@ -1849,10 +1702,12 @@ out:
}
struct ll_swap_stack {
- struct iattr ia1, ia2;
- __u64 dv1, dv2;
- struct inode *inode1, *inode2;
- bool check_dv1, check_dv2;
+ u64 dv1;
+ u64 dv2;
+ struct inode *inode1;
+ struct inode *inode2;
+ bool check_dv1;
+ bool check_dv2;
};
static int ll_swap_layouts(struct file *file1, struct file *file2,
@@ -1872,21 +1727,9 @@ static int ll_swap_layouts(struct file *file1, struct file *file2,
llss->inode1 = file_inode(file1);
llss->inode2 = file_inode(file2);
- if (!S_ISREG(llss->inode2->i_mode)) {
- rc = -EINVAL;
- goto free;
- }
-
- if (inode_permission(llss->inode1, MAY_WRITE) ||
- inode_permission(llss->inode2, MAY_WRITE)) {
- rc = -EPERM;
+ rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
+ if (rc < 0)
goto free;
- }
-
- if (llss->inode2->i_sb != llss->inode1->i_sb) {
- rc = -EXDEV;
- goto free;
- }
/* we use 2 bools because they are easier to swap than 2 bits */
if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
@@ -1900,10 +1743,8 @@ static int ll_swap_layouts(struct file *file1, struct file *file2,
llss->dv2 = lsl->sl_dv2;
rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
- if (rc == 0) /* same file, done! */ {
- rc = 0;
+ if (!rc) /* same file, done! */
goto free;
- }
if (rc < 0) { /* sequentialize it */
swap(llss->inode1, llss->inode2);
@@ -1925,19 +1766,6 @@ static int ll_swap_layouts(struct file *file1, struct file *file2,
}
}
- /* to be able to restore mtime and atime after swap
- * we need to first save them
- */
- if (lsl->sl_flags &
- (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
- llss->ia1.ia_mtime = llss->inode1->i_mtime;
- llss->ia1.ia_atime = llss->inode1->i_atime;
- llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
- llss->ia2.ia_mtime = llss->inode2->i_mtime;
- llss->ia2.ia_atime = llss->inode2->i_atime;
- llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
- }
-
/* ultimate check, before swapping the layouts we check if
* dataversion has changed (if requested)
*/
@@ -1987,39 +1815,6 @@ putgl:
ll_put_grouplock(llss->inode1, file1, gid);
}
- /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
- if (rc != 0)
- goto free;
-
- /* clear useless flags */
- if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
- llss->ia1.ia_valid &= ~ATTR_MTIME;
- llss->ia2.ia_valid &= ~ATTR_MTIME;
- }
-
- if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
- llss->ia1.ia_valid &= ~ATTR_ATIME;
- llss->ia2.ia_valid &= ~ATTR_ATIME;
- }
-
- /* update time if requested */
- rc = 0;
- if (llss->ia2.ia_valid != 0) {
- inode_lock(llss->inode1);
- rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
- inode_unlock(llss->inode1);
- }
-
- if (llss->ia1.ia_valid != 0) {
- int rc1;
-
- inode_lock(llss->inode2);
- rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
- inode_unlock(llss->inode2);
- if (rc == 0)
- rc = rc1;
- }
-
free:
kfree(llss);
@@ -2176,24 +1971,52 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
sizeof(struct lustre_swap_layouts)))
return -EFAULT;
- if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
+ if ((file->f_flags & O_ACCMODE) == O_RDONLY)
return -EPERM;
file2 = fget(lsl.sl_fd);
if (!file2)
return -EBADF;
- rc = -EPERM;
- if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
+ /* O_WRONLY or O_RDWR */
+ if ((file2->f_flags & O_ACCMODE) == O_RDONLY) {
+ rc = -EPERM;
+ goto out;
+ }
+
+ if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
+ struct obd_client_handle *och = NULL;
+ struct ll_inode_info *lli;
+ struct inode *inode2;
+
+ if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ lli = ll_i2info(inode);
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och) {
+ och = fd->fd_lease_och;
+ fd->fd_lease_och = NULL;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+ if (!och) {
+ rc = -ENOLCK;
+ goto out;
+ }
+ inode2 = file_inode(file2);
+ rc = ll_swap_layouts_close(och, inode, inode2);
+ } else {
rc = ll_swap_layouts(file, file2, &lsl);
+ }
+out:
fput(file2);
return rc;
}
case LL_IOC_LOV_GETSTRIPE:
return ll_file_getstripe(inode,
(struct lov_user_md __user *)arg);
- case FSFILT_IOC_FIEMAP:
- return ll_ioctl_fiemap(inode, arg);
case FSFILT_IOC_GETFLAGS:
case FSFILT_IOC_SETFLAGS:
return ll_iocontrol(inode, file, cmd, arg);
@@ -2489,17 +2312,17 @@ static int ll_flush(struct file *file, fl_owner_t id)
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
enum cl_fsync_mode mode, int ignore_layout)
{
- struct cl_env_nest nest;
struct lu_env *env;
struct cl_io *io;
struct cl_fsync_io *fio;
int result;
+ int refcheck;
if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
return -EINVAL;
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
if (IS_ERR(env))
return PTR_ERR(env);
@@ -2522,7 +2345,7 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
if (result == 0)
result = fio->fi_nr_written;
cl_io_fini(env, io);
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
return result;
}
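This patch retires cl_env_nested_get()/cl_env_nested_put() in favour of the plain refcheck pair here and in ll_hsm_release(), ll_data_version() and ll_layout_conf(). The mechanical translation:

	int refcheck;

	/* was: env = cl_env_nested_get(&nest); */
	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		return PTR_ERR(env);

	/* ... use env ... */

	/* was: cl_env_nested_put(&nest, env); */
	cl_env_put(env, &refcheck);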
@@ -2549,9 +2372,11 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
lli->lli_async_rc = 0;
if (rc == 0)
rc = err;
- err = lov_read_and_clear_async_rc(lli->lli_clob);
- if (rc == 0)
- rc = err;
+ if (lli->lli_clob) {
+ err = lov_read_and_clear_async_rc(lli->lli_clob);
+ if (rc == 0)
+ rc = err;
+ }
}
err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
@@ -2588,7 +2413,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
};
struct md_op_data *op_data;
struct lustre_handle lockh = {0};
- ldlm_policy_data_t flock = { {0} };
+ union ldlm_policy_data flock = { { 0 } };
int fl_type = file_lock->fl_type;
__u64 flags = 0;
int rc;
@@ -2707,7 +2532,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
}
int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid)
+ int namelen, struct lu_fid *fid,
+ struct inode **inode)
{
struct md_op_data *op_data = NULL;
struct ptlrpc_request *req;
@@ -2719,7 +2545,7 @@ int ll_get_fid_by_name(struct inode *parent, const char *name,
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- op_data->op_valid = OBD_MD_FLID;
+ op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
if (rc < 0)
@@ -2732,6 +2558,9 @@ int ll_get_fid_by_name(struct inode *parent, const char *name,
}
if (fid)
*fid = body->mbo_fid1;
+
+ if (inode)
+ rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
out_req:
ptlrpc_req_finished(req);
return rc;
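Callers that only need the fid keep passing NULL for the new argument; ll_migrate() below additionally instantiates the inode from the same getattr reply via ll_prep_inode(). Both shapes as they appear in this patch:

	/* fid lookup only (ll_dir_ioctl) */
	rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL);

	/* fid plus inode (ll_migrate) */
	rc = ll_get_fid_by_name(parent, name, namelen,
				&op_data->op_fid3, &child_inode);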
@@ -2741,9 +2570,12 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
const char *name, int namelen)
{
struct ptlrpc_request *request = NULL;
+ struct obd_client_handle *och = NULL;
struct inode *child_inode = NULL;
struct dentry *dchild = NULL;
struct md_op_data *op_data;
+ struct mdt_body *body;
+ u64 data_version = 0;
struct qstr qstr;
int rc;
@@ -2762,22 +2594,25 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
dchild = d_lookup(file_dentry(file), &qstr);
if (dchild) {
op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
- if (dchild->d_inode) {
+ if (dchild->d_inode)
child_inode = igrab(dchild->d_inode);
- if (child_inode) {
- inode_lock(child_inode);
- op_data->op_fid3 = *ll_inode2fid(child_inode);
- ll_invalidate_aliases(child_inode);
- }
- }
dput(dchild);
- } else {
+ }
+
+ if (!child_inode) {
rc = ll_get_fid_by_name(parent, name, namelen,
- &op_data->op_fid3);
+ &op_data->op_fid3, &child_inode);
if (rc)
goto out_free;
}
+ if (!child_inode) {
+ rc = -EINVAL;
+ goto out_free;
+ }
+
+ inode_lock(child_inode);
+ op_data->op_fid3 = *ll_inode2fid(child_inode);
if (!fid_is_sane(&op_data->op_fid3)) {
CERROR("%s: migrate %s, but fid "DFID" is insane\n",
ll_get_fsname(parent->i_sb, NULL, 0), name,
@@ -2796,6 +2631,26 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
rc = 0;
goto out_free;
}
+again:
+ if (S_ISREG(child_inode->i_mode)) {
+ och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
+ if (IS_ERR(och)) {
+ rc = PTR_ERR(och);
+ och = NULL;
+ goto out_free;
+ }
+
+ rc = ll_data_version(child_inode, &data_version,
+ LL_DV_WR_FLUSH);
+ if (rc)
+ goto out_free;
+
+ op_data->op_handle = och->och_fh;
+ op_data->op_data = och->och_mod;
+ op_data->op_data_version = data_version;
+ op_data->op_lease_handle = och->och_lease_handle;
+ op_data->op_bias |= MDS_RENAME_MIGRATE;
+ }
op_data->op_mds = mdtidx;
op_data->op_cli_flags = CLI_MIGRATE;
@@ -2804,10 +2659,32 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
if (!rc)
ll_update_times(request, parent);
- ptlrpc_req_finished(request);
+ body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
+ if (!body) {
+ rc = -EPROTO;
+ goto out_free;
+ }
+
+	/*
+	 * If the server released the layout lock, clean up the client
+	 * och here; otherwise it is released in out_free below.
+	 */
+ if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
+ obd_mod_put(och->och_mod);
+ md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, och);
+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+ kfree(och);
+ och = NULL;
+ }
+ ptlrpc_req_finished(request);
+ /* Try again if the file layout has changed. */
+ if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
+ goto again;
out_free:
if (child_inode) {
+ if (och) /* close the file */
+ ll_lease_close(och, child_inode, NULL);
clear_nlink(child_inode);
inode_unlock(child_inode);
iput(child_inode);
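Condensed control flow of the regular-file migration added above: take a write lease, flush and sample the data version, attach the lease to the op, and retry from the lease step if the layout changed underneath. The migrate RPC itself falls between the hunks, so it is elided here:

	again:
		if (S_ISREG(child_inode->i_mode)) {
			och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
			rc = ll_data_version(child_inode, &data_version,
					     LL_DV_WR_FLUSH);
			op_data->op_handle = och->och_fh;
			op_data->op_data_version = data_version;
			op_data->op_lease_handle = och->och_lease_handle;
			op_data->op_bias |= MDS_RENAME_MIGRATE;
		}

		/* ... issue the migrate RPC (call elided between hunks) ... */

		if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
			goto again;	/* layout changed; take a fresh lease */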
@@ -2837,7 +2714,7 @@ int ll_have_md_lock(struct inode *inode, __u64 *bits,
enum ldlm_mode l_req_mode)
{
struct lustre_handle lockh;
- ldlm_policy_data_t policy;
+ union ldlm_policy_data policy;
enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
(LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
struct lu_fid *fid;
@@ -2878,7 +2755,7 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
struct lustre_handle *lockh, __u64 flags,
enum ldlm_mode mode)
{
- ldlm_policy_data_t policy = { .l_inodebits = {bits} };
+ union ldlm_policy_data policy = { .l_inodebits = { bits } };
struct lu_fid *fid;
fid = &ll_i2info(inode)->lli_fid;
@@ -2893,6 +2770,13 @@ static int ll_inode_revalidate_fini(struct inode *inode, int rc)
/* Already unlinked. Just update nlink and return success */
if (rc == -ENOENT) {
clear_nlink(inode);
+ /* If it is a striped directory and there is a bad stripe,
+ * revalidate the dentry again instead of returning an
+ * error.
+ */
+ if (S_ISDIR(inode->i_mode) && ll_i2info(inode)->lli_lsm_md)
+ return 0;
+
/* This path cannot be hit for regular files unless in
* case of obscure races, so no need to validate size.
*/
@@ -3040,6 +2924,8 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
} else {
+ struct ll_inode_info *lli = ll_i2info(inode);
+
/* In case of restore, the MDT has the right size and has
* already sent it back without granting the layout lock; the
* inode is up to date, so a glimpse is useless.
@@ -3047,7 +2933,7 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
* restore the MDT holds the layout lock so the glimpse will
* block up to the end of restore (getattr will block)
*/
- if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
+ if (!test_bit(LLIF_FILE_RESTORING, &lli->lli_flags))
rc = ll_glimpse_size(inode);
}
return rc;
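
With lli_flags now an unsigned long holding bit numbers instead of a mask
word, tests and updates use the kernel's atomic bitops rather than taking
lli_lock. A minimal sketch of the new idiom, with the flag names this series
introduces:

    #include <linux/bitops.h>

    set_bit(LLIF_FILE_RESTORING, &lli->lli_flags);      /* restore started */

    if (!test_bit(LLIF_FILE_RESTORING, &lli->lli_flags))
        rc = ll_glimpse_size(inode);                    /* safe to glimpse */

    clear_bit(LLIF_FILE_RESTORING, &lli->lli_flags);    /* restore finished */
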
@@ -3095,13 +2981,12 @@ static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
{
int rc;
size_t num_bytes;
- struct ll_user_fiemap *fiemap;
+ struct fiemap *fiemap;
unsigned int extent_count = fieinfo->fi_extents_max;
num_bytes = sizeof(*fiemap) + (extent_count *
- sizeof(struct ll_fiemap_extent));
+ sizeof(struct fiemap_extent));
fiemap = libcfs_kvzalloc(num_bytes, GFP_NOFS);
-
if (!fiemap)
return -ENOMEM;
@@ -3109,9 +2994,10 @@ static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
fiemap->fm_extent_count = fieinfo->fi_extents_max;
fiemap->fm_start = start;
fiemap->fm_length = len;
+
if (extent_count > 0 &&
copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
- sizeof(struct ll_fiemap_extent)) != 0) {
+ sizeof(struct fiemap_extent))) {
rc = -EFAULT;
goto out;
}
@@ -3123,11 +3009,10 @@ static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (extent_count > 0 &&
copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
fiemap->fm_mapped_extents *
- sizeof(struct ll_fiemap_extent)) != 0) {
+ sizeof(struct fiemap_extent))) {
rc = -EFAULT;
goto out;
}
-
out:
kvfree(fiemap);
return rc;
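
With the private ll_user_fiemap/ll_fiemap_extent types replaced by the
kernel's fiemap UAPI, the buffer size is the usual flexible-array
computation. As a sketch (the caller bounds extent_count via fi_extents_max,
so overflow is assumed not to be a concern here):

    #include <linux/fiemap.h>

    size_t num_bytes = sizeof(struct fiemap) +
                       extent_count * sizeof(struct fiemap_extent);
    struct fiemap *fm = libcfs_kvzalloc(num_bytes, GFP_NOFS);

    if (!fm)
        return -ENOMEM;
    /* fm->fm_extents[] now has room for extent_count entries */
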
@@ -3370,35 +3255,50 @@ ll_iocontrol_call(struct inode *inode, struct file *file,
int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_env_nest nest;
+ struct cl_object *obj = lli->lli_clob;
struct lu_env *env;
- int result;
+ int rc;
+ int refcheck;
- if (!lli->lli_clob)
+ if (!obj)
return 0;
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
if (IS_ERR(env))
return PTR_ERR(env);
- result = cl_conf_set(env, lli->lli_clob, conf);
- cl_env_nested_put(&nest, env);
+ rc = cl_conf_set(env, obj, conf);
+ if (rc < 0)
+ goto out;
if (conf->coc_opc == OBJECT_CONF_SET) {
struct ldlm_lock *lock = conf->coc_lock;
+ struct cl_layout cl = {
+ .cl_layout_gen = 0,
+ };
LASSERT(lock);
LASSERT(ldlm_has_layout(lock));
- if (result == 0) {
- /* it can only be allowed to match after layout is
- * applied to inode otherwise false layout would be
- * seen. Applying layout should happen before dropping
- * the intent lock.
- */
- ldlm_lock_allow_match(lock);
- }
+
+ /* The lock can only be allowed to match after the layout is
+ * applied to the inode; otherwise a false layout would be
+ * seen. Applying the layout should happen before dropping
+ * the intent lock.
+ */
+ ldlm_lock_allow_match(lock);
+
+ rc = cl_object_layout_get(env, obj, &cl);
+ if (rc < 0)
+ goto out;
+
+ CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n",
+ PFID(&lli->lli_fid), ll_layout_version_get(lli),
+ cl.cl_layout_gen);
+ ll_layout_version_set(lli, cl.cl_layout_gen);
}
- return result;
+out:
+ cl_env_put(env, &refcheck);
+ return rc;
}
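
The cl_env_nest API is removed throughout this series; the replacement,
used verbatim in ll_layout_conf() above, is the plain refcheck-paired
get/put:

    int refcheck;
    struct lu_env *env;

    env = cl_env_get(&refcheck);    /* refcheck pairs this get ... */
    if (IS_ERR(env))
        return PTR_ERR(env);
    /* ... use env ... */
    cl_env_put(env, &refcheck);     /* ... with this put, for leak checking */
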
/* Fetch layout from MDT with getxattr request, if it's not ready yet */
@@ -3477,12 +3377,11 @@ out:
* in this function.
*/
static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
- struct inode *inode, __u32 *gen, bool reconf)
+ struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ldlm_lock *lock;
- struct lustre_md md = { NULL };
struct cl_object_conf conf;
int rc = 0;
bool lvb_ready;
@@ -3494,8 +3393,8 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
LASSERT(lock);
LASSERT(ldlm_has_layout(lock));
- LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d",
- PFID(&lli->lli_fid), inode, reconf);
+ LDLM_DEBUG(lock, "File " DFID "(%p) being reconfigured",
+ PFID(&lli->lli_fid), inode);
/* in case this is a caching lock and reinstate with new inode */
md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);
@@ -3506,15 +3405,8 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
/* checking lvb_ready is racy but this is okay. The worst case is
* that multiple processes may configure the file at the same time.
*/
- if (lvb_ready || !reconf) {
- rc = -ENODATA;
- if (lvb_ready) {
- /* layout_gen must be valid if layout lock is not
- * cancelled and stripe has already set
- */
- *gen = ll_layout_version_get(lli);
- rc = 0;
- }
+ if (lvb_ready) {
+ rc = 0;
goto out;
}
@@ -3524,39 +3416,19 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
/* for layout lock, lmm is returned in lock's lvb.
* lvb_data is immutable if the lock is held so it's safe to access it
- * without res lock. See the description in ldlm_lock_decref_internal()
- * for the condition to free lvb_data of layout lock
- */
- if (lock->l_lvb_data) {
- rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
- lock->l_lvb_data, lock->l_lvb_len);
- if (rc >= 0) {
- *gen = LL_LAYOUT_GEN_EMPTY;
- if (md.lsm)
- *gen = md.lsm->lsm_layout_gen;
- rc = 0;
- } else {
- CERROR("%s: file " DFID " unpackmd error: %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), rc);
- }
- }
- if (rc < 0)
- goto out;
-
- /* set layout to file. Unlikely this will fail as old layout was
+ * without res lock.
+ *
+ * Set the layout on the file. This is unlikely to fail as the old layout was
* surely eliminated
*/
memset(&conf, 0, sizeof(conf));
conf.coc_opc = OBJECT_CONF_SET;
conf.coc_inode = inode;
conf.coc_lock = lock;
- conf.u.coc_md = &md;
+ conf.u.coc_layout.lb_buf = lock->l_lvb_data;
+ conf.u.coc_layout.lb_len = lock->l_lvb_len;
rc = ll_layout_conf(inode, &conf);
- if (md.lsm)
- obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
-
/* refresh layout failed, need to wait */
wait_layout = rc == -EBUSY;
@@ -3584,20 +3456,7 @@ out:
return rc;
}
-/**
- * This function checks if there exists a LAYOUT lock on the client side,
- * or enqueues it if it doesn't have one in cache.
- *
- * This function will not hold layout lock so it may be revoked any time after
- * this function returns. Any operations depend on layout should be redone
- * in that case.
- *
- * This function should be called before lov_io_init() to get an uptodate
- * layout version, the caller should save the version number and after IO
- * is finished, this function should be called again to verify that layout
- * is not changed during IO time.
- */
-int ll_layout_refresh(struct inode *inode, __u32 *gen)
+static int ll_layout_refresh_locked(struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -3613,17 +3472,6 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
};
int rc;
- *gen = ll_layout_version_get(lli);
- if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
- return 0;
-
- /* sanity checks */
- LASSERT(fid_is_sane(ll_inode2fid(inode)));
- LASSERT(S_ISREG(inode->i_mode));
-
- /* take layout lock mutex to enqueue layout lock exclusively. */
- mutex_lock(&lli->lli_layout_mutex);
-
again:
/* mostly the layout lock is cached on the local side, so try to match
* it before grabbing the layout lock mutex.
@@ -3631,20 +3479,16 @@ again:
mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
LCK_CR | LCK_CW | LCK_PR | LCK_PW);
if (mode != 0) { /* hit cached lock */
- rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
+ rc = ll_layout_lock_set(&lockh, mode, inode);
if (rc == -EAGAIN)
goto again;
-
- mutex_unlock(&lli->lli_layout_mutex);
return rc;
}
op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
0, 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- mutex_unlock(&lli->lli_layout_mutex);
+ if (IS_ERR(op_data))
return PTR_ERR(op_data);
- }
/* have to enqueue one */
memset(&it, 0, sizeof(it));
@@ -3668,10 +3512,50 @@ again:
if (rc == 0) {
/* set lock data in case this is a new lock */
ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
- rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
+ rc = ll_layout_lock_set(&lockh, mode, inode);
if (rc == -EAGAIN)
goto again;
}
+
+ return rc;
+}
+
+/**
+ * This function checks whether a LAYOUT lock exists on the client side,
+ * and enqueues one if it is not already cached.
+ *
+ * This function does not hold the layout lock, so the lock may be revoked
+ * any time after this function returns. Any operations that depend on the
+ * layout should be redone in that case.
+ *
+ * This function should be called before lov_io_init() to get an up-to-date
+ * layout version; the caller should save the version number and, after IO
+ * has finished, call this function again to verify that the layout was
+ * not changed during the IO.
+ */
+int ll_layout_refresh(struct inode *inode, __u32 *gen)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ int rc;
+
+ *gen = ll_layout_version_get(lli);
+ if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
+ return 0;
+
+ /* sanity checks */
+ LASSERT(fid_is_sane(ll_inode2fid(inode)));
+ LASSERT(S_ISREG(inode->i_mode));
+
+ /* take layout lock mutex to enqueue layout lock exclusively. */
+ mutex_lock(&lli->lli_layout_mutex);
+
+ rc = ll_layout_refresh_locked(inode);
+ if (rc < 0)
+ goto out;
+
+ *gen = ll_layout_version_get(lli);
+out:
mutex_unlock(&lli->lli_layout_mutex);
return rc;
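
Per the comment block above, a caller brackets its IO with two refresh
calls. A hedged sketch of that contract (the -ESTALE restart policy is
illustrative, not something this patch prescribes):

    __u32 gen_before, gen_after;
    int rc;

    rc = ll_layout_refresh(inode, &gen_before); /* before lov_io_init() */
    if (rc)
        return rc;

    /* ... perform the layout-dependent IO ... */

    rc = ll_layout_refresh(inode, &gen_after);
    if (!rc && gen_after != gen_before)
        rc = -ESTALE;       /* layout changed mid-IO: redo the IO */
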
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
index 22507b9c6d69..504498de536e 100644
--- a/drivers/staging/lustre/lustre/llite/glimpse.c
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -80,69 +80,60 @@ blkcnt_t dirty_cnt(struct inode *inode)
int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
struct inode *inode, struct cl_object *clob, int agl)
{
- struct ll_inode_info *lli = ll_i2info(inode);
const struct lu_fid *fid = lu_object_fid(&clob->co_lu);
- int result;
+ struct cl_lock *lock = vvp_env_lock(env);
+ struct cl_lock_descr *descr = &lock->cll_descr;
+ int result = 0;
+
+ CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
+
+ /* NOTE: this looks like a DLM lock request, but it may
+ * not be one. Due to the CEF_ASYNC flag (translated
+ * to LDLM_FL_HAS_INTENT by osc), this is a
+ * glimpse request that won't revoke any
+ * conflicting DLM locks held. Instead,
+ * ll_glimpse_callback() will be called on each
+ * client holding a DLM lock against this file,
+ * and the resulting size will be returned for each
+ * stripe. A DLM lock on [0, EOF] is acquired only
+ * if there were no conflicting locks. If there
+ * were conflicting locks, enqueuing or waiting
+ * fails with -ENAVAIL, but valid inode
+ * attributes are returned anyway.
+ */
+ *descr = whole_file;
+ descr->cld_obj = clob;
+ descr->cld_mode = CLM_READ;
+ descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
+ if (agl)
+ descr->cld_enq_flags |= CEF_AGL;
+ /*
+ * CEF_ASYNC is used because glimpse sub-locks cannot
+ * deadlock (because they never conflict with other
+ * locks) and, hence, can be enqueued out-of-order.
+ *
+ * CEF_MUST protects the glimpse lock from conversion into
+ * a lockless mode.
+ */
+ result = cl_lock_request(env, io, lock);
+ if (result < 0)
+ return result;
- result = 0;
- if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) {
- CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
- if (lli->lli_has_smd) {
- struct cl_lock *lock = vvp_env_lock(env);
- struct cl_lock_descr *descr = &lock->cll_descr;
-
- /* NOTE: this looks like DLM lock request, but it may
- * not be one. Due to CEF_ASYNC flag (translated
- * to LDLM_FL_HAS_INTENT by osc), this is
- * glimpse request, that won't revoke any
- * conflicting DLM locks held. Instead,
- * ll_glimpse_callback() will be called on each
- * client holding a DLM lock against this file,
- * and resulting size will be returned for each
- * stripe. DLM lock on [0, EOF] is acquired only
- * if there were no conflicting locks. If there
- * were conflicting locks, enqueuing or waiting
- * fails with -ENAVAIL, but valid inode
- * attributes are returned anyway.
- */
- *descr = whole_file;
- descr->cld_obj = clob;
- descr->cld_mode = CLM_READ;
- descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
- if (agl)
- descr->cld_enq_flags |= CEF_AGL;
+ if (!agl) {
+ ll_merge_attr(env, inode);
+ if (i_size_read(inode) > 0 && !inode->i_blocks) {
/*
- * CEF_ASYNC is used because glimpse sub-locks cannot
- * deadlock (because they never conflict with other
- * locks) and, hence, can be enqueued out-of-order.
- *
- * CEF_MUST protects glimpse lock from conversion into
- * a lockless mode.
+ * LU-417: add the dirty pages block count,
+ * lest i_blocks report 0 and some "cp" or
+ * "tar" think it's a completely
+ * sparse file and skip it.
*/
- result = cl_lock_request(env, io, lock);
- if (result < 0)
- return result;
-
- if (!agl) {
- ll_merge_attr(env, inode);
- if (i_size_read(inode) > 0 &&
- inode->i_blocks == 0) {
- /*
- * LU-417: Add dirty pages block count
- * lest i_blocks reports 0, some "cp" or
- * "tar" may think it's a completely
- * sparse file and skip it.
- */
- inode->i_blocks = dirty_cnt(inode);
- }
- }
- cl_lock_release(env, lock);
- } else {
- CDEBUG(D_DLMTRACE, "No objects for inode\n");
- ll_merge_attr(env, inode);
+ inode->i_blocks = dirty_cnt(inode);
}
}
+ cl_lock_release(env, lock);
+
return result;
}
@@ -212,39 +203,3 @@ again:
}
return result;
}
-
-int cl_local_size(struct inode *inode)
-{
- struct lu_env *env = NULL;
- struct cl_io *io = NULL;
- struct cl_object *clob;
- int result;
- int refcheck;
-
- if (!ll_i2info(inode)->lli_has_smd)
- return 0;
-
- result = cl_io_get(inode, &env, &io, &refcheck);
- if (result <= 0)
- return result;
-
- clob = io->ci_obj;
- result = cl_io_init(env, io, CIT_MISC, clob);
- if (result > 0) {
- result = io->ci_result;
- } else if (result == 0) {
- struct cl_lock *lock = vvp_env_lock(env);
-
- lock->cll_descr = whole_file;
- lock->cll_descr.cld_enq_flags = CEF_PEEK;
- lock->cll_descr.cld_obj = clob;
- result = cl_lock_request(env, io, lock);
- if (result == 0) {
- ll_merge_attr(env, inode);
- cl_lock_release(env, lock);
- }
- }
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
index 084330d08f7a..dd1cfd8f5213 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -80,7 +80,8 @@ int cl_inode_fini_refcheck;
*/
static DEFINE_MUTEX(cl_inode_fini_guard);
-int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
+int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr,
+ unsigned int attr_flags)
{
struct lu_env *env;
struct cl_io *io;
@@ -92,14 +93,15 @@ int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
return PTR_ERR(env);
io = vvp_env_thread_io(env);
- io->ci_obj = ll_i2info(inode)->lli_clob;
+ io->ci_obj = obj;
io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
+ io->u.ci_setattr.sa_attr_flags = attr_flags;
io->u.ci_setattr.sa_valid = attr->ia_valid;
- io->u.ci_setattr.sa_parent_fid = ll_inode2fid(inode);
+ io->u.ci_setattr.sa_parent_fid = lu_object_fid(&obj->co_lu);
again:
if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
@@ -148,7 +150,7 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
struct cl_object_conf conf = {
.coc_inode = inode,
.u = {
- .coc_md = md
+ .coc_layout = md->layout,
}
};
int result = 0;
@@ -182,7 +184,6 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
* locked by I_NEW bit.
*/
lli->lli_clob = clob;
- lli->lli_has_smd = lsm_has_objects(md->lsm);
lu_object_ref_add(&clob->co_lu, "inode", inode);
} else {
result = PTR_ERR(clob);
@@ -245,15 +246,11 @@ void cl_inode_fini(struct inode *inode)
int emergency;
if (clob) {
- void *cookie;
-
- cookie = cl_env_reenter();
env = cl_env_get(&refcheck);
emergency = IS_ERR(env);
if (emergency) {
mutex_lock(&cl_inode_fini_guard);
LASSERT(cl_inode_fini_env);
- cl_env_implant(cl_inode_fini_env, &refcheck);
env = cl_inode_fini_env;
}
/*
@@ -265,13 +262,10 @@ void cl_inode_fini(struct inode *inode)
lu_object_ref_del(&clob->co_lu, "inode", inode);
cl_object_put_last(env, clob);
lli->lli_clob = NULL;
- if (emergency) {
- cl_env_unplant(cl_inode_fini_env, &refcheck);
+ if (emergency)
mutex_unlock(&cl_inode_fini_guard);
- } else {
+ else
cl_env_put(env, &refcheck);
- }
- cl_env_reexit(cookie);
}
}
@@ -302,22 +296,3 @@ __u32 cl_fid_build_gen(const struct lu_fid *fid)
gen = fid_flatten(fid) >> 32;
return gen;
}
-
-/* lsm is unreliable after hsm implementation as layout can be changed at
- * any time. This is only to support old, non-clio-ized interfaces. It will
- * cause deadlock if clio operations are called with this extra layout refcount
- * because in case the layout changed during the IO, ll_layout_refresh() will
- * have to wait for the refcount to become zero to destroy the older layout.
- *
- * Notice that the lsm returned by this function may not be valid unless called
- * inside layout lock - MDS_INODELOCK_LAYOUT.
- */
-struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
-{
- return lov_lsm_get(ll_i2info(inode)->lli_clob);
-}
-
-inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
-{
- lov_lsm_put(ll_i2info(inode)->lli_clob, lsm);
-}
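
Callers of cl_setattr_ost() now pass the cl_object and an attr_flags word
instead of the inode. A caller-side sketch of the new signature (new_size is
an illustrative value, not something defined by this patch):

    struct iattr attr = {
        .ia_valid = ATTR_SIZE,
        .ia_size  = new_size,   /* hypothetical target size */
    };
    int rc;

    rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, &attr, 0 /* attr_flags */);
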
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
index fb346c12dad2..f48660ed350f 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
@@ -47,36 +47,29 @@
*/
int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
{
- struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 };
- __u32 valsize = sizeof(struct lov_desc);
- int rc, easize, def_easize, cookiesize;
- struct lov_desc desc;
- __u16 stripes, def_stripes;
-
- rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC,
- &valsize, &desc, NULL);
+ u32 val_size, max_easize, def_easize;
+ int rc;
+
+ val_size = sizeof(max_easize);
+ rc = obd_get_info(NULL, dt_exp, sizeof(KEY_MAX_EASIZE), KEY_MAX_EASIZE,
+ &val_size, &max_easize);
if (rc)
return rc;
- stripes = min_t(__u32, desc.ld_tgt_count, LOV_MAX_STRIPE_COUNT);
- lsm.lsm_stripe_count = stripes;
- easize = obd_size_diskmd(dt_exp, &lsm);
-
- def_stripes = min_t(__u32, desc.ld_default_stripe_count,
- LOV_MAX_STRIPE_COUNT);
- lsm.lsm_stripe_count = def_stripes;
- def_easize = obd_size_diskmd(dt_exp, &lsm);
-
- cookiesize = stripes * sizeof(struct llog_cookie);
+ val_size = sizeof(def_easize);
+ rc = obd_get_info(NULL, dt_exp, sizeof(KEY_DEFAULT_EASIZE),
+ KEY_DEFAULT_EASIZE, &val_size, &def_easize);
+ if (rc)
+ return rc;
- /* default cookiesize is 0 because from 2.4 server doesn't send
+ /*
+ * default cookiesize is 0 because, from 2.4 on, the server doesn't send
* llog cookies to the client.
*/
- CDEBUG(D_HA,
- "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n",
- def_easize, easize, cookiesize);
+ CDEBUG(D_HA, "updating def/max_easize: %d/%d\n",
+ def_easize, max_easize);
- rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0);
+ rc = md_init_ea_size(md_exp, max_easize, def_easize);
return rc;
}
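
The EA sizes are now fetched as plain scalars through obd_get_info(), whose
trailing lsm argument this series drops. The calling convention, as a sketch
(val_size carries the buffer length in; by convention it is treated as
in/out):

    u32 max_easize;
    u32 val_size = sizeof(max_easize);
    int rc;

    rc = obd_get_info(NULL, dt_exp, sizeof(KEY_MAX_EASIZE), KEY_MAX_EASIZE,
                      &val_size, &max_easize);
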
@@ -169,13 +162,11 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
return rc;
}
- cg->lg_env = cl_env_get(&refcheck);
+ cg->lg_env = env;
cg->lg_io = io;
cg->lg_lock = lock;
cg->lg_gid = gid;
- LASSERT(cg->lg_env == env);
- cl_env_unplant(env, &refcheck);
return 0;
}
@@ -184,14 +175,10 @@ void cl_put_grouplock(struct ll_grouplock *cg)
struct lu_env *env = cg->lg_env;
struct cl_io *io = cg->lg_io;
struct cl_lock *lock = cg->lg_lock;
- int refcheck;
LASSERT(cg->lg_env);
LASSERT(cg->lg_gid);
- cl_env_implant(env, &refcheck);
- cl_env_put(env, &refcheck);
-
cl_lock_release(env, lock);
cl_io_fini(env, io);
cl_env_put(env, NULL);
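
With implant/unplant gone, the group-lock code simply stores the env it
acquired at grab time and releases it at put time. Schematically:

    /* cl_get_grouplock(): keep the env for the lock's lifetime */
    cg->lg_env = env;

    /* cl_put_grouplock(): release the lock and io, then the stored env */
    cl_lock_release(cg->lg_env, cg->lg_lock);
    cl_io_fini(cg->lg_env, cg->lg_io);
    cl_env_put(cg->lg_env, NULL);   /* NULL refcheck, as in the code above */
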
diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c
deleted file mode 100644
index 8644631bf2ba..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_close.c
+++ /dev/null
@@ -1,395 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/llite_close.c
- *
- * Lustre Lite routines to issue a secondary close after writeback
- */
-
-#include <linux/module.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-/** records that a write is in flight */
-void vvp_write_pending(struct vvp_object *club, struct vvp_page *page)
-{
- struct ll_inode_info *lli = ll_i2info(club->vob_inode);
-
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= LLIF_SOM_DIRTY;
- if (page && list_empty(&page->vpg_pending_linkage))
- list_add(&page->vpg_pending_linkage, &club->vob_pending_list);
- spin_unlock(&lli->lli_lock);
-}
-
-/** records that a write has completed */
-void vvp_write_complete(struct vvp_object *club, struct vvp_page *page)
-{
- struct ll_inode_info *lli = ll_i2info(club->vob_inode);
- int rc = 0;
-
- spin_lock(&lli->lli_lock);
- if (page && !list_empty(&page->vpg_pending_linkage)) {
- list_del_init(&page->vpg_pending_linkage);
- rc = 1;
- }
- spin_unlock(&lli->lli_lock);
- if (rc)
- ll_queue_done_writing(club->vob_inode, 0);
-}
-
-/** Queues DONE_WRITING if
- * - done writing is allowed;
- * - inode has no no dirty pages;
- */
-void ll_queue_done_writing(struct inode *inode, unsigned long flags)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
-
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= flags;
-
- if ((lli->lli_flags & LLIF_DONE_WRITING) &&
- list_empty(&club->vob_pending_list)) {
- struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;
-
- if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CWARN("%s: file "DFID"(flags %u) Size-on-MDS valid, done writing allowed and no diry pages\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), lli->lli_flags);
- /* DONE_WRITING is allowed and inode has no dirty page. */
- spin_lock(&lcq->lcq_lock);
-
- LASSERT(list_empty(&lli->lli_close_list));
- CDEBUG(D_INODE, "adding inode "DFID" to close list\n",
- PFID(ll_inode2fid(inode)));
- list_add_tail(&lli->lli_close_list, &lcq->lcq_head);
-
- /* Avoid a concurrent insertion into the close thread queue:
- * an inode is already in the close thread, open(), write(),
- * close() happen, epoch is closed as the inode is marked as
- * LLIF_EPOCH_PENDING. When pages are written inode should not
- * be inserted into the queue again, clear this flag to avoid
- * it.
- */
- lli->lli_flags &= ~LLIF_DONE_WRITING;
-
- wake_up(&lcq->lcq_waitq);
- spin_unlock(&lcq->lcq_lock);
- }
- spin_unlock(&lli->lli_lock);
-}
-
-/** Pack SOM attributes info @opdata for CLOSE, DONE_WRITING rpc. */
-void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- op_data->op_flags |= MF_SOM_CHANGE;
- /* Check if Size-on-MDS attributes are valid. */
- if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), lli->lli_flags);
-
- if (!cl_local_size(inode)) {
- /* Send Size-on-MDS Attributes if valid. */
- op_data->op_attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET |
- ATTR_ATIME_SET | ATTR_SIZE | ATTR_BLOCKS;
- }
-}
-
-/** Closes ioepoch and packs Size-on-MDS attribute if needed into @op_data. */
-void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
- struct obd_client_handle **och, unsigned long flags)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
-
- spin_lock(&lli->lli_lock);
- if (!(list_empty(&club->vob_pending_list))) {
- if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) {
- LASSERT(*och);
- LASSERT(!lli->lli_pending_och);
- /* Inode is dirty and there is no pending write done
- * request yet, DONE_WRITE is to be sent later.
- */
- lli->lli_flags |= LLIF_EPOCH_PENDING;
- lli->lli_pending_och = *och;
- spin_unlock(&lli->lli_lock);
-
- inode = igrab(inode);
- LASSERT(inode);
- goto out;
- }
- if (flags & LLIF_DONE_WRITING) {
- /* Some pages are still dirty, it is early to send
- * DONE_WRITE. Wait until all pages will be flushed
- * and try DONE_WRITE again later.
- */
- LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING));
- lli->lli_flags |= LLIF_DONE_WRITING;
- spin_unlock(&lli->lli_lock);
-
- inode = igrab(inode);
- LASSERT(inode);
- goto out;
- }
- }
- CDEBUG(D_INODE, "Epoch %llu closed on "DFID"\n",
- ll_i2info(inode)->lli_ioepoch, PFID(&lli->lli_fid));
- op_data->op_flags |= MF_EPOCH_CLOSE;
-
- if (flags & LLIF_DONE_WRITING) {
- LASSERT(lli->lli_flags & LLIF_SOM_DIRTY);
- LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING));
- *och = lli->lli_pending_och;
- lli->lli_pending_och = NULL;
- lli->lli_flags &= ~LLIF_EPOCH_PENDING;
- } else {
- /* Pack Size-on-MDS inode attributes only if they has changed */
- if (!(lli->lli_flags & LLIF_SOM_DIRTY)) {
- spin_unlock(&lli->lli_lock);
- goto out;
- }
-
- /* There is a pending DONE_WRITE -- close epoch with no
- * attribute change.
- */
- if (lli->lli_flags & LLIF_EPOCH_PENDING) {
- spin_unlock(&lli->lli_lock);
- goto out;
- }
- }
-
- LASSERT(list_empty(&club->vob_pending_list));
- lli->lli_flags &= ~LLIF_SOM_DIRTY;
- spin_unlock(&lli->lli_lock);
- ll_done_writing_attr(inode, op_data);
-
-out:
- return;
-}
-
-/**
- * Cliens updates SOM attributes on MDS (including llog cookies):
- * obd_getattr with no lock and md_setattr.
- */
-int ll_som_update(struct inode *inode, struct md_op_data *op_data)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ptlrpc_request *request = NULL;
- __u32 old_flags;
- struct obdo *oa;
- int rc;
-
- LASSERT(op_data);
- if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), lli->lli_flags);
-
- oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!oa) {
- CERROR("can't allocate memory for Size-on-MDS update.\n");
- return -ENOMEM;
- }
-
- old_flags = op_data->op_flags;
- op_data->op_flags = MF_SOM_CHANGE;
-
- /* If inode is already in another epoch, skip getattr from OSTs. */
- if (lli->lli_ioepoch == op_data->op_ioepoch) {
- rc = ll_inode_getattr(inode, oa, op_data->op_ioepoch,
- old_flags & MF_GETATTR_LOCK);
- if (rc) {
- oa->o_valid = 0;
- if (rc != -ENOENT)
- CERROR("%s: inode_getattr failed - unable to send a Size-on-MDS attribute update for inode "DFID": rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), rc);
- } else {
- CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n",
- PFID(&lli->lli_fid));
- }
- /* Install attributes into op_data. */
- md_from_obdo(op_data, oa, oa->o_valid);
- }
-
- rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data,
- NULL, 0, NULL, 0, &request, NULL);
- ptlrpc_req_finished(request);
-
- kmem_cache_free(obdo_cachep, oa);
- return rc;
-}
-
-/**
- * Closes the ioepoch and packs all the attributes into @op_data for
- * DONE_WRITING rpc.
- */
-static void ll_prepare_done_writing(struct inode *inode,
- struct md_op_data *op_data,
- struct obd_client_handle **och)
-{
- ll_ioepoch_close(inode, op_data, och, LLIF_DONE_WRITING);
- /* If there is no @och, we do not do D_W yet. */
- if (!*och)
- return;
-
- ll_pack_inode2opdata(inode, op_data, &(*och)->och_fh);
- ll_prep_md_op_data(op_data, inode, NULL, NULL,
- 0, 0, LUSTRE_OPC_ANY, NULL);
-}
-
-/** Send a DONE_WRITING rpc. */
-static void ll_done_writing(struct inode *inode)
-{
- struct obd_client_handle *och = NULL;
- struct md_op_data *op_data;
- int rc;
-
- LASSERT(exp_connect_som(ll_i2mdexp(inode)));
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return;
-
- ll_prepare_done_writing(inode, op_data, &och);
- /* If there is no @och, we do not do D_W yet. */
- if (!och)
- goto out;
-
- rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL);
- if (rc == -EAGAIN)
- /* MDS has instructed us to obtain Size-on-MDS attribute from
- * OSTs and send setattr to back to MDS.
- */
- rc = ll_som_update(inode, op_data);
- else if (rc) {
- CERROR("%s: inode "DFID" mdc done_writing failed: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), rc);
- }
-out:
- ll_finish_md_op_data(op_data);
- if (och) {
- md_clear_open_replay_data(ll_i2sbi(inode)->ll_md_exp, och);
- kfree(och);
- }
-}
-
-static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq)
-{
- struct ll_inode_info *lli = NULL;
-
- spin_lock(&lcq->lcq_lock);
-
- if (!list_empty(&lcq->lcq_head)) {
- lli = list_entry(lcq->lcq_head.next, struct ll_inode_info,
- lli_close_list);
- list_del_init(&lli->lli_close_list);
- } else if (atomic_read(&lcq->lcq_stop)) {
- lli = ERR_PTR(-EALREADY);
- }
-
- spin_unlock(&lcq->lcq_lock);
- return lli;
-}
-
-static int ll_close_thread(void *arg)
-{
- struct ll_close_queue *lcq = arg;
-
- complete(&lcq->lcq_comp);
-
- while (1) {
- struct l_wait_info lwi = { 0 };
- struct ll_inode_info *lli;
- struct inode *inode;
-
- l_wait_event_exclusive(lcq->lcq_waitq,
- (lli = ll_close_next_lli(lcq)) != NULL,
- &lwi);
- if (IS_ERR(lli))
- break;
-
- inode = ll_info2i(lli);
- CDEBUG(D_INFO, "done_writing for inode "DFID"\n",
- PFID(ll_inode2fid(inode)));
- ll_done_writing(inode);
- iput(inode);
- }
-
- CDEBUG(D_INFO, "ll_close exiting\n");
- complete(&lcq->lcq_comp);
- return 0;
-}
-
-int ll_close_thread_start(struct ll_close_queue **lcq_ret)
-{
- struct ll_close_queue *lcq;
- struct task_struct *task;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CLOSE_THREAD))
- return -EINTR;
-
- lcq = kzalloc(sizeof(*lcq), GFP_NOFS);
- if (!lcq)
- return -ENOMEM;
-
- spin_lock_init(&lcq->lcq_lock);
- INIT_LIST_HEAD(&lcq->lcq_head);
- init_waitqueue_head(&lcq->lcq_waitq);
- init_completion(&lcq->lcq_comp);
-
- task = kthread_run(ll_close_thread, lcq, "ll_close");
- if (IS_ERR(task)) {
- kfree(lcq);
- return PTR_ERR(task);
- }
-
- wait_for_completion(&lcq->lcq_comp);
- *lcq_ret = lcq;
- return 0;
-}
-
-void ll_close_thread_shutdown(struct ll_close_queue *lcq)
-{
- init_completion(&lcq->lcq_comp);
- atomic_inc(&lcq->lcq_stop);
- wake_up(&lcq->lcq_waitq);
- wait_for_completion(&lcq->lcq_comp);
- kfree(lcq);
-}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 4bc551279aa4..2f46d475cd7d 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -97,31 +97,20 @@ struct ll_grouplock {
unsigned long lg_gid;
};
-enum lli_flags {
- /* MDS has an authority for the Size-on-MDS attributes. */
- LLIF_MDS_SIZE_LOCK = (1 << 0),
- /* Epoch close is postponed. */
- LLIF_EPOCH_PENDING = (1 << 1),
- /* DONE WRITING is allowed. */
- LLIF_DONE_WRITING = (1 << 2),
- /* Sizeon-on-MDS attributes are changed. An attribute update needs to
- * be sent to MDS.
- */
- LLIF_SOM_DIRTY = (1 << 3),
+enum ll_file_flags {
/* File data is modified. */
- LLIF_DATA_MODIFIED = (1 << 4),
+ LLIF_DATA_MODIFIED = 0,
/* File is being restored */
- LLIF_FILE_RESTORING = (1 << 5),
+ LLIF_FILE_RESTORING = 1,
/* Xattr cache is attached to the file */
- LLIF_XATTR_CACHE = (1 << 6),
+ LLIF_XATTR_CACHE = 2,
};
struct ll_inode_info {
__u32 lli_inode_magic;
- __u32 lli_flags;
- __u64 lli_ioepoch;
spinlock_t lli_lock;
+ unsigned long lli_flags;
struct posix_acl *lli_posix_acl;
/* identifying fields for both metadata and data stacks. */
@@ -129,14 +118,6 @@ struct ll_inode_info {
/* master inode fid for stripe directory */
struct lu_fid lli_pfid;
- struct list_head lli_close_list;
-
- /* handle is to be sent to MDS later on done_writing and setattr.
- * Open handle data are needed for the recovery to reconstruct
- * the inode state on the MDS. XXX: recovery is not ready yet.
- */
- struct obd_client_handle *lli_pending_och;
-
/* We need all three because every inode may be opened in different
* modes
*/
@@ -204,7 +185,6 @@ struct ll_inode_info {
struct {
struct mutex lli_size_mutex;
char *lli_symlink_name;
- __u64 lli_maxbytes;
/*
* struct rw_semaphore {
* signed long count; // align d.d_def_acl
@@ -245,7 +225,6 @@ struct ll_inode_info {
* In the future, if more members are added only for directory,
* some of the following members can be moved into u.f.
*/
- bool lli_has_smd;
struct cl_object *lli_clob;
/* mutex to request for layout lock exclusively. */
@@ -282,6 +261,9 @@ int ll_xattr_cache_destroy(struct inode *inode);
int ll_xattr_cache_get(struct inode *inode, const char *name,
char *buffer, size_t size, __u64 valid);
+int ll_init_security(struct dentry *dentry, struct inode *inode,
+ struct inode *dir);
+
/*
* Locking to guarantee consistency of non-atomic updates to long long i_size,
* consistency between file size and KMS.
@@ -400,7 +382,7 @@ enum stats_track_type {
#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */
#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */
-#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */
+/* LL_SBI_SOM_PREVIEW 0x1000 SOM preview mount option, obsolete */
#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */
#define LL_SBI_AGL_ENABLED 0x8000 /* enable agl */
@@ -409,6 +391,8 @@ enum stats_track_type {
#define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */
#define LL_SBI_XATTR_CACHE 0x80000 /* support for xattr cache */
#define LL_SBI_NOROOTSQUASH 0x100000 /* do not apply root squash */
+#define LL_SBI_ALWAYS_PING 0x200000 /* always ping even if the server
+ * sets suppress_pings */
#define LL_SBI_FLAGS { \
"nolck", \
@@ -432,6 +416,7 @@ enum stats_track_type {
"user_fid2path",\
"xattr_cache", \
"norootsquash", \
+ "always_ping", \
}
/*
@@ -466,10 +451,10 @@ struct ll_sb_info {
int ll_flags;
unsigned int ll_umounting:1,
- ll_xattr_cache_enabled:1;
- struct lustre_client_ocd ll_lco;
+ ll_xattr_cache_enabled:1,
+ ll_client_common_fill_super_succeeded:1;
- struct ll_close_queue *ll_lcq;
+ struct lustre_client_ocd ll_lco;
struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
@@ -630,8 +615,6 @@ struct ll_file_data {
struct list_head fd_lccs; /* list of ll_cl_context */
};
-struct lov_stripe_md;
-
extern struct dentry *llite_root;
extern struct kset *llite_kset;
@@ -682,8 +665,6 @@ enum {
LPROC_LL_WRITE_BYTES,
LPROC_LL_BRW_READ,
LPROC_LL_BRW_WRITE,
- LPROC_LL_OSC_READ,
- LPROC_LL_OSC_WRITE,
LPROC_LL_IOCTL,
LPROC_LL_OPEN,
LPROC_LL_RELEASE,
@@ -741,9 +722,7 @@ int ll_writepage(struct page *page, struct writeback_control *wbc);
int ll_writepages(struct address_space *, struct writeback_control *wbc);
int ll_readpage(struct file *file, struct page *page);
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
-int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct ll_readahead_state *ras,
- bool hit);
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
struct ll_cl_context *ll_cl_find(struct file *file);
void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io);
void ll_cl_remove(struct file *file, const struct lu_env *env);
@@ -762,25 +741,14 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
enum ldlm_mode mode);
int ll_file_open(struct inode *inode, struct file *file);
int ll_file_release(struct inode *inode, struct file *file);
-int ll_glimpse_ioctl(struct ll_sb_info *sbi,
- struct lov_stripe_md *lsm, lstat_t *st);
-void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch);
int ll_release_openhandle(struct inode *, struct lookup_intent *);
int ll_md_real_close(struct inode *inode, fmode_t fmode);
-void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
- struct obd_client_handle **och, unsigned long flags);
-void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data);
-int ll_som_update(struct inode *inode, struct md_op_data *op_data);
-int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
- __u64 ioepoch, int sync);
-void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
- struct lustre_handle *fh);
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
struct posix_acl *ll_get_acl(struct inode *inode, int type);
int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
const char *name, int namelen);
int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid);
+ int namelen, struct lu_fid *fid, struct inode **inode);
int ll_inode_permission(struct inode *inode, int mask);
int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
@@ -818,6 +786,7 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt);
void ll_put_super(struct super_block *sb);
void ll_kill_super(struct super_block *sb);
struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock);
+void ll_dir_clear_lsm_md(struct inode *inode);
void ll_clear_inode(struct inode *inode);
int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import);
int ll_setattr(struct dentry *de, struct iattr *attr);
@@ -891,18 +860,6 @@ int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid);
/* llite/symlink.c */
extern const struct inode_operations ll_fast_symlink_inode_operations;
-/* llite/llite_close.c */
-struct ll_close_queue {
- spinlock_t lcq_lock;
- struct list_head lcq_head;
- wait_queue_head_t lcq_waitq;
- struct completion lcq_comp;
- atomic_t lcq_stop;
-};
-
-void vvp_write_pending(struct vvp_object *club, struct vvp_page *page);
-void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);
-
/**
* IO arguments for various VFS I/O interfaces.
*/
@@ -945,15 +902,11 @@ static inline struct vvp_io_args *ll_env_args(const struct lu_env *env)
return &ll_env_info(env)->lti_args;
}
-void ll_queue_done_writing(struct inode *inode, unsigned long flags);
-void ll_close_thread_shutdown(struct ll_close_queue *lcq);
-int ll_close_thread_start(struct ll_close_queue **lcq_ret);
-
/* llite/llite_mmap.c */
int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
int ll_file_mmap(struct file *file, struct vm_area_struct *vma);
-void policy_from_vma(ldlm_policy_data_t *policy, struct vm_area_struct *vma,
+void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma,
unsigned long addr, size_t count);
struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
size_t count);
@@ -1024,9 +977,14 @@ static inline struct lu_fid *ll_inode2fid(struct inode *inode)
return fid;
}
-static inline __u64 ll_file_maxbytes(struct inode *inode)
+static inline loff_t ll_file_maxbytes(struct inode *inode)
{
- return ll_i2info(inode)->lli_maxbytes;
+ struct cl_object *obj = ll_i2info(inode)->lli_clob;
+
+ if (!obj)
+ return MAX_LFS_FILESIZE;
+
+ return min_t(loff_t, cl_object_maxbytes(obj), MAX_LFS_FILESIZE);
}
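
ll_file_maxbytes() now derives its limit from the cl_object rather than a
cached lli_maxbytes. A caller-side sketch of the clamp it enables
(illustrative, not code from this patch):

    loff_t maxbytes = ll_file_maxbytes(inode);

    if (pos >= maxbytes)
        return -EFBIG;              /* nothing writable past the limit */
    if (count > maxbytes - pos)
        count = maxbytes - pos;     /* clamp to the layout's maximum */
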
/* llite/xattr.c */
@@ -1043,17 +1001,18 @@ extern const struct xattr_handler *ll_xattr_handlers[];
ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
int ll_xattr_list(struct inode *inode, const char *name, int type,
void *buffer, size_t size, __u64 valid);
+const struct xattr_handler *get_xattr_type(const char *name);
/**
* Common IO arguments for various VFS I/O interfaces.
*/
int cl_sb_init(struct super_block *sb);
int cl_sb_fini(struct super_block *sb);
-void ll_io_init(struct cl_io *io, const struct file *file, int write);
-void ras_update(struct ll_sb_info *sbi, struct inode *inode,
- struct ll_readahead_state *ras, unsigned long index,
- unsigned hit);
+enum ras_update_flags {
+ LL_RAS_HIT = 0x1,
+ LL_RAS_MMAP = 0x2
+};
void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
@@ -1258,15 +1217,6 @@ struct ll_dio_pages {
int ldp_nr;
};
-static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
- int rc)
-{
- int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
- LPROC_LL_OSC_WRITE;
-
- ll_stats_ops_tally(ll_s2sbi(cl2vvp_dev(dev)->vdv_sb), opc, rc);
-}
-
ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
int rw, struct inode *inode,
struct ll_dio_pages *pv);
@@ -1365,11 +1315,6 @@ static inline void d_lustre_revalidate(struct dentry *dentry)
spin_unlock(&dentry->d_lock);
}
-enum {
- LL_LAYOUT_GEN_NONE = ((__u32)-2), /* layout lock was cancelled */
- LL_LAYOUT_GEN_EMPTY = ((__u32)-1) /* for empty layout */
-};
-
int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
int ll_layout_refresh(struct inode *inode, __u32 *gen);
int ll_layout_restore(struct inode *inode, loff_t start, __u64 length);
@@ -1383,14 +1328,14 @@ int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
int ll_getparent(struct file *file, struct getparent __user *arg);
/* lcommon_cl.c */
-int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
+int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr,
+ unsigned int attr_flags);
extern struct lu_env *cl_inode_fini_env;
extern int cl_inode_fini_refcheck;
int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
void cl_inode_fini(struct inode *inode);
-int cl_local_size(struct inode *inode);
__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
__u32 cl_fid_build_gen(const struct lu_fid *fid);
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index e5c62f4ce3d8..25f5aed97f63 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -191,10 +191,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
OBD_CONNECT_FLOCK_DEAD |
OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
OBD_CONNECT_OPEN_BY_FID |
- OBD_CONNECT_DIR_STRIPE;
-
- if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
- data->ocd_connect_flags |= OBD_CONNECT_SOM;
+ OBD_CONNECT_DIR_STRIPE |
+ OBD_CONNECT_BULK_MBITS;
if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
@@ -226,6 +224,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
/* real client */
data->ocd_connect_flags |= OBD_CONNECT_REAL;
+ /* always ping even if the server sets suppress_pings */
+ if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
+ data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
+
data->ocd_brw_size = MD_MAX_BRW_SIZE;
err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid,
@@ -288,7 +290,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
size = sizeof(*data);
err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA),
- KEY_CONN_DATA, &size, data, NULL);
+ KEY_CONN_DATA, &size, data);
if (err) {
CERROR("%s: Get connect data failed: rc = %d\n",
sbi->ll_md_exp->exp_obd->obd_name, err);
@@ -355,10 +357,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES |
OBD_CONNECT_EINPROGRESS |
OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS;
-
- if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
- data->ocd_connect_flags |= OBD_CONNECT_SOM;
+ OBD_CONNECT_LAYOUTLOCK |
+ OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK |
+ OBD_CONNECT_BULK_MBITS;
if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
/* OBD_CONNECT_CKSUM should always be set, even if checksums are
@@ -376,6 +377,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
+ /* always ping even if the server sets suppress_pings */
+ if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
+ data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
+
CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n",
data->ocd_connect_flags,
data->ocd_version, data->ocd_grant);
@@ -475,8 +480,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
ptlrpc_req_finished(request);
if (IS_ERR(root)) {
- if (lmd.lsm)
- obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm);
#ifdef CONFIG_FS_POSIX_ACL
if (lmd.posix_acl) {
posix_acl_release(lmd.posix_acl);
@@ -488,12 +491,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
goto out_root;
}
- err = ll_close_thread_start(&sbi->ll_lcq);
- if (err) {
- CERROR("cannot start close thread: rc %d\n", err);
- goto out_root;
- }
-
checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
KEY_CHECKSUM, sizeof(checksum), &checksum,
@@ -572,10 +569,18 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
{
int size, rc;
- *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL);
+ size = sizeof(*lmmsize);
+ rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE),
+ KEY_MAX_EASIZE, &size, lmmsize);
+ if (rc) {
+ CERROR("%s: cannot get max LOV EA size: rc = %d\n",
+ sbi->ll_dt_exp->exp_obd->obd_name, rc);
+ return rc;
+ }
+
size = sizeof(int);
rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
- KEY_MAX_EASIZE, &size, lmmsize, NULL);
+ KEY_MAX_EASIZE, &size, lmmsize);
if (rc)
CERROR("Get max mdsize error rc %d\n", rc);
@@ -599,7 +604,7 @@ int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
size = sizeof(int);
rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE),
- KEY_DEFAULT_EASIZE, &size, lmmsize, NULL);
+ KEY_DEFAULT_EASIZE, &size, lmmsize);
if (rc)
CERROR("Get default mdsize error rc %d\n", rc);
@@ -633,8 +638,6 @@ static void client_common_put_super(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
- ll_close_thread_shutdown(sbi->ll_lcq);
-
cl_sb_fini(sb);
obd_fid_fini(sbi->ll_dt_exp->exp_obd);
@@ -725,6 +728,18 @@ static int ll_options(char *options, int *flags)
*flags &= ~tmp;
goto next;
}
+ tmp = ll_set_opt("context", s1, 1);
+ if (tmp)
+ goto next;
+ tmp = ll_set_opt("fscontext", s1, 1);
+ if (tmp)
+ goto next;
+ tmp = ll_set_opt("defcontext", s1, 1);
+ if (tmp)
+ goto next;
+ tmp = ll_set_opt("rootcontext", s1, 1);
+ if (tmp)
+ goto next;
tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
if (tmp) {
*flags |= tmp;
@@ -766,11 +781,6 @@ static int ll_options(char *options, int *flags)
*flags &= ~tmp;
goto next;
}
- tmp = ll_set_opt("som_preview", s1, LL_SBI_SOM_PREVIEW);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API);
if (tmp) {
*flags |= tmp;
@@ -786,6 +796,11 @@ static int ll_options(char *options, int *flags)
*flags &= ~tmp;
goto next;
}
+ tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING);
+ if (tmp) {
+ *flags |= tmp;
+ goto next;
+ }
LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
s1);
return -EINVAL;
@@ -804,14 +819,10 @@ void ll_lli_init(struct ll_inode_info *lli)
{
lli->lli_inode_magic = LLI_INODE_MAGIC;
lli->lli_flags = 0;
- lli->lli_ioepoch = 0;
- lli->lli_maxbytes = MAX_LFS_FILESIZE;
spin_lock_init(&lli->lli_lock);
lli->lli_posix_acl = NULL;
/* Do not set lli_fid, it has been initialized already. */
fid_zero(&lli->lli_pfid);
- INIT_LIST_HEAD(&lli->lli_close_list);
- lli->lli_pending_och = NULL;
lli->lli_mds_read_och = NULL;
lli->lli_mds_write_och = NULL;
lli->lli_mds_exec_och = NULL;
@@ -820,9 +831,8 @@ void ll_lli_init(struct ll_inode_info *lli)
lli->lli_open_fd_exec_count = 0;
mutex_init(&lli->lli_och_mutex);
spin_lock_init(&lli->lli_agl_lock);
- lli->lli_has_smd = false;
spin_lock_init(&lli->lli_layout_lock);
- ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE);
+ ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
lli->lli_clob = NULL;
init_rwsem(&lli->lli_xattrs_list_rwsem);
@@ -941,10 +951,14 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
/* connections, registrations, sb setup */
err = client_common_fill_super(sb, md, dt, mnt);
+ if (!err)
+ sbi->ll_client_common_fill_super_succeeded = 1;
out_free:
kfree(md);
kfree(dt);
+ if (lprof)
+ class_put_profile(lprof);
if (err)
ll_put_super(sb);
else if (sbi->ll_flags & LL_SBI_VERBOSE)
@@ -1002,7 +1016,7 @@ void ll_put_super(struct super_block *sb)
}
}
- if (sbi->ll_lcq) {
+ if (sbi->ll_client_common_fill_super_succeeded) {
/* Only if client_common_fill_super succeeded */
client_common_put_super(sb);
}
@@ -1057,7 +1071,7 @@ struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
return inode;
}
-static void ll_dir_clear_lsm_md(struct inode *inode)
+void ll_dir_clear_lsm_md(struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
@@ -1205,16 +1219,44 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
/* set the directory layout */
if (!lli->lli_lsm_md) {
+ struct cl_attr *attr;
+
rc = ll_init_lsm_md(inode, md);
if (rc)
return rc;
- lli->lli_lsm_md = lsm;
/*
* set lsm_md to NULL, so that the freeing of lustre_md below
* will not free this lsm
*/
md->lmv = NULL;
+ lli->lli_lsm_md = lsm;
+
+ attr = kzalloc(sizeof(*attr), GFP_NOFS);
+ if (!attr)
+ return -ENOMEM;
+
+ /* validate the lsm */
+ rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr,
+ ll_md_blocking_ast);
+ if (rc) {
+ kfree(attr);
+ return rc;
+ }
+
+ if (md->body->mbo_valid & OBD_MD_FLNLINK)
+ md->body->mbo_nlink = attr->cat_nlink;
+ if (md->body->mbo_valid & OBD_MD_FLSIZE)
+ md->body->mbo_size = attr->cat_size;
+ if (md->body->mbo_valid & OBD_MD_FLATIME)
+ md->body->mbo_atime = attr->cat_atime;
+ if (md->body->mbo_valid & OBD_MD_FLCTIME)
+ md->body->mbo_ctime = attr->cat_ctime;
+ if (md->body->mbo_valid & OBD_MD_FLMTIME)
+ md->body->mbo_mtime = attr->cat_mtime;
+
+ kfree(attr);
+
CDEBUG(D_INODE, "Set lsm %p magic %x to "DFID"\n", lsm,
lsm->lsm_md_magic, PFID(ll_inode2fid(inode)));
return 0;
@@ -1272,9 +1314,6 @@ void ll_clear_inode(struct inode *inode)
LASSERT(lli->lli_opendir_pid == 0);
}
- spin_lock(&lli->lli_lock);
- ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
- spin_unlock(&lli->lli_lock);
md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
LASSERT(!lli->lli_open_fd_write_count);
@@ -1313,13 +1352,11 @@ void ll_clear_inode(struct inode *inode)
* cl_object still uses inode lsm.
*/
cl_inode_fini(inode);
- lli->lli_has_smd = false;
}
#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
-static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
- struct md_open_data **mod)
+static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
{
struct lustre_md md;
struct inode *inode = d_inode(dentry);
@@ -1332,8 +1369,7 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
if (IS_ERR(op_data))
return PTR_ERR(op_data);
- rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0,
- &request, mod);
+ rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request);
if (rc) {
ptlrpc_req_finished(request);
if (rc == -ENOENT) {
@@ -1369,48 +1405,12 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
rc = simple_setattr(dentry, &op_data->op_attr);
op_data->op_attr.ia_valid = ia_valid;
- /* Extract epoch data if obtained. */
- op_data->op_handle = md.body->mbo_handle;
- op_data->op_ioepoch = md.body->mbo_ioepoch;
-
rc = ll_update_inode(inode, &md);
ptlrpc_req_finished(request);
return rc;
}
-/* Close IO epoch and send Size-on-MDS attribute update. */
-static int ll_setattr_done_writing(struct inode *inode,
- struct md_op_data *op_data,
- struct md_open_data *mod)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc = 0;
-
- if (!S_ISREG(inode->i_mode))
- return 0;
-
- CDEBUG(D_INODE, "Epoch %llu closed on "DFID" for truncate\n",
- op_data->op_ioepoch, PFID(&lli->lli_fid));
-
- op_data->op_flags = MF_EPOCH_CLOSE;
- ll_done_writing_attr(inode, op_data);
- ll_pack_inode2opdata(inode, op_data, NULL);
-
- rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod);
- if (rc == -EAGAIN)
- /* MDS has instructed us to obtain Size-on-MDS attribute
- * from OSTs and send setattr to back to MDS.
- */
- rc = ll_som_update(inode, op_data);
- else if (rc) {
- CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n",
- ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
- PFID(ll_inode2fid(inode)), rc);
- }
- return rc;
-}
-
/* If this inode has objects allocated to it (lsm != NULL), then the OST
* object(s) determine the file size and mtime. Otherwise, the MDS will
* keep these values until such a time that objects are allocated for it.
@@ -1431,9 +1431,8 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
struct inode *inode = d_inode(dentry);
struct ll_inode_info *lli = ll_i2info(inode);
struct md_op_data *op_data = NULL;
- struct md_open_data *mod = NULL;
bool file_is_released = false;
- int rc = 0, rc1 = 0;
+ int rc = 0;
CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n",
ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode,
@@ -1503,14 +1502,33 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
* but other attributes must be set
*/
if (S_ISREG(inode->i_mode)) {
- struct lov_stripe_md *lsm;
+ struct cl_layout cl = {
+ .cl_is_released = false,
+ };
+ struct lu_env *env;
+ int refcheck;
__u32 gen;
- ll_layout_refresh(inode, &gen);
- lsm = ccc_inode_lsm_get(inode);
- if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED)
- file_is_released = true;
- ccc_inode_lsm_put(inode, lsm);
+ rc = ll_layout_refresh(inode, &gen);
+ if (rc < 0)
+ goto out;
+
+ /*
+ * XXX: this is the only place we need to know the layout type;
+ * it will be removed by a later patch. -Jinshan
+ */
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env)) {
+ rc = PTR_ERR(env);
+ goto out;
+ }
+
+ rc = cl_object_layout_get(env, lli->lli_clob, &cl);
+ cl_env_put(env, &refcheck);
+ if (rc < 0)
+ goto out;
+
+ file_is_released = cl.cl_is_released;
if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
if (file_is_released) {
@@ -1527,32 +1545,16 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
* modified, flag it.
*/
attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= LLIF_DATA_MODIFIED;
- spin_unlock(&lli->lli_lock);
op_data->op_bias |= MDS_DATA_MODIFIED;
}
}
memcpy(&op_data->op_attr, attr, sizeof(*attr));
- /* Open epoch for truncate. */
- if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import &&
- (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
- op_data->op_flags = MF_EPOCH_OPEN;
-
- rc = ll_md_setattr(dentry, op_data, &mod);
+ rc = ll_md_setattr(dentry, op_data);
if (rc)
goto out;
- /* RPC to MDT is sent, cancel data modification flag */
- if (op_data->op_bias & MDS_DATA_MODIFIED) {
- spin_lock(&lli->lli_lock);
- lli->lli_flags &= ~LLIF_DATA_MODIFIED;
- spin_unlock(&lli->lli_lock);
- }
-
- ll_ioepoch_open(lli, op_data->op_ioepoch);
if (!S_ISREG(inode->i_mode) || file_is_released) {
rc = 0;
goto out;
@@ -1568,19 +1570,11 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
* setting times to past, but it is necessary due to possible
* time de-synchronization between MDT inode and OST objects
*/
- if (attr->ia_valid & ATTR_SIZE)
- down_write(&lli->lli_trunc_sem);
- rc = cl_setattr_ost(inode, attr);
- if (attr->ia_valid & ATTR_SIZE)
- up_write(&lli->lli_trunc_sem);
+ rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, attr, 0);
}
out:
- if (op_data->op_ioepoch) {
- rc1 = ll_setattr_done_writing(inode, op_data, mod);
- if (!rc)
- rc = rc1;
- }
- ll_finish_md_op_data(op_data);
+ if (op_data)
+ ll_finish_md_op_data(op_data);
if (!S_ISDIR(inode->i_mode)) {
inode_lock(inode);
@@ -1736,19 +1730,10 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct mdt_body *body = md->body;
- struct lov_stripe_md *lsm = md->lsm;
struct ll_sb_info *sbi = ll_i2sbi(inode);
- LASSERT((lsm != NULL) == ((body->mbo_valid & OBD_MD_FLEASIZE) != 0));
- if (lsm) {
- if (!lli->lli_has_smd &&
- !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
- cl_file_inode_init(inode, md);
-
- lli->lli_maxbytes = lsm->lsm_maxbytes;
- if (lli->lli_maxbytes > MAX_LFS_FILESIZE)
- lli->lli_maxbytes = MAX_LFS_FILESIZE;
- }
+ if (body->mbo_valid & OBD_MD_FLEASIZE)
+ cl_file_inode_init(inode, md);
if (S_ISDIR(inode->i_mode)) {
int rc;
@@ -1828,48 +1813,11 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
LASSERT(fid_seq(&lli->lli_fid) != 0);
if (body->mbo_valid & OBD_MD_FLSIZE) {
- if (exp_connect_som(ll_i2mdexp(inode)) &&
- S_ISREG(inode->i_mode)) {
- struct lustre_handle lockh;
- enum ldlm_mode mode;
-
- /* As it is possible a blocking ast has been processed
- * by this time, we need to check there is an UPDATE
- * lock on the client and set LLIF_MDS_SIZE_LOCK holding
- * it.
- */
- mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE,
- &lockh, LDLM_FL_CBPENDING,
- LCK_CR | LCK_CW |
- LCK_PR | LCK_PW);
- if (mode) {
- if (lli->lli_flags & (LLIF_DONE_WRITING |
- LLIF_EPOCH_PENDING |
- LLIF_SOM_DIRTY)) {
- CERROR("%s: inode "DFID" flags %u still has size authority! do not trust the size got from MDS\n",
- sbi->ll_md_exp->exp_obd->obd_name,
- PFID(ll_inode2fid(inode)),
- lli->lli_flags);
- } else {
- /* Use old size assignment to avoid
- * deadlock bz14138 & bz14326
- */
- i_size_write(inode, body->mbo_size);
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
- spin_unlock(&lli->lli_lock);
- }
- ldlm_lock_decref(&lockh, mode);
- }
- } else {
- /* Use old size assignment to avoid
- * deadlock bz14138 & bz14326
- */
- i_size_write(inode, body->mbo_size);
+ i_size_write(inode, body->mbo_size);
- CDEBUG(D_VFSTRACE, "inode=%lu, updating i_size %llu\n",
- inode->i_ino, (unsigned long long)body->mbo_size);
- }
+ CDEBUG(D_VFSTRACE, "inode=" DFID ", updating i_size %llu\n",
+ PFID(ll_inode2fid(inode)),
+ (unsigned long long)body->mbo_size);
if (body->mbo_valid & OBD_MD_FLBLOCKS)
inode->i_blocks = body->mbo_blocks;
@@ -1877,7 +1825,7 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
if (body->mbo_valid & OBD_MD_TSTATE) {
if (body->mbo_t_state & MS_RESTORE)
- lli->lli_flags |= LLIF_FILE_RESTORING;
+ set_bit(LLIF_FILE_RESTORING, &lli->lli_flags);
}
return 0;
@@ -1892,8 +1840,6 @@ int ll_read_inode2(struct inode *inode, void *opaque)
CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
PFID(&lli->lli_fid), inode);
- LASSERT(!lli->lli_has_smd);
-
/* Core attributes from the MDS first. This is a new inode, and
* the VFS doesn't zero times in the core inode so we have to do
* it ourselves. They will be overwritten by either MDS or OST
@@ -1988,9 +1934,9 @@ int ll_iocontrol(struct inode *inode, struct file *file,
return put_user(flags, (int __user *)arg);
}
case FSFILT_IOC_SETFLAGS: {
- struct lov_stripe_md *lsm;
- struct obd_info oinfo = { };
struct md_op_data *op_data;
+ struct cl_object *obj;
+ struct iattr *attr;
if (get_user(flags, (int __user *)arg))
return -EFAULT;
@@ -2002,8 +1948,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
op_data->op_attr_flags = flags;
op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
- rc = md_setattr(sbi->ll_md_exp, op_data,
- NULL, 0, NULL, 0, &req, NULL);
+ rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &req);
ll_finish_md_op_data(op_data);
ptlrpc_req_finished(req);
if (rc)
@@ -2011,30 +1956,17 @@ int ll_iocontrol(struct inode *inode, struct file *file,
inode->i_flags = ll_ext_to_inode_flags(flags);
- lsm = ccc_inode_lsm_get(inode);
- if (!lsm_has_objects(lsm)) {
- ccc_inode_lsm_put(inode, lsm);
+ obj = ll_i2info(inode)->lli_clob;
+ if (!obj)
return 0;
- }
- oinfo.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!oinfo.oi_oa) {
- ccc_inode_lsm_put(inode, lsm);
+ attr = kzalloc(sizeof(*attr), GFP_NOFS);
+ if (!attr)
return -ENOMEM;
- }
- oinfo.oi_md = lsm;
- oinfo.oi_oa->o_oi = lsm->lsm_oi;
- oinfo.oi_oa->o_flags = flags;
- oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS |
- OBD_MD_FLGROUP;
- obdo_set_parent_fid(oinfo.oi_oa, &ll_i2info(inode)->lli_fid);
- rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL);
- kmem_cache_free(obdo_cachep, oinfo.oi_oa);
- ccc_inode_lsm_put(inode, lsm);
-
- if (rc && rc != -EPERM && rc != -EACCES)
- CERROR("osc_setattr_async fails: rc = %d\n", rc);
+ attr->ia_valid = ATTR_ATTR_FLAG;
+ rc = cl_setattr_ost(obj, attr, flags);
+ kfree(attr);
return rc;
}
default:
@@ -2164,7 +2096,6 @@ void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
return;
op_data->op_fid1 = body->mbo_fid1;
- op_data->op_ioepoch = body->mbo_ioepoch;
op_data->op_handle = body->mbo_handle;
op_data->op_mod_time = get_seconds();
md_close(exp, op_data, NULL, &close_req);
@@ -2244,17 +2175,14 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
conf.coc_opc = OBJECT_CONF_SET;
conf.coc_inode = *inode;
conf.coc_lock = lock;
- conf.u.coc_md = &md;
+ conf.u.coc_layout = md.layout;
(void)ll_layout_conf(*inode, &conf);
}
LDLM_LOCK_PUT(lock);
}
out:
- if (md.lsm)
- obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
md_free_lustre_md(sbi->ll_md_exp, &md);
-
cleanup:
if (rc != 0 && it && it->it_op & IT_OPEN)
ll_open_cleanup(sb ? sb : (*inode)->i_sb, req);
@@ -2380,8 +2308,9 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
op_data->op_default_stripe_offset = -1;
if (S_ISDIR(i1->i_mode)) {
op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
- op_data->op_default_stripe_offset =
- ll_i2info(i1)->lli_def_stripe_offset;
+ if (opc == LUSTRE_OPC_MKDIR)
+ op_data->op_default_stripe_offset =
+ ll_i2info(i1)->lli_def_stripe_offset;
}
if (i2) {
@@ -2405,8 +2334,6 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
op_data->op_cap = cfs_curproc_cap_pack();
- op_data->op_bias = 0;
- op_data->op_cli_flags = 0;
if ((opc == LUSTRE_OPC_CREATE) && name &&
filename_is_volatile(name, namelen, &op_data->op_mds))
op_data->op_bias |= MDS_CREATE_VOLATILE;
@@ -2414,10 +2341,6 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
op_data->op_mds = 0;
op_data->op_data = data;
- /* When called by ll_setattr_raw, file is i1. */
- if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED)
- op_data->op_bias |= MDS_DATA_MODIFIED;
-
return op_data;
}
@@ -2451,6 +2374,9 @@ int ll_show_options(struct seq_file *seq, struct dentry *dentry)
if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
seq_puts(seq, ",user_fid2path");
+ if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
+ seq_puts(seq, ",always_ping");
+
return 0;
}
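
The refcounted environment above is the pattern that replaces cl_env_nested_get()/cl_env_nested_put() throughout this series: take a cl_env with a refcheck cookie, and release it with the same cookie on every exit path. A minimal sketch using only calls visible in this patch; the helper name is illustrative, not part of the patch:

static int example_layout_released(struct ll_inode_info *lli, bool *released)
{
	struct cl_layout cl = {
		.cl_is_released = false,
	};
	struct lu_env *env;
	int refcheck;
	int rc;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		return PTR_ERR(env);

	rc = cl_object_layout_get(env, lli->lli_clob, &cl);
	cl_env_put(env, &refcheck);	/* always paired with cl_env_get() */
	if (rc < 0)
		return rc;

	*released = cl.cl_is_released;
	return 0;
}
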
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 436691814a5e..ee01f20d8b11 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -47,7 +47,7 @@
static const struct vm_operations_struct ll_file_vm_ops;
-void policy_from_vma(ldlm_policy_data_t *policy,
+void policy_from_vma(union ldlm_policy_data *policy,
struct vm_area_struct *vma, unsigned long addr,
size_t count)
{
@@ -80,43 +80,24 @@ struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
* API independent part for page fault initialization.
* \param vma - virtual memory area addressed to page fault
* \param env - corresponding lu_env for processing
- * \param nest - nested level
* \param index - page index corresponding to the fault.
* \param ra_flags - vma readahead flags.
*
- * \return allocated and initialized env for fault operation.
- * \retval EINVAL if env can't allocated
- * \return other error codes from cl_io_init.
+ * \return the initialized io, or an ERR_PTR from cl_io_init.
*/
static struct cl_io *
-ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
- struct cl_env_nest *nest, pgoff_t index,
- unsigned long *ra_flags)
+ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma,
+ pgoff_t index, unsigned long *ra_flags)
{
struct file *file = vma->vm_file;
struct inode *inode = file_inode(file);
struct cl_io *io;
struct cl_fault_io *fio;
- struct lu_env *env;
int rc;
- *env_ret = NULL;
if (ll_file_nolock(file))
return ERR_PTR(-EOPNOTSUPP);
- /*
- * page fault can be called when lustre IO is
- * already active for the current thread, e.g., when doing read/write
- * against user level buffer mapped from Lustre buffer. To avoid
- * stomping on existing context, optionally force an allocation of a new
- * one.
- */
- env = cl_env_nested_get(nest);
- if (IS_ERR(env))
- return ERR_PTR(-EINVAL);
-
- *env_ret = env;
-
restart:
io = vvp_env_thread_io(env);
io->ci_obj = ll_i2info(inode)->lli_clob;
@@ -155,7 +136,6 @@ restart:
if (io->ci_need_restart)
goto restart;
- cl_env_nested_put(nest, env);
io = ERR_PTR(rc);
}
@@ -169,13 +149,17 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
struct lu_env *env;
struct cl_io *io;
struct vvp_io *vio;
- struct cl_env_nest nest;
int result;
+ int refcheck;
sigset_t set;
struct inode *inode;
struct ll_inode_info *lli;
- io = ll_fault_io_init(vma, &env, &nest, vmpage->index, NULL);
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
+
+ io = ll_fault_io_init(env, vma, vmpage->index, NULL);
if (IS_ERR(io)) {
result = PTR_ERR(io);
goto out;
@@ -231,17 +215,14 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
result = -EAGAIN;
}
- if (result == 0) {
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= LLIF_DATA_MODIFIED;
- spin_unlock(&lli->lli_lock);
- }
+ if (!result)
+ set_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);
}
out_io:
cl_io_fini(env, io);
- cl_env_nested_put(&nest, env);
out:
+ cl_env_put(env, &refcheck);
CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
LASSERT(ergo(result == 0, PageLocked(vmpage)));
@@ -285,13 +266,19 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
struct vvp_io *vio = NULL;
struct page *vmpage;
unsigned long ra_flags;
- struct cl_env_nest nest;
- int result;
+ int result = 0;
int fault_ret = 0;
+ int refcheck;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
- io = ll_fault_io_init(vma, &env, &nest, vmf->pgoff, &ra_flags);
- if (IS_ERR(io))
- return to_fault_error(PTR_ERR(io));
+ io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags);
+ if (IS_ERR(io)) {
+ result = to_fault_error(PTR_ERR(io));
+ goto out;
+ }
result = io->ci_result;
if (result == 0) {
@@ -322,14 +309,15 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
}
}
cl_io_fini(env, io);
- cl_env_nested_put(&nest, env);
vma->vm_flags |= ra_flags;
+
+out:
+ cl_env_put(env, &refcheck);
if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
fault_ret |= to_fault_error(result);
- CDEBUG(D_MMAP, "%s fault %d/%d\n",
- current->comm, fault_ret, result);
+ CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
return fault_ret;
}
@@ -381,6 +369,7 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
bool retry;
int result;
+ file_update_time(vma->vm_file);
do {
retry = false;
result = ll_page_mkwrite0(vma, vmf->page, &retry);
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
index 709230571b4b..c63236580b0f 100644
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -226,7 +226,7 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name,
int namelen, loff_t hash, u64 ino,
- unsigned type)
+ unsigned int type)
{
/* It is a hack to access lde_fid for comparison with lgd_fid.
* So the input 'name' must be part of the 'lu_dirent'.
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index 23fda9d98bff..03682c10fc9e 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -1060,10 +1060,6 @@ static const struct llite_file_opcode {
"brw_read" },
{ LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
"brw_write" },
- { LPROC_LL_OSC_READ, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
- "osc_read" },
- { LPROC_LL_OSC_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
- "osc_write" },
{ LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" },
{ LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" },
{ LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" },
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index 180f35e3afd9..9426759aedc9 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -113,13 +113,18 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
if (inode->i_state & I_NEW) {
rc = ll_read_inode2(inode, md);
if (!rc && S_ISREG(inode->i_mode) &&
- !ll_i2info(inode)->lli_clob) {
- CDEBUG(D_INODE, "%s: apply lsm %p to inode "DFID"\n",
- ll_get_fsname(sb, NULL, 0), md->lsm,
- PFID(ll_inode2fid(inode)));
+ !ll_i2info(inode)->lli_clob)
rc = cl_file_inode_init(inode, md);
- }
+
if (rc) {
+ /*
+ * Let's clear directory lsm here, otherwise
+ * make_bad_inode() will reset the inode mode
+ * to regular, then ll_clear_inode will not
+ * be able to clear lsm_md
+ */
+ if (S_ISDIR(inode->i_mode))
+ ll_dir_clear_lsm_md(inode);
make_bad_inode(inode);
unlock_new_inode(inode);
iput(inode);
@@ -132,6 +137,8 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p): rc = %d\n",
PFID(&md->body->mbo_fid1), inode, rc);
if (rc) {
+ if (S_ISDIR(inode->i_mode))
+ ll_dir_clear_lsm_md(inode);
iput(inode);
inode = ERR_PTR(rc);
}
@@ -258,7 +265,9 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
struct ll_inode_info *lli = ll_i2info(inode);
spin_lock(&lli->lli_lock);
- lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
+ LTIME_S(inode->i_mtime) = 0;
+ LTIME_S(inode->i_atime) = 0;
+ LTIME_S(inode->i_ctime) = 0;
spin_unlock(&lli->lli_lock);
}
@@ -287,11 +296,39 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
hash = cl_fid_build_ino(&lli->lli_pfid,
ll_need_32bit_api(ll_i2sbi(inode)));
-
- master_inode = ilookup5(inode->i_sb, hash,
- ll_test_inode_by_fid,
- (void *)&lli->lli_pfid);
- if (master_inode && !IS_ERR(master_inode)) {
+ /*
+ * Do not look up the inode with ilookup5();
+ * doing so can deadlock:
+ *
+ * 1. Client1 sends a chmod req to MDT0; on
+ * MDT0 it enqueues the master and all of its
+ * slave locks (mdt_attr_set() ->
+ * mdt_lock_slaves()). After getting the
+ * master and stripe0 locks, it sends the
+ * enqueue req (for stripe1) to MDT1, where
+ * MDT1 finds the lock has already been
+ * granted to client2, so MDT1 sends a
+ * blocking ast to client2.
+ *
+ * 2. At the same time, client2 tries to
+ * unlink the striped dir (rm -rf
+ * striped_dir); during lookup it holds the
+ * master inode of the striped directory,
+ * whose inode state is NEW, and then tries
+ * to revalidate all of its slaves
+ * (ll_prep_inode() -> ll_iget() ->
+ * ll_read_inode2() -> ll_update_inode()),
+ * which blocks on the server side because
+ * of 1.
+ *
+ * 3. Client2 then receives the blocking_ast
+ * req and cancels the lock, but it would
+ * block in ilookup5() because the master
+ * inode state is NEW.
+ */
+ master_inode = ilookup5_nowait(inode->i_sb,
+ hash,
+ ll_test_inode_by_fid,
+ (void *)&lli->lli_pfid);
+ if (master_inode) {
ll_invalidate_negative_children(master_inode);
iput(master_inode);
}
@@ -535,6 +572,10 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
}
}
+ if (it->it_op & IT_OPEN && it->it_flags & FMODE_WRITE &&
+ dentry->d_sb->s_flags & MS_RDONLY)
+ return ERR_PTR(-EROFS);
+
if (it->it_op & IT_CREAT)
opc = LUSTRE_OPC_CREATE;
else
@@ -801,7 +842,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry,
return PTR_ERR(inode);
d_instantiate(dentry, inode);
- return 0;
+
+ return ll_init_security(dentry, inode, dir);
}
void ll_update_times(struct ptlrpc_request *request, struct inode *inode)
@@ -896,6 +938,8 @@ again:
goto err_exit;
d_instantiate(dentry, inode);
+
+ err = ll_init_security(dentry, inode, dir);
err_exit:
if (request)
ptlrpc_req_finished(request);
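
A condensed sketch of why the _nowait lookup above breaks the cycle, assuming the fs/inode.c semantics that ilookup5() waits for an I_NEW inode while ilookup5_nowait() returns it (or NULL) without waiting; the helper below is illustrative:

static struct inode *master_lookup_nowait(struct super_block *sb,
					  unsigned long hash, void *pfid)
{
	/*
	 * ilookup5() would sleep until the master inode leaves the NEW
	 * state, which never happens while step 2 above is blocked on
	 * the server; ilookup5_nowait() returns at once, so the
	 * blocking ast of step 3 can still be answered.
	 */
	return ilookup5_nowait(sb, hash, ll_test_inode_by_fid, pfid);
}
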
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index 76a6836cdf70..f10e092979fe 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -181,90 +181,73 @@ void ll_ras_enter(struct file *f)
spin_unlock(&ras->ras_lock);
}
-static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct cl_page *page,
- struct cl_object *clob, pgoff_t *max_index)
+/**
+ * Initiates read-ahead of a page with given index.
+ *
+ * \retval +ve: the page was already uptodate, so it is skipped
+ * and not added;
+ * \retval -ve: the page wasn't added to \a queue due to an error;
+ * \retval 0: the page was added to \a queue for read-ahead.
+ */
+static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue, pgoff_t index)
{
- struct page *vmpage = page->cp_vmpage;
+ enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
+ struct cl_object *clob = io->ci_obj;
+ struct inode *inode = vvp_object_inode(clob);
+ const char *msg = NULL;
+ struct cl_page *page;
struct vvp_page *vpg;
- int rc;
+ struct page *vmpage;
+ int rc = 0;
+
+ vmpage = grab_cache_page_nowait(inode->i_mapping, index);
+ if (!vmpage) {
+ which = RA_STAT_FAILED_GRAB_PAGE;
+ msg = "g_c_p_n failed";
+ rc = -EBUSY;
+ goto out;
+ }
+
+ /* Check if vmpage was truncated or reclaimed */
+ if (vmpage->mapping != inode->i_mapping) {
+ which = RA_STAT_WRONG_GRAB_PAGE;
+ msg = "g_c_p_n returned invalid page";
+ rc = -EBUSY;
+ goto out;
+ }
+
+ page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
+ if (IS_ERR(page)) {
+ which = RA_STAT_FAILED_GRAB_PAGE;
+ msg = "cl_page_find failed";
+ rc = PTR_ERR(page);
+ goto out;
+ }
- rc = 0;
- cl_page_assume(env, io, page);
lu_ref_add(&page->cp_reference, "ra", current);
+ cl_page_assume(env, io, page);
vpg = cl2vvp_page(cl_object_page_slice(clob, page));
if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
- CDEBUG(D_READA, "page index %lu, max_index: %lu\n",
- vvp_index(vpg), *max_index);
- if (*max_index == 0 || vvp_index(vpg) > *max_index)
- rc = cl_page_is_under_lock(env, io, page, max_index);
- if (rc == 0) {
- vpg->vpg_defer_uptodate = 1;
- vpg->vpg_ra_used = 0;
- cl_page_list_add(queue, page);
- rc = 1;
- } else {
- cl_page_discard(env, io, page);
- rc = -ENOLCK;
- }
+ vpg->vpg_defer_uptodate = 1;
+ vpg->vpg_ra_used = 0;
+ cl_page_list_add(queue, page);
} else {
/* skip completed pages */
cl_page_unassume(env, io, page);
+ /* This page is already uptodate; return a positive number
+ * to tell the caller about this.
+ */
+ rc = 1;
}
+
lu_ref_del(&page->cp_reference, "ra", current);
cl_page_put(env, page);
- return rc;
-}
-
-/**
- * Initiates read-ahead of a page with given index.
- *
- * \retval +ve: page was added to \a queue.
- *
- * \retval -ENOLCK: there is no extent lock for this part of a file, stop
- * read-ahead.
- *
- * \retval -ve, 0: page wasn't added to \a queue for other reason.
- */
-static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue,
- pgoff_t index, pgoff_t *max_index)
-{
- struct cl_object *clob = io->ci_obj;
- struct inode *inode = vvp_object_inode(clob);
- struct page *vmpage;
- struct cl_page *page;
- enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
- int rc = 0;
- const char *msg = NULL;
-
- vmpage = grab_cache_page_nowait(inode->i_mapping, index);
+out:
if (vmpage) {
- /* Check if vmpage was truncated or reclaimed */
- if (vmpage->mapping == inode->i_mapping) {
- page = cl_page_find(env, clob, vmpage->index,
- vmpage, CPT_CACHEABLE);
- if (!IS_ERR(page)) {
- rc = cl_read_ahead_page(env, io, queue,
- page, clob, max_index);
- if (rc == -ENOLCK) {
- which = RA_STAT_FAILED_MATCH;
- msg = "lock match failed";
- }
- } else {
- which = RA_STAT_FAILED_GRAB_PAGE;
- msg = "cl_page_find failed";
- }
- } else {
- which = RA_STAT_WRONG_GRAB_PAGE;
- msg = "g_c_p_n returned invalid page";
- }
- if (rc != 1)
+ if (rc)
unlock_page(vmpage);
put_page(vmpage);
- } else {
- which = RA_STAT_FAILED_GRAB_PAGE;
- msg = "g_c_p_n failed";
}
if (msg) {
ll_ra_stats_inc(inode, which);
@@ -379,12 +362,12 @@ static int ll_read_ahead_pages(const struct lu_env *env,
struct cl_io *io, struct cl_page_list *queue,
struct ra_io_arg *ria,
unsigned long *reserved_pages,
- unsigned long *ra_end)
+ pgoff_t *ra_end)
{
+ struct cl_read_ahead ra = { 0 };
int rc, count = 0;
bool stride_ria;
pgoff_t page_idx;
- pgoff_t max_index = 0;
LASSERT(ria);
RIA_DEBUG(ria);
@@ -393,14 +376,23 @@ static int ll_read_ahead_pages(const struct lu_env *env,
for (page_idx = ria->ria_start;
page_idx <= ria->ria_end && *reserved_pages > 0; page_idx++) {
if (ras_inside_ra_window(page_idx, ria)) {
+ if (!ra.cra_end || ra.cra_end < page_idx) {
+ cl_read_ahead_release(env, &ra);
+
+ rc = cl_io_read_ahead(env, io, page_idx, &ra);
+ if (rc < 0)
+ break;
+
+ LASSERTF(ra.cra_end >= page_idx,
+ "object: %p, indcies %lu / %lu\n",
+ io->ci_obj, ra.cra_end, page_idx);
+ }
+
/* If the page is inside the read-ahead window*/
- rc = ll_read_ahead_page(env, io, queue,
- page_idx, &max_index);
- if (rc == 1) {
+ rc = ll_read_ahead_page(env, io, queue, page_idx);
+ if (!rc) {
(*reserved_pages)--;
count++;
- } else if (rc == -ENOLCK) {
- break;
}
} else if (stride_ria) {
/* If it is not in the read-ahead window, and it is
@@ -426,19 +418,21 @@ static int ll_read_ahead_pages(const struct lu_env *env,
}
}
}
+ cl_read_ahead_release(env, &ra);
+
*ra_end = page_idx;
return count;
}
-int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct ll_readahead_state *ras,
- bool hit)
+static int ll_readahead(const struct lu_env *env, struct cl_io *io,
+ struct cl_page_list *queue,
+ struct ll_readahead_state *ras, bool hit)
{
struct vvp_io *vio = vvp_env_io(env);
struct ll_thread_info *lti = ll_env_info(env);
struct cl_attr *attr = vvp_env_thread_attr(env);
- unsigned long start = 0, end = 0, reserved;
- unsigned long ra_end, len, mlen = 0;
+ unsigned long len, mlen = 0, reserved;
+ pgoff_t ra_end, start = 0, end = 0;
struct inode *inode;
struct ra_io_arg *ria = &lti->lti_ria;
struct cl_object *clob;
@@ -464,30 +458,25 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
spin_lock(&ras->ras_lock);
- /* Enlarge the RA window to encompass the full read */
- if (vio->vui_ra_valid &&
- ras->ras_window_start + ras->ras_window_len <
- vio->vui_ra_start + vio->vui_ra_count) {
- ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count -
- ras->ras_window_start;
- }
+ /**
+ * Note: another thread might roll back ras_next_readahead
+ * if it cannot get the full count of prepared pages; see
+ * the end of this function. For stride read-ahead, make
+ * sure the offset is no less than ras_stride_offset, so
+ * that stride read-ahead works correctly.
+ */
+ if (stride_io_mode(ras))
+ start = max(ras->ras_next_readahead, ras->ras_stride_offset);
+ else
+ start = ras->ras_next_readahead;
- /* Reserve a part of the read-ahead window that we'll be issuing */
- if (ras->ras_window_len > 0) {
- /*
- * Note: other thread might rollback the ras_next_readahead,
- * if it can not get the full size of prepared pages, see the
- * end of this function. For stride read ahead, it needs to
- * make sure the offset is no less than ras_stride_offset,
- * so that stride read ahead can work correctly.
- */
- if (stride_io_mode(ras))
- start = max(ras->ras_next_readahead,
- ras->ras_stride_offset);
- else
- start = ras->ras_next_readahead;
+ if (ras->ras_window_len > 0)
end = ras->ras_window_start + ras->ras_window_len - 1;
- }
+
+ /* Enlarge the RA window to encompass the full read */
+ if (vio->vui_ra_valid &&
+ end < vio->vui_ra_start + vio->vui_ra_count - 1)
+ end = vio->vui_ra_start + vio->vui_ra_count - 1;
if (end != 0) {
unsigned long rpc_boundary;
@@ -576,8 +565,8 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
* if the region we failed to issue read-ahead on is still ahead
* of the app and behind the next index to start read-ahead from
*/
- CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu\n",
- ra_end, end, ria->ria_end);
+ CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n",
+ ra_end, end, ria->ria_end, ret);
if (ra_end != end + 1) {
ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
@@ -609,7 +598,7 @@ static void ras_reset(struct inode *inode, struct ll_readahead_state *ras,
ras->ras_consecutive_pages = 0;
ras->ras_window_len = 0;
ras_set_start(inode, ras, index);
- ras->ras_next_readahead = max(ras->ras_window_start, index);
+ ras->ras_next_readahead = max(ras->ras_window_start, index + 1);
RAS_CDEBUG(ras);
}
@@ -738,12 +727,13 @@ static void ras_increase_window(struct inode *inode,
ra->ra_max_pages_per_file);
}
-void ras_update(struct ll_sb_info *sbi, struct inode *inode,
- struct ll_readahead_state *ras, unsigned long index,
- unsigned hit)
+static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
+ struct ll_readahead_state *ras, unsigned long index,
+ enum ras_update_flags flags)
{
struct ll_ra_info *ra = &sbi->ll_ra_info;
int zero = 0, stride_detect = 0, ra_miss = 0;
+ bool hit = flags & LL_RAS_HIT;
spin_lock(&ras->ras_lock);
@@ -773,7 +763,7 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
* to for subsequent IO. The mmap case does not increment
* ras_requests and thus can never trigger this behavior.
*/
- if (ras->ras_requests == 2 && !ras->ras_request_index) {
+ if (ras->ras_requests >= 2 && !ras->ras_request_index) {
__u64 kms_pages;
kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >>
@@ -785,8 +775,7 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
if (kms_pages &&
kms_pages <= ra->ra_max_read_ahead_whole_pages) {
ras->ras_window_start = 0;
- ras->ras_last_readpage = 0;
- ras->ras_next_readahead = 0;
+ ras->ras_next_readahead = index + 1;
ras->ras_window_len = min(ra->ra_max_pages_per_file,
ra->ra_max_read_ahead_whole_pages);
goto out_unlock;
@@ -816,13 +805,20 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
if (ra_miss) {
if (index_in_stride_window(ras, index) &&
stride_io_mode(ras)) {
- /*If stride-RA hit cache miss, the stride dector
- *will not be reset to avoid the overhead of
- *redetecting read-ahead mode
- */
if (index != ras->ras_last_readpage + 1)
ras->ras_consecutive_pages = 0;
ras_reset(inode, ras, index);
+
+ /* If stride-RA hits a cache miss, the stride
+ * detector is not reset, to avoid the overhead
+ * of redetecting the read-ahead mode, on the
+ * condition that the stride window still
+ * intersects the normal sequential read-ahead
+ * window.
+ */
+ if (ras->ras_window_start <
+ ras->ras_stride_offset)
+ ras_stride_reset(ras);
RAS_CDEBUG(ras);
} else {
/* Reset both stride window and normal RA
@@ -867,8 +863,13 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
/* Trigger RA in the mmap case where ras_consecutive_requests
* is not incremented and thus can't be used to trigger RA
*/
- if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
- ras->ras_window_len = RAS_INCREASE_STEP(inode);
+ if (ras->ras_consecutive_pages >= 4 && flags & LL_RAS_MMAP) {
+ ras_increase_window(inode, ras, ra);
+ /*
+ * reset consecutive pages so that the readahead window can
+ * grow gradually.
+ */
+ ras->ras_consecutive_pages = 0;
goto out_unlock;
}
@@ -903,17 +904,17 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
struct cl_io *io;
struct cl_page *page;
struct cl_object *clob;
- struct cl_env_nest nest;
bool redirtied = false;
bool unlocked = false;
int result;
+ int refcheck;
LASSERT(PageLocked(vmpage));
LASSERT(!PageWriteback(vmpage));
LASSERT(ll_i2dtexp(inode));
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
if (IS_ERR(env)) {
result = PTR_ERR(env);
goto out;
@@ -978,7 +979,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
}
}
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
goto out;
out:
@@ -1088,6 +1089,63 @@ void ll_cl_remove(struct file *file, const struct lu_env *env)
write_unlock(&fd->fd_lock);
}
+static int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page)
+{
+ struct inode *inode = vvp_object_inode(page->cp_obj);
+ struct ll_file_data *fd = vvp_env_io(env)->vui_fd;
+ struct ll_readahead_state *ras = &fd->fd_ras;
+ struct cl_2queue *queue = &io->ci_queue;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct vvp_page *vpg;
+ int rc = 0;
+
+ vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
+ if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
+ sbi->ll_ra_info.ra_max_pages > 0) {
+ struct vvp_io *vio = vvp_env_io(env);
+ enum ras_update_flags flags = 0;
+
+ if (vpg->vpg_defer_uptodate)
+ flags |= LL_RAS_HIT;
+ if (!vio->vui_ra_valid)
+ flags |= LL_RAS_MMAP;
+ ras_update(sbi, inode, ras, vvp_index(vpg), flags);
+ }
+
+ if (vpg->vpg_defer_uptodate) {
+ vpg->vpg_ra_used = 1;
+ cl_page_export(env, page, 1);
+ }
+
+ cl_2queue_init(queue);
+ /*
+ * Add the page into the queue even when it is marked uptodate above;
+ * this will unlock it automatically as part of cl_page_list_disown().
+ */
+ cl_page_list_add(&queue->c2_qin, page);
+ if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
+ sbi->ll_ra_info.ra_max_pages > 0) {
+ int rc2;
+
+ rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
+ vpg->vpg_defer_uptodate);
+ CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n",
+ PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg));
+ }
+
+ if (queue->c2_qin.pl_nr > 0)
+ rc = cl_io_submit_rw(env, io, CRT_READ, queue);
+
+ /*
+ * Unlock unsent pages in case of error.
+ */
+ cl_page_list_disown(env, io, &queue->c2_qin);
+ cl_2queue_fini(env, queue);
+
+ return rc;
+}
+
int ll_readpage(struct file *file, struct page *vmpage)
{
struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob;
@@ -1111,7 +1169,7 @@ int ll_readpage(struct file *file, struct page *vmpage)
LASSERT(page->cp_type == CPT_CACHEABLE);
if (likely(!PageUptodate(vmpage))) {
cl_page_assume(env, io, page);
- result = cl_io_read_page(env, io, page);
+ result = ll_io_read_page(env, io, page);
} else {
/* Page from a non-object file. */
unlock_page(vmpage);
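
With cl_page_is_under_lock() gone, lock coverage is fetched in bulk through cl_io_read_ahead(). A stripped-down sketch of the loop shape in ll_read_ahead_pages() above, with the ria/stride handling elided:

struct cl_read_ahead ra = { 0 };
pgoff_t page_idx;
int rc;

for (page_idx = start; page_idx <= end; page_idx++) {
	/* Refresh the covered region only once we step past it. */
	if (!ra.cra_end || ra.cra_end < page_idx) {
		cl_read_ahead_release(env, &ra);
		rc = cl_io_read_ahead(env, io, page_idx, &ra);
		if (rc < 0)
			break;		/* no coverage: stop read-ahead */
	}
	rc = ll_read_ahead_page(env, io, queue, page_idx);
	if (!rc)	/* 0: queued; +ve: already uptodate; -ve: error */
		count++;
}
cl_read_ahead_release(env, &ra);	/* drop the last reference */
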
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 26f3a37873a7..21e06e5b514e 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -71,8 +71,6 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
struct cl_page *page;
struct cl_object *obj;
- int refcheck;
-
LASSERT(PageLocked(vmpage));
LASSERT(!PageWriteback(vmpage));
@@ -82,28 +80,27 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
* happening with locked page too
*/
if (offset == 0 && length == PAGE_SIZE) {
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- inode = vmpage->mapping->host;
- obj = ll_i2info(inode)->lli_clob;
- if (obj) {
- page = cl_vmpage_page(vmpage, obj);
- if (page) {
- cl_page_delete(env, page);
- cl_page_put(env, page);
- }
- } else {
- LASSERT(vmpage->private == 0);
+ /* See the comment in ll_releasepage() */
+ env = cl_env_percpu_get();
+ LASSERT(!IS_ERR(env));
+ inode = vmpage->mapping->host;
+ obj = ll_i2info(inode)->lli_clob;
+ if (obj) {
+ page = cl_vmpage_page(vmpage, obj);
+ if (page) {
+ cl_page_delete(env, page);
+ cl_page_put(env, page);
}
- cl_env_put(env, &refcheck);
+ } else {
+ LASSERT(vmpage->private == 0);
}
+ cl_env_percpu_put(env);
}
}
static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
{
struct lu_env *env;
- void *cookie;
struct cl_object *obj;
struct cl_page *page;
struct address_space *mapping;
@@ -129,7 +126,6 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
if (!page)
return 1;
- cookie = cl_env_reenter();
env = cl_env_percpu_get();
LASSERT(!IS_ERR(env));
@@ -155,7 +151,6 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
cl_page_put(env, page);
cl_env_percpu_put(env);
- cl_env_reexit(cookie);
return result;
}
@@ -340,19 +335,15 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
{
- struct lu_env *env;
+ struct ll_cl_context *lcc;
+ const struct lu_env *env;
struct cl_io *io;
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
loff_t file_offset = iocb->ki_pos;
ssize_t count = iov_iter_count(iter);
ssize_t tot_bytes = 0, result = 0;
- struct ll_inode_info *lli = ll_i2info(inode);
long size = MAX_DIO_SIZE;
- int refcheck;
-
- if (!lli->lli_has_smd)
- return -EBADF;
/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
@@ -367,9 +358,13 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
if (iov_iter_alignment(iter) & ~PAGE_MASK)
return -EINVAL;
- env = cl_env_get(&refcheck);
+ lcc = ll_cl_find(file);
+ if (!lcc)
+ return -EIO;
+
+ env = lcc->lcc_env;
LASSERT(!IS_ERR(env));
- io = vvp_env_io(env)->vui_cl.cis_io;
+ io = lcc->lcc_io;
LASSERT(io);
while (iov_iter_count(iter)) {
@@ -426,7 +421,6 @@ out:
vio->u.write.vui_written += tot_bytes;
}
- cl_env_put(env, &refcheck);
return tot_bytes ? tot_bytes : result;
}
@@ -466,13 +460,13 @@ static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
}
static int ll_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
+ loff_t pos, unsigned int len, unsigned int flags,
struct page **pagep, void **fsdata)
{
struct ll_cl_context *lcc;
- const struct lu_env *env;
+ const struct lu_env *env = NULL;
struct cl_io *io;
- struct cl_page *page;
+ struct cl_page *page = NULL;
struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
pgoff_t index = pos >> PAGE_SHIFT;
struct page *vmpage = NULL;
@@ -484,6 +478,7 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
lcc = ll_cl_find(file);
if (!lcc) {
+ io = NULL;
result = -EIO;
goto out;
}
@@ -560,6 +555,12 @@ out:
unlock_page(vmpage);
put_page(vmpage);
}
+ if (!IS_ERR_OR_NULL(page)) {
+ lu_ref_del(&page->cp_reference, "cl_io", io);
+ cl_page_put(env, page);
+ }
+ if (io)
+ io->ci_result = result;
} else {
*pagep = vmpage;
*fsdata = lcc;
@@ -576,7 +577,7 @@ static int ll_write_end(struct file *file, struct address_space *mapping,
struct cl_io *io;
struct vvp_io *vio;
struct cl_page *page;
- unsigned from = pos & (PAGE_SIZE - 1);
+ unsigned int from = pos & (PAGE_SIZE - 1);
bool unplug = false;
int result = 0;
@@ -629,6 +630,8 @@ static int ll_write_end(struct file *file, struct address_space *mapping,
file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
result = vvp_io_write_commit(env, io);
+ if (result < 0)
+ io->ci_result = result;
return result >= 0 ? copied : result;
}
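
In the release and invalidate paths above, which must not fail, the series uses the preallocated per-CPU environment instead of cl_env_get(). A minimal sketch of the pattern, with the cl_page handling elided:

env = cl_env_percpu_get();
LASSERT(!IS_ERR(env));	/* per-CPU envs are preallocated, no error path */
/* ... cl_vmpage_page()/cl_page_delete()/cl_page_put() under env ... */
cl_env_percpu_put(env);
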
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
index 0677513476ec..4769a2230ae1 100644
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -659,8 +659,8 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
struct ll_inode_info *lli = ll_i2info(dir);
struct ll_statahead_info *sai = lli->lli_sai;
struct sa_entry *entry = (struct sa_entry *)minfo->mi_cbdata;
+ wait_queue_head_t *waitq = NULL;
__u64 handle = 0;
- bool wakeup;
if (it_disposition(it, DISP_LOOKUP_NEG))
rc = -ENOENT;
@@ -693,7 +693,8 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
spin_lock(&lli->lli_sa_lock);
if (rc) {
- wakeup = __sa_make_ready(sai, entry, rc);
+ if (__sa_make_ready(sai, entry, rc))
+ waitq = &sai->sai_waitq;
} else {
entry->se_minfo = minfo;
entry->se_req = ptlrpc_request_addref(req);
@@ -704,13 +705,15 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
* with parent's lock held, for example: unlink.
*/
entry->se_handle = handle;
- wakeup = !sa_has_callback(sai);
+ if (!sa_has_callback(sai))
+ waitq = &sai->sai_thread.t_ctl_waitq;
+
list_add_tail(&entry->se_list, &sai->sai_interim_entries);
}
sai->sai_replied++;
- if (wakeup)
- wake_up(&sai->sai_thread.t_ctl_waitq);
+ if (waitq)
+ wake_up(waitq);
spin_unlock(&lli->lli_sa_lock);
return rc;
@@ -1397,10 +1400,10 @@ static int revalidate_statahead_dentry(struct inode *dir,
struct dentry **dentryp,
bool unplug)
{
+ struct ll_inode_info *lli = ll_i2info(dir);
struct sa_entry *entry = NULL;
struct l_wait_info lwi = { 0 };
struct ll_dentry_data *ldd;
- struct ll_inode_info *lli;
int rc = 0;
if ((*dentryp)->d_name.name[0] == '.') {
@@ -1446,7 +1449,9 @@ static int revalidate_statahead_dentry(struct inode *dir,
sa_handle_callback(sai);
if (!sa_ready(entry)) {
+ spin_lock(&lli->lli_sa_lock);
sai->sai_index_wait = entry->se_index;
+ spin_unlock(&lli->lli_sa_lock);
lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(30), NULL,
LWI_ON_SIGNAL_NOOP, NULL);
rc = l_wait_event(sai->sai_waitq, sa_ready(entry), &lwi);
@@ -1475,6 +1480,7 @@ static int revalidate_statahead_dentry(struct inode *dir,
alias = ll_splice_alias(inode, *dentryp);
if (IS_ERR(alias)) {
+ ll_intent_release(&it);
rc = PTR_ERR(alias);
goto out_unplug;
}
@@ -1493,6 +1499,7 @@ static int revalidate_statahead_dentry(struct inode *dir,
*dentryp,
PFID(ll_inode2fid((*dentryp)->d_inode)),
PFID(ll_inode2fid(inode)));
+ ll_intent_release(&it);
rc = -ESTALE;
goto out_unplug;
}
@@ -1512,7 +1519,6 @@ out_unplug:
* dentry_may_statahead().
*/
ldd = ll_d2d(*dentryp);
- lli = ll_i2info(dir);
/* ldd can be NULL if llite lookup failed. */
if (ldd)
ldd->lld_sa_generation = lli->lli_sa_generation;
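
The interpret callback above replaces the boolean wakeup flag with a pointer to whichever wait queue actually needs waking, chosen and woken while lli_sa_lock is held. A hypothetical standalone sketch of the pattern (all names illustrative):

#include <linux/spinlock.h>
#include <linux/wait.h>

static void sa_wake_if_needed(spinlock_t *lock, bool entry_failed,
			      bool has_callback,
			      wait_queue_head_t *entry_wq,
			      wait_queue_head_t *thread_wq)
{
	wait_queue_head_t *waitq = NULL;

	spin_lock(lock);
	if (entry_failed)
		waitq = entry_wq;	/* entry is ready: wake its waiter */
	else if (!has_callback)
		waitq = thread_wq;	/* first interim entry: kick thread */
	if (waitq)
		wake_up(waitq);		/* wake before dropping the lock */
	spin_unlock(lock);
}
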
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index 8aa8ecc09a48..12c129f7e4ad 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -55,7 +55,6 @@
static struct kmem_cache *ll_thread_kmem;
struct kmem_cache *vvp_lock_kmem;
struct kmem_cache *vvp_object_kmem;
-struct kmem_cache *vvp_req_kmem;
static struct kmem_cache *vvp_session_kmem;
static struct kmem_cache *vvp_thread_kmem;
@@ -76,11 +75,6 @@ static struct lu_kmem_descr vvp_caches[] = {
.ckd_size = sizeof(struct vvp_object),
},
{
- .ckd_cache = &vvp_req_kmem,
- .ckd_name = "vvp_req_kmem",
- .ckd_size = sizeof(struct vvp_req),
- },
- {
.ckd_cache = &vvp_session_kmem,
.ckd_name = "vvp_session_kmem",
.ckd_size = sizeof(struct vvp_session)
@@ -177,10 +171,6 @@ static const struct lu_device_operations vvp_lu_ops = {
.ldo_object_alloc = vvp_object_alloc
};
-static const struct cl_device_operations vvp_cl_ops = {
- .cdo_req_init = vvp_req_init
-};
-
static struct lu_device *vvp_device_free(const struct lu_env *env,
struct lu_device *d)
{
@@ -213,7 +203,6 @@ static struct lu_device *vvp_device_alloc(const struct lu_env *env,
lud = &vdv->vdv_cl.cd_lu_dev;
cl_device_init(&vdv->vdv_cl, t);
vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
- vdv->vdv_cl.cd_ops = &vvp_cl_ops;
site = kzalloc(sizeof(*site), GFP_NOFS);
if (site) {
@@ -332,7 +321,6 @@ int cl_sb_init(struct super_block *sb)
cl = cl_type_setup(env, NULL, &vvp_device_type,
sbi->ll_dt_exp->exp_obd->obd_lu_dev);
if (!IS_ERR(cl)) {
- cl2vvp_dev(cl)->vdv_sb = sb;
sbi->ll_cl = cl;
sbi->ll_site = cl2lu_dev(cl)->ld_site;
}
@@ -521,11 +509,10 @@ static void vvp_pgcache_page_show(const struct lu_env *env,
vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
vmpage = vpg->vpg_page;
- seq_printf(seq, " %5i | %p %p %s %s %s %s | %p "DFID"(%p) %lu %u [",
+ seq_printf(seq, " %5i | %p %p %s %s %s | %p " DFID "(%p) %lu %u [",
0 /* gen */,
vpg, page,
"none",
- vpg->vpg_write_queued ? "wq" : "- ",
vpg->vpg_defer_uptodate ? "du" : "- ",
PageWriteback(vmpage) ? "wb" : "-",
vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
index 4464ad258387..c60d0414ac25 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -42,9 +42,7 @@
enum obd_notify_event;
struct inode;
-struct lov_stripe_md;
struct lustre_md;
-struct obd_capa;
struct obd_device;
struct obd_export;
struct page;
@@ -122,7 +120,6 @@ extern struct lu_context_key vvp_thread_key;
extern struct kmem_cache *vvp_lock_kmem;
extern struct kmem_cache *vvp_object_kmem;
-extern struct kmem_cache *vvp_req_kmem;
struct vvp_thread_info {
struct cl_lock vti_lock;
@@ -195,14 +192,6 @@ struct vvp_object {
struct inode *vob_inode;
/**
- * A list of dirty pages pending IO in the cache. Used by
- * SOM. Protected by ll_inode_info::lli_lock.
- *
- * \see vvp_page::vpg_pending_linkage
- */
- struct list_head vob_pending_list;
-
- /**
* Number of transient pages. This is no longer protected by i_sem,
* and needs to be atomic. This is not actually used for anything,
* and can probably be removed.
@@ -235,15 +224,7 @@ struct vvp_object {
struct vvp_page {
struct cl_page_slice vpg_cl;
unsigned int vpg_defer_uptodate:1,
- vpg_ra_used:1,
- vpg_write_queued:1;
- /**
- * Non-empty iff this page is already counted in
- * vvp_object::vob_pending_list. This list is only used as a flag,
- * that is, never iterated through, only checked for list_empty(), but
- * having a list is useful for debugging.
- */
- struct list_head vpg_pending_linkage;
+ vpg_ra_used:1;
/** VM page */
struct page *vpg_page;
};
@@ -260,7 +241,6 @@ static inline pgoff_t vvp_index(struct vvp_page *vvp)
struct vvp_device {
struct cl_device vdv_cl;
- struct super_block *vdv_sb;
struct cl_device *vdv_next;
};
@@ -268,10 +248,6 @@ struct vvp_lock {
struct cl_lock_slice vlk_cl;
};
-struct vvp_req {
- struct cl_req_slice vrq_cl;
-};
-
void *ccc_key_init(const struct lu_context *ctx,
struct lu_context_key *key);
void ccc_key_fini(const struct lu_context *ctx,
@@ -325,21 +301,8 @@ static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice)
# define CLOBINVRNT(env, clob, expr) \
((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
-/**
- * New interfaces to get and put lov_stripe_md from lov layer. This violates
- * layering because lov_stripe_md is supposed to be a private data in lov.
- *
- * NB: If you find you have to use these interfaces for your new code, please
- * think about it again. These interfaces may be removed in the future for
- * better layering.
- */
-struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
-void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
int lov_read_and_clear_async_rc(struct cl_object *clob);
-struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
-void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
-
int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
struct cl_io *io);
int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
@@ -347,8 +310,6 @@ int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
struct cl_lock *lock, const struct cl_io *io);
int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
struct cl_page *page, pgoff_t index);
-int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req);
struct lu_object *vvp_object_alloc(const struct lu_env *env,
const struct lu_object_header *hdr,
struct lu_device *dev);
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 2b7f182a15e2..0b6d388d8aa4 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -72,9 +72,10 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
/* No lock is needed here to check lli_layout_gen: we hold the
* extent lock, and the GROUP lock must be held to swap the layout
*/
- if (ll_layout_version_get(lli) != vio->vui_layout_gen) {
+ if (ll_layout_version_get(lli) != vio->vui_layout_gen ||
+ OBD_FAIL_CHECK_RESET(OBD_FAIL_LLITE_LOST_LAYOUT, 0)) {
io->ci_need_restart = 1;
- /* this will return application a short read/write */
+ /* this will cause a short read/write */
io->ci_continue = 0;
rc = false;
}
@@ -328,8 +329,8 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
vio->vui_layout_gen, gen);
/* today successful restore is the only possible case */
/* restore was done, clear restoring state */
- ll_i2info(vvp_object_inode(obj))->lli_flags &=
- ~LLIF_FILE_RESTORING;
+ clear_bit(LLIF_FILE_RESTORING,
+ &ll_i2info(inode)->lli_flags);
}
}
}
@@ -369,7 +370,7 @@ static int vvp_mmap_locks(const struct lu_env *env,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct cl_lock_descr *descr = &cti->vti_descr;
- ldlm_policy_data_t policy;
+ union ldlm_policy_data policy;
unsigned long addr;
ssize_t count;
int result = 0;
@@ -450,7 +451,8 @@ static void vvp_io_advance(const struct lu_env *env,
struct vvp_io *vio = cl2vvp_io(env, ios);
CLOBINVRNT(env, obj, vvp_object_invariant(obj));
- iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob);
+ vio->vui_tot_count -= nob;
+ iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count);
}
static void vvp_io_update_iov(const struct lu_env *env,
@@ -551,9 +553,16 @@ static int vvp_io_setattr_lock(const struct lu_env *env,
if (new_size == 0)
enqflags = CEF_DISCARD_DATA;
} else {
- if ((io->u.ci_setattr.sa_attr.lvb_mtime >=
- io->u.ci_setattr.sa_attr.lvb_ctime) ||
- (io->u.ci_setattr.sa_attr.lvb_atime >=
+ unsigned int valid = io->u.ci_setattr.sa_valid;
+
+ if (!(valid & TIMES_SET_FLAGS))
+ return 0;
+
+ if ((!(valid & ATTR_MTIME) ||
+ io->u.ci_setattr.sa_attr.lvb_mtime >=
+ io->u.ci_setattr.sa_attr.lvb_ctime) &&
+ (!(valid & ATTR_ATIME) ||
+ io->u.ci_setattr.sa_attr.lvb_atime >=
io->u.ci_setattr.sa_attr.lvb_ctime))
return 0;
new_size = 0;
@@ -580,14 +589,6 @@ static int vvp_do_vmtruncate(struct inode *inode, size_t size)
return result;
}
-static int vvp_io_setattr_trunc(const struct lu_env *env,
- const struct cl_io_slice *ios,
- struct inode *inode, loff_t size)
-{
- inode_dio_wait(inode);
- return 0;
-}
-
static int vvp_io_setattr_time(const struct lu_env *env,
const struct cl_io_slice *ios)
{
@@ -618,15 +619,20 @@ static int vvp_io_setattr_start(const struct lu_env *env,
{
struct cl_io *io = ios->cis_io;
struct inode *inode = vvp_object_inode(io->ci_obj);
- int result = 0;
+ struct ll_inode_info *lli = ll_i2info(inode);
- inode_lock(inode);
- if (cl_io_is_trunc(io))
- result = vvp_io_setattr_trunc(env, ios, inode,
- io->u.ci_setattr.sa_attr.lvb_size);
- if (result == 0)
- result = vvp_io_setattr_time(env, ios);
- return result;
+ if (cl_io_is_trunc(io)) {
+ down_write(&lli->lli_trunc_sem);
+ inode_lock(inode);
+ inode_dio_wait(inode);
+ } else {
+ inode_lock(inode);
+ }
+
+ if (io->u.ci_setattr.sa_valid & TIMES_SET_FLAGS)
+ return vvp_io_setattr_time(env, ios);
+
+ return 0;
}
static void vvp_io_setattr_end(const struct lu_env *env,
@@ -634,14 +640,18 @@ static void vvp_io_setattr_end(const struct lu_env *env,
{
struct cl_io *io = ios->cis_io;
struct inode *inode = vvp_object_inode(io->ci_obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
- if (cl_io_is_trunc(io))
+ if (cl_io_is_trunc(io)) {
/* Truncate in-memory pages - they must be clean pages
* because osc has already been notified to destroy osc_extents.
*/
vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
-
- inode_unlock(inode);
+ inode_unlock(inode);
+ up_write(&lli->lli_trunc_sem);
+ } else {
+ inode_unlock(inode);
+ }
}
static void vvp_io_setattr_fini(const struct lu_env *env,
@@ -657,6 +667,7 @@ static int vvp_io_read_start(const struct lu_env *env,
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
struct inode *inode = vvp_object_inode(obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
struct file *file = vio->vui_fd->fd_file;
int result;
@@ -669,6 +680,8 @@ static int vvp_io_read_start(const struct lu_env *env,
CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
+ down_read(&lli->lli_trunc_sem);
+
if (!can_populate_pages(env, io, inode))
return 0;
@@ -770,16 +783,11 @@ static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
struct cl_page *page)
{
- struct vvp_page *vpg;
struct page *vmpage = page->cp_vmpage;
- struct cl_object *clob = cl_io_top(io)->ci_obj;
SetPageUptodate(vmpage);
set_page_dirty(vmpage);
- vpg = cl2vvp_page(cl_object_page_slice(clob, page));
- vvp_write_pending(cl2vvp(clob), vpg);
-
cl_page_disown(env, io, page);
/* held in ll_cl_init() */
@@ -899,10 +907,13 @@ static int vvp_io_write_start(const struct lu_env *env,
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
struct inode *inode = vvp_object_inode(obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
ssize_t result = 0;
loff_t pos = io->u.ci_wr.wr.crw_pos;
size_t cnt = io->u.ci_wr.wr.crw_count;
+ down_read(&lli->lli_trunc_sem);
+
if (!can_populate_pages(env, io, inode))
return 0;
@@ -921,6 +932,20 @@ static int vvp_io_write_start(const struct lu_env *env,
CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
+ /*
+ * The maximum Lustre file size is variable, based on the OST maximum
+ * object size and number of stripes. This needs another check in
+ * addition to the VFS checks earlier.
+ */
+ if (pos + cnt > ll_file_maxbytes(inode)) {
+ CDEBUG(D_INODE,
+ "%s: file " DFID " offset %llu > maxbytes %llu\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), pos + cnt,
+ ll_file_maxbytes(inode));
+ return -EFBIG;
+ }
+
if (!vio->vui_iter) {
/* from a temp io in ll_cl_init(). */
result = 0;
@@ -957,11 +982,7 @@ static int vvp_io_write_start(const struct lu_env *env,
}
}
if (result > 0) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- spin_lock(&lli->lli_lock);
- lli->lli_flags |= LLIF_DATA_MODIFIED;
- spin_unlock(&lli->lli_lock);
+ set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags);
if (result < cnt)
io->ci_continue = 0;
@@ -972,6 +993,15 @@ static int vvp_io_write_start(const struct lu_env *env,
return result;
}
+static void vvp_io_rw_end(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct inode *inode = vvp_object_inode(ios->cis_obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ up_read(&lli->lli_trunc_sem);
+}
+
static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
{
struct vm_fault *vmf = cfio->ft_vmf;
@@ -1014,13 +1044,7 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
struct cl_page *page)
{
- struct vvp_page *vpg;
- struct cl_object *clob = cl_io_top(io)->ci_obj;
-
set_page_dirty(page->cp_vmpage);
-
- vpg = cl2vvp_page(cl_object_page_slice(clob, page));
- vvp_write_pending(cl2vvp(clob), vpg);
}
static int vvp_io_fault_start(const struct lu_env *env,
@@ -1030,6 +1054,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
struct inode *inode = vvp_object_inode(obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
struct cl_fault_io *fio = &io->u.ci_fault;
struct vvp_fault_io *cfio = &vio->u.fault;
loff_t offset;
@@ -1039,11 +1064,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
loff_t size;
pgoff_t last_index;
- if (fio->ft_executable &&
- inode->i_mtime.tv_sec != vio->u.fault.ft_mtime)
- CWARN("binary "DFID
- " changed while waiting for the page fault lock\n",
- PFID(lu_object_fid(&obj->co_lu)));
+ down_read(&lli->lli_trunc_sem);
/* offset of the last byte on the page */
offset = cl_offset(obj, fio->ft_index + 1) - 1;
@@ -1192,6 +1213,17 @@ out:
return result;
}
+static void vvp_io_fault_end(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct inode *inode = vvp_object_inode(ios->cis_obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ CLOBINVRNT(env, ios->cis_io->ci_obj,
+ vvp_object_invariant(ios->cis_io->ci_obj));
+ up_read(&lli->lli_trunc_sem);
+}
+
static int vvp_io_fsync_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
@@ -1202,46 +1234,23 @@ static int vvp_io_fsync_start(const struct lu_env *env,
return 0;
}
-static int vvp_io_read_page(const struct lu_env *env,
- const struct cl_io_slice *ios,
- const struct cl_page_slice *slice)
+static int vvp_io_read_ahead(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ pgoff_t start, struct cl_read_ahead *ra)
{
- struct cl_io *io = ios->cis_io;
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct cl_page *page = slice->cpl_page;
- struct inode *inode = vvp_object_inode(slice->cpl_obj);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_file_data *fd = cl2vvp_io(env, ios)->vui_fd;
- struct ll_readahead_state *ras = &fd->fd_ras;
- struct cl_2queue *queue = &io->ci_queue;
-
- if (sbi->ll_ra_info.ra_max_pages_per_file &&
- sbi->ll_ra_info.ra_max_pages)
- ras_update(sbi, inode, ras, vvp_index(vpg),
- vpg->vpg_defer_uptodate);
-
- if (vpg->vpg_defer_uptodate) {
- vpg->vpg_ra_used = 1;
- cl_page_export(env, page, 1);
- }
- /*
- * Add page into the queue even when it is marked uptodate above.
- * this will unlock it automatically as part of cl_page_list_disown().
- */
+ int result = 0;
- cl_page_list_add(&queue->c2_qin, page);
- if (sbi->ll_ra_info.ra_max_pages_per_file &&
- sbi->ll_ra_info.ra_max_pages)
- ll_readahead(env, io, &queue->c2_qin, ras,
- vpg->vpg_defer_uptodate);
+ if (ios->cis_io->ci_type == CIT_READ ||
+ ios->cis_io->ci_type == CIT_FAULT) {
+ struct vvp_io *vio = cl2vvp_io(env, ios);
- return 0;
-}
+ if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+ ra->cra_end = CL_PAGE_EOF;
+ result = 1; /* no need to call down */
+ }
+ }
-static void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- CLOBINVRNT(env, ios->cis_io->ci_obj,
- vvp_object_invariant(ios->cis_io->ci_obj));
+ return result;
}
static const struct cl_io_operations vvp_io_ops = {
@@ -1250,6 +1259,7 @@ static const struct cl_io_operations vvp_io_ops = {
.cio_fini = vvp_io_fini,
.cio_lock = vvp_io_read_lock,
.cio_start = vvp_io_read_start,
+ .cio_end = vvp_io_rw_end,
.cio_advance = vvp_io_advance,
},
[CIT_WRITE] = {
@@ -1258,6 +1268,7 @@ static const struct cl_io_operations vvp_io_ops = {
.cio_iter_fini = vvp_io_write_iter_fini,
.cio_lock = vvp_io_write_lock,
.cio_start = vvp_io_write_start,
+ .cio_end = vvp_io_rw_end,
.cio_advance = vvp_io_advance,
},
[CIT_SETATTR] = {
@@ -1272,7 +1283,7 @@ static const struct cl_io_operations vvp_io_ops = {
.cio_iter_init = vvp_io_fault_iter_init,
.cio_lock = vvp_io_fault_lock,
.cio_start = vvp_io_fault_start,
- .cio_end = vvp_io_end,
+ .cio_end = vvp_io_fault_end,
},
[CIT_FSYNC] = {
.cio_start = vvp_io_fsync_start,
@@ -1282,7 +1293,7 @@ static const struct cl_io_operations vvp_io_ops = {
.cio_fini = vvp_io_fini
}
},
- .cio_read_page = vvp_io_read_page,
+ .cio_read_ahead = vvp_io_read_ahead,
};
int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
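
Taken together, the hunks above establish the following lli_trunc_sem protocol; this summary is inferred from the patch, not quoted from it:

/*
 * CIT_SETATTR (truncate): down_write() in vvp_io_setattr_start(),
 *                         up_write()   in vvp_io_setattr_end()
 * CIT_READ / CIT_WRITE:   down_read()  in vvp_io_{read,write}_start(),
 *                         up_read()    in vvp_io_rw_end()
 * CIT_FAULT:              down_read()  in vvp_io_fault_start(),
 *                         up_read()    in vvp_io_fault_end()
 *
 * Truncate is thus the only writer; the page IO paths are readers, so
 * a truncate cannot overlap reads, writes or faults on the same inode.
 */
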
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
index b57195d15674..8e18cf86cefc 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_object.c
@@ -65,8 +65,7 @@ static int vvp_object_print(const struct lu_env *env, void *cookie,
struct inode *inode = obj->vob_inode;
struct ll_inode_info *lli;
- (*p)(env, cookie, "(%s %d %d) inode: %p ",
- list_empty(&obj->vob_pending_list) ? "-" : "+",
+ (*p)(env, cookie, "(%d %d) inode: %p ",
atomic_read(&obj->vob_transient_pages),
atomic_read(&obj->vob_mmap_cnt), inode);
if (inode) {
@@ -133,7 +132,7 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
CDEBUG(D_VFSTRACE, DFID ": losing layout lock\n",
PFID(&lli->lli_fid));
- ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE);
+ ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
/* Clean up page mmap for this inode.
* The reason for us to do this is that if the page has
@@ -146,27 +145,8 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
*/
unmap_mapping_range(conf->coc_inode->i_mapping,
0, OBD_OBJECT_EOF, 0);
-
- return 0;
}
- if (conf->coc_opc != OBJECT_CONF_SET)
- return 0;
-
- if (conf->u.coc_md && conf->u.coc_md->lsm) {
- CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n",
- PFID(&lli->lli_fid), lli->lli_layout_gen,
- conf->u.coc_md->lsm->lsm_layout_gen);
-
- lli->lli_has_smd = lsm_has_objects(conf->u.coc_md->lsm);
- ll_layout_version_set(lli, conf->u.coc_md->lsm->lsm_layout_gen);
- } else {
- CDEBUG(D_VFSTRACE, DFID ": layout nuked: %u.\n",
- PFID(&lli->lli_fid), lli->lli_layout_gen);
-
- lli->lli_has_smd = false;
- ll_layout_version_set(lli, LL_LAYOUT_GEN_EMPTY);
- }
return 0;
}
@@ -204,6 +184,26 @@ static int vvp_object_glimpse(const struct lu_env *env,
return 0;
}
+static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj,
+ struct cl_req_attr *attr)
+{
+ u64 valid_flags = OBD_MD_FLTYPE;
+ struct inode *inode;
+ struct obdo *oa;
+
+ oa = attr->cra_oa;
+ inode = vvp_object_inode(obj);
+
+ if (attr->cra_type == CRT_WRITE)
+ valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+ OBD_MD_FLUID | OBD_MD_FLGID;
+ obdo_from_inode(oa, inode, valid_flags & attr->cra_flags);
+ obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
+ if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
+ oa->o_parent_oid++;
+ memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, LUSTRE_JOBID_SIZE);
+}
+
static const struct cl_object_operations vvp_ops = {
.coo_page_init = vvp_page_init,
.coo_lock_init = vvp_lock_init,
@@ -212,7 +212,8 @@ static const struct cl_object_operations vvp_ops = {
.coo_attr_update = vvp_attr_update,
.coo_conf_set = vvp_conf_set,
.coo_prune = vvp_prune,
- .coo_glimpse = vvp_object_glimpse
+ .coo_glimpse = vvp_object_glimpse,
+ .coo_req_attr_set = vvp_req_attr_set
};
static int vvp_object_init0(const struct lu_env *env,
@@ -240,7 +241,6 @@ static int vvp_object_init(const struct lu_env *env, struct lu_object *obj,
const struct cl_object_conf *cconf;
cconf = lu2cl_conf(conf);
- INIT_LIST_HEAD(&vob->vob_pending_list);
lu_object_add(obj, below);
result = vvp_object_init0(env, vob, cconf);
} else {
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 046e84d7a158..23d66308ff20 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -162,13 +162,10 @@ static void vvp_page_delete(const struct lu_env *env,
LASSERT((struct cl_page *)vmpage->private == page);
LASSERT(inode == vvp_object_inode(obj));
- vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice));
-
/* Drop the reference count held in vvp_page_init */
refc = atomic_dec_return(&page->cp_ref);
LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
- ClearPageUptodate(vmpage);
ClearPagePrivate(vmpage);
vmpage->private = 0;
/*
@@ -221,8 +218,6 @@ static int vvp_page_prep_write(const struct lu_env *env,
if (!pg->cp_sync_io)
set_page_writeback(vmpage);
- vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
-
return 0;
}
@@ -287,19 +282,6 @@ static void vvp_page_completion_write(const struct lu_env *env,
CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);
- /*
- * TODO: Actually it makes sense to add the page into oap pending
- * list again and so that we don't need to take the page out from
- * SoM write pending list, if we just meet a recoverable error,
- * -ENOMEM, etc.
- * To implement this, we just need to return a non zero value in
- * ->cpo_completion method. The underlying transfer should be notified
- * and then re-add the page into pending transfer queue. -jay
- */
-
- vpg->vpg_write_queued = 0;
- vvp_write_complete(cl2vvp(slice->cpl_obj), vpg);
-
if (pg->cp_sync_io) {
LASSERT(PageLocked(vmpage));
LASSERT(!PageWriteback(vmpage));
@@ -341,7 +323,6 @@ static int vvp_page_make_ready(const struct lu_env *env,
LASSERT(pg->cp_state == CPS_CACHED);
/* This actually clears the dirty bit in the radix tree. */
set_page_writeback(vmpage);
- vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
} else if (pg->cp_state == CPS_PAGEOUT) {
/* is it possible for osc_flush_async_page() to already
@@ -357,20 +338,6 @@ static int vvp_page_make_ready(const struct lu_env *env,
return result;
}
-static int vvp_page_is_under_lock(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *io, pgoff_t *max_index)
-{
- if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
- io->ci_type == CIT_FAULT) {
- struct vvp_io *vio = vvp_env_io(env);
-
- if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED))
- *max_index = CL_PAGE_EOF;
- }
- return 0;
-}
-
static int vvp_page_print(const struct lu_env *env,
const struct cl_page_slice *slice,
void *cookie, lu_printer_t printer)
@@ -378,9 +345,8 @@ static int vvp_page_print(const struct lu_env *env,
struct vvp_page *vpg = cl2vvp_page(slice);
struct page *vmpage = vpg->vpg_page;
- (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ",
- vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used,
- vpg->vpg_write_queued, vmpage);
+ (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d) vm@%p ",
+ vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, vmpage);
if (vmpage) {
(*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
(long)vmpage->flags, page_count(vmpage),
@@ -416,7 +382,6 @@ static const struct cl_page_operations vvp_page_ops = {
.cpo_is_vmlocked = vvp_page_is_vmlocked,
.cpo_fini = vvp_page_fini,
.cpo_print = vvp_page_print,
- .cpo_is_under_lock = vvp_page_is_under_lock,
.io = {
[CRT_READ] = {
.cpo_prep = vvp_page_prep_read,
@@ -515,7 +480,6 @@ static const struct cl_page_operations vvp_transient_page_ops = {
.cpo_fini = vvp_transient_page_fini,
.cpo_is_vmlocked = vvp_transient_page_is_vmlocked,
.cpo_print = vvp_page_print,
- .cpo_is_under_lock = vvp_page_is_under_lock,
.io = {
[CRT_READ] = {
.cpo_prep = vvp_transient_page_prep,
@@ -539,7 +503,6 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
vpg->vpg_page = vmpage;
get_page(vmpage);
- INIT_LIST_HEAD(&vpg->vpg_pending_linkage);
if (page->cp_type == CPT_CACHEABLE) {
/* in cache, decref in vvp_page_delete */
atomic_inc(&page->cp_ref);
diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c
deleted file mode 100644
index e3f4c790d646..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_req.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2014, Intel Corporation.
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "../include/lustre/lustre_idl.h"
-#include "../include/cl_object.h"
-#include "../include/obd.h"
-#include "../include/obd_support.h"
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-static inline struct vvp_req *cl2vvp_req(const struct cl_req_slice *slice)
-{
- return container_of0(slice, struct vvp_req, vrq_cl);
-}
-
-/**
- * Implementation of struct cl_req_operations::cro_attr_set() for VVP
- * layer. VVP is responsible for
- *
- * - o_[mac]time
- *
- * - o_mode
- *
- * - o_parent_seq
- *
- * - o_[ug]id
- *
- * - o_parent_oid
- *
- * - o_parent_ver
- *
- * - o_ioepoch,
- *
- */
-static void vvp_req_attr_set(const struct lu_env *env,
- const struct cl_req_slice *slice,
- const struct cl_object *obj,
- struct cl_req_attr *attr, u64 flags)
-{
- struct inode *inode;
- struct obdo *oa;
- u32 valid_flags;
-
- oa = attr->cra_oa;
- inode = vvp_object_inode(obj);
- valid_flags = OBD_MD_FLTYPE;
-
- if (slice->crs_req->crq_type == CRT_WRITE) {
- if (flags & OBD_MD_FLEPOCH) {
- oa->o_valid |= OBD_MD_FLEPOCH;
- oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch;
- valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
- OBD_MD_FLUID | OBD_MD_FLGID;
- }
- }
- obdo_from_inode(oa, inode, valid_flags & flags);
- obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
- if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
- oa->o_parent_oid++;
- memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid,
- LUSTRE_JOBID_SIZE);
-}
-
-static void vvp_req_completion(const struct lu_env *env,
- const struct cl_req_slice *slice, int ioret)
-{
- struct vvp_req *vrq;
-
- if (ioret > 0)
- cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret);
-
- vrq = cl2vvp_req(slice);
- kmem_cache_free(vvp_req_kmem, vrq);
-}
-
-static const struct cl_req_operations vvp_req_ops = {
- .cro_attr_set = vvp_req_attr_set,
- .cro_completion = vvp_req_completion
-};
-
-int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req)
-{
- struct vvp_req *vrq;
- int result;
-
- vrq = kmem_cache_zalloc(vvp_req_kmem, GFP_NOFS);
- if (vrq) {
- cl_req_slice_add(req, &vrq->vrq_cl, dev, &vvp_req_ops);
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index e070adb7a3cc..7a848ebc57c1 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -44,48 +44,39 @@
#include "llite_internal.h"
-static
-int get_xattr_type(const char *name)
+const struct xattr_handler *get_xattr_type(const char *name)
{
- if (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS))
- return XATTR_ACL_ACCESS_T;
+ int i = 0;
- if (!strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT))
- return XATTR_ACL_DEFAULT_T;
+ while (ll_xattr_handlers[i]) {
+ size_t len = strlen(ll_xattr_handlers[i]->prefix);
- if (!strncmp(name, XATTR_USER_PREFIX,
- sizeof(XATTR_USER_PREFIX) - 1))
- return XATTR_USER_T;
-
- if (!strncmp(name, XATTR_TRUSTED_PREFIX,
- sizeof(XATTR_TRUSTED_PREFIX) - 1))
- return XATTR_TRUSTED_T;
-
- if (!strncmp(name, XATTR_SECURITY_PREFIX,
- sizeof(XATTR_SECURITY_PREFIX) - 1))
- return XATTR_SECURITY_T;
-
- if (!strncmp(name, XATTR_LUSTRE_PREFIX,
- sizeof(XATTR_LUSTRE_PREFIX) - 1))
- return XATTR_LUSTRE_T;
-
- return XATTR_OTHER_T;
+ if (!strncmp(ll_xattr_handlers[i]->prefix, name, len))
+ return ll_xattr_handlers[i];
+ i++;
+ }
+ return NULL;
}
-static
-int xattr_type_filter(struct ll_sb_info *sbi, int xattr_type)
+static int xattr_type_filter(struct ll_sb_info *sbi,
+ const struct xattr_handler *handler)
{
- if ((xattr_type == XATTR_ACL_ACCESS_T ||
- xattr_type == XATTR_ACL_DEFAULT_T) &&
+ /* No handler means XATTR_OTHER_T */
+ if (!handler)
+ return -EOPNOTSUPP;
+
+ if ((handler->flags == XATTR_ACL_ACCESS_T ||
+ handler->flags == XATTR_ACL_DEFAULT_T) &&
!(sbi->ll_flags & LL_SBI_ACL))
return -EOPNOTSUPP;
- if (xattr_type == XATTR_USER_T && !(sbi->ll_flags & LL_SBI_USER_XATTR))
+ if (handler->flags == XATTR_USER_T &&
+ !(sbi->ll_flags & LL_SBI_USER_XATTR))
return -EOPNOTSUPP;
- if (xattr_type == XATTR_TRUSTED_T && !capable(CFS_CAP_SYS_ADMIN))
+
+ if (handler->flags == XATTR_TRUSTED_T &&
+ !capable(CFS_CAP_SYS_ADMIN))
return -EPERM;
- if (xattr_type == XATTR_OTHER_T)
- return -EOPNOTSUPP;
return 0;
}
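
get_xattr_type() now returns the matching xattr_handler instead of an enum, so callers can reuse handler->flags and handler->set directly. A small self-contained sketch of the same prefix-table lookup, assuming an illustrative handler table (the kernel one is ll_xattr_handlers):

#include <stdio.h>
#include <string.h>

struct handler {
	const char *prefix;
	int flags;
};

/* illustrative table contents */
static const struct handler handlers[] = {
	{ "user.",	1 },
	{ "trusted.",	2 },
	{ "security.",	3 },
	{ NULL,		0 },	/* terminator */
};

static const struct handler *lookup(const char *name)
{
	int i;

	for (i = 0; handlers[i].prefix; i++) {
		size_t len = strlen(handlers[i].prefix);

		if (!strncmp(handlers[i].prefix, name, len))
			return &handlers[i];
	}
	return NULL;	/* "XATTR_OTHER_T": the filter maps this to -EOPNOTSUPP */
}

int main(void)
{
	const struct handler *h = lookup("user.foo");

	printf("flags = %d\n", h ? h->flags : -1);	/* flags = 1 */
	return 0;
}
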
@@ -111,7 +102,7 @@ ll_xattr_set_common(const struct xattr_handler *handler,
valid = OBD_MD_FLXATTR;
}
- rc = xattr_type_filter(sbi, handler->flags);
+ rc = xattr_type_filter(sbi, handler);
if (rc)
return rc;
@@ -121,8 +112,9 @@ ll_xattr_set_common(const struct xattr_handler *handler,
return -EPERM;
/* b10667: ignore lustre special xattr for now */
- if ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) ||
- (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov")))
+ if (!strcmp(name, "hsm") ||
+ ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) ||
+ (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov"))))
return 0;
/* b15587: ignore security.capability xattr for now */
@@ -135,6 +127,11 @@ ll_xattr_set_common(const struct xattr_handler *handler,
strcmp(name, "selinux") == 0)
return -EOPNOTSUPP;
+ /* FIXME: enable IMA when the conditions are ready */
+ if (handler->flags == XATTR_SECURITY_T &&
+ (!strcmp(name, "ima") || !strcmp(name, "evm")))
+ return -EOPNOTSUPP;
+
sprintf(fullname, "%s%s\n", handler->prefix, name);
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
valid, fullname, pv, size, 0, flags,
@@ -151,6 +148,37 @@ ll_xattr_set_common(const struct xattr_handler *handler,
return 0;
}
+static int get_hsm_state(struct inode *inode, u32 *hus_states)
+{
+ struct md_op_data *op_data;
+ struct hsm_user_state *hus;
+ int rc;
+
+ hus = kzalloc(sizeof(*hus), GFP_NOFS);
+ if (!hus)
+ return -ENOMEM;
+
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, hus);
+ if (!IS_ERR(op_data)) {
+ rc = obd_iocontrol(LL_IOC_HSM_STATE_GET, ll_i2mdexp(inode),
+ sizeof(*op_data), op_data, NULL);
+ if (!rc)
+ *hus_states = hus->hus_states;
+ else
+ CDEBUG(D_VFSTRACE, "obd_iocontrol failed. rc = %d\n",
+ rc);
+
+ ll_finish_md_op_data(op_data);
+ } else {
+ rc = PTR_ERR(op_data);
+ CDEBUG(D_VFSTRACE, "Could not prepare the opdata. rc = %d\n",
+ rc);
+ }
+ kfree(hus);
+ return rc;
+}
+
static int ll_xattr_set(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, const void *value, size_t size,
@@ -187,6 +215,31 @@ static int ll_xattr_set(const struct xattr_handler *handler,
if (lump && lump->lmm_stripe_offset == 0)
lump->lmm_stripe_offset = -1;
+ /* Avoid anyone directly setting the RELEASED flag. */
+ if (lump && (lump->lmm_pattern & LOV_PATTERN_F_RELEASED)) {
+ /* Only when the released flag is requested, check whether
+ * the file was indeed archived.
+ */
+ u32 state = HS_NONE;
+
+ rc = get_hsm_state(inode, &state);
+ if (rc)
+ return rc;
+
+ if (!(state & HS_ARCHIVED)) {
+ CDEBUG(D_VFSTRACE,
+ "hus_states state = %x, pattern = %x\n",
+ state, lump->lmm_pattern);
+ /*
+ * The real file is not archived, but the user is
+ * requesting to set the RELEASED flag, so mask the
+ * released flag off the request
+ */
+ lump->lmm_pattern ^= LOV_PATTERN_F_RELEASED;
+ }
+ }
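
Note the clear above relies on XOR: it works only because the branch guarantees LOV_PATTERN_F_RELEASED is set. A tiny sketch contrasting it with the unconditional AND-NOT form (the flag value is illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int released = 0x80000000u; /* stand-in for LOV_PATTERN_F_RELEASED */
	unsigned int pattern = released;

	pattern ^= released;	/* clears only because the bit was set */
	pattern &= ~released;	/* clears regardless of prior state */
	printf("%#x\n", pattern);
	return 0;
}
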
+
if (lump && S_ISREG(inode->i_mode)) {
__u64 it_flags = FMODE_WRITE;
int lum_size;
@@ -225,7 +278,8 @@ ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
void *xdata;
int rc;
- if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T) {
+ if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T &&
+ (type != XATTR_SECURITY_T || strcmp(name, "security.selinux"))) {
rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
if (rc == -EAGAIN)
goto getxattr_nocache;
@@ -313,7 +367,7 @@ static int ll_xattr_get_common(const struct xattr_handler *handler,
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
- rc = xattr_type_filter(sbi, handler->flags);
+ rc = xattr_type_filter(sbi, handler);
if (rc)
return rc;
@@ -353,80 +407,99 @@ static int ll_xattr_get_common(const struct xattr_handler *handler,
OBD_MD_FLXATTR);
}
-static int ll_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
+static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size)
{
- LASSERT(inode);
- LASSERT(name);
+ ssize_t rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
- PFID(ll_inode2fid(inode)), inode, name);
+ if (S_ISREG(inode->i_mode)) {
+ struct cl_object *obj = ll_i2info(inode)->lli_clob;
+ struct cl_layout cl = {
+ .cl_buf.lb_buf = buf,
+ .cl_buf.lb_len = buf_size,
+ };
+ struct lu_env *env;
+ int refcheck;
+
+ if (!obj)
+ return -ENODATA;
- if (!strcmp(name, "lov")) {
- struct lov_stripe_md *lsm;
- struct lov_user_md *lump;
- struct lov_mds_md *lmm = NULL;
- struct ptlrpc_request *request = NULL;
- int rc = 0, lmmsize = 0;
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return -ENODATA;
+ rc = cl_object_layout_get(env, obj, &cl);
+ if (rc < 0)
+ goto out_env;
- lsm = ccc_inode_lsm_get(inode);
- if (!lsm) {
- if (S_ISDIR(inode->i_mode)) {
- rc = ll_dir_getstripe(inode, (void **)&lmm,
- &lmmsize, &request, 0);
- } else {
- rc = -ENODATA;
- }
- } else {
- /* LSM is present already after lookup/getattr call.
- * we need to grab layout lock once it is implemented
- */
- rc = obd_packmd(ll_i2dtexp(inode), &lmm, lsm);
- lmmsize = rc;
+ if (!cl.cl_size) {
+ rc = -ENODATA;
+ goto out_env;
}
- ccc_inode_lsm_put(inode, lsm);
+ rc = cl.cl_size;
+
+ if (!buf_size)
+ goto out_env;
+
+ LASSERT(buf && rc <= buf_size);
+
+ /*
+ * Do not return layout gen for getxattr() since
+ * otherwise it would confuse tar --xattr by
+ * recognizing layout gen as stripe offset when the
+ * file is restored. See LU-2809.
+ */
+ ((struct lov_mds_md *)buf)->lmm_layout_gen = 0;
+out_env:
+ cl_env_put(env, &refcheck);
+
+ return rc;
+ } else if (S_ISDIR(inode->i_mode)) {
+ struct ptlrpc_request *req = NULL;
+ struct lov_mds_md *lmm = NULL;
+ int lmm_size = 0;
+
+ rc = ll_dir_getstripe(inode, (void **)&lmm, &lmm_size,
+ &req, 0);
if (rc < 0)
- goto out;
+ goto out_req;
- if (size == 0) {
- /* used to call ll_get_max_mdsize() forward to get
- * the maximum buffer size, while some apps (such as
- * rsync 3.0.x) care much about the exact xattr value
- * size
- */
- rc = lmmsize;
- goto out;
+ if (!buf_size) {
+ rc = lmm_size;
+ goto out_req;
}
- if (size < lmmsize) {
- CERROR("server bug: replied size %d > %d for %pd (%s)\n",
- lmmsize, (int)size, dentry, name);
+ if (buf_size < lmm_size) {
rc = -ERANGE;
- goto out;
+ goto out_req;
}
- lump = buffer;
- memcpy(lump, lmm, lmmsize);
- /* do not return layout gen for getxattr otherwise it would
- * confuse tar --xattr by recognizing layout gen as stripe
- * offset when the file is restored. See LU-2809.
- */
- lump->lmm_layout_gen = 0;
+ memcpy(buf, lmm, lmm_size);
+ rc = lmm_size;
+out_req:
+ if (req)
+ ptlrpc_req_finished(req);
- rc = lmmsize;
-out:
- if (request)
- ptlrpc_req_finished(request);
- else if (lmm)
- obd_free_diskmd(ll_i2dtexp(inode), &lmm);
return rc;
+ } else {
+ return -ENODATA;
+ }
+}
+
+static int ll_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ LASSERT(inode);
+ LASSERT(name);
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
+ PFID(ll_inode2fid(inode)), inode, name);
+
+ if (!strcmp(name, "lov")) {
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
+
+ return ll_getxattr_lov(inode, buffer, size);
}
return ll_xattr_get_common(handler, dentry, inode, name, buffer, size);
@@ -435,10 +508,10 @@ out:
ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct inode *inode = d_inode(dentry);
- int rc = 0, rc2 = 0;
- struct lov_mds_md *lmm = NULL;
- struct ptlrpc_request *request = NULL;
- int lmmsize;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ char *xattr_name;
+ ssize_t rc, rc2;
+ size_t len, rem;
LASSERT(inode);
@@ -450,65 +523,48 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
rc = ll_xattr_list(inode, NULL, XATTR_OTHER_T, buffer, size,
OBD_MD_FLXATTRLS);
if (rc < 0)
- goto out;
-
- if (buffer) {
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- char *xattr_name = buffer;
- int xlen, rem = rc;
-
- while (rem > 0) {
- xlen = strnlen(xattr_name, rem - 1) + 1;
- rem -= xlen;
- if (xattr_type_filter(sbi,
- get_xattr_type(xattr_name)) == 0) {
- /* skip OK xattr type
- * leave it in buffer
- */
- xattr_name += xlen;
- continue;
- }
- /* move up remaining xattrs in buffer
- * removing the xattr that is not OK
- */
- memmove(xattr_name, xattr_name + xlen, rem);
- rc -= xlen;
+ return rc;
+ /*
+ * If we're being called to get the size of the xattr list
+ * (size == 0), then just assume that a lustre.lov xattr
+ * exists.
+ */
+ if (!size)
+ return rc + sizeof(XATTR_LUSTRE_LOV);
+
+ xattr_name = buffer;
+ rem = rc;
+
+ while (rem > 0) {
+ len = strnlen(xattr_name, rem - 1) + 1;
+ rem -= len;
+ if (!xattr_type_filter(sbi, get_xattr_type(xattr_name))) {
+ /* Skip an OK xattr type; leave it in the buffer */
+ xattr_name += len;
+ continue;
}
- }
- if (S_ISREG(inode->i_mode)) {
- if (!ll_i2info(inode)->lli_has_smd)
- rc2 = -1;
- } else if (S_ISDIR(inode->i_mode)) {
- rc2 = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize,
- &request, 0);
+
+ /*
+ * Move the remaining xattrs up in the buffer,
+ * removing the xattr that is not OK
+ */
+ memmove(xattr_name, xattr_name + len, rem);
+ rc -= len;
}
- if (rc2 < 0) {
- rc2 = 0;
- goto out;
- } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) {
- const int prefix_len = sizeof(XATTR_LUSTRE_PREFIX) - 1;
- const size_t name_len = sizeof("lov") - 1;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (((rc + total_len) > size) && buffer) {
- ptlrpc_req_finished(request);
- return -ERANGE;
- }
+ rc2 = ll_getxattr_lov(inode, NULL, 0);
+ if (rc2 == -ENODATA)
+ return rc;
- if (buffer) {
- buffer += rc;
- memcpy(buffer, XATTR_LUSTRE_PREFIX, prefix_len);
- memcpy(buffer + prefix_len, "lov", name_len);
- buffer[prefix_len + name_len] = '\0';
- }
- rc2 = total_len;
- }
-out:
- ptlrpc_req_finished(request);
- rc = rc + rc2;
+ if (rc2 < 0)
+ return rc2;
- return rc;
+ if (size < rc + sizeof(XATTR_LUSTRE_LOV))
+ return -ERANGE;
+
+ memcpy(buffer + rc, XATTR_LUSTRE_LOV, sizeof(XATTR_LUSTRE_LOV));
+
+ return rc + sizeof(XATTR_LUSTRE_LOV);
}
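
The rewritten ll_listxattr() compacts the '\0'-separated name list in place with strnlen()/memmove(). A standalone sketch of that compaction loop, with a simplified filter standing in for xattr_type_filter():

#include <stdio.h>
#include <string.h>

/* illustrative filter: keep only "user." names */
static int keep(const char *name)
{
	return !strncmp(name, "user.", 5);
}

/* Compact a '\0'-separated name list in place, dropping names the
 * filter rejects; returns the new list length. */
static size_t filter_names(char *buf, size_t len, int (*ok)(const char *))
{
	char *p = buf;
	size_t rem = len;

	while (rem > 0) {
		size_t nlen = strnlen(p, rem - 1) + 1;

		rem -= nlen;
		if (ok(p)) {
			p += nlen;		/* keep it, move past */
			continue;
		}
		memmove(p, p + nlen, rem);	/* drop: shift the tail down */
		len -= nlen;
	}
	return len;
}

int main(void)
{
	char buf[] = "user.a\0trusted.b\0user.c";
	size_t len = filter_names(buf, sizeof(buf), keep);

	fwrite(buf, 1, len, stdout);	/* "user.a\0user.c\0" */
	return 0;
}
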
static const struct xattr_handler ll_user_xattr_handler = {
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
index 50a19a40bd4e..38f75f6aa887 100644
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -26,8 +26,8 @@ struct ll_xattr_entry {
*/
char *xe_name; /* xattr name, \0-terminated */
char *xe_value; /* xattr value */
- unsigned xe_namelen; /* strlen(xe_name) + 1 */
- unsigned xe_vallen; /* xattr value length */
+ unsigned int xe_namelen; /* strlen(xe_name) + 1 */
+ unsigned int xe_vallen; /* xattr value length */
};
static struct kmem_cache *xattr_kmem;
@@ -60,7 +60,7 @@ void ll_xattr_fini(void)
static void ll_xattr_cache_init(struct ll_inode_info *lli)
{
INIT_LIST_HEAD(&lli->lli_xattrs);
- lli->lli_flags |= LLIF_XATTR_CACHE;
+ set_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
}
/**
@@ -104,7 +104,7 @@ static int ll_xattr_cache_find(struct list_head *cache,
static int ll_xattr_cache_add(struct list_head *cache,
const char *xattr_name,
const char *xattr_val,
- unsigned xattr_val_len)
+ unsigned int xattr_val_len)
{
struct ll_xattr_entry *xattr;
@@ -216,7 +216,7 @@ static int ll_xattr_cache_list(struct list_head *cache,
*/
static int ll_xattr_cache_valid(struct ll_inode_info *lli)
{
- return !!(lli->lli_flags & LLIF_XATTR_CACHE);
+ return test_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
}
/**
@@ -233,7 +233,8 @@ static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0)
; /* empty loop */
- lli->lli_flags &= ~LLIF_XATTR_CACHE;
+
+ clear_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
return 0;
}
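
The hunks above convert lli_flags updates from plain mask arithmetic to set_bit()/test_bit()/clear_bit(), so concurrent updates of different bits in the same word cannot be lost to a racing read-modify-write. A userspace analogue of the pattern using C11 atomics (the bit value is illustrative):

#include <stdatomic.h>

static atomic_ulong flags;

#define XATTR_CACHE_BIT	(1UL << 0)	/* stand-in for LLIF_XATTR_CACHE */

static void cache_init(void)  { atomic_fetch_or(&flags, XATTR_CACHE_BIT); }
static int  cache_valid(void) { return !!(atomic_load(&flags) & XATTR_CACHE_BIT); }
static void cache_drop(void)  { atomic_fetch_and(&flags, ~XATTR_CACHE_BIT); }

int main(void)
{
	cache_init();
	if (cache_valid())
		cache_drop();
	return cache_valid();	/* 0: the flag was cleared */
}
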
@@ -415,6 +416,10 @@ static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
CDEBUG(D_CACHE, "not caching %s\n",
XATTR_NAME_ACL_ACCESS);
rc = 0;
+ } else if (!strcmp(xdata, "security.selinux")) {
+ /* Filter out security.selinux; it is cached in slab */
+ CDEBUG(D_CACHE, "not caching security.selinux\n");
+ rc = 0;
} else {
rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval,
*xsizes);
diff --git a/drivers/staging/lustre/lustre/llite/xattr_security.c b/drivers/staging/lustre/lustre/llite/xattr_security.c
new file mode 100644
index 000000000000..d61d8018001a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/xattr_security.c
@@ -0,0 +1,88 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright (c) 2014 Bull SAS
+ * Author: Sebastien Buisson sebastien.buisson@bull.net
+ */
+
+/*
+ * lustre/llite/xattr_security.c
+ * Handler for storing security labels as extended attributes.
+ */
+#include <linux/security.h>
+#include <linux/xattr.h>
+#include "llite_internal.h"
+
+/**
+ * A helper function for ll_security_inode_init_security()
+ * that takes care of setting xattrs
+ *
+ * Get security context of @inode from @xattr_array,
+ * and put it in 'security.xxx' xattr of dentry
+ * stored in @fs_info.
+ *
+ * \retval 0 success
+ * \retval -ENOMEM if no memory could be allocated for xattr name
+ * \retval < 0 failure to set xattr
+ */
+static int
+ll_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+ void *fs_info)
+{
+ const struct xattr_handler *handler;
+ struct dentry *dentry = fs_info;
+ const struct xattr *xattr;
+ int err = 0;
+
+ handler = get_xattr_type(XATTR_SECURITY_PREFIX);
+ if (!handler)
+ return -ENXIO;
+
+ for (xattr = xattr_array; xattr->name; xattr++) {
+ err = handler->set(handler, dentry, inode, xattr->name,
+ xattr->value, xattr->value_len,
+ XATTR_CREATE);
+ if (err < 0)
+ break;
+ }
+ return err;
+}
+
+/**
+ * Initializes security context
+ *
+ * Get security context of @inode in @dir,
+ * and put it in 'security.xxx' xattr of @dentry.
+ *
+ * \retval 0 success, or SELinux is disabled
+ * \retval -ENOMEM if no memory could be allocated for xattr name
+ * \retval < 0 failure to get security context or set xattr
+ */
+int
+ll_init_security(struct dentry *dentry, struct inode *inode, struct inode *dir)
+{
+ if (!selinux_is_enabled())
+ return 0;
+
+ return security_inode_init_security(inode, dir, NULL,
+ &ll_initxattrs, dentry);
+}
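
ll_initxattrs() above walks a NULL-name-terminated xattr array and applies handler->set() to each entry, stopping at the first failure. A minimal standalone sketch of that iteration, with simplified types:

#include <stdio.h>

struct kv {
	const char *name;
	const char *value;
};

/* stand-in for handler->set() */
static int set_one(const char *name, const char *value)
{
	return printf("set %s=%s\n", name, value) < 0 ? -1 : 0;
}

static int apply_all(const struct kv *arr,
		     int (*set)(const char *, const char *))
{
	int err = 0;

	for (; arr->name; arr++) {
		err = set(arr->name, arr->value);
		if (err < 0)
			break;	/* stop on the first failure */
	}
	return err;
}

int main(void)
{
	const struct kv labels[] = {
		{ "security.selinux", "system_u:object_r:etc_t" },
		{ NULL, NULL },
	};

	return apply_all(labels, set_one);
}
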
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
index 9f4e826bb0af..b1071cf5a70c 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
@@ -223,7 +223,14 @@ int lmv_revalidate_slaves(struct obd_export *exp,
LASSERT(body);
if (unlikely(body->mbo_nlink < 2)) {
- CERROR("%s: nlink %d < 2 corrupt stripe %d "DFID":" DFID"\n",
+ /*
+ * If this is a bad stripe, most likely due
+ * to the race between close(unlink) and
+ * getattr, return -ENOENT so that llite
+ * will revalidate the dentry; see
+ * ll_inode_revalidate_fini()
+ */
+ CDEBUG(D_INODE, "%s: nlink %d < 2 corrupt stripe %d "DFID":" DFID"\n",
obd->obd_name, body->mbo_nlink, i,
PFID(&lsm->lsm_md_oinfo[i].lmo_fid),
PFID(&lsm->lsm_md_oinfo[0].lmo_fid));
@@ -233,7 +240,7 @@ int lmv_revalidate_slaves(struct obd_export *exp,
it.it_lock_mode = 0;
}
- rc = -EIO;
+ rc = -ENOENT;
goto cleanup;
}
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
index 52b03745ac19..12731a17e263 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h
+++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
@@ -54,9 +54,6 @@ int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds);
int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
struct lu_fid *fid, struct md_op_data *op_data);
-int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
- const union lmv_mds_md *lmm, int stripe_count);
-
int lmv_revalidate_slaves(struct obd_export *exp,
const struct lmv_stripe_md *lsm,
ldlm_blocking_callback cb_blocking,
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 7dbb2b946acf..f124f6c05ea4 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -62,6 +62,7 @@ static void lmv_activate_target(struct lmv_obd *lmv,
tgt->ltd_active = activate;
lmv->desc.ld_active_tgt_count += (activate ? 1 : -1);
+ tgt->ltd_exp->exp_obd->obd_inactive = !activate;
}
/**
@@ -245,8 +246,7 @@ static int lmv_connect(const struct lu_env *env,
return rc;
}
-static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize,
- u32 cookiesize, u32 def_cookiesize)
+static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
@@ -262,14 +262,7 @@ static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize,
lmv->max_def_easize = def_easize;
change = 1;
}
- if (lmv->max_cookiesize < cookiesize) {
- lmv->max_cookiesize = cookiesize;
- change = 1;
- }
- if (lmv->max_def_cookiesize < def_cookiesize) {
- lmv->max_def_cookiesize = def_cookiesize;
- change = 1;
- }
+
if (change == 0)
return 0;
@@ -284,8 +277,7 @@ static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize,
continue;
}
- rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize,
- cookiesize, def_cookiesize);
+ rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize);
if (rc) {
CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n",
obd->obd_name, i, rc);
@@ -368,8 +360,7 @@ static int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt)
tgt->ltd_exp = mdc_exp;
lmv->desc.ld_active_tgt_count++;
- md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize,
- lmv->max_cookiesize, lmv->max_def_cookiesize);
+ md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize);
CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n",
mdc_obd->obd_name, mdc_obd->obd_uuid.uuid,
@@ -396,27 +387,23 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
__u32 index, int gen)
{
struct lmv_obd *lmv = &obd->u.lmv;
+ struct obd_device *mdc_obd;
struct lmv_tgt_desc *tgt;
int orig_tgt_count = 0;
int rc = 0;
CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index);
- mutex_lock(&lmv->lmv_init_mutex);
-
- if (lmv->desc.ld_tgt_count == 0) {
- struct obd_device *mdc_obd;
-
- mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME,
- &obd->obd_uuid);
- if (!mdc_obd) {
- mutex_unlock(&lmv->lmv_init_mutex);
- CERROR("%s: Target %s not attached: rc = %d\n",
- obd->obd_name, uuidp->uuid, -EINVAL);
- return -EINVAL;
- }
+ mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME,
+ &obd->obd_uuid);
+ if (!mdc_obd) {
+ CERROR("%s: Target %s not attached: rc = %d\n",
+ obd->obd_name, uuidp->uuid, -EINVAL);
+ return -EINVAL;
}
+ mutex_lock(&lmv->lmv_init_mutex);
+
if ((index < lmv->tgts_size) && lmv->tgts[index]) {
tgt = lmv->tgts[index];
CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n",
@@ -472,22 +459,27 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
lmv->desc.ld_tgt_count = index + 1;
}
- if (lmv->connected) {
- rc = lmv_connect_mdc(obd, tgt);
- if (rc) {
- spin_lock(&lmv->lmv_lock);
- if (lmv->desc.ld_tgt_count == index + 1)
- lmv->desc.ld_tgt_count = orig_tgt_count;
- memset(tgt, 0, sizeof(*tgt));
- spin_unlock(&lmv->lmv_lock);
- } else {
- int easize = sizeof(struct lmv_stripe_md) +
- lmv->desc.ld_tgt_count * sizeof(struct lu_fid);
- lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0);
- }
+ if (!lmv->connected) {
+ /* lmv_check_connect() will connect this target. */
+ mutex_unlock(&lmv->lmv_init_mutex);
+ return rc;
}
+ /* Otherwise let's connect it ourselves */
mutex_unlock(&lmv->lmv_init_mutex);
+ rc = lmv_connect_mdc(obd, tgt);
+ if (rc) {
+ spin_lock(&lmv->lmv_lock);
+ if (lmv->desc.ld_tgt_count == index + 1)
+ lmv->desc.ld_tgt_count = orig_tgt_count;
+ memset(tgt, 0, sizeof(*tgt));
+ spin_unlock(&lmv->lmv_lock);
+ } else {
+ int easize = sizeof(struct lmv_stripe_md) +
+ lmv->desc.ld_tgt_count * sizeof(struct lu_fid);
+ lmv_init_ea_size(obd->obd_self_export, easize, 0);
+ }
+
return rc;
}
@@ -538,7 +530,7 @@ int lmv_check_connect(struct obd_device *obd)
class_export_put(lmv->exp);
lmv->connected = 1;
easize = lmv_mds_md_size(lmv->desc.ld_tgt_count, LMV_MAGIC);
- lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0);
+ lmv_init_ea_size(obd->obd_self_export, easize, 0);
mutex_unlock(&lmv->lmv_init_mutex);
return 0;
@@ -1128,9 +1120,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
mdc_obd = class_exp2obd(tgt->ltd_exp);
mdc_obd->obd_force = obddev->obd_force;
err = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
- if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) {
- return err;
- } else if (err) {
+ if (err) {
if (tgt->ltd_active) {
CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
tgt->ltd_uuid.uuid, i, cmd, err);
@@ -1284,7 +1274,6 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid);
lmv->desc.ld_tgt_count = 0;
lmv->desc.ld_active_tgt_count = 0;
- lmv->max_cookiesize = 0;
lmv->max_def_easize = 0;
lmv->max_easize = 0;
lmv->lmv_placement = PLACEMENT_CHAR_POLICY;
@@ -1630,27 +1619,28 @@ lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
* ct_restore().
*/
if (op_data->op_bias & MDS_CREATE_VOLATILE &&
- (int)op_data->op_mds != -1 && lsm) {
+ (int)op_data->op_mds != -1) {
int i;
tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
if (IS_ERR(tgt))
return tgt;
- /* refill the right parent fid */
- for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
- struct lmv_oinfo *oinfo;
+ if (lsm) {
+ /* refill the right parent fid */
+ for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
+ struct lmv_oinfo *oinfo;
- oinfo = &lsm->lsm_md_oinfo[i];
- if (oinfo->lmo_mds == op_data->op_mds) {
- *fid = oinfo->lmo_fid;
- break;
+ oinfo = &lsm->lsm_md_oinfo[i];
+ if (oinfo->lmo_mds == op_data->op_mds) {
+ *fid = oinfo->lmo_fid;
+ break;
+ }
}
- }
- /* Hmm, can not find the stripe by mdt_index(op_mds) */
- if (i == lsm->lsm_md_stripe_count)
- tgt = ERR_PTR(-EINVAL);
+ if (i == lsm->lsm_md_stripe_count)
+ *fid = lsm->lsm_md_oinfo[0].lmo_fid;
+ }
return tgt;
}
@@ -1728,30 +1718,9 @@ static int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
-static int lmv_done_writing(struct obd_export *exp,
- struct md_op_data *op_data,
- struct md_open_data *mod)
-{
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
- int rc;
-
- rc = lmv_check_connect(obd);
- if (rc)
- return rc;
-
- tgt = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
- rc = md_done_writing(tgt->ltd_exp, op_data, mod);
- return rc;
-}
-
static int
lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
- const ldlm_policy_data_t *policy,
+ const union ldlm_policy_data *policy,
struct lookup_intent *it, struct md_op_data *op_data,
struct lustre_handle *lockh, __u64 extra_lock_flags)
{
@@ -1847,7 +1816,7 @@ static int lmv_early_cancel(struct obd_export *exp, struct lmv_tgt_desc *tgt,
struct lu_fid *fid = md_op_data_fid(op_data, flag);
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
- ldlm_policy_data_t policy = { {0} };
+ union ldlm_policy_data policy = { { 0 } };
int rc = 0;
if (!fid_is_sane(fid))
@@ -1937,7 +1906,10 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
+ struct obd_export *target_exp;
struct lmv_tgt_desc *src_tgt;
+ struct lmv_tgt_desc *tgt_tgt;
+ struct mdt_body *body;
int rc;
LASSERT(oldlen != 0);
@@ -1977,6 +1949,10 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
if (rc)
return rc;
src_tgt = lmv_find_target(lmv, &op_data->op_fid3);
+ if (IS_ERR(src_tgt))
+ return PTR_ERR(src_tgt);
+
+ target_exp = src_tgt->ltd_exp;
} else {
if (op_data->op_mea1) {
struct lmv_stripe_md *lsm = op_data->op_mea1;
@@ -1985,29 +1961,27 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
oldlen,
&op_data->op_fid1,
&op_data->op_mds);
- if (IS_ERR(src_tgt))
- return PTR_ERR(src_tgt);
} else {
src_tgt = lmv_find_target(lmv, &op_data->op_fid1);
- if (IS_ERR(src_tgt))
- return PTR_ERR(src_tgt);
-
- op_data->op_mds = src_tgt->ltd_idx;
}
+ if (IS_ERR(src_tgt))
+ return PTR_ERR(src_tgt);
if (op_data->op_mea2) {
struct lmv_stripe_md *lsm = op_data->op_mea2;
- const struct lmv_oinfo *oinfo;
- oinfo = lsm_name_to_stripe_info(lsm, new, newlen);
- if (IS_ERR(oinfo))
- return PTR_ERR(oinfo);
-
- op_data->op_fid2 = oinfo->lmo_fid;
+ tgt_tgt = lmv_locate_target_for_name(lmv, lsm, new,
+ newlen,
+ &op_data->op_fid2,
+ &op_data->op_mds);
+ } else {
+ tgt_tgt = lmv_find_target(lmv, &op_data->op_fid2);
}
+ if (IS_ERR(tgt_tgt))
+ return PTR_ERR(tgt_tgt);
+
+ target_exp = tgt_tgt->ltd_exp;
}
- if (IS_ERR(src_tgt))
- return PTR_ERR(src_tgt);
/*
* LOOKUP lock on src child (fid3) should also be cancelled for
@@ -2048,26 +2022,56 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
+retry_rename:
/*
* Cancel all the locks on tgt child (fid4).
*/
- if (fid_is_sane(&op_data->op_fid4))
+ if (fid_is_sane(&op_data->op_fid4)) {
+ struct lmv_tgt_desc *tgt;
+
rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
LCK_EX, MDS_INODELOCK_FULL,
MF_MDC_CANCEL_FID4);
+ if (rc)
+ return rc;
+
+ tgt = lmv_find_target(lmv, &op_data->op_fid4);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
- CDEBUG(D_INODE, DFID":m%d to "DFID"\n", PFID(&op_data->op_fid1),
- op_data->op_mds, PFID(&op_data->op_fid2));
+ /*
+ * Since the target child might be destroyed and become an
+ * orphan, and orphans can currently only be checked on the
+ * local MDT, send the rename request to the MDT where the
+ * target child is located. If the target child does not
+ * exist, the request will be sent to the target parent instead.
+ */
+ target_exp = tgt->ltd_exp;
+ }
- rc = md_rename(src_tgt->ltd_exp, op_data, old, oldlen,
- new, newlen, request);
- return rc;
+ rc = md_rename(target_exp, op_data, old, oldlen, new, newlen, request);
+ if (rc && rc != -EREMOTE)
+ return rc;
+
+ body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
+ if (!body)
+ return -EPROTO;
+
+ /* Not cross-ref case, just get out of here. */
+ if (likely(!(body->mbo_valid & OBD_MD_MDS)))
+ return rc;
+
+ CDEBUG(D_INODE, "%s: try rename to another MDT for " DFID "\n",
+ exp->exp_obd->obd_name, PFID(&body->mbo_fid1));
+
+ op_data->op_fid4 = body->mbo_fid1;
+ ptlrpc_req_finished(*request);
+ *request = NULL;
+ goto retry_rename;
}
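
The retry_rename loop implements a redirect-and-retry protocol: issue the rename, and when the reply carries OBD_MD_MDS (a cross-MDT reference), refresh op_fid4 from the reply, drop the request, and try again. A self-contained sketch of that control flow, with illustrative stand-ins for the request plumbing:

#include <errno.h>
#include <stdbool.h>

struct op {
	int target;	/* MDT the request goes to */
	int home;	/* where the object really lives */
	int redirect;	/* filled in from the "reply" */
	bool cross_ref;	/* reply said: object is elsewhere */
};

/* stand-in for md_rename(): flags a cross-ref reply when the wrong
 * target is asked */
static int issue(struct op *op)
{
	if (op->target != op->home) {
		op->cross_ref = true;
		op->redirect = op->home;
		return 0;
	}
	op->cross_ref = false;
	return 0;
}

static int do_with_redirect(struct op *op)
{
	int rc;

retry:
	rc = issue(op);
	if (rc && rc != -EREMOTE)
		return rc;		/* hard failure, give up */
	if (!op->cross_ref)
		return rc;		/* completed on this target */

	op->target = op->redirect;	/* follow the redirect and retry */
	goto retry;
}

int main(void)
{
	struct op op = { .target = 1, .home = 2 };

	return do_with_redirect(&op);
}
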
static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, size_t ealen, void *ea2, size_t ea2len,
- struct ptlrpc_request **request,
- struct md_open_data **mod)
+ void *ea, size_t ealen, struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
@@ -2086,10 +2090,7 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
if (IS_ERR(tgt))
return PTR_ERR(tgt);
- rc = md_setattr(tgt->ltd_exp, op_data, ea, ealen, ea2,
- ea2len, request, mod);
-
- return rc;
+ return md_setattr(tgt->ltd_exp, op_data, ea, ealen, request);
}
static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid,
@@ -2623,23 +2624,10 @@ try_next_stripe:
goto retry_unlink;
}
-static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+static int lmv_precleanup(struct obd_device *obd)
{
- struct lmv_obd *lmv = &obd->u.lmv;
-
- switch (stage) {
- case OBD_CLEANUP_EARLY:
- /* XXX: here should be calling obd_precleanup() down to
- * stack.
- */
- break;
- case OBD_CLEANUP_EXPORTS:
- fld_client_debugfs_fini(&lmv->lmv_fld);
- lprocfs_obd_cleanup(obd);
- break;
- default:
- break;
- }
+ fld_client_debugfs_fini(&obd->u.lmv.lmv_fld);
+ lprocfs_obd_cleanup(obd);
return 0;
}
@@ -2654,14 +2642,12 @@ static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
* \param[in] key identifier of key to get value for
* \param[in] vallen size of \a val
* \param[out] val pointer to storage location for value
- * \param[in] lsm optional striping metadata of object
*
* \retval 0 on success
* \retval negative negated errno on failure
*/
static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
- __u32 keylen, void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *lsm)
+ __u32 keylen, void *key, __u32 *vallen, void *val)
{
struct obd_device *obd;
struct lmv_obd *lmv;
@@ -2693,7 +2679,7 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
continue;
if (!obd_get_info(env, tgt->ltd_exp, keylen, key,
- vallen, val, NULL))
+ vallen, val))
return 0;
}
return -EINVAL;
@@ -2709,7 +2695,7 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
* desc.
*/
rc = obd_get_info(env, lmv->tgts[0]->ltd_exp, keylen, key,
- vallen, val, NULL);
+ vallen, val);
if (!rc && KEY_IS(KEY_CONN_DATA))
exp->exp_connect_data = *(struct obd_connect_data *)val;
return rc;
@@ -2777,90 +2763,6 @@ static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp,
return -EINVAL;
}
-static int lmv_pack_md_v1(const struct lmv_stripe_md *lsm,
- struct lmv_mds_md_v1 *lmm1)
-{
- int cplen;
- int i;
-
- lmm1->lmv_magic = cpu_to_le32(lsm->lsm_md_magic);
- lmm1->lmv_stripe_count = cpu_to_le32(lsm->lsm_md_stripe_count);
- lmm1->lmv_master_mdt_index = cpu_to_le32(lsm->lsm_md_master_mdt_index);
- lmm1->lmv_hash_type = cpu_to_le32(lsm->lsm_md_hash_type);
- cplen = strlcpy(lmm1->lmv_pool_name, lsm->lsm_md_pool_name,
- sizeof(lmm1->lmv_pool_name));
- if (cplen >= sizeof(lmm1->lmv_pool_name))
- return -E2BIG;
-
- for (i = 0; i < lsm->lsm_md_stripe_count; i++)
- fid_cpu_to_le(&lmm1->lmv_stripe_fids[i],
- &lsm->lsm_md_oinfo[i].lmo_fid);
- return 0;
-}
-
-static int
-lmv_pack_md(union lmv_mds_md **lmmp, const struct lmv_stripe_md *lsm,
- int stripe_count)
-{
- int lmm_size = 0, rc = 0;
- bool allocated = false;
-
- LASSERT(lmmp);
-
- /* Free lmm */
- if (*lmmp && !lsm) {
- int stripe_cnt;
-
- stripe_cnt = lmv_mds_md_stripe_count_get(*lmmp);
- lmm_size = lmv_mds_md_size(stripe_cnt,
- le32_to_cpu((*lmmp)->lmv_magic));
- if (!lmm_size)
- return -EINVAL;
- kvfree(*lmmp);
- *lmmp = NULL;
- return 0;
- }
-
- /* Alloc lmm */
- if (!*lmmp && !lsm) {
- lmm_size = lmv_mds_md_size(stripe_count, LMV_MAGIC);
- LASSERT(lmm_size > 0);
- *lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS);
- if (!*lmmp)
- return -ENOMEM;
- lmv_mds_md_stripe_count_set(*lmmp, stripe_count);
- (*lmmp)->lmv_magic = cpu_to_le32(LMV_MAGIC);
- return lmm_size;
- }
-
- /* pack lmm */
- LASSERT(lsm);
- lmm_size = lmv_mds_md_size(lsm->lsm_md_stripe_count,
- lsm->lsm_md_magic);
- if (!*lmmp) {
- *lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS);
- if (!*lmmp)
- return -ENOMEM;
- allocated = true;
- }
-
- switch (lsm->lsm_md_magic) {
- case LMV_MAGIC_V1:
- rc = lmv_pack_md_v1(lsm, &(*lmmp)->lmv_md_v1);
- break;
- default:
- rc = -EINVAL;
- break;
- }
-
- if (rc && allocated) {
- kvfree(*lmmp);
- *lmmp = NULL;
- }
-
- return lmm_size;
-}
-
static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm,
const struct lmv_mds_md_v1 *lmm1)
{
@@ -2903,8 +2805,8 @@ static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm,
return rc;
}
-int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
- const union lmv_mds_md *lmm, int stripe_count)
+static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp,
+ const union lmv_mds_md *lmm, size_t lmm_size)
{
struct lmv_stripe_md *lsm;
bool allocated = false;
@@ -2933,17 +2835,6 @@ int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
return 0;
}
- /* Alloc memmd */
- if (!lsm && !lmm) {
- lsm_size = lmv_stripe_md_size(stripe_count);
- lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS);
- if (!lsm)
- return -ENOMEM;
- lsm->lsm_md_stripe_count = stripe_count;
- *lsmp = lsm;
- return 0;
- }
-
if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_STRIPE)
return -EPERM;
@@ -2991,38 +2882,17 @@ int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
}
return lsm_size;
}
-EXPORT_SYMBOL(lmv_unpack_md);
-static int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
- struct lov_mds_md *lmm, int disk_len)
+void lmv_free_memmd(struct lmv_stripe_md *lsm)
{
- return lmv_unpack_md(exp, (struct lmv_stripe_md **)lsmp,
- (union lmv_mds_md *)lmm, disk_len);
-}
-
-static int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
- struct lov_stripe_md *lsm)
-{
- const struct lmv_stripe_md *lmv = (struct lmv_stripe_md *)lsm;
- struct obd_device *obd = exp->exp_obd;
- struct lmv_obd *lmv_obd = &obd->u.lmv;
- int stripe_count;
-
- if (!lmmp) {
- if (lsm)
- stripe_count = lmv->lsm_md_stripe_count;
- else
- stripe_count = lmv_obd->desc.ld_tgt_count;
-
- return lmv_mds_md_size(stripe_count, LMV_MAGIC_V1);
- }
-
- return lmv_pack_md((union lmv_mds_md **)lmmp, lmv, 0);
+ lmv_unpackmd(NULL, &lsm, NULL, 0);
}
+EXPORT_SYMBOL(lmv_free_memmd);
static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
- ldlm_policy_data_t *policy, enum ldlm_mode mode,
- enum ldlm_cancel_flags flags, void *opaque)
+ union ldlm_policy_data *policy,
+ enum ldlm_mode mode, enum ldlm_cancel_flags flags,
+ void *opaque)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
@@ -3064,7 +2934,7 @@ static int lmv_set_lock_data(struct obd_export *exp,
static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags,
const struct lu_fid *fid,
enum ldlm_type type,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
struct lustre_handle *lockh)
{
@@ -3271,32 +3141,6 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
return rc;
}
-static int lmv_quotacheck(struct obd_device *unused, struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
- int rc = 0;
- u32 i;
-
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- int err;
-
- tgt = lmv->tgts[i];
- if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) {
- CERROR("lmv idx %d inactive\n", i);
- return -EIO;
- }
-
- err = obd_quotacheck(tgt->ltd_exp, oqctl);
- if (err && !rc)
- rc = err;
- }
-
- return rc;
-}
-
static int lmv_merge_attr(struct obd_export *exp,
const struct lmv_stripe_md *lsm,
struct cl_attr *attr,
@@ -3349,12 +3193,9 @@ static struct obd_ops lmv_obd_ops = {
.statfs = lmv_statfs,
.get_info = lmv_get_info,
.set_info_async = lmv_set_info_async,
- .packmd = lmv_packmd,
- .unpackmd = lmv_unpackmd,
.notify = lmv_notify,
.get_uuid = lmv_get_uuid,
.iocontrol = lmv_iocontrol,
- .quotacheck = lmv_quotacheck,
.quotactl = lmv_quotactl
};
@@ -3363,7 +3204,6 @@ static struct md_ops lmv_md_ops = {
.null_inode = lmv_null_inode,
.close = lmv_close,
.create = lmv_create,
- .done_writing = lmv_done_writing,
.enqueue = lmv_enqueue,
.getattr = lmv_getattr,
.getxattr = lmv_getxattr,
@@ -3388,6 +3228,7 @@ static struct md_ops lmv_md_ops = {
.intent_getattr_async = lmv_intent_getattr_async,
.revalidate_lock = lmv_revalidate_lock,
.get_fid_from_lsm = lmv_get_fid_from_lsm,
+ .unpackmd = lmv_unpackmd,
};
static int __init lmv_init(void)
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
index 4d2b7d303fea..c49a34bf10e5 100644
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -217,7 +217,7 @@ struct lov_object {
union lov_layout_state {
struct lov_layout_raid0 {
- unsigned lo_nr;
+ unsigned int lo_nr;
/**
* When this is true, lov_object::lo_attr contains
* valid up to date attributes for a top-level
@@ -412,7 +412,6 @@ struct lov_io_sub {
int sub_refcheck;
int sub_refcheck2;
int sub_reenter;
- void *sub_cookie;
};
/**
@@ -473,20 +472,6 @@ struct lov_session {
struct lov_sublock_env ls_subenv;
};
-/**
- * State of transfer for lov.
- */
-struct lov_req {
- struct cl_req_slice lr_cl;
-};
-
-/**
- * State of transfer for lovsub.
- */
-struct lovsub_req {
- struct cl_req_slice lsrq_cl;
-};
-
extern struct lu_device_type lov_device_type;
extern struct lu_device_type lovsub_device_type;
@@ -497,11 +482,9 @@ extern struct kmem_cache *lov_lock_kmem;
extern struct kmem_cache *lov_object_kmem;
extern struct kmem_cache *lov_thread_kmem;
extern struct kmem_cache *lov_session_kmem;
-extern struct kmem_cache *lov_req_kmem;
extern struct kmem_cache *lovsub_lock_kmem;
extern struct kmem_cache *lovsub_object_kmem;
-extern struct kmem_cache *lovsub_req_kmem;
extern struct kmem_cache *lov_lock_link_kmem;
@@ -700,11 +683,6 @@ static inline struct lov_page *cl2lov_page(const struct cl_page_slice *slice)
return container_of0(slice, struct lov_page, lps_cl);
}
-static inline struct lov_req *cl2lov_req(const struct cl_req_slice *slice)
-{
- return container_of0(slice, struct lov_req, lr_cl);
-}
-
static inline struct lovsub_page *
cl2lovsub_page(const struct cl_page_slice *slice)
{
@@ -712,11 +690,6 @@ cl2lovsub_page(const struct cl_page_slice *slice)
return container_of0(slice, struct lovsub_page, lsb_cl);
}
-static inline struct lovsub_req *cl2lovsub_req(const struct cl_req_slice *slice)
-{
- return container_of0(slice, struct lovsub_req, lsrq_cl);
-}
-
static inline struct lov_io *cl2lov_io(const struct lu_env *env,
const struct cl_io_slice *ios)
{
diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c
index 056ae2ed88e8..7301f6e579a1 100644
--- a/drivers/staging/lustre/lustre/lov/lov_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lov_dev.c
@@ -46,11 +46,9 @@ struct kmem_cache *lov_lock_kmem;
struct kmem_cache *lov_object_kmem;
struct kmem_cache *lov_thread_kmem;
struct kmem_cache *lov_session_kmem;
-struct kmem_cache *lov_req_kmem;
struct kmem_cache *lovsub_lock_kmem;
struct kmem_cache *lovsub_object_kmem;
-struct kmem_cache *lovsub_req_kmem;
struct kmem_cache *lov_lock_link_kmem;
@@ -79,11 +77,6 @@ struct lu_kmem_descr lov_caches[] = {
.ckd_size = sizeof(struct lov_session)
},
{
- .ckd_cache = &lov_req_kmem,
- .ckd_name = "lov_req_kmem",
- .ckd_size = sizeof(struct lov_req)
- },
- {
.ckd_cache = &lovsub_lock_kmem,
.ckd_name = "lovsub_lock_kmem",
.ckd_size = sizeof(struct lovsub_lock)
@@ -94,11 +87,6 @@ struct lu_kmem_descr lov_caches[] = {
.ckd_size = sizeof(struct lovsub_object)
},
{
- .ckd_cache = &lovsub_req_kmem,
- .ckd_name = "lovsub_req_kmem",
- .ckd_size = sizeof(struct lovsub_req)
- },
- {
.ckd_cache = &lov_lock_link_kmem,
.ckd_name = "lov_lock_link_kmem",
.ckd_size = sizeof(struct lov_lock_link)
@@ -110,25 +98,6 @@ struct lu_kmem_descr lov_caches[] = {
/*****************************************************************************
*
- * Lov transfer operations.
- *
- */
-
-static void lov_req_completion(const struct lu_env *env,
- const struct cl_req_slice *slice, int ioret)
-{
- struct lov_req *lr;
-
- lr = cl2lov_req(slice);
- kmem_cache_free(lov_req_kmem, lr);
-}
-
-static const struct cl_req_operations lov_req_ops = {
- .cro_completion = lov_req_completion
-};
-
-/*****************************************************************************
- *
* Lov device and device type functions.
*
*/
@@ -248,26 +217,6 @@ static int lov_device_init(const struct lu_env *env, struct lu_device *d,
return rc;
}
-static int lov_req_init(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req)
-{
- struct lov_req *lr;
- int result;
-
- lr = kmem_cache_zalloc(lov_req_kmem, GFP_NOFS);
- if (lr) {
- cl_req_slice_add(req, &lr->lr_cl, dev, &lov_req_ops);
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
-
-static const struct cl_device_operations lov_cl_ops = {
- .cdo_req_init = lov_req_init
-};
-
static void lov_emerg_free(struct lov_device_emerg **emrg, int nr)
{
int i;
@@ -478,7 +427,6 @@ static struct lu_device *lov_device_alloc(const struct lu_env *env,
cl_device_init(&ld->ld_cl, t);
d = lov2lu_dev(ld);
d->ld_ops = &lov_lu_ops;
- ld->ld_cl.cd_ops = &lov_cl_ops;
mutex_init(&ld->ld_mutex);
lockdep_set_class(&ld->ld_mutex, &cl_lov_device_mutex_class);
diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c
index 214c561767e0..ac0bf64c08c1 100644
--- a/drivers/staging/lustre/lustre/lov/lov_ea.c
+++ b/drivers/staging/lustre/lustre/lov/lov_ea.c
@@ -76,18 +76,19 @@ static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
return 0;
}
-struct lov_stripe_md *lsm_alloc_plain(__u16 stripe_count, int *size)
+struct lov_stripe_md *lsm_alloc_plain(u16 stripe_count)
{
+ size_t oinfo_ptrs_size, lsm_size;
struct lov_stripe_md *lsm;
struct lov_oinfo *loi;
- int i, oinfo_ptrs_size;
+ int i;
LASSERT(stripe_count <= LOV_MAX_STRIPE_COUNT);
oinfo_ptrs_size = sizeof(struct lov_oinfo *) * stripe_count;
- *size = sizeof(struct lov_stripe_md) + oinfo_ptrs_size;
+ lsm_size = sizeof(*lsm) + oinfo_ptrs_size;
- lsm = libcfs_kvzalloc(*size, GFP_NOFS);
+ lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS);
if (!lsm)
return NULL;
@@ -117,9 +118,43 @@ void lsm_free_plain(struct lov_stripe_md *lsm)
kvfree(lsm);
}
-static void lsm_unpackmd_common(struct lov_stripe_md *lsm,
- struct lov_mds_md *lmm)
+/*
+ * Find the minimum stripe maxbytes value. For inactive or
+ * reconnecting targets, use LUSTRE_EXT3_STRIPE_MAXBYTES.
+ */
+static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt)
+{
+ loff_t maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
+ struct obd_import *imp;
+
+ if (!tgt->ltd_active)
+ return maxbytes;
+
+ imp = tgt->ltd_obd->u.cli.cl_import;
+ if (!imp)
+ return maxbytes;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_FULL &&
+ (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) &&
+ imp->imp_connect_data.ocd_maxbytes > 0)
+ maxbytes = imp->imp_connect_data.ocd_maxbytes;
+
+ spin_unlock(&imp->imp_lock);
+
+ return maxbytes;
+}
+
+static int lsm_unpackmd_common(struct lov_obd *lov,
+ struct lov_stripe_md *lsm,
+ struct lov_mds_md *lmm,
+ struct lov_ost_data_v1 *objects)
{
+ loff_t stripe_maxbytes = LLONG_MAX;
+ unsigned int stripe_count;
+ struct lov_oinfo *loi;
+ unsigned int i;
+
/*
* This supposes lov_mds_md_v1/v3 first fields are
* are the same
@@ -129,11 +164,54 @@ static void lsm_unpackmd_common(struct lov_stripe_md *lsm,
lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern);
lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen);
lsm->lsm_pool_name[0] = '\0';
+
+ stripe_count = lsm_is_released(lsm) ? 0 : lsm->lsm_stripe_count;
+
+ for (i = 0; i < stripe_count; i++) {
+ loff_t tgt_bytes;
+
+ loi = lsm->lsm_oinfo[i];
+ ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi);
+ loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx);
+ loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen);
+ if (lov_oinfo_is_dummy(loi))
+ continue;
+
+ if (loi->loi_ost_idx >= lov->desc.ld_tgt_count &&
+ !lov2obd(lov)->obd_process_conf) {
+ CERROR("%s: OST index %d more than OST count %d\n",
+ (char *)lov->desc.ld_uuid.uuid,
+ loi->loi_ost_idx, lov->desc.ld_tgt_count);
+ lov_dump_lmm_v1(D_WARNING, lmm);
+ return -EINVAL;
+ }
+
+ if (!lov->lov_tgts[loi->loi_ost_idx]) {
+ CERROR("%s: OST index %d missing\n",
+ (char *)lov->desc.ld_uuid.uuid,
+ loi->loi_ost_idx);
+ lov_dump_lmm_v1(D_WARNING, lmm);
+ continue;
+ }
+
+ tgt_bytes = lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx]);
+ stripe_maxbytes = min_t(loff_t, stripe_maxbytes, tgt_bytes);
+ }
+
+ if (stripe_maxbytes == LLONG_MAX)
+ stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
+
+ if (!lsm->lsm_stripe_count)
+ lsm->lsm_maxbytes = stripe_maxbytes * lov->desc.ld_tgt_count;
+ else
+ lsm->lsm_maxbytes = stripe_maxbytes * lsm->lsm_stripe_count;
+
+ return 0;
}
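
lsm_maxbytes above is the minimum per-target limit scaled by the stripe count, with the target count substituted for a zero-stripe (released) file. A hedged userspace sketch of that reduction, where tgt_max[] stands in for lov_tgt_maxbytes() evaluated per target:

	#include <limits.h>

	static long long file_maxbytes(const long long *tgt_max,
				       unsigned int stripes,
				       unsigned int tgt_count,
				       long long fallback)
	{
		long long stripe_maxbytes = LLONG_MAX;
		unsigned int i;

		/* reduce to the smallest per-target limit */
		for (i = 0; i < stripes; i++)
			if (tgt_max[i] < stripe_maxbytes)
				stripe_maxbytes = tgt_max[i];

		if (stripe_maxbytes == LLONG_MAX)	/* no usable target */
			stripe_maxbytes = fallback;

		/* zero-stripe files scale by the target count instead */
		return stripe_maxbytes * (stripes ? stripes : tgt_count);
	}
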
static void
lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno,
- u64 *lov_off, u64 *swidth)
+ loff_t *lov_off, loff_t *swidth)
{
if (swidth)
*swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
@@ -141,36 +219,12 @@ lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno,
static void
lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno,
- u64 *lov_off, u64 *swidth)
+ loff_t *lov_off, loff_t *swidth)
{
if (swidth)
*swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
}
-/* Find minimum stripe maxbytes value. For inactive or
- * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES.
- */
-static void lov_tgt_maxbytes(struct lov_tgt_desc *tgt, __u64 *stripe_maxbytes)
-{
- struct obd_import *imp = tgt->ltd_obd->u.cli.cl_import;
-
- if (!imp || !tgt->ltd_active) {
- *stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
- return;
- }
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state == LUSTRE_IMP_FULL &&
- (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) &&
- imp->imp_connect_data.ocd_maxbytes > 0) {
- if (*stripe_maxbytes > imp->imp_connect_data.ocd_maxbytes)
- *stripe_maxbytes = imp->imp_connect_data.ocd_maxbytes;
- } else {
- *stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
- }
- spin_unlock(&imp->imp_lock);
-}
-
static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes,
__u16 *stripe_count)
{
@@ -197,45 +251,7 @@ static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes,
static int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm,
struct lov_mds_md_v1 *lmm)
{
- struct lov_oinfo *loi;
- int i;
- int stripe_count;
- __u64 stripe_maxbytes = OBD_OBJECT_EOF;
-
- lsm_unpackmd_common(lsm, lmm);
-
- stripe_count = lsm_is_released(lsm) ? 0 : lsm->lsm_stripe_count;
-
- for (i = 0; i < stripe_count; i++) {
- /* XXX LOV STACKING call down to osc_unpackmd() */
- loi = lsm->lsm_oinfo[i];
- ostid_le_to_cpu(&lmm->lmm_objects[i].l_ost_oi, &loi->loi_oi);
- loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx);
- loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen);
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) {
- CERROR("OST index %d more than OST count %d\n",
- loi->loi_ost_idx, lov->desc.ld_tgt_count);
- lov_dump_lmm_v1(D_WARNING, lmm);
- return -EINVAL;
- }
- if (!lov->lov_tgts[loi->loi_ost_idx]) {
- CERROR("OST index %d missing\n", loi->loi_ost_idx);
- lov_dump_lmm_v1(D_WARNING, lmm);
- return -EINVAL;
- }
- /* calculate the minimum stripe max bytes */
- lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx],
- &stripe_maxbytes);
- }
-
- lsm->lsm_maxbytes = stripe_maxbytes * lsm->lsm_stripe_count;
- if (lsm->lsm_stripe_count == 0)
- lsm->lsm_maxbytes = stripe_maxbytes * lov->desc.ld_tgt_count;
-
- return 0;
+ return lsm_unpackmd_common(lov, lsm, lmm, lmm->lmm_objects);
}
const struct lsm_operations lsm_v1_ops = {
@@ -275,55 +291,21 @@ static int lsm_lmm_verify_v3(struct lov_mds_md *lmmv1, int lmm_bytes,
}
static int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm,
- struct lov_mds_md *lmmv1)
+ struct lov_mds_md *lmm)
{
- struct lov_mds_md_v3 *lmm;
- struct lov_oinfo *loi;
- int i;
- int stripe_count;
- __u64 stripe_maxbytes = OBD_OBJECT_EOF;
- int cplen = 0;
+ struct lov_mds_md_v3 *lmm_v3 = (struct lov_mds_md_v3 *)lmm;
+ size_t cplen = 0;
+ int rc;
- lmm = (struct lov_mds_md_v3 *)lmmv1;
+ rc = lsm_unpackmd_common(lov, lsm, lmm, lmm_v3->lmm_objects);
+ if (rc)
+ return rc;
- lsm_unpackmd_common(lsm, (struct lov_mds_md_v1 *)lmm);
-
- stripe_count = lsm_is_released(lsm) ? 0 : lsm->lsm_stripe_count;
-
- cplen = strlcpy(lsm->lsm_pool_name, lmm->lmm_pool_name,
+ cplen = strlcpy(lsm->lsm_pool_name, lmm_v3->lmm_pool_name,
sizeof(lsm->lsm_pool_name));
if (cplen >= sizeof(lsm->lsm_pool_name))
return -E2BIG;
- for (i = 0; i < stripe_count; i++) {
- /* XXX LOV STACKING call down to osc_unpackmd() */
- loi = lsm->lsm_oinfo[i];
- ostid_le_to_cpu(&lmm->lmm_objects[i].l_ost_oi, &loi->loi_oi);
- loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx);
- loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen);
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) {
- CERROR("OST index %d more than OST count %d\n",
- loi->loi_ost_idx, lov->desc.ld_tgt_count);
- lov_dump_lmm_v3(D_WARNING, lmm);
- return -EINVAL;
- }
- if (!lov->lov_tgts[loi->loi_ost_idx]) {
- CERROR("OST index %d missing\n", loi->loi_ost_idx);
- lov_dump_lmm_v3(D_WARNING, lmm);
- return -EINVAL;
- }
- /* calculate the minimum stripe max bytes */
- lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx],
- &stripe_maxbytes);
- }
-
- lsm->lsm_maxbytes = stripe_maxbytes * lsm->lsm_stripe_count;
- if (lsm->lsm_stripe_count == 0)
- lsm->lsm_maxbytes = stripe_maxbytes * lov->desc.ld_tgt_count;
-
return 0;
}
diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h
index 07e5ede3e952..774499c74daa 100644
--- a/drivers/staging/lustre/lustre/lov/lov_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_internal.h
@@ -36,6 +36,77 @@
#include "../include/obd_class.h"
#include "../include/lustre/lustre_user.h"
+/*
+ * If we are unable to get the maximum object size from the OST in
+ * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using
+ * the old maximum object size from ext3.
+ */
+#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL
+
+struct lov_stripe_md {
+ atomic_t lsm_refc;
+ spinlock_t lsm_lock;
+ pid_t lsm_lock_owner; /* debugging */
+
+ /*
+ * maximum possible file size, might change as OSTs status changes,
+ * e.g. disconnected, deactivated
+ */
+ loff_t lsm_maxbytes;
+ struct ost_id lsm_oi;
+ u32 lsm_magic;
+ u32 lsm_stripe_size;
+ u32 lsm_pattern; /* RAID0, RAID1, released, ... */
+ u16 lsm_stripe_count;
+ u16 lsm_layout_gen;
+ char lsm_pool_name[LOV_MAXPOOLNAME + 1];
+ struct lov_oinfo *lsm_oinfo[0];
+};
+
+static inline bool lsm_is_released(struct lov_stripe_md *lsm)
+{
+ return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED);
+}
+
+static inline bool lsm_has_objects(struct lov_stripe_md *lsm)
+{
+ if (!lsm)
+ return false;
+
+ if (lsm_is_released(lsm))
+ return false;
+
+ return true;
+}
+
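
lsm_is_released() keys off a flag bit in lsm_pattern, and lsm_has_objects() is the guard callers use before touching per-stripe objects. A small sketch with stand-in types (the flag value below is illustrative, not the real LOV_PATTERN_F_RELEASED):

	#include <stdbool.h>
	#include <stdint.h>

	#define PATTERN_F_RELEASED 0x80000000u	/* illustrative bit only */

	struct md {
		uint32_t pattern;
	};

	/* mirrors the lsm_has_objects() guard: non-NULL and not released */
	static bool has_objects(const struct md *lsm)
	{
		return lsm && !(lsm->pattern & PATTERN_F_RELEASED);
	}
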
+struct lsm_operations {
+ void (*lsm_free)(struct lov_stripe_md *);
+ void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, loff_t *,
+ loff_t *);
+ void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, loff_t *,
+ loff_t *);
+ int (*lsm_lmm_verify)(struct lov_mds_md *lmm, int lmm_bytes,
+ u16 *stripe_count);
+ int (*lsm_unpackmd)(struct lov_obd *lov, struct lov_stripe_md *lsm,
+ struct lov_mds_md *lmm);
+};
+
+extern const struct lsm_operations lsm_v1_ops;
+extern const struct lsm_operations lsm_v3_ops;
+
+static inline const struct lsm_operations *lsm_op_find(int magic)
+{
+ switch (magic) {
+ case LOV_MAGIC_V1:
+ return &lsm_v1_ops;
+ case LOV_MAGIC_V3:
+ return &lsm_v3_ops;
+ default:
+ CERROR("unrecognized lsm_magic %08x\n", magic);
+ return NULL;
+ }
+}
+
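
Since the on-disk magic fully selects the ops table, a caller can verify and unpack metadata without knowing the layout version. A sketch of such a caller, using only the operations declared above (illustrative only; the real lov_unpackmd() introduced by this series lives elsewhere in the patch):

	/* illustrative: verify then unpack via the magic-selected ops */
	static int unpack_md(struct lov_obd *lov, struct lov_stripe_md *lsm,
			     struct lov_mds_md *lmm, int lmm_bytes)
	{
		const struct lsm_operations *op;
		u16 stripe_count;
		int rc;

		op = lsm_op_find(le32_to_cpu(lmm->lmm_magic));
		if (!op)
			return -EINVAL;

		rc = op->lsm_lmm_verify(lmm, lmm_bytes, &stripe_count);
		if (rc)
			return rc;

		return op->lsm_unpackmd(lov, lsm, lmm);
	}
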
/* lov_do_div64(a, b) returns a % b, and a = a / b.
* The 32-bit code is LOV-specific due to knowing about stripe limits in
* order to reduce the divisor to a 32-bit number. If the divisor is
@@ -110,8 +181,6 @@ struct lov_request_set {
atomic_t set_completes;
atomic_t set_success;
atomic_t set_finish_checked;
- struct llog_cookie *set_cookies;
- int set_cookie_sent;
struct list_head set_list;
wait_queue_head_t set_waitq;
};
@@ -132,8 +201,6 @@ static inline void lov_put_reqset(struct lov_request_set *set)
(char *)((lv)->lov_tgts[index]->ltd_uuid.uuid)
/* lov_merge.c */
-void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
- struct lov_stripe_md *lsm, int stripeno, int *set);
int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
struct ost_lvb *lvb, __u64 *kms_place);
@@ -150,17 +217,9 @@ pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
int stripe);
/* lov_request.c */
-int lov_update_common_set(struct lov_request_set *set,
- struct lov_request *req, int rc);
int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
struct lov_request_set **reqset);
int lov_fini_getattr_set(struct lov_request_set *set);
-int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- struct lov_request_set **reqset);
-int lov_update_setattr_set(struct lov_request_set *set,
- struct lov_request *req, int rc);
-int lov_fini_setattr_set(struct lov_request_set *set);
int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
struct lov_request_set **reqset);
int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,
@@ -186,12 +245,10 @@ int lov_del_target(struct obd_device *obd, __u32 index,
struct obd_uuid *uuidp, int gen);
/* lov_pack.c */
-int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmm,
- struct lov_stripe_md *lsm);
-int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
- struct lov_mds_md *lmm, int lmm_bytes);
-int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
- int pattern, int magic);
+ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf,
+ size_t buf_size);
+struct lov_stripe_md *lov_unpackmd(struct lov_obd *lov, struct lov_mds_md *lmm,
+ size_t lmm_size);
int lov_free_memmd(struct lov_stripe_md **lsmp);
void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm);
@@ -199,7 +256,7 @@ void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm);
void lov_dump_lmm_common(int level, void *lmmp);
/* lov_ea.c */
-struct lov_stripe_md *lsm_alloc_plain(__u16 stripe_count, int *size);
+struct lov_stripe_md *lsm_alloc_plain(u16 stripe_count);
void lsm_free_plain(struct lov_stripe_md *lsm);
void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm);
@@ -244,4 +301,9 @@ static inline bool lov_oinfo_is_dummy(const struct lov_oinfo *loi)
return false;
}
+static inline struct obd_device *lov2obd(const struct lov_obd *lov)
+{
+ return container_of0(lov, struct obd_device, u.lov);
+}
+
#endif
diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c
index d10157985ed9..002326c282a7 100644
--- a/drivers/staging/lustre/lustre/lov/lov_io.c
+++ b/drivers/staging/lustre/lustre/lov/lov_io.c
@@ -86,6 +86,8 @@ static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio,
switch (io->ci_type) {
case CIT_SETATTR: {
io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr;
+ io->u.ci_setattr.sa_attr_flags =
+ parent->u.ci_setattr.sa_attr_flags;
io->u.ci_setattr.sa_valid = parent->u.ci_setattr.sa_valid;
io->u.ci_setattr.sa_stripe_index = stripe;
io->u.ci_setattr.sa_parent_fid =
@@ -98,6 +100,12 @@ static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio,
}
break;
}
+ case CIT_DATA_VERSION: {
+ io->u.ci_data_version.dv_data_version = 0;
+ io->u.ci_data_version.dv_flags =
+ parent->u.ci_data_version.dv_flags;
+ break;
+ }
case CIT_FAULT: {
struct cl_object *obj = parent->ci_obj;
loff_t off = cl_offset(obj, parent->u.ci_fault.ft_index);
@@ -159,12 +167,7 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio,
sub->sub_env = ld->ld_emrg[stripe]->emrg_env;
sub->sub_borrowed = 1;
} else {
- void *cookie;
-
- /* obtain new environment */
- cookie = cl_env_reenter();
sub->sub_env = cl_env_get(&sub->sub_refcheck);
- cl_env_reexit(cookie);
if (IS_ERR(sub->sub_env))
result = PTR_ERR(sub->sub_env);
@@ -337,6 +340,11 @@ static int lov_io_slice_init(struct lov_io *lio, struct lov_object *obj,
lio->lis_endpos = OBD_OBJECT_EOF;
break;
+ case CIT_DATA_VERSION:
+ lio->lis_pos = 0;
+ lio->lis_endpos = OBD_OBJECT_EOF;
+ break;
+
case CIT_FAULT: {
pgoff_t index = io->u.ci_fault.ft_index;
@@ -514,6 +522,24 @@ static int lov_io_end_wrapper(const struct lu_env *env, struct cl_io *io)
return 0;
}
+static void
+lov_io_data_version_end(const struct lu_env *env, const struct cl_io_slice *ios)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct cl_io *parent = lio->lis_cl.cis_io;
+ struct lov_io_sub *sub;
+
+ list_for_each_entry(sub, &lio->lis_active, sub_linkage) {
+ lov_io_end_wrapper(env, sub->sub_io);
+
+ parent->u.ci_data_version.dv_data_version +=
+ sub->sub_io->u.ci_data_version.dv_data_version;
+
+ if (!parent->ci_result)
+ parent->ci_result = sub->sub_io->ci_result;
+ }
+}
+
static int lov_io_iter_fini_wrapper(const struct lu_env *env, struct cl_io *io)
{
cl_io_iter_fini(env, io);
@@ -555,6 +581,65 @@ static void lov_io_unlock(const struct lu_env *env,
LASSERT(rc == 0);
}
+static int lov_io_read_ahead(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ pgoff_t start, struct cl_read_ahead *ra)
+{
+ struct lov_io *lio = cl2lov_io(env, ios);
+ struct lov_object *loo = lio->lis_object;
+ struct cl_object *obj = lov2cl(loo);
+ struct lov_layout_raid0 *r0 = lov_r0(loo);
+ unsigned int pps; /* pages per stripe */
+ struct lov_io_sub *sub;
+ pgoff_t ra_end;
+ loff_t suboff;
+ int stripe;
+ int rc;
+
+ stripe = lov_stripe_number(loo->lo_lsm, cl_offset(obj, start));
+ if (unlikely(!r0->lo_sub[stripe]))
+ return -EIO;
+
+ sub = lov_sub_get(env, lio, stripe);
+ if (IS_ERR(sub))
+ return PTR_ERR(sub);
+
+ lov_stripe_offset(loo->lo_lsm, cl_offset(obj, start), stripe, &suboff);
+ rc = cl_io_read_ahead(sub->sub_env, sub->sub_io,
+ cl_index(lovsub2cl(r0->lo_sub[stripe]), suboff),
+ ra);
+ lov_sub_put(sub);
+
+ CDEBUG(D_READA, DFID " cra_end = %lu, stripes = %d, rc = %d\n",
+ PFID(lu_object_fid(lov2lu(loo))), ra->cra_end, r0->lo_nr, rc);
+ if (rc)
+ return rc;
+
+ /**
+ * Adjust the stripe index by the layout of raid0. ra->cra_end is
+ * the maximum page index covered by an underlying DLM lock.
+ * This function converts cra_end from stripe level to file
+ * level, and makes sure it's not beyond the stripe boundary.
+ */
+ if (r0->lo_nr == 1) /* single stripe file */
+ return 0;
+
+ /* cra_end is stripe level, convert it into file level */
+ ra_end = ra->cra_end;
+ if (ra_end != CL_PAGE_EOF)
+ ra_end = lov_stripe_pgoff(loo->lo_lsm, ra_end, stripe);
+
+ pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT;
+
+ CDEBUG(D_READA, DFID " max_index = %lu, pps = %u, stripe_size = %u, stripe no = %u, start index = %lu\n",
+ PFID(lu_object_fid(lov2lu(loo))), ra_end, pps,
+ loo->lo_lsm->lsm_stripe_size, stripe, start);
+
+ /* never exceed the end of the stripe */
+ ra->cra_end = min_t(pgoff_t, ra_end, start + pps - start % pps - 1);
+ return 0;
+}
+
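
The final min_t() clamp ends read-ahead at the last page of the stripe containing start: with pps pages per stripe, that page is start + pps - start % pps - 1. A worked check with hypothetical numbers (1 MiB stripes, 4 KiB pages, so pps = 256):

	#include <assert.h>

	int main(void)
	{
		unsigned long pps = 256;	/* 1 MiB stripe / 4 KiB pages */
		unsigned long start = 1000;	/* page index inside stripe 3 */
		unsigned long stripe_last = start + pps - start % pps - 1;

		/* stripe 3 covers pages [768, 1023]; the clamp ends at 1023 */
		assert(stripe_last == 1023);
		return 0;
	}
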
/**
* lov implementation of cl_operations::cio_submit() method. It takes a list
* of pages in \a queue, splits it into per-stripe sub-lists, invokes
@@ -779,6 +864,15 @@ static const struct cl_io_operations lov_io_ops = {
.cio_start = lov_io_start,
.cio_end = lov_io_end
},
+ [CIT_DATA_VERSION] = {
+ .cio_fini = lov_io_fini,
+ .cio_iter_init = lov_io_iter_init,
+ .cio_iter_fini = lov_io_iter_fini,
+ .cio_lock = lov_io_lock,
+ .cio_unlock = lov_io_unlock,
+ .cio_start = lov_io_start,
+ .cio_end = lov_io_data_version_end,
+ },
[CIT_FAULT] = {
.cio_fini = lov_io_fini,
.cio_iter_init = lov_io_iter_init,
@@ -801,6 +895,7 @@ static const struct cl_io_operations lov_io_ops = {
.cio_fini = lov_io_fini
}
},
+ .cio_read_ahead = lov_io_read_ahead,
.cio_submit = lov_io_submit,
.cio_commit_async = lov_io_commit_async,
};
@@ -820,6 +915,13 @@ static void lov_empty_io_fini(const struct lu_env *env,
wake_up_all(&lov->lo_waitq);
}
+static int lov_empty_io_submit(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ enum cl_req_type crt, struct cl_2queue *queue)
+{
+ return -EBADF;
+}
+
static void lov_empty_impossible(const struct lu_env *env,
struct cl_io_slice *ios)
{
@@ -870,7 +972,7 @@ static const struct cl_io_operations lov_empty_io_ops = {
.cio_fini = lov_empty_io_fini
}
},
- .cio_submit = LOV_EMPTY_IMPOSSIBLE,
+ .cio_submit = lov_empty_io_submit,
.cio_commit_async = LOV_EMPTY_IMPOSSIBLE
};
@@ -909,6 +1011,7 @@ int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj,
break;
case CIT_FSYNC:
case CIT_SETATTR:
+ case CIT_DATA_VERSION:
result = 1;
break;
case CIT_WRITE:
@@ -944,6 +1047,7 @@ int lov_io_init_released(const struct lu_env *env, struct cl_object *obj,
LASSERTF(0, "invalid type %d\n", io->ci_type);
case CIT_MISC:
case CIT_FSYNC:
+ case CIT_DATA_VERSION:
result = 1;
break;
case CIT_SETATTR:
diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c
index 674af106b50b..391dfd207177 100644
--- a/drivers/staging/lustre/lustre/lov/lov_merge.c
+++ b/drivers/staging/lustre/lustre/lov/lov_merge.c
@@ -104,53 +104,3 @@ int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
lvb->lvb_ctime = current_ctime;
return rc;
}
-
-void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
- struct lov_stripe_md *lsm, int stripeno, int *set)
-{
- valid &= src->o_valid;
-
- if (*set) {
- tgt->o_valid &= valid;
- if (valid & OBD_MD_FLSIZE) {
- /* this handles sparse files properly */
- u64 lov_size;
-
- lov_size = lov_stripe_size(lsm, src->o_size, stripeno);
- if (lov_size > tgt->o_size)
- tgt->o_size = lov_size;
- }
- if (valid & OBD_MD_FLBLOCKS)
- tgt->o_blocks += src->o_blocks;
- if (valid & OBD_MD_FLBLKSZ)
- tgt->o_blksize += src->o_blksize;
- if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime)
- tgt->o_ctime = src->o_ctime;
- if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
- tgt->o_mtime = src->o_mtime;
- if (valid & OBD_MD_FLDATAVERSION)
- tgt->o_data_version += src->o_data_version;
-
- /* handle flags */
- if (valid & OBD_MD_FLFLAGS)
- tgt->o_flags &= src->o_flags;
- else
- tgt->o_flags = 0;
- } else {
- memcpy(tgt, src, sizeof(*tgt));
- tgt->o_oi = lsm->lsm_oi;
- tgt->o_valid = valid;
- if (valid & OBD_MD_FLSIZE)
- tgt->o_size = lov_stripe_size(lsm, src->o_size,
- stripeno);
- tgt->o_flags = 0;
- if (valid & OBD_MD_FLFLAGS)
- tgt->o_flags = src->o_flags;
- }
-
- /* data_version needs to be valid on all stripes to be correct! */
- if (!(valid & OBD_MD_FLDATAVERSION))
- tgt->o_valid &= ~OBD_MD_FLDATAVERSION;
-
- *set += 1;
-}
diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c
index b23016f7ec26..63b064523c6a 100644
--- a/drivers/staging/lustre/lustre/lov/lov_obd.c
+++ b/drivers/staging/lustre/lustre/lov/lov_obd.c
@@ -40,19 +40,20 @@
#define DEBUG_SUBSYSTEM S_LOV
#include "../../include/linux/libcfs/libcfs.h"
-#include "../include/obd_support.h"
-#include "../include/lustre/lustre_ioctl.h"
-#include "../include/lustre_lib.h"
-#include "../include/lustre_net.h"
#include "../include/lustre/lustre_idl.h"
+#include "../include/lustre/lustre_ioctl.h"
+
+#include "../include/cl_object.h"
#include "../include/lustre_dlm.h"
+#include "../include/lustre_fid.h"
+#include "../include/lustre_lib.h"
#include "../include/lustre_mds.h"
-#include "../include/obd_class.h"
-#include "../include/lprocfs_status.h"
+#include "../include/lustre_net.h"
#include "../include/lustre_param.h"
-#include "../include/cl_object.h"
-#include "../include/lustre/ll_fiemap.h"
-#include "../include/lustre_fid.h"
+#include "../include/lustre_swab.h"
+#include "../include/lprocfs_status.h"
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
#include "lov_internal.h"
@@ -826,29 +827,6 @@ out:
return rc;
}
-static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
-{
- struct lov_obd *lov = &obd->u.lov;
-
- switch (stage) {
- case OBD_CLEANUP_EARLY: {
- int i;
-
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
- continue;
- obd_precleanup(class_exp2obd(lov->lov_tgts[i]->ltd_exp),
- OBD_CLEANUP_EARLY);
- }
- break;
- }
- default:
- break;
- }
-
- return 0;
-}
-
static int lov_cleanup(struct obd_device *obd)
{
struct lov_obd *lov = &obd->u.lov;
@@ -972,163 +950,6 @@ out:
return rc;
}
-#define ASSERT_LSM_MAGIC(lsmp) \
-do { \
- LASSERT((lsmp)); \
- LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC_V1 || \
- (lsmp)->lsm_magic == LOV_MAGIC_V3), \
- "%p->lsm_magic=%x\n", (lsmp), (lsmp)->lsm_magic); \
-} while (0)
-
-static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
- void *data, int rc)
-{
- struct lov_request_set *lovset = (struct lov_request_set *)data;
- int err;
-
- /* don't do attribute merge if this async op failed */
- if (rc)
- atomic_set(&lovset->set_completes, 0);
- err = lov_fini_getattr_set(lovset);
- return rc ? rc : err;
-}
-
-static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo,
- struct ptlrpc_request_set *rqset)
-{
- struct lov_request_set *lovset;
- struct lov_obd *lov;
- struct lov_request *req;
- int rc = 0, err;
-
- LASSERT(oinfo);
- ASSERT_LSM_MAGIC(oinfo->oi_md);
-
- if (!exp || !exp->exp_obd)
- return -ENODEV;
-
- lov = &exp->exp_obd->u.lov;
-
- rc = lov_prep_getattr_set(exp, oinfo, &lovset);
- if (rc)
- return rc;
-
- CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n",
- POSTID(&oinfo->oi_md->lsm_oi), oinfo->oi_md->lsm_stripe_count,
- oinfo->oi_md->lsm_stripe_size);
-
- list_for_each_entry(req, &lovset->set_list, rq_link) {
- CDEBUG(D_INFO, "objid " DOSTID "[%d] has subobj " DOSTID " at idx%u\n",
- POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe,
- POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx);
- rc = obd_getattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp,
- &req->rq_oi, rqset);
- if (rc) {
- CERROR("%s: getattr objid "DOSTID" subobj"
- DOSTID" on OST idx %d: rc = %d\n",
- exp->exp_obd->obd_name,
- POSTID(&oinfo->oi_oa->o_oi),
- POSTID(&req->rq_oi.oi_oa->o_oi),
- req->rq_idx, rc);
- goto out;
- }
- }
-
- if (!list_empty(&rqset->set_requests)) {
- LASSERT(rc == 0);
- LASSERT(!rqset->set_interpret);
- rqset->set_interpret = lov_getattr_interpret;
- rqset->set_arg = (void *)lovset;
- return rc;
- }
-out:
- if (rc)
- atomic_set(&lovset->set_completes, 0);
- err = lov_fini_getattr_set(lovset);
- return rc ? rc : err;
-}
-
-static int lov_setattr_interpret(struct ptlrpc_request_set *rqset,
- void *data, int rc)
-{
- struct lov_request_set *lovset = (struct lov_request_set *)data;
- int err;
-
- if (rc)
- atomic_set(&lovset->set_completes, 0);
- err = lov_fini_setattr_set(lovset);
- return rc ? rc : err;
-}
-
-/* If @oti is given, the request goes from MDS and responses from OSTs are not
- * needed. Otherwise, a client is waiting for responses.
- */
-static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- struct ptlrpc_request_set *rqset)
-{
- struct lov_request_set *set;
- struct lov_request *req;
- struct lov_obd *lov;
- int rc = 0;
-
- LASSERT(oinfo);
- ASSERT_LSM_MAGIC(oinfo->oi_md);
- if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) {
- LASSERT(oti);
- LASSERT(oti->oti_logcookies);
- }
-
- if (!exp || !exp->exp_obd)
- return -ENODEV;
-
- lov = &exp->exp_obd->u.lov;
- rc = lov_prep_setattr_set(exp, oinfo, oti, &set);
- if (rc)
- return rc;
-
- CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n",
- POSTID(&oinfo->oi_md->lsm_oi),
- oinfo->oi_md->lsm_stripe_count,
- oinfo->oi_md->lsm_stripe_size);
-
- list_for_each_entry(req, &set->set_list, rq_link) {
- if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
- oti->oti_logcookies = set->set_cookies + req->rq_stripe;
-
- CDEBUG(D_INFO, "objid " DOSTID "[%d] has subobj " DOSTID " at idx%u\n",
- POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe,
- POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx);
-
- rc = obd_setattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp,
- &req->rq_oi, oti, rqset);
- if (rc) {
- CERROR("error: setattr objid "DOSTID" subobj"
- DOSTID" on OST idx %d: rc = %d\n",
- POSTID(&set->set_oi->oi_oa->o_oi),
- POSTID(&req->rq_oi.oi_oa->o_oi),
- req->rq_idx, rc);
- break;
- }
- }
-
- /* If we are not waiting for responses on async requests, return. */
- if (rc || !rqset || list_empty(&rqset->set_requests)) {
- int err;
-
- if (rc)
- atomic_set(&set->set_completes, 0);
- err = lov_fini_setattr_set(set);
- return rc ? rc : err;
- }
-
- LASSERT(!rqset->set_interpret);
- rqset->set_interpret = lov_setattr_interpret;
- rqset->set_arg = (void *)set;
-
- return 0;
-}
-
int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc)
{
struct lov_request_set *lovset = (struct lov_request_set *)data;
@@ -1183,7 +1004,10 @@ static int lov_statfs(const struct lu_env *env, struct obd_export *exp,
struct obd_statfs *osfs, __u64 max_age, __u32 flags)
{
struct ptlrpc_request_set *set = NULL;
- struct obd_info oinfo = { };
+ struct obd_info oinfo = {
+ .oi_osfs = osfs,
+ .oi_flags = flags,
+ };
int rc = 0;
/* for obdclass we forbid using obd_statfs_rqset, but prefer using async
@@ -1193,8 +1017,6 @@ static int lov_statfs(const struct lu_env *env, struct obd_export *exp,
if (!set)
return -ENOMEM;
- oinfo.oi_osfs = osfs;
- oinfo.oi_flags = flags;
rc = lov_statfs_async(exp, &oinfo, max_age, set);
if (rc == 0)
rc = ptlrpc_set_wait(set);
@@ -1235,8 +1057,8 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
/* copy UUID */
if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd),
- min((int)data->ioc_plen2,
- (int)sizeof(struct obd_uuid))))
+ min_t(unsigned long, data->ioc_plen2,
+ sizeof(struct obd_uuid))))
return -EFAULT;
memcpy(&flags, data->ioc_inlbuf1, sizeof(__u32));
@@ -1249,8 +1071,8 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
if (rc)
return rc;
if (copy_to_user(data->ioc_pbuf1, &stat_buf,
- min((int)data->ioc_plen1,
- (int)sizeof(stat_buf))))
+ min_t(unsigned long, data->ioc_plen1,
+ sizeof(stat_buf))))
return -EFAULT;
break;
}
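
The switch to min_t() pins both operands to one explicit type before comparing; the old int casts could truncate a large user-supplied length and pick the wrong bound. A simplified userspace rendition of the kernel's min_t() showing the hazard (hypothetical values):

	#include <stdio.h>

	/* simplified rendition of the kernel's min_t() (GNU C extension) */
	#define min_t(type, x, y) ({		\
		type __x = (x);			\
		type __y = (y);			\
		__x < __y ? __x : __y; })

	int main(void)
	{
		unsigned long plen = 0x100000000UL;	/* bogus huge length */
		size_t uuid_size = 40;

		/* (int)plen would wrap to 0 on LP64 and defeat the bound */
		printf("copy %lu bytes\n",
		       min_t(unsigned long, plen, uuid_size));	/* 40 */
		return 0;
	}
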
@@ -1367,8 +1189,6 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
osc_obd->obd_force = obddev->obd_force;
err = obd_iocontrol(cmd, lov->lov_tgts[i]->ltd_exp,
len, karg, uarg);
- if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK)
- return err;
if (err) {
if (lov->lov_tgts[i]->ltd_active) {
CDEBUG(err == -ENOTTY ?
@@ -1391,454 +1211,35 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
return rc;
}
-#define FIEMAP_BUFFER_SIZE 4096
-
-/**
- * Non-zero fe_logical indicates that this is a continuation FIEMAP
- * call. The local end offset and the device are sent in the first
- * fm_extent. This function calculates the stripe number from the index.
- * This function returns a stripe_no on which mapping is to be restarted.
- *
- * This function returns fm_end_offset which is the in-OST offset at which
- * mapping should be restarted. If fm_end_offset=0 is returned then caller
- * will re-calculate proper offset in next stripe.
- * Note that the first extent is passed to lov_get_info via the value field.
- *
- * \param fiemap fiemap request header
- * \param lsm striping information for the file
- * \param fm_start logical start of mapping
- * \param fm_end logical end of mapping
- * \param start_stripe starting stripe will be returned in this
- */
-static u64 fiemap_calc_fm_end_offset(struct ll_user_fiemap *fiemap,
- struct lov_stripe_md *lsm, u64 fm_start,
- u64 fm_end, int *start_stripe)
-{
- u64 local_end = fiemap->fm_extents[0].fe_logical;
- u64 lun_start, lun_end;
- u64 fm_end_offset;
- int stripe_no = -1, i;
-
- if (fiemap->fm_extent_count == 0 ||
- fiemap->fm_extents[0].fe_logical == 0)
- return 0;
-
- /* Find out stripe_no from ost_index saved in the fe_device */
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- struct lov_oinfo *oinfo = lsm->lsm_oinfo[i];
-
- if (lov_oinfo_is_dummy(oinfo))
- continue;
-
- if (oinfo->loi_ost_idx == fiemap->fm_extents[0].fe_device) {
- stripe_no = i;
- break;
- }
- }
- if (stripe_no == -1)
- return -EINVAL;
-
- /* If we have finished mapping on previous device, shift logical
- * offset to start of next device
- */
- if ((lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end,
- &lun_start, &lun_end)) != 0 &&
- local_end < lun_end) {
- fm_end_offset = local_end;
- *start_stripe = stripe_no;
- } else {
- /* This is a special value to indicate that caller should
- * calculate offset in next stripe.
- */
- fm_end_offset = 0;
- *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count;
- }
-
- return fm_end_offset;
-}
-
-/**
- * We calculate on which OST the mapping will end. If the length of mapping
- * is greater than (stripe_size * stripe_count) then the last_stripe
- * will be one just before start_stripe. Else we check if the mapping
- * intersects each OST and find last_stripe.
- * This function returns the last_stripe and also sets the stripe_count
- * over which the mapping is spread
- *
- * \param lsm striping information for the file
- * \param fm_start logical start of mapping
- * \param fm_end logical end of mapping
- * \param start_stripe starting stripe of the mapping
- * \param stripe_count the number of stripes across which to map is returned
- *
- * \retval last_stripe return the last stripe of the mapping
- */
-static int fiemap_calc_last_stripe(struct lov_stripe_md *lsm, u64 fm_start,
- u64 fm_end, int start_stripe,
- int *stripe_count)
-{
- int last_stripe;
- u64 obd_start, obd_end;
- int i, j;
-
- if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) {
- last_stripe = start_stripe < 1 ? lsm->lsm_stripe_count - 1 :
- start_stripe - 1;
- *stripe_count = lsm->lsm_stripe_count;
- } else {
- for (j = 0, i = start_stripe; j < lsm->lsm_stripe_count;
- i = (i + 1) % lsm->lsm_stripe_count, j++) {
- if ((lov_stripe_intersects(lsm, i, fm_start, fm_end,
- &obd_start, &obd_end)) == 0)
- break;
- }
- *stripe_count = j;
- last_stripe = (start_stripe + j - 1) % lsm->lsm_stripe_count;
- }
-
- return last_stripe;
-}
-
-/**
- * Set fe_device and copy extents from local buffer into main return buffer.
- *
- * \param fiemap fiemap request header
- * \param lcl_fm_ext array of local fiemap extents to be copied
- * \param ost_index OST index to be written into the fm_device field for each
- extent
- * \param ext_count number of extents to be copied
- * \param current_extent where to start copying in main extent array
- */
-static void fiemap_prepare_and_copy_exts(struct ll_user_fiemap *fiemap,
- struct ll_fiemap_extent *lcl_fm_ext,
- int ost_index, unsigned int ext_count,
- int current_extent)
-{
- char *to;
- int ext;
-
- for (ext = 0; ext < ext_count; ext++) {
- lcl_fm_ext[ext].fe_device = ost_index;
- lcl_fm_ext[ext].fe_flags |= FIEMAP_EXTENT_NET;
- }
-
- /* Copy fm_extent's from fm_local to return buffer */
- to = (char *)fiemap + fiemap_count_to_size(current_extent);
- memcpy(to, lcl_fm_ext, ext_count * sizeof(struct ll_fiemap_extent));
-}
-
-/**
- * Break down the FIEMAP request and send appropriate calls to individual OSTs.
- * This also handles the restarting of FIEMAP calls in case mapping overflows
- * the available number of extents in single call.
- */
-static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
- __u32 *vallen, void *val, struct lov_stripe_md *lsm)
-{
- struct ll_fiemap_info_key *fm_key = key;
- struct ll_user_fiemap *fiemap = val;
- struct ll_user_fiemap *fm_local = NULL;
- struct ll_fiemap_extent *lcl_fm_ext;
- int count_local;
- unsigned int get_num_extents = 0;
- int ost_index = 0, actual_start_stripe, start_stripe;
- u64 fm_start, fm_end, fm_length, fm_end_offset;
- u64 curr_loc;
- int current_extent = 0, rc = 0, i;
- /* Whether we have collected enough extents */
- bool enough = false;
- int ost_eof = 0; /* EOF for object */
- int ost_done = 0; /* done with required mapping for this OST? */
- int last_stripe;
- int cur_stripe = 0, cur_stripe_wrap = 0, stripe_count;
- unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
-
- if (!lsm_has_objects(lsm)) {
- if (lsm && lsm_is_released(lsm) && (fm_key->fiemap.fm_start <
- fm_key->oa.o_size)) {
- /*
- * released file, return a minimal FIEMAP if
- * request fits in file-size.
- */
- fiemap->fm_mapped_extents = 1;
- fiemap->fm_extents[0].fe_logical =
- fm_key->fiemap.fm_start;
- if (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length <
- fm_key->oa.o_size) {
- fiemap->fm_extents[0].fe_length =
- fm_key->fiemap.fm_length;
- } else {
- fiemap->fm_extents[0].fe_length =
- fm_key->oa.o_size - fm_key->fiemap.fm_start;
- fiemap->fm_extents[0].fe_flags |=
- (FIEMAP_EXTENT_UNKNOWN |
- FIEMAP_EXTENT_LAST);
- }
- }
- rc = 0;
- goto out;
- }
-
- if (fiemap_count_to_size(fm_key->fiemap.fm_extent_count) < buffer_size)
- buffer_size = fiemap_count_to_size(fm_key->fiemap.fm_extent_count);
-
- fm_local = libcfs_kvzalloc(buffer_size, GFP_NOFS);
- if (!fm_local) {
- rc = -ENOMEM;
- goto out;
- }
- lcl_fm_ext = &fm_local->fm_extents[0];
-
- count_local = fiemap_size_to_count(buffer_size);
-
- memcpy(fiemap, &fm_key->fiemap, sizeof(*fiemap));
- fm_start = fiemap->fm_start;
- fm_length = fiemap->fm_length;
- /* Calculate start stripe, last stripe and length of mapping */
- start_stripe = lov_stripe_number(lsm, fm_start);
- actual_start_stripe = start_stripe;
- fm_end = (fm_length == ~0ULL ? fm_key->oa.o_size :
- fm_start + fm_length - 1);
- /* If fm_length != ~0ULL but fm_start+fm_length-1 exceeds file size */
- if (fm_end > fm_key->oa.o_size)
- fm_end = fm_key->oa.o_size;
-
- last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end,
- actual_start_stripe,
- &stripe_count);
-
- fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start,
- fm_end, &start_stripe);
- if (fm_end_offset == -EINVAL) {
- rc = -EINVAL;
- goto out;
- }
-
- if (fiemap_count_to_size(fiemap->fm_extent_count) > *vallen)
- fiemap->fm_extent_count = fiemap_size_to_count(*vallen);
- if (fiemap->fm_extent_count == 0) {
- get_num_extents = 1;
- count_local = 0;
- }
- /* Check each stripe */
- for (cur_stripe = start_stripe, i = 0; i < stripe_count;
- i++, cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) {
- u64 req_fm_len; /* Stores length of required mapping */
- u64 len_mapped_single_call;
- u64 lun_start, lun_end, obd_object_end;
- unsigned int ext_count;
-
- cur_stripe_wrap = cur_stripe;
-
- /* Find out range of mapping on this stripe */
- if ((lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end,
- &lun_start, &obd_object_end)) == 0)
- continue;
-
- if (lov_oinfo_is_dummy(lsm->lsm_oinfo[cur_stripe])) {
- rc = -EIO;
- goto out;
- }
-
- /* If this is a continuation FIEMAP call and we are on
- * starting stripe then lun_start needs to be set to
- * fm_end_offset
- */
- if (fm_end_offset != 0 && cur_stripe == start_stripe)
- lun_start = fm_end_offset;
-
- if (fm_length != ~0ULL) {
- /* Handle fm_start + fm_length overflow */
- if (fm_start + fm_length < fm_start)
- fm_length = ~0ULL - fm_start;
- lun_end = lov_size_to_stripe(lsm, fm_start + fm_length,
- cur_stripe);
- } else {
- lun_end = ~0ULL;
- }
-
- if (lun_start == lun_end)
- continue;
-
- req_fm_len = obd_object_end - lun_start;
- fm_local->fm_length = 0;
- len_mapped_single_call = 0;
-
- /* If the output buffer is very large and the objects have many
- * extents we may need to loop on a single OST repeatedly
- */
- ost_eof = 0;
- ost_done = 0;
- do {
- if (get_num_extents == 0) {
- /* Don't get too many extents. */
- if (current_extent + count_local >
- fiemap->fm_extent_count)
- count_local = fiemap->fm_extent_count -
- current_extent;
- }
-
- lun_start += len_mapped_single_call;
- fm_local->fm_length = req_fm_len - len_mapped_single_call;
- req_fm_len = fm_local->fm_length;
- fm_local->fm_extent_count = enough ? 1 : count_local;
- fm_local->fm_mapped_extents = 0;
- fm_local->fm_flags = fiemap->fm_flags;
-
- fm_key->oa.o_oi = lsm->lsm_oinfo[cur_stripe]->loi_oi;
- ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx;
-
- if (ost_index < 0 ||
- ost_index >= lov->desc.ld_tgt_count) {
- rc = -EINVAL;
- goto out;
- }
-
- /* If OST is inactive, return extent with UNKNOWN flag */
- if (!lov->lov_tgts[ost_index]->ltd_active) {
- fm_local->fm_flags |= FIEMAP_EXTENT_LAST;
- fm_local->fm_mapped_extents = 1;
-
- lcl_fm_ext[0].fe_logical = lun_start;
- lcl_fm_ext[0].fe_length = obd_object_end -
- lun_start;
- lcl_fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
-
- goto inactive_tgt;
- }
-
- fm_local->fm_start = lun_start;
- fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
- memcpy(&fm_key->fiemap, fm_local, sizeof(*fm_local));
- *vallen = fiemap_count_to_size(fm_local->fm_extent_count);
- rc = obd_get_info(NULL,
- lov->lov_tgts[ost_index]->ltd_exp,
- keylen, key, vallen, fm_local, lsm);
- if (rc != 0)
- goto out;
-
-inactive_tgt:
- ext_count = fm_local->fm_mapped_extents;
- if (ext_count == 0) {
- ost_done = 1;
- /* If last stripe has hole at the end,
- * then we need to return
- */
- if (cur_stripe_wrap == last_stripe) {
- fiemap->fm_mapped_extents = 0;
- goto finish;
- }
- break;
- } else if (enough) {
- /*
- * We've collected enough extents and there are
- * more extents after it.
- */
- goto finish;
- }
-
- /* If we just need num of extents then go to next device */
- if (get_num_extents) {
- current_extent += ext_count;
- break;
- }
-
- len_mapped_single_call =
- lcl_fm_ext[ext_count - 1].fe_logical -
- lun_start + lcl_fm_ext[ext_count - 1].fe_length;
-
- /* Have we finished mapping on this device? */
- if (req_fm_len <= len_mapped_single_call)
- ost_done = 1;
-
- /* Clear the EXTENT_LAST flag which can be present on
- * last extent
- */
- if (lcl_fm_ext[ext_count - 1].fe_flags &
- FIEMAP_EXTENT_LAST)
- lcl_fm_ext[ext_count - 1].fe_flags &=
- ~FIEMAP_EXTENT_LAST;
-
- curr_loc = lov_stripe_size(lsm,
- lcl_fm_ext[ext_count - 1].fe_logical +
- lcl_fm_ext[ext_count - 1].fe_length,
- cur_stripe);
- if (curr_loc >= fm_key->oa.o_size)
- ost_eof = 1;
-
- fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext,
- ost_index, ext_count,
- current_extent);
-
- current_extent += ext_count;
-
- /* Ran out of available extents? */
- if (current_extent >= fiemap->fm_extent_count)
- enough = true;
- } while (ost_done == 0 && ost_eof == 0);
-
- if (cur_stripe_wrap == last_stripe)
- goto finish;
- }
-
-finish:
- /* Indicate that we are returning device offsets unless file just has
- * single stripe
- */
- if (lsm->lsm_stripe_count > 1)
- fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER;
-
- if (get_num_extents)
- goto skip_last_device_calc;
-
- /* Check if we have reached the last stripe and whether mapping for that
- * stripe is done.
- */
- if (cur_stripe_wrap == last_stripe) {
- if (ost_done || ost_eof)
- fiemap->fm_extents[current_extent - 1].fe_flags |=
- FIEMAP_EXTENT_LAST;
- }
-
-skip_last_device_calc:
- fiemap->fm_mapped_extents = current_extent;
-
-out:
- kvfree(fm_local);
- return rc;
-}
-
static int lov_get_info(const struct lu_env *env, struct obd_export *exp,
- __u32 keylen, void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *lsm)
+ __u32 keylen, void *key, __u32 *vallen, void *val)
{
struct obd_device *obddev = class_exp2obd(exp);
struct lov_obd *lov = &obddev->u.lov;
- int rc;
+ struct lov_desc *ld = &lov->desc;
+ int rc = 0;
if (!vallen || !val)
return -EFAULT;
obd_getref(obddev);
- if (KEY_IS(KEY_LOVDESC)) {
- struct lov_desc *desc_ret = val;
- *desc_ret = lov->desc;
+ if (KEY_IS(KEY_MAX_EASIZE)) {
+ u32 max_stripe_count = min_t(u32, ld->ld_active_tgt_count,
+ LOV_MAX_STRIPE_COUNT);
- rc = 0;
- goto out;
- } else if (KEY_IS(KEY_FIEMAP)) {
- rc = lov_fiemap(lov, keylen, key, vallen, val, lsm);
- goto out;
+ *((u32 *)val) = lov_mds_md_size(max_stripe_count, LOV_MAGIC_V3);
+ } else if (KEY_IS(KEY_DEFAULT_EASIZE)) {
+ u32 def_stripe_count = min_t(u32, ld->ld_default_stripe_count,
+ LOV_MAX_STRIPE_COUNT);
+
+ *((u32 *)val) = lov_mds_md_size(def_stripe_count, LOV_MAGIC_V3);
} else if (KEY_IS(KEY_TGT_COUNT)) {
*((int *)val) = lov->desc.ld_tgt_count;
- rc = 0;
- goto out;
+ } else {
+ rc = -EINVAL;
}
- rc = -EINVAL;
-
-out:
obd_putref(obddev);
return rc;
}
@@ -1926,12 +1327,8 @@ static int lov_quotactl(struct obd_device *obd, struct obd_export *exp,
__u64 bhardlimit = 0;
int i, rc = 0;
- if (oqctl->qc_cmd != LUSTRE_Q_QUOTAON &&
- oqctl->qc_cmd != LUSTRE_Q_QUOTAOFF &&
- oqctl->qc_cmd != Q_GETOQUOTA &&
- oqctl->qc_cmd != Q_INITQUOTA &&
- oqctl->qc_cmd != LUSTRE_Q_SETQUOTA &&
- oqctl->qc_cmd != Q_FINVALIDATE) {
+ if (oqctl->qc_cmd != Q_GETOQUOTA &&
+ oqctl->qc_cmd != LUSTRE_Q_SETQUOTA) {
CERROR("bad quota opc %x for lov obd\n", oqctl->qc_cmd);
return -EFAULT;
}
@@ -1978,63 +1375,15 @@ static int lov_quotactl(struct obd_device *obd, struct obd_export *exp,
return rc;
}
-static int lov_quotacheck(struct obd_device *obd, struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- struct lov_obd *lov = &obd->u.lov;
- int i, rc = 0;
-
- obd_getref(obd);
-
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- if (!lov->lov_tgts[i])
- continue;
-
- /* Skip quota check on the administratively disabled OSTs. */
- if (!lov->lov_tgts[i]->ltd_activate) {
- CWARN("lov idx %d was administratively disabled, skip quotacheck on it.\n",
- i);
- continue;
- }
-
- if (!lov->lov_tgts[i]->ltd_active) {
- CERROR("lov idx %d inactive\n", i);
- rc = -EIO;
- goto out;
- }
- }
-
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- int err;
-
- if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_activate)
- continue;
-
- err = obd_quotacheck(lov->lov_tgts[i]->ltd_exp, oqctl);
- if (err && !rc)
- rc = err;
- }
-
-out:
- obd_putref(obd);
-
- return rc;
-}
-
static struct obd_ops lov_obd_ops = {
.owner = THIS_MODULE,
.setup = lov_setup,
- .precleanup = lov_precleanup,
.cleanup = lov_cleanup,
/*.process_config = lov_process_config,*/
.connect = lov_connect,
.disconnect = lov_disconnect,
.statfs = lov_statfs,
.statfs_async = lov_statfs_async,
- .packmd = lov_packmd,
- .unpackmd = lov_unpackmd,
- .getattr_async = lov_getattr_async,
- .setattr_async = lov_setattr_async,
.iocontrol = lov_iocontrol,
.get_info = lov_get_info,
.set_info_async = lov_set_info_async,
@@ -2046,7 +1395,6 @@ static struct obd_ops lov_obd_ops = {
.getref = lov_getref,
.putref = lov_putref,
.quotactl = lov_quotactl,
- .quotacheck = lov_quotacheck,
};
struct kmem_cache *lov_oinfo_slab;
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index 52f736338887..76d4256fa828 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -39,6 +39,11 @@
#include "lov_cl_internal.h"
+static inline struct lov_device *lov_object_dev(struct lov_object *obj)
+{
+ return lu2lov_dev(obj->lo_cl.co_lu.lo_dev);
+}
+
/** \addtogroup lov
* @{
*/
@@ -51,7 +56,7 @@
struct lov_layout_operations {
int (*llo_init)(const struct lu_env *env, struct lov_device *dev,
- struct lov_object *lov,
+ struct lov_object *lov, struct lov_stripe_md *lsm,
const struct cl_object_conf *conf,
union lov_layout_state *state);
int (*llo_delete)(const struct lu_env *env, struct lov_object *lov,
@@ -75,12 +80,11 @@ struct lov_layout_operations {
static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov);
-void lov_lsm_put(struct cl_object *unused, struct lov_stripe_md *lsm)
+static void lov_lsm_put(struct lov_stripe_md *lsm)
{
if (lsm)
lov_free_memmd(&lsm);
}
-EXPORT_SYMBOL(lov_lsm_put);
/*****************************************************************************
*
@@ -97,17 +101,17 @@ static void lov_install_empty(const struct lu_env *env,
*/
}
-static int lov_init_empty(const struct lu_env *env,
- struct lov_device *dev, struct lov_object *lov,
+static int lov_init_empty(const struct lu_env *env, struct lov_device *dev,
+ struct lov_object *lov, struct lov_stripe_md *lsm,
const struct cl_object_conf *conf,
- union lov_layout_state *state)
+ union lov_layout_state *state)
{
return 0;
}
static void lov_install_raid0(const struct lu_env *env,
struct lov_object *lov,
- union lov_layout_state *state)
+ union lov_layout_state *state)
{
}
@@ -212,8 +216,8 @@ static int lov_page_slice_fixup(struct lov_object *lov,
return cl_object_header(stripe)->coh_page_bufsize;
}
-static int lov_init_raid0(const struct lu_env *env,
- struct lov_device *dev, struct lov_object *lov,
+static int lov_init_raid0(const struct lu_env *env, struct lov_device *dev,
+ struct lov_object *lov, struct lov_stripe_md *lsm,
const struct cl_object_conf *conf,
union lov_layout_state *state)
{
@@ -223,7 +227,6 @@ static int lov_init_raid0(const struct lu_env *env,
struct cl_object *stripe;
struct lov_thread_info *lti = lov_env_info(env);
struct cl_object_conf *subconf = &lti->lti_stripe_conf;
- struct lov_stripe_md *lsm = conf->u.coc_md->lsm;
struct lu_fid *ofid = &lti->lti_fid;
struct lov_layout_raid0 *r0 = &state->raid0;
@@ -298,13 +301,11 @@ out:
return result;
}
-static int lov_init_released(const struct lu_env *env,
- struct lov_device *dev, struct lov_object *lov,
+static int lov_init_released(const struct lu_env *env, struct lov_device *dev,
+ struct lov_object *lov, struct lov_stripe_md *lsm,
const struct cl_object_conf *conf,
union lov_layout_state *state)
{
- struct lov_stripe_md *lsm = conf->u.coc_md->lsm;
-
LASSERT(lsm);
LASSERT(lsm_is_released(lsm));
LASSERT(!lov->lo_lsm);
@@ -313,6 +314,40 @@ static int lov_init_released(const struct lu_env *env,
return 0;
}
+static struct cl_object *lov_find_subobj(const struct lu_env *env,
+ struct lov_object *lov,
+ struct lov_stripe_md *lsm,
+ int stripe_idx)
+{
+ struct lov_device *dev = lu2lov_dev(lov2lu(lov)->lo_dev);
+ struct lov_oinfo *oinfo = lsm->lsm_oinfo[stripe_idx];
+ struct lov_thread_info *lti = lov_env_info(env);
+ struct lu_fid *ofid = &lti->lti_fid;
+ struct cl_device *subdev;
+ struct cl_object *result;
+ int ost_idx;
+ int rc;
+
+ if (lov->lo_type != LLT_RAID0) {
+ result = NULL;
+ goto out;
+ }
+
+ ost_idx = oinfo->loi_ost_idx;
+ rc = ostid_to_fid(ofid, &oinfo->loi_oi, ost_idx);
+ if (rc) {
+ result = NULL;
+ goto out;
+ }
+
+ subdev = lovsub2cl_dev(dev->ld_target[ost_idx]);
+ result = lov_sub_find(env, subdev, ofid, NULL);
+out:
+ if (!result)
+ result = ERR_PTR(-EINVAL);
+ return result;
+}
+
static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
union lov_layout_state *state)
{
@@ -687,31 +722,24 @@ static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov)
}
static int lov_layout_change(const struct lu_env *unused,
- struct lov_object *lov,
+ struct lov_object *lov, struct lov_stripe_md *lsm,
const struct cl_object_conf *conf)
{
- int result;
- enum lov_layout_type llt = LLT_EMPTY;
+ enum lov_layout_type llt = lov_type(lsm);
union lov_layout_state *state = &lov->u;
const struct lov_layout_operations *old_ops;
const struct lov_layout_operations *new_ops;
-
- void *cookie;
struct lu_env *env;
int refcheck;
+ int rc;
LASSERT(0 <= lov->lo_type && lov->lo_type < ARRAY_SIZE(lov_dispatch));
- if (conf->u.coc_md)
- llt = lov_type(conf->u.coc_md->lsm);
- LASSERT(0 <= llt && llt < ARRAY_SIZE(lov_dispatch));
-
- cookie = cl_env_reenter();
env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- cl_env_reexit(cookie);
+ if (IS_ERR(env))
return PTR_ERR(env);
- }
+
+ LASSERT(0 <= llt && llt < ARRAY_SIZE(lov_dispatch));
CDEBUG(D_INODE, DFID" from %s to %s\n",
PFID(lu_object_fid(lov2lu(lov))),
@@ -720,38 +748,37 @@ static int lov_layout_change(const struct lu_env *unused,
old_ops = &lov_dispatch[lov->lo_type];
new_ops = &lov_dispatch[llt];
- result = cl_object_prune(env, &lov->lo_cl);
- if (result != 0)
+ rc = cl_object_prune(env, &lov->lo_cl);
+ if (rc)
+ goto out;
+
+ rc = old_ops->llo_delete(env, lov, &lov->u);
+ if (rc)
goto out;
- result = old_ops->llo_delete(env, lov, &lov->u);
- if (result == 0) {
- old_ops->llo_fini(env, lov, &lov->u);
+ old_ops->llo_fini(env, lov, &lov->u);
- LASSERT(atomic_read(&lov->lo_active_ios) == 0);
+ LASSERT(!atomic_read(&lov->lo_active_ios));
- lov->lo_type = LLT_EMPTY;
- /* page bufsize fixup */
- cl_object_header(&lov->lo_cl)->coh_page_bufsize -=
+ lov->lo_type = LLT_EMPTY;
+
+ /* page bufsize fixup */
+ cl_object_header(&lov->lo_cl)->coh_page_bufsize -=
lov_page_slice_fixup(lov, NULL);
- result = new_ops->llo_init(env,
- lu2lov_dev(lov->lo_cl.co_lu.lo_dev),
- lov, conf, state);
- if (result == 0) {
- new_ops->llo_install(env, lov, state);
- lov->lo_type = llt;
- } else {
- new_ops->llo_delete(env, lov, state);
- new_ops->llo_fini(env, lov, state);
- /* this file becomes an EMPTY file. */
- }
+ rc = new_ops->llo_init(env, lov_object_dev(lov), lov, lsm, conf, state);
+ if (rc) {
+ new_ops->llo_delete(env, lov, state);
+ new_ops->llo_fini(env, lov, state);
+ /* this file becomes an EMPTY file. */
+ goto out;
}
+ new_ops->llo_install(env, lov, state);
+ lov->lo_type = llt;
out:
cl_env_put(env, &refcheck);
- cl_env_reexit(cookie);
- return result;
+ return rc;
}
/*****************************************************************************
@@ -762,26 +789,38 @@ out:
int lov_object_init(const struct lu_env *env, struct lu_object *obj,
const struct lu_object_conf *conf)
{
- struct lov_device *dev = lu2lov_dev(obj->lo_dev);
struct lov_object *lov = lu2lov(obj);
+ struct lov_device *dev = lov_object_dev(lov);
const struct cl_object_conf *cconf = lu2cl_conf(conf);
union lov_layout_state *set = &lov->u;
const struct lov_layout_operations *ops;
- int result;
+ struct lov_stripe_md *lsm = NULL;
+ int rc;
init_rwsem(&lov->lo_type_guard);
atomic_set(&lov->lo_active_ios, 0);
init_waitqueue_head(&lov->lo_waitq);
-
cl_object_page_init(lu2cl(obj), sizeof(struct lov_page));
+ lov->lo_type = LLT_EMPTY;
+ if (cconf->u.coc_layout.lb_buf) {
+ lsm = lov_unpackmd(dev->ld_lov,
+ cconf->u.coc_layout.lb_buf,
+ cconf->u.coc_layout.lb_len);
+ if (IS_ERR(lsm))
+ return PTR_ERR(lsm);
+ }
+
/* no locking is necessary, as object is being created */
- lov->lo_type = lov_type(cconf->u.coc_md->lsm);
+ lov->lo_type = lov_type(lsm);
ops = &lov_dispatch[lov->lo_type];
- result = ops->llo_init(env, dev, lov, cconf, set);
- if (result == 0)
+ rc = ops->llo_init(env, dev, lov, lsm, cconf, set);
+ if (!rc)
ops->llo_install(env, lov, set);
- return result;
+
+ lov_lsm_put(lsm);
+
+ return rc;
}
static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
@@ -791,6 +830,15 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
struct lov_object *lov = cl2lov(obj);
int result = 0;
+ if (conf->coc_opc == OBJECT_CONF_SET &&
+ conf->u.coc_layout.lb_buf) {
+ lsm = lov_unpackmd(lov_object_dev(lov)->ld_lov,
+ conf->u.coc_layout.lb_buf,
+ conf->u.coc_layout.lb_len);
+ if (IS_ERR(lsm))
+ return PTR_ERR(lsm);
+ }
+
lov_conf_lock(lov);
if (conf->coc_opc == OBJECT_CONF_INVALIDATE) {
lov->lo_layout_invalid = true;
@@ -810,8 +858,6 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
LASSERT(conf->coc_opc == OBJECT_CONF_SET);
- if (conf->u.coc_md)
- lsm = conf->u.coc_md->lsm;
if ((!lsm && !lov->lo_lsm) ||
((lsm && lov->lo_lsm) &&
(lov->lo_lsm->lsm_layout_gen == lsm->lsm_layout_gen) &&
@@ -829,11 +875,12 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
goto out;
}
- result = lov_layout_change(env, lov, conf);
+ result = lov_layout_change(env, lov, lsm, conf);
lov->lo_layout_invalid = result != 0;
out:
lov_conf_unlock(lov);
+ lov_lsm_put(lsm);
CDEBUG(D_INODE, DFID" lo_layout_invalid=%d\n",
PFID(lu_object_fid(lov2lu(lov))), lov->lo_layout_invalid);
return result;
@@ -911,6 +958,473 @@ int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
io);
}
+/**
+ * We calculate on which OST the mapping will end. If the length of mapping
+ * is greater than (stripe_size * stripe_count) then the last_stripe
+ * will be one just before start_stripe. Else we check if the mapping
+ * intersects each OST and find last_stripe.
+ * This function returns the last_stripe and also sets the stripe_count
+ * over which the mapping is spread
+ *
+ * \param lsm [in] striping information for the file
+ * \param fm_start [in] logical start of mapping
+ * \param fm_end [in] logical end of mapping
+ * \param start_stripe [in] starting stripe of the mapping
+ * \param stripe_count [out] the number of stripes across which to map is
+ * returned
+ *
+ * \retval last_stripe return the last stripe of the mapping
+ */
+static int fiemap_calc_last_stripe(struct lov_stripe_md *lsm,
+ loff_t fm_start, loff_t fm_end,
+ int start_stripe, int *stripe_count)
+{
+ int last_stripe;
+ loff_t obd_start;
+ loff_t obd_end;
+ int i, j;
+
+ if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) {
+ last_stripe = (start_stripe < 1 ? lsm->lsm_stripe_count - 1 :
+ start_stripe - 1);
+ *stripe_count = lsm->lsm_stripe_count;
+ } else {
+ for (j = 0, i = start_stripe; j < lsm->lsm_stripe_count;
+ i = (i + 1) % lsm->lsm_stripe_count, j++) {
+ if (!(lov_stripe_intersects(lsm, i, fm_start, fm_end,
+ &obd_start, &obd_end)))
+ break;
+ }
+ *stripe_count = j;
+ last_stripe = (start_stripe + j - 1) % lsm->lsm_stripe_count;
+ }
+
+ return last_stripe;
+}
+
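
Two cases fall out of the arithmetic above: a mapping wider than stripe_size * stripe_count touches every stripe, so the last one sits just before start_stripe; otherwise the last stripe is start_stripe plus the number of intersecting stripes, taken modulo the stripe count. A worked check with hypothetical geometry (4 stripes, start at stripe 1):

	#include <assert.h>

	int main(void)
	{
		int stripe_count = 4;
		int start_stripe = 1;
		int j = 3;	/* suppose 3 stripes intersect the mapping */

		/* partial mapping: last stripe wraps modulo the count */
		assert((start_stripe + j - 1) % stripe_count == 3);

		/* full-width mapping: last stripe just precedes the start */
		assert((start_stripe < 1 ? stripe_count - 1
					 : start_stripe - 1) == 0);
		return 0;
	}
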
+/**
+ * Set fe_device and copy extents from local buffer into main return buffer.
+ *
+ * \param fiemap [out] fiemap to hold all extents
+ * \param lcl_fm_ext [in] array of fiemap extents get from OSC layer
+ * \param ost_index [in] OST index to be written into the fm_device
+ * field for each extent
+ * \param ext_count [in] number of extents to be copied
+ * \param current_extent [in] where to start copying in the extent array
+ */
+static void fiemap_prepare_and_copy_exts(struct fiemap *fiemap,
+ struct fiemap_extent *lcl_fm_ext,
+ int ost_index, unsigned int ext_count,
+ int current_extent)
+{
+ unsigned int ext;
+ char *to;
+
+ for (ext = 0; ext < ext_count; ext++) {
+ lcl_fm_ext[ext].fe_device = ost_index;
+ lcl_fm_ext[ext].fe_flags |= FIEMAP_EXTENT_NET;
+ }
+
+ /* Copy fm_extent's from fm_local to return buffer */
+ to = (char *)fiemap + fiemap_count_to_size(current_extent);
+ memcpy(to, lcl_fm_ext, ext_count * sizeof(struct fiemap_extent));
+}
+
+#define FIEMAP_BUFFER_SIZE 4096
+
+/**
+ * Non-zero fe_logical indicates that this is a continuation FIEMAP
+ * call. The local end offset and the device are sent in the first
+ * fm_extent. This function calculates the stripe number from the index.
+ * This function returns a stripe_no on which mapping is to be restarted.
+ *
+ * This function returns fm_end_offset which is the in-OST offset at which
+ * mapping should be restarted. If fm_end_offset=0 is returned then caller
+ * will re-calculate proper offset in next stripe.
+ * Note that the first extent is passed in through the incoming fiemap buffer.
+ *
+ * \param fiemap [in] fiemap request header
+ * \param lsm [in] striping information for the file
+ * \param fm_start [in] logical start of mapping
+ * \param fm_end [in] logical end of mapping
+ * \param start_stripe [out] starting stripe will be returned in this
+ */
+static loff_t fiemap_calc_fm_end_offset(struct fiemap *fiemap,
+ struct lov_stripe_md *lsm,
+ loff_t fm_start, loff_t fm_end,
+ int *start_stripe)
+{
+ loff_t local_end = fiemap->fm_extents[0].fe_logical;
+ loff_t lun_start, lun_end;
+ loff_t fm_end_offset;
+ int stripe_no = -1;
+ int i;
+
+ if (!fiemap->fm_extent_count || !fiemap->fm_extents[0].fe_logical)
+ return 0;
+
+ /* Find out stripe_no from ost_index saved in the fe_device */
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ struct lov_oinfo *oinfo = lsm->lsm_oinfo[i];
+
+ if (lov_oinfo_is_dummy(oinfo))
+ continue;
+
+ if (oinfo->loi_ost_idx == fiemap->fm_extents[0].fe_device) {
+ stripe_no = i;
+ break;
+ }
+ }
+
+ if (stripe_no == -1)
+ return -EINVAL;
+
+ /*
+ * If we have finished mapping on previous device, shift logical
+ * offset to start of next device
+ */
+ if (lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end,
+ &lun_start, &lun_end) &&
+ local_end < lun_end) {
+ fm_end_offset = local_end;
+ *start_stripe = stripe_no;
+ } else {
+ /* This is a special value to indicate that caller should
+ * calculate offset in next stripe.
+ */
+ fm_end_offset = 0;
+ *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count;
+ }
+
+ return fm_end_offset;
+}
+
+/**
+ * Break down the FIEMAP request and send appropriate calls to individual OSTs.
+ * This also handles the restarting of FIEMAP calls in case mapping overflows
+ * the available number of extents in single call.
+ *
+ * \param env [in] lustre environment
+ * \param obj [in] file object
+ * \param fmkey [in] fiemap request header and other info
+ * \param fiemap [out] fiemap buffer holding retrieved map extents
+ * \param buflen [in/out] max buffer length of @fiemap; when iterating
+ * over each OST, it limits the map size requested
+ * \retval 0 success
+ * \retval < 0 error
+ */
+static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
+ struct ll_fiemap_info_key *fmkey,
+ struct fiemap *fiemap, size_t *buflen)
+{
+ struct lov_obd *lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov;
+ unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
+ struct fiemap_extent *lcl_fm_ext;
+ struct cl_object *subobj = NULL;
+ struct fiemap *fm_local = NULL;
+ struct lov_stripe_md *lsm;
+ loff_t fm_start;
+ loff_t fm_end;
+ loff_t fm_length;
+ loff_t fm_end_offset;
+ int count_local;
+ int ost_index = 0;
+ int start_stripe;
+ int current_extent = 0;
+ int rc = 0;
+ int last_stripe;
+ int cur_stripe = 0;
+ int cur_stripe_wrap = 0;
+ int stripe_count;
+ /* Whether we have collected enough extents */
+ bool enough = false;
+ /* EOF for object */
+ bool ost_eof = false;
+ /* done with required mapping for this OST? */
+ bool ost_done = false;
+
+ lsm = lov_lsm_addref(cl2lov(obj));
+ if (!lsm)
+ return -ENODATA;
+
+ /*
+ * If stripe_count > 1 and the application does not understand the
+ * DEVICE_ORDER flag, it cannot interpret the extents correctly.
+ */
+ if (lsm->lsm_stripe_count > 1 &&
+ !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) {
+ rc = -ENOTSUPP;
+ goto out;
+ }
+
+ if (lsm_is_released(lsm)) {
+ if (fiemap->fm_start < fmkey->lfik_oa.o_size) {
+ /*
+ * For a released file, return a minimal FIEMAP
+ * if the request fits within the file size.
+ */
+ fiemap->fm_mapped_extents = 1;
+ fiemap->fm_extents[0].fe_logical = fiemap->fm_start;
+ if (fiemap->fm_start + fiemap->fm_length <
+ fmkey->lfik_oa.o_size)
+ fiemap->fm_extents[0].fe_length =
+ fiemap->fm_length;
+ else
+ fiemap->fm_extents[0].fe_length =
+ fmkey->lfik_oa.o_size -
+ fiemap->fm_start;
+ fiemap->fm_extents[0].fe_flags |=
+ FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST;
+ }
+ rc = 0;
+ goto out;
+ }
+
+ if (fiemap_count_to_size(fiemap->fm_extent_count) < buffer_size)
+ buffer_size = fiemap_count_to_size(fiemap->fm_extent_count);
+
+ fm_local = libcfs_kvzalloc(buffer_size, GFP_NOFS);
+ if (!fm_local) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ lcl_fm_ext = &fm_local->fm_extents[0];
+ count_local = fiemap_size_to_count(buffer_size);
+
+ fm_start = fiemap->fm_start;
+ fm_length = fiemap->fm_length;
+ /* Calculate start stripe, last stripe and length of mapping */
+ start_stripe = lov_stripe_number(lsm, fm_start);
+ fm_end = (fm_length == ~0ULL) ? fmkey->lfik_oa.o_size :
+ fm_start + fm_length - 1;
+ /* If fm_length != ~0ULL but fm_start + fm_length - 1 exceeds file size */
+ if (fm_end > fmkey->lfik_oa.o_size)
+ fm_end = fmkey->lfik_oa.o_size;
+
+ last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end,
+ start_stripe, &stripe_count);
+ fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, fm_end,
+ &start_stripe);
+ if (fm_end_offset == -EINVAL) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * If the requested extent count exceeds the fiemap buffer size,
+ * shrink our ambition.
+ */
+ if (fiemap_count_to_size(fiemap->fm_extent_count) > *buflen)
+ fiemap->fm_extent_count = fiemap_size_to_count(*buflen);
+ if (!fiemap->fm_extent_count)
+ count_local = 0;
+
+ /* Check each stripe */
+ for (cur_stripe = start_stripe; stripe_count > 0;
+ --stripe_count,
+ cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) {
+ loff_t req_fm_len; /* Stores length of required mapping */
+ loff_t len_mapped_single_call;
+ loff_t lun_start;
+ loff_t lun_end;
+ loff_t obd_object_end;
+ unsigned int ext_count;
+
+ cur_stripe_wrap = cur_stripe;
+
+ /* Find out range of mapping on this stripe */
+ if (!(lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end,
+ &lun_start, &obd_object_end)))
+ continue;
+
+ if (lov_oinfo_is_dummy(lsm->lsm_oinfo[cur_stripe])) {
+ rc = -EIO;
+ goto out;
+ }
+
+ /*
+ * If this is a continuation FIEMAP call and we are on the
+ * starting stripe, then lun_start needs to be set to
+ * fm_end_offset.
+ */
+ if (fm_end_offset && cur_stripe == start_stripe)
+ lun_start = fm_end_offset;
+
+ if (fm_length != ~0ULL) {
+ /* Handle fm_start + fm_length overflow */
+ if (fm_start + fm_length < fm_start)
+ fm_length = ~0ULL - fm_start;
+ lun_end = lov_size_to_stripe(lsm, fm_start + fm_length,
+ cur_stripe);
+ } else {
+ lun_end = ~0ULL;
+ }
+
+ if (lun_start == lun_end)
+ continue;
+
+ req_fm_len = obd_object_end - lun_start;
+ fm_local->fm_length = 0;
+ len_mapped_single_call = 0;
+
+ /* find the lovsub object */
+ subobj = lov_find_subobj(env, cl2lov(obj), lsm,
+ cur_stripe);
+ if (IS_ERR(subobj)) {
+ rc = PTR_ERR(subobj);
+ goto out;
+ }
+ /*
+ * If the output buffer is very large and the objects have many
+ * extents, we may need to loop on a single OST repeatedly.
+ */
+ ost_eof = false;
+ ost_done = false;
+ do {
+ if (fiemap->fm_extent_count > 0) {
+ /* Don't get too many extents. */
+ if (current_extent + count_local >
+ fiemap->fm_extent_count)
+ count_local = fiemap->fm_extent_count -
+ current_extent;
+ }
+
+ lun_start += len_mapped_single_call;
+ fm_local->fm_length = req_fm_len -
+ len_mapped_single_call;
+ req_fm_len = fm_local->fm_length;
+ fm_local->fm_extent_count = enough ? 1 : count_local;
+ fm_local->fm_mapped_extents = 0;
+ fm_local->fm_flags = fiemap->fm_flags;
+
+ ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx;
+
+ if (ost_index < 0 ||
+ ost_index >= lov->desc.ld_tgt_count) {
+ rc = -EINVAL;
+ goto obj_put;
+ }
+ /*
+ * If the OST is inactive, return an extent with
+ * the UNKNOWN flag.
+ */
+ if (!lov->lov_tgts[ost_index]->ltd_active) {
+ fm_local->fm_flags |= FIEMAP_EXTENT_LAST;
+ fm_local->fm_mapped_extents = 1;
+
+ lcl_fm_ext[0].fe_logical = lun_start;
+ lcl_fm_ext[0].fe_length = obd_object_end -
+ lun_start;
+ lcl_fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
+
+ goto inactive_tgt;
+ }
+
+ fm_local->fm_start = lun_start;
+ fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
+ memcpy(&fmkey->lfik_fiemap, fm_local, sizeof(*fm_local));
+ *buflen = fiemap_count_to_size(fm_local->fm_extent_count);
+
+ rc = cl_object_fiemap(env, subobj, fmkey, fm_local,
+ buflen);
+ if (rc)
+ goto obj_put;
+inactive_tgt:
+ ext_count = fm_local->fm_mapped_extents;
+ if (!ext_count) {
+ ost_done = true;
+ /*
+ * If the last stripe has a hole at the end,
+ * we need to return.
+ */
+ if (cur_stripe_wrap == last_stripe) {
+ fiemap->fm_mapped_extents = 0;
+ goto finish;
+ }
+ break;
+ } else if (enough) {
+ /*
+ * We've collected enough extents and there are
+ * more extents after them.
+ */
+ goto finish;
+ }
+
+ /* If we only need the number of extents, go to the next device */
+ if (!fiemap->fm_extent_count) {
+ current_extent += ext_count;
+ break;
+ }
+
+ /* prepare to copy retrieved map extents */
+ len_mapped_single_call =
+ lcl_fm_ext[ext_count - 1].fe_logical -
+ lun_start + lcl_fm_ext[ext_count - 1].fe_length;
+
+ /* Have we finished mapping on this device? */
+ if (req_fm_len <= len_mapped_single_call)
+ ost_done = true;
+
+ /*
+ * Clear the EXTENT_LAST flag which can be present on
+ * the last extent
+ */
+ if (lcl_fm_ext[ext_count - 1].fe_flags &
+ FIEMAP_EXTENT_LAST)
+ lcl_fm_ext[ext_count - 1].fe_flags &=
+ ~FIEMAP_EXTENT_LAST;
+
+ if (lov_stripe_size(lsm,
+ lcl_fm_ext[ext_count - 1].fe_logical +
+ lcl_fm_ext[ext_count - 1].fe_length,
+ cur_stripe) >= fmkey->lfik_oa.o_size)
+ ost_eof = true;
+
+ fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext,
+ ost_index, ext_count,
+ current_extent);
+ current_extent += ext_count;
+
+ /* Ran out of available extents? */
+ if (current_extent >= fiemap->fm_extent_count)
+ enough = true;
+ } while (!ost_done && !ost_eof);
+
+ cl_object_put(env, subobj);
+ subobj = NULL;
+
+ if (cur_stripe_wrap == last_stripe)
+ goto finish;
+ } /* for each stripe */
+finish:
+ /*
+ * Indicate that we are returning device offsets unless the file
+ * has only a single stripe.
+ */
+ if (lsm->lsm_stripe_count > 1)
+ fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER;
+
+ if (!fiemap->fm_extent_count)
+ goto skip_last_device_calc;
+
+ /*
+ * Check if we have reached the last stripe and whether mapping for that
+ * stripe is done.
+ */
+ if ((cur_stripe_wrap == last_stripe) && (ost_done || ost_eof))
+ fiemap->fm_extents[current_extent - 1].fe_flags |=
+ FIEMAP_EXTENT_LAST;
+skip_last_device_calc:
+ fiemap->fm_mapped_extents = current_extent;
+obj_put:
+ if (subobj)
+ cl_object_put(env, subobj);
+out:
+ kvfree(fm_local);
+ lov_lsm_put(lsm);
+ return rc;
+}
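The stripe walk above leans on the RAID0 offset helpers (lov_stripe_number(), lov_size_to_stripe(), lov_stripe_intersects()) defined in lov_offset.c. Their core arithmetic reduces to the following sketch, assuming a plain RAID0 layout:

static int raid0_stripe_number(u32 stripe_size, u16 stripe_count,
			       loff_t file_off)
{
	/* which stripe a file offset lands on */
	return (file_off / stripe_size) % stripe_count;
}

static loff_t raid0_stripe_offset(u32 stripe_size, u16 stripe_count,
				  loff_t file_off)
{
	/* the in-OST offset of @file_off within its own stripe */
	loff_t row = file_off / ((loff_t)stripe_size * stripe_count);

	return row * stripe_size + file_off % stripe_size;
}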
+
static int lov_object_getstripe(const struct lu_env *env, struct cl_object *obj,
struct lov_user_md __user *lum)
{
@@ -923,10 +1437,53 @@ static int lov_object_getstripe(const struct lu_env *env, struct cl_object *obj,
return -ENODATA;
rc = lov_getstripe(cl2lov(obj), lsm, lum);
- lov_lsm_put(obj, lsm);
+ lov_lsm_put(lsm);
return rc;
}
+static int lov_object_layout_get(const struct lu_env *env,
+ struct cl_object *obj,
+ struct cl_layout *cl)
+{
+ struct lov_object *lov = cl2lov(obj);
+ struct lov_stripe_md *lsm = lov_lsm_addref(lov);
+ struct lu_buf *buf = &cl->cl_buf;
+ ssize_t rc;
+
+ if (!lsm) {
+ cl->cl_size = 0;
+ cl->cl_layout_gen = CL_LAYOUT_GEN_EMPTY;
+ cl->cl_is_released = false;
+
+ return 0;
+ }
+
+ cl->cl_size = lov_mds_md_size(lsm->lsm_stripe_count, lsm->lsm_magic);
+ cl->cl_layout_gen = lsm->lsm_layout_gen;
+ cl->cl_is_released = lsm_is_released(lsm);
+
+ rc = lov_lsm_pack(lsm, buf->lb_buf, buf->lb_len);
+ lov_lsm_put(lsm);
+
+ return rc < 0 ? rc : 0;
+}
+
+static loff_t lov_object_maxbytes(struct cl_object *obj)
+{
+ struct lov_object *lov = cl2lov(obj);
+ struct lov_stripe_md *lsm = lov_lsm_addref(lov);
+ loff_t maxbytes;
+
+ if (!lsm)
+ return LLONG_MAX;
+
+ maxbytes = lsm->lsm_maxbytes;
+
+ lov_lsm_put(lsm);
+
+ return maxbytes;
+}
+
static const struct cl_object_operations lov_ops = {
.coo_page_init = lov_page_init,
.coo_lock_init = lov_lock_init,
@@ -934,7 +1491,10 @@ static const struct cl_object_operations lov_ops = {
.coo_attr_get = lov_attr_get,
.coo_attr_update = lov_attr_update,
.coo_conf_set = lov_conf_set,
- .coo_getstripe = lov_object_getstripe
+ .coo_getstripe = lov_object_getstripe,
+ .coo_layout_get = lov_object_layout_get,
+ .coo_maxbytes = lov_object_maxbytes,
+ .coo_fiemap = lov_object_fiemap,
};
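The new .coo_fiemap slot is reached through cl_object_fiemap(), which, like the other coo_* dispatchers, walks the layered object and invokes the method on each layer that implements it. A sketch of that dispatch pattern (not the exact cl_object.c code):

int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
		     struct ll_fiemap_info_key *key, struct fiemap *fiemap,
		     size_t *buflen)
{
	struct lu_object_header *top = obj->co_lu.lo_header;
	int rc = 0;

	/* visit every layer that provides a coo_fiemap method */
	list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
		if (obj->co_ops->coo_fiemap) {
			rc = obj->co_ops->coo_fiemap(env, obj, key, fiemap,
						     buflen);
			if (rc)
				break;
		}
	}
	return rc;
}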
static const struct lu_object_operations lov_lu_obj_ops = {
@@ -986,22 +1546,6 @@ struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov)
return lsm;
}
-struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj)
-{
- struct lu_object *luobj;
- struct lov_stripe_md *lsm = NULL;
-
- if (!clobj)
- return NULL;
-
- luobj = lu_object_locate(&cl_object_header(clobj)->coh_lu,
- &lov_device_type);
- if (luobj)
- lsm = lov_lsm_addref(lu2lov(luobj));
- return lsm;
-}
-EXPORT_SYMBOL(lov_lsm_get);
-
int lov_read_and_clear_async_rc(struct cl_object *clob)
{
struct lu_object *luobj;
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
index be6e9857ce2a..6c93d180aef7 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pack.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pack.c
@@ -38,14 +38,17 @@
#define DEBUG_SUBSYSTEM S_LOV
+#include "../include/lustre/lustre_idl.h"
+#include "../include/lustre/lustre_user.h"
+
#include "../include/lustre_net.h"
+#include "../include/lustre_swab.h"
#include "../include/obd.h"
#include "../include/obd_class.h"
#include "../include/obd_support.h"
-#include "../include/lustre/lustre_user.h"
-#include "lov_internal.h"
#include "lov_cl_internal.h"
+#include "lov_internal.h"
void lov_dump_lmm_common(int level, void *lmmp)
{
@@ -97,120 +100,54 @@ void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm)
le16_to_cpu(lmm->lmm_stripe_count));
}
-/* Pack LOV object metadata for disk storage. It is packed in LE byte
- * order and is opaque to the networking layer.
+/**
+ * Pack LOV striping metadata into the on-disk storage format (in
+ * little-endian byte order).
*
- * XXX In the future, this will be enhanced to get the EA size from the
- * underlying OSC device(s) to get their EA sizes so we can stack
- * LOVs properly. For now lov_mds_md_size() just assumes one u64
- * per stripe.
+ * This follows the getxattr() conventions. If \a buf_size is zero
+ * then return the size needed. If \a buf_size is too small then
+ * return -ERANGE. Otherwise return the size of the result.
*/
-int lov_obd_packmd(struct lov_obd *lov, struct lov_mds_md **lmmp,
- struct lov_stripe_md *lsm)
+ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf,
+ size_t buf_size)
{
- struct lov_mds_md_v1 *lmmv1;
- struct lov_mds_md_v3 *lmmv3;
- __u16 stripe_count;
struct lov_ost_data_v1 *lmm_objects;
- int lmm_size, lmm_magic;
- int i;
- int cplen = 0;
-
- if (lsm) {
- lmm_magic = lsm->lsm_magic;
- } else {
- if (lmmp && *lmmp)
- lmm_magic = le32_to_cpu((*lmmp)->lmm_magic);
- else
- /* lsm == NULL and lmmp == NULL */
- lmm_magic = LOV_MAGIC;
- }
-
- if ((lmm_magic != LOV_MAGIC_V1) &&
- (lmm_magic != LOV_MAGIC_V3)) {
- CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
- lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
- return -EINVAL;
- }
-
- if (lsm) {
- /* If we are just sizing the EA, limit the stripe count
- * to the actual number of OSTs in this filesystem.
- */
- if (!lmmp) {
- stripe_count = lov_get_stripecnt(lov, lmm_magic,
- lsm->lsm_stripe_count);
- lsm->lsm_stripe_count = stripe_count;
- } else if (!lsm_is_released(lsm)) {
- stripe_count = lsm->lsm_stripe_count;
- } else {
- stripe_count = 0;
- }
- } else {
- /*
- * To calculate maximum easize by active targets at present,
- * which is exactly the maximum easize to be seen by LOV
- */
- stripe_count = lov->desc.ld_active_tgt_count;
- }
+ struct lov_mds_md_v1 *lmmv1 = buf;
+ struct lov_mds_md_v3 *lmmv3 = buf;
+ size_t lmm_size;
+ unsigned int i;
- /* XXX LOV STACKING call into osc for sizes */
- lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
-
- if (!lmmp)
+ lmm_size = lov_mds_md_size(lsm->lsm_stripe_count, lsm->lsm_magic);
+ if (!buf_size)
return lmm_size;
- if (*lmmp && !lsm) {
- stripe_count = le16_to_cpu((*lmmp)->lmm_stripe_count);
- lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
- kvfree(*lmmp);
- *lmmp = NULL;
- return 0;
- }
-
- if (!*lmmp) {
- *lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS);
- if (!*lmmp)
- return -ENOMEM;
- }
-
- CDEBUG(D_INFO, "lov_packmd: LOV_MAGIC 0x%08X, lmm_size = %d\n",
- lmm_magic, lmm_size);
-
- lmmv1 = *lmmp;
- lmmv3 = (struct lov_mds_md_v3 *)*lmmp;
- if (lmm_magic == LOV_MAGIC_V3)
- lmmv3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
- else
- lmmv1->lmm_magic = cpu_to_le32(LOV_MAGIC_V1);
-
- if (!lsm)
- return lmm_size;
+ if (buf_size < lmm_size)
+ return -ERANGE;
- /* lmmv1 and lmmv3 point to the same struct and have the
+ /*
+ * lmmv1 and lmmv3 point to the same struct and have the
* same first fields
*/
+ lmmv1->lmm_magic = cpu_to_le32(lsm->lsm_magic);
lmm_oi_cpu_to_le(&lmmv1->lmm_oi, &lsm->lsm_oi);
lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size);
- lmmv1->lmm_stripe_count = cpu_to_le16(stripe_count);
+ lmmv1->lmm_stripe_count = cpu_to_le16(lsm->lsm_stripe_count);
lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern);
lmmv1->lmm_layout_gen = cpu_to_le16(lsm->lsm_layout_gen);
+
if (lsm->lsm_magic == LOV_MAGIC_V3) {
- cplen = strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name,
- sizeof(lmmv3->lmm_pool_name));
- if (cplen >= sizeof(lmmv3->lmm_pool_name))
- return -E2BIG;
+ CLASSERT(sizeof(lsm->lsm_pool_name) ==
+ sizeof(lmmv3->lmm_pool_name));
+ strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name,
+ sizeof(lmmv3->lmm_pool_name));
lmm_objects = lmmv3->lmm_objects;
} else {
lmm_objects = lmmv1->lmm_objects;
}
- for (i = 0; i < stripe_count; i++) {
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
struct lov_oinfo *loi = lsm->lsm_oinfo[i];
- /* XXX LOV STACKING call down to osc_packmd() to do packing */
- LASSERTF(ostid_id(&loi->loi_oi) != 0, "lmm_oi "DOSTID
- " stripe %u/%u idx %u\n", POSTID(&lmmv1->lmm_oi),
- i, stripe_count, loi->loi_ost_idx);
+
ostid_cpu_to_le(&loi->loi_oi, &lmm_objects[i].l_ost_oi);
lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
@@ -219,15 +156,6 @@ int lov_obd_packmd(struct lov_obd *lov, struct lov_mds_md **lmmp,
return lmm_size;
}
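The getxattr()-style convention documented above implies a probe-then-pack pattern at call sites. A hypothetical caller sketch (pack_lsm() and its error handling are illustrative, not part of the patch):

static struct lov_mds_md *pack_lsm(const struct lov_stripe_md *lsm,
				   size_t *lmm_size)
{
	struct lov_mds_md *lmm;
	ssize_t size;

	size = lov_lsm_pack(lsm, NULL, 0);	/* probe required size */
	if (size < 0)
		return ERR_PTR(size);

	lmm = libcfs_kvzalloc(size, GFP_NOFS);
	if (!lmm)
		return ERR_PTR(-ENOMEM);

	size = lov_lsm_pack(lsm, lmm, size);	/* actual pack */
	if (size < 0) {
		kvfree(lmm);
		return ERR_PTR(size);
	}

	*lmm_size = size;
	return lmm;
}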
-int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
- struct lov_stripe_md *lsm)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct lov_obd *lov = &obd->u.lov;
-
- return lov_obd_packmd(lov, lmmp, lsm);
-}
-
/* Find the max stripecount we should use */
__u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count)
{
@@ -270,34 +198,34 @@ static int lov_verify_lmm(void *lmm, int lmm_bytes, __u16 *stripe_count)
return rc;
}
-int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
- int pattern, int magic)
+struct lov_stripe_md *lov_lsm_alloc(u16 stripe_count, u32 pattern, u32 magic)
{
- int i, lsm_size;
+ struct lov_stripe_md *lsm;
+ unsigned int i;
- CDEBUG(D_INFO, "alloc lsm, stripe_count %d\n", stripe_count);
+ CDEBUG(D_INFO, "alloc lsm, stripe_count %u\n", stripe_count);
- *lsmp = lsm_alloc_plain(stripe_count, &lsm_size);
- if (!*lsmp) {
- CERROR("can't allocate lsmp stripe_count %d\n", stripe_count);
- return -ENOMEM;
+ lsm = lsm_alloc_plain(stripe_count);
+ if (!lsm) {
+ CERROR("cannot allocate LSM stripe_count %u\n", stripe_count);
+ return ERR_PTR(-ENOMEM);
}
- atomic_set(&(*lsmp)->lsm_refc, 1);
- spin_lock_init(&(*lsmp)->lsm_lock);
- (*lsmp)->lsm_magic = magic;
- (*lsmp)->lsm_stripe_count = stripe_count;
- (*lsmp)->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES * stripe_count;
- (*lsmp)->lsm_pattern = pattern;
- (*lsmp)->lsm_pool_name[0] = '\0';
- (*lsmp)->lsm_layout_gen = 0;
+ atomic_set(&lsm->lsm_refc, 1);
+ spin_lock_init(&lsm->lsm_lock);
+ lsm->lsm_magic = magic;
+ lsm->lsm_stripe_count = stripe_count;
+ lsm->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES * stripe_count;
+ lsm->lsm_pattern = pattern;
+ lsm->lsm_pool_name[0] = '\0';
+ lsm->lsm_layout_gen = 0;
if (stripe_count > 0)
- (*lsmp)->lsm_oinfo[0]->loi_ost_idx = ~0;
+ lsm->lsm_oinfo[0]->loi_ost_idx = ~0;
for (i = 0; i < stripe_count; i++)
- loi_init((*lsmp)->lsm_oinfo[i]);
+ loi_init(lsm->lsm_oinfo[i]);
- return lsm_size;
+ return lsm;
}
int lov_free_memmd(struct lov_stripe_md **lsmp)
@@ -317,56 +245,34 @@ int lov_free_memmd(struct lov_stripe_md **lsmp)
/* Unpack LOV object metadata from disk storage. It is packed in LE byte
* order and is opaque to the networking layer.
*/
-int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
- struct lov_mds_md *lmm, int lmm_bytes)
+struct lov_stripe_md *lov_unpackmd(struct lov_obd *lov, struct lov_mds_md *lmm,
+ size_t lmm_size)
{
- struct obd_device *obd = class_exp2obd(exp);
- struct lov_obd *lov = &obd->u.lov;
- int rc = 0, lsm_size;
- __u16 stripe_count;
- __u32 magic;
- __u32 pattern;
-
- /* If passed an MDS struct use values from there, otherwise defaults */
- if (lmm) {
- rc = lov_verify_lmm(lmm, lmm_bytes, &stripe_count);
- if (rc)
- return rc;
- magic = le32_to_cpu(lmm->lmm_magic);
- pattern = le32_to_cpu(lmm->lmm_pattern);
- } else {
- magic = LOV_MAGIC;
- stripe_count = lov_get_stripecnt(lov, magic, 0);
- pattern = LOV_PATTERN_RAID0;
- }
+ struct lov_stripe_md *lsm;
+ u16 stripe_count;
+ u32 pattern;
+ u32 magic;
+ int rc;
- /* If we aren't passed an lsmp struct, we just want the size */
- if (!lsmp) {
- /* XXX LOV STACKING call into osc for sizes */
- LBUG();
- return lov_stripe_md_size(stripe_count);
- }
- /* If we are passed an allocated struct but nothing to unpack, free */
- if (*lsmp && !lmm) {
- lov_free_memmd(lsmp);
- return 0;
- }
+ rc = lov_verify_lmm(lmm, lmm_size, &stripe_count);
+ if (rc)
+ return ERR_PTR(rc);
- lsm_size = lov_alloc_memmd(lsmp, stripe_count, pattern, magic);
- if (lsm_size < 0)
- return lsm_size;
+ magic = le32_to_cpu(lmm->lmm_magic);
+ pattern = le32_to_cpu(lmm->lmm_pattern);
- /* If we are passed a pointer but nothing to unpack, we only alloc */
- if (!lmm)
- return lsm_size;
+ lsm = lov_lsm_alloc(stripe_count, pattern, magic);
+ if (IS_ERR(lsm))
+ return lsm;
- rc = lsm_op_find(magic)->lsm_unpackmd(lov, *lsmp, lmm);
+ LASSERT(lsm_op_find(magic));
+ rc = lsm_op_find(magic)->lsm_unpackmd(lov, lsm, lmm);
if (rc) {
- lov_free_memmd(lsmp);
- return rc;
+ lov_free_memmd(&lsm);
+ return ERR_PTR(rc);
}
- return lsm_size;
+ return lsm;
}
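lov_unpackmd() now follows the kernel ERR_PTR() convention instead of returning a size or passing results through an out parameter. A hypothetical caller under the new interface:

static int use_layout(struct lov_obd *lov, struct lov_mds_md *lmm,
		      size_t lmm_size)
{
	struct lov_stripe_md *lsm = lov_unpackmd(lov, lmm, lmm_size);

	if (IS_ERR(lsm))
		return PTR_ERR(lsm);	/* -EINVAL, -ENOMEM, ... */

	/* ... use the layout ... */

	lov_free_memmd(&lsm);	/* drops the reference taken at alloc */
	return 0;
}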
/* Retrieve object striping information.
@@ -378,15 +284,14 @@ int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
struct lov_user_md __user *lump)
{
- /*
- * XXX huge struct allocated on stack.
- */
/* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
- struct lov_obd *lov;
struct lov_user_md_v3 lum;
- struct lov_mds_md *lmmk = NULL;
- int rc, lmmk_size, lmm_size;
- int lum_size;
+ struct lov_mds_md *lmmk;
+ u32 stripe_count;
+ ssize_t lmm_size;
+ size_t lmmk_size;
+ size_t lum_size;
+ int rc;
mm_segment_t seg;
if (!lsm)
@@ -399,6 +304,18 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
seg = get_fs();
set_fs(KERNEL_DS);
+ if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3) {
+ CERROR("bad LSM MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
+ lsm->lsm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
+ rc = -EIO;
+ goto out;
+ }
+
+ if (!lsm_is_released(lsm))
+ stripe_count = lsm->lsm_stripe_count;
+ else
+ stripe_count = 0;
+
/* we only need the header part from user space to get lmm_magic and
* lmm_stripe_count, (the header part is common to v1 and v3)
*/
@@ -417,32 +334,40 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
if (lum.lmm_stripe_count &&
(lum.lmm_stripe_count < lsm->lsm_stripe_count)) {
/* Return right size of stripe to user */
- lum.lmm_stripe_count = lsm->lsm_stripe_count;
+ lum.lmm_stripe_count = stripe_count;
rc = copy_to_user(lump, &lum, lum_size);
rc = -EOVERFLOW;
goto out;
}
- lov = lu2lov_dev(obj->lo_cl.co_lu.lo_dev)->ld_lov;
- rc = lov_obd_packmd(lov, &lmmk, lsm);
- if (rc < 0)
+ lmmk_size = lov_mds_md_size(stripe_count, lsm->lsm_magic);
+
+ lmmk = libcfs_kvzalloc(lmmk_size, GFP_NOFS);
+ if (!lmmk) {
+ rc = -ENOMEM;
goto out;
- lmmk_size = rc;
- lmm_size = rc;
- rc = 0;
+ }
+
+ lmm_size = lov_lsm_pack(lsm, lmmk, lmmk_size);
+ if (lmm_size < 0) {
+ rc = lmm_size;
+ goto out_free;
+ }
/* FIXME: Bug 1185 - copy fields properly when structs change */
/* struct lov_user_md_v3 and struct lov_mds_md_v3 must be the same */
CLASSERT(sizeof(lum) == sizeof(struct lov_mds_md_v3));
CLASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lmmk->lmm_objects[0]));
- if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
- ((lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) ||
- (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)))) {
+ if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC &&
+ (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1) ||
+ lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3))) {
lustre_swab_lov_mds_md(lmmk);
lustre_swab_lov_user_md_objects(
(struct lov_user_ost_data *)lmmk->lmm_objects,
lmmk->lmm_stripe_count);
}
+
if (lum.lmm_magic == LOV_USER_MAGIC) {
/* User request for v1, we need skip lmm_pool_name */
if (lmmk->lmm_magic == LOV_MAGIC_V3) {
@@ -474,9 +399,11 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
((struct lov_user_md *)lmmk)->lmm_stripe_count = lum.lmm_stripe_count;
if (copy_to_user(lump, lmmk, lmm_size))
rc = -EFAULT;
+ else
+ rc = 0;
out_free:
- kfree(lmmk);
+ kvfree(lmmk);
out:
set_fs(seg);
return rc;
diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c
index 00bfabad78eb..62ceb6dfdfdf 100644
--- a/drivers/staging/lustre/lustre/lov/lov_page.c
+++ b/drivers/staging/lustre/lustre/lov/lov_page.c
@@ -49,51 +49,6 @@
*
*/
-/**
- * Adjust the stripe index by layout of raid0. @max_index is the maximum
- * page index covered by an underlying DLM lock.
- * This function converts max_index from stripe level to file level, and make
- * sure it's not beyond one stripe.
- */
-static int lov_raid0_page_is_under_lock(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused,
- pgoff_t *max_index)
-{
- struct lov_object *loo = cl2lov(slice->cpl_obj);
- struct lov_layout_raid0 *r0 = lov_r0(loo);
- pgoff_t index = *max_index;
- unsigned int pps; /* pages per stripe */
-
- CDEBUG(D_READA, DFID "*max_index = %lu, nr = %d\n",
- PFID(lu_object_fid(lov2lu(loo))), index, r0->lo_nr);
-
- if (index == 0) /* the page is not covered by any lock */
- return 0;
-
- if (r0->lo_nr == 1) /* single stripe file */
- return 0;
-
- /* max_index is stripe level, convert it into file level */
- if (index != CL_PAGE_EOF) {
- int stripeno = lov_page_stripe(slice->cpl_page);
- *max_index = lov_stripe_pgoff(loo->lo_lsm, index, stripeno);
- }
-
- /* calculate the end of current stripe */
- pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT;
- index = slice->cpl_index + pps - slice->cpl_index % pps - 1;
-
- CDEBUG(D_READA, DFID "*max_index = %lu, index = %lu, pps = %u, stripe_size = %u, stripe no = %u, page index = %lu\n",
- PFID(lu_object_fid(lov2lu(loo))), *max_index, index, pps,
- loo->lo_lsm->lsm_stripe_size, lov_page_stripe(slice->cpl_page),
- slice->cpl_index);
-
- /* never exceed the end of the stripe */
- *max_index = min_t(pgoff_t, *max_index, index);
- return 0;
-}
-
static int lov_raid0_page_print(const struct lu_env *env,
const struct cl_page_slice *slice,
void *cookie, lu_printer_t printer)
@@ -104,7 +59,6 @@ static int lov_raid0_page_print(const struct lu_env *env,
}
static const struct cl_page_operations lov_raid0_page_ops = {
- .cpo_is_under_lock = lov_raid0_page_is_under_lock,
.cpo_print = lov_raid0_page_print
};
diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c
index f8c8a361ef79..7daa8671fdc3 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pool.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pool.c
@@ -81,7 +81,8 @@ static void lov_pool_putref_locked(struct pool_desc *pool)
* Chapter 6.4.
* Addison Wesley, 1973
*/
-static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key, unsigned mask)
+static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key,
+ unsigned int mask)
{
int i;
__u32 result;
diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c
index 09dcaf484c89..d43cc88ae641 100644
--- a/drivers/staging/lustre/lustre/lov/lov_request.c
+++ b/drivers/staging/lustre/lustre/lov/lov_request.c
@@ -44,7 +44,6 @@ static void lov_init_set(struct lov_request_set *set)
atomic_set(&set->set_completes, 0);
atomic_set(&set->set_success, 0);
atomic_set(&set->set_finish_checked, 0);
- set->set_cookies = NULL;
INIT_LIST_HEAD(&set->set_list);
atomic_set(&set->set_refcount, 1);
init_waitqueue_head(&set->set_waitq);
@@ -61,8 +60,6 @@ void lov_finish_set(struct lov_request_set *set)
rq_link);
list_del_init(&req->rq_link);
- if (req->rq_oi.oi_oa)
- kmem_cache_free(obdo_cachep, req->rq_oi.oi_oa);
kfree(req->rq_oi.oi_osfs);
kfree(req);
}
@@ -97,22 +94,6 @@ static void lov_update_set(struct lov_request_set *set,
wake_up(&set->set_waitq);
}
-int lov_update_common_set(struct lov_request_set *set,
- struct lov_request *req, int rc)
-{
- struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
-
- lov_update_set(set, req, rc);
-
- /* grace error on inactive ost */
- if (rc && !(lov->lov_tgts[req->rq_idx] &&
- lov->lov_tgts[req->rq_idx]->ltd_active))
- rc = 0;
-
- /* FIXME in raid1 regime, should return 0 */
- return rc;
-}
-
static void lov_set_add_req(struct lov_request *req,
struct lov_request_set *set)
{
@@ -183,279 +164,6 @@ out:
return rc;
}
-static int common_attr_done(struct lov_request_set *set)
-{
- struct lov_request *req;
- struct obdo *tmp_oa;
- int rc = 0, attrset = 0;
-
- if (!set->set_oi->oi_oa)
- return 0;
-
- if (!atomic_read(&set->set_success))
- return -EIO;
-
- tmp_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!tmp_oa) {
- rc = -ENOMEM;
- goto out;
- }
-
- list_for_each_entry(req, &set->set_list, rq_link) {
- if (!req->rq_complete || req->rq_rc)
- continue;
- if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
- continue;
- lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
- req->rq_oi.oi_oa->o_valid,
- set->set_oi->oi_md, req->rq_stripe, &attrset);
- }
- if (!attrset) {
- CERROR("No stripes had valid attrs\n");
- rc = -EIO;
- }
- if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) &&
- (set->set_oi->oi_md->lsm_stripe_count != attrset)) {
- /* When we take attributes of some epoch, we require all the
- * ost to be active.
- */
- CERROR("Not all the stripes had valid attrs\n");
- rc = -EIO;
- goto out;
- }
-
- tmp_oa->o_oi = set->set_oi->oi_oa->o_oi;
- memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
-out:
- if (tmp_oa)
- kmem_cache_free(obdo_cachep, tmp_oa);
- return rc;
-}
-
-int lov_fini_getattr_set(struct lov_request_set *set)
-{
- int rc = 0;
-
- if (!set)
- return 0;
- LASSERT(set->set_exp);
- if (atomic_read(&set->set_completes))
- rc = common_attr_done(set);
-
- lov_put_reqset(set);
-
- return rc;
-}
-
-/* The callback for osc_getattr_async that finalizes a request info when a
- * response is received.
- */
-static int cb_getattr_update(void *cookie, int rc)
-{
- struct obd_info *oinfo = cookie;
- struct lov_request *lovreq;
-
- lovreq = container_of(oinfo, struct lov_request, rq_oi);
- return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
-}
-
-int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
- struct lov_request_set **reqset)
-{
- struct lov_request_set *set;
- struct lov_obd *lov = &exp->exp_obd->u.lov;
- int rc = 0, i;
-
- set = kzalloc(sizeof(*set), GFP_NOFS);
- if (!set)
- return -ENOMEM;
- lov_init_set(set);
-
- set->set_exp = exp;
- set->set_oi = oinfo;
-
- for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
- struct lov_oinfo *loi;
- struct lov_request *req;
-
- loi = oinfo->oi_md->lsm_oinfo[i];
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH) {
- /* SOM requires all the OSTs to be active. */
- rc = -EIO;
- goto out_set;
- }
- continue;
- }
-
- req = kzalloc(sizeof(*req), GFP_NOFS);
- if (!req) {
- rc = -ENOMEM;
- goto out_set;
- }
-
- req->rq_stripe = i;
- req->rq_idx = loi->loi_ost_idx;
-
- req->rq_oi.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!req->rq_oi.oi_oa) {
- kfree(req);
- rc = -ENOMEM;
- goto out_set;
- }
- memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
- sizeof(*req->rq_oi.oi_oa));
- req->rq_oi.oi_oa->o_oi = loi->loi_oi;
- req->rq_oi.oi_cb_up = cb_getattr_update;
-
- lov_set_add_req(req, set);
- }
- if (!set->set_count) {
- rc = -EIO;
- goto out_set;
- }
- *reqset = set;
- return rc;
-out_set:
- lov_fini_getattr_set(set);
- return rc;
-}
-
-int lov_fini_setattr_set(struct lov_request_set *set)
-{
- int rc = 0;
-
- if (!set)
- return 0;
- LASSERT(set->set_exp);
- if (atomic_read(&set->set_completes)) {
- rc = common_attr_done(set);
- /* FIXME update qos data here */
- }
-
- lov_put_reqset(set);
- return rc;
-}
-
-int lov_update_setattr_set(struct lov_request_set *set,
- struct lov_request *req, int rc)
-{
- struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
- struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
-
- lov_update_set(set, req, rc);
-
- /* grace error on inactive ost */
- if (rc && !(lov->lov_tgts[req->rq_idx] &&
- lov->lov_tgts[req->rq_idx]->ltd_active))
- rc = 0;
-
- if (rc == 0) {
- if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
- lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
- req->rq_oi.oi_oa->o_ctime;
- if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
- lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
- req->rq_oi.oi_oa->o_mtime;
- if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
- lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
- req->rq_oi.oi_oa->o_atime;
- }
-
- return rc;
-}
-
-/* The callback for osc_setattr_async that finalizes a request info when a
- * response is received.
- */
-static int cb_setattr_update(void *cookie, int rc)
-{
- struct obd_info *oinfo = cookie;
- struct lov_request *lovreq;
-
- lovreq = container_of(oinfo, struct lov_request, rq_oi);
- return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
-}
-
-int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- struct lov_request_set **reqset)
-{
- struct lov_request_set *set;
- struct lov_obd *lov = &exp->exp_obd->u.lov;
- int rc = 0, i;
-
- set = kzalloc(sizeof(*set), GFP_NOFS);
- if (!set)
- return -ENOMEM;
- lov_init_set(set);
-
- set->set_exp = exp;
- set->set_oi = oinfo;
- if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
- set->set_cookies = oti->oti_logcookies;
-
- for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
- struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
- struct lov_request *req;
-
- if (lov_oinfo_is_dummy(loi))
- continue;
-
- if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- continue;
- }
-
- req = kzalloc(sizeof(*req), GFP_NOFS);
- if (!req) {
- rc = -ENOMEM;
- goto out_set;
- }
- req->rq_stripe = i;
- req->rq_idx = loi->loi_ost_idx;
-
- req->rq_oi.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
- if (!req->rq_oi.oi_oa) {
- kfree(req);
- rc = -ENOMEM;
- goto out_set;
- }
- memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
- sizeof(*req->rq_oi.oi_oa));
- req->rq_oi.oi_oa->o_oi = loi->loi_oi;
- req->rq_oi.oi_oa->o_stripe_idx = i;
- req->rq_oi.oi_cb_up = cb_setattr_update;
-
- if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
- int off = lov_stripe_offset(oinfo->oi_md,
- oinfo->oi_oa->o_size, i,
- &req->rq_oi.oi_oa->o_size);
-
- if (off < 0 && req->rq_oi.oi_oa->o_size)
- req->rq_oi.oi_oa->o_size--;
-
- CDEBUG(D_INODE, "stripe %d has size %llu/%llu\n",
- i, req->rq_oi.oi_oa->o_size,
- oinfo->oi_oa->o_size);
- }
- lov_set_add_req(req, set);
- }
- if (!set->set_count) {
- rc = -EIO;
- goto out_set;
- }
- *reqset = set;
- return rc;
-out_set:
- lov_fini_setattr_set(set);
- return rc;
-}
-
#define LOV_U64_MAX ((__u64)~0ULL)
#define LOV_SUM_MAX(tot, add) \
do { \
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
index b519a1940e1e..5d6536f8a4f7 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
@@ -44,46 +44,6 @@
/*****************************************************************************
*
- * Lovsub transfer operations.
- *
- */
-
-static void lovsub_req_completion(const struct lu_env *env,
- const struct cl_req_slice *slice, int ioret)
-{
- struct lovsub_req *lsr;
-
- lsr = cl2lovsub_req(slice);
- kmem_cache_free(lovsub_req_kmem, lsr);
-}
-
-/**
- * Implementation of struct cl_req_operations::cro_attr_set() for lovsub
- * layer. Lov and lovsub are responsible only for struct obdo::o_stripe_idx
- * field, which is filled there.
- */
-static void lovsub_req_attr_set(const struct lu_env *env,
- const struct cl_req_slice *slice,
- const struct cl_object *obj,
- struct cl_req_attr *attr, u64 flags)
-{
- struct lovsub_object *subobj;
-
- subobj = cl2lovsub(obj);
- /*
- * There is no OBD_MD_* flag for obdo::o_stripe_idx, so set it
- * unconditionally. It never changes anyway.
- */
- attr->cra_oa->o_stripe_idx = subobj->lso_index;
-}
-
-static const struct cl_req_operations lovsub_req_ops = {
- .cro_attr_set = lovsub_req_attr_set,
- .cro_completion = lovsub_req_completion
-};
-
-/*****************************************************************************
- *
* Lov-sub device and device type functions.
*
*/
@@ -137,32 +97,12 @@ static struct lu_device *lovsub_device_free(const struct lu_env *env,
return next;
}
-static int lovsub_req_init(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req)
-{
- struct lovsub_req *lsr;
- int result;
-
- lsr = kmem_cache_zalloc(lovsub_req_kmem, GFP_NOFS);
- if (lsr) {
- cl_req_slice_add(req, &lsr->lsrq_cl, dev, &lovsub_req_ops);
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
-
static const struct lu_device_operations lovsub_lu_ops = {
.ldo_object_alloc = lovsub_object_alloc,
.ldo_process_config = NULL,
.ldo_recovery_complete = NULL
};
-static const struct cl_device_operations lovsub_cl_ops = {
- .cdo_req_init = lovsub_req_init
-};
-
static struct lu_device *lovsub_device_alloc(const struct lu_env *env,
struct lu_device_type *t,
struct lustre_cfg *cfg)
@@ -178,7 +118,6 @@ static struct lu_device *lovsub_device_alloc(const struct lu_env *env,
if (result == 0) {
d = lovsub2lu_dev(lsd);
d->ld_ops = &lovsub_lu_ops;
- lsd->acid_cl.cd_ops = &lovsub_cl_ops;
} else {
d = ERR_PTR(result);
}
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c
index a2bac7a3b71b..011296ee16e6 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_object.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c
@@ -116,11 +116,31 @@ static int lovsub_object_glimpse(const struct lu_env *env,
return cl_object_glimpse(env, &los->lso_super->lo_cl, lvb);
}
+/**
+ * Implementation of struct cl_object_operations::coo_req_attr_set() for lovsub
+ * layer. Lov and lovsub are responsible only for struct obdo::o_stripe_idx
+ * field, which is filled there.
+ */
+static void lovsub_req_attr_set(const struct lu_env *env, struct cl_object *obj,
+ struct cl_req_attr *attr)
+{
+ struct lovsub_object *subobj = cl2lovsub(obj);
+
+ cl_req_attr_set(env, &subobj->lso_super->lo_cl, attr);
+
+ /*
+ * There is no OBD_MD_* flag for obdo::o_stripe_idx, so set it
+ * unconditionally. It never changes anyway.
+ */
+ attr->cra_oa->o_stripe_idx = subobj->lso_index;
+}
+
static const struct cl_object_operations lovsub_ops = {
.coo_page_init = lovsub_page_init,
.coo_lock_init = lovsub_lock_init,
.coo_attr_update = lovsub_attr_update,
- .coo_glimpse = lovsub_object_glimpse
+ .coo_glimpse = lovsub_object_glimpse,
+ .coo_req_attr_set = lovsub_req_attr_set
};
static const struct lu_object_operations lovsub_lu_obj_ops = {
diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
index fca9450de57c..9021c465c044 100644
--- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
+++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
@@ -36,6 +36,42 @@
#include "../include/lprocfs_status.h"
#include "mdc_internal.h"
+static ssize_t active_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct obd_device *dev = container_of(kobj, struct obd_device,
+ obd_kobj);
+
+ return sprintf(buf, "%u\n", !dev->u.cli.cl_import->imp_deactive);
+}
+
+static ssize_t active_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct obd_device *dev = container_of(kobj, struct obd_device,
+ obd_kobj);
+ unsigned long val;
+ int rc;
+
+ rc = kstrtoul(buffer, 10, &val);
+ if (rc)
+ return rc;
+
+ if (val > 1)
+ return -ERANGE;
+
+ /* opposite senses */
+ if (dev->u.cli.cl_import->imp_deactive == val) {
+ rc = ptlrpc_set_import_active(dev->u.cli.cl_import, val);
+ if (rc)
+ count = rc;
+ } else {
+ CDEBUG(D_CONFIG, "activate %lu: ignoring repeat request\n", val);
+ }
+ return count;
+}
+LUSTRE_RW_ATTR(active);
+
static ssize_t max_rpcs_in_flight_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
@@ -73,6 +109,64 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
}
LUSTRE_RW_ATTR(max_rpcs_in_flight);
+static ssize_t max_mod_rpcs_in_flight_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct obd_device *dev = container_of(kobj, struct obd_device,
+ obd_kobj);
+ u16 max;
+ int len;
+
+ max = dev->u.cli.cl_max_mod_rpcs_in_flight;
+ len = sprintf(buf, "%hu\n", max);
+
+ return len;
+}
+
+static ssize_t max_mod_rpcs_in_flight_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct obd_device *dev = container_of(kobj, struct obd_device,
+ obd_kobj);
+ u16 val;
+ int rc;
+
+ rc = kstrtou16(buffer, 10, &val);
+ if (rc)
+ return rc;
+
+ rc = obd_set_max_mod_rpcs_in_flight(&dev->u.cli, val);
+ if (rc)
+ count = rc;
+
+ return count;
+}
+LUSTRE_RW_ATTR(max_mod_rpcs_in_flight);
+
+static int mdc_rpc_stats_seq_show(struct seq_file *seq, void *v)
+{
+ struct obd_device *dev = seq->private;
+
+ return obd_mod_rpc_stats_seq_show(&dev->u.cli, seq);
+}
+
+static ssize_t mdc_rpc_stats_seq_write(struct file *file,
+ const char __user *buf,
+ size_t len, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ struct obd_device *dev = seq->private;
+ struct client_obd *cli = &dev->u.cli;
+
+ lprocfs_oh_clear(&cli->cl_mod_rpcs_hist);
+
+ return len;
+}
+LPROC_SEQ_FOPS(mdc_rpc_stats);
+
LPROC_SEQ_FOPS_WR_ONLY(mdc, ping);
LPROC_SEQ_FOPS_RO_TYPE(mdc, connect_flags);
@@ -112,11 +206,15 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = {
{ "import", &mdc_import_fops, NULL, 0 },
{ "state", &mdc_state_fops, NULL, 0 },
{ "pinger_recov", &mdc_pinger_recov_fops, NULL, 0 },
+ { .name = "rpc_stats",
+ .fops = &mdc_rpc_stats_fops },
{ NULL }
};
static struct attribute *mdc_attrs[] = {
+ &lustre_attr_active.attr,
&lustre_attr_max_rpcs_in_flight.attr,
+ &lustre_attr_max_mod_rpcs_in_flight.attr,
&lustre_attr_max_pages_per_rpc.attr,
NULL,
};
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
index f446c1c2584b..881c6a0676a6 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h
+++ b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
@@ -46,7 +46,7 @@ void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size,
void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags,
struct md_op_data *data, size_t ea_size);
void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- void *ea, size_t ealen, void *ea2, size_t ea2len);
+ void *ea, size_t ealen);
void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
const void *data, size_t datalen, umode_t mode, uid_t uid,
gid_t gid, cfs_cap_t capability, __u64 rdev);
@@ -75,7 +75,7 @@ int mdc_intent_lock(struct obd_export *exp,
__u64 extra_lock_flags);
int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
- const ldlm_policy_data_t *policy,
+ const union ldlm_policy_data *policy,
struct lookup_intent *it, struct md_op_data *op_data,
struct lustre_handle *lockh, __u64 extra_lock_flags);
@@ -105,12 +105,11 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
const char *new, size_t newlen,
struct ptlrpc_request **request);
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, size_t ealen, void *ea2, size_t ea2len,
- struct ptlrpc_request **request, struct md_open_data **mod);
+ void *ea, size_t ealen, struct ptlrpc_request **request);
int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
struct ptlrpc_request **request);
int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
- ldlm_policy_data_t *policy, enum ldlm_mode mode,
+ union ldlm_policy_data *policy, enum ldlm_mode mode,
enum ldlm_cancel_flags flags, void *opaque);
int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
@@ -122,7 +121,8 @@ int mdc_intent_getattr_async(struct obd_export *exp,
enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags,
const struct lu_fid *fid, enum ldlm_type type,
- ldlm_policy_data_t *policy, enum ldlm_mode mode,
+ union ldlm_policy_data *policy,
+ enum ldlm_mode mode,
struct lustre_handle *lockh);
static inline int mdc_prep_elc_req(struct obd_export *exp,
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
index aac7e04873e2..f35e1f9afdef 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
@@ -139,7 +139,7 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
rec->cr_time = op_data->op_mod_time;
rec->cr_suppgid1 = op_data->op_suppgids[0];
rec->cr_suppgid2 = op_data->op_suppgids[1];
- flags = op_data->op_flags & MF_SOM_LOCAL_FLAGS;
+ flags = 0;
if (op_data->op_bias & MDS_CREATE_VOLATILE)
flags |= MDS_OPEN_VOLATILE;
set_mrc_cr_flags(rec, flags);
@@ -301,16 +301,16 @@ static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec,
static void mdc_ioepoch_pack(struct mdt_ioepoch *epoch,
struct md_op_data *op_data)
{
- memcpy(&epoch->handle, &op_data->op_handle, sizeof(epoch->handle));
- epoch->ioepoch = op_data->op_ioepoch;
- epoch->flags = op_data->op_flags & MF_SOM_LOCAL_FLAGS;
+ epoch->mio_handle = op_data->op_handle;
+ epoch->mio_unused1 = 0;
+ epoch->mio_unused2 = 0;
+ epoch->mio_padding = 0;
}
void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- void *ea, size_t ealen, void *ea2, size_t ea2len)
+ void *ea, size_t ealen)
{
struct mdt_rec_setattr *rec;
- struct mdt_ioepoch *epoch;
struct lov_user_md *lum = NULL;
CLASSERT(sizeof(struct mdt_rec_reint) ==
@@ -318,11 +318,6 @@ void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
mdc_setattr_pack_rec(rec, op_data);
- if (op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) {
- epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
- mdc_ioepoch_pack(epoch, op_data);
- }
-
if (ealen == 0)
return;
@@ -335,12 +330,6 @@ void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
} else {
memcpy(lum, ea, ealen);
}
-
- if (ea2len == 0)
- return;
-
- memcpy(req_capsule_client_get(&req->rq_pill, &RMF_LOGCOOKIES), ea2,
- ea2len);
}
void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
@@ -387,6 +376,31 @@ void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
}
+static void mdc_intent_close_pack(struct ptlrpc_request *req,
+ struct md_op_data *op_data)
+{
+ enum mds_op_bias bias = op_data->op_bias;
+ struct close_data *data;
+ struct ldlm_lock *lock;
+
+ if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |
+ MDS_RENAME_MIGRATE)))
+ return;
+
+ data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
+ LASSERT(data);
+
+ lock = ldlm_handle2lock(&op_data->op_lease_handle);
+ if (lock) {
+ data->cd_handle = lock->l_remote_handle;
+ LDLM_LOCK_PUT(lock);
+ }
+ ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
+
+ data->cd_data_version = op_data->op_data_version;
+ data->cd_fid = op_data->op_fid2;
+}
+
void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
const char *old, size_t oldlen,
const char *new, size_t newlen)
@@ -415,6 +429,15 @@ void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
if (new)
mdc_pack_name(req, &RMF_SYMTGT, new, newlen);
+
+ if (op_data->op_cli_flags & CLI_MIGRATE &&
+ op_data->op_bias & MDS_RENAME_MIGRATE) {
+ struct mdt_ioepoch *epoch;
+
+ mdc_intent_close_pack(req, op_data);
+ epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
+ mdc_ioepoch_pack(epoch, op_data);
+ }
}
void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags,
@@ -441,27 +464,6 @@ void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags,
op_data->op_namelen);
}
-static void mdc_hsm_release_pack(struct ptlrpc_request *req,
- struct md_op_data *op_data)
-{
- if (op_data->op_bias & MDS_HSM_RELEASE) {
- struct close_data *data;
- struct ldlm_lock *lock;
-
- data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
-
- lock = ldlm_handle2lock(&op_data->op_lease_handle);
- if (lock) {
- data->cd_handle = lock->l_remote_handle;
- LDLM_LOCK_PUT(lock);
- }
- ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
-
- data->cd_data_version = op_data->op_data_version;
- data->cd_fid = op_data->op_fid2;
- }
-}
-
void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
{
struct mdt_ioepoch *epoch;
@@ -484,5 +486,5 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
rec->sa_valid &= ~MDS_ATTR_ATIME;
mdc_ioepoch_pack(epoch, op_data);
- mdc_hsm_release_pack(req, op_data);
+ mdc_intent_close_pack(req, op_data);
}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index f1f6c082fa42..54ebb9952d66 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -38,10 +38,12 @@
#include "../include/obd.h"
#include "../include/obd_class.h"
#include "../include/lustre_dlm.h"
-#include "../include/lustre_fid.h" /* fid_res_name_eq() */
+#include "../include/lustre_fid.h"
#include "../include/lustre_mdc.h"
#include "../include/lustre_net.h"
#include "../include/lustre_req_layout.h"
+#include "../include/lustre_swab.h"
+
#include "mdc_internal.h"
struct mdc_getattr_args {
@@ -131,7 +133,8 @@ int mdc_set_lock_data(struct obd_export *exp, const struct lustre_handle *lockh,
enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags,
const struct lu_fid *fid, enum ldlm_type type,
- ldlm_policy_data_t *policy, enum ldlm_mode mode,
+ union ldlm_policy_data *policy,
+ enum ldlm_mode mode,
struct lustre_handle *lockh)
{
struct ldlm_res_id res_id;
@@ -147,7 +150,7 @@ enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags,
int mdc_cancel_unused(struct obd_export *exp,
const struct lu_fid *fid,
- ldlm_policy_data_t *policy,
+ union ldlm_policy_data *policy,
enum ldlm_mode mode,
enum ldlm_cancel_flags flags,
void *opaque)
@@ -386,8 +389,6 @@ static struct ptlrpc_request *mdc_intent_unlink_pack(struct obd_export *exp,
req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
obddev->u.cli.cl_default_mds_easize);
- req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER,
- obddev->u.cli.cl_default_mds_cookiesize);
ptlrpc_request_set_replen(req);
return req;
}
@@ -688,20 +689,20 @@ static int mdc_finish_enqueue(struct obd_export *exp,
* we don't know in advance the file type.
*/
int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
- const ldlm_policy_data_t *policy,
+ const union ldlm_policy_data *policy,
struct lookup_intent *it, struct md_op_data *op_data,
struct lustre_handle *lockh, u64 extra_lock_flags)
{
- static const ldlm_policy_data_t lookup_policy = {
+ static const union ldlm_policy_data lookup_policy = {
.l_inodebits = { MDS_INODELOCK_LOOKUP }
};
- static const ldlm_policy_data_t update_policy = {
+ static const union ldlm_policy_data update_policy = {
.l_inodebits = { MDS_INODELOCK_UPDATE }
};
- static const ldlm_policy_data_t layout_policy = {
+ static const union ldlm_policy_data layout_policy = {
.l_inodebits = { MDS_INODELOCK_LAYOUT }
};
- static const ldlm_policy_data_t getxattr_policy = {
+ static const union ldlm_policy_data getxattr_policy = {
.l_inodebits = { MDS_INODELOCK_XATTR }
};
struct obd_device *obddev = class_exp2obd(exp);
@@ -762,27 +763,22 @@ resend:
if (IS_ERR(req))
return PTR_ERR(req);
- if (req && it && it->it_op & IT_CREAT)
- /* ask ptlrpc not to resend on EINPROGRESS since we have our own
- * retry logic
- */
- req->rq_no_retry_einprogress = 1;
-
if (resends) {
req->rq_generation_set = 1;
req->rq_import_generation = generation;
req->rq_sent = ktime_get_real_seconds() + resends;
}
- /* It is important to obtain rpc_lock first (if applicable), so that
- * threads that are serialised with rpc_lock are not polluting our
- * rpcs in flight counter. We do not do flock request limiting, though
+ /* It is important to obtain the modify RPC slot first (if applicable),
+ * so that threads waiting for a modify RPC slot are not polluting our
+ * RPCs in flight counter.
+ * We do not do flock request limiting, though.
*/
if (it) {
- mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+ mdc_get_mod_rpc_slot(req, it);
rc = obd_get_request_slot(&obddev->u.cli);
if (rc != 0) {
- mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+ mdc_put_mod_rpc_slot(req, it);
mdc_clear_replay_flag(req, 0);
ptlrpc_req_finished(req);
return rc;
@@ -809,7 +805,7 @@ resend:
}
obd_put_request_slot(&obddev->u.cli);
- mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+ mdc_put_mod_rpc_slot(req, it);
if (rc < 0) {
CDEBUG(D_INFO, "%s: ldlm_cli_enqueue failed: rc = %d\n",
@@ -825,11 +821,12 @@ resend:
lockrep->lock_policy_res2 =
ptlrpc_status_ntoh(lockrep->lock_policy_res2);
- /* Retry the create infinitely when we get -EINPROGRESS from
- * server. This is required by the new quota design.
+ /*
+ * Retry infinitely when the server returns -EINPROGRESS for the
+ * intent operation; when the server returns -EINPROGRESS for
+ * acquiring the intent lock, we'll retry in after_reply().
+ */
- if (it->it_op & IT_CREAT &&
- (int)lockrep->lock_policy_res2 == -EINPROGRESS) {
+ if (it->it_op && (int)lockrep->lock_policy_res2 == -EINPROGRESS) {
mdc_clear_replay_flag(req, rc);
ptlrpc_req_finished(req);
resends++;
@@ -931,7 +928,7 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
*/
lock = ldlm_handle2lock(lockh);
if (lock) {
- ldlm_policy_data_t policy = lock->l_policy_data;
+ union ldlm_policy_data policy = lock->l_policy_data;
LDLM_DEBUG(lock, "matching against this");
@@ -967,7 +964,7 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
*/
struct ldlm_res_id res_id;
struct lustre_handle lockh;
- ldlm_policy_data_t policy;
+ union ldlm_policy_data policy;
enum ldlm_mode mode;
if (it->it_lock_handle) {
@@ -1169,10 +1166,9 @@ int mdc_intent_getattr_async(struct obd_export *exp,
* for statahead currently. Consider CMD in future, such two bits
* maybe managed by different MDS, should be adjusted then.
*/
- ldlm_policy_data_t policy = {
- .l_inodebits = { MDS_INODELOCK_LOOKUP |
- MDS_INODELOCK_UPDATE }
- };
+ union ldlm_policy_data policy = {
+ .l_inodebits = { MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE }
+ };
int rc = 0;
__u64 flags = LDLM_FL_HAS_INTENT;
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
index c921e471fa27..07b168490f09 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
@@ -40,17 +40,15 @@
#include "../include/lustre_fid.h"
/* mdc_setattr does its own semaphore handling */
-static int mdc_reint(struct ptlrpc_request *request,
- struct mdc_rpc_lock *rpc_lock,
- int level)
+static int mdc_reint(struct ptlrpc_request *request, int level)
{
int rc;
request->rq_send_state = level;
- mdc_get_rpc_lock(rpc_lock, NULL);
+ mdc_get_mod_rpc_slot(request, NULL);
rc = ptlrpc_queue_wait(request);
- mdc_put_rpc_lock(rpc_lock, NULL);
+ mdc_put_mod_rpc_slot(request, NULL);
if (rc)
CDEBUG(D_INFO, "error in handling %d\n", rc);
else if (!req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY))
@@ -68,7 +66,7 @@ int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
__u64 bits)
{
struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
- ldlm_policy_data_t policy = {};
+ union ldlm_policy_data policy = {};
struct ldlm_res_id res_id;
struct ldlm_resource *res;
int count;
@@ -99,13 +97,10 @@ int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
}
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, size_t ealen, void *ea2, size_t ea2len,
- struct ptlrpc_request **request, struct md_open_data **mod)
+ void *ea, size_t ealen, struct ptlrpc_request **request)
{
LIST_HEAD(cancels);
struct ptlrpc_request *req;
- struct mdc_rpc_lock *rpc_lock;
- struct obd_device *obd = exp->exp_obd;
int count = 0, rc;
__u64 bits;
@@ -122,12 +117,9 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
ldlm_lock_list_put(&cancels, l_bl_ast, count);
return -ENOMEM;
}
- if ((op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) == 0)
- req_capsule_set_size(&req->rq_pill, &RMF_MDT_EPOCH, RCL_CLIENT,
- 0);
+ req_capsule_set_size(&req->rq_pill, &RMF_MDT_EPOCH, RCL_CLIENT, 0);
req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, ealen);
- req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT,
- ea2len);
+ req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT, 0);
rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
if (rc) {
@@ -135,63 +127,21 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
- rpc_lock = obd->u.cli.cl_rpc_lock;
-
if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
CDEBUG(D_INODE, "setting mtime %ld, ctime %ld\n",
LTIME_S(op_data->op_attr.ia_mtime),
LTIME_S(op_data->op_attr.ia_ctime));
- mdc_setattr_pack(req, op_data, ea, ealen, ea2, ea2len);
+ mdc_setattr_pack(req, op_data, ea, ealen);
ptlrpc_request_set_replen(req);
- if (mod && (op_data->op_flags & MF_EPOCH_OPEN) &&
- req->rq_import->imp_replayable) {
- LASSERT(!*mod);
-
- *mod = obd_mod_alloc();
- if (!*mod) {
- DEBUG_REQ(D_ERROR, req, "Can't allocate md_open_data");
- } else {
- req->rq_replay = 1;
- req->rq_cb_data = *mod;
- (*mod)->mod_open_req = req;
- req->rq_commit_cb = mdc_commit_open;
- (*mod)->mod_is_create = true;
- /**
- * Take an extra reference on \var mod, it protects \var
- * mod from being freed on eviction (commit callback is
- * called despite rq_replay flag).
- * Will be put on mdc_done_writing().
- */
- obd_mod_get(*mod);
- }
- }
-
- rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);
- /* Save the obtained info in the original RPC for the replay case. */
- if (rc == 0 && (op_data->op_flags & MF_EPOCH_OPEN)) {
- struct mdt_ioepoch *epoch;
- struct mdt_body *body;
+ rc = mdc_reint(req, LUSTRE_IMP_FULL);
- epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- epoch->handle = body->mbo_handle;
- epoch->ioepoch = body->mbo_ioepoch;
- req->rq_replay_cb = mdc_replay_open;
- /** bug 3633, open may be committed and estale answer is not error */
- } else if (rc == -ESTALE && (op_data->op_flags & MF_SOM_CHANGE)) {
- rc = 0;
- } else if (rc == -ERESTARTSYS) {
+ if (rc == -ERESTARTSYS)
rc = 0;
- }
+
*request = req;
- if (rc && req->rq_commit_cb) {
- /* Put an extra reference on \var mod on error case. */
- if (mod && *mod)
- obd_mod_put(*mod);
- req->rq_commit_cb(req);
- }
+
return rc;
}
@@ -264,7 +214,7 @@ rebuild:
}
level = LUSTRE_IMP_FULL;
resend:
- rc = mdc_reint(req, exp->exp_obd->u.cli.cl_rpc_lock, level);
+ rc = mdc_reint(req, level);
/* Resend if we were told to. */
if (rc == -ERESTARTSYS) {
@@ -332,13 +282,11 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
obd->u.cli.cl_default_mds_easize);
- req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
- obd->u.cli.cl_default_mds_cookiesize);
ptlrpc_request_set_replen(req);
*request = req;
- rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
+ rc = mdc_reint(req, LUSTRE_IMP_FULL);
if (rc == -ERESTARTSYS)
rc = 0;
return rc;
@@ -348,7 +296,6 @@ int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
struct ptlrpc_request **request)
{
LIST_HEAD(cancels);
- struct obd_device *obd = exp->exp_obd;
struct ptlrpc_request *req;
int count = 0, rc;
@@ -380,7 +327,7 @@ int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
mdc_link_pack(req, op_data);
ptlrpc_request_set_replen(req);
- rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
+ rc = mdc_reint(req, LUSTRE_IMP_FULL);
*request = req;
if (rc == -ERESTARTSYS)
rc = 0;
@@ -419,7 +366,8 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
MDS_INODELOCK_FULL);
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_REINT_RENAME);
+ op_data->op_cli_flags & CLI_MIGRATE ?
+ &RQF_MDS_REINT_MIGRATE : &RQF_MDS_REINT_RENAME);
if (!req) {
ldlm_lock_list_put(&cancels, l_bl_ast, count);
return -ENOMEM;
@@ -435,6 +383,23 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
return rc;
}
+ if (op_data->op_cli_flags & CLI_MIGRATE && op_data->op_data) {
+ struct md_open_data *mod = op_data->op_data;
+
+ LASSERTF(mod->mod_open_req &&
+ mod->mod_open_req->rq_type != LI_POISON,
+ "POISONED open %p!\n", mod->mod_open_req);
+
+ DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
+ /*
+ * We no longer want to preserve this open for replay even
+ * though the open was committed. b=3632, b=3633
+ */
+ spin_lock(&mod->mod_open_req->rq_lock);
+ mod->mod_open_req->rq_replay = 0;
+ spin_unlock(&mod->mod_open_req->rq_lock);
+ }
+
if (exp_connect_cancelset(exp) && req)
ldlm_cli_cancel_list(&cancels, count, req, 0);
@@ -442,11 +407,9 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
obd->u.cli.cl_default_mds_easize);
- req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
- obd->u.cli.cl_default_mds_cookiesize);
ptlrpc_request_set_replen(req);
- rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
+ rc = mdc_reint(req, LUSTRE_IMP_FULL);
*request = req;
if (rc == -ERESTARTSYS)
rc = 0;
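Every conversion in this file follows the same shape: the global cl_rpc_lock serialization is replaced by a per-request modify-RPC slot held only around the wire call. A minimal sketch of that pattern, using only the helpers visible in the hunks above; the wrapper name mdc_send_reint_sketch is hypothetical, not part of the patch:

static int mdc_send_reint_sketch(struct ptlrpc_request *req, int level)
{
	int rc;

	req->rq_send_state = level;

	/* claim a modify-RPC slot; accounting is now per request */
	mdc_get_mod_rpc_slot(req, NULL);
	rc = ptlrpc_queue_wait(req);
	/* release the slot on success and failure alike */
	mdc_put_mod_rpc_slot(req, NULL);

	return rc;
}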
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index f56ea643f9bf..2cfd913f9bc5 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -38,15 +38,18 @@
# include <linux/init.h>
# include <linux/utsname.h>
+#include "../include/cl_object.h"
+#include "../include/llog_swab.h"
+#include "../include/lprocfs_status.h"
#include "../include/lustre_acl.h"
+#include "../include/lustre_fid.h"
#include "../include/lustre/lustre_ioctl.h"
-#include "../include/obd_class.h"
+#include "../include/lustre_kernelcomm.h"
#include "../include/lustre_lmv.h"
-#include "../include/lustre_fid.h"
-#include "../include/lprocfs_status.h"
-#include "../include/lustre_param.h"
#include "../include/lustre_log.h"
-#include "../include/lustre_kernelcomm.h"
+#include "../include/lustre_param.h"
+#include "../include/lustre_swab.h"
+#include "../include/obd_class.h"
#include "mdc_internal.h"
@@ -327,12 +330,12 @@ static int mdc_xattr_common(struct obd_export *exp,
/* make rpc */
if (opcode == MDS_REINT)
- mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+ mdc_get_mod_rpc_slot(req, NULL);
rc = ptlrpc_queue_wait(req);
if (opcode == MDS_REINT)
- mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
+ mdc_put_mod_rpc_slot(req, NULL);
if (rc)
ptlrpc_req_finished(req);
@@ -420,9 +423,6 @@ static int mdc_get_lustre_md(struct obd_export *exp,
md->body = req_capsule_server_get(pill, &RMF_MDT_BODY);
if (md->body->mbo_valid & OBD_MD_FLEASIZE) {
- int lmmsize;
- struct lov_mds_md *lmm;
-
if (!S_ISREG(md->body->mbo_mode)) {
CDEBUG(D_INFO,
"OBD_MD_FLEASIZE set, should be a regular file, but is not\n");
@@ -436,28 +436,18 @@ static int mdc_get_lustre_md(struct obd_export *exp,
rc = -EPROTO;
goto out;
}
- lmmsize = md->body->mbo_eadatasize;
- lmm = req_capsule_server_sized_get(pill, &RMF_MDT_MD, lmmsize);
- if (!lmm) {
- rc = -EPROTO;
- goto out;
- }
-
- rc = obd_unpackmd(dt_exp, &md->lsm, lmm, lmmsize);
- if (rc < 0)
- goto out;
- if (rc < (typeof(rc))sizeof(*md->lsm)) {
- CDEBUG(D_INFO,
- "lsm size too small: rc < sizeof (*md->lsm) (%d < %d)\n",
- rc, (int)sizeof(*md->lsm));
+ md->layout.lb_len = md->body->mbo_eadatasize;
+ md->layout.lb_buf = req_capsule_server_sized_get(pill,
+ &RMF_MDT_MD,
+ md->layout.lb_len);
+ if (!md->layout.lb_buf) {
rc = -EPROTO;
goto out;
}
-
} else if (md->body->mbo_valid & OBD_MD_FLDIREA) {
- int lmvsize;
- struct lov_mds_md *lmv;
+ const union lmv_mds_md *lmv;
+ size_t lmv_size;
if (!S_ISDIR(md->body->mbo_mode)) {
CDEBUG(D_INFO,
@@ -466,22 +456,21 @@ static int mdc_get_lustre_md(struct obd_export *exp,
goto out;
}
- if (md->body->mbo_eadatasize == 0) {
+ lmv_size = md->body->mbo_eadatasize;
+ if (!lmv_size) {
CDEBUG(D_INFO,
"OBD_MD_FLDIREA is set, but eadatasize 0\n");
return -EPROTO;
}
if (md->body->mbo_valid & OBD_MD_MEA) {
- lmvsize = md->body->mbo_eadatasize;
lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
- lmvsize);
+ lmv_size);
if (!lmv) {
rc = -EPROTO;
goto out;
}
- rc = obd_unpackmd(md_exp, (void *)&md->lmv, lmv,
- lmvsize);
+ rc = md_unpackmd(md_exp, &md->lmv, lmv, lmv_size);
if (rc < 0)
goto out;
@@ -517,8 +506,6 @@ out:
#ifdef CONFIG_FS_POSIX_ACL
posix_acl_release(md->posix_acl);
#endif
- if (md->lsm)
- obd_free_memmd(dt_exp, &md->lsm);
}
return rc;
}
@@ -528,10 +515,6 @@ static int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
return 0;
}
-/**
- * Handles both OPEN and SETATTR RPCs for OPEN-CLOSE and SETATTR-DONE_WRITING
- * RPC chains.
- */
void mdc_replay_open(struct ptlrpc_request *req)
{
struct md_open_data *mod = req->rq_cb_data;
@@ -565,15 +548,15 @@ void mdc_replay_open(struct ptlrpc_request *req)
__u32 opc = lustre_msg_get_opc(close_req->rq_reqmsg);
struct mdt_ioepoch *epoch;
- LASSERT(opc == MDS_CLOSE || opc == MDS_DONE_WRITING);
+ LASSERT(opc == MDS_CLOSE);
epoch = req_capsule_client_get(&close_req->rq_pill,
&RMF_MDT_EPOCH);
LASSERT(epoch);
if (och)
- LASSERT(!memcmp(&old, &epoch->handle, sizeof(old)));
+ LASSERT(!memcmp(&old, &epoch->mio_handle, sizeof(old)));
DEBUG_REQ(D_HA, close_req, "updating close body with new fh");
- epoch->handle = body->mbo_handle;
+ epoch->mio_handle = body->mbo_handle;
}
}
@@ -715,22 +698,6 @@ static int mdc_clear_open_replay_data(struct obd_export *exp,
return 0;
}
-/* Prepares the request for the replay by the given reply */
-static void mdc_close_handle_reply(struct ptlrpc_request *req,
- struct md_op_data *op_data, int rc) {
- struct mdt_body *repbody;
- struct mdt_ioepoch *epoch;
-
- if (req && rc == -EAGAIN) {
- repbody = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
-
- epoch->flags |= MF_SOM_AU;
- if (repbody->mbo_valid & OBD_MD_FLGETATTRLOCK)
- op_data->op_flags |= MF_GETATTR_LOCK;
- }
-}
-
static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
struct md_open_data *mod, struct ptlrpc_request **request)
{
@@ -740,9 +707,8 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
int rc;
int saved_rc = 0;
- req_fmt = &RQF_MDS_CLOSE;
if (op_data->op_bias & MDS_HSM_RELEASE) {
- req_fmt = &RQF_MDS_RELEASE_CLOSE;
+ req_fmt = &RQF_MDS_INTENT_CLOSE;
/* allocate a FID for volatile file */
rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
@@ -752,6 +718,10 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
/* save the errcode and proceed to close */
saved_rc = rc;
}
+ } else if (op_data->op_bias & MDS_CLOSE_LAYOUT_SWAP) {
+ req_fmt = &RQF_MDS_INTENT_CLOSE;
+ } else {
+ req_fmt = &RQF_MDS_CLOSE;
}
*request = NULL;
@@ -807,14 +777,12 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
obd->u.cli.cl_default_mds_easize);
- req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
- obd->u.cli.cl_default_mds_cookiesize);
ptlrpc_request_set_replen(req);
- mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
+ mdc_get_mod_rpc_slot(req, NULL);
rc = ptlrpc_queue_wait(req);
- mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
+ mdc_put_mod_rpc_slot(req, NULL);
if (!req->rq_repmsg) {
CDEBUG(D_RPCTRACE, "request failed to send: %p, %d\n", req,
@@ -857,79 +825,9 @@ out:
obd_mod_put(mod);
}
*request = req;
- mdc_close_handle_reply(req, op_data, rc);
return rc < 0 ? rc : saved_rc;
}
-static int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
- struct md_open_data *mod)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct ptlrpc_request *req;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_DONE_WRITING);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_DONE_WRITING);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- if (mod) {
- LASSERTF(mod->mod_open_req &&
- mod->mod_open_req->rq_type != LI_POISON,
- "POISONED setattr %p!\n", mod->mod_open_req);
-
- mod->mod_close_req = req;
- DEBUG_REQ(D_HA, mod->mod_open_req, "matched setattr");
- /* We no longer want to preserve this setattr for replay even
- * though the open was committed. b=3632, b=3633
- */
- spin_lock(&mod->mod_open_req->rq_lock);
- mod->mod_open_req->rq_replay = 0;
- spin_unlock(&mod->mod_open_req->rq_lock);
- }
-
- mdc_close_pack(req, op_data);
- ptlrpc_request_set_replen(req);
-
- mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
- rc = ptlrpc_queue_wait(req);
- mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
-
- if (rc == -ESTALE) {
- /**
- * it can be allowed error after 3633 if open or setattr were
- * committed and server failed before close was sent.
- * Let's check if mod exists and return no error in that case
- */
- if (mod) {
- if (mod->mod_open_req->rq_committed)
- rc = 0;
- }
- }
-
- if (mod) {
- if (rc != 0)
- mod->mod_close_req = NULL;
- LASSERT(mod->mod_open_req);
- mdc_free_open(mod);
-
- /* Since now, mod is accessed through setattr req only,
- * thus DW req does not keep a reference on mod anymore.
- */
- obd_mod_put(mod);
- }
-
- mdc_close_handle_reply(req, op_data, rc);
- ptlrpc_req_finished(req);
- return rc;
-}
-
static int mdc_getpage(struct obd_export *exp, const struct lu_fid *fid,
u64 offset, struct page **pages, int npages,
struct ptlrpc_request **request)
@@ -959,8 +857,10 @@ restart_bulk:
req->rq_request_portal = MDS_READPAGE_PORTAL;
ptlrpc_at_set_req_timeout(req);
- desc = ptlrpc_prep_bulk_imp(req, npages, 1, BULK_PUT_SINK,
- MDS_BULK_PORTAL);
+ desc = ptlrpc_prep_bulk_imp(req, npages, 1,
+ PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV,
+ MDS_BULK_PORTAL,
+ &ptlrpc_bulk_kiov_pin_ops);
if (!desc) {
ptlrpc_request_free(req);
return -ENOMEM;
@@ -968,7 +868,7 @@ restart_bulk:
/* NB req now owns desc and will free it when it gets freed */
for (i = 0; i < npages; i++)
- ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE);
+ desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE);
mdc_readdir_pack(req, offset, PAGE_SIZE * npages, fid);
@@ -1546,7 +1446,7 @@ static int mdc_ioc_fid2path(struct obd_export *exp, struct getinfo_fid2path *gf)
/* Val is struct getinfo_fid2path result plus path */
vallen = sizeof(*gf) + gf->gf_pathlen;
- rc = obd_get_info(NULL, exp, keylen, key, &vallen, gf, NULL);
+ rc = obd_get_info(NULL, exp, keylen, key, &vallen, gf);
if (rc != 0 && rc != -EREMOTE)
goto out;
@@ -1558,8 +1458,11 @@ static int mdc_ioc_fid2path(struct obd_export *exp, struct getinfo_fid2path *gf)
goto out;
}
- CDEBUG(D_IOCTL, "path get "DFID" from %llu #%d\n%s\n",
- PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno, gf->gf_path);
+ CDEBUG(D_IOCTL, "path got " DFID " from %llu #%d: %s\n",
+ PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno,
+ gf->gf_pathlen < 512 ? gf->gf_path :
+ /* only log the last 512 characters of the path */
+ gf->gf_path + gf->gf_pathlen - 512);
out:
kfree(key);
@@ -1595,7 +1498,9 @@ static int mdc_ioc_hsm_progress(struct obd_export *exp,
ptlrpc_request_set_replen(req);
- rc = mdc_queue_wait(req);
+ mdc_get_mod_rpc_slot(req, NULL);
+ rc = ptlrpc_queue_wait(req);
+ mdc_put_mod_rpc_slot(req, NULL);
out:
ptlrpc_req_finished(req);
return rc;
@@ -1773,7 +1678,9 @@ static int mdc_ioc_hsm_state_set(struct obd_export *exp,
ptlrpc_request_set_replen(req);
- rc = mdc_queue_wait(req);
+ mdc_get_mod_rpc_slot(req, NULL);
+ rc = ptlrpc_queue_wait(req);
+ mdc_put_mod_rpc_slot(req, NULL);
out:
ptlrpc_req_finished(req);
return rc;
@@ -1836,7 +1743,9 @@ static int mdc_ioc_hsm_request(struct obd_export *exp,
ptlrpc_request_set_replen(req);
- rc = mdc_queue_wait(req);
+ mdc_get_mod_rpc_slot(req, NULL);
+ rc = ptlrpc_queue_wait(req);
+ mdc_put_mod_rpc_slot(req, NULL);
out:
ptlrpc_req_finished(req);
return rc;
@@ -1957,10 +1866,8 @@ static int mdc_changelog_send_thread(void *csdata)
/* Send EOF no matter what our result */
kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch), cs->cs_flags);
- if (kuch) {
- kuch->kuc_msgtype = CL_EOF;
- libcfs_kkuc_msg_put(cs->cs_fp, kuch);
- }
+ kuch->kuc_msgtype = CL_EOF;
+ libcfs_kkuc_msg_put(cs->cs_fp, kuch);
out:
fput(cs->cs_fp);
@@ -2015,52 +1922,6 @@ static int mdc_ioc_changelog_send(struct obd_device *obd,
static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
struct lustre_kernelcomm *lk);
-static int mdc_quotacheck(struct obd_device *unused, struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- struct client_obd *cli = &exp->exp_obd->u.cli;
- struct ptlrpc_request *req;
- struct obd_quotactl *body;
- int rc;
-
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_MDS_QUOTACHECK, LUSTRE_MDS_VERSION,
- MDS_QUOTACHECK);
- if (!req)
- return -ENOMEM;
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
- *body = *oqctl;
-
- ptlrpc_request_set_replen(req);
-
- /* the next poll will find -ENODATA, that means quotacheck is
- * going on
- */
- cli->cl_qchk_stat = -ENODATA;
- rc = ptlrpc_queue_wait(req);
- if (rc)
- cli->cl_qchk_stat = rc;
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int mdc_quota_poll_check(struct obd_export *exp,
- struct if_quotacheck *qchk)
-{
- struct client_obd *cli = &exp->exp_obd->u.cli;
- int rc;
-
- qchk->obd_uuid = cli->cl_target_uuid;
- memcpy(qchk->obd_type, LUSTRE_MDS_NAME, strlen(LUSTRE_MDS_NAME));
-
- rc = cli->cl_qchk_stat;
- /* the client is not the previous one */
- if (rc == CL_NOT_QUOTACHECKED)
- rc = -EINTR;
- return rc;
-}
-
static int mdc_quotactl(struct obd_device *unused, struct obd_export *exp,
struct obd_quotactl *oqctl)
{
@@ -2215,9 +2076,6 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
case IOC_OSC_SET_ACTIVE:
rc = ptlrpc_set_import_active(imp, data->ioc_offset);
goto out;
- case OBD_IOC_POLL_QUOTACHECK:
- rc = mdc_quota_poll_check(exp, (struct if_quotacheck *)karg);
- goto out;
case OBD_IOC_PING_TARGET:
rc = ptlrpc_obd_ping(obd);
goto out;
@@ -2528,8 +2386,7 @@ static int mdc_set_info_async(const struct lu_env *env,
}
static int mdc_get_info(const struct lu_env *env, struct obd_export *exp,
- __u32 keylen, void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *lsm)
+ __u32 keylen, void *key, __u32 *vallen, void *val)
{
int rc = -EINVAL;
@@ -2733,29 +2590,17 @@ static void mdc_llog_finish(struct obd_device *obd)
static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
{
- struct client_obd *cli = &obd->u.cli;
struct lprocfs_static_vars lvars = { NULL };
int rc;
- cli->cl_rpc_lock = kzalloc(sizeof(*cli->cl_rpc_lock), GFP_NOFS);
- if (!cli->cl_rpc_lock)
- return -ENOMEM;
- mdc_init_rpc_lock(cli->cl_rpc_lock);
-
rc = ptlrpcd_addref();
if (rc < 0)
- goto err_rpc_lock;
-
- cli->cl_close_lock = kzalloc(sizeof(*cli->cl_close_lock), GFP_NOFS);
- if (!cli->cl_close_lock) {
- rc = -ENOMEM;
- goto err_ptlrpcd_decref;
- }
- mdc_init_rpc_lock(cli->cl_close_lock);
+ return rc;
rc = client_obd_setup(obd, cfg);
if (rc)
- goto err_close_lock;
+ goto err_ptlrpcd_decref;
+
lprocfs_mdc_init_vars(&lvars);
lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars);
sptlrpc_lprocfs_cliobd_attach(obd);
@@ -2769,29 +2614,25 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
if (rc) {
mdc_cleanup(obd);
CERROR("failed to setup llogging subsystems\n");
+ return rc;
}
return rc;
-err_close_lock:
- kfree(cli->cl_close_lock);
err_ptlrpcd_decref:
ptlrpcd_decref();
-err_rpc_lock:
- kfree(cli->cl_rpc_lock);
return rc;
}
-/* Initialize the default and maximum LOV EA and cookie sizes. This allows
+/* Initialize the default and maximum LOV EA sizes. This allows
* us to make MDS RPCs with large enough reply buffers to hold a default
- * sized EA and cookie without having to calculate this (via a call into the
+ * sized EA without having to calculate this (via a call into the
* LOV + OSCs) each time we make an RPC. The maximum size is also tracked
* but not used to avoid wastefully vmalloc()'ing large reply buffers when
* a large number of stripes is possible. If a larger reply buffer is
* required it will be reallocated in the ptlrpc layer due to overflow.
*/
-static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize,
- u32 cookiesize, u32 def_cookiesize)
+static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize)
{
struct obd_device *obd = exp->exp_obd;
struct client_obd *cli = &obd->u.cli;
@@ -2802,42 +2643,24 @@ static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize,
if (cli->cl_default_mds_easize < def_easize)
cli->cl_default_mds_easize = def_easize;
- if (cli->cl_max_mds_cookiesize < cookiesize)
- cli->cl_max_mds_cookiesize = cookiesize;
-
- if (cli->cl_default_mds_cookiesize < def_cookiesize)
- cli->cl_default_mds_cookiesize = def_cookiesize;
-
return 0;
}
-static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+static int mdc_precleanup(struct obd_device *obd)
{
- switch (stage) {
- case OBD_CLEANUP_EARLY:
- break;
- case OBD_CLEANUP_EXPORTS:
- /* Failsafe, ok if racy */
- if (obd->obd_type->typ_refcnt <= 1)
- libcfs_kkuc_group_rem(0, KUC_GRP_HSM);
+ /* Failsafe, ok if racy */
+ if (obd->obd_type->typ_refcnt <= 1)
+ libcfs_kkuc_group_rem(0, KUC_GRP_HSM);
- obd_cleanup_client_import(obd);
- ptlrpc_lprocfs_unregister_obd(obd);
- lprocfs_obd_cleanup(obd);
-
- mdc_llog_finish(obd);
- break;
- }
+ obd_cleanup_client_import(obd);
+ ptlrpc_lprocfs_unregister_obd(obd);
+ lprocfs_obd_cleanup(obd);
+ mdc_llog_finish(obd);
return 0;
}
static int mdc_cleanup(struct obd_device *obd)
{
- struct client_obd *cli = &obd->u.cli;
-
- kfree(cli->cl_rpc_lock);
- kfree(cli->cl_close_lock);
-
ptlrpcd_decref();
return client_obd_cleanup(obd);
@@ -2881,7 +2704,6 @@ static struct obd_ops mdc_obd_ops = {
.process_config = mdc_process_config,
.get_uuid = mdc_get_uuid,
.quotactl = mdc_quotactl,
- .quotacheck = mdc_quotacheck
};
static struct md_ops mdc_md_ops = {
@@ -2889,7 +2711,6 @@ static struct md_ops mdc_md_ops = {
.null_inode = mdc_null_inode,
.close = mdc_close,
.create = mdc_create,
- .done_writing = mdc_done_writing,
.enqueue = mdc_enqueue,
.getattr = mdc_getattr,
.getattr_name = mdc_getattr_name,
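Both bulk setups touched by this series (mdc_getpage() above and the MGS recover-log path below) move to the same descriptor convention: the buffer kind is now passed as a PTLRPC_BULK_* flag and page fragments are added through bd_frag_ops. A hedged sketch of that calling sequence; prep_readpage_bulk() is a hypothetical helper, not part of the patch:

static struct ptlrpc_bulk_desc *
prep_readpage_bulk(struct ptlrpc_request *req, struct page **pages, int npages)
{
	struct ptlrpc_bulk_desc *desc;
	int i;

	/* sink bulk backed by a kiov, with pages pinned for the transfer */
	desc = ptlrpc_prep_bulk_imp(req, npages, 1,
				    PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV,
				    MDS_BULK_PORTAL,
				    &ptlrpc_bulk_kiov_pin_ops);
	if (!desc)
		return NULL;

	/* each page becomes one kiov fragment of the bulk transfer */
	for (i = 0; i < npages; i++)
		desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE);

	return desc;
}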
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index 23374cae5133..b9c522a3c7a4 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -38,11 +38,13 @@
#define D_MGC D_CONFIG /*|D_WARNING*/
#include <linux/module.h>
-#include "../include/obd_class.h"
-#include "../include/lustre_dlm.h"
+
#include "../include/lprocfs_status.h"
-#include "../include/lustre_log.h"
+#include "../include/lustre_dlm.h"
#include "../include/lustre_disk.h"
+#include "../include/lustre_log.h"
+#include "../include/lustre_swab.h"
+#include "../include/obd_class.h"
#include "mgc_internal.h"
@@ -373,7 +375,7 @@ out_err:
return rc;
}
-DEFINE_MUTEX(llog_process_lock);
+static DEFINE_MUTEX(llog_process_lock);
/** Stop watching for updates on this log.
*/
@@ -684,35 +686,33 @@ static int mgc_llog_fini(const struct lu_env *env, struct obd_device *obd)
}
static atomic_t mgc_count = ATOMIC_INIT(0);
-static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+static int mgc_precleanup(struct obd_device *obd)
{
int rc = 0;
int temp;
- switch (stage) {
- case OBD_CLEANUP_EARLY:
- break;
- case OBD_CLEANUP_EXPORTS:
- if (atomic_dec_and_test(&mgc_count)) {
- LASSERT(rq_state & RQ_RUNNING);
- /* stop requeue thread */
- temp = RQ_STOP;
- } else {
- /* wakeup requeue thread to clean our cld */
- temp = RQ_NOW | RQ_PRECLEANUP;
- }
- spin_lock(&config_list_lock);
- rq_state |= temp;
- spin_unlock(&config_list_lock);
- wake_up(&rq_waitq);
- if (temp & RQ_STOP)
- wait_for_completion(&rq_exit);
- obd_cleanup_client_import(obd);
- rc = mgc_llog_fini(NULL, obd);
- if (rc != 0)
- CERROR("failed to cleanup llogging subsystems\n");
- break;
+ if (atomic_dec_and_test(&mgc_count)) {
+ LASSERT(rq_state & RQ_RUNNING);
+ /* stop requeue thread */
+ temp = RQ_STOP;
+ } else {
+ /* wakeup requeue thread to clean our cld */
+ temp = RQ_NOW | RQ_PRECLEANUP;
}
+
+ spin_lock(&config_list_lock);
+ rq_state |= temp;
+ spin_unlock(&config_list_lock);
+ wake_up(&rq_waitq);
+
+ if (temp & RQ_STOP)
+ wait_for_completion(&rq_exit);
+ obd_cleanup_client_import(obd);
+
+ rc = mgc_llog_fini(NULL, obd);
+ if (rc)
+ CERROR("failed to cleanup llogging subsystems\n");
+
return rc;
}
@@ -887,8 +887,8 @@ static int mgc_set_mgs_param(struct obd_export *exp,
}
/* Take a config lock so we can get cancel notifications */
-static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
- __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+static int mgc_enqueue(struct obd_export *exp, __u32 type,
+ union ldlm_policy_data *policy, __u32 mode,
__u64 *flags, void *bl_cb, void *cp_cb, void *gl_cb,
void *data, __u32 lvb_len, void *lvb_swabber,
struct lustre_handle *lockh)
@@ -1059,8 +1059,7 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
}
static int mgc_get_info(const struct lu_env *env, struct obd_export *exp,
- __u32 keylen, void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *unused)
+ __u32 keylen, void *key, __u32 *vallen, void *val)
{
int rc = -EINVAL;
@@ -1387,15 +1386,17 @@ again:
body->mcb_units = nrpages;
/* allocate bulk transfer descriptor */
- desc = ptlrpc_prep_bulk_imp(req, nrpages, 1, BULK_PUT_SINK,
- MGS_BULK_PORTAL);
+ desc = ptlrpc_prep_bulk_imp(req, nrpages, 1,
+ PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV,
+ MGS_BULK_PORTAL,
+ &ptlrpc_bulk_kiov_pin_ops);
if (!desc) {
rc = -ENOMEM;
goto out;
}
for (i = 0; i < nrpages; i++)
- ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE);
+ desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE);
ptlrpc_request_set_replen(req);
rc = ptlrpc_queue_wait(req);
@@ -1553,14 +1554,52 @@ out_free:
return rc;
}
-/** Get a config log from the MGS and process it.
- * This func is called for both clients and servers.
- * Copy the log locally before parsing it if appropriate (non-MGS server)
+static bool mgc_import_in_recovery(struct obd_import *imp)
+{
+ bool in_recovery = true;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_FULL ||
+ imp->imp_state == LUSTRE_IMP_CLOSED)
+ in_recovery = false;
+ spin_unlock(&imp->imp_lock);
+
+ return in_recovery;
+}
+
+/**
+ * Get a configuration log from the MGS and process it.
+ *
+ * This function is called for both clients and servers to process the
+ * configuration log from the MGS. The MGC enqueues a DLM lock on the
+ * log from the MGS, and if the lock gets revoked the MGC will be notified
+ * by the lock cancellation callback that the config log has changed,
+ * and will enqueue another MGS lock on it, and then continue processing
+ * the new additions to the end of the log.
+ *
+ * Since the MGC import is not replayable, if the import is being evicted
+ * (rcl == -ESHUTDOWN, \see ptlrpc_import_delay_req()), retry processing
+ * the log until recovery has finished or the import is closed.
+ *
+ * Make a local copy of the log before parsing it if appropriate (non-MGS
+ * server) so that the server can start even when the MGS is down.
+ *
+ * There shouldn't be multiple processes running process_log at once --
+ * sounds like badness. It actually might be fine, as long as they're not
+ * trying to update from the same log simultaneously, in which case we
+ * should use a per-log semaphore instead of cld_lock.
+ *
+ * \param[in] mgc MGC device by which to fetch the configuration log
+ * \param[in] cld log processing state (stored in lock callback data)
+ *
+ * \retval 0 on success
+ * \retval negative errno on failure
*/
int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld)
{
struct lustre_handle lockh = { 0 };
__u64 flags = LDLM_FL_NO_LRU;
+ bool retry = false;
int rc = 0, rcl;
LASSERT(cld);
@@ -1570,6 +1609,7 @@ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld)
* we're not trying to update from the same log
* simultaneously (in which case we should use a per-log sem.)
*/
+restart:
mutex_lock(&cld->cld_lock);
if (cld->cld_stopping) {
mutex_unlock(&cld->cld_lock);
@@ -1582,7 +1622,7 @@ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld)
cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1);
/* Get the cfg lock on the llog */
- rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, NULL, LDLM_PLAIN, NULL,
+ rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, LDLM_PLAIN, NULL,
LCK_CR, &flags, NULL, NULL, NULL,
cld, 0, NULL, &lockh);
if (rcl == 0) {
@@ -1593,18 +1633,57 @@ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld)
} else {
CDEBUG(D_MGC, "Can't get cfg lock: %d\n", rcl);
- /* mark cld_lostlock so that it will requeue
- * after MGC becomes available.
- */
- cld->cld_lostlock = 1;
+ if (rcl == -ESHUTDOWN &&
+ atomic_read(&mgc->u.cli.cl_mgc_refcount) > 0 && !retry) {
+ int secs = cfs_time_seconds(obd_timeout);
+ struct obd_import *imp;
+ struct l_wait_info lwi;
+
+ mutex_unlock(&cld->cld_lock);
+ imp = class_exp2cliimp(mgc->u.cli.cl_mgc_mgsexp);
+
+ /*
+			 * Force the pinger and wait for the import to be
+			 * connected. Note: since the MGC import is
+			 * non-replayable, a disconnected import state does
+			 * not mean recovery has stopped, so keep waiting
+			 * until the timeout expires or the import state
+			 * becomes FULL or CLOSED.
+ */
+ ptlrpc_pinger_force(imp);
+
+ lwi = LWI_TIMEOUT(secs, NULL, NULL);
+ l_wait_event(imp->imp_recovery_waitq,
+ !mgc_import_in_recovery(imp), &lwi);
+
+ if (imp->imp_state == LUSTRE_IMP_FULL) {
+ retry = true;
+ goto restart;
+ } else {
+ mutex_lock(&cld->cld_lock);
+ cld->cld_lostlock = 1;
+ }
+ } else {
+ /* mark cld_lostlock so that it will requeue
+ * after MGC becomes available.
+ */
+ cld->cld_lostlock = 1;
+ }
/* Get extra reference, it will be put in requeue thread */
config_log_get(cld);
}
if (cld_is_recover(cld)) {
rc = 0; /* this is not a fatal error for recover log */
- if (rcl == 0)
+ if (!rcl) {
rc = mgc_process_recover_log(mgc, cld);
+ if (rc) {
+				CERROR("%s: recover log %s failed, not fatal: rc = %d\n",
+ mgc->obd_name, cld->cld_logname, rc);
+ rc = 0;
+ cld->cld_lostlock = 1;
+ }
+ }
} else {
rc = mgc_process_cfg_log(mgc, cld, rcl != 0);
}
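The retry path added to mgc_process_log() reduces to one pattern: force the pinger, block until the import leaves recovery, and retry only if it came back FULL. A condensed sketch under the same assumptions as the hunk above; wait_mgc_import_sketch is a hypothetical name:

static bool wait_mgc_import_sketch(struct obd_import *imp)
{
	struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(obd_timeout),
					     NULL, NULL);

	/* kick the pinger so reconnection is attempted promptly */
	ptlrpc_pinger_force(imp);

	/* wait until the import is FULL or CLOSED, or the timeout fires */
	l_wait_event(imp->imp_recovery_waitq,
		     !mgc_import_in_recovery(imp), &lwi);

	/* only a fully connected import is worth another enqueue */
	return imp->imp_state == LUSTRE_IMP_FULL;
}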
diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile
index b42e109b30e0..af570c0db15b 100644
--- a/drivers/staging/lustre/lustre/obdclass/Makefile
+++ b/drivers/staging/lustre/lustre/obdclass/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_LUSTRE_FS) += obdclass.o
-obdclass-y := linux/linux-module.o linux/linux-obdo.o linux/linux-sysctl.o \
+obdclass-y := linux/linux-module.o linux/linux-sysctl.o \
llog.o llog_cat.o llog_obd.o llog_swab.o class_obd.o debug.o \
genops.o uuid.o lprocfs_status.o lprocfs_counters.o \
lustre_handles.o lustre_peer.o statfs_pack.o linkea.o \
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_internal.h b/drivers/staging/lustre/lustre/obdclass/cl_internal.h
index e866754a42d5..7b403fbd5f94 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_internal.h
+++ b/drivers/staging/lustre/lustre/obdclass/cl_internal.h
@@ -50,25 +50,6 @@ enum clt_nesting_level {
};
/**
- * Counters used to check correctness of cl_lock interface usage.
- */
-struct cl_thread_counters {
- /**
- * Number of outstanding calls to cl_lock_mutex_get() made by the
- * current thread. For debugging.
- */
- int ctc_nr_locks_locked;
- /** List of locked locks. */
- struct lu_ref ctc_locks_locked;
- /** Number of outstanding holds on locks. */
- int ctc_nr_held;
- /** Number of outstanding uses on locks. */
- int ctc_nr_used;
- /** Number of held extent locks. */
- int ctc_nr_locks_acquired;
-};
-
-/**
* Thread local state internal for generic cl-code.
*/
struct cl_thread_info {
@@ -83,10 +64,6 @@ struct cl_thread_info {
*/
struct cl_lock_descr clt_descr;
struct cl_page_list clt_list;
- /**
- * Counters for every level of lock nesting.
- */
- struct cl_thread_counters clt_counters[CNL_NR];
/** @} debugging */
/*
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c
index bc4b7b6b9a20..3f42457b0d7d 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_io.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c
@@ -126,6 +126,7 @@ void cl_io_fini(const struct lu_env *env, struct cl_io *io)
switch (io->ci_type) {
case CIT_READ:
case CIT_WRITE:
+ case CIT_DATA_VERSION:
break;
case CIT_FAULT:
break;
@@ -411,7 +412,6 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io)
scan->cis_iop->op[io->ci_type].cio_unlock(env, scan);
}
io->ci_state = CIS_UNLOCKED;
- LASSERT(!cl_env_info(env)->clt_counters[CNL_TOP].ctc_nr_locks_acquired);
}
EXPORT_SYMBOL(cl_io_unlock);
@@ -586,67 +586,32 @@ void cl_io_end(const struct lu_env *env, struct cl_io *io)
}
EXPORT_SYMBOL(cl_io_end);
-static const struct cl_page_slice *
-cl_io_slice_page(const struct cl_io_slice *ios, struct cl_page *page)
-{
- const struct cl_page_slice *slice;
-
- slice = cl_page_at(page, ios->cis_obj->co_lu.lo_dev->ld_type);
- LINVRNT(slice);
- return slice;
-}
-
/**
- * Called by read io, when page has to be read from the server.
+ * Called by read io, to decide the readahead extent
*
- * \see cl_io_operations::cio_read_page()
+ * \see cl_io_operations::cio_read_ahead()
*/
-int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
+int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io,
+ pgoff_t start, struct cl_read_ahead *ra)
{
const struct cl_io_slice *scan;
- struct cl_2queue *queue;
int result = 0;
LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT);
- LINVRNT(cl_page_is_owned(page, io));
LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
LINVRNT(cl_io_invariant(io));
- queue = &io->ci_queue;
-
- cl_2queue_init(queue);
- /*
- * ->cio_read_page() methods called in the loop below are supposed to
- * never block waiting for network (the only subtle point is the
- * creation of new pages for read-ahead that might result in cache
- * shrinking, but currently only clean pages are shrunk and this
- * requires no network io).
- *
- * Should this ever starts blocking, retry loop would be needed for
- * "parallel io" (see CLO_REPEAT loops in cl_lock.c).
- */
cl_io_for_each(scan, io) {
- if (scan->cis_iop->cio_read_page) {
- const struct cl_page_slice *slice;
+ if (!scan->cis_iop->cio_read_ahead)
+ continue;
- slice = cl_io_slice_page(scan, page);
- LINVRNT(slice);
- result = scan->cis_iop->cio_read_page(env, scan, slice);
- if (result != 0)
- break;
- }
+ result = scan->cis_iop->cio_read_ahead(env, scan, start, ra);
+ if (result)
+ break;
}
- if (result == 0 && queue->c2_qin.pl_nr > 0)
- result = cl_io_submit_rw(env, io, CRT_READ, queue);
- /*
- * Unlock unsent pages in case of error.
- */
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_2queue_fini(env, queue);
- return result;
+ return result > 0 ? 0 : result;
}
-EXPORT_SYMBOL(cl_io_read_page);
+EXPORT_SYMBOL(cl_io_read_ahead);
/**
* Commit a list of contiguous pages into writeback cache.
@@ -1080,235 +1045,18 @@ struct cl_io *cl_io_top(struct cl_io *io)
EXPORT_SYMBOL(cl_io_top);
/**
- * Adds request slice to the compound request.
- *
- * This is called by cl_device_operations::cdo_req_init() methods to add a
- * per-layer state to the request. New state is added at the end of
- * cl_req::crq_layers list, that is, it is at the bottom of the stack.
- *
- * \see cl_lock_slice_add(), cl_page_slice_add(), cl_io_slice_add()
- */
-void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice,
- struct cl_device *dev,
- const struct cl_req_operations *ops)
-{
- list_add_tail(&slice->crs_linkage, &req->crq_layers);
- slice->crs_dev = dev;
- slice->crs_ops = ops;
- slice->crs_req = req;
-}
-EXPORT_SYMBOL(cl_req_slice_add);
-
-static void cl_req_free(const struct lu_env *env, struct cl_req *req)
-{
- unsigned i;
-
- LASSERT(list_empty(&req->crq_pages));
- LASSERT(req->crq_nrpages == 0);
- LINVRNT(list_empty(&req->crq_layers));
- LINVRNT(equi(req->crq_nrobjs > 0, req->crq_o));
-
- if (req->crq_o) {
- for (i = 0; i < req->crq_nrobjs; ++i) {
- struct cl_object *obj = req->crq_o[i].ro_obj;
-
- if (obj) {
- lu_object_ref_del_at(&obj->co_lu,
- &req->crq_o[i].ro_obj_ref,
- "cl_req", req);
- cl_object_put(env, obj);
- }
- }
- kfree(req->crq_o);
- }
- kfree(req);
-}
-
-static int cl_req_init(const struct lu_env *env, struct cl_req *req,
- struct cl_page *page)
-{
- struct cl_device *dev;
- struct cl_page_slice *slice;
- int result;
-
- result = 0;
- list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
- dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
- if (dev->cd_ops->cdo_req_init) {
- result = dev->cd_ops->cdo_req_init(env, dev, req);
- if (result != 0)
- break;
- }
- }
- return result;
-}
-
-/**
- * Invokes per-request transfer completion call-backs
- * (cl_req_operations::cro_completion()) bottom-to-top.
- */
-void cl_req_completion(const struct lu_env *env, struct cl_req *req, int rc)
-{
- struct cl_req_slice *slice;
-
- /*
- * for the lack of list_for_each_entry_reverse_safe()...
- */
- while (!list_empty(&req->crq_layers)) {
- slice = list_entry(req->crq_layers.prev,
- struct cl_req_slice, crs_linkage);
- list_del_init(&slice->crs_linkage);
- if (slice->crs_ops->cro_completion)
- slice->crs_ops->cro_completion(env, slice, rc);
- }
- cl_req_free(env, req);
-}
-EXPORT_SYMBOL(cl_req_completion);
-
-/**
- * Allocates new transfer request.
- */
-struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
- enum cl_req_type crt, int nr_objects)
-{
- struct cl_req *req;
-
- LINVRNT(nr_objects > 0);
-
- req = kzalloc(sizeof(*req), GFP_NOFS);
- if (req) {
- int result;
-
- req->crq_type = crt;
- INIT_LIST_HEAD(&req->crq_pages);
- INIT_LIST_HEAD(&req->crq_layers);
-
- req->crq_o = kcalloc(nr_objects, sizeof(req->crq_o[0]),
- GFP_NOFS);
- if (req->crq_o) {
- req->crq_nrobjs = nr_objects;
- result = cl_req_init(env, req, page);
- } else {
- result = -ENOMEM;
- }
- if (result != 0) {
- cl_req_completion(env, req, result);
- req = ERR_PTR(result);
- }
- } else {
- req = ERR_PTR(-ENOMEM);
- }
- return req;
-}
-EXPORT_SYMBOL(cl_req_alloc);
-
-/**
- * Adds a page to a request.
- */
-void cl_req_page_add(const struct lu_env *env,
- struct cl_req *req, struct cl_page *page)
-{
- struct cl_object *obj;
- struct cl_req_obj *rqo;
- unsigned int i;
-
- LASSERT(list_empty(&page->cp_flight));
- LASSERT(!page->cp_req);
-
- CL_PAGE_DEBUG(D_PAGE, env, page, "req %p, %d, %u\n",
- req, req->crq_type, req->crq_nrpages);
-
- list_add_tail(&page->cp_flight, &req->crq_pages);
- ++req->crq_nrpages;
- page->cp_req = req;
- obj = cl_object_top(page->cp_obj);
- for (i = 0, rqo = req->crq_o; obj != rqo->ro_obj; ++i, ++rqo) {
- if (!rqo->ro_obj) {
- rqo->ro_obj = obj;
- cl_object_get(obj);
- lu_object_ref_add_at(&obj->co_lu, &rqo->ro_obj_ref,
- "cl_req", req);
- break;
- }
- }
- LASSERT(i < req->crq_nrobjs);
-}
-EXPORT_SYMBOL(cl_req_page_add);
-
-/**
- * Removes a page from a request.
- */
-void cl_req_page_done(const struct lu_env *env, struct cl_page *page)
-{
- struct cl_req *req = page->cp_req;
-
- LASSERT(!list_empty(&page->cp_flight));
- LASSERT(req->crq_nrpages > 0);
-
- list_del_init(&page->cp_flight);
- --req->crq_nrpages;
- page->cp_req = NULL;
-}
-EXPORT_SYMBOL(cl_req_page_done);
-
-/**
- * Notifies layers that request is about to depart by calling
- * cl_req_operations::cro_prep() top-to-bottom.
- */
-int cl_req_prep(const struct lu_env *env, struct cl_req *req)
-{
- unsigned int i;
- int result;
- const struct cl_req_slice *slice;
-
- /*
- * Check that the caller of cl_req_alloc() didn't lie about the number
- * of objects.
- */
- for (i = 0; i < req->crq_nrobjs; ++i)
- LASSERT(req->crq_o[i].ro_obj);
-
- result = 0;
- list_for_each_entry(slice, &req->crq_layers, crs_linkage) {
- if (slice->crs_ops->cro_prep) {
- result = slice->crs_ops->cro_prep(env, slice);
- if (result != 0)
- break;
- }
- }
- return result;
-}
-EXPORT_SYMBOL(cl_req_prep);
-
-/**
* Fills in attributes that are passed to server together with transfer. Only
* attributes from \a flags may be touched. This can be called multiple times
* for the same request.
*/
-void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
- struct cl_req_attr *attr, u64 flags)
+void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj,
+ struct cl_req_attr *attr)
{
- const struct cl_req_slice *slice;
- struct cl_page *page;
- unsigned int i;
-
- LASSERT(!list_empty(&req->crq_pages));
-
- /* Take any page to use as a model. */
- page = list_entry(req->crq_pages.next, struct cl_page, cp_flight);
-
- for (i = 0; i < req->crq_nrobjs; ++i) {
- list_for_each_entry(slice, &req->crq_layers, crs_linkage) {
- const struct cl_page_slice *scan;
- const struct cl_object *obj;
-
- scan = cl_page_at(page,
- slice->crs_dev->cd_lu_dev.ld_type);
- obj = scan->cpl_obj;
- if (slice->crs_ops->cro_attr_set)
- slice->crs_ops->cro_attr_set(env, slice, obj,
- attr + i, flags);
- }
+ struct cl_object *scan;
+
+ cl_object_for_each(scan, obj) {
+ if (scan->co_ops->coo_req_attr_set)
+ scan->co_ops->coo_req_attr_set(env, scan, attr);
}
}
EXPORT_SYMBOL(cl_req_attr_set);
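cl_io_read_ahead() above establishes the return-value contract for the new cio_read_ahead hook: zero continues down the layers, a negative value aborts with that error, and a positive value stops the descent without raising an error (the caller maps it to zero). A hedged layer-side sketch; everything except the hook signature is hypothetical:

/* illustrative per-layer end-of-extent index, not a real constant */
#define DEMO_RA_EOF_INDEX	1024

static int demo_io_read_ahead(const struct lu_env *env,
			      const struct cl_io_slice *ios,
			      pgoff_t start, struct cl_read_ahead *ra)
{
	/* this layer knows nothing exists past its extent: stop, no error */
	if (start >= DEMO_RA_EOF_INDEX)
		return 1;

	return 0;	/* defer to the next layer down */
}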
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
index 3199dd4a3b72..f5d4e23c64b7 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c
@@ -335,7 +335,7 @@ int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
if (obj->co_ops->coo_getstripe) {
result = obj->co_ops->coo_getstripe(env, obj, uarg);
if (result)
- break;
+ break;
}
}
return result;
@@ -343,6 +343,67 @@ int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
EXPORT_SYMBOL(cl_object_getstripe);
/**
+ * Get fiemap extents from file object.
+ *
+ * \param env [in] Lustre environment
+ * \param obj [in] file object
+ * \param key [in] fiemap request argument
+ * \param fiemap [out] fiemap extents mapping retrieved
+ * \param buflen [in] max buffer length of @fiemap
+ *
+ * \retval 0 success
+ * \retval < 0 error
+ */
+int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
+ struct ll_fiemap_info_key *key,
+ struct fiemap *fiemap, size_t *buflen)
+{
+ struct lu_object_header *top;
+ int result = 0;
+
+ top = obj->co_lu.lo_header;
+ list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
+ if (obj->co_ops->coo_fiemap) {
+ result = obj->co_ops->coo_fiemap(env, obj, key, fiemap,
+ buflen);
+ if (result)
+ break;
+ }
+ }
+ return result;
+}
+EXPORT_SYMBOL(cl_object_fiemap);
+
+int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
+ struct cl_layout *cl)
+{
+ struct lu_object_header *top = obj->co_lu.lo_header;
+
+ list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
+ if (obj->co_ops->coo_layout_get)
+ return obj->co_ops->coo_layout_get(env, obj, cl);
+ }
+
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(cl_object_layout_get);
+
+loff_t cl_object_maxbytes(struct cl_object *obj)
+{
+ struct lu_object_header *top = obj->co_lu.lo_header;
+ loff_t maxbytes = LLONG_MAX;
+
+ list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
+ if (obj->co_ops->coo_maxbytes)
+ maxbytes = min_t(loff_t, obj->co_ops->coo_maxbytes(obj),
+ maxbytes);
+ }
+
+ return maxbytes;
+}
+EXPORT_SYMBOL(cl_object_maxbytes);
+
+/**
* Helper function removing all object locks, and marking object for
* deletion. All object pages must have been deleted at this point.
*
@@ -483,36 +544,20 @@ EXPORT_SYMBOL(cl_site_stats_print);
* bz20044, bz22683.
*/
-static LIST_HEAD(cl_envs);
-static unsigned int cl_envs_cached_nr;
-static unsigned int cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit
- * for now.
- */
-static DEFINE_SPINLOCK(cl_envs_guard);
+static unsigned int cl_envs_cached_max = 32; /* XXX: prototype: arbitrary limit
+ * for now.
+ */
+static struct cl_env_cache {
+ rwlock_t cec_guard;
+ unsigned int cec_count;
+ struct list_head cec_envs;
+} *cl_envs;
struct cl_env {
void *ce_magic;
struct lu_env ce_lu;
struct lu_context ce_ses;
- /**
- * This allows cl_env to be entered into cl_env_hash which implements
- * the current thread -> client environment lookup.
- */
- struct hlist_node ce_node;
- /**
- * Owner for the current cl_env.
- *
- * If LL_TASK_CL_ENV is defined, this point to the owning current,
- * only for debugging purpose ;
- * Otherwise hash is used, and this is the key for cfs_hash.
- * Now current thread pid is stored. Note using thread pointer would
- * lead to unbalanced hash because of its specific allocation locality
- * and could be varied for different platforms and OSes, even different
- * OS versions.
- */
- void *ce_owner;
-
/*
* Linkage into global list of all client environments. Used for
* garbage collection.
@@ -536,122 +581,13 @@ static void cl_env_init0(struct cl_env *cle, void *debug)
{
LASSERT(cle->ce_ref == 0);
LASSERT(cle->ce_magic == &cl_env_init0);
- LASSERT(!cle->ce_debug && !cle->ce_owner);
+ LASSERT(!cle->ce_debug);
cle->ce_ref = 1;
cle->ce_debug = debug;
CL_ENV_INC(busy);
}
-/*
- * The implementation of using hash table to connect cl_env and thread
- */
-
-static struct cfs_hash *cl_env_hash;
-
-static unsigned cl_env_hops_hash(struct cfs_hash *lh,
- const void *key, unsigned mask)
-{
-#if BITS_PER_LONG == 64
- return cfs_hash_u64_hash((__u64)key, mask);
-#else
- return cfs_hash_u32_hash((__u32)key, mask);
-#endif
-}
-
-static void *cl_env_hops_obj(struct hlist_node *hn)
-{
- struct cl_env *cle = hlist_entry(hn, struct cl_env, ce_node);
-
- LASSERT(cle->ce_magic == &cl_env_init0);
- return (void *)cle;
-}
-
-static int cl_env_hops_keycmp(const void *key, struct hlist_node *hn)
-{
- struct cl_env *cle = cl_env_hops_obj(hn);
-
- LASSERT(cle->ce_owner);
- return (key == cle->ce_owner);
-}
-
-static void cl_env_hops_noop(struct cfs_hash *hs, struct hlist_node *hn)
-{
- struct cl_env *cle = hlist_entry(hn, struct cl_env, ce_node);
-
- LASSERT(cle->ce_magic == &cl_env_init0);
-}
-
-static struct cfs_hash_ops cl_env_hops = {
- .hs_hash = cl_env_hops_hash,
- .hs_key = cl_env_hops_obj,
- .hs_keycmp = cl_env_hops_keycmp,
- .hs_object = cl_env_hops_obj,
- .hs_get = cl_env_hops_noop,
- .hs_put_locked = cl_env_hops_noop,
-};
-
-static inline struct cl_env *cl_env_fetch(void)
-{
- struct cl_env *cle;
-
- cle = cfs_hash_lookup(cl_env_hash, (void *)(long)current->pid);
- LASSERT(ergo(cle, cle->ce_magic == &cl_env_init0));
- return cle;
-}
-
-static inline void cl_env_attach(struct cl_env *cle)
-{
- if (cle) {
- int rc;
-
- LASSERT(!cle->ce_owner);
- cle->ce_owner = (void *)(long)current->pid;
- rc = cfs_hash_add_unique(cl_env_hash, cle->ce_owner,
- &cle->ce_node);
- LASSERT(rc == 0);
- }
-}
-
-static inline void cl_env_do_detach(struct cl_env *cle)
-{
- void *cookie;
-
- LASSERT(cle->ce_owner == (void *)(long)current->pid);
- cookie = cfs_hash_del(cl_env_hash, cle->ce_owner,
- &cle->ce_node);
- LASSERT(cookie == cle);
- cle->ce_owner = NULL;
-}
-
-static int cl_env_store_init(void)
-{
- cl_env_hash = cfs_hash_create("cl_env",
- HASH_CL_ENV_BITS, HASH_CL_ENV_BITS,
- HASH_CL_ENV_BKT_BITS, 0,
- CFS_HASH_MIN_THETA,
- CFS_HASH_MAX_THETA,
- &cl_env_hops,
- CFS_HASH_RW_BKTLOCK);
- return cl_env_hash ? 0 : -ENOMEM;
-}
-
-static void cl_env_store_fini(void)
-{
- cfs_hash_putref(cl_env_hash);
-}
-
-static inline struct cl_env *cl_env_detach(struct cl_env *cle)
-{
- if (!cle)
- cle = cl_env_fetch();
-
- if (cle && cle->ce_owner)
- cl_env_do_detach(cle);
-
- return cle;
-}
-
static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
{
struct lu_env *env;
@@ -701,16 +637,20 @@ static struct lu_env *cl_env_obtain(void *debug)
{
struct cl_env *cle;
struct lu_env *env;
+ int cpu = get_cpu();
- spin_lock(&cl_envs_guard);
- LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
- if (cl_envs_cached_nr > 0) {
+ read_lock(&cl_envs[cpu].cec_guard);
+ LASSERT(equi(cl_envs[cpu].cec_count == 0,
+ list_empty(&cl_envs[cpu].cec_envs)));
+ if (cl_envs[cpu].cec_count > 0) {
int rc;
- cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+ cle = container_of(cl_envs[cpu].cec_envs.next, struct cl_env,
+ ce_linkage);
list_del_init(&cle->ce_linkage);
- cl_envs_cached_nr--;
- spin_unlock(&cl_envs_guard);
+ cl_envs[cpu].cec_count--;
+ read_unlock(&cl_envs[cpu].cec_guard);
+ put_cpu();
env = &cle->ce_lu;
rc = lu_env_refill(env);
@@ -723,7 +663,8 @@ static struct lu_env *cl_env_obtain(void *debug)
env = ERR_PTR(rc);
}
} else {
- spin_unlock(&cl_envs_guard);
+ read_unlock(&cl_envs[cpu].cec_guard);
+ put_cpu();
env = cl_env_new(lu_context_tags_default,
lu_session_tags_default, debug);
}
@@ -735,27 +676,6 @@ static inline struct cl_env *cl_env_container(struct lu_env *env)
return container_of(env, struct cl_env, ce_lu);
}
-static struct lu_env *cl_env_peek(int *refcheck)
-{
- struct lu_env *env;
- struct cl_env *cle;
-
- CL_ENV_INC(lookup);
-
- /* check that we don't go far from untrusted pointer */
- CLASSERT(offsetof(struct cl_env, ce_magic) == 0);
-
- env = NULL;
- cle = cl_env_fetch();
- if (cle) {
- CL_ENV_INC(hit);
- env = &cle->ce_lu;
- *refcheck = ++cle->ce_ref;
- }
- CDEBUG(D_OTHER, "%d@%p\n", cle ? cle->ce_ref : 0, cle);
- return env;
-}
-
/**
* Returns lu_env: if there already is an environment associated with the
* current thread, it is returned, otherwise, new environment is allocated.
@@ -773,17 +693,13 @@ struct lu_env *cl_env_get(int *refcheck)
{
struct lu_env *env;
- env = cl_env_peek(refcheck);
- if (!env) {
- env = cl_env_obtain(__builtin_return_address(0));
- if (!IS_ERR(env)) {
- struct cl_env *cle;
+ env = cl_env_obtain(__builtin_return_address(0));
+ if (!IS_ERR(env)) {
+ struct cl_env *cle;
- cle = cl_env_container(env);
- cl_env_attach(cle);
- *refcheck = cle->ce_ref;
- CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
- }
+ cle = cl_env_container(env);
+ *refcheck = cle->ce_ref;
+ CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
}
return env;
}
@@ -798,7 +714,6 @@ struct lu_env *cl_env_alloc(int *refcheck, __u32 tags)
{
struct lu_env *env;
- LASSERT(!cl_env_peek(refcheck));
env = cl_env_new(tags, tags, __builtin_return_address(0));
if (!IS_ERR(env)) {
struct cl_env *cle;
@@ -813,7 +728,6 @@ EXPORT_SYMBOL(cl_env_alloc);
static void cl_env_exit(struct cl_env *cle)
{
- LASSERT(!cle->ce_owner);
lu_context_exit(&cle->ce_lu.le_ctx);
lu_context_exit(&cle->ce_ses);
}
@@ -826,20 +740,25 @@ static void cl_env_exit(struct cl_env *cle)
unsigned int cl_env_cache_purge(unsigned int nr)
{
struct cl_env *cle;
+ unsigned int i;
- spin_lock(&cl_envs_guard);
- for (; !list_empty(&cl_envs) && nr > 0; --nr) {
- cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
- list_del_init(&cle->ce_linkage);
- LASSERT(cl_envs_cached_nr > 0);
- cl_envs_cached_nr--;
- spin_unlock(&cl_envs_guard);
+ for_each_possible_cpu(i) {
+ write_lock(&cl_envs[i].cec_guard);
+ for (; !list_empty(&cl_envs[i].cec_envs) && nr > 0; --nr) {
+ cle = container_of(cl_envs[i].cec_envs.next,
+ struct cl_env, ce_linkage);
+ list_del_init(&cle->ce_linkage);
+ LASSERT(cl_envs[i].cec_count > 0);
+ cl_envs[i].cec_count--;
+ write_unlock(&cl_envs[i].cec_guard);
- cl_env_fini(cle);
- spin_lock(&cl_envs_guard);
+ cl_env_fini(cle);
+ write_lock(&cl_envs[i].cec_guard);
+ }
+ LASSERT(equi(cl_envs[i].cec_count == 0,
+ list_empty(&cl_envs[i].cec_envs)));
+ write_unlock(&cl_envs[i].cec_guard);
}
- LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
- spin_unlock(&cl_envs_guard);
return nr;
}
EXPORT_SYMBOL(cl_env_cache_purge);
@@ -862,8 +781,9 @@ void cl_env_put(struct lu_env *env, int *refcheck)
CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
if (--cle->ce_ref == 0) {
+ int cpu = get_cpu();
+
CL_ENV_DEC(busy);
- cl_env_detach(cle);
cle->ce_debug = NULL;
cl_env_exit(cle);
/*
@@ -872,107 +792,22 @@ void cl_env_put(struct lu_env *env, int *refcheck)
* Return environment to the cache only when it was allocated
* with the standard tags.
*/
- if (cl_envs_cached_nr < cl_envs_cached_max &&
+ if (cl_envs[cpu].cec_count < cl_envs_cached_max &&
(env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD &&
(env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) {
- spin_lock(&cl_envs_guard);
- list_add(&cle->ce_linkage, &cl_envs);
- cl_envs_cached_nr++;
- spin_unlock(&cl_envs_guard);
+ read_lock(&cl_envs[cpu].cec_guard);
+ list_add(&cle->ce_linkage, &cl_envs[cpu].cec_envs);
+ cl_envs[cpu].cec_count++;
+ read_unlock(&cl_envs[cpu].cec_guard);
} else {
cl_env_fini(cle);
}
+ put_cpu();
}
}
EXPORT_SYMBOL(cl_env_put);
/**
- * Declares a point of re-entrancy.
- *
- * \see cl_env_reexit()
- */
-void *cl_env_reenter(void)
-{
- return cl_env_detach(NULL);
-}
-EXPORT_SYMBOL(cl_env_reenter);
-
-/**
- * Exits re-entrancy.
- */
-void cl_env_reexit(void *cookie)
-{
- cl_env_detach(NULL);
- cl_env_attach(cookie);
-}
-EXPORT_SYMBOL(cl_env_reexit);
-
-/**
- * Setup user-supplied \a env as a current environment. This is to be used to
- * guaranteed that environment exists even when cl_env_get() fails. It is up
- * to user to ensure proper concurrency control.
- *
- * \see cl_env_unplant()
- */
-void cl_env_implant(struct lu_env *env, int *refcheck)
-{
- struct cl_env *cle = cl_env_container(env);
-
- LASSERT(cle->ce_ref > 0);
-
- cl_env_attach(cle);
- cl_env_get(refcheck);
- CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
-}
-EXPORT_SYMBOL(cl_env_implant);
-
-/**
- * Detach environment installed earlier by cl_env_implant().
- */
-void cl_env_unplant(struct lu_env *env, int *refcheck)
-{
- struct cl_env *cle = cl_env_container(env);
-
- LASSERT(cle->ce_ref > 1);
-
- CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle);
-
- cl_env_detach(cle);
- cl_env_put(env, refcheck);
-}
-EXPORT_SYMBOL(cl_env_unplant);
-
-struct lu_env *cl_env_nested_get(struct cl_env_nest *nest)
-{
- struct lu_env *env;
-
- nest->cen_cookie = NULL;
- env = cl_env_peek(&nest->cen_refcheck);
- if (env) {
- if (!cl_io_is_going(env))
- return env;
- cl_env_put(env, &nest->cen_refcheck);
- nest->cen_cookie = cl_env_reenter();
- }
- env = cl_env_get(&nest->cen_refcheck);
- if (IS_ERR(env)) {
- cl_env_reexit(nest->cen_cookie);
- return env;
- }
-
- LASSERT(!cl_io_is_going(env));
- return env;
-}
-EXPORT_SYMBOL(cl_env_nested_get);
-
-void cl_env_nested_put(struct cl_env_nest *nest, struct lu_env *env)
-{
- cl_env_put(env, &nest->cen_refcheck);
- cl_env_reexit(nest->cen_cookie);
-}
-EXPORT_SYMBOL(cl_env_nested_put);
-
-/**
* Converts struct ost_lvb to struct cl_attr.
*
* \see cl_attr2lvb
@@ -999,6 +834,10 @@ static int cl_env_percpu_init(void)
for_each_possible_cpu(i) {
struct lu_env *env;
+ rwlock_init(&cl_envs[i].cec_guard);
+ INIT_LIST_HEAD(&cl_envs[i].cec_envs);
+ cl_envs[i].cec_count = 0;
+
cle = &cl_env_percpu[i];
env = &cle->ce_lu;
@@ -1066,7 +905,6 @@ void cl_env_percpu_put(struct lu_env *env)
LASSERT(cle->ce_ref == 0);
CL_ENV_DEC(busy);
- cl_env_detach(cle);
cle->ce_debug = NULL;
put_cpu();
@@ -1080,7 +918,6 @@ struct lu_env *cl_env_percpu_get(void)
cle = &cl_env_percpu[get_cpu()];
cl_env_init0(cle, __builtin_return_address(0));
- cl_env_attach(cle);
return &cle->ce_lu;
}
EXPORT_SYMBOL(cl_env_percpu_get);
@@ -1144,51 +981,19 @@ LU_KEY_INIT_FINI(cl0, struct cl_thread_info);
static void *cl_key_init(const struct lu_context *ctx,
struct lu_context_key *key)
{
- struct cl_thread_info *info;
-
- info = cl0_key_init(ctx, key);
- if (!IS_ERR(info)) {
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
- lu_ref_init(&info->clt_counters[i].ctc_locks_locked);
- }
- return info;
+ return cl0_key_init(ctx, key);
}
static void cl_key_fini(const struct lu_context *ctx,
struct lu_context_key *key, void *data)
{
- struct cl_thread_info *info;
- size_t i;
-
- info = data;
- for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
- lu_ref_fini(&info->clt_counters[i].ctc_locks_locked);
cl0_key_fini(ctx, key, data);
}
-static void cl_key_exit(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct cl_thread_info *info = data;
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i) {
- LASSERT(info->clt_counters[i].ctc_nr_held == 0);
- LASSERT(info->clt_counters[i].ctc_nr_used == 0);
- LASSERT(info->clt_counters[i].ctc_nr_locks_acquired == 0);
- LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0);
- lu_ref_fini(&info->clt_counters[i].ctc_locks_locked);
- lu_ref_init(&info->clt_counters[i].ctc_locks_locked);
- }
-}
-
static struct lu_context_key cl_key = {
.lct_tags = LCT_CL_THREAD,
.lct_init = cl_key_init,
.lct_fini = cl_key_fini,
- .lct_exit = cl_key_exit
};
static struct lu_kmem_descr cl_object_caches[] = {
@@ -1212,13 +1017,15 @@ int cl_global_init(void)
{
int result;
- result = cl_env_store_init();
- if (result)
- return result;
+	cl_envs = kcalloc(num_possible_cpus(), sizeof(*cl_envs), GFP_KERNEL);
+ if (!cl_envs) {
+ result = -ENOMEM;
+ goto out;
+ }
result = lu_kmem_init(cl_object_caches);
if (result)
- goto out_store;
+ goto out_envs;
LU_CONTEXT_KEY_INIT(&cl_key);
result = lu_context_key_register(&cl_key);
@@ -1228,16 +1035,17 @@ int cl_global_init(void)
result = cl_env_percpu_init();
if (result)
/* no cl_env_percpu_fini on error */
- goto out_context;
+ goto out_keys;
return 0;
-out_context:
+out_keys:
lu_context_key_degister(&cl_key);
out_kmem:
lu_kmem_fini(cl_object_caches);
-out_store:
- cl_env_store_fini();
+out_envs:
+ kfree(cl_envs);
+out:
return result;
}
@@ -1249,5 +1057,5 @@ void cl_global_fini(void)
cl_env_percpu_fini();
lu_context_key_degister(&cl_key);
lu_kmem_fini(cl_object_caches);
- cl_env_store_fini();
+ kfree(cl_envs);
}
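The removed thread-to-env hash is replaced by the per-CPU cache above, whose locking is asymmetric by design: the owning CPU runs under get_cpu() (preemption disabled) and takes cec_guard for read, so the read lock only has to exclude the cross-CPU purge path, which takes every CPU's guard for write. A hedged sketch of the setup side, mirroring cl_global_init()/cl_env_percpu_init(); cl_env_cache_create_sketch is a hypothetical name:

static struct cl_env_cache *cl_env_cache_create_sketch(void)
{
	struct cl_env_cache *caches;
	int i;

	/* one cache slot per possible CPU */
	caches = kcalloc(num_possible_cpus(), sizeof(*caches), GFP_KERNEL);
	if (!caches)
		return NULL;

	for_each_possible_cpu(i) {
		rwlock_init(&caches[i].cec_guard);
		INIT_LIST_HEAD(&caches[i].cec_envs);
		caches[i].cec_count = 0;
	}
	return caches;
}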
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
index 63973ba096da..cd9a40ca4448 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_page.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c
@@ -99,7 +99,6 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
PASSERT(env, page, list_empty(&page->cp_batch));
PASSERT(env, page, !page->cp_owner);
- PASSERT(env, page, !page->cp_req);
PASSERT(env, page, page->cp_state == CPS_FREEING);
while (!list_empty(&page->cp_layers)) {
@@ -150,7 +149,6 @@ struct cl_page *cl_page_alloc(const struct lu_env *env,
page->cp_type = type;
INIT_LIST_HEAD(&page->cp_layers);
INIT_LIST_HEAD(&page->cp_batch);
- INIT_LIST_HEAD(&page->cp_flight);
lu_ref_init(&page->cp_reference);
head = o->co_lu.lo_header;
list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
@@ -390,30 +388,6 @@ EXPORT_SYMBOL(cl_page_at);
__result; \
})
-#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...) \
-({ \
- const struct lu_env *__env = (_env); \
- struct cl_page *__page = (_page); \
- const struct cl_page_slice *__scan; \
- int __result; \
- ptrdiff_t __op = (_op); \
- int (*__method)_proto; \
- \
- __result = 0; \
- list_for_each_entry_reverse(__scan, &__page->cp_layers, \
- cpl_linkage) { \
- __method = *(void **)((char *)__scan->cpl_ops + __op); \
- if (__method) { \
- __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
- if (__result != 0) \
- break; \
- } \
- } \
- if (__result > 0) \
- __result = 0; \
- __result; \
-})
-
#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \
do { \
const struct lu_env *__env = (_env); \
@@ -552,7 +526,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
io, nonblock);
if (result == 0) {
PASSERT(env, pg, !pg->cp_owner);
- PASSERT(env, pg, !pg->cp_req);
pg->cp_owner = cl_io_top(io);
cl_page_owner_set(pg);
if (pg->cp_state != CPS_FREEING) {
@@ -694,7 +667,7 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
PASSERT(env, pg, pg->cp_state != CPS_FREEING);
/*
- * Severe all ways to obtain new pointers to @pg.
+ * Sever all ways to obtain new pointers to @pg.
*/
cl_page_owner_clear(pg);
@@ -845,8 +818,6 @@ void cl_page_completion(const struct lu_env *env,
struct cl_sync_io *anchor = pg->cp_sync_io;
PASSERT(env, pg, crt < CRT_NR);
- /* cl_page::cp_req already cleared by the caller (osc_completion()) */
- PASSERT(env, pg, !pg->cp_req);
PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
@@ -860,16 +831,8 @@ void cl_page_completion(const struct lu_env *env,
if (anchor) {
LASSERT(pg->cp_sync_io == anchor);
pg->cp_sync_io = NULL;
- }
- /*
- * As page->cp_obj is pinned by a reference from page->cp_req, it is
- * safe to call cl_page_put() without risking object destruction in a
- * non-blocking context.
- */
- cl_page_put(env, pg);
-
- if (anchor)
cl_sync_io_note(env, anchor, ioret);
+ }
}
EXPORT_SYMBOL(cl_page_completion);
@@ -927,29 +890,6 @@ int cl_page_flush(const struct lu_env *env, struct cl_io *io,
EXPORT_SYMBOL(cl_page_flush);
/**
- * Checks whether page is protected by any extent lock is at least required
- * mode.
- *
- * \return the same as in cl_page_operations::cpo_is_under_lock() method.
- * \see cl_page_operations::cpo_is_under_lock()
- */
-int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, pgoff_t *max_index)
-{
- int rc;
-
- PINVRNT(env, page, cl_page_invariant(page));
-
- rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
- (const struct lu_env *,
- const struct cl_page_slice *,
- struct cl_io *, pgoff_t *),
- io, max_index);
- return rc;
-}
-EXPORT_SYMBOL(cl_page_is_under_lock);
-
-/**
* Tells transfer engine that only part of a page is to be transmitted.
*
* \see cl_page_operations::cpo_clip()
@@ -974,10 +914,10 @@ void cl_page_header_print(const struct lu_env *env, void *cookie,
lu_printer_t printer, const struct cl_page *pg)
{
(*printer)(env, cookie,
- "page@%p[%d %p %d %d %p %p]\n",
+ "page@%p[%d %p %d %d %p]\n",
pg, atomic_read(&pg->cp_ref), pg->cp_obj,
pg->cp_state, pg->cp_type,
- pg->cp_owner, pg->cp_req);
+ pg->cp_owner);
}
EXPORT_SYMBOL(cl_page_header_print);
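
The dropped CL_PAGE_INVOKE_REVERSE macro (and its only caller, cl_page_is_under_lock()) dispatched a per-layer method found at a byte offset into each slice's ops table. A stand-alone sketch of that offset-based dispatch with hypothetical names; memcpy() stands in for the macro's "*(void **)((char *)__scan->cpl_ops + __op)" cast:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct ops {
	int (*op_start)(int);
	int (*op_stop)(int);
};

/* Resolve a method by its byte offset inside the ops table and call it
 * if the layer implements it, otherwise report "no result". */
static int invoke(const struct ops *o, ptrdiff_t op, int arg)
{
	int (*method)(int);

	memcpy(&method, (const char *)o + op, sizeof(method));
	return method ? method(arg) : 0;
}

static int start_impl(int v)
{
	return v + 1;
}

int main(void)
{
	struct ops o = { .op_start = start_impl, .op_stop = NULL };

	/* offsetof() yields the same ptrdiff the kernel macro received */
	printf("%d\n", invoke(&o, offsetof(struct ops, op_start), 41)); /* 42 */
	printf("%d\n", invoke(&o, offsetof(struct ops, op_stop), 41));  /* 0 */
	return 0;
}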
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
index cf8bb2a2f40b..fa0d38ddccb2 100644
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -907,6 +907,8 @@ struct obd_import *class_new_import(struct obd_device *obd)
INIT_LIST_HEAD(&imp->imp_sending_list);
INIT_LIST_HEAD(&imp->imp_delayed_list);
INIT_LIST_HEAD(&imp->imp_committed_list);
+ INIT_LIST_HEAD(&imp->imp_unreplied_list);
+ imp->imp_known_replied_xid = 0;
imp->imp_replay_cursor = &imp->imp_committed_list;
spin_lock_init(&imp->imp_lock);
imp->imp_last_success_conn = 0;
@@ -1408,13 +1410,33 @@ EXPORT_SYMBOL(obd_get_max_rpcs_in_flight);
int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
{
struct obd_request_slot_waiter *orsw;
+ const char *typ_name;
__u32 old;
int diff;
+ int rc;
int i;
if (max > OBD_MAX_RIF_MAX || max < 1)
return -ERANGE;
+ typ_name = cli->cl_import->imp_obd->obd_type->typ_name;
+ if (!strcmp(typ_name, LUSTRE_MDC_NAME)) {
+ /*
+		 * adjust max_mod_rpcs_in_flight to ensure it is always
+		 * strictly lower than max_rpcs_in_flight
+ */
+ if (max < 2) {
+ CERROR("%s: cannot set max_rpcs_in_flight to 1 because it must be higher than max_mod_rpcs_in_flight value\n",
+ cli->cl_import->imp_obd->obd_name);
+ return -ERANGE;
+ }
+ if (max <= cli->cl_max_mod_rpcs_in_flight) {
+ rc = obd_set_max_mod_rpcs_in_flight(cli, max - 1);
+ if (rc)
+ return rc;
+ }
+ }
+
spin_lock(&cli->cl_loi_list_lock);
old = cli->cl_max_rpcs_in_flight;
cli->cl_max_rpcs_in_flight = max;
@@ -1436,3 +1458,209 @@ int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
return 0;
}
EXPORT_SYMBOL(obd_set_max_rpcs_in_flight);
+
+int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, __u16 max)
+{
+ struct obd_connect_data *ocd;
+ u16 maxmodrpcs;
+ u16 prev;
+
+ if (max > OBD_MAX_RIF_MAX || max < 1)
+ return -ERANGE;
+
+ /* cannot exceed or equal max_rpcs_in_flight */
+ if (max >= cli->cl_max_rpcs_in_flight) {
+ CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) higher or equal to max_rpcs_in_flight value (%u)\n",
+ cli->cl_import->imp_obd->obd_name,
+ max, cli->cl_max_rpcs_in_flight);
+ return -ERANGE;
+ }
+
+ /* cannot exceed max modify RPCs in flight supported by the server */
+ ocd = &cli->cl_import->imp_connect_data;
+ if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS)
+ maxmodrpcs = ocd->ocd_maxmodrpcs;
+ else
+ maxmodrpcs = 1;
+ if (max > maxmodrpcs) {
+ CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) higher than max_mod_rpcs_per_client value (%hu) returned by the server at connection\n",
+ cli->cl_import->imp_obd->obd_name,
+ max, maxmodrpcs);
+ return -ERANGE;
+ }
+
+ spin_lock(&cli->cl_mod_rpcs_lock);
+
+ prev = cli->cl_max_mod_rpcs_in_flight;
+ cli->cl_max_mod_rpcs_in_flight = max;
+
+	/* wake up waiters if the limit has been increased */
+ if (cli->cl_max_mod_rpcs_in_flight > prev)
+ wake_up(&cli->cl_mod_rpcs_waitq);
+
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(obd_set_max_mod_rpcs_in_flight);
+
+#define pct(a, b) ((b) ? ((a) * 100) / (b) : 0)
+
+int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq)
+{
+ unsigned long mod_tot = 0, mod_cum;
+ struct timespec64 now;
+ int i;
+
+ ktime_get_real_ts64(&now);
+
+ spin_lock(&cli->cl_mod_rpcs_lock);
+
+ seq_printf(seq, "snapshot_time: %llu.%9lu (secs.nsecs)\n",
+ (s64)now.tv_sec, (unsigned long)now.tv_nsec);
+ seq_printf(seq, "modify_RPCs_in_flight: %hu\n",
+ cli->cl_mod_rpcs_in_flight);
+
+ seq_puts(seq, "\n\t\t\tmodify\n");
+ seq_puts(seq, "rpcs in flight rpcs %% cum %%\n");
+
+ mod_tot = lprocfs_oh_sum(&cli->cl_mod_rpcs_hist);
+
+ mod_cum = 0;
+ for (i = 0; i < OBD_HIST_MAX; i++) {
+ unsigned long mod = cli->cl_mod_rpcs_hist.oh_buckets[i];
+
+ mod_cum += mod;
+ seq_printf(seq, "%d:\t\t%10lu %3lu %3lu\n",
+ i, mod, pct(mod, mod_tot),
+ pct(mod_cum, mod_tot));
+ if (mod_cum == mod_tot)
+ break;
+ }
+
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show);
+#undef pct
+
+/*
+ * The number of modify RPCs sent in parallel is limited
+ * because the server has a finite number of slots per client in
+ * which to store request results and ensure reply reconstruction
+ * when needed. On the client, this limit is stored in
+ * cl_max_mod_rpcs_in_flight, which takes both the server limit and
+ * the cl_max_rpcs_in_flight value into account.
+ * On the MDC client, to avoid a potential deadlock (see Bugzilla 3462),
+ * one close request is allowed above the maximum.
+ */
+static inline bool obd_mod_rpc_slot_avail_locked(struct client_obd *cli,
+ bool close_req)
+{
+ bool avail;
+
+	/* A slot is available if either:
+	 * - the number of modify RPCs in flight is less than the max, or
+	 * - it's a close RPC and no other close request is in flight
+	 */
+ avail = cli->cl_mod_rpcs_in_flight < cli->cl_max_mod_rpcs_in_flight ||
+ (close_req && !cli->cl_close_rpcs_in_flight);
+
+ return avail;
+}
+
+static inline bool obd_mod_rpc_slot_avail(struct client_obd *cli,
+ bool close_req)
+{
+ bool avail;
+
+ spin_lock(&cli->cl_mod_rpcs_lock);
+ avail = obd_mod_rpc_slot_avail_locked(cli, close_req);
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+ return avail;
+}
+
+/* Get a modify RPC slot from the obd client @cli according
+ * to the kind of operation @opc that is going to be sent
+ * and the intent @it of the operation if it applies.
+ * If the maximum number of modify RPCs in flight is reached
+ * the thread is put to sleep.
+ * Returns the tag to be set in the request message. Tag 0
+ * is reserved for non-modifying requests.
+ */
+u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc,
+ struct lookup_intent *it)
+{
+ struct l_wait_info lwi = LWI_INTR(NULL, NULL);
+ bool close_req = false;
+ u16 i, max;
+
+	/* read-only metadata RPCs don't consume a slot on the MDT
+	 * for reply reconstruction
+	 */
+ if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
+ it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
+ return 0;
+
+ if (opc == MDS_CLOSE)
+ close_req = true;
+
+ do {
+ spin_lock(&cli->cl_mod_rpcs_lock);
+ max = cli->cl_max_mod_rpcs_in_flight;
+ if (obd_mod_rpc_slot_avail_locked(cli, close_req)) {
+ /* there is a slot available */
+ cli->cl_mod_rpcs_in_flight++;
+ if (close_req)
+ cli->cl_close_rpcs_in_flight++;
+ lprocfs_oh_tally(&cli->cl_mod_rpcs_hist,
+ cli->cl_mod_rpcs_in_flight);
+ /* find a free tag */
+ i = find_first_zero_bit(cli->cl_mod_tag_bitmap,
+ max + 1);
+ LASSERT(i < OBD_MAX_RIF_MAX);
+ LASSERT(!test_and_set_bit(i, cli->cl_mod_tag_bitmap));
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+ /* tag 0 is reserved for non-modify RPCs */
+ return i + 1;
+ }
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+
+ CDEBUG(D_RPCTRACE, "%s: sleeping for a modify RPC slot opc %u, max %hu\n",
+ cli->cl_import->imp_obd->obd_name, opc, max);
+
+ l_wait_event(cli->cl_mod_rpcs_waitq,
+ obd_mod_rpc_slot_avail(cli, close_req), &lwi);
+ } while (true);
+}
+EXPORT_SYMBOL(obd_get_mod_rpc_slot);
+
+/*
+ * Put a modify RPC slot from the obd client @cli according
+ * to the kind of operation @opc that has been sent and the
+ * intent @it of the operation if it applies.
+ */
+void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc,
+ struct lookup_intent *it, u16 tag)
+{
+ bool close_req = false;
+
+ if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
+ it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
+ return;
+
+ if (opc == MDS_CLOSE)
+ close_req = true;
+
+ spin_lock(&cli->cl_mod_rpcs_lock);
+ cli->cl_mod_rpcs_in_flight--;
+ if (close_req)
+ cli->cl_close_rpcs_in_flight--;
+ /* release the tag in the bitmap */
+ LASSERT(tag - 1 < OBD_MAX_RIF_MAX);
+ LASSERT(test_and_clear_bit(tag - 1, cli->cl_mod_tag_bitmap) != 0);
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+ wake_up(&cli->cl_mod_rpcs_waitq);
+}
+EXPORT_SYMBOL(obd_put_mod_rpc_slot);
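
obd_get_mod_rpc_slot() hands out 1-based tags from a bitmap: find_first_zero_bit() picks the lowest free bit, the matching put clears it, and tag 0 stays reserved for non-modifying requests. The user-space model below shows just the tag arithmetic; the spinlock and the waitqueue sleep of the real path are omitted, and all names are illustrative:

#include <stdint.h>
#include <stdio.h>

#define MAX_TAGS 64			/* stand-in for OBD_MAX_RIF_MAX */

static uint64_t tag_bitmap;		/* bit i set => tag i+1 in flight */

/* Grab the lowest free tag, 1-based, the way obd_get_mod_rpc_slot()
 * does with find_first_zero_bit()/test_and_set_bit(). */
static unsigned int get_tag(void)
{
	unsigned int i;

	for (i = 0; i < MAX_TAGS; i++) {
		if (!(tag_bitmap & (1ULL << i))) {
			tag_bitmap |= 1ULL << i;
			return i + 1;
		}
	}
	return 0;			/* no slot free */
}

static void put_tag(unsigned int tag)
{
	tag_bitmap &= ~(1ULL << (tag - 1));
}

int main(void)
{
	unsigned int a = get_tag(), b = get_tag();

	printf("%u %u\n", a, b);	/* 1 2 */
	put_tag(a);
	printf("%u\n", get_tag());	/* 1 again: lowest free tag reused */
	return 0;
}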
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
index be09e04b042f..9f5e8299d7e4 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
@@ -217,8 +217,8 @@ static ssize_t pinger_show(struct kobject *kobj, struct attribute *attr,
return sprintf(buf, "%s\n", "on");
}
-static ssize_t health_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t
+health_check_show(struct kobject *kobj, struct attribute *attr, char *buf)
{
bool healthy = true;
int i;
@@ -311,14 +311,14 @@ EXPORT_SYMBOL_GPL(debugfs_lustre_root);
LUSTRE_RO_ATTR(version);
LUSTRE_RO_ATTR(pinger);
-LUSTRE_RO_ATTR(health);
+LUSTRE_RO_ATTR(health_check);
LUSTRE_RW_ATTR(jobid_var);
LUSTRE_RW_ATTR(jobid_name);
static struct attribute *lustre_attrs[] = {
&lustre_attr_version.attr,
&lustre_attr_pinger.attr,
- &lustre_attr_health.attr,
+ &lustre_attr_health_check.attr,
&lustre_attr_jobid_name.attr,
&lustre_attr_jobid_var.attr,
NULL,
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
deleted file mode 100644
index 41b77a30feb3..000000000000
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/obdclass/linux/linux-obdo.c
- *
- * Object Devices Class Driver
- * These are the only exported functions, they provide some generic
- * infrastructure for managing object devices
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/module.h>
-#include "../../include/obd_class.h"
-#include "../../include/lustre/lustre_idl.h"
-
-#include <linux/fs.h>
-
-void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid)
-{
- valid &= src->o_valid;
-
- if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
- CDEBUG(D_INODE,
- "valid %#llx, cur time %lu/%lu, new %llu/%llu\n",
- src->o_valid, LTIME_S(dst->i_mtime),
- LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime);
-
- if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime))
- LTIME_S(dst->i_atime) = src->o_atime;
- if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime))
- LTIME_S(dst->i_mtime) = src->o_mtime;
- if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
- LTIME_S(dst->i_ctime) = src->o_ctime;
- if (valid & OBD_MD_FLSIZE)
- i_size_write(dst, src->o_size);
- /* optimum IO size */
- if (valid & OBD_MD_FLBLKSZ && src->o_blksize > (1 << dst->i_blkbits))
- dst->i_blkbits = ffs(src->o_blksize) - 1;
-
- if (dst->i_blkbits < PAGE_SHIFT)
- dst->i_blkbits = PAGE_SHIFT;
-
- /* allocation of space */
- if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks)
- /*
- * XXX shouldn't overflow be checked here like in
- * obdo_to_inode().
- */
- dst->i_blocks = src->o_blocks;
-}
-EXPORT_SYMBOL(obdo_refresh_inode);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c
index 43797f106745..736ea1067c93 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog.c
@@ -43,8 +43,9 @@
#define DEBUG_SUBSYSTEM S_LOG
-#include "../include/obd_class.h"
+#include "../include/llog_swab.h"
#include "../include/lustre_log.h"
+#include "../include/obd_class.h"
#include "llog_internal.h"
/*
@@ -80,8 +81,7 @@ static void llog_free_handle(struct llog_handle *loghandle)
LASSERT(list_empty(&loghandle->u.phd.phd_entry));
else if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
LASSERT(list_empty(&loghandle->u.chd.chd_head));
- LASSERT(sizeof(*loghandle->lgh_hdr) == LLOG_CHUNK_SIZE);
- kfree(loghandle->lgh_hdr);
+ kvfree(loghandle->lgh_hdr);
out:
kfree(loghandle);
}
@@ -115,20 +115,29 @@ static int llog_read_header(const struct lu_env *env,
rc = lop->lop_read_header(env, handle);
if (rc == LLOG_EEMPTY) {
struct llog_log_hdr *llh = handle->lgh_hdr;
+ size_t len;
+ /* lrh_len should be initialized in llog_init_handle */
handle->lgh_last_idx = 0; /* header is record with index 0 */
llh->llh_count = 1; /* for the header record */
llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
- llh->llh_hdr.lrh_len = LLOG_CHUNK_SIZE;
- llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
+ LASSERT(handle->lgh_ctxt->loc_chunk_size >= LLOG_MIN_CHUNK_SIZE);
+ llh->llh_hdr.lrh_len = handle->lgh_ctxt->loc_chunk_size;
llh->llh_hdr.lrh_index = 0;
- llh->llh_tail.lrt_index = 0;
llh->llh_timestamp = ktime_get_real_seconds();
if (uuid)
memcpy(&llh->llh_tgtuuid, uuid,
sizeof(llh->llh_tgtuuid));
llh->llh_bitmap_offset = offsetof(typeof(*llh), llh_bitmap);
- ext2_set_bit(0, llh->llh_bitmap);
+		/*
+		 * Since updating the llog header might also call this
+		 * function, reset the bitmap to 0 here.
+		 */
+ len = llh->llh_hdr.lrh_len - llh->llh_bitmap_offset;
+ memset(LLOG_HDR_BITMAP(llh), 0, len - sizeof(llh->llh_tail));
+ ext2_set_bit(0, LLOG_HDR_BITMAP(llh));
+ LLOG_HDR_TAIL(llh)->lrt_len = llh->llh_hdr.lrh_len;
+ LLOG_HDR_TAIL(llh)->lrt_index = llh->llh_hdr.lrh_index;
rc = 0;
}
return rc;
@@ -137,16 +146,19 @@ static int llog_read_header(const struct lu_env *env,
int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
int flags, struct obd_uuid *uuid)
{
+ int chunk_size = handle->lgh_ctxt->loc_chunk_size;
enum llog_flag fmt = flags & LLOG_F_EXT_MASK;
struct llog_log_hdr *llh;
int rc;
LASSERT(!handle->lgh_hdr);
- llh = kzalloc(sizeof(*llh), GFP_NOFS);
+ LASSERT(chunk_size >= LLOG_MIN_CHUNK_SIZE);
+ llh = libcfs_kvzalloc(sizeof(*llh), GFP_NOFS);
if (!llh)
return -ENOMEM;
handle->lgh_hdr = llh;
+ handle->lgh_hdr_size = chunk_size;
/* first assign flags to use llog_client_ops */
llh->llh_flags = flags;
rc = llog_read_header(env, handle, uuid);
@@ -189,6 +201,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
LASSERT(list_empty(&handle->u.chd.chd_head));
INIT_LIST_HEAD(&handle->u.chd.chd_head);
llh->llh_size = sizeof(struct llog_logid_rec);
+ llh->llh_flags |= LLOG_F_IS_FIXSIZE;
} else if (!(flags & LLOG_F_IS_PLAIN)) {
CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n",
handle->lgh_ctxt->loc_obd->obd_name,
@@ -198,7 +211,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
llh->llh_flags |= fmt;
out:
if (rc) {
- kfree(llh);
+ kvfree(llh);
handle->lgh_hdr = NULL;
}
return rc;
@@ -212,15 +225,21 @@ static int llog_process_thread(void *arg)
struct llog_log_hdr *llh = loghandle->lgh_hdr;
struct llog_process_cat_data *cd = lpi->lpi_catdata;
char *buf;
- __u64 cur_offset = LLOG_CHUNK_SIZE;
- __u64 last_offset;
+ u64 cur_offset, tmp_offset;
+ int chunk_size;
int rc = 0, index = 1, last_index;
int saved_index = 0;
int last_called_index = 0;
- LASSERT(llh);
+ if (!llh)
+ return -EINVAL;
+
+ cur_offset = llh->llh_hdr.lrh_len;
+ chunk_size = llh->llh_hdr.lrh_len;
+	/* expect chunk_size to be a power of two */
+ LASSERT(is_power_of_2(chunk_size));
- buf = kzalloc(LLOG_CHUNK_SIZE, GFP_NOFS);
+ buf = libcfs_kvzalloc(chunk_size, GFP_NOFS);
if (!buf) {
lpi->lpi_rc = -ENOMEM;
return 0;
@@ -233,41 +252,53 @@ static int llog_process_thread(void *arg)
if (cd && cd->lpcd_last_idx)
last_index = cd->lpcd_last_idx;
else
- last_index = LLOG_BITMAP_BYTES * 8 - 1;
-
- /* Record is not in this buffer. */
- if (index > last_index)
- goto out;
+ last_index = LLOG_HDR_BITMAP_SIZE(llh) - 1;
while (rc == 0) {
+ unsigned int buf_offset = 0;
struct llog_rec_hdr *rec;
+ bool partial_chunk;
+ off_t chunk_offset;
/* skip records not set in bitmap */
while (index <= last_index &&
- !ext2_test_bit(index, llh->llh_bitmap))
+ !ext2_test_bit(index, LLOG_HDR_BITMAP(llh)))
++index;
- LASSERT(index <= last_index + 1);
- if (index == last_index + 1)
+ if (index > last_index)
break;
-repeat:
+
CDEBUG(D_OTHER, "index: %d last_index %d\n",
index, last_index);
-
+repeat:
/* get the buf with our target record; avoid old garbage */
- memset(buf, 0, LLOG_CHUNK_SIZE);
- last_offset = cur_offset;
+ memset(buf, 0, chunk_size);
rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index,
- index, &cur_offset, buf, LLOG_CHUNK_SIZE);
+ index, &cur_offset, buf, chunk_size);
if (rc)
goto out;
+		/*
+		 * NB: after the llog_next_block() call, cur_offset is the
+		 * offset of the block following the one just read.
+		 * The absolute offset of the current chunk is calculated
+		 * from cur_offset and stored in the chunk_offset variable.
+		 */
+ tmp_offset = cur_offset;
+ if (do_div(tmp_offset, chunk_size)) {
+ partial_chunk = true;
+ chunk_offset = cur_offset & ~(chunk_size - 1);
+ } else {
+ partial_chunk = false;
+ chunk_offset = cur_offset - chunk_size;
+ }
+
/* NB: when rec->lrh_len is accessed it is already swabbed
* since it is used at the "end" of the loop and the rec
* swabbing is done at the beginning of the loop.
*/
- for (rec = (struct llog_rec_hdr *)buf;
- (char *)rec < buf + LLOG_CHUNK_SIZE;
+ for (rec = (struct llog_rec_hdr *)(buf + buf_offset);
+ (char *)rec < buf + chunk_size;
rec = llog_rec_hdr_next(rec)) {
CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
rec, rec->lrh_type);
@@ -278,15 +309,29 @@ repeat:
CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
rec->lrh_type, rec->lrh_index);
- if (rec->lrh_index == 0) {
- /* probably another rec just got added? */
- rc = 0;
- if (index <= loghandle->lgh_last_idx)
- goto repeat;
- goto out; /* no more records */
+			/*
+			 * The end of a partial chunk is zeroed; check for
+			 * index 0 to detect it.
+			 */
+ if (partial_chunk && !rec->lrh_index) {
+				/* a concurrent llog_add() might have appended
+				 * new records while we were processing; if so,
+				 * re-read the current chunk, otherwise there
+				 * are no more records to process.
+				 */
+ if (index > loghandle->lgh_last_idx) {
+ rc = 0;
+ goto out;
+ }
+ CDEBUG(D_OTHER, "Re-read last llog buffer for new records, index %u, last %u\n",
+ index, loghandle->lgh_last_idx);
+ /* save offset inside buffer for the re-read */
+ buf_offset = (char *)rec - (char *)buf;
+ cur_offset = chunk_offset;
+ goto repeat;
}
- if (rec->lrh_len == 0 ||
- rec->lrh_len > LLOG_CHUNK_SIZE) {
+
+ if (!rec->lrh_len || rec->lrh_len > chunk_size) {
CWARN("invalid length %d in llog record for index %d/%d\n",
rec->lrh_len,
rec->lrh_index, index);
@@ -300,32 +345,38 @@ repeat:
continue;
}
+ if (rec->lrh_index != index) {
+ CERROR("%s: Invalid record: index %u but expected %u\n",
+ loghandle->lgh_ctxt->loc_obd->obd_name,
+ rec->lrh_index, index);
+ rc = -ERANGE;
+ goto out;
+ }
+
CDEBUG(D_OTHER,
"lrh_index: %d lrh_len: %d (%d remains)\n",
rec->lrh_index, rec->lrh_len,
- (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));
+ (int)(buf + chunk_size - (char *)rec));
loghandle->lgh_cur_idx = rec->lrh_index;
loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
- last_offset;
+ chunk_offset;
/* if set, process the callback on this record */
- if (ext2_test_bit(index, llh->llh_bitmap)) {
+ if (ext2_test_bit(index, LLOG_HDR_BITMAP(llh))) {
rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec,
lpi->lpi_cbdata);
last_called_index = index;
if (rc)
goto out;
- } else {
- CDEBUG(D_OTHER, "Skipped index %d\n", index);
}
- /* next record, still in buffer? */
- ++index;
- if (index > last_index) {
+ /* exit if the last index is reached */
+ if (index >= last_index) {
rc = 0;
goto out;
}
+ index++;
}
}
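
The new llog_process_thread() code recovers the absolute start of the chunk it just read from cur_offset, which llog_next_block() leaves pointing past the block: a non-zero remainder modulo the power-of-two chunk size means a partial chunk. A small sketch of that calculation, with plain masking in place of do_div() and illustrative names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Given cur_offset (the offset *after* the block just read) and a
 * power-of-two chunk_size, recover the absolute offset of the chunk
 * that was read, as llog_process_thread() now does. */
static uint64_t chunk_start(uint64_t cur_offset, uint64_t chunk_size,
			    bool *partial)
{
	if (cur_offset & (chunk_size - 1)) {	/* mid-chunk: partial read */
		*partial = true;
		return cur_offset & ~(chunk_size - 1);
	}
	*partial = false;
	return cur_offset - chunk_size;		/* exactly one full chunk back */
}

int main(void)
{
	bool partial;

	printf("%llu %d\n",
	       (unsigned long long)chunk_start(12288, 8192, &partial), partial);
	/* 8192 1: offset 12288 lies inside the chunk starting at 8192 */
	printf("%llu %d\n",
	       (unsigned long long)chunk_start(16384, 8192, &partial), partial);
	/* 8192 0: the full chunk ending at 16384 started at 8192 */
	return 0;
}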
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
index a4277d684614..8574ad401f66 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
@@ -158,6 +158,7 @@ int llog_setup(const struct lu_env *env, struct obd_device *obd,
mutex_init(&ctxt->loc_mutex);
ctxt->loc_exp = class_export_get(disk_obd->obd_self_export);
ctxt->loc_flags = LLOG_CTXT_FLAG_UNINITIALIZED;
+ ctxt->loc_chunk_size = LLOG_MIN_CHUNK_SIZE;
rc = llog_group_set_ctxt(olg, ctxt, index);
if (rc) {
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
index 8c4c1b3f1b45..723c212c6747 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_swab.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
@@ -38,6 +38,7 @@
#define DEBUG_SUBSYSTEM S_LOG
+#include "../include/llog_swab.h"
#include "../include/lustre_log.h"
static void print_llogd_body(struct llogd_body *d)
@@ -244,7 +245,7 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec)
__swab32s(&llh->llh_flags);
__swab32s(&llh->llh_size);
__swab32s(&llh->llh_cat_idx);
- tail = &llh->llh_tail;
+ tail = LLOG_HDR_TAIL(llh);
break;
}
case LLOG_LOGID_MAGIC:
@@ -290,8 +291,10 @@ static void print_llog_hdr(struct llog_log_hdr *h)
CDEBUG(D_OTHER, "\tllh_flags: %#x\n", h->llh_flags);
CDEBUG(D_OTHER, "\tllh_size: %#x\n", h->llh_size);
CDEBUG(D_OTHER, "\tllh_cat_idx: %#x\n", h->llh_cat_idx);
- CDEBUG(D_OTHER, "\tllh_tail.lrt_index: %#x\n", h->llh_tail.lrt_index);
- CDEBUG(D_OTHER, "\tllh_tail.lrt_len: %#x\n", h->llh_tail.lrt_len);
+ CDEBUG(D_OTHER, "\tllh_tail.lrt_index: %#x\n",
+ LLOG_HDR_TAIL(h)->lrt_index);
+ CDEBUG(D_OTHER, "\tllh_tail.lrt_len: %#x\n",
+ LLOG_HDR_TAIL(h)->lrt_len);
}
void lustre_swab_llog_hdr(struct llog_log_hdr *h)
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
index 852a5acfefab..2c99717b0aba 100644
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
@@ -100,9 +100,13 @@ static const char * const obd_connect_names[] = {
"lfsck",
"unknown",
"unlink_close",
- "unknown",
+ "multi_mod_rpcs",
"dir_stripe",
- "unknown",
+ "subtree",
+ "lock_ahead",
+ "bulk_mbits",
+ "compact_obdo",
+ "second_flags",
NULL
};
@@ -127,7 +131,7 @@ EXPORT_SYMBOL(obd_connect_flags2str);
static void obd_connect_data_seqprint(struct seq_file *m,
struct obd_connect_data *ocd)
{
- int flags;
+ u64 flags;
LASSERT(ocd);
flags = ocd->ocd_connect_flags;
@@ -172,6 +176,9 @@ static void obd_connect_data_seqprint(struct seq_file *m,
if (flags & OBD_CONNECT_MAXBYTES)
seq_printf(m, " max_object_bytes: %llx\n",
ocd->ocd_maxbytes);
+ if (flags & OBD_CONNECT_MULTIMODRPCS)
+ seq_printf(m, " max_mod_rpcs: %hu\n",
+ ocd->ocd_maxmodrpcs);
}
int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
@@ -396,10 +403,17 @@ int lprocfs_wr_uint(struct file *file, const char __user *buffer,
char dummy[MAX_STRING_SIZE + 1], *end;
unsigned long tmp;
- dummy[MAX_STRING_SIZE] = '\0';
- if (copy_from_user(dummy, buffer, MAX_STRING_SIZE))
+ if (count >= sizeof(dummy))
+ return -EINVAL;
+
+ if (count == 0)
+ return 0;
+
+ if (copy_from_user(dummy, buffer, count))
return -EFAULT;
+ dummy[count] = '\0';
+
tmp = simple_strtoul(dummy, &end, 0);
if (dummy == end)
return -EINVAL;
@@ -1275,7 +1289,8 @@ int ldebugfs_register_stats(struct dentry *parent, const char *name,
EXPORT_SYMBOL_GPL(ldebugfs_register_stats);
void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
- unsigned conf, const char *name, const char *units)
+ unsigned int conf, const char *name,
+ const char *units)
{
struct lprocfs_counter_header *header;
struct lprocfs_counter *percpu_cntr;
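
The lprocfs_wr_uint() hunk fixes the input path: the old code always copied MAX_STRING_SIZE bytes from user space regardless of count, overreading short user buffers, and terminated the string only at the fixed offset. A user-space model of the corrected pattern, with parse_uint() as a hypothetical stand-in and memcpy() in place of copy_from_user():

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#define MAX_STRING_SIZE 16

/* Copy only what the caller wrote, bound it by the buffer, and
 * NUL-terminate before parsing. */
static int parse_uint(const char *user_buf, size_t count, unsigned long *out)
{
	char dummy[MAX_STRING_SIZE + 1];
	char *end;

	if (count >= sizeof(dummy))
		return -EINVAL;		/* would not fit with the NUL */
	if (count == 0)
		return 0;

	memcpy(dummy, user_buf, count);	/* copy_from_user() in the kernel */
	dummy[count] = '\0';

	*out = strtoul(dummy, &end, 0);
	if (dummy == end)
		return -EINVAL;		/* no digits consumed */
	return 0;
}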
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 054e567e6c8d..7971562a3efd 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -68,6 +68,7 @@ enum {
#define LU_SITE_BITS_MIN 12
#define LU_SITE_BITS_MAX 24
+#define LU_SITE_BITS_MAX_CL 19
/**
* total 256 buckets, we don't want too many buckets because:
* - consume too much memory
@@ -338,7 +339,7 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
struct cfs_hash_bd bd2;
struct list_head dispose;
int did_sth;
- unsigned int start;
+ unsigned int start = 0;
int count;
int bnr;
unsigned int i;
@@ -351,7 +352,8 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
* Under LRU list lock, scan LRU list and move unreferenced objects to
* the dispose list, removing them from LRU and hash table.
*/
- start = s->ls_purge_start;
+ if (nr != ~0)
+ start = s->ls_purge_start;
bnr = (nr == ~0) ? -1 : nr / (int)CFS_HASH_NBKT(s->ls_obj_hash) + 1;
again:
/*
@@ -877,6 +879,9 @@ static unsigned long lu_htable_order(struct lu_device *top)
unsigned long cache_size;
unsigned long bits;
+ if (!strcmp(top->ld_type->ldt_name, LUSTRE_VVP_NAME))
+ bits_max = LU_SITE_BITS_MAX_CL;
+
/*
* Calculate hash table size, assuming that we want reasonable
* performance when 20% of total memory is occupied by cache of
@@ -909,8 +914,8 @@ static unsigned long lu_htable_order(struct lu_device *top)
return clamp_t(typeof(bits), bits, LU_SITE_BITS_MIN, bits_max);
}
-static unsigned lu_obj_hop_hash(struct cfs_hash *hs,
- const void *key, unsigned mask)
+static unsigned int lu_obj_hop_hash(struct cfs_hash *hs,
+ const void *key, unsigned int mask)
{
struct lu_fid *fid = (struct lu_fid *)key;
__u32 hash;
@@ -1311,6 +1316,7 @@ enum {
static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, };
static DEFINE_SPINLOCK(lu_keys_guard);
+static atomic_t lu_key_initing_cnt = ATOMIC_INIT(0);
/**
* Global counter incremented whenever key is registered, unregistered,
@@ -1318,7 +1324,7 @@ static DEFINE_SPINLOCK(lu_keys_guard);
* lu_context_refill(). No locking is provided, as initialization and shutdown
* are supposed to be externally serialized.
*/
-static unsigned key_set_version;
+static unsigned int key_set_version;
/**
* Register new key.
@@ -1385,6 +1391,19 @@ void lu_context_key_degister(struct lu_context_key *key)
++key_set_version;
spin_lock(&lu_keys_guard);
key_fini(&lu_shrink_env.le_ctx, key->lct_index);
+
+ /**
+ * Wait until all transient contexts referencing this key have
+ * run lu_context_key::lct_fini() method.
+	 * run the lu_context_key::lct_fini() method.
+ while (atomic_read(&key->lct_used) > 1) {
+ spin_unlock(&lu_keys_guard);
+ CDEBUG(D_INFO, "lu_context_key_degister: \"%s\" %p, %d\n",
+ key->lct_owner ? key->lct_owner->name : "", key,
+ atomic_read(&key->lct_used));
+ schedule();
+ spin_lock(&lu_keys_guard);
+ }
if (lu_keys[key->lct_index]) {
lu_keys[key->lct_index] = NULL;
lu_ref_fini(&key->lct_reference);
@@ -1507,14 +1526,25 @@ void lu_context_key_quiesce(struct lu_context_key *key)
if (!(key->lct_tags & LCT_QUIESCENT)) {
/*
- * XXX layering violation.
- */
- cl_env_cache_purge(~0);
- key->lct_tags |= LCT_QUIESCENT;
- /*
* XXX memory barrier has to go here.
*/
spin_lock(&lu_keys_guard);
+ key->lct_tags |= LCT_QUIESCENT;
+
+ /**
+ * Wait until all lu_context_key::lct_init() methods
+ * have completed.
+ */
+ while (atomic_read(&lu_key_initing_cnt) > 0) {
+ spin_unlock(&lu_keys_guard);
+ CDEBUG(D_INFO, "lu_context_key_quiesce: \"%s\" %p, %d (%d)\n",
+ key->lct_owner ? key->lct_owner->name : "",
+ key, atomic_read(&key->lct_used),
+ atomic_read(&lu_key_initing_cnt));
+ schedule();
+ spin_lock(&lu_keys_guard);
+ }
+
list_for_each_entry(ctx, &lu_context_remembered, lc_remember)
key_fini(ctx, key->lct_index);
spin_unlock(&lu_keys_guard);
@@ -1546,6 +1576,19 @@ static int keys_fill(struct lu_context *ctx)
{
unsigned int i;
+	/*
+	 * Serialisation with lu_context_key_quiesce() is needed, but some
+	 * key->lct_init() methods call kernel memory allocation routines
+	 * and cannot run while a spin_lock is held.
+	 * "lu_keys_guard" is held while incrementing "lu_key_initing_cnt"
+	 * to mark the start of the serialised section.
+	 * An atomic_t is still used so that the lock need not be
+	 * reacquired when decrementing the counter.
+	 */
+ spin_lock(&lu_keys_guard);
+ atomic_inc(&lu_key_initing_cnt);
+ spin_unlock(&lu_keys_guard);
+
LINVRNT(ctx->lc_value);
for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
struct lu_context_key *key;
@@ -1563,12 +1606,19 @@ static int keys_fill(struct lu_context *ctx)
LINVRNT(key->lct_init);
LINVRNT(key->lct_index == i);
+ LASSERT(key->lct_owner);
+ if (!(ctx->lc_tags & LCT_NOREF) &&
+ !try_module_get(key->lct_owner)) {
+ /* module is unloading, skip this key */
+ continue;
+ }
+
value = key->lct_init(ctx, key);
- if (IS_ERR(value))
+ if (unlikely(IS_ERR(value))) {
+ atomic_dec(&lu_key_initing_cnt);
return PTR_ERR(value);
+ }
- if (!(ctx->lc_tags & LCT_NOREF))
- try_module_get(key->lct_owner);
lu_ref_add_atomic(&key->lct_reference, "ctx", ctx);
atomic_inc(&key->lct_used);
/*
@@ -1582,6 +1632,7 @@ static int keys_fill(struct lu_context *ctx)
}
ctx->lc_version = key_set_version;
}
+ atomic_dec(&lu_key_initing_cnt);
return 0;
}
@@ -1663,6 +1714,9 @@ void lu_context_exit(struct lu_context *ctx)
ctx->lc_state = LCS_LEFT;
if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value) {
for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
+			/* could race with key quiescence */
+ if (ctx->lc_tags & LCT_REMEMBER)
+ spin_lock(&lu_keys_guard);
if (ctx->lc_value[i]) {
struct lu_context_key *key;
@@ -1671,6 +1725,8 @@ void lu_context_exit(struct lu_context *ctx)
key->lct_exit(ctx,
key, ctx->lc_value[i]);
}
+ if (ctx->lc_tags & LCT_REMEMBER)
+ spin_unlock(&lu_keys_guard);
}
}
}
@@ -1930,7 +1986,7 @@ int lu_site_stats_print(const struct lu_site *s, struct seq_file *m)
memset(&stats, 0, sizeof(stats));
lu_site_stats_get(s->ls_obj_hash, &stats, 1);
- seq_printf(m, "%d/%d %d/%d %d %d %d %d %d %d %d %d\n",
+ seq_printf(m, "%d/%d %d/%ld %d %d %d %d %d %d %d %d\n",
stats.lss_busy,
stats.lss_total,
stats.lss_populated,
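
The lu_key_initing_cnt scheme above lets key init run without the spinlock while still giving lu_context_key_quiesce() a drain point: the counter is only ever incremented under lu_keys_guard, so once quiesce holds the lock and sets LCT_QUIESCENT, no new init can begin and it simply waits for the count to fall. A pthread sketch of the same gate, with illustrative names:

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>

static pthread_mutex_t keys_guard = PTHREAD_MUTEX_INITIALIZER;
static atomic_int initing_cnt;

/* Init side: take the guard only to publish the increment, then run the
 * allocating init work unlocked, as keys_fill() now does. */
static void keys_fill_enter(void)
{
	pthread_mutex_lock(&keys_guard);
	atomic_fetch_add(&initing_cnt, 1);
	pthread_mutex_unlock(&keys_guard);
}

static void keys_fill_exit(void)
{
	atomic_fetch_sub(&initing_cnt, 1);	/* no lock needed to leave */
}

/* Quiesce side: once the flag is set under the guard, no new init can
 * start; spin until the in-flight inits drain. */
static void key_quiesce(void)
{
	pthread_mutex_lock(&keys_guard);
	/* the QUIESCENT flag would be set here */
	while (atomic_load(&initing_cnt) > 0) {
		pthread_mutex_unlock(&keys_guard);
		sched_yield();			/* schedule() in the kernel */
		pthread_mutex_lock(&keys_guard);
	}
	pthread_mutex_unlock(&keys_guard);
}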
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c
index bbed1b72d52e..9ca84c7d49de 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_config.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c
@@ -35,12 +35,15 @@
*/
#define DEBUG_SUBSYSTEM S_CLASS
-#include "../include/obd_class.h"
+
#include <linux/string.h>
+
#include "../include/lustre/lustre_ioctl.h"
-#include "../include/lustre_log.h"
+#include "../include/llog_swab.h"
#include "../include/lprocfs_status.h"
+#include "../include/lustre_log.h"
#include "../include/lustre_param.h"
+#include "../include/obd_class.h"
#include "llog_internal.h"
@@ -446,7 +449,7 @@ static int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
LASSERT(obd->obd_self_export);
/* Precleanup, we must make sure all exports get destroyed. */
- err = obd_precleanup(obd, OBD_CLEANUP_EXPORTS);
+ err = obd_precleanup(obd);
if (err)
CERROR("Precleanup %s returned %d\n",
obd->obd_name, err);
@@ -585,16 +588,21 @@ static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
}
static LIST_HEAD(lustre_profile_list);
+static DEFINE_SPINLOCK(lustre_profile_list_lock);
struct lustre_profile *class_get_profile(const char *prof)
{
struct lustre_profile *lprof;
+ spin_lock(&lustre_profile_list_lock);
list_for_each_entry(lprof, &lustre_profile_list, lp_list) {
if (!strcmp(lprof->lp_profile, prof)) {
+ lprof->lp_refs++;
+ spin_unlock(&lustre_profile_list_lock);
return lprof;
}
}
+ spin_unlock(&lustre_profile_list_lock);
return NULL;
}
EXPORT_SYMBOL(class_get_profile);
@@ -639,7 +647,11 @@ static int class_add_profile(int proflen, char *prof, int osclen, char *osc,
}
}
+ spin_lock(&lustre_profile_list_lock);
+ lprof->lp_refs = 1;
+ lprof->lp_list_deleted = false;
list_add(&lprof->lp_list, &lustre_profile_list);
+ spin_unlock(&lustre_profile_list_lock);
return err;
free_lp_dt:
@@ -659,27 +671,59 @@ void class_del_profile(const char *prof)
lprof = class_get_profile(prof);
if (lprof) {
+ spin_lock(&lustre_profile_list_lock);
+ /* because get profile increments the ref counter */
+ lprof->lp_refs--;
list_del(&lprof->lp_list);
- kfree(lprof->lp_profile);
- kfree(lprof->lp_dt);
- kfree(lprof->lp_md);
- kfree(lprof);
+ lprof->lp_list_deleted = true;
+ spin_unlock(&lustre_profile_list_lock);
+
+ class_put_profile(lprof);
}
}
EXPORT_SYMBOL(class_del_profile);
+void class_put_profile(struct lustre_profile *lprof)
+{
+ spin_lock(&lustre_profile_list_lock);
+ if (--lprof->lp_refs > 0) {
+ LASSERT(lprof->lp_refs > 0);
+ spin_unlock(&lustre_profile_list_lock);
+ return;
+ }
+ spin_unlock(&lustre_profile_list_lock);
+
+ /* confirm not a negative number */
+ LASSERT(!lprof->lp_refs);
+
+	/*
+	 * At least one of class_del_profile()/class_del_profiles() must
+	 * be called on the target profile, or lustre_profile_list will
+	 * be corrupted.
+	 */
+ LASSERT(lprof->lp_list_deleted);
+ kfree(lprof->lp_profile);
+ kfree(lprof->lp_dt);
+ kfree(lprof->lp_md);
+ kfree(lprof);
+}
+EXPORT_SYMBOL(class_put_profile);
+
/* COMPAT_146 */
void class_del_profiles(void)
{
struct lustre_profile *lprof, *n;
+ spin_lock(&lustre_profile_list_lock);
list_for_each_entry_safe(lprof, n, &lustre_profile_list, lp_list) {
list_del(&lprof->lp_list);
- kfree(lprof->lp_profile);
- kfree(lprof->lp_dt);
- kfree(lprof->lp_md);
- kfree(lprof);
+ lprof->lp_list_deleted = true;
+ spin_unlock(&lustre_profile_list_lock);
+
+ class_put_profile(lprof);
+
+ spin_lock(&lustre_profile_list_lock);
}
+ spin_unlock(&lustre_profile_list_lock);
}
EXPORT_SYMBOL(class_del_profiles);
@@ -1406,8 +1450,8 @@ EXPORT_SYMBOL(class_manual_cleanup);
* uuid<->export lustre hash operations
*/
-static unsigned
-uuid_hash(struct cfs_hash *hs, const void *key, unsigned mask)
+static unsigned int
+uuid_hash(struct cfs_hash *hs, const void *key, unsigned int mask)
{
return cfs_hash_djb2_hash(((struct obd_uuid *)key)->uuid,
sizeof(((struct obd_uuid *)key)->uuid), mask);
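
class_get_profile() now returns a referenced entry, and class_put_profile() frees it only on the last put, so deletion can race safely with lookups. A compact user-space model of that lookup/put protocol; a singly linked list and a pthread mutex stand in for the kernel list and spinlock, and the names are illustrative:

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct profile {
	struct profile *next;
	char *name;
	int refs;		/* list itself holds one reference */
	bool list_deleted;	/* set when unlinked, as lp_list_deleted */
};

static struct profile *profiles;
static pthread_mutex_t profiles_lock = PTHREAD_MUTEX_INITIALIZER;

/* Lookup takes a reference under the list lock, so a concurrent delete
 * cannot free the entry out from under the caller. */
static struct profile *profile_get(const char *name)
{
	struct profile *p;

	pthread_mutex_lock(&profiles_lock);
	for (p = profiles; p; p = p->next) {
		if (!strcmp(p->name, name)) {
			p->refs++;
			break;
		}
	}
	pthread_mutex_unlock(&profiles_lock);
	return p;		/* NULL if not found */
}

/* The last put frees; deleting from the list only drops the list's own
 * reference, mirroring class_put_profile(). */
static void profile_put(struct profile *p)
{
	pthread_mutex_lock(&profiles_lock);
	if (--p->refs > 0) {
		pthread_mutex_unlock(&profiles_lock);
		return;
	}
	pthread_mutex_unlock(&profiles_lock);
	free(p->name);
	free(p);
}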
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
index 0d3a3b05a637..2283e920d839 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
@@ -261,7 +261,7 @@ int lustre_start_mgc(struct super_block *sb)
rc = obd_get_info(NULL, obd->obd_self_export,
strlen(KEY_CONN_DATA), KEY_CONN_DATA,
- &vallen, data, NULL);
+ &vallen, data);
LASSERT(rc == 0);
has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
@@ -382,7 +382,7 @@ int lustre_start_mgc(struct super_block *sb)
/* We connect to the MGS at setup, and don't disconnect until cleanup */
data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
- OBD_CONNECT_LVB_TYPE;
+ OBD_CONNECT_LVB_TYPE | OBD_CONNECT_BULK_MBITS;
#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
@@ -1216,8 +1216,7 @@ static struct file_system_type lustre_fs_type = {
.name = "lustre",
.mount = lustre_mount,
.kill_sb = lustre_kill_super,
- .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
- FS_RENAME_DOES_D_MOVE,
+ .fs_flags = FS_REQUIRES_DEV | FS_RENAME_DOES_D_MOVE,
};
MODULE_ALIAS_FS("lustre");
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c
index 79104a66da96..c52b9e07d7dd 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -124,68 +124,3 @@ void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj)
ioobj->ioo_max_brw = 0;
}
EXPORT_SYMBOL(obdo_to_ioobj);
-
-static void iattr_from_obdo(struct iattr *attr, const struct obdo *oa,
- u32 valid)
-{
- valid &= oa->o_valid;
-
- if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
- CDEBUG(D_INODE, "valid %#llx, new time %llu/%llu\n",
- oa->o_valid, oa->o_mtime, oa->o_ctime);
-
- attr->ia_valid = 0;
- if (valid & OBD_MD_FLATIME) {
- LTIME_S(attr->ia_atime) = oa->o_atime;
- attr->ia_valid |= ATTR_ATIME;
- }
- if (valid & OBD_MD_FLMTIME) {
- LTIME_S(attr->ia_mtime) = oa->o_mtime;
- attr->ia_valid |= ATTR_MTIME;
- }
- if (valid & OBD_MD_FLCTIME) {
- LTIME_S(attr->ia_ctime) = oa->o_ctime;
- attr->ia_valid |= ATTR_CTIME;
- }
- if (valid & OBD_MD_FLSIZE) {
- attr->ia_size = oa->o_size;
- attr->ia_valid |= ATTR_SIZE;
- }
-#if 0 /* you shouldn't be able to change a file's type with setattr */
- if (valid & OBD_MD_FLTYPE) {
- attr->ia_mode = (attr->ia_mode & ~S_IFMT) |
- (oa->o_mode & S_IFMT);
- attr->ia_valid |= ATTR_MODE;
- }
-#endif
- if (valid & OBD_MD_FLMODE) {
- attr->ia_mode = (attr->ia_mode & S_IFMT) |
- (oa->o_mode & ~S_IFMT);
- attr->ia_valid |= ATTR_MODE;
- if (!in_group_p(make_kgid(&init_user_ns, oa->o_gid)) &&
- !capable(CFS_CAP_FSETID))
- attr->ia_mode &= ~S_ISGID;
- }
- if (valid & OBD_MD_FLUID) {
- attr->ia_uid = make_kuid(&init_user_ns, oa->o_uid);
- attr->ia_valid |= ATTR_UID;
- }
- if (valid & OBD_MD_FLGID) {
- attr->ia_gid = make_kgid(&init_user_ns, oa->o_gid);
- attr->ia_valid |= ATTR_GID;
- }
-}
-
-void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid)
-{
- iattr_from_obdo(&op_data->op_attr, oa, valid);
- if (valid & OBD_MD_FLBLOCKS) {
- op_data->op_attr_blocks = oa->o_blocks;
- op_data->op_attr.ia_valid |= ATTR_BLOCKS;
- }
- if (valid & OBD_MD_FLFLAGS) {
- op_data->op_attr_flags = oa->o_flags;
- op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
- }
-}
-EXPORT_SYMBOL(md_from_obdo);
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index 505582ff4d1e..549076193bde 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -55,7 +55,7 @@ struct echo_device {
struct echo_client_obd *ed_ec;
struct cl_site ed_site_myself;
- struct cl_site *ed_site;
+ struct lu_site *ed_site;
struct lu_device *ed_next;
};
@@ -505,9 +505,6 @@ static const struct lu_device_operations echo_device_lu_ops = {
/** @} echo_lu_dev_ops */
-static const struct cl_device_operations echo_device_cl_ops = {
-};
-
/** \defgroup echo_init Setup and teardown
*
* Init and fini functions for echo client.
@@ -527,17 +524,19 @@ static int echo_site_init(const struct lu_env *env, struct echo_device *ed)
}
rc = lu_site_init_finish(&site->cs_lu);
- if (rc)
+ if (rc) {
+ cl_site_fini(site);
return rc;
+ }
- ed->ed_site = site;
+ ed->ed_site = &site->cs_lu;
return 0;
}
static void echo_site_fini(const struct lu_env *env, struct echo_device *ed)
{
if (ed->ed_site) {
- cl_site_fini(ed->ed_site);
+ lu_site_fini(ed->ed_site);
ed->ed_site = NULL;
}
}
@@ -561,16 +560,10 @@ static void echo_thread_key_fini(const struct lu_context *ctx,
kmem_cache_free(echo_thread_kmem, info);
}
-static void echo_thread_key_exit(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
-}
-
static struct lu_context_key echo_thread_key = {
.lct_tags = LCT_CL_THREAD,
.lct_init = echo_thread_key_init,
.lct_fini = echo_thread_key_fini,
- .lct_exit = echo_thread_key_exit
};
static void *echo_session_key_init(const struct lu_context *ctx,
@@ -592,16 +585,10 @@ static void echo_session_key_fini(const struct lu_context *ctx,
kmem_cache_free(echo_session_kmem, session);
}
-static void echo_session_key_exit(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
-}
-
static struct lu_context_key echo_session_key = {
.lct_tags = LCT_SESSION,
.lct_init = echo_session_key_init,
.lct_fini = echo_session_key_fini,
- .lct_exit = echo_session_key_exit
};
LU_TYPE_INIT_FINI(echo, &echo_thread_key, &echo_session_key);
@@ -630,7 +617,6 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
goto out_free;
cd->cd_lu_dev.ld_ops = &echo_device_lu_ops;
- cd->cd_ops = &echo_device_cl_ops;
obd = class_name2obd(lustre_cfg_string(cfg, 0));
LASSERT(obd);
@@ -674,7 +660,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
goto out_cleanup;
}
- next->ld_site = &ed->ed_site->cs_lu;
+ next->ld_site = ed->ed_site;
rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
next->ld_type->ldt_name,
NULL);
@@ -741,7 +727,7 @@ static struct lu_device *echo_device_free(const struct lu_env *env,
CDEBUG(D_INFO, "echo device:%p is going to be freed, next = %p\n",
ed, next);
- lu_site_purge(env, &ed->ed_site->cs_lu, -1);
+ lu_site_purge(env, ed->ed_site, -1);
/* check if there are objects still alive.
* It shouldn't have any object because lu_site_purge would cleanup
@@ -754,7 +740,7 @@ static struct lu_device *echo_device_free(const struct lu_env *env,
spin_unlock(&ec->ec_lock);
/* purge again */
- lu_site_purge(env, &ed->ed_site->cs_lu, -1);
+ lu_site_purge(env, ed->ed_site, -1);
CDEBUG(D_INFO,
"Waiting for the reference of echo object to be dropped\n");
@@ -766,7 +752,7 @@ static struct lu_device *echo_device_free(const struct lu_env *env,
CERROR("echo_client still has objects at cleanup time, wait for 1 second\n");
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(cfs_time_seconds(1));
- lu_site_purge(env, &ed->ed_site->cs_lu, -1);
+ lu_site_purge(env, ed->ed_site, -1);
spin_lock(&ec->ec_lock);
}
spin_unlock(&ec->ec_lock);
@@ -780,11 +766,13 @@ static struct lu_device *echo_device_free(const struct lu_env *env,
while (next)
next = next->ld_type->ldt_ops->ldto_device_free(env, next);
- LASSERT(ed->ed_site == lu2cl_site(d->ld_site));
+ LASSERT(ed->ed_site == d->ld_site);
echo_site_fini(env, ed);
cl_device_fini(&ed->ed_cl);
kfree(ed);
+ cl_env_cache_purge(~0);
+
return NULL;
}
@@ -1100,7 +1088,7 @@ out:
static u64 last_object_id;
static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
- struct obdo *oa, struct obd_trans_info *oti)
+ struct obdo *oa)
{
struct echo_object *eco;
struct echo_client_obd *ec = ed->ed_ec;
@@ -1117,7 +1105,7 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
if (!ostid_id(&oa->o_oi))
ostid_set_id(&oa->o_oi, ++last_object_id);
- rc = obd_create(env, ec->ec_exp, oa, oti);
+ rc = obd_create(env, ec->ec_exp, oa);
if (rc != 0) {
CERROR("Cannot create objects: rc = %d\n", rc);
goto failed;
@@ -1137,7 +1125,7 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
failed:
if (created && rc)
- obd_destroy(env, ec->ec_exp, oa, oti);
+ obd_destroy(env, ec->ec_exp, oa);
if (rc)
CERROR("create object failed with: rc = %d\n", rc);
return rc;
@@ -1237,8 +1225,7 @@ static int echo_client_page_debug_check(struct page *page, u64 id,
static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
struct echo_object *eco, u64 offset,
- u64 count, int async,
- struct obd_trans_info *oti)
+ u64 count, int async)
{
u32 npages;
struct brw_page *pga;
@@ -1332,12 +1319,11 @@ static int echo_client_prep_commit(const struct lu_env *env,
struct obd_export *exp, int rw,
struct obdo *oa, struct echo_object *eco,
u64 offset, u64 count,
- u64 batch, struct obd_trans_info *oti,
- int async)
+ u64 batch, int async)
{
struct obd_ioobj ioo;
struct niobuf_local *lnb;
- struct niobuf_remote *rnb;
+ struct niobuf_remote rnb;
u64 off;
u64 npages, tot_pages;
int i, ret = 0, brw_flags = 0;
@@ -1349,9 +1335,7 @@ static int echo_client_prep_commit(const struct lu_env *env,
tot_pages = count >> PAGE_SHIFT;
lnb = kcalloc(npages, sizeof(struct niobuf_local), GFP_NOFS);
- rnb = kcalloc(npages, sizeof(struct niobuf_remote), GFP_NOFS);
-
- if (!lnb || !rnb) {
+ if (!lnb) {
ret = -ENOMEM;
goto out;
}
@@ -1363,26 +1347,22 @@ static int echo_client_prep_commit(const struct lu_env *env,
off = offset;
- for (; tot_pages; tot_pages -= npages) {
+ for (; tot_pages > 0; tot_pages -= npages) {
int lpages;
if (tot_pages < npages)
npages = tot_pages;
- for (i = 0; i < npages; i++, off += PAGE_SIZE) {
- rnb[i].rnb_offset = off;
- rnb[i].rnb_len = PAGE_SIZE;
- rnb[i].rnb_flags = brw_flags;
- }
-
- ioo.ioo_bufcnt = npages;
+ rnb.rnb_offset = off;
+ rnb.rnb_len = npages * PAGE_SIZE;
+ rnb.rnb_flags = brw_flags;
+ ioo.ioo_bufcnt = 1;
+ off += npages * PAGE_SIZE;
lpages = npages;
- ret = obd_preprw(env, rw, exp, oa, 1, &ioo, rnb, &lpages,
- lnb, oti);
+ ret = obd_preprw(env, rw, exp, oa, 1, &ioo, &rnb, &lpages, lnb);
if (ret != 0)
goto out;
- LASSERT(lpages == npages);
for (i = 0; i < lpages; i++) {
struct page *page = lnb[i].lnb_page;
@@ -1401,24 +1381,21 @@ static int echo_client_prep_commit(const struct lu_env *env,
if (rw == OBD_BRW_WRITE)
echo_client_page_debug_setup(page, rw,
- ostid_id(&oa->o_oi),
- rnb[i].rnb_offset,
- rnb[i].rnb_len);
+ ostid_id(&oa->o_oi),
+ lnb[i].lnb_file_offset,
+ lnb[i].lnb_len);
else
echo_client_page_debug_check(page,
- ostid_id(&oa->o_oi),
- rnb[i].rnb_offset,
- rnb[i].rnb_len);
+ ostid_id(&oa->o_oi),
+ lnb[i].lnb_file_offset,
+ lnb[i].lnb_len);
}
- ret = obd_commitrw(env, rw, exp, oa, 1, &ioo,
- rnb, npages, lnb, oti, ret);
+ ret = obd_commitrw(env, rw, exp, oa, 1, &ioo, &rnb, npages, lnb,
+ ret);
if (ret != 0)
goto out;
- /* Reset oti otherwise it would confuse ldiskfs. */
- memset(oti, 0, sizeof(*oti));
-
/* Reuse env context. */
lu_context_exit((struct lu_context *)&env->le_ctx);
lu_context_enter((struct lu_context *)&env->le_ctx);
@@ -1426,14 +1403,12 @@ static int echo_client_prep_commit(const struct lu_env *env,
out:
kfree(lnb);
- kfree(rnb);
return ret;
}
static int echo_client_brw_ioctl(const struct lu_env *env, int rw,
struct obd_export *exp,
- struct obd_ioctl_data *data,
- struct obd_trans_info *dummy_oti)
+ struct obd_ioctl_data *data)
{
struct obd_device *obd = class_exp2obd(exp);
struct echo_device *ed = obd2echo_dev(obd);
@@ -1470,15 +1445,13 @@ static int echo_client_brw_ioctl(const struct lu_env *env, int rw,
case 1:
/* fall through */
case 2:
- rc = echo_client_kbrw(ed, rw, oa,
- eco, data->ioc_offset,
- data->ioc_count, async, dummy_oti);
+ rc = echo_client_kbrw(ed, rw, oa, eco, data->ioc_offset,
+ data->ioc_count, async);
break;
case 3:
- rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa,
- eco, data->ioc_offset,
- data->ioc_count, data->ioc_plen1,
- dummy_oti, async);
+ rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa, eco,
+ data->ioc_offset, data->ioc_count,
+ data->ioc_plen1, async);
break;
default:
rc = -EINVAL;
@@ -1496,16 +1469,11 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
struct echo_client_obd *ec = ed->ed_ec;
struct echo_object *eco;
struct obd_ioctl_data *data = karg;
- struct obd_trans_info dummy_oti;
struct lu_env *env;
- struct oti_req_ack_lock *ack_lock;
struct obdo *oa;
struct lu_fid fid;
int rw = OBD_BRW_READ;
int rc = 0;
- int i;
-
- memset(&dummy_oti, 0, sizeof(dummy_oti));
oa = &data->ioc_obdo1;
if (!(oa->o_valid & OBD_MD_FLGROUP)) {
@@ -1535,7 +1503,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
goto out;
}
- rc = echo_create_object(env, ed, oa, &dummy_oti);
+ rc = echo_create_object(env, ed, oa);
goto out;
case OBD_IOC_DESTROY:
@@ -1546,7 +1514,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
rc = echo_get_object(&eco, ed, oa);
if (rc == 0) {
- rc = obd_destroy(env, ec->ec_exp, oa, &dummy_oti);
+ rc = obd_destroy(env, ec->ec_exp, oa);
if (rc == 0)
eco->eo_deleted = 1;
echo_put_object(eco);
@@ -1556,11 +1524,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
case OBD_IOC_GETATTR:
rc = echo_get_object(&eco, ed, oa);
if (rc == 0) {
- struct obd_info oinfo = {
- .oi_oa = oa,
- };
-
- rc = obd_getattr(env, ec->ec_exp, &oinfo);
+ rc = obd_getattr(env, ec->ec_exp, oa);
echo_put_object(eco);
}
goto out;
@@ -1573,11 +1537,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
rc = echo_get_object(&eco, ed, oa);
if (rc == 0) {
- struct obd_info oinfo = {
- .oi_oa = oa,
- };
-
- rc = obd_setattr(env, ec->ec_exp, &oinfo, NULL);
+ rc = obd_setattr(env, ec->ec_exp, oa);
echo_put_object(eco);
}
goto out;
@@ -1591,7 +1551,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
rw = OBD_BRW_WRITE;
/* fall through */
case OBD_IOC_BRW_READ:
- rc = echo_client_brw_ioctl(env, rw, exp, data, &dummy_oti);
+ rc = echo_client_brw_ioctl(env, rw, exp, data);
goto out;
default:
@@ -1604,14 +1564,6 @@ out:
lu_env_fini(env);
kfree(env);
- /* XXX this should be in a helper also called by target_send_reply */
- for (ack_lock = dummy_oti.oti_ack_locks, i = 0; i < 4;
- i++, ack_lock++) {
- if (!ack_lock->mode)
- break;
- ldlm_lock_decref(&ack_lock->lock, ack_lock->mode);
- }
-
return rc;
}
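
echo_client_prep_commit() now describes each batch with a single struct niobuf_remote spanning npages pages instead of an array of per-page entries, since the span is contiguous. A trivial sketch of building such a descriptor; the PAGE_SIZE value and the struct layout are assumed here for illustration only:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

struct niobuf_remote {		/* shape borrowed from the Lustre struct */
	uint64_t rnb_offset;
	uint32_t rnb_len;
	uint32_t rnb_flags;
};

int main(void)
{
	/* One descriptor covers the whole contiguous span that the old
	 * code described with npages single-page entries. */
	uint64_t off = 1 << 20;
	unsigned int npages = 16;
	struct niobuf_remote rnb = {
		.rnb_offset = off,
		.rnb_len = npages * PAGE_SIZE,
		.rnb_flags = 0,
	};

	printf("offset %llu len %u\n",
	       (unsigned long long)rnb.rnb_offset, rnb.rnb_len);
	return 0;
}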
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
index f0062d44ee03..575b2969ad83 100644
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c
@@ -162,7 +162,7 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj,
pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
if (pages_number <= 0 ||
- pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_SHIFT) ||
+ pages_number >= OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_SHIFT) ||
pages_number > totalram_pages / 4) /* 1/4 of RAM */
return -ERANGE;
@@ -183,10 +183,12 @@ static int osc_cached_mb_seq_show(struct seq_file *m, void *v)
seq_printf(m,
"used_mb: %ld\n"
- "busy_cnt: %ld\n",
+ "busy_cnt: %ld\n"
+ "reclaim: %llu\n",
(atomic_long_read(&cli->cl_lru_in_list) +
atomic_long_read(&cli->cl_lru_busy)) >> shift,
- atomic_long_read(&cli->cl_lru_busy));
+ atomic_long_read(&cli->cl_lru_busy),
+ cli->cl_lru_reclaim);
return 0;
}
@@ -585,7 +587,8 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj,
chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1);
/* max_pages_per_rpc must be chunk aligned */
val = (val + ~chunk_mask) & chunk_mask;
- if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) {
+ if (!val || (ocd->ocd_brw_size &&
+ val > ocd->ocd_brw_size >> PAGE_SHIFT)) {
return -ERANGE;
}
spin_lock(&cli->cl_loi_list_lock);
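
max_pages_per_rpc_store() rounds the requested value up to the RPC chunk size before validating it; with a power-of-two chunk, "(val + ~chunk_mask) & chunk_mask" is the standard round-up. A short demonstration with assumed example values:

#include <stdio.h>

int main(void)
{
	unsigned long chunk = 256;		/* pages per chunk, example */
	unsigned long chunk_mask = ~(chunk - 1);
	unsigned long val = 1000;		/* requested pages */

	/* round up to the next chunk boundary, as in the hunk above */
	val = (val + ~chunk_mask) & chunk_mask;
	printf("%lu\n", val);			/* prints 1024 */
	return 0;
}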
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index 4bbe219add98..b0f030c6c9c9 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -360,6 +360,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
RB_CLEAR_NODE(&ext->oe_node);
ext->oe_obj = obj;
+ cl_object_get(osc2cl(obj));
atomic_set(&ext->oe_refc, 1);
atomic_set(&ext->oe_users, 0);
INIT_LIST_HEAD(&ext->oe_link);
@@ -398,6 +399,7 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext)
LDLM_LOCK_PUT(ext->oe_dlmlock);
ext->oe_dlmlock = NULL;
}
+ cl_object_put(env, osc2cl(ext->oe_obj));
osc_extent_free(ext);
}
}
@@ -959,7 +961,7 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
if (rc == -ETIMEDOUT) {
OSC_EXTENT_DUMP(D_ERROR, ext,
"%s: wait ext to %u timedout, recovery in progress?\n",
- osc_export(obj)->exp_obd->obd_name, state);
+ cli_name(osc_cli(obj)), state);
lwi = LWI_INTR(NULL, NULL);
rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state),
@@ -977,7 +979,6 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
bool partial)
{
- struct cl_env_nest nest;
struct lu_env *env;
struct cl_io *io;
struct osc_object *obj = ext->oe_obj;
@@ -990,6 +991,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
int grants = 0;
int nr_pages = 0;
int rc = 0;
+ int refcheck;
LASSERT(sanity_check(ext) == 0);
EASSERT(ext->oe_state == OES_TRUNC, ext);
@@ -999,7 +1001,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
* We can't use that env from osc_cache_truncate_start() because
* it's from lov_io_sub and not fully initialized.
*/
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
io = &osc_env_info(env)->oti_io;
io->ci_obj = cl_object_top(osc2cl(obj));
rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
@@ -1085,7 +1087,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
out:
cl_io_fini(env, io);
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
return rc;
}
@@ -1327,7 +1329,6 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
{
struct osc_page *opg = oap2osc_page(oap);
struct cl_page *page = oap2cl_page(oap);
- struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
enum cl_req_type crt;
int srvlock;
@@ -1338,25 +1339,10 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
"cp_state:%u, cmd:%d\n", page->cp_state, cmd);
LASSERT(opg->ops_transfer_pinned);
- /*
- * page->cp_req can be NULL if io submission failed before
- * cl_req was allocated.
- */
- if (page->cp_req)
- cl_req_page_done(env, page);
- LASSERT(!page->cp_req);
-
crt = cmd == OBD_BRW_READ ? CRT_READ : CRT_WRITE;
/* Clear opg->ops_transfer_pinned before VM lock is released. */
opg->ops_transfer_pinned = 0;
- spin_lock(&obj->oo_seatbelt);
- LASSERT(opg->ops_submitter);
- LASSERT(!list_empty(&opg->ops_inflight));
- list_del_init(&opg->ops_inflight);
- opg->ops_submitter = NULL;
- spin_unlock(&obj->oo_seatbelt);
-
opg->ops_submit_time = 0;
srvlock = oap->oap_brw_flags & OBD_BRW_SRVLOCK;
@@ -1380,16 +1366,17 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
lu_ref_del(&page->cp_reference, "transfer", page);
cl_page_completion(env, page, crt, rc);
+ cl_page_put(env, page);
return 0;
}
#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \
struct client_obd *__tmp = (cli); \
- CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %ld/%lu " \
+ CDEBUG(lvl, "%s: grant { dirty: %lu/%lu dirty_pages: %ld/%lu " \
"dropped: %ld avail: %ld, reserved: %ld, flight: %d }" \
"lru {in list: %ld, left: %ld, waiters: %d }" fmt "\n", \
- __tmp->cl_import->imp_obd->obd_name, \
+ cli_name(__tmp), \
__tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages, \
atomic_long_read(&obd_dirty_pages), obd_max_dirty_pages, \
__tmp->cl_lost_grant, __tmp->cl_avail_grant, \
@@ -1627,7 +1614,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
osc_io_unplug_async(env, cli, NULL);
CDEBUG(D_CACHE, "%s: sleeping for cache space @ %p for %p\n",
- cli->cl_import->imp_obd->obd_name, &ocw, oap);
+ cli_name(cli), &ocw, oap);
rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi);
@@ -1671,7 +1658,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
break;
default:
CDEBUG(D_CACHE, "%s: event for cache space @ %p never arrived due to %d, fall back to sync i/o\n",
- cli->cl_import->imp_obd->obd_name, &ocw, rc);
+ cli_name(cli), &ocw, rc);
break;
}
out:
@@ -1931,7 +1918,8 @@ static int try_to_add_extent_for_io(struct client_obd *cli,
}
if (tmp->oe_srvlock != ext->oe_srvlock ||
- !tmp->oe_grants != !ext->oe_grants)
+ !tmp->oe_grants != !ext->oe_grants ||
+ tmp->oe_no_merge || ext->oe_no_merge)
return 0;
/* remove break for strict check */
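
try_to_add_extent_for_io() now also refuses to merge when either extent carries oe_no_merge, which is set further down for sync extents containing partial pages. The full compatibility test reads as a single predicate; a compact sketch:

#include <stdbool.h>
#include <stdio.h>

struct extent_flags {
	bool srvlock;
	int grants;	/* 0 means no grant backing */
	bool no_merge;
};

/* Mirrors the widened test in try_to_add_extent_for_io(): any
 * mismatch in srvlock or grant-backing, or a no_merge flag on
 * either extent, keeps them in separate RPCs. */
static bool can_merge(const struct extent_flags *a,
		      const struct extent_flags *b)
{
	if (a->srvlock != b->srvlock)
		return false;
	if (!a->grants != !b->grants)
		return false;
	if (a->no_merge || b->no_merge)
		return false;
	return true;
}

int main(void)
{
	struct extent_flags full = { false, 4096, false };
	struct extent_flags partial = { false, 4096, true };

	printf("full+full: %d\n", can_merge(&full, &full));	  /* 1 */
	printf("full+partial: %d\n", can_merge(&full, &partial)); /* 0 */
	return 0;
}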
@@ -2250,14 +2238,9 @@ static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
return 0;
if (!async) {
- /* disable osc_lru_shrink() temporarily to avoid
- * potential stack overrun problem. LU-2859
- */
- atomic_inc(&cli->cl_lru_shrinkers);
spin_lock(&cli->cl_loi_list_lock);
osc_check_rpcs(env, cli);
spin_unlock(&cli->cl_loi_list_lock);
- atomic_dec(&cli->cl_lru_shrinkers);
} else {
CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli);
LASSERT(cli->cl_writeback_work);
@@ -2479,7 +2462,6 @@ int osc_teardown_async_page(const struct lu_env *env,
struct osc_object *obj, struct osc_page *ops)
{
struct osc_async_page *oap = &ops->ops_oap;
- struct osc_extent *ext = NULL;
int rc = 0;
LASSERT(oap->oap_magic == OAP_MAGIC);
@@ -2487,12 +2469,15 @@ int osc_teardown_async_page(const struct lu_env *env,
CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n",
oap, ops, osc_index(oap2osc(oap)));
- osc_object_lock(obj);
if (!list_empty(&oap->oap_rpc_item)) {
CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap);
rc = -EBUSY;
} else if (!list_empty(&oap->oap_pending_item)) {
+ struct osc_extent *ext = NULL;
+
+ osc_object_lock(obj);
ext = osc_extent_lookup(obj, osc_index(oap2osc(oap)));
+ osc_object_unlock(obj);
/* only truncated pages are allowed to be taken out.
* See osc_extent_truncate() and osc_cache_truncate_start()
* for details.
@@ -2502,10 +2487,9 @@ int osc_teardown_async_page(const struct lu_env *env,
osc_index(oap2osc(oap)));
rc = -EBUSY;
}
+ if (ext)
+ osc_extent_put(env, ext);
}
- osc_object_unlock(obj);
- if (ext)
- osc_extent_put(env, ext);
return rc;
}
@@ -2666,11 +2650,13 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
struct osc_async_page *oap, *tmp;
int page_count = 0;
int mppr = cli->cl_max_pages_per_rpc;
+ bool can_merge = true;
pgoff_t start = CL_PAGE_EOF;
pgoff_t end = 0;
list_for_each_entry(oap, list, oap_pending_item) {
- pgoff_t index = osc_index(oap2osc(oap));
+ struct osc_page *opg = oap2osc_page(oap);
+ pgoff_t index = osc_index(opg);
if (index > end)
end = index;
@@ -2678,6 +2664,9 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
start = index;
++page_count;
mppr <<= (page_count > mppr);
+
+ if (unlikely(opg->ops_from > 0 || opg->ops_to < PAGE_SIZE))
+ can_merge = false;
}
ext = osc_extent_alloc(obj);
@@ -2691,6 +2680,7 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
ext->oe_rw = !!(cmd & OBD_BRW_READ);
ext->oe_sync = 1;
+ ext->oe_no_merge = !can_merge;
ext->oe_urgent = 1;
ext->oe_start = start;
ext->oe_end = end;
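
The loop above detects partial pages in one pass: any queued page whose covered byte range is narrower than a full page (ops_from > 0 or ops_to < PAGE_SIZE) taints the whole extent as non-mergeable. A standalone sketch of that scan, with stand-in names and a model page size:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MODEL_PAGE_SIZE 4096u

struct page_span {
	unsigned int from;	/* first covered byte in the page */
	unsigned int to;	/* one past the last covered byte */
};

/* One pass over the queued pages, as in osc_queue_sync_pages():
 * a single partial page taints the whole extent. */
static bool extent_can_merge(const struct page_span *pages, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (pages[i].from > 0 || pages[i].to < MODEL_PAGE_SIZE)
			return false;
	return true;
}

int main(void)
{
	struct page_span full[] = { { 0, 4096 }, { 0, 4096 } };
	struct page_span mixed[] = { { 0, 4096 }, { 512, 4096 } };

	printf("%d %d\n",
	       extent_can_merge(full, 2),	/* 1 */
	       extent_can_merge(mixed, 2));	/* 0 */
	return 0;
}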
@@ -3158,7 +3148,8 @@ static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
struct cl_page *page = ops->ops_cl.cpl_page;
/* refresh non-overlapped index */
- tmp = osc_dlmlock_at_pgoff(env, osc, index, 0, 0);
+ tmp = osc_dlmlock_at_pgoff(env, osc, index,
+ OSC_DAP_FL_TEST_LOCK);
if (tmp) {
__u64 end = tmp->l_policy_data.l_extent.end;
/* Cache the first-non-overlapped index so as to skip
diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
index 9c8de15c309c..cce55a9689f0 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
@@ -77,7 +77,6 @@ struct osc_io {
/** write osc_lock for this IO, used by osc_extent_find(). */
struct osc_lock *oi_write_osclock;
- struct obd_info oi_info;
struct obdo oi_oa;
struct osc_async_cbargs {
bool opc_rpc_sent;
@@ -87,13 +86,6 @@ struct osc_io {
};
/**
- * State of transfer for osc.
- */
-struct osc_req {
- struct cl_req_slice or_cl;
-};
-
-/**
* State maintained by osc layer for the duration of a system call.
*/
struct osc_session {
@@ -103,7 +95,7 @@ struct osc_session {
#define OTI_PVEC_SIZE 256
struct osc_thread_info {
struct ldlm_res_id oti_resname;
- ldlm_policy_data_t oti_policy;
+ union ldlm_policy_data oti_policy;
struct cl_lock_descr oti_descr;
struct cl_attr oti_attr;
struct lustre_handle oti_handle;
@@ -116,6 +108,7 @@ struct osc_thread_info {
pgoff_t oti_next_index;
pgoff_t oti_fn_index; /* first non-overlapped index */
struct cl_sync_io oti_anchor;
+ struct cl_req_attr oti_req_attr;
};
struct osc_object {
@@ -127,16 +120,6 @@ struct osc_object {
int oo_contended;
unsigned long oo_contention_time;
/**
- * List of pages in transfer.
- */
- struct list_head oo_inflight[CRT_NR];
- /**
- * Lock, protecting osc_page::ops_inflight, because a seat-belt is
- * locked during take-off and landing.
- */
- spinlock_t oo_seatbelt;
-
- /**
* used by the osc to keep track of what objects to build into rpcs.
* Protected by client_obd->cli_loi_list_lock.
*/
@@ -364,15 +347,6 @@ struct osc_page {
*/
struct list_head ops_lru;
/**
- * Linkage into a per-osc_object list of pages in flight. For
- * debugging.
- */
- struct list_head ops_inflight;
- /**
- * Thread that submitted this page for transfer. For debugging.
- */
- struct task_struct *ops_submitter;
- /**
* Submit time - the time when the page is starting RPC. For debugging.
*/
unsigned long ops_submit_time;
@@ -382,7 +356,6 @@ extern struct kmem_cache *osc_lock_kmem;
extern struct kmem_cache *osc_object_kmem;
extern struct kmem_cache *osc_thread_kmem;
extern struct kmem_cache *osc_session_kmem;
-extern struct kmem_cache *osc_req_kmem;
extern struct kmem_cache *osc_extent_kmem;
extern struct lu_device_type osc_device_type;
@@ -396,15 +369,14 @@ int osc_lock_init(const struct lu_env *env,
const struct cl_io *io);
int osc_io_init(const struct lu_env *env,
struct cl_object *obj, struct cl_io *io);
-int osc_req_init(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req);
struct lu_object *osc_object_alloc(const struct lu_env *env,
const struct lu_object_header *hdr,
struct lu_device *dev);
int osc_page_init(const struct lu_env *env, struct cl_object *obj,
struct cl_page *page, pgoff_t ind);
-void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
+void osc_index2policy(union ldlm_policy_data *policy,
+ const struct cl_object *obj,
pgoff_t start, pgoff_t end);
int osc_lvb_print(const struct lu_env *env, void *cookie,
lu_printer_t p, const struct ost_lvb *lvb);
@@ -554,6 +526,16 @@ static inline struct osc_page *oap2osc_page(struct osc_async_page *oap)
return (struct osc_page *)container_of(oap, struct osc_page, ops_oap);
}
+static inline struct osc_page *
+osc_cl_page_osc(struct cl_page *page, struct osc_object *osc)
+{
+ const struct cl_page_slice *slice;
+
+ LASSERT(osc);
+ slice = cl_object_page_slice(&osc->oo_cl, page);
+ return cl2osc_page(slice);
+}
+
static inline struct osc_lock *cl2osc_lock(const struct cl_lock_slice *slice)
{
LINVRNT(osc_is_object(&slice->cls_obj->co_lu));
@@ -615,6 +597,10 @@ struct osc_extent {
oe_rw:1,
/** sync extent, queued by osc_queue_sync_pages() */
oe_sync:1,
+ /** set if this extent has partial, sync pages.
+ * Extents with partial page(s) can't be merged with others in one RPC
+ */
+ oe_no_merge:1,
oe_srvlock:1,
oe_memalloc:1,
/** an ACTIVE extent is going to be truncated, so when this extent
diff --git a/drivers/staging/lustre/lustre/osc/osc_dev.c b/drivers/staging/lustre/lustre/osc/osc_dev.c
index 83d30c135ba4..c5d62aeaeab5 100644
--- a/drivers/staging/lustre/lustre/osc/osc_dev.c
+++ b/drivers/staging/lustre/lustre/osc/osc_dev.c
@@ -29,7 +29,7 @@
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
- * Implementation of cl_device, cl_req for OSC layer.
+ * Implementation of cl_device, for OSC layer.
*
* Author: Nikita Danilov <nikita.danilov@sun.com>
*/
@@ -49,7 +49,6 @@ struct kmem_cache *osc_lock_kmem;
struct kmem_cache *osc_object_kmem;
struct kmem_cache *osc_thread_kmem;
struct kmem_cache *osc_session_kmem;
-struct kmem_cache *osc_req_kmem;
struct kmem_cache *osc_extent_kmem;
struct kmem_cache *osc_quota_kmem;
@@ -75,11 +74,6 @@ struct lu_kmem_descr osc_caches[] = {
.ckd_size = sizeof(struct osc_session)
},
{
- .ckd_cache = &osc_req_kmem,
- .ckd_name = "osc_req_kmem",
- .ckd_size = sizeof(struct osc_req)
- },
- {
.ckd_cache = &osc_extent_kmem,
.ckd_name = "osc_extent_kmem",
.ckd_size = sizeof(struct osc_extent)
@@ -94,8 +88,6 @@ struct lu_kmem_descr osc_caches[] = {
}
};
-struct lock_class_key osc_ast_guard_class;
-
/*****************************************************************************
*
* Type conversions.
@@ -178,10 +170,6 @@ static const struct lu_device_operations osc_lu_ops = {
.ldo_recovery_complete = NULL
};
-static const struct cl_device_operations osc_cl_ops = {
- .cdo_req_init = osc_req_init
-};
-
static int osc_device_init(const struct lu_env *env, struct lu_device *d,
const char *name, struct lu_device *next)
{
@@ -220,7 +208,6 @@ static struct lu_device *osc_device_alloc(const struct lu_env *env,
cl_device_init(&od->od_cl, t);
d = osc2lu_dev(od);
d->ld_ops = &osc_lu_ops;
- od->od_cl.cd_ops = &osc_cl_ops;
/* Setup OSC OBD */
obd = class_name2obd(lustre_cfg_string(cfg, 0));
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
index 67fe0a254991..688783dcc1e4 100644
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -107,26 +107,24 @@ typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh,
int rc);
int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u64 *flags, ldlm_policy_data_t *policy,
+ __u64 *flags, union ldlm_policy_data *policy,
struct ost_lvb *lvb, int kms_valid,
osc_enqueue_upcall_f upcall,
void *cookie, struct ldlm_enqueue_info *einfo,
struct ptlrpc_request_set *rqset, int async, int agl);
-int osc_cancel_base(struct lustre_handle *lockh, __u32 mode);
int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+ __u32 type, union ldlm_policy_data *policy, __u32 mode,
__u64 *flags, void *data, struct lustre_handle *lockh,
int unref);
-int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset);
-int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
+int osc_setattr_async(struct obd_export *exp, struct obdo *oa,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset);
+int osc_punch_base(struct obd_export *exp, struct obdo *oa,
obd_enqueue_update_f upcall, void *cookie,
struct ptlrpc_request_set *rqset);
-int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
+int osc_sync_base(struct osc_object *obj, struct obdo *oa,
obd_enqueue_update_f upcall, void *cookie,
struct ptlrpc_request_set *rqset);
@@ -135,7 +133,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
struct list_head *ext_list, int cmd);
long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
long target, bool force);
-long osc_lru_reclaim(struct client_obd *cli);
+long osc_lru_reclaim(struct client_obd *cli, unsigned long npages);
unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
@@ -157,6 +155,11 @@ static inline unsigned long rpcs_in_flight(struct client_obd *cli)
return cli->cl_r_in_flight + cli->cl_w_in_flight;
}
+static inline char *cli_name(struct client_obd *cli)
+{
+ return cli->cl_import->imp_obd->obd_name;
+}
+
struct osc_device {
struct cl_device od_cl;
struct obd_export *od_exp;
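
cli_name() collapses the cli->cl_import->imp_obd->obd_name chain that this patch replaces at every CDEBUG/OSC_DUMP_GRANT site. The same accessor shape in a self-contained miniature (structures abbreviated to the fields involved):

#include <stdio.h>

struct obd_device_model { char obd_name[32]; };
struct obd_import_model { struct obd_device_model *imp_obd; };
struct client_obd_model { struct obd_import_model *cl_import; };

/* Same shape as the cli_name() added above: one accessor instead
 * of spelling out the three-level dereference at every debug site. */
static inline const char *cli_name_model(struct client_obd_model *cli)
{
	return cli->cl_import->imp_obd->obd_name;
}

int main(void)
{
	struct obd_device_model obd = { .obd_name = "lustre-OST0000-osc" };
	struct obd_import_model imp = { .imp_obd = &obd };
	struct client_obd_model cli = { .cl_import = &imp };

	printf("%s: grant message\n", cli_name_model(&cli));
	return 0;
}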
@@ -192,15 +195,27 @@ int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]);
int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
struct obd_quotactl *oqctl);
-int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
- struct obd_quotactl *oqctl);
-int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
void osc_inc_unstable_pages(struct ptlrpc_request *req);
void osc_dec_unstable_pages(struct ptlrpc_request *req);
bool osc_over_unstable_soft_limit(struct client_obd *cli);
+/**
+ * Bit flags for osc_dlmlock_at_pgoff().
+ */
+enum osc_dap_flags {
+ /**
+ * Just check whether the desired lock exists; this does not hold a
+ * reference on the lock.
+ */
+ OSC_DAP_FL_TEST_LOCK = BIT(0),
+ /**
+ * Return the lock even if it is being canceled.
+ */
+ OSC_DAP_FL_CANCELING = BIT(1),
+};
+
struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
struct osc_object *obj, pgoff_t index,
- int pending, int canceling);
+ enum osc_dap_flags flags);
#endif /* OSC_INTERNAL_H */
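
Replacing the old (pending, canceling) int pair with named bit flags makes call sites self-describing and leaves room for new behaviors. A sketch of how a caller-visible flag set translates into lower-level match flags (the values here are placeholders, not the real LDLM_FL_* constants):

#include <stdio.h>

/* Named bit flags, mirrored in shape from enum osc_dap_flags. */
enum dap_flags_model {
	DAP_FL_TEST_LOCK = 1 << 0,	/* probe only, take no reference */
	DAP_FL_CANCELING = 1 << 1,	/* match locks being canceled too */
};

/* Mirrors the tail of osc_dlmlock_at_pgoff(): caller-visible flags
 * are translated into low-level match flags. */
static unsigned int to_match_flags(unsigned int dap)
{
	unsigned int match = 0x100;	/* "block granted", always set */

	if (dap & DAP_FL_TEST_LOCK)
		match |= 0x200;		/* "test lock": no reference held */
	return match;
}

int main(void)
{
	printf("%#x\n", to_match_flags(DAP_FL_TEST_LOCK));
	printf("%#x\n", to_match_flags(DAP_FL_TEST_LOCK | DAP_FL_CANCELING));
	return 0;
}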
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
index 8a559cbcdd0c..228a97c098fe 100644
--- a/drivers/staging/lustre/lustre/osc/osc_io.c
+++ b/drivers/staging/lustre/lustre/osc/osc_io.c
@@ -49,12 +49,6 @@
*
*/
-static struct osc_req *cl2osc_req(const struct cl_req_slice *slice)
-{
- LINVRNT(slice->crs_dev->cd_lu_dev.ld_type == &osc_device_type);
- return container_of0(slice, struct osc_req, or_cl);
-}
-
static struct osc_io *cl2osc_io(const struct lu_env *env,
const struct cl_io_slice *slice)
{
@@ -64,20 +58,6 @@ static struct osc_io *cl2osc_io(const struct lu_env *env,
return oio;
}
-static struct osc_page *osc_cl_page_osc(struct cl_page *page,
- struct osc_object *osc)
-{
- const struct cl_page_slice *slice;
-
- if (osc)
- slice = cl_object_page_slice(&osc->oo_cl, page);
- else
- slice = cl_page_at(page, &osc_device_type);
- LASSERT(slice);
-
- return cl2osc_page(slice);
-}
-
/*****************************************************************************
*
* io operations.
@@ -88,6 +68,45 @@ static void osc_io_fini(const struct lu_env *env, const struct cl_io_slice *io)
{
}
+static void osc_read_ahead_release(const struct lu_env *env, void *cbdata)
+{
+ struct ldlm_lock *dlmlock = cbdata;
+ struct lustre_handle lockh;
+
+ ldlm_lock2handle(dlmlock, &lockh);
+ ldlm_lock_decref(&lockh, LCK_PR);
+ LDLM_LOCK_PUT(dlmlock);
+}
+
+static int osc_io_read_ahead(const struct lu_env *env,
+ const struct cl_io_slice *ios,
+ pgoff_t start, struct cl_read_ahead *ra)
+{
+ struct osc_object *osc = cl2osc(ios->cis_obj);
+ struct ldlm_lock *dlmlock;
+ int result = -ENODATA;
+
+ dlmlock = osc_dlmlock_at_pgoff(env, osc, start, 0);
+ if (dlmlock) {
+ LASSERT(dlmlock->l_ast_data == osc);
+ if (dlmlock->l_req_mode != LCK_PR) {
+ struct lustre_handle lockh;
+
+ ldlm_lock2handle(dlmlock, &lockh);
+ ldlm_lock_addref(&lockh, LCK_PR);
+ ldlm_lock_decref(&lockh, dlmlock->l_req_mode);
+ }
+
+ ra->cra_end = cl_index(osc2cl(osc),
+ dlmlock->l_policy_data.l_extent.end);
+ ra->cra_release = osc_read_ahead_release;
+ ra->cra_cbdata = dlmlock;
+ result = 0;
+ }
+
+ return result;
+}
+
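The new cio_read_ahead hook caps read-ahead at the end of the DLM lock extent covering the start page, taking an extra LCK_PR reference that osc_read_ahead_release() later drops. A userspace sketch of the clamp-to-covered-range idea, with the lock lookup stubbed out:

#include <stdio.h>

typedef unsigned long pgoff_model_t;

struct lock_extent { pgoff_model_t start, end; };

/* Stand-in for osc_dlmlock_at_pgoff(): return the cached lock
 * covering @index, if any. */
static const struct lock_extent *lock_at(pgoff_model_t index)
{
	static const struct lock_extent cached = { 0, 255 };

	return index <= cached.end ? &cached : NULL;
}

/* Mirrors osc_io_read_ahead(): read-ahead may run only as far as
 * the lock that covers the starting page. */
static long read_ahead_end(pgoff_model_t start)
{
	const struct lock_extent *lock = lock_at(start);

	if (!lock)
		return -1;	/* -ENODATA in the real hook */
	return (long)lock->end;
}

int main(void)
{
	printf("ra end from page 10: %ld\n", read_ahead_end(10));   /* 255 */
	printf("ra end from page 300: %ld\n", read_ahead_end(300)); /* -1 */
	return 0;
}
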
/**
* An implementation of cl_io_operations::cio_io_submit() method for osc
* layer. Iterates over pages in the in-queue, prepares each for io by calling
@@ -334,7 +353,7 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
npages = max_pages;
c = atomic_long_read(cli->cl_lru_left);
- if (c < npages && osc_lru_reclaim(cli) > 0)
+ if (c < npages && osc_lru_reclaim(cli, npages) > 0)
c = atomic_long_read(cli->cl_lru_left);
while (c >= npages) {
if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) {
@@ -343,6 +362,17 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
}
c = atomic_long_read(cli->cl_lru_left);
}
+ if (atomic_long_read(cli->cl_lru_left) < max_pages) {
+ /*
+ * If there aren't enough pages in the per-OSC LRU then
+ * wake up the LRU thread to try and clear out space, so
+ * we don't block if pages are being dirtied quickly.
+ */
+ CDEBUG(D_CACHE, "%s: queue LRU, left: %lu/%ld.\n",
+ cli_name(cli), atomic_long_read(cli->cl_lru_left),
+ max_pages);
+ (void)ptlrpcd_queue_work(cli->cl_lru_work);
+ }
return 0;
}
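
The iterator init reserves LRU slots with a lock-free compare-and-swap loop, and the new tail kicks the LRU worker when the pool runs low instead of letting writers stall. The reservation loop in isolation, as a runnable sketch:

#include <stdbool.h>
#include <stdio.h>
#include <stdatomic.h>

/* Reserve @npages slots from a shared budget without a lock; the
 * same cmpxchg loop shape as osc_io_rw_iter_init(). */
static bool reserve_slots(atomic_long *left, long npages)
{
	long c = atomic_load(left);

	while (c >= npages) {
		if (atomic_compare_exchange_weak(left, &c, c - npages))
			return true;
		/* c was reloaded by the failed CAS; retry. */
	}
	return false;	/* caller falls back to reclaim */
}

int main(void)
{
	atomic_long left = 100;

	printf("reserved: %d, left: %ld\n",
	       reserve_slots(&left, 30), atomic_load(&left));
	printf("reserved: %d, left: %ld\n",
	       reserve_slots(&left, 200), atomic_load(&left));
	return 0;
}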
@@ -446,7 +476,6 @@ static int osc_io_setattr_start(const struct lu_env *env,
__u64 size = io->u.ci_setattr.sa_attr.lvb_size;
unsigned int ia_valid = io->u.ci_setattr.sa_valid;
int result = 0;
- struct obd_info oinfo = { };
/* truncate cache dirty pages first */
if (cl_io_is_trunc(io))
@@ -486,11 +515,19 @@ static int osc_io_setattr_start(const struct lu_env *env,
oa->o_oi = loi->loi_oi;
obdo_set_parent_fid(oa, io->u.ci_setattr.sa_parent_fid);
oa->o_stripe_idx = io->u.ci_setattr.sa_stripe_index;
- oa->o_mtime = attr->cat_mtime;
- oa->o_atime = attr->cat_atime;
- oa->o_ctime = attr->cat_ctime;
- oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME |
- OBD_MD_FLCTIME | OBD_MD_FLMTIME;
+ oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP;
+ if (ia_valid & ATTR_CTIME) {
+ oa->o_valid |= OBD_MD_FLCTIME;
+ oa->o_ctime = attr->cat_ctime;
+ }
+ if (ia_valid & ATTR_ATIME) {
+ oa->o_valid |= OBD_MD_FLATIME;
+ oa->o_atime = attr->cat_atime;
+ }
+ if (ia_valid & ATTR_MTIME) {
+ oa->o_valid |= OBD_MD_FLMTIME;
+ oa->o_mtime = attr->cat_mtime;
+ }
if (ia_valid & ATTR_SIZE) {
oa->o_size = size;
oa->o_blocks = OBD_OBJECT_EOF;
@@ -503,19 +540,21 @@ static int osc_io_setattr_start(const struct lu_env *env,
} else {
LASSERT(oio->oi_lockless == 0);
}
+ if (ia_valid & ATTR_ATTR_FLAG) {
+ oa->o_flags = io->u.ci_setattr.sa_attr_flags;
+ oa->o_valid |= OBD_MD_FLFLAGS;
+ }
- oinfo.oi_oa = oa;
init_completion(&cbargs->opc_sync);
if (ia_valid & ATTR_SIZE)
result = osc_punch_base(osc_export(cl2osc(obj)),
- &oinfo, osc_async_upcall,
+ oa, osc_async_upcall,
cbargs, PTLRPCD_SET);
else
- result = osc_setattr_async_base(osc_export(cl2osc(obj)),
- &oinfo, NULL,
- osc_async_upcall,
- cbargs, PTLRPCD_SET);
+ result = osc_setattr_async(osc_export(cl2osc(obj)),
+ oa, osc_async_upcall,
+ cbargs, PTLRPCD_SET);
cbargs->opc_rpc_sent = result == 0;
}
return result;
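
With struct obd_info dropped, the setattr path packs only the attributes the caller marked valid and hands the bare obdo to osc_punch_base() (size changes) or the renamed osc_setattr_async() (everything else). A sketch of the conditional packing, with illustrative mask values in place of the ATTR_*/OBD_MD_* constants:

#include <stdio.h>

/* Illustrative mask values standing in for ATTR_* / OBD_MD_*. */
#define M_ATTR_MTIME	0x001u
#define M_ATTR_CTIME	0x002u
#define M_ATTR_SIZE	0x004u

#define M_MD_FLMTIME	0x010u
#define M_MD_FLCTIME	0x020u
#define M_MD_FLSIZE	0x040u

struct obdo_model {
	unsigned int o_valid;
	long long o_mtime, o_ctime, o_size;
};

/* Mirrors the hunk above: only attributes the caller actually set
 * are copied and marked valid, instead of stamping all three
 * timestamps unconditionally as the old code did. */
static void pack_setattr(struct obdo_model *oa, unsigned int ia_valid,
			 long long mt, long long ct, long long sz)
{
	if (ia_valid & M_ATTR_MTIME) {
		oa->o_mtime = mt;
		oa->o_valid |= M_MD_FLMTIME;
	}
	if (ia_valid & M_ATTR_CTIME) {
		oa->o_ctime = ct;
		oa->o_valid |= M_MD_FLCTIME;
	}
	if (ia_valid & M_ATTR_SIZE) {
		oa->o_size = sz;
		oa->o_valid |= M_MD_FLSIZE;
	}
}

int main(void)
{
	struct obdo_model oa = { 0 };

	pack_setattr(&oa, M_ATTR_MTIME, 1700000000LL, 0, 0);
	printf("valid=%#x mtime=%lld\n", oa.o_valid, oa.o_mtime);
	return 0;
}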
@@ -557,6 +596,107 @@ static void osc_io_setattr_end(const struct lu_env *env,
}
}
+struct osc_data_version_args {
+ struct osc_io *dva_oio;
+};
+
+static int
+osc_data_version_interpret(const struct lu_env *env, struct ptlrpc_request *req,
+ void *arg, int rc)
+{
+ struct osc_data_version_args *dva = arg;
+ struct osc_io *oio = dva->dva_oio;
+ const struct ost_body *body;
+
+ if (rc < 0)
+ goto out;
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (!body) {
+ rc = -EPROTO;
+ goto out;
+ }
+
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data, &oio->oi_oa,
+ &body->oa);
+out:
+ oio->oi_cbarg.opc_rc = rc;
+ complete(&oio->oi_cbarg.opc_sync);
+
+ return 0;
+}
+
+static int osc_io_data_version_start(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version;
+ struct osc_io *oio = cl2osc_io(env, slice);
+ struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+ struct osc_object *obj = cl2osc(slice->cis_obj);
+ struct obd_export *exp = osc_export(obj);
+ struct lov_oinfo *loi = obj->oo_oinfo;
+ struct osc_data_version_args *dva;
+ struct obdo *oa = &oio->oi_oa;
+ struct ptlrpc_request *req;
+ struct ost_body *body;
+ int rc;
+
+ memset(oa, 0, sizeof(*oa));
+ oa->o_oi = loi->loi_oi;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
+
+ if (dv->dv_flags & (LL_DV_RD_FLUSH | LL_DV_WR_FLUSH)) {
+ oa->o_valid |= OBD_MD_FLFLAGS;
+ oa->o_flags |= OBD_FL_SRVLOCK;
+ if (dv->dv_flags & LL_DV_WR_FLUSH)
+ oa->o_flags |= OBD_FL_FLUSH;
+ }
+
+ init_completion(&cbargs->opc_sync);
+
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
+ if (!req)
+ return -ENOMEM;
+
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
+ if (rc < 0) {
+ ptlrpc_request_free(req);
+ return rc;
+ }
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
+
+ ptlrpc_request_set_replen(req);
+ req->rq_interpret_reply = osc_data_version_interpret;
+ CLASSERT(sizeof(*dva) <= sizeof(req->rq_async_args));
+ dva = ptlrpc_req_async_args(req);
+ dva->dva_oio = oio;
+
+ ptlrpcd_add_req(req);
+
+ return 0;
+}
+
+static void osc_io_data_version_end(const struct lu_env *env,
+ const struct cl_io_slice *slice)
+{
+ struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version;
+ struct osc_io *oio = cl2osc_io(env, slice);
+ struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+
+ wait_for_completion(&cbargs->opc_sync);
+
+ if (cbargs->opc_rc) {
+ slice->cis_io->ci_result = cbargs->opc_rc;
+ } else if (!(oio->oi_oa.o_valid & OBD_MD_FLDATAVERSION)) {
+ slice->cis_io->ci_result = -EOPNOTSUPP;
+ } else {
+ dv->dv_data_version = oio->oi_oa.o_data_version;
+ slice->cis_io->ci_result = 0;
+ }
+}
+
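CIT_DATA_VERSION splits into an asynchronous start half and a blocking end half: start queues an OST_GETATTR whose interpret callback records rc and signals a completion, and end waits on it before copying out the version. A pthread model of that start/interpret/end handshake (names illustrative):

#include <pthread.h>
#include <stdio.h>

struct dv_args {
	pthread_mutex_t lock;
	pthread_cond_t done;
	int completed;
	int rc;
	unsigned long long data_version;
};

/* Runs in "ptlrpcd" context: the interpret callback records the
 * result and signals the waiter, as osc_data_version_interpret()
 * does with complete(&oio->oi_cbarg.opc_sync). */
static void *interpret(void *arg)
{
	struct dv_args *a = arg;

	pthread_mutex_lock(&a->lock);
	a->rc = 0;
	a->data_version = 42;
	a->completed = 1;
	pthread_cond_signal(&a->done);
	pthread_mutex_unlock(&a->lock);
	return NULL;
}

int main(void)
{
	struct dv_args a = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0, 0
	};
	pthread_t reply;

	/* "start": queue the request and return without blocking */
	pthread_create(&reply, NULL, interpret, &a);

	/* "end": wait_for_completion() counterpart */
	pthread_mutex_lock(&a.lock);
	while (!a.completed)
		pthread_cond_wait(&a.done, &a.lock);
	pthread_mutex_unlock(&a.lock);
	pthread_join(reply, NULL);

	printf("rc=%d version=%llu\n", a.rc, a.data_version);
	return 0;
}
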
static int osc_io_read_start(const struct lu_env *env,
const struct cl_io_slice *slice)
{
@@ -595,7 +735,6 @@ static int osc_fsync_ost(const struct lu_env *env, struct osc_object *obj,
{
struct osc_io *oio = osc_env_io(env);
struct obdo *oa = &oio->oi_oa;
- struct obd_info *oinfo = &oio->oi_info;
struct lov_oinfo *loi = obj->oo_oinfo;
struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
int rc = 0;
@@ -611,12 +750,9 @@ static int osc_fsync_ost(const struct lu_env *env, struct osc_object *obj,
obdo_set_parent_fid(oa, fio->fi_fid);
- memset(oinfo, 0, sizeof(*oinfo));
- oinfo->oi_oa = oa;
init_completion(&cbargs->opc_sync);
- rc = osc_sync_base(osc_export(obj), oinfo, osc_async_upcall, cbargs,
- PTLRPCD_SET);
+ rc = osc_sync_base(obj, oa, osc_async_upcall, cbargs, PTLRPCD_SET);
return rc;
}
@@ -710,6 +846,10 @@ static const struct cl_io_operations osc_io_ops = {
.cio_start = osc_io_setattr_start,
.cio_end = osc_io_setattr_end
},
+ [CIT_DATA_VERSION] = {
+ .cio_start = osc_io_data_version_start,
+ .cio_end = osc_io_data_version_end,
+ },
[CIT_FAULT] = {
.cio_start = osc_io_fault_start,
.cio_end = osc_io_end,
@@ -724,6 +864,7 @@ static const struct cl_io_operations osc_io_ops = {
.cio_fini = osc_io_fini
}
},
+ .cio_read_ahead = osc_io_read_ahead,
.cio_submit = osc_io_submit,
.cio_commit_async = osc_io_commit_async
};
@@ -734,103 +875,6 @@ static const struct cl_io_operations osc_io_ops = {
*
*/
-static int osc_req_prep(const struct lu_env *env,
- const struct cl_req_slice *slice)
-{
- return 0;
-}
-
-static void osc_req_completion(const struct lu_env *env,
- const struct cl_req_slice *slice, int ioret)
-{
- struct osc_req *or;
-
- or = cl2osc_req(slice);
- kmem_cache_free(osc_req_kmem, or);
-}
-
-/**
- * Implementation of struct cl_req_operations::cro_attr_set() for osc
- * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq
- * fields.
- */
-static void osc_req_attr_set(const struct lu_env *env,
- const struct cl_req_slice *slice,
- const struct cl_object *obj,
- struct cl_req_attr *attr, u64 flags)
-{
- struct lov_oinfo *oinfo;
- struct cl_req *clerq;
- struct cl_page *apage; /* _some_ page in @clerq */
- struct ldlm_lock *lock; /* _some_ lock protecting @apage */
- struct osc_page *opg;
- struct obdo *oa;
- struct ost_lvb *lvb;
-
- oinfo = cl2osc(obj)->oo_oinfo;
- lvb = &oinfo->loi_lvb;
- oa = attr->cra_oa;
-
- if ((flags & OBD_MD_FLMTIME) != 0) {
- oa->o_mtime = lvb->lvb_mtime;
- oa->o_valid |= OBD_MD_FLMTIME;
- }
- if ((flags & OBD_MD_FLATIME) != 0) {
- oa->o_atime = lvb->lvb_atime;
- oa->o_valid |= OBD_MD_FLATIME;
- }
- if ((flags & OBD_MD_FLCTIME) != 0) {
- oa->o_ctime = lvb->lvb_ctime;
- oa->o_valid |= OBD_MD_FLCTIME;
- }
- if (flags & OBD_MD_FLGROUP) {
- ostid_set_seq(&oa->o_oi, ostid_seq(&oinfo->loi_oi));
- oa->o_valid |= OBD_MD_FLGROUP;
- }
- if (flags & OBD_MD_FLID) {
- ostid_set_id(&oa->o_oi, ostid_id(&oinfo->loi_oi));
- oa->o_valid |= OBD_MD_FLID;
- }
- if (flags & OBD_MD_FLHANDLE) {
- clerq = slice->crs_req;
- LASSERT(!list_empty(&clerq->crq_pages));
- apage = container_of(clerq->crq_pages.next,
- struct cl_page, cp_flight);
- opg = osc_cl_page_osc(apage, NULL);
- lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg),
- 1, 1);
- if (!lock && !opg->ops_srvlock) {
- struct ldlm_resource *res;
- struct ldlm_res_id *resname;
-
- CL_PAGE_DEBUG(D_ERROR, env, apage, "uncovered page!\n");
-
- resname = &osc_env_info(env)->oti_resname;
- ostid_build_res_name(&oinfo->loi_oi, resname);
- res = ldlm_resource_get(
- osc_export(cl2osc(obj))->exp_obd->obd_namespace,
- NULL, resname, LDLM_EXTENT, 0);
- ldlm_resource_dump(D_ERROR, res);
-
- dump_stack();
- LBUG();
- }
-
- /* check for lockless io. */
- if (lock) {
- oa->o_handle = lock->l_remote_handle;
- oa->o_valid |= OBD_MD_FLHANDLE;
- LDLM_LOCK_PUT(lock);
- }
- }
-}
-
-static const struct cl_req_operations osc_req_ops = {
- .cro_prep = osc_req_prep,
- .cro_attr_set = osc_req_attr_set,
- .cro_completion = osc_req_completion
-};
-
int osc_io_init(const struct lu_env *env,
struct cl_object *obj, struct cl_io *io)
{
@@ -841,20 +885,4 @@ int osc_io_init(const struct lu_env *env,
return 0;
}
-int osc_req_init(const struct lu_env *env, struct cl_device *dev,
- struct cl_req *req)
-{
- struct osc_req *or;
- int result;
-
- or = kmem_cache_zalloc(osc_req_kmem, GFP_NOFS);
- if (or) {
- cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops);
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
-
/** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c
index 39a8a5851603..5f799a4c78f9 100644
--- a/drivers/staging/lustre/lustre/osc/osc_lock.c
+++ b/drivers/staging/lustre/lustre/osc/osc_lock.c
@@ -145,7 +145,7 @@ static void osc_lock_fini(const struct lu_env *env,
static void osc_lock_build_policy(const struct lu_env *env,
const struct cl_lock *lock,
- ldlm_policy_data_t *policy)
+ union ldlm_policy_data *policy)
{
const struct cl_lock_descr *d = &lock->cll_descr;
@@ -188,7 +188,7 @@ static void osc_lock_lvb_update(const struct lu_env *env,
struct cl_object *obj = osc2cl(osc);
struct lov_oinfo *oinfo = osc->oo_oinfo;
struct cl_attr *attr = &osc_env_info(env)->oti_attr;
- unsigned valid;
+ unsigned int valid;
valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE;
if (!lvb)
@@ -294,10 +294,10 @@ static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh,
struct osc_lock *oscl = cookie;
struct cl_lock_slice *slice = &oscl->ols_cl;
struct lu_env *env;
- struct cl_env_nest nest;
int rc;
+ int refcheck;
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
/* should never happen, similar to osc_ldlm_blocking_ast(). */
LASSERT(!IS_ERR(env));
@@ -336,7 +336,7 @@ static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh,
if (oscl->ols_owner)
cl_sync_io_note(env, oscl->ols_owner, rc);
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
return rc;
}
@@ -347,9 +347,9 @@ static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
struct osc_object *osc = cookie;
struct ldlm_lock *dlmlock;
struct lu_env *env;
- struct cl_env_nest nest;
+ int refcheck;
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
LASSERT(!IS_ERR(env));
if (errcode == ELDLM_LOCK_MATCHED) {
@@ -374,7 +374,7 @@ static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
out:
cl_object_put(env, osc2cl(osc));
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
return ldlm_error2errno(errcode);
}
@@ -382,11 +382,11 @@ static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end,
enum cl_lock_mode mode, int discard)
{
struct lu_env *env;
- struct cl_env_nest nest;
+ int refcheck;
int rc = 0;
int rc2 = 0;
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
if (IS_ERR(env))
return PTR_ERR(env);
@@ -404,7 +404,7 @@ static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end,
if (rc == 0 && rc2 < 0)
rc = rc2;
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
return rc;
}
@@ -536,7 +536,7 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
}
case LDLM_CB_CANCELING: {
struct lu_env *env;
- struct cl_env_nest nest;
+ int refcheck;
/*
* This can be called in the context of outer IO, e.g.,
@@ -549,14 +549,14 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
* new environment has to be created to not corrupt outer
* context.
*/
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
if (IS_ERR(env)) {
result = PTR_ERR(env);
break;
}
result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
break;
}
default:
@@ -568,61 +568,63 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
{
struct ptlrpc_request *req = data;
- struct cl_env_nest nest;
struct lu_env *env;
struct ost_lvb *lvb;
struct req_capsule *cap;
+ struct cl_object *obj = NULL;
int result;
+ int refcheck;
LASSERT(lustre_msg_get_opc(req->rq_reqmsg) == LDLM_GL_CALLBACK);
- env = cl_env_nested_get(&nest);
- if (!IS_ERR(env)) {
- struct cl_object *obj = NULL;
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env)) {
+ result = PTR_ERR(env);
+ goto out;
+ }
- lock_res_and_lock(dlmlock);
- if (dlmlock->l_ast_data) {
- obj = osc2cl(dlmlock->l_ast_data);
- cl_object_get(obj);
- }
- unlock_res_and_lock(dlmlock);
+ lock_res_and_lock(dlmlock);
+ if (dlmlock->l_ast_data) {
+ obj = osc2cl(dlmlock->l_ast_data);
+ cl_object_get(obj);
+ }
+ unlock_res_and_lock(dlmlock);
- if (obj) {
- /* Do not grab the mutex of cl_lock for glimpse.
- * See LU-1274 for details.
- * BTW, it's okay for cl_lock to be cancelled during
- * this period because server can handle this race.
- * See ldlm_server_glimpse_ast() for details.
- * cl_lock_mutex_get(env, lock);
- */
- cap = &req->rq_pill;
- req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK);
- req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER,
- sizeof(*lvb));
- result = req_capsule_server_pack(cap);
- if (result == 0) {
- lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
- result = cl_object_glimpse(env, obj, lvb);
- }
- if (!exp_connect_lvb_type(req->rq_export))
- req_capsule_shrink(&req->rq_pill,
- &RMF_DLM_LVB,
- sizeof(struct ost_lvb_v1),
- RCL_SERVER);
- cl_object_put(env, obj);
- } else {
- /*
- * These errors are normal races, so we don't want to
- * fill the console with messages by calling
- * ptlrpc_error()
- */
- lustre_pack_reply(req, 1, NULL, NULL);
- result = -ELDLM_NO_LOCK_DATA;
+ if (obj) {
+ /* Do not grab the mutex of cl_lock for glimpse.
+ * See LU-1274 for details.
+ * BTW, it's okay for cl_lock to be cancelled during
+ * this period because server can handle this race.
+ * See ldlm_server_glimpse_ast() for details.
+ * cl_lock_mutex_get(env, lock);
+ */
+ cap = &req->rq_pill;
+ req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK);
+ req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER,
+ sizeof(*lvb));
+ result = req_capsule_server_pack(cap);
+ if (result == 0) {
+ lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
+ result = cl_object_glimpse(env, obj, lvb);
+ }
+ if (!exp_connect_lvb_type(req->rq_export)) {
+ req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB,
+ sizeof(struct ost_lvb_v1),
+ RCL_SERVER);
}
- cl_env_nested_put(&nest, env);
+ cl_object_put(env, obj);
} else {
- result = PTR_ERR(env);
+ /*
+ * These errors are normal races, so we don't want to
+ * fill the console with messages by calling
+ * ptlrpc_error()
+ */
+ lustre_pack_reply(req, 1, NULL, NULL);
+ result = -ELDLM_NO_LOCK_DATA;
}
+ cl_env_put(env, &refcheck);
+
+out:
req->rq_status = result;
return result;
}
@@ -677,12 +679,12 @@ static unsigned long osc_lock_weight(const struct lu_env *env,
*/
unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
{
- struct cl_env_nest nest;
struct lu_env *env;
struct osc_object *obj;
struct osc_lock *oscl;
unsigned long weight;
bool found = false;
+ int refcheck;
might_sleep();
/*
@@ -692,7 +694,7 @@ unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
* the upper context because cl_lock_put don't modify environment
* variables. But just in case ..
*/
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
if (IS_ERR(env))
/* Mostly because lack of memory, do not eliminate this lock */
return 1;
@@ -722,7 +724,7 @@ unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent);
out:
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
return weight;
}
@@ -912,7 +914,7 @@ static int osc_lock_enqueue(const struct lu_env *env,
struct osc_lock *oscl = cl2osc_lock(slice);
struct cl_lock *lock = slice->cls_lock;
struct ldlm_res_id *resname = &info->oti_resname;
- ldlm_policy_data_t *policy = &info->oti_policy;
+ union ldlm_policy_data *policy = &info->oti_policy;
osc_enqueue_upcall_f upcall = osc_lock_upcall;
void *cookie = oscl;
bool async = false;
@@ -1009,7 +1011,7 @@ static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
if (olck->ols_hold) {
olck->ols_hold = 0;
- osc_cancel_base(&olck->ols_handle, olck->ols_einfo.ei_mode);
+ ldlm_lock_decref(&olck->ols_handle, olck->ols_einfo.ei_mode);
olck->ols_handle.cookie = 0ULL;
}
@@ -1180,11 +1182,11 @@ int osc_lock_init(const struct lu_env *env,
*/
struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
struct osc_object *obj, pgoff_t index,
- int pending, int canceling)
+ enum osc_dap_flags dap_flags)
{
struct osc_thread_info *info = osc_env_info(env);
struct ldlm_res_id *resname = &info->oti_resname;
- ldlm_policy_data_t *policy = &info->oti_policy;
+ union ldlm_policy_data *policy = &info->oti_policy;
struct lustre_handle lockh;
struct ldlm_lock *lock = NULL;
enum ldlm_mode mode;
@@ -1194,17 +1196,18 @@ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
osc_index2policy(policy, osc2cl(obj), index, index);
policy->l_extent.gid = LDLM_GID_ANY;
- flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
- if (pending)
- flags |= LDLM_FL_CBPENDING;
+ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
+ if (dap_flags & OSC_DAP_FL_TEST_LOCK)
+ flags |= LDLM_FL_TEST_LOCK;
+
/*
* It is fine to match any group lock since there could be only one
* with a unique gid, and it conflicts with all other lock modes too
*/
again:
- mode = ldlm_lock_match(osc_export(obj)->exp_obd->obd_namespace,
- flags, resname, LDLM_EXTENT, policy,
- LCK_PR | LCK_PW | LCK_GROUP, &lockh, canceling);
+ mode = osc_match_base(osc_export(obj), resname, LDLM_EXTENT, policy,
+ LCK_PR | LCK_PW | LCK_GROUP, &flags, obj, &lockh,
+ dap_flags & OSC_DAP_FL_CANCELING);
if (mode != 0) {
lock = ldlm_handle2lock(&lockh);
/* RACE: the lock is cancelled so let's try again */
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
index aae3a2d4243f..e0c3324857dd 100644
--- a/drivers/staging/lustre/lustre/osc/osc_object.c
+++ b/drivers/staging/lustre/lustre/osc/osc_object.c
@@ -71,13 +71,8 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
{
struct osc_object *osc = lu2osc(obj);
const struct cl_object_conf *cconf = lu2cl_conf(conf);
- int i;
osc->oo_oinfo = cconf->u.coc_oinfo;
- spin_lock_init(&osc->oo_seatbelt);
- for (i = 0; i < CRT_NR; ++i)
- INIT_LIST_HEAD(&osc->oo_inflight[i]);
-
INIT_LIST_HEAD(&osc->oo_ready_item);
INIT_LIST_HEAD(&osc->oo_hp_ready_item);
INIT_LIST_HEAD(&osc->oo_write_item);
@@ -103,10 +98,6 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
{
struct osc_object *osc = lu2osc(obj);
- int i;
-
- for (i = 0; i < CRT_NR; ++i)
- LASSERT(list_empty(&osc->oo_inflight[i]));
LASSERT(list_empty(&osc->oo_ready_item));
LASSERT(list_empty(&osc->oo_hp_ready_item));
@@ -218,6 +209,94 @@ static int osc_object_prune(const struct lu_env *env, struct cl_object *obj)
return 0;
}
+static int osc_object_fiemap(const struct lu_env *env, struct cl_object *obj,
+ struct ll_fiemap_info_key *fmkey,
+ struct fiemap *fiemap, size_t *buflen)
+{
+ struct obd_export *exp = osc_export(cl2osc(obj));
+ union ldlm_policy_data policy;
+ struct ptlrpc_request *req;
+ struct lustre_handle lockh;
+ struct ldlm_res_id resid;
+ enum ldlm_mode mode = 0;
+ struct fiemap *reply;
+ char *tmp;
+ int rc;
+
+ fmkey->lfik_oa.o_oi = cl2osc(obj)->oo_oinfo->loi_oi;
+ if (!(fmkey->lfik_fiemap.fm_flags & FIEMAP_FLAG_SYNC))
+ goto skip_locking;
+
+ policy.l_extent.start = fmkey->lfik_fiemap.fm_start & PAGE_MASK;
+
+ if (OBD_OBJECT_EOF - fmkey->lfik_fiemap.fm_length <=
+ fmkey->lfik_fiemap.fm_start + PAGE_SIZE - 1)
+ policy.l_extent.end = OBD_OBJECT_EOF;
+ else
+ policy.l_extent.end = (fmkey->lfik_fiemap.fm_start +
+ fmkey->lfik_fiemap.fm_length +
+ PAGE_SIZE - 1) & PAGE_MASK;
+
+ ostid_build_res_name(&fmkey->lfik_oa.o_oi, &resid);
+ mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
+ LDLM_FL_BLOCK_GRANTED | LDLM_FL_LVB_READY,
+ &resid, LDLM_EXTENT, &policy,
+ LCK_PR | LCK_PW, &lockh, 0);
+ if (mode) { /* lock is cached on client */
+ if (mode != LCK_PR) {
+ ldlm_lock_addref(&lockh, LCK_PR);
+ ldlm_lock_decref(&lockh, LCK_PW);
+ }
+ } else { /* no cached lock, need to acquire lock on server side */
+ fmkey->lfik_oa.o_valid |= OBD_MD_FLFLAGS;
+ fmkey->lfik_oa.o_flags |= OBD_FL_SRVLOCK;
+ }
+
+skip_locking:
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_OST_GET_INFO_FIEMAP);
+ if (!req) {
+ rc = -ENOMEM;
+ goto drop_lock;
+ }
+
+ req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY, RCL_CLIENT,
+ sizeof(*fmkey));
+ req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_CLIENT,
+ *buflen);
+ req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_SERVER,
+ *buflen);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
+ if (rc) {
+ ptlrpc_request_free(req);
+ goto drop_lock;
+ }
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY);
+ memcpy(tmp, fmkey, sizeof(*fmkey));
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL);
+ memcpy(tmp, fiemap, *buflen);
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ goto fini_req;
+
+ reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL);
+ if (!reply) {
+ rc = -EPROTO;
+ goto fini_req;
+ }
+
+ memcpy(fiemap, reply, *buflen);
+fini_req:
+ ptlrpc_req_finished(req);
+drop_lock:
+ if (mode)
+ ldlm_lock_decref(&lockh, LCK_PR);
+ return rc;
+}
+
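osc_object_fiemap() prefers a client-cached extent lock, converting a PW hold to PR for the duration, and only requests server-side locking (OBD_FL_SRVLOCK) when nothing is cached; the fiemap buffer then rides in both directions of one OST_GET_INFO request. The lock-reuse decision as a sketch:

#include <stdbool.h>
#include <stdio.h>

enum lock_mode_model { M_NONE, M_PR, M_PW };

/* Decision mirrored from osc_object_fiemap(): reuse a cached lock
 * if one matches, converting PW to PR; otherwise fall back to a
 * server-side lock (OBD_FL_SRVLOCK in the real code). Returns true
 * when server-side locking is needed. */
static bool fiemap_lock_plan(enum lock_mode_model cached,
			     enum lock_mode_model *held)
{
	if (cached == M_NONE) {
		*held = M_NONE;
		return true;
	}
	/* A cached PW lock is downgraded for the duration
	 * (ldlm_lock_addref PR + decref PW in the real code). */
	*held = M_PR;
	return false;
}

int main(void)
{
	enum lock_mode_model held;

	printf("srvlock=%d held=%d\n", fiemap_lock_plan(M_PW, &held), held);
	printf("srvlock=%d held=%d\n", fiemap_lock_plan(M_NONE, &held), held);
	return 0;
}
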
void osc_object_set_contended(struct osc_object *obj)
{
obj->oo_contention_time = cfs_time_current();
@@ -256,6 +335,76 @@ int osc_object_is_contended(struct osc_object *obj)
return 1;
}
+/**
+ * Implementation of struct cl_object_operations::coo_req_attr_set() for osc
+ * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq
+ * fields.
+ */
+static void osc_req_attr_set(const struct lu_env *env, struct cl_object *obj,
+ struct cl_req_attr *attr)
+{
+ u64 flags = attr->cra_flags;
+ struct lov_oinfo *oinfo;
+ struct ost_lvb *lvb;
+ struct obdo *oa;
+
+ oinfo = cl2osc(obj)->oo_oinfo;
+ lvb = &oinfo->loi_lvb;
+ oa = attr->cra_oa;
+
+ if (flags & OBD_MD_FLMTIME) {
+ oa->o_mtime = lvb->lvb_mtime;
+ oa->o_valid |= OBD_MD_FLMTIME;
+ }
+ if (flags & OBD_MD_FLATIME) {
+ oa->o_atime = lvb->lvb_atime;
+ oa->o_valid |= OBD_MD_FLATIME;
+ }
+ if (flags & OBD_MD_FLCTIME) {
+ oa->o_ctime = lvb->lvb_ctime;
+ oa->o_valid |= OBD_MD_FLCTIME;
+ }
+ if (flags & OBD_MD_FLGROUP) {
+ ostid_set_seq(&oa->o_oi, ostid_seq(&oinfo->loi_oi));
+ oa->o_valid |= OBD_MD_FLGROUP;
+ }
+ if (flags & OBD_MD_FLID) {
+ ostid_set_id(&oa->o_oi, ostid_id(&oinfo->loi_oi));
+ oa->o_valid |= OBD_MD_FLID;
+ }
+ if (flags & OBD_MD_FLHANDLE) {
+ struct ldlm_lock *lock;
+ struct osc_page *opg;
+
+ opg = osc_cl_page_osc(attr->cra_page, cl2osc(obj));
+ lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg),
+ OSC_DAP_FL_TEST_LOCK | OSC_DAP_FL_CANCELING);
+ if (!lock && !opg->ops_srvlock) {
+ struct ldlm_resource *res;
+ struct ldlm_res_id *resname;
+
+ CL_PAGE_DEBUG(D_ERROR, env, attr->cra_page,
+ "uncovered page!\n");
+
+ resname = &osc_env_info(env)->oti_resname;
+ ostid_build_res_name(&oinfo->loi_oi, resname);
+ res = ldlm_resource_get(
+ osc_export(cl2osc(obj))->exp_obd->obd_namespace,
+ NULL, resname, LDLM_EXTENT, 0);
+ ldlm_resource_dump(D_ERROR, res);
+
+ LBUG();
+ }
+
+ /* check for lockless io. */
+ if (lock) {
+ oa->o_handle = lock->l_remote_handle;
+ oa->o_valid |= OBD_MD_FLHANDLE;
+ LDLM_LOCK_PUT(lock);
+ }
+ }
+}
+
static const struct cl_object_operations osc_ops = {
.coo_page_init = osc_page_init,
.coo_lock_init = osc_lock_init,
@@ -263,7 +412,9 @@ static const struct cl_object_operations osc_ops = {
.coo_attr_get = osc_attr_get,
.coo_attr_update = osc_attr_update,
.coo_glimpse = osc_object_glimpse,
- .coo_prune = osc_object_prune
+ .coo_prune = osc_object_prune,
+ .coo_fiemap = osc_object_fiemap,
+ .coo_req_attr_set = osc_req_attr_set
};
static const struct lu_object_operations osc_lu_obj_ops = {
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
index 2a7a70aa9e80..e356e4af08e1 100644
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c
@@ -37,6 +37,7 @@
#define DEBUG_SUBSYSTEM S_OSC
+#include <linux/math64.h>
#include "osc_cl_internal.h"
static void osc_lru_del(struct client_obd *cli, struct osc_page *opg);
@@ -86,11 +87,6 @@ static void osc_page_transfer_add(const struct lu_env *env,
struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
osc_lru_use(osc_cli(obj), opg);
-
- spin_lock(&obj->oo_seatbelt);
- list_add(&opg->ops_inflight, &obj->oo_inflight[crt]);
- opg->ops_submitter = current;
- spin_unlock(&obj->oo_seatbelt);
}
int osc_page_cache_add(const struct lu_env *env,
@@ -109,7 +105,8 @@ int osc_page_cache_add(const struct lu_env *env,
return result;
}
-void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
+void osc_index2policy(union ldlm_policy_data *policy,
+ const struct cl_object *obj,
pgoff_t start, pgoff_t end)
{
memset(policy, 0, sizeof(*policy));
@@ -117,25 +114,6 @@ void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
policy->l_extent.end = cl_offset(obj, end + 1) - 1;
}
-static int osc_page_is_under_lock(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused, pgoff_t *max_index)
-{
- struct osc_page *opg = cl2osc_page(slice);
- struct ldlm_lock *dlmlock;
- int result = -ENODATA;
-
- dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj),
- osc_index(opg), 1, 0);
- if (dlmlock) {
- *max_index = cl_index(slice->cpl_obj,
- dlmlock->l_policy_data.l_extent.end);
- LDLM_LOCK_PUT(dlmlock);
- result = 0;
- }
- return result;
-}
-
static const char *osc_list(struct list_head *head)
{
return list_empty(head) ? "-" : "+";
@@ -158,7 +136,7 @@ static int osc_page_print(const struct lu_env *env,
struct osc_object *obj = cl2osc(slice->cpl_obj);
struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli;
- return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
+ return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
opg, osc_index(opg),
/* 1 */
oap->oap_magic, oap->oap_cmd,
@@ -170,8 +148,7 @@ static int osc_page_print(const struct lu_env *env,
oap->oap_async_flags, oap->oap_brw_flags,
oap->oap_request, oap->oap_cli, obj,
/* 3 */
- osc_list(&opg->ops_inflight),
- opg->ops_submitter, opg->ops_transfer_pinned,
+ opg->ops_transfer_pinned,
osc_submit_duration(opg), opg->ops_srvlock,
/* 4 */
cli->cl_r_in_flight, cli->cl_w_in_flight,
@@ -210,14 +187,6 @@ static void osc_page_delete(const struct lu_env *env,
LASSERT(0);
}
- spin_lock(&obj->oo_seatbelt);
- if (opg->ops_submitter) {
- LASSERT(!list_empty(&opg->ops_inflight));
- list_del_init(&opg->ops_inflight);
- opg->ops_submitter = NULL;
- }
- spin_unlock(&obj->oo_seatbelt);
-
osc_lru_del(osc_cli(obj), opg);
if (slice->cpl_page->cp_type == CPT_CACHEABLE) {
@@ -276,7 +245,6 @@ static int osc_page_flush(const struct lu_env *env,
static const struct cl_page_operations osc_page_ops = {
.cpo_print = osc_page_print,
.cpo_delete = osc_page_delete,
- .cpo_is_under_lock = osc_page_is_under_lock,
.cpo_clip = osc_page_clip,
.cpo_cancel = osc_page_cancel,
.cpo_flush = osc_page_flush
@@ -301,10 +269,6 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
cl_page_slice_add(page, &opg->ops_cl, obj, index,
&osc_page_ops);
}
- /* ops_inflight and ops_lru are the same field, but it doesn't
- * hurt to initialize it twice :-)
- */
- INIT_LIST_HEAD(&opg->ops_inflight);
INIT_LIST_HEAD(&opg->ops_lru);
/* reserve an LRU space for this page */
@@ -362,16 +326,27 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
* OSC to free slots voluntarily to maintain a reasonable number of free slots
* at any time.
*/
-
static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq);
-/* LRU pages are freed in batch mode. OSC should at least free this
- * number of pages to avoid running out of LRU budget, and..
+
+/**
+ * LRU pages are freed in batches. OSC should free at least this
+ * number of pages to avoid running out of LRU slots.
+ */
+static inline int lru_shrink_min(struct client_obd *cli)
+{
+ return cli->cl_max_pages_per_rpc * 2;
+}
+
+/**
+ * Free at most this number of pages, otherwise it will take too long to finish.
*/
-static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT); /* 2M */
-/* free this number at most otherwise it will take too long time to finish. */
-static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */
+static inline int lru_shrink_max(struct client_obd *cli)
+{
+ return cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
+}
-/* Check if we can free LRU slots from this OSC. If there exists LRU waiters,
+/**
+ * Check if we can free LRU slots from this OSC. If there are LRU waiters,
* we should free slots aggressively. In this way, slots are freed in a steady
* step to maintain fairness among OSCs.
*
@@ -388,13 +363,20 @@ static int osc_cache_too_much(struct client_obd *cli)
/* if it's going to run out of LRU slots, we should free some, but not
* too much to maintain fairness among OSCs.
*/
- if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) {
+ if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 2) {
if (pages >= budget)
- return lru_shrink_max;
+ return lru_shrink_max(cli);
else if (pages >= budget / 2)
- return lru_shrink_min;
- } else if (pages >= budget * 2) {
- return lru_shrink_min;
+ return lru_shrink_min(cli);
+ } else {
+ time64_t duration = ktime_get_real_seconds();
+
+ /* knock out pages by duration of no IO activity */
+ duration -= cli->cl_lru_last_used;
+ duration >>= 6; /* approximately 1 minute */
+ if (duration > 0 &&
+ pages >= div64_s64((s64)budget, duration))
+ return lru_shrink_min(cli);
}
return 0;
}
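
The new else-branch trims idle clients: duration counts the seconds since cl_lru_last_used, duration >> 6 buckets that into roughly one-minute units, and the client is shrunk once pages >= budget / buckets. For example, after about 128 idle seconds buckets is 2, so holding half the budget already qualifies. A quick check of the arithmetic:

#include <stdio.h>

/* Reproduces the idle check in osc_cache_too_much(): trim once
 * pages >= budget / (idle_seconds / 64). */
static int should_trim(long pages, long budget, long long idle_seconds)
{
	long long buckets = idle_seconds >> 6;	/* ~1-minute units */

	return buckets > 0 && pages >= budget / buckets;
}

int main(void)
{
	/* 128 s idle -> 2 buckets -> threshold budget/2 */
	printf("%d\n", should_trim(500, 1000, 128));	/* 1 */
	/* just became idle: no trimming yet */
	printf("%d\n", should_trim(999, 1000, 30));	/* 0 */
	return 0;
}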
@@ -402,11 +384,21 @@ static int osc_cache_too_much(struct client_obd *cli)
int lru_queue_work(const struct lu_env *env, void *data)
{
struct client_obd *cli = data;
+ int count;
- CDEBUG(D_CACHE, "Run LRU work for client obd %p.\n", cli);
+ CDEBUG(D_CACHE, "%s: run LRU work for client obd\n", cli_name(cli));
- if (osc_cache_too_much(cli))
- osc_lru_shrink(env, cli, lru_shrink_max, true);
+ count = osc_cache_too_much(cli);
+ if (count > 0) {
+ int rc = osc_lru_shrink(env, cli, count, false);
+
+ CDEBUG(D_CACHE, "%s: shrank %d/%d pages from client obd\n",
+ cli_name(cli), rc, count);
+ if (rc >= count) {
+ CDEBUG(D_CACHE, "%s: queue again\n", cli_name(cli));
+ ptlrpcd_queue_work(cli->cl_lru_work);
+ }
+ }
return 0;
}
@@ -433,10 +425,10 @@ void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
list_splice_tail(&lru, &cli->cl_lru_list);
atomic_long_sub(npages, &cli->cl_lru_busy);
atomic_long_add(npages, &cli->cl_lru_in_list);
+ cli->cl_lru_last_used = ktime_get_real_seconds();
spin_unlock(&cli->cl_lru_list_lock);
- /* XXX: May set force to be true for better performance */
- if (osc_cache_too_much(cli))
+ if (waitqueue_active(&osc_lru_waitq))
(void)ptlrpcd_queue_work(cli->cl_lru_work);
}
}
@@ -469,8 +461,10 @@ static void osc_lru_del(struct client_obd *cli, struct osc_page *opg)
* this osc occupies too many LRU pages and kernel is
* stealing one of them.
*/
- if (!memory_pressure_get())
+ if (osc_cache_too_much(cli)) {
+ CDEBUG(D_CACHE, "%s: queue LRU work\n", cli_name(cli));
(void)ptlrpcd_queue_work(cli->cl_lru_work);
+ }
wake_up(&osc_lru_waitq);
} else {
LASSERT(list_empty(&opg->ops_lru));
@@ -502,6 +496,7 @@ static void discard_pagevec(const struct lu_env *env, struct cl_io *io,
struct cl_page *page = pvec[i];
LASSERT(cl_page_is_owned(page, io));
+ cl_page_delete(env, page);
cl_page_discard(env, io, page);
cl_page_disown(env, io, page);
cl_page_put(env, page);
@@ -542,7 +537,6 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
struct cl_object *clobj = NULL;
struct cl_page **pvec;
struct osc_page *opg;
- struct osc_page *temp;
int maxscan = 0;
long count = 0;
int index = 0;
@@ -552,6 +546,8 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
if (atomic_long_read(&cli->cl_lru_in_list) == 0 || target <= 0)
return 0;
+ CDEBUG(D_CACHE, "%s: shrinkers: %d, force: %d\n",
+ cli_name(cli), atomic_read(&cli->cl_lru_shrinkers), force);
if (!force) {
if (atomic_read(&cli->cl_lru_shrinkers) > 0)
return -EBUSY;
@@ -568,14 +564,21 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
io = &osc_env_info(env)->oti_io;
spin_lock(&cli->cl_lru_list_lock);
+ if (force)
+ cli->cl_lru_reclaim++;
maxscan = min(target << 1, atomic_long_read(&cli->cl_lru_in_list));
- list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) {
+ while (!list_empty(&cli->cl_lru_list)) {
struct cl_page *page;
bool will_free = false;
+ if (!force && atomic_read(&cli->cl_lru_shrinkers) > 1)
+ break;
+
if (--maxscan < 0)
break;
+ opg = list_entry(cli->cl_lru_list.next, struct osc_page,
+ ops_lru);
page = opg->ops_cl.cpl_page;
if (lru_page_busy(cli, page)) {
list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
@@ -662,34 +665,43 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
return count > 0 ? count : rc;
}
-long osc_lru_reclaim(struct client_obd *cli)
+/**
+ * Reclaim LRU pages by an IO thread. The caller wants to reclaim at least
+ * \@npages of LRU slots. For performance reasons, it's better to drop
+ * LRU pages in batches, so the actual number reclaimed is adjusted to at
+ * least max_pages_per_rpc.
+ */
+long osc_lru_reclaim(struct client_obd *cli, unsigned long npages)
{
- struct cl_env_nest nest;
struct lu_env *env;
struct cl_client_cache *cache = cli->cl_cache;
int max_scans;
+ int refcheck;
long rc = 0;
LASSERT(cache);
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
if (IS_ERR(env))
return 0;
- rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), false);
- if (rc != 0) {
- if (rc == -EBUSY)
- rc = 0;
-
- CDEBUG(D_CACHE, "%s: Free %ld pages from own LRU: %p.\n",
- cli->cl_import->imp_obd->obd_name, rc, cli);
+ npages = max_t(int, npages, cli->cl_max_pages_per_rpc);
+ CDEBUG(D_CACHE, "%s: start to reclaim %ld pages from LRU\n",
+ cli_name(cli), npages);
+ rc = osc_lru_shrink(env, cli, npages, true);
+ if (rc >= npages) {
+ CDEBUG(D_CACHE, "%s: reclaimed %ld/%ld pages from LRU\n",
+ cli_name(cli), rc, npages);
+ if (osc_cache_too_much(cli) > 0)
+ ptlrpcd_queue_work(cli->cl_lru_work);
goto out;
+ } else if (rc > 0) {
+ npages -= rc;
}
- CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld, busy: %ld.\n",
- cli->cl_import->imp_obd->obd_name, cli,
- atomic_long_read(&cli->cl_lru_in_list),
- atomic_long_read(&cli->cl_lru_busy));
+ CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld/%ld, want: %ld\n",
+ cli_name(cli), cli, atomic_long_read(&cli->cl_lru_in_list),
+ atomic_long_read(&cli->cl_lru_busy), npages);
/* Reclaim LRU slots from other client_obd as it can't free enough
* from its own. This should rarely happen.
@@ -706,7 +718,7 @@ long osc_lru_reclaim(struct client_obd *cli)
cl_lru_osc);
CDEBUG(D_CACHE, "%s: cli %p LRU pages: %ld, busy: %ld.\n",
- cli->cl_import->imp_obd->obd_name, cli,
+ cli_name(cli), cli,
atomic_long_read(&cli->cl_lru_in_list),
atomic_long_read(&cli->cl_lru_busy));
@@ -714,19 +726,20 @@ long osc_lru_reclaim(struct client_obd *cli)
if (osc_cache_too_much(cli) > 0) {
spin_unlock(&cache->ccc_lru_lock);
- rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli),
- true);
+ rc = osc_lru_shrink(env, cli, npages, true);
spin_lock(&cache->ccc_lru_lock);
- if (rc != 0)
+ if (rc >= npages)
break;
+ if (rc > 0)
+ npages -= rc;
}
}
spin_unlock(&cache->ccc_lru_lock);
out:
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
CDEBUG(D_CACHE, "%s: cli %p freed %ld pages.\n",
- cli->cl_import->imp_obd->obd_name, cli, rc);
+ cli_name(cli), cli, rc);
return rc;
}
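
osc_lru_reclaim() now takes the number of pages the caller actually needs, rounds it up to one RPC's worth, shrinks its own list first, and only walks the other client_obds on the shared cache while it still owes pages. The owed-pages accounting in a runnable sketch (the helper is a stand-in for osc_lru_shrink()):

#include <stdio.h>

/* Stand-in for osc_lru_shrink() on one client: frees up to
 * @avail pages. */
static long shrink_one(long want, long avail)
{
	return want < avail ? want : avail;
}

/* Accounting shape of osc_lru_reclaim(): round up to one RPC,
 * then subtract whatever each pass frees until satisfied. */
static long reclaim(long npages, long max_pages_per_rpc,
		    const long *others, int n)
{
	long freed, total = 0;
	int i;

	if (npages < max_pages_per_rpc)
		npages = max_pages_per_rpc;

	for (i = 0; i < n && npages > 0; i++) {
		freed = shrink_one(npages, others[i]);
		npages -= freed;
		total += freed;
	}
	return total;
}

int main(void)
{
	long pools[] = { 40, 100, 300 };

	/* ask for 10, but one RPC is 256 pages wide */
	printf("reclaimed %ld\n", reclaim(10, 256, pools, 3));
	return 0;
}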
@@ -756,7 +769,7 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
LASSERT(atomic_long_read(cli->cl_lru_left) >= 0);
while (!atomic_long_add_unless(cli->cl_lru_left, -1, 0)) {
/* run out of LRU spaces, try to drop some by itself */
- rc = osc_lru_reclaim(cli);
+ rc = osc_lru_reclaim(cli, 1);
if (rc < 0)
break;
if (rc > 0)
@@ -796,8 +809,10 @@ static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc,
int count = 0;
int i;
+ LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
+
for (i = 0; i < page_count; i++) {
- pg_data_t *pgdat = page_pgdat(desc->bd_iov[i].bv_page);
+ pg_data_t *pgdat = page_pgdat(BD_GET_KIOV(desc, i).bv_page);
if (likely(pgdat == last)) {
++count;
@@ -857,7 +872,7 @@ void osc_dec_unstable_pages(struct ptlrpc_request *req)
if (!unstable_count)
wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
- if (osc_cache_too_much(cli))
+ if (waitqueue_active(&osc_lru_waitq))
(void)ptlrpcd_queue_work(cli->cl_lru_work);
}
@@ -913,8 +928,7 @@ bool osc_over_unstable_soft_limit(struct client_obd *cli)
CDEBUG(D_CACHE,
"%s: cli: %p unstable pages: %lu, osc unstable pages: %lu\n",
- cli->cl_import->imp_obd->obd_name, cli,
- unstable_nr, osc_unstable_count);
+ cli_name(cli), cli, unstable_nr, osc_unstable_count);
/*
* If the LRU slots are in shortage - 25% remaining AND this OSC
diff --git a/drivers/staging/lustre/lustre/osc/osc_quota.c b/drivers/staging/lustre/lustre/osc/osc_quota.c
index 194d8ede40a2..fed4da63ee45 100644
--- a/drivers/staging/lustre/lustre/osc/osc_quota.c
+++ b/drivers/staging/lustre/lustre/osc/osc_quota.c
@@ -106,7 +106,7 @@ int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
}
CDEBUG(D_QUOTA, "%s: setdq to insert for %s %d (%d)\n",
- cli->cl_import->imp_obd->obd_name,
+ cli_name(cli),
type == USRQUOTA ? "user" : "group",
qid[type], rc);
} else {
@@ -122,7 +122,7 @@ int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
kmem_cache_free(osc_quota_kmem, oqi);
CDEBUG(D_QUOTA, "%s: setdq to remove for %s %d (%p)\n",
- cli->cl_import->imp_obd->obd_name,
+ cli_name(cli),
type == USRQUOTA ? "user" : "group",
qid[type], oqi);
}
@@ -134,8 +134,8 @@ int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
/*
* Hash operations for uid/gid <-> osc_quota_info
*/
-static unsigned
-oqi_hashfn(struct cfs_hash *hs, const void *key, unsigned mask)
+static unsigned int
+oqi_hashfn(struct cfs_hash *hs, const void *key, unsigned int mask)
{
return cfs_hash_u32_hash(*((__u32 *)key), mask);
}
@@ -281,47 +281,3 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
return rc;
}
-
-int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
- struct obd_quotactl *oqctl)
-{
- struct client_obd *cli = &exp->exp_obd->u.cli;
- struct ptlrpc_request *req;
- struct obd_quotactl *body;
- int rc;
-
- req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
- &RQF_OST_QUOTACHECK, LUSTRE_OST_VERSION,
- OST_QUOTACHECK);
- if (!req)
- return -ENOMEM;
-
- body = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
- *body = *oqctl;
-
- ptlrpc_request_set_replen(req);
-
- /* the next poll will find -ENODATA, that means quotacheck is going on
- */
- cli->cl_qchk_stat = -ENODATA;
- rc = ptlrpc_queue_wait(req);
- if (rc)
- cli->cl_qchk_stat = rc;
- ptlrpc_req_finished(req);
- return rc;
-}
-
-int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk)
-{
- struct client_obd *cli = &exp->exp_obd->u.cli;
- int rc;
-
- qchk->obd_uuid = cli->cl_target_uuid;
- memcpy(qchk->obd_type, LUSTRE_OST_NAME, strlen(LUSTRE_OST_NAME));
-
- rc = cli->cl_qchk_stat;
- /* the client is not the previous one */
- if (rc == CL_NOT_QUOTACHECKED)
- rc = -EINTR;
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 749781f022e2..7143564ae7e7 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -68,7 +68,6 @@ struct osc_brw_async_args {
struct client_obd *aa_cli;
struct list_head aa_oaps;
struct list_head aa_exts;
- struct cl_req *aa_clerq;
};
struct osc_async_args {
@@ -82,7 +81,8 @@ struct osc_setattr_args {
};
struct osc_fsync_args {
- struct obd_info *fa_oi;
+ struct osc_object *fa_obj;
+ struct obdo *fa_oa;
obd_enqueue_update_f fa_upcall;
void *fa_cookie;
};
@@ -103,140 +103,19 @@ static void osc_release_ppga(struct brw_page **ppga, u32 count);
static int brw_interpret(const struct lu_env *env,
struct ptlrpc_request *req, void *data, int rc);
-/* Unpack OSC object metadata from disk storage (LE byte order). */
-static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
- struct lov_mds_md *lmm, int lmm_bytes)
-{
- int lsm_size;
- struct obd_import *imp = class_exp2cliimp(exp);
-
- if (lmm) {
- if (lmm_bytes < sizeof(*lmm)) {
- CERROR("%s: lov_mds_md too small: %d, need %d\n",
- exp->exp_obd->obd_name, lmm_bytes,
- (int)sizeof(*lmm));
- return -EINVAL;
- }
- /* XXX LOV_MAGIC etc check? */
-
- if (unlikely(ostid_id(&lmm->lmm_oi) == 0)) {
- CERROR("%s: zero lmm_object_id: rc = %d\n",
- exp->exp_obd->obd_name, -EINVAL);
- return -EINVAL;
- }
- }
-
- lsm_size = lov_stripe_md_size(1);
- if (!lsmp)
- return lsm_size;
-
- if (*lsmp && !lmm) {
- kfree((*lsmp)->lsm_oinfo[0]);
- kfree(*lsmp);
- *lsmp = NULL;
- return 0;
- }
-
- if (!*lsmp) {
- *lsmp = kzalloc(lsm_size, GFP_NOFS);
- if (unlikely(!*lsmp))
- return -ENOMEM;
- (*lsmp)->lsm_oinfo[0] = kzalloc(sizeof(struct lov_oinfo),
- GFP_NOFS);
- if (unlikely(!(*lsmp)->lsm_oinfo[0])) {
- kfree(*lsmp);
- return -ENOMEM;
- }
- loi_init((*lsmp)->lsm_oinfo[0]);
- } else if (unlikely(ostid_id(&(*lsmp)->lsm_oi) == 0)) {
- return -EBADF;
- }
-
- if (lmm)
- /* XXX zero *lsmp? */
- ostid_le_to_cpu(&lmm->lmm_oi, &(*lsmp)->lsm_oi);
-
- if (imp &&
- (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES))
- (*lsmp)->lsm_maxbytes = imp->imp_connect_data.ocd_maxbytes;
- else
- (*lsmp)->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
-
- return lsm_size;
-}
-
static inline void osc_pack_req_body(struct ptlrpc_request *req,
- struct obd_info *oinfo)
+ struct obdo *oa)
{
struct ost_body *body;
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
LASSERT(body);
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
- oinfo->oi_oa);
-}
-
-static int osc_getattr_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- struct osc_async_args *aa, int rc)
-{
- struct ost_body *body;
-
- if (rc != 0)
- goto out;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (body) {
- CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data,
- aa->aa_oi->oi_oa, &body->oa);
-
- /* This should really be sent by the OST */
- aa->aa_oi->oi_oa->o_blksize = DT_MAX_BRW_SIZE;
- aa->aa_oi->oi_oa->o_valid |= OBD_MD_FLBLKSZ;
- } else {
- CDEBUG(D_INFO, "can't unpack ost_body\n");
- rc = -EPROTO;
- aa->aa_oi->oi_oa->o_valid = 0;
- }
-out:
- rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
- return rc;
-}
-
-static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo,
- struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req;
- struct osc_async_args *aa;
- int rc;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
- if (!req)
- return -ENOMEM;
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- return rc;
- }
-
- osc_pack_req_body(req, oinfo);
-
- ptlrpc_request_set_replen(req);
- req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_getattr_interpret;
-
- CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
- aa = ptlrpc_req_async_args(req);
- aa->aa_oi = oinfo;
-
- ptlrpc_set_add_req(set, req);
- return 0;
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
}
static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo)
+ struct obdo *oa)
{
struct ptlrpc_request *req;
struct ost_body *body;
@@ -252,7 +131,7 @@ static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
return rc;
}
- osc_pack_req_body(req, oinfo);
+ osc_pack_req_body(req, oa);
ptlrpc_request_set_replen(req);
@@ -267,11 +146,11 @@ static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
}
CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa,
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa,
&body->oa);
- oinfo->oi_oa->o_blksize = cli_brw_size(exp->exp_obd);
- oinfo->oi_oa->o_valid |= OBD_MD_FLBLKSZ;
+ oa->o_blksize = cli_brw_size(exp->exp_obd);
+ oa->o_valid |= OBD_MD_FLBLKSZ;
out:
ptlrpc_req_finished(req);
@@ -279,13 +158,13 @@ static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
}
static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo, struct obd_trans_info *oti)
+ struct obdo *oa)
{
struct ptlrpc_request *req;
struct ost_body *body;
int rc;
- LASSERT(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP);
+ LASSERT(oa->o_valid & OBD_MD_FLGROUP);
req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
if (!req)
@@ -297,7 +176,7 @@ static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
return rc;
}
- osc_pack_req_body(req, oinfo);
+ osc_pack_req_body(req, oa);
ptlrpc_request_set_replen(req);
@@ -311,7 +190,7 @@ static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
goto out;
}
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa,
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa,
&body->oa);
out:
@@ -341,10 +220,9 @@ out:
return rc;
}
-int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset)
+int osc_setattr_async(struct obd_export *exp, struct obdo *oa,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset)
{
struct ptlrpc_request *req;
struct osc_setattr_args *sa;
@@ -360,10 +238,7 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
return rc;
}
- if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
- oinfo->oi_oa->o_lcookie = *oti->oti_logcookies;
-
- osc_pack_req_body(req, oinfo);
+ osc_pack_req_body(req, oa);
ptlrpc_request_set_replen(req);
@@ -377,7 +252,7 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
sa = ptlrpc_req_async_args(req);
- sa->sa_oa = oinfo->oi_oa;
+ sa->sa_oa = oa;
sa->sa_upcall = upcall;
sa->sa_cookie = cookie;
@@ -390,16 +265,8 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
return 0;
}
-static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- struct ptlrpc_request_set *rqset)
-{
- return osc_setattr_async_base(exp, oinfo, oti,
- oinfo->oi_cb_up, oinfo, rqset);
-}
-
static int osc_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct obd_trans_info *oti)
+ struct obdo *oa)
{
struct ptlrpc_request *req;
struct ost_body *body;
@@ -428,15 +295,6 @@ static int osc_create(const struct lu_env *env, struct obd_export *exp,
ptlrpc_request_set_replen(req);
- if ((oa->o_valid & OBD_MD_FLFLAGS) &&
- oa->o_flags == OBD_FL_DELORPHAN) {
- DEBUG_REQ(D_HA, req,
- "delorphan from OST integration");
- /* Don't resend the delorphan req */
- req->rq_no_resend = 1;
- req->rq_no_delay = 1;
- }
-
rc = ptlrpc_queue_wait(req);
if (rc)
goto out_req;
@@ -453,12 +311,6 @@ static int osc_create(const struct lu_env *env, struct obd_export *exp,
oa->o_blksize = cli_brw_size(exp->exp_obd);
oa->o_valid |= OBD_MD_FLBLKSZ;
- if (oti && oa->o_valid & OBD_MD_FLCOOKIE) {
- if (!oti->oti_logcookies)
- oti->oti_logcookies = &oti->oti_onecookie;
- *oti->oti_logcookies = oa->o_lcookie;
- }
-
CDEBUG(D_HA, "transno: %lld\n",
lustre_msg_get_transno(req->rq_repmsg));
out_req:
@@ -467,7 +319,7 @@ out:
return rc;
}
-int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
+int osc_punch_base(struct obd_export *exp, struct obdo *oa,
obd_enqueue_update_f upcall, void *cookie,
struct ptlrpc_request_set *rqset)
{
@@ -491,14 +343,14 @@ int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
LASSERT(body);
lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
- oinfo->oi_oa);
+ oa);
ptlrpc_request_set_replen(req);
req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
sa = ptlrpc_req_async_args(req);
- sa->sa_oa = oinfo->oi_oa;
+ sa->sa_oa = oa;
sa->sa_upcall = upcall;
sa->sa_cookie = cookie;
if (rqset == PTLRPCD_SET)
@@ -513,8 +365,11 @@ static int osc_sync_interpret(const struct lu_env *env,
struct ptlrpc_request *req,
void *arg, int rc)
{
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
struct osc_fsync_args *fa = arg;
+ unsigned long valid = 0;
struct ost_body *body;
+ struct cl_object *obj;
if (rc)
goto out;
@@ -526,16 +381,30 @@ static int osc_sync_interpret(const struct lu_env *env,
goto out;
}
- *fa->fa_oi->oi_oa = body->oa;
+ *fa->fa_oa = body->oa;
+ obj = osc2cl(fa->fa_obj);
+
+ /* Update osc object's blocks attribute */
+ cl_object_attr_lock(obj);
+ if (body->oa.o_valid & OBD_MD_FLBLOCKS) {
+ attr->cat_blocks = body->oa.o_blocks;
+ valid |= CAT_BLOCKS;
+ }
+
+ if (valid)
+ cl_object_attr_update(env, obj, attr, valid);
+ cl_object_attr_unlock(obj);
+
out:
rc = fa->fa_upcall(fa->fa_cookie, rc);
return rc;
}
-int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
+int osc_sync_base(struct osc_object *obj, struct obdo *oa,
obd_enqueue_update_f upcall, void *cookie,
struct ptlrpc_request_set *rqset)
{
+ struct obd_export *exp = osc_export(obj);
struct ptlrpc_request *req;
struct ost_body *body;
struct osc_fsync_args *fa;
@@ -555,14 +424,15 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
LASSERT(body);
lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
- oinfo->oi_oa);
+ oa);
ptlrpc_request_set_replen(req);
req->rq_interpret_reply = osc_sync_interpret;
CLASSERT(sizeof(*fa) <= sizeof(req->rq_async_args));
fa = ptlrpc_req_async_args(req);
- fa->fa_oi = oinfo;
+ fa->fa_obj = obj;
+ fa->fa_oa = oa;
fa->fa_upcall = upcall;
fa->fa_cookie = cookie;
@@ -639,19 +509,8 @@ static int osc_can_send_destroy(struct client_obd *cli)
return 0;
}
-/* Destroy requests can be async always on the client, and we don't even really
- * care about the return code since the client cannot do anything at all about
- * a destroy failure.
- * When the MDS is unlinking a filename, it saves the file objects into a
- * recovery llog, and these object records are cancelled when the OST reports
- * they were destroyed and sync'd to disk (i.e. transaction committed).
- * If the client dies, or the OST is down when the object should be destroyed,
- * the records are not cancelled, and when the OST reconnects to the MDS next,
- * it will retrieve the llog unlink logs and then sends the log cancellation
- * cookies to the MDS after committing destroy transactions.
- */
static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct obd_trans_info *oti)
+ struct obdo *oa)
{
struct client_obd *cli = &exp->exp_obd->u.cli;
struct ptlrpc_request *req;
@@ -683,32 +542,22 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
ptlrpc_at_set_req_timeout(req);
- if (oti && oa->o_valid & OBD_MD_FLCOOKIE)
- oa->o_lcookie = *oti->oti_logcookies;
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
LASSERT(body);
lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
ptlrpc_request_set_replen(req);
- /* If osc_destroy is for destroying the unlink orphan,
- * sent from MDT to OST, which should not be blocked here,
- * because the process might be triggered by ptlrpcd, and
- * it is not good to block ptlrpcd thread (b=16006
- **/
- if (!(oa->o_flags & OBD_FL_DELORPHAN)) {
- req->rq_interpret_reply = osc_destroy_interpret;
- if (!osc_can_send_destroy(cli)) {
- struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP,
- NULL);
-
- /*
- * Wait until the number of on-going destroy RPCs drops
- * under max_rpc_in_flight
- */
- l_wait_event_exclusive(cli->cl_destroy_waitq,
- osc_can_send_destroy(cli), &lwi);
- }
+ req->rq_interpret_reply = osc_destroy_interpret;
+ if (!osc_can_send_destroy(cli)) {
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+
+ /*
+	 * Wait until the number of in-flight destroy RPCs drops
+	 * below max_rpc_in_flight
+ */
+ l_wait_event_exclusive(cli->cl_destroy_waitq,
+ osc_can_send_destroy(cli), &lwi);
}
/* Do not wait for response */
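With the DELORPHAN special case gone, every destroy now goes through the
throttle above. A minimal sketch of the same idea using the stock kernel
wait API instead of libcfs's l_wait_event_exclusive (exclusive wakeups
drain queued destroys one at a time):

	int rc;

	/* sketch only: block interruptibly until another destroy completes
	 * and osc_can_send_destroy() sees a free max_rpc_in_flight slot */
	rc = wait_event_interruptible_exclusive(cli->cl_destroy_waitq,
						osc_can_send_destroy(cli));
	if (rc)
		return rc;	/* interrupted by a signal */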
@@ -734,14 +583,13 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
oa->o_undirty = 0;
} else if (unlikely(atomic_long_read(&obd_dirty_pages) -
atomic_long_read(&obd_dirty_transit_pages) >
- (obd_max_dirty_pages + 1))) {
+ (long)(obd_max_dirty_pages + 1))) {
/* The atomic_read() allowing the atomic_inc() are
* not covered by a lock thus they may safely race and trip
* this CERROR() unless we add in a small fudge factor (+1).
*/
- CERROR("%s: dirty %ld + %ld > system dirty_max %lu\n",
- cli->cl_import->imp_obd->obd_name,
- atomic_long_read(&obd_dirty_pages),
+ CERROR("%s: dirty %ld + %ld > system dirty_max %ld\n",
+ cli_name(cli), atomic_long_read(&obd_dirty_pages),
atomic_long_read(&obd_dirty_transit_pages),
obd_max_dirty_pages);
oa->o_undirty = 0;
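The new (long) cast is load-bearing: obd_max_dirty_pages is an unsigned
quantity (the %lu to %ld format change above points the same way), and
without the cast C's usual arithmetic conversions would promote the signed
left-hand side to unsigned, so a transiently negative dirty count would
compare as a huge value and trip the error path. A tiny standalone
illustration:

	long dirty = -1;		/* dirty - transit can dip below 0 in races */
	unsigned long max = 100;

	if (dirty > max + 1)		/* dirty promoted to ULONG_MAX: true! */
		/* spurious error */;
	if (dirty > (long)(max + 1))	/* signed compare: false, as intended */
		/* correct */;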
@@ -936,12 +784,10 @@ static int osc_add_shrink_grant(struct client_obd *client)
osc_grant_shrink_grant_cb, NULL,
&client->cl_grant_shrink_list);
if (rc) {
- CERROR("add grant client %s error %d\n",
- client->cl_import->imp_obd->obd_name, rc);
+ CERROR("add grant client %s error %d\n", cli_name(client), rc);
return rc;
}
- CDEBUG(D_CACHE, "add grant client %s\n",
- client->cl_import->imp_obd->obd_name);
+ CDEBUG(D_CACHE, "add grant client %s\n", cli_name(client));
osc_update_next_shrink(client);
return 0;
}
@@ -970,23 +816,13 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
cli->cl_avail_grant = ocd->ocd_grant -
(cli->cl_dirty_pages << PAGE_SHIFT);
- if (cli->cl_avail_grant < 0) {
- CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n",
- cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant,
- ocd->ocd_grant, cli->cl_dirty_pages << PAGE_SHIFT);
- /* workaround for servers which do not have the patch from
- * LU-2679
- */
- cli->cl_avail_grant = ocd->ocd_grant;
- }
-
/* determine the appropriate chunk size used by osc_extent. */
cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize);
spin_unlock(&cli->cl_loi_list_lock);
CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n",
- cli->cl_import->imp_obd->obd_name,
- cli->cl_avail_grant, cli->cl_lost_grant, cli->cl_chunkbits);
+ cli_name(cli), cli->cl_avail_grant, cli->cl_lost_grant,
+ cli->cl_chunkbits);
if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK &&
list_empty(&cli->cl_grant_shrink_list))
@@ -1072,9 +908,9 @@ static int check_write_rcs(struct ptlrpc_request *req,
static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
{
if (p1->flag != p2->flag) {
- unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
- OBD_BRW_SYNC | OBD_BRW_ASYNC |
- OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC);
+ unsigned int mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
+ OBD_BRW_SYNC | OBD_BRW_ASYNC |
+ OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC);
/* warn if we try to combine flags that we don't know to be
* safe to combine
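Beyond the unsigned-to-unsigned-int type cleanup, the test's intent is
that two pages may only merge when every differing flag bit is inside the
listed set. A hedged sketch of the same check written with XOR, which
makes the unknown differing bits explicit:

	unsigned int known = OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
			     OBD_BRW_SYNC | OBD_BRW_ASYNC |
			     OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC;
	unsigned int diff = p1->flag ^ p2->flag;

	if (diff & ~known)	/* a difference we cannot prove safe */
		CWARN("combining flags %x and %x\n", p1->flag, p2->flag);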
@@ -1097,7 +933,6 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
int i = 0;
struct cfs_crypto_hash_desc *hdesc;
unsigned int bufsize;
- int err;
unsigned char cfs_alg = cksum_obd2cfs(cksum_type);
LASSERT(pg_count > 0);
@@ -1139,7 +974,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
}
bufsize = sizeof(cksum);
- err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize);
+ cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize);
/* For sending we only compute the wrong checksum instead
* of corrupting the data so it is still correct on a redo
@@ -1151,8 +986,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
}
static int osc_brw_prep_request(int cmd, struct client_obd *cli,
- struct obdo *oa,
- struct lov_stripe_md *lsm, u32 page_count,
+ struct obdo *oa, u32 page_count,
struct brw_page **pga,
struct ptlrpc_request **reqp,
int reserve,
@@ -1210,8 +1044,9 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
desc = ptlrpc_prep_bulk_imp(req, page_count,
cli->cl_import->imp_connect_data.ocd_brw_size >> LNET_MTU_BITS,
- opc == OST_WRITE ? BULK_GET_SOURCE : BULK_PUT_SINK,
- OST_BULK_PORTAL);
+ (opc == OST_WRITE ? PTLRPC_BULK_GET_SOURCE :
+ PTLRPC_BULK_PUT_SINK) | PTLRPC_BULK_BUF_KIOV, OST_BULK_PORTAL,
+ &ptlrpc_bulk_kiov_pin_ops);
if (!desc) {
rc = -ENOMEM;
@@ -1259,7 +1094,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) ==
(pg->flag & OBD_BRW_SRVLOCK));
- ptlrpc_prep_bulk_page_pin(desc, pg->pg, poff, pg->count);
+ desc->bd_frag_ops->add_kiov_frag(desc, pg->pg, poff, pg->count);
requested_nob += pg->count;
if (i > 0 && can_merge_pages(pg_prev, pg)) {
@@ -1569,7 +1404,6 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
OST_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
aa->aa_cli, aa->aa_oa,
- NULL /* lsm unused by osc currently */,
aa->aa_page_count, aa->aa_ppga,
&new_req, 0, 1);
if (rc)
@@ -1764,8 +1598,6 @@ static int brw_interpret(const struct lu_env *env,
LASSERT(list_empty(&aa->aa_exts));
LASSERT(list_empty(&aa->aa_oaps));
- cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
- req->rq_bulk->bd_nob_transferred);
osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred);
@@ -1818,9 +1650,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
struct osc_brw_async_args *aa = NULL;
struct obdo *oa = NULL;
struct osc_async_page *oap;
- struct osc_async_page *tmp;
- struct cl_req *clerq = NULL;
- enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
+ struct osc_object *obj = NULL;
struct cl_req_attr *crattr = NULL;
u64 starting_offset = OBD_OBJECT_EOF;
u64 ending_offset = 0;
@@ -1828,6 +1658,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
int mem_tight = 0;
int page_count = 0;
bool soft_sync = false;
+ bool interrupted = false;
int i;
int rc;
struct ost_body *body;
@@ -1839,32 +1670,15 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
list_for_each_entry(ext, ext_list, oe_link) {
LASSERT(ext->oe_state == OES_RPC);
mem_tight |= ext->oe_memalloc;
- list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
- ++page_count;
- list_add_tail(&oap->oap_rpc_item, &rpc_list);
- if (starting_offset > oap->oap_obj_off)
- starting_offset = oap->oap_obj_off;
- else
- LASSERT(oap->oap_page_off == 0);
- if (ending_offset < oap->oap_obj_off + oap->oap_count)
- ending_offset = oap->oap_obj_off +
- oap->oap_count;
- else
- LASSERT(oap->oap_page_off + oap->oap_count ==
- PAGE_SIZE);
- }
+ page_count += ext->oe_nr_pages;
+ if (!obj)
+ obj = ext->oe_obj;
}
soft_sync = osc_over_unstable_soft_limit(cli);
if (mem_tight)
mpflag = cfs_memory_pressure_get_and_set();
- crattr = kzalloc(sizeof(*crattr), GFP_NOFS);
- if (!crattr) {
- rc = -ENOMEM;
- goto out;
- }
-
pga = kcalloc(page_count, sizeof(*pga), GFP_NOFS);
if (!pga) {
rc = -ENOMEM;
@@ -1878,44 +1692,46 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
}
i = 0;
- list_for_each_entry(oap, &rpc_list, oap_rpc_item) {
- struct cl_page *page = oap2cl_page(oap);
-
- if (!clerq) {
- clerq = cl_req_alloc(env, page, crt,
- 1 /* only 1-object rpcs for now */);
- if (IS_ERR(clerq)) {
- rc = PTR_ERR(clerq);
- goto out;
- }
+ list_for_each_entry(ext, ext_list, oe_link) {
+ list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
+ if (mem_tight)
+ oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
+ if (soft_sync)
+ oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
+ pga[i] = &oap->oap_brw_page;
+ pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
+ i++;
+
+ list_add_tail(&oap->oap_rpc_item, &rpc_list);
+ if (starting_offset == OBD_OBJECT_EOF ||
+ starting_offset > oap->oap_obj_off)
+ starting_offset = oap->oap_obj_off;
+ else
+ LASSERT(!oap->oap_page_off);
+ if (ending_offset < oap->oap_obj_off + oap->oap_count)
+ ending_offset = oap->oap_obj_off +
+ oap->oap_count;
+ else
+ LASSERT(oap->oap_page_off + oap->oap_count ==
+ PAGE_SIZE);
+ if (oap->oap_interrupted)
+ interrupted = true;
}
- if (mem_tight)
- oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
- if (soft_sync)
- oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
- pga[i] = &oap->oap_brw_page;
- pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
- CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
- pga[i]->pg, oap->oap_page->index, oap,
- pga[i]->flag);
- i++;
- cl_req_page_add(env, clerq, page);
}
- /* always get the data for the obdo for the rpc */
- LASSERT(clerq);
- crattr->cra_oa = oa;
- cl_req_attr_set(env, clerq, crattr, ~0ULL);
+ /* first page in the list */
+ oap = list_entry(rpc_list.next, typeof(*oap), oap_rpc_item);
- rc = cl_req_prep(env, clerq);
- if (rc != 0) {
- CERROR("cl_req_prep failed: %d\n", rc);
- goto out;
- }
+ crattr = &osc_env_info(env)->oti_req_attr;
+ memset(crattr, 0, sizeof(*crattr));
+ crattr->cra_type = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
+ crattr->cra_flags = ~0ULL;
+ crattr->cra_page = oap2cl_page(oap);
+ crattr->cra_oa = oa;
+ cl_req_attr_set(env, osc2cl(obj), crattr);
sort_brw_pages(pga, page_count);
- rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count,
- pga, &req, 1, 0);
+ rc = osc_brw_prep_request(cmd, cli, oa, page_count, pga, &req, 1, 0);
if (rc != 0) {
CERROR("prep_req failed: %d\n", rc);
goto out;
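To make the offset bookkeeping in the rewritten per-extent loop concrete:
three pages of one write at object offsets 0, 4096 and 8192, each with
oap_count == 4096, give starting_offset 0 and ending_offset 12288. The two
LASSERTs encode the contiguity rule for an RPC: only the page with the
lowest object offset may start at a nonzero oap_page_off, and only the
page with the highest end may cover less than a full PAGE_SIZE.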
@@ -1924,8 +1740,10 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
req->rq_commit_cb = brw_commit;
req->rq_interpret_reply = brw_interpret;
- if (mem_tight != 0)
- req->rq_memalloc = 1;
+ req->rq_memalloc = mem_tight != 0;
+ oap->oap_request = ptlrpc_request_addref(req);
+ if (interrupted && !req->rq_intr)
+ ptlrpc_mark_interrupted(req);
/* Need to update the timestamps after the request is built in case
* we race with setattr (locally or in queue at OST). If OST gets
@@ -1935,9 +1753,8 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
*/
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
crattr->cra_oa = &body->oa;
- cl_req_attr_set(env, clerq, crattr,
- OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME);
-
+ crattr->cra_flags = OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME;
+ cl_req_attr_set(env, osc2cl(obj), crattr);
lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);
CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
@@ -1946,24 +1763,6 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
list_splice_init(&rpc_list, &aa->aa_oaps);
INIT_LIST_HEAD(&aa->aa_exts);
list_splice_init(ext_list, &aa->aa_exts);
- aa->aa_clerq = clerq;
-
- /* queued sync pages can be torn down while the pages
- * were between the pending list and the rpc
- */
- tmp = NULL;
- list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
- /* only one oap gets a request reference */
- if (!tmp)
- tmp = oap;
- if (oap->oap_interrupted && !req->rq_intr) {
- CDEBUG(D_INODE, "oap %p in req %p interrupted\n",
- oap, req);
- ptlrpc_mark_interrupted(req);
- }
- }
- if (tmp)
- tmp->oap_request = ptlrpc_request_addref(req);
spin_lock(&cli->cl_loi_list_lock);
starting_offset >>= PAGE_SHIFT;
@@ -1985,6 +1784,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %ur/%dw in flight",
page_count, aa, cli->cl_r_in_flight,
cli->cl_w_in_flight);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_IO, cfs_fail_val);
ptlrpcd_add_req(req);
rc = 0;
@@ -1993,8 +1793,6 @@ out:
if (mem_tight != 0)
cfs_memory_pressure_restore(mpflag);
- kfree(crattr);
-
if (rc != 0) {
LASSERT(!req);
@@ -2010,22 +1808,15 @@ out:
list_del_init(&ext->oe_link);
osc_extent_finish(env, ext, 0, rc);
}
- if (clerq && !IS_ERR(clerq))
- cl_req_completion(env, clerq, rc);
}
return rc;
}
-static int osc_set_lock_data_with_check(struct ldlm_lock *lock,
- struct ldlm_enqueue_info *einfo)
+static int osc_set_lock_data(struct ldlm_lock *lock, void *data)
{
- void *data = einfo->ei_cbdata;
int set = 0;
- LASSERT(lock->l_blocking_ast == einfo->ei_cb_bl);
- LASSERT(lock->l_resource->lr_type == einfo->ei_type);
- LASSERT(lock->l_completion_ast == einfo->ei_cb_cp);
- LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl);
+ LASSERT(lock);
lock_res_and_lock(lock);
@@ -2039,21 +1830,6 @@ static int osc_set_lock_data_with_check(struct ldlm_lock *lock,
return set;
}
-static int osc_set_data_with_check(struct lustre_handle *lockh,
- struct ldlm_enqueue_info *einfo)
-{
- struct ldlm_lock *lock = ldlm_handle2lock(lockh);
- int set = 0;
-
- if (lock) {
- set = osc_set_lock_data_with_check(lock, einfo);
- LDLM_LOCK_PUT(lock);
- } else
- CERROR("lockh %p, data %p - client evicted?\n",
- lockh, einfo->ei_cbdata);
- return set;
-}
-
static int osc_enqueue_fini(struct ptlrpc_request *req,
osc_enqueue_upcall_f upcall, void *cookie,
struct lustre_handle *lockh, enum ldlm_mode mode,
@@ -2153,7 +1929,7 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
* release locks just after they are obtained.
*/
int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u64 *flags, ldlm_policy_data_t *policy,
+ __u64 *flags, union ldlm_policy_data *policy,
struct ost_lvb *lvb, int kms_valid,
osc_enqueue_upcall_f upcall, void *cookie,
struct ldlm_enqueue_info *einfo,
@@ -2219,7 +1995,7 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
ldlm_lock_decref(&lockh, mode);
LDLM_LOCK_PUT(matched);
return -ECANCELED;
- } else if (osc_set_lock_data_with_check(matched, einfo)) {
+ } else if (osc_set_lock_data(matched, einfo->ei_cbdata)) {
*flags |= LDLM_FL_LVB_READY;
/* We already have a lock, and it's referenced. */
(*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED);
@@ -2304,7 +2080,7 @@ no_match:
}
int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+ __u32 type, union ldlm_policy_data *policy, __u32 mode,
__u64 *flags, void *data, struct lustre_handle *lockh,
int unref)
{
@@ -2331,31 +2107,20 @@ int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
rc |= LCK_PW;
rc = ldlm_lock_match(obd->obd_namespace, lflags,
res_id, type, policy, rc, lockh, unref);
- if (rc) {
- if (data) {
- if (!osc_set_data_with_check(lockh, data)) {
- if (!(lflags & LDLM_FL_TEST_LOCK))
- ldlm_lock_decref(lockh, rc);
- return 0;
- }
- }
- if (!(lflags & LDLM_FL_TEST_LOCK) && mode != rc) {
- ldlm_lock_addref(lockh, LCK_PR);
- ldlm_lock_decref(lockh, LCK_PW);
- }
+ if (!rc || lflags & LDLM_FL_TEST_LOCK)
return rc;
- }
- return rc;
-}
-int osc_cancel_base(struct lustre_handle *lockh, __u32 mode)
-{
- if (unlikely(mode == LCK_GROUP))
- ldlm_lock_decref_and_cancel(lockh, mode);
- else
- ldlm_lock_decref(lockh, mode);
+ if (data) {
+ struct ldlm_lock *lock = ldlm_handle2lock(lockh);
- return 0;
+ LASSERT(lock);
+ if (!osc_set_lock_data(lock, data)) {
+ ldlm_lock_decref(lockh, rc);
+ rc = 0;
+ }
+ LDLM_LOCK_PUT(lock);
+ }
+ return rc;
}
static int osc_statfs_interpret(const struct lu_env *env,
@@ -2526,9 +2291,6 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
err = ptlrpc_set_import_active(obd->u.cli.cl_import,
data->ioc_offset);
goto out;
- case OBD_IOC_POLL_QUOTACHECK:
- err = osc_quota_poll_check(exp, karg);
- goto out;
case OBD_IOC_PING_TARGET:
err = ptlrpc_obd_ping(obd);
goto out;
@@ -2543,103 +2305,6 @@ out:
return err;
}
-static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
- u32 keylen, void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *lsm)
-{
- if (!vallen || !val)
- return -EFAULT;
-
- if (KEY_IS(KEY_FIEMAP)) {
- struct ll_fiemap_info_key *fm_key = key;
- struct ldlm_res_id res_id;
- ldlm_policy_data_t policy;
- struct lustre_handle lockh;
- enum ldlm_mode mode = 0;
- struct ptlrpc_request *req;
- struct ll_user_fiemap *reply;
- char *tmp;
- int rc;
-
- if (!(fm_key->fiemap.fm_flags & FIEMAP_FLAG_SYNC))
- goto skip_locking;
-
- policy.l_extent.start = fm_key->fiemap.fm_start &
- PAGE_MASK;
-
- if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
- fm_key->fiemap.fm_start + PAGE_SIZE - 1)
- policy.l_extent.end = OBD_OBJECT_EOF;
- else
- policy.l_extent.end = (fm_key->fiemap.fm_start +
- fm_key->fiemap.fm_length +
- PAGE_SIZE - 1) & PAGE_MASK;
-
- ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
- mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
- LDLM_FL_BLOCK_GRANTED |
- LDLM_FL_LVB_READY,
- &res_id, LDLM_EXTENT, &policy,
- LCK_PR | LCK_PW, &lockh, 0);
- if (mode) { /* lock is cached on client */
- if (mode != LCK_PR) {
- ldlm_lock_addref(&lockh, LCK_PR);
- ldlm_lock_decref(&lockh, LCK_PW);
- }
- } else { /* no cached lock, needs acquire lock on server side */
- fm_key->oa.o_valid |= OBD_MD_FLFLAGS;
- fm_key->oa.o_flags |= OBD_FL_SRVLOCK;
- }
-
-skip_locking:
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_OST_GET_INFO_FIEMAP);
- if (!req) {
- rc = -ENOMEM;
- goto drop_lock;
- }
-
- req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY,
- RCL_CLIENT, keylen);
- req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL,
- RCL_CLIENT, *vallen);
- req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL,
- RCL_SERVER, *vallen);
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
- if (rc) {
- ptlrpc_request_free(req);
- goto drop_lock;
- }
-
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY);
- memcpy(tmp, key, keylen);
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL);
- memcpy(tmp, val, *vallen);
-
- ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- if (rc)
- goto fini_req;
-
- reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL);
- if (!reply) {
- rc = -EPROTO;
- goto fini_req;
- }
-
- memcpy(val, reply, *vallen);
-fini_req:
- ptlrpc_req_finished(req);
-drop_lock:
- if (mode)
- ldlm_lock_decref(&lockh, LCK_PR);
- return rc;
- }
-
- return -EINVAL;
-}
-
static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
u32 keylen, void *key, u32 vallen,
void *val, struct ptlrpc_request_set *set)
@@ -2999,47 +2664,33 @@ out_ptlrpcd:
return rc;
}
-static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+static int osc_precleanup(struct obd_device *obd)
{
- switch (stage) {
- case OBD_CLEANUP_EARLY: {
- struct obd_import *imp;
-
- imp = obd->u.cli.cl_import;
- CDEBUG(D_HA, "Deactivating import %s\n", obd->obd_name);
- /* ptlrpc_abort_inflight to stop an mds_lov_synchronize */
- ptlrpc_deactivate_import(imp);
- spin_lock(&imp->imp_lock);
- imp->imp_pingable = 0;
- spin_unlock(&imp->imp_lock);
- break;
+ struct client_obd *cli = &obd->u.cli;
+
+ /* LU-464
+	 * for the echo client, the export may be on the zombie list; wait
+	 * for the zombie thread to cull it, because cli.cl_import will be
+ * cleared in client_disconnect_export():
+ * class_export_destroy() -> obd_cleanup() ->
+ * echo_device_free() -> echo_client_cleanup() ->
+ * obd_disconnect() -> osc_disconnect() ->
+ * client_disconnect_export()
+ */
+ obd_zombie_barrier();
+ if (cli->cl_writeback_work) {
+ ptlrpcd_destroy_work(cli->cl_writeback_work);
+ cli->cl_writeback_work = NULL;
}
- case OBD_CLEANUP_EXPORTS: {
- struct client_obd *cli = &obd->u.cli;
- /* LU-464
- * for echo client, export may be on zombie list, wait for
- * zombie thread to cull it, because cli.cl_import will be
- * cleared in client_disconnect_export():
- * class_export_destroy() -> obd_cleanup() ->
- * echo_device_free() -> echo_client_cleanup() ->
- * obd_disconnect() -> osc_disconnect() ->
- * client_disconnect_export()
- */
- obd_zombie_barrier();
- if (cli->cl_writeback_work) {
- ptlrpcd_destroy_work(cli->cl_writeback_work);
- cli->cl_writeback_work = NULL;
- }
- if (cli->cl_lru_work) {
- ptlrpcd_destroy_work(cli->cl_lru_work);
- cli->cl_lru_work = NULL;
- }
- obd_cleanup_client_import(obd);
- ptlrpc_lprocfs_unregister_obd(obd);
- lprocfs_obd_cleanup(obd);
- break;
- }
+
+ if (cli->cl_lru_work) {
+ ptlrpcd_destroy_work(cli->cl_lru_work);
+ cli->cl_lru_work = NULL;
}
+
+ obd_cleanup_client_import(obd);
+ ptlrpc_lprocfs_unregister_obd(obd);
+ lprocfs_obd_cleanup(obd);
return 0;
}
@@ -3104,24 +2755,18 @@ static struct obd_ops osc_obd_ops = {
.disconnect = osc_disconnect,
.statfs = osc_statfs,
.statfs_async = osc_statfs_async,
- .unpackmd = osc_unpackmd,
.create = osc_create,
.destroy = osc_destroy,
.getattr = osc_getattr,
- .getattr_async = osc_getattr_async,
.setattr = osc_setattr,
- .setattr_async = osc_setattr_async,
.iocontrol = osc_iocontrol,
- .get_info = osc_get_info,
.set_info_async = osc_set_info_async,
.import_event = osc_import_event,
.process_config = osc_process_config,
.quotactl = osc_quotactl,
- .quotacheck = osc_quotacheck,
};
extern struct lu_kmem_descr osc_caches[];
-extern struct lock_class_key osc_ast_guard_class;
static int __init osc_init(void)
{
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
index 8c51d51a678b..804741362bc0 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -43,6 +43,18 @@
#include "ptlrpc_internal.h"
+const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops = {
+ .add_kiov_frag = ptlrpc_prep_bulk_page_pin,
+ .release_frags = ptlrpc_release_bulk_page_pin,
+};
+EXPORT_SYMBOL(ptlrpc_bulk_kiov_pin_ops);
+
+const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops = {
+ .add_kiov_frag = ptlrpc_prep_bulk_page_nopin,
+ .release_frags = NULL,
+};
+EXPORT_SYMBOL(ptlrpc_bulk_kiov_nopin_ops);
+
static int ptlrpc_send_new_req(struct ptlrpc_request *req);
static int ptlrpcd_check_work(struct ptlrpc_request *req);
static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async);
@@ -95,24 +107,43 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
* Allocate and initialize new bulk descriptor on the sender.
* Returns pointer to the descriptor or NULL on error.
*/
-struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
- unsigned type, unsigned portal)
+struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned int nfrags,
+ unsigned int max_brw,
+ enum ptlrpc_bulk_op_type type,
+ unsigned int portal,
+ const struct ptlrpc_bulk_frag_ops *ops)
{
struct ptlrpc_bulk_desc *desc;
int i;
- desc = kzalloc(offsetof(struct ptlrpc_bulk_desc, bd_iov[npages]),
- GFP_NOFS);
+	/* ensure that exactly one of KIOV or KVEC is set, with a matching frag op */
+ LASSERT((ptlrpc_is_bulk_desc_kiov(type) && ops->add_kiov_frag) ||
+ (ptlrpc_is_bulk_desc_kvec(type) && ops->add_iov_frag));
+
+ desc = kzalloc(sizeof(*desc), GFP_NOFS);
if (!desc)
return NULL;
+ if (type & PTLRPC_BULK_BUF_KIOV) {
+ GET_KIOV(desc) = kcalloc(nfrags, sizeof(*GET_KIOV(desc)),
+ GFP_NOFS);
+ if (!GET_KIOV(desc))
+ goto free_desc;
+ } else {
+ GET_KVEC(desc) = kcalloc(nfrags, sizeof(*GET_KVEC(desc)),
+ GFP_NOFS);
+ if (!GET_KVEC(desc))
+ goto free_desc;
+ }
+
spin_lock_init(&desc->bd_lock);
init_waitqueue_head(&desc->bd_waitq);
- desc->bd_max_iov = npages;
+ desc->bd_max_iov = nfrags;
desc->bd_iov_count = 0;
desc->bd_portal = portal;
desc->bd_type = type;
desc->bd_md_count = 0;
+ desc->bd_frag_ops = (struct ptlrpc_bulk_frag_ops *)ops;
LASSERT(max_brw > 0);
desc->bd_md_max_brw = min(max_brw, PTLRPC_BULK_OPS_COUNT);
/*
@@ -123,24 +154,31 @@ struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
LNetInvalidateHandle(&desc->bd_mds[i]);
return desc;
+free_desc:
+ kfree(desc);
+ return NULL;
}
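With the buffer union behind GET_KIOV()/GET_KVEC(), callers commit to a
fragment kind at allocation time and drive it through bd_frag_ops. A
hypothetical client-side sketch mirroring the osc_brw_prep_request()
changes earlier in this patch (req and page are assumed to exist):

	desc = ptlrpc_prep_bulk_imp(req, 1 /* nfrags */, 1 /* max_brw */,
				    PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV,
				    OST_BULK_PORTAL,
				    &ptlrpc_bulk_kiov_pin_ops);
	if (!desc)
		return -ENOMEM;
	/* dispatches to ptlrpc_prep_bulk_page_pin() for this ops table */
	desc->bd_frag_ops->add_kiov_frag(desc, page, 0, PAGE_SIZE);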
/**
* Prepare bulk descriptor for specified outgoing request \a req that
- * can fit \a npages * pages. \a type is bulk type. \a portal is where
+ * can fit \a nfrags fragments. \a type is bulk type. \a portal is where
* the bulk to be sent. Used on client-side.
* Returns pointer to newly allocated initialized bulk descriptor or NULL on
* error.
*/
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
- unsigned npages, unsigned max_brw,
- unsigned type, unsigned portal)
+ unsigned int nfrags,
+ unsigned int max_brw,
+ unsigned int type,
+ unsigned int portal,
+ const struct ptlrpc_bulk_frag_ops *ops)
{
struct obd_import *imp = req->rq_import;
struct ptlrpc_bulk_desc *desc;
- LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
- desc = ptlrpc_new_bulk(npages, max_brw, type, portal);
+ LASSERT(ptlrpc_is_bulk_op_passive(type));
+
+ desc = ptlrpc_new_bulk(nfrags, max_brw, type, portal, ops);
if (!desc)
return NULL;
@@ -158,56 +196,82 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
}
EXPORT_SYMBOL(ptlrpc_prep_bulk_imp);
-/**
- * Add a page \a page to the bulk descriptor \a desc.
- * Data to transfer in the page starts at offset \a pageoffset and
- * amount of data to transfer from the page is \a len
- */
void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
struct page *page, int pageoffset, int len, int pin)
{
+ struct bio_vec *kiov;
+
LASSERT(desc->bd_iov_count < desc->bd_max_iov);
LASSERT(page);
LASSERT(pageoffset >= 0);
LASSERT(len > 0);
LASSERT(pageoffset + len <= PAGE_SIZE);
+ LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
+
+ kiov = &BD_GET_KIOV(desc, desc->bd_iov_count);
desc->bd_nob += len;
if (pin)
get_page(page);
- ptlrpc_add_bulk_page(desc, page, pageoffset, len);
+ kiov->bv_page = page;
+ kiov->bv_offset = pageoffset;
+ kiov->bv_len = len;
+
+ desc->bd_iov_count++;
}
EXPORT_SYMBOL(__ptlrpc_prep_bulk_page);
-/**
- * Uninitialize and free bulk descriptor \a desc.
- * Works on bulk descriptors both from server and client side.
- */
-void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin)
+int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc,
+ void *frag, int len)
{
- int i;
+ struct kvec *iovec;
+
+ LASSERT(desc->bd_iov_count < desc->bd_max_iov);
+ LASSERT(frag);
+ LASSERT(len > 0);
+ LASSERT(ptlrpc_is_bulk_desc_kvec(desc->bd_type));
+ iovec = &BD_GET_KVEC(desc, desc->bd_iov_count);
+
+ desc->bd_nob += len;
+
+ iovec->iov_base = frag;
+ iovec->iov_len = len;
+
+ desc->bd_iov_count++;
+
+ return desc->bd_nob;
+}
+EXPORT_SYMBOL(ptlrpc_prep_bulk_frag);
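For kvec descriptors, ptlrpc_prep_bulk_frag() is the counterpart of
add_kiov_frag; a one-line sketch with a hypothetical buffer and length:

	/* appends the fragment and returns the running byte total (bd_nob) */
	int nob = ptlrpc_prep_bulk_frag(desc, buf, len);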
+
+void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
+{
LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */
LASSERT(desc->bd_md_count == 0); /* network hands off */
LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
+ LASSERT(desc->bd_frag_ops);
- sptlrpc_enc_pool_put_pages(desc);
+ if (ptlrpc_is_bulk_desc_kiov(desc->bd_type))
+ sptlrpc_enc_pool_put_pages(desc);
if (desc->bd_export)
class_export_put(desc->bd_export);
else
class_import_put(desc->bd_import);
- if (unpin) {
- for (i = 0; i < desc->bd_iov_count; i++)
- put_page(desc->bd_iov[i].bv_page);
- }
+ if (desc->bd_frag_ops->release_frags)
+ desc->bd_frag_ops->release_frags(desc);
+
+ if (ptlrpc_is_bulk_desc_kiov(desc->bd_type))
+ kfree(GET_KIOV(desc));
+ else
+ kfree(GET_KVEC(desc));
kfree(desc);
}
-EXPORT_SYMBOL(__ptlrpc_free_bulk);
+EXPORT_SYMBOL(ptlrpc_free_bulk);
/**
* Set server timelimit for this req, i.e. how long are we willing to wait
@@ -589,6 +653,42 @@ static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
spin_unlock(&pool->prp_lock);
}
+void ptlrpc_add_unreplied(struct ptlrpc_request *req)
+{
+ struct obd_import *imp = req->rq_import;
+ struct list_head *tmp;
+ struct ptlrpc_request *iter;
+
+ assert_spin_locked(&imp->imp_lock);
+ LASSERT(list_empty(&req->rq_unreplied_list));
+
+ /* unreplied list is sorted by xid in ascending order */
+ list_for_each_prev(tmp, &imp->imp_unreplied_list) {
+ iter = list_entry(tmp, struct ptlrpc_request,
+ rq_unreplied_list);
+
+ LASSERT(req->rq_xid != iter->rq_xid);
+ if (req->rq_xid < iter->rq_xid)
+ continue;
+ list_add(&req->rq_unreplied_list, &iter->rq_unreplied_list);
+ return;
+ }
+ list_add(&req->rq_unreplied_list, &imp->imp_unreplied_list);
+}
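A worked example of the backwards scan: with imp_unreplied_list holding
xids 3, 5 and 9, inserting xid 7 visits 9 first (7 < 9, keep walking),
then 5 (7 > 5, stop), and list_add() links 7 after 5, preserving ascending
order: 3, 5, 7, 9. Scanning from the tail is the cheap direction because a
freshly allocated XID is normally the largest on the list.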
+
+void ptlrpc_assign_next_xid_nolock(struct ptlrpc_request *req)
+{
+ req->rq_xid = ptlrpc_next_xid();
+ ptlrpc_add_unreplied(req);
+}
+
+static inline void ptlrpc_assign_next_xid(struct ptlrpc_request *req)
+{
+ spin_lock(&req->rq_import->imp_lock);
+ ptlrpc_assign_next_xid_nolock(req);
+ spin_unlock(&req->rq_import->imp_lock);
+}
+
int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
__u32 version, int opcode, char **bufs,
struct ptlrpc_cli_ctx *ctx)
@@ -637,8 +737,8 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
ptlrpc_at_set_req_timeout(request);
- request->rq_xid = ptlrpc_next_xid();
lustre_msg_set_opc(request->rq_reqmsg, opcode);
+ ptlrpc_assign_next_xid(request);
/* Let's setup deadline for req/reply/bulk unlink for opcode. */
if (cfs_fail_val == opcode) {
@@ -1129,7 +1229,9 @@ static int ptlrpc_check_status(struct ptlrpc_request *req)
lnet_nid_t nid = imp->imp_connection->c_peer.nid;
__u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
- if (ptlrpc_console_allow(req))
+ /* -EAGAIN is normal when using POSIX flocks */
+ if (ptlrpc_console_allow(req) &&
+ !(opc == LDLM_ENQUEUE && err == -EAGAIN))
LCONSOLE_ERROR_MSG(0x011, "%s: operation %s to node %s failed: rc = %d\n",
imp->imp_obd->obd_name,
ll_opcode2str(opc),
@@ -1166,6 +1268,24 @@ static void ptlrpc_save_versions(struct ptlrpc_request *req)
versions[0], versions[1]);
}
+__u64 ptlrpc_known_replied_xid(struct obd_import *imp)
+{
+ struct ptlrpc_request *req;
+
+ assert_spin_locked(&imp->imp_lock);
+ if (list_empty(&imp->imp_unreplied_list))
+ return 0;
+
+ req = list_entry(imp->imp_unreplied_list.next, struct ptlrpc_request,
+ rq_unreplied_list);
+ LASSERTF(req->rq_xid >= 1, "XID:%llu\n", req->rq_xid);
+
+ if (imp->imp_known_replied_xid < req->rq_xid - 1)
+ imp->imp_known_replied_xid = req->rq_xid - 1;
+
+ return req->rq_xid - 1;
+}
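Continuing the example: if the oldest entry on the unreplied list carries
xid 12, every xid up to 11 must have been replied, so the function returns
11. Note that imp_known_replied_xid only ever ratchets upward, which is
what lets lustre_msg_set_last_xid() advertise a monotone floor to the
server.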
+
/**
* Callback function called when client receives RPC reply for \a req.
* Returns 0 on success or error code.
@@ -1180,6 +1300,7 @@ static int after_reply(struct ptlrpc_request *req)
int rc;
struct timespec64 work_start;
long timediff;
+ u64 committed;
LASSERT(obd);
/* repbuf must be unlinked */
@@ -1206,6 +1327,10 @@ static int after_reply(struct ptlrpc_request *req)
return 0;
}
+ ktime_get_real_ts64(&work_start);
+ timediff = (work_start.tv_sec - req->rq_sent_tv.tv_sec) * USEC_PER_SEC +
+ (work_start.tv_nsec - req->rq_sent_tv.tv_nsec) /
+ NSEC_PER_USEC;
/*
* NB Until this point, the whole of the incoming message,
* including buflens, status etc is in the sender's byte order.
@@ -1235,13 +1360,6 @@ static int after_reply(struct ptlrpc_request *req)
spin_unlock(&req->rq_lock);
req->rq_nr_resend++;
- /* allocate new xid to avoid reply reconstruction */
- if (!req->rq_bulk) {
- /* new xid is already allocated for bulk in ptlrpc_check_set() */
- req->rq_xid = ptlrpc_next_xid();
- DEBUG_REQ(D_RPCTRACE, req, "Allocating new xid for resend on EINPROGRESS");
- }
-
/* Readjust the timeout for current conditions */
ptlrpc_at_set_req_timeout(req);
/*
@@ -1255,13 +1373,14 @@ static int after_reply(struct ptlrpc_request *req)
else
req->rq_sent = now + req->rq_nr_resend;
+ /* Resend for EINPROGRESS will use a new XID */
+ spin_lock(&imp->imp_lock);
+ list_del_init(&req->rq_unreplied_list);
+ spin_unlock(&imp->imp_lock);
+
return 0;
}
- ktime_get_real_ts64(&work_start);
- timediff = (work_start.tv_sec - req->rq_sent_tv.tv_sec) * USEC_PER_SEC +
- (work_start.tv_nsec - req->rq_sent_tv.tv_nsec) /
- NSEC_PER_USEC;
if (obd->obd_svc_stats) {
lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR,
timediff);
@@ -1338,10 +1457,9 @@ static int after_reply(struct ptlrpc_request *req)
}
/* Replay-enabled imports return commit-status information. */
- if (lustre_msg_get_last_committed(req->rq_repmsg)) {
- imp->imp_peer_committed_transno =
- lustre_msg_get_last_committed(req->rq_repmsg);
- }
+ committed = lustre_msg_get_last_committed(req->rq_repmsg);
+ if (likely(committed > imp->imp_peer_committed_transno))
+ imp->imp_peer_committed_transno = committed;
ptlrpc_free_committed(imp);
@@ -1373,9 +1491,17 @@ static int after_reply(struct ptlrpc_request *req)
static int ptlrpc_send_new_req(struct ptlrpc_request *req)
{
struct obd_import *imp = req->rq_import;
+ u64 min_xid = 0;
int rc;
LASSERT(req->rq_phase == RQ_PHASE_NEW);
+
+ /* do not try to go further if there is not enough memory in enc_pool */
+ if (req->rq_sent && req->rq_bulk)
+ if (req->rq_bulk->bd_iov_count > get_free_pages_in_pool() &&
+ pool_is_at_full_capacity())
+ return -ENOMEM;
+
if (req->rq_sent && (req->rq_sent > ktime_get_real_seconds()) &&
(!req->rq_generation_set ||
req->rq_import_generation == imp->imp_generation))
@@ -1385,6 +1511,9 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
spin_lock(&imp->imp_lock);
+ LASSERT(req->rq_xid);
+ LASSERT(!list_empty(&req->rq_unreplied_list));
+
if (!req->rq_generation_set)
req->rq_import_generation = imp->imp_generation;
@@ -1414,8 +1543,25 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
LASSERT(list_empty(&req->rq_list));
list_add_tail(&req->rq_list, &imp->imp_sending_list);
atomic_inc(&req->rq_import->imp_inflight);
+
+	/* find the known replied XID from the unreplied list; CONNECT
+	 * and DISCONNECT requests are skipped to keep the sanity check
+	 * on the server side happy, see process_req_last_xid().
+	 *
+	 * For CONNECT: because replay requests have lower XIDs, the
+	 * sanity check would break if CONNECT bumped exp_last_xid on
+	 * the server.
+	 *
+	 * For DISCONNECT: since the client aborts inflight RPCs before
+	 * sending DISCONNECT, DISCONNECT may carry an XID higher than
+	 * those of the inflight RPCs.
+ */
+ if (!ptlrpc_req_is_connect(req) && !ptlrpc_req_is_disconnect(req))
+ min_xid = ptlrpc_known_replied_xid(imp);
spin_unlock(&imp->imp_lock);
+ lustre_msg_set_last_xid(req->rq_reqmsg, min_xid);
+
lustre_msg_set_status(req->rq_reqmsg, current_pid());
rc = sptlrpc_req_refresh_ctx(req, -1);
@@ -1438,6 +1584,16 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
lustre_msg_get_opc(req->rq_reqmsg));
rc = ptl_send_rpc(req, 0);
+ if (rc == -ENOMEM) {
+ spin_lock(&imp->imp_lock);
+ if (!list_empty(&req->rq_list)) {
+ list_del_init(&req->rq_list);
+ atomic_dec(&req->rq_import->imp_inflight);
+ }
+ spin_unlock(&imp->imp_lock);
+ ptlrpc_rqphase_move(req, RQ_PHASE_NEW);
+ return rc;
+ }
if (rc) {
DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc);
spin_lock(&req->rq_lock);
@@ -1688,18 +1844,9 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
spin_lock(&req->rq_lock);
req->rq_resend = 1;
spin_unlock(&req->rq_lock);
- if (req->rq_bulk) {
- __u64 old_xid;
-
- if (!ptlrpc_unregister_bulk(req, 1))
- continue;
-
- /* ensure previous bulk fails */
- old_xid = req->rq_xid;
- req->rq_xid = ptlrpc_next_xid();
- CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n",
- old_xid, req->rq_xid);
- }
+ if (req->rq_bulk &&
+ !ptlrpc_unregister_bulk(req, 1))
+ continue;
}
/*
* rq_wait_ctx is only touched by ptlrpcd,
@@ -1727,6 +1874,14 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
}
rc = ptl_send_rpc(req, 0);
+ if (rc == -ENOMEM) {
+ spin_lock(&imp->imp_lock);
+ if (!list_empty(&req->rq_list))
+ list_del_init(&req->rq_list);
+ spin_unlock(&imp->imp_lock);
+ ptlrpc_rqphase_move(req, RQ_PHASE_NEW);
+ continue;
+ }
if (rc) {
DEBUG_REQ(D_HA, req,
"send failed: rc = %d", rc);
@@ -1850,6 +2005,7 @@ interpret:
list_del_init(&req->rq_list);
atomic_dec(&imp->imp_inflight);
}
+ list_del_init(&req->rq_unreplied_list);
spin_unlock(&imp->imp_lock);
atomic_dec(&set->set_remaining);
@@ -2247,6 +2403,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
if (!locked)
spin_lock(&request->rq_import->imp_lock);
list_del_init(&request->rq_replay_list);
+ list_del_init(&request->rq_unreplied_list);
if (!locked)
spin_unlock(&request->rq_import->imp_lock);
}
@@ -2266,7 +2423,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
request->rq_import = NULL;
}
if (request->rq_bulk)
- ptlrpc_free_bulk_pin(request->rq_bulk);
+ ptlrpc_free_bulk(request->rq_bulk);
if (request->rq_reqbuf || request->rq_clrbuf)
sptlrpc_cli_free_reqbuf(request);
@@ -2542,14 +2699,6 @@ void ptlrpc_resend_req(struct ptlrpc_request *req)
req->rq_resend = 1;
req->rq_net_err = 0;
req->rq_timedout = 0;
- if (req->rq_bulk) {
- __u64 old_xid = req->rq_xid;
-
- /* ensure previous bulk fails */
- req->rq_xid = ptlrpc_next_xid();
- CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n",
- old_xid, req->rq_xid);
- }
ptlrpc_client_wake_req(req);
spin_unlock(&req->rq_lock);
}
@@ -2592,6 +2741,10 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
lustre_msg_add_flags(req->rq_reqmsg, MSG_REPLAY);
+ spin_lock(&req->rq_lock);
+ req->rq_resend = 0;
+ spin_unlock(&req->rq_lock);
+
LASSERT(imp->imp_replayable);
/* Balanced in ptlrpc_free_committed, usually. */
ptlrpc_request_addref(req);
@@ -2667,8 +2820,15 @@ static int ptlrpc_replay_interpret(const struct lu_env *env,
atomic_dec(&imp->imp_replay_inflight);
- if (!ptlrpc_client_replied(req)) {
- CERROR("request replay timed out, restarting recovery\n");
+ /*
+	 * Note: for a bulk replay (MDS-MDS replay), even if the server
+	 * got the request, the bulk transfer may have timed out, so
+	 * replay the bulk request again
+ */
+ if (!ptlrpc_client_replied(req) ||
+ (req->rq_bulk &&
+ lustre_msg_get_status(req->rq_repmsg) == -ETIMEDOUT)) {
+ DEBUG_REQ(D_ERROR, req, "request replay timed out.\n");
rc = -ETIMEDOUT;
goto out;
}
@@ -2939,6 +3099,48 @@ __u64 ptlrpc_next_xid(void)
}
/**
+ * If the request has a newly allocated XID (new request or EINPROGRESS
+ * resend), use that XID as the bulk matchbits; otherwise allocate new
+ * matchbits for the request, to ensure the previous bulk fails and to
+ * avoid problems with lost replies, where several transfers from
+ * different sending attempts land in the same buffer.
+ */
+void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req)
+{
+ struct ptlrpc_bulk_desc *bd = req->rq_bulk;
+
+ LASSERT(bd);
+
+ if (!req->rq_resend) {
+ /* this request has a new xid, just use it as bulk matchbits */
+ req->rq_mbits = req->rq_xid;
+
+	} else { /* need to generate new matchbits for a resend */
+ u64 old_mbits = req->rq_mbits;
+
+ if ((bd->bd_import->imp_connect_data.ocd_connect_flags &
+ OBD_CONNECT_BULK_MBITS)) {
+ req->rq_mbits = ptlrpc_next_xid();
+ } else {
+ /* old version transfers rq_xid to peer as matchbits */
+ req->rq_mbits = ptlrpc_next_xid();
+ req->rq_xid = req->rq_mbits;
+ }
+
+ CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n",
+ old_mbits, req->rq_mbits);
+ }
+
+ /*
+	 * For multi-bulk RPCs, rq_mbits is the last matchbits needed for the
+	 * bulks, so that the server can infer how many bulks were prepared;
+	 * see LU-1431
+ */
+ req->rq_mbits += ((bd->bd_iov_count + LNET_MAX_IOV - 1) /
+ LNET_MAX_IOV) - 1;
+}
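The final adjustment is easiest to see with numbers; a sketch assuming
LNET_MAX_IOV is 256, its usual LNet value:

	/* e.g. a descriptor with bd_iov_count == 300 fragments */
	int nbulks = (300 + 256 - 1) / 256;	/* == 2 bulk transfers */
	/* rq_mbits += nbulks - 1, so the last matchbits names bulk #2 and
	 * the server can infer that two bulks were prepared (LU-1431) */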
+
+/**
* Get a glimpse at what next xid value might have been.
* Returns possible next xid.
*/
diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c
index 7b020d60c9e5..6c7c8b68a909 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/connection.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c
@@ -152,8 +152,8 @@ void ptlrpc_connection_fini(void)
/*
* Hash operations for net_peer<->connection
*/
-static unsigned
-conn_hashfn(struct cfs_hash *hs, const void *key, unsigned mask)
+static unsigned int
+conn_hashfn(struct cfs_hash *hs, const void *key, unsigned int mask)
{
return cfs_hash_djb2_hash(key, sizeof(lnet_process_id_t), mask);
}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
index 283dfb296d35..49f3e6368415 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/events.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -182,9 +182,9 @@ void client_bulk_callback(lnet_event_t *ev)
struct ptlrpc_bulk_desc *desc = cbid->cbid_arg;
struct ptlrpc_request *req;
- LASSERT((desc->bd_type == BULK_PUT_SINK &&
+ LASSERT((ptlrpc_is_bulk_put_sink(desc->bd_type) &&
ev->type == LNET_EVENT_PUT) ||
- (desc->bd_type == BULK_GET_SOURCE &&
+ (ptlrpc_is_bulk_get_source(desc->bd_type) &&
ev->type == LNET_EVENT_GET) ||
ev->type == LNET_EVENT_UNLINK);
LASSERT(ev->unlinked);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
index a23d0a05b574..e8280194001c 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/import.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c
@@ -396,7 +396,7 @@ void ptlrpc_activate_import(struct obd_import *imp)
}
EXPORT_SYMBOL(ptlrpc_activate_import);
-static void ptlrpc_pinger_force(struct obd_import *imp)
+void ptlrpc_pinger_force(struct obd_import *imp)
{
CDEBUG(D_HA, "%s: waking up pinger s:%s\n", obd2cli_tgt(imp->imp_obd),
ptlrpc_import_state_name(imp->imp_state));
@@ -408,6 +408,7 @@ static void ptlrpc_pinger_force(struct obd_import *imp)
if (imp->imp_state != LUSTRE_IMP_CONNECTING)
ptlrpc_pinger_wake_up();
}
+EXPORT_SYMBOL(ptlrpc_pinger_force);
void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
{
@@ -621,7 +622,8 @@ int ptlrpc_connect_import(struct obd_import *imp)
spin_unlock(&imp->imp_lock);
CERROR("already connected\n");
return 0;
- } else if (imp->imp_state == LUSTRE_IMP_CONNECTING) {
+ } else if (imp->imp_state == LUSTRE_IMP_CONNECTING ||
+ imp->imp_connected) {
spin_unlock(&imp->imp_lock);
CERROR("already connecting\n");
return -EALREADY;
@@ -691,8 +693,6 @@ int ptlrpc_connect_import(struct obd_import *imp)
request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout);
- lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_NEXT_VER);
-
request->rq_no_resend = 1;
request->rq_no_delay = 1;
request->rq_send_state = LUSTRE_IMP_CONNECTING;
@@ -859,6 +859,17 @@ static int ptlrpc_connect_set_flags(struct obd_import *imp,
client_adjust_max_dirty(cli);
/*
+ * Update client max modify RPCs in flight with value returned
+ * by the server
+ */
+ if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS)
+ cli->cl_max_mod_rpcs_in_flight = min(
+ cli->cl_max_mod_rpcs_in_flight,
+ ocd->ocd_maxmodrpcs);
+ else
+ cli->cl_max_mod_rpcs_in_flight = 1;
+
+ /*
* Reset ns_connect_flags only for initial connect. It might be
* changed in while using FS and if we reset it in reconnect
* this leads to losing user settings done before such as
@@ -873,8 +884,7 @@ static int ptlrpc_connect_set_flags(struct obd_import *imp,
ocd->ocd_connect_flags;
}
- if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) &&
- (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+ if (ocd->ocd_connect_flags & OBD_CONNECT_AT)
/*
* We need a per-message support flag, because
* a. we don't know if the incoming connect reply
@@ -889,16 +899,45 @@ static int ptlrpc_connect_set_flags(struct obd_import *imp,
else
imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
- if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) &&
- (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
- imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
- else
- imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+ imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
return 0;
}
/**
+ * Add all replay requests back to unreplied list before start replay,
+ * so that we can make sure the known replied XID is always increased
+ * only even if when replaying requests.
+ */
+static void ptlrpc_prepare_replay(struct obd_import *imp)
+{
+ struct ptlrpc_request *req;
+
+ if (imp->imp_state != LUSTRE_IMP_REPLAY ||
+ imp->imp_resend_replay)
+ return;
+
+ /*
+	 * If the server was restarted during replay, the requests may
+	 * already have been added to the unreplied list by a former
+	 * replay pass.
+ */
+ spin_lock(&imp->imp_lock);
+
+ list_for_each_entry(req, &imp->imp_committed_list, rq_replay_list) {
+ if (list_empty(&req->rq_unreplied_list))
+ ptlrpc_add_unreplied(req);
+ }
+
+ list_for_each_entry(req, &imp->imp_replay_list, rq_replay_list) {
+ if (list_empty(&req->rq_unreplied_list))
+ ptlrpc_add_unreplied(req);
+ }
+
+ imp->imp_known_replied_xid = ptlrpc_known_replied_xid(imp);
+ spin_unlock(&imp->imp_lock);
+}
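ptlrpc_prepare_replay() relies on ptlrpc_known_replied_xid() (declared in ptlrpc_internal.h below) to recompute the watermark under imp_lock. A minimal sketch of the idea, assuming the unreplied list is kept sorted by ascending XID and that the import's list head is named imp_unreplied_list (both are assumptions here; the real helper may differ):

static __u64 known_replied_xid_sketch(struct obd_import *imp)
{
	struct ptlrpc_request *req;

	/* caller holds imp_lock; an empty list means nothing in flight */
	if (list_empty(&imp->imp_unreplied_list))
		return 0;

	/* everything below the oldest unreplied XID has been replied to */
	req = list_entry(imp->imp_unreplied_list.next,
			 struct ptlrpc_request, rq_unreplied_list);
	return req->rq_xid - 1;
}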
+
+/**
* interpret_reply callback for connect RPCs.
* Looks into returned status of connect operation and decides
* what to do with the import - i.e enter recovery, promote it to
@@ -933,6 +972,13 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
ptlrpc_maybe_ping_import_soon(imp);
goto out;
}
+
+ /*
+ * LU-7558: indicate that we are interpreting the connect reply;
+ * ptlrpc_connect_import() will not try to reconnect until the
+ * interpret callback has finished.
+ */
+ imp->imp_connected = 1;
spin_unlock(&imp->imp_lock);
LASSERT(imp->imp_conn_current);
@@ -967,6 +1013,16 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
spin_unlock(&imp->imp_lock);
+ if (!exp) {
+ /* This could happen if the export is cleaned up during
+ * the connect attempt
+ */
+ CERROR("%s: missing export after connect\n",
+ imp->imp_obd->obd_name);
+ rc = -ENODEV;
+ goto out;
+ }
+
/* check that server granted subset of flags we asked for. */
if ((ocd->ocd_connect_flags & imp->imp_connect_flags_orig) !=
ocd->ocd_connect_flags) {
@@ -977,15 +1033,6 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
goto out;
}
- if (!exp) {
- /* This could happen if export is cleaned during the
- * connect attempt
- */
- CERROR("%s: missing export after connect\n",
- imp->imp_obd->obd_name);
- rc = -ENODEV;
- goto out;
- }
old_connect_flags = exp_connect_flags(exp);
exp->exp_connect_data = *ocd;
imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
@@ -1124,6 +1171,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
imp->imp_remote_handle =
*lustre_msg_get_handle(request->rq_repmsg);
imp->imp_last_replay_transno = 0;
+ imp->imp_replay_cursor = &imp->imp_committed_list;
IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
} else {
DEBUG_REQ(D_HA, request, "%s: evicting (reconnect/recover flags not set: %x)",
@@ -1147,18 +1195,25 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
}
finish:
+ ptlrpc_prepare_replay(imp);
rc = ptlrpc_import_recovery_state_machine(imp);
if (rc == -ENOTCONN) {
CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n",
obd2cli_tgt(imp->imp_obd),
imp->imp_connection->c_remote_uuid.uuid);
ptlrpc_connect_import(imp);
+ spin_lock(&imp->imp_lock);
+ imp->imp_connected = 0;
imp->imp_connect_tried = 1;
+ spin_unlock(&imp->imp_lock);
return 0;
}
out:
+ spin_lock(&imp->imp_lock);
+ imp->imp_connected = 0;
imp->imp_connect_tried = 1;
+ spin_unlock(&imp->imp_lock);
if (rc != 0) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
index 839ef3e80c1a..99d7c667df28 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/layout.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c
@@ -48,14 +48,14 @@
#include <linux/module.h>
-/* LUSTRE_VERSION_CODE */
-#include "../include/lustre_ver.h"
-
-#include "../include/obd_support.h"
-/* lustre_swab_mdt_body */
#include "../include/lustre/lustre_idl.h"
-/* obd2cli_tgt() (required by DEBUG_REQ()) */
+
+#include "../include/llog_swab.h"
+#include "../include/lustre_debug.h"
+#include "../include/lustre_swab.h"
+#include "../include/lustre_ver.h"
#include "../include/obd.h"
+#include "../include/obd_support.h"
/* __REQ_LAYOUT_USER__ */
#endif
@@ -121,7 +121,7 @@ static const struct req_msg_field *mdt_close_client[] = {
&RMF_CAPA1
};
-static const struct req_msg_field *mdt_release_close_client[] = {
+static const struct req_msg_field *mdt_intent_close_client[] = {
&RMF_PTLRPC_BODY,
&RMF_MDT_EPOCH,
&RMF_REC_REINT,
@@ -257,6 +257,18 @@ static const struct req_msg_field *mds_reint_rename_client[] = {
&RMF_DLM_REQ
};
+static const struct req_msg_field *mds_reint_migrate_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_SYMTGT,
+ &RMF_DLM_REQ,
+ &RMF_MDT_EPOCH,
+ &RMF_CLOSE_DATA
+};
+
static const struct req_msg_field *mds_last_unlink_server[] = {
&RMF_PTLRPC_BODY,
&RMF_MDT_BODY,
@@ -666,10 +678,9 @@ static struct req_format *req_formats[] = {
&RQF_MDS_GETXATTR,
&RQF_MDS_SYNC,
&RQF_MDS_CLOSE,
- &RQF_MDS_RELEASE_CLOSE,
+ &RQF_MDS_INTENT_CLOSE,
&RQF_MDS_READPAGE,
&RQF_MDS_WRITEPAGE,
- &RQF_MDS_DONE_WRITING,
&RQF_MDS_REINT,
&RQF_MDS_REINT_CREATE,
&RQF_MDS_REINT_CREATE_ACL,
@@ -679,9 +690,9 @@ static struct req_format *req_formats[] = {
&RQF_MDS_REINT_UNLINK,
&RQF_MDS_REINT_LINK,
&RQF_MDS_REINT_RENAME,
+ &RQF_MDS_REINT_MIGRATE,
&RQF_MDS_REINT_SETATTR,
&RQF_MDS_REINT_SETXATTR,
- &RQF_MDS_QUOTACHECK,
&RQF_MDS_QUOTACTL,
&RQF_MDS_HSM_PROGRESS,
&RQF_MDS_HSM_CT_REGISTER,
@@ -691,10 +702,8 @@ static struct req_format *req_formats[] = {
&RQF_MDS_HSM_ACTION,
&RQF_MDS_HSM_REQUEST,
&RQF_MDS_SWAP_LAYOUTS,
- &RQF_QC_CALLBACK,
&RQF_OST_CONNECT,
&RQF_OST_DISCONNECT,
- &RQF_OST_QUOTACHECK,
&RQF_OST_QUOTACTL,
&RQF_OST_GETATTR,
&RQF_OST_SETATTR,
@@ -1180,14 +1189,6 @@ struct req_format RQF_LOG_CANCEL =
DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty);
EXPORT_SYMBOL(RQF_LOG_CANCEL);
-struct req_format RQF_MDS_QUOTACHECK =
- DEFINE_REQ_FMT0("MDS_QUOTACHECK", quotactl_only, empty);
-EXPORT_SYMBOL(RQF_MDS_QUOTACHECK);
-
-struct req_format RQF_OST_QUOTACHECK =
- DEFINE_REQ_FMT0("OST_QUOTACHECK", quotactl_only, empty);
-EXPORT_SYMBOL(RQF_OST_QUOTACHECK);
-
struct req_format RQF_MDS_QUOTACTL =
DEFINE_REQ_FMT0("MDS_QUOTACTL", quotactl_only, quotactl_only);
EXPORT_SYMBOL(RQF_MDS_QUOTACTL);
@@ -1196,10 +1197,6 @@ struct req_format RQF_OST_QUOTACTL =
DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only);
EXPORT_SYMBOL(RQF_OST_QUOTACTL);
-struct req_format RQF_QC_CALLBACK =
- DEFINE_REQ_FMT0("QC_CALLBACK", quotactl_only, empty);
-EXPORT_SYMBOL(RQF_QC_CALLBACK);
-
struct req_format RQF_MDS_GETSTATUS =
DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa);
EXPORT_SYMBOL(RQF_MDS_GETSTATUS);
@@ -1270,6 +1267,11 @@ struct req_format RQF_MDS_REINT_RENAME =
mds_last_unlink_server);
EXPORT_SYMBOL(RQF_MDS_REINT_RENAME);
+struct req_format RQF_MDS_REINT_MIGRATE =
+ DEFINE_REQ_FMT0("MDS_REINT_MIGRATE", mds_reint_migrate_client,
+ mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_MIGRATE);
+
struct req_format RQF_MDS_REINT_SETATTR =
DEFINE_REQ_FMT0("MDS_REINT_SETATTR",
mds_reint_setattr_client, mds_setattr_server);
@@ -1381,15 +1383,10 @@ struct req_format RQF_MDS_CLOSE =
mdt_close_client, mds_last_unlink_server);
EXPORT_SYMBOL(RQF_MDS_CLOSE);
-struct req_format RQF_MDS_RELEASE_CLOSE =
+struct req_format RQF_MDS_INTENT_CLOSE =
DEFINE_REQ_FMT0("MDS_CLOSE",
- mdt_release_close_client, mds_last_unlink_server);
-EXPORT_SYMBOL(RQF_MDS_RELEASE_CLOSE);
-
-struct req_format RQF_MDS_DONE_WRITING =
- DEFINE_REQ_FMT0("MDS_DONE_WRITING",
- mdt_close_client, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_DONE_WRITING);
+ mdt_intent_close_client, mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_INTENT_CLOSE);
struct req_format RQF_MDS_READPAGE =
DEFINE_REQ_FMT0("MDS_READPAGE",
@@ -1874,13 +1871,14 @@ static void *__req_capsule_get(struct req_capsule *pill,
getter = (field->rmf_flags & RMF_F_STRING) ?
(typeof(getter))lustre_msg_string : lustre_msg_buf;
- if (field->rmf_flags & RMF_F_STRUCT_ARRAY) {
+ if (field->rmf_flags & (RMF_F_STRUCT_ARRAY | RMF_F_NO_SIZE_CHECK)) {
/*
* We've already asserted that field->rmf_size > 0 in
* req_layout_init().
*/
len = lustre_msg_buflen(msg, offset);
- if ((len % field->rmf_size) != 0) {
+ if (!(field->rmf_flags & RMF_F_NO_SIZE_CHECK) &&
+ (len % field->rmf_size)) {
CERROR("%s: array field size mismatch %d modulo %u != 0 (%d)\n",
field->rmf_name, len, field->rmf_size, loc);
return NULL;
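The relaxed check above keeps the old invariant for true struct arrays: a buffer carrying N fixed-size records must have a length that is an exact multiple of the record size, while RMF_F_NO_SIZE_CHECK fields opt out entirely. A hedged restatement (the helper name is illustrative):

static bool array_buflen_ok(__u32 len, __u32 rmf_size, __u32 rmf_flags)
{
	if (rmf_flags & RMF_F_NO_SIZE_CHECK)
		return true;	/* variable-size payload, length not checked */
	/* e.g. 3 records of 24 bytes -> len 72 passes, len 70 fails */
	return (len % rmf_size) == 0;
}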
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
index 0f55c01feba8..110d9f505787 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
@@ -287,8 +287,13 @@ static int llog_client_read_header(const struct lu_env *env,
goto out;
}
- memcpy(handle->lgh_hdr, hdr, sizeof(*hdr));
- handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
+ if (handle->lgh_hdr_size < hdr->llh_hdr.lrh_len) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ memcpy(handle->lgh_hdr, hdr, hdr->llh_hdr.lrh_len);
+ handle->lgh_last_idx = LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_index;
/* sanity checks */
llh_hdr = &handle->lgh_hdr->llh_hdr;
@@ -296,9 +301,14 @@ static int llog_client_read_header(const struct lu_env *env,
CERROR("bad log header magic: %#x (expecting %#x)\n",
llh_hdr->lrh_type, LLOG_HDR_MAGIC);
rc = -EIO;
- } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
- CERROR("incorrectly sized log header: %#x (expecting %#x)\n",
- llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
+ } else if (llh_hdr->lrh_len !=
+ LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_len ||
+ (llh_hdr->lrh_len & (llh_hdr->lrh_len - 1)) ||
+ llh_hdr->lrh_len < LLOG_MIN_CHUNK_SIZE ||
+ llh_hdr->lrh_len > handle->lgh_hdr_size) {
+ CERROR("incorrectly sized log header: %#x (expecting %#x) (power of two > 8192)\n",
+ llh_hdr->lrh_len,
+ LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_len);
CERROR("you may need to re-run lconf --write_conf.\n");
rc = -EIO;
}
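The new header checks above boil down to a single predicate on lrh_len. A minimal sketch, assuming is_power_of_2() from linux/log2.h is available (the helper name is illustrative):

static bool llog_hdr_len_valid_sketch(struct llog_log_hdr *llh,
				      int lgh_hdr_size)
{
	u32 len = llh->llh_hdr.lrh_len;

	return len == LLOG_HDR_TAIL(llh)->lrt_len &&	/* head matches tail */
	       is_power_of_2(len) &&			/* power of two */
	       len >= LLOG_MIN_CHUNK_SIZE &&		/* at least 8192 */
	       len <= lgh_hdr_size;			/* fits the buffer */
}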
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
index 9bad57d65db4..f87478180013 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -479,8 +479,8 @@ static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file *m, void *n)
struct ptlrpc_nrs_policy *policy;
struct ptlrpc_nrs_pol_info *infos;
struct ptlrpc_nrs_pol_info tmp;
- unsigned num_pols;
- unsigned pol_idx = 0;
+ unsigned int num_pols;
+ unsigned int pol_idx = 0;
bool hp = false;
int i;
int rc = 0;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
index 9c937398a085..da1209e40f03 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
@@ -114,7 +114,7 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req)
int rc2;
int posted_md;
int total_md;
- __u64 xid;
+ u64 mbits;
lnet_handle_me_t me_h;
lnet_md_t md;
@@ -127,8 +127,7 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req)
LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
LASSERT(desc->bd_req);
- LASSERT(desc->bd_type == BULK_PUT_SINK ||
- desc->bd_type == BULK_GET_SOURCE);
+ LASSERT(ptlrpc_is_bulk_op_passive(desc->bd_type));
/* cleanup the state of the bulk for it will be reused */
if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
@@ -143,40 +142,37 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req)
LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
LASSERT(desc->bd_cbid.cbid_arg == desc);
- /* An XID is only used for a single request from the client.
- * For retried bulk transfers, a new XID will be allocated in
- * in ptlrpc_check_set() if it needs to be resent, so it is not
- * using the same RDMA match bits after an error.
- *
- * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The
- * first bulk XID is power-of-two aligned before rq_xid. LU-1431
- */
- xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
+ total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
+ /* rq_mbits holds the match bits of the final bulk */
+ mbits = req->rq_mbits - total_md + 1;
+
+ LASSERTF(mbits == (req->rq_mbits & PTLRPC_BULK_OPS_MASK),
+ "first mbits = x%llu, last mbits = x%llu\n",
+ mbits, req->rq_mbits);
LASSERTF(!(desc->bd_registered &&
req->rq_send_state != LUSTRE_IMP_REPLAY) ||
- xid != desc->bd_last_xid,
- "registered: %d rq_xid: %llu bd_last_xid: %llu\n",
- desc->bd_registered, xid, desc->bd_last_xid);
+ mbits != desc->bd_last_mbits,
+ "registered: %d rq_mbits: %llu bd_last_mbits: %llu\n",
+ desc->bd_registered, mbits, desc->bd_last_mbits);
- total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
desc->bd_registered = 1;
- desc->bd_last_xid = xid;
+ desc->bd_last_mbits = mbits;
desc->bd_md_count = total_md;
md.user_ptr = &desc->bd_cbid;
md.eq_handle = ptlrpc_eq_h;
md.threshold = 1; /* PUT or GET */
- for (posted_md = 0; posted_md < total_md; posted_md++, xid++) {
+ for (posted_md = 0; posted_md < total_md; posted_md++, mbits++) {
md.options = PTLRPC_MD_OPTIONS |
- ((desc->bd_type == BULK_GET_SOURCE) ?
+ (ptlrpc_is_bulk_op_get(desc->bd_type) ?
LNET_MD_OP_GET : LNET_MD_OP_PUT);
ptlrpc_fill_bulk_md(&md, desc, posted_md);
- rc = LNetMEAttach(desc->bd_portal, peer, xid, 0,
+ rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
LNET_UNLINK, LNET_INS_AFTER, &me_h);
if (rc != 0) {
CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
- desc->bd_import->imp_obd->obd_name, xid,
+ desc->bd_import->imp_obd->obd_name, mbits,
posted_md, rc);
break;
}
@@ -186,7 +182,7 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req)
&desc->bd_mds[posted_md]);
if (rc != 0) {
CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
- desc->bd_import->imp_obd->obd_name, xid,
+ desc->bd_import->imp_obd->obd_name, mbits,
posted_md, rc);
rc2 = LNetMEUnlink(me_h);
LASSERT(rc2 == 0);
@@ -205,27 +201,19 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req)
return -ENOMEM;
}
- /* Set rq_xid to matchbits of the final bulk so that server can
- * infer the number of bulks that were prepared
- */
- req->rq_xid = --xid;
- LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK),
- "bd_last_xid = x%llu, rq_xid = x%llu\n",
- desc->bd_last_xid, req->rq_xid);
-
spin_lock(&desc->bd_lock);
- /* Holler if peer manages to touch buffers before he knows the xid */
+ /* Holler if peer manages to touch buffers before he knows the mbits */
if (desc->bd_md_count != total_md)
CWARN("%s: Peer %s touched %d buffers while I registered\n",
desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
total_md - desc->bd_md_count);
spin_unlock(&desc->bd_lock);
- CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n",
+ CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, mbits x%#llx-%#llx, portal %u\n",
desc->bd_md_count,
- desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink",
+ ptlrpc_is_bulk_op_get(desc->bd_type) ? "get-source" : "put-sink",
desc->bd_iov_count, desc->bd_nob,
- desc->bd_last_xid, req->rq_xid, desc->bd_portal);
+ desc->bd_last_mbits, req->rq_mbits, desc->bd_portal);
return 0;
}
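The match-bits layout replaces the old power-of-two XID alignment: rq_mbits names the final MD of a multi-bulk RPC, so the first MD's bits follow directly from the fragment count. A worked sketch with illustrative numbers (DIV_ROUND_UP is from linux/kernel.h; the helper name is not part of the patch):

/* e.g. 600 fragments with LNET_MAX_IOV == 256 -> total_md == 3;
 * rq_mbits == 0x1002 -> MDs matched at 0x1000, 0x1001, 0x1002,
 * so the server can infer total_md from the final bits alone.
 */
static __u64 first_bulk_mbits_sketch(__u64 rq_mbits, int iov_count)
{
	int total_md = DIV_ROUND_UP(iov_count, LNET_MAX_IOV);

	return rq_mbits - total_md + 1;
}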
@@ -521,6 +509,39 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
lustre_msg_set_conn_cnt(request->rq_reqmsg, imp->imp_conn_cnt);
lustre_msghdr_set_flags(request->rq_reqmsg, imp->imp_msghdr_flags);
+ /*
+ * If this is the first time the request is resent for EINPROGRESS,
+ * we need to allocate a new XID (see after_reply()); this differs
+ * from a resend caused by a reply timeout.
+ */
+ if (request->rq_nr_resend && list_empty(&request->rq_unreplied_list)) {
+ __u64 min_xid = 0;
+ /*
+ * resend for EINPROGRESS, allocate new xid to avoid reply
+ * reconstruction
+ */
+ spin_lock(&imp->imp_lock);
+ ptlrpc_assign_next_xid_nolock(request);
+ request->rq_mbits = request->rq_xid;
+ min_xid = ptlrpc_known_replied_xid(imp);
+ spin_unlock(&imp->imp_lock);
+
+ lustre_msg_set_last_xid(request->rq_reqmsg, min_xid);
+ DEBUG_REQ(D_RPCTRACE, request, "Allocating new xid for resend on EINPROGRESS");
+ } else if (request->rq_bulk) {
+ ptlrpc_set_bulk_mbits(request);
+ lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits);
+ }
+
+ if (list_empty(&request->rq_unreplied_list) ||
+ request->rq_xid <= imp->imp_known_replied_xid) {
+ DEBUG_REQ(D_ERROR, request,
+ "xid: %llu, replied: %llu, list_empty:%d\n",
+ request->rq_xid, imp->imp_known_replied_xid,
+ list_empty(&request->rq_unreplied_list));
+ LBUG();
+ }
+
/**
* For enabled AT all request should have AT_SUPPORT in the
* FULL import state when OBD_CONNECT_AT is set
@@ -537,8 +558,15 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
mpflag = cfs_memory_pressure_get_and_set();
rc = sptlrpc_cli_wrap_request(request);
- if (rc)
+ if (rc) {
+ /*
+ * set rq_sent so that this request is treated
+ * as a delayed send in the upper layers
+ */
+ if (rc == -ENOMEM)
+ request->rq_sent = ktime_get_seconds();
goto out;
+ }
/* bulk register should be done after wrap_request() */
if (request->rq_bulk) {
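The LBUG() added above enforces an invariant that every request about to hit the wire is tracked for reply ordering. Spelled out as a standalone predicate (the helper is illustrative; the field names are the patch's own):

static bool xid_accounting_sane_sketch(struct ptlrpc_request *req,
				       struct obd_import *imp)
{
	/* the request must sit on the unreplied list ... */
	return !list_empty(&req->rq_unreplied_list) &&
	       /* ... with an XID newer than anything already replied */
	       req->rq_xid > imp->imp_known_replied_xid;
}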
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
index d88faf61e740..7b6ffb195834 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/nrs.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
@@ -82,16 +82,9 @@ static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
{
- struct ptlrpc_nrs *nrs = policy->pol_nrs;
-
- if (policy->pol_desc->pd_ops->op_policy_stop) {
- spin_unlock(&nrs->nrs_lock);
-
+ if (policy->pol_desc->pd_ops->op_policy_stop)
policy->pol_desc->pd_ops->op_policy_stop(policy);
- spin_lock(&nrs->nrs_lock);
- }
-
LASSERT(list_empty(&policy->pol_list_queued));
LASSERT(policy->pol_req_queued == 0 &&
policy->pol_req_started == 0);
@@ -619,6 +612,12 @@ static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
goto out;
}
+ if (policy->pol_state != NRS_POL_STATE_STARTED &&
+ policy->pol_state != NRS_POL_STATE_STOPPED) {
+ rc = -EAGAIN;
+ goto out;
+ }
+
switch (opc) {
/**
* Unknown opcode, pass it down to the policy-specific control
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
index 871768511e8c..13f00b7cbbe5 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
@@ -42,11 +42,14 @@
#include "../../include/linux/libcfs/libcfs.h"
-#include "../include/obd_support.h"
-#include "../include/obd_class.h"
+#include "../include/lustre/ll_fiemap.h"
+
+#include "../include/llog_swab.h"
#include "../include/lustre_net.h"
+#include "../include/lustre_swab.h"
#include "../include/obd_cksum.h"
-#include "../include/lustre/ll_fiemap.h"
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
#include "ptlrpc_internal.h"
@@ -942,6 +945,25 @@ __u32 lustre_msg_get_opc(struct lustre_msg *msg)
}
EXPORT_SYMBOL(lustre_msg_get_opc);
+__u16 lustre_msg_get_tag(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_tag;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_tag);
+
__u64 lustre_msg_get_last_committed(struct lustre_msg *msg)
{
switch (msg->lm_magic) {
@@ -1236,6 +1258,37 @@ void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc)
}
}
+void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_last_xid = last_xid;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
+void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_tag = tag;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_tag);
+
void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions)
{
switch (msg->lm_magic) {
@@ -1373,6 +1426,21 @@ void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum)
}
}
+void lustre_msg_set_mbits(struct lustre_msg *msg, __u64 mbits)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_mbits = mbits;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
void ptlrpc_request_set_replen(struct ptlrpc_request *req)
{
int count = req_capsule_filled_sizes(&req->rq_pill, RCL_SERVER);
@@ -1442,7 +1510,7 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *b)
__swab32s(&b->pb_opc);
__swab32s(&b->pb_status);
__swab64s(&b->pb_last_xid);
- __swab64s(&b->pb_last_seen);
+ __swab16s(&b->pb_tag);
__swab64s(&b->pb_last_committed);
__swab64s(&b->pb_transno);
__swab32s(&b->pb_flags);
@@ -1456,7 +1524,12 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *b)
__swab64s(&b->pb_pre_versions[1]);
__swab64s(&b->pb_pre_versions[2]);
__swab64s(&b->pb_pre_versions[3]);
- CLASSERT(offsetof(typeof(*b), pb_padding) != 0);
+ __swab64s(&b->pb_mbits);
+ CLASSERT(offsetof(typeof(*b), pb_padding0) != 0);
+ CLASSERT(offsetof(typeof(*b), pb_padding1) != 0);
+ CLASSERT(offsetof(typeof(*b), pb_padding64_0) != 0);
+ CLASSERT(offsetof(typeof(*b), pb_padding64_1) != 0);
+ CLASSERT(offsetof(typeof(*b), pb_padding64_2) != 0);
/* While we need to maintain compatibility between
* clients and servers without ptlrpc_body_v2 (< 2.3)
* do not swab any fields beyond pb_jobid, as we are
@@ -1492,8 +1565,12 @@ void lustre_swab_connect(struct obd_connect_data *ocd)
__swab32s(&ocd->ocd_max_easize);
if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
__swab64s(&ocd->ocd_maxbytes);
+ if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS)
+ __swab16s(&ocd->ocd_maxmodrpcs);
+ CLASSERT(offsetof(typeof(*ocd), padding0));
CLASSERT(offsetof(typeof(*ocd), padding1) != 0);
- CLASSERT(offsetof(typeof(*ocd), padding2) != 0);
+ if (ocd->ocd_connect_flags & OBD_CONNECT_FLAGS2)
+ __swab64s(&ocd->ocd_connect_flags2);
CLASSERT(offsetof(typeof(*ocd), padding3) != 0);
CLASSERT(offsetof(typeof(*ocd), padding4) != 0);
CLASSERT(offsetof(typeof(*ocd), padding5) != 0);
@@ -1666,7 +1743,7 @@ void lustre_swab_mdt_body(struct mdt_body *b)
__swab32s(&b->mbo_eadatasize);
__swab32s(&b->mbo_aclsize);
__swab32s(&b->mbo_max_mdsize);
- __swab32s(&b->mbo_max_cookiesize);
+ CLASSERT(offsetof(typeof(*b), mbo_unused3));
__swab32s(&b->mbo_uid_h);
__swab32s(&b->mbo_gid_h);
CLASSERT(offsetof(typeof(*b), mbo_padding_5) != 0);
@@ -1675,9 +1752,10 @@ void lustre_swab_mdt_body(struct mdt_body *b)
void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b)
{
/* handle is opaque */
- __swab64s(&b->ioepoch);
- __swab32s(&b->flags);
- CLASSERT(offsetof(typeof(*b), padding) != 0);
+ /* mio_handle is opaque */
+ CLASSERT(offsetof(typeof(*b), mio_unused1));
+ CLASSERT(offsetof(typeof(*b), mio_unused2));
+ CLASSERT(offsetof(typeof(*b), mio_padding));
}
void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
@@ -1772,7 +1850,7 @@ void lustre_swab_fid2path(struct getinfo_fid2path *gf)
}
EXPORT_SYMBOL(lustre_swab_fid2path);
-static void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent)
+static void lustre_swab_fiemap_extent(struct fiemap_extent *fm_extent)
{
__swab64s(&fm_extent->fe_logical);
__swab64s(&fm_extent->fe_physical);
@@ -1781,7 +1859,7 @@ static void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent)
__swab32s(&fm_extent->fe_device);
}
-void lustre_swab_fiemap(struct ll_user_fiemap *fiemap)
+void lustre_swab_fiemap(struct fiemap *fiemap)
{
__u32 i;
@@ -1938,7 +2016,7 @@ static void lustre_swab_ldlm_res_id(struct ldlm_res_id *id)
__swab64s(&id->name[i]);
}
-static void lustre_swab_ldlm_policy_data(ldlm_wire_policy_data_t *d)
+static void lustre_swab_ldlm_policy_data(union ldlm_wire_policy_data *d)
{
/* the lock data is a union and the first two fields are always an
* extent so it's ok to process an LDLM_EXTENT and LDLM_FLOCK lock
@@ -2062,8 +2140,6 @@ static void dump_obdo(struct obdo *oa)
if (valid & OBD_MD_FLHANDLE)
CDEBUG(D_RPCTRACE, "obdo: o_handle = %lld\n",
oa->o_handle.cookie);
- if (valid & OBD_MD_FLCOOKIE)
- CDEBUG(D_RPCTRACE, "obdo: o_lcookie = (llog_cookie dumping not yet implemented)\n");
}
void dump_ost_body(struct ost_body *ob)
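lustre_msg_get_tag() and lustre_msg_set_tag() above follow the file's accessor convention for the new pb_tag slot used by multiple modify RPCs. A hypothetical round trip (the slot value and helper are illustrative):

static void tag_round_trip_sketch(struct ptlrpc_request *req, __u16 slot)
{
	/* advertise the RPC slot to the server ... */
	lustre_msg_set_tag(req->rq_reqmsg, slot);
	/* ... and read it back through the matching getter */
	LASSERT(lustre_msg_get_tag(req->rq_reqmsg) == slot);
}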
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c
index 5b9fb11c0b6b..94e9fa85d774 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pers.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c
@@ -43,6 +43,8 @@
void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
int mdidx)
{
+ int offset = mdidx * LNET_MAX_IOV;
+
CLASSERT(PTLRPC_MAX_BRW_PAGES < LI_POISON);
LASSERT(mdidx < desc->bd_md_max_brw);
@@ -50,23 +52,20 @@ void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
LASSERT(!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV |
LNET_MD_PHYS)));
- md->options |= LNET_MD_KIOV;
md->length = max(0, desc->bd_iov_count - mdidx * LNET_MAX_IOV);
md->length = min_t(unsigned int, LNET_MAX_IOV, md->length);
- if (desc->bd_enc_iov)
- md->start = &desc->bd_enc_iov[mdidx * LNET_MAX_IOV];
- else
- md->start = &desc->bd_iov[mdidx * LNET_MAX_IOV];
-}
-
-void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
- int pageoffset, int len)
-{
- lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count];
-
- kiov->bv_page = page;
- kiov->bv_offset = pageoffset;
- kiov->bv_len = len;
- desc->bd_iov_count++;
+ if (ptlrpc_is_bulk_desc_kiov(desc->bd_type)) {
+ md->options |= LNET_MD_KIOV;
+ if (GET_ENC_KIOV(desc))
+ md->start = &BD_GET_ENC_KIOV(desc, offset);
+ else
+ md->start = &BD_GET_KIOV(desc, offset);
+ } else {
+ md->options |= LNET_MD_IOVEC;
+ if (GET_ENC_KVEC(desc))
+ md->start = &BD_GET_ENC_KVEC(desc, offset);
+ else
+ md->start = &BD_GET_KVEC(desc, offset);
+ }
}
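ptlrpc_fill_bulk_md() above slices a bulk descriptor into LNET_MAX_IOV-sized MDs, with the offset selecting where MD number mdidx starts. The per-MD length computation, restated as a sketch (the helper name is illustrative):

static unsigned int md_frag_count_sketch(int iov_count, int mdidx)
{
	/* fragments remaining once earlier MDs are accounted for */
	int remaining = iov_count - mdidx * LNET_MAX_IOV;

	return min_t(unsigned int, LNET_MAX_IOV, max(0, remaining));
}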
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
index f14d193287da..e0f859ca6223 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
@@ -55,8 +55,11 @@ int ptlrpcd_start(struct ptlrpcd_ctl *pc);
/* client.c */
void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
unsigned int service_time);
-struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
- unsigned type, unsigned portal);
+struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned int nfrags,
+ unsigned int max_brw,
+ enum ptlrpc_bulk_op_type type,
+ unsigned int portal,
+ const struct ptlrpc_bulk_frag_ops *ops);
int ptlrpc_request_cache_init(void);
void ptlrpc_request_cache_fini(void);
struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags);
@@ -67,6 +70,10 @@ void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
int ptlrpc_expired_set(void *data);
int ptlrpc_set_next_timeout(struct ptlrpc_request_set *);
void ptlrpc_resend_req(struct ptlrpc_request *request);
+void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req);
+void ptlrpc_assign_next_xid_nolock(struct ptlrpc_request *req);
+__u64 ptlrpc_known_replied_xid(struct obd_import *imp);
+void ptlrpc_add_unreplied(struct ptlrpc_request *req);
/* events.c */
int ptlrpc_init_portals(void);
@@ -226,8 +233,6 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink);
/* pers.c */
void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
int mdcnt);
-void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
- int pageoffset, int len);
/* pack_generic.c */
struct ptlrpc_reply_state *
@@ -322,6 +327,7 @@ static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req)
INIT_LIST_HEAD(&cr->cr_set_chain);
INIT_LIST_HEAD(&cr->cr_ctx_chain);
+ INIT_LIST_HEAD(&cr->cr_unreplied_list);
init_waitqueue_head(&cr->cr_reply_waitq);
init_waitqueue_head(&cr->cr_set_waitq);
}
@@ -338,4 +344,24 @@ static inline void ptlrpc_srv_req_init(struct ptlrpc_request *req)
INIT_LIST_HEAD(&sr->sr_hist_list);
}
+static inline bool ptlrpc_req_is_connect(struct ptlrpc_request *req)
+{
+	__u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+	return opc == MDS_CONNECT || opc == OST_CONNECT ||
+	       opc == MGS_CONNECT;
+}
+
+static inline bool ptlrpc_req_is_disconnect(struct ptlrpc_request *req)
+{
+	__u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+	return opc == MDS_DISCONNECT || opc == OST_DISCONNECT ||
+	       opc == MGS_DISCONNECT;
+}
+
#endif /* PTLRPC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
index 405faf0dc9fc..c00449036884 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/recover.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c
@@ -111,7 +111,9 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
* all of it's requests being replayed, it's safe to
* use a cursor to accelerate the search
*/
- imp->imp_replay_cursor = imp->imp_replay_cursor->next;
+ if (!imp->imp_resend_replay ||
+ imp->imp_replay_cursor == &imp->imp_committed_list)
+ imp->imp_replay_cursor = imp->imp_replay_cursor->next;
while (imp->imp_replay_cursor !=
&imp->imp_committed_list) {
@@ -155,10 +157,24 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
spin_lock(&imp->imp_lock);
+ /* The resend replay request may have been removed from the
+ * unreplied list.
+ */
+ if (req && imp->imp_resend_replay &&
+ list_empty(&req->rq_unreplied_list)) {
+ ptlrpc_add_unreplied(req);
+ imp->imp_known_replied_xid = ptlrpc_known_replied_xid(imp);
+ }
+
imp->imp_resend_replay = 0;
spin_unlock(&imp->imp_lock);
if (req) {
+ /* The request should have been added back to the unreplied list
+ * by ptlrpc_prepare_replay().
+ */
+ LASSERT(!list_empty(&req->rq_unreplied_list));
+
rc = ptlrpc_replay_req(req);
if (rc) {
CERROR("recovery replay error %d for req %llu\n",
@@ -194,7 +210,13 @@ int ptlrpc_resend(struct obd_import *imp)
LASSERTF((long)req > PAGE_SIZE && req != LP_POISON,
"req %p bad\n", req);
LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
- if (!ptlrpc_no_resend(req))
+
+ /*
+ * If the request is allowed to be sent during replay and has
+ * not timed out yet, it does not need to be resent.
+ */
+ if (!ptlrpc_no_resend(req) &&
+ (req->rq_timedout || !req->rq_allow_replay))
ptlrpc_resend_req(req);
}
spin_unlock(&imp->imp_lock);
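The resend condition above folds three flags into one decision; as a standalone predicate (the helper name is illustrative):

static bool should_resend_sketch(struct ptlrpc_request *req)
{
	if (ptlrpc_no_resend(req))
		return false;
	/* replay-allowed requests that haven't timed out stay in flight */
	return req->rq_timedout || !req->rq_allow_replay;
}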
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c
index a7416cd9ac71..e860df7c45a2 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c
@@ -379,7 +379,7 @@ int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
if (!req->rq_cli_ctx) {
CERROR("req %p: fail to get context\n", req);
- return -ENOMEM;
+ return -ECONNREFUSED;
}
return 0;
@@ -515,6 +515,13 @@ static int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC));
+ } else if (unlikely(!test_bit(PTLRPC_CTX_UPTODATE_BIT, &newctx->cc_flags))) {
+ /*
+ * new ctx not up to date yet
+ */
+ CDEBUG(D_SEC,
+ "ctx (%p, fl %lx) doesn't switch, not up to date yet\n",
+ newctx, newctx->cc_flags);
} else {
/*
* it's possible newctx == oldctx if we're switching
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index b2cc5ea6cb93..2fe9085e2034 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -108,6 +108,7 @@ static struct ptlrpc_enc_page_pool {
unsigned long epp_st_lowfree; /* lowest free pages reached */
unsigned int epp_st_max_wqlen; /* highest waitqueue length */
unsigned long epp_st_max_wait; /* in jiffies */
+ unsigned long epp_st_outofmem; /* # of out of mem requests */
/*
* pointers to pools
*/
@@ -139,7 +140,8 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
"cache missing: %lu\n"
"low free mark: %lu\n"
"max waitqueue depth: %u\n"
- "max wait time: %ld/%lu\n",
+ "max wait time: %ld/%lu\n"
+ "out of mem: %lu\n",
totalram_pages,
PAGES_PER_POOL,
page_pools.epp_max_pages,
@@ -158,7 +160,8 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
page_pools.epp_st_lowfree,
page_pools.epp_st_max_wqlen,
page_pools.epp_st_max_wait,
- msecs_to_jiffies(MSEC_PER_SEC));
+ msecs_to_jiffies(MSEC_PER_SEC),
+ page_pools.epp_st_outofmem);
spin_unlock(&page_pools.epp_lock);
@@ -306,12 +309,30 @@ static inline void enc_pools_wakeup(void)
}
}
+/*
+ * Export the number of free pages in the pool
+ */
+int get_free_pages_in_pool(void)
+{
+ return page_pools.epp_free_pages;
+}
+
+/*
+ * Let outside world know if enc_pool full capacity is reached
+ */
+int pool_is_at_full_capacity(void)
+{
+ return (page_pools.epp_total_pages == page_pools.epp_max_pages);
+}
+
void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
{
int p_idx, g_idx;
int i;
- if (!desc->bd_enc_iov)
+ LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
+
+ if (!GET_ENC_KIOV(desc))
return;
LASSERT(desc->bd_iov_count > 0);
@@ -326,12 +347,12 @@ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
LASSERT(page_pools.epp_pools[p_idx]);
for (i = 0; i < desc->bd_iov_count; i++) {
- LASSERT(desc->bd_enc_iov[i].bv_page);
+ LASSERT(BD_GET_ENC_KIOV(desc, i).bv_page);
LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
LASSERT(!page_pools.epp_pools[p_idx][g_idx]);
page_pools.epp_pools[p_idx][g_idx] =
- desc->bd_enc_iov[i].bv_page;
+ BD_GET_ENC_KIOV(desc, i).bv_page;
if (++g_idx == PAGES_PER_POOL) {
p_idx++;
@@ -345,8 +366,8 @@ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
spin_unlock(&page_pools.epp_lock);
- kfree(desc->bd_enc_iov);
- desc->bd_enc_iov = NULL;
+ kfree(GET_ENC_KIOV(desc));
+ GET_ENC_KIOV(desc) = NULL;
}
static inline void enc_pools_alloc(void)
@@ -404,6 +425,7 @@ int sptlrpc_enc_pool_init(void)
page_pools.epp_st_lowfree = 0;
page_pools.epp_st_max_wqlen = 0;
page_pools.epp_st_max_wait = 0;
+ page_pools.epp_st_outofmem = 0;
enc_pools_alloc();
if (!page_pools.epp_pools)
@@ -431,13 +453,14 @@ void sptlrpc_enc_pool_fini(void)
if (page_pools.epp_st_access > 0) {
CDEBUG(D_SEC,
- "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%ld\n",
+ "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%ld, out of mem %lu\n",
page_pools.epp_st_max_pages, page_pools.epp_st_grows,
page_pools.epp_st_grow_fails,
page_pools.epp_st_shrinks, page_pools.epp_st_access,
page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
page_pools.epp_st_max_wait,
- msecs_to_jiffies(MSEC_PER_SEC));
+ msecs_to_jiffies(MSEC_PER_SEC),
+ page_pools.epp_st_outofmem);
}
}
@@ -520,10 +543,11 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
for (i = 0; i < desc->bd_iov_count; i++) {
- cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].bv_page,
- desc->bd_iov[i].bv_offset &
+ cfs_crypto_hash_update_page(hdesc,
+ BD_GET_KIOV(desc, i).bv_page,
+ BD_GET_KIOV(desc, i).bv_offset &
~PAGE_MASK,
- desc->bd_iov[i].bv_len);
+ BD_GET_KIOV(desc, i).bv_len);
}
if (hashsize > buflen) {
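get_free_pages_in_pool() and pool_is_at_full_capacity(), added earlier in this file, let callers gauge encryption-pool pressure before committing to a pooled allocation. A hypothetical caller-side policy (the decision itself is illustrative, not part of this patch):

static bool enc_pool_worth_waiting_sketch(void)
{
	/* waiting makes sense while the pool has stock or can still grow */
	return get_free_pages_in_pool() > 0 || !pool_is_at_full_capacity();
}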
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
index cd305bcb334a..c5e7a2309fce 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
@@ -153,14 +153,16 @@ static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
char *ptr;
unsigned int off, i;
+ LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
+
for (i = 0; i < desc->bd_iov_count; i++) {
- if (desc->bd_iov[i].bv_len == 0)
+ if (!BD_GET_KIOV(desc, i).bv_len)
continue;
- ptr = kmap(desc->bd_iov[i].bv_page);
- off = desc->bd_iov[i].bv_offset & ~PAGE_MASK;
+ ptr = kmap(BD_GET_KIOV(desc, i).bv_page);
+ off = BD_GET_KIOV(desc, i).bv_offset & ~PAGE_MASK;
ptr[off] ^= 0x1;
- kunmap(desc->bd_iov[i].bv_page);
+ kunmap(BD_GET_KIOV(desc, i).bv_page);
return;
}
}
@@ -352,11 +354,11 @@ int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
/* fix the actual data size */
for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
- if (desc->bd_iov[i].bv_len + nob > desc->bd_nob_transferred) {
- desc->bd_iov[i].bv_len =
- desc->bd_nob_transferred - nob;
- }
- nob += desc->bd_iov[i].bv_len;
+ struct bio_vec *bv = &BD_GET_KIOV(desc, i);
+
+ /* clamp in place so the descriptor reflects what was transferred */
+ if (bv->bv_len + nob > desc->bd_nob_transferred)
+ bv->bv_len = desc->bd_nob_transferred - nob;
+ nob += bv->bv_len;
}
rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
index 72f39308eebb..70c70558e177 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/service.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -343,9 +343,9 @@ ptlrpc_server_nthreads_check(struct ptlrpc_service *svc,
struct ptlrpc_service_conf *conf)
{
struct ptlrpc_service_thr_conf *tc = &conf->psc_thr;
- unsigned init;
- unsigned total;
- unsigned nthrs;
+ unsigned int init;
+ unsigned int total;
+ unsigned int nthrs;
int weight;
/*
@@ -2541,8 +2541,9 @@ int ptlrpc_hr_init(void)
hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i);
hrp->hrp_nthrs /= weight;
+ if (hrp->hrp_nthrs == 0)
+ hrp->hrp_nthrs = 1;
- LASSERT(hrp->hrp_nthrs > 0);
hrp->hrp_thrs =
kzalloc_node(hrp->hrp_nthrs * sizeof(*hrt), GFP_NOFS,
cfs_cpt_spread_node(ptlrpc_hr.hr_cpt_table,
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
index b05b1f935e4c..a04e36cf6dd4 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -195,49 +195,29 @@ void lustre_assert_wire_constants(void)
LASSERTF(REINT_MAX == 10, "found %lld\n",
(long long)REINT_MAX);
LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)DISP_IT_EXECD);
+ (unsigned int)DISP_IT_EXECD);
LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)DISP_LOOKUP_EXECD);
+ (unsigned int)DISP_LOOKUP_EXECD);
LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)DISP_LOOKUP_NEG);
+ (unsigned int)DISP_LOOKUP_NEG);
LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n",
- (unsigned)DISP_LOOKUP_POS);
+ (unsigned int)DISP_LOOKUP_POS);
LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)DISP_OPEN_CREATE);
+ (unsigned int)DISP_OPEN_CREATE);
LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)DISP_OPEN_OPEN);
+ (unsigned int)DISP_OPEN_OPEN);
LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n",
- (unsigned)DISP_ENQ_COMPLETE);
+ (unsigned int)DISP_ENQ_COMPLETE);
LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n",
- (unsigned)DISP_ENQ_OPEN_REF);
+ (unsigned int)DISP_ENQ_OPEN_REF);
LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n",
- (unsigned)DISP_ENQ_CREATE_REF);
+ (unsigned int)DISP_ENQ_CREATE_REF);
LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n",
- (unsigned)DISP_OPEN_LOCK);
+ (unsigned int)DISP_OPEN_LOCK);
LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n",
(long long)MDS_STATUS_CONN);
LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n",
(long long)MDS_STATUS_LOV);
- LASSERTF(LUSTRE_BFLAG_UNCOMMITTED_WRITES == 1, "found %lld\n",
- (long long)LUSTRE_BFLAG_UNCOMMITTED_WRITES);
- LASSERTF(MF_SOM_CHANGE == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)MF_SOM_CHANGE);
- LASSERTF(MF_EPOCH_OPEN == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)MF_EPOCH_OPEN);
- LASSERTF(MF_EPOCH_CLOSE == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)MF_EPOCH_CLOSE);
- LASSERTF(MF_MDC_CANCEL_FID1 == 0x00000008UL, "found 0x%.8xUL\n",
- (unsigned)MF_MDC_CANCEL_FID1);
- LASSERTF(MF_MDC_CANCEL_FID2 == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)MF_MDC_CANCEL_FID2);
- LASSERTF(MF_MDC_CANCEL_FID3 == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)MF_MDC_CANCEL_FID3);
- LASSERTF(MF_MDC_CANCEL_FID4 == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned)MF_MDC_CANCEL_FID4);
- LASSERTF(MF_SOM_AU == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned)MF_SOM_AU);
- LASSERTF(MF_GETATTR_LOCK == 0x00000100UL, "found 0x%.8xUL\n",
- (unsigned)MF_GETATTR_LOCK);
LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n",
(long long)MDS_ATTR_MODE);
LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n",
@@ -420,15 +400,13 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n",
(long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid));
LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)LMAI_RELEASED);
+ (unsigned int)LMAI_RELEASED);
LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)LMAC_HSM);
- LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)LMAC_SOM);
+ (unsigned int)LMAC_HSM);
LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)LMAC_NOT_IN_OI);
+ (unsigned int)LMAC_NOT_IN_OI);
LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
- (unsigned)LMAC_FID_ON_OST);
+ (unsigned int)LMAC_FID_ON_OST);
/* Checks for struct ost_id */
LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n",
@@ -478,11 +456,11 @@ void lustre_assert_wire_constants(void)
LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
(long long)FID_SEQ_LOV_DEFAULT);
LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)FID_OID_SPECIAL_BFL);
+ (unsigned int)FID_OID_SPECIAL_BFL);
LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)FID_OID_DOT_LUSTRE);
+ (unsigned int)FID_OID_DOT_LUSTRE);
LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)FID_OID_DOT_LUSTRE_OBF);
+ (unsigned int)FID_OID_DOT_LUSTRE_OBF);
/* Checks for struct lu_dirent */
LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n",
@@ -512,11 +490,11 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n",
(long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0]));
LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)LUDA_FID);
+ (unsigned int)LUDA_FID);
LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)LUDA_TYPE);
+ (unsigned int)LUDA_TYPE);
LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)LUDA_64BITHASH);
+ (unsigned int)LUDA_64BITHASH);
/* Checks for struct luda_type */
LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n",
@@ -635,10 +613,18 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_xid));
LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == 8, "found %lld\n",
(long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == 32, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_seen));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_tag) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_tag));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding0) == 34, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding0));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding1) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding1));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1));
LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == 40, "found %lld\n",
(long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_committed));
LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == 8, "found %lld\n",
@@ -680,10 +666,22 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct ptlrpc_body_v3, pb_pre_versions));
LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == 32, "found %lld\n",
(long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == 120, "found %lld\n",
- (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == 32, "found %lld\n",
- (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_mbits) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_mbits));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_0) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_0));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_1) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_1));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_2) == 144, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_2));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2));
CLASSERT(LUSTRE_JOBID_SIZE == 32);
LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n",
(long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid));
@@ -713,10 +711,18 @@ void lustre_assert_wire_constants(void)
(int)offsetof(struct ptlrpc_body_v3, pb_last_xid), (int)offsetof(struct ptlrpc_body_v2, pb_last_xid));
LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid), "%d != %d\n",
(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == (int)offsetof(struct ptlrpc_body_v2, pb_last_seen), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_last_seen), (int)offsetof(struct ptlrpc_body_v2, pb_last_seen));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_tag) == (int)offsetof(struct ptlrpc_body_v2, pb_tag), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_tag), (int)offsetof(struct ptlrpc_body_v2, pb_tag));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_tag), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_tag));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding0) == (int)offsetof(struct ptlrpc_body_v2, pb_padding0), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding0), (int)offsetof(struct ptlrpc_body_v2, pb_padding0));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding0), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding0));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding1) == (int)offsetof(struct ptlrpc_body_v2, pb_padding1), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding1), (int)offsetof(struct ptlrpc_body_v2, pb_padding1));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding1), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding1));
LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == (int)offsetof(struct ptlrpc_body_v2, pb_last_committed), "%d != %d\n",
(int)offsetof(struct ptlrpc_body_v3, pb_last_committed), (int)offsetof(struct ptlrpc_body_v2, pb_last_committed));
LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed), "%d != %d\n",
@@ -757,10 +763,22 @@ void lustre_assert_wire_constants(void)
(int)offsetof(struct ptlrpc_body_v3, pb_pre_versions), (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions));
LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions), "%d != %d\n",
(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions));
- LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == (int)offsetof(struct ptlrpc_body_v2, pb_padding), "%d != %d\n",
- (int)offsetof(struct ptlrpc_body_v3, pb_padding), (int)offsetof(struct ptlrpc_body_v2, pb_padding));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_mbits) == (int)offsetof(struct ptlrpc_body_v2, pb_mbits), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_mbits), (int)offsetof(struct ptlrpc_body_v2, pb_mbits));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_mbits), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_mbits));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_0) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_0), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding64_0), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_0));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_0), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_0));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_1) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_1), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding64_1), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_1));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_1), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_1));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_2) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_2), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding64_2), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_2));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_2), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_2));
LASSERTF(MSG_PTLRPC_BODY_OFF == 0, "found %lld\n",
(long long)MSG_PTLRPC_BODY_OFF);
LASSERTF(REQ_REC_OFF == 1, "found %lld\n",
@@ -802,41 +820,41 @@ void lustre_assert_wire_constants(void)
LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n",
(long long)MSGHDR_CKSUM_INCOMPAT18);
LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n",
- (unsigned)MSG_OP_FLAG_MASK);
+ (unsigned int)MSG_OP_FLAG_MASK);
LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n",
(long long)MSG_OP_FLAG_SHIFT);
LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n",
- (unsigned)MSG_GEN_FLAG_MASK);
+ (unsigned int)MSG_GEN_FLAG_MASK);
LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)MSG_LAST_REPLAY);
+ (unsigned int)MSG_LAST_REPLAY);
LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)MSG_RESENT);
+ (unsigned int)MSG_RESENT);
LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)MSG_REPLAY);
+ (unsigned int)MSG_REPLAY);
LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)MSG_DELAY_REPLAY);
+ (unsigned int)MSG_DELAY_REPLAY);
LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)MSG_VERSION_REPLAY);
+ (unsigned int)MSG_VERSION_REPLAY);
LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned)MSG_REQ_REPLAY_DONE);
+ (unsigned int)MSG_REQ_REPLAY_DONE);
LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned)MSG_LOCK_REPLAY_DONE);
+ (unsigned int)MSG_LOCK_REPLAY_DONE);
LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_RECOVERING);
+ (unsigned int)MSG_CONNECT_RECOVERING);
LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_RECONNECT);
+ (unsigned int)MSG_CONNECT_RECONNECT);
LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_REPLAYABLE);
+ (unsigned int)MSG_CONNECT_REPLAYABLE);
LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_LIBCLIENT);
+ (unsigned int)MSG_CONNECT_LIBCLIENT);
LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_INITIAL);
+ (unsigned int)MSG_CONNECT_INITIAL);
LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_ASYNC);
+ (unsigned int)MSG_CONNECT_ASYNC);
LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_NEXT_VER);
+ (unsigned int)MSG_CONNECT_NEXT_VER);
LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_TRANSNO);
+ (unsigned int)MSG_CONNECT_TRANSNO);
/* Checks for struct obd_connect_data */
LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n",
@@ -905,14 +923,22 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, "found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes));
- LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 72, "found %lld\n",
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxmodrpcs) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_maxmodrpcs));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxmodrpcs) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxmodrpcs));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding0) == 74, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding0));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding0) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding0));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 76, "found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, padding1));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, "found %lld\n",
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding1));
- LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 80, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding2));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags2) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags2));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags2));
LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 88, "found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, padding3));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, "found %lld\n",
@@ -1075,14 +1101,24 @@ void lustre_assert_wire_constants(void)
OBD_CONNECT_LFSCK);
LASSERTF(OBD_CONNECT_UNLINK_CLOSE == 0x100000000000000ULL, "found 0x%.16llxULL\n",
OBD_CONNECT_UNLINK_CLOSE);
+ LASSERTF(OBD_CONNECT_MULTIMODRPCS == 0x200000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MULTIMODRPCS);
LASSERTF(OBD_CONNECT_DIR_STRIPE == 0x400000000000000ULL, "found 0x%.16llxULL\n",
OBD_CONNECT_DIR_STRIPE);
+ LASSERTF(OBD_CONNECT_SUBTREE == 0x800000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_SUBTREE);
+ LASSERTF(OBD_CONNECT_LOCK_AHEAD == 0x1000000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LOCK_AHEAD);
+ LASSERTF(OBD_CONNECT_OBDOPACK == 0x4000000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_OBDOPACK);
+ LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FLAGS2);
LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)OBD_CKSUM_CRC32);
+ (unsigned int)OBD_CKSUM_CRC32);
LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)OBD_CKSUM_ADLER);
+ (unsigned int)OBD_CKSUM_ADLER);
LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)OBD_CKSUM_CRC32C);
+ (unsigned int)OBD_CKSUM_CRC32C);
/* Checks for struct obdo */
LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n",
@@ -1239,8 +1275,6 @@ void lustre_assert_wire_constants(void)
OBD_MD_FLCKSUM);
LASSERTF(OBD_MD_FLQOS == (0x00200000ULL), "found 0x%.16llxULL\n",
OBD_MD_FLQOS);
- LASSERTF(OBD_MD_FLCOOKIE == (0x00800000ULL), "found 0x%.16llxULL\n",
- OBD_MD_FLCOOKIE);
LASSERTF(OBD_MD_FLGROUP == (0x01000000ULL), "found 0x%.16llxULL\n",
OBD_MD_FLGROUP);
LASSERTF(OBD_MD_FLFID == (0x02000000ULL), "found 0x%.16llxULL\n",
@@ -1394,13 +1428,13 @@ void lustre_assert_wire_constants(void)
(long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]));
CLASSERT(LOV_MAGIC_V3 == (0x0BD30000 | 0x0BD0));
LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)LOV_PATTERN_RAID0);
+ (unsigned int)LOV_PATTERN_RAID0);
LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)LOV_PATTERN_RAID1);
+ (unsigned int)LOV_PATTERN_RAID1);
LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n",
- (unsigned)LOV_PATTERN_FIRST);
+ (unsigned int)LOV_PATTERN_FIRST);
LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
- (unsigned)LOV_PATTERN_CMOBD);
+ (unsigned int)LOV_PATTERN_CMOBD);
/* Checks for struct lmv_mds_md_v1 */
LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
@@ -1542,6 +1576,8 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct obd_ioobj, ioo_bufcnt));
LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt));
+ LASSERTF(IOOBJ_MAX_BRW_BITS == 16, "found %lld\n",
+ (long long)IOOBJ_MAX_BRW_BITS);
/* Checks for union lquota_id */
LASSERTF((int)sizeof(union lquota_id) == 16, "found %lld\n",
@@ -1817,10 +1853,10 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct mdt_body, mbo_max_mdsize));
LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize) == 4, "found %lld\n",
(long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize));
- LASSERTF((int)offsetof(struct mdt_body, mbo_max_cookiesize) == 160, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, mbo_max_cookiesize));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize));
+ LASSERTF((int)offsetof(struct mdt_body, mbo_unused3) == 160, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mbo_unused3));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_unused3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mbo_unused3));
LASSERTF((int)offsetof(struct mdt_body, mbo_uid_h) == 164, "found %lld\n",
(long long)(int)offsetof(struct mdt_body, mbo_uid_h));
LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid_h) == 4, "found %lld\n",
@@ -1857,12 +1893,6 @@ void lustre_assert_wire_constants(void)
MDS_FMODE_CLOSED);
LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n",
MDS_FMODE_EXEC);
- LASSERTF(MDS_FMODE_EPOCH == 000001000000UL, "found 0%.11oUL\n",
- MDS_FMODE_EPOCH);
- LASSERTF(MDS_FMODE_TRUNC == 000002000000UL, "found 0%.11oUL\n",
- MDS_FMODE_TRUNC);
- LASSERTF(MDS_FMODE_SOM == 000004000000UL, "found 0%.11oUL\n",
- MDS_FMODE_SOM);
LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n",
MDS_OPEN_CREATED);
LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n",
@@ -1905,10 +1935,20 @@ void lustre_assert_wire_constants(void)
LUSTRE_IMMUTABLE_FL);
LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n",
LUSTRE_APPEND_FL);
+ LASSERTF(LUSTRE_NODUMP_FL == 0x00000040, "found 0x%.8x\n",
+ LUSTRE_NODUMP_FL);
LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n",
LUSTRE_NOATIME_FL);
+ LASSERTF(LUSTRE_INDEX_FL == 0x00001000, "found 0x%.8x\n",
+ LUSTRE_INDEX_FL);
LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n",
LUSTRE_DIRSYNC_FL);
+ LASSERTF(LUSTRE_TOPDIR_FL == 0x00020000, "found 0x%.8x\n",
+ LUSTRE_TOPDIR_FL);
+ LASSERTF(LUSTRE_DIRECTIO_FL == 0x00100000, "found 0x%.8x\n",
+ LUSTRE_DIRECTIO_FL);
+ LASSERTF(LUSTRE_INLINE_DATA_FL == 0x10000000, "found 0x%.8x\n",
+ LUSTRE_INLINE_DATA_FL);
LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n",
MDS_INODELOCK_LOOKUP);
LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n",
@@ -1921,22 +1961,22 @@ void lustre_assert_wire_constants(void)
/* Checks for struct mdt_ioepoch */
LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n",
(long long)(int)sizeof(struct mdt_ioepoch));
- LASSERTF((int)offsetof(struct mdt_ioepoch, handle) == 0, "found %lld\n",
- (long long)(int)offsetof(struct mdt_ioepoch, handle));
- LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->handle) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_ioepoch *)0)->handle));
- LASSERTF((int)offsetof(struct mdt_ioepoch, ioepoch) == 8, "found %lld\n",
- (long long)(int)offsetof(struct mdt_ioepoch, ioepoch));
- LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->ioepoch) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_ioepoch *)0)->ioepoch));
- LASSERTF((int)offsetof(struct mdt_ioepoch, flags) == 16, "found %lld\n",
- (long long)(int)offsetof(struct mdt_ioepoch, flags));
- LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_ioepoch *)0)->flags));
- LASSERTF((int)offsetof(struct mdt_ioepoch, padding) == 20, "found %lld\n",
- (long long)(int)offsetof(struct mdt_ioepoch, padding));
- LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->padding) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_ioepoch *)0)->padding));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, mio_handle) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, mio_handle));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_handle));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, mio_unused1) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, mio_unused1));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_unused1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_unused1));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, mio_unused2) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, mio_unused2));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_unused2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_unused2));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, mio_padding) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, mio_padding));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_padding));
/* Checks for struct mdt_rec_setattr */
LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n",
@@ -3520,21 +3560,21 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, "found %lld\n",
(long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx));
- /* Checks for struct ll_fiemap_info_key */
+ /* Checks for struct fiemap_info_key */
LASSERTF((int)sizeof(struct ll_fiemap_info_key) == 248, "found %lld\n",
(long long)(int)sizeof(struct ll_fiemap_info_key));
- LASSERTF((int)offsetof(struct ll_fiemap_info_key, name[8]) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_info_key, name[8]));
- LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]) == 1, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]));
- LASSERTF((int)offsetof(struct ll_fiemap_info_key, oa) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_info_key, oa));
- LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->oa) == 208, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->oa));
- LASSERTF((int)offsetof(struct ll_fiemap_info_key, fiemap) == 216, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_info_key, fiemap));
- LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap) == 32, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap));
+ LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_name[8]) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_name[8]));
+ LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_name[8]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_name[8]));
+ LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_oa) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_oa));
+ LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_oa) == 208, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_oa));
+ LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_fiemap) == 216, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_fiemap));
+ LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_fiemap) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_fiemap));
/* Checks for struct mgs_target_info */
LASSERTF((int)sizeof(struct mgs_target_info) == 4544, "found %lld\n",
@@ -3670,64 +3710,64 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]) == 1, "found %lld\n",
(long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]));
- /* Checks for struct ll_user_fiemap */
- LASSERTF((int)sizeof(struct ll_user_fiemap) == 32, "found %lld\n",
- (long long)(int)sizeof(struct ll_user_fiemap));
- LASSERTF((int)offsetof(struct ll_user_fiemap, fm_start) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ll_user_fiemap, fm_start));
- LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_start) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_start));
- LASSERTF((int)offsetof(struct ll_user_fiemap, fm_length) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ll_user_fiemap, fm_length));
- LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_length) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_length));
- LASSERTF((int)offsetof(struct ll_user_fiemap, fm_flags) == 16, "found %lld\n",
- (long long)(int)offsetof(struct ll_user_fiemap, fm_flags));
- LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_flags));
- LASSERTF((int)offsetof(struct ll_user_fiemap, fm_mapped_extents) == 20, "found %lld\n",
- (long long)(int)offsetof(struct ll_user_fiemap, fm_mapped_extents));
- LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents));
- LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extent_count) == 24, "found %lld\n",
- (long long)(int)offsetof(struct ll_user_fiemap, fm_extent_count));
- LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count));
- LASSERTF((int)offsetof(struct ll_user_fiemap, fm_reserved) == 28, "found %lld\n",
- (long long)(int)offsetof(struct ll_user_fiemap, fm_reserved));
- LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved));
- LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extents) == 32, "found %lld\n",
- (long long)(int)offsetof(struct ll_user_fiemap, fm_extents));
- LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extents) == 0, "found %lld\n",
- (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extents));
+ /* Checks for struct fiemap */
+ LASSERTF((int)sizeof(struct fiemap) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct fiemap));
+ LASSERTF((int)offsetof(struct fiemap, fm_start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap, fm_start));
+ LASSERTF((int)sizeof(((struct fiemap *)0)->fm_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap *)0)->fm_start));
+ LASSERTF((int)offsetof(struct fiemap, fm_length) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap, fm_length));
+ LASSERTF((int)sizeof(((struct fiemap *)0)->fm_length) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap *)0)->fm_length));
+ LASSERTF((int)offsetof(struct fiemap, fm_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap, fm_flags));
+ LASSERTF((int)sizeof(((struct fiemap *)0)->fm_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap *)0)->fm_flags));
+ LASSERTF((int)offsetof(struct fiemap, fm_mapped_extents) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap, fm_mapped_extents));
+ LASSERTF((int)sizeof(((struct fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap *)0)->fm_mapped_extents));
+ LASSERTF((int)offsetof(struct fiemap, fm_extent_count) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap, fm_extent_count));
+ LASSERTF((int)sizeof(((struct fiemap *)0)->fm_extent_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap *)0)->fm_extent_count));
+ LASSERTF((int)offsetof(struct fiemap, fm_reserved) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap, fm_reserved));
+ LASSERTF((int)sizeof(((struct fiemap *)0)->fm_reserved) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap *)0)->fm_reserved));
+ LASSERTF((int)offsetof(struct fiemap, fm_extents) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap, fm_extents));
+ LASSERTF((int)sizeof(((struct fiemap *)0)->fm_extents) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap *)0)->fm_extents));
CLASSERT(FIEMAP_FLAG_SYNC == 0x00000001);
CLASSERT(FIEMAP_FLAG_XATTR == 0x00000002);
CLASSERT(FIEMAP_FLAG_DEVICE_ORDER == 0x40000000);
- /* Checks for struct ll_fiemap_extent */
- LASSERTF((int)sizeof(struct ll_fiemap_extent) == 56, "found %lld\n",
- (long long)(int)sizeof(struct ll_fiemap_extent));
- LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_logical) == 0, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_extent, fe_logical));
- LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical));
- LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_physical) == 8, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_extent, fe_physical));
- LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical));
- LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_length) == 16, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_extent, fe_length));
- LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_length) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_length));
- LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 40, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_extent, fe_flags));
- LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags));
- LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 44, "found %lld\n",
- (long long)(int)offsetof(struct ll_fiemap_extent, fe_device));
- LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_device) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_device));
+ /* Checks for struct fiemap_extent */
+ LASSERTF((int)sizeof(struct fiemap_extent) == 56, "found %lld\n",
+ (long long)(int)sizeof(struct fiemap_extent));
+ LASSERTF((int)offsetof(struct fiemap_extent, fe_logical) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap_extent, fe_logical));
+ LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_logical) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_logical));
+ LASSERTF((int)offsetof(struct fiemap_extent, fe_physical) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap_extent, fe_physical));
+ LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_physical) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_physical));
+ LASSERTF((int)offsetof(struct fiemap_extent, fe_length) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap_extent, fe_length));
+ LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_length) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_length));
+ LASSERTF((int)offsetof(struct fiemap_extent, fe_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap_extent, fe_flags));
+ LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_flags));
+ LASSERTF((int)offsetof(struct fiemap_extent, fe_reserved[0]) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap_extent, fe_reserved[0]));
+ LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_reserved[0]) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_reserved[0]));
CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001);
CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002);
CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004);
@@ -4093,9 +4133,9 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n",
(long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len));
LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)HSM_FORCE_ACTION);
+ (unsigned int)HSM_FORCE_ACTION);
LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)HSM_GHOST_COPY);
+ (unsigned int)HSM_GHOST_COPY);
/* Checks for struct hsm_user_request */
LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n",
diff --git a/drivers/staging/lustre/sysfs-fs-lustre b/drivers/staging/lustre/sysfs-fs-lustre
index 20206ba965af..8691c6543a9c 100644
--- a/drivers/staging/lustre/sysfs-fs-lustre
+++ b/drivers/staging/lustre/sysfs-fs-lustre
@@ -11,7 +11,7 @@ Description:
Shows if the lustre module has pinger support.
"on" means yes and "off" means no.
-What: /sys/fs/lustre/health
+What: /sys/fs/lustre/health_check
Date: May 2015
Contact: "Oleg Drokin" <oleg.drokin@intel.com>
Description: