Diffstat (limited to 'drivers/staging/lustre')
194 files changed, 8607 insertions, 10095 deletions
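Most of this patch is mechanical kernel-style cleanup repeated across the 194 files: open-coded (1 << n) masks become the kernel's BIT(n) macro, the exported __u8/__u16/__u32/__u64 typedefs become the kernel-internal u8/u16/u32/u64, bare parameter types in prototypes gain names, "!= 0" and "== NULL" tests are shortened, and block comments are reflowed so the closing */ sits on its own line. A minimal sketch of the two most common patterns follows; the identifiers are hypothetical, but BIT() is the real macro from <linux/bitops.h> and expands to (1UL << (n)):

#include <linux/bitops.h>	/* BIT(n) is (1UL << (n)) */
#include <linux/types.h>	/* u32 replaces __u32 in kernel-only code */

#define EXAMPLE_FLAG_BIT	31

/* Before: (1 << 31) shifts into the sign bit of an int, which is
 * undefined behaviour; after: BIT(31) is 1UL << 31, well defined
 * and self-documenting.
 */
#define EXAMPLE_FLAG		BIT(EXAMPLE_FLAG_BIT)

static inline bool example_flag_is_set(u32 flags)
{
	return flags & EXAMPLE_FLAG;
}

The __u* names remain correct in UAPI headers shared with userspace; switching to u32/u64 here is appropriate because these lustre headers are kernel-internal.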
diff --git a/drivers/staging/lustre/include/linux/libcfs/curproc.h b/drivers/staging/lustre/include/linux/libcfs/curproc.h index be0675d8ff5e..1ea27c9e3708 100644 --- a/drivers/staging/lustre/include/linux/libcfs/curproc.h +++ b/drivers/staging/lustre/include/linux/libcfs/curproc.h @@ -53,7 +53,7 @@ #define current_pid() (current->pid) #define current_comm() (current->comm) -typedef __u32 cfs_cap_t; +typedef u32 cfs_cap_t; #define CFS_CAP_CHOWN 0 #define CFS_CAP_DAC_OVERRIDE 1 @@ -65,15 +65,15 @@ typedef __u32 cfs_cap_t; #define CFS_CAP_SYS_BOOT 23 #define CFS_CAP_SYS_RESOURCE 24 -#define CFS_CAP_FS_MASK ((1 << CFS_CAP_CHOWN) | \ - (1 << CFS_CAP_DAC_OVERRIDE) | \ - (1 << CFS_CAP_DAC_READ_SEARCH) | \ - (1 << CFS_CAP_FOWNER) | \ - (1 << CFS_CAP_FSETID) | \ - (1 << CFS_CAP_LINUX_IMMUTABLE) | \ - (1 << CFS_CAP_SYS_ADMIN) | \ - (1 << CFS_CAP_SYS_BOOT) | \ - (1 << CFS_CAP_SYS_RESOURCE)) +#define CFS_CAP_FS_MASK (BIT(CFS_CAP_CHOWN) | \ + BIT(CFS_CAP_DAC_OVERRIDE) | \ + BIT(CFS_CAP_DAC_READ_SEARCH) | \ + BIT(CFS_CAP_FOWNER) | \ + BIT(CFS_CAP_FSETID) | \ + BIT(CFS_CAP_LINUX_IMMUTABLE) | \ + BIT(CFS_CAP_SYS_ADMIN) | \ + BIT(CFS_CAP_SYS_BOOT) | \ + BIT(CFS_CAP_SYS_RESOURCE)) void cfs_cap_raise(cfs_cap_t cap); void cfs_cap_lower(cfs_cap_t cap); diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h index 3b92d38d37e2..cc2c0e97bb7e 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h @@ -61,7 +61,7 @@ sigset_t cfs_block_allsigs(void); sigset_t cfs_block_sigs(unsigned long sigs); sigset_t cfs_block_sigsinv(unsigned long sigs); -void cfs_restore_sigs(sigset_t); +void cfs_restore_sigs(sigset_t sigset); void cfs_clear_sigpending(void); /* @@ -71,7 +71,7 @@ void cfs_clear_sigpending(void); /* returns a random 32-bit integer */ unsigned int cfs_rand(void); /* seed the generator */ -void cfs_srand(unsigned int, unsigned int); +void cfs_srand(unsigned int seed1, unsigned int seed2); void cfs_get_random_bytes(void *buf, int size); #include "libcfs_debug.h" @@ -125,7 +125,6 @@ extern struct miscdevice libcfs_dev; /** * The path of debug log dump upcall script. 
*/ -extern char lnet_upcall[1024]; extern char lnet_debug_log_upcall[1024]; extern struct cfs_wi_sched *cfs_sched_rehash; diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h index 81d8079e3b5e..6d8752a368fa 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h @@ -92,7 +92,7 @@ struct cfs_cpt_table { /* node mask */ nodemask_t ctb_nodemask; /* version */ - __u64 ctb_version; + u64 ctb_version; }; static inline cpumask_t * @@ -211,7 +211,7 @@ int cfs_cpu_ht_nsiblings(int cpu); */ void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size); /* - * destory per-cpu-partition variable + * destroy per-cpu-partition variable */ void cfs_percpt_free(void *vars); int cfs_percpt_number(void *vars); diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h index 02be7d7608a5..8f34c5ddc63e 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h @@ -29,10 +29,12 @@ #define _LIBCFS_CRYPTO_H struct cfs_crypto_hash_type { - char *cht_name; /**< hash algorithm name, equal to - * format name for crypto api */ - unsigned int cht_key; /**< init key by default (valid for - * 4 bytes context like crc32, adler */ + char *cht_name; /*< hash algorithm name, equal to + * format name for crypto api + */ + unsigned int cht_key; /*< init key by default (valid for + * 4 bytes context like crc32, adler + */ unsigned int cht_size; /**< hash digest size */ }; @@ -135,7 +137,7 @@ static inline unsigned char cfs_crypto_hash_alg(const char *algname) enum cfs_crypto_hash_alg hash_alg; for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++) - if (strcmp(hash_types[hash_alg].cht_name, algname) == 0) + if (!strcmp(hash_types[hash_alg].cht_name, algname)) return hash_alg; return CFS_HASH_ALG_UNKNOWN; diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h index bdbbe934584c..fedb46dff696 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h @@ -39,8 +39,8 @@ extern int cfs_fail_err; extern wait_queue_head_t cfs_race_waitq; extern int cfs_race_state; -int __cfs_fail_check_set(__u32 id, __u32 value, int set); -int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set); +int __cfs_fail_check_set(u32 id, u32 value, int set); +int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set); enum { CFS_FAIL_LOC_NOSET = 0, @@ -55,11 +55,11 @@ enum { #define CFS_FAILED_BIT 30 /* CFS_FAILED is 0x40000000 */ -#define CFS_FAILED (1 << CFS_FAILED_BIT) +#define CFS_FAILED BIT(CFS_FAILED_BIT) #define CFS_FAIL_ONCE_BIT 31 /* CFS_FAIL_ONCE is 0x80000000 */ -#define CFS_FAIL_ONCE (1 << CFS_FAIL_ONCE_BIT) +#define CFS_FAIL_ONCE BIT(CFS_FAIL_ONCE_BIT) /* The following flags aren't made to be combined */ #define CFS_FAIL_SKIP 0x20000000 /* skip N times then fail */ @@ -69,14 +69,14 @@ enum { #define CFS_FAULT 0x02000000 /* match any CFS_FAULT_CHECK */ -static inline bool CFS_FAIL_PRECHECK(__u32 id) +static inline bool CFS_FAIL_PRECHECK(u32 id) { - return cfs_fail_loc != 0 && + return cfs_fail_loc && ((cfs_fail_loc & CFS_FAIL_MASK_LOC) == (id & CFS_FAIL_MASK_LOC) || - (cfs_fail_loc & id & CFS_FAULT)); + (cfs_fail_loc & id & CFS_FAULT)); } -static inline int 
cfs_fail_check_set(__u32 id, __u32 value, +static inline int cfs_fail_check_set(u32 id, u32 value, int set, int quiet) { int ret = 0; @@ -103,28 +103,34 @@ static inline int cfs_fail_check_set(__u32 id, __u32 value, #define CFS_FAIL_CHECK_QUIET(id) \ cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1) -/* If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1, - * otherwise return 0 */ +/* + * If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1, + * otherwise return 0 + */ #define CFS_FAIL_CHECK_VALUE(id, value) \ cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0) #define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \ cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1) -/* If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1, - * otherwise return 0 */ +/* + * If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1, + * otherwise return 0 + */ #define CFS_FAIL_CHECK_ORSET(id, value) \ cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0) #define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \ cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1) -/* If id hit cfs_fail_loc, cfs_fail_loc = value and return 1, - * otherwise return 0 */ +/* + * If id hit cfs_fail_loc, cfs_fail_loc = value and return 1, + * otherwise return 0 + */ #define CFS_FAIL_CHECK_RESET(id, value) \ cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0) #define CFS_FAIL_CHECK_RESET_QUIET(id, value) \ cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1) -static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) +static inline int cfs_fail_timeout_set(u32 id, u32 value, int ms, int set) { if (unlikely(CFS_FAIL_PRECHECK(id))) return __cfs_fail_timeout_set(id, value, ms, set); @@ -138,8 +144,10 @@ static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) #define CFS_FAIL_TIMEOUT_MS(id, ms) \ cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET) -/* If id hit cfs_fail_loc, cfs_fail_loc |= value and - * sleep seconds or milliseconds */ +/* + * If id hit cfs_fail_loc, cfs_fail_loc |= value and + * sleep seconds or milliseconds + */ #define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \ cfs_fail_timeout_set(id, value, secs * 1000, CFS_FAIL_LOC_ORSET) @@ -152,13 +160,14 @@ static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) #define CFS_FAULT_CHECK(id) \ CFS_FAIL_CHECK(CFS_FAULT | (id)) -/* The idea here is to synchronise two threads to force a race. The +/* + * The idea here is to synchronise two threads to force a race. The * first thread that calls this with a matching fail_loc is put to * sleep. The next thread that calls with the same fail_loc wakes up - * the first and continues. */ -static inline void cfs_race(__u32 id) + * the first and continues. 
+ */ +static inline void cfs_race(u32 id) { - if (CFS_FAIL_PRECHECK(id)) { if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) { int rc; @@ -166,7 +175,7 @@ static inline void cfs_race(__u32 id) cfs_race_state = 0; CERROR("cfs_race id %x sleeping\n", id); rc = wait_event_interruptible(cfs_race_waitq, - cfs_race_state != 0); + !!cfs_race_state); CERROR("cfs_fail_race id %x awake, rc=%d\n", id, rc); } else { CERROR("cfs_fail_race id %x waking\n", id); diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h index 6949a1846635..0cc2fc465c1a 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h @@ -57,8 +57,10 @@ /** disable debug */ #define CFS_HASH_DEBUG_NONE 0 -/** record hash depth and output to console when it's too deep, - * computing overhead is low but consume more memory */ +/* + * record hash depth and output to console when it's too deep, + * computing overhead is low but consume more memory + */ #define CFS_HASH_DEBUG_1 1 /** expensive, check key validation */ #define CFS_HASH_DEBUG_2 2 @@ -87,8 +89,8 @@ union cfs_hash_lock { */ struct cfs_hash_bucket { union cfs_hash_lock hsb_lock; /**< bucket lock */ - __u32 hsb_count; /**< current entries */ - __u32 hsb_version; /**< change version */ + u32 hsb_count; /**< current entries */ + u32 hsb_version; /**< change version */ unsigned int hsb_index; /**< index of bucket */ int hsb_depmax; /**< max depth on bucket */ long hsb_head[0]; /**< hash-head array */ @@ -123,38 +125,40 @@ enum cfs_hash_tag { * . Some functions will be disabled with this flag, i.e: * cfs_hash_for_each_empty, cfs_hash_rehash */ - CFS_HASH_NO_LOCK = 1 << 0, + CFS_HASH_NO_LOCK = BIT(0), /** no bucket lock, use one spinlock to protect the whole hash */ - CFS_HASH_NO_BKTLOCK = 1 << 1, + CFS_HASH_NO_BKTLOCK = BIT(1), /** rwlock to protect bucket */ - CFS_HASH_RW_BKTLOCK = 1 << 2, + CFS_HASH_RW_BKTLOCK = BIT(2), /** spinlock to protect bucket */ - CFS_HASH_SPIN_BKTLOCK = 1 << 3, + CFS_HASH_SPIN_BKTLOCK = BIT(3), /** always add new item to tail */ - CFS_HASH_ADD_TAIL = 1 << 4, + CFS_HASH_ADD_TAIL = BIT(4), /** hash-table doesn't have refcount on item */ - CFS_HASH_NO_ITEMREF = 1 << 5, + CFS_HASH_NO_ITEMREF = BIT(5), /** big name for param-tree */ - CFS_HASH_BIGNAME = 1 << 6, + CFS_HASH_BIGNAME = BIT(6), /** track global count */ - CFS_HASH_COUNTER = 1 << 7, + CFS_HASH_COUNTER = BIT(7), /** rehash item by new key */ - CFS_HASH_REHASH_KEY = 1 << 8, + CFS_HASH_REHASH_KEY = BIT(8), /** Enable dynamic hash resizing */ - CFS_HASH_REHASH = 1 << 9, + CFS_HASH_REHASH = BIT(9), /** can shrink hash-size */ - CFS_HASH_SHRINK = 1 << 10, + CFS_HASH_SHRINK = BIT(10), /** assert hash is empty on exit */ - CFS_HASH_ASSERT_EMPTY = 1 << 11, + CFS_HASH_ASSERT_EMPTY = BIT(11), /** record hlist depth */ - CFS_HASH_DEPTH = 1 << 12, + CFS_HASH_DEPTH = BIT(12), /** * rehash is always scheduled in a different thread, so current * change on hash table is non-blocking */ - CFS_HASH_NBLK_CHANGE = 1 << 13, - /** NB, we typed hs_flags as __u16, please change it - * if you need to extend >=16 flags */ + CFS_HASH_NBLK_CHANGE = BIT(13), + /** + * NB, we typed hs_flags as u16, please change it + * if you need to extend >=16 flags + */ }; /** most used attributes */ @@ -201,8 +205,10 @@ enum cfs_hash_tag { */ struct cfs_hash { - /** serialize with rehash, or serialize all operations if - * the hash-table has CFS_HASH_NO_BKTLOCK */ + /** + * 
serialize with rehash, or serialize all operations if + * the hash-table has CFS_HASH_NO_BKTLOCK + */ union cfs_hash_lock hs_lock; /** hash operations */ struct cfs_hash_ops *hs_ops; @@ -215,31 +221,31 @@ struct cfs_hash { /** total number of items on this hash-table */ atomic_t hs_count; /** hash flags, see cfs_hash_tag for detail */ - __u16 hs_flags; + u16 hs_flags; /** # of extra-bytes for bucket, for user saving extended attributes */ - __u16 hs_extra_bytes; + u16 hs_extra_bytes; /** wants to iterate */ - __u8 hs_iterating; + u8 hs_iterating; /** hash-table is dying */ - __u8 hs_exiting; + u8 hs_exiting; /** current hash bits */ - __u8 hs_cur_bits; + u8 hs_cur_bits; /** min hash bits */ - __u8 hs_min_bits; + u8 hs_min_bits; /** max hash bits */ - __u8 hs_max_bits; + u8 hs_max_bits; /** bits for rehash */ - __u8 hs_rehash_bits; + u8 hs_rehash_bits; /** bits for each bucket */ - __u8 hs_bkt_bits; + u8 hs_bkt_bits; /** resize min threshold */ - __u16 hs_min_theta; + u16 hs_min_theta; /** resize max threshold */ - __u16 hs_max_theta; + u16 hs_max_theta; /** resize count */ - __u32 hs_rehash_count; + u32 hs_rehash_count; /** # of iterators (caller of cfs_hash_for_each_*) */ - __u32 hs_iterators; + u32 hs_iterators; /** rehash workitem */ struct cfs_workitem hs_rehash_wi; /** refcount on this hash table */ @@ -291,8 +297,8 @@ struct cfs_hash_hlist_ops { struct cfs_hash_ops { /** return hashed value from @key */ - unsigned (*hs_hash)(struct cfs_hash *hs, const void *key, - unsigned mask); + unsigned int (*hs_hash)(struct cfs_hash *hs, const void *key, + unsigned int mask); /** return key address of @hnode */ void * (*hs_key)(struct hlist_node *hnode); /** copy key from @hnode to @key */ @@ -317,110 +323,112 @@ struct cfs_hash_ops { /** total number of buckets in @hs */ #define CFS_HASH_NBKT(hs) \ - (1U << ((hs)->hs_cur_bits - (hs)->hs_bkt_bits)) + BIT((hs)->hs_cur_bits - (hs)->hs_bkt_bits) /** total number of buckets in @hs while rehashing */ #define CFS_HASH_RH_NBKT(hs) \ - (1U << ((hs)->hs_rehash_bits - (hs)->hs_bkt_bits)) + BIT((hs)->hs_rehash_bits - (hs)->hs_bkt_bits) /** number of hlist for in bucket */ -#define CFS_HASH_BKT_NHLIST(hs) (1U << (hs)->hs_bkt_bits) +#define CFS_HASH_BKT_NHLIST(hs) BIT((hs)->hs_bkt_bits) /** total number of hlist in @hs */ -#define CFS_HASH_NHLIST(hs) (1U << (hs)->hs_cur_bits) +#define CFS_HASH_NHLIST(hs) BIT((hs)->hs_cur_bits) /** total number of hlist in @hs while rehashing */ -#define CFS_HASH_RH_NHLIST(hs) (1U << (hs)->hs_rehash_bits) +#define CFS_HASH_RH_NHLIST(hs) BIT((hs)->hs_rehash_bits) static inline int cfs_hash_with_no_lock(struct cfs_hash *hs) { /* caller will serialize all operations for this hash-table */ - return (hs->hs_flags & CFS_HASH_NO_LOCK) != 0; + return hs->hs_flags & CFS_HASH_NO_LOCK; } static inline int cfs_hash_with_no_bktlock(struct cfs_hash *hs) { /* no bucket lock, one single lock to protect the hash-table */ - return (hs->hs_flags & CFS_HASH_NO_BKTLOCK) != 0; + return hs->hs_flags & CFS_HASH_NO_BKTLOCK; } static inline int cfs_hash_with_rw_bktlock(struct cfs_hash *hs) { /* rwlock to protect hash bucket */ - return (hs->hs_flags & CFS_HASH_RW_BKTLOCK) != 0; + return hs->hs_flags & CFS_HASH_RW_BKTLOCK; } static inline int cfs_hash_with_spin_bktlock(struct cfs_hash *hs) { /* spinlock to protect hash bucket */ - return (hs->hs_flags & CFS_HASH_SPIN_BKTLOCK) != 0; + return hs->hs_flags & CFS_HASH_SPIN_BKTLOCK; } static inline int cfs_hash_with_add_tail(struct cfs_hash *hs) { - return (hs->hs_flags & CFS_HASH_ADD_TAIL) != 0; + return 
hs->hs_flags & CFS_HASH_ADD_TAIL; } static inline int cfs_hash_with_no_itemref(struct cfs_hash *hs) { - /* hash-table doesn't keep refcount on item, + /* + * hash-table doesn't keep refcount on item, * item can't be removed from hash unless it's - * ZERO refcount */ - return (hs->hs_flags & CFS_HASH_NO_ITEMREF) != 0; + * ZERO refcount + */ + return hs->hs_flags & CFS_HASH_NO_ITEMREF; } static inline int cfs_hash_with_bigname(struct cfs_hash *hs) { - return (hs->hs_flags & CFS_HASH_BIGNAME) != 0; + return hs->hs_flags & CFS_HASH_BIGNAME; } static inline int cfs_hash_with_counter(struct cfs_hash *hs) { - return (hs->hs_flags & CFS_HASH_COUNTER) != 0; + return hs->hs_flags & CFS_HASH_COUNTER; } static inline int cfs_hash_with_rehash(struct cfs_hash *hs) { - return (hs->hs_flags & CFS_HASH_REHASH) != 0; + return hs->hs_flags & CFS_HASH_REHASH; } static inline int cfs_hash_with_rehash_key(struct cfs_hash *hs) { - return (hs->hs_flags & CFS_HASH_REHASH_KEY) != 0; + return hs->hs_flags & CFS_HASH_REHASH_KEY; } static inline int cfs_hash_with_shrink(struct cfs_hash *hs) { - return (hs->hs_flags & CFS_HASH_SHRINK) != 0; + return hs->hs_flags & CFS_HASH_SHRINK; } static inline int cfs_hash_with_assert_empty(struct cfs_hash *hs) { - return (hs->hs_flags & CFS_HASH_ASSERT_EMPTY) != 0; + return hs->hs_flags & CFS_HASH_ASSERT_EMPTY; } static inline int cfs_hash_with_depth(struct cfs_hash *hs) { - return (hs->hs_flags & CFS_HASH_DEPTH) != 0; + return hs->hs_flags & CFS_HASH_DEPTH; } static inline int cfs_hash_with_nblk_change(struct cfs_hash *hs) { - return (hs->hs_flags & CFS_HASH_NBLK_CHANGE) != 0; + return hs->hs_flags & CFS_HASH_NBLK_CHANGE; } static inline int @@ -434,14 +442,14 @@ static inline int cfs_hash_is_rehashing(struct cfs_hash *hs) { /* rehash is launched */ - return hs->hs_rehash_bits != 0; + return !!hs->hs_rehash_bits; } static inline int cfs_hash_is_iterating(struct cfs_hash *hs) { /* someone is calling cfs_hash_for_each_* */ - return hs->hs_iterating || hs->hs_iterators != 0; + return hs->hs_iterating || hs->hs_iterators; } static inline int @@ -453,7 +461,7 @@ cfs_hash_bkt_size(struct cfs_hash *hs) } static inline unsigned -cfs_hash_id(struct cfs_hash *hs, const void *key, unsigned mask) +cfs_hash_id(struct cfs_hash *hs, const void *key, unsigned int mask) { return hs->hs_ops->hs_hash(hs, key, mask); } @@ -562,7 +570,7 @@ cfs_hash_bd_index_get(struct cfs_hash *hs, struct cfs_hash_bd *bd) } static inline void -cfs_hash_bd_index_set(struct cfs_hash *hs, unsigned index, +cfs_hash_bd_index_set(struct cfs_hash *hs, unsigned int index, struct cfs_hash_bd *bd) { bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits]; @@ -576,14 +584,14 @@ cfs_hash_bd_extra_get(struct cfs_hash *hs, struct cfs_hash_bd *bd) cfs_hash_bkt_size(hs) - hs->hs_extra_bytes; } -static inline __u32 +static inline u32 cfs_hash_bd_version_get(struct cfs_hash_bd *bd) { /* need hold cfs_hash_bd_lock */ return bd->bd_bucket->hsb_version; } -static inline __u32 +static inline u32 cfs_hash_bd_count_get(struct cfs_hash_bd *bd) { /* need hold cfs_hash_bd_lock */ @@ -669,10 +677,10 @@ cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, /* Hash init/cleanup functions */ struct cfs_hash * -cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, - unsigned bkt_bits, unsigned extra_bytes, - unsigned min_theta, unsigned max_theta, - struct cfs_hash_ops *ops, unsigned flags); +cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits, + unsigned int bkt_bits, unsigned int 
extra_bytes, + unsigned int min_theta, unsigned int max_theta, + struct cfs_hash_ops *ops, unsigned int flags); struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs); void cfs_hash_putref(struct cfs_hash *hs); @@ -700,27 +708,28 @@ typedef int (*cfs_hash_for_each_cb_t)(struct cfs_hash *hs, void * cfs_hash_lookup(struct cfs_hash *hs, const void *key); void -cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t, void *data); +cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, void *data); void -cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t, void *data); +cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, + void *data); int -cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t, - void *data); +cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, + void *data, int start); int -cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t, +cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, void *data); void cfs_hash_for_each_key(struct cfs_hash *hs, const void *key, - cfs_hash_for_each_cb_t, void *data); + cfs_hash_for_each_cb_t cb, void *data); typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data); void -cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t, void *data); +cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t cb, void *data); void -cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex, - cfs_hash_for_each_cb_t, void *data); +cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex, + cfs_hash_for_each_cb_t cb, void *data); int cfs_hash_is_empty(struct cfs_hash *hs); -__u64 cfs_hash_size_get(struct cfs_hash *hs); +u64 cfs_hash_size_get(struct cfs_hash *hs); /* * Rehash - Theta is calculated to be the average chained @@ -766,8 +775,8 @@ cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd, #endif /* CFS_HASH_DEBUG_LEVEL */ #define CFS_HASH_THETA_BITS 10 -#define CFS_HASH_MIN_THETA (1U << (CFS_HASH_THETA_BITS - 1)) -#define CFS_HASH_MAX_THETA (1U << (CFS_HASH_THETA_BITS + 1)) +#define CFS_HASH_MIN_THETA BIT(CFS_HASH_THETA_BITS - 1) +#define CFS_HASH_MAX_THETA BIT(CFS_HASH_THETA_BITS + 1) /* Return integer component of theta */ static inline int __cfs_hash_theta_int(int theta) @@ -792,8 +801,8 @@ static inline void __cfs_hash_set_theta(struct cfs_hash *hs, int min, int max) { LASSERT(min < max); - hs->hs_min_theta = (__u16)min; - hs->hs_max_theta = (__u16)max; + hs->hs_min_theta = (u16)min; + hs->hs_max_theta = (u16)max; } /* Generic debug formatting routines mainly for proc handler */ @@ -805,11 +814,11 @@ void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m); * Generic djb2 hash algorithm for character arrays. */ static inline unsigned -cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask) +cfs_hash_djb2_hash(const void *key, size_t size, unsigned int mask) { - unsigned i, hash = 5381; + unsigned int i, hash = 5381; - LASSERT(key != NULL); + LASSERT(key); for (i = 0; i < size; i++) hash = hash * 33 + ((char *)key)[i]; @@ -821,7 +830,7 @@ cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask) * Generic u32 hash algorithm. */ static inline unsigned -cfs_hash_u32_hash(const __u32 key, unsigned mask) +cfs_hash_u32_hash(const u32 key, unsigned int mask) { return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask); } @@ -830,9 +839,9 @@ cfs_hash_u32_hash(const __u32 key, unsigned mask) * Generic u64 hash algorithm. 
*/ static inline unsigned -cfs_hash_u64_hash(const __u64 key, unsigned mask) +cfs_hash_u64_hash(const u64 key, unsigned int mask) { - return ((unsigned)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask); + return ((unsigned int)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask); } /** iterate over all buckets in @bds (array of struct cfs_hash_bd) */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h index e0e1a5d0949d..aab15d8112a4 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h @@ -75,7 +75,7 @@ do { \ #define KLASSERT(e) LASSERT(e) -void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *); +void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msg); #define LBUG() \ do { \ @@ -96,7 +96,7 @@ do { \ #define LIBCFS_ALLOC_POST(ptr, size) \ do { \ - if (unlikely((ptr) == NULL)) { \ + if (unlikely(!(ptr))) { \ CERROR("LNET: out of memory at %s:%d (tried to alloc '" \ #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size)); \ } else { \ @@ -147,7 +147,7 @@ do { \ #define LIBCFS_FREE(ptr, size) \ do { \ - if (unlikely((ptr) == NULL)) { \ + if (unlikely(!(ptr))) { \ CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \ "%s:%d\n", (int)(size), __FILE__, __LINE__); \ break; \ @@ -169,8 +169,6 @@ do { \ #define ntohs(x) ___ntohs(x) #endif -void libcfs_run_upcall(char **argv); -void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *); void libcfs_debug_dumplog(void); int libcfs_debug_init(unsigned long bufsize); int libcfs_debug_cleanup(void); @@ -280,7 +278,7 @@ do { \ #define CFS_FREE_PTR(ptr) LIBCFS_FREE(ptr, sizeof(*(ptr))) /** Compile-time assertion. - + * * Check an invariant described by a constant expression at compile time by * forcing a compiler error if it does not hold. \a cond must be a constant * expression as defined by the ISO C Standard: @@ -306,7 +304,8 @@ do { \ /* -------------------------------------------------------------------- * Light-weight trace * Support for temporary event tracing with minimal Heisenberg effect. - * -------------------------------------------------------------------- */ + * -------------------------------------------------------------------- + */ #define MKSTR(ptr) ((ptr)) ? (ptr) : "" diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h index 0ee60ff336f2..41795d9b3b9b 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h @@ -62,9 +62,9 @@ struct cfs_range_expr { * Link to cfs_expr_list::el_exprs. 
*/ struct list_head re_link; - __u32 re_lo; - __u32 re_hi; - __u32 re_stride; + u32 re_lo; + u32 re_hi; + u32 re_stride; }; struct cfs_expr_list { @@ -74,24 +74,26 @@ struct cfs_expr_list { char *cfs_trimwhite(char *str); int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res); -int cfs_str2num_check(char *str, int nob, unsigned *num, - unsigned min, unsigned max); -int cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list); +int cfs_str2num_check(char *str, int nob, unsigned int *num, + unsigned int min, unsigned int max); +int cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list); int cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list); int cfs_expr_list_values(struct cfs_expr_list *expr_list, - int max, __u32 **values); + int max, u32 **values); static inline void -cfs_expr_list_values_free(__u32 *values, int num) +cfs_expr_list_values_free(u32 *values, int num) { - /* This array is allocated by LIBCFS_ALLOC(), so it shouldn't be freed + /* + * This array is allocated by LIBCFS_ALLOC(), so it shouldn't be freed * by OBD_FREE() if it's called by module other than libcfs & LNet, - * otherwise we will see fake memory leak */ + * otherwise we will see fake memory leak + */ LIBCFS_FREE(values, num * sizeof(values[0])); } void cfs_expr_list_free(struct cfs_expr_list *expr_list); -int cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max, +int cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max, struct cfs_expr_list **elpp); void cfs_expr_list_free_list(struct list_head *list); diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h index a7e1340e69a1..2accd9a85472 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h @@ -62,9 +62,9 @@ struct cfs_wi_sched; -void cfs_wi_sched_destroy(struct cfs_wi_sched *); +void cfs_wi_sched_destroy(struct cfs_wi_sched *sched); int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt, - int nthrs, struct cfs_wi_sched **); + int nthrs, struct cfs_wi_sched **sched_pp); struct cfs_workitem; diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h index f63cb47bc309..dd0cd0442b86 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h @@ -52,17 +52,17 @@ struct cfs_cpu_partition { /* nodes mask for this partition */ nodemask_t *cpt_nodemask; /* spread rotor for NUMA allocator */ - unsigned cpt_spread_rotor; + unsigned int cpt_spread_rotor; }; /** descriptor for CPU partitions */ struct cfs_cpt_table { /* version, reserved for hotplug */ - unsigned ctb_version; + unsigned int ctb_version; /* spread rotor for NUMA allocator */ - unsigned ctb_spread_rotor; + unsigned int ctb_spread_rotor; /* # of CPU partitions */ - unsigned ctb_nparts; + unsigned int ctb_nparts; /* partitions tables */ struct cfs_cpu_partition *ctb_parts; /* shadow HW CPU to CPU partition ID */ diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h index b646acd1f7e7..709e1ce98d8d 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h @@ -76,23 +76,23 @@ static inline long 
cfs_duration_sec(long d) #define cfs_time_current_64 get_jiffies_64 -static inline __u64 cfs_time_add_64(__u64 t, __u64 d) +static inline u64 cfs_time_add_64(u64 t, u64 d) { return t + d; } -static inline __u64 cfs_time_shift_64(int seconds) +static inline u64 cfs_time_shift_64(int seconds) { return cfs_time_add_64(cfs_time_current_64(), cfs_time_seconds(seconds)); } -static inline int cfs_time_before_64(__u64 t1, __u64 t2) +static inline int cfs_time_before_64(u64 t1, u64 t2) { return (__s64)t2 - (__s64)t1 > 0; } -static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2) +static inline int cfs_time_beforeq_64(u64 t1, u64 t2) { return (__s64)t2 - (__s64)t1 >= 0; } diff --git a/drivers/staging/lustre/include/linux/lnet/lnetst.h b/drivers/staging/lustre/include/linux/lnet/lnetst.h index 417044552d3f..8a84888635ff 100644 --- a/drivers/staging/lustre/include/linux/lnet/lnetst.h +++ b/drivers/staging/lustre/include/linux/lnet/lnetst.h @@ -244,7 +244,7 @@ typedef struct { int lstio_ses_timeout; /* IN: session timeout */ int lstio_ses_force; /* IN: force create ? */ /** IN: session features */ - unsigned lstio_ses_feats; + unsigned int lstio_ses_feats; lst_sid_t __user *lstio_ses_idp; /* OUT: session id */ int lstio_ses_nmlen; /* IN: name length */ char __user *lstio_ses_namep; /* IN: session name */ @@ -255,7 +255,7 @@ typedef struct { lst_sid_t __user *lstio_ses_idp; /* OUT: session id */ int __user *lstio_ses_keyp; /* OUT: local key */ /** OUT: session features */ - unsigned __user *lstio_ses_featp; + unsigned int __user *lstio_ses_featp; lstcon_ndlist_ent_t __user *lstio_ses_ndinfo; /* OUT: */ int lstio_ses_nmlen; /* IN: name length */ char __user *lstio_ses_namep; /* OUT: session name */ @@ -328,7 +328,7 @@ typedef struct { char __user *lstio_grp_namep; /* IN: group name */ int lstio_grp_count; /* IN: # of nodes */ /** OUT: session features */ - unsigned __user *lstio_grp_featp; + unsigned int __user *lstio_grp_featp; lnet_process_id_t __user *lstio_grp_idsp; /* IN: nodes */ struct list_head __user *lstio_grp_resultp; /* OUT: list head of result buffer */ @@ -490,6 +490,8 @@ typedef struct { int blk_size; /* size (bytes) */ int blk_time; /* time of running the test*/ int blk_flags; /* reserved flags */ + int blk_cli_off; /* bulk offset on client */ + int blk_srv_off; /* reserved: bulk offset on server */ } lst_test_bulk_param_t; typedef struct { diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 9e8802181452..7f761b327166 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -1489,7 +1489,7 @@ out_fpo: static void kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps, struct list_head *zombies) { - if (!fps->fps_net) /* intialized? */ + if (!fps->fps_net) /* initialized? */ return; spin_lock(&fps->fps_lock); @@ -1637,7 +1637,7 @@ int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx, { __u64 *pages = tx->tx_pages; bool is_rx = (rd != tx->tx_rd); - bool tx_pages_mapped = 0; + bool tx_pages_mapped = false; struct kib_fmr_pool *fpo; int npages = 0; __u64 version; @@ -1812,7 +1812,7 @@ static void kiblnd_destroy_pool_list(struct list_head *head) static void kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies) { - if (!ps->ps_net) /* intialized? */ + if (!ps->ps_net) /* initialized? 
*/ return; spin_lock(&ps->ps_lock); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index b27de8888149..c7917abf9944 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1912,12 +1912,12 @@ kiblnd_close_conn_locked(struct kib_conn *conn, int error) libcfs_nid2str(peer->ibp_nid)); } else { CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? "" : "(waiting)"); + libcfs_nid2str(peer->ibp_nid), error, + list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", + list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)", + list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", + list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", + list_empty(&conn->ibc_active_txs) ? "" : "(waiting)"); } dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev; @@ -2643,7 +2643,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version, if (incarnation) peer->ibp_incarnation = incarnation; out: - write_unlock_irqrestore(glock, flags); + write_unlock_irqrestore(glock, flags); CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n", libcfs_nid2str(peer->ibp_nid), @@ -2651,7 +2651,7 @@ out: reason, IBLND_MSG_VERSION, version, msg_size, conn->ibc_queue_depth, queue_dep, conn->ibc_max_frags, frag_num); - /** + /** * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer * while destroying the zombie */ @@ -2976,7 +2976,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) case RDMA_CM_EVENT_ADDR_ERROR: peer = (struct kib_peer *)cmid->context; CNETERR("%s: ADDR ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); + libcfs_nid2str(peer->ibp_nid), event->status); kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); kiblnd_peer_decref(peer); return -EHOSTUNREACH; /* rc destroys cmid */ @@ -3021,7 +3021,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) return kiblnd_active_connect(cmid); CNETERR("Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); + libcfs_nid2str(peer->ibp_nid), event->status); kiblnd_peer_connect_failed(peer, 1, event->status); kiblnd_peer_decref(peer); return event->status; /* rc destroys cmid */ @@ -3031,7 +3031,7 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); CNETERR("%s: UNREACHABLE %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); + libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); kiblnd_connreq_done(conn, -ENETDOWN); kiblnd_conn_decref(conn); return 0; @@ -3269,14 +3269,14 @@ kiblnd_disconnect_conn(struct kib_conn *conn) #define KIB_RECONN_HIGH_RACE 10 /** * Allow connd to take a break and handle other things after consecutive - * reconnection attemps. + * reconnection attempts. 
*/ #define KIB_RECONN_BREAK 100 int kiblnd_connd(void *arg) { - spinlock_t *lock= &kiblnd_data.kib_connd_lock; + spinlock_t *lock = &kiblnd_data.kib_connd_lock; wait_queue_t wait; unsigned long flags; struct kib_conn *conn; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index cbc9a9c5385f..b74cf635faee 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -96,7 +96,8 @@ ksocknal_destroy_route(struct ksock_route *route) } static int -ksocknal_create_peer(struct ksock_peer **peerp, lnet_ni_t *ni, lnet_process_id_t id) +ksocknal_create_peer(struct ksock_peer **peerp, lnet_ni_t *ni, + lnet_process_id_t id) { int cpt = lnet_cpt_of_nid(id.nid); struct ksock_net *net = ni->ni_data; @@ -319,7 +320,8 @@ ksocknal_get_peer_info(lnet_ni_t *ni, int index, } static void -ksocknal_associate_route_conn_locked(struct ksock_route *route, struct ksock_conn *conn) +ksocknal_associate_route_conn_locked(struct ksock_route *route, + struct ksock_conn *conn) { struct ksock_peer *peer = route->ksnr_peer; int type = conn->ksnc_type; @@ -821,7 +823,8 @@ ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips) if (k < peer->ksnp_n_passive_ips) /* using it already */ continue; - k = ksocknal_match_peerip(iface, peerips, n_peerips); + k = ksocknal_match_peerip(iface, peerips, + n_peerips); xor = ip ^ peerips[k]; this_netmatch = !(xor & iface->ksni_netmask) ? 1 : 0; @@ -1302,8 +1305,11 @@ ksocknal_create_conn(lnet_ni_t *ni, struct ksock_route *route, /* Take packets blocking for this connection. */ list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) { - if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) == SOCKNAL_MATCH_NO) - continue; + int match = conn->ksnc_proto->pro_match_tx(conn, tx, + tx->tx_nonblk); + + if (match == SOCKNAL_MATCH_NO) + continue; list_del(&tx->tx_list); ksocknal_queue_tx_locked(tx, conn); @@ -1493,8 +1499,8 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) spin_unlock_bh(&conn->ksnc_scheduler->kss_lock); } - peer->ksnp_proto = NULL; /* renegotiate protocol version */ - peer->ksnp_error = error; /* stash last conn close reason */ + peer->ksnp_proto = NULL; /* renegotiate protocol version */ + peer->ksnp_error = error; /* stash last conn close reason */ if (list_empty(&peer->ksnp_routes)) { /* @@ -1786,7 +1792,8 @@ ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr) (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid))) continue; - count += ksocknal_close_peer_conns_locked(peer, ipaddr, 0); + count += ksocknal_close_peer_conns_locked(peer, ipaddr, + 0); } } @@ -2026,7 +2033,10 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask) } rc = 0; - /* NB only new connections will pay attention to the new interface! */ + /* + * NB only new connections will pay attention to the + * new interface! 
+ */ } write_unlock_bh(&ksocknal_data.ksnd_global_lock); @@ -2200,8 +2210,9 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) int txmem; int rxmem; int nagle; - struct ksock_conn *conn = ksocknal_get_conn_by_idx(ni, data->ioc_count); + struct ksock_conn *conn; + conn = ksocknal_get_conn_by_idx(ni, data->ioc_count); if (!conn) return -ENOENT; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h index e6ca0cf52691..842c45393b38 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h @@ -84,7 +84,8 @@ struct ksock_sched { /* per scheduler state */ struct list_head kss_zombie_noop_txs; /* zombie noop tx list */ wait_queue_head_t kss_waitq; /* where scheduler sleeps */ int kss_nconns; /* # connections assigned to - * this scheduler */ + * this scheduler + */ struct ksock_sched_info *kss_info; /* owner of it */ }; @@ -110,15 +111,19 @@ struct ksock_interface { /* in-use interface */ struct ksock_tunables { int *ksnd_timeout; /* "stuck" socket timeout - * (seconds) */ + * (seconds) + */ int *ksnd_nscheds; /* # scheduler threads in each - * pool while starting */ + * pool while starting + */ int *ksnd_nconnds; /* # connection daemons */ int *ksnd_nconnds_max; /* max # connection daemons */ int *ksnd_min_reconnectms; /* first connection retry after - * (ms)... */ + * (ms)... + */ int *ksnd_max_reconnectms; /* ...exponentially increasing to - * this */ + * this + */ int *ksnd_eager_ack; /* make TCP ack eagerly? */ int *ksnd_typed_conns; /* drive sockets by type? */ int *ksnd_min_bulk; /* smallest "large" message */ @@ -126,9 +131,11 @@ struct ksock_tunables { int *ksnd_rx_buffer_size; /* socket rx buffer size */ int *ksnd_nagle; /* enable NAGLE? 
*/ int *ksnd_round_robin; /* round robin for multiple - * interfaces */ + * interfaces + */ int *ksnd_keepalive; /* # secs for sending keepalive - * NOOP */ + * NOOP + */ int *ksnd_keepalive_idle; /* # idle secs before 1st probe */ int *ksnd_keepalive_count; /* # probes */ @@ -137,20 +144,26 @@ struct ksock_tunables { int *ksnd_peertxcredits; /* # concurrent sends to 1 peer */ int *ksnd_peerrtrcredits; /* # per-peer router buffer - * credits */ + * credits + */ int *ksnd_peertimeout; /* seconds to consider peer dead */ int *ksnd_enable_csum; /* enable check sum */ int *ksnd_inject_csum_error; /* set non-zero to inject - * checksum error */ + * checksum error + */ int *ksnd_nonblk_zcack; /* always send zc-ack on - * non-blocking connection */ + * non-blocking connection + */ unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload - * size */ + * size + */ int *ksnd_zc_recv; /* enable ZC receive (for - * Chelsio TOE) */ + * Chelsio TOE) + */ int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to - * enable ZC receive */ + * enable ZC receive + */ }; struct ksock_net { @@ -174,9 +187,11 @@ struct ksock_nal_data { int ksnd_nnets; /* # networks set up */ struct list_head ksnd_nets; /* list of nets */ rwlock_t ksnd_global_lock; /* stabilize peer/conn - * ops */ + * ops + */ struct list_head *ksnd_peers; /* hash table of all my - * known peers */ + * known peers + */ int ksnd_peer_hash_size; /* size of ksnd_peers */ int ksnd_nthreads; /* # live threads */ @@ -187,11 +202,14 @@ struct ksock_nal_data { atomic_t ksnd_nactive_txs; /* #active txs */ struct list_head ksnd_deathrow_conns; /* conns to close: - * reaper_lock*/ + * reaper_lock + */ struct list_head ksnd_zombie_conns; /* conns to free: - * reaper_lock */ + * reaper_lock + */ struct list_head ksnd_enomem_conns; /* conns to retry: - * reaper_lock*/ + * reaper_lock + */ wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ unsigned long ksnd_reaper_waketime; /* when reaper will wake */ @@ -201,30 +219,34 @@ struct ksock_nal_data { int ksnd_stall_tx; /* test sluggish sender */ int ksnd_stall_rx; /* test sluggish - * receiver */ - + * receiver + */ struct list_head ksnd_connd_connreqs; /* incoming connection - * requests */ + * requests + */ struct list_head ksnd_connd_routes; /* routes waiting to be - * connected */ + * connected + */ wait_queue_head_t ksnd_connd_waitq; /* connds sleep here */ int ksnd_connd_connecting; /* # connds connecting */ time64_t ksnd_connd_failed_stamp;/* time stamp of the * last failed - * connecting attempt */ + * connecting attempt + */ time64_t ksnd_connd_starting_stamp;/* time stamp of the * last starting connd */ - unsigned ksnd_connd_starting; /* # starting connd */ - unsigned ksnd_connd_running; /* # running connd */ + unsigned int ksnd_connd_starting; /* # starting connd */ + unsigned int ksnd_connd_running; /* # running connd */ spinlock_t ksnd_connd_lock; /* serialise */ struct list_head ksnd_idle_noop_txs; /* list head for freed - * noop tx */ + * noop tx + */ spinlock_t ksnd_tx_lock; /* serialise, g_lock - * unsafe */ - + * unsafe + */ }; #define SOCKNAL_INIT_NOTHING 0 @@ -304,18 +326,21 @@ struct ksock_conn { struct list_head ksnc_list; /* stash on peer's conn list */ struct socket *ksnc_sock; /* actual socket */ void *ksnc_saved_data_ready; /* socket's original - * data_ready() callback */ + * data_ready() callback + */ void *ksnc_saved_write_space; /* socket's original - * write_space() callback */ + * write_space() callback + */ atomic_t ksnc_conn_refcount;/* conn refcount */ 
atomic_t ksnc_sock_refcount;/* sock refcount */ struct ksock_sched *ksnc_scheduler; /* who schedules this connection - */ + */ __u32 ksnc_myipaddr; /* my IP */ __u32 ksnc_ipaddr; /* peer's IP */ int ksnc_port; /* peer's port */ signed int ksnc_type:3; /* type of connection, should be - * signed value */ + * signed value + */ unsigned int ksnc_closing:1; /* being shut down */ unsigned int ksnc_flip:1; /* flip or not, only for V2.x */ unsigned int ksnc_zc_capable:1; /* enable to ZC */ @@ -323,9 +348,11 @@ struct ksock_conn { /* reader */ struct list_head ksnc_rx_list; /* where I enq waiting input or a - * forwarding descriptor */ + * forwarding descriptor + */ unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times - * out */ + * out + */ __u8 ksnc_rx_started; /* started receiving a message */ __u8 ksnc_rx_ready; /* data ready to read */ __u8 ksnc_rx_scheduled; /* being progressed */ @@ -338,7 +365,8 @@ struct ksock_conn { lnet_kiov_t *ksnc_rx_kiov; /* the page frags */ union ksock_rxiovspace ksnc_rx_iov_space; /* space for frag descriptors */ __u32 ksnc_rx_csum; /* partial checksum for incoming - * data */ + * data + */ void *ksnc_cookie; /* rx lnet_finalize passthru arg */ ksock_msg_t ksnc_msg; /* incoming message buffer: @@ -346,14 +374,16 @@ struct ksock_conn { * whole struct * V1.x message is a bare * lnet_hdr_t, it's stored in - * ksnc_msg.ksm_u.lnetmsg */ - + * ksnc_msg.ksm_u.lnetmsg + */ /* WRITER */ struct list_head ksnc_tx_list; /* where I enq waiting for output - * space */ + * space + */ struct list_head ksnc_tx_queue; /* packets waiting to be sent */ - struct ksock_tx *ksnc_tx_carrier; /* next TX that can carry a LNet - * message or ZC-ACK */ + struct ksock_tx *ksnc_tx_carrier; /* next TX that can carry a LNet + * message or ZC-ACK + */ unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out */ int ksnc_tx_bufnob; /* send buffer marker */ @@ -361,7 +391,8 @@ struct ksock_conn { int ksnc_tx_ready; /* write space */ int ksnc_tx_scheduled; /* being progressed */ unsigned long ksnc_tx_last_post; /* time stamp of the last posted - * TX */ + * TX + */ }; struct ksock_route { @@ -370,20 +401,24 @@ struct ksock_route { struct ksock_peer *ksnr_peer; /* owning peer */ atomic_t ksnr_refcount; /* # users */ unsigned long ksnr_timeout; /* when (in jiffies) reconnection - * can happen next */ + * can happen next + */ long ksnr_retry_interval; /* how long between retries */ __u32 ksnr_myipaddr; /* my IP */ __u32 ksnr_ipaddr; /* IP address to connect to */ int ksnr_port; /* port to connect to */ unsigned int ksnr_scheduled:1; /* scheduled for attention */ unsigned int ksnr_connecting:1; /* connection establishment in - * progress */ + * progress + */ unsigned int ksnr_connected:4; /* connections established by - * type */ + * type + */ unsigned int ksnr_deleted:1; /* been removed from peer? */ unsigned int ksnr_share_count; /* created explicitly? 
*/ int ksnr_conn_count; /* # conns established by this - * route */ + * route + */ }; #define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */ @@ -391,7 +426,8 @@ struct ksock_route { struct ksock_peer { struct list_head ksnp_list; /* stash on global peer list */ unsigned long ksnp_last_alive; /* when (in jiffies) I was last - * alive */ + * alive + */ lnet_process_id_t ksnp_id; /* who's on the other end(s) */ atomic_t ksnp_refcount; /* # users */ int ksnp_sharecount; /* lconf usage counter */ @@ -408,7 +444,8 @@ struct ksock_peer { struct list_head ksnp_tx_queue; /* waiting packets */ spinlock_t ksnp_lock; /* serialize, g_lock unsafe */ struct list_head ksnp_zc_req_list; /* zero copy requests wait for - * ACK */ + * ACK + */ unsigned long ksnp_send_keepalive; /* time to send keepalive */ lnet_ni_t *ksnp_ni; /* which network */ int ksnp_n_passive_ips; /* # of... */ @@ -429,7 +466,8 @@ extern struct ksock_tunables ksocknal_tunables; #define SOCKNAL_MATCH_NO 0 /* TX can't match type of connection */ #define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */ #define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not - * preferred */ + * preferred + */ struct ksock_proto { /* version number of protocol */ diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c index c1c6f604e6ad..972f6094be75 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c @@ -620,7 +620,8 @@ ksocknal_launch_all_connections_locked(struct ksock_peer *peer) } struct ksock_conn * -ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx, int nonblk) +ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx, + int nonblk) { struct list_head *tmp; struct ksock_conn *conn; @@ -630,10 +631,12 @@ ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx, int nonb int fnob = 0; list_for_each(tmp, &peer->ksnp_conns) { - struct ksock_conn *c = list_entry(tmp, struct ksock_conn, ksnc_list); - int nob = atomic_read(&c->ksnc_tx_nob) + - c->ksnc_sock->sk->sk_wmem_queued; - int rc; + struct ksock_conn *c; + int nob, rc; + + c = list_entry(tmp, struct ksock_conn, ksnc_list); + nob = atomic_read(&c->ksnc_tx_nob) + + c->ksnc_sock->sk->sk_wmem_queued; LASSERT(!c->ksnc_closing); LASSERT(c->ksnc_proto && @@ -752,9 +755,9 @@ ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn) LASSERT(msg->ksm_zc_cookies[1]); LASSERT(conn->ksnc_proto->pro_queue_tx_zcack); + /* ZC ACK piggybacked on ztx release tx later */ if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0)) - ztx = tx; /* ZC ACK piggybacked on ztx release tx later */ - + ztx = tx; } else { /* * It's a normal packet - can it piggback a noop zc-ack that @@ -796,7 +799,8 @@ ksocknal_find_connectable_route_locked(struct ksock_peer *peer) LASSERT(!route->ksnr_connecting || route->ksnr_scheduled); - if (route->ksnr_scheduled) /* connections being established */ + /* connections being established */ + if (route->ksnr_scheduled) continue; /* all route types connected ? 
*/ @@ -1514,7 +1518,10 @@ int ksocknal_scheduler(void *arg) rc = ksocknal_process_transmit(conn, tx); if (rc == -ENOMEM || rc == -EAGAIN) { - /* Incomplete send: replace tx on HEAD of tx_queue */ + /* + * Incomplete send: replace tx on HEAD of + * tx_queue + */ spin_lock_bh(&sched->kss_lock); list_add(&tx->tx_list, &conn->ksnc_tx_queue); } else { @@ -1724,7 +1731,8 @@ ksocknal_recv_hello(lnet_ni_t *ni, struct ksock_conn *conn, timeout = active ? *ksocknal_tunables.ksnd_timeout : lnet_acceptor_timeout(); - rc = lnet_sock_read(sock, &hello->kshm_magic, sizeof(hello->kshm_magic), timeout); + rc = lnet_sock_read(sock, &hello->kshm_magic, + sizeof(hello->kshm_magic), timeout); if (rc) { CERROR("Error %d reading HELLO from %pI4h\n", rc, &conn->ksnc_ipaddr); @@ -1798,7 +1806,8 @@ ksocknal_recv_hello(lnet_ni_t *ni, struct ksock_conn *conn, conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { /* Userspace NAL assigns peer process ID from socket */ recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG; - recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), conn->ksnc_ipaddr); + recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), + conn->ksnc_ipaddr); } else { recv_id.nid = hello->kshm_src_nid; recv_id.pid = hello->kshm_src_pid; @@ -1882,7 +1891,8 @@ ksocknal_connect(struct ksock_route *route) if (peer->ksnp_accepting > 0) { CDEBUG(D_NET, "peer %s(%d) already connecting to me, retry later.\n", - libcfs_nid2str(peer->ksnp_id.nid), peer->ksnp_accepting); + libcfs_nid2str(peer->ksnp_id.nid), + peer->ksnp_accepting); retry_later = 1; } @@ -2241,7 +2251,8 @@ ksocknal_connd(void *arg) /* Nothing to do for 'timeout' */ set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq, &wait); + add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq, + &wait); spin_unlock_bh(connd_lock); nloops = 0; @@ -2371,7 +2382,8 @@ ksocknal_send_keepalive_locked(struct ksock_peer *peer) struct ksock_conn *conn; struct ksock_tx *tx; - if (list_empty(&peer->ksnp_conns)) /* last_alive will be updated by create_conn */ + /* last_alive will be updated by create_conn */ + if (list_empty(&peer->ksnp_conns)) return 0; if (peer->ksnp_proto != &ksocknal_protocol_v3x) @@ -2473,8 +2485,8 @@ ksocknal_check_peer_timeouts(int idx) * holding only shared lock */ if (!list_empty(&peer->ksnp_tx_queue)) { - struct ksock_tx *tx = list_entry(peer->ksnp_tx_queue.next, - struct ksock_tx, tx_list); + tx = list_entry(peer->ksnp_tx_queue.next, + struct ksock_tx, tx_list); if (cfs_time_aftereq(cfs_time_current(), tx->tx_deadline)) { diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 6c95e989ca12..4bcab4bcc2de 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -202,7 +202,8 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn) fragnob = sum; conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, - iov[i].iov_base, fragnob); + iov[i].iov_base, + fragnob); } conn->ksnc_msg.ksm_csum = saved_csum; } @@ -291,7 +292,8 @@ ksocknal_lib_csum_tx(struct ksock_tx *tx) } int -ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, int *rxmem, int *nagle) +ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, + int *rxmem, int *nagle) { struct socket *sock = conn->ksnc_sock; int len; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c index 82e174f6d9fe..8f0ff6ca1f39 100644 
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c @@ -194,7 +194,10 @@ ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, } if (!tx->tx_msg.ksm_zc_cookies[0]) { - /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */ + /* + * NOOP tx has only one ZC-ACK cookie, + * can carry at least one more + */ if (tx->tx_msg.ksm_zc_cookies[1] > cookie) { tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1]; tx->tx_msg.ksm_zc_cookies[1] = cookie; @@ -203,7 +206,10 @@ ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, } if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) { - /* not likely to carry more ACKs, skip it to simplify logic */ + /* + * not likely to carry more ACKs, skip it + * to simplify logic + */ ksocknal_next_tx_carrier(conn); } @@ -237,7 +243,10 @@ ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, } } else { - /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */ + /* + * ksm_zc_cookies[0] < ksm_zc_cookies[1], + * it is range of cookies + */ if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && cookie <= tx->tx_msg.ksm_zc_cookies[1]) { CWARN("%s: duplicated ZC cookie: %llu\n", @@ -425,7 +434,8 @@ ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2) tx_zc_list) { __u64 c = tx->tx_msg.ksm_zc_cookies[0]; - if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) { + if (c == cookie1 || c == cookie2 || + (cookie1 < c && c < cookie2)) { tx->tx_msg.ksm_zc_cookies[0] = 0; list_del(&tx->tx_zc_list); list_add(&tx->tx_zc_list, &zlist); @@ -639,7 +649,8 @@ out: } static int -ksocknal_recv_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello, int timeout) +ksocknal_recv_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello, + int timeout) { struct socket *sock = conn->ksnc_sock; int rc; @@ -737,7 +748,10 @@ ksocknal_pack_msg_v2(struct ksock_tx *tx) tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); tx->tx_resid = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); } - /* Don't checksum before start sending, because packet can be piggybacked with ACK */ + /* + * Don't checksum before start sending, because packet can be + * piggybacked with ACK + */ } static void diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c index 23b36b890964..a38db2322225 100644 --- a/drivers/staging/lustre/lnet/libcfs/debug.c +++ b/drivers/staging/lustre/lnet/libcfs/debug.c @@ -57,7 +57,7 @@ static int libcfs_param_debug_mb_set(const char *val, const struct kernel_param *kp) { int rc; - unsigned num; + unsigned int num; rc = kstrtouint(val, 0, &num); if (rc < 0) @@ -228,7 +228,8 @@ int libcfs_panic_in_progress; static const char * libcfs_debug_subsys2str(int subsys) { - static const char *libcfs_debug_subsystems[] = LIBCFS_DEBUG_SUBSYS_NAMES; + static const char * const libcfs_debug_subsystems[] = + LIBCFS_DEBUG_SUBSYS_NAMES; if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems)) return NULL; @@ -240,7 +241,8 @@ libcfs_debug_subsys2str(int subsys) static const char * libcfs_debug_dbg2str(int debug) { - static const char *libcfs_debug_masks[] = LIBCFS_DEBUG_MASKS_NAMES; + static const char * const libcfs_debug_masks[] = + LIBCFS_DEBUG_MASKS_NAMES; if (debug >= ARRAY_SIZE(libcfs_debug_masks)) return NULL; @@ -253,17 +255,17 @@ libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) { const char *(*fn)(int bit) = is_subsys ? 
libcfs_debug_subsys2str : libcfs_debug_dbg2str; - int len = 0; - const char *token; - int i; + int len = 0; + const char *token; + int i; - if (mask == 0) { /* "0" */ + if (!mask) { /* "0" */ if (size > 0) str[0] = '0'; len = 1; } else { /* space-separated tokens */ for (i = 0; i < 32; i++) { - if ((mask & (1 << i)) == 0) + if (!(mask & (1 << i))) continue; token = fn(i); @@ -276,7 +278,7 @@ libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) len++; } - while (*token != 0) { + while (*token) { if (len < size) str[len] = *token; token++; @@ -299,10 +301,10 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) { const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : libcfs_debug_dbg2str; - int m = 0; - int matched; - int n; - int t; + int m = 0; + int matched; + int n; + int t; /* Allow a number for backwards compatibility */ @@ -313,7 +315,7 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) t = sscanf(str, "%i%n", &m, &matched); if (t >= 1 && matched == n) { /* don't print warning for lctl set_param debug=0 or -1 */ - if (m != 0 && m != -1) + if (m && m != -1) CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n"); *mask = m; return 0; @@ -387,8 +389,8 @@ EXPORT_SYMBOL(libcfs_debug_dumplog); int libcfs_debug_init(unsigned long bufsize) { - int rc = 0; unsigned int max = libcfs_debug_mb; + int rc = 0; init_waitqueue_head(&debug_ctlwq); @@ -414,9 +416,9 @@ int libcfs_debug_init(unsigned long bufsize) max = max / num_possible_cpus(); max <<= (20 - PAGE_SHIFT); } - rc = cfs_tracefile_init(max); - if (rc == 0) { + rc = cfs_tracefile_init(max); + if (!rc) { libcfs_register_panic_notifier(); libcfs_debug_mb = cfs_trace_get_debug_mb(); } diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c index e4b1a0a86eae..12dd50ad4efb 100644 --- a/drivers/staging/lustre/lnet/libcfs/fail.c +++ b/drivers/staging/lustre/lnet/libcfs/fail.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL(cfs_race_waitq); int cfs_race_state; EXPORT_SYMBOL(cfs_race_state); -int __cfs_fail_check_set(__u32 id, __u32 value, int set) +int __cfs_fail_check_set(u32 id, u32 value, int set) { static atomic_t cfs_fail_count = ATOMIC_INIT(0); @@ -113,6 +113,7 @@ int __cfs_fail_check_set(__u32 id, __u32 value, int set) break; case CFS_FAIL_LOC_RESET: cfs_fail_loc = value; + atomic_set(&cfs_fail_count, 0); break; default: LASSERTF(0, "called with bad set %u\n", set); @@ -123,7 +124,7 @@ int __cfs_fail_check_set(__u32 id, __u32 value, int set) } EXPORT_SYMBOL(__cfs_fail_check_set); -int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) +int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set) { int ret; diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c index 23283b6e09ab..c93c59d8fe6c 100644 --- a/drivers/staging/lustre/lnet/libcfs/hash.c +++ b/drivers/staging/lustre/lnet/libcfs/hash.c @@ -289,7 +289,7 @@ cfs_hash_hd_hhead_size(struct cfs_hash *hs) static struct hlist_head * cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) { - struct cfs_hash_head_dep *head; + struct cfs_hash_head_dep *head; head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0]; return &head[bd->bd_offset].hd_head; @@ -492,7 +492,7 @@ cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd) cfs_hash_bd_from_key(hs, hs->hs_buckets, hs->hs_cur_bits, key, bd); } else { - LASSERT(hs->hs_rehash_bits != 0); + 
LASSERT(hs->hs_rehash_bits); cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, hs->hs_rehash_bits, key, bd); } @@ -507,14 +507,14 @@ cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur) bd->bd_bucket->hsb_depmax = dep_cur; # if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 - if (likely(warn_on_depth == 0 || + if (likely(!warn_on_depth || max(warn_on_depth, hs->hs_dep_max) >= dep_cur)) return; spin_lock(&hs->hs_dep_lock); - hs->hs_dep_max = dep_cur; - hs->hs_dep_bkt = bd->bd_bucket->hsb_index; - hs->hs_dep_off = bd->bd_offset; + hs->hs_dep_max = dep_cur; + hs->hs_dep_bkt = bd->bd_bucket->hsb_index; + hs->hs_dep_off = bd->bd_offset; hs->hs_dep_bits = hs->hs_cur_bits; spin_unlock(&hs->hs_dep_lock); @@ -531,7 +531,7 @@ cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode); cfs_hash_bd_dep_record(hs, bd, rc); bd->bd_bucket->hsb_version++; - if (unlikely(bd->bd_bucket->hsb_version == 0)) + if (unlikely(!bd->bd_bucket->hsb_version)) bd->bd_bucket->hsb_version++; bd->bd_bucket->hsb_count++; @@ -551,7 +551,7 @@ cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, LASSERT(bd->bd_bucket->hsb_count > 0); bd->bd_bucket->hsb_count--; bd->bd_bucket->hsb_version++; - if (unlikely(bd->bd_bucket->hsb_version == 0)) + if (unlikely(!bd->bd_bucket->hsb_version)) bd->bd_bucket->hsb_version++; if (cfs_hash_with_counter(hs)) { @@ -571,7 +571,7 @@ cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old, struct cfs_hash_bucket *nbkt = bd_new->bd_bucket; int rc; - if (cfs_hash_bd_compare(bd_old, bd_new) == 0) + if (!cfs_hash_bd_compare(bd_old, bd_new)) return; /* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops @@ -584,11 +584,11 @@ cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old, LASSERT(obkt->hsb_count > 0); obkt->hsb_count--; obkt->hsb_version++; - if (unlikely(obkt->hsb_version == 0)) + if (unlikely(!obkt->hsb_version)) obkt->hsb_version++; nbkt->hsb_count++; nbkt->hsb_version++; - if (unlikely(nbkt->hsb_version == 0)) + if (unlikely(!nbkt->hsb_version)) nbkt->hsb_version++; } @@ -629,7 +629,7 @@ cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd, struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd); struct hlist_node *ehnode; struct hlist_node *match; - int intent_add = (intent & CFS_HS_LOOKUP_MASK_ADD) != 0; + int intent_add = intent & CFS_HS_LOOKUP_MASK_ADD; /* with this function, we can avoid a lot of useless refcount ops, * which are expensive atomic operations most time. @@ -643,13 +643,13 @@ cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd, continue; /* match and ... */ - if ((intent & CFS_HS_LOOKUP_MASK_DEL) != 0) { + if (intent & CFS_HS_LOOKUP_MASK_DEL) { cfs_hash_bd_del_locked(hs, bd, ehnode); return ehnode; } /* caller wants refcount? 
*/ - if ((intent & CFS_HS_LOOKUP_MASK_REF) != 0) + if (intent & CFS_HS_LOOKUP_MASK_REF) cfs_hash_get(hs, ehnode); return ehnode; } @@ -682,7 +682,7 @@ EXPORT_SYMBOL(cfs_hash_bd_peek_locked); static void cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, int excl) + unsigned int n, int excl) { struct cfs_hash_bucket *prev = NULL; int i; @@ -704,7 +704,7 @@ cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, static void cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, int excl) + unsigned int n, int excl) { struct cfs_hash_bucket *prev = NULL; int i; @@ -719,10 +719,10 @@ cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, static struct hlist_node * cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, const void *key) + unsigned int n, const void *key) { struct hlist_node *ehnode; - unsigned i; + unsigned int i; cfs_hash_for_each_bd(bds, n, i) { ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL, @@ -735,12 +735,12 @@ cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, static struct hlist_node * cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, const void *key, + unsigned int n, const void *key, struct hlist_node *hnode, int noref) { struct hlist_node *ehnode; int intent; - unsigned i; + unsigned int i; LASSERT(hnode); intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK; @@ -766,7 +766,7 @@ cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, static struct hlist_node * cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned n, const void *key, + unsigned int n, const void *key, struct hlist_node *hnode) { struct hlist_node *ehnode; @@ -815,7 +815,7 @@ cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key, return; } - LASSERT(hs->hs_rehash_bits != 0); + LASSERT(hs->hs_rehash_bits); cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, hs->hs_rehash_bits, key, &bds[1]); @@ -883,7 +883,7 @@ cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts, struct cfs_hash_bucket **new_bkts; int i; - LASSERT(old_size == 0 || old_bkts); + LASSERT(!old_size || old_bkts); if (old_bkts && old_size == new_size) return old_bkts; @@ -908,9 +908,9 @@ cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts, return NULL; } - new_bkts[i]->hsb_index = i; - new_bkts[i]->hsb_version = 1; /* shouldn't be zero */ - new_bkts[i]->hsb_depmax = -1; /* unknown */ + new_bkts[i]->hsb_index = i; + new_bkts[i]->hsb_version = 1; /* shouldn't be zero */ + new_bkts[i]->hsb_depmax = -1; /* unknown */ bd.bd_bucket = new_bkts[i]; cfs_hash_bd_for_each_hlist(hs, &bd, hhead) INIT_HLIST_HEAD(hhead); @@ -950,9 +950,9 @@ static int cfs_hash_dep_print(struct cfs_workitem *wi) int bits; spin_lock(&hs->hs_dep_lock); - dep = hs->hs_dep_max; - bkt = hs->hs_dep_bkt; - off = hs->hs_dep_off; + dep = hs->hs_dep_max; + bkt = hs->hs_dep_bkt; + off = hs->hs_dep_off; bits = hs->hs_dep_bits; spin_unlock(&hs->hs_dep_lock); @@ -976,7 +976,7 @@ static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) return; spin_lock(&hs->hs_dep_lock); - while (hs->hs_dep_bits != 0) { + while (hs->hs_dep_bits) { spin_unlock(&hs->hs_dep_lock); cond_resched(); spin_lock(&hs->hs_dep_lock); @@ -992,10 +992,10 @@ static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {} #endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */ struct 
cfs_hash * -cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, - unsigned bkt_bits, unsigned extra_bytes, - unsigned min_theta, unsigned max_theta, - struct cfs_hash_ops *ops, unsigned flags) +cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits, + unsigned int bkt_bits, unsigned int extra_bytes, + unsigned int min_theta, unsigned int max_theta, + struct cfs_hash_ops *ops, unsigned int flags) { struct cfs_hash *hs; int len; @@ -1010,18 +1010,17 @@ cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, LASSERT(ops->hs_get); LASSERT(ops->hs_put_locked); - if ((flags & CFS_HASH_REHASH) != 0) + if (flags & CFS_HASH_REHASH) flags |= CFS_HASH_COUNTER; /* must have counter */ LASSERT(cur_bits > 0); LASSERT(cur_bits >= bkt_bits); LASSERT(max_bits >= cur_bits && max_bits < 31); - LASSERT(ergo((flags & CFS_HASH_REHASH) == 0, cur_bits == max_bits)); - LASSERT(ergo((flags & CFS_HASH_REHASH) != 0, - (flags & CFS_HASH_NO_LOCK) == 0)); - LASSERT(ergo((flags & CFS_HASH_REHASH_KEY) != 0, ops->hs_keycpy)); + LASSERT(ergo(!(flags & CFS_HASH_REHASH), cur_bits == max_bits)); + LASSERT(ergo(flags & CFS_HASH_REHASH, !(flags & CFS_HASH_NO_LOCK))); + LASSERT(ergo(flags & CFS_HASH_REHASH_KEY, ops->hs_keycpy)); - len = (flags & CFS_HASH_BIGNAME) == 0 ? + len = !(flags & CFS_HASH_BIGNAME) ? CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN; LIBCFS_ALLOC(hs, offsetof(struct cfs_hash, hs_name[len])); if (!hs) @@ -1036,12 +1035,12 @@ cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, cfs_hash_lock_setup(hs); cfs_hash_hlist_setup(hs); - hs->hs_cur_bits = (__u8)cur_bits; - hs->hs_min_bits = (__u8)cur_bits; - hs->hs_max_bits = (__u8)max_bits; - hs->hs_bkt_bits = (__u8)bkt_bits; + hs->hs_cur_bits = (u8)cur_bits; + hs->hs_min_bits = (u8)cur_bits; + hs->hs_max_bits = (u8)max_bits; + hs->hs_bkt_bits = (u8)bkt_bits; - hs->hs_ops = ops; + hs->hs_ops = ops; hs->hs_extra_bytes = extra_bytes; hs->hs_rehash_bits = 0; cfs_wi_init(&hs->hs_rehash_wi, hs, cfs_hash_rehash_worker); @@ -1107,12 +1106,12 @@ cfs_hash_destroy(struct cfs_hash *hs) cfs_hash_exit(hs, hnode); } } - LASSERT(bd.bd_bucket->hsb_count == 0); + LASSERT(!bd.bd_bucket->hsb_count); cfs_hash_bd_unlock(hs, &bd, 1); cond_resched(); } - LASSERT(atomic_read(&hs->hs_count) == 0); + LASSERT(!atomic_read(&hs->hs_count)); cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs), 0, CFS_HASH_NBKT(hs)); @@ -1216,7 +1215,7 @@ cfs_hash_find_or_add(struct cfs_hash *hs, const void *key, struct cfs_hash_bd bds[2]; int bits = 0; - LASSERT(hlist_unhashed(hnode)); + LASSERTF(hlist_unhashed(hnode), "hnode = %p\n", hnode); cfs_hash_lock(hs, 0); cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); @@ -1293,7 +1292,7 @@ cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) } if (hnode) { - obj = cfs_hash_object(hs, hnode); + obj = cfs_hash_object(hs, hnode); bits = cfs_hash_rehash_bits(hs); } @@ -1388,7 +1387,7 @@ cfs_hash_for_each_exit(struct cfs_hash *hs) bits = cfs_hash_rehash_bits(hs); cfs_hash_unlock(hs, 1); /* NB: it's race on cfs_has_t::hs_iterating, see above */ - if (remained == 0) + if (!remained) hs->hs_iterating = 0; if (bits > 0) { cfs_hash_rehash(hs, atomic_read(&hs->hs_count) < @@ -1406,14 +1405,14 @@ cfs_hash_for_each_exit(struct cfs_hash *hs) * . 
if @removal_safe is true, use can remove current item by * cfs_hash_bd_del_locked */ -static __u64 +static u64 cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, void *data, int remove_safe) { struct hlist_node *hnode; struct hlist_node *pos; struct cfs_hash_bd bd; - __u64 count = 0; + u64 count = 0; int excl = !!remove_safe; int loop = 0; int i; @@ -1526,7 +1525,7 @@ cfs_hash_is_empty(struct cfs_hash *hs) } EXPORT_SYMBOL(cfs_hash_is_empty); -__u64 +u64 cfs_hash_size_get(struct cfs_hash *hs) { return cfs_hash_with_counter(hs) ? @@ -1552,26 +1551,33 @@ EXPORT_SYMBOL(cfs_hash_size_get); */ static int cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) + void *data, int start) { struct hlist_node *hnode; struct hlist_node *tmp; struct cfs_hash_bd bd; - __u32 version; + u32 version; int count = 0; int stop_on_change; - int rc; + int end = -1; + int rc = 0; int i; stop_on_change = cfs_hash_with_rehash_key(hs) || !cfs_hash_with_no_itemref(hs) || !hs->hs_ops->hs_put_locked; cfs_hash_lock(hs, 0); +again: LASSERT(!cfs_hash_is_rehashing(hs)); cfs_hash_for_each_bucket(hs, &bd, i) { struct hlist_head *hhead; + if (i < start) + continue; + else if (end > 0 && i >= end) + break; + cfs_hash_bd_lock(hs, &bd, 0); version = cfs_hash_bd_version_get(&bd); @@ -1611,14 +1617,19 @@ cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, if (rc) /* callback wants to break iteration */ break; } - cfs_hash_unlock(hs, 0); + if (start > 0 && !rc) { + end = start; + start = 0; + goto again; + } + cfs_hash_unlock(hs, 0); return count; } int cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) + void *data, int start) { if (cfs_hash_with_no_lock(hs) || cfs_hash_with_rehash_key(hs) || @@ -1630,7 +1641,7 @@ cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, return -EOPNOTSUPP; cfs_hash_for_each_enter(hs); - cfs_hash_for_each_relax(hs, func, data); + cfs_hash_for_each_relax(hs, func, data, start); cfs_hash_for_each_exit(hs); return 0; @@ -1652,7 +1663,7 @@ int cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, void *data) { - unsigned i = 0; + unsigned int i = 0; if (cfs_hash_with_no_lock(hs)) return -EOPNOTSUPP; @@ -1662,7 +1673,7 @@ cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, return -EOPNOTSUPP; cfs_hash_for_each_enter(hs); - while (cfs_hash_for_each_relax(hs, func, data)) { + while (cfs_hash_for_each_relax(hs, func, data, 0)) { CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n", hs->hs_name, i++); } @@ -1672,7 +1683,7 @@ cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, EXPORT_SYMBOL(cfs_hash_for_each_empty); void -cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex, +cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex, cfs_hash_for_each_cb_t func, void *data) { struct hlist_head *hhead; @@ -1704,7 +1715,7 @@ EXPORT_SYMBOL(cfs_hash_hlist_for_each); * the passed callback @func and pass to it as an argument each hash * item and the private @data. During the callback the bucket lock * is held so the callback must never sleep. 
- */ + */ void cfs_hash_for_each_key(struct cfs_hash *hs, const void *key, cfs_hash_for_each_cb_t func, void *data) @@ -1936,7 +1947,7 @@ out: /* can't refer to @hs anymore because it could be destroyed */ if (bkts) cfs_hash_buckets_free(bkts, bsize, new_size, old_size); - if (rc != 0) + if (rc) CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc); /* return 1 only if cfs_wi_exit is called */ return rc == -ESRCH; @@ -2005,7 +2016,7 @@ cfs_hash_full_bkts(struct cfs_hash *hs) if (!hs->hs_rehash_buckets) return hs->hs_buckets; - LASSERT(hs->hs_rehash_bits != 0); + LASSERT(hs->hs_rehash_bits); return hs->hs_rehash_bits > hs->hs_cur_bits ? hs->hs_rehash_buckets : hs->hs_buckets; } @@ -2017,7 +2028,7 @@ cfs_hash_full_nbkt(struct cfs_hash *hs) if (!hs->hs_rehash_buckets) return CFS_HASH_NBKT(hs); - LASSERT(hs->hs_rehash_bits != 0); + LASSERT(hs->hs_rehash_bits); return hs->hs_rehash_bits > hs->hs_cur_bits ? CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs); } diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c index 33352af6c27f..55caa19def51 100644 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL(cfs_cpt_table_free); int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) { - int rc; + int rc; rc = snprintf(buf, len, "%d\t: %d\n", 0, 0); len -= rc; diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c index 83543f928279..1967b97c4afc 100644 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c @@ -52,9 +52,9 @@ struct cfs_percpt_lock * cfs_percpt_lock_create(struct cfs_cpt_table *cptab, struct lock_class_key *keys) { - struct cfs_percpt_lock *pcl; - spinlock_t *lock; - int i; + struct cfs_percpt_lock *pcl; + spinlock_t *lock; + int i; /* NB: cptab can be NULL, pcl will be for HW CPUs on that case */ LIBCFS_ALLOC(pcl, sizeof(*pcl)); @@ -73,7 +73,7 @@ cfs_percpt_lock_create(struct cfs_cpt_table *cptab, cfs_percpt_for_each(lock, i, pcl->pcl_locks) { spin_lock_init(lock); - if (keys != NULL) + if (keys) lockdep_set_class(lock, &keys[i]); } @@ -94,8 +94,8 @@ void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index) __acquires(pcl->pcl_locks) { - int ncpt = cfs_cpt_number(pcl->pcl_cptab); - int i; + int ncpt = cfs_cpt_number(pcl->pcl_cptab); + int i; LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt); @@ -114,7 +114,7 @@ cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index) /* exclusive lock request */ for (i = 0; i < ncpt; i++) { spin_lock(pcl->pcl_locks[i]); - if (i == 0) { + if (!i) { LASSERT(!pcl->pcl_locked); /* nobody should take private lock after this * so I wouldn't starve for too long time @@ -130,8 +130,8 @@ void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index) __releases(pcl->pcl_locks) { - int ncpt = cfs_cpt_number(pcl->pcl_cptab); - int i; + int ncpt = cfs_cpt_number(pcl->pcl_cptab); + int i; index = ncpt == 1 ? 
0 : index; @@ -141,7 +141,7 @@ cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index) } for (i = ncpt - 1; i >= 0; i--) { - if (i == 0) { + if (!i) { LASSERT(pcl->pcl_locked); pcl->pcl_locked = 0; } diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c index d0e81bb41cdc..ef085ba23194 100644 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c @@ -43,8 +43,8 @@ struct cfs_var_array { void cfs_percpt_free(void *vars) { - struct cfs_var_array *arr; - int i; + struct cfs_var_array *arr; + int i; arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); @@ -72,9 +72,9 @@ EXPORT_SYMBOL(cfs_percpt_free); void * cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size) { - struct cfs_var_array *arr; - int count; - int i; + struct cfs_var_array *arr; + int count; + int i; count = cfs_cpt_number(cptab); @@ -120,8 +120,8 @@ EXPORT_SYMBOL(cfs_percpt_number); void cfs_array_free(void *vars) { - struct cfs_var_array *arr; - int i; + struct cfs_var_array *arr; + int i; arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); @@ -144,15 +144,15 @@ EXPORT_SYMBOL(cfs_array_free); void * cfs_array_alloc(int count, unsigned int size) { - struct cfs_var_array *arr; - int i; + struct cfs_var_array *arr; + int i; LIBCFS_ALLOC(arr, offsetof(struct cfs_var_array, va_ptrs[count])); if (!arr) return NULL; - arr->va_count = count; - arr->va_size = size; + arr->va_count = count; + arr->va_size = size; for (i = 0; i < count; i++) { LIBCFS_ALLOC(arr->va_ptrs[i], size); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c index 56a614d7713b..02de1ee720fd 100644 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c @@ -79,7 +79,7 @@ int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), for (i = 0; i < 32; i++) { debugstr = bit2str(i); if (debugstr && strlen(debugstr) == len && - strncasecmp(str, debugstr, len) == 0) { + !strncasecmp(str, debugstr, len)) { if (op == '-') newmask &= ~(1 << i); else @@ -89,7 +89,7 @@ int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), } } if (!found && len == 3 && - (strncasecmp(str, "ALL", len) == 0)) { + !strncasecmp(str, "ALL", len)) { if (op == '-') newmask = minmask; else @@ -112,7 +112,7 @@ int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), char *cfs_firststr(char *str, size_t size) { size_t i = 0; - char *end; + char *end; /* trim leading spaces */ while (i < size && *str && isspace(*str)) { @@ -182,7 +182,7 @@ cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res) next->ls_len--; } - if (next->ls_len == 0) /* whitespaces only */ + if (!next->ls_len) /* whitespaces only */ return 0; if (*next->ls_str == delim) { @@ -222,8 +222,8 @@ EXPORT_SYMBOL(cfs_gettok); * \retval 0 otherwise */ int -cfs_str2num_check(char *str, int nob, unsigned *num, - unsigned min, unsigned max) +cfs_str2num_check(char *str, int nob, unsigned int *num, + unsigned int min, unsigned int max) { bool all_numbers = true; char *endp, cache; @@ -273,11 +273,11 @@ EXPORT_SYMBOL(cfs_str2num_check); * -ENOMEM will be returned. 
*/ static int -cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max, +cfs_range_expr_parse(struct cfs_lstr *src, unsigned int min, unsigned int max, int bracketed, struct cfs_range_expr **expr) { - struct cfs_range_expr *re; - struct cfs_lstr tok; + struct cfs_range_expr *re; + struct cfs_lstr tok; LIBCFS_ALLOC(re, sizeof(*re)); if (!re) @@ -391,7 +391,7 @@ cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list) i += scnprintf(buffer + i, count - i, "["); list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - if (j++ != 0) + if (j++) i += scnprintf(buffer + i, count - i, ","); i += cfs_range_expr_print(buffer + i, count - i, expr, numexprs > 1); @@ -411,13 +411,13 @@ EXPORT_SYMBOL(cfs_expr_list_print); * \retval 0 otherwise */ int -cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list) +cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list) { - struct cfs_range_expr *expr; + struct cfs_range_expr *expr; list_for_each_entry(expr, &expr_list->el_exprs, re_link) { if (value >= expr->re_lo && value <= expr->re_hi && - ((value - expr->re_lo) % expr->re_stride) == 0) + !((value - expr->re_lo) % expr->re_stride)) return 1; } @@ -433,21 +433,21 @@ EXPORT_SYMBOL(cfs_expr_list_match); * \retval < 0 for failure */ int -cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp) +cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, u32 **valpp) { - struct cfs_range_expr *expr; - __u32 *val; - int count = 0; - int i; + struct cfs_range_expr *expr; + u32 *val; + int count = 0; + int i; list_for_each_entry(expr, &expr_list->el_exprs, re_link) { for (i = expr->re_lo; i <= expr->re_hi; i++) { - if (((i - expr->re_lo) % expr->re_stride) == 0) + if (!((i - expr->re_lo) % expr->re_stride)) count++; } } - if (count == 0) /* empty expression list */ + if (!count) /* empty expression list */ return 0; if (count > max) { @@ -463,7 +463,7 @@ cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, __u32 **valpp) count = 0; list_for_each_entry(expr, &expr_list->el_exprs, re_link) { for (i = expr->re_lo; i <= expr->re_hi; i++) { - if (((i - expr->re_lo) % expr->re_stride) == 0) + if (!((i - expr->re_lo) % expr->re_stride)) val[count++] = i; } } @@ -501,13 +501,13 @@ EXPORT_SYMBOL(cfs_expr_list_free); * \retval -errno otherwise */ int -cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max, +cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max, struct cfs_expr_list **elpp) { - struct cfs_expr_list *expr_list; - struct cfs_range_expr *expr; - struct cfs_lstr src; - int rc; + struct cfs_expr_list *expr_list; + struct cfs_range_expr *expr; + struct cfs_lstr src; + int rc; LIBCFS_ALLOC(expr_list, sizeof(*expr_list)); if (!expr_list) @@ -533,18 +533,18 @@ cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max, } rc = cfs_range_expr_parse(&tok, min, max, 1, &expr); - if (rc != 0) + if (rc) break; list_add_tail(&expr->re_link, &expr_list->el_exprs); } } else { rc = cfs_range_expr_parse(&src, min, max, 0, &expr); - if (rc == 0) + if (!rc) list_add_tail(&expr->re_link, &expr_list->el_exprs); } - if (rc != 0) + if (rc) cfs_expr_list_free(expr_list); else *elpp = expr_list; diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c index e8b1a61420de..6b9cf06e8df2 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c @@ -55,6 +55,8 @@ 
MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket * are NUMA node ID, number before bracket is CPU partition ID. * + * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology + * * NB: If user specified cpu_pattern, cpu_npartitions will be ignored */ static char *cpu_pattern = ""; @@ -88,7 +90,7 @@ cfs_node_to_cpumask(int node, cpumask_t *mask) void cfs_cpt_table_free(struct cfs_cpt_table *cptab) { - int i; + int i; if (cptab->ctb_cpu2cpt) { LIBCFS_FREE(cptab->ctb_cpu2cpt, @@ -126,7 +128,7 @@ struct cfs_cpt_table * cfs_cpt_table_alloc(unsigned int ncpt) { struct cfs_cpt_table *cptab; - int i; + int i; LIBCFS_ALLOC(cptab, sizeof(*cptab)); if (!cptab) @@ -177,10 +179,10 @@ EXPORT_SYMBOL(cfs_cpt_table_alloc); int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) { - char *tmp = buf; - int rc = 0; - int i; - int j; + char *tmp = buf; + int rc = 0; + int i; + int j; for (i = 0; i < cptab->ctb_nparts; i++) { if (len > 0) { @@ -271,7 +273,7 @@ EXPORT_SYMBOL(cfs_cpt_nodemask); int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) { - int node; + int node; LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts); @@ -311,8 +313,8 @@ EXPORT_SYMBOL(cfs_cpt_set_cpu); void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) { - int node; - int i; + int node; + int i; LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); @@ -371,9 +373,9 @@ EXPORT_SYMBOL(cfs_cpt_unset_cpu); int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) { - int i; + int i; - if (cpumask_weight(mask) == 0 || + if (!cpumask_weight(mask) || cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) { CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n", cpt); @@ -392,7 +394,7 @@ EXPORT_SYMBOL(cfs_cpt_set_cpumask); void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) { - int i; + int i; for_each_cpu(i, mask) cfs_cpt_unset_cpu(cptab, cpt, i); @@ -402,8 +404,8 @@ EXPORT_SYMBOL(cfs_cpt_unset_cpumask); int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) { - cpumask_t *mask; - int rc; + cpumask_t *mask; + int rc; if (node < 0 || node >= MAX_NUMNODES) { CDEBUG(D_INFO, @@ -449,7 +451,7 @@ EXPORT_SYMBOL(cfs_cpt_unset_node); int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) { - int i; + int i; for_each_node_mask(i, *mask) { if (!cfs_cpt_set_node(cptab, cpt, i)) @@ -463,7 +465,7 @@ EXPORT_SYMBOL(cfs_cpt_set_nodemask); void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) { - int i; + int i; for_each_node_mask(i, *mask) cfs_cpt_unset_node(cptab, cpt, i); @@ -473,8 +475,8 @@ EXPORT_SYMBOL(cfs_cpt_unset_nodemask); void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) { - int last; - int i; + int last; + int i; if (cpt == CFS_CPT_ANY) { last = cptab->ctb_nparts - 1; @@ -493,10 +495,10 @@ EXPORT_SYMBOL(cfs_cpt_clear); int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) { - nodemask_t *mask; - int weight; - int rotor; - int node; + nodemask_t *mask; + int weight; + int rotor; + int node; /* convert CPU partition ID to HW node id */ @@ -514,7 +516,7 @@ cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) rotor %= weight; for_each_node_mask(node, *mask) { - if (rotor-- == 0) + if (!rotor--) return node; } @@ -526,8 +528,8 @@ EXPORT_SYMBOL(cfs_cpt_spread_node); int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) { - int cpu = 
smp_processor_id(); - int cpt = cptab->ctb_cpu2cpt[cpu]; + int cpu = smp_processor_id(); + int cpt = cptab->ctb_cpu2cpt[cpu]; if (cpt < 0) { if (!remap) @@ -555,10 +557,10 @@ EXPORT_SYMBOL(cfs_cpt_of_cpu); int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) { - cpumask_t *cpumask; - nodemask_t *nodemask; - int rc; - int i; + cpumask_t *cpumask; + nodemask_t *nodemask; + int rc; + int i; LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); @@ -582,7 +584,7 @@ cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) rc = set_cpus_allowed_ptr(current, cpumask); set_mems_allowed(*nodemask); - if (rc == 0) + if (!rc) schedule(); /* switch to allowed CPU */ return rc; @@ -601,10 +603,10 @@ static int cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, cpumask_t *node, int number) { - cpumask_t *socket = NULL; - cpumask_t *core = NULL; - int rc = 0; - int cpu; + cpumask_t *socket = NULL; + cpumask_t *core = NULL; + int rc = 0; + int cpu; LASSERT(number > 0); @@ -638,7 +640,7 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, LASSERT(!cpumask_empty(socket)); while (!cpumask_empty(socket)) { - int i; + int i; /* get cpumask for hts in the same core */ cpumask_copy(core, topology_sibling_cpumask(cpu)); @@ -656,14 +658,14 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, goto out; } - if (--number == 0) + if (!--number) goto out; } cpu = cpumask_first(socket); } } - out: +out: if (socket) LIBCFS_FREE(socket, cpumask_size()); if (core) @@ -676,9 +678,9 @@ cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, static unsigned int cfs_cpt_num_estimate(void) { - unsigned nnode = num_online_nodes(); - unsigned ncpu = num_online_cpus(); - unsigned ncpt; + unsigned int nnode = num_online_nodes(); + unsigned int ncpu = num_online_cpus(); + unsigned int ncpt; if (ncpu <= CPT_WEIGHT_MIN) { ncpt = 1; @@ -703,14 +705,14 @@ cfs_cpt_num_estimate(void) ncpt = nnode; - out: +out: #if (BITS_PER_LONG == 32) /* config many CPU partitions on 32-bit system could consume * too much memory */ ncpt = min(2U, ncpt); #endif - while (ncpu % ncpt != 0) + while (ncpu % ncpt) ncpt--; /* worst case is 1 */ return ncpt; @@ -720,11 +722,11 @@ static struct cfs_cpt_table * cfs_cpt_table_create(int ncpt) { struct cfs_cpt_table *cptab = NULL; - cpumask_t *mask = NULL; - int cpt = 0; - int num; - int rc; - int i; + cpumask_t *mask = NULL; + int cpt = 0; + int num; + int rc; + int i; rc = cfs_cpt_num_estimate(); if (ncpt <= 0) @@ -735,7 +737,7 @@ cfs_cpt_table_create(int ncpt) ncpt, rc); } - if (num_online_cpus() % ncpt != 0) { + if (num_online_cpus() % ncpt) { CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n", (int)num_online_cpus(), ncpt); goto failed; @@ -748,7 +750,7 @@ cfs_cpt_table_create(int ncpt) } num = num_online_cpus() / ncpt; - if (num == 0) { + if (!num) { CERROR("CPU changed while setting CPU partition\n"); goto failed; } @@ -764,7 +766,7 @@ cfs_cpt_table_create(int ncpt) while (!cpumask_empty(mask)) { struct cfs_cpu_partition *part; - int n; + int n; /* * Each emulated NUMA node has all allowed CPUs in @@ -817,27 +819,36 @@ cfs_cpt_table_create(int ncpt) static struct cfs_cpt_table * cfs_cpt_table_create_pattern(char *pattern) { - struct cfs_cpt_table *cptab; - char *str = pattern; - int node = 0; - int high; - int ncpt; - int c; - - for (ncpt = 0;; ncpt++) { /* quick scan bracket */ - str = strchr(str, '['); - if (!str) - break; - str++; - } + struct cfs_cpt_table *cptab; + 
char *str; + int node = 0; + int high; + int ncpt = 0; + int cpt; + int rc; + int c; + int i; str = cfs_trimwhite(pattern); if (*str == 'n' || *str == 'N') { pattern = str + 1; - node = 1; + if (*pattern != '\0') { + node = 1; + } else { /* shortcut to create CPT from NUMA & CPU topology */ + node = -1; + ncpt = num_online_nodes(); + } + } + + if (!ncpt) { /* scanning bracket which is mark of partition */ + for (str = pattern;; str++, ncpt++) { + str = strchr(str, '['); + if (!str) + break; + } } - if (ncpt == 0 || + if (!ncpt || (node && ncpt > num_online_nodes()) || (!node && ncpt > num_online_cpus())) { CERROR("Invalid pattern %s, or too many partitions %d\n", @@ -845,25 +856,39 @@ cfs_cpt_table_create_pattern(char *pattern) return NULL; } - high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1; - cptab = cfs_cpt_table_alloc(ncpt); if (!cptab) { CERROR("Failed to allocate cpu partition table\n"); return NULL; } + if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */ + cpt = 0; + + for_each_online_node(i) { + if (cpt >= ncpt) { + CERROR("CPU changed while setting CPU partition table, %d/%d\n", + cpt, ncpt); + goto failed; + } + + rc = cfs_cpt_set_node(cptab, cpt++, i); + if (!rc) + goto failed; + } + return cptab; + } + + high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1; + for (str = cfs_trimwhite(pattern), c = 0;; c++) { - struct cfs_range_expr *range; - struct cfs_expr_list *el; - char *bracket = strchr(str, '['); - int cpt; - int rc; - int i; - int n; + struct cfs_range_expr *range; + struct cfs_expr_list *el; + char *bracket = strchr(str, '['); + int n; if (!bracket) { - if (*str != 0) { + if (*str) { CERROR("Invalid pattern %s\n", str); goto failed; } @@ -886,7 +911,7 @@ cfs_cpt_table_create_pattern(char *pattern) goto failed; } - if (cfs_cpt_weight(cptab, cpt) != 0) { + if (cfs_cpt_weight(cptab, cpt)) { CERROR("Partition %d has already been set.\n", cpt); goto failed; } @@ -905,14 +930,14 @@ cfs_cpt_table_create_pattern(char *pattern) } if (cfs_expr_list_parse(str, (bracket - str) + 1, - 0, high, &el) != 0) { + 0, high, &el)) { CERROR("Can't parse number range: %s\n", str); goto failed; } list_for_each_entry(range, &el->el_exprs, re_link) { for (i = range->re_lo; i <= range->re_hi; i++) { - if ((i - range->re_lo) % range->re_stride != 0) + if ((i - range->re_lo) % range->re_stride) continue; rc = node ? 
cfs_cpt_set_node(cptab, cpt, i) : @@ -945,8 +970,8 @@ cfs_cpt_table_create_pattern(char *pattern) static int cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - unsigned int cpu = (unsigned long)hcpu; - bool warn; + unsigned int cpu = (unsigned long)hcpu; + bool warn; switch (action) { case CPU_DEAD: @@ -1019,7 +1044,7 @@ cfs_cpu_init(void) register_hotcpu_notifier(&cfs_cpu_notifier); #endif - if (*cpu_pattern != 0) { + if (*cpu_pattern) { cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern); if (!cfs_cpt_table) { CERROR("Failed to create cptab from pattern %s\n", diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c index 7f56d2c9dd00..68e34b4a76c9 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c @@ -64,7 +64,7 @@ static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg, unsigned int key_len) { struct crypto_ahash *tfm; - int err = 0; + int err = 0; *type = cfs_crypto_hash_type(hash_alg); @@ -93,12 +93,12 @@ static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg, if (key) err = crypto_ahash_setkey(tfm, key, key_len); - else if ((*type)->cht_key != 0) + else if ((*type)->cht_key) err = crypto_ahash_setkey(tfm, (unsigned char *)&((*type)->cht_key), (*type)->cht_size); - if (err != 0) { + if (err) { ahash_request_free(*req); crypto_free_ahash(tfm); return err; @@ -147,16 +147,16 @@ int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg, unsigned char *key, unsigned int key_len, unsigned char *hash, unsigned int *hash_len) { - struct scatterlist sl; + struct scatterlist sl; struct ahash_request *req; - int err; - const struct cfs_crypto_hash_type *type; + int err; + const struct cfs_crypto_hash_type *type; - if (!buf || buf_len == 0 || !hash_len) + if (!buf || !buf_len || !hash_len) return -EINVAL; err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len); - if (err != 0) + if (err) return err; if (!hash || *hash_len < type->cht_size) { @@ -177,7 +177,7 @@ int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg, EXPORT_SYMBOL(cfs_crypto_hash_digest); /** - * Allocate and initialize desriptor for hash algorithm. + * Allocate and initialize descriptor for hash algorithm. 
* * This should be used to initialize a hash descriptor for multiple calls * to a single hash function when computing the hash across multiple @@ -198,8 +198,8 @@ cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg, unsigned char *key, unsigned int key_len) { struct ahash_request *req; - int err; - const struct cfs_crypto_hash_type *type; + int err; + const struct cfs_crypto_hash_type *type; err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len); @@ -273,7 +273,7 @@ EXPORT_SYMBOL(cfs_crypto_hash_update); int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc, unsigned char *hash, unsigned int *hash_len) { - int err; + int err; struct ahash_request *req = (void *)hdesc; int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); @@ -312,8 +312,8 @@ static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg) { int buf_len = max(PAGE_SIZE, 1048576UL); void *buf; - unsigned long start, end; - int bcount, err = 0; + unsigned long start, end; + int bcount, err = 0; struct page *page; unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX]; unsigned int hash_len = sizeof(hash); @@ -358,7 +358,7 @@ out_err: CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n", cfs_crypto_hash_name(hash_alg), err); } else { - unsigned long tmp; + unsigned long tmp; tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) * 1000) / (1024 * 1024); @@ -440,6 +440,6 @@ int cfs_crypto_register(void) */ void cfs_crypto_unregister(void) { - if (adler32 == 0) + if (!adler32) cfs_crypto_adler32_unregister(); } diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h index 18e8cd4d8758..d0b3aa80cfa6 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.h @@ -1,4 +1,4 @@ - /* +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
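The libcfs_string.c hunks earlier in this patch convert the `(x % y) == 0` style tests in cfs_expr_list_match() and cfs_expr_list_values() to the `!(x % y)` form without changing the predicate itself. As a minimal standalone sketch of that predicate (illustrative names, not the kernel implementation): a value matches a range expression "lo-hi/stride" when it lies in [lo, hi] and is offset from lo by a whole number of strides, so with lo=0, hi=7, stride=2 the matching values are 0, 2, 4 and 6.

#include <stdbool.h>

/*
 * Illustrative model of the cfs_range_expr match rule; the field names
 * mirror the kernel struct, but this is a sketch, not the kernel code.
 * Assumes re_stride >= 1 (the parser defaults the stride to 1 when no
 * "/stride" suffix is given).
 */
struct range_expr {
        unsigned int re_lo;      /* lower bound of the range */
        unsigned int re_hi;      /* upper bound of the range */
        unsigned int re_stride;  /* step between matching values */
};

static bool range_expr_matches(const struct range_expr *re, unsigned int value)
{
        /* in range, and a whole number of strides above re_lo */
        return value >= re->re_lo && value <= re->re_hi &&
               !((value - re->re_lo) % re->re_stride);
}
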
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c index 435b784c52f8..39a72e3f0c18 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-debug.c @@ -57,7 +57,6 @@ #include <linux/kallsyms.h> -char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall"; char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall"; /** @@ -68,11 +67,12 @@ char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall"; void libcfs_run_debug_log_upcall(char *file) { char *argv[3]; - int rc; - char *envp[] = { + int rc; + static const char * const envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; + NULL + }; argv[0] = lnet_debug_log_upcall; @@ -81,7 +81,7 @@ void libcfs_run_debug_log_upcall(char *file) argv[2] = NULL; - rc = call_usermodehelper(argv[0], argv, envp, 1); + rc = call_usermodehelper(argv[0], argv, (char **)envp, 1); if (rc < 0 && rc != -ENOENT) { CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n", rc, argv[0], argv[1]); @@ -91,57 +91,6 @@ void libcfs_run_debug_log_upcall(char *file) } } -void libcfs_run_upcall(char **argv) -{ - int rc; - int argc; - char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - - argv[0] = lnet_upcall; - argc = 1; - while (argv[argc]) - argc++; - - LASSERT(argc >= 2); - - rc = call_usermodehelper(argv[0], argv, envp, 1); - if (rc < 0 && rc != -ENOENT) { - CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; check /sys/kernel/debug/lnet/upcall\n", - rc, argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } else { - CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n", - argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? 
"" : ",..."); - } -} - -void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *msgdata) -{ - char *argv[6]; - char buf[32]; - - snprintf(buf, sizeof(buf), "%d", msgdata->msg_line); - - argv[1] = "LBUG"; - argv[2] = (char *)msgdata->msg_file; - argv[3] = (char *)msgdata->msg_fn; - argv[4] = buf; - argv[5] = NULL; - - libcfs_run_upcall(argv); -} -EXPORT_SYMBOL(libcfs_run_lbug_upcall); - /* coverity[+kill] */ void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) { @@ -156,7 +105,6 @@ void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) dump_stack(); if (!libcfs_panic_on_lbug) libcfs_debug_dumplog(); - libcfs_run_lbug_upcall(msgdata); if (libcfs_panic_on_lbug) panic("LBUG"); set_task_state(current, TASK_UNINTERRUPTIBLE); diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c index 38308f8b6aae..3f5d58babc2f 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c @@ -83,7 +83,7 @@ static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data) CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n"); return true; } - if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) { + if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) { CERROR("LIBCFS ioctl: packlen != ioc_len\n"); return true; } diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c index 291d286eab48..cf902154f0aa 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c @@ -45,8 +45,8 @@ sigset_t cfs_block_allsigs(void) { - unsigned long flags; - sigset_t old; + unsigned long flags; + sigset_t old; spin_lock_irqsave(¤t->sighand->siglock, flags); old = current->blocked; @@ -60,8 +60,8 @@ EXPORT_SYMBOL(cfs_block_allsigs); sigset_t cfs_block_sigs(unsigned long sigs) { - unsigned long flags; - sigset_t old; + unsigned long flags; + sigset_t old; spin_lock_irqsave(¤t->sighand->siglock, flags); old = current->blocked; @@ -91,7 +91,7 @@ EXPORT_SYMBOL(cfs_block_sigsinv); void cfs_restore_sigs(sigset_t old) { - unsigned long flags; + unsigned long flags; spin_lock_irqsave(¤t->sighand->siglock, flags); current->blocked = old; diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c index 8b551d2708ba..75eb84e7f0f8 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-tracefile.c @@ -49,8 +49,8 @@ static DECLARE_RWSEM(cfs_tracefile_sem); int cfs_tracefile_init_arch(void) { - int i; - int j; + int i; + int j; struct cfs_trace_cpu_data *tcd; /* initialize trace_data */ @@ -85,14 +85,14 @@ int cfs_tracefile_init_arch(void) out: cfs_tracefile_fini_arch(); - printk(KERN_ERR "lnet: Not enough memory\n"); + pr_err("lnet: Not enough memory\n"); return -ENOMEM; } void cfs_tracefile_fini_arch(void) { - int i; - int j; + int i; + int j; for (i = 0; i < num_possible_cpus(); i++) for (j = 0; j < 3; j++) { @@ -224,26 +224,26 @@ void cfs_print_to_console(struct ptldebug_header *hdr, int mask, { char *prefix = "Lustre", *ptype = NULL; - if ((mask & D_EMERG) != 0) { + if (mask & D_EMERG) { prefix = dbghdr_to_err_string(hdr); ptype = KERN_EMERG; - } else if ((mask & D_ERROR) != 0) { + } else if (mask & D_ERROR) { prefix = dbghdr_to_err_string(hdr); ptype = KERN_ERR; - } 
else if ((mask & D_WARNING) != 0) { + } else if (mask & D_WARNING) { prefix = dbghdr_to_info_string(hdr); ptype = KERN_WARNING; - } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) { + } else if (mask & (D_CONSOLE | libcfs_printk)) { prefix = dbghdr_to_info_string(hdr); ptype = KERN_INFO; } - if ((mask & D_CONSOLE) != 0) { - printk("%s%s: %.*s", ptype, prefix, len, buf); + if (mask & D_CONSOLE) { + pr_info("%s%s: %.*s", ptype, prefix, len, buf); } else { - printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, - hdr->ph_pid, hdr->ph_extern_pid, file, hdr->ph_line_num, - fn, len, buf); + pr_info("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, + hdr->ph_pid, hdr->ph_extern_pid, file, + hdr->ph_line_num, fn, len, buf); } } diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c index 86b4d25cad46..161e04226521 100644 --- a/drivers/staging/lustre/lnet/libcfs/module.c +++ b/drivers/staging/lustre/lnet/libcfs/module.c @@ -183,12 +183,12 @@ EXPORT_SYMBOL(lprocfs_call_handler); static int __proc_dobitmasks(void *data, int write, loff_t pos, void __user *buffer, int nob) { - const int tmpstrlen = 512; - char *tmpstr; - int rc; + const int tmpstrlen = 512; + char *tmpstr; + int rc; unsigned int *mask = data; - int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; - int is_printk = (mask == &libcfs_printk) ? 1 : 0; + int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; + int is_printk = (mask == &libcfs_printk) ? 1 : 0; rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen); if (rc < 0) @@ -293,8 +293,8 @@ static int __proc_cpt_table(void *data, int write, loff_t pos, void __user *buffer, int nob) { char *buf = NULL; - int len = 4096; - int rc = 0; + int len = 4096; + int rc = 0; if (write) return -EPERM; @@ -365,14 +365,6 @@ static struct ctl_table lnet_table[] = { .mode = 0444, .proc_handler = &proc_cpt_table, }, - - { - .procname = "upcall", - .data = lnet_upcall, - .maxlen = sizeof(lnet_upcall), - .mode = 0644, - .proc_handler = &proc_dostring, - }, { .procname = "debug_log_upcall", .data = lnet_debug_log_upcall, @@ -547,7 +539,7 @@ static int libcfs_init(void) } rc = cfs_cpu_init(); - if (rc != 0) + if (rc) goto cleanup_debug; rc = misc_register(&libcfs_dev); @@ -566,7 +558,7 @@ static int libcfs_init(void) rc = min(cfs_cpt_weight(cfs_cpt_table, CFS_CPT_ANY), 4); rc = cfs_wi_sched_create("cfs_rh", cfs_cpt_table, CFS_CPT_ANY, rc, &cfs_sched_rehash); - if (rc != 0) { + if (rc) { CERROR("Startup workitem scheduler: error: %d\n", rc); goto cleanup_deregister; } diff --git a/drivers/staging/lustre/lnet/libcfs/prng.c b/drivers/staging/lustre/lnet/libcfs/prng.c index a9bdb284fd15..21d5a3912c5f 100644 --- a/drivers/staging/lustre/lnet/libcfs/prng.c +++ b/drivers/staging/lustre/lnet/libcfs/prng.c @@ -33,7 +33,7 @@ * x(n)=a*x(n-1)+carry mod 2^16 and y(n)=b*y(n-1)+carry mod 2^16, * number and carry packed within the same 32 bit integer. * algorithm recommended by Marsaglia -*/ + */ #include "../../include/linux/libcfs/libcfs.h" diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c index 1c7efdfaffcf..d7b29f8997c0 100644 --- a/drivers/staging/lustre/lnet/libcfs/tracefile.c +++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c @@ -59,13 +59,13 @@ struct page_collection { * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise, * only ->tcd_pages are spilled. 
*/ - int pc_want_daemon_pages; + int pc_want_daemon_pages; }; struct tracefiled_ctl { struct completion tctl_start; struct completion tctl_stop; - wait_queue_head_t tctl_waitq; + wait_queue_head_t tctl_waitq; pid_t tctl_pid; atomic_t tctl_shutdown; }; @@ -77,24 +77,24 @@ struct cfs_trace_page { /* * page itself */ - struct page *page; + struct page *page; /* * linkage into one of the lists in trace_data_union or * page_collection */ - struct list_head linkage; + struct list_head linkage; /* * number of bytes used within this page */ - unsigned int used; + unsigned int used; /* * cpu that owns this page */ - unsigned short cpu; + unsigned short cpu; /* * type(context) of this page */ - unsigned short type; + unsigned short type; }; static void put_pages_on_tcd_daemon_list(struct page_collection *pc, @@ -108,7 +108,7 @@ cfs_tage_from_list(struct list_head *list) static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp) { - struct page *page; + struct page *page; struct cfs_trace_page *tage; /* My caller is trying to free memory */ @@ -236,7 +236,7 @@ static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd) INIT_LIST_HEAD(&pc.pc_pages); list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { - if (pgcount-- == 0) + if (!pgcount--) break; list_move_tail(&tage->linkage, &pc.pc_pages); @@ -278,7 +278,7 @@ int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, const char *format, ...) { va_list args; - int rc; + int rc; va_start(args, format); rc = libcfs_debug_vmsg2(msgdata, format, args, NULL); @@ -293,21 +293,21 @@ int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, const char *format2, ...) { struct cfs_trace_cpu_data *tcd = NULL; - struct ptldebug_header header = {0}; - struct cfs_trace_page *tage; + struct ptldebug_header header = { 0 }; + struct cfs_trace_page *tage; /* string_buf is used only if tcd != NULL, and is always set then */ - char *string_buf = NULL; - char *debug_buf; - int known_size; - int needed = 85; /* average message length */ - int max_nob; - va_list ap; - int depth; - int i; - int remain; - int mask = msgdata->msg_mask; - const char *file = kbasename(msgdata->msg_file); - struct cfs_debug_limit_state *cdls = msgdata->msg_cdls; + char *string_buf = NULL; + char *debug_buf; + int known_size; + int needed = 85; /* average message length */ + int max_nob; + va_list ap; + int depth; + int i; + int remain; + int mask = msgdata->msg_mask; + const char *file = kbasename(msgdata->msg_file); + struct cfs_debug_limit_state *cdls = msgdata->msg_cdls; tcd = cfs_trace_get_tcd(); @@ -320,7 +320,7 @@ int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, if (!tcd) /* arch may not log in IRQ context */ goto console; - if (tcd->tcd_cur_pages == 0) + if (!tcd->tcd_cur_pages) header.ph_flags |= PH_FLAG_FIRST_RECORD; if (tcd->tcd_shutting_down) { @@ -423,7 +423,7 @@ int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, __LASSERT(tage->used <= PAGE_SIZE); console: - if ((mask & libcfs_printk) == 0) { + if (!(mask & libcfs_printk)) { /* no console output requested */ if (tcd) cfs_trace_put_tcd(tcd); @@ -432,7 +432,7 @@ console: if (cdls) { if (libcfs_console_ratelimit && - cdls->cdls_next != 0 && /* not first time ever */ + cdls->cdls_next && /* not first time ever */ !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { /* skipping a console message */ cdls->cdls_count++; @@ -489,7 +489,7 @@ console: put_cpu(); } - if (cdls && cdls->cdls_count != 0) { + if (cdls && cdls->cdls_count) { string_buf = cfs_trace_get_console_buffer(); needed = 
snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, @@ -535,9 +535,9 @@ panic_collect_pages(struct page_collection *pc) * CPUs have been stopped during a panic. If this isn't true for some * arch, this will have to be implemented separately in each arch. */ - int i; - int j; struct cfs_trace_cpu_data *tcd; + int i; + int j; INIT_LIST_HEAD(&pc->pc_pages); @@ -698,11 +698,11 @@ void cfs_trace_debug_print(void) int cfs_tracefile_dump_all_pages(char *filename) { - struct page_collection pc; - struct file *filp; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - char *buf; + struct page_collection pc; + struct file *filp; + struct cfs_trace_page *tage; + struct cfs_trace_page *tmp; + char *buf; mm_segment_t __oldfs; int rc; @@ -778,7 +778,7 @@ void cfs_trace_flush_pages(void) int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, const char __user *usr_buffer, int usr_buffer_nob) { - int nob; + int nob; if (usr_buffer_nob > knl_buffer_nob) return -EOVERFLOW; @@ -810,7 +810,7 @@ int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, * NB if 'append' != NULL, it's a single character to append to the * copied out string - usually "\n" or "" (i.e. a terminating zero byte) */ - int nob = strlen(knl_buffer); + int nob = strlen(knl_buffer); if (nob > usr_buffer_nob) nob = usr_buffer_nob; @@ -843,16 +843,16 @@ int cfs_trace_allocate_string_buffer(char **str, int nob) int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob) { - char *str; - int rc; + char *str; + int rc; rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc != 0) + if (rc) return rc; rc = cfs_trace_copyin_string(str, usr_str_nob + 1, usr_str, usr_str_nob); - if (rc != 0) + if (rc) goto out; if (str[0] != '/') { @@ -867,17 +867,17 @@ out: int cfs_trace_daemon_command(char *str) { - int rc = 0; + int rc = 0; cfs_tracefile_write_lock(); - if (strcmp(str, "stop") == 0) { + if (!strcmp(str, "stop")) { cfs_tracefile_write_unlock(); cfs_trace_stop_thread(); cfs_tracefile_write_lock(); memset(cfs_tracefile, 0, sizeof(cfs_tracefile)); - } else if (strncmp(str, "size=", 5) == 0) { + } else if (!strncmp(str, "size=", 5)) { unsigned long tmp; rc = kstrtoul(str + 5, 10, &tmp); @@ -909,15 +909,15 @@ int cfs_trace_daemon_command(char *str) int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob) { char *str; - int rc; + int rc; rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc != 0) + if (rc) return rc; rc = cfs_trace_copyin_string(str, usr_str_nob + 1, usr_str, usr_str_nob); - if (rc == 0) + if (!rc) rc = cfs_trace_daemon_command(str); kfree(str); @@ -1003,7 +1003,7 @@ static int tracefiled(void *arg) filp = NULL; cfs_tracefile_read_lock(); - if (cfs_tracefile[0] != 0) { + if (cfs_tracefile[0]) { filp = filp_open(cfs_tracefile, O_CREAT | O_RDWR | O_LARGEFILE, 0600); @@ -1072,7 +1072,7 @@ static int tracefiled(void *arg) __LASSERT(list_empty(&pc.pc_pages)); end_loop: if (atomic_read(&tctl->tctl_shutdown)) { - if (last_loop == 0) { + if (!last_loop) { last_loop = 1; continue; } else { @@ -1135,13 +1135,13 @@ void cfs_trace_stop_thread(void) int cfs_tracefile_init(int max_pages) { struct cfs_trace_cpu_data *tcd; - int i; - int j; - int rc; - int factor; + int i; + int j; + int rc; + int factor; rc = cfs_tracefile_init_arch(); - if (rc != 0) + if (rc) return rc; cfs_tcd_for_each(tcd, i, j) { diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h index d878676bc375..f644cbc5a277 100644 
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.h +++ b/drivers/staging/lustre/lnet/libcfs/tracefile.h @@ -45,7 +45,7 @@ enum cfs_trace_buf_type { /* trace file lock routines */ #define TRACEFILE_NAME_SIZE 1024 -extern char cfs_tracefile[TRACEFILE_NAME_SIZE]; +extern char cfs_tracefile[TRACEFILE_NAME_SIZE]; extern long long cfs_tracefile_size; void libcfs_run_debug_log_upcall(char *file); @@ -80,7 +80,7 @@ int cfs_trace_get_debug_mb(void); void libcfs_debug_dumplog_internal(void *arg); void libcfs_register_panic_notifier(void); void libcfs_unregister_panic_notifier(void); -extern int libcfs_panic_in_progress; +extern int libcfs_panic_in_progress; int cfs_trace_max_debug_mb(void); #define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) @@ -113,14 +113,14 @@ union cfs_trace_data_union { * tcd_for_each_type_lock */ spinlock_t tcd_lock; - unsigned long tcd_lock_flags; + unsigned long tcd_lock_flags; /* * pages with trace records not yet processed by tracefiled. */ - struct list_head tcd_pages; + struct list_head tcd_pages; /* number of pages on ->tcd_pages */ - unsigned long tcd_cur_pages; + unsigned long tcd_cur_pages; /* * pages with trace records already processed by @@ -132,9 +132,9 @@ union cfs_trace_data_union { * (put_pages_on_daemon_list()). LRU pages from this list are * discarded when list grows too large. */ - struct list_head tcd_daemon_pages; + struct list_head tcd_daemon_pages; /* number of pages on ->tcd_daemon_pages */ - unsigned long tcd_cur_daemon_pages; + unsigned long tcd_cur_daemon_pages; /* * Maximal number of pages allowed on ->tcd_pages and @@ -142,7 +142,7 @@ union cfs_trace_data_union { * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current * implementation. */ - unsigned long tcd_max_pages; + unsigned long tcd_max_pages; /* * preallocated pages to write trace records into. Pages from @@ -166,15 +166,15 @@ union cfs_trace_data_union { * TCD_STOCK_PAGES pagesful are consumed by trace records all * emitted in non-blocking contexts. Which is quite unlikely. */ - struct list_head tcd_stock_pages; + struct list_head tcd_stock_pages; /* number of pages on ->tcd_stock_pages */ - unsigned long tcd_cur_stock_pages; + unsigned long tcd_cur_stock_pages; - unsigned short tcd_shutting_down; - unsigned short tcd_cpu; - unsigned short tcd_type; + unsigned short tcd_shutting_down; + unsigned short tcd_cpu; + unsigned short tcd_type; /* The factors to share debug memory. 
*/ - unsigned short tcd_pages_factor; + unsigned short tcd_pages_factor; } tcd; char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))]; }; diff --git a/drivers/staging/lustre/lnet/libcfs/workitem.c b/drivers/staging/lustre/lnet/libcfs/workitem.c index e98c818a14fb..d0512da6bcde 100644 --- a/drivers/staging/lustre/lnet/libcfs/workitem.c +++ b/drivers/staging/lustre/lnet/libcfs/workitem.c @@ -45,7 +45,7 @@ struct cfs_wi_sched { /* chain on global list */ struct list_head ws_list; /** serialised workitems */ - spinlock_t ws_lock; + spinlock_t ws_lock; /** where schedulers sleep */ wait_queue_head_t ws_waitq; /** concurrent workitems */ @@ -59,26 +59,26 @@ struct cfs_wi_sched { */ struct list_head ws_rerunq; /** CPT-table for this scheduler */ - struct cfs_cpt_table *ws_cptab; + struct cfs_cpt_table *ws_cptab; /** CPT id for affinity */ - int ws_cpt; + int ws_cpt; /** number of scheduled workitems */ - int ws_nscheduled; + int ws_nscheduled; /** started scheduler thread, protected by cfs_wi_data::wi_glock */ - unsigned int ws_nthreads:30; + unsigned int ws_nthreads:30; /** shutting down, protected by cfs_wi_data::wi_glock */ - unsigned int ws_stopping:1; + unsigned int ws_stopping:1; /** serialize starting thread, protected by cfs_wi_data::wi_glock */ - unsigned int ws_starting:1; + unsigned int ws_starting:1; /** scheduler name */ - char ws_name[CFS_WS_NAME_LEN]; + char ws_name[CFS_WS_NAME_LEN]; }; static struct cfs_workitem_data { /** serialize */ spinlock_t wi_glock; /** list of all schedulers */ - struct list_head wi_scheds; + struct list_head wi_scheds; /** WI module is initialized */ int wi_init; /** shutting down the whole WI module */ @@ -136,7 +136,7 @@ EXPORT_SYMBOL(cfs_wi_exit); int cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi) { - int rc; + int rc; LASSERT(!in_interrupt()); /* because we use plain spinlock */ LASSERT(!sched->ws_stopping); @@ -202,13 +202,13 @@ EXPORT_SYMBOL(cfs_wi_schedule); static int cfs_wi_scheduler(void *arg) { - struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg; + struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg; cfs_block_allsigs(); /* CPT affinity scheduler? */ if (sched->ws_cptab) - if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0) + if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt)) CWARN("Failed to bind %s on CPT %d\n", sched->ws_name, sched->ws_cpt); @@ -223,8 +223,8 @@ static int cfs_wi_scheduler(void *arg) spin_lock(&sched->ws_lock); while (!sched->ws_stopping) { - int nloops = 0; - int rc; + int nloops = 0; + int rc; struct cfs_workitem *wi; while (!list_empty(&sched->ws_runq) && @@ -238,16 +238,16 @@ static int cfs_wi_scheduler(void *arg) LASSERT(sched->ws_nscheduled > 0); sched->ws_nscheduled--; - wi->wi_running = 1; + wi->wi_running = 1; wi->wi_scheduled = 0; spin_unlock(&sched->ws_lock); nloops++; - rc = (*wi->wi_action) (wi); + rc = (*wi->wi_action)(wi); spin_lock(&sched->ws_lock); - if (rc != 0) /* WI should be dead, even be freed! */ + if (rc) /* WI should be dead, even be freed! 
*/ continue; wi->wi_running = 0; @@ -273,7 +273,7 @@ static int cfs_wi_scheduler(void *arg) spin_unlock(&sched->ws_lock); rc = wait_event_interruptible_exclusive(sched->ws_waitq, - !cfs_wi_sched_cansleep(sched)); + !cfs_wi_sched_cansleep(sched)); spin_lock(&sched->ws_lock); } @@ -289,7 +289,7 @@ static int cfs_wi_scheduler(void *arg) void cfs_wi_sched_destroy(struct cfs_wi_sched *sched) { - int i; + int i; LASSERT(cfs_wi_data.wi_init); LASSERT(!cfs_wi_data.wi_stopping); @@ -325,7 +325,7 @@ cfs_wi_sched_destroy(struct cfs_wi_sched *sched) list_del(&sched->ws_list); spin_unlock(&cfs_wi_data.wi_glock); - LASSERT(sched->ws_nscheduled == 0); + LASSERT(!sched->ws_nscheduled); LIBCFS_FREE(sched, sizeof(*sched)); } @@ -335,8 +335,8 @@ int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt, int nthrs, struct cfs_wi_sched **sched_pp) { - struct cfs_wi_sched *sched; - int rc; + struct cfs_wi_sched *sched; + int rc; LASSERT(cfs_wi_data.wi_init); LASSERT(!cfs_wi_data.wi_stopping); @@ -364,7 +364,7 @@ cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, rc = 0; while (nthrs > 0) { - char name[16]; + char name[16]; struct task_struct *task; spin_lock(&cfs_wi_data.wi_glock); @@ -431,7 +431,7 @@ cfs_wi_startup(void) void cfs_wi_shutdown(void) { - struct cfs_wi_sched *sched; + struct cfs_wi_sched *sched; struct cfs_wi_sched *temp; spin_lock(&cfs_wi_data.wi_glock); @@ -447,7 +447,7 @@ cfs_wi_shutdown(void) list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) { spin_lock(&cfs_wi_data.wi_glock); - while (sched->ws_nthreads != 0) { + while (sched->ws_nthreads) { spin_unlock(&cfs_wi_data.wi_glock); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(cfs_time_seconds(1) / 20); diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c index 4daf828198c3..b2ba10d59f84 100644 --- a/drivers/staging/lustre/lnet/lnet/api-ni.c +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c @@ -1551,16 +1551,16 @@ LNetNIInit(lnet_pid_t requested_pid) rc = lnet_check_routes(); if (rc) - goto err_destory_routes; + goto err_destroy_routes; rc = lnet_rtrpools_alloc(im_a_router); if (rc) - goto err_destory_routes; + goto err_destroy_routes; } rc = lnet_acceptor_start(); if (rc) - goto err_destory_routes; + goto err_destroy_routes; the_lnet.ln_refcount = 1; /* Now I may use my own API functions... 
*/ @@ -1587,7 +1587,7 @@ err_stop_ping: err_acceptor_stop: the_lnet.ln_refcount = 0; lnet_acceptor_stop(); -err_destory_routes: +err_destroy_routes: if (!the_lnet.ln_nis_from_mod_params) lnet_destroy_routes(); err_shutdown_lndnis: diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c index b430046dc294..eb796a86e6ab 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-me.c +++ b/drivers/staging/lustre/lnet/lnet/lib-me.c @@ -271,21 +271,3 @@ lnet_me_unlink(lnet_me_t *me) lnet_res_lh_invalidate(&me->me_lh); lnet_me_free(me); } - -#if 0 -static void -lib_me_dump(lnet_me_t *me) -{ - CWARN("Match Entry %p (%#llx)\n", me, - me->me_lh.lh_cookie); - - CWARN("\tMatch/Ignore\t= %016lx / %016lx\n", - me->me_match_bits, me->me_ignore_bits); - - CWARN("\tMD\t= %p\n", me->md); - CWARN("\tprev\t= %p\n", - list_entry(me->me_list.prev, lnet_me_t, me_list)); - CWARN("\tnext\t= %p\n", - list_entry(me->me_list.next, lnet_me_t, me_list)); -} -#endif diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index 48e6f8f2392f..f3dd6e42f4d4 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -192,6 +192,7 @@ lnet_copy_iov2iter(struct iov_iter *to, left = siov->iov_len - soffset; do { size_t n, copy = left; + LASSERT(nsiov > 0); if (copy > nob) diff --git a/drivers/staging/lustre/lnet/lnet/nidstrings.c b/drivers/staging/lustre/lnet/lnet/nidstrings.c index a6d7a6159b8f..a9fe3e69daae 100644 --- a/drivers/staging/lustre/lnet/lnet/nidstrings.c +++ b/drivers/staging/lustre/lnet/lnet/nidstrings.c @@ -193,7 +193,7 @@ add_nidrange(const struct cfs_lstr *src, struct netstrfns *nf; struct nidrange *nr; int endlen; - unsigned netnum; + unsigned int netnum; if (src->ls_len >= LNET_NIDSTR_SIZE) return NULL; @@ -247,10 +247,8 @@ parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist) { struct cfs_lstr addrrange; struct cfs_lstr net; - struct cfs_lstr tmp; struct nidrange *nr; - tmp = *src; if (!cfs_gettok(src, '@', &addrrange)) goto failed; @@ -1156,7 +1154,7 @@ EXPORT_SYMBOL(libcfs_nid2str_r); static struct netstrfns * libcfs_str2net_internal(const char *str, __u32 *net) { - struct netstrfns *uninitialized_var(nf); + struct netstrfns *nf = NULL; int nob; unsigned int netnum; int i; diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c index 063ad55ec950..8afa0abf15cd 100644 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ b/drivers/staging/lustre/lnet/lnet/router.c @@ -903,6 +903,7 @@ lnet_create_rc_data_locked(lnet_peer_t *gateway) { lnet_rc_data_t *rcd = NULL; lnet_ping_info_t *pi; + lnet_md_t md; int rc; int i; @@ -925,15 +926,15 @@ lnet_create_rc_data_locked(lnet_peer_t *gateway) } rcd->rcd_pinginfo = pi; + md.start = pi; + md.user_ptr = rcd; + md.length = LNET_PINGINFO_SIZE; + md.threshold = LNET_MD_THRESH_INF; + md.options = LNET_MD_TRUNCATE; + md.eq_handle = the_lnet.ln_rc_eqh; + LASSERT(!LNetHandleIsInvalid(the_lnet.ln_rc_eqh)); - rc = LNetMDBind((lnet_md_t){.start = pi, - .user_ptr = rcd, - .length = LNET_PINGINFO_SIZE, - .threshold = LNET_MD_THRESH_INF, - .options = LNET_MD_TRUNCATE, - .eq_handle = the_lnet.ln_rc_eqh}, - LNET_UNLINK, - &rcd->rcd_mdh); + rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh); if (rc < 0) { CERROR("Can't bind MD: %d\n", rc); goto out; diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c index b20c5d394e3b..67b460f41d6e 100644 --- 
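[Editorial note on the LNetMDBind() change above: the old compound literal zero-initialized every lnet_md_t member it did not name, while a plain stack variable does not; if any unnamed member (max_size, for instance) ever mattered here, a memset would preserve the old behavior. This variant is an assumption-laden illustration, not what the patch does.]

	lnet_md_t md;

	memset(&md, 0, sizeof(md));	/* keep the compound literal's implicit zeroing */
	md.start = pi;
	md.user_ptr = rcd;
	md.length = LNET_PINGINFO_SIZE;
	md.threshold = LNET_MD_THRESH_INF;
	md.options = LNET_MD_TRUNCATE;
	md.eq_handle = the_lnet.ln_rc_eqh;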
a/drivers/staging/lustre/lnet/selftest/brw_test.c +++ b/drivers/staging/lustre/lnet/selftest/brw_test.c @@ -44,6 +44,10 @@ static int brw_inject_errors; module_param(brw_inject_errors, int, 0644); MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default"); +#define BRW_POISON 0xbeefbeefbeefbeefULL +#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL +#define BRW_MSIZE sizeof(u64) + static void brw_client_fini(struct sfw_test_instance *tsi) { @@ -67,6 +71,7 @@ brw_client_init(struct sfw_test_instance *tsi) { struct sfw_session *sn = tsi->tsi_batch->bat_session; int flags; + int off; int npg; int len; int opc; @@ -87,6 +92,7 @@ brw_client_init(struct sfw_test_instance *tsi) * but we have to keep it for compatibility */ len = npg * PAGE_SIZE; + off = 0; } else { struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1; @@ -99,9 +105,13 @@ brw_client_init(struct sfw_test_instance *tsi) opc = breq->blk_opc; flags = breq->blk_flags; len = breq->blk_len; - npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + off = breq->blk_offset & ~PAGE_MASK; + npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; } + if (off % BRW_MSIZE) + return -EINVAL; + if (npg > LNET_MAX_IOV || npg <= 0) return -EINVAL; @@ -114,7 +124,7 @@ brw_client_init(struct sfw_test_instance *tsi) list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid), - npg, len, opc == LST_BRW_READ); + off, npg, len, opc == LST_BRW_READ); if (!bulk) { brw_client_fini(tsi); return -ENOMEM; @@ -126,12 +136,7 @@ brw_client_init(struct sfw_test_instance *tsi) return 0; } -#define BRW_POISON 0xbeefbeefbeefbeefULL -#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL -#define BRW_MSIZE sizeof(__u64) - -static int -brw_inject_one_error(void) +static int brw_inject_one_error(void) { struct timespec64 ts; @@ -147,12 +152,13 @@ brw_inject_one_error(void) } static void -brw_fill_page(struct page *pg, int pattern, __u64 magic) +brw_fill_page(struct page *pg, int off, int len, int pattern, __u64 magic) { - char *addr = page_address(pg); + char *addr = page_address(pg) + off; int i; LASSERT(addr); + LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE)); if (pattern == LST_BRW_CHECK_NONE) return; @@ -162,14 +168,16 @@ brw_fill_page(struct page *pg, int pattern, __u64 magic) if (pattern == LST_BRW_CHECK_SIMPLE) { memcpy(addr, &magic, BRW_MSIZE); - addr += PAGE_SIZE - BRW_MSIZE; - memcpy(addr, &magic, BRW_MSIZE); + if (len > BRW_MSIZE) { + addr += PAGE_SIZE - BRW_MSIZE; + memcpy(addr, &magic, BRW_MSIZE); + } return; } if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++) - memcpy(addr + i * BRW_MSIZE, &magic, BRW_MSIZE); + for (i = 0; i < len; i += BRW_MSIZE) + memcpy(addr + i, &magic, BRW_MSIZE); return; } @@ -177,13 +185,14 @@ brw_fill_page(struct page *pg, int pattern, __u64 magic) } static int -brw_check_page(struct page *pg, int pattern, __u64 magic) +brw_check_page(struct page *pg, int off, int len, int pattern, __u64 magic) { - char *addr = page_address(pg); + char *addr = page_address(pg) + off; __u64 data = 0; /* make compiler happy */ int i; LASSERT(addr); + LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE)); if (pattern == LST_BRW_CHECK_NONE) return 0; @@ -193,21 +202,21 @@ brw_check_page(struct page *pg, int pattern, __u64 magic) if (data != magic) goto bad_data; - addr += PAGE_SIZE - BRW_MSIZE; - data = *((__u64 *)addr); - if (data != magic) - goto bad_data; - + if (len > BRW_MSIZE) { + addr += PAGE_SIZE - BRW_MSIZE; + data = *((__u64 *)addr); + if (data != magic) + goto bad_data; + } return
0; } if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++) { - data = *(((__u64 *)addr) + i); + for (i = 0; i < len; i += BRW_MSIZE) { + data = *(u64 *)(addr + i); if (data != magic) goto bad_data; } - return 0; } @@ -226,8 +235,12 @@ brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) struct page *pg; for (i = 0; i < bk->bk_niov; i++) { + int off, len; + pg = bk->bk_iovs[i].bv_page; - brw_fill_page(pg, pattern, magic); + off = bk->bk_iovs[i].bv_offset; + len = bk->bk_iovs[i].bv_len; + brw_fill_page(pg, off, len, pattern, magic); } } @@ -238,8 +251,12 @@ brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) struct page *pg; for (i = 0; i < bk->bk_niov; i++) { + int off, len; + pg = bk->bk_iovs[i].bv_page; - if (brw_check_page(pg, pattern, magic)) { + off = bk->bk_iovs[i].bv_offset; + len = bk->bk_iovs[i].bv_len; + if (brw_check_page(pg, off, len, pattern, magic)) { CERROR("Bulk page %p (%d/%d) is corrupted!\n", pg, i, bk->bk_niov); return 1; @@ -276,6 +293,7 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu, len = npg * PAGE_SIZE; } else { struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1; + int off; /* * I should never get this step if it's unknown feature @@ -286,7 +304,8 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu, opc = breq->blk_opc; flags = breq->blk_flags; len = breq->blk_len; - npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + off = breq->blk_offset; + npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; } rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc); diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c index b786f8b4a73d..94383023c1be 100644 --- a/drivers/staging/lustre/lnet/selftest/conctl.c +++ b/drivers/staging/lustre/lnet/selftest/conctl.c @@ -315,7 +315,7 @@ lst_group_update_ioctl(lstio_group_update_args_t *args) static int lst_nodes_add_ioctl(lstio_group_nodes_args_t *args) { - unsigned feats; + unsigned int feats; int rc; char *name; @@ -742,6 +742,10 @@ static int lst_test_add_ioctl(lstio_test_args_t *args) PAGE_SIZE - sizeof(struct lstcon_test))) return -EINVAL; + /* Enforce zero parameter length if there's no parameter */ + if (!args->lstio_tes_param && args->lstio_tes_param_len) + return -EINVAL; + LIBCFS_ALLOC(batch_name, args->lstio_tes_bat_nmlen + 1); if (!batch_name) return rc; diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c index 55afb53b0743..994422c62487 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ b/drivers/staging/lustre/lnet/selftest/conrpc.c @@ -86,8 +86,9 @@ lstcon_rpc_done(struct srpc_client_rpc *rpc) } static int -lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned feats, - int bulk_npg, int bulk_len, int embedded, struct lstcon_rpc *crpc) +lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned int feats, + int bulk_npg, int bulk_len, int embedded, + struct lstcon_rpc *crpc) { crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service, feats, bulk_npg, bulk_len, @@ -111,7 +112,7 @@ lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned feats, } static int -lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned feats, +lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned int feats, int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp) { struct lstcon_rpc *crpc = NULL; @@ -292,8 +293,8 @@ lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error) spin_lock(&rpc->crpc_lock); - if (!crpc->crp_posted || /* not posted */ - 
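[Stepping back to the brw offset arithmetic above: a runnable userspace check of the new npg computation in brw_client_init(), assuming 4 KiB pages; the constants and sample values are illustrative only.]

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT	12			/* assumed 4 KiB pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long blk_offset = 5120, len = 6144;
	unsigned long off = blk_offset & ~PAGE_MASK;	/* in-page offset: 1024 */
	unsigned long npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/* 1024 + 6144 = 7168 bytes, ending exactly at the second page boundary */
	assert(off == 1024 && npg == 2);
	printf("off=%lu -> npg=%lu\n", off, npg);
	return 0;
}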
crpc->crp_stamp) { /* rpc done or aborted already */ + if (!crpc->crp_posted || /* not posted */ + crpc->crp_stamp) { /* rpc done or aborted already */ if (!crpc->crp_stamp) { crpc->crp_stamp = cfs_time_current(); crpc->crp_status = -EINTR; @@ -589,7 +590,7 @@ lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans) int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, - unsigned feats, struct lstcon_rpc **crpc) + unsigned int feats, struct lstcon_rpc **crpc) { struct srpc_mksn_reqst *msrq; struct srpc_rmsn_reqst *rsrq; @@ -627,7 +628,8 @@ lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, } int -lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **crpc) +lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned int feats, + struct lstcon_rpc **crpc) { struct srpc_debug_reqst *drq; int rc; @@ -645,7 +647,7 @@ lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **c } int -lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned feats, +lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats, struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc) { struct lstcon_batch *batch; @@ -678,7 +680,8 @@ lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned feats, } int -lstcon_statrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **crpc) +lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int feats, + struct lstcon_rpc **crpc) { struct srpc_stat_reqst *srq; int rc; @@ -776,7 +779,8 @@ lstcon_pingrpc_prep(lst_test_ping_param_t *param, struct srpc_test_reqst *req) } static int -lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req) +lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, + struct srpc_test_reqst *req) { struct test_bulk_req *brq = &req->tsr_u.bulk_v0; @@ -789,20 +793,21 @@ lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req } static int -lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req) +lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, bool is_client, + struct srpc_test_reqst *req) { struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1; brq->blk_opc = param->blk_opc; brq->blk_flags = param->blk_flags; brq->blk_len = param->blk_size; - brq->blk_offset = 0; /* reserved */ + brq->blk_offset = is_client ? 
param->blk_cli_off : param->blk_srv_off; return 0; } int -lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats, +lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats, struct lstcon_test *test, struct lstcon_rpc **crpc) { struct lstcon_group *sgrp = test->tes_src_grp; @@ -897,7 +902,8 @@ lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats, &test->tes_param[0], trq); } else { rc = lstcon_bulkrpc_v1_prep((lst_test_bulk_param_t *) - &test->tes_param[0], trq); + &test->tes_param[0], + trq->tsr_is_client, trq); } break; @@ -1084,7 +1090,7 @@ lstcon_rpc_trans_ndlist(struct list_head *ndlist, struct lstcon_ndlink *ndl; struct lstcon_node *nd; struct lstcon_rpc *rpc; - unsigned feats; + unsigned int feats; int rc; /* Creating session RPG for list of nodes */ diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h index 7ec6fc96959e..e629e87c461c 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.h +++ b/drivers/staging/lustre/lnet/selftest/conrpc.h @@ -78,8 +78,8 @@ struct lstcon_rpc_trans { struct list_head tas_olink; /* link chain on owner list */ struct list_head tas_link; /* link chain on global list */ int tas_opc; /* operation code of transaction */ - unsigned tas_feats_updated; /* features mask is uptodate */ - unsigned tas_features; /* test features mask */ + unsigned int tas_feats_updated; /* features mask is uptodate */ + unsigned int tas_features; /* test features mask */ wait_queue_head_t tas_waitq; /* wait queue head */ atomic_t tas_remaining; /* # of un-scheduled rpcs */ struct list_head tas_rpcs_list; /* queued requests */ @@ -106,14 +106,16 @@ typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *, lstcon_rpc_ent_t __user *); int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, - unsigned version, struct lstcon_rpc **crpc); + unsigned int version, struct lstcon_rpc **crpc); int lstcon_dbgrpc_prep(struct lstcon_node *nd, - unsigned version, struct lstcon_rpc **crpc); -int lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned version, - struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc); -int lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned version, - struct lstcon_test *test, struct lstcon_rpc **crpc); -int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned version, + unsigned int version, struct lstcon_rpc **crpc); +int lstcon_batrpc_prep(struct lstcon_node *nd, int transop, + unsigned int version, struct lstcon_tsb_hdr *tsb, + struct lstcon_rpc **crpc); +int lstcon_testrpc_prep(struct lstcon_node *nd, int transop, + unsigned int version, struct lstcon_test *test, + struct lstcon_rpc **crpc); +int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int version, struct lstcon_rpc **crpc); void lstcon_rpc_put(struct lstcon_rpc *crpc); int lstcon_rpc_trans_prep(struct list_head *translist, @@ -129,7 +131,8 @@ int lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans, lstcon_rpc_readent_func_t readent); void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error); void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans); -void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *req); +void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, + struct lstcon_rpc *req); int lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout); int lstcon_rpc_pinger_start(void); void lstcon_rpc_pinger_stop(void); diff --git a/drivers/staging/lustre/lnet/selftest/console.c 
b/drivers/staging/lustre/lnet/selftest/console.c index a0fcbf3bcc95..1456d2395cc9 100644 --- a/drivers/staging/lustre/lnet/selftest/console.c +++ b/drivers/staging/lustre/lnet/selftest/console.c @@ -86,7 +86,7 @@ lstcon_node_find(lnet_process_id_t id, struct lstcon_node **ndpp, int create) if (!create) return -ENOENT; - LIBCFS_ALLOC(*ndpp, sizeof(struct lstcon_node) + sizeof(struct lstcon_ndlink)); + LIBCFS_ALLOC(*ndpp, sizeof(**ndpp) + sizeof(*ndl)); if (!*ndpp) return -ENOMEM; @@ -131,12 +131,12 @@ lstcon_node_put(struct lstcon_node *nd) list_del(&ndl->ndl_link); list_del(&ndl->ndl_hlink); - LIBCFS_FREE(nd, sizeof(struct lstcon_node) + sizeof(struct lstcon_ndlink)); + LIBCFS_FREE(nd, sizeof(*nd) + sizeof(*ndl)); } static int -lstcon_ndlink_find(struct list_head *hash, - lnet_process_id_t id, struct lstcon_ndlink **ndlpp, int create) +lstcon_ndlink_find(struct list_head *hash, lnet_process_id_t id, + struct lstcon_ndlink **ndlpp, int create) { unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE; struct lstcon_ndlink *ndl; @@ -230,7 +230,8 @@ lstcon_group_addref(struct lstcon_group *grp) grp->grp_ref++; } -static void lstcon_group_ndlink_release(struct lstcon_group *, struct lstcon_ndlink *); +static void lstcon_group_ndlink_release(struct lstcon_group *, + struct lstcon_ndlink *); static void lstcon_group_drain(struct lstcon_group *grp, int keep) @@ -397,7 +398,8 @@ lstcon_sesrpc_readent(int transop, struct srpc_msg *msg, static int lstcon_group_nodes_add(struct lstcon_group *grp, int count, lnet_process_id_t __user *ids_up, - unsigned *featp, struct list_head __user *result_up) + unsigned int *featp, + struct list_head __user *result_up) { struct lstcon_rpc_trans *trans; struct lstcon_ndlink *ndl; @@ -542,7 +544,8 @@ lstcon_group_add(char *name) int lstcon_nodes_add(char *name, int count, lnet_process_id_t __user *ids_up, - unsigned *featp, struct list_head __user *result_up) + unsigned int *featp, + struct list_head __user *result_up) { struct lstcon_group *grp; int rc; @@ -820,7 +823,7 @@ lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gents_p, lstcon_group_decref(grp); - return 0; + return rc; } static int @@ -1181,7 +1184,8 @@ lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg) } static int -lstcon_test_nodes_add(struct lstcon_test *test, struct list_head __user *result_up) +lstcon_test_nodes_add(struct lstcon_test *test, + struct list_head __user *result_up) { struct lstcon_rpc_trans *trans; struct lstcon_group *grp; @@ -1364,7 +1368,8 @@ out: } static int -lstcon_test_find(struct lstcon_batch *batch, int idx, struct lstcon_test **testpp) +lstcon_test_find(struct lstcon_batch *batch, int idx, + struct lstcon_test **testpp) { struct lstcon_test *test; @@ -1702,7 +1707,7 @@ lstcon_new_session_id(lst_sid_t *sid) } int -lstcon_session_new(char *name, int key, unsigned feats, +lstcon_session_new(char *name, int key, unsigned int feats, int timeout, int force, lst_sid_t __user *sid_up) { int rc = 0; @@ -1868,7 +1873,7 @@ lstcon_session_end(void) } int -lstcon_session_feats_check(unsigned feats) +lstcon_session_feats_check(unsigned int feats) { int rc = 0; diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h index 78388a611c22..5dc1de48a10e 100644 --- a/drivers/staging/lustre/lnet/selftest/console.h +++ b/drivers/staging/lustre/lnet/selftest/console.h @@ -92,14 +92,16 @@ struct lstcon_batch { int bat_ntest; /* # of test */ int bat_state; /* state of the batch */ int bat_arg; /* parameter for 
run|stop, timeout - * for run, force for stop */ + * for run, force for stop + */ char bat_name[LST_NAME_SIZE];/* name of batch */ struct list_head bat_test_list; /* list head of tests (struct lstcon_test) */ struct list_head bat_trans_list; /* list head of transaction */ struct list_head bat_cli_list; /* list head of client nodes - * (struct lstcon_node) */ + * (struct lstcon_node) + */ struct list_head *bat_cli_hash; /* hash table of client nodes */ struct list_head bat_srv_list; /* list head of server nodes */ struct list_head *bat_srv_hash; /* hash table of server nodes */ @@ -144,13 +146,14 @@ struct lstcon_session { int ses_timeout; /* timeout in seconds */ time64_t ses_laststamp; /* last operation stamp (seconds) */ - unsigned ses_features; /* tests features of the session + unsigned int ses_features; /* tests features of the session */ - unsigned ses_feats_updated:1; /* features are synced with - * remote test nodes */ - unsigned ses_force:1; /* force creating */ - unsigned ses_shutdown:1; /* session is shutting down */ - unsigned ses_expired:1; /* console is timedout */ + unsigned int ses_feats_updated:1; /* features are synced with + * remote test nodes + */ + unsigned int ses_force:1; /* force creating */ + unsigned int ses_shutdown:1; /* session is shutting down */ + unsigned int ses_expired:1; /* console is timedout */ __u64 ses_id_cookie; /* batch id cookie */ char ses_name[LST_NAME_SIZE];/* session name */ struct lstcon_rpc_trans *ses_ping; /* session pinger */ @@ -188,14 +191,14 @@ int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr); int lstcon_console_init(void); int lstcon_console_fini(void); int lstcon_session_match(lst_sid_t sid); -int lstcon_session_new(char *name, int key, unsigned version, +int lstcon_session_new(char *name, int key, unsigned int version, int timeout, int flags, lst_sid_t __user *sid_up); int lstcon_session_info(lst_sid_t __user *sid_up, int __user *key, unsigned __user *verp, lstcon_ndlist_ent_t __user *entp, char __user *name_up, int len); int lstcon_session_end(void); int lstcon_session_debug(int timeout, struct list_head __user *result_up); -int lstcon_session_feats_check(unsigned feats); +int lstcon_session_feats_check(unsigned int feats); int lstcon_batch_debug(int timeout, char *name, int client, struct list_head __user *result_up); int lstcon_group_debug(int timeout, char *name, @@ -207,7 +210,7 @@ int lstcon_group_del(char *name); int lstcon_group_clean(char *name, int args); int lstcon_group_refresh(char *name, struct list_head __user *result_up); int lstcon_nodes_add(char *name, int nnd, lnet_process_id_t __user *nds_up, - unsigned *featp, struct list_head __user *result_up); + unsigned int *featp, struct list_head __user *result_up); int lstcon_nodes_remove(char *name, int nnd, lnet_process_id_t __user *nds_up, struct list_head __user *result_up); int lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gent_up, diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c index abbd6287b4bd..48dcc330dc9b 100644 --- a/drivers/staging/lustre/lnet/selftest/framework.c +++ b/drivers/staging/lustre/lnet/selftest/framework.c @@ -131,7 +131,8 @@ sfw_find_test_case(int id) } static int -sfw_register_test(struct srpc_service *service, struct sfw_test_client_ops *cliops) +sfw_register_test(struct srpc_service *service, + struct sfw_test_client_ops *cliops) { struct sfw_test_case *tsc; @@ -254,7 +255,7 @@ sfw_session_expired(void *data) static inline void 
sfw_init_session(struct sfw_session *sn, lst_sid_t sid, - unsigned features, const char *name) + unsigned int features, const char *name) { struct stt_timer *timer = &sn->sn_timer; @@ -469,7 +470,8 @@ sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply) } static int -sfw_remove_session(struct srpc_rmsn_reqst *request, struct srpc_rmsn_reply *reply) +sfw_remove_session(struct srpc_rmsn_reqst *request, + struct srpc_rmsn_reply *reply) { struct sfw_session *sn = sfw_data.fw_session; @@ -501,7 +503,8 @@ sfw_remove_session(struct srpc_rmsn_reqst *request, struct srpc_rmsn_reply *repl } static int -sfw_debug_session(struct srpc_debug_reqst *request, struct srpc_debug_reply *reply) +sfw_debug_session(struct srpc_debug_reqst *request, + struct srpc_debug_reply *reply) { struct sfw_session *sn = sfw_data.fw_session; @@ -897,7 +900,7 @@ sfw_test_rpc_done(struct srpc_client_rpc *rpc) int sfw_create_test_rpc(struct sfw_test_unit *tsu, lnet_process_id_t peer, - unsigned features, int nblk, int blklen, + unsigned int features, int nblk, int blklen, struct srpc_client_rpc **rpcpp) { struct srpc_client_rpc *rpc = NULL; @@ -1064,7 +1067,8 @@ sfw_stop_batch(struct sfw_batch *tsb, int force) } static int -sfw_query_batch(struct sfw_batch *tsb, int testidx, struct srpc_batch_reply *reply) +sfw_query_batch(struct sfw_batch *tsb, int testidx, + struct srpc_batch_reply *reply) { struct sfw_test_instance *tsi; @@ -1101,7 +1105,7 @@ sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, LASSERT(!rpc->srpc_bulk); LASSERT(npages > 0 && npages <= LNET_MAX_IOV); - rpc->srpc_bulk = srpc_alloc_bulk(cpt, npages, len, sink); + rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink); if (!rpc->srpc_bulk) return -ENOMEM; @@ -1179,7 +1183,8 @@ sfw_add_test(struct srpc_server_rpc *rpc) } static int -sfw_control_batch(struct srpc_batch_reqst *request, struct srpc_batch_reply *reply) +sfw_control_batch(struct srpc_batch_reqst *request, + struct srpc_batch_reply *reply) { struct sfw_session *sn = sfw_data.fw_session; int rc = 0; @@ -1225,7 +1230,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc) struct srpc_service *sv = rpc->srpc_scd->scd_svc; struct srpc_msg *reply = &rpc->srpc_replymsg; struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg; - unsigned features = LST_FEATS_MASK; + unsigned int features = LST_FEATS_MASK; int rc = 0; LASSERT(!sfw_data.fw_active_srpc); @@ -1375,7 +1380,7 @@ sfw_bulk_ready(struct srpc_server_rpc *rpc, int status) struct srpc_client_rpc * sfw_create_rpc(lnet_process_id_t peer, int service, - unsigned features, int nbulkiov, int bulklen, + unsigned int features, int nbulkiov, int bulklen, void (*done)(struct srpc_client_rpc *), void *priv) { struct srpc_client_rpc *rpc = NULL; diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c index 9331ca4e3606..b9601b00a273 100644 --- a/drivers/staging/lustre/lnet/selftest/ping_test.c +++ b/drivers/staging/lustre/lnet/selftest/ping_test.c @@ -159,8 +159,8 @@ ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc) ktime_get_real_ts64(&ts); CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq, - (unsigned)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 + - (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec))); + (unsigned int)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 + + (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec))); } static int diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c 
b/drivers/staging/lustre/lnet/selftest/rpc.c index f5619d8744ef..ce9de8c9be57 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -84,14 +84,13 @@ void srpc_set_counters(const srpc_counters_t *cnt) } static int -srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int nob) +srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off, + int nob) { - nob = min_t(int, nob, PAGE_SIZE); + LASSERT(off < PAGE_SIZE); + LASSERT(nob > 0 && nob <= PAGE_SIZE); - LASSERT(nob > 0); - LASSERT(i >= 0 && i < bk->bk_niov); - - bk->bk_iovs[i].bv_offset = 0; + bk->bk_iovs[i].bv_offset = off; bk->bk_iovs[i].bv_page = pg; bk->bk_iovs[i].bv_len = nob; return nob; @@ -117,7 +116,8 @@ srpc_free_bulk(struct srpc_bulk *bk) } struct srpc_bulk * -srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink) +srpc_alloc_bulk(int cpt, unsigned int bulk_off, unsigned int bulk_npg, + unsigned int bulk_len, int sink) { struct srpc_bulk *bk; int i; @@ -148,8 +148,11 @@ srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink) return NULL; } - nob = srpc_add_bulk_page(bk, pg, i, bulk_len); + nob = min_t(unsigned int, bulk_off + bulk_len, PAGE_SIZE) - + bulk_off; + srpc_add_bulk_page(bk, pg, i, bulk_off, nob); bulk_len -= nob; + bulk_off = 0; } return bk; @@ -693,7 +696,8 @@ srpc_finish_service(struct srpc_service *sv) /* called with sv->sv_lock held */ static void -srpc_service_recycle_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf) +srpc_service_recycle_buffer(struct srpc_service_cd *scd, + struct srpc_buffer *buf) __must_hold(&scd->scd_lock) { if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) { diff --git a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h index 4ab2ee264004..f353a634cc8e 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.h +++ b/drivers/staging/lustre/lnet/selftest/rpc.h @@ -113,7 +113,8 @@ struct srpc_join_reply { __u32 join_status; /* returned status */ lst_sid_t join_sid; /* session id */ __u32 join_timeout; /* # seconds' inactivity to - * expire */ + * expire + */ char join_session[LST_NAME_SIZE]; /* session name */ } WIRE_ATTR; @@ -175,7 +176,7 @@ struct test_bulk_req_v1 { __u16 blk_opc; /* bulk operation code */ __u16 blk_flags; /* data check flags */ __u32 blk_len; /* data length */ - __u32 blk_offset; /* reserved: offset */ + __u32 blk_offset; /* offset */ } WIRE_ATTR; struct test_ping_req { @@ -190,7 +191,8 @@ struct srpc_test_reqst { lst_bid_t tsr_bid; /* batch id */ __u32 tsr_service; /* test type: bulk|ping|... 
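[An editorial, runnable trace of the srpc_alloc_bulk() slicing loop above, assuming 4 KiB pages: only the first bulk iov keeps a non-zero offset, because bulk_off is reset to 0 after the first page.]

#include <stdio.h>

#define PAGE_SIZE 4096u			/* assumed page size */

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int bulk_off = 1024, bulk_len = 6144, i = 0;

	while (bulk_len) {
		/* same expression the patch uses for nob */
		unsigned int nob = min_u(bulk_off + bulk_len, PAGE_SIZE) - bulk_off;

		printf("iov[%u]: bv_offset=%u bv_len=%u\n", i++, bulk_off, nob);
		bulk_len -= nob;
		bulk_off = 0;	/* later pages start at offset 0 */
	}
	return 0;	/* prints 1024/3072 then 0/3072 */
}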
*/ __u32 tsr_loop; /* test client loop count or - * # server buffers needed */ + * # server buffers needed + */ __u32 tsr_concur; /* concurrency of test */ __u8 tsr_is_client; /* is test client or not */ __u8 tsr_stop_onerr; /* stop on error */ diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h index d033ac03d953..c8833a016b6d 100644 --- a/drivers/staging/lustre/lnet/selftest/selftest.h +++ b/drivers/staging/lustre/lnet/selftest/selftest.h @@ -131,7 +131,8 @@ srpc_service2reply(int service) enum srpc_event_type { SRPC_BULK_REQ_RCVD = 1, /* passive bulk request(PUT sink/GET source) - * received */ + * received + */ SRPC_BULK_PUT_SENT = 2, /* active bulk PUT sent (source) */ SRPC_BULK_GET_RPLD = 3, /* active bulk GET replied (sink) */ SRPC_REPLY_RCVD = 4, /* incoming reply received */ @@ -295,7 +296,8 @@ struct srpc_service_cd { #define SFW_TEST_WI_MIN 256 #define SFW_TEST_WI_MAX 2048 /* extra buffers for tolerating buggy peers, or unbalanced number - * of peers between partitions */ + * of peers between partitions + */ #define SFW_TEST_WI_EXTRA 64 /* number of server workitems (mini-thread) for framework service */ @@ -347,9 +349,11 @@ struct sfw_batch { struct sfw_test_client_ops { int (*tso_init)(struct sfw_test_instance *tsi); /* initialize test - * client */ + * client + */ void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test - * client */ + * client + */ int (*tso_prep_rpc)(struct sfw_test_unit *tsu, lnet_process_id_t dest, struct srpc_client_rpc **rpc); /* prep a tests rpc */ @@ -374,7 +378,8 @@ struct sfw_test_instance { spinlock_t tsi_lock; /* serialize */ unsigned int tsi_stopping:1; /* test is stopping */ atomic_t tsi_nactive; /* # of active test - * unit */ + * unit + */ struct list_head tsi_units; /* test units */ struct list_head tsi_free_rpcs; /* free rpcs */ struct list_head tsi_active_rpcs; /* active rpcs */ @@ -386,8 +391,10 @@ struct sfw_test_instance { } tsi_u; }; -/* XXX: trailing (PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at the end of - * pages are not used */ +/* + * XXX: trailing (PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at the end of + * pages are not used + */ #define SFW_MAX_CONCUR LST_MAX_CONCUR #define SFW_ID_PER_PAGE (PAGE_SIZE / sizeof(lnet_process_id_packed_t)) #define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE) @@ -410,10 +417,10 @@ struct sfw_test_case { struct srpc_client_rpc * sfw_create_rpc(lnet_process_id_t peer, int service, - unsigned features, int nbulkiov, int bulklen, + unsigned int features, int nbulkiov, int bulklen, void (*done)(struct srpc_client_rpc *), void *priv); int sfw_create_test_rpc(struct sfw_test_unit *tsu, - lnet_process_id_t peer, unsigned features, + lnet_process_id_t peer, unsigned int features, int nblk, int blklen, struct srpc_client_rpc **rpc); void sfw_abort_rpc(struct srpc_client_rpc *rpc); void sfw_post_rpc(struct srpc_client_rpc *rpc); @@ -434,8 +441,9 @@ srpc_create_client_rpc(lnet_process_id_t peer, int service, void srpc_post_rpc(struct srpc_client_rpc *rpc); void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why); void srpc_free_bulk(struct srpc_bulk *bk); -struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned bulk_npg, - unsigned bulk_len, int sink); +struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off, + unsigned int bulk_npg, unsigned int bulk_len, + int sink); int srpc_send_rpc(struct swi_workitem *wi); int srpc_send_reply(struct srpc_server_rpc *rpc); int srpc_add_service(struct srpc_service *sv); diff --git 
a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c index dcd22580b1f0..2fe692df19d0 100644 --- a/drivers/staging/lustre/lnet/selftest/timer.c +++ b/drivers/staging/lustre/lnet/selftest/timer.c @@ -46,16 +46,17 @@ * to cover a time period of 1024 seconds into the future before wrapping. */ #define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */ -#define STTIMER_SLOTTIME (1 << STTIMER_MINPOLL) +#define STTIMER_SLOTTIME BIT(STTIMER_MINPOLL) #define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1)) -#define STTIMER_NSLOTS (1 << 7) +#define STTIMER_NSLOTS BIT(7) #define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \ (STTIMER_NSLOTS - 1))]) static struct st_timer_data { spinlock_t stt_lock; unsigned long stt_prev_slot; /* start time of the slot processed - * previously */ + * previously + */ struct list_head stt_hash[STTIMER_NSLOTS]; int stt_shuttingdown; wait_queue_head_t stt_waitq; diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c index edd72b926f81..999f250ceed0 100644 --- a/drivers/staging/lustre/lustre/fid/fid_request.c +++ b/drivers/staging/lustre/lustre/fid/fid_request.c @@ -74,7 +74,7 @@ static int seq_client_rpc(struct lu_client_seq *seq, /* Zero out input range, this is not recovery yet. */ in = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_RANGE); - range_init(in); + lu_seq_range_init(in); ptlrpc_request_set_replen(req); @@ -112,25 +112,21 @@ static int seq_client_rpc(struct lu_client_seq *seq, ptlrpc_at_set_req_timeout(req); - if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA) - mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); rc = ptlrpc_queue_wait(req); - if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA) - mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); if (rc) goto out_req; out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE); *output = *out; - if (!range_is_sane(output)) { + if (!lu_seq_range_is_sane(output)) { CERROR("%s: Invalid range received from server: " DRANGE "\n", seq->lcs_name, PRANGE(output)); rc = -EINVAL; goto out_req; } - if (range_is_exhausted(output)) { + if (lu_seq_range_is_exhausted(output)) { CERROR("%s: Range received from server is exhausted: " DRANGE "]\n", seq->lcs_name, PRANGE(output)); rc = -EINVAL; @@ -170,9 +166,9 @@ static int seq_client_alloc_seq(const struct lu_env *env, { int rc; - LASSERT(range_is_sane(&seq->lcs_space)); + LASSERT(lu_seq_range_is_sane(&seq->lcs_space)); - if (range_is_exhausted(&seq->lcs_space)) { + if (lu_seq_range_is_exhausted(&seq->lcs_space)) { rc = seq_client_alloc_meta(env, seq); if (rc) { CERROR("%s: Can't allocate new meta-sequence, rc %d\n", @@ -185,7 +181,7 @@ static int seq_client_alloc_seq(const struct lu_env *env, rc = 0; } - LASSERT(!range_is_exhausted(&seq->lcs_space)); + LASSERT(!lu_seq_range_is_exhausted(&seq->lcs_space)); *seqnr = seq->lcs_space.lsr_start; seq->lcs_space.lsr_start += 1; @@ -320,7 +316,7 @@ void seq_client_flush(struct lu_client_seq *seq) seq->lcs_space.lsr_index = -1; - range_init(&seq->lcs_space); + lu_seq_range_init(&seq->lcs_space); mutex_unlock(&seq->lcs_mutex); } EXPORT_SYMBOL(seq_client_flush); diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c index 3ed32d77f38b..97d4849c7199 100644 --- a/drivers/staging/lustre/lustre/fid/lproc_fid.c +++ b/drivers/staging/lustre/lustre/fid/lproc_fid.c @@ -83,7 +83,7 @@ ldebugfs_fid_write_common(const char __user *buffer, 
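[Back to the selftest timer above: a small userspace check of the slot hashing that STTIMER_SLOT() performs, where BIT(n) is simply 1UL << n. With 128 slots of 8 seconds each, expiries 1024 seconds apart hash to the same slot, matching the wrap-around comment in timer.c.]

#include <stdio.h>

#define STTIMER_MINPOLL	3				/* log2 min poll interval (8 s) */
#define STTIMER_NSLOTS	(1UL << 7)			/* BIT(7) == 128 slots */
#define STTIMER_SLOT(t)	(((t) >> STTIMER_MINPOLL) & (STTIMER_NSLOTS - 1))

int main(void)
{
	unsigned long t;

	for (t = 0; t <= 24; t += 8)
		printf("expiry %4lus -> slot %lu\n", t, STTIMER_SLOT(t));

	/* 128 slots * 8 s = 1024 s: one full revolution of the wheel */
	printf("expiry 1024s -> slot %lu (same as 0s)\n", STTIMER_SLOT(1024UL));
	return 0;
}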
size_t count, (unsigned long long *)&tmp.lsr_end); if (rc != 2) return -EINVAL; - if (!range_is_sane(&tmp) || range_is_zero(&tmp) || + if (!lu_seq_range_is_sane(&tmp) || lu_seq_range_is_zero(&tmp) || tmp.lsr_start < range->lsr_start || tmp.lsr_end > range->lsr_end) return -EINVAL; *range = tmp; diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c index 0100a935f4ff..11f697496180 100644 --- a/drivers/staging/lustre/lustre/fld/fld_cache.c +++ b/drivers/staging/lustre/lustre/fld/fld_cache.c @@ -143,7 +143,7 @@ restart_fixup: c_range = &f_curr->fce_range; n_range = &f_next->fce_range; - LASSERT(range_is_sane(c_range)); + LASSERT(lu_seq_range_is_sane(c_range)); if (&f_next->fce_list == head) break; @@ -358,7 +358,7 @@ struct fld_cache_entry { struct fld_cache_entry *f_new; - LASSERT(range_is_sane(range)); + LASSERT(lu_seq_range_is_sane(range)); f_new = kzalloc(sizeof(*f_new), GFP_NOFS); if (!f_new) @@ -503,7 +503,7 @@ int fld_cache_lookup(struct fld_cache *cache, } prev = flde; - if (range_within(&flde->fce_range, seq)) { + if (lu_seq_range_within(&flde->fce_range, seq)) { *range = flde->fce_range; cache->fci_stat.fst_cache++; diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h index 08eaec735d6f..4a7f0b71c48d 100644 --- a/drivers/staging/lustre/lustre/fld/fld_internal.h +++ b/drivers/staging/lustre/lustre/fld/fld_internal.h @@ -62,11 +62,6 @@ #include "../include/lustre_req_layout.h" #include "../include/lustre_fld.h" -enum { - LUSTRE_FLD_INIT = 1 << 0, - LUSTRE_FLD_RUN = 1 << 1 -}; - struct fld_stats { __u64 fst_count; __u64 fst_cache; diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c index 0de72b717ce5..4cade7a16800 100644 --- a/drivers/staging/lustre/lustre/fld/fld_request.c +++ b/drivers/staging/lustre/lustre/fld/fld_request.c @@ -159,11 +159,6 @@ int fld_client_add_target(struct lu_client_fld *fld, LASSERT(name); LASSERT(tar->ft_srv || tar->ft_exp); - if (fld->lcf_flags != LUSTRE_FLD_INIT) { - CERROR("%s: Attempt to add target %s (idx %llu) on fly - skip it\n", - fld->lcf_name, name, tar->ft_idx); - return 0; - } CDEBUG(D_INFO, "%s: Adding target %s (idx %llu)\n", fld->lcf_name, name, tar->ft_idx); @@ -282,7 +277,6 @@ int fld_client_init(struct lu_client_fld *fld, fld->lcf_count = 0; spin_lock_init(&fld->lcf_lock); fld->lcf_hash = &fld_hash[hash]; - fld->lcf_flags = LUSTRE_FLD_INIT; INIT_LIST_HEAD(&fld->lcf_targets); cache_size = FLD_CLIENT_CACHE_SIZE / @@ -421,8 +415,6 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds, struct lu_fld_target *target; int rc; - fld->lcf_flags |= LUSTRE_FLD_RUN; - rc = fld_cache_lookup(fld->lcf_cache, seq, &res); if (rc == 0) { *mds = res.lsr_index; diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h index 89292c93dcd5..dc685610c4c4 100644 --- a/drivers/staging/lustre/lustre/include/cl_object.h +++ b/drivers/staging/lustre/lustre/include/cl_object.h @@ -59,10 +59,6 @@ * read/write system call it is associated with the single user * thread, that issued the system call). * - * - cl_req represents a collection of pages for a transfer. cl_req is - * constructed by req-forming engine that tries to saturate - * transport with large and continuous transfers. 
- * * Terminology * * - to avoid confusion high-level I/O operation like read or write system @@ -103,11 +99,8 @@ struct inode; struct cl_device; -struct cl_device_operations; struct cl_object; -struct cl_object_page_operations; -struct cl_object_lock_operations; struct cl_page; struct cl_page_slice; @@ -120,27 +113,7 @@ struct cl_page_operations; struct cl_io; struct cl_io_slice; -struct cl_req; -struct cl_req_slice; - -/** - * Operations for each data device in the client stack. - * - * \see vvp_cl_ops, lov_cl_ops, lovsub_cl_ops, osc_cl_ops - */ -struct cl_device_operations { - /** - * Initialize cl_req. This method is called top-to-bottom on all - * devices in the stack to get them a chance to allocate layer-private - * data, and to attach them to the cl_req by calling - * cl_req_slice_add(). - * - * \see osc_req_init(), lov_req_init(), lovsub_req_init() - * \see vvp_req_init() - */ - int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req); -}; +struct cl_req_attr; /** * Device in the client stack. @@ -150,8 +123,6 @@ struct cl_device_operations { struct cl_device { /** Super-class. */ struct lu_device cd_lu_dev; - /** Per-layer operation vector. */ - const struct cl_device_operations *cd_ops; }; /** \addtogroup cl_object cl_object @@ -267,7 +238,7 @@ struct cl_object_conf { /** * Object layout. This is consumed by lov. */ - struct lustre_md *coc_md; + struct lu_buf coc_layout; /** * Description of particular stripe location in the * cluster. This is consumed by osc. @@ -301,6 +272,26 @@ enum { OBJECT_CONF_WAIT = 2 }; +enum { + CL_LAYOUT_GEN_NONE = (u32)-2, /* layout lock was cancelled */ + CL_LAYOUT_GEN_EMPTY = (u32)-1, /* for empty layout */ +}; + +struct cl_layout { + /** the buffer to return the layout in lov_mds_md format. */ + struct lu_buf cl_buf; + /** size of layout in lov_mds_md format. */ + size_t cl_size; + /** Layout generation. */ + u32 cl_layout_gen; + /** + * True if this is a released file. + * Temporarily added for released file truncate in ll_setattr_raw(). + * It will be removed later. -Jinshan + */ + bool cl_is_released; +}; + /** * Operations implemented for each cl object layer. * @@ -400,6 +391,27 @@ struct cl_object_operations { */ int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj, struct lov_user_md __user *lum); + /** + * Get FIEMAP mapping from the object. + */ + int (*coo_fiemap)(const struct lu_env *env, struct cl_object *obj, + struct ll_fiemap_info_key *fmkey, + struct fiemap *fiemap, size_t *buflen); + /** + * Get layout and generation of the object. + */ + int (*coo_layout_get)(const struct lu_env *env, struct cl_object *obj, + struct cl_layout *layout); + /** + * Get maximum size of the object. + */ + loff_t (*coo_maxbytes)(struct cl_object *obj); + /** + * Set request attributes. + */ + void (*coo_req_attr_set)(const struct lu_env *env, + struct cl_object *obj, + struct cl_req_attr *attr); }; /** @@ -591,7 +603,7 @@ enum cl_page_state { * * - [cl_page_state::CPS_PAGEOUT] page is dirty, the * req-formation engine decides that it wants to include this page - * into an cl_req being constructed, and yanks it from the cache; + * into an RPC being constructed, and yanks it from the cache; * * - [cl_page_state::CPS_FREEING] VM callback is executed to * evict the page form the memory; @@ -660,7 +672,7 @@ enum cl_page_state { * Page is being read in, as a part of a transfer. 
This is quite * similar to the cl_page_state::CPS_PAGEOUT state, except that * read-in is always "immediate"---there is no such thing a sudden - * construction of read cl_req from cached, presumably not up to date, + * construction of read request from cached, presumably not up to date, * pages. * * Underlying VM page is locked for the duration of transfer. @@ -714,8 +726,6 @@ struct cl_page { struct list_head cp_batch; /** List of slices. Immutable after creation. */ struct list_head cp_layers; - /** Linkage of pages within cl_req. */ - struct list_head cp_flight; /** * Page state. This field is const to avoid accidental update, it is * modified only internally within cl_page.c. Protected by a VM lock. @@ -732,12 +742,6 @@ struct cl_page { * by sub-io. Protected by a VM lock. */ struct cl_io *cp_owner; - /** - * Owning IO request in cl_page_state::CPS_PAGEOUT and - * cl_page_state::CPS_PAGEIN states. This field is maintained only in - * the top-level pages. Protected by a VM lock. - */ - struct cl_req *cp_req; /** List of references to this page, for debugging. */ struct lu_ref cp_reference; /** Link to an object, for debugging. */ @@ -779,7 +783,6 @@ enum cl_lock_mode { /** * Requested transfer type. - * \ingroup cl_req */ enum cl_req_type { CRT_READ, @@ -884,26 +887,6 @@ struct cl_page_operations { /** Destructor. Frees resources and slice itself. */ void (*cpo_fini)(const struct lu_env *env, struct cl_page_slice *slice); - - /** - * Checks whether the page is protected by a cl_lock. This is a - * per-layer method, because certain layers have ways to check for the - * lock much more efficiently than through the generic locks scan, or - * implement locking mechanisms separate from cl_lock, e.g., - * LL_FILE_GROUP_LOCKED in vvp. If \a pending is true, check for locks - * being canceled, or scheduled for cancellation as soon as the last - * user goes away, too. - * - * \retval -EBUSY: page is protected by a lock of a given mode; - * \retval -ENODATA: page is not protected by a lock; - * \retval 0: this layer cannot decide. - * - * \see cl_page_is_under_lock() - */ - int (*cpo_is_under_lock)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io, pgoff_t *max); - /** * Optional debugging helper. Prints given page slice. * @@ -915,8 +898,7 @@ struct cl_page_operations { /** * \name transfer * - * Transfer methods. See comment on cl_req for a description of - * transfer formation and life-cycle. + * Transfer methods. * * @{ */ @@ -962,7 +944,7 @@ struct cl_page_operations { int ioret); /** * Called when cached page is about to be added to the - * cl_req as a part of req formation. + * ptlrpc request as a part of req formation. * * \return 0 : proceed with this page; * \return -EAGAIN : skip this page; @@ -1365,7 +1347,6 @@ struct cl_2queue { * (3) sort all locks to avoid dead-locks, and acquire them * * (4) process the chunk: call per-page methods - * (cl_io_operations::cio_read_page() for read, * cl_io_operations::cio_prepare_write(), * cl_io_operations::cio_commit_write() for write) * @@ -1388,6 +1369,8 @@ enum cl_io_type { CIT_WRITE, /** truncate, utime system calls */ CIT_SETATTR, + /** get data version */ + CIT_DATA_VERSION, /** * page fault handling */ @@ -1467,6 +1450,31 @@ struct cl_io_slice { typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *, struct cl_page *); + +struct cl_read_ahead { + /* + * Maximum page index the readahead window will end. + * This is determined by DLM lock coverage, RPC and stripe boundary. + * cra_end is included.
+ */ + pgoff_t cra_end; + /* + * Release routine. If readahead holds resources underneath, this + * function should be called to release it. + */ + void (*cra_release)(const struct lu_env *env, void *cbdata); + /* Callback data for cra_release routine */ + void *cra_cbdata; +}; + +static inline void cl_read_ahead_release(const struct lu_env *env, + struct cl_read_ahead *ra) +{ + if (ra->cra_release) + ra->cra_release(env, ra->cra_cbdata); + memset(ra, 0, sizeof(*ra)); +} + /** * Per-layer io operations. * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops @@ -1573,16 +1581,13 @@ struct cl_io_operations { struct cl_page_list *queue, int from, int to, cl_commit_cbt cb); /** - * Read missing page. - * - * Called by a top-level cl_io_operations::op[CIT_READ]::cio_start() - * method, when it hits not-up-to-date page in the range. Optional. + * Decide maximum read ahead extent * * \pre io->ci_type == CIT_READ */ - int (*cio_read_page)(const struct lu_env *env, - const struct cl_io_slice *slice, - const struct cl_page_slice *page); + int (*cio_read_ahead)(const struct lu_env *env, + const struct cl_io_slice *slice, + pgoff_t start, struct cl_read_ahead *ra); /** * Optional debugging helper. Print given io slice. */ @@ -1765,10 +1770,15 @@ struct cl_io { struct cl_io_rw_common ci_rw; struct cl_setattr_io { struct ost_lvb sa_attr; + unsigned int sa_attr_flags; unsigned int sa_valid; int sa_stripe_index; - struct lu_fid *sa_parent_fid; + const struct lu_fid *sa_parent_fid; } ci_setattr; + struct cl_data_version_io { + u64 dv_data_version; + int dv_flags; + } ci_data_version; struct cl_fault_io { /** page index within file. */ pgoff_t ft_index; @@ -1836,179 +1846,20 @@ struct cl_io { /** @} cl_io */ -/** \addtogroup cl_req cl_req - * @{ - */ -/** \struct cl_req - * Transfer. - * - * There are two possible modes of transfer initiation on the client: - * - * - immediate transfer: this is started when a high level io wants a page - * or a collection of pages to be transferred right away. Examples: - * read-ahead, synchronous read in the case of non-page aligned write, - * page write-out as a part of extent lock cancellation, page write-out - * as a part of memory cleansing. Immediate transfer can be both - * cl_req_type::CRT_READ and cl_req_type::CRT_WRITE; - * - * - opportunistic transfer (cl_req_type::CRT_WRITE only), that happens - * when io wants to transfer a page to the server some time later, when - * it can be done efficiently. Example: pages dirtied by the write(2) - * path. - * - * In any case, transfer takes place in the form of a cl_req, which is a - * representation for a network RPC. - * - * Pages queued for an opportunistic transfer are cached until it is decided - * that efficient RPC can be composed of them. This decision is made by "a - * req-formation engine", currently implemented as a part of osc - * layer. Req-formation depends on many factors: the size of the resulting - * RPC, whether or not multi-object RPCs are supported by the server, - * max-rpc-in-flight limitations, size of the dirty cache, etc. - * - * For the immediate transfer io submits a cl_page_list, that req-formation - * engine slices into cl_req's, possibly adding cached pages to some of - * the resulting req's. - * - * Whenever a page from cl_page_list is added to a newly constructed req, its - * cl_page_operations::cpo_prep() layer methods are called. 
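[A hedged sketch of how the new cl_read_ahead contract above would be consumed; the surrounding readahead loop is hypothetical, and only cl_io_read_ahead() and cl_read_ahead_release() come from this patch.]

	struct cl_read_ahead ra = { 0 };
	int rc;

	rc = cl_io_read_ahead(env, io, start, &ra);
	if (!rc) {
		/* ... issue reads for pages in [start, ra.cra_end] ... */

		/* drop whatever DLM/RPC resources the layer pinned */
		cl_read_ahead_release(env, &ra);
	}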
At that moment, - * page state is atomically changed from cl_page_state::CPS_OWNED to - * cl_page_state::CPS_PAGEOUT or cl_page_state::CPS_PAGEIN, cl_page::cp_owner - * is zeroed, and cl_page::cp_req is set to the - * req. cl_page_operations::cpo_prep() method at the particular layer might - * return -EALREADY to indicate that it does not need to submit this page - * at all. This is possible, for example, if page, submitted for read, - * became up-to-date in the meantime; and for write, the page don't have - * dirty bit marked. \see cl_io_submit_rw() - * - * Whenever a cached page is added to a newly constructed req, its - * cl_page_operations::cpo_make_ready() layer methods are called. At that - * moment, page state is atomically changed from cl_page_state::CPS_CACHED to - * cl_page_state::CPS_PAGEOUT, and cl_page::cp_req is set to - * req. cl_page_operations::cpo_make_ready() method at the particular layer - * might return -EAGAIN to indicate that this page is not eligible for the - * transfer right now. - * - * FUTURE - * - * Plan is to divide transfers into "priority bands" (indicated when - * submitting cl_page_list, and queuing a page for the opportunistic transfer) - * and allow glueing of cached pages to immediate transfers only within single - * band. This would make high priority transfers (like lock cancellation or - * memory pressure induced write-out) really high priority. - * - */ - /** * Per-transfer attributes. */ struct cl_req_attr { + enum cl_req_type cra_type; + u64 cra_flags; + struct cl_page *cra_page; + /** Generic attributes for the server consumption. */ struct obdo *cra_oa; /** Jobid */ char cra_jobid[LUSTRE_JOBID_SIZE]; }; -/** - * Transfer request operations definable at every layer. - * - * Concurrency: transfer formation engine synchronizes calls to all transfer - * methods. - */ -struct cl_req_operations { - /** - * Invoked top-to-bottom by cl_req_prep() when transfer formation is - * complete (all pages are added). - * - * \see osc_req_prep() - */ - int (*cro_prep)(const struct lu_env *env, - const struct cl_req_slice *slice); - /** - * Called top-to-bottom to fill in \a oa fields. This is called twice - * with different flags, see bug 10150 and osc_build_req(). - * - * \param obj an object from cl_req which attributes are to be set in - * \a oa. - * - * \param oa struct obdo where attributes are placed - * - * \param flags \a oa fields to be filled. - */ - void (*cro_attr_set)(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *attr, u64 flags); - /** - * Called top-to-bottom from cl_req_completion() to notify layers that - * transfer completed. Has to free all state allocated by - * cl_device_operations::cdo_req_init(). - */ - void (*cro_completion)(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret); -}; - -/** - * A per-object state that (potentially multi-object) transfer request keeps. - */ -struct cl_req_obj { - /** object itself */ - struct cl_object *ro_obj; - /** reference to cl_req_obj::ro_obj. For debugging. */ - struct lu_ref_link ro_obj_ref; - /* something else? Number of pages for a given object? */ -}; - -/** - * Transfer request. - * - * Transfer requests are not reference counted, because IO sub-system owns - * them exclusively and knows when to free them. - * - * Life cycle. - * - * cl_req is created by cl_req_alloc() that calls - * cl_device_operations::cdo_req_init() device methods to allocate per-req - * state in every layer. 
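[Sketch of the consolidated attribute path that replaces cro_attr_set(): with cl_req gone, the caller fills one cl_req_attr and hands it to cl_req_attr_set(). Only struct cl_req_attr above and the cl_req_attr_set() prototype later in this header come from the patch; the field values are illustrative.]

	struct cl_req_attr attr = {
		.cra_type = CRT_WRITE,
		.cra_page = page,
		.cra_oa   = oa,
	};

	cl_req_attr_set(env, cl_object_top(obj), &attr);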
- * - * Then pages are added (cl_req_page_add()), req keeps track of all objects it - * contains pages for. - * - * Once all pages were collected, cl_page_operations::cpo_prep() method is - * called top-to-bottom. At that point layers can modify req, let it pass, or - * deny it completely. This is to support things like SNS that have transfer - * ordering requirements invisible to the individual req-formation engine. - * - * On transfer completion (or transfer timeout, or failure to initiate the - * transfer of an allocated req), cl_req_operations::cro_completion() method - * is called, after execution of cl_page_operations::cpo_completion() of all - * req's pages. - */ -struct cl_req { - enum cl_req_type crq_type; - /** A list of pages being transferred */ - struct list_head crq_pages; - /** Number of pages in cl_req::crq_pages */ - unsigned crq_nrpages; - /** An array of objects which pages are in ->crq_pages */ - struct cl_req_obj *crq_o; - /** Number of elements in cl_req::crq_objs[] */ - unsigned crq_nrobjs; - struct list_head crq_layers; -}; - -/** - * Per-layer state for request. - */ -struct cl_req_slice { - struct cl_req *crs_req; - struct cl_device *crs_dev; - struct list_head crs_linkage; - const struct cl_req_operations *crs_ops; -}; - -/* @} cl_req */ - enum cache_stats_item { /** how many cache lookups were performed */ CS_lookup = 0, @@ -2153,9 +2004,6 @@ void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, const struct cl_lock_operations *ops); void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice, struct cl_object *obj, const struct cl_io_operations *ops); -void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice, - struct cl_device *dev, - const struct cl_req_operations *ops); /** @} helpers */ /** \defgroup cl_object cl_object @@ -2183,6 +2031,12 @@ int cl_object_prune(const struct lu_env *env, struct cl_object *obj); void cl_object_kill(const struct lu_env *env, struct cl_object *obj); int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj, struct lov_user_md __user *lum); +int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj, + struct ll_fiemap_info_key *fmkey, struct fiemap *fiemap, + size_t *buflen); +int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj, + struct cl_layout *cl); +loff_t cl_object_maxbytes(struct cl_object *obj); /** * Returns true, iff \a o0 and \a o1 are slices of the same object. 
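
The cl_read_ahead descriptor and its cra_release() callback introduced above follow a plain acquire/release pattern. A minimal caller-side sketch, illustrative only and not part of this patch; "env", "io" and "start" are assumed to come from the surrounding CIT_READ path:

	struct cl_read_ahead ra = { 0 };
	int rc;

	/* Ask the layers how far read-ahead may extend from 'start'. */
	rc = cl_io_read_ahead(env, io, start, &ra);
	if (rc == 0) {
		/* ... issue read-ahead for pages up to ra.cra_end ... */

		/* Drop whatever resource the layer pinned via cra_cbdata. */
		cl_read_ahead_release(env, &ra);
	}
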
@@ -2302,8 +2156,6 @@ void cl_page_discard(const struct lu_env *env, struct cl_io *io, void cl_page_delete(const struct lu_env *env, struct cl_page *pg); int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg); void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate); -int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, pgoff_t *max_index); loff_t cl_offset(const struct cl_object *obj, pgoff_t idx); pgoff_t cl_index(const struct cl_object *obj, loff_t offset); size_t cl_page_size(const struct cl_object *obj); @@ -2414,8 +2266,6 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io, struct cl_io_lock_link *link); int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, struct cl_lock_descr *descr); -int cl_io_read_page(const struct lu_env *env, struct cl_io *io, - struct cl_page *page); int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, enum cl_req_type iot, struct cl_2queue *queue); int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, @@ -2424,6 +2274,8 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, int from, int to, cl_commit_cbt cb); +int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io, + pgoff_t start, struct cl_read_ahead *ra); int cl_io_is_going(const struct lu_env *env); /** @@ -2520,19 +2372,8 @@ void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page); /** @} cl_page_list */ -/** \defgroup cl_req cl_req - * @{ - */ -struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page, - enum cl_req_type crt, int nr_objects); - -void cl_req_page_add(const struct lu_env *env, struct cl_req *req, - struct cl_page *page); -void cl_req_page_done(const struct lu_env *env, struct cl_page *page); -int cl_req_prep(const struct lu_env *env, struct cl_req *req); -void cl_req_attr_set(const struct lu_env *env, struct cl_req *req, - struct cl_req_attr *attr, u64 flags); -void cl_req_completion(const struct lu_env *env, struct cl_req *req, int ioret); +void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj, + struct cl_req_attr *attr); /** \defgroup cl_sync_io cl_sync_io * @{ @@ -2568,8 +2409,6 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor); /** @} cl_sync_io */ -/** @} cl_req */ - /** \defgroup cl_env cl_env * * lu_env handling for a client. @@ -2593,35 +2432,13 @@ void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor); * - allocation and destruction of environment is amortized by caching no * longer used environments instead of destroying them; * - * - there is a notion of "current" environment, attached to the kernel - * data structure representing current thread Top-level lustre code - * allocates an environment and makes it current, then calls into - * non-lustre code, that in turn calls lustre back. Low-level lustre - * code thus called can fetch environment created by the top-level code - * and reuse it, avoiding additional environment allocation. 
- * Right now, three interfaces can attach the cl_env to running thread:
- *     - cl_env_get
- *     - cl_env_implant
- *     - cl_env_reexit(cl_env_reenter had to be called priorly)
- *
 * \see lu_env, lu_context, lu_context_key
 * @{
 */
-struct cl_env_nest {
-	int cen_refcheck;
-	void *cen_cookie;
-};
-
struct lu_env *cl_env_get(int *refcheck);
struct lu_env *cl_env_alloc(int *refcheck, __u32 tags);
-struct lu_env *cl_env_nested_get(struct cl_env_nest *nest);
void cl_env_put(struct lu_env *env, int *refcheck);
-void cl_env_nested_put(struct cl_env_nest *nest, struct lu_env *env);
-void *cl_env_reenter(void);
-void cl_env_reexit(void *cookie);
-void cl_env_implant(struct lu_env *env, int *refcheck);
-void cl_env_unplant(struct lu_env *env, int *refcheck);
unsigned int cl_env_cache_purge(unsigned int nr);
struct lu_env *cl_env_percpu_get(void);
void cl_env_percpu_put(struct lu_env *env);
diff --git a/drivers/staging/lustre/lustre/include/llog_swab.h b/drivers/staging/lustre/lustre/include/llog_swab.h
new file mode 100644
index 000000000000..fd7ffb154ad1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/llog_swab.h
@@ -0,0 +1,65 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ *
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Ben Evans.
+ *
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format. The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed() below).
+ *
+ * Each type has corresponding 'lustre_swab_xxxtypexxx()' routines that
+ * are implemented in ptlrpc/pack_generic.c. These 'swabbers' convert the
+ * type from "other" endian, in-place in the message buffer.
+ *
+ * A swabber takes a single pointer argument. The caller must already have
+ * verified that the length of the message buffer >= sizeof (type).
+ *
+ * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
+ * may be defined that swabs just the variable part, after the caller has
+ * verified that the message buffer is large enough.
+ */ + +#ifndef _LLOG_SWAB_H_ +#define _LLOG_SWAB_H_ + +#include "lustre/lustre_idl.h" +struct lustre_cfg; + +void lustre_swab_lu_fid(struct lu_fid *fid); +void lustre_swab_ost_id(struct ost_id *oid); +void lustre_swab_llogd_body(struct llogd_body *d); +void lustre_swab_llog_hdr(struct llog_log_hdr *h); +void lustre_swab_llogd_conn_body(struct llogd_conn_body *d); +void lustre_swab_llog_rec(struct llog_rec_hdr *rec); +void lustre_swab_lu_seq_range(struct lu_seq_range *range); +void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); +void lustre_swab_cfg_marker(struct cfg_marker *marker, + int swab, int size); + +#endif diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h index cc0713ef8ae5..62753dae0bfa 100644 --- a/drivers/staging/lustre/lustre/include/lprocfs_status.h +++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h @@ -43,6 +43,8 @@ #include <linux/spinlock.h> #include <linux/types.h> +#include "../../include/linux/libcfs/libcfs.h" +#include "lustre_cfg.h" #include "lustre/lustre_idl.h" struct lprocfs_vars { @@ -540,7 +542,8 @@ lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags); void lprocfs_clear_stats(struct lprocfs_stats *stats); void lprocfs_free_stats(struct lprocfs_stats **stats); void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned conf, const char *name, const char *units); + unsigned int conf, const char *name, + const char *units); struct obd_export; int lprocfs_exp_cleanup(struct obd_export *exp); struct dentry *ldebugfs_add_simple(struct dentry *root, @@ -701,9 +704,9 @@ static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store) extern const struct sysfs_ops lustre_sysfs_ops; struct root_squash_info; -int lprocfs_wr_root_squash(const char *buffer, unsigned long count, +int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count, struct root_squash_info *squash, char *name); -int lprocfs_wr_nosquash_nids(const char *buffer, unsigned long count, +int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count, struct root_squash_info *squash, char *name); /* all quota proc functions */ diff --git a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h index c2340d643e84..b8ad5559a3b9 100644 --- a/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h +++ b/drivers/staging/lustre/lustre/include/lustre/ll_fiemap.h @@ -41,79 +41,24 @@ #ifndef _LUSTRE_FIEMAP_H #define _LUSTRE_FIEMAP_H -struct ll_fiemap_extent { - __u64 fe_logical; /* logical offset in bytes for the start of - * the extent from the beginning of the file - */ - __u64 fe_physical; /* physical offset in bytes for the start - * of the extent from the beginning of the disk - */ - __u64 fe_length; /* length in bytes for this extent */ - __u64 fe_reserved64[2]; - __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ - __u32 fe_device; /* device number for this extent */ - __u32 fe_reserved[2]; -}; - -struct ll_user_fiemap { - __u64 fm_start; /* logical offset (inclusive) at - * which to start mapping (in) - */ - __u64 fm_length; /* logical length of mapping which - * userspace wants (in) - */ - __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ - __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ - __u32 fm_extent_count; /* size of fm_extents array (in) */ - __u32 fm_reserved; - struct ll_fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ 
-}; - -#define FIEMAP_MAX_OFFSET (~0ULL) +#ifndef __KERNEL__ +#include <stddef.h> +#include <fiemap.h> +#endif -#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before - * map - */ -#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute - * tree - */ -#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */ -#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */ -#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending. - * Sets EXTENT_UNKNOWN. - */ -#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read - * while fs is unmounted - */ -#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs. - * Sets EXTENT_NO_DIRECT. - */ -#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be - * block aligned. - */ -#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata. - * Sets EXTENT_NOT_ALIGNED.*/ -#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block. - * Sets EXTENT_NOT_ALIGNED. - */ -#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but - * no data (i.e. zero). - */ -#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively - * support extents. Result - * merged for efficiency. - */ +/* XXX: We use fiemap_extent::fe_reserved[0] */ +#define fe_device fe_reserved[0] static inline size_t fiemap_count_to_size(size_t extent_count) { - return (sizeof(struct ll_user_fiemap) + extent_count * - sizeof(struct ll_fiemap_extent)); + return sizeof(struct fiemap) + extent_count * + sizeof(struct fiemap_extent); } static inline unsigned fiemap_size_to_count(size_t array_size) { - return ((array_size - sizeof(struct ll_user_fiemap)) / - sizeof(struct ll_fiemap_extent)); + return (array_size - sizeof(struct fiemap)) / + sizeof(struct fiemap_extent); } #define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */ diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h index 72eaee95c6b8..65ce503ad595 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h @@ -48,8 +48,7 @@ * that the Lustre wire protocol is not influenced by external dependencies. * * The only other acceptable items in this file are VERY SIMPLE accessor - * functions to avoid callers grubbing inside the structures, and the - * prototypes of the swabber functions for each struct. Nothing that + * functions to avoid callers grubbing inside the structures. Nothing that * depends on external functions or definitions should be in here. * * Structs must be properly aligned to put 64-bit values on an 8-byte @@ -64,23 +63,6 @@ * in the code to ensure that new/old clients that see this larger struct * do not fail, otherwise you need to implement protocol compatibility). * - * We assume all nodes are either little-endian or big-endian, and we - * always send messages in the sender's native format. The receiver - * detects the message format by checking the 'magic' field of the message - * (see lustre_msg_swabbed() below). - * - * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines, - * implemented either here, inline (trivial implementations) or in - * ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other" - * endian, in-place in the message buffer. - * - * A swabber takes a single pointer argument. The caller must already have - * verified that the length of the message buffer >= sizeof (type). 
- * - * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine - * may be defined that swabs just the variable part, after the caller has - * verified that the message buffer is large enough. - * * @{ */ @@ -192,113 +174,6 @@ struct lu_seq_range_array { #define LU_SEQ_RANGE_MASK 0x3 -static inline unsigned fld_range_type(const struct lu_seq_range *range) -{ - return range->lsr_flags & LU_SEQ_RANGE_MASK; -} - -static inline bool fld_range_is_ost(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_OST; -} - -static inline bool fld_range_is_mdt(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_MDT; -} - -/** - * This all range is only being used when fld client sends fld query request, - * but it does not know whether the seq is MDT or OST, so it will send req - * with ALL type, which means either seq type gotten from lookup can be - * expected. - */ -static inline unsigned fld_range_is_any(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_ANY; -} - -static inline void fld_range_set_type(struct lu_seq_range *range, - unsigned flags) -{ - range->lsr_flags |= flags; -} - -static inline void fld_range_set_mdt(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_MDT); -} - -static inline void fld_range_set_ost(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_OST); -} - -static inline void fld_range_set_any(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_ANY); -} - -/** - * returns width of given range \a r - */ - -static inline __u64 range_space(const struct lu_seq_range *range) -{ - return range->lsr_end - range->lsr_start; -} - -/** - * initialize range to zero - */ - -static inline void range_init(struct lu_seq_range *range) -{ - memset(range, 0, sizeof(*range)); -} - -/** - * check if given seq id \a s is within given range \a r - */ - -static inline bool range_within(const struct lu_seq_range *range, - __u64 s) -{ - return s >= range->lsr_start && s < range->lsr_end; -} - -static inline bool range_is_sane(const struct lu_seq_range *range) -{ - return (range->lsr_end >= range->lsr_start); -} - -static inline bool range_is_zero(const struct lu_seq_range *range) -{ - return (range->lsr_start == 0 && range->lsr_end == 0); -} - -static inline bool range_is_exhausted(const struct lu_seq_range *range) - -{ - return range_space(range) == 0; -} - -/* return 0 if two range have the same location */ -static inline int range_compare_loc(const struct lu_seq_range *r1, - const struct lu_seq_range *r2) -{ - return r1->lsr_index != r2->lsr_index || - r1->lsr_flags != r2->lsr_flags; -} - -#define DRANGE "[%#16.16Lx-%#16.16Lx):%x:%s" - -#define PRANGE(range) \ - (range)->lsr_start, \ - (range)->lsr_end, \ - (range)->lsr_index, \ - fld_range_is_mdt(range) ? "mdt" : "ost" - /** \defgroup lu_fid lu_fid * @{ */ @@ -310,7 +185,7 @@ static inline int range_compare_loc(const struct lu_seq_range *r1, */ enum lma_compat { LMAC_HSM = 0x00000001, - LMAC_SOM = 0x00000002, +/* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */ LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */ LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is * under /O/<seq>/d<x>. 
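
Back in the ll_fiemap.h hunk above, the helpers now size buffers in terms of the kernel's own struct fiemap instead of the removed ll_user_fiemap. A hypothetical sizing sketch; the extent count and GFP flags are illustrative, not from this patch:

	struct fiemap *fm;
	size_t buflen = fiemap_count_to_size(32);	/* header + 32 extents */

	fm = kzalloc(buflen, GFP_KERNEL);
	if (fm) {
		fm->fm_extent_count = fiemap_size_to_count(buflen);
		/* ... set fm_start/fm_length and issue the mapping request ... */
		kfree(fm);
	}
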
@@ -644,13 +519,14 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) { if (fid_seq_is_mdt0(oi->oi.oi_seq)) { if (oid >= IDIF_MAX_OID) { - CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi)); + CERROR("Too large OID %#llx to set MDT0 " DOSTID "\n", + oid, POSTID(oi)); return; } oi->oi.oi_id = oid; } else if (fid_is_idif(&oi->oi_fid)) { if (oid >= IDIF_MAX_OID) { - CERROR("Bad %llu to set "DOSTID"\n", + CERROR("Too large OID %#llx to set IDIF " DOSTID "\n", oid, POSTID(oi)); return; } @@ -676,7 +552,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid) if (fid_is_idif(fid)) { if (oid >= IDIF_MAX_OID) { - CERROR("Too large OID %#llx to set IDIF "DFID"\n", + CERROR("Too large OID %#llx to set IDIF " DFID "\n", (unsigned long long)oid, PFID(fid)); return -EBADF; } @@ -685,7 +561,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid) fid->f_ver = oid >> 48; } else { if (oid >= OBIF_MAX_OID) { - CERROR("Too large OID %#llx to set REG "DFID"\n", + CERROR("Too large OID %#llx to set REG " DFID "\n", (unsigned long long)oid, PFID(fid)); return -EBADF; } @@ -785,8 +661,6 @@ static inline ino_t lu_igif_ino(const struct lu_fid *fid) return fid_seq(fid); } -void lustre_swab_ost_id(struct ost_id *oid); - /** * Get inode generation from a igif. * \param fid a igif to get inode generation from. @@ -847,9 +721,6 @@ static inline bool fid_is_sane(const struct lu_fid *fid) fid_seq_is_rsvd(fid_seq(fid))); } -void lustre_swab_lu_fid(struct lu_fid *fid); -void lustre_swab_lu_seq_range(struct lu_seq_range *range); - static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1) { return memcmp(f0, f1, sizeof(*f0)) == 0; @@ -1099,8 +970,10 @@ struct ptlrpc_body_v3 { __u32 pb_version; __u32 pb_opc; __u32 pb_status; - __u64 pb_last_xid; - __u64 pb_last_seen; + __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */ + __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */ + __u16 pb_padding0; + __u32 pb_padding1; __u64 pb_last_committed; __u64 pb_transno; __u32 pb_flags; @@ -1112,8 +985,11 @@ struct ptlrpc_body_v3 { __u64 pb_slv; /* VBR: pre-versions */ __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS]; + __u64 pb_mbits; /**< match bits for bulk request */ /* padding for future needs */ - __u64 pb_padding[4]; + __u64 pb_padding64_0; + __u64 pb_padding64_1; + __u64 pb_padding64_2; char pb_jobid[LUSTRE_JOBID_SIZE]; }; @@ -1125,8 +1001,10 @@ struct ptlrpc_body_v2 { __u32 pb_version; __u32 pb_opc; __u32 pb_status; - __u64 pb_last_xid; - __u64 pb_last_seen; + __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */ + __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */ + __u16 pb_padding0; + __u32 pb_padding1; __u64 pb_last_committed; __u64 pb_transno; __u32 pb_flags; @@ -1140,12 +1018,13 @@ struct ptlrpc_body_v2 { __u64 pb_slv; /* VBR: pre-versions */ __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS]; + __u64 pb_mbits; /**< unused in V2 */ /* padding for future needs */ - __u64 pb_padding[4]; + __u64 pb_padding64_0; + __u64 pb_padding64_1; + __u64 pb_padding64_2; }; -void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); - /* message body offset for lustre_msg_v2 */ /* ptlrpc body offset in all request/reply messages */ #define MSG_PTLRPC_BODY_OFF 0 @@ -1282,7 +1161,16 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); */ #define OBD_CONNECT_LFSCK 0x40000000000000ULL/* support online LFSCK */ #define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */ +#define OBD_CONNECT_MULTIMODRPCS 
0x200000000000000ULL /* support multiple modify + * RPCs in parallel + */ #define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL/* striped DNE dir */ +#define OBD_CONNECT_SUBTREE 0x800000000000000ULL /* fileset mount */ +#define OBD_CONNECT_LOCK_AHEAD 0x1000000000000000ULL /* lock ahead */ +/** bulk matchbits is sent within ptlrpc_body */ +#define OBD_CONNECT_BULK_MBITS 0x2000000000000000ULL +#define OBD_CONNECT_OBDOPACK 0x4000000000000000ULL /* compact OUT obdo */ +#define OBD_CONNECT_FLAGS2 0x8000000000000000ULL /* second flags word */ /* XXX README XXX: * Please DO NOT add flag values here before first ensuring that this same @@ -1313,25 +1201,6 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); * If we eventually have separate connect data for different types, which we * almost certainly will, then perhaps we stick a union in here. */ -struct obd_connect_data_v1 { - __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ - __u32 ocd_version; /* lustre release version number */ - __u32 ocd_grant; /* initial cache grant amount (bytes) */ - __u32 ocd_index; /* LOV index to connect to */ - __u32 ocd_brw_size; /* Maximum BRW size in bytes, must be 2^n */ - __u64 ocd_ibits_known; /* inode bits this client understands */ - __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */ - __u8 ocd_inodespace; /* log2 of the per-inode space consumption */ - __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */ - __u32 ocd_unused; /* also fix lustre_swab_connect */ - __u64 ocd_transno; /* first transno from client to be replayed */ - __u32 ocd_group; /* MDS group on OST */ - __u32 ocd_cksum_types; /* supported checksum algorithms */ - __u32 ocd_max_easize; /* How big LOV EA can be on MDS */ - __u32 ocd_instance; /* also fix lustre_swab_connect */ - __u64 ocd_maxbytes; /* Maximum stripe size in bytes */ -}; - struct obd_connect_data { __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ __u32 ocd_version; /* lustre release version number */ @@ -1354,8 +1223,10 @@ struct obd_connect_data { * any field after ocd_maxbytes on the receiver without a valid flag * may result in out-of-bound memory access and kernel oops. */ - __u64 padding1; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 padding2; /* added 2.1.0. also fix lustre_swab_connect */ + __u16 ocd_maxmodrpcs; /* Maximum modify RPCs in parallel */ + __u16 padding0; /* added 2.1.0. also fix lustre_swab_connect */ + __u32 padding1; /* added 2.1.0. also fix lustre_swab_connect */ + __u64 ocd_connect_flags2; __u64 padding3; /* added 2.1.0. also fix lustre_swab_connect */ __u64 padding4; /* added 2.1.0. also fix lustre_swab_connect */ __u64 padding5; /* added 2.1.0. also fix lustre_swab_connect */ @@ -1380,8 +1251,6 @@ struct obd_connect_data { * reserve the flag for future use. */ -void lustre_swab_connect(struct obd_connect_data *ocd); - /* * Supported checksum algorithms. Up to 32 checksum types are supported. 
* (32-bit mask stored in obd_connect_data::ocd_cksum_types) @@ -1416,7 +1285,7 @@ enum ost_cmd { OST_STATFS = 13, OST_SYNC = 16, OST_SET_INFO = 17, - OST_QUOTACHECK = 18, + OST_QUOTACHECK = 18, /* not used since 2.4 */ OST_QUOTACTL = 19, OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */ OST_LAST_OPC @@ -1580,8 +1449,6 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi, dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq); } -/* extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); */ - #define MAX_MD_SIZE \ (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) #define MIN_MD_SIZE \ @@ -1674,7 +1541,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */ #define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */ /*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */ -#define OBD_MD_FLCOOKIE (0x00800000ULL) /* log cancellation cookie */ +/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */ #define OBD_MD_FLGROUP (0x01000000ULL) /* group */ #define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */ #define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */ @@ -1713,7 +1580,9 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) /* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */ #define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */ -#define OBD_MD_FLRELEASED (0x0020000000000000ULL) /* file released */ +#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent + * executed + */ #define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */ @@ -1742,11 +1611,6 @@ struct hsm_state_set { __u64 hss_clearmask; }; -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_state_set(struct hsm_state_set *hss); - -void lustre_swab_obd_statfs(struct obd_statfs *os); - /* ost_body.data values for OST_BRW */ #define OBD_BRW_READ 0x01 @@ -1786,14 +1650,16 @@ struct obd_ioobj { __u32 ioo_bufcnt; /* number of niobufs for this object */ }; +/* + * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in + * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS. + * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits. 
+ */ #define IOOBJ_MAX_BRW_BITS 16 -#define IOOBJ_TYPE_MASK ((1U << IOOBJ_MAX_BRW_BITS) - 1) #define ioobj_max_brw_get(ioo) (((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1) #define ioobj_max_brw_set(ioo, num) \ do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0) -void lustre_swab_obd_ioobj(struct obd_ioobj *ioo); - /* multiple of 8 bytes => can array */ struct niobuf_remote { __u64 rnb_offset; @@ -1801,8 +1667,6 @@ struct niobuf_remote { __u32 rnb_flags; }; -void lustre_swab_niobuf_remote(struct niobuf_remote *nbr); - /* lock value block communicated between the filter and llite */ /* OST_LVB_ERR_INIT is needed because the return code in rc is @@ -1824,8 +1688,6 @@ struct ost_lvb_v1 { __u64 lvb_blocks; }; -void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb); - struct ost_lvb { __u64 lvb_size; __s64 lvb_mtime; @@ -1838,8 +1700,6 @@ struct ost_lvb { __u32 lvb_padding; }; -void lustre_swab_ost_lvb(struct ost_lvb *lvb); - /* * lquota data structures */ @@ -1866,8 +1726,6 @@ struct obd_quotactl { struct obd_dqblk qc_dqblk; }; -void lustre_swab_obd_quotactl(struct obd_quotactl *q); - #define Q_COPY(out, in, member) (out)->member = (in)->member #define QCTL_COPY(out, in) \ @@ -1905,8 +1763,6 @@ struct lquota_lvb { __u64 lvb_pad1; }; -void lustre_swab_lquota_lvb(struct lquota_lvb *lvb); - /* op codes */ enum quota_cmd { QUOTA_DQACQ = 601, @@ -1933,9 +1789,9 @@ enum mds_cmd { MDS_PIN = 42, /* obsolete, never used in a release */ MDS_UNPIN = 43, /* obsolete, never used in a release */ MDS_SYNC = 44, - MDS_DONE_WRITING = 45, + MDS_DONE_WRITING = 45, /* obsolete since 2.8.0 */ MDS_SET_INFO = 46, - MDS_QUOTACHECK = 47, + MDS_QUOTACHECK = 47, /* not used since 2.4 */ MDS_QUOTACTL = 48, MDS_GETXATTR = 49, MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */ @@ -1972,8 +1828,6 @@ enum mdt_reint_cmd { REINT_MAX }; -void lustre_swab_generic_32s(__u32 *val); - /* the disposition of the intent outlines what was executed */ #define DISP_IT_EXECD 0x00000001 #define DISP_LOOKUP_EXECD 0x00000002 @@ -2031,36 +1885,19 @@ enum { #define MDS_STATUS_CONN 1 #define MDS_STATUS_LOV 2 -/* mdt_thread_info.mti_flags. */ -enum md_op_flags { - /* The flag indicates Size-on-MDS attributes are changed. */ - MF_SOM_CHANGE = (1 << 0), - /* Flags indicates an epoch opens or closes. */ - MF_EPOCH_OPEN = (1 << 1), - MF_EPOCH_CLOSE = (1 << 2), - MF_MDC_CANCEL_FID1 = (1 << 3), - MF_MDC_CANCEL_FID2 = (1 << 4), - MF_MDC_CANCEL_FID3 = (1 << 5), - MF_MDC_CANCEL_FID4 = (1 << 6), - /* There is a pending attribute update. */ - MF_SOM_AU = (1 << 7), - /* Cancel OST locks while getattr OST attributes. 
*/ - MF_GETATTR_LOCK = (1 << 8), - MF_GET_MDT_IDX = (1 << 9), -}; - -#define MF_SOM_LOCAL_FLAGS (MF_SOM_CHANGE | MF_EPOCH_OPEN | MF_EPOCH_CLOSE) - -#define LUSTRE_BFLAG_UNCOMMITTED_WRITES 0x1 - /* these should be identical to their EXT4_*_FL counterparts, they are * redefined here only to avoid dragging in fs/ext4/ext4.h */ #define LUSTRE_SYNC_FL 0x00000008 /* Synchronous updates */ #define LUSTRE_IMMUTABLE_FL 0x00000010 /* Immutable file */ #define LUSTRE_APPEND_FL 0x00000020 /* writes to file may only append */ +#define LUSTRE_NODUMP_FL 0x00000040 /* do not dump file */ #define LUSTRE_NOATIME_FL 0x00000080 /* do not update atime */ +#define LUSTRE_INDEX_FL 0x00001000 /* hash-indexed directory */ #define LUSTRE_DIRSYNC_FL 0x00010000 /* dirsync behaviour (dir only) */ +#define LUSTRE_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define LUSTRE_DIRECTIO_FL 0x00100000 /* Use direct i/o */ +#define LUSTRE_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ /* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values * for the client inode i_flags. The LUSTRE_*_FL are the Lustre wire @@ -2113,7 +1950,7 @@ struct mdt_body { __u32 mbo_mode; __u32 mbo_uid; __u32 mbo_gid; - __u32 mbo_flags; + __u32 mbo_flags; /* LUSTRE_*_FL file attributes */ __u32 mbo_rdev; __u32 mbo_nlink; /* #bytes to read in the case of MDS_READPAGE */ __u32 mbo_unused2; /* was "generation" until 2.4.0 */ @@ -2121,7 +1958,7 @@ struct mdt_body { __u32 mbo_eadatasize; __u32 mbo_aclsize; __u32 mbo_max_mdsize; - __u32 mbo_max_cookiesize; + __u32 mbo_unused3; /* was max_cookiesize until 2.8 */ __u32 mbo_uid_h; /* high 32-bits of uid, for FUID */ __u32 mbo_gid_h; /* high 32-bits of gid, for FUID */ __u32 mbo_padding_5; /* also fix lustre_swab_mdt_body */ @@ -2132,17 +1969,13 @@ struct mdt_body { __u64 mbo_padding_10; }; /* 216 */ -void lustre_swab_mdt_body(struct mdt_body *b); - struct mdt_ioepoch { - struct lustre_handle handle; - __u64 ioepoch; - __u32 flags; - __u32 padding; + struct lustre_handle mio_handle; + __u64 mio_unused1; /* was ioepoch */ + __u32 mio_unused2; /* was flags */ + __u32 mio_padding; }; -void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b); - /* permissions for md_perm.mp_perm */ enum { CFS_SETUID_PERM = 0x01, @@ -2178,8 +2011,6 @@ struct mdt_rec_setattr { __u32 sa_padding_5; }; -void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); - /* * Attribute flags used in mdt_rec_setattr::sa_valid. * The kernel's #defines for ATTR_* should not be used over the network @@ -2207,12 +2038,9 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); #define MDS_FMODE_CLOSED 00000000 #define MDS_FMODE_EXEC 00000004 -/* IO Epoch is opened on a closed file. */ -#define MDS_FMODE_EPOCH 01000000 -/* IO Epoch is opened on a file truncate. */ -#define MDS_FMODE_TRUNC 02000000 -/* Size-on-MDS Attribute Update is pending. 
*/ -#define MDS_FMODE_SOM 04000000 +/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */ +/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */ +/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */ #define MDS_OPEN_CREATED 00000010 #define MDS_OPEN_CROSS 00000020 @@ -2258,7 +2086,7 @@ enum mds_op_bias { MDS_CROSS_REF = 1 << 1, MDS_VTX_BYPASS = 1 << 2, MDS_PERM_BYPASS = 1 << 3, - MDS_SOM = 1 << 4, +/* MDS_SOM = 1 << 4, obsolete since 2.8.0 */ MDS_QUOTA_IGNORE = 1 << 5, MDS_CLOSE_CLEANUP = 1 << 6, MDS_KEEP_ORPHAN = 1 << 7, @@ -2268,6 +2096,7 @@ enum mds_op_bias { MDS_OWNEROVERRIDE = 1 << 11, MDS_HSM_RELEASE = 1 << 12, MDS_RENAME_MIGRATE = BIT(13), + MDS_CLOSE_LAYOUT_SWAP = BIT(14), }; /* instance of mdt_reint_rec */ @@ -2456,8 +2285,6 @@ struct mdt_rec_reint { __u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */ }; -void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); - /* lmv structures */ struct lmv_desc { __u32 ld_tgt_count; /* how many MDS's */ @@ -2547,8 +2374,6 @@ union lmv_mds_md { struct lmv_user_md lmv_user_md; }; -void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm); - static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic) { ssize_t len = -EINVAL; @@ -2652,8 +2477,6 @@ struct lov_desc { #define ld_magic ld_active_tgt_count /* for swabbing from llogs */ -void lustre_swab_lov_desc(struct lov_desc *ld); - /* * LDLM requests: */ @@ -2749,24 +2572,38 @@ struct ldlm_flock_wire { * on the resource type. */ -typedef union { +union ldlm_wire_policy_data { struct ldlm_extent l_extent; struct ldlm_flock_wire l_flock; struct ldlm_inodebits l_inodebits; -} ldlm_wire_policy_data_t; +}; union ldlm_gl_desc { struct ldlm_gl_lquota_desc lquota_desc; }; -void lustre_swab_gl_desc(union ldlm_gl_desc *); +enum ldlm_intent_flags { + IT_OPEN = BIT(0), + IT_CREAT = BIT(1), + IT_OPEN_CREAT = BIT(1) | BIT(0), + IT_READDIR = BIT(2), + IT_GETATTR = BIT(3), + IT_LOOKUP = BIT(4), + IT_UNLINK = BIT(5), + IT_TRUNC = BIT(6), + IT_GETXATTR = BIT(7), + IT_EXEC = BIT(8), + IT_PIN = BIT(9), + IT_LAYOUT = BIT(10), + IT_QUOTA_DQACQ = BIT(11), + IT_QUOTA_CONN = BIT(12), + IT_SETXATTR = BIT(13), +}; struct ldlm_intent { __u64 opc; }; -void lustre_swab_ldlm_intent(struct ldlm_intent *i); - struct ldlm_resource_desc { enum ldlm_type lr_type; __u32 lr_padding; /* also fix lustre_swab_ldlm_resource_desc */ @@ -2777,7 +2614,7 @@ struct ldlm_lock_desc { struct ldlm_resource_desc l_resource; enum ldlm_mode l_req_mode; enum ldlm_mode l_granted_mode; - ldlm_wire_policy_data_t l_policy_data; + union ldlm_wire_policy_data l_policy_data; }; #define LDLM_LOCKREQ_HANDLES 2 @@ -2790,8 +2627,6 @@ struct ldlm_request { struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES]; }; -void lustre_swab_ldlm_request(struct ldlm_request *rq); - /* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available. * Otherwise, 2 are available. 
*/ @@ -2813,8 +2648,6 @@ struct ldlm_reply { __u64 lock_policy_res2; }; -void lustre_swab_ldlm_reply(struct ldlm_reply *r); - #define ldlm_flags_to_wire(flags) ((__u32)(flags)) #define ldlm_flags_from_wire(flags) ((__u64)(flags)) @@ -2858,8 +2691,6 @@ struct mgs_target_info { char mti_params[MTI_PARAM_MAXLEN]; }; -void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo); - struct mgs_nidtbl_entry { __u64 mne_version; /* table version of this entry */ __u32 mne_instance; /* target instance # */ @@ -2874,8 +2705,6 @@ struct mgs_nidtbl_entry { } u; }; -void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo); - struct mgs_config_body { char mcb_name[MTI_NAME_MAXLEN]; /* logname */ __u64 mcb_offset; /* next index of config log to request */ @@ -2885,15 +2714,11 @@ struct mgs_config_body { __u32 mcb_units; /* # of units for bulk transfer */ }; -void lustre_swab_mgs_config_body(struct mgs_config_body *body); - struct mgs_config_res { __u64 mcr_offset; /* index of last config log */ __u64 mcr_size; /* size of the log */ }; -void lustre_swab_mgs_config_res(struct mgs_config_res *body); - /* Config marker flags (in config log) */ #define CM_START 0x01 #define CM_END 0x02 @@ -2913,8 +2738,6 @@ struct cfg_marker { char cm_comment[MTI_NAME_MAXLEN]; }; -void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size); - /* * Opcodes for multiple servers. */ @@ -2922,7 +2745,7 @@ void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size); enum obd_cmd { OBD_PING = 400, OBD_LOG_CANCEL, - OBD_QC_CALLBACK, + OBD_QC_CALLBACK, /* not used since 2.4 */ OBD_IDX_READ, OBD_LAST_OPC }; @@ -3155,23 +2978,32 @@ struct llog_gen_rec { struct llog_rec_tail lgr_tail; }; -/* On-disk header structure of each log object, stored in little endian order */ -#define LLOG_CHUNK_SIZE 8192 -#define LLOG_HEADER_SIZE (96) -#define LLOG_BITMAP_BYTES (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE) - -#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */ - /* flags for the logs */ enum llog_flag { LLOG_F_ZAP_WHEN_EMPTY = 0x1, LLOG_F_IS_CAT = 0x2, LLOG_F_IS_PLAIN = 0x4, LLOG_F_EXT_JOBID = BIT(3), + LLOG_F_IS_FIXSIZE = BIT(4), + /* + * Note: Flags covered by LLOG_F_EXT_MASK will be inherited from + * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here, + * because the catlog record is usually fixed size, but its plain + * log record can be variable + */ LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID, }; +/* On-disk header structure of each log object, stored in little endian order */ +#define LLOG_MIN_CHUNK_SIZE 8192 +#define LLOG_HEADER_SIZE (96) /* sizeof (llog_log_hdr) + + * sizeof(llh_tail) - sizeof(llh_bitmap) + */ +#define LLOG_BITMAP_BYTES (LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE) +#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */ + +/* flags for the logs */ struct llog_log_hdr { struct llog_rec_hdr llh_hdr; __s64 llh_timestamp; @@ -3183,13 +3015,30 @@ struct llog_log_hdr { /* for a catalog the first plain slot is next to it */ struct obd_uuid llh_tgtuuid; __u32 llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23]; + /* These fields must always be at the end of the llog_log_hdr. + * Note: llh_bitmap size is variable because llog chunk size could be + * bigger than LLOG_MIN_CHUNK_SIZE, i.e. sizeof(llog_log_hdr) > 8192 + * bytes, and the real size is stored in llh_hdr.lrh_len, which means + * llh_tail should only be referred by LLOG_HDR_TAIL(). 
+ * But this structure is also used by client/server llog interface + * (see llog_client.c), it will be kept in its original way to avoid + * compatibility issue. + */ __u32 llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)]; struct llog_rec_tail llh_tail; } __packed; -#define LLOG_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \ - llh->llh_bitmap_offset - \ - sizeof(llh->llh_tail)) * 8) +#undef LLOG_HEADER_SIZE +#undef LLOG_BITMAP_BYTES + +#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \ + llh->llh_bitmap_offset - \ + sizeof(llh->llh_tail)) * 8) +#define LLOG_HDR_BITMAP(llh) (__u32 *)((char *)(llh) + \ + (llh)->llh_bitmap_offset) +#define LLOG_HDR_TAIL(llh) ((struct llog_rec_tail *)((char *)llh + \ + llh->llh_hdr.lrh_len - \ + sizeof(llh->llh_tail))) /** log cookies are used to reference a specific log file and a record * therein @@ -3259,7 +3108,8 @@ struct obdo { __u32 o_parent_ver; struct lustre_handle o_handle; /* brw: lock handle to prolong locks */ - struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS + struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS, + * obsolete in 2.8, reused in OSP */ __u32 o_uid_h; __u32 o_gid_h; @@ -3333,30 +3183,11 @@ struct ost_body { /* Key for FIEMAP to be used in get_info calls */ struct ll_fiemap_info_key { - char name[8]; - struct obdo oa; - struct ll_user_fiemap fiemap; + char lfik_name[8]; + struct obdo lfik_oa; + struct fiemap lfik_fiemap; }; -void lustre_swab_ost_body(struct ost_body *b); -void lustre_swab_ost_last_id(__u64 *id); -void lustre_swab_fiemap(struct ll_user_fiemap *fiemap); - -void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum); -void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum); -void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod, - int stripe_count); -void lustre_swab_lov_mds_md(struct lov_mds_md *lmm); - -/* llog_swab.c */ -void lustre_swab_llogd_body(struct llogd_body *d); -void lustre_swab_llog_hdr(struct llog_log_hdr *h); -void lustre_swab_llogd_conn_body(struct llogd_conn_body *d); -void lustre_swab_llog_rec(struct llog_rec_hdr *rec); - -struct lustre_cfg; -void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); - /* Functions for dumping PTLRPC fields */ void dump_rniobuf(struct niobuf_remote *rnb); void dump_ioo(struct obd_ioobj *nb); @@ -3394,8 +3225,6 @@ struct lustre_capa { __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */ } __packed; -void lustre_swab_lustre_capa(struct lustre_capa *c); - /** lustre_capa::lc_opc */ enum { CAPA_OPC_BODY_WRITE = 1 << 0, /**< write object data */ @@ -3458,8 +3287,6 @@ struct getinfo_fid2path { char gf_path[0]; } __packed; -void lustre_swab_fid2path(struct getinfo_fid2path *gf); - /** path2parent request/reply structures */ struct getparent { struct lu_fid gp_fid; /**< parent FID */ @@ -3486,8 +3313,6 @@ struct layout_intent { __u64 li_end; }; -void lustre_swab_layout_intent(struct layout_intent *li); - /** * On the wire version of hsm_progress structure. 
* @@ -3506,13 +3331,6 @@ struct hsm_progress_kernel { __u64 hpk_padding2; } __packed; -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_current_action(struct hsm_current_action *action); -void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk); -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_user_item(struct hsm_user_item *hui); -void lustre_swab_hsm_request(struct hsm_request *hr); - /** layout swap request structure * fid1 and fid2 are in mdt_body */ @@ -3520,8 +3338,6 @@ struct mdc_swap_layouts { __u64 msl_flags; } __packed; -void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl); - struct close_data { struct lustre_handle cd_handle; struct lu_fid cd_fid; @@ -3529,7 +3345,5 @@ struct close_data { __u64 cd_reserved[8]; }; -void lustre_swab_close_data(struct close_data *data); - #endif /** @} lustreidl */ diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h index f3d7c94c3b50..eb08df33b2db 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h @@ -363,8 +363,8 @@ obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, int max_len) /* OBD_IOC_LOV_GETSTRIPE 155 LL_IOC_LOV_GETSTRIPE */ /* OBD_IOC_LOV_SETEA 156 LL_IOC_LOV_SETEA */ /* lustre/lustre_user.h 157-159 */ -#define OBD_IOC_QUOTACHECK _IOW('f', 160, int) -#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) +/* OBD_IOC_QUOTACHECK _IOW('f', 160, int) */ +/* OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) */ #define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) /* lustre/lustre_user.h 163-176 */ #define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data) diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h index 6fc985571cba..3301ad652db1 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h @@ -63,9 +63,13 @@ #if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64) typedef struct stat64 lstat_t; #define lstat_f lstat64 +#define fstat_f fstat64 +#define fstatat_f fstatat64 #else typedef struct stat lstat_t; #define lstat_f lstat +#define fstat_f fstat +#define fstatat_f fstatat #endif #define HAVE_LOV_USER_MDS_DATA @@ -82,7 +86,6 @@ typedef struct stat lstat_t; #define FSFILT_IOC_SETVERSION _IOW('f', 4, long) #define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long) #define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long) -#define FSFILT_IOC_FIEMAP _IOWR('f', 11, struct ll_user_fiemap) #endif /* FIEMAP flags supported by Lustre */ @@ -235,7 +238,7 @@ struct ost_id { /* #define LL_IOC_POLL_QUOTACHECK 161 OBD_IOC_POLL_QUOTACHECK */ /* #define LL_IOC_QUOTACTL 162 OBD_IOC_QUOTACTL */ #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) -#define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *) +/* IOC_LOV_GETINFO 165 obsolete */ #define LL_IOC_FLUSHCTX _IOW('f', 166, long) /* LL_IOC_RMTACL 167 obsolete */ #define LL_IOC_GETOBDCOUNT _IOR('f', 168, long) @@ -343,6 +346,9 @@ enum ll_lease_type { #define LOV_ALL_STRIPES 0xffff /* only valid for directories */ #define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */ +#define XATTR_LUSTRE_PREFIX "lustre." 
+#define XATTR_LUSTRE_LOV "lustre.lov" + #define lov_user_ost_data lov_user_ost_data_v1 struct lov_user_ost_data_v1 { /* per-stripe data structure */ struct ost_id l_ost_oi; /* OST object ID */ @@ -451,8 +457,6 @@ static inline int lmv_user_md_size(int stripes, int lmm_magic) stripes * sizeof(struct lmv_user_mds_data); } -void lustre_swab_lmv_user_md(struct lmv_user_md *lum); - struct ll_recreate_obj { __u64 lrc_id; __u32 lrc_ost_idx; @@ -522,25 +526,20 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen) } /* printf display format - * e.g. printf("file FID is "DFID"\n", PFID(fid)); + * * usage: printf("file FID is "DFID"\n", PFID(fid)); */ #define FID_NOBRACE_LEN 40 #define FID_LEN (FID_NOBRACE_LEN + 2) #define DFID_NOBRACE "%#llx:0x%x:0x%x" #define DFID "["DFID_NOBRACE"]" -#define PFID(fid) \ - (fid)->f_seq, \ - (fid)->f_oid, \ - (fid)->f_ver +#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver -/* scanf input parse format -- strip '[' first. - * e.g. sscanf(fidstr, SFID, RFID(&fid)); +/* scanf input parse format for fids in DFID_NOBRACE format + * Need to strip '[' from DFID format first or use "["SFID"]" at caller. + * usage: sscanf(fidstr, SFID, RFID(&fid)); */ #define SFID "0x%llx:0x%x:0x%x" -#define RFID(fid) \ - &((fid)->f_seq), \ - &((fid)->f_oid), \ - &((fid)->f_ver) +#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver) /********* Quotas **********/ @@ -551,23 +550,18 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen) #define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */ /* these must be explicitly translated into linux Q_* in ll_dir_ioctl */ -#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */ -#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */ +#define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */ +#define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */ #define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */ #define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */ #define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */ #define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */ /* lustre-specific control commands */ -#define LUSTRE_Q_INVALIDATE 0x80000b /* invalidate quota data */ -#define LUSTRE_Q_FINVALIDATE 0x80000c /* invalidate filter quota data */ +#define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */ +#define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */ #define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */ -struct if_quotacheck { - char obd_type[16]; - struct obd_uuid obd_uuid; -}; - #define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629 /* permission */ @@ -649,6 +643,7 @@ struct if_quotactl { #define SWAP_LAYOUTS_CHECK_DV2 (1 << 1) #define SWAP_LAYOUTS_KEEP_MTIME (1 << 2) #define SWAP_LAYOUTS_KEEP_ATIME (1 << 3) +#define SWAP_LAYOUTS_CLOSE BIT(4) /* Swap XATTR_NAME_HSM as well, only on the MDT so far */ #define SWAP_LAYOUTS_MDS_HSM (1 << 31) @@ -999,6 +994,7 @@ struct ioc_data_version { * See HSM_FLAGS below. 
*/ enum hsm_states { + HS_NONE = 0x00000000, HS_EXISTS = 0x00000001, HS_DIRTY = 0x00000002, HS_RELEASED = 0x00000004, diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h index 567c438e93cb..300e96fb032a 100644 --- a/drivers/staging/lustre/lustre/include/lustre_compat.h +++ b/drivers/staging/lustre/lustre/include/lustre_compat.h @@ -74,4 +74,6 @@ # define ext2_find_next_zero_bit find_next_zero_bit_le #endif +#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) + #endif /* _LUSTRE_COMPAT_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h index d03534432624..b7e61d082e55 100644 --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h +++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h @@ -59,7 +59,7 @@ struct obd_device; #define OBD_LDLM_DEVICENAME "ldlm" #define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) -#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(36000)) +#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(3900)) /* 65 min */ #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024 /** @@ -86,10 +86,10 @@ enum ldlm_error { * decisions about lack of conflicts or do any autonomous lock granting without * first speaking to a server. */ -typedef enum { +enum ldlm_side { LDLM_NAMESPACE_SERVER = 1 << 0, LDLM_NAMESPACE_CLIENT = 1 << 1 -} ldlm_side_t; +}; /** * The blocking callback is overloaded to perform two functions. These flags @@ -359,7 +359,7 @@ struct ldlm_namespace { struct obd_device *ns_obd; /** Flag indicating if namespace is on client instead of server */ - ldlm_side_t ns_client; + enum ldlm_side ns_client; /** Resource hash table for namespace. */ struct cfs_hash *ns_rs_hash; @@ -550,20 +550,18 @@ struct ldlm_flock { __u64 owner; __u64 blocking_owner; struct obd_export *blocking_export; - /* Protected by the hash lock */ - __u32 blocking_refs; __u32 pid; }; -typedef union { +union ldlm_policy_data { struct ldlm_extent l_extent; struct ldlm_flock l_flock; struct ldlm_inodebits l_inodebits; -} ldlm_policy_data_t; +}; void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type, - const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy); + const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy); enum lvb_type { LVB_T_NONE = 0, @@ -692,7 +690,7 @@ struct ldlm_lock { * Representation of private data specific for a lock type. * Examples are: extent range for extent lock or bitmask for ibits locks */ - ldlm_policy_data_t l_policy_data; + union ldlm_policy_data l_policy_data; /** * Lock state flags. Protected by lr_lock. @@ -967,8 +965,8 @@ struct ldlm_ast_work { * Common ldlm_enqueue parameters */ struct ldlm_enqueue_info { - __u32 ei_type; /** Type of the lock being enqueued. */ - __u32 ei_mode; /** Mode of the lock being enqueued. */ + enum ldlm_type ei_type; /** Type of the lock being enqueued. */ + enum ldlm_mode ei_mode; /** Mode of the lock being enqueued. */ void *ei_cb_bl; /** blocking lock callback */ void *ei_cb_cp; /** lock completion callback */ void *ei_cb_gl; /** lock glimpse callback */ @@ -979,7 +977,7 @@ struct ldlm_enqueue_info { extern struct obd_ops ldlm_obd_ops; extern char *ldlm_lockname[]; -char *ldlm_it2str(int it); +const char *ldlm_it2str(enum ldlm_intent_flags it); /** * Just a fancy CDEBUG call with log level preset to LDLM_DEBUG. 
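
Since struct ldlm_enqueue_info (above) now types ei_type and ei_mode as enums rather than bare __u32 values, initializers become self-documenting and the compiler can flag type/mode mix-ups. A hypothetical sketch; the callback names are placeholders, not from this patch:

	struct ldlm_enqueue_info einfo = {
		.ei_type  = LDLM_EXTENT,	/* enum ldlm_type */
		.ei_mode  = LCK_PR,		/* enum ldlm_mode */
		.ei_cb_bl = my_blocking_ast,	/* placeholder callbacks */
		.ei_cb_cp = my_completion_ast,
	};
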
@@ -1168,16 +1166,18 @@ do { \ struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); void ldlm_lock_put(struct ldlm_lock *lock); void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); -void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode); -int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode); +void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode); +int ldlm_lock_addref_try(const struct lustre_handle *lockh, + enum ldlm_mode mode); +void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode); +void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, + enum ldlm_mode mode); void ldlm_lock_fail_match_locked(struct ldlm_lock *lock); void ldlm_lock_allow_match(struct ldlm_lock *lock); void ldlm_lock_allow_match_locked(struct ldlm_lock *lock); enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, const struct ldlm_res_id *, - enum ldlm_type type, ldlm_policy_data_t *, + enum ldlm_type type, union ldlm_policy_data *, enum ldlm_mode mode, struct lustre_handle *, int unref); enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, @@ -1189,7 +1189,7 @@ void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); /* resource.c */ struct ldlm_namespace * ldlm_namespace_new(struct obd_device *obd, char *name, - ldlm_side_t client, enum ldlm_appetite apt, + enum ldlm_side client, enum ldlm_appetite apt, enum ldlm_ns_type ns_type); int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags); void ldlm_namespace_get(struct ldlm_namespace *ns); @@ -1208,7 +1208,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct ldlm_lock *lock); void ldlm_resource_unlink_lock(struct ldlm_lock *lock); void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); -void ldlm_dump_all_namespaces(ldlm_side_t client, int level); +void ldlm_dump_all_namespaces(enum ldlm_side client, int level); void ldlm_namespace_dump(int level, struct ldlm_namespace *); void ldlm_resource_dump(int level, struct ldlm_resource *); int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, @@ -1241,7 +1241,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data); int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, struct ldlm_enqueue_info *einfo, const struct ldlm_res_id *res_id, - ldlm_policy_data_t const *policy, __u64 *flags, + union ldlm_policy_data const *policy, __u64 *flags, void *lvb, __u32 lvb_len, enum lvb_type lvb_type, struct lustre_handle *lockh, int async); int ldlm_prep_enqueue_req(struct obd_export *exp, @@ -1265,13 +1265,13 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *, enum ldlm_cancel_flags flags, void *opaque); int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, const struct ldlm_res_id *res_id, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, enum ldlm_cancel_flags flags, void *opaque); int ldlm_cancel_resource_local(struct ldlm_resource *res, struct list_head *cancels, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, __u64 lock_flags, enum ldlm_cancel_flags cancel_flags, void *opaque); @@ -1333,7 +1333,7 @@ int ldlm_pools_init(void); void ldlm_pools_fini(void); int ldlm_pool_init(struct ldlm_pool *pl, struct 
ldlm_namespace *ns, - int idx, ldlm_side_t client); + int idx, enum ldlm_side client); void ldlm_pool_fini(struct ldlm_pool *pl); void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock); void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock); diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h index 316780693193..b5a1aadbcb93 100644 --- a/drivers/staging/lustre/lustre/include/lustre_fid.h +++ b/drivers/staging/lustre/lustre/include/lustre_fid.h @@ -150,6 +150,7 @@ #include "../../include/linux/libcfs/libcfs.h" #include "lustre/lustre_idl.h" +#include "seq_range.h" struct lu_env; struct lu_site; diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h index 932410d3e3cc..6ef1b03cb986 100644 --- a/drivers/staging/lustre/lustre/include/lustre_fld.h +++ b/drivers/staging/lustre/lustre/include/lustre_fld.h @@ -103,8 +103,6 @@ struct lu_client_fld { /** Client fld debugfs entry name. */ char lcf_name[LUSTRE_MDT_MAXNAMELEN]; - - int lcf_flags; }; /* Client methods */ diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h index cde7ed702c86..dec1e99d594d 100644 --- a/drivers/staging/lustre/lustre/include/lustre_ha.h +++ b/drivers/staging/lustre/lustre/include/lustre_ha.h @@ -53,6 +53,7 @@ void ptlrpc_activate_import(struct obd_import *imp); void ptlrpc_deactivate_import(struct obd_import *imp); void ptlrpc_invalidate_import(struct obd_import *imp); void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt); +void ptlrpc_pinger_force(struct obd_import *imp); /** @} ha */ diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h index 5461ba33d90c..f0c931ce1a67 100644 --- a/drivers/staging/lustre/lustre/include/lustre_import.h +++ b/drivers/staging/lustre/lustre/include/lustre_import.h @@ -185,6 +185,11 @@ struct obd_import { struct list_head *imp_replay_cursor; /** @} */ + /** List of not replied requests */ + struct list_head imp_unreplied_list; + /** Known maximal replied XID */ + __u64 imp_known_replied_xid; + /** obd device for this import */ struct obd_device *imp_obd; @@ -294,7 +299,9 @@ struct obd_import { */ imp_force_reconnect:1, /* import has tried to connect with server */ - imp_connect_tried:1; + imp_connect_tried:1, + /* connected but not FULL yet */ + imp_connected:1; __u32 imp_connect_op; struct obd_connect_data imp_connect_data; __u64 imp_connect_flags_orig; diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index 6b231913ba2e..27f3148c4344 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -350,8 +350,6 @@ do { \ l_wait_event_exclusive_head(wq, condition, &lwi); \ }) -#define LIBLUSTRE_CLIENT (0) - /** @} lib */ #endif /* _LUSTRE_LIB_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h index d7f7afa8dfa7..5aa3645e64dc 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lmv.h +++ b/drivers/staging/lustre/lustre/include/lustre_lmv.h @@ -76,18 +76,7 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2) union lmv_mds_md; -int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp, - const union lmv_mds_md *lmm, int stripe_count); - -static inline int 
lmv_alloc_memmd(struct lmv_stripe_md **lsmp, int stripe_count) -{ - return lmv_unpack_md(NULL, lsmp, NULL, stripe_count); -} - -static inline void lmv_free_memmd(struct lmv_stripe_md *lsm) -{ - lmv_unpack_md(NULL, &lsm, NULL, 0); -} +void lmv_free_memmd(struct lmv_stripe_md *lsm); static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst, const struct lmv_mds_md_v1 *lmv_src) diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h index 995b266932e3..35e37eb1bc2c 100644 --- a/drivers/staging/lustre/lustre/include/lustre_log.h +++ b/drivers/staging/lustre/lustre/include/lustre_log.h @@ -214,6 +214,7 @@ struct llog_handle { spinlock_t lgh_hdr_lock; /* protect lgh_hdr data */ struct llog_logid lgh_id; /* id of this log */ struct llog_log_hdr *lgh_hdr; + size_t lgh_hdr_size; int lgh_last_idx; int lgh_cur_idx; /* used during llog_process */ __u64 lgh_cur_offset; /* used during llog_process */ @@ -244,6 +245,11 @@ struct llog_ctxt { struct mutex loc_mutex; /* protect loc_imp */ atomic_t loc_refcount; long loc_flags; /* flags, see above defines */ + /* + * llog chunk size, and llog record size can not be bigger than + * loc_chunk_size + */ + __u32 loc_chunk_size; }; #define LLOG_PROC_BREAK 0x0001 diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h index 8fc2d3f2dfd6..198ceb0c66f9 100644 --- a/drivers/staging/lustre/lustre/include/lustre_mdc.h +++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h @@ -156,16 +156,39 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, mutex_unlock(&lck->rpcl_mutex); } +static inline void mdc_get_mod_rpc_slot(struct ptlrpc_request *req, + struct lookup_intent *it) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + u32 opc; + u16 tag; + + opc = lustre_msg_get_opc(req->rq_reqmsg); + tag = obd_get_mod_rpc_slot(cli, opc, it); + lustre_msg_set_tag(req->rq_reqmsg, tag); +} + +static inline void mdc_put_mod_rpc_slot(struct ptlrpc_request *req, + struct lookup_intent *it) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + u32 opc; + u16 tag; + + opc = lustre_msg_get_opc(req->rq_reqmsg); + tag = lustre_msg_get_tag(req->rq_reqmsg); + obd_put_mod_rpc_slot(cli, opc, it, tag); +} + /** - * Update the maximum possible easize and cookiesize. + * Update the maximum possible easize. * - * The values are learned from ptlrpc replies sent by the MDT. The - * default easize and cookiesize is initialized to the minimum value but - * allowed to grow up to a single page in size if required to handle the + * This value is learned from ptlrpc replies sent by the MDT. The + * default easize is initialized to the minimum value but allowed + * to grow up to a single page in size if required to handle the * common case. 
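The mdc_get_mod_rpc_slot()/mdc_put_mod_rpc_slot() helpers added above are meant to bracket a modifying metadata RPC: the get side reserves a slot from the client_obd and stamps the returned tag into the request message, and the put side hands both back once the reply has been processed. A minimal sketch of the intended call pattern; the surrounding send logic is assumed here, not taken from this patch:

	mdc_get_mod_rpc_slot(req, it);	/* reserve a slot, tag rq_reqmsg */
	rc = ptlrpc_queue_wait(req);	/* send and wait for the reply */
	mdc_put_mod_rpc_slot(req, it);	/* release the slot and tag */
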
* - * \see client_obd::cl_default_mds_easize and - * client_obd::cl_default_mds_cookiesize + * \see client_obd::cl_default_mds_easize * * \param[in] exp export for MDC device * \param[in] body body of ptlrpc reply from MDT @@ -176,7 +199,7 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp, { if (body->mbo_valid & OBD_MD_FLMODEASIZE) { struct client_obd *cli = &exp->exp_obd->u.cli; - u32 def_cookiesize, def_easize; + u32 def_easize; if (cli->cl_max_mds_easize < body->mbo_max_mdsize) cli->cl_max_mds_easize = body->mbo_max_mdsize; @@ -184,13 +207,6 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp, def_easize = min_t(__u32, body->mbo_max_mdsize, OBD_MAX_DEFAULT_EA_SIZE); cli->cl_default_mds_easize = def_easize; - - if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize) - cli->cl_max_mds_cookiesize = body->mbo_max_cookiesize; - - def_cookiesize = min_t(__u32, body->mbo_max_cookiesize, - OBD_MAX_DEFAULT_COOKIE_SIZE); - cli->cl_default_mds_cookiesize = def_cookiesize; } } diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index e9aba99ee52a..411eb0dc7f38 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -50,6 +50,7 @@ * @{ */ +#include <linux/uio.h> #include "../../include/linux/libcfs/libcfs.h" #include "../../include/linux/lnet/nidstr.h" #include "../../include/linux/lnet/api.h" @@ -68,13 +69,17 @@ #define PTLRPC_MD_OPTIONS 0 /** - * Max # of bulk operations in one request. + * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ... * In order for the client and server to properly negotiate the maximum * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two * value. The client is free to limit the actual RPC size for any bulk * transfer via cl_max_pages_per_rpc to some non-power-of-two value. + * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS. */ -#define PTLRPC_BULK_OPS_BITS 2 +#define PTLRPC_BULK_OPS_BITS 4 +#if PTLRPC_BULK_OPS_BITS > 16 +#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS." +#endif #define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS) /** * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and @@ -437,6 +442,10 @@ struct ptlrpc_reply_state { unsigned long rs_committed:1;/* the transaction was committed * and the rs was dispatched */ + atomic_t rs_refcount; /* number of users */ + /** Number of locks awaiting client ACK */ + int rs_nlocks; + /** Size of the state */ int rs_size; /** opcode */ @@ -449,7 +458,6 @@ struct ptlrpc_reply_state { struct ptlrpc_service_part *rs_svcpt; /** Lnet metadata handle for the reply */ lnet_handle_md_t rs_md_h; - atomic_t rs_refcount; /** Context for the service thread */ struct ptlrpc_svc_ctx *rs_svc_ctx; @@ -466,8 +474,6 @@ struct ptlrpc_reply_state { */ struct lustre_msg *rs_msg; /* reply message */ - /** Number of locks awaiting client ACK */ - int rs_nlocks; /** Handles of locks awaiting client reply ACK */ struct lustre_handle rs_locks[RS_MAX_LOCKS]; /** Lock modes of locks in \a rs_locks */ @@ -515,717 +521,7 @@ struct lu_env; struct ldlm_lock; -/** - * \defgroup nrs Network Request Scheduler - * @{ - */ -struct ptlrpc_nrs_policy; -struct ptlrpc_nrs_resource; -struct ptlrpc_nrs_request; - -/** - * NRS control operations. - * - * These are common for all policies. - */ -enum ptlrpc_nrs_ctl { - /** - * Not a valid opcode. 
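For reference, the PTLRPC_BULK_OPS_BITS bump from 2 to 4 above quadruples the number of bulk operations one request may carry; per the updated comment each increment doubles the RPC ceiling (2 bits = 4MB/RPC, 4 bits = 16MB/RPC), and the new #error guard keeps the value inside what IOOBJ_MAX_BRW_BITS can express. A standalone sketch of the arithmetic, where the 1 MiB per-BRW figure is an assumption of this illustration rather than something the hunk states:

#include <stdio.h>

#define PTLRPC_BULK_OPS_BITS	4
#define PTLRPC_BULK_OPS_COUNT	(1U << PTLRPC_BULK_OPS_BITS)
#define BRW_SIZE_MB		1	/* assumed per-operation transfer size */

int main(void)
{
	/* 2 bits -> 4 MiB/RPC, 4 bits -> 16 MiB/RPC, 5 bits -> 32 MiB/RPC */
	printf("%u bulk ops, %u MiB max per RPC\n",
	       PTLRPC_BULK_OPS_COUNT, PTLRPC_BULK_OPS_COUNT * BRW_SIZE_MB);
	return 0;
}
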
- */ - PTLRPC_NRS_CTL_INVALID, - /** - * Activate the policy. - */ - PTLRPC_NRS_CTL_START, - /** - * Reserved for multiple primary policies, which may be a possibility - * in the future. - */ - PTLRPC_NRS_CTL_STOP, - /** - * Policies can start using opcodes from this value and onwards for - * their own purposes; the assigned value itself is arbitrary. - */ - PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20, -}; - -/** - * ORR policy operations - */ -enum nrs_ctl_orr { - NRS_CTL_ORR_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC, - NRS_CTL_ORR_WR_QUANTUM, - NRS_CTL_ORR_RD_OFF_TYPE, - NRS_CTL_ORR_WR_OFF_TYPE, - NRS_CTL_ORR_RD_SUPP_REQ, - NRS_CTL_ORR_WR_SUPP_REQ, -}; - -/** - * NRS policy operations. - * - * These determine the behaviour of a policy, and are called in response to - * NRS core events. - */ -struct ptlrpc_nrs_pol_ops { - /** - * Called during policy registration; this operation is optional. - * - * \param[in,out] policy The policy being initialized - */ - int (*op_policy_init)(struct ptlrpc_nrs_policy *policy); - /** - * Called during policy unregistration; this operation is optional. - * - * \param[in,out] policy The policy being unregistered/finalized - */ - void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy); - /** - * Called when activating a policy via lprocfs; policies allocate and - * initialize their resources here; this operation is optional. - * - * \param[in,out] policy The policy being started - * - * \see nrs_policy_start_locked() - */ - int (*op_policy_start)(struct ptlrpc_nrs_policy *policy); - /** - * Called when deactivating a policy via lprocfs; policies deallocate - * their resources here; this operation is optional - * - * \param[in,out] policy The policy being stopped - * - * \see nrs_policy_stop0() - */ - void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy); - /** - * Used for policy-specific operations; i.e. not generic ones like - * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous - * to an ioctl; this operation is optional. - * - * \param[in,out] policy The policy carrying out operation \a opc - * \param[in] opc The command operation being carried out - * \param[in,out] arg An generic buffer for communication between the - * user and the control operation - * - * \retval -ve error - * \retval 0 success - * - * \see ptlrpc_nrs_policy_control() - */ - int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy, - enum ptlrpc_nrs_ctl opc, void *arg); - - /** - * Called when obtaining references to the resources of the resource - * hierarchy for a request that has arrived for handling at the PTLRPC - * service. Policies should return -ve for requests they do not wish - * to handle. This operation is mandatory. - * - * \param[in,out] policy The policy we're getting resources for. - * \param[in,out] nrq The request we are getting resources for. - * \param[in] parent The parent resource of the resource being - * requested; set to NULL if none. - * \param[out] resp The resource is to be returned here; the - * fallback policy in an NRS head should - * \e always return a non-NULL pointer value. - * \param[in] moving_req When set, signifies that this is an attempt - * to obtain resources for a request being moved - * to the high-priority NRS head by - * ldlm_lock_reorder_req(). - * This implies two things: - * 1. We are under obd_export::exp_rpc_lock and - * so should not sleep. - * 2. 
We should not perform non-idempotent or can - * skip performing idempotent operations that - * were carried out when resources were first - * taken for the request when it was initialized - * in ptlrpc_nrs_req_initialize(). - * - * \retval 0, +ve The level of the returned resource in the resource - * hierarchy; currently only 0 (for a non-leaf resource) - * and 1 (for a leaf resource) are supported by the - * framework. - * \retval -ve error - * - * \see ptlrpc_nrs_req_initialize() - * \see ptlrpc_nrs_hpreq_add_nolock() - */ - int (*op_res_get)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq, - const struct ptlrpc_nrs_resource *parent, - struct ptlrpc_nrs_resource **resp, - bool moving_req); - /** - * Called when releasing references taken for resources in the resource - * hierarchy for the request; this operation is optional. - * - * \param[in,out] policy The policy the resource belongs to - * \param[in] res The resource to be freed - * - * \see ptlrpc_nrs_req_finalize() - * \see ptlrpc_nrs_hpreq_add_nolock() - */ - void (*op_res_put)(struct ptlrpc_nrs_policy *policy, - const struct ptlrpc_nrs_resource *res); - - /** - * Obtains a request for handling from the policy, and optionally - * removes the request from the policy; this operation is mandatory. - * - * \param[in,out] policy The policy to poll - * \param[in] peek When set, signifies that we just want to - * examine the request, and not handle it, so the - * request is not removed from the policy. - * \param[in] force When set, it will force a policy to return a - * request if it has one queued. - * - * \retval NULL No request available for handling - * \retval valid-pointer The request polled for handling - * - * \see ptlrpc_nrs_req_get_nolock() - */ - struct ptlrpc_nrs_request * - (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek, - bool force); - /** - * Called when attempting to add a request to a policy for later - * handling; this operation is mandatory. - * - * \param[in,out] policy The policy on which to enqueue \a nrq - * \param[in,out] nrq The request to enqueue - * - * \retval 0 success - * \retval != 0 error - * - * \see ptlrpc_nrs_req_add_nolock() - */ - int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Removes a request from the policy's set of pending requests. Normally - * called after a request has been polled successfully from the policy - * for handling; this operation is mandatory. - * - * \param[in,out] policy The policy the request \a nrq belongs to - * \param[in,out] nrq The request to dequeue - */ - void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Called after the request being carried out. Could be used for - * job/resource control; this operation is optional. - * - * \param[in,out] policy The policy which is stopping to handle request - * \a nrq - * \param[in,out] nrq The request - * - * \pre assert_spin_locked(&svcpt->scp_req_lock) - * - * \see ptlrpc_nrs_req_stop_nolock() - */ - void (*op_req_stop)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Registers the policy's lprocfs interface with a PTLRPC service. - * - * \param[in] svc The service - * - * \retval 0 success - * \retval != 0 error - */ - int (*op_lprocfs_init)(struct ptlrpc_service *svc); - /** - * Unegisters the policy's lprocfs interface with a PTLRPC service. 
- * - * In cases of failed policy registration in - * \e ptlrpc_nrs_policy_register(), this function may be called for a - * service which has not registered the policy successfully, so - * implementations of this method should make sure their operations are - * safe in such cases. - * - * \param[in] svc The service - */ - void (*op_lprocfs_fini)(struct ptlrpc_service *svc); -}; - -/** - * Policy flags - */ -enum nrs_policy_flags { - /** - * Fallback policy, use this flag only on a single supported policy per - * service. The flag cannot be used on policies that use - * \e PTLRPC_NRS_FL_REG_EXTERN - */ - PTLRPC_NRS_FL_FALLBACK = (1 << 0), - /** - * Start policy immediately after registering. - */ - PTLRPC_NRS_FL_REG_START = (1 << 1), - /** - * This is a policy registering from a module different to the one NRS - * core ships in (currently ptlrpc). - */ - PTLRPC_NRS_FL_REG_EXTERN = (1 << 2), -}; - -/** - * NRS queue type. - * - * Denotes whether an NRS instance is for handling normal or high-priority - * RPCs, or whether an operation pertains to one or both of the NRS instances - * in a service. - */ -enum ptlrpc_nrs_queue_type { - PTLRPC_NRS_QUEUE_REG = (1 << 0), - PTLRPC_NRS_QUEUE_HP = (1 << 1), - PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP) -}; - -/** - * NRS head - * - * A PTLRPC service has at least one NRS head instance for handling normal - * priority RPCs, and may optionally have a second NRS head instance for - * handling high-priority RPCs. Each NRS head maintains a list of available - * policies, of which one and only one policy is acting as the fallback policy, - * and optionally a different policy may be acting as the primary policy. For - * all RPCs handled by this NRS head instance, NRS core will first attempt to - * enqueue the RPC using the primary policy (if any). The fallback policy is - * used in the following cases: - * - when there was no primary policy in the - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request - * was initialized. - * - when the primary policy that was at the - * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the - * RPC was initialized, denoted it did not wish, or for some other reason was - * not able to handle the request, by returning a non-valid NRS resource - * reference. - * - when the primary policy that was at the - * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the - * RPC was initialized, fails later during the request enqueueing stage. - * - * \see nrs_resource_get_safe() - * \see nrs_request_enqueue() - */ -struct ptlrpc_nrs { - spinlock_t nrs_lock; - /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */ - /** - * List of registered policies - */ - struct list_head nrs_policy_list; - /** - * List of policies with queued requests. Policies that have any - * outstanding requests are queued here, and this list is queried - * in a round-robin manner from NRS core when obtaining a request - * for handling. This ensures that requests from policies that at some - * point transition away from the - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained. 
- */ - struct list_head nrs_policy_queued; - /** - * Service partition for this NRS head - */ - struct ptlrpc_service_part *nrs_svcpt; - /** - * Primary policy, which is the preferred policy for handling RPCs - */ - struct ptlrpc_nrs_policy *nrs_policy_primary; - /** - * Fallback policy, which is the backup policy for handling RPCs - */ - struct ptlrpc_nrs_policy *nrs_policy_fallback; - /** - * This NRS head handles either HP or regular requests - */ - enum ptlrpc_nrs_queue_type nrs_queue_type; - /** - * # queued requests from all policies in this NRS head - */ - unsigned long nrs_req_queued; - /** - * # scheduled requests from all policies in this NRS head - */ - unsigned long nrs_req_started; - /** - * # policies on this NRS - */ - unsigned nrs_num_pols; - /** - * This NRS head is in progress of starting a policy - */ - unsigned nrs_policy_starting:1; - /** - * In progress of shutting down the whole NRS head; used during - * unregistration - */ - unsigned nrs_stopping:1; -}; - -#define NRS_POL_NAME_MAX 16 - -struct ptlrpc_nrs_pol_desc; - -/** - * Service compatibility predicate; this determines whether a policy is adequate - * for handling RPCs of a particular PTLRPC service. - * - * XXX:This should give the same result during policy registration and - * unregistration, and for all partitions of a service; so the result should not - * depend on temporal service or other properties, that may influence the - * result. - */ -typedef bool (*nrs_pol_desc_compat_t) (const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc); - -struct ptlrpc_nrs_pol_conf { - /** - * Human-readable policy name - */ - char nc_name[NRS_POL_NAME_MAX]; - /** - * NRS operations for this policy - */ - const struct ptlrpc_nrs_pol_ops *nc_ops; - /** - * Service compatibility predicate - */ - nrs_pol_desc_compat_t nc_compat; - /** - * Set for policies that support a single ptlrpc service, i.e. ones that - * have \a pd_compat set to nrs_policy_compat_one(). The variable value - * depicts the name of the single service that such policies are - * compatible with. - */ - const char *nc_compat_svc_name; - /** - * Owner module for this policy descriptor; policies registering from a - * different module to the one the NRS framework is held within - * (currently ptlrpc), should set this field to THIS_MODULE. - */ - struct module *nc_owner; - /** - * Policy registration flags; a bitmask of \e nrs_policy_flags - */ - unsigned nc_flags; -}; - -/** - * NRS policy registering descriptor - * - * Is used to hold a description of a policy that can be passed to NRS core in - * order to register the policy with NRS heads in different PTLRPC services. - */ -struct ptlrpc_nrs_pol_desc { - /** - * Human-readable policy name - */ - char pd_name[NRS_POL_NAME_MAX]; - /** - * Link into nrs_core::nrs_policies - */ - struct list_head pd_list; - /** - * NRS operations for this policy - */ - const struct ptlrpc_nrs_pol_ops *pd_ops; - /** - * Service compatibility predicate - */ - nrs_pol_desc_compat_t pd_compat; - /** - * Set for policies that are compatible with only one PTLRPC service. - * - * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name - */ - const char *pd_compat_svc_name; - /** - * Owner module for this policy descriptor. - * - * We need to hold a reference to the module whenever we might make use - * of any of the module's contents, i.e. - * - If one or more instances of the policy are at a state where they - * might be handling a request, i.e. 
- * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to - * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference - * is taken on the module when - * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it - * becomes 0, so that we hold only one reference to the module maximum - * at any time. - * - * We do not need to hold a reference to the module, even though we - * might use code and data from the module, in the following cases: - * - During external policy registration, because this should happen in - * the module's init() function, in which case the module is safe from - * removal because a reference is being held on the module by the - * kernel, and iirc kmod (and I guess module-init-tools also) will - * serialize any racing processes properly anyway. - * - During external policy unregistration, because this should happen - * in a module's exit() function, and any attempts to start a policy - * instance would need to take a reference on the module, and this is - * not possible once we have reached the point where the exit() - * handler is called. - * - During service registration and unregistration, as service setup - * and cleanup, and policy registration, unregistration and policy - * instance starting, are serialized by \e nrs_core::nrs_mutex, so - * as long as users adhere to the convention of registering policies - * in init() and unregistering them in module exit() functions, there - * should not be a race between these operations. - * - During any policy-specific lprocfs operations, because a reference - * is held by the kernel on a proc entry that has been entered by a - * syscall, so as long as proc entries are removed during unregistration time, - * then unregistration and lprocfs operations will be properly - * serialized. - */ - struct module *pd_owner; - /** - * Bitmask of \e nrs_policy_flags - */ - unsigned pd_flags; - /** - * # of references on this descriptor - */ - atomic_t pd_refs; -}; - -/** - * NRS policy state - * - * Policies transition from one state to the other during their lifetime - */ -enum ptlrpc_nrs_pol_state { - /** - * Not a valid policy state. - */ - NRS_POL_STATE_INVALID, - /** - * Policies are at this state either at the start of their life, or - * transition here when the user selects a different policy to act - * as the primary one. - */ - NRS_POL_STATE_STOPPED, - /** - * Policy is progress of stopping - */ - NRS_POL_STATE_STOPPING, - /** - * Policy is in progress of starting - */ - NRS_POL_STATE_STARTING, - /** - * A policy is in this state in two cases: - * - it is the fallback policy, which is always in this state. - * - it has been activated by the user; i.e. it is the primary policy, - */ - NRS_POL_STATE_STARTED, -}; - -/** - * NRS policy information - * - * Used for obtaining information for the status of a policy via lprocfs - */ -struct ptlrpc_nrs_pol_info { - /** - * Policy name - */ - char pi_name[NRS_POL_NAME_MAX]; - /** - * Current policy state - */ - enum ptlrpc_nrs_pol_state pi_state; - /** - * # RPCs enqueued for later dispatching by the policy - */ - long pi_req_queued; - /** - * # RPCs started for dispatch by the policy - */ - long pi_req_started; - /** - * Is this a fallback policy? - */ - unsigned pi_fallback:1; -}; - -/** - * NRS policy - * - * There is one instance of this for each policy in each NRS head of each - * PTLRPC service partition. 
- */ -struct ptlrpc_nrs_policy { - /** - * Linkage into the NRS head's list of policies, - * ptlrpc_nrs:nrs_policy_list - */ - struct list_head pol_list; - /** - * Linkage into the NRS head's list of policies with enqueued - * requests ptlrpc_nrs:nrs_policy_queued - */ - struct list_head pol_list_queued; - /** - * Current state of this policy - */ - enum ptlrpc_nrs_pol_state pol_state; - /** - * Bitmask of nrs_policy_flags - */ - unsigned pol_flags; - /** - * # RPCs enqueued for later dispatching by the policy - */ - long pol_req_queued; - /** - * # RPCs started for dispatch by the policy - */ - long pol_req_started; - /** - * Usage Reference count taken on the policy instance - */ - long pol_ref; - /** - * The NRS head this policy has been created at - */ - struct ptlrpc_nrs *pol_nrs; - /** - * Private policy data; varies by policy type - */ - void *pol_private; - /** - * Policy descriptor for this policy instance. - */ - struct ptlrpc_nrs_pol_desc *pol_desc; -}; - -/** - * NRS resource - * - * Resources are embedded into two types of NRS entities: - * - Inside NRS policies, in the policy's private data in - * ptlrpc_nrs_policy::pol_private - * - In objects that act as prime-level scheduling entities in different NRS - * policies; e.g. on a policy that performs round robin or similar order - * scheduling across client NIDs, there would be one NRS resource per unique - * client NID. On a policy which performs round robin scheduling across - * backend filesystem objects, there would be one resource associated with - * each of the backend filesystem objects partaking in the scheduling - * performed by the policy. - * - * NRS resources share a parent-child relationship, in which resources embedded - * in policy instances are the parent entities, with all scheduling entities - * a policy schedules across being the children, thus forming a simple resource - * hierarchy. This hierarchy may be extended with one or more levels in the - * future if the ability to have more than one primary policy is added. - * - * Upon request initialization, references to the then active NRS policies are - * taken and used to later handle the dispatching of the request with one of - * these policies. - * - * \see nrs_resource_get_safe() - * \see ptlrpc_nrs_req_add() - */ -struct ptlrpc_nrs_resource { - /** - * This NRS resource's parent; is NULL for resources embedded in NRS - * policy instances; i.e. those are top-level ones. - */ - struct ptlrpc_nrs_resource *res_parent; - /** - * The policy associated with this resource. - */ - struct ptlrpc_nrs_policy *res_policy; -}; - -enum { - NRS_RES_FALLBACK, - NRS_RES_PRIMARY, - NRS_RES_MAX -}; - -/* \name fifo - * - * FIFO policy - * - * This policy is a logical wrapper around previous, non-NRS functionality. - * It dispatches RPCs in the same order as they arrive from the network. This - * policy is currently used as the fallback policy, and the only enabled policy - * on all NRS heads of all PTLRPC service partitions. - * @{ - */ - -/** - * Private data structure for the FIFO policy - */ -struct nrs_fifo_head { - /** - * Resource object for policy instance. - */ - struct ptlrpc_nrs_resource fh_res; - /** - * List of queued requests. - */ - struct list_head fh_list; - /** - * For debugging purposes. 
- */ - __u64 fh_sequence; -}; - -struct nrs_fifo_req { - struct list_head fr_list; - __u64 fr_sequence; -}; - -/** @} fifo */ - -/** - * NRS request - * - * Instances of this object exist embedded within ptlrpc_request; the main - * purpose of this object is to hold references to the request's resources - * for the lifetime of the request, and to hold properties that policies use - * use for determining the request's scheduling priority. - */ -struct ptlrpc_nrs_request { - /** - * The request's resource hierarchy. - */ - struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX]; - /** - * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the - * policy that was used to enqueue the request. - * - * \see nrs_request_enqueue() - */ - unsigned nr_res_idx; - unsigned nr_initialized:1; - unsigned nr_enqueued:1; - unsigned nr_started:1; - unsigned nr_finalized:1; - - /** - * Policy-specific fields, used for determining a request's scheduling - * priority, and other supporting functionality. - */ - union { - /** - * Fields for the FIFO policy - */ - struct nrs_fifo_req fifo; - } nr_u; - /** - * Externally-registering policies may want to use this to allocate - * their own request properties. - */ - void *ext; -}; - -/** @} nrs */ +#include "lustre_nrs.h" /** * Basic request prioritization operations structure. @@ -1304,6 +600,8 @@ struct ptlrpc_cli_req { union ptlrpc_async_args cr_async_args; /** Opaq data for replay and commit callbacks. */ void *cr_cb_data; + /** Link to the imp->imp_unreplied_list */ + struct list_head cr_unreplied_list; /** * Commit callback, called when request is committed and about to be * freed. @@ -1343,6 +641,7 @@ struct ptlrpc_cli_req { #define rq_interpret_reply rq_cli.cr_reply_interp #define rq_async_args rq_cli.cr_async_args #define rq_cb_data rq_cli.cr_cb_data +#define rq_unreplied_list rq_cli.cr_unreplied_list #define rq_commit_cb rq_cli.cr_commit_cb #define rq_replay_cb rq_cli.cr_replay_cb @@ -1505,6 +804,8 @@ struct ptlrpc_request { __u64 rq_transno; /** xid */ __u64 rq_xid; + /** bulk match bits */ + u64 rq_mbits; /** * List item to for replay list. Not yet committed requests get linked * there. 
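The new cr_unreplied_list link (together with the imp_unreplied_list and imp_known_replied_xid fields added to struct obd_import earlier in this patch) lets the client track which XIDs the server has not yet replied to. A hedged sketch of the bookkeeping, using a hypothetical stand-in for struct ptlrpc_request: if the list is kept sorted by XID, the highest known-replied XID is simply one less than the oldest unreplied one.

#include <linux/list.h>

struct example_req {			/* stand-in, not the real struct */
	struct list_head rq_unreplied_list;
	__u64 rq_xid;
};

static __u64 example_known_replied_xid(struct list_head *unreplied)
{
	struct example_req *oldest;

	if (list_empty(unreplied))
		return 0;		/* nothing outstanding */
	oldest = list_first_entry(unreplied, struct example_req,
				  rq_unreplied_list);
	return oldest->rq_xid - 1;	/* all lower XIDs have replies */
}
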
@@ -1793,10 +1094,93 @@ struct ptlrpc_bulk_page { struct page *bp_page; }; -#define BULK_GET_SOURCE 0 -#define BULK_PUT_SINK 1 -#define BULK_GET_SINK 2 -#define BULK_PUT_SOURCE 3 +enum ptlrpc_bulk_op_type { + PTLRPC_BULK_OP_ACTIVE = 0x00000001, + PTLRPC_BULK_OP_PASSIVE = 0x00000002, + PTLRPC_BULK_OP_PUT = 0x00000004, + PTLRPC_BULK_OP_GET = 0x00000008, + PTLRPC_BULK_BUF_KVEC = 0x00000010, + PTLRPC_BULK_BUF_KIOV = 0x00000020, + PTLRPC_BULK_GET_SOURCE = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET, + PTLRPC_BULK_PUT_SINK = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT, + PTLRPC_BULK_GET_SINK = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET, + PTLRPC_BULK_PUT_SOURCE = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT, +}; + +static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET; +} + +static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE; +} + +static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK; +} + +static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_GET_SINK) == PTLRPC_BULK_GET_SINK; +} + +static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type) +{ + return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE; +} + +static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type) +{ + return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) + == PTLRPC_BULK_BUF_KVEC; +} + +static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type) +{ + return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) + == PTLRPC_BULK_BUF_KIOV; +} + +static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type) +{ + return ((type & PTLRPC_BULK_OP_ACTIVE) | + (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE; +} + +static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type) +{ + return ((type & PTLRPC_BULK_OP_ACTIVE) | + (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE; +} + +struct ptlrpc_bulk_frag_ops { + /** + * Add a page \a page to the bulk descriptor \a desc + * Data to transfer in the page starts at offset \a pageoffset and + * amount of data to transfer from the page is \a len + */ + void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc, + struct page *page, int pageoffset, int len); + + /* + * Add a \a fragment to the bulk descriptor \a desc. + * Data to transfer in the fragment is pointed to by \a frag + * The size of the fragment is \a len + */ + int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len); + + /** + * Uninitialize and free bulk descriptor \a desc. + * Works on bulk descriptors both from server and client side. + */ + void (*release_frags)(struct ptlrpc_bulk_desc *desc); +}; + +extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops; +extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops; /** * Definition of bulk descriptor. 
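The old BULK_{GET,PUT}_{SOURCE,SINK} constants become composable flags here: a descriptor is ACTIVE or PASSIVE, PUT or GET, and KVEC- or KIOV-backed, and the inline predicates test both bits of each pair. Combined with the new ptlrpc_bulk_frag_ops vectors, a caller names all three choices when preparing a bulk descriptor. An illustrative client-side write, where the server GETs from the client so the client is the passive source; the page count and error handling are placeholders, and the call follows the new ptlrpc_prep_bulk_imp() signature shown in the next hunk:

	desc = ptlrpc_prep_bulk_imp(req, npages, 1,
				    PTLRPC_BULK_GET_SOURCE |
				    PTLRPC_BULK_BUF_KIOV,
				    OST_BULK_PORTAL,
				    &ptlrpc_bulk_kiov_pin_ops);
	if (!desc)
		return -ENOMEM;
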
@@ -1811,14 +1195,14 @@ struct ptlrpc_bulk_page { struct ptlrpc_bulk_desc { /** completed with failure */ unsigned long bd_failure:1; - /** {put,get}{source,sink} */ - unsigned long bd_type:2; /** client side */ unsigned long bd_registered:1; /** For serialization with callback */ spinlock_t bd_lock; /** Import generation when request for this bulk was sent */ int bd_import_generation; + /** {put,get}{source,sink}{kvec,kiov} */ + enum ptlrpc_bulk_op_type bd_type; /** LNet portal for this bulk */ __u32 bd_portal; /** Server side - export this bulk created for */ @@ -1827,13 +1211,14 @@ struct ptlrpc_bulk_desc { struct obd_import *bd_import; /** Back pointer to the request */ struct ptlrpc_request *bd_req; + struct ptlrpc_bulk_frag_ops *bd_frag_ops; wait_queue_head_t bd_waitq; /* server side only WQ */ int bd_iov_count; /* # entries in bd_iov */ int bd_max_iov; /* allocated size of bd_iov */ int bd_nob; /* # bytes covered */ int bd_nob_transferred; /* # bytes GOT/PUT */ - __u64 bd_last_xid; + u64 bd_last_mbits; struct ptlrpc_cb_id bd_cbid; /* network callback info */ lnet_nid_t bd_sender; /* stash event::sender */ @@ -1842,14 +1227,31 @@ struct ptlrpc_bulk_desc { /** array of associated MDs */ lnet_handle_md_t bd_mds[PTLRPC_BULK_OPS_COUNT]; - /* - * encrypt iov, size is either 0 or bd_iov_count. - */ - lnet_kiov_t *bd_enc_iov; - - lnet_kiov_t bd_iov[0]; + union { + struct { + /* + * encrypt iov, size is either 0 or bd_iov_count. + */ + struct bio_vec *bd_enc_vec; + struct bio_vec *bd_vec; /* Array of bio_vecs */ + } bd_kiov; + + struct { + struct kvec *bd_enc_kvec; + struct kvec *bd_kvec; /* Array of kvecs */ + } bd_kvec; + } bd_u; }; +#define GET_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_vec) +#define BD_GET_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_vec[i]) +#define GET_ENC_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_enc_vec) +#define BD_GET_ENC_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_enc_vec[i]) +#define GET_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_kvec) +#define BD_GET_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_kvec[i]) +#define GET_ENC_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_enc_kvec) +#define BD_GET_ENC_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_enc_kvec[i]) + enum { SVC_STOPPED = 1 << 0, SVC_STOPPING = 1 << 1, @@ -2464,21 +1866,17 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, void ptlrpc_req_finished(struct ptlrpc_request *request); struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req); struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, - unsigned npages, unsigned max_brw, - unsigned type, unsigned portal); -void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin); -static inline void ptlrpc_free_bulk_pin(struct ptlrpc_bulk_desc *bulk) -{ - __ptlrpc_free_bulk(bulk, 1); -} - -static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk) -{ - __ptlrpc_free_bulk(bulk, 0); -} - + unsigned int nfrags, + unsigned int max_brw, + unsigned int type, + unsigned int portal, + const struct ptlrpc_bulk_frag_ops *ops); + +int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc, + void *frag, int len); void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len, int); + struct page *page, int pageoffset, int len, + int pin); static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc, struct page *page, int pageoffset, int len) @@ -2493,6 +1891,16 @@ static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc, __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0); } +void 
ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk); + +static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc) +{ + int i; + + for (i = 0; i < desc->bd_iov_count ; i++) + put_page(BD_GET_KIOV(desc, i).bv_page); +} + void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, struct obd_import *imp); __u64 ptlrpc_next_xid(void); @@ -2652,6 +2060,7 @@ struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg); __u32 lustre_msg_get_type(struct lustre_msg *msg); void lustre_msg_add_version(struct lustre_msg *msg, u32 version); __u32 lustre_msg_get_opc(struct lustre_msg *msg); +__u16 lustre_msg_get_tag(struct lustre_msg *msg); __u64 lustre_msg_get_last_committed(struct lustre_msg *msg); __u64 *lustre_msg_get_versions(struct lustre_msg *msg); __u64 lustre_msg_get_transno(struct lustre_msg *msg); @@ -2670,6 +2079,8 @@ void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle); void lustre_msg_set_type(struct lustre_msg *msg, __u32 type); void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc); +void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid); +void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag); void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions); void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno); void lustre_msg_set_status(struct lustre_msg *msg, __u32 status); @@ -2679,6 +2090,7 @@ void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout); void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time); void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid); void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum); +void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits); static inline void lustre_shrink_reply(struct ptlrpc_request *req, int segment, diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs.h b/drivers/staging/lustre/lustre/include/lustre_nrs.h new file mode 100644 index 000000000000..a5028aaa19cd --- /dev/null +++ b/drivers/staging/lustre/lustre/include/lustre_nrs.h @@ -0,0 +1,717 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2014, Intel Corporation. + * + * Copyright 2012 Xyratex Technology Limited + */ +/* + * + * Network Request Scheduler (NRS) + * + */ + +#ifndef _LUSTRE_NRS_H +#define _LUSTRE_NRS_H + +/** + * \defgroup nrs Network Request Scheduler + * @{ + */ +struct ptlrpc_nrs_policy; +struct ptlrpc_nrs_resource; +struct ptlrpc_nrs_request; + +/** + * NRS control operations. + * + * These are common for all policies. + */ +enum ptlrpc_nrs_ctl { + /** + * Not a valid opcode. + */ + PTLRPC_NRS_CTL_INVALID, + /** + * Activate the policy. + */ + PTLRPC_NRS_CTL_START, + /** + * Reserved for multiple primary policies, which may be a possibility + * in the future. 
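The BD_GET_KIOV()/GET_ENC_KIOV() accessors hide which arm of the new bd_u union a descriptor uses; ptlrpc_release_bulk_page_pin() above is a canonical user. Another illustrative walk in the same style, summing the bytes described by a kiov-backed descriptor (the helper name is made up for this sketch):

static unsigned int example_bulk_kiov_bytes(struct ptlrpc_bulk_desc *desc)
{
	unsigned int nob = 0;
	int i;

	for (i = 0; i < desc->bd_iov_count; i++)
		nob += BD_GET_KIOV(desc, i).bv_len;	/* bio_vec length */
	return nob;
}
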
+ */ + PTLRPC_NRS_CTL_STOP, + /** + * Policies can start using opcodes from this value and onwards for + * their own purposes; the assigned value itself is arbitrary. + */ + PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20, +}; + +/** + * NRS policy operations. + * + * These determine the behaviour of a policy, and are called in response to + * NRS core events. + */ +struct ptlrpc_nrs_pol_ops { + /** + * Called during policy registration; this operation is optional. + * + * \param[in,out] policy The policy being initialized + */ + int (*op_policy_init)(struct ptlrpc_nrs_policy *policy); + /** + * Called during policy unregistration; this operation is optional. + * + * \param[in,out] policy The policy being unregistered/finalized + */ + void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy); + /** + * Called when activating a policy via lprocfs; policies allocate and + * initialize their resources here; this operation is optional. + * + * \param[in,out] policy The policy being started + * + * \see nrs_policy_start_locked() + */ + int (*op_policy_start)(struct ptlrpc_nrs_policy *policy); + /** + * Called when deactivating a policy via lprocfs; policies deallocate + * their resources here; this operation is optional + * + * \param[in,out] policy The policy being stopped + * + * \see nrs_policy_stop0() + */ + void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy); + /** + * Used for policy-specific operations; i.e. not generic ones like + * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous + * to an ioctl; this operation is optional. + * + * \param[in,out] policy The policy carrying out operation \a opc + * \param[in] opc The command operation being carried out + * \param[in,out] arg An generic buffer for communication between the + * user and the control operation + * + * \retval -ve error + * \retval 0 success + * + * \see ptlrpc_nrs_policy_control() + */ + int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy, + enum ptlrpc_nrs_ctl opc, void *arg); + + /** + * Called when obtaining references to the resources of the resource + * hierarchy for a request that has arrived for handling at the PTLRPC + * service. Policies should return -ve for requests they do not wish + * to handle. This operation is mandatory. + * + * \param[in,out] policy The policy we're getting resources for. + * \param[in,out] nrq The request we are getting resources for. + * \param[in] parent The parent resource of the resource being + * requested; set to NULL if none. + * \param[out] resp The resource is to be returned here; the + * fallback policy in an NRS head should + * \e always return a non-NULL pointer value. + * \param[in] moving_req When set, signifies that this is an attempt + * to obtain resources for a request being moved + * to the high-priority NRS head by + * ldlm_lock_reorder_req(). + * This implies two things: + * 1. We are under obd_export::exp_rpc_lock and + * so should not sleep. + * 2. We should not perform non-idempotent or can + * skip performing idempotent operations that + * were carried out when resources were first + * taken for the request when it was initialized + * in ptlrpc_nrs_req_initialize(). + * + * \retval 0, +ve The level of the returned resource in the resource + * hierarchy; currently only 0 (for a non-leaf resource) + * and 1 (for a leaf resource) are supported by the + * framework. 
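Policies may claim private control opcodes from PTLRPC_NRS_CTL_1ST_POL_SPEC upward, exactly as the ORR enum removed from lustre_net.h earlier in this patch did. A sketch of that pattern for a hypothetical policy:

/* hypothetical policy-private opcodes, modeled on the removed ORR enum */
enum nrs_ctl_example {
	NRS_CTL_EXAMPLE_RD_QUANTUM = PTLRPC_NRS_CTL_1ST_POL_SPEC,
	NRS_CTL_EXAMPLE_WR_QUANTUM,
};
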
+ * \retval -ve error + * + * \see ptlrpc_nrs_req_initialize() + * \see ptlrpc_nrs_hpreq_add_nolock() + * \see ptlrpc_nrs_req_hp_move() + */ + int (*op_res_get)(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq, + const struct ptlrpc_nrs_resource *parent, + struct ptlrpc_nrs_resource **resp, + bool moving_req); + /** + * Called when releasing references taken for resources in the resource + * hierarchy for the request; this operation is optional. + * + * \param[in,out] policy The policy the resource belongs to + * \param[in] res The resource to be freed + * + * \see ptlrpc_nrs_req_finalize() + * \see ptlrpc_nrs_hpreq_add_nolock() + * \see ptlrpc_nrs_req_hp_move() + */ + void (*op_res_put)(struct ptlrpc_nrs_policy *policy, + const struct ptlrpc_nrs_resource *res); + + /** + * Obtains a request for handling from the policy, and optionally + * removes the request from the policy; this operation is mandatory. + * + * \param[in,out] policy The policy to poll + * \param[in] peek When set, signifies that we just want to + * examine the request, and not handle it, so the + * request is not removed from the policy. + * \param[in] force When set, it will force a policy to return a + * request if it has one queued. + * + * \retval NULL No request available for handling + * \retval valid-pointer The request polled for handling + * + * \see ptlrpc_nrs_req_get_nolock() + */ + struct ptlrpc_nrs_request * + (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek, + bool force); + /** + * Called when attempting to add a request to a policy for later + * handling; this operation is mandatory. + * + * \param[in,out] policy The policy on which to enqueue \a nrq + * \param[in,out] nrq The request to enqueue + * + * \retval 0 success + * \retval != 0 error + * + * \see ptlrpc_nrs_req_add_nolock() + */ + int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq); + /** + * Removes a request from the policy's set of pending requests. Normally + * called after a request has been polled successfully from the policy + * for handling; this operation is mandatory. + * + * \param[in,out] policy The policy the request \a nrq belongs to + * \param[in,out] nrq The request to dequeue + * + * \see ptlrpc_nrs_req_del_nolock() + */ + void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq); + /** + * Called after the request being carried out. Could be used for + * job/resource control; this operation is optional. + * + * \param[in,out] policy The policy which is stopping to handle request + * \a nrq + * \param[in,out] nrq The request + * + * \pre assert_spin_locked(&svcpt->scp_req_lock) + * + * \see ptlrpc_nrs_req_stop_nolock() + */ + void (*op_req_stop)(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq); + /** + * Registers the policy's lprocfs interface with a PTLRPC service. + * + * \param[in] svc The service + * + * \retval 0 success + * \retval != 0 error + */ + int (*op_lprocfs_init)(struct ptlrpc_service *svc); + /** + * Unegisters the policy's lprocfs interface with a PTLRPC service. + * + * In cases of failed policy registration in + * \e ptlrpc_nrs_policy_register(), this function may be called for a + * service which has not registered the policy successfully, so + * implementations of this method should make sure their operations are + * safe in such cases. 
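Under the op_res_get contract above, a policy that keeps a single policy-embedded resource simply returns it and reports leaf level 1. A plausible shape for the FIFO policy's resolver, using the nrs_fifo_head this patch moves into lustre_nrs_fifo.h; the actual implementation lives in ptlrpc's nrs_fifo.c, not in this header:

static int nrs_example_fifo_res_get(struct ptlrpc_nrs_policy *policy,
				    struct ptlrpc_nrs_request *nrq,
				    const struct ptlrpc_nrs_resource *parent,
				    struct ptlrpc_nrs_resource **resp,
				    bool moving_req)
{
	struct nrs_fifo_head *head = policy->pol_private;

	*resp = &head->fh_res;	/* one resource embedded in the policy */
	return 1;		/* leaf level of the two-level hierarchy */
}
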
+ * + * \param[in] svc The service + */ + void (*op_lprocfs_fini)(struct ptlrpc_service *svc); +}; + +/** + * Policy flags + */ +enum nrs_policy_flags { + /** + * Fallback policy, use this flag only on a single supported policy per + * service. The flag cannot be used on policies that use + * \e PTLRPC_NRS_FL_REG_EXTERN + */ + PTLRPC_NRS_FL_FALLBACK = BIT(0), + /** + * Start policy immediately after registering. + */ + PTLRPC_NRS_FL_REG_START = BIT(1), + /** + * This is a policy registering from a module different to the one NRS + * core ships in (currently ptlrpc). + */ + PTLRPC_NRS_FL_REG_EXTERN = BIT(2), +}; + +/** + * NRS queue type. + * + * Denotes whether an NRS instance is for handling normal or high-priority + * RPCs, or whether an operation pertains to one or both of the NRS instances + * in a service. + */ +enum ptlrpc_nrs_queue_type { + PTLRPC_NRS_QUEUE_REG = BIT(0), + PTLRPC_NRS_QUEUE_HP = BIT(1), + PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP) +}; + +/** + * NRS head + * + * A PTLRPC service has at least one NRS head instance for handling normal + * priority RPCs, and may optionally have a second NRS head instance for + * handling high-priority RPCs. Each NRS head maintains a list of available + * policies, of which one and only one policy is acting as the fallback policy, + * and optionally a different policy may be acting as the primary policy. For + * all RPCs handled by this NRS head instance, NRS core will first attempt to + * enqueue the RPC using the primary policy (if any). The fallback policy is + * used in the following cases: + * - when there was no primary policy in the + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request + * was initialized. + * - when the primary policy that was at the + * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the + * RPC was initialized, denoted it did not wish, or for some other reason was + * not able to handle the request, by returning a non-valid NRS resource + * reference. + * - when the primary policy that was at the + * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the + * RPC was initialized, fails later during the request enqueueing stage. + * + * \see nrs_resource_get_safe() + * \see nrs_request_enqueue() + */ +struct ptlrpc_nrs { + spinlock_t nrs_lock; + /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */ + /** + * List of registered policies + */ + struct list_head nrs_policy_list; + /** + * List of policies with queued requests. Policies that have any + * outstanding requests are queued here, and this list is queried + * in a round-robin manner from NRS core when obtaining a request + * for handling. This ensures that requests from policies that at some + * point transition away from the + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained. 
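Of the operations in ptlrpc_nrs_pol_ops, only op_res_get, op_req_get, op_req_enqueue and op_req_dequeue are mandatory; the rest are optional hooks. A hypothetical policy would therefore register a table like the following, with all handler names being placeholders:

/* minimal sketch of a policy ops table; only mandatory ops are wired up */
static const struct ptlrpc_nrs_pol_ops nrs_example_ops = {
	.op_res_get	= nrs_example_res_get,	/* resolve resources */
	.op_req_get	= nrs_example_req_get,	/* poll next request */
	.op_req_enqueue	= nrs_example_req_enqueue,
	.op_req_dequeue	= nrs_example_req_dequeue,
};
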
+ */ + struct list_head nrs_policy_queued; + /** + * Service partition for this NRS head + */ + struct ptlrpc_service_part *nrs_svcpt; + /** + * Primary policy, which is the preferred policy for handling RPCs + */ + struct ptlrpc_nrs_policy *nrs_policy_primary; + /** + * Fallback policy, which is the backup policy for handling RPCs + */ + struct ptlrpc_nrs_policy *nrs_policy_fallback; + /** + * This NRS head handles either HP or regular requests + */ + enum ptlrpc_nrs_queue_type nrs_queue_type; + /** + * # queued requests from all policies in this NRS head + */ + unsigned long nrs_req_queued; + /** + * # scheduled requests from all policies in this NRS head + */ + unsigned long nrs_req_started; + /** + * # policies on this NRS + */ + unsigned int nrs_num_pols; + /** + * This NRS head is in progress of starting a policy + */ + unsigned int nrs_policy_starting:1; + /** + * In progress of shutting down the whole NRS head; used during + * unregistration + */ + unsigned int nrs_stopping:1; + /** + * NRS policy is throttling request + */ + unsigned int nrs_throttling:1; +}; + +#define NRS_POL_NAME_MAX 16 +#define NRS_POL_ARG_MAX 16 + +struct ptlrpc_nrs_pol_desc; + +/** + * Service compatibility predicate; this determines whether a policy is adequate + * for handling RPCs of a particular PTLRPC service. + * + * XXX:This should give the same result during policy registration and + * unregistration, and for all partitions of a service; so the result should not + * depend on temporal service or other properties, that may influence the + * result. + */ +typedef bool (*nrs_pol_desc_compat_t)(const struct ptlrpc_service *svc, + const struct ptlrpc_nrs_pol_desc *desc); + +struct ptlrpc_nrs_pol_conf { + /** + * Human-readable policy name + */ + char nc_name[NRS_POL_NAME_MAX]; + /** + * NRS operations for this policy + */ + const struct ptlrpc_nrs_pol_ops *nc_ops; + /** + * Service compatibility predicate + */ + nrs_pol_desc_compat_t nc_compat; + /** + * Set for policies that support a single ptlrpc service, i.e. ones that + * have \a pd_compat set to nrs_policy_compat_one(). The variable value + * depicts the name of the single service that such policies are + * compatible with. + */ + const char *nc_compat_svc_name; + /** + * Owner module for this policy descriptor; policies registering from a + * different module to the one the NRS framework is held within + * (currently ptlrpc), should set this field to THIS_MODULE. + */ + struct module *nc_owner; + /** + * Policy registration flags; a bitmask of \e nrs_policy_flags + */ + unsigned int nc_flags; +}; + +/** + * NRS policy registering descriptor + * + * Is used to hold a description of a policy that can be passed to NRS core in + * order to register the policy with NRS heads in different PTLRPC services. + */ +struct ptlrpc_nrs_pol_desc { + /** + * Human-readable policy name + */ + char pd_name[NRS_POL_NAME_MAX]; + /** + * Link into nrs_core::nrs_policies + */ + struct list_head pd_list; + /** + * NRS operations for this policy + */ + const struct ptlrpc_nrs_pol_ops *pd_ops; + /** + * Service compatibility predicate + */ + nrs_pol_desc_compat_t pd_compat; + /** + * Set for policies that are compatible with only one PTLRPC service. + * + * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name + */ + const char *pd_compat_svc_name; + /** + * Owner module for this policy descriptor. + * + * We need to hold a reference to the module whenever we might make use + * of any of the module's contents, i.e. 
+ * - If one or more instances of the policy are at a state where they + * might be handling a request, i.e. + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to + * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference + * is taken on the module when + * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it + * becomes 0, so that we hold only one reference to the module maximum + * at any time. + * + * We do not need to hold a reference to the module, even though we + * might use code and data from the module, in the following cases: + * - During external policy registration, because this should happen in + * the module's init() function, in which case the module is safe from + * removal because a reference is being held on the module by the + * kernel, and iirc kmod (and I guess module-init-tools also) will + * serialize any racing processes properly anyway. + * - During external policy unregistration, because this should happen + * in a module's exit() function, and any attempts to start a policy + * instance would need to take a reference on the module, and this is + * not possible once we have reached the point where the exit() + * handler is called. + * - During service registration and unregistration, as service setup + * and cleanup, and policy registration, unregistration and policy + * instance starting, are serialized by \e nrs_core::nrs_mutex, so + * as long as users adhere to the convention of registering policies + * in init() and unregistering them in module exit() functions, there + * should not be a race between these operations. + * - During any policy-specific lprocfs operations, because a reference + * is held by the kernel on a proc entry that has been entered by a + * syscall, so as long as proc entries are removed during + * unregistration time, then unregistration and lprocfs operations + * will be properly serialized. + */ + struct module *pd_owner; + /** + * Bitmask of \e nrs_policy_flags + */ + unsigned int pd_flags; + /** + * # of references on this descriptor + */ + atomic_t pd_refs; +}; + +/** + * NRS policy state + * + * Policies transition from one state to the other during their lifetime + */ +enum ptlrpc_nrs_pol_state { + /** + * Not a valid policy state. + */ + NRS_POL_STATE_INVALID, + /** + * Policies are at this state either at the start of their life, or + * transition here when the user selects a different policy to act + * as the primary one. + */ + NRS_POL_STATE_STOPPED, + /** + * Policy is progress of stopping + */ + NRS_POL_STATE_STOPPING, + /** + * Policy is in progress of starting + */ + NRS_POL_STATE_STARTING, + /** + * A policy is in this state in two cases: + * - it is the fallback policy, which is always in this state. + * - it has been activated by the user; i.e. it is the primary policy, + */ + NRS_POL_STATE_STARTED, +}; + +/** + * NRS policy information + * + * Used for obtaining information for the status of a policy via lprocfs + */ +struct ptlrpc_nrs_pol_info { + /** + * Policy name + */ + char pi_name[NRS_POL_NAME_MAX]; + /** + * Policy argument + */ + char pi_arg[NRS_POL_ARG_MAX]; + /** + * Current policy state + */ + enum ptlrpc_nrs_pol_state pi_state; + /** + * # RPCs enqueued for later dispatching by the policy + */ + long pi_req_queued; + /** + * # RPCs started for dispatch by the policy + */ + long pi_req_started; + /** + * Is this a fallback policy? 
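The four valid states above form a simple cycle: STOPPED to STARTING to STARTED to STOPPING and back to STOPPED. A sketch of the transitions the enum implies; the helper is illustrative only, and the STARTING-to-STOPPED edge (a failed start) is an assumption of this sketch rather than something the header states:

static bool nrs_example_transition_valid(enum ptlrpc_nrs_pol_state from,
					 enum ptlrpc_nrs_pol_state to)
{
	switch (from) {
	case NRS_POL_STATE_STOPPED:
		return to == NRS_POL_STATE_STARTING;
	case NRS_POL_STATE_STARTING:	/* start may fail */
		return to == NRS_POL_STATE_STARTED ||
		       to == NRS_POL_STATE_STOPPED;
	case NRS_POL_STATE_STARTED:
		return to == NRS_POL_STATE_STOPPING;
	case NRS_POL_STATE_STOPPING:
		return to == NRS_POL_STATE_STOPPED;
	default:
		return false;
	}
}
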
+ */ + unsigned pi_fallback:1; +}; + +/** + * NRS policy + * + * There is one instance of this for each policy in each NRS head of each + * PTLRPC service partition. + */ +struct ptlrpc_nrs_policy { + /** + * Linkage into the NRS head's list of policies, + * ptlrpc_nrs:nrs_policy_list + */ + struct list_head pol_list; + /** + * Linkage into the NRS head's list of policies with enqueued + * requests ptlrpc_nrs:nrs_policy_queued + */ + struct list_head pol_list_queued; + /** + * Current state of this policy + */ + enum ptlrpc_nrs_pol_state pol_state; + /** + * Bitmask of nrs_policy_flags + */ + unsigned int pol_flags; + /** + * # RPCs enqueued for later dispatching by the policy + */ + long pol_req_queued; + /** + * # RPCs started for dispatch by the policy + */ + long pol_req_started; + /** + * Usage Reference count taken on the policy instance + */ + long pol_ref; + /** + * Human-readable policy argument + */ + char pol_arg[NRS_POL_ARG_MAX]; + /** + * The NRS head this policy has been created at + */ + struct ptlrpc_nrs *pol_nrs; + /** + * Private policy data; varies by policy type + */ + void *pol_private; + /** + * Policy descriptor for this policy instance. + */ + struct ptlrpc_nrs_pol_desc *pol_desc; +}; + +/** + * NRS resource + * + * Resources are embedded into two types of NRS entities: + * - Inside NRS policies, in the policy's private data in + * ptlrpc_nrs_policy::pol_private + * - In objects that act as prime-level scheduling entities in different NRS + * policies; e.g. on a policy that performs round robin or similar order + * scheduling across client NIDs, there would be one NRS resource per unique + * client NID. On a policy which performs round robin scheduling across + * backend filesystem objects, there would be one resource associated with + * each of the backend filesystem objects partaking in the scheduling + * performed by the policy. + * + * NRS resources share a parent-child relationship, in which resources embedded + * in policy instances are the parent entities, with all scheduling entities + * a policy schedules across being the children, thus forming a simple resource + * hierarchy. This hierarchy may be extended with one or more levels in the + * future if the ability to have more than one primary policy is added. + * + * Upon request initialization, references to the then active NRS policies are + * taken and used to later handle the dispatching of the request with one of + * these policies. + * + * \see nrs_resource_get_safe() + * \see ptlrpc_nrs_req_add() + */ +struct ptlrpc_nrs_resource { + /** + * This NRS resource's parent; is NULL for resources embedded in NRS + * policy instances; i.e. those are top-level ones. + */ + struct ptlrpc_nrs_resource *res_parent; + /** + * The policy associated with this resource. + */ + struct ptlrpc_nrs_policy *res_policy; +}; + +enum { + NRS_RES_FALLBACK, + NRS_RES_PRIMARY, + NRS_RES_MAX +}; + +#include "lustre_nrs_fifo.h" + +/** + * NRS request + * + * Instances of this object exist embedded within ptlrpc_request; the main + * purpose of this object is to hold references to the request's resources + * for the lifetime of the request, and to hold properties that policies use + * use for determining the request's scheduling priority. + **/ +struct ptlrpc_nrs_request { + /** + * The request's resource hierarchy. + */ + struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX]; + /** + * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the + * policy that was used to enqueue the request. 
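Since each request carries its resolved resource pointers in nr_res_ptrs[] and records which one enqueued it in nr_res_idx, mapping a request back to its policy is a two-step dereference. A hedged sketch of that lookup; any real helper of this kind lives in the NRS core, not in this header:

static inline struct ptlrpc_nrs_policy *
nrs_example_request_policy(struct ptlrpc_nrs_request *nrq)
{
	/* the resource that enqueued the request points at its policy */
	return nrq->nr_res_ptrs[nrq->nr_res_idx]->res_policy;
}
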
+ * + * \see nrs_request_enqueue() + */ + unsigned int nr_res_idx; + unsigned int nr_initialized:1; + unsigned int nr_enqueued:1; + unsigned int nr_started:1; + unsigned int nr_finalized:1; + + /** + * Policy-specific fields, used for determining a request's scheduling + * priority, and other supporting functionality. + */ + union { + /** + * Fields for the FIFO policy + */ + struct nrs_fifo_req fifo; + } nr_u; + /** + * Externally-registering policies may want to use this to allocate + * their own request properties. + */ + void *ext; +}; + +/** @} nrs */ +#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h new file mode 100644 index 000000000000..3b5418eac6c4 --- /dev/null +++ b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h @@ -0,0 +1,70 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2014, Intel Corporation. + * + * Copyright 2012 Xyratex Technology Limited + */ +/* + * + * Network Request Scheduler (NRS) First-in First-out (FIFO) policy + * + */ + +#ifndef _LUSTRE_NRS_FIFO_H +#define _LUSTRE_NRS_FIFO_H + +/* \name fifo + * + * FIFO policy + * + * This policy is a logical wrapper around previous, non-NRS functionality. + * It dispatches RPCs in the same order as they arrive from the network. This + * policy is currently used as the fallback policy, and the only enabled policy + * on all NRS heads of all PTLRPC service partitions. + * @{ + */ + +/** + * Private data structure for the FIFO policy + */ +struct nrs_fifo_head { + /** + * Resource object for policy instance. + */ + struct ptlrpc_nrs_resource fh_res; + /** + * List of queued requests. + */ + struct list_head fh_list; + /** + * For debugging purposes. 
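+ * Incremented as requests are queued; each request's + * nrs_fifo_req::fr_sequence is drawn from it, so it reflects the FIFO + * arrival order.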
+ */ + __u64 fh_sequence; +}; + +struct nrs_fifo_req { + struct list_head fr_list; + __u64 fr_sequence; +}; + +/** @} fifo */ +#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h index a13558e53274..fbcd39572cd0 100644 --- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h +++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h @@ -148,13 +148,12 @@ extern struct req_format RQF_MDS_GETATTR; */ extern struct req_format RQF_MDS_GETATTR_NAME; extern struct req_format RQF_MDS_CLOSE; -extern struct req_format RQF_MDS_RELEASE_CLOSE; +extern struct req_format RQF_MDS_INTENT_CLOSE; extern struct req_format RQF_MDS_CONNECT; extern struct req_format RQF_MDS_DISCONNECT; extern struct req_format RQF_MDS_GET_INFO; extern struct req_format RQF_MDS_READPAGE; extern struct req_format RQF_MDS_WRITEPAGE; -extern struct req_format RQF_MDS_DONE_WRITING; extern struct req_format RQF_MDS_REINT; extern struct req_format RQF_MDS_REINT_CREATE; extern struct req_format RQF_MDS_REINT_CREATE_ACL; @@ -166,10 +165,9 @@ extern struct req_format RQF_MDS_REINT_LINK; extern struct req_format RQF_MDS_REINT_RENAME; extern struct req_format RQF_MDS_REINT_SETATTR; extern struct req_format RQF_MDS_REINT_SETXATTR; -extern struct req_format RQF_MDS_QUOTACHECK; extern struct req_format RQF_MDS_QUOTACTL; -extern struct req_format RQF_QC_CALLBACK; extern struct req_format RQF_MDS_SWAP_LAYOUTS; +extern struct req_format RQF_MDS_REINT_MIGRATE; /* MDS hsm formats */ extern struct req_format RQF_MDS_HSM_STATE_GET; extern struct req_format RQF_MDS_HSM_STATE_SET; @@ -181,7 +179,6 @@ extern struct req_format RQF_MDS_HSM_REQUEST; /* OST req_format */ extern struct req_format RQF_OST_CONNECT; extern struct req_format RQF_OST_DISCONNECT; -extern struct req_format RQF_OST_QUOTACHECK; extern struct req_format RQF_OST_QUOTACTL; extern struct req_format RQF_OST_GETATTR; extern struct req_format RQF_OST_SETATTR; diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h index 90c183424802..03a970bcac55 100644 --- a/drivers/staging/lustre/lustre/include/lustre_sec.h +++ b/drivers/staging/lustre/lustre/include/lustre_sec.h @@ -50,6 +50,7 @@ struct brw_page; /* Linux specific */ struct key; struct seq_file; +struct lustre_cfg; /* * forward declaration @@ -1029,6 +1030,8 @@ int sptlrpc_target_export_check(struct obd_export *exp, /* bulk security api */ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc); +int get_free_pages_in_pool(void); +int pool_is_at_full_capacity(void); int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, struct ptlrpc_bulk_desc *desc); diff --git a/drivers/staging/lustre/lustre/include/lustre_swab.h b/drivers/staging/lustre/lustre/include/lustre_swab.h new file mode 100644 index 000000000000..26d01c2d6633 --- /dev/null +++ b/drivers/staging/lustre/lustre/include/lustre_swab.h @@ -0,0 +1,102 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + * + * Copyright 2015 Cray Inc, all rights reserved. + * Author: Ben Evans. + * + * We assume all nodes are either little-endian or big-endian, and we + * always send messages in the sender's native format. The receiver + * detects the message format by checking the 'magic' field of the message + * (see lustre_msg_swabbed()). + * + * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines, + * implemented in ptlrpc/lustre_swab.c. These 'swabbers' convert the + * type from "other" endian, in-place in the message buffer. + * + * A swabber takes a single pointer argument. The caller must already have + * verified that the length of the message buffer >= sizeof(type). + * + * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine + * may be defined that swabs just the variable part, after the caller has + * verified that the message buffer is large enough. + */ + +#ifndef _LUSTRE_SWAB_H_ +#define _LUSTRE_SWAB_H_ + +#include "lustre/lustre_idl.h" + +void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); +void lustre_swab_connect(struct obd_connect_data *ocd); +void lustre_swab_hsm_user_state(struct hsm_user_state *hus); +void lustre_swab_hsm_state_set(struct hsm_state_set *hss); +void lustre_swab_obd_statfs(struct obd_statfs *os); +void lustre_swab_obd_ioobj(struct obd_ioobj *ioo); +void lustre_swab_niobuf_remote(struct niobuf_remote *nbr); +void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb); +void lustre_swab_ost_lvb(struct ost_lvb *lvb); +void lustre_swab_obd_quotactl(struct obd_quotactl *q); +void lustre_swab_lquota_lvb(struct lquota_lvb *lvb); +void lustre_swab_generic_32s(__u32 *val); +void lustre_swab_mdt_body(struct mdt_body *b); +void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b); +void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); +void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); +void lustre_swab_lmv_desc(struct lmv_desc *ld); +void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm); +void lustre_swab_lov_desc(struct lov_desc *ld); +void lustre_swab_gl_desc(union ldlm_gl_desc *desc); +void lustre_swab_ldlm_intent(struct ldlm_intent *i); +void lustre_swab_ldlm_request(struct ldlm_request *rq); +void lustre_swab_ldlm_reply(struct ldlm_reply *r); +void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo); +void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo); +void lustre_swab_mgs_config_body(struct mgs_config_body *body); +void lustre_swab_mgs_config_res(struct mgs_config_res *body); +void lustre_swab_ost_body(struct ost_body *b); +void lustre_swab_ost_last_id(__u64 *id); +void lustre_swab_fiemap(struct fiemap *fiemap); +void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum); +void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum); +void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod, + int stripe_count); +void lustre_swab_lov_mds_md(struct lov_mds_md *lmm); +void lustre_swab_lustre_capa(struct lustre_capa *c); +void lustre_swab_lustre_capa_key(struct lustre_capa_key *k); +void
lustre_swab_fid2path(struct getinfo_fid2path *gf); +void lustre_swab_layout_intent(struct layout_intent *li); +void lustre_swab_hsm_current_action(struct hsm_current_action *action); +void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk); +void lustre_swab_hsm_user_item(struct hsm_user_item *hui); +void lustre_swab_hsm_request(struct hsm_request *hr); +void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl); +void lustre_swab_close_data(struct close_data *data); +void lustre_swab_lmv_user_md(struct lmv_user_md *lum); + +#endif diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h index f6fc4dd05bd6..0f48e9c3d9e3 100644 --- a/drivers/staging/lustre/lustre/include/obd.h +++ b/drivers/staging/lustre/lustre/include/obd.h @@ -73,70 +73,17 @@ static inline void loi_init(struct lov_oinfo *loi) { } -/* - * If we are unable to get the maximum object size from the OST in - * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using - * the old maximum object size from ext3. - */ -#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL - -struct lov_stripe_md { - atomic_t lsm_refc; - spinlock_t lsm_lock; - pid_t lsm_lock_owner; /* debugging */ - - /* maximum possible file size, might change as OSTs status changes, - * e.g. disconnected, deactivated - */ - __u64 lsm_maxbytes; - struct ost_id lsm_oi; - __u32 lsm_magic; - __u32 lsm_stripe_size; - __u32 lsm_pattern; /* striping pattern (RAID0, RAID1) */ - __u16 lsm_stripe_count; - __u16 lsm_layout_gen; - char lsm_pool_name[LOV_MAXPOOLNAME + 1]; - struct lov_oinfo *lsm_oinfo[0]; -}; - -static inline bool lsm_is_released(struct lov_stripe_md *lsm) -{ - return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED); -} - -static inline bool lsm_has_objects(struct lov_stripe_md *lsm) -{ - if (!lsm) - return false; - if (lsm_is_released(lsm)) - return false; - return true; -} - -static inline int lov_stripe_md_size(unsigned int stripe_count) -{ - struct lov_stripe_md lsm; - - return sizeof(lsm) + stripe_count * sizeof(lsm.lsm_oinfo[0]); -} - +struct lov_stripe_md; struct obd_info; typedef int (*obd_enqueue_update_f)(void *cookie, int rc); /* obd info for a particular level (lov, osc). */ struct obd_info { - /* Flags used for set request specific flags: - - while lock handling, the flags obtained on the enqueue - request are set here. - - while stats, the flags used for control delay/resend. - - while setattr, the flags used for distinguish punch operation - */ + /* OBD_STATFS_* flags */ __u64 oi_flags; /* lsm data specific for every OSC. */ struct lov_stripe_md *oi_md; - /* obdo data specific for every OSC, if needed at all. */ - struct obdo *oi_oa; /* statfs data specific for every OSC, if needed at all. */ struct obd_statfs *oi_osfs; /* An update callback which is called to update some data on upper @@ -204,7 +151,6 @@ enum obd_cl_sem_lock_class { * on the MDS. */ #define OBD_MAX_DEFAULT_EA_SIZE 4096 -#define OBD_MAX_DEFAULT_COOKIE_SIZE 4096 struct mdc_rpc_lock; struct obd_import; @@ -214,7 +160,7 @@ struct client_obd { struct obd_import *cl_import; /* ptlrpc connection state */ size_t cl_conn_count; /* - * Cache maximum and default values for easize and cookiesize. This is + * Cache maximum and default values for easize. This is * strictly a performance optimization to minimize calls to * obd_size_diskmd().
The default values are used to calculate the * initial size of a request buffer. The ptlrpc layer will resize the @@ -235,18 +181,6 @@ struct client_obd { * run-time if a larger observed size is advertised by the MDT. */ u32 cl_max_mds_easize; - /* Default cookie size for llog cookies (see struct llog_cookie). It is - * initialized to zero at mount-time, then it tracks the largest - * observed cookie size advertised by the MDT, up to a maximum value of - * OBD_MAX_DEFAULT_COOKIE_SIZE. Note that llog_cookies are not - * used by clients communicating with MDS versions 2.4.0 and later. - */ - u32 cl_default_mds_cookiesize; - /* Maximum possible cookie size computed at mount-time based on - * the number of OSTs in the filesystem. May be increased at - * run-time if a larger observed size is advertised by the MDT. - */ - u32 cl_max_mds_cookiesize; enum lustre_sec_part cl_sp_me; enum lustre_sec_part cl_sp_to; @@ -313,15 +247,42 @@ struct client_obd { struct obd_histogram cl_read_offset_hist; struct obd_histogram cl_write_offset_hist; - /* lru for osc caching pages */ + /* LRU for osc caching pages */ struct cl_client_cache *cl_cache; - struct list_head cl_lru_osc; /* member of cl_cache->ccc_lru */ + /** member of cl_cache->ccc_lru */ + struct list_head cl_lru_osc; + /** # of available LRU slots left in the per-OSC cache. + * Available LRU slots are shared by all OSCs of the same file system, + * therefore this is a pointer to cl_client_cache::ccc_lru_left. + */ atomic_long_t *cl_lru_left; + /** # of busy LRU pages. A page is considered busy if it's in the + * writeback queue or in transfer. Busy pages can't be discarded, so + * they are not in the LRU cache. + */ atomic_long_t cl_lru_busy; + /** # of LRU pages in the cache for this client_obd */ atomic_long_t cl_lru_in_list; + /** # of threads shrinking the LRU cache. To avoid contention, only one + * thread is allowed to shrink the LRU cache at a time. + */ atomic_t cl_lru_shrinkers; - struct list_head cl_lru_list; /* lru page list */ - spinlock_t cl_lru_list_lock; /* page list protector */ + /** The time when this LRU cache was last used. */ + time64_t cl_lru_last_used; + /** Stats: how many reclaims have happened for this client_obd. + * Reclaim is distinct from shrink: shrink is asynchronous, voluntary + * rebalancing; reclaim is synchronous, initiated by an I/O thread + * when LRU slots run short. + */ + u64 cl_lru_reclaim; + /** List of LRU pages for this client_obd */ + struct list_head cl_lru_list; + /** Lock for LRU page list */ + spinlock_t cl_lru_list_lock; + /** # of unstable pages in this client_obd. + * An unstable page is one whose WRITE RPC has finished but whose + * transaction has not yet committed.
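+ * Unstable pages are pinned until their transaction commits, so that + * the write can be replayed if the server fails first; after commit + * they can be reclaimed normally.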
+ */ atomic_long_t cl_unstable_count; /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ @@ -329,7 +290,17 @@ struct client_obd { wait_queue_head_t cl_destroy_waitq; struct mdc_rpc_lock *cl_rpc_lock; - struct mdc_rpc_lock *cl_close_lock; + + /* modify rpcs in flight + * currently used for metadata only + */ + spinlock_t cl_mod_rpcs_lock; + u16 cl_max_mod_rpcs_in_flight; + u16 cl_mod_rpcs_in_flight; + u16 cl_close_rpcs_in_flight; + wait_queue_head_t cl_mod_rpcs_waitq; + unsigned long *cl_mod_tag_bitmap; + struct obd_histogram cl_mod_rpcs_hist; /* mgc datastruct */ atomic_t cl_mgc_refcount; @@ -345,13 +316,6 @@ struct client_obd { /* also protected by the poorly named _loi_list_lock lock above */ struct osc_async_rc cl_ar; - /* used by quotacheck when the servers are older than 2.4 */ - int cl_qchk_stat; /* quotacheck stat of the peer */ -#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */ -#if OBD_OCD_VERSION(2, 7, 53, 0) < LUSTRE_VERSION_CODE -#warning "please consider removing quotacheck compatibility code" -#endif - /* sequence manager */ struct lu_client_seq *cl_seq; @@ -454,8 +418,6 @@ struct lmv_obd { int connected; int max_easize; int max_def_easize; - int max_cookiesize; - int max_def_cookiesize; u32 tgts_size; /* size of tgts array */ struct lmv_tgt_desc **tgts; @@ -469,9 +431,9 @@ struct niobuf_local { __u32 lnb_page_offset; __u32 lnb_len; __u32 lnb_flags; + int lnb_rc; struct page *lnb_page; void *lnb_data; - int lnb_rc; }; #define LUSTRE_FLD_NAME "fld" @@ -512,21 +474,6 @@ struct niobuf_local { /* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ #define N_LOCAL_TEMP_PAGE 0x10000000 -struct obd_trans_info { - __u64 oti_xid; - /* Only used on the server side for tracking acks. */ - struct oti_req_ack_lock { - struct lustre_handle lock; - __u32 mode; - } oti_ack_locks[4]; - void *oti_handle; - struct llog_cookie oti_onecookie; - struct llog_cookie *oti_logcookies; - - /** VBR: versions */ - __u64 oti_pre_version; -}; - /* * Events signalled through obd_notify() upcall-chain. 
*/ @@ -587,15 +534,14 @@ struct lvfs_run_ctxt { struct obd_device { struct obd_type *obd_type; - __u32 obd_magic; + u32 obd_magic; /* OBD_DEVICE_MAGIC */ + int obd_minor; /* device number: lctl dl */ + struct lu_device *obd_lu_dev; /* common and UUID name of this device */ - char obd_name[MAX_OBD_NAME]; - struct obd_uuid obd_uuid; - - struct lu_device *obd_lu_dev; + struct obd_uuid obd_uuid; + char obd_name[MAX_OBD_NAME]; - int obd_minor; /* bitfield modification is protected by obd_dev_lock */ unsigned long obd_attached:1, /* finished attach */ obd_set_up:1, /* finished setup */ @@ -619,22 +565,22 @@ struct obd_device { unsigned long obd_recovery_expired:1; /* uuid-export hash body */ struct cfs_hash *obd_uuid_hash; - atomic_t obd_refcount; wait_queue_head_t obd_refcount_waitq; struct list_head obd_exports; struct list_head obd_unlinked_exports; struct list_head obd_delayed_exports; + atomic_t obd_refcount; int obd_num_exports; spinlock_t obd_nid_lock; struct ldlm_namespace *obd_namespace; struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ /* a spinlock is OK for what we do now, may need a semaphore later */ spinlock_t obd_dev_lock; /* protect OBD bitfield above */ - struct mutex obd_dev_mutex; - __u64 obd_last_committed; spinlock_t obd_osfs_lock; struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ __u64 obd_osfs_age; + u64 obd_last_committed; + struct mutex obd_dev_mutex; struct lvfs_run_ctxt obd_lvfs_ctxt; struct obd_llog_group obd_olg; /* default llog group */ struct obd_device *obd_observer; @@ -648,12 +594,13 @@ struct obd_device { struct lov_obd lov; struct lmv_obd lmv; } u; + /* Fields used by LProcFS */ - unsigned int obd_cntr_base; - struct lprocfs_stats *obd_stats; + struct lprocfs_stats *obd_stats; + unsigned int obd_cntr_base; - unsigned int md_cntr_base; - struct lprocfs_stats *md_stats; + struct lprocfs_stats *md_stats; + unsigned int md_cntr_base; struct dentry *obd_debugfs_entry; struct dentry *obd_svc_debugfs_entry; @@ -665,9 +612,11 @@ struct obd_device { /** * Ldlm pool part. Save last calculated SLV and Limit. */ - rwlock_t obd_pool_lock; - int obd_pool_limit; - __u64 obd_pool_slv; + rwlock_t obd_pool_lock; + u64 obd_pool_slv; + int obd_pool_limit; + + int obd_conn_inprogress; /** * A list of outstanding class_incref()'s against this obd. 
For @@ -675,19 +624,10 @@ struct obd_device { */ struct lu_ref obd_reference; - int obd_conn_inprogress; - struct kobject obd_kobj; /* sysfs object */ struct completion obd_kobj_unregister; }; -enum obd_cleanup_stage { -/* Special case hack for MDS LOVs */ - OBD_CLEANUP_EARLY, -/* can be directly mapped to .ldto_device_fini() */ - OBD_CLEANUP_EXPORTS, -}; - /* get/set_info keys */ #define KEY_ASYNC "async" #define KEY_CHANGELOG_CLEAR "changelog_clear" @@ -704,7 +644,6 @@ enum obd_cleanup_stage { #define KEY_INTERMDS "inter_mds" #define KEY_LAST_ID "last_id" #define KEY_LAST_FID "last_fid" -#define KEY_LOVDESC "lovdesc" #define KEY_MAX_EASIZE "max_easize" #define KEY_DEFAULT_EASIZE "default_easize" #define KEY_MGSSEC "mgssec" @@ -720,22 +659,6 @@ enum obd_cleanup_stage { struct lu_context; -/* /!\ must be coherent with include/linux/namei.h on patched kernel */ -#define IT_OPEN (1 << 0) -#define IT_CREAT (1 << 1) -#define IT_READDIR (1 << 2) -#define IT_GETATTR (1 << 3) -#define IT_LOOKUP (1 << 4) -#define IT_UNLINK (1 << 5) -#define IT_TRUNC (1 << 6) -#define IT_GETXATTR (1 << 7) -#define IT_EXEC (1 << 8) -#define IT_PIN (1 << 9) -#define IT_LAYOUT (1 << 10) -#define IT_QUOTA_DQACQ (1 << 11) -#define IT_QUOTA_CONN (1 << 12) -#define IT_SETXATTR (1 << 13) - static inline int it_to_lock_mode(struct lookup_intent *it) { /* CREAT needs to be tested before open (both could be set) */ @@ -755,6 +678,14 @@ static inline int it_to_lock_mode(struct lookup_intent *it) return -EINVAL; } +enum md_op_flags { + MF_MDC_CANCEL_FID1 = BIT(0), + MF_MDC_CANCEL_FID2 = BIT(1), + MF_MDC_CANCEL_FID3 = BIT(2), + MF_MDC_CANCEL_FID4 = BIT(3), + MF_GET_MDT_IDX = BIT(4), +}; + enum md_cli_flags { CLI_SET_MEA = BIT(0), CLI_RM_ENTRY = BIT(1), @@ -789,8 +720,6 @@ struct md_op_data { __u64 op_valid; loff_t op_attr_blocks; - /* Size-on-MDS epoch and flags. */ - __u64 op_ioepoch; __u32 op_flags; /* Various operation flags. 
*/ @@ -839,15 +768,13 @@ struct obd_ops { int (*iocontrol)(unsigned int cmd, struct obd_export *exp, int len, void *karg, void __user *uarg); int (*get_info)(const struct lu_env *env, struct obd_export *, - __u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm); + __u32 keylen, void *key, __u32 *vallen, void *val); int (*set_info_async)(const struct lu_env *, struct obd_export *, __u32 keylen, void *key, __u32 vallen, void *val, struct ptlrpc_request_set *set); int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg); - int (*precleanup)(struct obd_device *dev, - enum obd_cleanup_stage cleanup_stage); + int (*precleanup)(struct obd_device *dev); int (*cleanup)(struct obd_device *dev); int (*process_config)(struct obd_device *dev, u32 len, void *data); int (*postrecov)(struct obd_device *dev); @@ -887,35 +814,23 @@ struct obd_ops { struct obd_statfs *osfs, __u64 max_age, __u32 flags); int (*statfs_async)(struct obd_export *exp, struct obd_info *oinfo, __u64 max_age, struct ptlrpc_request_set *set); - int (*packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src); - int (*unpackmd)(struct obd_export *exp, - struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, int disk_len); int (*create)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct obd_trans_info *oti); + struct obdo *oa); int (*destroy)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct obd_trans_info *oti); + struct obdo *oa); int (*setattr)(const struct lu_env *, struct obd_export *exp, - struct obd_info *oinfo, struct obd_trans_info *oti); - int (*setattr_async)(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - struct ptlrpc_request_set *rqset); + struct obdo *oa); int (*getattr)(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo); - int (*getattr_async)(struct obd_export *exp, struct obd_info *oinfo, - struct ptlrpc_request_set *set); + struct obdo *oa); int (*preprw)(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *remote, - int *nr_pages, struct niobuf_local *local, - struct obd_trans_info *oti); + int *nr_pages, struct niobuf_local *local); int (*commitrw)(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *remote, int pages, - struct niobuf_local *local, - struct obd_trans_info *oti, int rc); + struct niobuf_local *local, int rc); int (*init_export)(struct obd_export *exp); int (*destroy_export)(struct obd_export *exp); @@ -930,8 +845,6 @@ struct obd_ops { struct obd_uuid *(*get_uuid)(struct obd_export *exp); /* quota methods */ - int (*quotacheck)(struct obd_device *, struct obd_export *, - struct obd_quotactl *); int (*quotactl)(struct obd_device *, struct obd_export *, struct obd_quotactl *); @@ -954,7 +867,7 @@ struct obd_ops { /* lmv structures */ struct lustre_md { struct mdt_body *body; - struct lov_stripe_md *lsm; + struct lu_buf layout; struct lmv_stripe_md *lmv; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *posix_acl; @@ -992,10 +905,8 @@ struct md_ops { int (*create)(struct obd_export *, struct md_op_data *, const void *, size_t, umode_t, uid_t, gid_t, cfs_cap_t, __u64, struct ptlrpc_request **); - int (*done_writing)(struct obd_export *, struct md_op_data *, - struct md_open_data *); int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *, - const ldlm_policy_data_t 
*, + const union ldlm_policy_data *, struct lookup_intent *, struct md_op_data *, struct lustre_handle *, __u64); int (*getattr)(struct obd_export *, struct md_op_data *, @@ -1012,8 +923,7 @@ struct md_ops { const char *, size_t, const char *, size_t, struct ptlrpc_request **); int (*setattr)(struct obd_export *, struct md_op_data *, void *, - size_t, void *, size_t, struct ptlrpc_request **, - struct md_open_data **mod); + size_t, struct ptlrpc_request **); int (*sync)(struct obd_export *, const struct lu_fid *, struct ptlrpc_request **); int (*read_page)(struct obd_export *, struct md_op_data *, @@ -1030,7 +940,7 @@ struct md_ops { u64, const char *, const char *, int, int, int, struct ptlrpc_request **); - int (*init_ea_size)(struct obd_export *, u32, u32, u32, u32); + int (*init_ea_size)(struct obd_export *, u32, u32); int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *, struct obd_export *, struct obd_export *, @@ -1052,11 +962,11 @@ struct md_ops { enum ldlm_mode (*lock_match)(struct obd_export *, __u64, const struct lu_fid *, enum ldlm_type, - ldlm_policy_data_t *, enum ldlm_mode, + union ldlm_policy_data *, enum ldlm_mode, struct lustre_handle *); int (*cancel_unused)(struct obd_export *, const struct lu_fid *, - ldlm_policy_data_t *, enum ldlm_mode, + union ldlm_policy_data *, enum ldlm_mode, enum ldlm_cancel_flags flags, void *opaque); int (*get_fid_from_lsm)(struct obd_export *, @@ -1071,6 +981,8 @@ struct md_ops { int (*revalidate_lock)(struct obd_export *, struct lookup_intent *, struct lu_fid *, __u64 *bits); + int (*unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm, + const union lmv_mds_md *lmv, size_t lmv_size); /* * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. 
Also, add a @@ -1078,33 +990,6 @@ struct md_ops { */ }; -struct lsm_operations { - void (*lsm_free)(struct lov_stripe_md *); - void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, u64 *, - u64 *); - void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, u64 *, - u64 *); - int (*lsm_lmm_verify)(struct lov_mds_md *lmm, int lmm_bytes, - __u16 *stripe_count); - int (*lsm_unpackmd)(struct lov_obd *lov, struct lov_stripe_md *lsm, - struct lov_mds_md *lmm); -}; - -extern const struct lsm_operations lsm_v1_ops; -extern const struct lsm_operations lsm_v3_ops; -static inline const struct lsm_operations *lsm_op_find(int magic) -{ - switch (magic) { - case LOV_MAGIC_V1: - return &lsm_v1_ops; - case LOV_MAGIC_V3: - return &lsm_v3_ops; - default: - CERROR("Cannot recognize lsm_magic %08x\n", magic); - return NULL; - } -} - static inline struct md_open_data *obd_mod_alloc(void) { struct md_open_data *mod; diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h index 16094dbec08b..7ec25202cd22 100644 --- a/drivers/staging/lustre/lustre/include/obd_class.h +++ b/drivers/staging/lustre/lustre/include/obd_class.h @@ -100,6 +100,13 @@ int obd_get_request_slot(struct client_obd *cli); void obd_put_request_slot(struct client_obd *cli); __u32 obd_get_max_rpcs_in_flight(struct client_obd *cli); int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max); +int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, u16 max); +int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq); + +u16 obd_get_mod_rpc_slot(struct client_obd *cli, u32 opc, + struct lookup_intent *it); +void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc, + struct lookup_intent *it, u16 tag); struct llog_handle; struct llog_rec_hdr; @@ -175,10 +182,13 @@ struct lustre_profile { char *lp_profile; char *lp_dt; char *lp_md; + int lp_refs; + bool lp_list_deleted; }; struct lustre_profile *class_get_profile(const char *prof); void class_del_profile(const char *prof); +void class_put_profile(struct lustre_profile *lprof); void class_del_profiles(void); #if LUSTRE_TRACKS_LOCK_EXP_REFS @@ -269,10 +279,8 @@ static inline int lprocfs_climp_check(struct obd_device *obd) struct inode; struct lu_attr; struct obdo; -void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid); void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj); -void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid); #define OBT(dev) (dev)->obd_type #define OBP(dev, op) (dev)->obd_type->typ_dt_ops->op @@ -417,16 +425,14 @@ static inline int class_devno_max(void) static inline int obd_get_info(const struct lu_env *env, struct obd_export *exp, __u32 keylen, - void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm) + void *key, __u32 *vallen, void *val) { int rc; EXP_CHECK_DT_OP(exp, get_info); EXP_COUNTER_INCREMENT(exp, get_info); - rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val, - lsm); + rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val); return rc; } @@ -505,8 +511,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) return rc; } -static inline int obd_precleanup(struct obd_device *obd, - enum obd_cleanup_stage cleanup_stage) +static inline int obd_precleanup(struct obd_device *obd) { int rc; DECLARE_LU_VARS(ldt, d); @@ -517,20 +522,18 @@ static inline int obd_precleanup(struct obd_device *obd, ldt = obd->obd_type->typ_lu; d = obd->obd_lu_dev; if (ldt && d) { 
- if (cleanup_stage == OBD_CLEANUP_EXPORTS) { - struct lu_env env; + struct lu_env env; - rc = lu_env_init(&env, ldt->ldt_ctx_tags); - if (rc == 0) { - ldt->ldt_ops->ldto_device_fini(&env, d); - lu_env_fini(&env); - } + rc = lu_env_init(&env, ldt->ldt_ctx_tags); + if (!rc) { + ldt->ldt_ops->ldto_device_fini(&env, d); + lu_env_fini(&env); } } OBD_CHECK_DT_OP(obd, precleanup, 0); OBD_COUNTER_INCREMENT(obd, precleanup); - rc = OBP(obd, precleanup)(obd, cleanup_stage); + rc = OBP(obd, precleanup)(obd); return rc; } @@ -612,181 +615,51 @@ obd_process_config(struct obd_device *obd, int datalen, void *data) return rc; } -/* Pack an in-memory MD struct for storage on disk. - * Returns +ve size of packed MD (0 for free), or -ve error. - * - * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL). - * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed. - * If @*disk_tgt == NULL, it will be allocated - */ -static inline int obd_packmd(struct obd_export *exp, - struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src) -{ - int rc; - - EXP_CHECK_DT_OP(exp, packmd); - EXP_COUNTER_INCREMENT(exp, packmd); - - rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src); - return rc; -} - -static inline int obd_size_diskmd(struct obd_export *exp, - struct lov_stripe_md *mem_src) -{ - return obd_packmd(exp, NULL, mem_src); -} - -static inline int obd_free_diskmd(struct obd_export *exp, - struct lov_mds_md **disk_tgt) -{ - LASSERT(disk_tgt); - LASSERT(*disk_tgt); - /* - * LU-2590, for caller's convenience, *disk_tgt could be host - * endianness, it needs swab to LE if necessary, while just - * lov_mds_md header needs it for figuring out how much memory - * needs to be freed. - */ - if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) && - (((*disk_tgt)->lmm_magic == LOV_MAGIC_V1) || - ((*disk_tgt)->lmm_magic == LOV_MAGIC_V3))) - lustre_swab_lov_mds_md(*disk_tgt); - return obd_packmd(exp, disk_tgt, NULL); -} - -/* Unpack an MD struct from disk to in-memory format. - * Returns +ve size of unpacked MD (0 for free), or -ve error. - * - * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL). - * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed. 
- * If @*mem_tgt == NULL, it will be allocated - */ -static inline int obd_unpackmd(struct obd_export *exp, - struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, - int disk_len) -{ - int rc; - - EXP_CHECK_DT_OP(exp, unpackmd); - EXP_COUNTER_INCREMENT(exp, unpackmd); - - rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len); - return rc; -} - -static inline int obd_free_memmd(struct obd_export *exp, - struct lov_stripe_md **mem_tgt) -{ - int rc; - - LASSERT(mem_tgt); - LASSERT(*mem_tgt); - rc = obd_unpackmd(exp, mem_tgt, NULL, 0); - *mem_tgt = NULL; - return rc; -} - static inline int obd_create(const struct lu_env *env, struct obd_export *exp, - struct obdo *obdo, struct obd_trans_info *oti) + struct obdo *obdo) { int rc; EXP_CHECK_DT_OP(exp, create); EXP_COUNTER_INCREMENT(exp, create); - rc = OBP(exp->exp_obd, create)(env, exp, obdo, oti); + rc = OBP(exp->exp_obd, create)(env, exp, obdo); return rc; } static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp, - struct obdo *obdo, struct obd_trans_info *oti) + struct obdo *obdo) { int rc; EXP_CHECK_DT_OP(exp, destroy); EXP_COUNTER_INCREMENT(exp, destroy); - rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, oti); + rc = OBP(exp->exp_obd, destroy)(env, exp, obdo); return rc; } static inline int obd_getattr(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo) + struct obdo *oa) { int rc; EXP_CHECK_DT_OP(exp, getattr); EXP_COUNTER_INCREMENT(exp, getattr); - rc = OBP(exp->exp_obd, getattr)(env, exp, oinfo); - return rc; -} - -static inline int obd_getattr_async(struct obd_export *exp, - struct obd_info *oinfo, - struct ptlrpc_request_set *set) -{ - int rc; - - EXP_CHECK_DT_OP(exp, getattr_async); - EXP_COUNTER_INCREMENT(exp, getattr_async); - - rc = OBP(exp->exp_obd, getattr_async)(exp, oinfo, set); + rc = OBP(exp->exp_obd, getattr)(env, exp, oa); return rc; } static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo, - struct obd_trans_info *oti) + struct obdo *oa) { int rc; EXP_CHECK_DT_OP(exp, setattr); EXP_COUNTER_INCREMENT(exp, setattr); - rc = OBP(exp->exp_obd, setattr)(env, exp, oinfo, oti); - return rc; -} - -/* This performs all the requests set init/wait/destroy actions. */ -static inline int obd_setattr_rqset(struct obd_export *exp, - struct obd_info *oinfo, - struct obd_trans_info *oti) -{ - struct ptlrpc_request_set *set = NULL; - int rc; - - EXP_CHECK_DT_OP(exp, setattr_async); - EXP_COUNTER_INCREMENT(exp, setattr_async); - - set = ptlrpc_prep_set(); - if (!set) - return -ENOMEM; - - rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set); - if (rc == 0) - rc = ptlrpc_set_wait(set); - ptlrpc_set_destroy(set); - return rc; -} - -/* This adds all the requests into @set if @set != NULL, otherwise - * all requests are sent asynchronously without waiting for response. 
- */ -static inline int obd_setattr_async(struct obd_export *exp, - struct obd_info *oinfo, - struct obd_trans_info *oti, - struct ptlrpc_request_set *set) -{ - int rc; - - EXP_CHECK_DT_OP(exp, setattr_async); - EXP_COUNTER_INCREMENT(exp, setattr_async); - - rc = OBP(exp->exp_obd, setattr_async)(exp, oinfo, oti, set); + rc = OBP(exp->exp_obd, setattr)(env, exp, oa); return rc; } @@ -1053,15 +926,16 @@ static inline int obd_statfs_rqset(struct obd_export *exp, __u32 flags) { struct ptlrpc_request_set *set = NULL; - struct obd_info oinfo = { }; + struct obd_info oinfo = { + .oi_osfs = osfs, + .oi_flags = flags, + }; int rc = 0; - set = ptlrpc_prep_set(); + set = ptlrpc_prep_set(); if (!set) return -ENOMEM; - oinfo.oi_osfs = osfs; - oinfo.oi_flags = flags; rc = obd_statfs_async(exp, &oinfo, max_age, set); if (rc == 0) rc = ptlrpc_set_wait(set); @@ -1112,8 +986,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *remote, int *pages, - struct niobuf_local *local, - struct obd_trans_info *oti) + struct niobuf_local *local) { int rc; @@ -1121,7 +994,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd, EXP_COUNTER_INCREMENT(exp, preprw); rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote, - pages, local, oti); + pages, local); return rc; } @@ -1129,14 +1002,13 @@ static inline int obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int pages, - struct niobuf_local *local, - struct obd_trans_info *oti, int rc) + struct niobuf_local *local, int rc) { EXP_CHECK_DT_OP(exp, commitrw); EXP_COUNTER_INCREMENT(exp, commitrw); rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj, - rnb, pages, local, oti, rc); + rnb, pages, local, rc); return rc; } @@ -1219,18 +1091,6 @@ static inline int obd_notify_observer(struct obd_device *observer, return rc1 ? 
rc1 : rc2; } -static inline int obd_quotacheck(struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - int rc; - - EXP_CHECK_DT_OP(exp, quotacheck); - EXP_COUNTER_INCREMENT(exp, quotacheck); - - rc = OBP(exp->exp_obd, quotacheck)(exp->exp_obd, exp, oqctl); - return rc; -} - static inline int obd_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl) { @@ -1346,21 +1206,9 @@ static inline int md_create(struct obd_export *exp, struct md_op_data *op_data, return rc; } -static inline int md_done_writing(struct obd_export *exp, - struct md_op_data *op_data, - struct md_open_data *mod) -{ - int rc; - - EXP_CHECK_MD_OP(exp, done_writing); - EXP_MD_COUNTER_INCREMENT(exp, done_writing); - rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod); - return rc; -} - static inline int md_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, - const ldlm_policy_data_t *policy, + const union ldlm_policy_data *policy, struct lookup_intent *it, struct md_op_data *op_data, struct lustre_handle *lockh, @@ -1428,16 +1276,14 @@ static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data, } static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, void *ea2, size_t ea2len, - struct ptlrpc_request **request, - struct md_open_data **mod) + void *ea, size_t ealen, + struct ptlrpc_request **request) { int rc; EXP_CHECK_MD_OP(exp, setattr); EXP_MD_COUNTER_INCREMENT(exp, setattr); - rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, - ea2, ea2len, request, mod); + rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, request); return rc; } @@ -1561,7 +1407,7 @@ static inline int md_set_lock_data(struct obd_export *exp, static inline int md_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, enum ldlm_cancel_flags flags, void *opaque) @@ -1579,7 +1425,7 @@ static inline int md_cancel_unused(struct obd_export *exp, static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags, const struct lu_fid *fid, enum ldlm_type type, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, struct lustre_handle *lockh) { @@ -1589,14 +1435,12 @@ static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags, policy, mode, lockh); } -static inline int md_init_ea_size(struct obd_export *exp, int easize, - int def_asize, int cookiesize, - int def_cookiesize) +static inline int md_init_ea_size(struct obd_export *exp, u32 easize, + u32 def_asize) { EXP_CHECK_MD_OP(exp, init_ea_size); EXP_MD_COUNTER_INCREMENT(exp, init_ea_size); - return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize, - cookiesize, def_cookiesize); + return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize); } static inline int md_intent_getattr_async(struct obd_export *exp, @@ -1636,6 +1480,24 @@ static inline int md_get_fid_from_lsm(struct obd_export *exp, return rc; } +/* Unpack an MD struct from disk to in-memory format. + * Returns +ve size of unpacked MD (0 for free), or -ve error. + * + * If *plsm != NULL and lmm == NULL then *plsm will be freed. + * If *plsm == NULL then it will be allocated.
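+ * + * A minimal calling sketch (illustrative only, following the + * allocate/free contract described above): + * + *	struct lmv_stripe_md *lsm = NULL; + *	int rc = md_unpackmd(exp, &lsm, lmm, lmm_size);  (allocates *plsm) + *	if (rc < 0) + *		return rc; + *	... + *	md_unpackmd(exp, &lsm, NULL, 0);  (frees *plsm)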
+ */ +static inline int md_unpackmd(struct obd_export *exp, + struct lmv_stripe_md **plsm, + const union lmv_mds_md *lmm, size_t lmm_size) +{ + int rc; + + EXP_CHECK_MD_OP(exp, unpackmd); + EXP_MD_COUNTER_INCREMENT(exp, unpackmd); + rc = MDP(exp->exp_obd, unpackmd)(exp, plsm, lmm, lmm_size); + return rc; +} + /* OBD Metadata Support */ int obd_init_caches(void); diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index b346a7f10aa4..aaedec7d793c 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -172,14 +172,14 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123 #define OBD_FAIL_MDS_SYNC_NET 0x124 #define OBD_FAIL_MDS_SYNC_PACK 0x125 -#define OBD_FAIL_MDS_DONE_WRITING_NET 0x126 -#define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 +/* OBD_FAIL_MDS_DONE_WRITING_NET 0x126 obsolete since 2.8.0 */ +/* OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 obsolete since 2.8.0 */ #define OBD_FAIL_MDS_ALLOC_OBDO 0x128 #define OBD_FAIL_MDS_PAUSE_OPEN 0x129 #define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a #define OBD_FAIL_MDS_OPEN_CREATE 0x12b #define OBD_FAIL_MDS_OST_SETATTR 0x12c -#define OBD_FAIL_MDS_QUOTACHECK_NET 0x12d +/* OBD_FAIL_MDS_QUOTACHECK_NET 0x12d obsolete since 2.4 */ #define OBD_FAIL_MDS_QUOTACTL_NET 0x12e #define OBD_FAIL_MDS_CLIENT_ADD 0x12f #define OBD_FAIL_MDS_GETXATTR_NET 0x130 @@ -264,7 +264,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_ENOSPC 0x215 #define OBD_FAIL_OST_EROFS 0x216 #define OBD_FAIL_OST_ENOENT 0x217 -#define OBD_FAIL_OST_QUOTACHECK_NET 0x218 +/* OBD_FAIL_OST_QUOTACHECK_NET 0x218 obsolete since 2.4 */ #define OBD_FAIL_OST_QUOTACTL_NET 0x219 #define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a #define OBD_FAIL_OST_CHECKSUM_SEND 0x21b @@ -321,6 +321,8 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322 #define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323 +#define OBD_FAIL_LDLM_GRANT_CHECK 0x32a + /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 @@ -343,6 +345,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSC_CP_ENQ_RACE 0x410 #define OBD_FAIL_OSC_NO_GRANT 0x411 #define OBD_FAIL_OSC_DELAY_SETTIME 0x412 +#define OBD_FAIL_OSC_DELAY_IO 0x414 #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 @@ -373,7 +376,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 #define OBD_FAIL_OBD_LOGD_NET 0x602 -#define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 +/* OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 obsolete since 2.4 */ #define OBD_FAIL_OBD_DQACQ 0x604 #define OBD_FAIL_OBD_LLOG_SETUP 0x605 #define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606 @@ -458,6 +461,8 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LOV_INIT 0x1403 #define OBD_FAIL_GLIMPSE_DELAY 0x1404 #define OBD_FAIL_LLITE_XATTR_ENOMEM 0x1405 +#define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406 +#define OBD_FAIL_LLITE_LOST_LAYOUT 0x1407 #define OBD_FAIL_GETATTR_DELAY 0x1409 #define OBD_FAIL_FID_INDIR 0x1501 diff --git a/drivers/staging/lustre/lustre/include/seq_range.h b/drivers/staging/lustre/lustre/include/seq_range.h new file mode 100644 index 000000000000..30c4dd66d5c4 --- /dev/null +++ b/drivers/staging/lustre/lustre/include/seq_range.h @@ -0,0 +1,199 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + * + * Copyright 2015 Cray Inc, all rights reserved. + * Author: Ben Evans. + * + * Define lu_seq_range associated functions + */ + +#ifndef _SEQ_RANGE_H_ +#define _SEQ_RANGE_H_ + +#include "lustre/lustre_idl.h" + +/** + * Computes the sequence range type \a range + */ + +static inline unsigned int fld_range_type(const struct lu_seq_range *range) +{ + return range->lsr_flags & LU_SEQ_RANGE_MASK; +} + +/** + * Is this sequence range an OST? \a range + */ + +static inline bool fld_range_is_ost(const struct lu_seq_range *range) +{ + return fld_range_type(range) == LU_SEQ_RANGE_OST; +} + +/** + * Is this sequence range an MDT? \a range + */ + +static inline bool fld_range_is_mdt(const struct lu_seq_range *range) +{ + return fld_range_type(range) == LU_SEQ_RANGE_MDT; +} + +/** + * ANY range is only used when the fld client sends a fld query request, + * but it does not know whether the seq is an MDT or OST, so it will send the + * request with ANY type, which means any seq type from the lookup can be + * expected. \a range + */ +static inline unsigned int fld_range_is_any(const struct lu_seq_range *range) +{ + return fld_range_type(range) == LU_SEQ_RANGE_ANY; +} + +/** + * Apply flags to range \a range \a flags + */ + +static inline void fld_range_set_type(struct lu_seq_range *range, + unsigned int flags) +{ + range->lsr_flags |= flags; +} + +/** + * Add MDT to range type \a range + */ + +static inline void fld_range_set_mdt(struct lu_seq_range *range) +{ + fld_range_set_type(range, LU_SEQ_RANGE_MDT); +} + +/** + * Add OST to range type \a range + */ + +static inline void fld_range_set_ost(struct lu_seq_range *range) +{ + fld_range_set_type(range, LU_SEQ_RANGE_OST); +} + +/** + * Add ANY to range type \a range + */ + +static inline void fld_range_set_any(struct lu_seq_range *range) +{ + fld_range_set_type(range, LU_SEQ_RANGE_ANY); +} + +/** + * Computes the width of the given sequence range \a range + */ + +static inline u64 lu_seq_range_space(const struct lu_seq_range *range) +{ + return range->lsr_end - range->lsr_start; +} + +/** + * Initialize the range to zero \a range + */ + +static inline void lu_seq_range_init(struct lu_seq_range *range) +{ + memset(range, 0, sizeof(*range)); +} + +/** + * Check if the given seq id \a seq is within the given range \a range + */ + +static inline bool lu_seq_range_within(const struct lu_seq_range *range, + u64 seq) +{ + return seq >= range->lsr_start && seq < range->lsr_end; +} + +/** + * Is the range sane? Is the end after the beginning?
\a range + */ + +static inline bool lu_seq_range_is_sane(const struct lu_seq_range *range) +{ + return range->lsr_end >= range->lsr_start; +} + +/** + * Is the range 0? \a range + */ + +static inline bool lu_seq_range_is_zero(const struct lu_seq_range *range) +{ + return range->lsr_start == 0 && range->lsr_end == 0; +} + +/** + * Is the range out of space? \a range + */ + +static inline bool lu_seq_range_is_exhausted(const struct lu_seq_range *range) +{ + return lu_seq_range_space(range) == 0; +} + +/** + * return 0 if two ranges have the same location, nonzero if they are + * different \a r1 \a r2 + */ + +static inline int lu_seq_range_compare_loc(const struct lu_seq_range *r1, + const struct lu_seq_range *r2) +{ + return r1->lsr_index != r2->lsr_index || + r1->lsr_flags != r2->lsr_flags; +} + +#if !defined(__REQ_LAYOUT_USER__) +/** + * byte swap range structure \a range + */ + +void lustre_swab_lu_seq_range(struct lu_seq_range *range); +#endif +/** + * printf string and argument list for sequence range + */ +#define DRANGE "[%#16.16llx-%#16.16llx]:%x:%s" + +#define PRANGE(range) \ + (range)->lsr_start, \ + (range)->lsr_end, \ + (range)->lsr_index, \ + fld_range_is_mdt(range) ? "mdt" : "ost" + +#endif diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c index ecf472e4813d..32b73ee62639 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c @@ -193,6 +193,26 @@ void ldlm_extent_add_lock(struct ldlm_resource *res, * add the locks into grant list, for debug purpose, .. */ ldlm_resource_add_lock(res, &res->lr_granted, lock); + + if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GRANT_CHECK)) { + struct ldlm_lock *lck; + + list_for_each_entry_reverse(lck, &res->lr_granted, + l_res_link) { + if (lck == lock) + continue; + if (lockmode_compat(lck->l_granted_mode, + lock->l_granted_mode)) + continue; + if (ldlm_extent_overlap(&lck->l_req_extent, + &lock->l_req_extent)) { + CDEBUG(D_ERROR, "granting conflicting lock %p %p\n", + lck, lock); + ldlm_resource_dump(D_ERROR, res); + LBUG(); + } + } + } } /** Remove cancelled lock from resource interval tree. 
*/ @@ -220,8 +240,8 @@ void ldlm_extent_unlink_lock(struct ldlm_lock *lock) } } -void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy) +void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy) { memset(lpolicy, 0, sizeof(*lpolicy)); lpolicy->l_extent.start = wpolicy->l_extent.start; @@ -229,8 +249,8 @@ void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy, lpolicy->l_extent.gid = wpolicy->l_extent.gid; } -void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy) +void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy) { memset(wpolicy, 0, sizeof(*wpolicy)); wpolicy->l_extent.start = lpolicy->l_extent.start; diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c index 861f36f039b5..722160784f83 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c @@ -612,22 +612,8 @@ granted: } EXPORT_SYMBOL(ldlm_flock_completion_ast); -void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy) -{ - memset(lpolicy, 0, sizeof(*lpolicy)); - lpolicy->l_flock.start = wpolicy->l_flock.lfw_start; - lpolicy->l_flock.end = wpolicy->l_flock.lfw_end; - lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid; - /* Compat code, old clients had no idea about owner field and - * relied solely on pid for ownership. Introduced in LU-104, 2.1, - * April 2011 - */ - lpolicy->l_flock.owner = wpolicy->l_flock.lfw_pid; -} - -void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy) +void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy) { memset(lpolicy, 0, sizeof(*lpolicy)); lpolicy->l_flock.start = wpolicy->l_flock.lfw_start; @@ -636,8 +622,8 @@ void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy, lpolicy->l_flock.owner = wpolicy->l_flock.lfw_owner; } -void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy) +void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy) { memset(wpolicy, 0, sizeof(*wpolicy)); wpolicy->l_flock.lfw_start = lpolicy->l_flock.start; diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c index 79f4e6fa193e..8e1709dc073c 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c @@ -54,15 +54,15 @@ #include "../include/lustre_lib.h" #include "ldlm_internal.h" -void ldlm_ibits_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy) +void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy) { memset(lpolicy, 0, sizeof(*lpolicy)); lpolicy->l_inodebits.bits = wpolicy->l_inodebits.bits; } -void ldlm_ibits_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy) +void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy) { memset(wpolicy, 0, sizeof(*wpolicy)); wpolicy->l_inodebits.bits = lpolicy->l_inodebits.bits; diff --git 
a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h index 5e82cfc245b2..5c02501d0560 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h @@ -39,13 +39,13 @@ extern struct list_head ldlm_srv_namespace_list; extern struct mutex ldlm_cli_namespace_lock; extern struct list_head ldlm_cli_active_namespace_list; -static inline int ldlm_namespace_nr_read(ldlm_side_t client) +static inline int ldlm_namespace_nr_read(enum ldlm_side client) { return client == LDLM_NAMESPACE_SERVER ? ldlm_srv_namespace_nr : ldlm_cli_namespace_nr; } -static inline void ldlm_namespace_nr_inc(ldlm_side_t client) +static inline void ldlm_namespace_nr_inc(enum ldlm_side client) { if (client == LDLM_NAMESPACE_SERVER) ldlm_srv_namespace_nr++; @@ -53,7 +53,7 @@ static inline void ldlm_namespace_nr_inc(ldlm_side_t client) ldlm_cli_namespace_nr++; } -static inline void ldlm_namespace_nr_dec(ldlm_side_t client) +static inline void ldlm_namespace_nr_dec(enum ldlm_side client) { if (client == LDLM_NAMESPACE_SERVER) ldlm_srv_namespace_nr--; @@ -61,13 +61,13 @@ static inline void ldlm_namespace_nr_dec(ldlm_side_t client) ldlm_cli_namespace_nr--; } -static inline struct list_head *ldlm_namespace_list(ldlm_side_t client) +static inline struct list_head *ldlm_namespace_list(enum ldlm_side client) { return client == LDLM_NAMESPACE_SERVER ? &ldlm_srv_namespace_list : &ldlm_cli_active_namespace_list; } -static inline struct mutex *ldlm_namespace_lock(ldlm_side_t client) +static inline struct mutex *ldlm_namespace_lock(enum ldlm_side client) { return client == LDLM_NAMESPACE_SERVER ? &ldlm_srv_namespace_lock : &ldlm_cli_namespace_lock; @@ -79,22 +79,23 @@ static inline int ldlm_ns_empty(struct ldlm_namespace *ns) return atomic_read(&ns->ns_bref) == 0; } -void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *, ldlm_side_t); +void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *, + enum ldlm_side); void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *, - ldlm_side_t); -struct ldlm_namespace *ldlm_namespace_first_locked(ldlm_side_t); + enum ldlm_side); +struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side); /* ldlm_request.c */ /* Cancel lru flag, it indicates we cancel aged locks. */ enum { - LDLM_CANCEL_AGED = 1 << 0, /* Cancel aged locks (non lru resize). */ - LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */ - LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */ - LDLM_CANCEL_LRUR = 1 << 3, /* Cancel locks from lru resize. */ - LDLM_CANCEL_NO_WAIT = 1 << 4, /* Cancel locks w/o blocking (neither - * sending nor waiting for any rpcs) - */ - LDLM_CANCEL_LRUR_NO_WAIT = 1 << 5, /* LRUR + NO_WAIT */ + LDLM_LRU_FLAG_AGED = BIT(0), /* Cancel aged locks (non lru resize). */ + LDLM_LRU_FLAG_PASSED = BIT(1), /* Cancel passed number of locks. */ + LDLM_LRU_FLAG_SHRINK = BIT(2), /* Cancel locks from shrinker. */ + LDLM_LRU_FLAG_LRUR = BIT(3), /* Cancel locks from lru resize. 
*/ + LDLM_LRU_FLAG_NO_WAIT = BIT(4), /* Cancel locks w/o blocking (neither + * sending nor waiting for any rpcs) + */ + LDLM_LRU_FLAG_LRUR_NO_WAIT = BIT(5), /* LRUR + NO_WAIT */ }; int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, @@ -137,10 +138,10 @@ ldlm_lock_create(struct ldlm_namespace *ns, const struct ldlm_res_id *, void *data, __u32 lvb_len, enum lvb_type lvb_type); enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *, struct ldlm_lock **, void *cookie, __u64 *flags); -void ldlm_lock_addref_internal(struct ldlm_lock *, __u32 mode); -void ldlm_lock_addref_internal_nolock(struct ldlm_lock *, __u32 mode); -void ldlm_lock_decref_internal(struct ldlm_lock *, __u32 mode); -void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, __u32 mode); +void ldlm_lock_addref_internal(struct ldlm_lock *, enum ldlm_mode mode); +void ldlm_lock_addref_internal_nolock(struct ldlm_lock *, enum ldlm_mode mode); +void ldlm_lock_decref_internal(struct ldlm_lock *, enum ldlm_mode mode); +void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, enum ldlm_mode mode); int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list, enum ldlm_desc_ast_t ast_type); int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use); @@ -311,28 +312,25 @@ static inline int is_granted_or_cancelled(struct ldlm_lock *lock) return ret; } -typedef void (*ldlm_policy_wire_to_local_t)(const ldlm_wire_policy_data_t *, - ldlm_policy_data_t *); - -typedef void (*ldlm_policy_local_to_wire_t)(const ldlm_policy_data_t *, - ldlm_wire_policy_data_t *); - -void ldlm_plain_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy); -void ldlm_plain_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy); -void ldlm_ibits_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy); -void ldlm_ibits_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy); -void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy); -void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy); -void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy); -void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy); - -void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy); +typedef void (*ldlm_policy_wire_to_local_t)(const union ldlm_wire_policy_data *, + union ldlm_policy_data *); + +typedef void (*ldlm_policy_local_to_wire_t)(const union ldlm_policy_data *, + union ldlm_wire_policy_data *); + +void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy); +void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy); +void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy); +void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy); +void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy); +void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy); +void ldlm_flock_policy_wire_to_local(const 
union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy); +void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c index 153e990c494e..9be01426c955 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c @@ -170,6 +170,9 @@ int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid) ptlrpc_connection_put(dlmexp->exp_connection); dlmexp->exp_connection = NULL; } + + if (dlmexp) + class_export_put(dlmexp); } list_del(&imp_conn->oic_item); @@ -372,6 +375,25 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) } else { cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT; } + + spin_lock_init(&cli->cl_mod_rpcs_lock); + spin_lock_init(&cli->cl_mod_rpcs_hist.oh_lock); + cli->cl_max_mod_rpcs_in_flight = 0; + cli->cl_mod_rpcs_in_flight = 0; + cli->cl_close_rpcs_in_flight = 0; + init_waitqueue_head(&cli->cl_mod_rpcs_waitq); + cli->cl_mod_tag_bitmap = NULL; + + if (connect_op == MDS_CONNECT) { + cli->cl_max_mod_rpcs_in_flight = cli->cl_max_rpcs_in_flight - 1; + cli->cl_mod_tag_bitmap = kcalloc(BITS_TO_LONGS(OBD_MAX_RIF_MAX), + sizeof(long), GFP_NOFS); + if (!cli->cl_mod_tag_bitmap) { + rc = -ENOMEM; + goto err; + } + } + rc = ldlm_get_ref(); if (rc) { CERROR("ldlm_get_ref failed: %d\n", rc); @@ -399,9 +421,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) } cli->cl_import = imp; - /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */ + /* cli->cl_max_mds_easize updated by mdc_init_ea_size() */ cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3); - cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie); if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) { if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) { @@ -425,8 +446,6 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) goto err_import; } - cli->cl_qchk_stat = CL_NOT_QUOTACHECKED; - return rc; err_import: @@ -434,12 +453,16 @@ err_import: err_ldlm: ldlm_put_ref(); err: + kfree(cli->cl_mod_tag_bitmap); + cli->cl_mod_tag_bitmap = NULL; return rc; } EXPORT_SYMBOL(client_obd_setup); int client_obd_cleanup(struct obd_device *obddev) { + struct client_obd *cli = &obddev->u.cli; + ldlm_namespace_free_post(obddev->obd_namespace); obddev->obd_namespace = NULL; @@ -447,6 +470,10 @@ int client_obd_cleanup(struct obd_device *obddev) LASSERT(!obddev->u.cli.cl_import); ldlm_put_ref(); + + kfree(cli->cl_mod_tag_bitmap); + cli->cl_mod_tag_bitmap = NULL; + return 0; } EXPORT_SYMBOL(client_obd_cleanup); @@ -461,6 +488,7 @@ int client_connect_import(const struct lu_env *env, struct obd_import *imp = cli->cl_import; struct obd_connect_data *ocd; struct lustre_handle conn = { 0 }; + bool is_mdc = false; int rc; *exp = NULL; @@ -487,6 +515,10 @@ int client_connect_import(const struct lu_env *env, ocd = &imp->imp_connect_data; if (data) { *ocd = *data; + is_mdc = !strncmp(imp->imp_obd->obd_type->typ_name, + LUSTRE_MDC_NAME, 3); + if (is_mdc) + data->ocd_connect_flags |= OBD_CONNECT_MULTIMODRPCS; imp->imp_connect_flags_orig = data->ocd_connect_flags; } @@ -502,6 +534,11 @@ int client_connect_import(const struct lu_env *env, ocd->ocd_connect_flags, "old %#llx, new %#llx\n", data->ocd_connect_flags, ocd->ocd_connect_flags); data->ocd_connect_flags = ocd->ocd_connect_flags; + /* clear the flag as it was not set and is not known + * by upper 
layers + */ + if (is_mdc) + data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS; } ptlrpc_pinger_add_import(imp); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c index 3c48b4fb96f1..a4a291acb659 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c @@ -39,6 +39,7 @@ #include "../../include/linux/libcfs/libcfs.h" #include "../include/lustre_intent.h" +#include "../include/lustre_swab.h" #include "../include/obd_class.h" #include "ldlm_internal.h" @@ -63,17 +64,10 @@ static char *ldlm_typename[] = { [LDLM_IBITS] = "IBT", }; -static ldlm_policy_wire_to_local_t ldlm_policy_wire18_to_local[] = { +static ldlm_policy_wire_to_local_t ldlm_policy_wire_to_local[] = { [LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_wire_to_local, [LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_wire_to_local, - [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_wire18_to_local, - [LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_wire_to_local, -}; - -static ldlm_policy_wire_to_local_t ldlm_policy_wire21_to_local[] = { - [LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_wire_to_local, - [LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_wire_to_local, - [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_wire21_to_local, + [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_wire_to_local, [LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_wire_to_local, }; @@ -88,8 +82,8 @@ static ldlm_policy_local_to_wire_t ldlm_policy_local_to_wire[] = { * Converts lock policy from local format to on the wire lock_desc format */ static void ldlm_convert_policy_to_wire(enum ldlm_type type, - const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy) + const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy) { ldlm_policy_local_to_wire_t convert; @@ -102,23 +96,17 @@ static void ldlm_convert_policy_to_wire(enum ldlm_type type, * Converts lock policy from on the wire lock_desc format to local format */ void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type, - const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy) + const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy) { ldlm_policy_wire_to_local_t convert; - int new_client; - /** some badness for 2.0.0 clients, but 2.0.0 isn't supported */ - new_client = (exp_connect_flags(exp) & OBD_CONNECT_FULL20) != 0; - if (new_client) - convert = ldlm_policy_wire21_to_local[type - LDLM_MIN_TYPE]; - else - convert = ldlm_policy_wire18_to_local[type - LDLM_MIN_TYPE]; + convert = ldlm_policy_wire_to_local[type - LDLM_MIN_TYPE]; convert(wpolicy, lpolicy); } -char *ldlm_it2str(int it) +const char *ldlm_it2str(enum ldlm_intent_flags it) { switch (it) { case IT_OPEN: @@ -140,7 +128,7 @@ char *ldlm_it2str(int it) case IT_LAYOUT: return "layout"; default: - CERROR("Unknown intent %d\n", it); + CERROR("Unknown intent 0x%08x\n", it); return "UNKNOWN"; } } @@ -512,7 +500,6 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock, return 0; } -EXPORT_SYMBOL(ldlm_lock_change_resource); /** \defgroup ldlm_handles LDLM HANDLES * Ways to get hold of locks without any addresses. @@ -595,7 +582,6 @@ void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc) &lock->l_policy_data, &desc->l_policy_data); } -EXPORT_SYMBOL(ldlm_lock2desc); /** * Add a lock to list of conflicting locks to send AST to. 
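The ldlm_lock.c hunks above collapse the separate wire18/wire21 converter tables into a single ldlm_policy_wire_to_local[] array, now that pre-2.0 clients (the only users of the old flock wire format) are unsupported. The pattern is a plain dispatch table indexed by the lock type offset. Below is a minimal standalone sketch of that idiom; every name in it is invented for illustration, and only the (type - MIN_TYPE) indexing mirrors the patched code.

#include <stdio.h>

enum lock_type {
	LT_MIN = 10,		/* stands in for LDLM_MIN_TYPE */
	LT_PLAIN = 10,
	LT_EXTENT,
	LT_MAX,
};

typedef void (*wire_to_local_t)(const unsigned *wire, unsigned *local);

static void plain_wire_to_local(const unsigned *w, unsigned *l)
{
	(void)w;
	*l = 0;			/* plain locks carry no policy */
}

static void extent_wire_to_local(const unsigned *w, unsigned *l)
{
	*l = *w;		/* copy the wire policy verbatim */
}

/* One table replaces the old wire18/wire21 pair. */
static const wire_to_local_t wire_to_local[LT_MAX - LT_MIN] = {
	[LT_PLAIN - LT_MIN]  = plain_wire_to_local,
	[LT_EXTENT - LT_MIN] = extent_wire_to_local,
};

int main(void)
{
	unsigned wire = 42, local = 0;

	wire_to_local[LT_EXTENT - LT_MIN](&wire, &local);
	printf("local policy = %u\n", local);	/* prints 42 */
	return 0;
}

Designated initializers keep the table safe to extend: a lock type without a registered converter simply leaves a NULL slot rather than shifting its neighbours.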
@@ -658,7 +644,7 @@ static void ldlm_add_ast_work_item(struct ldlm_lock *lock, * r/w reference type is determined by \a mode * Calls ldlm_lock_addref_internal. */ -void ldlm_lock_addref(const struct lustre_handle *lockh, __u32 mode) +void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode) { struct ldlm_lock *lock; @@ -676,7 +662,8 @@ EXPORT_SYMBOL(ldlm_lock_addref); * Removes lock from LRU if it is there. * Assumes the LDLM lock is already locked. */ -void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode) +void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, + enum ldlm_mode mode) { ldlm_lock_remove_from_lru(lock); if (mode & (LCK_NL | LCK_CR | LCK_PR)) { @@ -700,7 +687,7 @@ void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode) * * \retval -EAGAIN lock is being canceled. */ -int ldlm_lock_addref_try(const struct lustre_handle *lockh, __u32 mode) +int ldlm_lock_addref_try(const struct lustre_handle *lockh, enum ldlm_mode mode) { struct ldlm_lock *lock; int result; @@ -726,7 +713,7 @@ EXPORT_SYMBOL(ldlm_lock_addref_try); * Locks LDLM lock and calls ldlm_lock_addref_internal_nolock to do the work. * Only called for local locks. */ -void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode) +void ldlm_lock_addref_internal(struct ldlm_lock *lock, enum ldlm_mode mode) { lock_res_and_lock(lock); ldlm_lock_addref_internal_nolock(lock, mode); @@ -740,7 +727,8 @@ void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode) * Does NOT add lock to LRU if no r/w references left to accommodate flock locks * that cannot be placed in LRU. */ -void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, __u32 mode) +void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, + enum ldlm_mode mode) { LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); if (mode & (LCK_NL | LCK_CR | LCK_PR)) { @@ -766,7 +754,7 @@ void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, __u32 mode) * on the namespace. * For blocked LDLM locks if r/w count drops to zero, blocking_ast is called. */ -void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) +void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode) { struct ldlm_namespace *ns; @@ -786,11 +774,16 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) } if (!lock->l_readers && !lock->l_writers && - ldlm_is_cbpending(lock)) { + (ldlm_is_cbpending(lock) || lock->l_req_mode == LCK_GROUP)) { /* If we received a blocked AST and this was the last reference, * run the callback. + * Group locks are special: + * They must not go in LRU, but they are not called back + * like non-group locks, instead they are manually released. + * They have an l_writers reference which they keep until + * they are manually released, so we remove them when they have + * no more reader or writer references. 
- LU-6368 */ - LDLM_DEBUG(lock, "final decref done on cbpending lock"); LDLM_LOCK_GET(lock); /* dropped by bl thread */ @@ -832,7 +825,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) /** * Decrease reader/writer refcount for LDLM lock with handle \a lockh */ -void ldlm_lock_decref(const struct lustre_handle *lockh, __u32 mode) +void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode) { struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); @@ -846,10 +839,9 @@ EXPORT_SYMBOL(ldlm_lock_decref); * Decrease reader/writer refcount for LDLM lock with handle * \a lockh and mark it for subsequent cancellation once r/w refcount * drops to zero instead of putting into LRU. - * - * Typical usage is for GROUP locks which we cannot allow to be cached. */ -void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, __u32 mode) +void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, + enum ldlm_mode mode) { struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); @@ -1055,88 +1047,173 @@ void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list) } /** - * Search for a lock with given properties in a queue. + * Describe the overlap between two locks. itree_overlap_cb data. + */ +struct lock_match_data { + struct ldlm_lock *lmd_old; + struct ldlm_lock *lmd_lock; + enum ldlm_mode *lmd_mode; + union ldlm_policy_data *lmd_policy; + __u64 lmd_flags; + int lmd_unref; +}; + +/** + * Check if the given @lock meets the criteria for a match. + * A reference on the lock is taken if matched. * - * \retval a referenced lock or NULL. See the flag descriptions below, in the - * comment above ldlm_lock_match + * \param lock test-against this lock + * \param data parameters */ -static struct ldlm_lock *search_queue(struct list_head *queue, - enum ldlm_mode *mode, - ldlm_policy_data_t *policy, - struct ldlm_lock *old_lock, - __u64 flags, int unref) +static int lock_matches(struct ldlm_lock *lock, struct lock_match_data *data) { - struct ldlm_lock *lock; - struct list_head *tmp; + union ldlm_policy_data *lpol = &lock->l_policy_data; + enum ldlm_mode match; - list_for_each(tmp, queue) { - enum ldlm_mode match; + if (lock == data->lmd_old) + return INTERVAL_ITER_STOP; - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - - if (lock == old_lock) - break; + /* + * Check if this lock can be matched. + * Used by LU-2919(exclusive open) for open lease lock + */ + if (ldlm_is_excl(lock)) + return INTERVAL_ITER_CONT; - /* Check if this lock can be matched. - * Used by LU-2919(exclusive open) for open lease lock - */ - if (ldlm_is_excl(lock)) - continue; + /* + * llite sometimes wants to match locks that will be + * canceled when their users drop, but we allow it to match + * if it passes in CBPENDING and the lock still has users. + * this is generally only going to be used by children + * whose parents already hold a lock so forward progress + * can still happen. + */ + if (ldlm_is_cbpending(lock) && + !(data->lmd_flags & LDLM_FL_CBPENDING)) + return INTERVAL_ITER_CONT; - /* llite sometimes wants to match locks that will be - * canceled when their users drop, but we allow it to match - * if it passes in CBPENDING and the lock still has users. - * this is generally only going to be used by children - * whose parents already hold a lock so forward progress - * can still happen. 
- */ - if (ldlm_is_cbpending(lock) && !(flags & LDLM_FL_CBPENDING)) - continue; - if (!unref && ldlm_is_cbpending(lock) && - lock->l_readers == 0 && lock->l_writers == 0) - continue; + if (!data->lmd_unref && ldlm_is_cbpending(lock) && + !lock->l_readers && !lock->l_writers) + return INTERVAL_ITER_CONT; - if (!(lock->l_req_mode & *mode)) - continue; - match = lock->l_req_mode; + if (!(lock->l_req_mode & *data->lmd_mode)) + return INTERVAL_ITER_CONT; + match = lock->l_req_mode; - if (lock->l_resource->lr_type == LDLM_EXTENT && - (lock->l_policy_data.l_extent.start > - policy->l_extent.start || - lock->l_policy_data.l_extent.end < policy->l_extent.end)) - continue; + switch (lock->l_resource->lr_type) { + case LDLM_EXTENT: + if (lpol->l_extent.start > data->lmd_policy->l_extent.start || + lpol->l_extent.end < data->lmd_policy->l_extent.end) + return INTERVAL_ITER_CONT; if (unlikely(match == LCK_GROUP) && - lock->l_resource->lr_type == LDLM_EXTENT && - policy->l_extent.gid != LDLM_GID_ANY && - lock->l_policy_data.l_extent.gid != policy->l_extent.gid) - continue; - - /* We match if we have existing lock with same or wider set + data->lmd_policy->l_extent.gid != LDLM_GID_ANY && + lpol->l_extent.gid != data->lmd_policy->l_extent.gid) + return INTERVAL_ITER_CONT; + break; + case LDLM_IBITS: + /* + * We match if we have existing lock with same or wider set * of bits. */ - if (lock->l_resource->lr_type == LDLM_IBITS && - ((lock->l_policy_data.l_inodebits.bits & - policy->l_inodebits.bits) != - policy->l_inodebits.bits)) - continue; + if ((lpol->l_inodebits.bits & + data->lmd_policy->l_inodebits.bits) != + data->lmd_policy->l_inodebits.bits) + return INTERVAL_ITER_CONT; + break; + default: + break; + } + /* + * We match if we have existing lock with same or wider set + * of bits. + */ + if (!data->lmd_unref && LDLM_HAVE_MASK(lock, GONE)) + return INTERVAL_ITER_CONT; + + if ((data->lmd_flags & LDLM_FL_LOCAL_ONLY) && + !ldlm_is_local(lock)) + return INTERVAL_ITER_CONT; + + if (data->lmd_flags & LDLM_FL_TEST_LOCK) { + LDLM_LOCK_GET(lock); + ldlm_lock_touch_in_lru(lock); + } else { + ldlm_lock_addref_internal_nolock(lock, match); + } + + *data->lmd_mode = match; + data->lmd_lock = lock; + + return INTERVAL_ITER_STOP; +} + +static unsigned int itree_overlap_cb(struct interval_node *in, void *args) +{ + struct ldlm_interval *node = to_ldlm_interval(in); + struct lock_match_data *data = args; + struct ldlm_lock *lock; + int rc; + + list_for_each_entry(lock, &node->li_group, l_sl_policy) { + rc = lock_matches(lock, data); + if (rc == INTERVAL_ITER_STOP) + return INTERVAL_ITER_STOP; + } + return INTERVAL_ITER_CONT; +} + +/** + * Search for a lock with given parameters in interval trees. + * + * \param res search for a lock in this resource + * \param data parameters + * + * \retval a referenced lock or NULL. 
+ */ +static struct ldlm_lock *search_itree(struct ldlm_resource *res, + struct lock_match_data *data) +{ + struct interval_node_extent ext = { + .start = data->lmd_policy->l_extent.start, + .end = data->lmd_policy->l_extent.end + }; + int idx; + + for (idx = 0; idx < LCK_MODE_NUM; idx++) { + struct ldlm_interval_tree *tree = &res->lr_itree[idx]; - if (!unref && LDLM_HAVE_MASK(lock, GONE)) + if (!tree->lit_root) continue; - if ((flags & LDLM_FL_LOCAL_ONLY) && !ldlm_is_local(lock)) + if (!(tree->lit_mode & *data->lmd_mode)) continue; - if (flags & LDLM_FL_TEST_LOCK) { - LDLM_LOCK_GET(lock); - ldlm_lock_touch_in_lru(lock); - } else { - ldlm_lock_addref_internal_nolock(lock, match); - } - *mode = match; - return lock; + interval_search(tree->lit_root, &ext, + itree_overlap_cb, data); } + return data->lmd_lock; +} +/** + * Search for a lock with given properties in a queue. + * + * \param queue search for a lock in this queue + * \param data parameters + * + * \retval a referenced lock or NULL. + */ +static struct ldlm_lock *search_queue(struct list_head *queue, + struct lock_match_data *data) +{ + struct ldlm_lock *lock; + int rc; + + list_for_each_entry(lock, queue, l_res_link) { + rc = lock_matches(lock, data); + if (rc == INTERVAL_ITER_STOP) + return data->lmd_lock; + } return NULL; } @@ -1147,7 +1224,6 @@ void ldlm_lock_fail_match_locked(struct ldlm_lock *lock) wake_up_all(&lock->l_waitq); } } -EXPORT_SYMBOL(ldlm_lock_fail_match_locked); /** * Mark lock as "matchable" by OST. @@ -1208,35 +1284,45 @@ EXPORT_SYMBOL(ldlm_lock_allow_match); enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, const struct ldlm_res_id *res_id, enum ldlm_type type, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, struct lustre_handle *lockh, int unref) { + struct lock_match_data data = { + .lmd_old = NULL, + .lmd_lock = NULL, + .lmd_mode = &mode, + .lmd_policy = policy, + .lmd_flags = flags, + .lmd_unref = unref, + }; struct ldlm_resource *res; - struct ldlm_lock *lock, *old_lock = NULL; + struct ldlm_lock *lock; int rc = 0; if (!ns) { - old_lock = ldlm_handle2lock(lockh); - LASSERT(old_lock); + data.lmd_old = ldlm_handle2lock(lockh); + LASSERT(data.lmd_old); - ns = ldlm_lock_to_ns(old_lock); - res_id = &old_lock->l_resource->lr_name; - type = old_lock->l_resource->lr_type; - mode = old_lock->l_req_mode; + ns = ldlm_lock_to_ns(data.lmd_old); + res_id = &data.lmd_old->l_resource->lr_name; + type = data.lmd_old->l_resource->lr_type; + *data.lmd_mode = data.lmd_old->l_req_mode; } res = ldlm_resource_get(ns, NULL, res_id, type, 0); if (IS_ERR(res)) { - LASSERT(!old_lock); + LASSERT(!data.lmd_old); return 0; } LDLM_RESOURCE_ADDREF(res); lock_res(res); - lock = search_queue(&res->lr_granted, &mode, policy, old_lock, - flags, unref); + if (res->lr_type == LDLM_EXTENT) + lock = search_itree(res, &data); + else + lock = search_queue(&res->lr_granted, &data); if (lock) { rc = 1; goto out; @@ -1245,14 +1331,12 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, rc = 0; goto out; } - lock = search_queue(&res->lr_waiting, &mode, policy, old_lock, - flags, unref); + lock = search_queue(&res->lr_waiting, &data); if (lock) { rc = 1; goto out; } - - out: +out: unlock_res(res); LDLM_RESOURCE_DELREF(res); ldlm_resource_putref(res); @@ -1324,8 +1408,8 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, (type == LDLM_PLAIN || type == LDLM_IBITS) ? 
res_id->name[3] : policy->l_extent.end); } - if (old_lock) - LDLM_LOCK_PUT(old_lock); + if (data.lmd_old) + LDLM_LOCK_PUT(data.lmd_old); return rc ? mode : 0; } diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c index fde697ebaadc..12647af5a336 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c @@ -511,23 +511,6 @@ static inline void ldlm_callback_errmsg(struct ptlrpc_request *req, CWARN("Send reply failed, maybe cause bug 21636.\n"); } -static int ldlm_handle_qc_callback(struct ptlrpc_request *req) -{ - struct obd_quotactl *oqctl; - struct client_obd *cli = &req->rq_export->exp_obd->u.cli; - - oqctl = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - if (!oqctl) { - CERROR("Can't unpack obd_quotactl\n"); - return -EPROTO; - } - - oqctl->qc_stat = ptlrpc_status_ntoh(oqctl->qc_stat); - - cli->cl_qchk_stat = oqctl->qc_stat; - return 0; -} - /* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */ static int ldlm_callback_handler(struct ptlrpc_request *req) { @@ -577,13 +560,6 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) rc = ldlm_handle_setinfo(req); ldlm_callback_reply(req, rc); return 0; - case OBD_QC_CALLBACK: - req_capsule_set(&req->rq_pill, &RQF_QC_CALLBACK); - if (OBD_FAIL_CHECK(OBD_FAIL_OBD_QC_CALLBACK_NET)) - return 0; - rc = ldlm_handle_qc_callback(req); - ldlm_callback_reply(req, rc); - return 0; default: CERROR("unknown opcode %u\n", lustre_msg_get_opc(req->rq_reqmsg)); @@ -858,7 +834,6 @@ int ldlm_get_ref(void) return rc; } -EXPORT_SYMBOL(ldlm_get_ref); void ldlm_put_ref(void) { @@ -875,7 +850,6 @@ void ldlm_put_ref(void) } mutex_unlock(&ldlm_ref_mutex); } -EXPORT_SYMBOL(ldlm_put_ref); static ssize_t cancel_unused_locks_before_replay_show(struct kobject *kobj, struct attribute *attr, diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c index 0aed39c46154..862ea0a1dc97 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c @@ -54,14 +54,14 @@ #include "ldlm_internal.h" -void ldlm_plain_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy) +void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy) { /* No policy for plain locks */ } -void ldlm_plain_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy) +void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy) { /* No policy for plain locks */ } diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index 9a1136e32dfc..8dfb3c8e6b7a 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c @@ -293,7 +293,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) * take into account pl->pl_recalc_time here. 
*/ ret = ldlm_cancel_lru(container_of(pl, struct ldlm_namespace, ns_pool), - 0, LCF_ASYNC, LDLM_CANCEL_LRUR); + 0, LCF_ASYNC, LDLM_LRU_FLAG_LRUR); out: spin_lock(&pl->pl_lock); @@ -339,7 +339,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, if (nr == 0) return (unused / 100) * sysctl_vfs_cache_pressure; else - return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK); + return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_LRU_FLAG_SHRINK); } static const struct ldlm_pool_ops ldlm_cli_pool_ops = { @@ -356,10 +356,10 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl) u32 recalc_interval_sec; int count; - recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; + recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { spin_lock(&pl->pl_lock); - recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; + recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* @@ -382,7 +382,7 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl) count); } - recalc_interval_sec = pl->pl_recalc_time - ktime_get_seconds() + + recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() + pl->pl_recalc_period; if (recalc_interval_sec <= 0) { /* DEBUG: should be re-removed after LU-4536 is fixed */ @@ -651,13 +651,13 @@ static void ldlm_pool_debugfs_fini(struct ldlm_pool *pl) } int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, - int idx, ldlm_side_t client) + int idx, enum ldlm_side client) { int rc; spin_lock_init(&pl->pl_lock); atomic_set(&pl->pl_granted, 0); - pl->pl_recalc_time = ktime_get_seconds(); + pl->pl_recalc_time = ktime_get_real_seconds(); atomic_set(&pl->pl_lock_volume_factor, 1); atomic_set(&pl->pl_grant_rate, 0); @@ -684,7 +684,6 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, return rc; } -EXPORT_SYMBOL(ldlm_pool_init); void ldlm_pool_fini(struct ldlm_pool *pl) { @@ -698,7 +697,6 @@ void ldlm_pool_fini(struct ldlm_pool *pl) */ POISON(pl, 0x5a, sizeof(*pl)); } -EXPORT_SYMBOL(ldlm_pool_fini); /** * Add new taken ldlm lock \a lock into pool \a pl accounting. @@ -724,7 +722,6 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock) * with too long call paths. */ } -EXPORT_SYMBOL(ldlm_pool_add); /** * Remove ldlm lock \a lock from pool \a pl accounting. @@ -743,7 +740,6 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock) lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT); } -EXPORT_SYMBOL(ldlm_pool_del); /** * Returns current \a pl SLV. @@ -792,13 +788,12 @@ static struct completion ldlm_pools_comp; * count locks from all namespaces (if possible). Returns number of * cached locks. */ -static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask) +static unsigned long ldlm_pools_count(enum ldlm_side client, gfp_t gfp_mask) { unsigned long total = 0; int nr_ns; struct ldlm_namespace *ns; struct ldlm_namespace *ns_old = NULL; /* loop detection */ - void *cookie; if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) return 0; @@ -806,8 +801,6 @@ static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask) CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n", client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); - cookie = cl_env_reenter(); - /* * Find out how many resources we may release. 
*/ @@ -816,7 +809,6 @@ static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask) mutex_lock(ldlm_namespace_lock(client)); if (list_empty(ldlm_namespace_list(client))) { mutex_unlock(ldlm_namespace_lock(client)); - cl_env_reexit(cookie); return 0; } ns = ldlm_namespace_first_locked(client); @@ -842,22 +834,19 @@ static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask) ldlm_namespace_put(ns); } - cl_env_reexit(cookie); return total; } -static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, gfp_t gfp_mask) +static unsigned long ldlm_pools_scan(enum ldlm_side client, int nr, + gfp_t gfp_mask) { unsigned long freed = 0; int tmp, nr_ns; struct ldlm_namespace *ns; - void *cookie; if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) return -1; - cookie = cl_env_reenter(); - /* * Shrink at least ldlm_namespace_nr_read(client) namespaces. */ @@ -887,7 +876,6 @@ static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, gfp_t gfp_mask) freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); ldlm_namespace_put(ns); } - cl_env_reexit(cookie); /* * we only decrease the SLV in server pools shrinker, return * SHRINK_STOP to kernel to avoid needless loop. LU-1128 @@ -908,7 +896,7 @@ static unsigned long ldlm_pools_cli_scan(struct shrinker *s, sc->gfp_mask); } -static int ldlm_pools_recalc(ldlm_side_t client) +static int ldlm_pools_recalc(enum ldlm_side client) { struct ldlm_namespace *ns; struct ldlm_namespace *ns_old = NULL; @@ -1095,7 +1083,6 @@ int ldlm_pools_init(void) return rc; } -EXPORT_SYMBOL(ldlm_pools_init); void ldlm_pools_fini(void) { @@ -1104,4 +1091,3 @@ void ldlm_pools_fini(void) ldlm_pools_thread_stop(); } -EXPORT_SYMBOL(ldlm_pools_fini); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c index 35ba6f14d95f..c1f8693f94a5 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c @@ -93,11 +93,7 @@ static int ldlm_expired_completion_wait(void *data) if (!lock->l_conn_export) { static unsigned long next_dump, last_dump; - LCONSOLE_WARN("lock timed out (enqueued at %lld, %llds ago)\n", - (s64)lock->l_last_activity, - (s64)(ktime_get_real_seconds() - - lock->l_last_activity)); - LDLM_DEBUG(lock, "lock timed out (enqueued at %lld, %llds ago); not entering recovery in server code, just going back to sleep", + LDLM_ERROR(lock, "lock timed out (enqueued at %lld, %llds ago); not entering recovery in server code, just going back to sleep", (s64)lock->l_last_activity, (s64)(ktime_get_real_seconds() - lock->l_last_activity)); @@ -475,12 +471,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, "client-side enqueue, new policy data"); } - if ((*flags) & LDLM_FL_AST_SENT || - /* Cancel extent locks as soon as possible on a liblustre client, - * because it cannot handle asynchronous ASTs robustly (see - * bug 7311). - */ - (LIBLUSTRE_CLIENT && type == LDLM_EXTENT)) { + if ((*flags) & LDLM_FL_AST_SENT) { lock_res_and_lock(lock); lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST; unlock_res_and_lock(lock); @@ -602,7 +593,7 @@ int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req, avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff); flags = ns_connect_lru_resize(ns) ? - LDLM_CANCEL_LRUR_NO_WAIT : LDLM_CANCEL_AGED; + LDLM_LRU_FLAG_LRUR_NO_WAIT : LDLM_LRU_FLAG_AGED; to_free = !ns_connect_lru_resize(ns) && opc == LDLM_ENQUEUE ? 
1 : 0; @@ -657,6 +648,27 @@ int ldlm_prep_enqueue_req(struct obd_export *exp, struct ptlrpc_request *req, } EXPORT_SYMBOL(ldlm_prep_enqueue_req); +static struct ptlrpc_request *ldlm_enqueue_pack(struct obd_export *exp, + int lvb_len) +{ + struct ptlrpc_request *req; + int rc; + + req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE); + if (!req) + return ERR_PTR(-ENOMEM); + + rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); + if (rc) { + ptlrpc_request_free(req); + return ERR_PTR(rc); + } + + req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len); + ptlrpc_request_set_replen(req); + return req; +} + /** * Client-side lock enqueue. * @@ -670,7 +682,7 @@ EXPORT_SYMBOL(ldlm_prep_enqueue_req); int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, struct ldlm_enqueue_info *einfo, const struct ldlm_res_id *res_id, - ldlm_policy_data_t const *policy, __u64 *flags, + union ldlm_policy_data const *policy, __u64 *flags, void *lvb, __u32 lvb_len, enum lvb_type lvb_type, struct lustre_handle *lockh, int async) { @@ -727,17 +739,14 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, lock->l_last_activity = ktime_get_real_seconds(); /* lock not sent to server yet */ - if (!reqp || !*reqp) { - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_LDLM_ENQUEUE, - LUSTRE_DLM_VERSION, - LDLM_ENQUEUE); - if (!req) { + req = ldlm_enqueue_pack(exp, lvb_len); + if (IS_ERR(req)) { failed_lock_cleanup(ns, lock, einfo->ei_mode); LDLM_LOCK_RELEASE(lock); - return -ENOMEM; + return PTR_ERR(req); } + req_passed_in = 0; if (reqp) *reqp = req; @@ -757,24 +766,6 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, body->lock_flags = ldlm_flags_to_wire(*flags); body->lock_handle[0] = *lockh; - /* Continue as normal. */ - if (!req_passed_in) { - if (lvb_len > 0) - req_capsule_extend(&req->rq_pill, - &RQF_LDLM_ENQUEUE_LVB); - req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, - lvb_len); - ptlrpc_request_set_replen(req); - } - - /* - * Liblustre client doesn't get extent locks, except for O_APPEND case - * where [0, OBD_OBJECT_EOF] lock is taken, or truncate, where - * [i_size, OBD_OBJECT_EOF] lock is taken. - */ - LASSERT(ergo(LIBLUSTRE_CLIENT, einfo->ei_type != LDLM_EXTENT || - policy->l_extent.end == OBD_OBJECT_EOF)); - if (async) { LASSERT(reqp); return 0; @@ -1022,7 +1013,6 @@ int ldlm_cli_update_pool(struct ptlrpc_request *req) return 0; } -EXPORT_SYMBOL(ldlm_cli_update_pool); /** * Client side lock cancel. @@ -1067,7 +1057,7 @@ int ldlm_cli_cancel(const struct lustre_handle *lockh, ns = ldlm_lock_to_ns(lock); flags = ns_connect_lru_resize(ns) ? - LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED; + LDLM_LRU_FLAG_LRUR : LDLM_LRU_FLAG_AGED; count += ldlm_cancel_lru_local(ns, &cancels, 0, avail - 1, LCF_BL_AST, flags); } @@ -1125,7 +1115,6 @@ int ldlm_cli_cancel_list_local(struct list_head *cancels, int count, return count; } -EXPORT_SYMBOL(ldlm_cli_cancel_list_local); /** * Cancel as many locks as possible w/o sending any RPCs (e.g. to write back @@ -1184,6 +1173,14 @@ static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, if (count && added >= count) return LDLM_POLICY_KEEP_LOCK; + /* + * Despite of the LV, It doesn't make sense to keep the lock which + * is unused for ns_max_age time. 
+ */ + if (cfs_time_after(cfs_time_current(), + cfs_time_add(lock->l_last_used, ns->ns_max_age))) + return LDLM_POLICY_CANCEL_LOCK; + slv = ldlm_pool_get_slv(pl); lvf = ldlm_pool_get_lvf(pl); la = cfs_duration_sec(cfs_time_sub(cur, lock->l_last_used)); @@ -1287,21 +1284,21 @@ typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)( static ldlm_cancel_lru_policy_t ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) { - if (flags & LDLM_CANCEL_NO_WAIT) + if (flags & LDLM_LRU_FLAG_NO_WAIT) return ldlm_cancel_no_wait_policy; if (ns_connect_lru_resize(ns)) { - if (flags & LDLM_CANCEL_SHRINK) + if (flags & LDLM_LRU_FLAG_SHRINK) /* We kill passed number of old locks. */ return ldlm_cancel_passed_policy; - else if (flags & LDLM_CANCEL_LRUR) + else if (flags & LDLM_LRU_FLAG_LRUR) return ldlm_cancel_lrur_policy; - else if (flags & LDLM_CANCEL_PASSED) + else if (flags & LDLM_LRU_FLAG_PASSED) return ldlm_cancel_passed_policy; - else if (flags & LDLM_CANCEL_LRUR_NO_WAIT) + else if (flags & LDLM_LRU_FLAG_LRUR_NO_WAIT) return ldlm_cancel_lrur_no_wait_policy; } else { - if (flags & LDLM_CANCEL_AGED) + if (flags & LDLM_LRU_FLAG_AGED) return ldlm_cancel_aged_policy; } @@ -1325,21 +1322,21 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) * * Calling policies for enabled LRU resize: * ---------------------------------------- - * flags & LDLM_CANCEL_LRUR - use LRU resize policy (SLV from server) to - * cancel not more than \a count locks; + * flags & LDLM_LRU_FLAG_LRUR - use LRU resize policy (SLV from server) to + * cancel not more than \a count locks; * - * flags & LDLM_CANCEL_PASSED - cancel \a count number of old locks (located at - * the beginning of LRU list); + * flags & LDLM_LRU_FLAG_PASSED - cancel \a count number of old locks (located at + * the beginning of LRU list); * - * flags & LDLM_CANCEL_SHRINK - cancel not more than \a count locks according to - * memory pressure policy function; + * flags & LDLM_LRU_FLAG_SHRINK - cancel not more than \a count locks according to + * memory pressure policy function; * - * flags & LDLM_CANCEL_AGED - cancel \a count locks according to "aged policy". + * flags & LDLM_LRU_FLAG_AGED - cancel \a count locks according to "aged policy". * - * flags & LDLM_CANCEL_NO_WAIT - cancel as many unused locks as possible - * (typically before replaying locks) w/o - * sending any RPCs or waiting for any - * outstanding RPC to complete. + * flags & LDLM_LRU_FLAG_NO_WAIT - cancel as many unused locks as possible + * (typically before replaying locks) w/o + * sending any RPCs or waiting for any + * outstanding RPC to complete. 
*/ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, struct list_head *cancels, int count, int max, @@ -1348,7 +1345,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, ldlm_cancel_lru_policy_t pf; struct ldlm_lock *lock, *next; int added = 0, unused, remained; - int no_wait = flags & (LDLM_CANCEL_NO_WAIT | LDLM_CANCEL_LRUR_NO_WAIT); + int no_wait = flags & (LDLM_LRU_FLAG_NO_WAIT | LDLM_LRU_FLAG_LRUR_NO_WAIT); spin_lock(&ns->ns_lock); unused = ns->ns_nr_unused; @@ -1531,7 +1528,7 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, */ int ldlm_cancel_resource_local(struct ldlm_resource *res, struct list_head *cancels, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, __u64 lock_flags, enum ldlm_cancel_flags cancel_flags, void *opaque) @@ -1648,7 +1645,7 @@ EXPORT_SYMBOL(ldlm_cli_cancel_list); */ int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, const struct ldlm_res_id *res_id, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, enum ldlm_cancel_flags flags, void *opaque) @@ -1723,7 +1720,7 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, opaque); } else { cfs_hash_for_each_nolock(ns->ns_rs_hash, - ldlm_cli_hash_cancel_unused, &arg); + ldlm_cli_hash_cancel_unused, &arg, 0); return ELDLM_OK; } } @@ -1796,7 +1793,7 @@ static void ldlm_namespace_foreach(struct ldlm_namespace *ns, }; cfs_hash_for_each_nolock(ns->ns_rs_hash, - ldlm_res_iter_helper, &helper); + ldlm_res_iter_helper, &helper, 0); } /* non-blocking function to manipulate a lock whose cb_data is being put away. @@ -1840,7 +1837,7 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure) * bug 17614: locks being actively cancelled. Get a reference * on a lock so that it does not disappear under us (e.g. due to cancel) */ - if (!(lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCELING))) { + if (!(lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_BL_DONE))) { list_add(&lock->l_pending_chain, list); LDLM_LOCK_GET(lock); } @@ -1909,7 +1906,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) int flags; /* Bug 11974: Do not replay a lock which is actively being canceled */ - if (ldlm_is_canceling(lock)) { + if (ldlm_is_bl_done(lock)) { LDLM_DEBUG(lock, "Not replaying canceled lock:"); return 0; } @@ -2003,11 +2000,11 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns) ldlm_ns_name(ns), ns->ns_nr_unused); /* We don't need to care whether or not LRU resize is enabled - * because the LDLM_CANCEL_NO_WAIT policy doesn't use the + * because the LDLM_LRU_FLAG_NO_WAIT policy doesn't use the * count parameter */ canceled = ldlm_cancel_lru_local(ns, &cancels, ns->ns_nr_unused, 0, - LCF_LOCAL, LDLM_CANCEL_NO_WAIT); + LCF_LOCAL, LDLM_LRU_FLAG_NO_WAIT); CDEBUG(D_DLMTRACE, "Canceled %d unused locks from namespace %s\n", canceled, ldlm_ns_name(ns)); @@ -2048,4 +2045,3 @@ int ldlm_replay_locks(struct obd_import *imp) return rc; } -EXPORT_SYMBOL(ldlm_replay_locks); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c index a09c25aea698..b22f5bae7201 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c @@ -226,7 +226,7 @@ static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr, /* Try to cancel all @ns_nr_unused locks. 
*/ canceled = ldlm_cancel_lru(ns, unused, 0, - LDLM_CANCEL_PASSED); + LDLM_LRU_FLAG_PASSED); if (canceled < unused) { CDEBUG(D_DLMTRACE, "not all requested locks are canceled, requested: %d, canceled: %d\n", @@ -237,7 +237,7 @@ static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr, } else { tmp = ns->ns_max_unused; ns->ns_max_unused = 0; - ldlm_cancel_lru(ns, 0, 0, LDLM_CANCEL_PASSED); + ldlm_cancel_lru(ns, 0, 0, LDLM_LRU_FLAG_PASSED); ns->ns_max_unused = tmp; } return count; @@ -262,7 +262,7 @@ static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr, "changing namespace %s unused locks from %u to %u\n", ldlm_ns_name(ns), ns->ns_nr_unused, (unsigned int)tmp); - ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_CANCEL_PASSED); + ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_LRU_FLAG_PASSED); if (!lru_resize) { CDEBUG(D_DLMTRACE, @@ -276,7 +276,7 @@ static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr, ldlm_ns_name(ns), ns->ns_max_unused, (unsigned int)tmp); ns->ns_max_unused = (unsigned int)tmp; - ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_CANCEL_PASSED); + ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_LRU_FLAG_PASSED); /* Make sure that LRU resize was originally supported before * turning it on here. @@ -445,8 +445,8 @@ static struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res) return res; } -static unsigned ldlm_res_hop_hash(struct cfs_hash *hs, - const void *key, unsigned mask) +static unsigned int ldlm_res_hop_hash(struct cfs_hash *hs, + const void *key, unsigned int mask) { const struct ldlm_res_id *id = key; unsigned int val = 0; @@ -457,8 +457,8 @@ static unsigned ldlm_res_hop_hash(struct cfs_hash *hs, return val & mask; } -static unsigned ldlm_res_hop_fid_hash(struct cfs_hash *hs, - const void *key, unsigned mask) +static unsigned int ldlm_res_hop_fid_hash(struct cfs_hash *hs, + const void *key, unsigned int mask) { const struct ldlm_res_id *id = key; struct lu_fid fid; @@ -612,7 +612,7 @@ static struct ldlm_ns_hash_def ldlm_ns_hash_defs[] = { /** Register \a ns in the list of namespaces */ static void ldlm_namespace_register(struct ldlm_namespace *ns, - ldlm_side_t client) + enum ldlm_side client) { mutex_lock(ldlm_namespace_lock(client)); LASSERT(list_empty(&ns->ns_list_chain)); @@ -625,7 +625,7 @@ static void ldlm_namespace_register(struct ldlm_namespace *ns, * Create and initialize new empty namespace. */ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name, - ldlm_side_t client, + enum ldlm_side client, enum ldlm_appetite apt, enum ldlm_ns_type ns_type) { @@ -855,8 +855,10 @@ int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags) return ELDLM_OK; } - cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean, &flags); - cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain, NULL); + cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean, + &flags, 0); + cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain, + NULL, 0); return ELDLM_OK; } EXPORT_SYMBOL(ldlm_namespace_cleanup); @@ -952,7 +954,7 @@ void ldlm_namespace_free_prior(struct ldlm_namespace *ns, /** Unregister \a ns from the list of namespaces. 
*/ static void ldlm_namespace_unregister(struct ldlm_namespace *ns, - ldlm_side_t client) + enum ldlm_side client) { mutex_lock(ldlm_namespace_lock(client)); LASSERT(!list_empty(&ns->ns_list_chain)); @@ -999,7 +1001,6 @@ void ldlm_namespace_get(struct ldlm_namespace *ns) { atomic_inc(&ns->ns_bref); } -EXPORT_SYMBOL(ldlm_namespace_get); /* This is only for callers that care about refcount */ static int ldlm_namespace_get_return(struct ldlm_namespace *ns) @@ -1014,11 +1015,10 @@ void ldlm_namespace_put(struct ldlm_namespace *ns) spin_unlock(&ns->ns_lock); } } -EXPORT_SYMBOL(ldlm_namespace_put); /** Should be called with ldlm_namespace_lock(client) taken. */ void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns, - ldlm_side_t client) + enum ldlm_side client) { LASSERT(!list_empty(&ns->ns_list_chain)); LASSERT(mutex_is_locked(ldlm_namespace_lock(client))); @@ -1027,7 +1027,7 @@ void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns, /** Should be called with ldlm_namespace_lock(client) taken. */ void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns, - ldlm_side_t client) + enum ldlm_side client) { LASSERT(!list_empty(&ns->ns_list_chain)); LASSERT(mutex_is_locked(ldlm_namespace_lock(client))); @@ -1035,7 +1035,7 @@ void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns, } /** Should be called with ldlm_namespace_lock(client) taken. */ -struct ldlm_namespace *ldlm_namespace_first_locked(ldlm_side_t client) +struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side client) { LASSERT(mutex_is_locked(ldlm_namespace_lock(client))); LASSERT(!list_empty(ldlm_namespace_list(client))); @@ -1305,7 +1305,7 @@ void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc) * Print information about all locks in all namespaces on this node to debug * log. */ -void ldlm_dump_all_namespaces(ldlm_side_t client, int level) +void ldlm_dump_all_namespaces(enum ldlm_side client, int level) { struct list_head *tmp; @@ -1323,7 +1323,6 @@ void ldlm_dump_all_namespaces(ldlm_side_t client, int level) mutex_unlock(ldlm_namespace_lock(client)); } -EXPORT_SYMBOL(ldlm_dump_all_namespaces); static int ldlm_res_hash_dump(struct cfs_hash *hs, struct cfs_hash_bd *bd, struct hlist_node *hnode, void *arg) @@ -1355,12 +1354,11 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_res_hash_dump, - (void *)(unsigned long)level); + (void *)(unsigned long)level, 0); spin_lock(&ns->ns_lock); ns->ns_next_dump = cfs_time_shift(10); spin_unlock(&ns->ns_lock); } -EXPORT_SYMBOL(ldlm_namespace_dump); /** * Print information about all locks in this resource to debug log. 
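The ldlm_resource.c hunks above complete the LDLM_CANCEL_* to LDLM_LRU_FLAG_* rename that began in ldlm_internal.h. Since BIT(n) is just (1UL << n), the conversion is behavior-neutral: flags still combine and test bitwise exactly as before. The self-contained sketch below demonstrates this; the flag values are copied from the renamed enum, but pick_policy() is an invented helper that only approximates the precedence of ldlm_cancel_lru_policy().

#include <stdio.h>

#define BIT(nr)			(1UL << (nr))

/* Values copied from the renamed enum. */
#define LDLM_LRU_FLAG_AGED	BIT(0)
#define LDLM_LRU_FLAG_PASSED	BIT(1)
#define LDLM_LRU_FLAG_SHRINK	BIT(2)
#define LDLM_LRU_FLAG_LRUR	BIT(3)
#define LDLM_LRU_FLAG_NO_WAIT	BIT(4)

/* Invented: mirrors the rough precedence of ldlm_cancel_lru_policy(). */
static const char *pick_policy(unsigned long flags, int lru_resize)
{
	if (flags & LDLM_LRU_FLAG_NO_WAIT)
		return "no_wait";
	if (lru_resize && (flags & LDLM_LRU_FLAG_SHRINK))
		return "passed (shrinker)";
	if (lru_resize && (flags & LDLM_LRU_FLAG_LRUR))
		return "lrur";
	if (!lru_resize && (flags & LDLM_LRU_FLAG_AGED))
		return "aged";
	return "default";
}

int main(void)
{
	/* Shrinker pressure with LRU resize enabled. */
	printf("%s\n", pick_policy(LDLM_LRU_FLAG_SHRINK, 1));
	return 0;
}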
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile index 1ac0940bd8df..322d4fa63f5d 100644 --- a/drivers/staging/lustre/lustre/llite/Makefile +++ b/drivers/staging/lustre/lustre/llite/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_LUSTRE_FS) += lustre.o -lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \ - rw.o namei.o symlink.o llite_mmap.o range_lock.o \ - xattr.o xattr_cache.o rw26.o super25.o statahead.o \ - glimpse.o lcommon_cl.o lcommon_misc.o \ - vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \ +lustre-y := dcache.o dir.o file.o llite_lib.o llite_nfs.o \ + rw.o rw26.o namei.o symlink.o llite_mmap.o range_lock.o \ + xattr.o xattr_cache.o xattr_security.o \ + super25.o statahead.o glimpse.o lcommon_cl.o lcommon_misc.o \ + vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o \ lproc_llite.o diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index 7f32a539d260..ea5d247a3f70 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -51,6 +51,8 @@ #include "../include/lustre_dlm.h" #include "../include/lustre_fid.h" #include "../include/lustre_kernelcomm.h" +#include "../include/lustre_swab.h" + #include "llite_internal.h" /* @@ -410,6 +412,8 @@ static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump, struct ptlrpc_request *request = NULL; struct md_op_data *op_data; struct ll_sb_info *sbi = ll_i2sbi(parent); + struct inode *inode = NULL; + struct dentry dentry; int err; if (unlikely(lump->lum_magic != LMV_USER_MAGIC)) @@ -419,6 +423,10 @@ static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump, PFID(ll_inode2fid(parent)), parent, dirname, (int)lump->lum_stripe_offset, lump->lum_stripe_count); + if (lump->lum_stripe_count > 1 && + !(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_DIR_STRIPE)) + return -EINVAL; + if (lump->lum_magic != cpu_to_le32(LMV_USER_MAGIC)) lustre_swab_lmv_user_md(lump); @@ -439,8 +447,17 @@ static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump, from_kgid(&init_user_ns, current_fsgid()), cfs_curproc_cap_pack(), 0, &request); ll_finish_md_op_data(op_data); + + err = ll_prep_inode(&inode, request, parent->i_sb, NULL); if (err) goto err_exit; + + memset(&dentry, 0, sizeof(dentry)); + dentry.d_inode = inode; + + err = ll_init_security(&dentry, inode, parent); + iput(inode); + err_exit: ptlrpc_req_finished(request); return err; @@ -501,8 +518,7 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, return PTR_ERR(op_data); /* swabbing is done in lov_setstripe() on server side */ - rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, - NULL, 0, &req, NULL); + rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, &req); ll_finish_md_op_data(op_data); ptlrpc_req_finished(req); if (rc) { @@ -682,7 +698,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) { struct ll_sb_info *sbi = ll_s2sbi(sb); struct hsm_progress_kernel hpk; - int rc; + int rc2, rc = 0; /* Forge a hsm_progress based on data from copy. 
*/ hpk.hpk_fid = copy->hc_hai.hai_fid; @@ -732,10 +748,10 @@ progress: /* On error, the request should be considered as completed */ if (hpk.hpk_errval > 0) hpk.hpk_flags |= HP_FLAG_COMPLETED; - rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), - &hpk, NULL); + rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), + &hpk, NULL); - return rc; + return rc ? rc : rc2; } /** @@ -757,7 +773,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) { struct ll_sb_info *sbi = ll_s2sbi(sb); struct hsm_progress_kernel hpk; - int rc; + int rc2, rc = 0; /* If you modify the logic here, also check llapi_hsm_copy_end(). */ /* Take care: copy->hc_hai.hai_action, len, gid and data are not @@ -823,18 +839,18 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) * when the file will not be modified for some tunable * time */ - /* we do not notify caller */ hpk.hpk_flags &= ~HP_FLAG_RETRY; + rc = -EBUSY; /* hpk_errval must be >= 0 */ - hpk.hpk_errval = EBUSY; + hpk.hpk_errval = -rc; } } progress: - rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), - &hpk, NULL); + rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), + &hpk, NULL); - return rc; + return rc ? rc : rc2; } static int copy_and_ioctl(int cmd, struct obd_export *exp, @@ -862,10 +878,6 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) int rc = 0; switch (cmd) { - case LUSTRE_Q_INVALIDATE: - case LUSTRE_Q_FINVALIDATE: - case Q_QUOTAON: - case Q_QUOTAOFF: case Q_SETQUOTA: case Q_SETINFO: if (!capable(CFS_CAP_SYS_ADMIN)) @@ -930,10 +942,6 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) QCTL_COPY(oqctl, qctl); rc = obd_quotactl(sbi->ll_md_exp, oqctl); if (rc) { - if (rc != -EALREADY && cmd == Q_QUOTAON) { - oqctl->qc_cmd = Q_QUOTAOFF; - obd_quotactl(sbi->ll_md_exp, oqctl); - } kfree(oqctl); return rc; } @@ -1077,7 +1085,7 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) goto out_free; } - rc = ll_get_fid_by_name(inode, filename, namelen, NULL); + rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL); if (rc < 0) { CERROR("%s: lookup %.*s failed: rc = %d\n", ll_get_fsname(inode->i_sb, NULL, 0), namelen, @@ -1189,6 +1197,7 @@ lmv_out_free: struct lmv_user_md *tmp = NULL; union lmv_mds_md *lmm = NULL; u64 valid = 0; + int max_stripe_count; int stripe_count; int mdt_index; int lum_size; @@ -1200,6 +1209,7 @@ lmv_out_free: if (copy_from_user(&lum, ulmv, sizeof(*ulmv))) return -EFAULT; + max_stripe_count = lum.lum_stripe_count; /* * lum_magic will indicate which stripe the ioctl will like * to get, LMV_MAGIC_V1 is for normal LMV stripe, LMV_USER_MAGIC @@ -1219,9 +1229,6 @@ lmv_out_free: /* Get default LMV EA */ if (lum.lum_magic == LMV_USER_MAGIC) { - if (rc) - goto finish_req; - if (lmmsize > sizeof(*ulmv)) { rc = -EINVAL; goto finish_req; @@ -1234,6 +1241,16 @@ lmv_out_free: } stripe_count = lmv_mds_md_stripe_count_get(lmm); + if (max_stripe_count < stripe_count) { + lum.lum_stripe_count = stripe_count; + if (copy_to_user(ulmv, &lum, sizeof(lum))) { + rc = -EFAULT; + goto finish_req; + } + rc = -E2BIG; + goto finish_req; + } + lum_size = lmv_user_md_size(stripe_count, LMV_MAGIC_V1); tmp = kzalloc(lum_size, GFP_NOFS); if (!tmp) { @@ -1370,134 +1387,6 @@ out_req: ll_putname(filename); return rc; } - case IOC_LOV_GETINFO: { - struct lov_user_mds_data __user *lumd; - struct lov_stripe_md *lsm; - struct lov_user_md __user *lum; - struct lov_mds_md 
*lmm; - int lmmsize; - lstat_t st; - - lumd = (struct lov_user_mds_data __user *)arg; - lum = &lumd->lmd_lmm; - - rc = ll_get_max_mdsize(sbi, &lmmsize); - if (rc) - return rc; - - lmm = libcfs_kvzalloc(lmmsize, GFP_NOFS); - if (!lmm) - return -ENOMEM; - if (copy_from_user(lmm, lum, lmmsize)) { - rc = -EFAULT; - goto free_lmm; - } - - switch (lmm->lmm_magic) { - case LOV_USER_MAGIC_V1: - if (cpu_to_le32(LOV_USER_MAGIC_V1) == LOV_USER_MAGIC_V1) - break; - /* swab objects first so that stripes num will be sane */ - lustre_swab_lov_user_md_objects( - ((struct lov_user_md_v1 *)lmm)->lmm_objects, - ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count); - lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm); - break; - case LOV_USER_MAGIC_V3: - if (cpu_to_le32(LOV_USER_MAGIC_V3) == LOV_USER_MAGIC_V3) - break; - /* swab objects first so that stripes num will be sane */ - lustre_swab_lov_user_md_objects( - ((struct lov_user_md_v3 *)lmm)->lmm_objects, - ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count); - lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm); - break; - default: - rc = -EINVAL; - goto free_lmm; - } - - rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize); - if (rc < 0) { - rc = -ENOMEM; - goto free_lmm; - } - - /* Perform glimpse_size operation. */ - memset(&st, 0, sizeof(st)); - - rc = ll_glimpse_ioctl(sbi, lsm, &st); - if (rc) - goto free_lsm; - - if (copy_to_user(&lumd->lmd_st, &st, sizeof(st))) { - rc = -EFAULT; - goto free_lsm; - } - -free_lsm: - obd_free_memmd(sbi->ll_dt_exp, &lsm); -free_lmm: - kvfree(lmm); - return rc; - } - case OBD_IOC_QUOTACHECK: { - struct obd_quotactl *oqctl; - int error = 0; - - if (!capable(CFS_CAP_SYS_ADMIN)) - return -EPERM; - - oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS); - if (!oqctl) - return -ENOMEM; - oqctl->qc_type = arg; - rc = obd_quotacheck(sbi->ll_md_exp, oqctl); - if (rc < 0) { - CDEBUG(D_INFO, "md_quotacheck failed: rc %d\n", rc); - error = rc; - } - - rc = obd_quotacheck(sbi->ll_dt_exp, oqctl); - if (rc < 0) - CDEBUG(D_INFO, "obd_quotacheck failed: rc %d\n", rc); - - kfree(oqctl); - return error ?: rc; - } - case OBD_IOC_POLL_QUOTACHECK: { - struct if_quotacheck *check; - - if (!capable(CFS_CAP_SYS_ADMIN)) - return -EPERM; - - check = kzalloc(sizeof(*check), GFP_NOFS); - if (!check) - return -ENOMEM; - - rc = obd_iocontrol(cmd, sbi->ll_md_exp, 0, (void *)check, - NULL); - if (rc) { - CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc); - if (copy_to_user((void __user *)arg, check, - sizeof(*check))) - CDEBUG(D_QUOTA, "copy_to_user failed\n"); - goto out_poll; - } - - rc = obd_iocontrol(cmd, sbi->ll_dt_exp, 0, (void *)check, - NULL); - if (rc) { - CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc); - if (copy_to_user((void __user *)arg, check, - sizeof(*check))) - CDEBUG(D_QUOTA, "copy_to_user failed\n"); - goto out_poll; - } -out_poll: - kfree(check); - return rc; - } case OBD_IOC_QUOTACTL: { struct if_quotactl *qctl; @@ -1536,7 +1425,7 @@ out_quotactl: exp = count ? 
sbi->ll_md_exp : sbi->ll_dt_exp; vallen = sizeof(count); rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT), - KEY_TGT_COUNT, &vallen, &count, NULL); + KEY_TGT_COUNT, &vallen, &count); if (rc) { CERROR("get target count failed: %d\n", rc); return rc; diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index e1d784bae064..f634c11216e6 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -44,6 +44,7 @@ #include <linux/mount.h> #include "../include/lustre/ll_fiemap.h" #include "../include/lustre/lustre_ioctl.h" +#include "../include/lustre_swab.h" #include "../include/cl_object.h" #include "llite_internal.h" @@ -75,60 +76,56 @@ static void ll_file_data_put(struct ll_file_data *fd) kmem_cache_free(ll_file_data_slab, fd); } -void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, - struct lustre_handle *fh) +/** + * Packs all the attributes into @op_data for the CLOSE rpc. + */ +static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, + struct obd_client_handle *och) { - op_data->op_fid1 = ll_i2info(inode)->lli_fid; + struct ll_inode_info *lli = ll_i2info(inode); + + ll_prep_md_op_data(op_data, inode, NULL, NULL, + 0, 0, LUSTRE_OPC_ANY, NULL); + op_data->op_attr.ia_mode = inode->i_mode; op_data->op_attr.ia_atime = inode->i_atime; op_data->op_attr.ia_mtime = inode->i_mtime; op_data->op_attr.ia_ctime = inode->i_ctime; op_data->op_attr.ia_size = i_size_read(inode); + op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET | + ATTR_MTIME | ATTR_MTIME_SET | + ATTR_CTIME | ATTR_CTIME_SET; op_data->op_attr_blocks = inode->i_blocks; op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags); - op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch; - if (fh) - op_data->op_handle = *fh; + op_data->op_handle = och->och_fh; - if (ll_i2info(inode)->lli_flags & LLIF_DATA_MODIFIED) + /* + * For HSM: if inode data has been modified, pack it so that + * MDT can set data dirty flag in the archive. + */ + if (och->och_flags & FMODE_WRITE && + test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) op_data->op_bias |= MDS_DATA_MODIFIED; } /** - * Closes the IO epoch and packs all the attributes into @op_data for - * the CLOSE rpc. + * Perform a close, possibly with a bias. + * The meaning of "data" depends on the value of "bias". + * + * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version. + * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to + * swap layouts with. 
*/ -static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, - struct obd_client_handle *och) -{ - op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET | - ATTR_MTIME | ATTR_MTIME_SET | - ATTR_CTIME | ATTR_CTIME_SET; - - if (!(och->och_flags & FMODE_WRITE)) - goto out; - - if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode)) - op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; - else - ll_ioepoch_close(inode, op_data, &och, 0); - -out: - ll_pack_inode2opdata(inode, op_data, &och->och_fh); - ll_prep_md_op_data(op_data, inode, NULL, NULL, - 0, 0, LUSTRE_OPC_ANY, NULL); -} - static int ll_close_inode_openhandle(struct obd_export *md_exp, - struct inode *inode, struct obd_client_handle *och, - const __u64 *data_version) + struct inode *inode, + enum mds_op_bias bias, + void *data) { struct obd_export *exp = ll_i2mdexp(inode); struct md_op_data *op_data; struct ptlrpc_request *req = NULL; struct obd_device *obd = class_exp2obd(exp); - int epoch_close = 1; int rc; if (!obd) { @@ -150,65 +147,51 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, } ll_prepare_close(inode, op_data, och); - if (data_version) { - /* Pass in data_version implies release. */ + switch (bias) { + case MDS_CLOSE_LAYOUT_SWAP: + LASSERT(data); + op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP; + op_data->op_data_version = 0; + op_data->op_lease_handle = och->och_lease_handle; + op_data->op_fid2 = *ll_inode2fid(data); + break; + + case MDS_HSM_RELEASE: + LASSERT(data); op_data->op_bias |= MDS_HSM_RELEASE; - op_data->op_data_version = *data_version; + op_data->op_data_version = *(__u64 *)data; op_data->op_lease_handle = och->och_lease_handle; op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; + break; + + default: + LASSERT(!data); + break; } - epoch_close = op_data->op_flags & MF_EPOCH_CLOSE; + rc = md_close(md_exp, op_data, och->och_mod, &req); - if (rc == -EAGAIN) { - /* This close must have the epoch closed. */ - LASSERT(epoch_close); - /* MDS has instructed us to obtain Size-on-MDS attribute from - * OSTs and send setattr to back to MDS. - */ - rc = ll_som_update(inode, op_data); - if (rc) { - CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n", - ll_i2mdexp(inode)->exp_obd->obd_name, - PFID(ll_inode2fid(inode)), rc); - rc = 0; - } - } else if (rc) { + if (rc) { CERROR("%s: inode "DFID" mdc close failed: rc = %d\n", ll_i2mdexp(inode)->exp_obd->obd_name, PFID(ll_inode2fid(inode)), rc); } - /* DATA_MODIFIED flag was successfully sent on close, cancel data - * modification flag. - */ - if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) { - struct ll_inode_info *lli = ll_i2info(inode); - - spin_lock(&lli->lli_lock); - lli->lli_flags &= ~LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - - if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) { + if (op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP) && + !rc) { struct mdt_body *body; body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (!(body->mbo_valid & OBD_MD_FLRELEASED)) + if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED)) rc = -EBUSY; } ll_finish_md_op_data(op_data); out: - if (exp_connect_som(exp) && !epoch_close && - S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) { - ll_queue_done_writing(inode, LLIF_DONE_WRITING); - } else { - md_clear_open_replay_data(md_exp, och); - /* Free @och if it is not waiting for DONE_WRITING. 
*/ - och->och_fh.cookie = DEAD_HANDLE_MAGIC; - kfree(och); - } + md_clear_open_replay_data(md_exp, och); + och->och_fh.cookie = DEAD_HANDLE_MAGIC; + kfree(och); + if (req) /* This is close request */ ptlrpc_req_finished(req); return rc; @@ -252,7 +235,7 @@ int ll_md_real_close(struct inode *inode, fmode_t fmode) * be closed. */ rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, - inode, och, NULL); + och, inode, 0, NULL); } return rc; @@ -266,7 +249,9 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode, int lockmode; __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; struct lustre_handle lockh; - ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN} }; + union ldlm_policy_data policy = { + .l_inodebits = { MDS_INODELOCK_OPEN } + }; int rc = 0; /* clear group lock, if present */ @@ -288,7 +273,8 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode, } if (fd->fd_och) { - rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL); + rc = ll_close_inode_openhandle(md_exp, fd->fd_och, inode, 0, + NULL); fd->fd_och = NULL; goto out; } @@ -437,20 +423,6 @@ out: return rc; } -/** - * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does - * not believe attributes if a few ioepoch holders exist. Attributes for - * previous ioepoch if new one is opened are also skipped by MDS. - */ -void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch) -{ - if (ioepoch && lli->lli_ioepoch != ioepoch) { - lli->lli_ioepoch = ioepoch; - CDEBUG(D_INODE, "Epoch %llu opened on "DFID"\n", - ioepoch, PFID(&lli->lli_fid)); - } -} - static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, struct obd_client_handle *och) { @@ -470,23 +442,17 @@ static int ll_local_open(struct file *file, struct lookup_intent *it, struct ll_file_data *fd, struct obd_client_handle *och) { struct inode *inode = file_inode(file); - struct ll_inode_info *lli = ll_i2info(inode); LASSERT(!LUSTRE_FPRIVATE(file)); LASSERT(fd); if (och) { - struct mdt_body *body; int rc; rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); if (rc != 0) return rc; - - body = req_capsule_server_get(&it->it_request->rq_pill, - &RMF_MDT_BODY); - ll_ioepoch_open(lli, body->mbo_ioepoch); } LUSTRE_FPRIVATE(file) = fd; @@ -677,12 +643,6 @@ restart: if (!S_ISREG(inode->i_mode)) goto out_och_free; - if (!lli->lli_has_smd && - (cl_is_lov_delay_create(file->f_flags) || - (file->f_mode & FMODE_WRITE) == 0)) { - CDEBUG(D_INODE, "object creation was delayed\n"); - goto out_och_free; - } cl_lov_delay_create_clear(&file->f_flags); goto out_och_free; @@ -867,7 +827,7 @@ out_close: it.it_lock_mode = 0; och->och_lease_handle.cookie = 0ULL; } - rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL); + rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, och, inode, 0, NULL); if (rc2 < 0) CERROR("%s: error closing file "DFID": %d\n", ll_get_fsname(inode->i_sb, NULL, 0), @@ -881,6 +841,69 @@ out: } /** + * Check whether a layout swap can be done between two inodes. 
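+ *
+ * Both inodes must be regular files on the same file system, and the
+ * caller must have write permission on each of them.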
+ * + * \param[in] inode1 First inode to check + * \param[in] inode2 Second inode to check + * + * \retval 0 on success, layout swap can be performed between both inodes + * \retval negative error code if requirements are not met + */ +static int ll_check_swap_layouts_validity(struct inode *inode1, + struct inode *inode2) +{ + if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode)) + return -EINVAL; + + if (inode_permission(inode1, MAY_WRITE) || + inode_permission(inode2, MAY_WRITE)) + return -EPERM; + + if (inode1->i_sb != inode2->i_sb) + return -EXDEV; + + return 0; +} + +static int ll_swap_layouts_close(struct obd_client_handle *och, + struct inode *inode, struct inode *inode2) +{ + const struct lu_fid *fid1 = ll_inode2fid(inode); + const struct lu_fid *fid2; + int rc; + + CDEBUG(D_INODE, "%s: biased close of file " DFID "\n", + ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1)); + + rc = ll_check_swap_layouts_validity(inode, inode2); + if (rc < 0) + goto out_free_och; + + /* We now know that inode2 is a lustre inode */ + fid2 = ll_inode2fid(inode2); + + rc = lu_fid_cmp(fid1, fid2); + if (!rc) { + rc = -EINVAL; + goto out_free_och; + } + + /* + * Close the file and swap layouts between inode & inode2. + * NB: lease lock handle is released in mdc_close_layout_swap_pack() + * because we still need it to pack l_remote_handle to MDT. + */ + rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode, + MDS_CLOSE_LAYOUT_SWAP, inode2); + + och = NULL; /* freed in ll_close_inode_openhandle() */ + +out_free_och: + kfree(och); + return rc; +} + +/** * Release lease and close the file. * It will check if the lease has ever broken. */ @@ -907,84 +930,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, *lease_broken = cancelled; return ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, - inode, och, NULL); -} - -/* Fills the obdo with the attributes for the lsm */ -static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, - struct obdo *obdo, __u64 ioepoch, int dv_flags) -{ - struct ptlrpc_request_set *set; - struct obd_info oinfo = { }; - int rc; - - LASSERT(lsm); - - oinfo.oi_md = lsm; - oinfo.oi_oa = obdo; - oinfo.oi_oa->o_oi = lsm->lsm_oi; - oinfo.oi_oa->o_mode = S_IFREG; - oinfo.oi_oa->o_ioepoch = ioepoch; - oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | - OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLBLKSZ | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLGROUP | OBD_MD_FLEPOCH | - OBD_MD_FLDATAVERSION; - if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) { - oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS; - oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK; - if (dv_flags & LL_DV_WR_FLUSH) - oinfo.oi_oa->o_flags |= OBD_FL_FLUSH; - } - - set = ptlrpc_prep_set(); - if (!set) { - CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM); - rc = -ENOMEM; - } else { - rc = obd_getattr_async(exp, &oinfo, set); - if (rc == 0) - rc = ptlrpc_set_wait(set); - ptlrpc_set_destroy(set); - } - if (rc == 0) { - oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | - OBD_MD_FLATIME | OBD_MD_FLMTIME | - OBD_MD_FLCTIME | OBD_MD_FLSIZE | - OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS); - if (dv_flags & LL_DV_WR_FLUSH && - !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS && - oinfo.oi_oa->o_flags & OBD_FL_FLUSH)) - return -ENOTSUPP; - } - return rc; -} - -/** - * Performs the getattr on the inode and updates its fields. - * If @sync != 0, perform the getattr under the server-side lock. 
- */ -int ll_inode_getattr(struct inode *inode, struct obdo *obdo, - __u64 ioepoch, int sync) -{ - struct lov_stripe_md *lsm; - int rc; - - lsm = ccc_inode_lsm_get(inode); - rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode), - obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0); - if (rc == 0) { - struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi; - - obdo_refresh_inode(inode, obdo, obdo->o_valid); - CDEBUG(D_INODE, "objid " DOSTID " size %llu, blocks %llu, blksize %lu\n", - POSTID(oi), i_size_read(inode), - (unsigned long long)inode->i_blocks, - 1UL << inode->i_blkbits); - } - ccc_inode_lsm_put(inode, lsm); - return rc; + och, inode, 0, NULL); } int ll_merge_attr(const struct lu_env *env, struct inode *inode) @@ -1043,23 +989,6 @@ out_size_unlock: return rc; } -int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, - lstat_t *st) -{ - struct obdo obdo = { 0 }; - int rc; - - rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, &obdo, 0, 0); - if (rc == 0) { - st->st_size = obdo.o_size; - st->st_blocks = obdo.o_blocks; - st->st_mtime = obdo.o_mtime; - st->st_atime = obdo.o_atime; - st->st_ctime = obdo.o_ctime; - } - return rc; -} - static bool file_is_noatime(const struct file *file) { const struct vfsmount *mnt = file->f_path.mnt; @@ -1117,9 +1046,11 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, { struct ll_inode_info *lli = ll_i2info(file_inode(file)); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct vvp_io *vio = vvp_env_io(env); struct range_lock range; struct cl_io *io; - ssize_t result; + ssize_t result = 0; + int rc = 0; CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n", file, iot, *ppos, count); @@ -1151,18 +1082,15 @@ restart: CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n", range.rl_node.in_extent.start, range.rl_node.in_extent.end); - result = range_lock(&lli->lli_write_tree, - &range); - if (result < 0) + rc = range_lock(&lli->lli_write_tree, &range); + if (rc < 0) goto out; range_locked = true; } - down_read(&lli->lli_trunc_sem); ll_cl_add(file, env, io); - result = cl_io_loop(env, io); + rc = cl_io_loop(env, io); ll_cl_remove(file, env); - up_read(&lli->lli_trunc_sem); if (range_locked) { CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n", range.rl_node.in_extent.start, @@ -1171,24 +1099,26 @@ restart: } } else { /* cl_io_rw_init() handled IO */ - result = io->ci_result; + rc = io->ci_result; } if (io->ci_nob > 0) { result = io->ci_nob; + count -= io->ci_nob; *ppos = io->u.ci_wr.wr.crw_pos; + + /* prepare IO restart */ + if (count > 0) + args->u.normal.via_iter = vio->vui_iter; } - goto out; out: cl_io_fini(env, io); - /* If any bit been read/written (result != 0), we just return - * short read/write instead of restart io. - */ - if ((result == 0 || result == -ENODATA) && io->ci_need_restart) { - CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zu\n", + + if ((!rc || rc == -ENODATA) && count > 0 && io->ci_need_restart) { + CDEBUG(D_VFSTRACE, "%s: restart %s from %lld, count:%zu, result: %zd\n", + file_dentry(file)->d_name.name, iot == CIT_READ ? 
"read" : "write", - file, *ppos, count); - LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob); + *ppos, count, result); goto restart; } @@ -1201,13 +1131,19 @@ out: ll_stats_ops_tally(ll_i2sbi(file_inode(file)), LPROC_LL_WRITE_BYTES, result); fd->fd_write_failed = false; - } else if (result != -ERESTARTSYS) { + } else if (!result && !rc) { + rc = io->ci_result; + if (rc < 0) + fd->fd_write_failed = true; + else + fd->fd_write_failed = false; + } else if (rc != -ERESTARTSYS) { fd->fd_write_failed = true; } } CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result); - return result; + return result > 0 ? result : rc; } static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -1259,37 +1195,22 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, __u64 flags, struct lov_user_md *lum, int lum_size) { - struct lov_stripe_md *lsm = NULL; struct lookup_intent oit = { .it_op = IT_OPEN, .it_flags = flags | MDS_OPEN_BY_FID, }; int rc = 0; - lsm = ccc_inode_lsm_get(inode); - if (lsm) { - ccc_inode_lsm_put(inode, lsm); - CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n", - PFID(ll_inode2fid(inode))); - rc = -EEXIST; - goto out; - } - ll_inode_size_lock(inode); rc = ll_intent_file_open(dentry, lum, lum_size, &oit); if (rc < 0) goto out_unlock; - rc = oit.it_status; - if (rc < 0) - goto out_unlock; ll_release_openhandle(inode, &oit); out_unlock: ll_inode_size_unlock(inode); ll_intent_release(&oit); - ccc_inode_lsm_put(inode, lsm); -out: return rc; } @@ -1566,7 +1487,7 @@ int ll_release_openhandle(struct inode *inode, struct lookup_intent *it) ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, - inode, och, NULL); + och, inode, 0, NULL); out: /* this one is in place of ll_file_open */ if (it_disposition(it, DISP_ENQ_OPEN_REF)) { @@ -1579,15 +1500,17 @@ out: /** * Get size for inode for which FIEMAP mapping is requested. * Make the FIEMAP get_info call and returns the result. + * + * \param fiemap kernel buffer to hold extens + * \param num_bytes kernel buffer size */ -static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap, +static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap, size_t num_bytes) { - struct obd_export *exp = ll_i2dtexp(inode); - struct lov_stripe_md *lsm = NULL; - struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, }; - __u32 vallen = num_bytes; - int rc; + struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, }; + struct lu_env *env; + int refcheck; + int rc = 0; /* Checks for fiemap flags */ if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) { @@ -1602,21 +1525,9 @@ static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap, return rc; } - lsm = ccc_inode_lsm_get(inode); - if (!lsm) - return -ENOENT; - - /* If the stripe_count > 1 and the application does not understand - * DEVICE_ORDER flag, then it cannot interpret the extents correctly. 
- */ - if (lsm->lsm_stripe_count > 1 && - !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) { - rc = -EOPNOTSUPP; - goto out; - } - - fm_key.oa.o_oi = lsm->lsm_oi; - fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); if (i_size_read(inode) == 0) { rc = ll_glimpse_size(inode); @@ -1624,24 +1535,23 @@ static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap, goto out; } - obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE); - obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid); + fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; + obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE); + obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid); + /* If filesize is 0, then there would be no objects for mapping */ - if (fm_key.oa.o_size == 0) { + if (fmkey.lfik_oa.o_size == 0) { fiemap->fm_mapped_extents = 0; rc = 0; goto out; } - memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap)); - - rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen, - fiemap, lsm); - if (rc) - CERROR("obd_get_info failed: rc = %d\n", rc); + memcpy(&fmkey.lfik_fiemap, fiemap, sizeof(*fiemap)); + rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob, + &fmkey, fiemap, &num_bytes); out: - ccc_inode_lsm_put(inode, lsm); + cl_env_put(env, &refcheck); return rc; } @@ -1689,113 +1599,56 @@ gf_free: return rc; } -static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg) -{ - struct ll_user_fiemap *fiemap_s; - size_t num_bytes, ret_bytes; - unsigned int extent_count; - int rc = 0; - - /* Get the extent count so we can calculate the size of - * required fiemap buffer - */ - if (get_user(extent_count, - &((struct ll_user_fiemap __user *)arg)->fm_extent_count)) - return -EFAULT; - - if (extent_count >= - (SIZE_MAX - sizeof(*fiemap_s)) / sizeof(struct ll_fiemap_extent)) - return -EINVAL; - num_bytes = sizeof(*fiemap_s) + (extent_count * - sizeof(struct ll_fiemap_extent)); - - fiemap_s = libcfs_kvzalloc(num_bytes, GFP_NOFS); - if (!fiemap_s) - return -ENOMEM; - - /* get the fiemap value */ - if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg, - sizeof(*fiemap_s))) { - rc = -EFAULT; - goto error; - } - - /* If fm_extent_count is non-zero, read the first extent since - * it is used to calculate end_offset and device from previous - * fiemap call. - */ - if (extent_count) { - if (copy_from_user(&fiemap_s->fm_extents[0], - (char __user *)arg + sizeof(*fiemap_s), - sizeof(struct ll_fiemap_extent))) { - rc = -EFAULT; - goto error; - } - } - - rc = ll_do_fiemap(inode, fiemap_s, num_bytes); - if (rc) - goto error; - - ret_bytes = sizeof(struct ll_user_fiemap); - - if (extent_count != 0) - ret_bytes += (fiemap_s->fm_mapped_extents * - sizeof(struct ll_fiemap_extent)); - - if (copy_to_user((void __user *)arg, fiemap_s, ret_bytes)) - rc = -EFAULT; - -error: - kvfree(fiemap_s); - return rc; -} - /* * Read the data_version for inode. * * This value is computed using stripe object version on OST. * Version is computed using server side locking. 
* - * @param sync if do sync on the OST side; + * @param flags whether to sync on the OST side; * 0: no sync * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs */ int ll_data_version(struct inode *inode, __u64 *data_version, int flags) { - struct lov_stripe_md *lsm = NULL; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct obdo *obdo = NULL; - int rc; + struct cl_object *obj = ll_i2info(inode)->lli_clob; + struct lu_env *env; + struct cl_io *io; + int refcheck; + int result; - /* If no stripe, we consider version is 0. */ - lsm = ccc_inode_lsm_get(inode); - if (!lsm_has_objects(lsm)) { + /* If no file object initialized, we consider its version is 0. */ + if (!obj) { *data_version = 0; - CDEBUG(D_INODE, "No object for inode\n"); - rc = 0; - goto out; + return 0; } - obdo = kzalloc(sizeof(*obdo), GFP_NOFS); - if (!obdo) { - rc = -ENOMEM; - goto out; - } + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); - rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags); - if (rc == 0) { - if (!(obdo->o_valid & OBD_MD_FLDATAVERSION)) - rc = -EOPNOTSUPP; - else - *data_version = obdo->o_data_version; - } + io = vvp_env_thread_io(env); + io->ci_obj = obj; + io->u.ci_data_version.dv_data_version = 0; + io->u.ci_data_version.dv_flags = flags; - kfree(obdo); -out: - ccc_inode_lsm_put(inode, lsm); - return rc; +restart: + if (!cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj)) + result = cl_io_loop(env, io); + else + result = io->ci_result; + + *data_version = io->u.ci_data_version.dv_data_version; + + cl_io_fini(env, io); + + if (unlikely(io->ci_need_restart)) + goto restart; + + cl_env_put(env, &refcheck); + + return result; } /* @@ -1803,11 +1656,11 @@ out: */ int ll_hsm_release(struct inode *inode) { - struct cl_env_nest nest; struct lu_env *env; struct obd_client_handle *och = NULL; __u64 data_version = 0; int rc; + int refcheck; CDEBUG(D_INODE, "%s: Releasing file "DFID".\n", ll_get_fsname(inode->i_sb, NULL, 0), @@ -1824,21 +1677,21 @@ int ll_hsm_release(struct inode *inode) if (rc != 0) goto out; - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) { rc = PTR_ERR(env); goto out; } ll_merge_attr(env, inode); - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); /* Release the file. * NB: lease lock handle is released in mdc_hsm_release_pack() because * we still need it to pack l_remote_handle to MDT.
*/ - rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och, - &data_version); + rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode, + MDS_HSM_RELEASE, &data_version); och = NULL; out: @@ -1849,10 +1702,12 @@ out: } struct ll_swap_stack { - struct iattr ia1, ia2; - __u64 dv1, dv2; - struct inode *inode1, *inode2; - bool check_dv1, check_dv2; + u64 dv1; + u64 dv2; + struct inode *inode1; + struct inode *inode2; + bool check_dv1; + bool check_dv2; }; static int ll_swap_layouts(struct file *file1, struct file *file2, @@ -1872,21 +1727,9 @@ static int ll_swap_layouts(struct file *file1, struct file *file2, llss->inode1 = file_inode(file1); llss->inode2 = file_inode(file2); - if (!S_ISREG(llss->inode2->i_mode)) { - rc = -EINVAL; - goto free; - } - - if (inode_permission(llss->inode1, MAY_WRITE) || - inode_permission(llss->inode2, MAY_WRITE)) { - rc = -EPERM; + rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2); + if (rc < 0) goto free; - } - - if (llss->inode2->i_sb != llss->inode1->i_sb) { - rc = -EXDEV; - goto free; - } /* we use 2 bool because it is easier to swap than 2 bits */ if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1) @@ -1900,10 +1743,8 @@ static int ll_swap_layouts(struct file *file1, struct file *file2, llss->dv2 = lsl->sl_dv2; rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2)); - if (rc == 0) /* same file, done! */ { - rc = 0; + if (!rc) /* same file, done! */ goto free; - } if (rc < 0) { /* sequentialize it */ swap(llss->inode1, llss->inode2); @@ -1925,19 +1766,6 @@ static int ll_swap_layouts(struct file *file1, struct file *file2, } } - /* to be able to restore mtime and atime after swap - * we need to first save them - */ - if (lsl->sl_flags & - (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) { - llss->ia1.ia_mtime = llss->inode1->i_mtime; - llss->ia1.ia_atime = llss->inode1->i_atime; - llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME; - llss->ia2.ia_mtime = llss->inode2->i_mtime; - llss->ia2.ia_atime = llss->inode2->i_atime; - llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME; - } - /* ultimate check, before swapping the layouts we check if * dataversion has changed (if requested) */ @@ -1987,39 +1815,6 @@ putgl: ll_put_grouplock(llss->inode1, file1, gid); } - /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) 
*/ - if (rc != 0) - goto free; - - /* clear useless flags */ - if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) { - llss->ia1.ia_valid &= ~ATTR_MTIME; - llss->ia2.ia_valid &= ~ATTR_MTIME; - } - - if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) { - llss->ia1.ia_valid &= ~ATTR_ATIME; - llss->ia2.ia_valid &= ~ATTR_ATIME; - } - - /* update time if requested */ - rc = 0; - if (llss->ia2.ia_valid != 0) { - inode_lock(llss->inode1); - rc = ll_setattr(file1->f_path.dentry, &llss->ia2); - inode_unlock(llss->inode1); - } - - if (llss->ia1.ia_valid != 0) { - int rc1; - - inode_lock(llss->inode2); - rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1); - inode_unlock(llss->inode2); - if (rc == 0) - rc = rc1; - } - free: kfree(llss); @@ -2176,24 +1971,52 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) sizeof(struct lustre_swap_layouts))) return -EFAULT; - if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */ + if ((file->f_flags & O_ACCMODE) == O_RDONLY) return -EPERM; file2 = fget(lsl.sl_fd); if (!file2) return -EBADF; - rc = -EPERM; - if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */ + /* O_WRONLY or O_RDWR */ + if ((file2->f_flags & O_ACCMODE) == O_RDONLY) { + rc = -EPERM; + goto out; + } + + if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) { + struct obd_client_handle *och = NULL; + struct ll_inode_info *lli; + struct inode *inode2; + + if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE) { + rc = -EINVAL; + goto out; + } + + lli = ll_i2info(inode); + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och) { + och = fd->fd_lease_och; + fd->fd_lease_och = NULL; + } + mutex_unlock(&lli->lli_och_mutex); + if (!och) { + rc = -ENOLCK; + goto out; + } + inode2 = file_inode(file2); + rc = ll_swap_layouts_close(och, inode, inode2); + } else { rc = ll_swap_layouts(file, file2, &lsl); + } +out: fput(file2); return rc; } case LL_IOC_LOV_GETSTRIPE: return ll_file_getstripe(inode, (struct lov_user_md __user *)arg); - case FSFILT_IOC_FIEMAP: - return ll_ioctl_fiemap(inode, arg); case FSFILT_IOC_GETFLAGS: case FSFILT_IOC_SETFLAGS: return ll_iocontrol(inode, file, cmd, arg); @@ -2489,17 +2312,17 @@ static int ll_flush(struct file *file, fl_owner_t id) int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, enum cl_fsync_mode mode, int ignore_layout) { - struct cl_env_nest nest; struct lu_env *env; struct cl_io *io; struct cl_fsync_io *fio; int result; + int refcheck; if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL && mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL) return -EINVAL; - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) return PTR_ERR(env); @@ -2522,7 +2345,7 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, if (result == 0) result = fio->fi_nr_written; cl_io_fini(env, io); - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); return result; } @@ -2549,9 +2372,11 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) lli->lli_async_rc = 0; if (rc == 0) rc = err; - err = lov_read_and_clear_async_rc(lli->lli_clob); - if (rc == 0) - rc = err; + if (lli->lli_clob) { + err = lov_read_and_clear_async_rc(lli->lli_clob); + if (rc == 0) + rc = err; + } } err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req); @@ -2588,7 +2413,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) }; struct md_op_data *op_data; struct lustre_handle lockh = {0}; - ldlm_policy_data_t flock = { {0} }; + union ldlm_policy_data flock = { { 0 } }; int fl_type = file_lock->fl_type; __u64 
flags = 0; int rc; @@ -2707,7 +2532,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) } int ll_get_fid_by_name(struct inode *parent, const char *name, - int namelen, struct lu_fid *fid) + int namelen, struct lu_fid *fid, + struct inode **inode) { struct md_op_data *op_data = NULL; struct ptlrpc_request *req; @@ -2719,7 +2545,7 @@ int ll_get_fid_by_name(struct inode *parent, const char *name, if (IS_ERR(op_data)) return PTR_ERR(op_data); - op_data->op_valid = OBD_MD_FLID; + op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE; rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc < 0) @@ -2732,6 +2558,9 @@ int ll_get_fid_by_name(struct inode *parent, const char *name, } if (fid) *fid = body->mbo_fid1; + + if (inode) + rc = ll_prep_inode(inode, req, parent->i_sb, NULL); out_req: ptlrpc_req_finished(req); return rc; @@ -2741,9 +2570,12 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx, const char *name, int namelen) { struct ptlrpc_request *request = NULL; + struct obd_client_handle *och = NULL; struct inode *child_inode = NULL; struct dentry *dchild = NULL; struct md_op_data *op_data; + struct mdt_body *body; + u64 data_version = 0; struct qstr qstr; int rc; @@ -2762,22 +2594,25 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx, dchild = d_lookup(file_dentry(file), &qstr); if (dchild) { op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); - if (dchild->d_inode) { + if (dchild->d_inode) child_inode = igrab(dchild->d_inode); - if (child_inode) { - inode_lock(child_inode); - op_data->op_fid3 = *ll_inode2fid(child_inode); - ll_invalidate_aliases(child_inode); - } - } dput(dchild); - } else { + } + + if (!child_inode) { rc = ll_get_fid_by_name(parent, name, namelen, - &op_data->op_fid3); + &op_data->op_fid3, &child_inode); if (rc) goto out_free; } + if (!child_inode) { + rc = -EINVAL; + goto out_free; + } + + inode_lock(child_inode); + op_data->op_fid3 = *ll_inode2fid(child_inode); if (!fid_is_sane(&op_data->op_fid3)) { CERROR("%s: migrate %s, but fid "DFID" is insane\n", ll_get_fsname(parent->i_sb, NULL, 0), name, @@ -2796,6 +2631,26 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx, rc = 0; goto out_free; } +again: + if (S_ISREG(child_inode->i_mode)) { + och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0); + if (IS_ERR(och)) { + rc = PTR_ERR(och); + och = NULL; + goto out_free; + } + + rc = ll_data_version(child_inode, &data_version, + LL_DV_WR_FLUSH); + if (rc) + goto out_free; + + op_data->op_handle = och->och_fh; + op_data->op_data = och->och_mod; + op_data->op_data_version = data_version; + op_data->op_lease_handle = och->och_lease_handle; + op_data->op_bias |= MDS_RENAME_MIGRATE; + } op_data->op_mds = mdtidx; op_data->op_cli_flags = CLI_MIGRATE; @@ -2804,10 +2659,32 @@ int ll_migrate(struct inode *parent, struct file *file, int mdtidx, if (!rc) ll_update_times(request, parent); - ptlrpc_req_finished(request); + body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); + if (!body) { + rc = -EPROTO; + goto out_free; + } + + /* + * If the server does release layout lock, then we cleanup + * the client och here, otherwise release it in out_free: + */ + if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) { + obd_mod_put(och->och_mod); + md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, och); + och->och_fh.cookie = DEAD_HANDLE_MAGIC; + kfree(och); + och = NULL; + } + ptlrpc_req_finished(request); + /* Try again if the file layout has changed. 
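+ * The whole sequence from the "again" label is then redone; in
+ * outline (error checks omitted, all names as in this function):
+ *
+ *	och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
+ *	ll_data_version(child_inode, &data_version, LL_DV_WR_FLUSH);
+ *	op_data->op_handle = och->och_fh;
+ *	op_data->op_data_version = data_version;
+ *	op_data->op_lease_handle = och->och_lease_handle;
+ *	op_data->op_bias |= MDS_RENAME_MIGRATE;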
*/ + if (rc == -EAGAIN && S_ISREG(child_inode->i_mode)) + goto again; out_free: if (child_inode) { + if (och) /* close the file */ + ll_lease_close(och, child_inode, NULL); clear_nlink(child_inode); inode_unlock(child_inode); iput(child_inode); @@ -2837,7 +2714,7 @@ int ll_have_md_lock(struct inode *inode, __u64 *bits, enum ldlm_mode l_req_mode) { struct lustre_handle lockh; - ldlm_policy_data_t policy; + union ldlm_policy_data policy; enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ? (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode; struct lu_fid *fid; @@ -2878,7 +2755,7 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits, struct lustre_handle *lockh, __u64 flags, enum ldlm_mode mode) { - ldlm_policy_data_t policy = { .l_inodebits = {bits} }; + union ldlm_policy_data policy = { .l_inodebits = { bits } }; struct lu_fid *fid; fid = &ll_i2info(inode)->lli_fid; @@ -2893,6 +2770,13 @@ static int ll_inode_revalidate_fini(struct inode *inode, int rc) /* Already unlinked. Just update nlink and return success */ if (rc == -ENOENT) { clear_nlink(inode); + /* If it is a striped directory and there is a bad stripe, * revalidate the dentry again instead of returning an error */ + if (S_ISDIR(inode->i_mode) && ll_i2info(inode)->lli_lsm_md) + return 0; + /* This path cannot be hit for regular files unless in * case of obscure races, so no need to validate size. */ @@ -3040,6 +2924,8 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime; LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime; } else { + struct ll_inode_info *lli = ll_i2info(inode); + /* In case of restore, the MDT has the right size and has * already send it back without granting the layout lock, * inode is up-to-date so glimpse is useless.
@@ -3047,7 +2933,7 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) * restore the MDT holds the layout lock so the glimpse will * block up to the end of restore (getattr will block) */ - if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING)) + if (!test_bit(LLIF_FILE_RESTORING, &lli->lli_flags)) rc = ll_glimpse_size(inode); } return rc; @@ -3095,13 +2981,12 @@ static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, { int rc; size_t num_bytes; - struct ll_user_fiemap *fiemap; + struct fiemap *fiemap; unsigned int extent_count = fieinfo->fi_extents_max; num_bytes = sizeof(*fiemap) + (extent_count * - sizeof(struct ll_fiemap_extent)); + sizeof(struct fiemap_extent)); fiemap = libcfs_kvzalloc(num_bytes, GFP_NOFS); - if (!fiemap) return -ENOMEM; @@ -3109,9 +2994,10 @@ static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, fiemap->fm_extent_count = fieinfo->fi_extents_max; fiemap->fm_start = start; fiemap->fm_length = len; + if (extent_count > 0 && copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start, - sizeof(struct ll_fiemap_extent)) != 0) { + sizeof(struct fiemap_extent))) { rc = -EFAULT; goto out; } @@ -3123,11 +3009,10 @@ static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (extent_count > 0 && copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0], fiemap->fm_mapped_extents * - sizeof(struct ll_fiemap_extent)) != 0) { + sizeof(struct fiemap_extent))) { rc = -EFAULT; goto out; } - out: kvfree(fiemap); return rc; @@ -3370,35 +3255,50 @@ ll_iocontrol_call(struct inode *inode, struct file *file, int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf) { struct ll_inode_info *lli = ll_i2info(inode); - struct cl_env_nest nest; + struct cl_object *obj = lli->lli_clob; struct lu_env *env; - int result; + int rc; + int refcheck; - if (!lli->lli_clob) + if (!obj) return 0; - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) return PTR_ERR(env); - result = cl_conf_set(env, lli->lli_clob, conf); - cl_env_nested_put(&nest, env); + rc = cl_conf_set(env, obj, conf); + if (rc < 0) + goto out; if (conf->coc_opc == OBJECT_CONF_SET) { struct ldlm_lock *lock = conf->coc_lock; + struct cl_layout cl = { + .cl_layout_gen = 0, + }; LASSERT(lock); LASSERT(ldlm_has_layout(lock)); - if (result == 0) { - /* it can only be allowed to match after layout is - * applied to inode otherwise false layout would be - * seen. Applying layout should happen before dropping - * the intent lock. - */ - ldlm_lock_allow_match(lock); - } + + /* it can only be allowed to match after layout is + * applied to inode otherwise false layout would be + * seen. Applying layout should happen before dropping + * the intent lock. + */ + ldlm_lock_allow_match(lock); + + rc = cl_object_layout_get(env, obj, &cl); + if (rc < 0) + goto out; + + CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n", + PFID(&lli->lli_fid), ll_layout_version_get(lli), + cl.cl_layout_gen); + ll_layout_version_set(lli, cl.cl_layout_gen); } - return result; +out: + cl_env_put(env, &refcheck); + return rc; } /* Fetch layout from MDT with getxattr request, if it's not ready yet */ @@ -3477,12 +3377,11 @@ out: * in this function. 
*/ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, - struct inode *inode, __u32 *gen, bool reconf) + struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); struct ldlm_lock *lock; - struct lustre_md md = { NULL }; struct cl_object_conf conf; int rc = 0; bool lvb_ready; @@ -3494,8 +3393,8 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, LASSERT(lock); LASSERT(ldlm_has_layout(lock)); - LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d", - PFID(&lli->lli_fid), inode, reconf); + LDLM_DEBUG(lock, "File " DFID "(%p) being reconfigured", + PFID(&lli->lli_fid), inode); /* in case this is a caching lock and reinstate with new inode */ md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL); @@ -3506,15 +3405,8 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, /* checking lvb_ready is racy but this is okay. The worst case is * that multi processes may configure the file on the same time. */ - if (lvb_ready || !reconf) { - rc = -ENODATA; - if (lvb_ready) { - /* layout_gen must be valid if layout lock is not - * cancelled and stripe has already set - */ - *gen = ll_layout_version_get(lli); - rc = 0; - } + if (lvb_ready) { + rc = 0; goto out; } @@ -3524,39 +3416,19 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, /* for layout lock, lmm is returned in lock's lvb. * lvb_data is immutable if the lock is held so it's safe to access it - * without res lock. See the description in ldlm_lock_decref_internal() - * for the condition to free lvb_data of layout lock - */ - if (lock->l_lvb_data) { - rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm, - lock->l_lvb_data, lock->l_lvb_len); - if (rc >= 0) { - *gen = LL_LAYOUT_GEN_EMPTY; - if (md.lsm) - *gen = md.lsm->lsm_layout_gen; - rc = 0; - } else { - CERROR("%s: file " DFID " unpackmd error: %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(&lli->lli_fid), rc); - } - } - if (rc < 0) - goto out; - - /* set layout to file. Unlikely this will fail as old layout was + * without res lock. + * + * set layout to file. Unlikely this will fail as old layout was * surely eliminated */ memset(&conf, 0, sizeof(conf)); conf.coc_opc = OBJECT_CONF_SET; conf.coc_inode = inode; conf.coc_lock = lock; - conf.u.coc_md = &md; + conf.u.coc_layout.lb_buf = lock->l_lvb_data; + conf.u.coc_layout.lb_len = lock->l_lvb_len; rc = ll_layout_conf(inode, &conf); - if (md.lsm) - obd_free_memmd(sbi->ll_dt_exp, &md.lsm); - /* refresh layout failed, need to wait */ wait_layout = rc == -EBUSY; @@ -3584,20 +3456,7 @@ out: return rc; } -/** - * This function checks if there exists a LAYOUT lock on the client side, - * or enqueues it if it doesn't have one in cache. - * - * This function will not hold layout lock so it may be revoked any time after - * this function returns. Any operations depend on layout should be redone - * in that case. - * - * This function should be called before lov_io_init() to get an uptodate - * layout version, the caller should save the version number and after IO - * is finished, this function should be called again to verify that layout - * is not changed during IO time. 
- */ -int ll_layout_refresh(struct inode *inode, __u32 *gen) +static int ll_layout_refresh_locked(struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -3613,17 +3472,6 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) }; int rc; - *gen = ll_layout_version_get(lli); - if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE) - return 0; - - /* sanity checks */ - LASSERT(fid_is_sane(ll_inode2fid(inode))); - LASSERT(S_ISREG(inode->i_mode)); - - /* take layout lock mutex to enqueue layout lock exclusively. */ - mutex_lock(&lli->lli_layout_mutex); - again: /* mostly layout lock is caching on the local side, so try to match * it before grabbing layout lock mutex. @@ -3631,20 +3479,16 @@ again: mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0, LCK_CR | LCK_CW | LCK_PR | LCK_PW); if (mode != 0) { /* hit cached lock */ - rc = ll_layout_lock_set(&lockh, mode, inode, gen, true); + rc = ll_layout_lock_set(&lockh, mode, inode); if (rc == -EAGAIN) goto again; - - mutex_unlock(&lli->lli_layout_mutex); return rc; } op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0, LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) { - mutex_unlock(&lli->lli_layout_mutex); + if (IS_ERR(op_data)) return PTR_ERR(op_data); - } /* have to enqueue one */ memset(&it, 0, sizeof(it)); @@ -3668,10 +3512,50 @@ again: if (rc == 0) { /* set lock data in case this is a new lock */ ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL); - rc = ll_layout_lock_set(&lockh, mode, inode, gen, true); + rc = ll_layout_lock_set(&lockh, mode, inode); if (rc == -EAGAIN) goto again; } + + return rc; +} + +/** + * This function checks if there exists a LAYOUT lock on the client side, + * or enqueues it if it doesn't have one in cache. + * + * This function will not hold layout lock so it may be revoked any time after + * this function returns. Any operations depend on layout should be redone + * in that case. + * + * This function should be called before lov_io_init() to get an uptodate + * layout version, the caller should save the version number and after IO + * is finished, this function should be called again to verify that layout + * is not changed during IO time. + */ +int ll_layout_refresh(struct inode *inode, __u32 *gen) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_sb_info *sbi = ll_i2sbi(inode); + int rc; + + *gen = ll_layout_version_get(lli); + if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE) + return 0; + + /* sanity checks */ + LASSERT(fid_is_sane(ll_inode2fid(inode))); + LASSERT(S_ISREG(inode->i_mode)); + + /* take layout lock mutex to enqueue layout lock exclusively. 
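+ * The mutex is held across ll_layout_refresh_locked() and the
+ * re-read of the layout generation below, so concurrent refreshes
+ * of the same inode cannot interleave.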
*/ + mutex_lock(&lli->lli_layout_mutex); + + rc = ll_layout_refresh_locked(inode); + if (rc < 0) + goto out; + + *gen = ll_layout_version_get(lli); +out: mutex_unlock(&lli->lli_layout_mutex); return rc; diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c index 22507b9c6d69..504498de536e 100644 --- a/drivers/staging/lustre/lustre/llite/glimpse.c +++ b/drivers/staging/lustre/lustre/llite/glimpse.c @@ -80,69 +80,60 @@ blkcnt_t dirty_cnt(struct inode *inode) int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, struct inode *inode, struct cl_object *clob, int agl) { - struct ll_inode_info *lli = ll_i2info(inode); const struct lu_fid *fid = lu_object_fid(&clob->co_lu); - int result; + struct cl_lock *lock = vvp_env_lock(env); + struct cl_lock_descr *descr = &lock->cll_descr; + int result = 0; + + CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid)); + + /* NOTE: this looks like DLM lock request, but it may + * not be one. Due to CEF_ASYNC flag (translated + * to LDLM_FL_HAS_INTENT by osc), this is + * glimpse request, that won't revoke any + * conflicting DLM locks held. Instead, + * ll_glimpse_callback() will be called on each + * client holding a DLM lock against this file, + * and resulting size will be returned for each + * stripe. DLM lock on [0, EOF] is acquired only + * if there were no conflicting locks. If there + * were conflicting locks, enqueuing or waiting + * fails with -ENAVAIL, but valid inode + * attributes are returned anyway. + */ + *descr = whole_file; + descr->cld_obj = clob; + descr->cld_mode = CLM_READ; + descr->cld_enq_flags = CEF_ASYNC | CEF_MUST; + if (agl) + descr->cld_enq_flags |= CEF_AGL; + /* + * CEF_ASYNC is used because glimpse sub-locks cannot + * deadlock (because they never conflict with other + * locks) and, hence, can be enqueued out-of-order. + * + * CEF_MUST protects glimpse lock from conversion into + * a lockless mode. + */ + result = cl_lock_request(env, io, lock); + if (result < 0) + return result; - result = 0; - if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) { - CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid)); - if (lli->lli_has_smd) { - struct cl_lock *lock = vvp_env_lock(env); - struct cl_lock_descr *descr = &lock->cll_descr; - - /* NOTE: this looks like DLM lock request, but it may - * not be one. Due to CEF_ASYNC flag (translated - * to LDLM_FL_HAS_INTENT by osc), this is - * glimpse request, that won't revoke any - * conflicting DLM locks held. Instead, - * ll_glimpse_callback() will be called on each - * client holding a DLM lock against this file, - * and resulting size will be returned for each - * stripe. DLM lock on [0, EOF] is acquired only - * if there were no conflicting locks. If there - * were conflicting locks, enqueuing or waiting - * fails with -ENAVAIL, but valid inode - * attributes are returned anyway. - */ - *descr = whole_file; - descr->cld_obj = clob; - descr->cld_mode = CLM_READ; - descr->cld_enq_flags = CEF_ASYNC | CEF_MUST; - if (agl) - descr->cld_enq_flags |= CEF_AGL; + if (!agl) { + ll_merge_attr(env, inode); + if (i_size_read(inode) > 0 && !inode->i_blocks) { /* - * CEF_ASYNC is used because glimpse sub-locks cannot - * deadlock (because they never conflict with other - * locks) and, hence, can be enqueued out-of-order. - * - * CEF_MUST protects glimpse lock from conversion into - * a lockless mode. 
+ * LU-417: Add dirty pages block count + * lest i_blocks reports 0, some "cp" or + * "tar" may think it's a completely + * sparse file and skip it. */ - result = cl_lock_request(env, io, lock); - if (result < 0) - return result; - - if (!agl) { - ll_merge_attr(env, inode); - if (i_size_read(inode) > 0 && - inode->i_blocks == 0) { - /* - * LU-417: Add dirty pages block count - * lest i_blocks reports 0, some "cp" or - * "tar" may think it's a completely - * sparse file and skip it. - */ - inode->i_blocks = dirty_cnt(inode); - } - } - cl_lock_release(env, lock); - } else { - CDEBUG(D_DLMTRACE, "No objects for inode\n"); - ll_merge_attr(env, inode); + inode->i_blocks = dirty_cnt(inode); } } + cl_lock_release(env, lock); + return result; } @@ -212,39 +203,3 @@ again: } return result; } - -int cl_local_size(struct inode *inode) -{ - struct lu_env *env = NULL; - struct cl_io *io = NULL; - struct cl_object *clob; - int result; - int refcheck; - - if (!ll_i2info(inode)->lli_has_smd) - return 0; - - result = cl_io_get(inode, &env, &io, &refcheck); - if (result <= 0) - return result; - - clob = io->ci_obj; - result = cl_io_init(env, io, CIT_MISC, clob); - if (result > 0) { - result = io->ci_result; - } else if (result == 0) { - struct cl_lock *lock = vvp_env_lock(env); - - lock->cll_descr = whole_file; - lock->cll_descr.cld_enq_flags = CEF_PEEK; - lock->cll_descr.cld_obj = clob; - result = cl_lock_request(env, io, lock); - if (result == 0) { - ll_merge_attr(env, inode); - cl_lock_release(env, lock); - } - } - cl_io_fini(env, io); - cl_env_put(env, &refcheck); - return result; -} diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c index 084330d08f7a..dd1cfd8f5213 100644 --- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c @@ -80,7 +80,8 @@ int cl_inode_fini_refcheck; */ static DEFINE_MUTEX(cl_inode_fini_guard); -int cl_setattr_ost(struct inode *inode, const struct iattr *attr) +int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr, + unsigned int attr_flags) { struct lu_env *env; struct cl_io *io; @@ -92,14 +93,15 @@ int cl_setattr_ost(struct inode *inode, const struct iattr *attr) return PTR_ERR(env); io = vvp_env_thread_io(env); - io->ci_obj = ll_i2info(inode)->lli_clob; + io->ci_obj = obj; io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime); io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size; + io->u.ci_setattr.sa_attr_flags = attr_flags; io->u.ci_setattr.sa_valid = attr->ia_valid; - io->u.ci_setattr.sa_parent_fid = ll_inode2fid(inode); + io->u.ci_setattr.sa_parent_fid = lu_object_fid(&obj->co_lu); again: if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) { @@ -148,7 +150,7 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md) struct cl_object_conf conf = { .coc_inode = inode, .u = { - .coc_md = md + .coc_layout = md->layout, } }; int result = 0; @@ -182,7 +184,6 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md) * locked by I_NEW bit. 
*/ lli->lli_clob = clob; - lli->lli_has_smd = lsm_has_objects(md->lsm); lu_object_ref_add(&clob->co_lu, "inode", inode); } else { result = PTR_ERR(clob); @@ -245,15 +246,11 @@ void cl_inode_fini(struct inode *inode) int emergency; if (clob) { - void *cookie; - - cookie = cl_env_reenter(); env = cl_env_get(&refcheck); emergency = IS_ERR(env); if (emergency) { mutex_lock(&cl_inode_fini_guard); LASSERT(cl_inode_fini_env); - cl_env_implant(cl_inode_fini_env, &refcheck); env = cl_inode_fini_env; } /* @@ -265,13 +262,10 @@ void cl_inode_fini(struct inode *inode) lu_object_ref_del(&clob->co_lu, "inode", inode); cl_object_put_last(env, clob); lli->lli_clob = NULL; - if (emergency) { - cl_env_unplant(cl_inode_fini_env, &refcheck); + if (emergency) mutex_unlock(&cl_inode_fini_guard); - } else { + else cl_env_put(env, &refcheck); - } - cl_env_reexit(cookie); } } @@ -302,22 +296,3 @@ __u32 cl_fid_build_gen(const struct lu_fid *fid) gen = fid_flatten(fid) >> 32; return gen; } - -/* lsm is unreliable after hsm implementation as layout can be changed at - * any time. This is only to support old, non-clio-ized interfaces. It will - * cause deadlock if clio operations are called with this extra layout refcount - * because in case the layout changed during the IO, ll_layout_refresh() will - * have to wait for the refcount to become zero to destroy the older layout. - * - * Notice that the lsm returned by this function may not be valid unless called - * inside layout lock - MDS_INODELOCK_LAYOUT. - */ -struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode) -{ - return lov_lsm_get(ll_i2info(inode)->lli_clob); -} - -inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm) -{ - lov_lsm_put(ll_i2info(inode)->lli_clob, lsm); -} diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c index fb346c12dad2..f48660ed350f 100644 --- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c +++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c @@ -47,36 +47,29 @@ */ int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) { - struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 }; - __u32 valsize = sizeof(struct lov_desc); - int rc, easize, def_easize, cookiesize; - struct lov_desc desc; - __u16 stripes, def_stripes; - - rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC, - &valsize, &desc, NULL); + u32 val_size, max_easize, def_easize; + int rc; + + val_size = sizeof(max_easize); + rc = obd_get_info(NULL, dt_exp, sizeof(KEY_MAX_EASIZE), KEY_MAX_EASIZE, + &val_size, &max_easize); if (rc) return rc; - stripes = min_t(__u32, desc.ld_tgt_count, LOV_MAX_STRIPE_COUNT); - lsm.lsm_stripe_count = stripes; - easize = obd_size_diskmd(dt_exp, &lsm); - - def_stripes = min_t(__u32, desc.ld_default_stripe_count, - LOV_MAX_STRIPE_COUNT); - lsm.lsm_stripe_count = def_stripes; - def_easize = obd_size_diskmd(dt_exp, &lsm); - - cookiesize = stripes * sizeof(struct llog_cookie); + val_size = sizeof(def_easize); + rc = obd_get_info(NULL, dt_exp, sizeof(KEY_DEFAULT_EASIZE), + KEY_DEFAULT_EASIZE, &val_size, &def_easize); + if (rc) + return rc; - /* default cookiesize is 0 because from 2.4 server doesn't send + /* + * default cookiesize is 0 because from 2.4 server doesn't send * llog cookies to client. 
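+ * (With the cookies gone, md_init_ea_size() now takes just the
+ * maximum and default EA sizes; the two cookiesize arguments of
+ * the old five-argument form have been dropped.)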
*/ - CDEBUG(D_HA, - "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n", - def_easize, easize, cookiesize); + CDEBUG(D_HA, "updating def/max_easize: %d/%d\n", + def_easize, max_easize); - rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0); + rc = md_init_ea_size(md_exp, max_easize, def_easize); return rc; } @@ -169,13 +162,11 @@ int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, return rc; } - cg->lg_env = cl_env_get(&refcheck); + cg->lg_env = env; cg->lg_io = io; cg->lg_lock = lock; cg->lg_gid = gid; - LASSERT(cg->lg_env == env); - cl_env_unplant(env, &refcheck); return 0; } @@ -184,14 +175,10 @@ void cl_put_grouplock(struct ll_grouplock *cg) struct lu_env *env = cg->lg_env; struct cl_io *io = cg->lg_io; struct cl_lock *lock = cg->lg_lock; - int refcheck; LASSERT(cg->lg_env); LASSERT(cg->lg_gid); - cl_env_implant(env, &refcheck); - cl_env_put(env, &refcheck); - cl_lock_release(env, lock); cl_io_fini(env, io); cl_env_put(env, NULL); diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c deleted file mode 100644 index 8644631bf2ba..000000000000 --- a/drivers/staging/lustre/lustre/llite/llite_close.c +++ /dev/null @@ -1,395 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/llite/llite_close.c - * - * Lustre Lite routines to issue a secondary close after writeback - */ - -#include <linux/module.h> - -#define DEBUG_SUBSYSTEM S_LLITE - -#include "llite_internal.h" - -/** records that a write is in flight */ -void vvp_write_pending(struct vvp_object *club, struct vvp_page *page) -{ - struct ll_inode_info *lli = ll_i2info(club->vob_inode); - - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_SOM_DIRTY; - if (page && list_empty(&page->vpg_pending_linkage)) - list_add(&page->vpg_pending_linkage, &club->vob_pending_list); - spin_unlock(&lli->lli_lock); -} - -/** records that a write has completed */ -void vvp_write_complete(struct vvp_object *club, struct vvp_page *page) -{ - struct ll_inode_info *lli = ll_i2info(club->vob_inode); - int rc = 0; - - spin_lock(&lli->lli_lock); - if (page && !list_empty(&page->vpg_pending_linkage)) { - list_del_init(&page->vpg_pending_linkage); - rc = 1; - } - spin_unlock(&lli->lli_lock); - if (rc) - ll_queue_done_writing(club->vob_inode, 0); -} - -/** Queues DONE_WRITING if - * - done writing is allowed; - * - inode has no no dirty pages; - */ -void ll_queue_done_writing(struct inode *inode, unsigned long flags) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob); - - spin_lock(&lli->lli_lock); - lli->lli_flags |= flags; - - if ((lli->lli_flags & LLIF_DONE_WRITING) && - list_empty(&club->vob_pending_list)) { - struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq; - - if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CWARN("%s: file "DFID"(flags %u) Size-on-MDS valid, done writing allowed and no diry pages\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), lli->lli_flags); - /* DONE_WRITING is allowed and inode has no dirty page. */ - spin_lock(&lcq->lcq_lock); - - LASSERT(list_empty(&lli->lli_close_list)); - CDEBUG(D_INODE, "adding inode "DFID" to close list\n", - PFID(ll_inode2fid(inode))); - list_add_tail(&lli->lli_close_list, &lcq->lcq_head); - - /* Avoid a concurrent insertion into the close thread queue: - * an inode is already in the close thread, open(), write(), - * close() happen, epoch is closed as the inode is marked as - * LLIF_EPOCH_PENDING. When pages are written inode should not - * be inserted into the queue again, clear this flag to avoid - * it. - */ - lli->lli_flags &= ~LLIF_DONE_WRITING; - - wake_up(&lcq->lcq_waitq); - spin_unlock(&lcq->lcq_lock); - } - spin_unlock(&lli->lli_lock); -} - -/** Pack SOM attributes info @opdata for CLOSE, DONE_WRITING rpc. */ -void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data) -{ - struct ll_inode_info *lli = ll_i2info(inode); - - op_data->op_flags |= MF_SOM_CHANGE; - /* Check if Size-on-MDS attributes are valid. */ - if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), lli->lli_flags); - - if (!cl_local_size(inode)) { - /* Send Size-on-MDS Attributes if valid. */ - op_data->op_attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET | - ATTR_ATIME_SET | ATTR_SIZE | ATTR_BLOCKS; - } -} - -/** Closes ioepoch and packs Size-on-MDS attribute if needed into @op_data. 
*/ -void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data, - struct obd_client_handle **och, unsigned long flags) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob); - - spin_lock(&lli->lli_lock); - if (!(list_empty(&club->vob_pending_list))) { - if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) { - LASSERT(*och); - LASSERT(!lli->lli_pending_och); - /* Inode is dirty and there is no pending write done - * request yet, DONE_WRITE is to be sent later. - */ - lli->lli_flags |= LLIF_EPOCH_PENDING; - lli->lli_pending_och = *och; - spin_unlock(&lli->lli_lock); - - inode = igrab(inode); - LASSERT(inode); - goto out; - } - if (flags & LLIF_DONE_WRITING) { - /* Some pages are still dirty, it is early to send - * DONE_WRITE. Wait until all pages will be flushed - * and try DONE_WRITE again later. - */ - LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING)); - lli->lli_flags |= LLIF_DONE_WRITING; - spin_unlock(&lli->lli_lock); - - inode = igrab(inode); - LASSERT(inode); - goto out; - } - } - CDEBUG(D_INODE, "Epoch %llu closed on "DFID"\n", - ll_i2info(inode)->lli_ioepoch, PFID(&lli->lli_fid)); - op_data->op_flags |= MF_EPOCH_CLOSE; - - if (flags & LLIF_DONE_WRITING) { - LASSERT(lli->lli_flags & LLIF_SOM_DIRTY); - LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING)); - *och = lli->lli_pending_och; - lli->lli_pending_och = NULL; - lli->lli_flags &= ~LLIF_EPOCH_PENDING; - } else { - /* Pack Size-on-MDS inode attributes only if they has changed */ - if (!(lli->lli_flags & LLIF_SOM_DIRTY)) { - spin_unlock(&lli->lli_lock); - goto out; - } - - /* There is a pending DONE_WRITE -- close epoch with no - * attribute change. - */ - if (lli->lli_flags & LLIF_EPOCH_PENDING) { - spin_unlock(&lli->lli_lock); - goto out; - } - } - - LASSERT(list_empty(&club->vob_pending_list)); - lli->lli_flags &= ~LLIF_SOM_DIRTY; - spin_unlock(&lli->lli_lock); - ll_done_writing_attr(inode, op_data); - -out: - return; -} - -/** - * Cliens updates SOM attributes on MDS (including llog cookies): - * obd_getattr with no lock and md_setattr. - */ -int ll_som_update(struct inode *inode, struct md_op_data *op_data) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ptlrpc_request *request = NULL; - __u32 old_flags; - struct obdo *oa; - int rc; - - LASSERT(op_data); - if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), lli->lli_flags); - - oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); - if (!oa) { - CERROR("can't allocate memory for Size-on-MDS update.\n"); - return -ENOMEM; - } - - old_flags = op_data->op_flags; - op_data->op_flags = MF_SOM_CHANGE; - - /* If inode is already in another epoch, skip getattr from OSTs. */ - if (lli->lli_ioepoch == op_data->op_ioepoch) { - rc = ll_inode_getattr(inode, oa, op_data->op_ioepoch, - old_flags & MF_GETATTR_LOCK); - if (rc) { - oa->o_valid = 0; - if (rc != -ENOENT) - CERROR("%s: inode_getattr failed - unable to send a Size-on-MDS attribute update for inode "DFID": rc = %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), rc); - } else { - CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n", - PFID(&lli->lli_fid)); - } - /* Install attributes into op_data. 
*/ - md_from_obdo(op_data, oa, oa->o_valid); - } - - rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data, - NULL, 0, NULL, 0, &request, NULL); - ptlrpc_req_finished(request); - - kmem_cache_free(obdo_cachep, oa); - return rc; -} - -/** - * Closes the ioepoch and packs all the attributes into @op_data for - * DONE_WRITING rpc. - */ -static void ll_prepare_done_writing(struct inode *inode, - struct md_op_data *op_data, - struct obd_client_handle **och) -{ - ll_ioepoch_close(inode, op_data, och, LLIF_DONE_WRITING); - /* If there is no @och, we do not do D_W yet. */ - if (!*och) - return; - - ll_pack_inode2opdata(inode, op_data, &(*och)->och_fh); - ll_prep_md_op_data(op_data, inode, NULL, NULL, - 0, 0, LUSTRE_OPC_ANY, NULL); -} - -/** Send a DONE_WRITING rpc. */ -static void ll_done_writing(struct inode *inode) -{ - struct obd_client_handle *och = NULL; - struct md_op_data *op_data; - int rc; - - LASSERT(exp_connect_som(ll_i2mdexp(inode))); - - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - if (!op_data) - return; - - ll_prepare_done_writing(inode, op_data, &och); - /* If there is no @och, we do not do D_W yet. */ - if (!och) - goto out; - - rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL); - if (rc == -EAGAIN) - /* MDS has instructed us to obtain Size-on-MDS attribute from - * OSTs and send setattr to back to MDS. - */ - rc = ll_som_update(inode, op_data); - else if (rc) { - CERROR("%s: inode "DFID" mdc done_writing failed: rc = %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), rc); - } -out: - ll_finish_md_op_data(op_data); - if (och) { - md_clear_open_replay_data(ll_i2sbi(inode)->ll_md_exp, och); - kfree(och); - } -} - -static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq) -{ - struct ll_inode_info *lli = NULL; - - spin_lock(&lcq->lcq_lock); - - if (!list_empty(&lcq->lcq_head)) { - lli = list_entry(lcq->lcq_head.next, struct ll_inode_info, - lli_close_list); - list_del_init(&lli->lli_close_list); - } else if (atomic_read(&lcq->lcq_stop)) { - lli = ERR_PTR(-EALREADY); - } - - spin_unlock(&lcq->lcq_lock); - return lli; -} - -static int ll_close_thread(void *arg) -{ - struct ll_close_queue *lcq = arg; - - complete(&lcq->lcq_comp); - - while (1) { - struct l_wait_info lwi = { 0 }; - struct ll_inode_info *lli; - struct inode *inode; - - l_wait_event_exclusive(lcq->lcq_waitq, - (lli = ll_close_next_lli(lcq)) != NULL, - &lwi); - if (IS_ERR(lli)) - break; - - inode = ll_info2i(lli); - CDEBUG(D_INFO, "done_writing for inode "DFID"\n", - PFID(ll_inode2fid(inode))); - ll_done_writing(inode); - iput(inode); - } - - CDEBUG(D_INFO, "ll_close exiting\n"); - complete(&lcq->lcq_comp); - return 0; -} - -int ll_close_thread_start(struct ll_close_queue **lcq_ret) -{ - struct ll_close_queue *lcq; - struct task_struct *task; - - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CLOSE_THREAD)) - return -EINTR; - - lcq = kzalloc(sizeof(*lcq), GFP_NOFS); - if (!lcq) - return -ENOMEM; - - spin_lock_init(&lcq->lcq_lock); - INIT_LIST_HEAD(&lcq->lcq_head); - init_waitqueue_head(&lcq->lcq_waitq); - init_completion(&lcq->lcq_comp); - - task = kthread_run(ll_close_thread, lcq, "ll_close"); - if (IS_ERR(task)) { - kfree(lcq); - return PTR_ERR(task); - } - - wait_for_completion(&lcq->lcq_comp); - *lcq_ret = lcq; - return 0; -} - -void ll_close_thread_shutdown(struct ll_close_queue *lcq) -{ - init_completion(&lcq->lcq_comp); - atomic_inc(&lcq->lcq_stop); - wake_up(&lcq->lcq_waitq); - wait_for_completion(&lcq->lcq_comp); - kfree(lcq); -} diff --git 
a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 4bc551279aa4..2f46d475cd7d 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -97,31 +97,20 @@ struct ll_grouplock { unsigned long lg_gid; }; -enum lli_flags { - /* MDS has an authority for the Size-on-MDS attributes. */ - LLIF_MDS_SIZE_LOCK = (1 << 0), - /* Epoch close is postponed. */ - LLIF_EPOCH_PENDING = (1 << 1), - /* DONE WRITING is allowed. */ - LLIF_DONE_WRITING = (1 << 2), - /* Sizeon-on-MDS attributes are changed. An attribute update needs to - * be sent to MDS. - */ - LLIF_SOM_DIRTY = (1 << 3), +enum ll_file_flags { /* File data is modified. */ - LLIF_DATA_MODIFIED = (1 << 4), + LLIF_DATA_MODIFIED = 0, /* File is being restored */ - LLIF_FILE_RESTORING = (1 << 5), + LLIF_FILE_RESTORING = 1, /* Xattr cache is attached to the file */ - LLIF_XATTR_CACHE = (1 << 6), + LLIF_XATTR_CACHE = 2, }; struct ll_inode_info { __u32 lli_inode_magic; - __u32 lli_flags; - __u64 lli_ioepoch; spinlock_t lli_lock; + unsigned long lli_flags; struct posix_acl *lli_posix_acl; /* identifying fields for both metadata and data stacks. */ @@ -129,14 +118,6 @@ struct ll_inode_info { /* master inode fid for stripe directory */ struct lu_fid lli_pfid; - struct list_head lli_close_list; - - /* handle is to be sent to MDS later on done_writing and setattr. - * Open handle data are needed for the recovery to reconstruct - * the inode state on the MDS. XXX: recovery is not ready yet. - */ - struct obd_client_handle *lli_pending_och; - /* We need all three because every inode may be opened in different * modes */ @@ -204,7 +185,6 @@ struct ll_inode_info { struct { struct mutex lli_size_mutex; char *lli_symlink_name; - __u64 lli_maxbytes; /* * struct rw_semaphore { * signed long count; // align d.d_def_acl @@ -245,7 +225,6 @@ struct ll_inode_info { * In the future, if more members are added only for directory, * some of the following members can be moved into u.f. */ - bool lli_has_smd; struct cl_object *lli_clob; /* mutex to request for layout lock exclusively. */ @@ -282,6 +261,9 @@ int ll_xattr_cache_destroy(struct inode *inode); int ll_xattr_cache_get(struct inode *inode, const char *name, char *buffer, size_t size, __u64 valid); +int ll_init_security(struct dentry *dentry, struct inode *inode, + struct inode *dir); + /* * Locking to guarantee consistency of non-atomic updates to long long i_size, * consistency between file size and KMS. @@ -400,7 +382,7 @@ enum stats_track_type { #define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */ #define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */ #define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */ -#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */ +/* LL_SBI_SOM_PREVIEW 0x1000 SOM preview mount option, obsolete */ #define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. 
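With enum lli_flags replaced by enum ll_file_flags above, the values change from (1 << n) masks to plain bit numbers and lli_flags itself becomes an unsigned long, so the flags are meant to be driven through the kernel's atomic bitops rather than or-and-mask updates under lli_lock. Later hunks in this patch use set_bit() exactly this way; the test_bit()/clear_bit() counterparts shown here are the natural usage, not lines from the patch:

        /* mark the file data dirty without taking lli_lock */
        set_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);

        /* query and clear follow the same pattern */
        if (test_bit(LLIF_FILE_RESTORING, &lli->lli_flags))
                rc = -EBUSY;    /* hypothetical reaction, for illustration */
        clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);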
*/ #define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */ #define LL_SBI_AGL_ENABLED 0x8000 /* enable agl */ @@ -409,6 +391,8 @@ enum stats_track_type { #define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */ #define LL_SBI_XATTR_CACHE 0x80000 /* support for xattr cache */ #define LL_SBI_NOROOTSQUASH 0x100000 /* do not apply root squash */ +#define LL_SBI_ALWAYS_PING 0x200000 /* always ping even if server + * suppress_pings */ #define LL_SBI_FLAGS { \ "nolck", \ @@ -432,6 +416,7 @@ enum stats_track_type { "user_fid2path",\ "xattr_cache", \ "norootsquash", \ + "always_ping", \ } /* @@ -466,10 +451,10 @@ struct ll_sb_info { int ll_flags; unsigned int ll_umounting:1, - ll_xattr_cache_enabled:1; - struct lustre_client_ocd ll_lco; + ll_xattr_cache_enabled:1, + ll_client_common_fill_super_succeeded:1; - struct ll_close_queue *ll_lcq; + struct lustre_client_ocd ll_lco; struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ @@ -630,8 +615,6 @@ struct ll_file_data { struct list_head fd_lccs; /* list of ll_cl_context */ }; -struct lov_stripe_md; - extern struct dentry *llite_root; extern struct kset *llite_kset; @@ -682,8 +665,6 @@ enum { LPROC_LL_WRITE_BYTES, LPROC_LL_BRW_READ, LPROC_LL_BRW_WRITE, - LPROC_LL_OSC_READ, - LPROC_LL_OSC_WRITE, LPROC_LL_IOCTL, LPROC_LL_OPEN, LPROC_LL_RELEASE, @@ -741,9 +722,7 @@ int ll_writepage(struct page *page, struct writeback_control *wbc); int ll_writepages(struct address_space *, struct writeback_control *wbc); int ll_readpage(struct file *file, struct page *page); void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras); -int ll_readahead(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, struct ll_readahead_state *ras, - bool hit); +int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io); struct ll_cl_context *ll_cl_find(struct file *file); void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io); void ll_cl_remove(struct file *file, const struct lu_env *env); @@ -762,25 +741,14 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits, enum ldlm_mode mode); int ll_file_open(struct inode *inode, struct file *file); int ll_file_release(struct inode *inode, struct file *file); -int ll_glimpse_ioctl(struct ll_sb_info *sbi, - struct lov_stripe_md *lsm, lstat_t *st); -void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch); int ll_release_openhandle(struct inode *, struct lookup_intent *); int ll_md_real_close(struct inode *inode, fmode_t fmode); -void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data, - struct obd_client_handle **och, unsigned long flags); -void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data); -int ll_som_update(struct inode *inode, struct md_op_data *op_data); -int ll_inode_getattr(struct inode *inode, struct obdo *obdo, - __u64 ioepoch, int sync); -void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, - struct lustre_handle *fh); int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); struct posix_acl *ll_get_acl(struct inode *inode, int type); int ll_migrate(struct inode *parent, struct file *file, int mdtidx, const char *name, int namelen); int ll_get_fid_by_name(struct inode *parent, const char *name, - int namelen, struct lu_fid *fid); + int namelen, struct lu_fid *fid, struct inode **inode); int ll_inode_permission(struct inode *inode, int mask); int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, @@ -818,6 
+786,7 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt); void ll_put_super(struct super_block *sb); void ll_kill_super(struct super_block *sb); struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock); +void ll_dir_clear_lsm_md(struct inode *inode); void ll_clear_inode(struct inode *inode); int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import); int ll_setattr(struct dentry *de, struct iattr *attr); @@ -891,18 +860,6 @@ int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid); /* llite/symlink.c */ extern const struct inode_operations ll_fast_symlink_inode_operations; -/* llite/llite_close.c */ -struct ll_close_queue { - spinlock_t lcq_lock; - struct list_head lcq_head; - wait_queue_head_t lcq_waitq; - struct completion lcq_comp; - atomic_t lcq_stop; -}; - -void vvp_write_pending(struct vvp_object *club, struct vvp_page *page); -void vvp_write_complete(struct vvp_object *club, struct vvp_page *page); - /** * IO arguments for various VFS I/O interfaces. */ @@ -945,15 +902,11 @@ static inline struct vvp_io_args *ll_env_args(const struct lu_env *env) return &ll_env_info(env)->lti_args; } -void ll_queue_done_writing(struct inode *inode, unsigned long flags); -void ll_close_thread_shutdown(struct ll_close_queue *lcq); -int ll_close_thread_start(struct ll_close_queue **lcq_ret); - /* llite/llite_mmap.c */ int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last); int ll_file_mmap(struct file *file, struct vm_area_struct *vma); -void policy_from_vma(ldlm_policy_data_t *policy, struct vm_area_struct *vma, +void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma, unsigned long addr, size_t count); struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr, size_t count); @@ -1024,9 +977,14 @@ static inline struct lu_fid *ll_inode2fid(struct inode *inode) return fid; } -static inline __u64 ll_file_maxbytes(struct inode *inode) +static inline loff_t ll_file_maxbytes(struct inode *inode) { - return ll_i2info(inode)->lli_maxbytes; + struct cl_object *obj = ll_i2info(inode)->lli_clob; + + if (!obj) + return MAX_LFS_FILESIZE; + + return min_t(loff_t, cl_object_maxbytes(obj), MAX_LFS_FILESIZE); } /* llite/xattr.c */ @@ -1043,17 +1001,18 @@ extern const struct xattr_handler *ll_xattr_handlers[]; ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size); int ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer, size_t size, __u64 valid); +const struct xattr_handler *get_xattr_type(const char *name); /** * Common IO arguments for various VFS I/O interfaces. */ int cl_sb_init(struct super_block *sb); int cl_sb_fini(struct super_block *sb); -void ll_io_init(struct cl_io *io, const struct file *file, int write); -void ras_update(struct ll_sb_info *sbi, struct inode *inode, - struct ll_readahead_state *ras, unsigned long index, - unsigned hit); +enum ras_update_flags { + LL_RAS_HIT = 0x1, + LL_RAS_MMAP = 0x2 +}; void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len); void ll_ra_stats_inc(struct inode *inode, enum ra_stat which); @@ -1258,15 +1217,6 @@ struct ll_dio_pages { int ldp_nr; }; -static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt, - int rc) -{ - int opc = (crt == CRT_READ) ? 
LPROC_LL_OSC_READ : - LPROC_LL_OSC_WRITE; - - ll_stats_ops_tally(ll_s2sbi(cl2vvp_dev(dev)->vdv_sb), opc, rc); -} - ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, int rw, struct inode *inode, struct ll_dio_pages *pv); @@ -1365,11 +1315,6 @@ static inline void d_lustre_revalidate(struct dentry *dentry) spin_unlock(&dentry->d_lock); } -enum { - LL_LAYOUT_GEN_NONE = ((__u32)-2), /* layout lock was cancelled */ - LL_LAYOUT_GEN_EMPTY = ((__u32)-1) /* for empty layout */ -}; - int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf); int ll_layout_refresh(struct inode *inode, __u32 *gen); int ll_layout_restore(struct inode *inode, loff_t start, __u64 length); @@ -1383,14 +1328,14 @@ int ll_page_sync_io(const struct lu_env *env, struct cl_io *io, int ll_getparent(struct file *file, struct getparent __user *arg); /* lcommon_cl.c */ -int cl_setattr_ost(struct inode *inode, const struct iattr *attr); +int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr, + unsigned int attr_flags); extern struct lu_env *cl_inode_fini_env; extern int cl_inode_fini_refcheck; int cl_file_inode_init(struct inode *inode, struct lustre_md *md); void cl_inode_fini(struct inode *inode); -int cl_local_size(struct inode *inode); __u64 cl_fid_build_ino(const struct lu_fid *fid, int api32); __u32 cl_fid_build_gen(const struct lu_fid *fid); diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index e5c62f4ce3d8..25f5aed97f63 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -191,10 +191,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_FLOCK_DEAD | OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK | OBD_CONNECT_OPEN_BY_FID | - OBD_CONNECT_DIR_STRIPE; - - if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) - data->ocd_connect_flags |= OBD_CONNECT_SOM; + OBD_CONNECT_DIR_STRIPE | + OBD_CONNECT_BULK_MBITS; if (sbi->ll_flags & LL_SBI_LRU_RESIZE) data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; @@ -226,6 +224,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, /* real client */ data->ocd_connect_flags |= OBD_CONNECT_REAL; + /* always ping even if server suppress_pings */ + if (sbi->ll_flags & LL_SBI_ALWAYS_PING) + data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; + data->ocd_brw_size = MD_MAX_BRW_SIZE; err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, @@ -288,7 +290,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, size = sizeof(*data); err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA), - KEY_CONN_DATA, &size, data, NULL); + KEY_CONN_DATA, &size, data); if (err) { CERROR("%s: Get connect data failed: rc = %d\n", sbi->ll_md_exp->exp_obd->obd_name, err); @@ -355,10 +357,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS; - - if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) - data->ocd_connect_flags |= OBD_CONNECT_SOM; + OBD_CONNECT_LAYOUTLOCK | + OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK | + OBD_CONNECT_BULK_MBITS; if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) { /* OBD_CONNECT_CKSUM should always be set, even if checksums are @@ -376,6 +377,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, 
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; + /* always ping even if server suppress_pings */ + if (sbi->ll_flags & LL_SBI_ALWAYS_PING) + data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; + CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n", data->ocd_connect_flags, data->ocd_version, data->ocd_grant); @@ -475,8 +480,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, ptlrpc_req_finished(request); if (IS_ERR(root)) { - if (lmd.lsm) - obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm); #ifdef CONFIG_FS_POSIX_ACL if (lmd.posix_acl) { posix_acl_release(lmd.posix_acl); @@ -488,12 +491,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, goto out_root; } - err = ll_close_thread_start(&sbi->ll_lcq); - if (err) { - CERROR("cannot start close thread: rc %d\n", err); - goto out_root; - } - checksum = sbi->ll_flags & LL_SBI_CHECKSUM; err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM), KEY_CHECKSUM, sizeof(checksum), &checksum, @@ -572,10 +569,18 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize) { int size, rc; - *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL); + size = sizeof(*lmmsize); + rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE), + KEY_MAX_EASIZE, &size, lmmsize); + if (rc) { + CERROR("%s: cannot get max LOV EA size: rc = %d\n", + sbi->ll_dt_exp->exp_obd->obd_name, rc); + return rc; + } + size = sizeof(int); rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE), - KEY_MAX_EASIZE, &size, lmmsize, NULL); + KEY_MAX_EASIZE, &size, lmmsize); if (rc) CERROR("Get max mdsize error rc %d\n", rc); @@ -599,7 +604,7 @@ int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize) size = sizeof(int); rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE), - KEY_DEFAULT_EASIZE, &size, lmmsize, NULL); + KEY_DEFAULT_EASIZE, &size, lmmsize); if (rc) CERROR("Get default mdsize error rc %d\n", rc); @@ -633,8 +638,6 @@ static void client_common_put_super(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); - ll_close_thread_shutdown(sbi->ll_lcq); - cl_sb_fini(sb); obd_fid_fini(sbi->ll_dt_exp->exp_obd); @@ -725,6 +728,18 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } + tmp = ll_set_opt("context", s1, 1); + if (tmp) + goto next; + tmp = ll_set_opt("fscontext", s1, 1); + if (tmp) + goto next; + tmp = ll_set_opt("defcontext", s1, 1); + if (tmp) + goto next; + tmp = ll_set_opt("rootcontext", s1, 1); + if (tmp) + goto next; tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH); if (tmp) { *flags |= tmp; @@ -766,11 +781,6 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } - tmp = ll_set_opt("som_preview", s1, LL_SBI_SOM_PREVIEW); - if (tmp) { - *flags |= tmp; - goto next; - } tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API); if (tmp) { *flags |= tmp; @@ -786,6 +796,11 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } + tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING); + if (tmp) { + *flags |= tmp; + goto next; + } LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n", s1); return -EINVAL; @@ -804,14 +819,10 @@ void ll_lli_init(struct ll_inode_info *lli) { lli->lli_inode_magic = LLI_INODE_MAGIC; lli->lli_flags = 0; - lli->lli_ioepoch = 0; - lli->lli_maxbytes = MAX_LFS_FILESIZE; spin_lock_init(&lli->lli_lock); lli->lli_posix_acl = NULL; /* Do not set lli_fid, it has been initialized already. 
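Taken together, the ll_options() and client_common_fill_super() hunks above wire up the new always_ping mount option end to end: parsing sets LL_SBI_ALWAYS_PING, and both the MDC and OSC connect paths then withdraw OBD_CONNECT_PINGLESS so the client keeps pinging even when the server requests suppress_pings. Condensed from the hunks above:

        /* mount -o always_ping: remember it in the sb flags */
        tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING);
        if (tmp) {
                *flags |= tmp;
                goto next;
        }

        /* at connect time, never offer PINGLESS in that case */
        if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
                data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;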
*/ fid_zero(&lli->lli_pfid); - INIT_LIST_HEAD(&lli->lli_close_list); - lli->lli_pending_och = NULL; lli->lli_mds_read_och = NULL; lli->lli_mds_write_och = NULL; lli->lli_mds_exec_och = NULL; @@ -820,9 +831,8 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_open_fd_exec_count = 0; mutex_init(&lli->lli_och_mutex); spin_lock_init(&lli->lli_agl_lock); - lli->lli_has_smd = false; spin_lock_init(&lli->lli_layout_lock); - ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE); + ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE); lli->lli_clob = NULL; init_rwsem(&lli->lli_xattrs_list_rwsem); @@ -941,10 +951,14 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) /* connections, registrations, sb setup */ err = client_common_fill_super(sb, md, dt, mnt); + if (!err) + sbi->ll_client_common_fill_super_succeeded = 1; out_free: kfree(md); kfree(dt); + if (lprof) + class_put_profile(lprof); if (err) ll_put_super(sb); else if (sbi->ll_flags & LL_SBI_VERBOSE) @@ -1002,7 +1016,7 @@ void ll_put_super(struct super_block *sb) } } - if (sbi->ll_lcq) { + if (sbi->ll_client_common_fill_super_succeeded) { /* Only if client_common_fill_super succeeded */ client_common_put_super(sb); } @@ -1057,7 +1071,7 @@ struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock) return inode; } -static void ll_dir_clear_lsm_md(struct inode *inode) +void ll_dir_clear_lsm_md(struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); @@ -1205,16 +1219,44 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) /* set the directory layout */ if (!lli->lli_lsm_md) { + struct cl_attr *attr; + rc = ll_init_lsm_md(inode, md); if (rc) return rc; - lli->lli_lsm_md = lsm; /* * set lsm_md to NULL, so the following free lustre_md * will not free this lsm */ md->lmv = NULL; + lli->lli_lsm_md = lsm; + + attr = kzalloc(sizeof(*attr), GFP_NOFS); + if (!attr) + return -ENOMEM; + + /* validate the lsm */ + rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr, + ll_md_blocking_ast); + if (rc) { + kfree(attr); + return rc; + } + + if (md->body->mbo_valid & OBD_MD_FLNLINK) + md->body->mbo_nlink = attr->cat_nlink; + if (md->body->mbo_valid & OBD_MD_FLSIZE) + md->body->mbo_size = attr->cat_size; + if (md->body->mbo_valid & OBD_MD_FLATIME) + md->body->mbo_atime = attr->cat_atime; + if (md->body->mbo_valid & OBD_MD_FLCTIME) + md->body->mbo_ctime = attr->cat_ctime; + if (md->body->mbo_valid & OBD_MD_FLMTIME) + md->body->mbo_mtime = attr->cat_mtime; + + kfree(attr); + CDEBUG(D_INODE, "Set lsm %p magic %x to "DFID"\n", lsm, lsm->lsm_md_magic, PFID(ll_inode2fid(inode))); return 0; @@ -1272,9 +1314,6 @@ void ll_clear_inode(struct inode *inode) LASSERT(lli->lli_opendir_pid == 0); } - spin_lock(&lli->lli_lock); - ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK; - spin_unlock(&lli->lli_lock); md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode)); LASSERT(!lli->lli_open_fd_write_count); @@ -1313,13 +1352,11 @@ void ll_clear_inode(struct inode *inode) * cl_object still uses inode lsm. 
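For a freshly attached striped-directory lsm, the ll_update_lsm_md() hunk above now validates the stripes by merging their attributes through md_merge_attr() and folds the result back into the mdt_body, guarded by the OBD_MD_* valid bits so only fields the server actually returned are rewritten. The copy-back reduces to this shape (condensed from the hunk; the time fields repeat the same pattern):

        rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr, ll_md_blocking_ast);
        if (rc)
                return rc;

        if (md->body->mbo_valid & OBD_MD_FLNLINK)
                md->body->mbo_nlink = attr->cat_nlink;
        if (md->body->mbo_valid & OBD_MD_FLSIZE)
                md->body->mbo_size = attr->cat_size;
        /* ... OBD_MD_FLATIME/FLCTIME/FLMTIME handled the same way ... */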
*/ cl_inode_fini(inode); - lli->lli_has_smd = false; } #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) -static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, - struct md_open_data **mod) +static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data) { struct lustre_md md; struct inode *inode = d_inode(dentry); @@ -1332,8 +1369,7 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, if (IS_ERR(op_data)) return PTR_ERR(op_data); - rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, - &request, mod); + rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request); if (rc) { ptlrpc_req_finished(request); if (rc == -ENOENT) { @@ -1369,48 +1405,12 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, rc = simple_setattr(dentry, &op_data->op_attr); op_data->op_attr.ia_valid = ia_valid; - /* Extract epoch data if obtained. */ - op_data->op_handle = md.body->mbo_handle; - op_data->op_ioepoch = md.body->mbo_ioepoch; - rc = ll_update_inode(inode, &md); ptlrpc_req_finished(request); return rc; } -/* Close IO epoch and send Size-on-MDS attribute update. */ -static int ll_setattr_done_writing(struct inode *inode, - struct md_op_data *op_data, - struct md_open_data *mod) -{ - struct ll_inode_info *lli = ll_i2info(inode); - int rc = 0; - - if (!S_ISREG(inode->i_mode)) - return 0; - - CDEBUG(D_INODE, "Epoch %llu closed on "DFID" for truncate\n", - op_data->op_ioepoch, PFID(&lli->lli_fid)); - - op_data->op_flags = MF_EPOCH_CLOSE; - ll_done_writing_attr(inode, op_data); - ll_pack_inode2opdata(inode, op_data, NULL); - - rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod); - if (rc == -EAGAIN) - /* MDS has instructed us to obtain Size-on-MDS attribute - * from OSTs and send setattr to back to MDS. - */ - rc = ll_som_update(inode, op_data); - else if (rc) { - CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n", - ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name, - PFID(ll_inode2fid(inode)), rc); - } - return rc; -} - /* If this inode has objects allocated to it (lsm != NULL), then the OST * object(s) determine the file size and mtime. Otherwise, the MDS will * keep these values until such a time that objects are allocated for it. @@ -1431,9 +1431,8 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) struct inode *inode = d_inode(dentry); struct ll_inode_info *lli = ll_i2info(inode); struct md_op_data *op_data = NULL; - struct md_open_data *mod = NULL; bool file_is_released = false; - int rc = 0, rc1 = 0; + int rc = 0; CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n", ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode, @@ -1503,14 +1502,33 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) * but other attributes must be set */ if (S_ISREG(inode->i_mode)) { - struct lov_stripe_md *lsm; + struct cl_layout cl = { + .cl_is_released = false, + }; + struct lu_env *env; + int refcheck; __u32 gen; - ll_layout_refresh(inode, &gen); - lsm = ccc_inode_lsm_get(inode); - if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED) - file_is_released = true; - ccc_inode_lsm_put(inode, lsm); + rc = ll_layout_refresh(inode, &gen); + if (rc < 0) + goto out; + + /* + * XXX: the only place we need to know the layout type, + * this will be removed by a later patch. 
-Jinshan + */ + env = cl_env_get(&refcheck); + if (IS_ERR(env)) { + rc = PTR_ERR(env); + goto out; + } + + rc = cl_object_layout_get(env, lli->lli_clob, &cl); + cl_env_put(env, &refcheck); + if (rc < 0) + goto out; + + file_is_released = cl.cl_is_released; if (!hsm_import && attr->ia_valid & ATTR_SIZE) { if (file_is_released) { @@ -1527,32 +1545,16 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) * modified, flag it. */ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); op_data->op_bias |= MDS_DATA_MODIFIED; } } memcpy(&op_data->op_attr, attr, sizeof(*attr)); - /* Open epoch for truncate. */ - if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import && - (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) - op_data->op_flags = MF_EPOCH_OPEN; - - rc = ll_md_setattr(dentry, op_data, &mod); + rc = ll_md_setattr(dentry, op_data); if (rc) goto out; - /* RPC to MDT is sent, cancel data modification flag */ - if (op_data->op_bias & MDS_DATA_MODIFIED) { - spin_lock(&lli->lli_lock); - lli->lli_flags &= ~LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - - ll_ioepoch_open(lli, op_data->op_ioepoch); if (!S_ISREG(inode->i_mode) || file_is_released) { rc = 0; goto out; @@ -1568,19 +1570,11 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) * setting times to past, but it is necessary due to possible * time de-synchronization between MDT inode and OST objects */ - if (attr->ia_valid & ATTR_SIZE) - down_write(&lli->lli_trunc_sem); - rc = cl_setattr_ost(inode, attr); - if (attr->ia_valid & ATTR_SIZE) - up_write(&lli->lli_trunc_sem); + rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, attr, 0); } out: - if (op_data->op_ioepoch) { - rc1 = ll_setattr_done_writing(inode, op_data, mod); - if (!rc) - rc = rc1; - } - ll_finish_md_op_data(op_data); + if (op_data) + ll_finish_md_op_data(op_data); if (!S_ISDIR(inode->i_mode)) { inode_lock(inode); @@ -1736,19 +1730,10 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) { struct ll_inode_info *lli = ll_i2info(inode); struct mdt_body *body = md->body; - struct lov_stripe_md *lsm = md->lsm; struct ll_sb_info *sbi = ll_i2sbi(inode); - LASSERT((lsm != NULL) == ((body->mbo_valid & OBD_MD_FLEASIZE) != 0)); - if (lsm) { - if (!lli->lli_has_smd && - !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK)) - cl_file_inode_init(inode, md); - - lli->lli_maxbytes = lsm->lsm_maxbytes; - if (lli->lli_maxbytes > MAX_LFS_FILESIZE) - lli->lli_maxbytes = MAX_LFS_FILESIZE; - } + if (body->mbo_valid & OBD_MD_FLEASIZE) + cl_file_inode_init(inode, md); if (S_ISDIR(inode->i_mode)) { int rc; @@ -1828,48 +1813,11 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) LASSERT(fid_seq(&lli->lli_fid) != 0); if (body->mbo_valid & OBD_MD_FLSIZE) { - if (exp_connect_som(ll_i2mdexp(inode)) && - S_ISREG(inode->i_mode)) { - struct lustre_handle lockh; - enum ldlm_mode mode; - - /* As it is possible a blocking ast has been processed - * by this time, we need to check there is an UPDATE - * lock on the client and set LLIF_MDS_SIZE_LOCK holding - * it. - */ - mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE, - &lockh, LDLM_FL_CBPENDING, - LCK_CR | LCK_CW | - LCK_PR | LCK_PW); - if (mode) { - if (lli->lli_flags & (LLIF_DONE_WRITING | - LLIF_EPOCH_PENDING | - LLIF_SOM_DIRTY)) { - CERROR("%s: inode "DFID" flags %u still has size authority! 
do not trust the size got from MDS\n", - sbi->ll_md_exp->exp_obd->obd_name, - PFID(ll_inode2fid(inode)), - lli->lli_flags); - } else { - /* Use old size assignment to avoid - * deadlock bz14138 & bz14326 - */ - i_size_write(inode, body->mbo_size); - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_MDS_SIZE_LOCK; - spin_unlock(&lli->lli_lock); - } - ldlm_lock_decref(&lockh, mode); - } - } else { - /* Use old size assignment to avoid - * deadlock bz14138 & bz14326 - */ - i_size_write(inode, body->mbo_size); + i_size_write(inode, body->mbo_size); - CDEBUG(D_VFSTRACE, "inode=%lu, updating i_size %llu\n", - inode->i_ino, (unsigned long long)body->mbo_size); - } + CDEBUG(D_VFSTRACE, "inode=" DFID ", updating i_size %llu\n", + PFID(ll_inode2fid(inode)), + (unsigned long long)body->mbo_size); if (body->mbo_valid & OBD_MD_FLBLOCKS) inode->i_blocks = body->mbo_blocks; @@ -1877,7 +1825,7 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) if (body->mbo_valid & OBD_MD_TSTATE) { if (body->mbo_t_state & MS_RESTORE) - lli->lli_flags |= LLIF_FILE_RESTORING; + set_bit(LLIF_FILE_RESTORING, &lli->lli_flags); } return 0; @@ -1892,8 +1840,6 @@ int ll_read_inode2(struct inode *inode, void *opaque) CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", PFID(&lli->lli_fid), inode); - LASSERT(!lli->lli_has_smd); - /* Core attributes from the MDS first. This is a new inode, and * the VFS doesn't zero times in the core inode so we have to do * it ourselves. They will be overwritten by either MDS or OST @@ -1988,9 +1934,9 @@ int ll_iocontrol(struct inode *inode, struct file *file, return put_user(flags, (int __user *)arg); } case FSFILT_IOC_SETFLAGS: { - struct lov_stripe_md *lsm; - struct obd_info oinfo = { }; struct md_op_data *op_data; + struct cl_object *obj; + struct iattr *attr; if (get_user(flags, (int __user *)arg)) return -EFAULT; @@ -2002,8 +1948,7 @@ int ll_iocontrol(struct inode *inode, struct file *file, op_data->op_attr_flags = flags; op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; - rc = md_setattr(sbi->ll_md_exp, op_data, - NULL, 0, NULL, 0, &req, NULL); + rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &req); ll_finish_md_op_data(op_data); ptlrpc_req_finished(req); if (rc) @@ -2011,30 +1956,17 @@ int ll_iocontrol(struct inode *inode, struct file *file, inode->i_flags = ll_ext_to_inode_flags(flags); - lsm = ccc_inode_lsm_get(inode); - if (!lsm_has_objects(lsm)) { - ccc_inode_lsm_put(inode, lsm); + obj = ll_i2info(inode)->lli_clob; + if (!obj) return 0; - } - oinfo.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); - if (!oinfo.oi_oa) { - ccc_inode_lsm_put(inode, lsm); + attr = kzalloc(sizeof(*attr), GFP_NOFS); + if (!attr) return -ENOMEM; - } - oinfo.oi_md = lsm; - oinfo.oi_oa->o_oi = lsm->lsm_oi; - oinfo.oi_oa->o_flags = flags; - oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | - OBD_MD_FLGROUP; - obdo_set_parent_fid(oinfo.oi_oa, &ll_i2info(inode)->lli_fid); - rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL); - kmem_cache_free(obdo_cachep, oinfo.oi_oa); - ccc_inode_lsm_put(inode, lsm); - - if (rc && rc != -EPERM && rc != -EACCES) - CERROR("osc_setattr_async fails: rc = %d\n", rc); + attr->ia_valid = ATTR_ATTR_FLAG; + rc = cl_setattr_ost(obj, attr, flags); + kfree(attr); return rc; } default: @@ -2164,7 +2096,6 @@ void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req) return; op_data->op_fid1 = body->mbo_fid1; - op_data->op_ioepoch = body->mbo_ioepoch; op_data->op_handle = body->mbo_handle; op_data->op_mod_time = get_seconds(); md_close(exp, op_data, NULL, 
&close_req); @@ -2244,17 +2175,14 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, conf.coc_opc = OBJECT_CONF_SET; conf.coc_inode = *inode; conf.coc_lock = lock; - conf.u.coc_md = &md; + conf.u.coc_layout = md.layout; (void)ll_layout_conf(*inode, &conf); } LDLM_LOCK_PUT(lock); } out: - if (md.lsm) - obd_free_memmd(sbi->ll_dt_exp, &md.lsm); md_free_lustre_md(sbi->ll_md_exp, &md); - cleanup: if (rc != 0 && it && it->it_op & IT_OPEN) ll_open_cleanup(sb ? sb : (*inode)->i_sb, req); @@ -2380,8 +2308,9 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, op_data->op_default_stripe_offset = -1; if (S_ISDIR(i1->i_mode)) { op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md; - op_data->op_default_stripe_offset = - ll_i2info(i1)->lli_def_stripe_offset; + if (opc == LUSTRE_OPC_MKDIR) + op_data->op_default_stripe_offset = + ll_i2info(i1)->lli_def_stripe_offset; } if (i2) { @@ -2405,8 +2334,6 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); op_data->op_cap = cfs_curproc_cap_pack(); - op_data->op_bias = 0; - op_data->op_cli_flags = 0; if ((opc == LUSTRE_OPC_CREATE) && name && filename_is_volatile(name, namelen, &op_data->op_mds)) op_data->op_bias |= MDS_CREATE_VOLATILE; @@ -2414,10 +2341,6 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, op_data->op_mds = 0; op_data->op_data = data; - /* When called by ll_setattr_raw, file is i1. */ - if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED) - op_data->op_bias |= MDS_DATA_MODIFIED; - return op_data; } @@ -2451,6 +2374,9 @@ int ll_show_options(struct seq_file *seq, struct dentry *dentry) if (sbi->ll_flags & LL_SBI_USER_FID2PATH) seq_puts(seq, ",user_fid2path"); + if (sbi->ll_flags & LL_SBI_ALWAYS_PING) + seq_puts(seq, ",always_ping"); + return 0; } diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index 436691814a5e..ee01f20d8b11 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -47,7 +47,7 @@ static const struct vm_operations_struct ll_file_vm_ops; -void policy_from_vma(ldlm_policy_data_t *policy, +void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma, unsigned long addr, size_t count) { @@ -80,43 +80,24 @@ struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr, * API independent part for page fault initialization. * \param vma - virtual memory area addressed to page fault * \param env - corespondent lu_env to processing - * \param nest - nested level * \param index - page index corespondent to fault. * \parm ra_flags - vma readahead flags. * - * \return allocated and initialized env for fault operation. - * \retval EINVAL if env can't allocated - * \return other error codes from cl_io_init. + * \return error codes from cl_io_init. 
*/ static struct cl_io * -ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret, - struct cl_env_nest *nest, pgoff_t index, - unsigned long *ra_flags) +ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma, + pgoff_t index, unsigned long *ra_flags) { struct file *file = vma->vm_file; struct inode *inode = file_inode(file); struct cl_io *io; struct cl_fault_io *fio; - struct lu_env *env; int rc; - *env_ret = NULL; if (ll_file_nolock(file)) return ERR_PTR(-EOPNOTSUPP); - /* - * page fault can be called when lustre IO is - * already active for the current thread, e.g., when doing read/write - * against user level buffer mapped from Lustre buffer. To avoid - * stomping on existing context, optionally force an allocation of a new - * one. - */ - env = cl_env_nested_get(nest); - if (IS_ERR(env)) - return ERR_PTR(-EINVAL); - - *env_ret = env; - restart: io = vvp_env_thread_io(env); io->ci_obj = ll_i2info(inode)->lli_clob; @@ -155,7 +136,6 @@ restart: if (io->ci_need_restart) goto restart; - cl_env_nested_put(nest, env); io = ERR_PTR(rc); } @@ -169,13 +149,17 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage, struct lu_env *env; struct cl_io *io; struct vvp_io *vio; - struct cl_env_nest nest; int result; + int refcheck; sigset_t set; struct inode *inode; struct ll_inode_info *lli; - io = ll_fault_io_init(vma, &env, &nest, vmpage->index, NULL); + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + io = ll_fault_io_init(env, vma, vmpage->index, NULL); if (IS_ERR(io)) { result = PTR_ERR(io); goto out; @@ -231,17 +215,14 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage, result = -EAGAIN; } - if (result == 0) { - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } + if (!result) + set_bit(LLIF_DATA_MODIFIED, &lli->lli_flags); } out_io: cl_io_fini(env, io); - cl_env_nested_put(&nest, env); out: + cl_env_put(env, &refcheck); CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result); LASSERT(ergo(result == 0, PageLocked(vmpage))); @@ -285,13 +266,19 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) struct vvp_io *vio = NULL; struct page *vmpage; unsigned long ra_flags; - struct cl_env_nest nest; - int result; + int result = 0; int fault_ret = 0; + int refcheck; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); - io = ll_fault_io_init(vma, &env, &nest, vmf->pgoff, &ra_flags); - if (IS_ERR(io)) - return to_fault_error(PTR_ERR(io)); + io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags); + if (IS_ERR(io)) { + result = to_fault_error(PTR_ERR(io)); + goto out; + } result = io->ci_result; if (result == 0) { @@ -322,14 +309,15 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) } } cl_io_fini(env, io); - cl_env_nested_put(&nest, env); vma->vm_flags |= ra_flags; + +out: + cl_env_put(env, &refcheck); if (result != 0 && !(fault_ret & VM_FAULT_RETRY)) fault_ret |= to_fault_error(result); - CDEBUG(D_MMAP, "%s fault %d/%d\n", - current->comm, fault_ret, result); + CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result); return fault_ret; } @@ -381,6 +369,7 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) bool retry; int result; + file_update_time(vma->vm_file); do { retry = false; result = ll_page_mkwrite0(vma, vmf->page, &retry); diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c index 
709230571b4b..c63236580b0f 100644 --- a/drivers/staging/lustre/lustre/llite/llite_nfs.c +++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c @@ -226,7 +226,7 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen, static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name, int namelen, loff_t hash, u64 ino, - unsigned type) + unsigned int type) { /* It is hack to access lde_fid for comparison with lgd_fid. * So the input 'name' must be part of the 'lu_dirent'. diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 23fda9d98bff..03682c10fc9e 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -1060,10 +1060,6 @@ static const struct llite_file_opcode { "brw_read" }, { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES, "brw_write" }, - { LPROC_LL_OSC_READ, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES, - "osc_read" }, - { LPROC_LL_OSC_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES, - "osc_write" }, { LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" }, { LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" }, { LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" }, diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index 180f35e3afd9..9426759aedc9 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -113,13 +113,18 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, if (inode->i_state & I_NEW) { rc = ll_read_inode2(inode, md); if (!rc && S_ISREG(inode->i_mode) && - !ll_i2info(inode)->lli_clob) { - CDEBUG(D_INODE, "%s: apply lsm %p to inode "DFID"\n", - ll_get_fsname(sb, NULL, 0), md->lsm, - PFID(ll_inode2fid(inode))); + !ll_i2info(inode)->lli_clob) rc = cl_file_inode_init(inode, md); - } + if (rc) { + /* + * Let's clear directory lsm here, otherwise + * make_bad_inode() will reset the inode mode + * to regular, then ll_clear_inode will not + * be able to clear lsm_md + */ + if (S_ISDIR(inode->i_mode)) + ll_dir_clear_lsm_md(inode); make_bad_inode(inode); unlock_new_inode(inode); iput(inode); @@ -132,6 +137,8 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p): rc = %d\n", PFID(&md->body->mbo_fid1), inode, rc); if (rc) { + if (S_ISDIR(inode->i_mode)) + ll_dir_clear_lsm_md(inode); iput(inode); inode = ERR_PTR(rc); } @@ -258,7 +265,9 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, struct ll_inode_info *lli = ll_i2info(inode); spin_lock(&lli->lli_lock); - lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK; + LTIME_S(inode->i_mtime) = 0; + LTIME_S(inode->i_atime) = 0; + LTIME_S(inode->i_ctime) = 0; spin_unlock(&lli->lli_lock); } @@ -287,11 +296,39 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, hash = cl_fid_build_ino(&lli->lli_pfid, ll_need_32bit_api(ll_i2sbi(inode))); - - master_inode = ilookup5(inode->i_sb, hash, - ll_test_inode_by_fid, - (void *)&lli->lli_pfid); - if (master_inode && !IS_ERR(master_inode)) { + /* + * Do not lookup the inode with ilookup5, + * otherwise it will cause dead lock, + * + * 1. Client1 send chmod req to the MDT0, then + * on MDT0, it enqueues master and all of its + * slaves lock, (mdt_attr_set() -> + * mdt_lock_slaves()), after gets master and + * stripe0 lock, it will send the enqueue req + * (for stripe1) to MDT1, then MDT1 finds the + * lock has been granted to client2. 
Then MDT1 + * sends blocking ast to client2. + * + * 2. At the same time, client2 tries to unlink + * the striped dir (rm -rf striped_dir), and + * during lookup, it will hold the master inode + * of the striped directory, whose inode state + * is NEW, then tries to revalidate all of its + * slaves, (ll_prep_inode()->ll_iget()-> + * ll_read_inode2()-> ll_update_inode().). And + * it will be blocked on the server side because + * of 1. + * + * 3. Then the client get the blocking_ast req, + * cancel the lock, but being blocked if using + * ->ilookup5()), because master inode state is + * NEW. + */ + master_inode = ilookup5_nowait(inode->i_sb, + hash, + ll_test_inode_by_fid, + (void *)&lli->lli_pfid); + if (master_inode) { ll_invalidate_negative_children(master_inode); iput(master_inode); } @@ -535,6 +572,10 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, } } + if (it->it_op & IT_OPEN && it->it_flags & FMODE_WRITE && + dentry->d_sb->s_flags & MS_RDONLY) + return ERR_PTR(-EROFS); + if (it->it_op & IT_CREAT) opc = LUSTRE_OPC_CREATE; else @@ -801,7 +842,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, return PTR_ERR(inode); d_instantiate(dentry, inode); - return 0; + + return ll_init_security(dentry, inode, dir); } void ll_update_times(struct ptlrpc_request *request, struct inode *inode) @@ -896,6 +938,8 @@ again: goto err_exit; d_instantiate(dentry, inode); + + err = ll_init_security(dentry, inode, dir); err_exit: if (request) ptlrpc_req_finished(request); diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index 76a6836cdf70..f10e092979fe 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -181,90 +181,73 @@ void ll_ras_enter(struct file *f) spin_unlock(&ras->ras_lock); } -static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, struct cl_page *page, - struct cl_object *clob, pgoff_t *max_index) +/** + * Initiates read-ahead of a page with given index. + * + * \retval +ve: page was already uptodate so it will be skipped + * from being added; + * \retval -ve: page wasn't added to \a queue for error; + * \retval 0: page was added into \a queue for read ahead. 
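The return convention just listed is consumed in the ll_read_ahead_pages() hunk further below: only a 0 return eats one of the reserved page slots, a positive return is a silent skip of an already-uptodate page, and a per-page error no longer aborts the whole window (condensed from that hunk):

        rc = ll_read_ahead_page(env, io, queue, page_idx);
        if (!rc) {                      /* page queued for read-ahead */
                (*reserved_pages)--;
                count++;
        }
        /* rc > 0: already uptodate, skipped; rc < 0: this page failed */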
+ */ +static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *queue, pgoff_t index) { - struct page *vmpage = page->cp_vmpage; + enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */ + struct cl_object *clob = io->ci_obj; + struct inode *inode = vvp_object_inode(clob); + const char *msg = NULL; + struct cl_page *page; struct vvp_page *vpg; - int rc; + struct page *vmpage; + int rc = 0; + + vmpage = grab_cache_page_nowait(inode->i_mapping, index); + if (!vmpage) { + which = RA_STAT_FAILED_GRAB_PAGE; + msg = "g_c_p_n failed"; + rc = -EBUSY; + goto out; + } + + /* Check if vmpage was truncated or reclaimed */ + if (vmpage->mapping != inode->i_mapping) { + which = RA_STAT_WRONG_GRAB_PAGE; + msg = "g_c_p_n returned invalid page"; + rc = -EBUSY; + goto out; + } + + page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); + if (IS_ERR(page)) { + which = RA_STAT_FAILED_GRAB_PAGE; + msg = "cl_page_find failed"; + rc = PTR_ERR(page); + goto out; + } - rc = 0; - cl_page_assume(env, io, page); lu_ref_add(&page->cp_reference, "ra", current); + cl_page_assume(env, io, page); vpg = cl2vvp_page(cl_object_page_slice(clob, page)); if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) { - CDEBUG(D_READA, "page index %lu, max_index: %lu\n", - vvp_index(vpg), *max_index); - if (*max_index == 0 || vvp_index(vpg) > *max_index) - rc = cl_page_is_under_lock(env, io, page, max_index); - if (rc == 0) { - vpg->vpg_defer_uptodate = 1; - vpg->vpg_ra_used = 0; - cl_page_list_add(queue, page); - rc = 1; - } else { - cl_page_discard(env, io, page); - rc = -ENOLCK; - } + vpg->vpg_defer_uptodate = 1; + vpg->vpg_ra_used = 0; + cl_page_list_add(queue, page); } else { /* skip completed pages */ cl_page_unassume(env, io, page); + /* This page is already uptodate, returning a positive number + * to tell the callers about this + */ + rc = 1; } + lu_ref_del(&page->cp_reference, "ra", current); cl_page_put(env, page); - return rc; -} - -/** - * Initiates read-ahead of a page with given index. - * - * \retval +ve: page was added to \a queue. - * - * \retval -ENOLCK: there is no extent lock for this part of a file, stop - * read-ahead. - * - * \retval -ve, 0: page wasn't added to \a queue for other reason. 
- */ -static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, - pgoff_t index, pgoff_t *max_index) -{ - struct cl_object *clob = io->ci_obj; - struct inode *inode = vvp_object_inode(clob); - struct page *vmpage; - struct cl_page *page; - enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */ - int rc = 0; - const char *msg = NULL; - - vmpage = grab_cache_page_nowait(inode->i_mapping, index); +out: if (vmpage) { - /* Check if vmpage was truncated or reclaimed */ - if (vmpage->mapping == inode->i_mapping) { - page = cl_page_find(env, clob, vmpage->index, - vmpage, CPT_CACHEABLE); - if (!IS_ERR(page)) { - rc = cl_read_ahead_page(env, io, queue, - page, clob, max_index); - if (rc == -ENOLCK) { - which = RA_STAT_FAILED_MATCH; - msg = "lock match failed"; - } - } else { - which = RA_STAT_FAILED_GRAB_PAGE; - msg = "cl_page_find failed"; - } - } else { - which = RA_STAT_WRONG_GRAB_PAGE; - msg = "g_c_p_n returned invalid page"; - } - if (rc != 1) + if (rc) unlock_page(vmpage); put_page(vmpage); - } else { - which = RA_STAT_FAILED_GRAB_PAGE; - msg = "g_c_p_n failed"; } if (msg) { ll_ra_stats_inc(inode, which); @@ -379,12 +362,12 @@ static int ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, struct ra_io_arg *ria, unsigned long *reserved_pages, - unsigned long *ra_end) + pgoff_t *ra_end) { + struct cl_read_ahead ra = { 0 }; int rc, count = 0; bool stride_ria; pgoff_t page_idx; - pgoff_t max_index = 0; LASSERT(ria); RIA_DEBUG(ria); @@ -393,14 +376,23 @@ static int ll_read_ahead_pages(const struct lu_env *env, for (page_idx = ria->ria_start; page_idx <= ria->ria_end && *reserved_pages > 0; page_idx++) { if (ras_inside_ra_window(page_idx, ria)) { + if (!ra.cra_end || ra.cra_end < page_idx) { + cl_read_ahead_release(env, &ra); + + rc = cl_io_read_ahead(env, io, page_idx, &ra); + if (rc < 0) + break; + + LASSERTF(ra.cra_end >= page_idx, + "object: %p, indices %lu / %lu\n", + io->ci_obj, ra.cra_end, page_idx); + } + /* If the page is inside the read-ahead window*/ - rc = ll_read_ahead_page(env, io, queue, - page_idx, &max_index); - if (rc == 1) { + rc = ll_read_ahead_page(env, io, queue, page_idx); + if (!rc) { (*reserved_pages)--; count++; - } else if (rc == -ENOLCK) { - break; } } else if (stride_ria) { /* If it is not in the read-ahead window, and it is @@ -426,19 +418,21 @@ } } } + cl_read_ahead_release(env, &ra); + *ra_end = page_idx; return count; } -int ll_readahead(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, struct ll_readahead_state *ras, - bool hit) +static int ll_readahead(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *queue, + struct ll_readahead_state *ras, bool hit) { struct vvp_io *vio = vvp_env_io(env); struct ll_thread_info *lti = ll_env_info(env); struct cl_attr *attr = vvp_env_thread_attr(env); - unsigned long start = 0, end = 0, reserved; - unsigned long ra_end, len, mlen = 0; + unsigned long len, mlen = 0, reserved; + pgoff_t ra_end, start = 0, end = 0; struct inode *inode; struct ra_io_arg *ria = &lti->lti_ria; struct cl_object *clob; @@ -464,30 +458,25 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, spin_lock(&ras->ras_lock); - /* Enlarge the RA window to encompass the full read */ - if (vio->vui_ra_valid && - ras->ras_window_start + ras->ras_window_len < - vio->vui_ra_start + vio->vui_ra_count) { - ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count -
ras->ras_window_start; - } + /** + * Note: other thread might rollback the ras_next_readahead, + * if it can not get the full size of prepared pages, see the + * end of this function. For stride read ahead, it needs to + * make sure the offset is no less than ras_stride_offset, + * so that stride read ahead can work correctly. + */ + if (stride_io_mode(ras)) + start = max(ras->ras_next_readahead, ras->ras_stride_offset); + else + start = ras->ras_next_readahead; - /* Reserve a part of the read-ahead window that we'll be issuing */ - if (ras->ras_window_len > 0) { - /* - * Note: other thread might rollback the ras_next_readahead, - * if it can not get the full size of prepared pages, see the - * end of this function. For stride read ahead, it needs to - * make sure the offset is no less than ras_stride_offset, - * so that stride read ahead can work correctly. - */ - if (stride_io_mode(ras)) - start = max(ras->ras_next_readahead, - ras->ras_stride_offset); - else - start = ras->ras_next_readahead; + if (ras->ras_window_len > 0) end = ras->ras_window_start + ras->ras_window_len - 1; - } + + /* Enlarge the RA window to encompass the full read */ + if (vio->vui_ra_valid && + end < vio->vui_ra_start + vio->vui_ra_count - 1) + end = vio->vui_ra_start + vio->vui_ra_count - 1; if (end != 0) { unsigned long rpc_boundary; @@ -576,8 +565,8 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, * if the region we failed to issue read-ahead on is still ahead * of the app and behind the next index to start read-ahead from */ - CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu\n", - ra_end, end, ria->ria_end); + CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n", + ra_end, end, ria->ria_end, ret); if (ra_end != end + 1) { ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END); @@ -609,7 +598,7 @@ static void ras_reset(struct inode *inode, struct ll_readahead_state *ras, ras->ras_consecutive_pages = 0; ras->ras_window_len = 0; ras_set_start(inode, ras, index); - ras->ras_next_readahead = max(ras->ras_window_start, index); + ras->ras_next_readahead = max(ras->ras_window_start, index + 1); RAS_CDEBUG(ras); } @@ -738,12 +727,13 @@ static void ras_increase_window(struct inode *inode, ra->ra_max_pages_per_file); } -void ras_update(struct ll_sb_info *sbi, struct inode *inode, - struct ll_readahead_state *ras, unsigned long index, - unsigned hit) +static void ras_update(struct ll_sb_info *sbi, struct inode *inode, + struct ll_readahead_state *ras, unsigned long index, + enum ras_update_flags flags) { struct ll_ra_info *ra = &sbi->ll_ra_info; int zero = 0, stride_detect = 0, ra_miss = 0; + bool hit = flags & LL_RAS_HIT; spin_lock(&ras->ras_lock); @@ -773,7 +763,7 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode, * to for subsequent IO. The mmap case does not increment * ras_requests and thus can never trigger this behavior. 
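The reworked window computation in ll_readahead() above first picks the start (clamped to ras_stride_offset in stride mode, because another thread may have rolled ras_next_readahead back), then derives the end from the readahead window, and only then widens the end so the window covers the whole read described by vui_ra_start/vui_ra_count. Pulled together from the hunk above:

        if (stride_io_mode(ras))
                start = max(ras->ras_next_readahead, ras->ras_stride_offset);
        else
                start = ras->ras_next_readahead;

        if (ras->ras_window_len > 0)
                end = ras->ras_window_start + ras->ras_window_len - 1;

        /* Enlarge the RA window to encompass the full read */
        if (vio->vui_ra_valid &&
            end < vio->vui_ra_start + vio->vui_ra_count - 1)
                end = vio->vui_ra_start + vio->vui_ra_count - 1;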
 */
- if (ras->ras_requests == 2 && !ras->ras_request_index) {
+ if (ras->ras_requests >= 2 && !ras->ras_request_index) {
 __u64 kms_pages;
 kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >>
@@ -785,8 +775,7 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 if (kms_pages &&
 kms_pages <= ra->ra_max_read_ahead_whole_pages) {
 ras->ras_window_start = 0;
- ras->ras_last_readpage = 0;
- ras->ras_next_readahead = 0;
+ ras->ras_next_readahead = index + 1;
 ras->ras_window_len = min(ra->ra_max_pages_per_file,
 ra->ra_max_read_ahead_whole_pages);
 goto out_unlock;
@@ -816,13 +805,20 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 if (ra_miss) {
 if (index_in_stride_window(ras, index) &&
 stride_io_mode(ras)) {
- /*If stride-RA hit cache miss, the stride dector
- *will not be reset to avoid the overhead of
- *redetecting read-ahead mode
- */
 if (index != ras->ras_last_readpage + 1)
 ras->ras_consecutive_pages = 0;
 ras_reset(inode, ras, index);
+
+ /* If stride-RA hit cache miss, the stride
+ * detector will not be reset to avoid the
+ * overhead of redetecting read-ahead mode,
+ * but on the condition that the stride window
+ * still intersects with the normal sequential
+ * read-ahead window.
+ */
+ if (ras->ras_window_start <
+ ras->ras_stride_offset)
+ ras_stride_reset(ras);
 RAS_CDEBUG(ras);
 } else {
 /* Reset both stride window and normal RA
@@ -867,8 +863,13 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 /* Trigger RA in the mmap case where ras_consecutive_requests
 * is not incremented and thus can't be used to trigger RA
 */
- if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
- ras->ras_window_len = RAS_INCREASE_STEP(inode);
+ if (ras->ras_consecutive_pages >= 4 && flags & LL_RAS_MMAP) {
+ ras_increase_window(inode, ras, ra);
+ /*
+ * Reset consecutive pages so that the readahead window can
+ * grow gradually.
+ */ + ras->ras_consecutive_pages = 0; goto out_unlock; } @@ -903,17 +904,17 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc) struct cl_io *io; struct cl_page *page; struct cl_object *clob; - struct cl_env_nest nest; bool redirtied = false; bool unlocked = false; int result; + int refcheck; LASSERT(PageLocked(vmpage)); LASSERT(!PageWriteback(vmpage)); LASSERT(ll_i2dtexp(inode)); - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) { result = PTR_ERR(env); goto out; @@ -978,7 +979,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc) } } - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); goto out; out: @@ -1088,6 +1089,63 @@ void ll_cl_remove(struct file *file, const struct lu_env *env) write_unlock(&fd->fd_lock); } +static int ll_io_read_page(const struct lu_env *env, struct cl_io *io, + struct cl_page *page) +{ + struct inode *inode = vvp_object_inode(page->cp_obj); + struct ll_file_data *fd = vvp_env_io(env)->vui_fd; + struct ll_readahead_state *ras = &fd->fd_ras; + struct cl_2queue *queue = &io->ci_queue; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct vvp_page *vpg; + int rc = 0; + + vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page)); + if (sbi->ll_ra_info.ra_max_pages_per_file > 0 && + sbi->ll_ra_info.ra_max_pages > 0) { + struct vvp_io *vio = vvp_env_io(env); + enum ras_update_flags flags = 0; + + if (vpg->vpg_defer_uptodate) + flags |= LL_RAS_HIT; + if (!vio->vui_ra_valid) + flags |= LL_RAS_MMAP; + ras_update(sbi, inode, ras, vvp_index(vpg), flags); + } + + if (vpg->vpg_defer_uptodate) { + vpg->vpg_ra_used = 1; + cl_page_export(env, page, 1); + } + + cl_2queue_init(queue); + /* + * Add page into the queue even when it is marked uptodate above. + * this will unlock it automatically as part of cl_page_list_disown(). + */ + cl_page_list_add(&queue->c2_qin, page); + if (sbi->ll_ra_info.ra_max_pages_per_file > 0 && + sbi->ll_ra_info.ra_max_pages > 0) { + int rc2; + + rc2 = ll_readahead(env, io, &queue->c2_qin, ras, + vpg->vpg_defer_uptodate); + CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n", + PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg)); + } + + if (queue->c2_qin.pl_nr > 0) + rc = cl_io_submit_rw(env, io, CRT_READ, queue); + + /* + * Unlock unsent pages in case of error. + */ + cl_page_list_disown(env, io, &queue->c2_qin); + cl_2queue_fini(env, queue); + + return rc; +} + int ll_readpage(struct file *file, struct page *vmpage) { struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob; @@ -1111,7 +1169,7 @@ int ll_readpage(struct file *file, struct page *vmpage) LASSERT(page->cp_type == CPT_CACHEABLE); if (likely(!PageUptodate(vmpage))) { cl_page_assume(env, io, page); - result = cl_io_read_page(env, io, page); + result = ll_io_read_page(env, io, page); } else { /* Page from a non-object file. 
*/ unlock_page(vmpage); diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 26f3a37873a7..21e06e5b514e 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -71,8 +71,6 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset, struct cl_page *page; struct cl_object *obj; - int refcheck; - LASSERT(PageLocked(vmpage)); LASSERT(!PageWriteback(vmpage)); @@ -82,28 +80,27 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset, * happening with locked page too */ if (offset == 0 && length == PAGE_SIZE) { - env = cl_env_get(&refcheck); - if (!IS_ERR(env)) { - inode = vmpage->mapping->host; - obj = ll_i2info(inode)->lli_clob; - if (obj) { - page = cl_vmpage_page(vmpage, obj); - if (page) { - cl_page_delete(env, page); - cl_page_put(env, page); - } - } else { - LASSERT(vmpage->private == 0); + /* See the comment in ll_releasepage() */ + env = cl_env_percpu_get(); + LASSERT(!IS_ERR(env)); + inode = vmpage->mapping->host; + obj = ll_i2info(inode)->lli_clob; + if (obj) { + page = cl_vmpage_page(vmpage, obj); + if (page) { + cl_page_delete(env, page); + cl_page_put(env, page); } - cl_env_put(env, &refcheck); + } else { + LASSERT(vmpage->private == 0); } + cl_env_percpu_put(env); } } static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) { struct lu_env *env; - void *cookie; struct cl_object *obj; struct cl_page *page; struct address_space *mapping; @@ -129,7 +126,6 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) if (!page) return 1; - cookie = cl_env_reenter(); env = cl_env_percpu_get(); LASSERT(!IS_ERR(env)); @@ -155,7 +151,6 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) cl_page_put(env, page); cl_env_percpu_put(env); - cl_env_reexit(cookie); return result; } @@ -340,19 +335,15 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1)) static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter) { - struct lu_env *env; + struct ll_cl_context *lcc; + const struct lu_env *env; struct cl_io *io; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; loff_t file_offset = iocb->ki_pos; ssize_t count = iov_iter_count(iter); ssize_t tot_bytes = 0, result = 0; - struct ll_inode_info *lli = ll_i2info(inode); long size = MAX_DIO_SIZE; - int refcheck; - - if (!lli->lli_has_smd) - return -EBADF; /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */ if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK)) @@ -367,9 +358,13 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter) if (iov_iter_alignment(iter) & ~PAGE_MASK) return -EINVAL; - env = cl_env_get(&refcheck); + lcc = ll_cl_find(file); + if (!lcc) + return -EIO; + + env = lcc->lcc_env; LASSERT(!IS_ERR(env)); - io = vvp_env_io(env)->vui_cl.cis_io; + io = lcc->lcc_io; LASSERT(io); while (iov_iter_count(iter)) { @@ -426,7 +421,6 @@ out: vio->u.write.vui_written += tot_bytes; } - cl_env_put(env, &refcheck); return tot_bytes ? 
tot_bytes : result; } @@ -466,13 +460,13 @@ static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io, } static int ll_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, + loff_t pos, unsigned int len, unsigned int flags, struct page **pagep, void **fsdata) { struct ll_cl_context *lcc; - const struct lu_env *env; + const struct lu_env *env = NULL; struct cl_io *io; - struct cl_page *page; + struct cl_page *page = NULL; struct cl_object *clob = ll_i2info(mapping->host)->lli_clob; pgoff_t index = pos >> PAGE_SHIFT; struct page *vmpage = NULL; @@ -484,6 +478,7 @@ static int ll_write_begin(struct file *file, struct address_space *mapping, lcc = ll_cl_find(file); if (!lcc) { + io = NULL; result = -EIO; goto out; } @@ -560,6 +555,12 @@ out: unlock_page(vmpage); put_page(vmpage); } + if (!IS_ERR_OR_NULL(page)) { + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); + } + if (io) + io->ci_result = result; } else { *pagep = vmpage; *fsdata = lcc; @@ -576,7 +577,7 @@ static int ll_write_end(struct file *file, struct address_space *mapping, struct cl_io *io; struct vvp_io *vio; struct cl_page *page; - unsigned from = pos & (PAGE_SIZE - 1); + unsigned int from = pos & (PAGE_SIZE - 1); bool unplug = false; int result = 0; @@ -629,6 +630,8 @@ static int ll_write_end(struct file *file, struct address_space *mapping, file->f_flags & O_SYNC || IS_SYNC(file_inode(file))) result = vvp_io_write_commit(env, io); + if (result < 0) + io->ci_result = result; return result >= 0 ? copied : result; } diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c index 0677513476ec..4769a2230ae1 100644 --- a/drivers/staging/lustre/lustre/llite/statahead.c +++ b/drivers/staging/lustre/lustre/llite/statahead.c @@ -659,8 +659,8 @@ static int ll_statahead_interpret(struct ptlrpc_request *req, struct ll_inode_info *lli = ll_i2info(dir); struct ll_statahead_info *sai = lli->lli_sai; struct sa_entry *entry = (struct sa_entry *)minfo->mi_cbdata; + wait_queue_head_t *waitq = NULL; __u64 handle = 0; - bool wakeup; if (it_disposition(it, DISP_LOOKUP_NEG)) rc = -ENOENT; @@ -693,7 +693,8 @@ static int ll_statahead_interpret(struct ptlrpc_request *req, spin_lock(&lli->lli_sa_lock); if (rc) { - wakeup = __sa_make_ready(sai, entry, rc); + if (__sa_make_ready(sai, entry, rc)) + waitq = &sai->sai_waitq; } else { entry->se_minfo = minfo; entry->se_req = ptlrpc_request_addref(req); @@ -704,13 +705,15 @@ static int ll_statahead_interpret(struct ptlrpc_request *req, * with parent's lock held, for example: unlink. 
*/ entry->se_handle = handle; - wakeup = !sa_has_callback(sai); + if (!sa_has_callback(sai)) + waitq = &sai->sai_thread.t_ctl_waitq; + list_add_tail(&entry->se_list, &sai->sai_interim_entries); } sai->sai_replied++; - if (wakeup) - wake_up(&sai->sai_thread.t_ctl_waitq); + if (waitq) + wake_up(waitq); spin_unlock(&lli->lli_sa_lock); return rc; @@ -1397,10 +1400,10 @@ static int revalidate_statahead_dentry(struct inode *dir, struct dentry **dentryp, bool unplug) { + struct ll_inode_info *lli = ll_i2info(dir); struct sa_entry *entry = NULL; struct l_wait_info lwi = { 0 }; struct ll_dentry_data *ldd; - struct ll_inode_info *lli; int rc = 0; if ((*dentryp)->d_name.name[0] == '.') { @@ -1446,7 +1449,9 @@ static int revalidate_statahead_dentry(struct inode *dir, sa_handle_callback(sai); if (!sa_ready(entry)) { + spin_lock(&lli->lli_sa_lock); sai->sai_index_wait = entry->se_index; + spin_unlock(&lli->lli_sa_lock); lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(30), NULL, LWI_ON_SIGNAL_NOOP, NULL); rc = l_wait_event(sai->sai_waitq, sa_ready(entry), &lwi); @@ -1475,6 +1480,7 @@ static int revalidate_statahead_dentry(struct inode *dir, alias = ll_splice_alias(inode, *dentryp); if (IS_ERR(alias)) { + ll_intent_release(&it); rc = PTR_ERR(alias); goto out_unplug; } @@ -1493,6 +1499,7 @@ static int revalidate_statahead_dentry(struct inode *dir, *dentryp, PFID(ll_inode2fid((*dentryp)->d_inode)), PFID(ll_inode2fid(inode))); + ll_intent_release(&it); rc = -ESTALE; goto out_unplug; } @@ -1512,7 +1519,6 @@ out_unplug: * dentry_may_statahead(). */ ldd = ll_d2d(*dentryp); - lli = ll_i2info(dir); /* ldd can be NULL if llite lookup failed. */ if (ldd) ldd->lld_sa_generation = lli->lli_sa_generation; diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c index 8aa8ecc09a48..12c129f7e4ad 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_dev.c +++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c @@ -55,7 +55,6 @@ static struct kmem_cache *ll_thread_kmem; struct kmem_cache *vvp_lock_kmem; struct kmem_cache *vvp_object_kmem; -struct kmem_cache *vvp_req_kmem; static struct kmem_cache *vvp_session_kmem; static struct kmem_cache *vvp_thread_kmem; @@ -76,11 +75,6 @@ static struct lu_kmem_descr vvp_caches[] = { .ckd_size = sizeof(struct vvp_object), }, { - .ckd_cache = &vvp_req_kmem, - .ckd_name = "vvp_req_kmem", - .ckd_size = sizeof(struct vvp_req), - }, - { .ckd_cache = &vvp_session_kmem, .ckd_name = "vvp_session_kmem", .ckd_size = sizeof(struct vvp_session) @@ -177,10 +171,6 @@ static const struct lu_device_operations vvp_lu_ops = { .ldo_object_alloc = vvp_object_alloc }; -static const struct cl_device_operations vvp_cl_ops = { - .cdo_req_init = vvp_req_init -}; - static struct lu_device *vvp_device_free(const struct lu_env *env, struct lu_device *d) { @@ -213,7 +203,6 @@ static struct lu_device *vvp_device_alloc(const struct lu_env *env, lud = &vdv->vdv_cl.cd_lu_dev; cl_device_init(&vdv->vdv_cl, t); vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops; - vdv->vdv_cl.cd_ops = &vvp_cl_ops; site = kzalloc(sizeof(*site), GFP_NOFS); if (site) { @@ -332,7 +321,6 @@ int cl_sb_init(struct super_block *sb) cl = cl_type_setup(env, NULL, &vvp_device_type, sbi->ll_dt_exp->exp_obd->obd_lu_dev); if (!IS_ERR(cl)) { - cl2vvp_dev(cl)->vdv_sb = sb; sbi->ll_cl = cl; sbi->ll_site = cl2lu_dev(cl)->ld_site; } @@ -521,11 +509,10 @@ static void vvp_pgcache_page_show(const struct lu_env *env, vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type)); vmpage = vpg->vpg_page; - seq_printf(seq, " %5i | %p %p %s %s %s 
%s | %p "DFID"(%p) %lu %u [", + seq_printf(seq, " %5i | %p %p %s %s %s | %p " DFID "(%p) %lu %u [", 0 /* gen */, vpg, page, "none", - vpg->vpg_write_queued ? "wq" : "- ", vpg->vpg_defer_uptodate ? "du" : "- ", PageWriteback(vmpage) ? "wb" : "-", vmpage, PFID(ll_inode2fid(vmpage->mapping->host)), diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h index 4464ad258387..c60d0414ac25 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_internal.h +++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h @@ -42,9 +42,7 @@ enum obd_notify_event; struct inode; -struct lov_stripe_md; struct lustre_md; -struct obd_capa; struct obd_device; struct obd_export; struct page; @@ -122,7 +120,6 @@ extern struct lu_context_key vvp_thread_key; extern struct kmem_cache *vvp_lock_kmem; extern struct kmem_cache *vvp_object_kmem; -extern struct kmem_cache *vvp_req_kmem; struct vvp_thread_info { struct cl_lock vti_lock; @@ -195,14 +192,6 @@ struct vvp_object { struct inode *vob_inode; /** - * A list of dirty pages pending IO in the cache. Used by - * SOM. Protected by ll_inode_info::lli_lock. - * - * \see vvp_page::vpg_pending_linkage - */ - struct list_head vob_pending_list; - - /** * Number of transient pages. This is no longer protected by i_sem, * and needs to be atomic. This is not actually used for anything, * and can probably be removed. @@ -235,15 +224,7 @@ struct vvp_object { struct vvp_page { struct cl_page_slice vpg_cl; unsigned int vpg_defer_uptodate:1, - vpg_ra_used:1, - vpg_write_queued:1; - /** - * Non-empty iff this page is already counted in - * vvp_object::vob_pending_list. This list is only used as a flag, - * that is, never iterated through, only checked for list_empty(), but - * having a list is useful for debugging. - */ - struct list_head vpg_pending_linkage; + vpg_ra_used:1; /** VM page */ struct page *vpg_page; }; @@ -260,7 +241,6 @@ static inline pgoff_t vvp_index(struct vvp_page *vvp) struct vvp_device { struct cl_device vdv_cl; - struct super_block *vdv_sb; struct cl_device *vdv_next; }; @@ -268,10 +248,6 @@ struct vvp_lock { struct cl_lock_slice vlk_cl; }; -struct vvp_req { - struct cl_req_slice vrq_cl; -}; - void *ccc_key_init(const struct lu_context *ctx, struct lu_context_key *key); void ccc_key_fini(const struct lu_context *ctx, @@ -325,21 +301,8 @@ static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice) # define CLOBINVRNT(env, clob, expr) \ ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr))) -/** - * New interfaces to get and put lov_stripe_md from lov layer. This violates - * layering because lov_stripe_md is supposed to be a private data in lov. - * - * NB: If you find you have to use these interfaces for your new code, please - * think about it again. These interfaces may be removed in the future for - * better layering. 
- */ -struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj); -void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm); int lov_read_and_clear_async_rc(struct cl_object *clob); -struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode); -void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm); - int vvp_io_init(const struct lu_env *env, struct cl_object *obj, struct cl_io *io); int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io); @@ -347,8 +310,6 @@ int vvp_lock_init(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, const struct cl_io *io); int vvp_page_init(const struct lu_env *env, struct cl_object *obj, struct cl_page *page, pgoff_t index); -int vvp_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req); struct lu_object *vvp_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 2b7f182a15e2..0b6d388d8aa4 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -72,9 +72,10 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, /* don't need lock here to check lli_layout_gen as we have held * extent lock and GROUP lock has to hold to swap layout */ - if (ll_layout_version_get(lli) != vio->vui_layout_gen) { + if (ll_layout_version_get(lli) != vio->vui_layout_gen || + OBD_FAIL_CHECK_RESET(OBD_FAIL_LLITE_LOST_LAYOUT, 0)) { io->ci_need_restart = 1; - /* this will return application a short read/write */ + /* this will cause a short read/write */ io->ci_continue = 0; rc = false; } @@ -328,8 +329,8 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) vio->vui_layout_gen, gen); /* today successful restore is the only possible case */ /* restore was done, clear restoring state */ - ll_i2info(vvp_object_inode(obj))->lli_flags &= - ~LLIF_FILE_RESTORING; + clear_bit(LLIF_FILE_RESTORING, + &ll_i2info(inode)->lli_flags); } } } @@ -369,7 +370,7 @@ static int vvp_mmap_locks(const struct lu_env *env, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct cl_lock_descr *descr = &cti->vti_descr; - ldlm_policy_data_t policy; + union ldlm_policy_data policy; unsigned long addr; ssize_t count; int result = 0; @@ -450,7 +451,8 @@ static void vvp_io_advance(const struct lu_env *env, struct vvp_io *vio = cl2vvp_io(env, ios); CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob); + vio->vui_tot_count -= nob; + iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count); } static void vvp_io_update_iov(const struct lu_env *env, @@ -551,9 +553,16 @@ static int vvp_io_setattr_lock(const struct lu_env *env, if (new_size == 0) enqflags = CEF_DISCARD_DATA; } else { - if ((io->u.ci_setattr.sa_attr.lvb_mtime >= - io->u.ci_setattr.sa_attr.lvb_ctime) || - (io->u.ci_setattr.sa_attr.lvb_atime >= + unsigned int valid = io->u.ci_setattr.sa_valid; + + if (!(valid & TIMES_SET_FLAGS)) + return 0; + + if ((!(valid & ATTR_MTIME) || + io->u.ci_setattr.sa_attr.lvb_mtime >= + io->u.ci_setattr.sa_attr.lvb_ctime) && + (!(valid & ATTR_ATIME) || + io->u.ci_setattr.sa_attr.lvb_atime >= io->u.ci_setattr.sa_attr.lvb_ctime)) return 0; new_size = 0; @@ -580,14 +589,6 @@ static int vvp_do_vmtruncate(struct inode *inode, size_t size) return result; } -static int vvp_io_setattr_trunc(const struct lu_env *env, - const struct 
cl_io_slice *ios, - struct inode *inode, loff_t size) -{ - inode_dio_wait(inode); - return 0; -} - static int vvp_io_setattr_time(const struct lu_env *env, const struct cl_io_slice *ios) { @@ -618,15 +619,20 @@ static int vvp_io_setattr_start(const struct lu_env *env, { struct cl_io *io = ios->cis_io; struct inode *inode = vvp_object_inode(io->ci_obj); - int result = 0; + struct ll_inode_info *lli = ll_i2info(inode); - inode_lock(inode); - if (cl_io_is_trunc(io)) - result = vvp_io_setattr_trunc(env, ios, inode, - io->u.ci_setattr.sa_attr.lvb_size); - if (result == 0) - result = vvp_io_setattr_time(env, ios); - return result; + if (cl_io_is_trunc(io)) { + down_write(&lli->lli_trunc_sem); + inode_lock(inode); + inode_dio_wait(inode); + } else { + inode_lock(inode); + } + + if (io->u.ci_setattr.sa_valid & TIMES_SET_FLAGS) + return vvp_io_setattr_time(env, ios); + + return 0; } static void vvp_io_setattr_end(const struct lu_env *env, @@ -634,14 +640,18 @@ static void vvp_io_setattr_end(const struct lu_env *env, { struct cl_io *io = ios->cis_io; struct inode *inode = vvp_object_inode(io->ci_obj); + struct ll_inode_info *lli = ll_i2info(inode); - if (cl_io_is_trunc(io)) + if (cl_io_is_trunc(io)) { /* Truncate in memory pages - they must be clean pages * because osc has already notified to destroy osc_extents. */ vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size); - - inode_unlock(inode); + inode_unlock(inode); + up_write(&lli->lli_trunc_sem); + } else { + inode_unlock(inode); + } } static void vvp_io_setattr_fini(const struct lu_env *env, @@ -657,6 +667,7 @@ static int vvp_io_read_start(const struct lu_env *env, struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; struct inode *inode = vvp_object_inode(obj); + struct ll_inode_info *lli = ll_i2info(inode); struct file *file = vio->vui_fd->fd_file; int result; @@ -669,6 +680,8 @@ static int vvp_io_read_start(const struct lu_env *env, CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt); + down_read(&lli->lli_trunc_sem); + if (!can_populate_pages(env, io, inode)) return 0; @@ -770,16 +783,11 @@ static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io, static void write_commit_callback(const struct lu_env *env, struct cl_io *io, struct cl_page *page) { - struct vvp_page *vpg; struct page *vmpage = page->cp_vmpage; - struct cl_object *clob = cl_io_top(io)->ci_obj; SetPageUptodate(vmpage); set_page_dirty(vmpage); - vpg = cl2vvp_page(cl_object_page_slice(clob, page)); - vvp_write_pending(cl2vvp(clob), vpg); - cl_page_disown(env, io, page); /* held in ll_cl_init() */ @@ -899,10 +907,13 @@ static int vvp_io_write_start(const struct lu_env *env, struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; struct inode *inode = vvp_object_inode(obj); + struct ll_inode_info *lli = ll_i2info(inode); ssize_t result = 0; loff_t pos = io->u.ci_wr.wr.crw_pos; size_t cnt = io->u.ci_wr.wr.crw_count; + down_read(&lli->lli_trunc_sem); + if (!can_populate_pages(env, io, inode)) return 0; @@ -921,6 +932,20 @@ static int vvp_io_write_start(const struct lu_env *env, CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt); + /* + * The maximum Lustre file size is variable, based on the OST maximum + * object size and number of stripes. This needs another check in + * addition to the VFS checks earlier. 
+ */ + if (pos + cnt > ll_file_maxbytes(inode)) { + CDEBUG(D_INODE, + "%s: file " DFID " offset %llu > maxbytes %llu\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), pos + cnt, + ll_file_maxbytes(inode)); + return -EFBIG; + } + if (!vio->vui_iter) { /* from a temp io in ll_cl_init(). */ result = 0; @@ -957,11 +982,7 @@ static int vvp_io_write_start(const struct lu_env *env, } } if (result > 0) { - struct ll_inode_info *lli = ll_i2info(inode); - - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); + set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags); if (result < cnt) io->ci_continue = 0; @@ -972,6 +993,15 @@ static int vvp_io_write_start(const struct lu_env *env, return result; } +static void vvp_io_rw_end(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct inode *inode = vvp_object_inode(ios->cis_obj); + struct ll_inode_info *lli = ll_i2info(inode); + + up_read(&lli->lli_trunc_sem); +} + static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) { struct vm_fault *vmf = cfio->ft_vmf; @@ -1014,13 +1044,7 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io, struct cl_page *page) { - struct vvp_page *vpg; - struct cl_object *clob = cl_io_top(io)->ci_obj; - set_page_dirty(page->cp_vmpage); - - vpg = cl2vvp_page(cl_object_page_slice(clob, page)); - vvp_write_pending(cl2vvp(clob), vpg); } static int vvp_io_fault_start(const struct lu_env *env, @@ -1030,6 +1054,7 @@ static int vvp_io_fault_start(const struct lu_env *env, struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; struct inode *inode = vvp_object_inode(obj); + struct ll_inode_info *lli = ll_i2info(inode); struct cl_fault_io *fio = &io->u.ci_fault; struct vvp_fault_io *cfio = &vio->u.fault; loff_t offset; @@ -1039,11 +1064,7 @@ static int vvp_io_fault_start(const struct lu_env *env, loff_t size; pgoff_t last_index; - if (fio->ft_executable && - inode->i_mtime.tv_sec != vio->u.fault.ft_mtime) - CWARN("binary "DFID - " changed while waiting for the page fault lock\n", - PFID(lu_object_fid(&obj->co_lu))); + down_read(&lli->lli_trunc_sem); /* offset of the last byte on the page */ offset = cl_offset(obj, fio->ft_index + 1) - 1; @@ -1192,6 +1213,17 @@ out: return result; } +static void vvp_io_fault_end(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct inode *inode = vvp_object_inode(ios->cis_obj); + struct ll_inode_info *lli = ll_i2info(inode); + + CLOBINVRNT(env, ios->cis_io->ci_obj, + vvp_object_invariant(ios->cis_io->ci_obj)); + up_read(&lli->lli_trunc_sem); +} + static int vvp_io_fsync_start(const struct lu_env *env, const struct cl_io_slice *ios) { @@ -1202,46 +1234,23 @@ static int vvp_io_fsync_start(const struct lu_env *env, return 0; } -static int vvp_io_read_page(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice) +static int vvp_io_read_ahead(const struct lu_env *env, + const struct cl_io_slice *ios, + pgoff_t start, struct cl_read_ahead *ra) { - struct cl_io *io = ios->cis_io; - struct vvp_page *vpg = cl2vvp_page(slice); - struct cl_page *page = slice->cpl_page; - struct inode *inode = vvp_object_inode(slice->cpl_obj); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_file_data *fd = cl2vvp_io(env, ios)->vui_fd; - struct ll_readahead_state *ras = &fd->fd_ras; - struct cl_2queue *queue = &io->ci_queue; - - if (sbi->ll_ra_info.ra_max_pages_per_file && - 
sbi->ll_ra_info.ra_max_pages) - ras_update(sbi, inode, ras, vvp_index(vpg), - vpg->vpg_defer_uptodate); - - if (vpg->vpg_defer_uptodate) { - vpg->vpg_ra_used = 1; - cl_page_export(env, page, 1); - } - /* - * Add page into the queue even when it is marked uptodate above. - * this will unlock it automatically as part of cl_page_list_disown(). - */ + int result = 0; - cl_page_list_add(&queue->c2_qin, page); - if (sbi->ll_ra_info.ra_max_pages_per_file && - sbi->ll_ra_info.ra_max_pages) - ll_readahead(env, io, &queue->c2_qin, ras, - vpg->vpg_defer_uptodate); + if (ios->cis_io->ci_type == CIT_READ || + ios->cis_io->ci_type == CIT_FAULT) { + struct vvp_io *vio = cl2vvp_io(env, ios); - return 0; -} + if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + ra->cra_end = CL_PAGE_EOF; + result = 1; /* no need to call down */ + } + } -static void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios) -{ - CLOBINVRNT(env, ios->cis_io->ci_obj, - vvp_object_invariant(ios->cis_io->ci_obj)); + return result; } static const struct cl_io_operations vvp_io_ops = { @@ -1250,6 +1259,7 @@ static const struct cl_io_operations vvp_io_ops = { .cio_fini = vvp_io_fini, .cio_lock = vvp_io_read_lock, .cio_start = vvp_io_read_start, + .cio_end = vvp_io_rw_end, .cio_advance = vvp_io_advance, }, [CIT_WRITE] = { @@ -1258,6 +1268,7 @@ static const struct cl_io_operations vvp_io_ops = { .cio_iter_fini = vvp_io_write_iter_fini, .cio_lock = vvp_io_write_lock, .cio_start = vvp_io_write_start, + .cio_end = vvp_io_rw_end, .cio_advance = vvp_io_advance, }, [CIT_SETATTR] = { @@ -1272,7 +1283,7 @@ static const struct cl_io_operations vvp_io_ops = { .cio_iter_init = vvp_io_fault_iter_init, .cio_lock = vvp_io_fault_lock, .cio_start = vvp_io_fault_start, - .cio_end = vvp_io_end, + .cio_end = vvp_io_fault_end, }, [CIT_FSYNC] = { .cio_start = vvp_io_fsync_start, @@ -1282,7 +1293,7 @@ static const struct cl_io_operations vvp_io_ops = { .cio_fini = vvp_io_fini } }, - .cio_read_page = vvp_io_read_page, + .cio_read_ahead = vvp_io_read_ahead, }; int vvp_io_init(const struct lu_env *env, struct cl_object *obj, diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c index b57195d15674..8e18cf86cefc 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_object.c +++ b/drivers/staging/lustre/lustre/llite/vvp_object.c @@ -65,8 +65,7 @@ static int vvp_object_print(const struct lu_env *env, void *cookie, struct inode *inode = obj->vob_inode; struct ll_inode_info *lli; - (*p)(env, cookie, "(%s %d %d) inode: %p ", - list_empty(&obj->vob_pending_list) ? "-" : "+", + (*p)(env, cookie, "(%d %d) inode: %p ", atomic_read(&obj->vob_transient_pages), atomic_read(&obj->vob_mmap_cnt), inode); if (inode) { @@ -133,7 +132,7 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, CDEBUG(D_VFSTRACE, DFID ": losing layout lock\n", PFID(&lli->lli_fid)); - ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE); + ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE); /* Clean up page mmap for this inode. 
* The reason for us to do this is that if the page has @@ -146,27 +145,8 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, */ unmap_mapping_range(conf->coc_inode->i_mapping, 0, OBD_OBJECT_EOF, 0); - - return 0; } - if (conf->coc_opc != OBJECT_CONF_SET) - return 0; - - if (conf->u.coc_md && conf->u.coc_md->lsm) { - CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n", - PFID(&lli->lli_fid), lli->lli_layout_gen, - conf->u.coc_md->lsm->lsm_layout_gen); - - lli->lli_has_smd = lsm_has_objects(conf->u.coc_md->lsm); - ll_layout_version_set(lli, conf->u.coc_md->lsm->lsm_layout_gen); - } else { - CDEBUG(D_VFSTRACE, DFID ": layout nuked: %u.\n", - PFID(&lli->lli_fid), lli->lli_layout_gen); - - lli->lli_has_smd = false; - ll_layout_version_set(lli, LL_LAYOUT_GEN_EMPTY); - } return 0; } @@ -204,6 +184,26 @@ static int vvp_object_glimpse(const struct lu_env *env, return 0; } +static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj, + struct cl_req_attr *attr) +{ + u64 valid_flags = OBD_MD_FLTYPE; + struct inode *inode; + struct obdo *oa; + + oa = attr->cra_oa; + inode = vvp_object_inode(obj); + + if (attr->cra_type == CRT_WRITE) + valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLUID | OBD_MD_FLGID; + obdo_from_inode(oa, inode, valid_flags & attr->cra_flags); + obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID)) + oa->o_parent_oid++; + memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, LUSTRE_JOBID_SIZE); +} + static const struct cl_object_operations vvp_ops = { .coo_page_init = vvp_page_init, .coo_lock_init = vvp_lock_init, @@ -212,7 +212,8 @@ static const struct cl_object_operations vvp_ops = { .coo_attr_update = vvp_attr_update, .coo_conf_set = vvp_conf_set, .coo_prune = vvp_prune, - .coo_glimpse = vvp_object_glimpse + .coo_glimpse = vvp_object_glimpse, + .coo_req_attr_set = vvp_req_attr_set }; static int vvp_object_init0(const struct lu_env *env, @@ -240,7 +241,6 @@ static int vvp_object_init(const struct lu_env *env, struct lu_object *obj, const struct cl_object_conf *cconf; cconf = lu2cl_conf(conf); - INIT_LIST_HEAD(&vob->vob_pending_list); lu_object_add(obj, below); result = vvp_object_init0(env, vob, cconf); } else { diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c index 046e84d7a158..23d66308ff20 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_page.c +++ b/drivers/staging/lustre/lustre/llite/vvp_page.c @@ -162,13 +162,10 @@ static void vvp_page_delete(const struct lu_env *env, LASSERT((struct cl_page *)vmpage->private == page); LASSERT(inode == vvp_object_inode(obj)); - vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice)); - /* Drop the reference count held in vvp_page_init */ refc = atomic_dec_return(&page->cp_ref); LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc); - ClearPageUptodate(vmpage); ClearPagePrivate(vmpage); vmpage->private = 0; /* @@ -221,8 +218,6 @@ static int vvp_page_prep_write(const struct lu_env *env, if (!pg->cp_sync_io) set_page_writeback(vmpage); - vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice)); - return 0; } @@ -287,19 +282,6 @@ static void vvp_page_completion_write(const struct lu_env *env, CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret); - /* - * TODO: Actually it makes sense to add the page into oap pending - * list again and so that we don't need to take the page out from - * SoM write pending list, if we just meet a recoverable error, - * 
-ENOMEM, etc. - * To implement this, we just need to return a non zero value in - * ->cpo_completion method. The underlying transfer should be notified - * and then re-add the page into pending transfer queue. -jay - */ - - vpg->vpg_write_queued = 0; - vvp_write_complete(cl2vvp(slice->cpl_obj), vpg); - if (pg->cp_sync_io) { LASSERT(PageLocked(vmpage)); LASSERT(!PageWriteback(vmpage)); @@ -341,7 +323,6 @@ static int vvp_page_make_ready(const struct lu_env *env, LASSERT(pg->cp_state == CPS_CACHED); /* This actually clears the dirty bit in the radix tree. */ set_page_writeback(vmpage); - vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice)); CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n"); } else if (pg->cp_state == CPS_PAGEOUT) { /* is it possible for osc_flush_async_page() to already @@ -357,20 +338,6 @@ static int vvp_page_make_ready(const struct lu_env *env, return result; } -static int vvp_page_is_under_lock(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io, pgoff_t *max_index) -{ - if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE || - io->ci_type == CIT_FAULT) { - struct vvp_io *vio = vvp_env_io(env); - - if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) - *max_index = CL_PAGE_EOF; - } - return 0; -} - static int vvp_page_print(const struct lu_env *env, const struct cl_page_slice *slice, void *cookie, lu_printer_t printer) @@ -378,9 +345,8 @@ static int vvp_page_print(const struct lu_env *env, struct vvp_page *vpg = cl2vvp_page(slice); struct page *vmpage = vpg->vpg_page; - (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ", - vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, - vpg->vpg_write_queued, vmpage); + (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d) vm@%p ", + vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, vmpage); if (vmpage) { (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru", (long)vmpage->flags, page_count(vmpage), @@ -416,7 +382,6 @@ static const struct cl_page_operations vvp_page_ops = { .cpo_is_vmlocked = vvp_page_is_vmlocked, .cpo_fini = vvp_page_fini, .cpo_print = vvp_page_print, - .cpo_is_under_lock = vvp_page_is_under_lock, .io = { [CRT_READ] = { .cpo_prep = vvp_page_prep_read, @@ -515,7 +480,6 @@ static const struct cl_page_operations vvp_transient_page_ops = { .cpo_fini = vvp_transient_page_fini, .cpo_is_vmlocked = vvp_transient_page_is_vmlocked, .cpo_print = vvp_page_print, - .cpo_is_under_lock = vvp_page_is_under_lock, .io = { [CRT_READ] = { .cpo_prep = vvp_transient_page_prep, @@ -539,7 +503,6 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj, vpg->vpg_page = vmpage; get_page(vmpage); - INIT_LIST_HEAD(&vpg->vpg_pending_linkage); if (page->cp_type == CPT_CACHEABLE) { /* in cache, decref in vvp_page_delete */ atomic_inc(&page->cp_ref); diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c deleted file mode 100644 index e3f4c790d646..000000000000 --- a/drivers/staging/lustre/lustre/llite/vvp_req.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include "../include/lustre/lustre_idl.h" -#include "../include/cl_object.h" -#include "../include/obd.h" -#include "../include/obd_support.h" -#include "llite_internal.h" -#include "vvp_internal.h" - -static inline struct vvp_req *cl2vvp_req(const struct cl_req_slice *slice) -{ - return container_of0(slice, struct vvp_req, vrq_cl); -} - -/** - * Implementation of struct cl_req_operations::cro_attr_set() for VVP - * layer. VVP is responsible for - * - * - o_[mac]time - * - * - o_mode - * - * - o_parent_seq - * - * - o_[ug]id - * - * - o_parent_oid - * - * - o_parent_ver - * - * - o_ioepoch, - * - */ -static void vvp_req_attr_set(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *attr, u64 flags) -{ - struct inode *inode; - struct obdo *oa; - u32 valid_flags; - - oa = attr->cra_oa; - inode = vvp_object_inode(obj); - valid_flags = OBD_MD_FLTYPE; - - if (slice->crs_req->crq_type == CRT_WRITE) { - if (flags & OBD_MD_FLEPOCH) { - oa->o_valid |= OBD_MD_FLEPOCH; - oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch; - valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLUID | OBD_MD_FLGID; - } - } - obdo_from_inode(oa, inode, valid_flags & flags); - obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); - if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID)) - oa->o_parent_oid++; - memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, - LUSTRE_JOBID_SIZE); -} - -static void vvp_req_completion(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret) -{ - struct vvp_req *vrq; - - if (ioret > 0) - cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret); - - vrq = cl2vvp_req(slice); - kmem_cache_free(vvp_req_kmem, vrq); -} - -static const struct cl_req_operations vvp_req_ops = { - .cro_attr_set = vvp_req_attr_set, - .cro_completion = vvp_req_completion -}; - -int vvp_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req) -{ - struct vvp_req *vrq; - int result; - - vrq = kmem_cache_zalloc(vvp_req_kmem, GFP_NOFS); - if (vrq) { - cl_req_slice_add(req, &vrq->vrq_cl, dev, &vvp_req_ops); - result = 0; - } else { - result = -ENOMEM; - } - return result; -} diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index e070adb7a3cc..7a848ebc57c1 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -44,48 +44,39 @@ #include "llite_internal.h" -static -int get_xattr_type(const char *name) +const struct xattr_handler *get_xattr_type(const char *name) { - if (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)) - return XATTR_ACL_ACCESS_T; + int i = 0; - if (!strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT)) - return XATTR_ACL_DEFAULT_T; + while (ll_xattr_handlers[i]) { + size_t len = strlen(ll_xattr_handlers[i]->prefix); - if (!strncmp(name, XATTR_USER_PREFIX, - sizeof(XATTR_USER_PREFIX) - 1)) - return XATTR_USER_T; - - if (!strncmp(name, XATTR_TRUSTED_PREFIX, - 
sizeof(XATTR_TRUSTED_PREFIX) - 1)) - return XATTR_TRUSTED_T; - - if (!strncmp(name, XATTR_SECURITY_PREFIX, - sizeof(XATTR_SECURITY_PREFIX) - 1)) - return XATTR_SECURITY_T; - - if (!strncmp(name, XATTR_LUSTRE_PREFIX, - sizeof(XATTR_LUSTRE_PREFIX) - 1)) - return XATTR_LUSTRE_T; - - return XATTR_OTHER_T; + if (!strncmp(ll_xattr_handlers[i]->prefix, name, len)) + return ll_xattr_handlers[i]; + i++; + } + return NULL; } -static -int xattr_type_filter(struct ll_sb_info *sbi, int xattr_type) +static int xattr_type_filter(struct ll_sb_info *sbi, + const struct xattr_handler *handler) { - if ((xattr_type == XATTR_ACL_ACCESS_T || - xattr_type == XATTR_ACL_DEFAULT_T) && + /* No handler means XATTR_OTHER_T */ + if (!handler) + return -EOPNOTSUPP; + + if ((handler->flags == XATTR_ACL_ACCESS_T || + handler->flags == XATTR_ACL_DEFAULT_T) && !(sbi->ll_flags & LL_SBI_ACL)) return -EOPNOTSUPP; - if (xattr_type == XATTR_USER_T && !(sbi->ll_flags & LL_SBI_USER_XATTR)) + if (handler->flags == XATTR_USER_T && + !(sbi->ll_flags & LL_SBI_USER_XATTR)) return -EOPNOTSUPP; - if (xattr_type == XATTR_TRUSTED_T && !capable(CFS_CAP_SYS_ADMIN)) + + if (handler->flags == XATTR_TRUSTED_T && + !capable(CFS_CAP_SYS_ADMIN)) return -EPERM; - if (xattr_type == XATTR_OTHER_T) - return -EOPNOTSUPP; return 0; } @@ -111,7 +102,7 @@ ll_xattr_set_common(const struct xattr_handler *handler, valid = OBD_MD_FLXATTR; } - rc = xattr_type_filter(sbi, handler->flags); + rc = xattr_type_filter(sbi, handler); if (rc) return rc; @@ -121,8 +112,9 @@ ll_xattr_set_common(const struct xattr_handler *handler, return -EPERM; /* b10667: ignore lustre special xattr for now */ - if ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) || - (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov"))) + if (!strcmp(name, "hsm") || + ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) || + (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov")))) return 0; /* b15587: ignore security.capability xattr for now */ @@ -135,6 +127,11 @@ ll_xattr_set_common(const struct xattr_handler *handler, strcmp(name, "selinux") == 0) return -EOPNOTSUPP; + /*FIXME: enable IMA when the conditions are ready */ + if (handler->flags == XATTR_SECURITY_T && + (!strcmp(name, "ima") || !strcmp(name, "evm"))) + return -EOPNOTSUPP; + sprintf(fullname, "%s%s\n", handler->prefix, name); rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname, pv, size, 0, flags, @@ -151,6 +148,37 @@ ll_xattr_set_common(const struct xattr_handler *handler, return 0; } +static int get_hsm_state(struct inode *inode, u32 *hus_states) +{ + struct md_op_data *op_data; + struct hsm_user_state *hus; + int rc; + + hus = kzalloc(sizeof(*hus), GFP_NOFS); + if (!hus) + return -ENOMEM; + + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY, hus); + if (!IS_ERR(op_data)) { + rc = obd_iocontrol(LL_IOC_HSM_STATE_GET, ll_i2mdexp(inode), + sizeof(*op_data), op_data, NULL); + if (!rc) + *hus_states = hus->hus_states; + else + CDEBUG(D_VFSTRACE, "obd_iocontrol failed. rc = %d\n", + rc); + + ll_finish_md_op_data(op_data); + } else { + rc = PTR_ERR(op_data); + CDEBUG(D_VFSTRACE, "Could not prepare the opdata. 
rc = %d\n",
+ rc);
+ }
+ kfree(hus);
+ return rc;
+}
+
 static int ll_xattr_set(const struct xattr_handler *handler,
 struct dentry *dentry, struct inode *inode,
 const char *name, const void *value, size_t size,
@@ -187,6 +215,31 @@ static int ll_xattr_set(const struct xattr_handler *handler,
 if (lump && lump->lmm_stripe_offset == 0)
 lump->lmm_stripe_offset = -1;
+ /* Avoid anyone directly setting the RELEASED flag. */
+ if (lump && (lump->lmm_pattern & LOV_PATTERN_F_RELEASED)) {
+ /* Only if we have a released flag, check if the file
+ * was indeed archived.
+ */
+ u32 state = HS_NONE;
+
+ rc = get_hsm_state(inode, &state);
+ if (rc)
+ return rc;
+
+ if (!(state & HS_ARCHIVED)) {
+ CDEBUG(D_VFSTRACE,
+ "hus_states state = %x, pattern = %x\n",
+ state, lump->lmm_pattern);
+ /*
+ * Here the state is: the real file is not
+ * archived, but the user is requesting to set
+ * the RELEASED flag, so we mask off the
+ * released flag from the request.
+ */
+ lump->lmm_pattern ^= LOV_PATTERN_F_RELEASED;
+ }
+ }
+
 if (lump && S_ISREG(inode->i_mode)) {
 __u64 it_flags = FMODE_WRITE;
 int lum_size;
@@ -225,7 +278,8 @@ ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
 void *xdata;
 int rc;
- if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T) {
+ if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T &&
+ (type != XATTR_SECURITY_T || strcmp(name, "security.selinux"))) {
 rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
 if (rc == -EAGAIN)
 goto getxattr_nocache;
@@ -313,7 +367,7 @@ static int ll_xattr_get_common(const struct xattr_handler *handler,
 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
- rc = xattr_type_filter(sbi, handler->flags);
+ rc = xattr_type_filter(sbi, handler);
 if (rc)
 return rc;
@@ -353,80 +407,99 @@ static int ll_xattr_get_common(const struct xattr_handler *handler,
 OBD_MD_FLXATTR);
 }
-static int ll_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
+static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size)
 {
- LASSERT(inode);
- LASSERT(name);
+ ssize_t rc;
- CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
- PFID(ll_inode2fid(inode)), inode, name);
+ if (S_ISREG(inode->i_mode)) {
+ struct cl_object *obj = ll_i2info(inode)->lli_clob;
+ struct cl_layout cl = {
+ .cl_buf.lb_buf = buf,
+ .cl_buf.lb_len = buf_size,
+ };
+ struct lu_env *env;
+ int refcheck;
+
+ if (!obj)
+ return -ENODATA;
- if (!strcmp(name, "lov")) {
- struct lov_stripe_md *lsm;
- struct lov_user_md *lump;
- struct lov_mds_md *lmm = NULL;
- struct ptlrpc_request *request = NULL;
- int rc = 0, lmmsize = 0;
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ return PTR_ERR(env);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return -ENODATA;
+ rc = cl_object_layout_get(env, obj, &cl);
+ if (rc < 0)
+ goto out_env;
- lsm = ccc_inode_lsm_get(inode);
- if (!lsm) {
- if (S_ISDIR(inode->i_mode)) {
- rc = ll_dir_getstripe(inode, (void **)&lmm,
- &lmmsize, &request, 0);
- } else {
- rc = -ENODATA;
- }
- } else {
- /* LSM is present already after lookup/getattr call.
- * we need to grab layout lock once it is implemented - */ - rc = obd_packmd(ll_i2dtexp(inode), &lmm, lsm); - lmmsize = rc; + if (!cl.cl_size) { + rc = -ENODATA; + goto out_env; } - ccc_inode_lsm_put(inode, lsm); + rc = cl.cl_size; + + if (!buf_size) + goto out_env; + + LASSERT(buf && rc <= buf_size); + + /* + * Do not return layout gen for getxattr() since + * otherwise it would confuse tar --xattr by + * recognizing layout gen as stripe offset when the + * file is restored. See LU-2809. + */ + ((struct lov_mds_md *)buf)->lmm_layout_gen = 0; +out_env: + cl_env_put(env, &refcheck); + + return rc; + } else if (S_ISDIR(inode->i_mode)) { + struct ptlrpc_request *req = NULL; + struct lov_mds_md *lmm = NULL; + int lmm_size = 0; + + rc = ll_dir_getstripe(inode, (void **)&lmm, &lmm_size, + &req, 0); if (rc < 0) - goto out; + goto out_req; - if (size == 0) { - /* used to call ll_get_max_mdsize() forward to get - * the maximum buffer size, while some apps (such as - * rsync 3.0.x) care much about the exact xattr value - * size - */ - rc = lmmsize; - goto out; + if (!buf_size) { + rc = lmm_size; + goto out_req; } - if (size < lmmsize) { - CERROR("server bug: replied size %d > %d for %pd (%s)\n", - lmmsize, (int)size, dentry, name); + if (buf_size < lmm_size) { rc = -ERANGE; - goto out; + goto out_req; } - lump = buffer; - memcpy(lump, lmm, lmmsize); - /* do not return layout gen for getxattr otherwise it would - * confuse tar --xattr by recognizing layout gen as stripe - * offset when the file is restored. See LU-2809. - */ - lump->lmm_layout_gen = 0; + memcpy(buf, lmm, lmm_size); + rc = lmm_size; +out_req: + if (req) + ptlrpc_req_finished(req); - rc = lmmsize; -out: - if (request) - ptlrpc_req_finished(request); - else if (lmm) - obd_free_diskmd(ll_i2dtexp(inode), &lmm); return rc; + } else { + return -ENODATA; + } +} + +static int ll_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + LASSERT(inode); + LASSERT(name); + + CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); + + if (!strcmp(name, "lov")) { + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1); + + return ll_getxattr_lov(inode, buffer, size); } return ll_xattr_get_common(handler, dentry, inode, name, buffer, size); @@ -435,10 +508,10 @@ out: ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct inode *inode = d_inode(dentry); - int rc = 0, rc2 = 0; - struct lov_mds_md *lmm = NULL; - struct ptlrpc_request *request = NULL; - int lmmsize; + struct ll_sb_info *sbi = ll_i2sbi(inode); + char *xattr_name; + ssize_t rc, rc2; + size_t len, rem; LASSERT(inode); @@ -450,65 +523,48 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size) rc = ll_xattr_list(inode, NULL, XATTR_OTHER_T, buffer, size, OBD_MD_FLXATTRLS); if (rc < 0) - goto out; - - if (buffer) { - struct ll_sb_info *sbi = ll_i2sbi(inode); - char *xattr_name = buffer; - int xlen, rem = rc; - - while (rem > 0) { - xlen = strnlen(xattr_name, rem - 1) + 1; - rem -= xlen; - if (xattr_type_filter(sbi, - get_xattr_type(xattr_name)) == 0) { - /* skip OK xattr type - * leave it in buffer - */ - xattr_name += xlen; - continue; - } - /* move up remaining xattrs in buffer - * removing the xattr that is not OK - */ - memmove(xattr_name, xattr_name + xlen, rem); - rc -= xlen; + return rc; + /* + * If we're being called to get the size of the xattr list + * (buf_size == 0) then just assume that a lustre.lov 
xattr + * exists. + */ + if (!size) + return rc + sizeof(XATTR_LUSTRE_LOV); + + xattr_name = buffer; + rem = rc; + + while (rem > 0) { + len = strnlen(xattr_name, rem - 1) + 1; + rem -= len; + if (!xattr_type_filter(sbi, get_xattr_type(xattr_name))) { + /* Skip OK xattr type leave it in buffer */ + xattr_name += len; + continue; } - } - if (S_ISREG(inode->i_mode)) { - if (!ll_i2info(inode)->lli_has_smd) - rc2 = -1; - } else if (S_ISDIR(inode->i_mode)) { - rc2 = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, - &request, 0); + + /* + * Move up remaining xattrs in buffer + * removing the xattr that is not OK + */ + memmove(xattr_name, xattr_name + len, rem); + rc -= len; } - if (rc2 < 0) { - rc2 = 0; - goto out; - } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) { - const int prefix_len = sizeof(XATTR_LUSTRE_PREFIX) - 1; - const size_t name_len = sizeof("lov") - 1; - const size_t total_len = prefix_len + name_len + 1; - - if (((rc + total_len) > size) && buffer) { - ptlrpc_req_finished(request); - return -ERANGE; - } + rc2 = ll_getxattr_lov(inode, NULL, 0); + if (rc2 == -ENODATA) + return rc; - if (buffer) { - buffer += rc; - memcpy(buffer, XATTR_LUSTRE_PREFIX, prefix_len); - memcpy(buffer + prefix_len, "lov", name_len); - buffer[prefix_len + name_len] = '\0'; - } - rc2 = total_len; - } -out: - ptlrpc_req_finished(request); - rc = rc + rc2; + if (rc2 < 0) + return rc2; - return rc; + if (size < rc + sizeof(XATTR_LUSTRE_LOV)) + return -ERANGE; + + memcpy(buffer + rc, XATTR_LUSTRE_LOV, sizeof(XATTR_LUSTRE_LOV)); + + return rc + sizeof(XATTR_LUSTRE_LOV); } static const struct xattr_handler ll_user_xattr_handler = { diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c index 50a19a40bd4e..38f75f6aa887 100644 --- a/drivers/staging/lustre/lustre/llite/xattr_cache.c +++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c @@ -26,8 +26,8 @@ struct ll_xattr_entry { */ char *xe_name; /* xattr name, \0-terminated */ char *xe_value; /* xattr value */ - unsigned xe_namelen; /* strlen(xe_name) + 1 */ - unsigned xe_vallen; /* xattr value length */ + unsigned int xe_namelen; /* strlen(xe_name) + 1 */ + unsigned int xe_vallen; /* xattr value length */ }; static struct kmem_cache *xattr_kmem; @@ -60,7 +60,7 @@ void ll_xattr_fini(void) static void ll_xattr_cache_init(struct ll_inode_info *lli) { INIT_LIST_HEAD(&lli->lli_xattrs); - lli->lli_flags |= LLIF_XATTR_CACHE; + set_bit(LLIF_XATTR_CACHE, &lli->lli_flags); } /** @@ -104,7 +104,7 @@ static int ll_xattr_cache_find(struct list_head *cache, static int ll_xattr_cache_add(struct list_head *cache, const char *xattr_name, const char *xattr_val, - unsigned xattr_val_len) + unsigned int xattr_val_len) { struct ll_xattr_entry *xattr; @@ -216,7 +216,7 @@ static int ll_xattr_cache_list(struct list_head *cache, */ static int ll_xattr_cache_valid(struct ll_inode_info *lli) { - return !!(lli->lli_flags & LLIF_XATTR_CACHE); + return test_bit(LLIF_XATTR_CACHE, &lli->lli_flags); } /** @@ -233,7 +233,8 @@ static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0) ; /* empty loop */ - lli->lli_flags &= ~LLIF_XATTR_CACHE; + + clear_bit(LLIF_XATTR_CACHE, &lli->lli_flags); return 0; } @@ -415,6 +416,10 @@ static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit) CDEBUG(D_CACHE, "not caching %s\n", XATTR_NAME_ACL_ACCESS); rc = 0; + } else if (!strcmp(xdata, "security.selinux")) { + /* Filter out security.selinux, it is 
cached in slab */ + CDEBUG(D_CACHE, "not caching security.selinux\n"); + rc = 0; } else { rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval, *xsizes); diff --git a/drivers/staging/lustre/lustre/llite/xattr_security.c b/drivers/staging/lustre/lustre/llite/xattr_security.c new file mode 100644 index 000000000000..d61d8018001a --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/xattr_security.c @@ -0,0 +1,88 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * GPL HEADER END + */ + +/* + * Copyright (c) 2014 Bull SAS + * Author: Sebastien Buisson sebastien.buisson@bull.net + */ + +/* + * lustre/llite/xattr_security.c + * Handler for storing security labels as extended attributes. + */ +#include <linux/security.h> +#include <linux/xattr.h> +#include "llite_internal.h" + +/** + * A helper function for ll_security_inode_init_security() + * that takes care of setting xattrs + * + * Get security context of @inode from @xattr_array, + * and put it in 'security.xxx' xattr of dentry + * stored in @fs_info. + * + * \retval 0 success + * \retval -ENOMEM if no memory could be allocated for xattr name + * \retval < 0 failure to set xattr + */ +static int +ll_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) +{ + const struct xattr_handler *handler; + struct dentry *dentry = fs_info; + const struct xattr *xattr; + int err = 0; + + handler = get_xattr_type(XATTR_SECURITY_PREFIX); + if (!handler) + return -ENXIO; + + for (xattr = xattr_array; xattr->name; xattr++) { + err = handler->set(handler, dentry, inode, xattr->name, + xattr->value, xattr->value_len, + XATTR_CREATE); + if (err < 0) + break; + } + return err; +} + +/** + * Initializes security context + * + * Get security context of @inode in @dir, + * and put it in 'security.xxx' xattr of @dentry. 
+ * + * \retval 0 success, or SELinux is disabled + * \retval -ENOMEM if no memory could be allocated for xattr name + * \retval < 0 failure to get security context or set xattr + */ +int +ll_init_security(struct dentry *dentry, struct inode *inode, struct inode *dir) +{ + if (!selinux_is_enabled()) + return 0; + + return security_inode_init_security(inode, dir, NULL, + &ll_initxattrs, dentry); +} diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c index 9f4e826bb0af..b1071cf5a70c 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_intent.c @@ -223,7 +223,14 @@ int lmv_revalidate_slaves(struct obd_export *exp, LASSERT(body); if (unlikely(body->mbo_nlink < 2)) { - CERROR("%s: nlink %d < 2 corrupt stripe %d "DFID":" DFID"\n", + /* + * If this is a bad stripe, most likely due + * to the race between close(unlink) and + * getattr, let's return -ENOENT, so llite + * will revalidate the dentry, see + * ll_inode_revalidate_fini() + */ + CDEBUG(D_INODE, "%s: nlink %d < 2 corrupt stripe %d "DFID":" DFID"\n", obd->obd_name, body->mbo_nlink, i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid), PFID(&lsm->lsm_md_oinfo[0].lmo_fid)); @@ -233,7 +240,7 @@ int lmv_revalidate_slaves(struct obd_export *exp, it.it_lock_mode = 0; } - rc = -EIO; + rc = -ENOENT; goto cleanup; } diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h index 52b03745ac19..12731a17e263 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h +++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h @@ -54,9 +54,6 @@ int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds); int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp, struct lu_fid *fid, struct md_op_data *op_data); -int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp, - const union lmv_mds_md *lmm, int stripe_count); - int lmv_revalidate_slaves(struct obd_export *exp, const struct lmv_stripe_md *lsm, ldlm_blocking_callback cb_blocking, diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c index 7dbb2b946acf..f124f6c05ea4 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c @@ -62,6 +62,7 @@ static void lmv_activate_target(struct lmv_obd *lmv, tgt->ltd_active = activate; lmv->desc.ld_active_tgt_count += (activate ?
1 : -1); + tgt->ltd_exp->exp_obd->obd_inactive = !activate; } /** @@ -245,8 +246,7 @@ static int lmv_connect(const struct lu_env *env, return rc; } -static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize, - u32 cookiesize, u32 def_cookiesize) +static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -262,14 +262,7 @@ static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize, lmv->max_def_easize = def_easize; change = 1; } - if (lmv->max_cookiesize < cookiesize) { - lmv->max_cookiesize = cookiesize; - change = 1; - } - if (lmv->max_def_cookiesize < def_cookiesize) { - lmv->max_def_cookiesize = def_cookiesize; - change = 1; - } + if (change == 0) return 0; @@ -284,8 +277,7 @@ static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize, continue; } - rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize, - cookiesize, def_cookiesize); + rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize); if (rc) { CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n", obd->obd_name, i, rc); @@ -368,8 +360,7 @@ static int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) tgt->ltd_exp = mdc_exp; lmv->desc.ld_active_tgt_count++; - md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize, - lmv->max_cookiesize, lmv->max_def_cookiesize); + md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize); CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n", mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, @@ -396,27 +387,23 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, __u32 index, int gen) { struct lmv_obd *lmv = &obd->u.lmv; + struct obd_device *mdc_obd; struct lmv_tgt_desc *tgt; int orig_tgt_count = 0; int rc = 0; CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index); - mutex_lock(&lmv->lmv_init_mutex); - - if (lmv->desc.ld_tgt_count == 0) { - struct obd_device *mdc_obd; - - mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME, - &obd->obd_uuid); - if (!mdc_obd) { - mutex_unlock(&lmv->lmv_init_mutex); - CERROR("%s: Target %s not attached: rc = %d\n", - obd->obd_name, uuidp->uuid, -EINVAL); - return -EINVAL; - } + mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME, + &obd->obd_uuid); + if (!mdc_obd) { + CERROR("%s: Target %s not attached: rc = %d\n", + obd->obd_name, uuidp->uuid, -EINVAL); + return -EINVAL; } + mutex_lock(&lmv->lmv_init_mutex); + if ((index < lmv->tgts_size) && lmv->tgts[index]) { tgt = lmv->tgts[index]; CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n", @@ -472,22 +459,27 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, lmv->desc.ld_tgt_count = index + 1; } - if (lmv->connected) { - rc = lmv_connect_mdc(obd, tgt); - if (rc) { - spin_lock(&lmv->lmv_lock); - if (lmv->desc.ld_tgt_count == index + 1) - lmv->desc.ld_tgt_count = orig_tgt_count; - memset(tgt, 0, sizeof(*tgt)); - spin_unlock(&lmv->lmv_lock); - } else { - int easize = sizeof(struct lmv_stripe_md) + - lmv->desc.ld_tgt_count * sizeof(struct lu_fid); - lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); - } + if (!lmv->connected) { + /* lmv_check_connect() will connect this target. 
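The lmv_add_target() rework above hoists the class_find_client_obd() lookup in front of mutex_lock(), so the not-attached error path returns without ever touching the mutex. A hedged sketch of the general shape (demo_* names are hypothetical):

#include <linux/errno.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(demo_init_mutex);

static void *demo_find_target(int index);	/* hypothetical lookup */

static int demo_add_target(int index)
{
	void *tgt;

	/* validate before locking: early failures simply return, and
	 * the critical section shrinks to the shared-state update alone */
	tgt = demo_find_target(index);
	if (!tgt)
		return -EINVAL;

	mutex_lock(&demo_init_mutex);
	/* ... install tgt in the shared target table ... */
	mutex_unlock(&demo_init_mutex);

	return 0;
}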
*/ + mutex_unlock(&lmv->lmv_init_mutex); + return rc; } + /* Otherwise let's connect it ourselves */ mutex_unlock(&lmv->lmv_init_mutex); + rc = lmv_connect_mdc(obd, tgt); + if (rc) { + spin_lock(&lmv->lmv_lock); + if (lmv->desc.ld_tgt_count == index + 1) + lmv->desc.ld_tgt_count = orig_tgt_count; + memset(tgt, 0, sizeof(*tgt)); + spin_unlock(&lmv->lmv_lock); + } else { + int easize = sizeof(struct lmv_stripe_md) + + lmv->desc.ld_tgt_count * sizeof(struct lu_fid); + lmv_init_ea_size(obd->obd_self_export, easize, 0); + } + return rc; } @@ -538,7 +530,7 @@ int lmv_check_connect(struct obd_device *obd) class_export_put(lmv->exp); lmv->connected = 1; easize = lmv_mds_md_size(lmv->desc.ld_tgt_count, LMV_MAGIC); - lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); + lmv_init_ea_size(obd->obd_self_export, easize, 0); mutex_unlock(&lmv->lmv_init_mutex); return 0; @@ -1128,9 +1120,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, mdc_obd = class_exp2obd(tgt->ltd_exp); mdc_obd->obd_force = obddev->obd_force; err = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); - if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) { - return err; - } else if (err) { + if (err) { if (tgt->ltd_active) { CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n", tgt->ltd_uuid.uuid, i, cmd, err); @@ -1284,7 +1274,6 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg) obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid); lmv->desc.ld_tgt_count = 0; lmv->desc.ld_active_tgt_count = 0; - lmv->max_cookiesize = 0; lmv->max_def_easize = 0; lmv->max_easize = 0; lmv->lmv_placement = PLACEMENT_CHAR_POLICY; @@ -1630,27 +1619,28 @@ lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, * ct_restore(). */ if (op_data->op_bias & MDS_CREATE_VOLATILE && - (int)op_data->op_mds != -1 && lsm) { + (int)op_data->op_mds != -1) { int i; tgt = lmv_get_target(lmv, op_data->op_mds, NULL); if (IS_ERR(tgt)) return tgt; - /* refill the right parent fid */ - for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - struct lmv_oinfo *oinfo; + if (lsm) { + /* refill the right parent fid */ + for (i = 0; i < lsm->lsm_md_stripe_count; i++) { + struct lmv_oinfo *oinfo; - oinfo = &lsm->lsm_md_oinfo[i]; - if (oinfo->lmo_mds == op_data->op_mds) { - *fid = oinfo->lmo_fid; - break; + oinfo = &lsm->lsm_md_oinfo[i]; + if (oinfo->lmo_mds == op_data->op_mds) { + *fid = oinfo->lmo_fid; + break; + } } - } - /* Hmm, can not find the stripe by mdt_index(op_mds) */ - if (i == lsm->lsm_md_stripe_count) - tgt = ERR_PTR(-EINVAL); + if (i == lsm->lsm_md_stripe_count) + *fid = lsm->lsm_md_oinfo[0].lmo_fid; + } return tgt; } @@ -1728,30 +1718,9 @@ static int lmv_create(struct obd_export *exp, struct md_op_data *op_data, return rc; } -static int lmv_done_writing(struct obd_export *exp, - struct md_op_data *op_data, - struct md_open_data *mod) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - int rc; - - rc = lmv_check_connect(obd); - if (rc) - return rc; - - tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - rc = md_done_writing(tgt->ltd_exp, op_data, mod); - return rc; -} - static int lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, - const ldlm_policy_data_t *policy, + const union ldlm_policy_data *policy, struct lookup_intent *it, struct md_op_data *op_data, struct lustre_handle *lockh, __u64 extra_lock_flags) { @@ -1847,7 +1816,7 @@ static int lmv_early_cancel(struct obd_export *exp, 
struct lmv_tgt_desc *tgt, struct lu_fid *fid = md_op_data_fid(op_data, flag); struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; - ldlm_policy_data_t policy = { {0} }; + union ldlm_policy_data policy = { { 0 } }; int rc = 0; if (!fid_is_sane(fid)) @@ -1937,7 +1906,10 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *target_exp; struct lmv_tgt_desc *src_tgt; + struct lmv_tgt_desc *tgt_tgt; + struct mdt_body *body; int rc; LASSERT(oldlen != 0); @@ -1977,6 +1949,10 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, if (rc) return rc; src_tgt = lmv_find_target(lmv, &op_data->op_fid3); + if (IS_ERR(src_tgt)) + return PTR_ERR(src_tgt); + + target_exp = src_tgt->ltd_exp; } else { if (op_data->op_mea1) { struct lmv_stripe_md *lsm = op_data->op_mea1; @@ -1985,29 +1961,27 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, oldlen, &op_data->op_fid1, &op_data->op_mds); - if (IS_ERR(src_tgt)) - return PTR_ERR(src_tgt); } else { src_tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(src_tgt)) - return PTR_ERR(src_tgt); - - op_data->op_mds = src_tgt->ltd_idx; } + if (IS_ERR(src_tgt)) + return PTR_ERR(src_tgt); if (op_data->op_mea2) { struct lmv_stripe_md *lsm = op_data->op_mea2; - const struct lmv_oinfo *oinfo; - oinfo = lsm_name_to_stripe_info(lsm, new, newlen); - if (IS_ERR(oinfo)) - return PTR_ERR(oinfo); - - op_data->op_fid2 = oinfo->lmo_fid; + tgt_tgt = lmv_locate_target_for_name(lmv, lsm, new, + newlen, + &op_data->op_fid2, + &op_data->op_mds); + } else { + tgt_tgt = lmv_find_target(lmv, &op_data->op_fid2); } + if (IS_ERR(tgt_tgt)) + return PTR_ERR(tgt_tgt); + + target_exp = tgt_tgt->ltd_exp; } - if (IS_ERR(src_tgt)) - return PTR_ERR(src_tgt); /* * LOOKUP lock on src child (fid3) should also be cancelled for @@ -2048,26 +2022,56 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, return rc; } +retry_rename: /* * Cancel all the locks on tgt child (fid4). */ - if (fid_is_sane(&op_data->op_fid4)) + if (fid_is_sane(&op_data->op_fid4)) { + struct lmv_tgt_desc *tgt; + rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx, LCK_EX, MDS_INODELOCK_FULL, MF_MDC_CANCEL_FID4); + if (rc) + return rc; + + tgt = lmv_find_target(lmv, &op_data->op_fid4); + if (IS_ERR(tgt)) + return PTR_ERR(tgt); - CDEBUG(D_INODE, DFID":m%d to "DFID"\n", PFID(&op_data->op_fid1), - op_data->op_mds, PFID(&op_data->op_fid2)); + /* + * Since the target child might be destroyed, and it might + * become orphan, and we can only check orphan on the local + * MDT right now, so we send rename request to the MDT where + * target child is located. If target child does not exist, + * then it will send the request to the target parent + */ + target_exp = tgt->ltd_exp; + } - rc = md_rename(src_tgt->ltd_exp, op_data, old, oldlen, - new, newlen, request); - return rc; + rc = md_rename(target_exp, op_data, old, oldlen, new, newlen, request); + if (rc && rc != -EREMOTE) + return rc; + + body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); + if (!body) + return -EPROTO; + + /* Not cross-ref case, just get out of here. 
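The retry_rename path here implements a redirect-and-retry: the rename is first sent to the MDT believed to hold the target child, and if the reply comes back cross-ref (-EREMOTE, OBD_MD_MDS set, the real FID in the body), the request is retargeted and resent. A simplified sketch of that pattern, with hypothetical demo_* types and helpers:

#include <linux/errno.h>
#include <linux/types.h>

struct demo_server;
struct demo_reply;

int demo_send_rename(struct demo_server *srv, struct demo_reply **reply);
bool demo_reply_is_remote(const struct demo_reply *reply);
struct demo_server *demo_reply_owner(const struct demo_reply *reply);
void demo_put_reply(struct demo_reply *reply);

static int demo_rename(struct demo_server *srv, struct demo_reply **reply)
{
	int rc;

retry:
	/* send to the current best guess for the owning server */
	rc = demo_send_rename(srv, reply);
	if (rc && rc != -EREMOTE)
		return rc;			/* hard failure */

	if (!demo_reply_is_remote(*reply))
		return rc;			/* handled locally, done */

	/* cross-ref reply: retarget to the real owner and resend */
	srv = demo_reply_owner(*reply);
	demo_put_reply(*reply);
	*reply = NULL;
	goto retry;
}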
*/ + if (likely(!(body->mbo_valid & OBD_MD_MDS))) + return rc; + + CDEBUG(D_INODE, "%s: try rename to another MDT for " DFID "\n", + exp->exp_obd->obd_name, PFID(&body->mbo_fid1)); + + op_data->op_fid4 = body->mbo_fid1; + ptlrpc_req_finished(*request); + *request = NULL; + goto retry_rename; } static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, void *ea2, size_t ea2len, - struct ptlrpc_request **request, - struct md_open_data **mod) + void *ea, size_t ealen, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -2086,10 +2090,7 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, if (IS_ERR(tgt)) return PTR_ERR(tgt); - rc = md_setattr(tgt->ltd_exp, op_data, ea, ealen, ea2, - ea2len, request, mod); - - return rc; + return md_setattr(tgt->ltd_exp, op_data, ea, ealen, request); } static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid, @@ -2623,23 +2624,10 @@ try_next_stripe: goto retry_unlink; } -static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) +static int lmv_precleanup(struct obd_device *obd) { - struct lmv_obd *lmv = &obd->u.lmv; - - switch (stage) { - case OBD_CLEANUP_EARLY: - /* XXX: here should be calling obd_precleanup() down to - * stack. - */ - break; - case OBD_CLEANUP_EXPORTS: - fld_client_debugfs_fini(&lmv->lmv_fld); - lprocfs_obd_cleanup(obd); - break; - default: - break; - } + fld_client_debugfs_fini(&obd->u.lmv.lmv_fld); + lprocfs_obd_cleanup(obd); return 0; } @@ -2654,14 +2642,12 @@ static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) * \param[in] key identifier of key to get value for * \param[in] vallen size of \a val * \param[out] val pointer to storage location for value - * \param[in] lsm optional striping metadata of object * * \retval 0 on success * \retval negative negated errno on failure */ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm) + __u32 keylen, void *key, __u32 *vallen, void *val) { struct obd_device *obd; struct lmv_obd *lmv; @@ -2693,7 +2679,7 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, continue; if (!obd_get_info(env, tgt->ltd_exp, keylen, key, - vallen, val, NULL)) + vallen, val)) return 0; } return -EINVAL; @@ -2709,7 +2695,7 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, * desc. 
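lmv_get_info() above now takes just key/vallen/val, the striping argument having been dropped all along the obd_get_info() chain. A toy sketch of the key/value contract — DEMO_KEY_IS() and the key name are illustrative, not the driver's own definitions:

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

#define DEMO_KEY_IS(key, keylen, str) \
	((keylen) == sizeof(str) && !strcmp((key), (str)))

static int demo_get_info(const char *key, u32 keylen, u32 *vallen, void *val)
{
	if (DEMO_KEY_IS(key, keylen, "max_easize")) {
		if (*vallen < sizeof(int))
			return -EOVERFLOW;	/* caller's buffer too small */
		*(int *)val = 128;		/* placeholder value */
		*vallen = sizeof(int);
		return 0;
	}
	return -EINVAL;				/* unrecognized key */
}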
*/ rc = obd_get_info(env, lmv->tgts[0]->ltd_exp, keylen, key, - vallen, val, NULL); + vallen, val); if (!rc && KEY_IS(KEY_CONN_DATA)) exp->exp_connect_data = *(struct obd_connect_data *)val; return rc; @@ -2777,90 +2763,6 @@ static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp, return -EINVAL; } -static int lmv_pack_md_v1(const struct lmv_stripe_md *lsm, - struct lmv_mds_md_v1 *lmm1) -{ - int cplen; - int i; - - lmm1->lmv_magic = cpu_to_le32(lsm->lsm_md_magic); - lmm1->lmv_stripe_count = cpu_to_le32(lsm->lsm_md_stripe_count); - lmm1->lmv_master_mdt_index = cpu_to_le32(lsm->lsm_md_master_mdt_index); - lmm1->lmv_hash_type = cpu_to_le32(lsm->lsm_md_hash_type); - cplen = strlcpy(lmm1->lmv_pool_name, lsm->lsm_md_pool_name, - sizeof(lmm1->lmv_pool_name)); - if (cplen >= sizeof(lmm1->lmv_pool_name)) - return -E2BIG; - - for (i = 0; i < lsm->lsm_md_stripe_count; i++) - fid_cpu_to_le(&lmm1->lmv_stripe_fids[i], - &lsm->lsm_md_oinfo[i].lmo_fid); - return 0; -} - -static int -lmv_pack_md(union lmv_mds_md **lmmp, const struct lmv_stripe_md *lsm, - int stripe_count) -{ - int lmm_size = 0, rc = 0; - bool allocated = false; - - LASSERT(lmmp); - - /* Free lmm */ - if (*lmmp && !lsm) { - int stripe_cnt; - - stripe_cnt = lmv_mds_md_stripe_count_get(*lmmp); - lmm_size = lmv_mds_md_size(stripe_cnt, - le32_to_cpu((*lmmp)->lmv_magic)); - if (!lmm_size) - return -EINVAL; - kvfree(*lmmp); - *lmmp = NULL; - return 0; - } - - /* Alloc lmm */ - if (!*lmmp && !lsm) { - lmm_size = lmv_mds_md_size(stripe_count, LMV_MAGIC); - LASSERT(lmm_size > 0); - *lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS); - if (!*lmmp) - return -ENOMEM; - lmv_mds_md_stripe_count_set(*lmmp, stripe_count); - (*lmmp)->lmv_magic = cpu_to_le32(LMV_MAGIC); - return lmm_size; - } - - /* pack lmm */ - LASSERT(lsm); - lmm_size = lmv_mds_md_size(lsm->lsm_md_stripe_count, - lsm->lsm_md_magic); - if (!*lmmp) { - *lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS); - if (!*lmmp) - return -ENOMEM; - allocated = true; - } - - switch (lsm->lsm_md_magic) { - case LMV_MAGIC_V1: - rc = lmv_pack_md_v1(lsm, &(*lmmp)->lmv_md_v1); - break; - default: - rc = -EINVAL; - break; - } - - if (rc && allocated) { - kvfree(*lmmp); - *lmmp = NULL; - } - - return lmm_size; -} - static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm, const struct lmv_mds_md_v1 *lmm1) { @@ -2903,8 +2805,8 @@ static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm, return rc; } -int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp, - const union lmv_mds_md *lmm, int stripe_count) +static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp, + const union lmv_mds_md *lmm, size_t lmm_size) { struct lmv_stripe_md *lsm; bool allocated = false; @@ -2933,17 +2835,6 @@ int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp, return 0; } - /* Alloc memmd */ - if (!lsm && !lmm) { - lsm_size = lmv_stripe_md_size(stripe_count); - lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS); - if (!lsm) - return -ENOMEM; - lsm->lsm_md_stripe_count = stripe_count; - *lsmp = lsm; - return 0; - } - if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_STRIPE) return -EPERM; @@ -2991,38 +2882,17 @@ int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp, } return lsm_size; } -EXPORT_SYMBOL(lmv_unpack_md); -static int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, - struct lov_mds_md *lmm, int disk_len) +void lmv_free_memmd(struct lmv_stripe_md *lsm) { - return lmv_unpack_md(exp, (struct lmv_stripe_md 
**)lsmp, - (union lmv_mds_md *)lmm, disk_len); -} - -static int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, - struct lov_stripe_md *lsm) -{ - const struct lmv_stripe_md *lmv = (struct lmv_stripe_md *)lsm; - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv_obd = &obd->u.lmv; - int stripe_count; - - if (!lmmp) { - if (lsm) - stripe_count = lmv->lsm_md_stripe_count; - else - stripe_count = lmv_obd->desc.ld_tgt_count; - - return lmv_mds_md_size(stripe_count, LMV_MAGIC_V1); - } - - return lmv_pack_md((union lmv_mds_md **)lmmp, lmv, 0); + lmv_unpackmd(NULL, &lsm, NULL, 0); } +EXPORT_SYMBOL(lmv_free_memmd); static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, - ldlm_policy_data_t *policy, enum ldlm_mode mode, - enum ldlm_cancel_flags flags, void *opaque) + union ldlm_policy_data *policy, + enum ldlm_mode mode, enum ldlm_cancel_flags flags, + void *opaque) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -3064,7 +2934,7 @@ static int lmv_set_lock_data(struct obd_export *exp, static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags, const struct lu_fid *fid, enum ldlm_type type, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, struct lustre_handle *lockh) { @@ -3271,32 +3141,6 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp, return rc; } -static int lmv_quotacheck(struct obd_device *unused, struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - struct obd_device *obd = class_exp2obd(exp); - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - int rc = 0; - u32 i; - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - int err; - - tgt = lmv->tgts[i]; - if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) { - CERROR("lmv idx %d inactive\n", i); - return -EIO; - } - - err = obd_quotacheck(tgt->ltd_exp, oqctl); - if (err && !rc) - rc = err; - } - - return rc; -} - static int lmv_merge_attr(struct obd_export *exp, const struct lmv_stripe_md *lsm, struct cl_attr *attr, @@ -3349,12 +3193,9 @@ static struct obd_ops lmv_obd_ops = { .statfs = lmv_statfs, .get_info = lmv_get_info, .set_info_async = lmv_set_info_async, - .packmd = lmv_packmd, - .unpackmd = lmv_unpackmd, .notify = lmv_notify, .get_uuid = lmv_get_uuid, .iocontrol = lmv_iocontrol, - .quotacheck = lmv_quotacheck, .quotactl = lmv_quotactl }; @@ -3363,7 +3204,6 @@ static struct md_ops lmv_md_ops = { .null_inode = lmv_null_inode, .close = lmv_close, .create = lmv_create, - .done_writing = lmv_done_writing, .enqueue = lmv_enqueue, .getattr = lmv_getattr, .getxattr = lmv_getxattr, @@ -3388,6 +3228,7 @@ static struct md_ops lmv_md_ops = { .intent_getattr_async = lmv_intent_getattr_async, .revalidate_lock = lmv_revalidate_lock, .get_fid_from_lsm = lmv_get_fid_from_lsm, + .unpackmd = lmv_unpackmd, }; static int __init lmv_init(void) diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h index 4d2b7d303fea..c49a34bf10e5 100644 --- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h +++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h @@ -217,7 +217,7 @@ struct lov_object { union lov_layout_state { struct lov_layout_raid0 { - unsigned lo_nr; + unsigned int lo_nr; /** * When this is true, lov_object::lo_attr contains * valid up to date attributes for a top-level @@ -412,7 +412,6 @@ struct lov_io_sub { int sub_refcheck; int sub_refcheck2; int sub_reenter; - void *sub_cookie; }; /** @@ -473,20 +472,6 @@ struct 
lov_session { struct lov_sublock_env ls_subenv; }; -/** - * State of transfer for lov. - */ -struct lov_req { - struct cl_req_slice lr_cl; -}; - -/** - * State of transfer for lovsub. - */ -struct lovsub_req { - struct cl_req_slice lsrq_cl; -}; - extern struct lu_device_type lov_device_type; extern struct lu_device_type lovsub_device_type; @@ -497,11 +482,9 @@ extern struct kmem_cache *lov_lock_kmem; extern struct kmem_cache *lov_object_kmem; extern struct kmem_cache *lov_thread_kmem; extern struct kmem_cache *lov_session_kmem; -extern struct kmem_cache *lov_req_kmem; extern struct kmem_cache *lovsub_lock_kmem; extern struct kmem_cache *lovsub_object_kmem; -extern struct kmem_cache *lovsub_req_kmem; extern struct kmem_cache *lov_lock_link_kmem; @@ -700,11 +683,6 @@ static inline struct lov_page *cl2lov_page(const struct cl_page_slice *slice) return container_of0(slice, struct lov_page, lps_cl); } -static inline struct lov_req *cl2lov_req(const struct cl_req_slice *slice) -{ - return container_of0(slice, struct lov_req, lr_cl); -} - static inline struct lovsub_page * cl2lovsub_page(const struct cl_page_slice *slice) { @@ -712,11 +690,6 @@ cl2lovsub_page(const struct cl_page_slice *slice) return container_of0(slice, struct lovsub_page, lsb_cl); } -static inline struct lovsub_req *cl2lovsub_req(const struct cl_req_slice *slice) -{ - return container_of0(slice, struct lovsub_req, lsrq_cl); -} - static inline struct lov_io *cl2lov_io(const struct lu_env *env, const struct cl_io_slice *ios) { diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c index 056ae2ed88e8..7301f6e579a1 100644 --- a/drivers/staging/lustre/lustre/lov/lov_dev.c +++ b/drivers/staging/lustre/lustre/lov/lov_dev.c @@ -46,11 +46,9 @@ struct kmem_cache *lov_lock_kmem; struct kmem_cache *lov_object_kmem; struct kmem_cache *lov_thread_kmem; struct kmem_cache *lov_session_kmem; -struct kmem_cache *lov_req_kmem; struct kmem_cache *lovsub_lock_kmem; struct kmem_cache *lovsub_object_kmem; -struct kmem_cache *lovsub_req_kmem; struct kmem_cache *lov_lock_link_kmem; @@ -79,11 +77,6 @@ struct lu_kmem_descr lov_caches[] = { .ckd_size = sizeof(struct lov_session) }, { - .ckd_cache = &lov_req_kmem, - .ckd_name = "lov_req_kmem", - .ckd_size = sizeof(struct lov_req) - }, - { .ckd_cache = &lovsub_lock_kmem, .ckd_name = "lovsub_lock_kmem", .ckd_size = sizeof(struct lovsub_lock) @@ -94,11 +87,6 @@ struct lu_kmem_descr lov_caches[] = { .ckd_size = sizeof(struct lovsub_object) }, { - .ckd_cache = &lovsub_req_kmem, - .ckd_name = "lovsub_req_kmem", - .ckd_size = sizeof(struct lovsub_req) - }, - { .ckd_cache = &lov_lock_link_kmem, .ckd_name = "lov_lock_link_kmem", .ckd_size = sizeof(struct lov_lock_link) @@ -110,25 +98,6 @@ struct lu_kmem_descr lov_caches[] = { /***************************************************************************** * - * Lov transfer operations. - * - */ - -static void lov_req_completion(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret) -{ - struct lov_req *lr; - - lr = cl2lov_req(slice); - kmem_cache_free(lov_req_kmem, lr); -} - -static const struct cl_req_operations lov_req_ops = { - .cro_completion = lov_req_completion -}; - -/***************************************************************************** - * * Lov device and device type functions. 
* */ @@ -248,26 +217,6 @@ static int lov_device_init(const struct lu_env *env, struct lu_device *d, return rc; } -static int lov_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req) -{ - struct lov_req *lr; - int result; - - lr = kmem_cache_zalloc(lov_req_kmem, GFP_NOFS); - if (lr) { - cl_req_slice_add(req, &lr->lr_cl, dev, &lov_req_ops); - result = 0; - } else { - result = -ENOMEM; - } - return result; -} - -static const struct cl_device_operations lov_cl_ops = { - .cdo_req_init = lov_req_init -}; - static void lov_emerg_free(struct lov_device_emerg **emrg, int nr) { int i; @@ -478,7 +427,6 @@ static struct lu_device *lov_device_alloc(const struct lu_env *env, cl_device_init(&ld->ld_cl, t); d = lov2lu_dev(ld); d->ld_ops = &lov_lu_ops; - ld->ld_cl.cd_ops = &lov_cl_ops; mutex_init(&ld->ld_mutex); lockdep_set_class(&ld->ld_mutex, &cl_lov_device_mutex_class); diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c index 214c561767e0..ac0bf64c08c1 100644 --- a/drivers/staging/lustre/lustre/lov/lov_ea.c +++ b/drivers/staging/lustre/lustre/lov/lov_ea.c @@ -76,18 +76,19 @@ static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes, return 0; } -struct lov_stripe_md *lsm_alloc_plain(__u16 stripe_count, int *size) +struct lov_stripe_md *lsm_alloc_plain(u16 stripe_count) { + size_t oinfo_ptrs_size, lsm_size; struct lov_stripe_md *lsm; struct lov_oinfo *loi; - int i, oinfo_ptrs_size; + int i; LASSERT(stripe_count <= LOV_MAX_STRIPE_COUNT); oinfo_ptrs_size = sizeof(struct lov_oinfo *) * stripe_count; - *size = sizeof(struct lov_stripe_md) + oinfo_ptrs_size; + lsm_size = sizeof(*lsm) + oinfo_ptrs_size; - lsm = libcfs_kvzalloc(*size, GFP_NOFS); + lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS); if (!lsm) return NULL; @@ -117,9 +118,43 @@ void lsm_free_plain(struct lov_stripe_md *lsm) kvfree(lsm); } -static void lsm_unpackmd_common(struct lov_stripe_md *lsm, - struct lov_mds_md *lmm) +/* + * Find minimum stripe maxbytes value. For inactive or + * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES. + */ +static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt) +{ + loff_t maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES; + struct obd_import *imp; + + if (!tgt->ltd_active) + return maxbytes; + + imp = tgt->ltd_obd->u.cli.cl_import; + if (!imp) + return maxbytes; + + spin_lock(&imp->imp_lock); + if (imp->imp_state == LUSTRE_IMP_FULL && + (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) && + imp->imp_connect_data.ocd_maxbytes > 0) + maxbytes = imp->imp_connect_data.ocd_maxbytes; + + spin_unlock(&imp->imp_lock); + + return maxbytes; +} + +static int lsm_unpackmd_common(struct lov_obd *lov, + struct lov_stripe_md *lsm, + struct lov_mds_md *lmm, + struct lov_ost_data_v1 *objects) { + loff_t stripe_maxbytes = LLONG_MAX; + unsigned int stripe_count; + struct lov_oinfo *loi; + unsigned int i; + /* * This supposes lov_mds_md_v1/v3 first fields are * are the same @@ -129,11 +164,54 @@ static void lsm_unpackmd_common(struct lov_stripe_md *lsm, lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern); lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen); lsm->lsm_pool_name[0] = '\0'; + + stripe_count = lsm_is_released(lsm) ? 
0 : lsm->lsm_stripe_count; + + for (i = 0; i < stripe_count; i++) { + loff_t tgt_bytes; + + loi = lsm->lsm_oinfo[i]; + ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi); + loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx); + loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen); + if (lov_oinfo_is_dummy(loi)) + continue; + + if (loi->loi_ost_idx >= lov->desc.ld_tgt_count && + !lov2obd(lov)->obd_process_conf) { + CERROR("%s: OST index %d more than OST count %d\n", + (char *)lov->desc.ld_uuid.uuid, + loi->loi_ost_idx, lov->desc.ld_tgt_count); + lov_dump_lmm_v1(D_WARNING, lmm); + return -EINVAL; + } + + if (!lov->lov_tgts[loi->loi_ost_idx]) { + CERROR("%s: OST index %d missing\n", + (char *)lov->desc.ld_uuid.uuid, + loi->loi_ost_idx); + lov_dump_lmm_v1(D_WARNING, lmm); + continue; + } + + tgt_bytes = lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx]); + stripe_maxbytes = min_t(loff_t, stripe_maxbytes, tgt_bytes); + } + + if (stripe_maxbytes == LLONG_MAX) + stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES; + + if (!lsm->lsm_stripe_count) + lsm->lsm_maxbytes = stripe_maxbytes * lov->desc.ld_tgt_count; + else + lsm->lsm_maxbytes = stripe_maxbytes * lsm->lsm_stripe_count; + + return 0; } static void lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno, - u64 *lov_off, u64 *swidth) + loff_t *lov_off, loff_t *swidth) { if (swidth) *swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count; @@ -141,36 +219,12 @@ lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno, static void lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno, - u64 *lov_off, u64 *swidth) + loff_t *lov_off, loff_t *swidth) { if (swidth) *swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count; } -/* Find minimum stripe maxbytes value. For inactive or - * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES. - */ -static void lov_tgt_maxbytes(struct lov_tgt_desc *tgt, __u64 *stripe_maxbytes) -{ - struct obd_import *imp = tgt->ltd_obd->u.cli.cl_import; - - if (!imp || !tgt->ltd_active) { - *stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES; - return; - } - - spin_lock(&imp->imp_lock); - if (imp->imp_state == LUSTRE_IMP_FULL && - (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) && - imp->imp_connect_data.ocd_maxbytes > 0) { - if (*stripe_maxbytes > imp->imp_connect_data.ocd_maxbytes) - *stripe_maxbytes = imp->imp_connect_data.ocd_maxbytes; - } else { - *stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES; - } - spin_unlock(&imp->imp_lock); -} - static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes, __u16 *stripe_count) { @@ -197,45 +251,7 @@ static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes, static int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm, struct lov_mds_md_v1 *lmm) { - struct lov_oinfo *loi; - int i; - int stripe_count; - __u64 stripe_maxbytes = OBD_OBJECT_EOF; - - lsm_unpackmd_common(lsm, lmm); - - stripe_count = lsm_is_released(lsm) ? 
0 : lsm->lsm_stripe_count; - - for (i = 0; i < stripe_count; i++) { - /* XXX LOV STACKING call down to osc_unpackmd() */ - loi = lsm->lsm_oinfo[i]; - ostid_le_to_cpu(&lmm->lmm_objects[i].l_ost_oi, &loi->loi_oi); - loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx); - loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen); - if (lov_oinfo_is_dummy(loi)) - continue; - - if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) { - CERROR("OST index %d more than OST count %d\n", - loi->loi_ost_idx, lov->desc.ld_tgt_count); - lov_dump_lmm_v1(D_WARNING, lmm); - return -EINVAL; - } - if (!lov->lov_tgts[loi->loi_ost_idx]) { - CERROR("OST index %d missing\n", loi->loi_ost_idx); - lov_dump_lmm_v1(D_WARNING, lmm); - return -EINVAL; - } - /* calculate the minimum stripe max bytes */ - lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx], - &stripe_maxbytes); - } - - lsm->lsm_maxbytes = stripe_maxbytes * lsm->lsm_stripe_count; - if (lsm->lsm_stripe_count == 0) - lsm->lsm_maxbytes = stripe_maxbytes * lov->desc.ld_tgt_count; - - return 0; + return lsm_unpackmd_common(lov, lsm, lmm, lmm->lmm_objects); } const struct lsm_operations lsm_v1_ops = { @@ -275,55 +291,21 @@ static int lsm_lmm_verify_v3(struct lov_mds_md *lmmv1, int lmm_bytes, } static int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm, - struct lov_mds_md *lmmv1) + struct lov_mds_md *lmm) { - struct lov_mds_md_v3 *lmm; - struct lov_oinfo *loi; - int i; - int stripe_count; - __u64 stripe_maxbytes = OBD_OBJECT_EOF; - int cplen = 0; + struct lov_mds_md_v3 *lmm_v3 = (struct lov_mds_md_v3 *)lmm; + size_t cplen = 0; + int rc; - lmm = (struct lov_mds_md_v3 *)lmmv1; + rc = lsm_unpackmd_common(lov, lsm, lmm, lmm_v3->lmm_objects); + if (rc) + return rc; - lsm_unpackmd_common(lsm, (struct lov_mds_md_v1 *)lmm); - - stripe_count = lsm_is_released(lsm) ? 
0 : lsm->lsm_stripe_count; - - cplen = strlcpy(lsm->lsm_pool_name, lmm->lmm_pool_name, + cplen = strlcpy(lsm->lsm_pool_name, lmm_v3->lmm_pool_name, sizeof(lsm->lsm_pool_name)); if (cplen >= sizeof(lsm->lsm_pool_name)) return -E2BIG; - for (i = 0; i < stripe_count; i++) { - /* XXX LOV STACKING call down to osc_unpackmd() */ - loi = lsm->lsm_oinfo[i]; - ostid_le_to_cpu(&lmm->lmm_objects[i].l_ost_oi, &loi->loi_oi); - loi->loi_ost_idx = le32_to_cpu(lmm->lmm_objects[i].l_ost_idx); - loi->loi_ost_gen = le32_to_cpu(lmm->lmm_objects[i].l_ost_gen); - if (lov_oinfo_is_dummy(loi)) - continue; - - if (loi->loi_ost_idx >= lov->desc.ld_tgt_count) { - CERROR("OST index %d more than OST count %d\n", - loi->loi_ost_idx, lov->desc.ld_tgt_count); - lov_dump_lmm_v3(D_WARNING, lmm); - return -EINVAL; - } - if (!lov->lov_tgts[loi->loi_ost_idx]) { - CERROR("OST index %d missing\n", loi->loi_ost_idx); - lov_dump_lmm_v3(D_WARNING, lmm); - return -EINVAL; - } - /* calculate the minimum stripe max bytes */ - lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx], - &stripe_maxbytes); - } - - lsm->lsm_maxbytes = stripe_maxbytes * lsm->lsm_stripe_count; - if (lsm->lsm_stripe_count == 0) - lsm->lsm_maxbytes = stripe_maxbytes * lov->desc.ld_tgt_count; - return 0; } diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h index 07e5ede3e952..774499c74daa 100644 --- a/drivers/staging/lustre/lustre/lov/lov_internal.h +++ b/drivers/staging/lustre/lustre/lov/lov_internal.h @@ -36,6 +36,77 @@ #include "../include/obd_class.h" #include "../include/lustre/lustre_user.h" +/* + * If we are unable to get the maximum object size from the OST in + * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using + * the old maximum object size from ext3. + */ +#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL + +struct lov_stripe_md { + atomic_t lsm_refc; + spinlock_t lsm_lock; + pid_t lsm_lock_owner; /* debugging */ + + /* + * maximum possible file size, might change as OSTs status changes, + * e.g. disconnected, deactivated + */ + loff_t lsm_maxbytes; + struct ost_id lsm_oi; + u32 lsm_magic; + u32 lsm_stripe_size; + u32 lsm_pattern; /* RAID0, RAID1, released, ... */ + u16 lsm_stripe_count; + u16 lsm_layout_gen; + char lsm_pool_name[LOV_MAXPOOLNAME + 1]; + struct lov_oinfo *lsm_oinfo[0]; +}; + +static inline bool lsm_is_released(struct lov_stripe_md *lsm) +{ + return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED); +} + +static inline bool lsm_has_objects(struct lov_stripe_md *lsm) +{ + if (!lsm) + return false; + + if (lsm_is_released(lsm)) + return false; + + return true; +} + +struct lsm_operations { + void (*lsm_free)(struct lov_stripe_md *); + void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, loff_t *, + loff_t *); + void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, loff_t *, + loff_t *); + int (*lsm_lmm_verify)(struct lov_mds_md *lmm, int lmm_bytes, + u16 *stripe_count); + int (*lsm_unpackmd)(struct lov_obd *lov, struct lov_stripe_md *lsm, + struct lov_mds_md *lmm); +}; + +extern const struct lsm_operations lsm_v1_ops; +extern const struct lsm_operations lsm_v3_ops; + +static inline const struct lsm_operations *lsm_op_find(int magic) +{ + switch (magic) { + case LOV_MAGIC_V1: + return &lsm_v1_ops; + case LOV_MAGIC_V3: + return &lsm_v3_ops; + default: + CERROR("unrecognized lsm_magic %08x\n", magic); + return NULL; + } +} + /* lov_do_div64(a, b) returns a % b, and a = a / b. 
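The lov_do_div64() contract just stated — quotient written back in place, remainder handed back — matches the kernel's generic do_div(). A tiny worked example of that contract with made-up stripe geometry; this is what lets the code avoid a 64-by-64 '%' on 32-bit kernels:

#include <asm/div64.h>
#include <linux/types.h>

static u32 demo_offset_in_stripe(void)
{
	u64 offset = 10 * 1024 * 1024 + 123;	/* 10 MiB + 123 bytes */
	u32 stripe_size = 1024 * 1024;		/* 1 MiB stripes */
	u32 rem;

	/* do_div(n, base): n becomes n / base, the expression yields
	 * n % base */
	rem = do_div(offset, stripe_size);

	/* now offset == 10 (stripe index) and rem == 123 */
	return rem;
}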
* The 32-bit code is LOV-specific due to knowing about stripe limits in * order to reduce the divisor to a 32-bit number. If the divisor is @@ -110,8 +181,6 @@ struct lov_request_set { atomic_t set_completes; atomic_t set_success; atomic_t set_finish_checked; - struct llog_cookie *set_cookies; - int set_cookie_sent; struct list_head set_list; wait_queue_head_t set_waitq; }; @@ -132,8 +201,6 @@ static inline void lov_put_reqset(struct lov_request_set *set) (char *)((lv)->lov_tgts[index]->ltd_uuid.uuid) /* lov_merge.c */ -void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid, - struct lov_stripe_md *lsm, int stripeno, int *set); int lov_merge_lvb_kms(struct lov_stripe_md *lsm, struct ost_lvb *lvb, __u64 *kms_place); @@ -150,17 +217,9 @@ pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index, int stripe); /* lov_request.c */ -int lov_update_common_set(struct lov_request_set *set, - struct lov_request *req, int rc); int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo, struct lov_request_set **reqset); int lov_fini_getattr_set(struct lov_request_set *set); -int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - struct lov_request_set **reqset); -int lov_update_setattr_set(struct lov_request_set *set, - struct lov_request *req, int rc); -int lov_fini_setattr_set(struct lov_request_set *set); int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo, struct lov_request_set **reqset); int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs, @@ -186,12 +245,10 @@ int lov_del_target(struct obd_device *obd, __u32 index, struct obd_uuid *uuidp, int gen); /* lov_pack.c */ -int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmm, - struct lov_stripe_md *lsm); -int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, - struct lov_mds_md *lmm, int lmm_bytes); -int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count, - int pattern, int magic); +ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf, + size_t buf_size); +struct lov_stripe_md *lov_unpackmd(struct lov_obd *lov, struct lov_mds_md *lmm, + size_t lmm_size); int lov_free_memmd(struct lov_stripe_md **lsmp); void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm); @@ -199,7 +256,7 @@ void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm); void lov_dump_lmm_common(int level, void *lmmp); /* lov_ea.c */ -struct lov_stripe_md *lsm_alloc_plain(__u16 stripe_count, int *size); +struct lov_stripe_md *lsm_alloc_plain(u16 stripe_count); void lsm_free_plain(struct lov_stripe_md *lsm); void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm); @@ -244,4 +301,9 @@ static inline bool lov_oinfo_is_dummy(const struct lov_oinfo *loi) return false; } +static inline struct obd_device *lov2obd(const struct lov_obd *lov) +{ + return container_of0(lov, struct obd_device, u.lov); +} + #endif diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c index d10157985ed9..002326c282a7 100644 --- a/drivers/staging/lustre/lustre/lov/lov_io.c +++ b/drivers/staging/lustre/lustre/lov/lov_io.c @@ -86,6 +86,8 @@ static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio, switch (io->ci_type) { case CIT_SETATTR: { io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr; + io->u.ci_setattr.sa_attr_flags = + parent->u.ci_setattr.sa_attr_flags; io->u.ci_setattr.sa_valid = parent->u.ci_setattr.sa_valid; io->u.ci_setattr.sa_stripe_index = stripe; 
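The switch cases here copy the parent io's parameters down into each per-stripe sub-io; results travel the opposite way at completion (lov_io_data_version_end() below sums the per-stripe versions and keeps the first error). A schematic of that fan-out/fan-in with a hypothetical demo type:

#include <linux/types.h>

struct demo_io {
	u64	data_version;
	int	result;
};

/* fan-out: seed a per-stripe sub-io from the parent */
static void demo_sub_inherit(struct demo_io *sub)
{
	sub->data_version = 0;	/* each stripe reports its own version */
	sub->result = 0;
}

/* fan-in: fold one stripe's result back into the parent */
static void demo_sub_merge(struct demo_io *parent, const struct demo_io *sub)
{
	parent->data_version += sub->data_version;
	if (!parent->result)
		parent->result = sub->result;
}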
io->u.ci_setattr.sa_parent_fid = @@ -98,6 +100,12 @@ static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio, } break; } + case CIT_DATA_VERSION: { + io->u.ci_data_version.dv_data_version = 0; + io->u.ci_data_version.dv_flags = + parent->u.ci_data_version.dv_flags; + break; + } case CIT_FAULT: { struct cl_object *obj = parent->ci_obj; loff_t off = cl_offset(obj, parent->u.ci_fault.ft_index); @@ -159,12 +167,7 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, sub->sub_env = ld->ld_emrg[stripe]->emrg_env; sub->sub_borrowed = 1; } else { - void *cookie; - - /* obtain new environment */ - cookie = cl_env_reenter(); sub->sub_env = cl_env_get(&sub->sub_refcheck); - cl_env_reexit(cookie); if (IS_ERR(sub->sub_env)) result = PTR_ERR(sub->sub_env); @@ -337,6 +340,11 @@ static int lov_io_slice_init(struct lov_io *lio, struct lov_object *obj, lio->lis_endpos = OBD_OBJECT_EOF; break; + case CIT_DATA_VERSION: + lio->lis_pos = 0; + lio->lis_endpos = OBD_OBJECT_EOF; + break; + case CIT_FAULT: { pgoff_t index = io->u.ci_fault.ft_index; @@ -514,6 +522,24 @@ static int lov_io_end_wrapper(const struct lu_env *env, struct cl_io *io) return 0; } +static void +lov_io_data_version_end(const struct lu_env *env, const struct cl_io_slice *ios) +{ + struct lov_io *lio = cl2lov_io(env, ios); + struct cl_io *parent = lio->lis_cl.cis_io; + struct lov_io_sub *sub; + + list_for_each_entry(sub, &lio->lis_active, sub_linkage) { + lov_io_end_wrapper(env, sub->sub_io); + + parent->u.ci_data_version.dv_data_version += + sub->sub_io->u.ci_data_version.dv_data_version; + + if (!parent->ci_result) + parent->ci_result = sub->sub_io->ci_result; + } +} + static int lov_io_iter_fini_wrapper(const struct lu_env *env, struct cl_io *io) { cl_io_iter_fini(env, io); @@ -555,6 +581,65 @@ static void lov_io_unlock(const struct lu_env *env, LASSERT(rc == 0); } +static int lov_io_read_ahead(const struct lu_env *env, + const struct cl_io_slice *ios, + pgoff_t start, struct cl_read_ahead *ra) +{ + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_object *loo = lio->lis_object; + struct cl_object *obj = lov2cl(loo); + struct lov_layout_raid0 *r0 = lov_r0(loo); + unsigned int pps; /* pages per stripe */ + struct lov_io_sub *sub; + pgoff_t ra_end; + loff_t suboff; + int stripe; + int rc; + + stripe = lov_stripe_number(loo->lo_lsm, cl_offset(obj, start)); + if (unlikely(!r0->lo_sub[stripe])) + return -EIO; + + sub = lov_sub_get(env, lio, stripe); + if (IS_ERR(sub)) + return PTR_ERR(sub); + + lov_stripe_offset(loo->lo_lsm, cl_offset(obj, start), stripe, &suboff); + rc = cl_io_read_ahead(sub->sub_env, sub->sub_io, + cl_index(lovsub2cl(r0->lo_sub[stripe]), suboff), + ra); + lov_sub_put(sub); + + CDEBUG(D_READA, DFID " cra_end = %lu, stripes = %d, rc = %d\n", + PFID(lu_object_fid(lov2lu(loo))), ra->cra_end, r0->lo_nr, rc); + if (rc) + return rc; + + /** + * Adjust the stripe index by layout of raid0. ra->cra_end is + * the maximum page index covered by an underlying DLM lock. + * This function converts cra_end from stripe level to file + * level, and make sure it's not beyond stripe boundary. 
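A worked instance of the clamp this comment describes, using made-up numbers (1 MiB stripes, 4 KiB pages, so 256 pages per stripe):

#include <linux/types.h>

static unsigned long demo_ra_clamp(unsigned long start, unsigned long ra_end)
{
	const unsigned int pps = (1024 * 1024) >> 12;	/* pages per stripe */

	/* last page index of the stripe holding 'start': for start = 300,
	 * 300 % 256 = 44, and 300 + 256 - 44 - 1 = 511, the final page of
	 * the pages-256..511 stripe */
	unsigned long stripe_last = start + pps - start % pps - 1;

	return ra_end < stripe_last ? ra_end : stripe_last;
}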
+ */ + if (r0->lo_nr == 1) /* single stripe file */ + return 0; + + /* cra_end is stripe level, convert it into file level */ + ra_end = ra->cra_end; + if (ra_end != CL_PAGE_EOF) + ra_end = lov_stripe_pgoff(loo->lo_lsm, ra_end, stripe); + + pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT; + + CDEBUG(D_READA, DFID " max_index = %lu, pps = %u, stripe_size = %u, stripe no = %u, start index = %lu\n", + PFID(lu_object_fid(lov2lu(loo))), ra_end, pps, + loo->lo_lsm->lsm_stripe_size, stripe, start); + + /* never exceed the end of the stripe */ + ra->cra_end = min_t(pgoff_t, ra_end, start + pps - start % pps - 1); + return 0; +} + /** * lov implementation of cl_operations::cio_submit() method. It takes a list * of pages in \a queue, splits it into per-stripe sub-lists, invokes @@ -779,6 +864,15 @@ static const struct cl_io_operations lov_io_ops = { .cio_start = lov_io_start, .cio_end = lov_io_end }, + [CIT_DATA_VERSION] = { + .cio_fini = lov_io_fini, + .cio_iter_init = lov_io_iter_init, + .cio_iter_fini = lov_io_iter_fini, + .cio_lock = lov_io_lock, + .cio_unlock = lov_io_unlock, + .cio_start = lov_io_start, + .cio_end = lov_io_data_version_end, + }, [CIT_FAULT] = { .cio_fini = lov_io_fini, .cio_iter_init = lov_io_iter_init, @@ -801,6 +895,7 @@ static const struct cl_io_operations lov_io_ops = { .cio_fini = lov_io_fini } }, + .cio_read_ahead = lov_io_read_ahead, .cio_submit = lov_io_submit, .cio_commit_async = lov_io_commit_async, }; @@ -820,6 +915,13 @@ static void lov_empty_io_fini(const struct lu_env *env, wake_up_all(&lov->lo_waitq); } +static int lov_empty_io_submit(const struct lu_env *env, + const struct cl_io_slice *ios, + enum cl_req_type crt, struct cl_2queue *queue) +{ + return -EBADF; +} + static void lov_empty_impossible(const struct lu_env *env, struct cl_io_slice *ios) { @@ -870,7 +972,7 @@ static const struct cl_io_operations lov_empty_io_ops = { .cio_fini = lov_empty_io_fini } }, - .cio_submit = LOV_EMPTY_IMPOSSIBLE, + .cio_submit = lov_empty_io_submit, .cio_commit_async = LOV_EMPTY_IMPOSSIBLE }; @@ -909,6 +1011,7 @@ int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj, break; case CIT_FSYNC: case CIT_SETATTR: + case CIT_DATA_VERSION: result = 1; break; case CIT_WRITE: @@ -944,6 +1047,7 @@ int lov_io_init_released(const struct lu_env *env, struct cl_object *obj, LASSERTF(0, "invalid type %d\n", io->ci_type); case CIT_MISC: case CIT_FSYNC: + case CIT_DATA_VERSION: result = 1; break; case CIT_SETATTR: diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c index 674af106b50b..391dfd207177 100644 --- a/drivers/staging/lustre/lustre/lov/lov_merge.c +++ b/drivers/staging/lustre/lustre/lov/lov_merge.c @@ -104,53 +104,3 @@ int lov_merge_lvb_kms(struct lov_stripe_md *lsm, lvb->lvb_ctime = current_ctime; return rc; } - -void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid, - struct lov_stripe_md *lsm, int stripeno, int *set) -{ - valid &= src->o_valid; - - if (*set) { - tgt->o_valid &= valid; - if (valid & OBD_MD_FLSIZE) { - /* this handles sparse files properly */ - u64 lov_size; - - lov_size = lov_stripe_size(lsm, src->o_size, stripeno); - if (lov_size > tgt->o_size) - tgt->o_size = lov_size; - } - if (valid & OBD_MD_FLBLOCKS) - tgt->o_blocks += src->o_blocks; - if (valid & OBD_MD_FLBLKSZ) - tgt->o_blksize += src->o_blksize; - if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime) - tgt->o_ctime = src->o_ctime; - if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime) - tgt->o_mtime = src->o_mtime; - 
if (valid & OBD_MD_FLDATAVERSION) - tgt->o_data_version += src->o_data_version; - - /* handle flags */ - if (valid & OBD_MD_FLFLAGS) - tgt->o_flags &= src->o_flags; - else - tgt->o_flags = 0; - } else { - memcpy(tgt, src, sizeof(*tgt)); - tgt->o_oi = lsm->lsm_oi; - tgt->o_valid = valid; - if (valid & OBD_MD_FLSIZE) - tgt->o_size = lov_stripe_size(lsm, src->o_size, - stripeno); - tgt->o_flags = 0; - if (valid & OBD_MD_FLFLAGS) - tgt->o_flags = src->o_flags; - } - - /* data_version needs to be valid on all stripes to be correct! */ - if (!(valid & OBD_MD_FLDATAVERSION)) - tgt->o_valid &= ~OBD_MD_FLDATAVERSION; - - *set += 1; -} diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c index b23016f7ec26..63b064523c6a 100644 --- a/drivers/staging/lustre/lustre/lov/lov_obd.c +++ b/drivers/staging/lustre/lustre/lov/lov_obd.c @@ -40,19 +40,20 @@ #define DEBUG_SUBSYSTEM S_LOV #include "../../include/linux/libcfs/libcfs.h" -#include "../include/obd_support.h" -#include "../include/lustre/lustre_ioctl.h" -#include "../include/lustre_lib.h" -#include "../include/lustre_net.h" #include "../include/lustre/lustre_idl.h" +#include "../include/lustre/lustre_ioctl.h" + +#include "../include/cl_object.h" #include "../include/lustre_dlm.h" +#include "../include/lustre_fid.h" +#include "../include/lustre_lib.h" #include "../include/lustre_mds.h" -#include "../include/obd_class.h" -#include "../include/lprocfs_status.h" +#include "../include/lustre_net.h" #include "../include/lustre_param.h" -#include "../include/cl_object.h" -#include "../include/lustre/ll_fiemap.h" -#include "../include/lustre_fid.h" +#include "../include/lustre_swab.h" +#include "../include/lprocfs_status.h" +#include "../include/obd_class.h" +#include "../include/obd_support.h" #include "lov_internal.h" @@ -826,29 +827,6 @@ out: return rc; } -static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) -{ - struct lov_obd *lov = &obd->u.lov; - - switch (stage) { - case OBD_CLEANUP_EARLY: { - int i; - - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) - continue; - obd_precleanup(class_exp2obd(lov->lov_tgts[i]->ltd_exp), - OBD_CLEANUP_EARLY); - } - break; - } - default: - break; - } - - return 0; -} - static int lov_cleanup(struct obd_device *obd) { struct lov_obd *lov = &obd->u.lov; @@ -972,163 +950,6 @@ out: return rc; } -#define ASSERT_LSM_MAGIC(lsmp) \ -do { \ - LASSERT((lsmp)); \ - LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC_V1 || \ - (lsmp)->lsm_magic == LOV_MAGIC_V3), \ - "%p->lsm_magic=%x\n", (lsmp), (lsmp)->lsm_magic); \ -} while (0) - -static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, - void *data, int rc) -{ - struct lov_request_set *lovset = (struct lov_request_set *)data; - int err; - - /* don't do attribute merge if this async op failed */ - if (rc) - atomic_set(&lovset->set_completes, 0); - err = lov_fini_getattr_set(lovset); - return rc ? 
rc : err; -} - -static int lov_getattr_async(struct obd_export *exp, struct obd_info *oinfo, - struct ptlrpc_request_set *rqset) -{ - struct lov_request_set *lovset; - struct lov_obd *lov; - struct lov_request *req; - int rc = 0, err; - - LASSERT(oinfo); - ASSERT_LSM_MAGIC(oinfo->oi_md); - - if (!exp || !exp->exp_obd) - return -ENODEV; - - lov = &exp->exp_obd->u.lov; - - rc = lov_prep_getattr_set(exp, oinfo, &lovset); - if (rc) - return rc; - - CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n", - POSTID(&oinfo->oi_md->lsm_oi), oinfo->oi_md->lsm_stripe_count, - oinfo->oi_md->lsm_stripe_size); - - list_for_each_entry(req, &lovset->set_list, rq_link) { - CDEBUG(D_INFO, "objid " DOSTID "[%d] has subobj " DOSTID " at idx%u\n", - POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe, - POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx); - rc = obd_getattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi, rqset); - if (rc) { - CERROR("%s: getattr objid "DOSTID" subobj" - DOSTID" on OST idx %d: rc = %d\n", - exp->exp_obd->obd_name, - POSTID(&oinfo->oi_oa->o_oi), - POSTID(&req->rq_oi.oi_oa->o_oi), - req->rq_idx, rc); - goto out; - } - } - - if (!list_empty(&rqset->set_requests)) { - LASSERT(rc == 0); - LASSERT(!rqset->set_interpret); - rqset->set_interpret = lov_getattr_interpret; - rqset->set_arg = (void *)lovset; - return rc; - } -out: - if (rc) - atomic_set(&lovset->set_completes, 0); - err = lov_fini_getattr_set(lovset); - return rc ? rc : err; -} - -static int lov_setattr_interpret(struct ptlrpc_request_set *rqset, - void *data, int rc) -{ - struct lov_request_set *lovset = (struct lov_request_set *)data; - int err; - - if (rc) - atomic_set(&lovset->set_completes, 0); - err = lov_fini_setattr_set(lovset); - return rc ? rc : err; -} - -/* If @oti is given, the request goes from MDS and responses from OSTs are not - * needed. Otherwise, a client is waiting for responses. - */ -static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - struct ptlrpc_request_set *rqset) -{ - struct lov_request_set *set; - struct lov_request *req; - struct lov_obd *lov; - int rc = 0; - - LASSERT(oinfo); - ASSERT_LSM_MAGIC(oinfo->oi_md); - if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) { - LASSERT(oti); - LASSERT(oti->oti_logcookies); - } - - if (!exp || !exp->exp_obd) - return -ENODEV; - - lov = &exp->exp_obd->u.lov; - rc = lov_prep_setattr_set(exp, oinfo, oti, &set); - if (rc) - return rc; - - CDEBUG(D_INFO, "objid "DOSTID": %ux%u byte stripes\n", - POSTID(&oinfo->oi_md->lsm_oi), - oinfo->oi_md->lsm_stripe_count, - oinfo->oi_md->lsm_stripe_size); - - list_for_each_entry(req, &set->set_list, rq_link) { - if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) - oti->oti_logcookies = set->set_cookies + req->rq_stripe; - - CDEBUG(D_INFO, "objid " DOSTID "[%d] has subobj " DOSTID " at idx%u\n", - POSTID(&oinfo->oi_oa->o_oi), req->rq_stripe, - POSTID(&req->rq_oi.oi_oa->o_oi), req->rq_idx); - - rc = obd_setattr_async(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi, oti, rqset); - if (rc) { - CERROR("error: setattr objid "DOSTID" subobj" - DOSTID" on OST idx %d: rc = %d\n", - POSTID(&set->set_oi->oi_oa->o_oi), - POSTID(&req->rq_oi.oi_oa->o_oi), - req->rq_idx, rc); - break; - } - } - - /* If we are not waiting for responses on async requests, return. */ - if (rc || !rqset || list_empty(&rqset->set_requests)) { - int err; - - if (rc) - atomic_set(&set->set_completes, 0); - err = lov_fini_setattr_set(set); - return rc ? 
rc : err; - } - - LASSERT(!rqset->set_interpret); - rqset->set_interpret = lov_setattr_interpret; - rqset->set_arg = (void *)set; - - return 0; -} - int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc) { struct lov_request_set *lovset = (struct lov_request_set *)data; @@ -1183,7 +1004,10 @@ static int lov_statfs(const struct lu_env *env, struct obd_export *exp, struct obd_statfs *osfs, __u64 max_age, __u32 flags) { struct ptlrpc_request_set *set = NULL; - struct obd_info oinfo = { }; + struct obd_info oinfo = { + .oi_osfs = osfs, + .oi_flags = flags, + }; int rc = 0; /* for obdclass we forbid using obd_statfs_rqset, but prefer using async @@ -1193,8 +1017,6 @@ static int lov_statfs(const struct lu_env *env, struct obd_export *exp, if (!set) return -ENOMEM; - oinfo.oi_osfs = osfs; - oinfo.oi_flags = flags; rc = lov_statfs_async(exp, &oinfo, max_age, set); if (rc == 0) rc = ptlrpc_set_wait(set); @@ -1235,8 +1057,8 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, /* copy UUID */ if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd), - min((int)data->ioc_plen2, - (int)sizeof(struct obd_uuid)))) + min_t(unsigned long, data->ioc_plen2, + sizeof(struct obd_uuid)))) return -EFAULT; memcpy(&flags, data->ioc_inlbuf1, sizeof(__u32)); @@ -1249,8 +1071,8 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (rc) return rc; if (copy_to_user(data->ioc_pbuf1, &stat_buf, - min((int)data->ioc_plen1, - (int)sizeof(stat_buf)))) + min_t(unsigned long, data->ioc_plen1, + sizeof(stat_buf)))) return -EFAULT; break; } @@ -1367,8 +1189,6 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, osc_obd->obd_force = obddev->obd_force; err = obd_iocontrol(cmd, lov->lov_tgts[i]->ltd_exp, len, karg, uarg); - if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) - return err; if (err) { if (lov->lov_tgts[i]->ltd_active) { CDEBUG(err == -ENOTTY ? @@ -1391,454 +1211,35 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, return rc; } -#define FIEMAP_BUFFER_SIZE 4096 - -/** - * Non-zero fe_logical indicates that this is a continuation FIEMAP - * call. The local end offset and the device are sent in the first - * fm_extent. This function calculates the stripe number from the index. - * This function returns a stripe_no on which mapping is to be restarted. - * - * This function returns fm_end_offset which is the in-OST offset at which - * mapping should be restarted. If fm_end_offset=0 is returned then caller - * will re-calculate proper offset in next stripe. - * Note that the first extent is passed to lov_get_info via the value field. 
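Earlier in this hunk the user-copy bounds moved from min((int)a, (int)b) to min_t(unsigned long, a, b). A short sketch of why the int casts were risky — demo names, not the driver's:

#include <linux/kernel.h>	/* min_t */

static unsigned long demo_copy_len(unsigned long user_len, size_t obj_len)
{
	/* (int)user_len goes negative for user_len > INT_MAX, so the old
	 * min((int)user_len, (int)obj_len) could select the oversized
	 * length; min_t compares both sides in one explicit type */
	return min_t(unsigned long, user_len, obj_len);
}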
- * - * \param fiemap fiemap request header - * \param lsm striping information for the file - * \param fm_start logical start of mapping - * \param fm_end logical end of mapping - * \param start_stripe starting stripe will be returned in this - */ -static u64 fiemap_calc_fm_end_offset(struct ll_user_fiemap *fiemap, - struct lov_stripe_md *lsm, u64 fm_start, - u64 fm_end, int *start_stripe) -{ - u64 local_end = fiemap->fm_extents[0].fe_logical; - u64 lun_start, lun_end; - u64 fm_end_offset; - int stripe_no = -1, i; - - if (fiemap->fm_extent_count == 0 || - fiemap->fm_extents[0].fe_logical == 0) - return 0; - - /* Find out stripe_no from ost_index saved in the fe_device */ - for (i = 0; i < lsm->lsm_stripe_count; i++) { - struct lov_oinfo *oinfo = lsm->lsm_oinfo[i]; - - if (lov_oinfo_is_dummy(oinfo)) - continue; - - if (oinfo->loi_ost_idx == fiemap->fm_extents[0].fe_device) { - stripe_no = i; - break; - } - } - if (stripe_no == -1) - return -EINVAL; - - /* If we have finished mapping on previous device, shift logical - * offset to start of next device - */ - if ((lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end, - &lun_start, &lun_end)) != 0 && - local_end < lun_end) { - fm_end_offset = local_end; - *start_stripe = stripe_no; - } else { - /* This is a special value to indicate that caller should - * calculate offset in next stripe. - */ - fm_end_offset = 0; - *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count; - } - - return fm_end_offset; -} - -/** - * We calculate on which OST the mapping will end. If the length of mapping - * is greater than (stripe_size * stripe_count) then the last_stripe will - * will be one just before start_stripe. Else we check if the mapping - * intersects each OST and find last_stripe. - * This function returns the last_stripe and also sets the stripe_count - * over which the mapping is spread - * - * \param lsm striping information for the file - * \param fm_start logical start of mapping - * \param fm_end logical end of mapping - * \param start_stripe starting stripe of the mapping - * \param stripe_count the number of stripes across which to map is returned - * - * \retval last_stripe return the last stripe of the mapping - */ -static int fiemap_calc_last_stripe(struct lov_stripe_md *lsm, u64 fm_start, - u64 fm_end, int start_stripe, - int *stripe_count) -{ - int last_stripe; - u64 obd_start, obd_end; - int i, j; - - if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) { - last_stripe = start_stripe < 1 ? lsm->lsm_stripe_count - 1 : - start_stripe - 1; - *stripe_count = lsm->lsm_stripe_count; - } else { - for (j = 0, i = start_stripe; j < lsm->lsm_stripe_count; - i = (i + 1) % lsm->lsm_stripe_count, j++) { - if ((lov_stripe_intersects(lsm, i, fm_start, fm_end, - &obd_start, &obd_end)) == 0) - break; - } - *stripe_count = j; - last_stripe = (start_stripe + j - 1) % lsm->lsm_stripe_count; - } - - return last_stripe; -} - -/** - * Set fe_device and copy extents from local buffer into main return buffer. 
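fiemap_calc_last_stripe() above walks the stripes round-robin with i = (i + 1) % stripe_count, so the final stripe of a mapping that starts at start_stripe and touches n stripes is (start_stripe + n - 1) % stripe_count. A worked instance with made-up geometry:

/* stripe_count = 4, mapping starts on stripe 2 and touches n = 3
 * stripes: the walk visits 2, 3, 0, so (2 + 3 - 1) % 4 = 0 */
static int demo_last_stripe(int start_stripe, int touched, int stripe_count)
{
	return (start_stripe + touched - 1) % stripe_count;
}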
- * - * \param fiemap fiemap request header - * \param lcl_fm_ext array of local fiemap extents to be copied - * \param ost_index OST index to be written into the fm_device field for each - extent - * \param ext_count number of extents to be copied - * \param current_extent where to start copying in main extent array - */ -static void fiemap_prepare_and_copy_exts(struct ll_user_fiemap *fiemap, - struct ll_fiemap_extent *lcl_fm_ext, - int ost_index, unsigned int ext_count, - int current_extent) -{ - char *to; - int ext; - - for (ext = 0; ext < ext_count; ext++) { - lcl_fm_ext[ext].fe_device = ost_index; - lcl_fm_ext[ext].fe_flags |= FIEMAP_EXTENT_NET; - } - - /* Copy fm_extent's from fm_local to return buffer */ - to = (char *)fiemap + fiemap_count_to_size(current_extent); - memcpy(to, lcl_fm_ext, ext_count * sizeof(struct ll_fiemap_extent)); -} - -/** - * Break down the FIEMAP request and send appropriate calls to individual OSTs. - * This also handles the restarting of FIEMAP calls in case mapping overflows - * the available number of extents in single call. - */ -static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, - __u32 *vallen, void *val, struct lov_stripe_md *lsm) -{ - struct ll_fiemap_info_key *fm_key = key; - struct ll_user_fiemap *fiemap = val; - struct ll_user_fiemap *fm_local = NULL; - struct ll_fiemap_extent *lcl_fm_ext; - int count_local; - unsigned int get_num_extents = 0; - int ost_index = 0, actual_start_stripe, start_stripe; - u64 fm_start, fm_end, fm_length, fm_end_offset; - u64 curr_loc; - int current_extent = 0, rc = 0, i; - /* Whether have we collected enough extents */ - bool enough = false; - int ost_eof = 0; /* EOF for object */ - int ost_done = 0; /* done with required mapping for this OST? */ - int last_stripe; - int cur_stripe = 0, cur_stripe_wrap = 0, stripe_count; - unsigned int buffer_size = FIEMAP_BUFFER_SIZE; - - if (!lsm_has_objects(lsm)) { - if (lsm && lsm_is_released(lsm) && (fm_key->fiemap.fm_start < - fm_key->oa.o_size)) { - /* - * released file, return a minimal FIEMAP if - * request fits in file-size. - */ - fiemap->fm_mapped_extents = 1; - fiemap->fm_extents[0].fe_logical = - fm_key->fiemap.fm_start; - if (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length < - fm_key->oa.o_size) { - fiemap->fm_extents[0].fe_length = - fm_key->fiemap.fm_length; - } else { - fiemap->fm_extents[0].fe_length = - fm_key->oa.o_size - fm_key->fiemap.fm_start; - fiemap->fm_extents[0].fe_flags |= - (FIEMAP_EXTENT_UNKNOWN | - FIEMAP_EXTENT_LAST); - } - } - rc = 0; - goto out; - } - - if (fiemap_count_to_size(fm_key->fiemap.fm_extent_count) < buffer_size) - buffer_size = fiemap_count_to_size(fm_key->fiemap.fm_extent_count); - - fm_local = libcfs_kvzalloc(buffer_size, GFP_NOFS); - if (!fm_local) { - rc = -ENOMEM; - goto out; - } - lcl_fm_ext = &fm_local->fm_extents[0]; - - count_local = fiemap_size_to_count(buffer_size); - - memcpy(fiemap, &fm_key->fiemap, sizeof(*fiemap)); - fm_start = fiemap->fm_start; - fm_length = fiemap->fm_length; - /* Calculate start stripe, last stripe and length of mapping */ - start_stripe = lov_stripe_number(lsm, fm_start); - actual_start_stripe = start_stripe; - fm_end = (fm_length == ~0ULL ? 
fm_key->oa.o_size : - fm_start + fm_length - 1); - /* If fm_length != ~0ULL but fm_start+fm_length-1 exceeds file size */ - if (fm_end > fm_key->oa.o_size) - fm_end = fm_key->oa.o_size; - - last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end, - actual_start_stripe, - &stripe_count); - - fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, - fm_end, &start_stripe); - if (fm_end_offset == -EINVAL) { - rc = -EINVAL; - goto out; - } - - if (fiemap_count_to_size(fiemap->fm_extent_count) > *vallen) - fiemap->fm_extent_count = fiemap_size_to_count(*vallen); - if (fiemap->fm_extent_count == 0) { - get_num_extents = 1; - count_local = 0; - } - /* Check each stripe */ - for (cur_stripe = start_stripe, i = 0; i < stripe_count; - i++, cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) { - u64 req_fm_len; /* Stores length of required mapping */ - u64 len_mapped_single_call; - u64 lun_start, lun_end, obd_object_end; - unsigned int ext_count; - - cur_stripe_wrap = cur_stripe; - - /* Find out range of mapping on this stripe */ - if ((lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end, - &lun_start, &obd_object_end)) == 0) - continue; - - if (lov_oinfo_is_dummy(lsm->lsm_oinfo[cur_stripe])) { - rc = -EIO; - goto out; - } - - /* If this is a continuation FIEMAP call and we are on - * starting stripe then lun_start needs to be set to - * fm_end_offset - */ - if (fm_end_offset != 0 && cur_stripe == start_stripe) - lun_start = fm_end_offset; - - if (fm_length != ~0ULL) { - /* Handle fm_start + fm_length overflow */ - if (fm_start + fm_length < fm_start) - fm_length = ~0ULL - fm_start; - lun_end = lov_size_to_stripe(lsm, fm_start + fm_length, - cur_stripe); - } else { - lun_end = ~0ULL; - } - - if (lun_start == lun_end) - continue; - - req_fm_len = obd_object_end - lun_start; - fm_local->fm_length = 0; - len_mapped_single_call = 0; - - /* If the output buffer is very large and the objects have many - * extents we may need to loop on a single OST repeatedly - */ - ost_eof = 0; - ost_done = 0; - do { - if (get_num_extents == 0) { - /* Don't get too many extents. */ - if (current_extent + count_local > - fiemap->fm_extent_count) - count_local = fiemap->fm_extent_count - - current_extent; - } - - lun_start += len_mapped_single_call; - fm_local->fm_length = req_fm_len - len_mapped_single_call; - req_fm_len = fm_local->fm_length; - fm_local->fm_extent_count = enough ? 
1 : count_local; - fm_local->fm_mapped_extents = 0; - fm_local->fm_flags = fiemap->fm_flags; - - fm_key->oa.o_oi = lsm->lsm_oinfo[cur_stripe]->loi_oi; - ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx; - - if (ost_index < 0 || - ost_index >= lov->desc.ld_tgt_count) { - rc = -EINVAL; - goto out; - } - - /* If OST is inactive, return extent with UNKNOWN flag */ - if (!lov->lov_tgts[ost_index]->ltd_active) { - fm_local->fm_flags |= FIEMAP_EXTENT_LAST; - fm_local->fm_mapped_extents = 1; - - lcl_fm_ext[0].fe_logical = lun_start; - lcl_fm_ext[0].fe_length = obd_object_end - - lun_start; - lcl_fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN; - - goto inactive_tgt; - } - - fm_local->fm_start = lun_start; - fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER; - memcpy(&fm_key->fiemap, fm_local, sizeof(*fm_local)); - *vallen = fiemap_count_to_size(fm_local->fm_extent_count); - rc = obd_get_info(NULL, - lov->lov_tgts[ost_index]->ltd_exp, - keylen, key, vallen, fm_local, lsm); - if (rc != 0) - goto out; - -inactive_tgt: - ext_count = fm_local->fm_mapped_extents; - if (ext_count == 0) { - ost_done = 1; - /* If last stripe has hole at the end, - * then we need to return - */ - if (cur_stripe_wrap == last_stripe) { - fiemap->fm_mapped_extents = 0; - goto finish; - } - break; - } else if (enough) { - /* - * We've collected enough extents and there are - * more extents after it. - */ - goto finish; - } - - /* If we just need num of extents then go to next device */ - if (get_num_extents) { - current_extent += ext_count; - break; - } - - len_mapped_single_call = - lcl_fm_ext[ext_count - 1].fe_logical - - lun_start + lcl_fm_ext[ext_count - 1].fe_length; - - /* Have we finished mapping on this device? */ - if (req_fm_len <= len_mapped_single_call) - ost_done = 1; - - /* Clear the EXTENT_LAST flag which can be present on - * last extent - */ - if (lcl_fm_ext[ext_count - 1].fe_flags & - FIEMAP_EXTENT_LAST) - lcl_fm_ext[ext_count - 1].fe_flags &= - ~FIEMAP_EXTENT_LAST; - - curr_loc = lov_stripe_size(lsm, - lcl_fm_ext[ext_count - 1].fe_logical + - lcl_fm_ext[ext_count - 1].fe_length, - cur_stripe); - if (curr_loc >= fm_key->oa.o_size) - ost_eof = 1; - - fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext, - ost_index, ext_count, - current_extent); - - current_extent += ext_count; - - /* Ran out of available extents? */ - if (current_extent >= fiemap->fm_extent_count) - enough = true; - } while (ost_done == 0 && ost_eof == 0); - - if (cur_stripe_wrap == last_stripe) - goto finish; - } - -finish: - /* Indicate that we are returning device offsets unless file just has - * single stripe - */ - if (lsm->lsm_stripe_count > 1) - fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER; - - if (get_num_extents) - goto skip_last_device_calc; - - /* Check if we have reached the last stripe and whether mapping for that - * stripe is done. 
- */ - if (cur_stripe_wrap == last_stripe) { - if (ost_done || ost_eof) - fiemap->fm_extents[current_extent - 1].fe_flags |= - FIEMAP_EXTENT_LAST; - } - -skip_last_device_calc: - fiemap->fm_mapped_extents = current_extent; - -out: - kvfree(fm_local); - return rc; -} - static int lov_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm) + __u32 keylen, void *key, __u32 *vallen, void *val) { struct obd_device *obddev = class_exp2obd(exp); struct lov_obd *lov = &obddev->u.lov; - int rc; + struct lov_desc *ld = &lov->desc; + int rc = 0; if (!vallen || !val) return -EFAULT; obd_getref(obddev); - if (KEY_IS(KEY_LOVDESC)) { - struct lov_desc *desc_ret = val; - *desc_ret = lov->desc; + if (KEY_IS(KEY_MAX_EASIZE)) { + u32 max_stripe_count = min_t(u32, ld->ld_active_tgt_count, + LOV_MAX_STRIPE_COUNT); - rc = 0; - goto out; - } else if (KEY_IS(KEY_FIEMAP)) { - rc = lov_fiemap(lov, keylen, key, vallen, val, lsm); - goto out; + *((u32 *)val) = lov_mds_md_size(max_stripe_count, LOV_MAGIC_V3); + } else if (KEY_IS(KEY_DEFAULT_EASIZE)) { + u32 def_stripe_count = min_t(u32, ld->ld_default_stripe_count, + LOV_MAX_STRIPE_COUNT); + + *((u32 *)val) = lov_mds_md_size(def_stripe_count, LOV_MAGIC_V3); } else if (KEY_IS(KEY_TGT_COUNT)) { *((int *)val) = lov->desc.ld_tgt_count; - rc = 0; - goto out; + } else { + rc = -EINVAL; } - rc = -EINVAL; - -out: obd_putref(obddev); return rc; } @@ -1926,12 +1327,8 @@ static int lov_quotactl(struct obd_device *obd, struct obd_export *exp, __u64 bhardlimit = 0; int i, rc = 0; - if (oqctl->qc_cmd != LUSTRE_Q_QUOTAON && - oqctl->qc_cmd != LUSTRE_Q_QUOTAOFF && - oqctl->qc_cmd != Q_GETOQUOTA && - oqctl->qc_cmd != Q_INITQUOTA && - oqctl->qc_cmd != LUSTRE_Q_SETQUOTA && - oqctl->qc_cmd != Q_FINVALIDATE) { + if (oqctl->qc_cmd != Q_GETOQUOTA && + oqctl->qc_cmd != LUSTRE_Q_SETQUOTA) { CERROR("bad quota opc %x for lov obd\n", oqctl->qc_cmd); return -EFAULT; } @@ -1978,63 +1375,15 @@ static int lov_quotactl(struct obd_device *obd, struct obd_export *exp, return rc; } -static int lov_quotacheck(struct obd_device *obd, struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - struct lov_obd *lov = &obd->u.lov; - int i, rc = 0; - - obd_getref(obd); - - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i]) - continue; - - /* Skip quota check on the administratively disabled OSTs. 
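The removed lov_quotacheck() follows the fan-out shape that every LOV-wide operation in this file uses: pin the obd device with obd_getref(), walk the desc.ld_tgt_count target slots, skip empty or administratively disabled slots, forward the call to each target, and keep the first error while still visiting the rest. A hedged, self-contained sketch of that idiom, with simplified stand-in types rather than the real structures:

/*
 * Sketch of the LOV target fan-out idiom; struct tgt and for_each_tgt
 * are simplified stand-ins, not the kernel structures.
 */
#include <stdbool.h>

struct tgt {
	bool present;	/* slot populated (lov->lov_tgts[i] != NULL) */
	bool active;	/* OST currently usable (ltd_active) */
};

static int for_each_tgt(struct tgt *tgts, int count, int (*op)(struct tgt *))
{
	int i, rc = 0;

	for (i = 0; i < count; i++) {
		int err;

		if (!tgts[i].present || !tgts[i].active)
			continue;		/* skip holes and inactive OSTs */

		err = op(&tgts[i]);
		if (err && !rc)
			rc = err;		/* remember the first failure */
	}
	return rc;			/* 0 only if every target agreed */
}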
*/ - if (!lov->lov_tgts[i]->ltd_activate) { - CWARN("lov idx %d was administratively disabled, skip quotacheck on it.\n", - i); - continue; - } - - if (!lov->lov_tgts[i]->ltd_active) { - CERROR("lov idx %d inactive\n", i); - rc = -EIO; - goto out; - } - } - - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - int err; - - if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_activate) - continue; - - err = obd_quotacheck(lov->lov_tgts[i]->ltd_exp, oqctl); - if (err && !rc) - rc = err; - } - -out: - obd_putref(obd); - - return rc; -} - static struct obd_ops lov_obd_ops = { .owner = THIS_MODULE, .setup = lov_setup, - .precleanup = lov_precleanup, .cleanup = lov_cleanup, /*.process_config = lov_process_config,*/ .connect = lov_connect, .disconnect = lov_disconnect, .statfs = lov_statfs, .statfs_async = lov_statfs_async, - .packmd = lov_packmd, - .unpackmd = lov_unpackmd, - .getattr_async = lov_getattr_async, - .setattr_async = lov_setattr_async, .iocontrol = lov_iocontrol, .get_info = lov_get_info, .set_info_async = lov_set_info_async, @@ -2046,7 +1395,6 @@ static struct obd_ops lov_obd_ops = { .getref = lov_getref, .putref = lov_putref, .quotactl = lov_quotactl, - .quotacheck = lov_quotacheck, }; struct kmem_cache *lov_oinfo_slab; diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c index 52f736338887..76d4256fa828 100644 --- a/drivers/staging/lustre/lustre/lov/lov_object.c +++ b/drivers/staging/lustre/lustre/lov/lov_object.c @@ -39,6 +39,11 @@ #include "lov_cl_internal.h" +static inline struct lov_device *lov_object_dev(struct lov_object *obj) +{ + return lu2lov_dev(obj->lo_cl.co_lu.lo_dev); +} + /** \addtogroup lov * @{ */ @@ -51,7 +56,7 @@ struct lov_layout_operations { int (*llo_init)(const struct lu_env *env, struct lov_device *dev, - struct lov_object *lov, + struct lov_object *lov, struct lov_stripe_md *lsm, const struct cl_object_conf *conf, union lov_layout_state *state); int (*llo_delete)(const struct lu_env *env, struct lov_object *lov, @@ -75,12 +80,11 @@ struct lov_layout_operations { static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov); -void lov_lsm_put(struct cl_object *unused, struct lov_stripe_md *lsm) +static void lov_lsm_put(struct lov_stripe_md *lsm) { if (lsm) lov_free_memmd(&lsm); } -EXPORT_SYMBOL(lov_lsm_put); /***************************************************************************** * @@ -97,17 +101,17 @@ static void lov_install_empty(const struct lu_env *env, */ } -static int lov_init_empty(const struct lu_env *env, - struct lov_device *dev, struct lov_object *lov, +static int lov_init_empty(const struct lu_env *env, struct lov_device *dev, + struct lov_object *lov, struct lov_stripe_md *lsm, const struct cl_object_conf *conf, - union lov_layout_state *state) + union lov_layout_state *state) { return 0; } static void lov_install_raid0(const struct lu_env *env, struct lov_object *lov, - union lov_layout_state *state) + union lov_layout_state *state) { } @@ -212,8 +216,8 @@ static int lov_page_slice_fixup(struct lov_object *lov, return cl_object_header(stripe)->coh_page_bufsize; } -static int lov_init_raid0(const struct lu_env *env, - struct lov_device *dev, struct lov_object *lov, +static int lov_init_raid0(const struct lu_env *env, struct lov_device *dev, + struct lov_object *lov, struct lov_stripe_md *lsm, const struct cl_object_conf *conf, union lov_layout_state *state) { @@ -223,7 +227,6 @@ static int lov_init_raid0(const struct lu_env *env, struct cl_object *stripe; struct 
lov_thread_info *lti = lov_env_info(env); struct cl_object_conf *subconf = &lti->lti_stripe_conf; - struct lov_stripe_md *lsm = conf->u.coc_md->lsm; struct lu_fid *ofid = &lti->lti_fid; struct lov_layout_raid0 *r0 = &state->raid0; @@ -298,13 +301,11 @@ out: return result; } -static int lov_init_released(const struct lu_env *env, - struct lov_device *dev, struct lov_object *lov, +static int lov_init_released(const struct lu_env *env, struct lov_device *dev, + struct lov_object *lov, struct lov_stripe_md *lsm, const struct cl_object_conf *conf, union lov_layout_state *state) { - struct lov_stripe_md *lsm = conf->u.coc_md->lsm; - LASSERT(lsm); LASSERT(lsm_is_released(lsm)); LASSERT(!lov->lo_lsm); @@ -313,6 +314,40 @@ static int lov_init_released(const struct lu_env *env, return 0; } +static struct cl_object *lov_find_subobj(const struct lu_env *env, + struct lov_object *lov, + struct lov_stripe_md *lsm, + int stripe_idx) +{ + struct lov_device *dev = lu2lov_dev(lov2lu(lov)->lo_dev); + struct lov_oinfo *oinfo = lsm->lsm_oinfo[stripe_idx]; + struct lov_thread_info *lti = lov_env_info(env); + struct lu_fid *ofid = &lti->lti_fid; + struct cl_device *subdev; + struct cl_object *result; + int ost_idx; + int rc; + + if (lov->lo_type != LLT_RAID0) { + result = NULL; + goto out; + } + + ost_idx = oinfo->loi_ost_idx; + rc = ostid_to_fid(ofid, &oinfo->loi_oi, ost_idx); + if (rc) { + result = NULL; + goto out; + } + + subdev = lovsub2cl_dev(dev->ld_target[ost_idx]); + result = lov_sub_find(env, subdev, ofid, NULL); +out: + if (!result) + result = ERR_PTR(-EINVAL); + return result; +} + static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov, union lov_layout_state *state) { @@ -687,31 +722,24 @@ static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov) } static int lov_layout_change(const struct lu_env *unused, - struct lov_object *lov, + struct lov_object *lov, struct lov_stripe_md *lsm, const struct cl_object_conf *conf) { - int result; - enum lov_layout_type llt = LLT_EMPTY; + enum lov_layout_type llt = lov_type(lsm); union lov_layout_state *state = &lov->u; const struct lov_layout_operations *old_ops; const struct lov_layout_operations *new_ops; - - void *cookie; struct lu_env *env; int refcheck; + int rc; LASSERT(0 <= lov->lo_type && lov->lo_type < ARRAY_SIZE(lov_dispatch)); - if (conf->u.coc_md) - llt = lov_type(conf->u.coc_md->lsm); - LASSERT(0 <= llt && llt < ARRAY_SIZE(lov_dispatch)); - - cookie = cl_env_reenter(); env = cl_env_get(&refcheck); - if (IS_ERR(env)) { - cl_env_reexit(cookie); + if (IS_ERR(env)) return PTR_ERR(env); - } + + LASSERT(0 <= llt && llt < ARRAY_SIZE(lov_dispatch)); CDEBUG(D_INODE, DFID" from %s to %s\n", PFID(lu_object_fid(lov2lu(lov))), @@ -720,38 +748,37 @@ static int lov_layout_change(const struct lu_env *unused, old_ops = &lov_dispatch[lov->lo_type]; new_ops = &lov_dispatch[llt]; - result = cl_object_prune(env, &lov->lo_cl); - if (result != 0) + rc = cl_object_prune(env, &lov->lo_cl); + if (rc) + goto out; + + rc = old_ops->llo_delete(env, lov, &lov->u); + if (rc) goto out; - result = old_ops->llo_delete(env, lov, &lov->u); - if (result == 0) { - old_ops->llo_fini(env, lov, &lov->u); + old_ops->llo_fini(env, lov, &lov->u); - LASSERT(atomic_read(&lov->lo_active_ios) == 0); + LASSERT(!atomic_read(&lov->lo_active_ios)); - lov->lo_type = LLT_EMPTY; - /* page bufsize fixup */ - cl_object_header(&lov->lo_cl)->coh_page_bufsize -= + lov->lo_type = LLT_EMPTY; + + /* page bufsize fixup */ + cl_object_header(&lov->lo_cl)->coh_page_bufsize -=
lov_page_slice_fixup(lov, NULL); - result = new_ops->llo_init(env, - lu2lov_dev(lov->lo_cl.co_lu.lo_dev), - lov, conf, state); - if (result == 0) { - new_ops->llo_install(env, lov, state); - lov->lo_type = llt; - } else { - new_ops->llo_delete(env, lov, state); - new_ops->llo_fini(env, lov, state); - /* this file becomes an EMPTY file. */ - } + rc = new_ops->llo_init(env, lov_object_dev(lov), lov, lsm, conf, state); + if (rc) { + new_ops->llo_delete(env, lov, state); + new_ops->llo_fini(env, lov, state); + /* this file becomes an EMPTY file. */ + goto out; } + new_ops->llo_install(env, lov, state); + lov->lo_type = llt; out: cl_env_put(env, &refcheck); - cl_env_reexit(cookie); - return result; + return rc; } /***************************************************************************** @@ -762,26 +789,38 @@ out: int lov_object_init(const struct lu_env *env, struct lu_object *obj, const struct lu_object_conf *conf) { - struct lov_device *dev = lu2lov_dev(obj->lo_dev); struct lov_object *lov = lu2lov(obj); + struct lov_device *dev = lov_object_dev(lov); const struct cl_object_conf *cconf = lu2cl_conf(conf); union lov_layout_state *set = &lov->u; const struct lov_layout_operations *ops; - int result; + struct lov_stripe_md *lsm = NULL; + int rc; init_rwsem(&lov->lo_type_guard); atomic_set(&lov->lo_active_ios, 0); init_waitqueue_head(&lov->lo_waitq); - cl_object_page_init(lu2cl(obj), sizeof(struct lov_page)); + lov->lo_type = LLT_EMPTY; + if (cconf->u.coc_layout.lb_buf) { + lsm = lov_unpackmd(dev->ld_lov, + cconf->u.coc_layout.lb_buf, + cconf->u.coc_layout.lb_len); + if (IS_ERR(lsm)) + return PTR_ERR(lsm); + } + /* no locking is necessary, as object is being created */ - lov->lo_type = lov_type(cconf->u.coc_md->lsm); + lov->lo_type = lov_type(lsm); ops = &lov_dispatch[lov->lo_type]; - result = ops->llo_init(env, dev, lov, cconf, set); - if (result == 0) + rc = ops->llo_init(env, dev, lov, lsm, cconf, set); + if (!rc) ops->llo_install(env, lov, set); - return result; + + lov_lsm_put(lsm); + + return rc; } static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, @@ -791,6 +830,15 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, struct lov_object *lov = cl2lov(obj); int result = 0; + if (conf->coc_opc == OBJECT_CONF_SET && + conf->u.coc_layout.lb_buf) { + lsm = lov_unpackmd(lov_object_dev(lov)->ld_lov, + conf->u.coc_layout.lb_buf, + conf->u.coc_layout.lb_len); + if (IS_ERR(lsm)) + return PTR_ERR(lsm); + } + lov_conf_lock(lov); if (conf->coc_opc == OBJECT_CONF_INVALIDATE) { lov->lo_layout_invalid = true; @@ -810,8 +858,6 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, LASSERT(conf->coc_opc == OBJECT_CONF_SET); - if (conf->u.coc_md) - lsm = conf->u.coc_md->lsm; if ((!lsm && !lov->lo_lsm) || ((lsm && lov->lo_lsm) && (lov->lo_lsm->lsm_layout_gen == lsm->lsm_layout_gen) && @@ -829,11 +875,12 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, goto out; } - result = lov_layout_change(env, lov, conf); + result = lov_layout_change(env, lov, lsm, conf); lov->lo_layout_invalid = result != 0; out: lov_conf_unlock(lov); + lov_lsm_put(lsm); CDEBUG(D_INODE, DFID" lo_layout_invalid=%d\n", PFID(lu_object_fid(lov2lu(lov))), lov->lo_layout_invalid); return result; @@ -911,6 +958,473 @@ int lov_lock_init(const struct lu_env *env, struct cl_object *obj, io); } +/** + * We calculate on which OST the mapping will end. 
If the length of mapping + * is greater than (stripe_size * stripe_count) then the last_stripe + * will be one just before start_stripe. Else we check if the mapping + * intersects each OST and find last_stripe. + * This function returns the last_stripe and also sets the stripe_count + * over which the mapping is spread + * + * \param lsm [in] striping information for the file + * \param fm_start [in] logical start of mapping + * \param fm_end [in] logical end of mapping + * \param start_stripe [in] starting stripe of the mapping + * \param stripe_count [out] the number of stripes across which to map is + * returned + * + * \retval last_stripe return the last stripe of the mapping + */ +static int fiemap_calc_last_stripe(struct lov_stripe_md *lsm, + loff_t fm_start, loff_t fm_end, + int start_stripe, int *stripe_count) +{ + int last_stripe; + loff_t obd_start; + loff_t obd_end; + int i, j; + + if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) { + last_stripe = (start_stripe < 1 ? lsm->lsm_stripe_count - 1 : + start_stripe - 1); + *stripe_count = lsm->lsm_stripe_count; + } else { + for (j = 0, i = start_stripe; j < lsm->lsm_stripe_count; + i = (i + 1) % lsm->lsm_stripe_count, j++) { + if (!(lov_stripe_intersects(lsm, i, fm_start, fm_end, + &obd_start, &obd_end))) + break; + } + *stripe_count = j; + last_stripe = (start_stripe + j - 1) % lsm->lsm_stripe_count; + } + + return last_stripe; +} + +/** + * Set fe_device and copy extents from local buffer into main return buffer. + * + * \param fiemap [out] fiemap to hold all extents + * \param lcl_fm_ext [in] array of fiemap extents received from the OSC layer + * \param ost_index [in] OST index to be written into the fm_device + * field for each extent + * \param ext_count [in] number of extents to be copied + * \param current_extent [in] where to start copying in the extent array + */ +static void fiemap_prepare_and_copy_exts(struct fiemap *fiemap, + struct fiemap_extent *lcl_fm_ext, + int ost_index, unsigned int ext_count, + int current_extent) +{ + unsigned int ext; + char *to; + + for (ext = 0; ext < ext_count; ext++) { + lcl_fm_ext[ext].fe_device = ost_index; + lcl_fm_ext[ext].fe_flags |= FIEMAP_EXTENT_NET; + } + + /* Copy fm_extent's from fm_local to return buffer */ + to = (char *)fiemap + fiemap_count_to_size(current_extent); + memcpy(to, lcl_fm_ext, ext_count * sizeof(struct fiemap_extent)); +} + +#define FIEMAP_BUFFER_SIZE 4096 + +/** + * Non-zero fe_logical indicates that this is a continuation FIEMAP + * call. The local end offset and the device are sent in the first + * fm_extent. This function calculates the stripe number from the index. + * This function returns a stripe_no on which mapping is to be restarted. + * + * This function returns fm_end_offset which is the in-OST offset at which + * mapping should be restarted. If fm_end_offset=0 is returned then caller + * will re-calculate proper offset in next stripe. + * Note that the first extent is passed to lov_get_info via the value field.
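The fiemap helpers being reintroduced here all lean on the same RAID0 round-robin arithmetic: byte offset off in a file with stripe_size-byte stripes over stripe_count objects lives in chunk off / stripe_size, which is owned by object (off / stripe_size) % stripe_count. A standalone sketch of that mapping, a simplification in the spirit of lov_stripe_number() rather than a copy of it:

/*
 * Illustrative RAID0 offset-to-stripe mapping; a userspace-style
 * simplification, not the kernel helper itself.
 */
#include <stdint.h>

static int stripe_of_offset(uint64_t off, uint32_t stripe_size,
			    uint16_t stripe_count)
{
	/* chunk index within the file, then round-robin over objects */
	return (int)((off / stripe_size) % stripe_count);
}

With 1 MiB stripes over four OSTs, for example, offset 5 MiB falls in chunk 5 and therefore on stripe 1; fiemap_calc_last_stripe() above is this arithmetic run over a byte range instead of a single offset.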
+ * + * \param fiemap [in] fiemap request header + * \param lsm [in] striping information for the file + * \param fm_start [in] logical start of mapping + * \param fm_end [in] logical end of mapping + * \param start_stripe [out] starting stripe will be returned in this + */ +static loff_t fiemap_calc_fm_end_offset(struct fiemap *fiemap, + struct lov_stripe_md *lsm, + loff_t fm_start, loff_t fm_end, + int *start_stripe) +{ + loff_t local_end = fiemap->fm_extents[0].fe_logical; + loff_t lun_start, lun_end; + loff_t fm_end_offset; + int stripe_no = -1; + int i; + + if (!fiemap->fm_extent_count || !fiemap->fm_extents[0].fe_logical) + return 0; + + /* Find out stripe_no from ost_index saved in the fe_device */ + for (i = 0; i < lsm->lsm_stripe_count; i++) { + struct lov_oinfo *oinfo = lsm->lsm_oinfo[i]; + + if (lov_oinfo_is_dummy(oinfo)) + continue; + + if (oinfo->loi_ost_idx == fiemap->fm_extents[0].fe_device) { + stripe_no = i; + break; + } + } + + if (stripe_no == -1) + return -EINVAL; + + /* + * If we have finished mapping on previous device, shift logical + * offset to start of next device + */ + if (lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end, + &lun_start, &lun_end) && + local_end < lun_end) { + fm_end_offset = local_end; + *start_stripe = stripe_no; + } else { + /* This is a special value to indicate that caller should + * calculate offset in next stripe. + */ + fm_end_offset = 0; + *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count; + } + + return fm_end_offset; +} + +/** + * Break down the FIEMAP request and send appropriate calls to individual OSTs. + * This also handles the restarting of FIEMAP calls in case mapping overflows + * the available number of extents in a single call. + * + * \param env [in] lustre environment + * \param obj [in] file object + * \param fmkey [in] fiemap request header and other info + * \param fiemap [out] fiemap buffer holding retrieved map extents + * \param buflen [in/out] max buffer length of @fiemap, when iterating + * each OST, it is used to limit max map needed + * \retval 0 success + * \retval < 0 error + */ +static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj, + struct ll_fiemap_info_key *fmkey, + struct fiemap *fiemap, size_t *buflen) +{ + struct lov_obd *lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov; + unsigned int buffer_size = FIEMAP_BUFFER_SIZE; + struct fiemap_extent *lcl_fm_ext; + struct cl_object *subobj = NULL; + struct fiemap *fm_local = NULL; + struct lov_stripe_md *lsm; + loff_t fm_start; + loff_t fm_end; + loff_t fm_length; + loff_t fm_end_offset; + int count_local; + int ost_index = 0; + int start_stripe; + int current_extent = 0; + int rc = 0; + int last_stripe; + int cur_stripe = 0; + int cur_stripe_wrap = 0; + int stripe_count; + /* Whether we have collected enough extents */ + bool enough = false; + /* EOF for object */ + bool ost_eof = false; + /* done with required mapping for this OST? */ + bool ost_done = false; + + lsm = lov_lsm_addref(cl2lov(obj)); + if (!lsm) + return -ENODATA; + + /** + * If the stripe_count > 1 and the application does not understand + * DEVICE_ORDER flag, it cannot interpret the extents correctly. + */ + if (lsm->lsm_stripe_count > 1 && + !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) { + rc = -ENOTSUPP; + goto out; + } + + if (lsm_is_released(lsm)) { + if (fiemap->fm_start < fmkey->lfik_oa.o_size) { + /** + * released file, return a minimal FIEMAP if + * request fits in file-size.
+ */ + fiemap->fm_mapped_extents = 1; + fiemap->fm_extents[0].fe_logical = fiemap->fm_start; + if (fiemap->fm_start + fiemap->fm_length < + fmkey->lfik_oa.o_size) + fiemap->fm_extents[0].fe_length = + fiemap->fm_length; + else + fiemap->fm_extents[0].fe_length = + fmkey->lfik_oa.o_size - + fiemap->fm_start; + fiemap->fm_extents[0].fe_flags |= + FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST; + } + rc = 0; + goto out; + } + + if (fiemap_count_to_size(fiemap->fm_extent_count) < buffer_size) + buffer_size = fiemap_count_to_size(fiemap->fm_extent_count); + + fm_local = libcfs_kvzalloc(buffer_size, GFP_NOFS); + if (!fm_local) { + rc = -ENOMEM; + goto out; + } + lcl_fm_ext = &fm_local->fm_extents[0]; + count_local = fiemap_size_to_count(buffer_size); + + fm_start = fiemap->fm_start; + fm_length = fiemap->fm_length; + /* Calculate start stripe, last stripe and length of mapping */ + start_stripe = lov_stripe_number(lsm, fm_start); + fm_end = (fm_length == ~0ULL) ? fmkey->lfik_oa.o_size : + fm_start + fm_length - 1; + /* If fm_length != ~0ULL but fm_start + fm_length - 1 exceeds file size */ + if (fm_end > fmkey->lfik_oa.o_size) + fm_end = fmkey->lfik_oa.o_size; + + last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end, + start_stripe, &stripe_count); + fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, fm_end, + &start_stripe); + if (fm_end_offset == -EINVAL) { + rc = -EINVAL; + goto out; + } + + /** + * Requested extent count exceeds the fiemap buffer size, shrink our + * ambition. + */ + if (fiemap_count_to_size(fiemap->fm_extent_count) > *buflen) + fiemap->fm_extent_count = fiemap_size_to_count(*buflen); + if (!fiemap->fm_extent_count) + count_local = 0; + + /* Check each stripe */ + for (cur_stripe = start_stripe; stripe_count > 0; + --stripe_count, + cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) { + loff_t req_fm_len; /* Stores length of required mapping */ + loff_t len_mapped_single_call; + loff_t lun_start; + loff_t lun_end; + loff_t obd_object_end; + unsigned int ext_count; + + cur_stripe_wrap = cur_stripe; + + /* Find out range of mapping on this stripe */ + if (!(lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end, + &lun_start, &obd_object_end))) + continue; + + if (lov_oinfo_is_dummy(lsm->lsm_oinfo[cur_stripe])) { + rc = -EIO; + goto out; + } + + /* + * If this is a continuation FIEMAP call and we are on + * starting stripe then lun_start needs to be set to + * fm_end_offset + */ + if (fm_end_offset && cur_stripe == start_stripe) + lun_start = fm_end_offset; + + if (fm_length != ~0ULL) { + /* Handle fm_start + fm_length overflow */ + if (fm_start + fm_length < fm_start) + fm_length = ~0ULL - fm_start; + lun_end = lov_size_to_stripe(lsm, fm_start + fm_length, + cur_stripe); + } else { + lun_end = ~0ULL; + } + + if (lun_start == lun_end) + continue; + + req_fm_len = obd_object_end - lun_start; + fm_local->fm_length = 0; + len_mapped_single_call = 0; + + /* find lovsub object */ + subobj = lov_find_subobj(env, cl2lov(obj), lsm, + cur_stripe); + if (IS_ERR(subobj)) { + rc = PTR_ERR(subobj); + goto out; + } + /* + * If the output buffer is very large and the objects have many + * extents we may need to loop on a single OST repeatedly + */ + ost_eof = false; + ost_done = false; + do { + if (fiemap->fm_extent_count > 0) { + /* Don't get too many extents.
*/ + if (current_extent + count_local > + fiemap->fm_extent_count) + count_local = fiemap->fm_extent_count - + current_extent; + } + + lun_start += len_mapped_single_call; + fm_local->fm_length = req_fm_len - + len_mapped_single_call; + req_fm_len = fm_local->fm_length; + fm_local->fm_extent_count = enough ? 1 : count_local; + fm_local->fm_mapped_extents = 0; + fm_local->fm_flags = fiemap->fm_flags; + + ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx; + + if (ost_index < 0 || + ost_index >= lov->desc.ld_tgt_count) { + rc = -EINVAL; + goto obj_put; + } + /* + * If OST is inactive, return extent with UNKNOWN + * flag. + */ + if (!lov->lov_tgts[ost_index]->ltd_active) { + fm_local->fm_flags |= FIEMAP_EXTENT_LAST; + fm_local->fm_mapped_extents = 1; + + lcl_fm_ext[0].fe_logical = lun_start; + lcl_fm_ext[0].fe_length = obd_object_end - + lun_start; + lcl_fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN; + + goto inactive_tgt; + } + + fm_local->fm_start = lun_start; + fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER; + memcpy(&fmkey->lfik_fiemap, fm_local, sizeof(*fm_local)); + *buflen = fiemap_count_to_size(fm_local->fm_extent_count); + + rc = cl_object_fiemap(env, subobj, fmkey, fm_local, + buflen); + if (rc) + goto obj_put; +inactive_tgt: + ext_count = fm_local->fm_mapped_extents; + if (!ext_count) { + ost_done = true; + /* + * If last stripe has a hole at the end, + * we need to return + */ + if (cur_stripe_wrap == last_stripe) { + fiemap->fm_mapped_extents = 0; + goto finish; + } + break; + } else if (enough) { + /* + * We've collected enough extents and there are + * more extents after it. + */ + goto finish; + } + + /* If we just need num of extents, go to next device */ + if (!fiemap->fm_extent_count) { + current_extent += ext_count; + break; + } + + /* prepare to copy retrieved map extents */ + len_mapped_single_call = + lcl_fm_ext[ext_count - 1].fe_logical - + lun_start + lcl_fm_ext[ext_count - 1].fe_length; + + /* Have we finished mapping on this device? */ + if (req_fm_len <= len_mapped_single_call) + ost_done = true; + + /* + * Clear the EXTENT_LAST flag which can be present on + * the last extent + */ + if (lcl_fm_ext[ext_count - 1].fe_flags & + FIEMAP_EXTENT_LAST) + lcl_fm_ext[ext_count - 1].fe_flags &= + ~FIEMAP_EXTENT_LAST; + + if (lov_stripe_size(lsm, + lcl_fm_ext[ext_count - 1].fe_logical + + lcl_fm_ext[ext_count - 1].fe_length, + cur_stripe) >= fmkey->lfik_oa.o_size) + ost_eof = true; + + fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext, + ost_index, ext_count, + current_extent); + current_extent += ext_count; + + /* Ran out of available extents? */ + if (current_extent >= fiemap->fm_extent_count) + enough = true; + } while (!ost_done && !ost_eof); + + cl_object_put(env, subobj); + subobj = NULL; + + if (cur_stripe_wrap == last_stripe) + goto finish; + } /* for each stripe */ +finish: + /* + * Indicate that we are returning device offsets unless file just has + * single stripe + */ + if (lsm->lsm_stripe_count > 1) + fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER; + + if (!fiemap->fm_extent_count) + goto skip_last_device_calc; + + /* + * Check if we have reached the last stripe and whether mapping for that + * stripe is done.
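One subtlety in the per-OST loop above is worth isolating: each OST reply may set FIEMAP_EXTENT_LAST for its own object, but the caller-visible mapping may carry LAST only on the final extent of the whole file, so the loop strips the flag from intermediate batches and re-applies it once at the end. A hedged sketch of that bookkeeping, using a hypothetical helper and assuming <linux/fiemap.h>:

/*
 * Hypothetical helper mirroring the LAST-flag handling: clear the
 * flag on an intermediate batch, set it once the file mapping ends.
 */
#include <linux/fiemap.h>
#include <linux/types.h>

static void fixup_last_flag(struct fiemap_extent *ext, unsigned int n,
			    bool file_mapping_done)
{
	if (!n)
		return;
	if (file_mapping_done)
		ext[n - 1].fe_flags |= FIEMAP_EXTENT_LAST;
	else
		ext[n - 1].fe_flags &= ~FIEMAP_EXTENT_LAST;
}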
+ */ + if ((cur_stripe_wrap == last_stripe) && (ost_done || ost_eof)) + fiemap->fm_extents[current_extent - 1].fe_flags |= + FIEMAP_EXTENT_LAST; +skip_last_device_calc: + fiemap->fm_mapped_extents = current_extent; +obj_put: + if (subobj) + cl_object_put(env, subobj); +out: + kvfree(fm_local); + lov_lsm_put(lsm); + return rc; +} + static int lov_object_getstripe(const struct lu_env *env, struct cl_object *obj, struct lov_user_md __user *lum) { @@ -923,10 +1437,53 @@ static int lov_object_getstripe(const struct lu_env *env, struct cl_object *obj, return -ENODATA; rc = lov_getstripe(cl2lov(obj), lsm, lum); - lov_lsm_put(obj, lsm); + lov_lsm_put(lsm); return rc; } +static int lov_object_layout_get(const struct lu_env *env, + struct cl_object *obj, + struct cl_layout *cl) +{ + struct lov_object *lov = cl2lov(obj); + struct lov_stripe_md *lsm = lov_lsm_addref(lov); + struct lu_buf *buf = &cl->cl_buf; + ssize_t rc; + + if (!lsm) { + cl->cl_size = 0; + cl->cl_layout_gen = CL_LAYOUT_GEN_EMPTY; + cl->cl_is_released = false; + + return 0; + } + + cl->cl_size = lov_mds_md_size(lsm->lsm_stripe_count, lsm->lsm_magic); + cl->cl_layout_gen = lsm->lsm_layout_gen; + cl->cl_is_released = lsm_is_released(lsm); + + rc = lov_lsm_pack(lsm, buf->lb_buf, buf->lb_len); + lov_lsm_put(lsm); + + return rc < 0 ? rc : 0; +} + +static loff_t lov_object_maxbytes(struct cl_object *obj) +{ + struct lov_object *lov = cl2lov(obj); + struct lov_stripe_md *lsm = lov_lsm_addref(lov); + loff_t maxbytes; + + if (!lsm) + return LLONG_MAX; + + maxbytes = lsm->lsm_maxbytes; + + lov_lsm_put(lsm); + + return maxbytes; +} + static const struct cl_object_operations lov_ops = { .coo_page_init = lov_page_init, .coo_lock_init = lov_lock_init, @@ -934,7 +1491,10 @@ static const struct cl_object_operations lov_ops = { .coo_attr_get = lov_attr_get, .coo_attr_update = lov_attr_update, .coo_conf_set = lov_conf_set, - .coo_getstripe = lov_object_getstripe + .coo_getstripe = lov_object_getstripe, + .coo_layout_get = lov_object_layout_get, + .coo_maxbytes = lov_object_maxbytes, + .coo_fiemap = lov_object_fiemap, }; static const struct lu_object_operations lov_lu_obj_ops = { @@ -986,22 +1546,6 @@ struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov) return lsm; } -struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj) -{ - struct lu_object *luobj; - struct lov_stripe_md *lsm = NULL; - - if (!clobj) - return NULL; - - luobj = lu_object_locate(&cl_object_header(clobj)->coh_lu, - &lov_device_type); - if (luobj) - lsm = lov_lsm_addref(lu2lov(luobj)); - return lsm; -} -EXPORT_SYMBOL(lov_lsm_get); - int lov_read_and_clear_async_rc(struct cl_object *clob) { struct lu_object *luobj; diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c index be6e9857ce2a..6c93d180aef7 100644 --- a/drivers/staging/lustre/lustre/lov/lov_pack.c +++ b/drivers/staging/lustre/lustre/lov/lov_pack.c @@ -38,14 +38,17 @@ #define DEBUG_SUBSYSTEM S_LOV +#include "../include/lustre/lustre_idl.h" +#include "../include/lustre/lustre_user.h" + #include "../include/lustre_net.h" +#include "../include/lustre_swab.h" #include "../include/obd.h" #include "../include/obd_class.h" #include "../include/obd_support.h" -#include "../include/lustre/lustre_user.h" -#include "lov_internal.h" #include "lov_cl_internal.h" +#include "lov_internal.h" void lov_dump_lmm_common(int level, void *lmmp) { @@ -97,120 +100,54 @@ void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm) le16_to_cpu(lmm->lmm_stripe_count)); } -/* Pack LOV object 
metadata for disk storage. It is packed in LE byte - * order and is opaque to the networking layer. +/** + * Pack LOV striping metadata for disk storage format (in little + * endian byte order). * - * XXX In the future, this will be enhanced to get the EA size from the - * underlying OSC device(s) to get their EA sizes so we can stack - * LOVs properly. For now lov_mds_md_size() just assumes one u64 - * per stripe. + * This follows the getxattr() conventions. If \a buf_size is zero + * then return the size needed. If \a buf_size is too small then + * return -ERANGE. Otherwise return the size of the result. */ -int lov_obd_packmd(struct lov_obd *lov, struct lov_mds_md **lmmp, - struct lov_stripe_md *lsm) +ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf, + size_t buf_size) { - struct lov_mds_md_v1 *lmmv1; - struct lov_mds_md_v3 *lmmv3; - __u16 stripe_count; struct lov_ost_data_v1 *lmm_objects; - int lmm_size, lmm_magic; - int i; - int cplen = 0; - - if (lsm) { - lmm_magic = lsm->lsm_magic; - } else { - if (lmmp && *lmmp) - lmm_magic = le32_to_cpu((*lmmp)->lmm_magic); - else - /* lsm == NULL and lmmp == NULL */ - lmm_magic = LOV_MAGIC; - } - - if ((lmm_magic != LOV_MAGIC_V1) && - (lmm_magic != LOV_MAGIC_V3)) { - CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n", - lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3); - return -EINVAL; - } - - if (lsm) { - /* If we are just sizing the EA, limit the stripe count - * to the actual number of OSTs in this filesystem. - */ - if (!lmmp) { - stripe_count = lov_get_stripecnt(lov, lmm_magic, - lsm->lsm_stripe_count); - lsm->lsm_stripe_count = stripe_count; - } else if (!lsm_is_released(lsm)) { - stripe_count = lsm->lsm_stripe_count; - } else { - stripe_count = 0; - } - } else { - /* - * To calculate maximum easize by active targets at present, - * which is exactly the maximum easize to be seen by LOV - */ - stripe_count = lov->desc.ld_active_tgt_count; - } + struct lov_mds_md_v1 *lmmv1 = buf; + struct lov_mds_md_v3 *lmmv3 = buf; + size_t lmm_size; + unsigned int i; - /* XXX LOV STACKING call into osc for sizes */ - lmm_size = lov_mds_md_size(stripe_count, lmm_magic); - - if (!lmmp) + lmm_size = lov_mds_md_size(lsm->lsm_stripe_count, lsm->lsm_magic); + if (!buf_size) return lmm_size; - if (*lmmp && !lsm) { - stripe_count = le16_to_cpu((*lmmp)->lmm_stripe_count); - lmm_size = lov_mds_md_size(stripe_count, lmm_magic); - kvfree(*lmmp); - *lmmp = NULL; - return 0; - } - - if (!*lmmp) { - *lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS); - if (!*lmmp) - return -ENOMEM; - } - - CDEBUG(D_INFO, "lov_packmd: LOV_MAGIC 0x%08X, lmm_size = %d\n", - lmm_magic, lmm_size); - - lmmv1 = *lmmp; - lmmv3 = (struct lov_mds_md_v3 *)*lmmp; - if (lmm_magic == LOV_MAGIC_V3) - lmmv3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3); - else - lmmv1->lmm_magic = cpu_to_le32(LOV_MAGIC_V1); - - if (!lsm) - return lmm_size; + if (buf_size < lmm_size) + return -ERANGE; - /* lmmv1 and lmmv3 point to the same struct and have the + /* + * lmmv1 and lmmv3 point to the same struct and have the * same first fields */ + lmmv1->lmm_magic = cpu_to_le32(lsm->lsm_magic); lmm_oi_cpu_to_le(&lmmv1->lmm_oi, &lsm->lsm_oi); lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size); - lmmv1->lmm_stripe_count = cpu_to_le16(stripe_count); + lmmv1->lmm_stripe_count = cpu_to_le16(lsm->lsm_stripe_count); lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern); lmmv1->lmm_layout_gen = cpu_to_le16(lsm->lsm_layout_gen); + if (lsm->lsm_magic == LOV_MAGIC_V3) { - cplen = strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name, - 
sizeof(lmmv3->lmm_pool_name)); - if (cplen >= sizeof(lmmv3->lmm_pool_name)) - return -E2BIG; + CLASSERT(sizeof(lsm->lsm_pool_name) == + sizeof(lmmv3->lmm_pool_name)); + strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name, + sizeof(lmmv3->lmm_pool_name)); lmm_objects = lmmv3->lmm_objects; } else { lmm_objects = lmmv1->lmm_objects; } - for (i = 0; i < stripe_count; i++) { + for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - /* XXX LOV STACKING call down to osc_packmd() to do packing */ - LASSERTF(ostid_id(&loi->loi_oi) != 0, "lmm_oi "DOSTID - " stripe %u/%u idx %u\n", POSTID(&lmmv1->lmm_oi), - i, stripe_count, loi->loi_ost_idx); + ostid_cpu_to_le(&loi->loi_oi, &lmm_objects[i].l_ost_oi); lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen); lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx); @@ -219,15 +156,6 @@ int lov_obd_packmd(struct lov_obd *lov, struct lov_mds_md **lmmp, return lmm_size; } -int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, - struct lov_stripe_md *lsm) -{ - struct obd_device *obd = class_exp2obd(exp); - struct lov_obd *lov = &obd->u.lov; - - return lov_obd_packmd(lov, lmmp, lsm); -} - /* Find the max stripecount we should use */ __u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count) { @@ -270,34 +198,34 @@ static int lov_verify_lmm(void *lmm, int lmm_bytes, __u16 *stripe_count) return rc; } -int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count, - int pattern, int magic) +struct lov_stripe_md *lov_lsm_alloc(u16 stripe_count, u32 pattern, u32 magic) { - int i, lsm_size; + struct lov_stripe_md *lsm; + unsigned int i; - CDEBUG(D_INFO, "alloc lsm, stripe_count %d\n", stripe_count); + CDEBUG(D_INFO, "alloc lsm, stripe_count %u\n", stripe_count); - *lsmp = lsm_alloc_plain(stripe_count, &lsm_size); - if (!*lsmp) { - CERROR("can't allocate lsmp stripe_count %d\n", stripe_count); - return -ENOMEM; + lsm = lsm_alloc_plain(stripe_count); + if (!lsm) { + CERROR("cannot allocate LSM stripe_count %u\n", stripe_count); + return ERR_PTR(-ENOMEM); } - atomic_set(&(*lsmp)->lsm_refc, 1); - spin_lock_init(&(*lsmp)->lsm_lock); - (*lsmp)->lsm_magic = magic; - (*lsmp)->lsm_stripe_count = stripe_count; - (*lsmp)->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES * stripe_count; - (*lsmp)->lsm_pattern = pattern; - (*lsmp)->lsm_pool_name[0] = '\0'; - (*lsmp)->lsm_layout_gen = 0; + atomic_set(&lsm->lsm_refc, 1); + spin_lock_init(&lsm->lsm_lock); + lsm->lsm_magic = magic; + lsm->lsm_stripe_count = stripe_count; + lsm->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES * stripe_count; + lsm->lsm_pattern = pattern; + lsm->lsm_pool_name[0] = '\0'; + lsm->lsm_layout_gen = 0; if (stripe_count > 0) - (*lsmp)->lsm_oinfo[0]->loi_ost_idx = ~0; + lsm->lsm_oinfo[0]->loi_ost_idx = ~0; for (i = 0; i < stripe_count; i++) - loi_init((*lsmp)->lsm_oinfo[i]); + loi_init(lsm->lsm_oinfo[i]); - return lsm_size; + return lsm; } int lov_free_memmd(struct lov_stripe_md **lsmp) @@ -317,56 +245,34 @@ int lov_free_memmd(struct lov_stripe_md **lsmp) /* Unpack LOV object metadata from disk storage. It is packed in LE byte * order and is opaque to the networking layer. 
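lov_lsm_pack() above adopts the getxattr() sizing convention its new comment describes: call with buf_size == 0 to learn the required size, get -ERANGE if the supplied buffer is too small, otherwise receive the number of bytes written. The pattern in isolation, with pack_blob() as a hypothetical stand-in:

/*
 * Sketch of the getxattr()-style convention; pack_blob() is a
 * hypothetical stand-in for lov_lsm_pack().
 */
#include <errno.h>
#include <string.h>
#include <sys/types.h>

static ssize_t pack_blob(const void *src, size_t need,
			 void *buf, size_t buf_size)
{
	if (!buf_size)
		return need;		/* probe: report the required size */
	if (buf_size < need)
		return -ERANGE;		/* caller's buffer is too small */
	memcpy(buf, src, need);
	return need;			/* bytes actually produced */
}

lov_object_layout_get() uses exactly this two-step dance: it publishes cl_size from lov_mds_md_size() so the caller can allocate, then packs into the provided lu_buf.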
*/ -int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, - struct lov_mds_md *lmm, int lmm_bytes) +struct lov_stripe_md *lov_unpackmd(struct lov_obd *lov, struct lov_mds_md *lmm, + size_t lmm_size) { - struct obd_device *obd = class_exp2obd(exp); - struct lov_obd *lov = &obd->u.lov; - int rc = 0, lsm_size; - __u16 stripe_count; - __u32 magic; - __u32 pattern; - - /* If passed an MDS struct use values from there, otherwise defaults */ - if (lmm) { - rc = lov_verify_lmm(lmm, lmm_bytes, &stripe_count); - if (rc) - return rc; - magic = le32_to_cpu(lmm->lmm_magic); - pattern = le32_to_cpu(lmm->lmm_pattern); - } else { - magic = LOV_MAGIC; - stripe_count = lov_get_stripecnt(lov, magic, 0); - pattern = LOV_PATTERN_RAID0; - } + struct lov_stripe_md *lsm; + u16 stripe_count; + u32 pattern; + u32 magic; + int rc; - /* If we aren't passed an lsmp struct, we just want the size */ - if (!lsmp) { - /* XXX LOV STACKING call into osc for sizes */ - LBUG(); - return lov_stripe_md_size(stripe_count); - } - /* If we are passed an allocated struct but nothing to unpack, free */ - if (*lsmp && !lmm) { - lov_free_memmd(lsmp); - return 0; - } + rc = lov_verify_lmm(lmm, lmm_size, &stripe_count); + if (rc) + return ERR_PTR(rc); - lsm_size = lov_alloc_memmd(lsmp, stripe_count, pattern, magic); - if (lsm_size < 0) - return lsm_size; + magic = le32_to_cpu(lmm->lmm_magic); + pattern = le32_to_cpu(lmm->lmm_pattern); - /* If we are passed a pointer but nothing to unpack, we only alloc */ - if (!lmm) - return lsm_size; + lsm = lov_lsm_alloc(stripe_count, pattern, magic); + if (IS_ERR(lsm)) + return lsm; - rc = lsm_op_find(magic)->lsm_unpackmd(lov, *lsmp, lmm); + LASSERT(lsm_op_find(magic)); + rc = lsm_op_find(magic)->lsm_unpackmd(lov, lsm, lmm); if (rc) { - lov_free_memmd(lsmp); - return rc; + lov_free_memmd(&lsm); + return ERR_PTR(rc); } - return lsm_size; + return lsm; } /* Retrieve object striping information. @@ -378,15 +284,14 @@ int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm, struct lov_user_md __user *lump) { - /* - * XXX huge struct allocated on stack. 
- */ /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */ - struct lov_obd *lov; struct lov_user_md_v3 lum; - struct lov_mds_md *lmmk = NULL; - int rc, lmmk_size, lmm_size; - int lum_size; + struct lov_mds_md *lmmk; + u32 stripe_count; + ssize_t lmm_size; + size_t lmmk_size; + size_t lum_size; + int rc; mm_segment_t seg; if (!lsm) @@ -399,6 +304,18 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm, seg = get_fs(); set_fs(KERNEL_DS); + if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3) { + CERROR("bad LSM MAGIC: 0x%08X != 0x%08X nor 0x%08X\n", + lsm->lsm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3); + rc = -EIO; + goto out; + } + + if (!lsm_is_released(lsm)) + stripe_count = lsm->lsm_stripe_count; + else + stripe_count = 0; + /* we only need the header part from user space to get lmm_magic and * lmm_stripe_count, (the header part is common to v1 and v3) */ @@ -417,32 +334,40 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm, if (lum.lmm_stripe_count && (lum.lmm_stripe_count < lsm->lsm_stripe_count)) { /* Return right size of stripe to user */ - lum.lmm_stripe_count = lsm->lsm_stripe_count; + lum.lmm_stripe_count = stripe_count; rc = copy_to_user(lump, &lum, lum_size); rc = -EOVERFLOW; goto out; } - lov = lu2lov_dev(obj->lo_cl.co_lu.lo_dev)->ld_lov; - rc = lov_obd_packmd(lov, &lmmk, lsm); - if (rc < 0) + lmmk_size = lov_mds_md_size(stripe_count, lsm->lsm_magic); + + + lmmk = libcfs_kvzalloc(lmmk_size, GFP_NOFS); + if (!lmmk) { + rc = -ENOMEM; goto out; - lmmk_size = rc; - lmm_size = rc; - rc = 0; + } + + lmm_size = lov_lsm_pack(lsm, lmmk, lmmk_size); + if (lmm_size < 0) { + rc = lmm_size; + goto out_free; + } /* FIXME: Bug 1185 - copy fields properly when structs change */ /* struct lov_user_md_v3 and struct lov_mds_md_v3 must be the same */ CLASSERT(sizeof(lum) == sizeof(struct lov_mds_md_v3)); CLASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lmmk->lmm_objects[0])); - if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) && - ((lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) || - (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)))) { + if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC && + (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1) || + lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3))) { lustre_swab_lov_mds_md(lmmk); lustre_swab_lov_user_md_objects( (struct lov_user_ost_data *)lmmk->lmm_objects, lmmk->lmm_stripe_count); } + if (lum.lmm_magic == LOV_USER_MAGIC) { /* User request for v1, we need skip lmm_pool_name */ if (lmmk->lmm_magic == LOV_MAGIC_V3) { @@ -474,9 +399,11 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm, ((struct lov_user_md *)lmmk)->lmm_stripe_count = lum.lmm_stripe_count; if (copy_to_user(lump, lmmk, lmm_size)) rc = -EFAULT; + else + rc = 0; out_free: - kfree(lmmk); + kvfree(lmmk); out: set_fs(seg); return rc; diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c index 00bfabad78eb..62ceb6dfdfdf 100644 --- a/drivers/staging/lustre/lustre/lov/lov_page.c +++ b/drivers/staging/lustre/lustre/lov/lov_page.c @@ -49,51 +49,6 @@ * */ -/** - * Adjust the stripe index by layout of raid0. @max_index is the maximum - * page index covered by an underlying DLM lock. - * This function converts max_index from stripe level to file level, and make - * sure it's not beyond one stripe. 
- */ -static int lov_raid0_page_is_under_lock(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused, - pgoff_t *max_index) -{ - struct lov_object *loo = cl2lov(slice->cpl_obj); - struct lov_layout_raid0 *r0 = lov_r0(loo); - pgoff_t index = *max_index; - unsigned int pps; /* pages per stripe */ - - CDEBUG(D_READA, DFID "*max_index = %lu, nr = %d\n", - PFID(lu_object_fid(lov2lu(loo))), index, r0->lo_nr); - - if (index == 0) /* the page is not covered by any lock */ - return 0; - - if (r0->lo_nr == 1) /* single stripe file */ - return 0; - - /* max_index is stripe level, convert it into file level */ - if (index != CL_PAGE_EOF) { - int stripeno = lov_page_stripe(slice->cpl_page); - *max_index = lov_stripe_pgoff(loo->lo_lsm, index, stripeno); - } - - /* calculate the end of current stripe */ - pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT; - index = slice->cpl_index + pps - slice->cpl_index % pps - 1; - - CDEBUG(D_READA, DFID "*max_index = %lu, index = %lu, pps = %u, stripe_size = %u, stripe no = %u, page index = %lu\n", - PFID(lu_object_fid(lov2lu(loo))), *max_index, index, pps, - loo->lo_lsm->lsm_stripe_size, lov_page_stripe(slice->cpl_page), - slice->cpl_index); - - /* never exceed the end of the stripe */ - *max_index = min_t(pgoff_t, *max_index, index); - return 0; -} - static int lov_raid0_page_print(const struct lu_env *env, const struct cl_page_slice *slice, void *cookie, lu_printer_t printer) @@ -104,7 +59,6 @@ static int lov_raid0_page_print(const struct lu_env *env, } static const struct cl_page_operations lov_raid0_page_ops = { - .cpo_is_under_lock = lov_raid0_page_is_under_lock, .cpo_print = lov_raid0_page_print }; diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c index f8c8a361ef79..7daa8671fdc3 100644 --- a/drivers/staging/lustre/lustre/lov/lov_pool.c +++ b/drivers/staging/lustre/lustre/lov/lov_pool.c @@ -81,7 +81,8 @@ static void lov_pool_putref_locked(struct pool_desc *pool) * Chapter 6.4. 
* Addison Wesley, 1973 */ -static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key, unsigned mask) +static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key, + unsigned int mask) { int i; __u32 result; diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c index 09dcaf484c89..d43cc88ae641 100644 --- a/drivers/staging/lustre/lustre/lov/lov_request.c +++ b/drivers/staging/lustre/lustre/lov/lov_request.c @@ -44,7 +44,6 @@ static void lov_init_set(struct lov_request_set *set) atomic_set(&set->set_completes, 0); atomic_set(&set->set_success, 0); atomic_set(&set->set_finish_checked, 0); - set->set_cookies = NULL; INIT_LIST_HEAD(&set->set_list); atomic_set(&set->set_refcount, 1); init_waitqueue_head(&set->set_waitq); @@ -61,8 +60,6 @@ void lov_finish_set(struct lov_request_set *set) rq_link); list_del_init(&req->rq_link); - if (req->rq_oi.oi_oa) - kmem_cache_free(obdo_cachep, req->rq_oi.oi_oa); kfree(req->rq_oi.oi_osfs); kfree(req); } @@ -97,22 +94,6 @@ static void lov_update_set(struct lov_request_set *set, wake_up(&set->set_waitq); } -int lov_update_common_set(struct lov_request_set *set, - struct lov_request *req, int rc) -{ - struct lov_obd *lov = &set->set_exp->exp_obd->u.lov; - - lov_update_set(set, req, rc); - - /* grace error on inactive ost */ - if (rc && !(lov->lov_tgts[req->rq_idx] && - lov->lov_tgts[req->rq_idx]->ltd_active)) - rc = 0; - - /* FIXME in raid1 regime, should return 0 */ - return rc; -} - static void lov_set_add_req(struct lov_request *req, struct lov_request_set *set) { @@ -183,279 +164,6 @@ out: return rc; } -static int common_attr_done(struct lov_request_set *set) -{ - struct lov_request *req; - struct obdo *tmp_oa; - int rc = 0, attrset = 0; - - if (!set->set_oi->oi_oa) - return 0; - - if (!atomic_read(&set->set_success)) - return -EIO; - - tmp_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); - if (!tmp_oa) { - rc = -ENOMEM; - goto out; - } - - list_for_each_entry(req, &set->set_list, rq_link) { - if (!req->rq_complete || req->rq_rc) - continue; - if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */ - continue; - lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa, - req->rq_oi.oi_oa->o_valid, - set->set_oi->oi_md, req->rq_stripe, &attrset); - } - if (!attrset) { - CERROR("No stripes had valid attrs\n"); - rc = -EIO; - } - if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) && - (set->set_oi->oi_md->lsm_stripe_count != attrset)) { - /* When we take attributes of some epoch, we require all the - * ost to be active. - */ - CERROR("Not all the stripes had valid attrs\n"); - rc = -EIO; - goto out; - } - - tmp_oa->o_oi = set->set_oi->oi_oa->o_oi; - memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa)); -out: - if (tmp_oa) - kmem_cache_free(obdo_cachep, tmp_oa); - return rc; -} - -int lov_fini_getattr_set(struct lov_request_set *set) -{ - int rc = 0; - - if (!set) - return 0; - LASSERT(set->set_exp); - if (atomic_read(&set->set_completes)) - rc = common_attr_done(set); - - lov_put_reqset(set); - - return rc; -} - -/* The callback for osc_getattr_async that finalizes a request info when a - * response is received. 
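The callbacks being deleted in lov_request.c all funnel into the same completion accounting: each per-stripe reply bumps set_completes (and set_success on rc == 0) and wakes set_waitq so the synchronous caller can proceed. Its skeleton, reduced to the moving parts, with a simplified struct standing in for lov_request_set (waitqueue initialization omitted):

/*
 * Skeleton of the request-set completion accounting; struct req_set
 * is a simplified stand-in, not the kernel structure.
 */
#include <linux/atomic.h>
#include <linux/wait.h>

struct req_set {
	atomic_t		completes;
	atomic_t		success;
	wait_queue_head_t	waitq;
};

static void req_set_update(struct req_set *set, int rc)
{
	atomic_inc(&set->completes);
	if (!rc)
		atomic_inc(&set->success);
	wake_up(&set->waitq);	/* unblock the waiter, if any */
}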
- */ -static int cb_getattr_update(void *cookie, int rc) -{ - struct obd_info *oinfo = cookie; - struct lov_request *lovreq; - - lovreq = container_of(oinfo, struct lov_request, rq_oi); - return lov_update_common_set(lovreq->rq_rqset, lovreq, rc); -} - -int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo, - struct lov_request_set **reqset) -{ - struct lov_request_set *set; - struct lov_obd *lov = &exp->exp_obd->u.lov; - int rc = 0, i; - - set = kzalloc(sizeof(*set), GFP_NOFS); - if (!set) - return -ENOMEM; - lov_init_set(set); - - set->set_exp = exp; - set->set_oi = oinfo; - - for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) { - struct lov_oinfo *loi; - struct lov_request *req; - - loi = oinfo->oi_md->lsm_oinfo[i]; - if (lov_oinfo_is_dummy(loi)) - continue; - - if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH) { - /* SOM requires all the OSTs to be active. */ - rc = -EIO; - goto out_set; - } - continue; - } - - req = kzalloc(sizeof(*req), GFP_NOFS); - if (!req) { - rc = -ENOMEM; - goto out_set; - } - - req->rq_stripe = i; - req->rq_idx = loi->loi_ost_idx; - - req->rq_oi.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); - if (!req->rq_oi.oi_oa) { - kfree(req); - rc = -ENOMEM; - goto out_set; - } - memcpy(req->rq_oi.oi_oa, oinfo->oi_oa, - sizeof(*req->rq_oi.oi_oa)); - req->rq_oi.oi_oa->o_oi = loi->loi_oi; - req->rq_oi.oi_cb_up = cb_getattr_update; - - lov_set_add_req(req, set); - } - if (!set->set_count) { - rc = -EIO; - goto out_set; - } - *reqset = set; - return rc; -out_set: - lov_fini_getattr_set(set); - return rc; -} - -int lov_fini_setattr_set(struct lov_request_set *set) -{ - int rc = 0; - - if (!set) - return 0; - LASSERT(set->set_exp); - if (atomic_read(&set->set_completes)) { - rc = common_attr_done(set); - /* FIXME update qos data here */ - } - - lov_put_reqset(set); - return rc; -} - -int lov_update_setattr_set(struct lov_request_set *set, - struct lov_request *req, int rc) -{ - struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov; - struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md; - - lov_update_set(set, req, rc); - - /* grace error on inactive ost */ - if (rc && !(lov->lov_tgts[req->rq_idx] && - lov->lov_tgts[req->rq_idx]->ltd_active)) - rc = 0; - - if (rc == 0) { - if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME) - lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime = - req->rq_oi.oi_oa->o_ctime; - if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME) - lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime = - req->rq_oi.oi_oa->o_mtime; - if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME) - lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime = - req->rq_oi.oi_oa->o_atime; - } - - return rc; -} - -/* The callback for osc_setattr_async that finalizes a request info when a - * response is received. 
- */ -static int cb_setattr_update(void *cookie, int rc) -{ - struct obd_info *oinfo = cookie; - struct lov_request *lovreq; - - lovreq = container_of(oinfo, struct lov_request, rq_oi); - return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc); -} - -int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - struct lov_request_set **reqset) -{ - struct lov_request_set *set; - struct lov_obd *lov = &exp->exp_obd->u.lov; - int rc = 0, i; - - set = kzalloc(sizeof(*set), GFP_NOFS); - if (!set) - return -ENOMEM; - lov_init_set(set); - - set->set_exp = exp; - set->set_oi = oinfo; - if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) - set->set_cookies = oti->oti_logcookies; - - for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) { - struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i]; - struct lov_request *req; - - if (lov_oinfo_is_dummy(loi)) - continue; - - if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - req = kzalloc(sizeof(*req), GFP_NOFS); - if (!req) { - rc = -ENOMEM; - goto out_set; - } - req->rq_stripe = i; - req->rq_idx = loi->loi_ost_idx; - - req->rq_oi.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); - if (!req->rq_oi.oi_oa) { - kfree(req); - rc = -ENOMEM; - goto out_set; - } - memcpy(req->rq_oi.oi_oa, oinfo->oi_oa, - sizeof(*req->rq_oi.oi_oa)); - req->rq_oi.oi_oa->o_oi = loi->loi_oi; - req->rq_oi.oi_oa->o_stripe_idx = i; - req->rq_oi.oi_cb_up = cb_setattr_update; - - if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) { - int off = lov_stripe_offset(oinfo->oi_md, - oinfo->oi_oa->o_size, i, - &req->rq_oi.oi_oa->o_size); - - if (off < 0 && req->rq_oi.oi_oa->o_size) - req->rq_oi.oi_oa->o_size--; - - CDEBUG(D_INODE, "stripe %d has size %llu/%llu\n", - i, req->rq_oi.oi_oa->o_size, - oinfo->oi_oa->o_size); - } - lov_set_add_req(req, set); - } - if (!set->set_count) { - rc = -EIO; - goto out_set; - } - *reqset = set; - return rc; -out_set: - lov_fini_setattr_set(set); - return rc; -} - #define LOV_U64_MAX ((__u64)~0ULL) #define LOV_SUM_MAX(tot, add) \ do { \ diff --git a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c index b519a1940e1e..5d6536f8a4f7 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_dev.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_dev.c @@ -44,46 +44,6 @@ /***************************************************************************** * - * Lovsub transfer operations. - * - */ - -static void lovsub_req_completion(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret) -{ - struct lovsub_req *lsr; - - lsr = cl2lovsub_req(slice); - kmem_cache_free(lovsub_req_kmem, lsr); -} - -/** - * Implementation of struct cl_req_operations::cro_attr_set() for lovsub - * layer. Lov and lovsub are responsible only for struct obdo::o_stripe_idx - * field, which is filled there. - */ -static void lovsub_req_attr_set(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *attr, u64 flags) -{ - struct lovsub_object *subobj; - - subobj = cl2lovsub(obj); - /* - * There is no OBD_MD_* flag for obdo::o_stripe_idx, so set it - * unconditionally. It never changes anyway. 
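The setattr path above narrows the whole-file size to a per-stripe object size through lov_stripe_offset(). For a plain RAID0 striping pattern that mapping has a closed form; the helper below is a hedged standalone reconstruction of the arithmetic, not the Lustre function itself:

#include <stdio.h>

/* Size of stripe object 'idx' for a RAID0 layout: 'count' stripes of
 * 'unit' bytes each, whole-file size 'fsize'. Full stripe rows give every
 * object 'unit' bytes; the remainder is spread left to right.
 */
static unsigned long long stripe_object_size(unsigned long long fsize,
					     unsigned int unit,
					     unsigned int count,
					     unsigned int idx)
{
	unsigned long long row = (unsigned long long)unit * count;
	unsigned long long sz = (fsize / row) * unit;
	unsigned long long rem = fsize % row;

	if (rem > (unsigned long long)idx * unit) {
		rem -= (unsigned long long)idx * unit;
		sz += rem < unit ? rem : unit;
	}
	return sz;
}

int main(void)
{
	unsigned int i;

	/* 2.5 MiB file over 3 stripes of 1 MiB: 1 MiB, 1 MiB, 0.5 MiB */
	for (i = 0; i < 3; i++)
		printf("stripe %u: %llu bytes\n", i,
		       stripe_object_size(2621440ULL, 1 << 20, 3, i));
	return 0;
}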
- */ - attr->cra_oa->o_stripe_idx = subobj->lso_index; -} - -static const struct cl_req_operations lovsub_req_ops = { - .cro_attr_set = lovsub_req_attr_set, - .cro_completion = lovsub_req_completion -}; - -/***************************************************************************** - * * Lov-sub device and device type functions. * */ @@ -137,32 +97,12 @@ static struct lu_device *lovsub_device_free(const struct lu_env *env, return next; } -static int lovsub_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req) -{ - struct lovsub_req *lsr; - int result; - - lsr = kmem_cache_zalloc(lovsub_req_kmem, GFP_NOFS); - if (lsr) { - cl_req_slice_add(req, &lsr->lsrq_cl, dev, &lovsub_req_ops); - result = 0; - } else { - result = -ENOMEM; - } - return result; -} - static const struct lu_device_operations lovsub_lu_ops = { .ldo_object_alloc = lovsub_object_alloc, .ldo_process_config = NULL, .ldo_recovery_complete = NULL }; -static const struct cl_device_operations lovsub_cl_ops = { - .cdo_req_init = lovsub_req_init -}; - static struct lu_device *lovsub_device_alloc(const struct lu_env *env, struct lu_device_type *t, struct lustre_cfg *cfg) @@ -178,7 +118,6 @@ static struct lu_device *lovsub_device_alloc(const struct lu_env *env, if (result == 0) { d = lovsub2lu_dev(lsd); d->ld_ops = &lovsub_lu_ops; - lsd->acid_cl.cd_ops = &lovsub_cl_ops; } else { d = ERR_PTR(result); } diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c index a2bac7a3b71b..011296ee16e6 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_object.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c @@ -116,11 +116,31 @@ static int lovsub_object_glimpse(const struct lu_env *env, return cl_object_glimpse(env, &los->lso_super->lo_cl, lvb); } +/** + * Implementation of struct cl_object_operations::coo_req_attr_set() for lovsub + * layer. Lov and lovsub are responsible only for struct obdo::o_stripe_idx + * field, which is filled there. + */ +static void lovsub_req_attr_set(const struct lu_env *env, struct cl_object *obj, + struct cl_req_attr *attr) +{ + struct lovsub_object *subobj = cl2lovsub(obj); + + cl_req_attr_set(env, &subobj->lso_super->lo_cl, attr); + + /* + * There is no OBD_MD_* flag for obdo::o_stripe_idx, so set it + * unconditionally. It never changes anyway. 
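The replacement coo_req_attr_set() hook shown here follows the stacked-layer convention: delegate via cl_req_attr_set() first, then contribute the one field this layer owns (o_stripe_idx). A compact model of that delegate-then-annotate pattern, with illustrative names:

#include <stdio.h>

struct req_attrs {
	unsigned int stripe_idx;
	unsigned long long size;
};

struct layer {
	const char *name;
	struct layer *other;	/* the layer this one delegates to */
	void (*attr_set)(struct layer *l, struct req_attrs *a);
};

static void osc_like_attr_set(struct layer *l, struct req_attrs *a)
{
	(void)l;
	a->size = 1048576;	/* a field only this layer knows */
}

static void lovsub_like_attr_set(struct layer *l, struct req_attrs *a)
{
	if (l->other)
		l->other->attr_set(l->other, a);	/* delegate first */
	a->stripe_idx = 3;	/* then annotate, cf. o_stripe_idx */
}

int main(void)
{
	struct layer osc = { "osc", NULL, osc_like_attr_set };
	struct layer lovsub = { "lovsub", &osc, lovsub_like_attr_set };
	struct req_attrs a = { 0 };

	lovsub.attr_set(&lovsub, &a);
	printf("stripe=%u size=%llu\n", a.stripe_idx, a.size);
	return 0;
}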
+ */ + attr->cra_oa->o_stripe_idx = subobj->lso_index; +} + static const struct cl_object_operations lovsub_ops = { .coo_page_init = lovsub_page_init, .coo_lock_init = lovsub_lock_init, .coo_attr_update = lovsub_attr_update, - .coo_glimpse = lovsub_object_glimpse + .coo_glimpse = lovsub_object_glimpse, + .coo_req_attr_set = lovsub_req_attr_set }; static const struct lu_object_operations lovsub_lu_obj_ops = { diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c index fca9450de57c..9021c465c044 100644 --- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c +++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c @@ -36,6 +36,42 @@ #include "../include/lprocfs_status.h" #include "mdc_internal.h" +static ssize_t active_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct obd_device *dev = container_of(kobj, struct obd_device, + obd_kobj); + + return sprintf(buf, "%u\n", !dev->u.cli.cl_import->imp_deactive); +} + +static ssize_t active_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *dev = container_of(kobj, struct obd_device, + obd_kobj); + unsigned long val; + int rc; + + rc = kstrtoul(buffer, 10, &val); + if (rc) + return rc; + + if (val < 0 || val > 1) + return -ERANGE; + + /* opposite senses */ + if (dev->u.cli.cl_import->imp_deactive == val) { + rc = ptlrpc_set_import_active(dev->u.cli.cl_import, val); + if (rc) + count = rc; + } else { + CDEBUG(D_CONFIG, "activate %lu: ignoring repeat request\n", val); + } + return count; +} +LUSTRE_RW_ATTR(active); + static ssize_t max_rpcs_in_flight_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -73,6 +109,64 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj, } LUSTRE_RW_ATTR(max_rpcs_in_flight); +static ssize_t max_mod_rpcs_in_flight_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct obd_device *dev = container_of(kobj, struct obd_device, + obd_kobj); + u16 max; + int len; + + max = dev->u.cli.cl_max_mod_rpcs_in_flight; + len = sprintf(buf, "%hu\n", max); + + return len; +} + +static ssize_t max_mod_rpcs_in_flight_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t count) +{ + struct obd_device *dev = container_of(kobj, struct obd_device, + obd_kobj); + u16 val; + int rc; + + rc = kstrtou16(buffer, 10, &val); + if (rc) + return rc; + + rc = obd_set_max_mod_rpcs_in_flight(&dev->u.cli, val); + if (rc) + count = rc; + + return count; +} +LUSTRE_RW_ATTR(max_mod_rpcs_in_flight); + +static int mdc_rpc_stats_seq_show(struct seq_file *seq, void *v) +{ + struct obd_device *dev = seq->private; + + return obd_mod_rpc_stats_seq_show(&dev->u.cli, seq); +} + +static ssize_t mdc_rpc_stats_seq_write(struct file *file, + const char __user *buf, + size_t len, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct obd_device *dev = seq->private; + struct client_obd *cli = &dev->u.cli; + + lprocfs_oh_clear(&cli->cl_mod_rpcs_hist); + + return len; +} +LPROC_SEQ_FOPS(mdc_rpc_stats); + LPROC_SEQ_FOPS_WR_ONLY(mdc, ping); LPROC_SEQ_FOPS_RO_TYPE(mdc, connect_flags); @@ -112,11 +206,15 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { { "import", &mdc_import_fops, NULL, 0 }, { "state", &mdc_state_fops, NULL, 0 }, { "pinger_recov", &mdc_pinger_recov_fops, NULL, 0 }, + { .name = "rpc_stats", + .fops = &mdc_rpc_stats_fops }, { NULL } }; static struct attribute *mdc_attrs[] = { + &lustre_attr_active.attr, 
&lustre_attr_max_rpcs_in_flight.attr, + &lustre_attr_max_mod_rpcs_in_flight.attr, &lustre_attr_max_pages_per_rpc.attr, NULL, }; diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h index f446c1c2584b..881c6a0676a6 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h +++ b/drivers/staging/lustre/lustre/mdc/mdc_internal.h @@ -46,7 +46,7 @@ void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size, void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags, struct md_op_data *data, size_t ea_size); void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - void *ea, size_t ealen, void *ea2, size_t ea2len); + void *ea, size_t ealen); void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data, const void *data, size_t datalen, umode_t mode, uid_t uid, gid_t gid, cfs_cap_t capability, __u64 rdev); @@ -75,7 +75,7 @@ int mdc_intent_lock(struct obd_export *exp, __u64 extra_lock_flags); int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, - const ldlm_policy_data_t *policy, + const union ldlm_policy_data *policy, struct lookup_intent *it, struct md_op_data *op_data, struct lustre_handle *lockh, __u64 extra_lock_flags); @@ -105,12 +105,11 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, const char *new, size_t newlen, struct ptlrpc_request **request); int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, void *ea2, size_t ea2len, - struct ptlrpc_request **request, struct md_open_data **mod); + void *ea, size_t ealen, struct ptlrpc_request **request); int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request); int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, - ldlm_policy_data_t *policy, enum ldlm_mode mode, + union ldlm_policy_data *policy, enum ldlm_mode mode, enum ldlm_cancel_flags flags, void *opaque); int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, @@ -122,7 +121,8 @@ int mdc_intent_getattr_async(struct obd_export *exp, enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags, const struct lu_fid *fid, enum ldlm_type type, - ldlm_policy_data_t *policy, enum ldlm_mode mode, + union ldlm_policy_data *policy, + enum ldlm_mode mode, struct lustre_handle *lockh); static inline int mdc_prep_elc_req(struct obd_export *exp, diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c index aac7e04873e2..f35e1f9afdef 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c @@ -139,7 +139,7 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data, rec->cr_time = op_data->op_mod_time; rec->cr_suppgid1 = op_data->op_suppgids[0]; rec->cr_suppgid2 = op_data->op_suppgids[1]; - flags = op_data->op_flags & MF_SOM_LOCAL_FLAGS; + flags = 0; if (op_data->op_bias & MDS_CREATE_VOLATILE) flags |= MDS_OPEN_VOLATILE; set_mrc_cr_flags(rec, flags); @@ -301,16 +301,16 @@ static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec, static void mdc_ioepoch_pack(struct mdt_ioepoch *epoch, struct md_op_data *op_data) { - memcpy(&epoch->handle, &op_data->op_handle, sizeof(epoch->handle)); - epoch->ioepoch = op_data->op_ioepoch; - epoch->flags = op_data->op_flags & MF_SOM_LOCAL_FLAGS; + epoch->mio_handle = op_data->op_handle; + epoch->mio_unused1 = 0; + epoch->mio_unused2 = 0; + epoch->mio_padding = 0; } 
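The new sysfs store handlers above depend on kstrtoul()/kstrtou16() to reject malformed or out-of-range input before any limit is changed. A userspace analog of the strict u16 parse, showing the checks those kernel helpers bundle:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Roughly what kstrtou16(buf, 10, &v) guarantees: the whole string is a
 * base-10 number and the value fits in 16 bits.
 */
static int parse_u16(const char *s, uint16_t *out)
{
	char *end;
	unsigned long val;

	errno = 0;
	val = strtoul(s, &end, 10);
	if (errno == ERANGE || val > UINT16_MAX)
		return -ERANGE;
	if (end == s || *end != '\0')
		return -EINVAL;		/* empty input or trailing garbage */
	*out = (uint16_t)val;
	return 0;
}

int main(void)
{
	uint16_t v = 0;

	printf("parse(\"128\") = %d, v = %u\n", parse_u16("128", &v), v);
	printf("parse(\"70000\") = %d\n", parse_u16("70000", &v));
	return 0;
}

(The kernel helpers additionally tolerate a single trailing newline, which sysfs writes usually carry.)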
void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - void *ea, size_t ealen, void *ea2, size_t ea2len) + void *ea, size_t ealen) { struct mdt_rec_setattr *rec; - struct mdt_ioepoch *epoch; struct lov_user_md *lum = NULL; CLASSERT(sizeof(struct mdt_rec_reint) == @@ -318,11 +318,6 @@ void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data, rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); mdc_setattr_pack_rec(rec, op_data); - if (op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) { - epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH); - mdc_ioepoch_pack(epoch, op_data); - } - if (ealen == 0) return; @@ -335,12 +330,6 @@ void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data, } else { memcpy(lum, ea, ealen); } - - if (ea2len == 0) - return; - - memcpy(req_capsule_client_get(&req->rq_pill, &RMF_LOGCOOKIES), ea2, - ea2len); } void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data) @@ -387,6 +376,31 @@ void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data) mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen); } +static void mdc_intent_close_pack(struct ptlrpc_request *req, + struct md_op_data *op_data) +{ + enum mds_op_bias bias = op_data->op_bias; + struct close_data *data; + struct ldlm_lock *lock; + + if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP | + MDS_RENAME_MIGRATE))) + return; + + data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA); + LASSERT(data); + + lock = ldlm_handle2lock(&op_data->op_lease_handle); + if (lock) { + data->cd_handle = lock->l_remote_handle; + LDLM_LOCK_PUT(lock); + } + ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL); + + data->cd_data_version = op_data->op_data_version; + data->cd_fid = op_data->op_fid2; +} + void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data, const char *old, size_t oldlen, const char *new, size_t newlen) @@ -415,6 +429,15 @@ void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data, if (new) mdc_pack_name(req, &RMF_SYMTGT, new, newlen); + + if (op_data->op_cli_flags & CLI_MIGRATE && + op_data->op_bias & MDS_RENAME_MIGRATE) { + struct mdt_ioepoch *epoch; + + mdc_intent_close_pack(req, op_data); + epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH); + mdc_ioepoch_pack(epoch, op_data); + } } void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags, @@ -441,27 +464,6 @@ void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags, op_data->op_namelen); } -static void mdc_hsm_release_pack(struct ptlrpc_request *req, - struct md_op_data *op_data) -{ - if (op_data->op_bias & MDS_HSM_RELEASE) { - struct close_data *data; - struct ldlm_lock *lock; - - data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA); - - lock = ldlm_handle2lock(&op_data->op_lease_handle); - if (lock) { - data->cd_handle = lock->l_remote_handle; - LDLM_LOCK_PUT(lock); - } - ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL); - - data->cd_data_version = op_data->op_data_version; - data->cd_fid = op_data->op_fid2; - } -} - void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data) { struct mdt_ioepoch *epoch; @@ -484,5 +486,5 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data) rec->sa_valid &= ~MDS_ATTR_ATIME; mdc_ioepoch_pack(epoch, op_data); - mdc_hsm_release_pack(req, op_data); + mdc_intent_close_pack(req, op_data); } diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c 
b/drivers/staging/lustre/lustre/mdc/mdc_locks.c index f1f6c082fa42..54ebb9952d66 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c @@ -38,10 +38,12 @@ #include "../include/obd.h" #include "../include/obd_class.h" #include "../include/lustre_dlm.h" -#include "../include/lustre_fid.h" /* fid_res_name_eq() */ +#include "../include/lustre_fid.h" #include "../include/lustre_mdc.h" #include "../include/lustre_net.h" #include "../include/lustre_req_layout.h" +#include "../include/lustre_swab.h" + #include "mdc_internal.h" struct mdc_getattr_args { @@ -131,7 +133,8 @@ int mdc_set_lock_data(struct obd_export *exp, const struct lustre_handle *lockh, enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags, const struct lu_fid *fid, enum ldlm_type type, - ldlm_policy_data_t *policy, enum ldlm_mode mode, + union ldlm_policy_data *policy, + enum ldlm_mode mode, struct lustre_handle *lockh) { struct ldlm_res_id res_id; @@ -147,7 +150,7 @@ enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags, int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, - ldlm_policy_data_t *policy, + union ldlm_policy_data *policy, enum ldlm_mode mode, enum ldlm_cancel_flags flags, void *opaque) @@ -386,8 +389,6 @@ static struct ptlrpc_request *mdc_intent_unlink_pack(struct obd_export *exp, req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, obddev->u.cli.cl_default_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - obddev->u.cli.cl_default_mds_cookiesize); ptlrpc_request_set_replen(req); return req; } @@ -688,20 +689,20 @@ static int mdc_finish_enqueue(struct obd_export *exp, * we don't know in advance the file type. */ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, - const ldlm_policy_data_t *policy, + const union ldlm_policy_data *policy, struct lookup_intent *it, struct md_op_data *op_data, struct lustre_handle *lockh, u64 extra_lock_flags) { - static const ldlm_policy_data_t lookup_policy = { + static const union ldlm_policy_data lookup_policy = { .l_inodebits = { MDS_INODELOCK_LOOKUP } }; - static const ldlm_policy_data_t update_policy = { + static const union ldlm_policy_data update_policy = { .l_inodebits = { MDS_INODELOCK_UPDATE } }; - static const ldlm_policy_data_t layout_policy = { + static const union ldlm_policy_data layout_policy = { .l_inodebits = { MDS_INODELOCK_LAYOUT } }; - static const ldlm_policy_data_t getxattr_policy = { + static const union ldlm_policy_data getxattr_policy = { .l_inodebits = { MDS_INODELOCK_XATTR } }; struct obd_device *obddev = class_exp2obd(exp); @@ -762,27 +763,22 @@ resend: if (IS_ERR(req)) return PTR_ERR(req); - if (req && it && it->it_op & IT_CREAT) - /* ask ptlrpc not to resend on EINPROGRESS since we have our own - * retry logic - */ - req->rq_no_retry_einprogress = 1; - if (resends) { req->rq_generation_set = 1; req->rq_import_generation = generation; req->rq_sent = ktime_get_real_seconds() + resends; } - /* It is important to obtain rpc_lock first (if applicable), so that - * threads that are serialised with rpc_lock are not polluting our - * rpcs in flight counter. We do not do flock request limiting, though + /* It is important to obtain modify RPC slot first (if applicable), so + * that threads that are waiting for a modify RPC slot are not polluting + * our rpcs in flight counter. 
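mdc_get_mod_rpc_slot()/mdc_put_mod_rpc_slot(), which replace the old single cl_rpc_lock throughout this patch, behave like a counting semaphore sized by cl_max_mod_rpcs_in_flight: a sender blocks until a slot is free instead of serializing on one mutex. A sketch of that limiter with POSIX threads (names invented):

#include <pthread.h>
#include <stdio.h>

struct mod_slots {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	unsigned int in_flight;
	unsigned int max;		/* cf. cl_max_mod_rpcs_in_flight */
};

static void get_mod_slot(struct mod_slots *s)
{
	pthread_mutex_lock(&s->lock);
	while (s->in_flight >= s->max)
		pthread_cond_wait(&s->cond, &s->lock);
	s->in_flight++;
	pthread_mutex_unlock(&s->lock);
}

static void put_mod_slot(struct mod_slots *s)
{
	pthread_mutex_lock(&s->lock);
	s->in_flight--;
	pthread_cond_signal(&s->cond);
	pthread_mutex_unlock(&s->lock);
}

int main(void)
{
	struct mod_slots s = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 8
	};

	get_mod_slot(&s);		/* ...send one modifying RPC... */
	put_mod_slot(&s);		/* ...reply handled, slot returned */
	printf("in flight: %u\n", s.in_flight);
	return 0;
}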
+ * We do not do flock request limiting, though */ if (it) { - mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it); + mdc_get_mod_rpc_slot(req, it); rc = obd_get_request_slot(&obddev->u.cli); if (rc != 0) { - mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it); + mdc_put_mod_rpc_slot(req, it); mdc_clear_replay_flag(req, 0); ptlrpc_req_finished(req); return rc; @@ -809,7 +805,7 @@ resend: } obd_put_request_slot(&obddev->u.cli); - mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it); + mdc_put_mod_rpc_slot(req, it); if (rc < 0) { CDEBUG(D_INFO, "%s: ldlm_cli_enqueue failed: rc = %d\n", @@ -825,11 +821,12 @@ resend: lockrep->lock_policy_res2 = ptlrpc_status_ntoh(lockrep->lock_policy_res2); - /* Retry the create infinitely when we get -EINPROGRESS from - * server. This is required by the new quota design. + /* + * Retry infinitely when the server returns -EINPROGRESS for the + * intent operation, when server returns -EINPROGRESS for acquiring + * intent lock, we'll retry in after_reply(). */ - if (it->it_op & IT_CREAT && - (int)lockrep->lock_policy_res2 == -EINPROGRESS) { + if (it->it_op && (int)lockrep->lock_policy_res2 == -EINPROGRESS) { mdc_clear_replay_flag(req, rc); ptlrpc_req_finished(req); resends++; @@ -931,7 +928,7 @@ static int mdc_finish_intent_lock(struct obd_export *exp, */ lock = ldlm_handle2lock(lockh); if (lock) { - ldlm_policy_data_t policy = lock->l_policy_data; + union ldlm_policy_data policy = lock->l_policy_data; LDLM_DEBUG(lock, "matching against this"); @@ -967,7 +964,7 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, */ struct ldlm_res_id res_id; struct lustre_handle lockh; - ldlm_policy_data_t policy; + union ldlm_policy_data policy; enum ldlm_mode mode; if (it->it_lock_handle) { @@ -1169,10 +1166,9 @@ int mdc_intent_getattr_async(struct obd_export *exp, * for statahead currently. Consider CMD in future, such two bits * maybe managed by different MDS, should be adjusted then. 
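The resend path above retries the whole intent enqueue whenever the server answers -EINPROGRESS, delaying each attempt one second longer than the last (rq_sent = now + resends). The control flow, reduced to a standalone loop with a stand-in operation:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/* Stand-in for the intent RPC: pretend the server needs three tries. */
static int send_intent(int attempt)
{
	return attempt < 3 ? -EINPROGRESS : 0;
}

int main(void)
{
	int resends = 0;
	int rc;

	for (;;) {
		rc = send_intent(resends);
		if (rc != -EINPROGRESS)
			break;
		resends++;
		sleep((unsigned int)resends);	/* linear backoff */
	}
	printf("rc=%d after %d resends\n", rc, resends);
	return 0;
}

Retrying without an upper bound is intentional in the original: -EINPROGRESS means the server is legitimately busy (the removed comment attributed this to the quota design), so giving up would turn a transient server condition into an application error.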
*/ - ldlm_policy_data_t policy = { - .l_inodebits = { MDS_INODELOCK_LOOKUP | - MDS_INODELOCK_UPDATE } - }; + union ldlm_policy_data policy = { + .l_inodebits = { MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE } + }; int rc = 0; __u64 flags = LDLM_FL_HAS_INTENT; diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c index c921e471fa27..07b168490f09 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c @@ -40,17 +40,15 @@ #include "../include/lustre_fid.h" /* mdc_setattr does its own semaphore handling */ -static int mdc_reint(struct ptlrpc_request *request, - struct mdc_rpc_lock *rpc_lock, - int level) +static int mdc_reint(struct ptlrpc_request *request, int level) { int rc; request->rq_send_state = level; - mdc_get_rpc_lock(rpc_lock, NULL); + mdc_get_mod_rpc_slot(request, NULL); rc = ptlrpc_queue_wait(request); - mdc_put_rpc_lock(rpc_lock, NULL); + mdc_put_mod_rpc_slot(request, NULL); if (rc) CDEBUG(D_INFO, "error in handling %d\n", rc); else if (!req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY)) @@ -68,7 +66,7 @@ int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid, __u64 bits) { struct ldlm_namespace *ns = exp->exp_obd->obd_namespace; - ldlm_policy_data_t policy = {}; + union ldlm_policy_data policy = {}; struct ldlm_res_id res_id; struct ldlm_resource *res; int count; @@ -99,13 +97,10 @@ int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid, } int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, void *ea2, size_t ea2len, - struct ptlrpc_request **request, struct md_open_data **mod) + void *ea, size_t ealen, struct ptlrpc_request **request) { LIST_HEAD(cancels); struct ptlrpc_request *req; - struct mdc_rpc_lock *rpc_lock; - struct obd_device *obd = exp->exp_obd; int count = 0, rc; __u64 bits; @@ -122,12 +117,9 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, ldlm_lock_list_put(&cancels, l_bl_ast, count); return -ENOMEM; } - if ((op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) == 0) - req_capsule_set_size(&req->rq_pill, &RMF_MDT_EPOCH, RCL_CLIENT, - 0); + req_capsule_set_size(&req->rq_pill, &RMF_MDT_EPOCH, RCL_CLIENT, 0); req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, ealen); - req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT, - ea2len); + req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT, 0); rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count); if (rc) { @@ -135,63 +127,21 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, return rc; } - rpc_lock = obd->u.cli.cl_rpc_lock; - if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME)) CDEBUG(D_INODE, "setting mtime %ld, ctime %ld\n", LTIME_S(op_data->op_attr.ia_mtime), LTIME_S(op_data->op_attr.ia_ctime)); - mdc_setattr_pack(req, op_data, ea, ealen, ea2, ea2len); + mdc_setattr_pack(req, op_data, ea, ealen); ptlrpc_request_set_replen(req); - if (mod && (op_data->op_flags & MF_EPOCH_OPEN) && - req->rq_import->imp_replayable) { - LASSERT(!*mod); - - *mod = obd_mod_alloc(); - if (!*mod) { - DEBUG_REQ(D_ERROR, req, "Can't allocate md_open_data"); - } else { - req->rq_replay = 1; - req->rq_cb_data = *mod; - (*mod)->mod_open_req = req; - req->rq_commit_cb = mdc_commit_open; - (*mod)->mod_is_create = true; - /** - * Take an extra reference on \var mod, it protects \var - * mod from being freed on eviction (commit callback is - * called despite rq_replay 
flag). - * Will be put on mdc_done_writing(). - */ - obd_mod_get(*mod); - } - } - - rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL); - /* Save the obtained info in the original RPC for the replay case. */ - if (rc == 0 && (op_data->op_flags & MF_EPOCH_OPEN)) { - struct mdt_ioepoch *epoch; - struct mdt_body *body; + rc = mdc_reint(req, LUSTRE_IMP_FULL); - epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH); - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - epoch->handle = body->mbo_handle; - epoch->ioepoch = body->mbo_ioepoch; - req->rq_replay_cb = mdc_replay_open; - /** bug 3633, open may be committed and estale answer is not error */ - } else if (rc == -ESTALE && (op_data->op_flags & MF_SOM_CHANGE)) { - rc = 0; - } else if (rc == -ERESTARTSYS) { + if (rc == -ERESTARTSYS) rc = 0; - } + *request = req; - if (rc && req->rq_commit_cb) { - /* Put an extra reference on \var mod on error case. */ - if (mod && *mod) - obd_mod_put(*mod); - req->rq_commit_cb(req); - } + return rc; } @@ -264,7 +214,7 @@ rebuild: } level = LUSTRE_IMP_FULL; resend: - rc = mdc_reint(req, exp->exp_obd->u.cli.cl_rpc_lock, level); + rc = mdc_reint(req, level); /* Resend if we were told to. */ if (rc == -ERESTARTSYS) { @@ -332,13 +282,11 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, obd->u.cli.cl_default_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, - obd->u.cli.cl_default_mds_cookiesize); ptlrpc_request_set_replen(req); *request = req; - rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL); + rc = mdc_reint(req, LUSTRE_IMP_FULL); if (rc == -ERESTARTSYS) rc = 0; return rc; @@ -348,7 +296,6 @@ int mdc_link(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request) { LIST_HEAD(cancels); - struct obd_device *obd = exp->exp_obd; struct ptlrpc_request *req; int count = 0, rc; @@ -380,7 +327,7 @@ int mdc_link(struct obd_export *exp, struct md_op_data *op_data, mdc_link_pack(req, op_data); ptlrpc_request_set_replen(req); - rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL); + rc = mdc_reint(req, LUSTRE_IMP_FULL); *request = req; if (rc == -ERESTARTSYS) rc = 0; @@ -419,7 +366,8 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, MDS_INODELOCK_FULL); req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_REINT_RENAME); + op_data->op_cli_flags & CLI_MIGRATE ? + &RQF_MDS_REINT_MIGRATE : &RQF_MDS_REINT_RENAME); if (!req) { ldlm_lock_list_put(&cancels, l_bl_ast, count); return -ENOMEM; @@ -435,6 +383,23 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, return rc; } + if (op_data->op_cli_flags & CLI_MIGRATE && op_data->op_data) { + struct md_open_data *mod = op_data->op_data; + + LASSERTF(mod->mod_open_req && + mod->mod_open_req->rq_type != LI_POISON, + "POISONED open %p!\n", mod->mod_open_req); + + DEBUG_REQ(D_HA, mod->mod_open_req, "matched open"); + /* + * We no longer want to preserve this open for replay even + * though the open was committed. 
b=3632, b=3633 + */ + spin_lock(&mod->mod_open_req->rq_lock); + mod->mod_open_req->rq_replay = 0; + spin_unlock(&mod->mod_open_req->rq_lock); + } + if (exp_connect_cancelset(exp) && req) ldlm_cli_cancel_list(&cancels, count, req, 0); @@ -442,11 +407,9 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, obd->u.cli.cl_default_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, - obd->u.cli.cl_default_mds_cookiesize); ptlrpc_request_set_replen(req); - rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL); + rc = mdc_reint(req, LUSTRE_IMP_FULL); *request = req; if (rc == -ERESTARTSYS) rc = 0; diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c index f56ea643f9bf..2cfd913f9bc5 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_request.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c @@ -38,15 +38,18 @@ # include <linux/init.h> # include <linux/utsname.h> +#include "../include/cl_object.h" +#include "../include/llog_swab.h" +#include "../include/lprocfs_status.h" #include "../include/lustre_acl.h" +#include "../include/lustre_fid.h" #include "../include/lustre/lustre_ioctl.h" -#include "../include/obd_class.h" +#include "../include/lustre_kernelcomm.h" #include "../include/lustre_lmv.h" -#include "../include/lustre_fid.h" -#include "../include/lprocfs_status.h" -#include "../include/lustre_param.h" #include "../include/lustre_log.h" -#include "../include/lustre_kernelcomm.h" +#include "../include/lustre_param.h" +#include "../include/lustre_swab.h" +#include "../include/obd_class.h" #include "mdc_internal.h" @@ -327,12 +330,12 @@ static int mdc_xattr_common(struct obd_export *exp, /* make rpc */ if (opcode == MDS_REINT) - mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); + mdc_get_mod_rpc_slot(req, NULL); rc = ptlrpc_queue_wait(req); if (opcode == MDS_REINT) - mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); + mdc_put_mod_rpc_slot(req, NULL); if (rc) ptlrpc_req_finished(req); @@ -420,9 +423,6 @@ static int mdc_get_lustre_md(struct obd_export *exp, md->body = req_capsule_server_get(pill, &RMF_MDT_BODY); if (md->body->mbo_valid & OBD_MD_FLEASIZE) { - int lmmsize; - struct lov_mds_md *lmm; - if (!S_ISREG(md->body->mbo_mode)) { CDEBUG(D_INFO, "OBD_MD_FLEASIZE set, should be a regular file, but is not\n"); @@ -436,28 +436,18 @@ static int mdc_get_lustre_md(struct obd_export *exp, rc = -EPROTO; goto out; } - lmmsize = md->body->mbo_eadatasize; - lmm = req_capsule_server_sized_get(pill, &RMF_MDT_MD, lmmsize); - if (!lmm) { - rc = -EPROTO; - goto out; - } - - rc = obd_unpackmd(dt_exp, &md->lsm, lmm, lmmsize); - if (rc < 0) - goto out; - if (rc < (typeof(rc))sizeof(*md->lsm)) { - CDEBUG(D_INFO, - "lsm size too small: rc < sizeof (*md->lsm) (%d < %d)\n", - rc, (int)sizeof(*md->lsm)); + md->layout.lb_len = md->body->mbo_eadatasize; + md->layout.lb_buf = req_capsule_server_sized_get(pill, + &RMF_MDT_MD, + md->layout.lb_len); + if (!md->layout.lb_buf) { rc = -EPROTO; goto out; } - } else if (md->body->mbo_valid & OBD_MD_FLDIREA) { - int lmvsize; - struct lov_mds_md *lmv; + const union lmv_mds_md *lmv; + size_t lmv_size; if (!S_ISDIR(md->body->mbo_mode)) { CDEBUG(D_INFO, @@ -466,22 +456,21 @@ static int mdc_get_lustre_md(struct obd_export *exp, goto out; } - if (md->body->mbo_eadatasize == 0) { + lmv_size = md->body->mbo_eadatasize; + if (!lmv_size) { CDEBUG(D_INFO, "OBD_MD_FLDIREA is set, but eadatasize 0\n"); return 
-EPROTO; } if (md->body->mbo_valid & OBD_MD_MEA) { - lmvsize = md->body->mbo_eadatasize; lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD, - lmvsize); + lmv_size); if (!lmv) { rc = -EPROTO; goto out; } - rc = obd_unpackmd(md_exp, (void *)&md->lmv, lmv, - lmvsize); + rc = md_unpackmd(md_exp, &md->lmv, lmv, lmv_size); if (rc < 0) goto out; @@ -517,8 +506,6 @@ out: #ifdef CONFIG_FS_POSIX_ACL posix_acl_release(md->posix_acl); #endif - if (md->lsm) - obd_free_memmd(dt_exp, &md->lsm); } return rc; } @@ -528,10 +515,6 @@ static int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md) return 0; } -/** - * Handles both OPEN and SETATTR RPCs for OPEN-CLOSE and SETATTR-DONE_WRITING - * RPC chains. - */ void mdc_replay_open(struct ptlrpc_request *req) { struct md_open_data *mod = req->rq_cb_data; @@ -565,15 +548,15 @@ void mdc_replay_open(struct ptlrpc_request *req) __u32 opc = lustre_msg_get_opc(close_req->rq_reqmsg); struct mdt_ioepoch *epoch; - LASSERT(opc == MDS_CLOSE || opc == MDS_DONE_WRITING); + LASSERT(opc == MDS_CLOSE); epoch = req_capsule_client_get(&close_req->rq_pill, &RMF_MDT_EPOCH); LASSERT(epoch); if (och) - LASSERT(!memcmp(&old, &epoch->handle, sizeof(old))); + LASSERT(!memcmp(&old, &epoch->mio_handle, sizeof(old))); DEBUG_REQ(D_HA, close_req, "updating close body with new fh"); - epoch->handle = body->mbo_handle; + epoch->mio_handle = body->mbo_handle; } } @@ -715,22 +698,6 @@ static int mdc_clear_open_replay_data(struct obd_export *exp, return 0; } -/* Prepares the request for the replay by the given reply */ -static void mdc_close_handle_reply(struct ptlrpc_request *req, - struct md_op_data *op_data, int rc) { - struct mdt_body *repbody; - struct mdt_ioepoch *epoch; - - if (req && rc == -EAGAIN) { - repbody = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH); - - epoch->flags |= MF_SOM_AU; - if (repbody->mbo_valid & OBD_MD_FLGETATTRLOCK) - op_data->op_flags |= MF_GETATTR_LOCK; - } -} - static int mdc_close(struct obd_export *exp, struct md_op_data *op_data, struct md_open_data *mod, struct ptlrpc_request **request) { @@ -740,9 +707,8 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data, int rc; int saved_rc = 0; - req_fmt = &RQF_MDS_CLOSE; if (op_data->op_bias & MDS_HSM_RELEASE) { - req_fmt = &RQF_MDS_RELEASE_CLOSE; + req_fmt = &RQF_MDS_INTENT_CLOSE; /* allocate a FID for volatile file */ rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); @@ -752,6 +718,10 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data, /* save the errcode and proceed to close */ saved_rc = rc; } + } else if (op_data->op_bias & MDS_CLOSE_LAYOUT_SWAP) { + req_fmt = &RQF_MDS_INTENT_CLOSE; + } else { + req_fmt = &RQF_MDS_CLOSE; } *request = NULL; @@ -807,14 +777,12 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data, req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, obd->u.cli.cl_default_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, - obd->u.cli.cl_default_mds_cookiesize); ptlrpc_request_set_replen(req); - mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL); + mdc_get_mod_rpc_slot(req, NULL); rc = ptlrpc_queue_wait(req); - mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL); + mdc_put_mod_rpc_slot(req, NULL); if (!req->rq_repmsg) { CDEBUG(D_RPCTRACE, "request failed to send: %p, %d\n", req, @@ -857,79 +825,9 @@ out: obd_mod_put(mod); } *request = req; - mdc_close_handle_reply(req, op_data, rc); return rc < 0 ? 
rc : saved_rc; } -static int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data, - struct md_open_data *mod) -{ - struct obd_device *obd = class_exp2obd(exp); - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_DONE_WRITING); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_DONE_WRITING); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - if (mod) { - LASSERTF(mod->mod_open_req && - mod->mod_open_req->rq_type != LI_POISON, - "POISONED setattr %p!\n", mod->mod_open_req); - - mod->mod_close_req = req; - DEBUG_REQ(D_HA, mod->mod_open_req, "matched setattr"); - /* We no longer want to preserve this setattr for replay even - * though the open was committed. b=3632, b=3633 - */ - spin_lock(&mod->mod_open_req->rq_lock); - mod->mod_open_req->rq_replay = 0; - spin_unlock(&mod->mod_open_req->rq_lock); - } - - mdc_close_pack(req, op_data); - ptlrpc_request_set_replen(req); - - mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL); - rc = ptlrpc_queue_wait(req); - mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL); - - if (rc == -ESTALE) { - /** - * it can be allowed error after 3633 if open or setattr were - * committed and server failed before close was sent. - * Let's check if mod exists and return no error in that case - */ - if (mod) { - if (mod->mod_open_req->rq_committed) - rc = 0; - } - } - - if (mod) { - if (rc != 0) - mod->mod_close_req = NULL; - LASSERT(mod->mod_open_req); - mdc_free_open(mod); - - /* Since now, mod is accessed through setattr req only, - * thus DW req does not keep a reference on mod anymore. - */ - obd_mod_put(mod); - } - - mdc_close_handle_reply(req, op_data, rc); - ptlrpc_req_finished(req); - return rc; -} - static int mdc_getpage(struct obd_export *exp, const struct lu_fid *fid, u64 offset, struct page **pages, int npages, struct ptlrpc_request **request) @@ -959,8 +857,10 @@ restart_bulk: req->rq_request_portal = MDS_READPAGE_PORTAL; ptlrpc_at_set_req_timeout(req); - desc = ptlrpc_prep_bulk_imp(req, npages, 1, BULK_PUT_SINK, - MDS_BULK_PORTAL); + desc = ptlrpc_prep_bulk_imp(req, npages, 1, + PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV, + MDS_BULK_PORTAL, + &ptlrpc_bulk_kiov_pin_ops); if (!desc) { ptlrpc_request_free(req); return -ENOMEM; @@ -968,7 +868,7 @@ restart_bulk: /* NB req now owns desc and will free it when it gets freed */ for (i = 0; i < npages; i++) - ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE); + desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE); mdc_readdir_pack(req, offset, PAGE_SIZE * npages, fid); @@ -1546,7 +1446,7 @@ static int mdc_ioc_fid2path(struct obd_export *exp, struct getinfo_fid2path *gf) /* Val is struct getinfo_fid2path result plus path */ vallen = sizeof(*gf) + gf->gf_pathlen; - rc = obd_get_info(NULL, exp, keylen, key, &vallen, gf, NULL); + rc = obd_get_info(NULL, exp, keylen, key, &vallen, gf); if (rc != 0 && rc != -EREMOTE) goto out; @@ -1558,8 +1458,11 @@ static int mdc_ioc_fid2path(struct obd_export *exp, struct getinfo_fid2path *gf) goto out; } - CDEBUG(D_IOCTL, "path get "DFID" from %llu #%d\n%s\n", - PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno, gf->gf_path); + CDEBUG(D_IOCTL, "path got " DFID " from %llu #%d: %s\n", + PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno, + gf->gf_pathlen < 512 ? 
gf->gf_path : + /* only log the last 512 characters of the path */ + gf->gf_path + gf->gf_pathlen - 512); out: kfree(key); @@ -1595,7 +1498,9 @@ static int mdc_ioc_hsm_progress(struct obd_export *exp, ptlrpc_request_set_replen(req); - rc = mdc_queue_wait(req); + mdc_get_mod_rpc_slot(req, NULL); + rc = ptlrpc_queue_wait(req); + mdc_put_mod_rpc_slot(req, NULL); out: ptlrpc_req_finished(req); return rc; @@ -1773,7 +1678,9 @@ static int mdc_ioc_hsm_state_set(struct obd_export *exp, ptlrpc_request_set_replen(req); - rc = mdc_queue_wait(req); + mdc_get_mod_rpc_slot(req, NULL); + rc = ptlrpc_queue_wait(req); + mdc_put_mod_rpc_slot(req, NULL); out: ptlrpc_req_finished(req); return rc; @@ -1836,7 +1743,9 @@ static int mdc_ioc_hsm_request(struct obd_export *exp, ptlrpc_request_set_replen(req); - rc = mdc_queue_wait(req); + mdc_get_mod_rpc_slot(req, NULL); + rc = ptlrpc_queue_wait(req); + mdc_put_mod_rpc_slot(req, NULL); out: ptlrpc_req_finished(req); return rc; @@ -1957,10 +1866,8 @@ static int mdc_changelog_send_thread(void *csdata) /* Send EOF no matter what our result */ kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch), cs->cs_flags); - if (kuch) { - kuch->kuc_msgtype = CL_EOF; - libcfs_kkuc_msg_put(cs->cs_fp, kuch); - } + kuch->kuc_msgtype = CL_EOF; + libcfs_kkuc_msg_put(cs->cs_fp, kuch); out: fput(cs->cs_fp); @@ -2015,52 +1922,6 @@ static int mdc_ioc_changelog_send(struct obd_device *obd, static int mdc_ioc_hsm_ct_start(struct obd_export *exp, struct lustre_kernelcomm *lk); -static int mdc_quotacheck(struct obd_device *unused, struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - struct client_obd *cli = &exp->exp_obd->u.cli; - struct ptlrpc_request *req; - struct obd_quotactl *body; - int rc; - - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_MDS_QUOTACHECK, LUSTRE_MDS_VERSION, - MDS_QUOTACHECK); - if (!req) - return -ENOMEM; - - body = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - *body = *oqctl; - - ptlrpc_request_set_replen(req); - - /* the next poll will find -ENODATA, that means quotacheck is - * going on - */ - cli->cl_qchk_stat = -ENODATA; - rc = ptlrpc_queue_wait(req); - if (rc) - cli->cl_qchk_stat = rc; - ptlrpc_req_finished(req); - return rc; -} - -static int mdc_quota_poll_check(struct obd_export *exp, - struct if_quotacheck *qchk) -{ - struct client_obd *cli = &exp->exp_obd->u.cli; - int rc; - - qchk->obd_uuid = cli->cl_target_uuid; - memcpy(qchk->obd_type, LUSTRE_MDS_NAME, strlen(LUSTRE_MDS_NAME)); - - rc = cli->cl_qchk_stat; - /* the client is not the previous one */ - if (rc == CL_NOT_QUOTACHECKED) - rc = -EINTR; - return rc; -} - static int mdc_quotactl(struct obd_device *unused, struct obd_export *exp, struct obd_quotactl *oqctl) { @@ -2215,9 +2076,6 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case IOC_OSC_SET_ACTIVE: rc = ptlrpc_set_import_active(imp, data->ioc_offset); goto out; - case OBD_IOC_POLL_QUOTACHECK: - rc = mdc_quota_poll_check(exp, (struct if_quotacheck *)karg); - goto out; case OBD_IOC_PING_TARGET: rc = ptlrpc_obd_ping(obd); goto out; @@ -2528,8 +2386,7 @@ static int mdc_set_info_async(const struct lu_env *env, } static int mdc_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm) + __u32 keylen, void *key, __u32 *vallen, void *val) { int rc = -EINVAL; @@ -2733,29 +2590,17 @@ static void mdc_llog_finish(struct obd_device *obd) static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) { - struct 
client_obd *cli = &obd->u.cli; struct lprocfs_static_vars lvars = { NULL }; int rc; - cli->cl_rpc_lock = kzalloc(sizeof(*cli->cl_rpc_lock), GFP_NOFS); - if (!cli->cl_rpc_lock) - return -ENOMEM; - mdc_init_rpc_lock(cli->cl_rpc_lock); - rc = ptlrpcd_addref(); if (rc < 0) - goto err_rpc_lock; - - cli->cl_close_lock = kzalloc(sizeof(*cli->cl_close_lock), GFP_NOFS); - if (!cli->cl_close_lock) { - rc = -ENOMEM; - goto err_ptlrpcd_decref; - } - mdc_init_rpc_lock(cli->cl_close_lock); + return rc; rc = client_obd_setup(obd, cfg); if (rc) - goto err_close_lock; + goto err_ptlrpcd_decref; + lprocfs_mdc_init_vars(&lvars); lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars); sptlrpc_lprocfs_cliobd_attach(obd); @@ -2769,29 +2614,25 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) if (rc) { mdc_cleanup(obd); CERROR("failed to setup llogging subsystems\n"); + return rc; } return rc; -err_close_lock: - kfree(cli->cl_close_lock); err_ptlrpcd_decref: ptlrpcd_decref(); -err_rpc_lock: - kfree(cli->cl_rpc_lock); return rc; } -/* Initialize the default and maximum LOV EA and cookie sizes. This allows +/* Initialize the default and maximum LOV EA sizes. This allows * us to make MDS RPCs with large enough reply buffers to hold a default - * sized EA and cookie without having to calculate this (via a call into the + * sized EA without having to calculate this (via a call into the * LOV + OSCs) each time we make an RPC. The maximum size is also tracked * but not used to avoid wastefully vmalloc()'ing large reply buffers when * a large number of stripes is possible. If a larger reply buffer is * required it will be reallocated in the ptlrpc layer due to overflow. */ -static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize, - u32 cookiesize, u32 def_cookiesize) +static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize) { struct obd_device *obd = exp->exp_obd; struct client_obd *cli = &obd->u.cli; @@ -2802,42 +2643,24 @@ static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize, if (cli->cl_default_mds_easize < def_easize) cli->cl_default_mds_easize = def_easize; - if (cli->cl_max_mds_cookiesize < cookiesize) - cli->cl_max_mds_cookiesize = cookiesize; - - if (cli->cl_default_mds_cookiesize < def_cookiesize) - cli->cl_default_mds_cookiesize = def_cookiesize; - return 0; } -static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) +static int mdc_precleanup(struct obd_device *obd) { - switch (stage) { - case OBD_CLEANUP_EARLY: - break; - case OBD_CLEANUP_EXPORTS: - /* Failsafe, ok if racy */ - if (obd->obd_type->typ_refcnt <= 1) - libcfs_kkuc_group_rem(0, KUC_GRP_HSM); + /* Failsafe, ok if racy */ + if (obd->obd_type->typ_refcnt <= 1) + libcfs_kkuc_group_rem(0, KUC_GRP_HSM); - obd_cleanup_client_import(obd); - ptlrpc_lprocfs_unregister_obd(obd); - lprocfs_obd_cleanup(obd); - - mdc_llog_finish(obd); - break; - } + obd_cleanup_client_import(obd); + ptlrpc_lprocfs_unregister_obd(obd); + lprocfs_obd_cleanup(obd); + mdc_llog_finish(obd); return 0; } static int mdc_cleanup(struct obd_device *obd) { - struct client_obd *cli = &obd->u.cli; - - kfree(cli->cl_rpc_lock); - kfree(cli->cl_close_lock); - ptlrpcd_decref(); return client_obd_cleanup(obd); @@ -2881,7 +2704,6 @@ static struct obd_ops mdc_obd_ops = { .process_config = mdc_process_config, .get_uuid = mdc_get_uuid, .quotactl = mdc_quotactl, - .quotacheck = mdc_quotacheck }; static struct md_ops mdc_md_ops = { @@ -2889,7 +2711,6 @@ static struct md_ops 
mdc_md_ops = { .null_inode = mdc_null_inode, .close = mdc_close, .create = mdc_create, - .done_writing = mdc_done_writing, .enqueue = mdc_enqueue, .getattr = mdc_getattr, .getattr_name = mdc_getattr_name, diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c index 23374cae5133..b9c522a3c7a4 100644 --- a/drivers/staging/lustre/lustre/mgc/mgc_request.c +++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c @@ -38,11 +38,13 @@ #define D_MGC D_CONFIG /*|D_WARNING*/ #include <linux/module.h> -#include "../include/obd_class.h" -#include "../include/lustre_dlm.h" + #include "../include/lprocfs_status.h" -#include "../include/lustre_log.h" +#include "../include/lustre_dlm.h" #include "../include/lustre_disk.h" +#include "../include/lustre_log.h" +#include "../include/lustre_swab.h" +#include "../include/obd_class.h" #include "mgc_internal.h" @@ -373,7 +375,7 @@ out_err: return rc; } -DEFINE_MUTEX(llog_process_lock); +static DEFINE_MUTEX(llog_process_lock); /** Stop watching for updates on this log. */ @@ -684,35 +686,33 @@ static int mgc_llog_fini(const struct lu_env *env, struct obd_device *obd) } static atomic_t mgc_count = ATOMIC_INIT(0); -static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) +static int mgc_precleanup(struct obd_device *obd) { int rc = 0; int temp; - switch (stage) { - case OBD_CLEANUP_EARLY: - break; - case OBD_CLEANUP_EXPORTS: - if (atomic_dec_and_test(&mgc_count)) { - LASSERT(rq_state & RQ_RUNNING); - /* stop requeue thread */ - temp = RQ_STOP; - } else { - /* wakeup requeue thread to clean our cld */ - temp = RQ_NOW | RQ_PRECLEANUP; - } - spin_lock(&config_list_lock); - rq_state |= temp; - spin_unlock(&config_list_lock); - wake_up(&rq_waitq); - if (temp & RQ_STOP) - wait_for_completion(&rq_exit); - obd_cleanup_client_import(obd); - rc = mgc_llog_fini(NULL, obd); - if (rc != 0) - CERROR("failed to cleanup llogging subsystems\n"); - break; + if (atomic_dec_and_test(&mgc_count)) { + LASSERT(rq_state & RQ_RUNNING); + /* stop requeue thread */ + temp = RQ_STOP; + } else { + /* wakeup requeue thread to clean our cld */ + temp = RQ_NOW | RQ_PRECLEANUP; } + + spin_lock(&config_list_lock); + rq_state |= temp; + spin_unlock(&config_list_lock); + wake_up(&rq_waitq); + + if (temp & RQ_STOP) + wait_for_completion(&rq_exit); + obd_cleanup_client_import(obd); + + rc = mgc_llog_fini(NULL, obd); + if (rc) + CERROR("failed to cleanup llogging subsystems\n"); + return rc; } @@ -887,8 +887,8 @@ static int mgc_set_mgs_param(struct obd_export *exp, } /* Take a config lock so we can get cancel notifications */ -static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, - __u32 type, ldlm_policy_data_t *policy, __u32 mode, +static int mgc_enqueue(struct obd_export *exp, __u32 type, + union ldlm_policy_data *policy, __u32 mode, __u64 *flags, void *bl_cb, void *cp_cb, void *gl_cb, void *data, __u32 lvb_len, void *lvb_swabber, struct lustre_handle *lockh) @@ -1059,8 +1059,7 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp, } static int mgc_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *unused) + __u32 keylen, void *key, __u32 *vallen, void *val) { int rc = -EINVAL; @@ -1387,15 +1386,17 @@ again: body->mcb_units = nrpages; /* allocate bulk transfer descriptor */ - desc = ptlrpc_prep_bulk_imp(req, nrpages, 1, BULK_PUT_SINK, - MGS_BULK_PORTAL); + desc = ptlrpc_prep_bulk_imp(req, nrpages, 
1, + PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV, + MGS_BULK_PORTAL, + &ptlrpc_bulk_kiov_pin_ops); if (!desc) { rc = -ENOMEM; goto out; } for (i = 0; i < nrpages; i++) - ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE); + desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE); ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); @@ -1553,14 +1554,52 @@ out_free: return rc; } -/** Get a config log from the MGS and process it. - * This func is called for both clients and servers. - * Copy the log locally before parsing it if appropriate (non-MGS server) +static bool mgc_import_in_recovery(struct obd_import *imp) +{ + bool in_recovery = true; + + spin_lock(&imp->imp_lock); + if (imp->imp_state == LUSTRE_IMP_FULL || + imp->imp_state == LUSTRE_IMP_CLOSED) + in_recovery = false; + spin_unlock(&imp->imp_lock); + + return in_recovery; +} + +/** + * Get a configuration log from the MGS and process it. + * + * This function is called for both clients and servers to process the + * configuration log from the MGS. The MGC enqueues a DLM lock on the + * log from the MGS, and if the lock gets revoked the MGC will be notified + * by the lock cancellation callback that the config log has changed, + * and will enqueue another MGS lock on it, and then continue processing + * the new additions to the end of the log. + * + * Since the MGC import is not replayable, if the import is being evicted + * (rcl == -ESHUTDOWN, \see ptlrpc_import_delay_req()), retry to process + * the log until recovery is finished or the import is closed. + * + * Make a local copy of the log before parsing it if appropriate (non-MGS + * server) so that the server can start even when the MGS is down. + * + * There shouldn't be multiple processes running process_log at once -- + * sounds like badness. It actually might be fine, as long as they're not + * trying to update from the same log simultaneously, in which case we + * should use a per-log semaphore instead of cld_lock. + * + * \param[in] mgc MGC device by which to fetch the configuration log + * \param[in] cld log processing state (stored in lock callback data) + * + * \retval 0 on success + * \retval negative errno on failure */ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld) { struct lustre_handle lockh = { 0 }; __u64 flags = LDLM_FL_NO_LRU; + bool retry = false; int rc = 0, rcl; LASSERT(cld); @@ -1570,6 +1609,7 @@ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld) * we're not trying to update from the same log * simultaneously (in which case we should use a per-log sem.) */ +restart: mutex_lock(&cld->cld_lock); if (cld->cld_stopping) { mutex_unlock(&cld->cld_lock); @@ -1582,7 +1622,7 @@ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld) cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1); /* Get the cfg lock on the llog */ - rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, NULL, LDLM_PLAIN, NULL, + rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, LDLM_PLAIN, NULL, LCK_CR, &flags, NULL, NULL, NULL, cld, 0, NULL, &lockh); if (rcl == 0) { @@ -1593,18 +1633,57 @@ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld) } else { CDEBUG(D_MGC, "Can't get cfg lock: %d\n", rcl); - /* mark cld_lostlock so that it will requeue - * after MGC becomes available. 
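When the config-lock enqueue fails with -ESHUTDOWN, the new code above forces the pinger and then blocks until the import either leaves recovery or a timeout expires, re-checking the state under imp_lock. The same wait-for-predicate-with-deadline shape, sketched with POSIX condition variables (all names illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool in_recovery = true;		/* cf. mgc_import_in_recovery() */

static void *connector(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	in_recovery = false;		/* import reached FULL (or CLOSED) */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	struct timespec deadline;
	pthread_t tid;
	int rc = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 5;		/* cf. the obd_timeout bound */

	pthread_create(&tid, NULL, connector, NULL);

	pthread_mutex_lock(&lock);
	while (in_recovery && rc == 0)
		rc = pthread_cond_timedwait(&cond, &lock, &deadline);
	pthread_mutex_unlock(&lock);

	puts(rc ? "timed out, marking lock lost" : "recovered, restarting");
	pthread_join(tid, NULL);
	return 0;
}

As in the MGC path, the two outcomes differ: on recovery the caller restarts the whole enqueue, while on timeout it only marks cld_lostlock so the requeue thread retries later.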
- */ - cld->cld_lostlock = 1; + if (rcl == -ESHUTDOWN && + atomic_read(&mgc->u.cli.cl_mgc_refcount) > 0 && !retry) { + int secs = cfs_time_seconds(obd_timeout); + struct obd_import *imp; + struct l_wait_info lwi; + + mutex_unlock(&cld->cld_lock); + imp = class_exp2cliimp(mgc->u.cli.cl_mgc_mgsexp); + + /* + * Let's force the pinger, and wait the import to be + * connected, note: since mgc import is non-replayable, + * and even the import state is disconnected, it does + * not mean the "recovery" is stopped, so we will keep + * waitting until timeout or the import state is + * FULL or closed + */ + ptlrpc_pinger_force(imp); + + lwi = LWI_TIMEOUT(secs, NULL, NULL); + l_wait_event(imp->imp_recovery_waitq, + !mgc_import_in_recovery(imp), &lwi); + + if (imp->imp_state == LUSTRE_IMP_FULL) { + retry = true; + goto restart; + } else { + mutex_lock(&cld->cld_lock); + cld->cld_lostlock = 1; + } + } else { + /* mark cld_lostlock so that it will requeue + * after MGC becomes available. + */ + cld->cld_lostlock = 1; + } /* Get extra reference, it will be put in requeue thread */ config_log_get(cld); } if (cld_is_recover(cld)) { rc = 0; /* this is not a fatal error for recover log */ - if (rcl == 0) + if (!rcl) { rc = mgc_process_recover_log(mgc, cld); + if (rc) { + CERROR("%s: recover log %s failed: rc = %d not fatal.\n", + mgc->obd_name, cld->cld_logname, rc); + rc = 0; + cld->cld_lostlock = 1; + } + } } else { rc = mgc_process_cfg_log(mgc, cld, rcl != 0); } diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile index b42e109b30e0..af570c0db15b 100644 --- a/drivers/staging/lustre/lustre/obdclass/Makefile +++ b/drivers/staging/lustre/lustre/obdclass/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_LUSTRE_FS) += obdclass.o -obdclass-y := linux/linux-module.o linux/linux-obdo.o linux/linux-sysctl.o \ +obdclass-y := linux/linux-module.o linux/linux-sysctl.o \ llog.o llog_cat.o llog_obd.o llog_swab.o class_obd.o debug.o \ genops.o uuid.o lprocfs_status.o lprocfs_counters.o \ lustre_handles.o lustre_peer.o statfs_pack.o linkea.o \ diff --git a/drivers/staging/lustre/lustre/obdclass/cl_internal.h b/drivers/staging/lustre/lustre/obdclass/cl_internal.h index e866754a42d5..7b403fbd5f94 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_internal.h +++ b/drivers/staging/lustre/lustre/obdclass/cl_internal.h @@ -50,25 +50,6 @@ enum clt_nesting_level { }; /** - * Counters used to check correctness of cl_lock interface usage. - */ -struct cl_thread_counters { - /** - * Number of outstanding calls to cl_lock_mutex_get() made by the - * current thread. For debugging. - */ - int ctc_nr_locks_locked; - /** List of locked locks. */ - struct lu_ref ctc_locks_locked; - /** Number of outstanding holds on locks. */ - int ctc_nr_held; - /** Number of outstanding uses on locks. */ - int ctc_nr_used; - /** Number of held extent locks. */ - int ctc_nr_locks_acquired; -}; - -/** * Thread local state internal for generic cl-code. */ struct cl_thread_info { @@ -83,10 +64,6 @@ struct cl_thread_info { */ struct cl_lock_descr clt_descr; struct cl_page_list clt_list; - /** - * Counters for every level of lock nesting. 
- */ - struct cl_thread_counters clt_counters[CNL_NR]; /** @} debugging */ /* diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c index bc4b7b6b9a20..3f42457b0d7d 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_io.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c @@ -126,6 +126,7 @@ void cl_io_fini(const struct lu_env *env, struct cl_io *io) switch (io->ci_type) { case CIT_READ: case CIT_WRITE: + case CIT_DATA_VERSION: break; case CIT_FAULT: break; @@ -411,7 +412,6 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io) scan->cis_iop->op[io->ci_type].cio_unlock(env, scan); } io->ci_state = CIS_UNLOCKED; - LASSERT(!cl_env_info(env)->clt_counters[CNL_TOP].ctc_nr_locks_acquired); } EXPORT_SYMBOL(cl_io_unlock); @@ -586,67 +586,32 @@ void cl_io_end(const struct lu_env *env, struct cl_io *io) } EXPORT_SYMBOL(cl_io_end); -static const struct cl_page_slice * -cl_io_slice_page(const struct cl_io_slice *ios, struct cl_page *page) -{ - const struct cl_page_slice *slice; - - slice = cl_page_at(page, ios->cis_obj->co_lu.lo_dev->ld_type); - LINVRNT(slice); - return slice; -} - /** - * Called by read io, when page has to be read from the server. + * Called by read io, to decide the readahead extent * - * \see cl_io_operations::cio_read_page() + * \see cl_io_operations::cio_read_ahead() */ -int cl_io_read_page(const struct lu_env *env, struct cl_io *io, - struct cl_page *page) +int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io, + pgoff_t start, struct cl_read_ahead *ra) { const struct cl_io_slice *scan; - struct cl_2queue *queue; int result = 0; LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT); - LINVRNT(cl_page_is_owned(page, io)); LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED); LINVRNT(cl_io_invariant(io)); - queue = &io->ci_queue; - - cl_2queue_init(queue); - /* - * ->cio_read_page() methods called in the loop below are supposed to - * never block waiting for network (the only subtle point is the - * creation of new pages for read-ahead that might result in cache - * shrinking, but currently only clean pages are shrunk and this - * requires no network io). - * - * Should this ever starts blocking, retry loop would be needed for - * "parallel io" (see CLO_REPEAT loops in cl_lock.c). - */ cl_io_for_each(scan, io) { - if (scan->cis_iop->cio_read_page) { - const struct cl_page_slice *slice; + if (!scan->cis_iop->cio_read_ahead) + continue; - slice = cl_io_slice_page(scan, page); - LINVRNT(slice); - result = scan->cis_iop->cio_read_page(env, scan, slice); - if (result != 0) - break; - } + result = scan->cis_iop->cio_read_ahead(env, scan, start, ra); + if (result) + break; } - if (result == 0 && queue->c2_qin.pl_nr > 0) - result = cl_io_submit_rw(env, io, CRT_READ, queue); - /* - * Unlock unsent pages in case of error. - */ - cl_page_list_disown(env, io, &queue->c2_qin); - cl_2queue_fini(env, queue); - return result; + return result > 0 ? 0 : result; } -EXPORT_SYMBOL(cl_io_read_page); +EXPORT_SYMBOL(cl_io_read_ahead); /** * Commit a list of contiguous pages into writeback cache. @@ -1080,235 +1045,18 @@ struct cl_io *cl_io_top(struct cl_io *io) EXPORT_SYMBOL(cl_io_top); /** - * Adds request slice to the compound request. - * - * This is called by cl_device_operations::cdo_req_init() methods to add a - * per-layer state to the request. New state is added at the end of - * cl_req::crq_layers list, that is, it is at the bottom of the stack. 
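cl_io_read_ahead() above shows the slice-walk convention used throughout cl_io.c: per-layer state lives on a per-io list built by tail insertion, the walk visits layers top to bottom, skips layers that do not implement the hook, and stops at the first non-zero result. A self-contained model of that dispatch:

#include <stdio.h>

struct slice {
	const char *name;
	int (*read_ahead)(struct slice *s);	/* optional per-layer hook */
	struct slice *next;
};

/* Tail insertion: layers added later sit lower in the stack. */
static void slice_add(struct slice **head, struct slice *s)
{
	while (*head)
		head = &(*head)->next;
	s->next = NULL;
	*head = s;
}

static int top_hook(struct slice *s)
{
	printf("%s: proposes a readahead window\n", s->name);
	return 0;			/* zero: keep descending */
}

static int bottom_hook(struct slice *s)
{
	printf("%s: clips the window, stops the walk\n", s->name);
	return 1;			/* non-zero result ends the loop */
}

int main(void)
{
	struct slice top = { "vvp", top_hook, NULL };
	struct slice mid = { "lov", NULL, NULL };	/* no hook: skipped */
	struct slice bot = { "osc", bottom_hook, NULL };
	struct slice *head = NULL, *s;
	int rc = 0;

	slice_add(&head, &top);
	slice_add(&head, &mid);
	slice_add(&head, &bot);

	for (s = head; s; s = s->next) {
		if (!s->read_ahead)
			continue;
		rc = s->read_ahead(s);
		if (rc)
			break;
	}
	return rc > 0 ? 0 : rc;		/* positive means handled, as above */
}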
- * - * \see cl_lock_slice_add(), cl_page_slice_add(), cl_io_slice_add() - */ -void cl_req_slice_add(struct cl_req *req, struct cl_req_slice *slice, - struct cl_device *dev, - const struct cl_req_operations *ops) -{ - list_add_tail(&slice->crs_linkage, &req->crq_layers); - slice->crs_dev = dev; - slice->crs_ops = ops; - slice->crs_req = req; -} -EXPORT_SYMBOL(cl_req_slice_add); - -static void cl_req_free(const struct lu_env *env, struct cl_req *req) -{ - unsigned i; - - LASSERT(list_empty(&req->crq_pages)); - LASSERT(req->crq_nrpages == 0); - LINVRNT(list_empty(&req->crq_layers)); - LINVRNT(equi(req->crq_nrobjs > 0, req->crq_o)); - - if (req->crq_o) { - for (i = 0; i < req->crq_nrobjs; ++i) { - struct cl_object *obj = req->crq_o[i].ro_obj; - - if (obj) { - lu_object_ref_del_at(&obj->co_lu, - &req->crq_o[i].ro_obj_ref, - "cl_req", req); - cl_object_put(env, obj); - } - } - kfree(req->crq_o); - } - kfree(req); -} - -static int cl_req_init(const struct lu_env *env, struct cl_req *req, - struct cl_page *page) -{ - struct cl_device *dev; - struct cl_page_slice *slice; - int result; - - result = 0; - list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { - dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev); - if (dev->cd_ops->cdo_req_init) { - result = dev->cd_ops->cdo_req_init(env, dev, req); - if (result != 0) - break; - } - } - return result; -} - -/** - * Invokes per-request transfer completion call-backs - * (cl_req_operations::cro_completion()) bottom-to-top. - */ -void cl_req_completion(const struct lu_env *env, struct cl_req *req, int rc) -{ - struct cl_req_slice *slice; - - /* - * for the lack of list_for_each_entry_reverse_safe()... - */ - while (!list_empty(&req->crq_layers)) { - slice = list_entry(req->crq_layers.prev, - struct cl_req_slice, crs_linkage); - list_del_init(&slice->crs_linkage); - if (slice->crs_ops->cro_completion) - slice->crs_ops->cro_completion(env, slice, rc); - } - cl_req_free(env, req); -} -EXPORT_SYMBOL(cl_req_completion); - -/** - * Allocates new transfer request. - */ -struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page, - enum cl_req_type crt, int nr_objects) -{ - struct cl_req *req; - - LINVRNT(nr_objects > 0); - - req = kzalloc(sizeof(*req), GFP_NOFS); - if (req) { - int result; - - req->crq_type = crt; - INIT_LIST_HEAD(&req->crq_pages); - INIT_LIST_HEAD(&req->crq_layers); - - req->crq_o = kcalloc(nr_objects, sizeof(req->crq_o[0]), - GFP_NOFS); - if (req->crq_o) { - req->crq_nrobjs = nr_objects; - result = cl_req_init(env, req, page); - } else { - result = -ENOMEM; - } - if (result != 0) { - cl_req_completion(env, req, result); - req = ERR_PTR(result); - } - } else { - req = ERR_PTR(-ENOMEM); - } - return req; -} -EXPORT_SYMBOL(cl_req_alloc); - -/** - * Adds a page to a request. 
- */ -void cl_req_page_add(const struct lu_env *env, - struct cl_req *req, struct cl_page *page) -{ - struct cl_object *obj; - struct cl_req_obj *rqo; - unsigned int i; - - LASSERT(list_empty(&page->cp_flight)); - LASSERT(!page->cp_req); - - CL_PAGE_DEBUG(D_PAGE, env, page, "req %p, %d, %u\n", - req, req->crq_type, req->crq_nrpages); - - list_add_tail(&page->cp_flight, &req->crq_pages); - ++req->crq_nrpages; - page->cp_req = req; - obj = cl_object_top(page->cp_obj); - for (i = 0, rqo = req->crq_o; obj != rqo->ro_obj; ++i, ++rqo) { - if (!rqo->ro_obj) { - rqo->ro_obj = obj; - cl_object_get(obj); - lu_object_ref_add_at(&obj->co_lu, &rqo->ro_obj_ref, - "cl_req", req); - break; - } - } - LASSERT(i < req->crq_nrobjs); -} -EXPORT_SYMBOL(cl_req_page_add); - -/** - * Removes a page from a request. - */ -void cl_req_page_done(const struct lu_env *env, struct cl_page *page) -{ - struct cl_req *req = page->cp_req; - - LASSERT(!list_empty(&page->cp_flight)); - LASSERT(req->crq_nrpages > 0); - - list_del_init(&page->cp_flight); - --req->crq_nrpages; - page->cp_req = NULL; -} -EXPORT_SYMBOL(cl_req_page_done); - -/** - * Notifies layers that request is about to depart by calling - * cl_req_operations::cro_prep() top-to-bottom. - */ -int cl_req_prep(const struct lu_env *env, struct cl_req *req) -{ - unsigned int i; - int result; - const struct cl_req_slice *slice; - - /* - * Check that the caller of cl_req_alloc() didn't lie about the number - * of objects. - */ - for (i = 0; i < req->crq_nrobjs; ++i) - LASSERT(req->crq_o[i].ro_obj); - - result = 0; - list_for_each_entry(slice, &req->crq_layers, crs_linkage) { - if (slice->crs_ops->cro_prep) { - result = slice->crs_ops->cro_prep(env, slice); - if (result != 0) - break; - } - } - return result; -} -EXPORT_SYMBOL(cl_req_prep); - -/** * Fills in attributes that are passed to server together with transfer. Only * attributes from \a flags may be touched. This can be called multiple times * for the same request. */ -void cl_req_attr_set(const struct lu_env *env, struct cl_req *req, - struct cl_req_attr *attr, u64 flags) +void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj, + struct cl_req_attr *attr) { - const struct cl_req_slice *slice; - struct cl_page *page; - unsigned int i; - - LASSERT(!list_empty(&req->crq_pages)); - - /* Take any page to use as a model. 
*/
- page = list_entry(req->crq_pages.next, struct cl_page, cp_flight);
-
- for (i = 0; i < req->crq_nrobjs; ++i) {
- list_for_each_entry(slice, &req->crq_layers, crs_linkage) {
- const struct cl_page_slice *scan;
- const struct cl_object *obj;
-
- scan = cl_page_at(page,
- slice->crs_dev->cd_lu_dev.ld_type);
- obj = scan->cpl_obj;
- if (slice->crs_ops->cro_attr_set)
- slice->crs_ops->cro_attr_set(env, slice, obj,
- attr + i, flags);
- }
+ struct cl_object *scan;
+
+ cl_object_for_each(scan, obj) {
+ if (scan->co_ops->coo_req_attr_set)
+ scan->co_ops->coo_req_attr_set(env, scan, attr);
 }
 }
 EXPORT_SYMBOL(cl_req_attr_set);
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
index 3199dd4a3b72..f5d4e23c64b7 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c
@@ -335,7 +335,7 @@ int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
 if (obj->co_ops->coo_getstripe) {
 result = obj->co_ops->coo_getstripe(env, obj, uarg);
 if (result)
- break;
+ break;
 }
 }
 return result;
@@ -343,6 +343,67 @@ int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
 EXPORT_SYMBOL(cl_object_getstripe);

 /**
+ * Get fiemap extents from file object.
+ *
+ * \param env [in] lustre environment
+ * \param obj [in] file object
+ * \param key [in] fiemap request argument
+ * \param fiemap [out] fiemap extents mapping retrieved
+ * \param buflen [in] max buffer length of @fiemap
+ *
+ * \retval 0 success
+ * \retval < 0 error
+ */
+int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj,
+ struct ll_fiemap_info_key *key,
+ struct fiemap *fiemap, size_t *buflen)
+{
+ struct lu_object_header *top;
+ int result = 0;
+
+ top = obj->co_lu.lo_header;
+ list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
+ if (obj->co_ops->coo_fiemap) {
+ result = obj->co_ops->coo_fiemap(env, obj, key, fiemap,
+ buflen);
+ if (result)
+ break;
+ }
+ }
+ return result;
+}
+EXPORT_SYMBOL(cl_object_fiemap);
+
+int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj,
+ struct cl_layout *cl)
+{
+ struct lu_object_header *top = obj->co_lu.lo_header;
+
+ list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
+ if (obj->co_ops->coo_layout_get)
+ return obj->co_ops->coo_layout_get(env, obj, cl);
+ }
+
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(cl_object_layout_get);
+
+loff_t cl_object_maxbytes(struct cl_object *obj)
+{
+ struct lu_object_header *top = obj->co_lu.lo_header;
+ loff_t maxbytes = LLONG_MAX;
+
+ list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
+ if (obj->co_ops->coo_maxbytes)
+ maxbytes = min_t(loff_t, obj->co_ops->coo_maxbytes(obj),
+ maxbytes);
+ }
+
+ return maxbytes;
+}
+EXPORT_SYMBOL(cl_object_maxbytes);
+
+/**
 * Helper function removing all object locks, and marking object for
 * deletion. All object pages must have been deleted at this point.
 *
@@ -483,36 +544,20 @@ EXPORT_SYMBOL(cl_site_stats_print);
 * bz20044, bz22683.
 */

-static LIST_HEAD(cl_envs);
-static unsigned int cl_envs_cached_nr;
-static unsigned int cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit
- * for now.
- */
-static DEFINE_SPINLOCK(cl_envs_guard);
+static unsigned int cl_envs_cached_max = 32; /* XXX: prototype: arbitrary limit
+ * for now.
+ */ +static struct cl_env_cache { + rwlock_t cec_guard; + unsigned int cec_count; + struct list_head cec_envs; +} *cl_envs = NULL; struct cl_env { void *ce_magic; struct lu_env ce_lu; struct lu_context ce_ses; - /** - * This allows cl_env to be entered into cl_env_hash which implements - * the current thread -> client environment lookup. - */ - struct hlist_node ce_node; - /** - * Owner for the current cl_env. - * - * If LL_TASK_CL_ENV is defined, this point to the owning current, - * only for debugging purpose ; - * Otherwise hash is used, and this is the key for cfs_hash. - * Now current thread pid is stored. Note using thread pointer would - * lead to unbalanced hash because of its specific allocation locality - * and could be varied for different platforms and OSes, even different - * OS versions. - */ - void *ce_owner; - /* * Linkage into global list of all client environments. Used for * garbage collection. @@ -536,122 +581,13 @@ static void cl_env_init0(struct cl_env *cle, void *debug) { LASSERT(cle->ce_ref == 0); LASSERT(cle->ce_magic == &cl_env_init0); - LASSERT(!cle->ce_debug && !cle->ce_owner); + LASSERT(!cle->ce_debug); cle->ce_ref = 1; cle->ce_debug = debug; CL_ENV_INC(busy); } -/* - * The implementation of using hash table to connect cl_env and thread - */ - -static struct cfs_hash *cl_env_hash; - -static unsigned cl_env_hops_hash(struct cfs_hash *lh, - const void *key, unsigned mask) -{ -#if BITS_PER_LONG == 64 - return cfs_hash_u64_hash((__u64)key, mask); -#else - return cfs_hash_u32_hash((__u32)key, mask); -#endif -} - -static void *cl_env_hops_obj(struct hlist_node *hn) -{ - struct cl_env *cle = hlist_entry(hn, struct cl_env, ce_node); - - LASSERT(cle->ce_magic == &cl_env_init0); - return (void *)cle; -} - -static int cl_env_hops_keycmp(const void *key, struct hlist_node *hn) -{ - struct cl_env *cle = cl_env_hops_obj(hn); - - LASSERT(cle->ce_owner); - return (key == cle->ce_owner); -} - -static void cl_env_hops_noop(struct cfs_hash *hs, struct hlist_node *hn) -{ - struct cl_env *cle = hlist_entry(hn, struct cl_env, ce_node); - - LASSERT(cle->ce_magic == &cl_env_init0); -} - -static struct cfs_hash_ops cl_env_hops = { - .hs_hash = cl_env_hops_hash, - .hs_key = cl_env_hops_obj, - .hs_keycmp = cl_env_hops_keycmp, - .hs_object = cl_env_hops_obj, - .hs_get = cl_env_hops_noop, - .hs_put_locked = cl_env_hops_noop, -}; - -static inline struct cl_env *cl_env_fetch(void) -{ - struct cl_env *cle; - - cle = cfs_hash_lookup(cl_env_hash, (void *)(long)current->pid); - LASSERT(ergo(cle, cle->ce_magic == &cl_env_init0)); - return cle; -} - -static inline void cl_env_attach(struct cl_env *cle) -{ - if (cle) { - int rc; - - LASSERT(!cle->ce_owner); - cle->ce_owner = (void *)(long)current->pid; - rc = cfs_hash_add_unique(cl_env_hash, cle->ce_owner, - &cle->ce_node); - LASSERT(rc == 0); - } -} - -static inline void cl_env_do_detach(struct cl_env *cle) -{ - void *cookie; - - LASSERT(cle->ce_owner == (void *)(long)current->pid); - cookie = cfs_hash_del(cl_env_hash, cle->ce_owner, - &cle->ce_node); - LASSERT(cookie == cle); - cle->ce_owner = NULL; -} - -static int cl_env_store_init(void) -{ - cl_env_hash = cfs_hash_create("cl_env", - HASH_CL_ENV_BITS, HASH_CL_ENV_BITS, - HASH_CL_ENV_BKT_BITS, 0, - CFS_HASH_MIN_THETA, - CFS_HASH_MAX_THETA, - &cl_env_hops, - CFS_HASH_RW_BKTLOCK); - return cl_env_hash ? 
0 : -ENOMEM; -} - -static void cl_env_store_fini(void) -{ - cfs_hash_putref(cl_env_hash); -} - -static inline struct cl_env *cl_env_detach(struct cl_env *cle) -{ - if (!cle) - cle = cl_env_fetch(); - - if (cle && cle->ce_owner) - cl_env_do_detach(cle); - - return cle; -} - static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug) { struct lu_env *env; @@ -701,16 +637,20 @@ static struct lu_env *cl_env_obtain(void *debug) { struct cl_env *cle; struct lu_env *env; + int cpu = get_cpu(); - spin_lock(&cl_envs_guard); - LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs))); - if (cl_envs_cached_nr > 0) { + read_lock(&cl_envs[cpu].cec_guard); + LASSERT(equi(cl_envs[cpu].cec_count == 0, + list_empty(&cl_envs[cpu].cec_envs))); + if (cl_envs[cpu].cec_count > 0) { int rc; - cle = container_of(cl_envs.next, struct cl_env, ce_linkage); + cle = container_of(cl_envs[cpu].cec_envs.next, struct cl_env, + ce_linkage); list_del_init(&cle->ce_linkage); - cl_envs_cached_nr--; - spin_unlock(&cl_envs_guard); + cl_envs[cpu].cec_count--; + read_unlock(&cl_envs[cpu].cec_guard); + put_cpu(); env = &cle->ce_lu; rc = lu_env_refill(env); @@ -723,7 +663,8 @@ static struct lu_env *cl_env_obtain(void *debug) env = ERR_PTR(rc); } } else { - spin_unlock(&cl_envs_guard); + read_unlock(&cl_envs[cpu].cec_guard); + put_cpu(); env = cl_env_new(lu_context_tags_default, lu_session_tags_default, debug); } @@ -735,27 +676,6 @@ static inline struct cl_env *cl_env_container(struct lu_env *env) return container_of(env, struct cl_env, ce_lu); } -static struct lu_env *cl_env_peek(int *refcheck) -{ - struct lu_env *env; - struct cl_env *cle; - - CL_ENV_INC(lookup); - - /* check that we don't go far from untrusted pointer */ - CLASSERT(offsetof(struct cl_env, ce_magic) == 0); - - env = NULL; - cle = cl_env_fetch(); - if (cle) { - CL_ENV_INC(hit); - env = &cle->ce_lu; - *refcheck = ++cle->ce_ref; - } - CDEBUG(D_OTHER, "%d@%p\n", cle ? cle->ce_ref : 0, cle); - return env; -} - /** * Returns lu_env: if there already is an environment associated with the * current thread, it is returned, otherwise, new environment is allocated. 
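With cl_env_peek() and the pid-keyed hash gone, the caller-facing contract of cl_env_get()/cl_env_put() is unchanged. A minimal usage sketch (error handling trimmed to the essentials):

    static int with_cl_env(void)
    {
            struct lu_env *env;
            int refcheck;

            env = cl_env_get(&refcheck);
            if (IS_ERR(env))
                    return PTR_ERR(env);

            /* ... use env for cl_* calls ... */

            cl_env_put(env, &refcheck); /* refcheck catches unbalanced nesting */
            return 0;
    }

The per-CPU cache underneath is invisible to such callers: cl_env_obtain() pops from the local CPU's list under read_lock() (sufficient because get_cpu() disables preemption, so only the cross-CPU purger needs the write lock), and cl_env_put() pushes back the same way.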
@@ -773,17 +693,13 @@ struct lu_env *cl_env_get(int *refcheck) { struct lu_env *env; - env = cl_env_peek(refcheck); - if (!env) { - env = cl_env_obtain(__builtin_return_address(0)); - if (!IS_ERR(env)) { - struct cl_env *cle; + env = cl_env_obtain(__builtin_return_address(0)); + if (!IS_ERR(env)) { + struct cl_env *cle; - cle = cl_env_container(env); - cl_env_attach(cle); - *refcheck = cle->ce_ref; - CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle); - } + cle = cl_env_container(env); + *refcheck = cle->ce_ref; + CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle); } return env; } @@ -798,7 +714,6 @@ struct lu_env *cl_env_alloc(int *refcheck, __u32 tags) { struct lu_env *env; - LASSERT(!cl_env_peek(refcheck)); env = cl_env_new(tags, tags, __builtin_return_address(0)); if (!IS_ERR(env)) { struct cl_env *cle; @@ -813,7 +728,6 @@ EXPORT_SYMBOL(cl_env_alloc); static void cl_env_exit(struct cl_env *cle) { - LASSERT(!cle->ce_owner); lu_context_exit(&cle->ce_lu.le_ctx); lu_context_exit(&cle->ce_ses); } @@ -826,20 +740,25 @@ static void cl_env_exit(struct cl_env *cle) unsigned int cl_env_cache_purge(unsigned int nr) { struct cl_env *cle; + unsigned int i; - spin_lock(&cl_envs_guard); - for (; !list_empty(&cl_envs) && nr > 0; --nr) { - cle = container_of(cl_envs.next, struct cl_env, ce_linkage); - list_del_init(&cle->ce_linkage); - LASSERT(cl_envs_cached_nr > 0); - cl_envs_cached_nr--; - spin_unlock(&cl_envs_guard); + for_each_possible_cpu(i) { + write_lock(&cl_envs[i].cec_guard); + for (; !list_empty(&cl_envs[i].cec_envs) && nr > 0; --nr) { + cle = container_of(cl_envs[i].cec_envs.next, + struct cl_env, ce_linkage); + list_del_init(&cle->ce_linkage); + LASSERT(cl_envs[i].cec_count > 0); + cl_envs[i].cec_count--; + write_unlock(&cl_envs[i].cec_guard); - cl_env_fini(cle); - spin_lock(&cl_envs_guard); + cl_env_fini(cle); + write_lock(&cl_envs[i].cec_guard); + } + LASSERT(equi(cl_envs[i].cec_count == 0, + list_empty(&cl_envs[i].cec_envs))); + write_unlock(&cl_envs[i].cec_guard); } - LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs))); - spin_unlock(&cl_envs_guard); return nr; } EXPORT_SYMBOL(cl_env_cache_purge); @@ -862,8 +781,9 @@ void cl_env_put(struct lu_env *env, int *refcheck) CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle); if (--cle->ce_ref == 0) { + int cpu = get_cpu(); + CL_ENV_DEC(busy); - cl_env_detach(cle); cle->ce_debug = NULL; cl_env_exit(cle); /* @@ -872,107 +792,22 @@ void cl_env_put(struct lu_env *env, int *refcheck) * Return environment to the cache only when it was allocated * with the standard tags. */ - if (cl_envs_cached_nr < cl_envs_cached_max && + if (cl_envs[cpu].cec_count < cl_envs_cached_max && (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD && (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) { - spin_lock(&cl_envs_guard); - list_add(&cle->ce_linkage, &cl_envs); - cl_envs_cached_nr++; - spin_unlock(&cl_envs_guard); + read_lock(&cl_envs[cpu].cec_guard); + list_add(&cle->ce_linkage, &cl_envs[cpu].cec_envs); + cl_envs[cpu].cec_count++; + read_unlock(&cl_envs[cpu].cec_guard); } else { cl_env_fini(cle); } + put_cpu(); } } EXPORT_SYMBOL(cl_env_put); /** - * Declares a point of re-entrancy. - * - * \see cl_env_reexit() - */ -void *cl_env_reenter(void) -{ - return cl_env_detach(NULL); -} -EXPORT_SYMBOL(cl_env_reenter); - -/** - * Exits re-entrancy. - */ -void cl_env_reexit(void *cookie) -{ - cl_env_detach(NULL); - cl_env_attach(cookie); -} -EXPORT_SYMBOL(cl_env_reexit); - -/** - * Setup user-supplied \a env as a current environment. 
This is to be used to - * guaranteed that environment exists even when cl_env_get() fails. It is up - * to user to ensure proper concurrency control. - * - * \see cl_env_unplant() - */ -void cl_env_implant(struct lu_env *env, int *refcheck) -{ - struct cl_env *cle = cl_env_container(env); - - LASSERT(cle->ce_ref > 0); - - cl_env_attach(cle); - cl_env_get(refcheck); - CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle); -} -EXPORT_SYMBOL(cl_env_implant); - -/** - * Detach environment installed earlier by cl_env_implant(). - */ -void cl_env_unplant(struct lu_env *env, int *refcheck) -{ - struct cl_env *cle = cl_env_container(env); - - LASSERT(cle->ce_ref > 1); - - CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle); - - cl_env_detach(cle); - cl_env_put(env, refcheck); -} -EXPORT_SYMBOL(cl_env_unplant); - -struct lu_env *cl_env_nested_get(struct cl_env_nest *nest) -{ - struct lu_env *env; - - nest->cen_cookie = NULL; - env = cl_env_peek(&nest->cen_refcheck); - if (env) { - if (!cl_io_is_going(env)) - return env; - cl_env_put(env, &nest->cen_refcheck); - nest->cen_cookie = cl_env_reenter(); - } - env = cl_env_get(&nest->cen_refcheck); - if (IS_ERR(env)) { - cl_env_reexit(nest->cen_cookie); - return env; - } - - LASSERT(!cl_io_is_going(env)); - return env; -} -EXPORT_SYMBOL(cl_env_nested_get); - -void cl_env_nested_put(struct cl_env_nest *nest, struct lu_env *env) -{ - cl_env_put(env, &nest->cen_refcheck); - cl_env_reexit(nest->cen_cookie); -} -EXPORT_SYMBOL(cl_env_nested_put); - -/** * Converts struct ost_lvb to struct cl_attr. * * \see cl_attr2lvb @@ -999,6 +834,10 @@ static int cl_env_percpu_init(void) for_each_possible_cpu(i) { struct lu_env *env; + rwlock_init(&cl_envs[i].cec_guard); + INIT_LIST_HEAD(&cl_envs[i].cec_envs); + cl_envs[i].cec_count = 0; + cle = &cl_env_percpu[i]; env = &cle->ce_lu; @@ -1066,7 +905,6 @@ void cl_env_percpu_put(struct lu_env *env) LASSERT(cle->ce_ref == 0); CL_ENV_DEC(busy); - cl_env_detach(cle); cle->ce_debug = NULL; put_cpu(); @@ -1080,7 +918,6 @@ struct lu_env *cl_env_percpu_get(void) cle = &cl_env_percpu[get_cpu()]; cl_env_init0(cle, __builtin_return_address(0)); - cl_env_attach(cle); return &cle->ce_lu; } EXPORT_SYMBOL(cl_env_percpu_get); @@ -1144,51 +981,19 @@ LU_KEY_INIT_FINI(cl0, struct cl_thread_info); static void *cl_key_init(const struct lu_context *ctx, struct lu_context_key *key) { - struct cl_thread_info *info; - - info = cl0_key_init(ctx, key); - if (!IS_ERR(info)) { - size_t i; - - for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i) - lu_ref_init(&info->clt_counters[i].ctc_locks_locked); - } - return info; + return cl0_key_init(ctx, key); } static void cl_key_fini(const struct lu_context *ctx, struct lu_context_key *key, void *data) { - struct cl_thread_info *info; - size_t i; - - info = data; - for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i) - lu_ref_fini(&info->clt_counters[i].ctc_locks_locked); cl0_key_fini(ctx, key, data); } -static void cl_key_exit(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct cl_thread_info *info = data; - size_t i; - - for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i) { - LASSERT(info->clt_counters[i].ctc_nr_held == 0); - LASSERT(info->clt_counters[i].ctc_nr_used == 0); - LASSERT(info->clt_counters[i].ctc_nr_locks_acquired == 0); - LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0); - lu_ref_fini(&info->clt_counters[i].ctc_locks_locked); - lu_ref_init(&info->clt_counters[i].ctc_locks_locked); - } -} - static struct lu_context_key cl_key = { .lct_tags = LCT_CL_THREAD, .lct_init = 
cl_key_init, .lct_fini = cl_key_fini, - .lct_exit = cl_key_exit }; static struct lu_kmem_descr cl_object_caches[] = { @@ -1212,13 +1017,15 @@ int cl_global_init(void) { int result; - result = cl_env_store_init(); - if (result) - return result; + cl_envs = kzalloc(sizeof(*cl_envs) * num_possible_cpus(), GFP_KERNEL); + if (!cl_envs) { + result = -ENOMEM; + goto out; + } result = lu_kmem_init(cl_object_caches); if (result) - goto out_store; + goto out_envs; LU_CONTEXT_KEY_INIT(&cl_key); result = lu_context_key_register(&cl_key); @@ -1228,16 +1035,17 @@ int cl_global_init(void) result = cl_env_percpu_init(); if (result) /* no cl_env_percpu_fini on error */ - goto out_context; + goto out_keys; return 0; -out_context: +out_keys: lu_context_key_degister(&cl_key); out_kmem: lu_kmem_fini(cl_object_caches); -out_store: - cl_env_store_fini(); +out_envs: + kfree(cl_envs); +out: return result; } @@ -1249,5 +1057,5 @@ void cl_global_fini(void) cl_env_percpu_fini(); lu_context_key_degister(&cl_key); lu_kmem_fini(cl_object_caches); - cl_env_store_fini(); + kfree(cl_envs); } diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c index 63973ba096da..cd9a40ca4448 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_page.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c @@ -99,7 +99,6 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page) PASSERT(env, page, list_empty(&page->cp_batch)); PASSERT(env, page, !page->cp_owner); - PASSERT(env, page, !page->cp_req); PASSERT(env, page, page->cp_state == CPS_FREEING); while (!list_empty(&page->cp_layers)) { @@ -150,7 +149,6 @@ struct cl_page *cl_page_alloc(const struct lu_env *env, page->cp_type = type; INIT_LIST_HEAD(&page->cp_layers); INIT_LIST_HEAD(&page->cp_batch); - INIT_LIST_HEAD(&page->cp_flight); lu_ref_init(&page->cp_reference); head = o->co_lu.lo_header; list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) { @@ -390,30 +388,6 @@ EXPORT_SYMBOL(cl_page_at); __result; \ }) -#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...) \ -({ \ - const struct lu_env *__env = (_env); \ - struct cl_page *__page = (_page); \ - const struct cl_page_slice *__scan; \ - int __result; \ - ptrdiff_t __op = (_op); \ - int (*__method)_proto; \ - \ - __result = 0; \ - list_for_each_entry_reverse(__scan, &__page->cp_layers, \ - cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + __op); \ - if (__method) { \ - __result = (*__method)(__env, __scan, ## __VA_ARGS__); \ - if (__result != 0) \ - break; \ - } \ - } \ - if (__result > 0) \ - __result = 0; \ - __result; \ -}) - #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \ do { \ const struct lu_env *__env = (_env); \ @@ -552,7 +526,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io, io, nonblock); if (result == 0) { PASSERT(env, pg, !pg->cp_owner); - PASSERT(env, pg, !pg->cp_req); pg->cp_owner = cl_io_top(io); cl_page_owner_set(pg); if (pg->cp_state != CPS_FREEING) { @@ -694,7 +667,7 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg) PASSERT(env, pg, pg->cp_state != CPS_FREEING); /* - * Severe all ways to obtain new pointers to @pg. + * Sever all ways to obtain new pointers to @pg. 
*/
 cl_page_owner_clear(pg);

@@ -845,8 +818,6 @@ void cl_page_completion(const struct lu_env *env,
 struct cl_sync_io *anchor = pg->cp_sync_io;

 PASSERT(env, pg, crt < CRT_NR);
- /* cl_page::cp_req already cleared by the caller (osc_completion()) */
- PASSERT(env, pg, !pg->cp_req);
 PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));

 CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
@@ -860,16 +831,8 @@ void cl_page_completion(const struct lu_env *env,
 if (anchor) {
 LASSERT(pg->cp_sync_io == anchor);
 pg->cp_sync_io = NULL;
- }
- /*
- * As page->cp_obj is pinned by a reference from page->cp_req, it is
- * safe to call cl_page_put() without risking object destruction in a
- * non-blocking context.
- */
- cl_page_put(env, pg);
-
- if (anchor)
 cl_sync_io_note(env, anchor, ioret);
+ }
 }
 EXPORT_SYMBOL(cl_page_completion);

@@ -927,29 +890,6 @@ int cl_page_flush(const struct lu_env *env, struct cl_io *io,
 EXPORT_SYMBOL(cl_page_flush);

 /**
- * Checks whether page is protected by any extent lock is at least required
- * mode.
- *
- * \return the same as in cl_page_operations::cpo_is_under_lock() method.
- * \see cl_page_operations::cpo_is_under_lock()
- */
-int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, pgoff_t *max_index)
-{
- int rc;
-
- PINVRNT(env, page, cl_page_invariant(page));
-
- rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
- (const struct lu_env *,
- const struct cl_page_slice *,
- struct cl_io *, pgoff_t *),
- io, max_index);
- return rc;
-}
-EXPORT_SYMBOL(cl_page_is_under_lock);
-
-/**
 * Tells transfer engine that only part of a page is to be transmitted.
 *
 * \see cl_page_operations::cpo_clip()
@@ -974,10 +914,10 @@ void cl_page_header_print(const struct lu_env *env, void *cookie,
 lu_printer_t printer, const struct cl_page *pg)
 {
 (*printer)(env, cookie,
- "page@%p[%d %p %d %d %p %p]\n",
+ "page@%p[%d %p %d %d %p]\n",
 pg, atomic_read(&pg->cp_ref), pg->cp_obj,
 pg->cp_state, pg->cp_type,
- pg->cp_owner, pg->cp_req);
+ pg->cp_owner);
 }
 EXPORT_SYMBOL(cl_page_header_print);
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
index cf8bb2a2f40b..fa0d38ddccb2 100644
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -907,6 +907,8 @@ struct obd_import *class_new_import(struct obd_device *obd)
 INIT_LIST_HEAD(&imp->imp_sending_list);
 INIT_LIST_HEAD(&imp->imp_delayed_list);
 INIT_LIST_HEAD(&imp->imp_committed_list);
+ INIT_LIST_HEAD(&imp->imp_unreplied_list);
+ imp->imp_known_replied_xid = 0;
 imp->imp_replay_cursor = &imp->imp_committed_list;
 spin_lock_init(&imp->imp_lock);
 imp->imp_last_success_conn = 0;
@@ -1408,13 +1410,33 @@ EXPORT_SYMBOL(obd_get_max_rpcs_in_flight);
 int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
 {
 struct obd_request_slot_waiter *orsw;
+ const char *typ_name;
 __u32 old;
 int diff;
+ int rc;
 int i;

 if (max > OBD_MAX_RIF_MAX || max < 1)
 return -ERANGE;

+ typ_name = cli->cl_import->imp_obd->obd_type->typ_name;
+ if (!strcmp(typ_name, LUSTRE_MDC_NAME)) {
+ /*
+ * adjust max_mod_rpcs_in_flight to ensure it is always
+ * strictly lower than max_rpcs_in_flight
+ */
+ if (max < 2) {
+ CERROR("%s: cannot set max_rpcs_in_flight to 1 because it must be higher than max_mod_rpcs_in_flight value\n",
+ cli->cl_import->imp_obd->obd_name);
+ return -ERANGE;
+ }
+ if (max <= cli->cl_max_mod_rpcs_in_flight) {
+ rc = obd_set_max_mod_rpcs_in_flight(cli, max - 1);
+ if (rc)
+ return rc;
+ 
} + } + spin_lock(&cli->cl_loi_list_lock); old = cli->cl_max_rpcs_in_flight; cli->cl_max_rpcs_in_flight = max; @@ -1436,3 +1458,209 @@ int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max) return 0; } EXPORT_SYMBOL(obd_set_max_rpcs_in_flight); + +int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, __u16 max) +{ + struct obd_connect_data *ocd; + u16 maxmodrpcs; + u16 prev; + + if (max > OBD_MAX_RIF_MAX || max < 1) + return -ERANGE; + + /* cannot exceed or equal max_rpcs_in_flight */ + if (max >= cli->cl_max_rpcs_in_flight) { + CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) higher or equal to max_rpcs_in_flight value (%u)\n", + cli->cl_import->imp_obd->obd_name, + max, cli->cl_max_rpcs_in_flight); + return -ERANGE; + } + + /* cannot exceed max modify RPCs in flight supported by the server */ + ocd = &cli->cl_import->imp_connect_data; + if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) + maxmodrpcs = ocd->ocd_maxmodrpcs; + else + maxmodrpcs = 1; + if (max > maxmodrpcs) { + CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) higher than max_mod_rpcs_per_client value (%hu) returned by the server at connection\n", + cli->cl_import->imp_obd->obd_name, + max, maxmodrpcs); + return -ERANGE; + } + + spin_lock(&cli->cl_mod_rpcs_lock); + + prev = cli->cl_max_mod_rpcs_in_flight; + cli->cl_max_mod_rpcs_in_flight = max; + + /* wakeup waiters if limit has been increased */ + if (cli->cl_max_mod_rpcs_in_flight > prev) + wake_up(&cli->cl_mod_rpcs_waitq); + + spin_unlock(&cli->cl_mod_rpcs_lock); + + return 0; +} +EXPORT_SYMBOL(obd_set_max_mod_rpcs_in_flight); + +#define pct(a, b) (b ? (a * 100) / b : 0) + +int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq) +{ + unsigned long mod_tot = 0, mod_cum; + struct timespec64 now; + int i; + + ktime_get_real_ts64(&now); + + spin_lock(&cli->cl_mod_rpcs_lock); + + seq_printf(seq, "snapshot_time: %llu.%9lu (secs.nsecs)\n", + (s64)now.tv_sec, (unsigned long)now.tv_nsec); + seq_printf(seq, "modify_RPCs_in_flight: %hu\n", + cli->cl_mod_rpcs_in_flight); + + seq_puts(seq, "\n\t\t\tmodify\n"); + seq_puts(seq, "rpcs in flight rpcs %% cum %%\n"); + + mod_tot = lprocfs_oh_sum(&cli->cl_mod_rpcs_hist); + + mod_cum = 0; + for (i = 0; i < OBD_HIST_MAX; i++) { + unsigned long mod = cli->cl_mod_rpcs_hist.oh_buckets[i]; + + mod_cum += mod; + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu\n", + i, mod, pct(mod, mod_tot), + pct(mod_cum, mod_tot)); + if (mod_cum == mod_tot) + break; + } + + spin_unlock(&cli->cl_mod_rpcs_lock); + + return 0; +} +EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show); +#undef pct + +/* + * The number of modify RPCs sent in parallel is limited + * because the server has a finite number of slots per client to + * store request result and ensure reply reconstruction when needed. + * On the client, this limit is stored in cl_max_mod_rpcs_in_flight + * that takes into account server limit and cl_max_rpcs_in_flight + * value. + * On the MDC client, to avoid a potential deadlock (see Bugzilla 3462), + * one close request is allowed above the maximum. 
+ */ +static inline bool obd_mod_rpc_slot_avail_locked(struct client_obd *cli, + bool close_req) +{ + bool avail; + + /* A slot is available if + * - number of modify RPCs in flight is less than the max + * - it's a close RPC and no other close request is in flight + */ + avail = cli->cl_mod_rpcs_in_flight < cli->cl_max_mod_rpcs_in_flight || + (close_req && !cli->cl_close_rpcs_in_flight); + + return avail; +} + +static inline bool obd_mod_rpc_slot_avail(struct client_obd *cli, + bool close_req) +{ + bool avail; + + spin_lock(&cli->cl_mod_rpcs_lock); + avail = obd_mod_rpc_slot_avail_locked(cli, close_req); + spin_unlock(&cli->cl_mod_rpcs_lock); + return avail; +} + +/* Get a modify RPC slot from the obd client @cli according + * to the kind of operation @opc that is going to be sent + * and the intent @it of the operation if it applies. + * If the maximum number of modify RPCs in flight is reached + * the thread is put to sleep. + * Returns the tag to be set in the request message. Tag 0 + * is reserved for non-modifying requests. + */ +u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc, + struct lookup_intent *it) +{ + struct l_wait_info lwi = LWI_INTR(NULL, NULL); + bool close_req = false; + u16 i, max; + + /* read-only metadata RPCs don't consume a slot on MDT + * for reply reconstruction + */ + if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || + it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) + return 0; + + if (opc == MDS_CLOSE) + close_req = true; + + do { + spin_lock(&cli->cl_mod_rpcs_lock); + max = cli->cl_max_mod_rpcs_in_flight; + if (obd_mod_rpc_slot_avail_locked(cli, close_req)) { + /* there is a slot available */ + cli->cl_mod_rpcs_in_flight++; + if (close_req) + cli->cl_close_rpcs_in_flight++; + lprocfs_oh_tally(&cli->cl_mod_rpcs_hist, + cli->cl_mod_rpcs_in_flight); + /* find a free tag */ + i = find_first_zero_bit(cli->cl_mod_tag_bitmap, + max + 1); + LASSERT(i < OBD_MAX_RIF_MAX); + LASSERT(!test_and_set_bit(i, cli->cl_mod_tag_bitmap)); + spin_unlock(&cli->cl_mod_rpcs_lock); + /* tag 0 is reserved for non-modify RPCs */ + return i + 1; + } + spin_unlock(&cli->cl_mod_rpcs_lock); + + CDEBUG(D_RPCTRACE, "%s: sleeping for a modify RPC slot opc %u, max %hu\n", + cli->cl_import->imp_obd->obd_name, opc, max); + + l_wait_event(cli->cl_mod_rpcs_waitq, + obd_mod_rpc_slot_avail(cli, close_req), &lwi); + } while (true); +} +EXPORT_SYMBOL(obd_get_mod_rpc_slot); + +/* + * Put a modify RPC slot from the obd client @cli according + * to the kind of operation @opc that has been sent and the + * intent @it of the operation if it applies. 
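Putting the slot API together (obd_get_mod_rpc_slot() above, obd_put_mod_rpc_slot() just below), a sender brackets a modify RPC roughly as follows; the tag stamping and the send call are stand-ins, not functions from this diff:

    static int toy_send_modify_rpc(struct client_obd *cli, u32 opc,
                                   struct lookup_intent *it)
    {
            u16 tag;
            int rc;

            tag = obd_get_mod_rpc_slot(cli, opc, it); /* may sleep for a slot */
            /* ... stamp tag into the request message and send it ... */
            rc = 0;                                   /* stand-in for the send */
            obd_put_mod_rpc_slot(cli, opc, it, tag);  /* free tag, wake waiters */
            return rc;
    }

Read-only intents (IT_GETATTR, IT_LOOKUP, IT_LAYOUT, IT_READDIR) short-circuit to tag 0 in both helpers, so they never consume a slot.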
+ */ +void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc, + struct lookup_intent *it, u16 tag) +{ + bool close_req = false; + + if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || + it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) + return; + + if (opc == MDS_CLOSE) + close_req = true; + + spin_lock(&cli->cl_mod_rpcs_lock); + cli->cl_mod_rpcs_in_flight--; + if (close_req) + cli->cl_close_rpcs_in_flight--; + /* release the tag in the bitmap */ + LASSERT(tag - 1 < OBD_MAX_RIF_MAX); + LASSERT(test_and_clear_bit(tag - 1, cli->cl_mod_tag_bitmap) != 0); + spin_unlock(&cli->cl_mod_rpcs_lock); + wake_up(&cli->cl_mod_rpcs_waitq); +} +EXPORT_SYMBOL(obd_put_mod_rpc_slot); diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c index be09e04b042f..9f5e8299d7e4 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c @@ -217,8 +217,8 @@ static ssize_t pinger_show(struct kobject *kobj, struct attribute *attr, return sprintf(buf, "%s\n", "on"); } -static ssize_t health_show(struct kobject *kobj, struct attribute *attr, - char *buf) +static ssize_t +health_check_show(struct kobject *kobj, struct attribute *attr, char *buf) { bool healthy = true; int i; @@ -311,14 +311,14 @@ EXPORT_SYMBOL_GPL(debugfs_lustre_root); LUSTRE_RO_ATTR(version); LUSTRE_RO_ATTR(pinger); -LUSTRE_RO_ATTR(health); +LUSTRE_RO_ATTR(health_check); LUSTRE_RW_ATTR(jobid_var); LUSTRE_RW_ATTR(jobid_name); static struct attribute *lustre_attrs[] = { &lustre_attr_version.attr, &lustre_attr_pinger.attr, - &lustre_attr_health.attr, + &lustre_attr_health_check.attr, &lustre_attr_jobid_name.attr, &lustre_attr_jobid_var.attr, NULL, diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c deleted file mode 100644 index 41b77a30feb3..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/obdclass/linux/linux-obdo.c - * - * Object Devices Class Driver - * These are the only exported functions, they provide some generic - * infrastructure for managing object devices - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include <linux/module.h> -#include "../../include/obd_class.h" -#include "../../include/lustre/lustre_idl.h" - -#include <linux/fs.h> - -void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid) -{ - valid &= src->o_valid; - - if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, - "valid %#llx, cur time %lu/%lu, new %llu/%llu\n", - src->o_valid, LTIME_S(dst->i_mtime), - LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime); - - if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime)) - LTIME_S(dst->i_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime)) - LTIME_S(dst->i_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime)) - LTIME_S(dst->i_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE) - i_size_write(dst, src->o_size); - /* optimum IO size */ - if (valid & OBD_MD_FLBLKSZ && src->o_blksize > (1 << dst->i_blkbits)) - dst->i_blkbits = ffs(src->o_blksize) - 1; - - if (dst->i_blkbits < PAGE_SHIFT) - dst->i_blkbits = PAGE_SHIFT; - - /* allocation of space */ - if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks) - /* - * XXX shouldn't overflow be checked here like in - * obdo_to_inode(). - */ - dst->i_blocks = src->o_blocks; -} -EXPORT_SYMBOL(obdo_refresh_inode); diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c index 43797f106745..736ea1067c93 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog.c +++ b/drivers/staging/lustre/lustre/obdclass/llog.c @@ -43,8 +43,9 @@ #define DEBUG_SUBSYSTEM S_LOG -#include "../include/obd_class.h" +#include "../include/llog_swab.h" #include "../include/lustre_log.h" +#include "../include/obd_class.h" #include "llog_internal.h" /* @@ -80,8 +81,7 @@ static void llog_free_handle(struct llog_handle *loghandle) LASSERT(list_empty(&loghandle->u.phd.phd_entry)); else if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT) LASSERT(list_empty(&loghandle->u.chd.chd_head)); - LASSERT(sizeof(*loghandle->lgh_hdr) == LLOG_CHUNK_SIZE); - kfree(loghandle->lgh_hdr); + kvfree(loghandle->lgh_hdr); out: kfree(loghandle); } @@ -115,20 +115,29 @@ static int llog_read_header(const struct lu_env *env, rc = lop->lop_read_header(env, handle); if (rc == LLOG_EEMPTY) { struct llog_log_hdr *llh = handle->lgh_hdr; + size_t len; + /* lrh_len should be initialized in llog_init_handle */ handle->lgh_last_idx = 0; /* header is record with index 0 */ llh->llh_count = 1; /* for the header record */ llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC; - llh->llh_hdr.lrh_len = LLOG_CHUNK_SIZE; - llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE; + LASSERT(handle->lgh_ctxt->loc_chunk_size >= LLOG_MIN_CHUNK_SIZE); + llh->llh_hdr.lrh_len = handle->lgh_ctxt->loc_chunk_size; llh->llh_hdr.lrh_index = 0; - llh->llh_tail.lrt_index = 0; llh->llh_timestamp = ktime_get_real_seconds(); if (uuid) memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid)); llh->llh_bitmap_offset = offsetof(typeof(*llh), llh_bitmap); - ext2_set_bit(0, llh->llh_bitmap); + /* + * Since update llog header might also call this function, + * let's reset the bitmap to 0 here + */ + len = llh->llh_hdr.lrh_len - llh->llh_bitmap_offset; + memset(LLOG_HDR_BITMAP(llh), 0, len - sizeof(llh->llh_tail)); + ext2_set_bit(0, LLOG_HDR_BITMAP(llh)); + 
LLOG_HDR_TAIL(llh)->lrt_len = llh->llh_hdr.lrh_len;
+ LLOG_HDR_TAIL(llh)->lrt_index = llh->llh_hdr.lrh_index;
 rc = 0;
 }
 return rc;
@@ -137,16 +146,19 @@ static int llog_read_header(const struct lu_env *env,
 int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
 int flags, struct obd_uuid *uuid)
 {
+ int chunk_size = handle->lgh_ctxt->loc_chunk_size;
 enum llog_flag fmt = flags & LLOG_F_EXT_MASK;
 struct llog_log_hdr *llh;
 int rc;

 LASSERT(!handle->lgh_hdr);
- llh = kzalloc(sizeof(*llh), GFP_NOFS);
+ LASSERT(chunk_size >= LLOG_MIN_CHUNK_SIZE);
+ llh = libcfs_kvzalloc(sizeof(*llh), GFP_NOFS);
 if (!llh)
 return -ENOMEM;
 handle->lgh_hdr = llh;
+ handle->lgh_hdr_size = chunk_size;
 /* first assign flags to use llog_client_ops */
 llh->llh_flags = flags;
 rc = llog_read_header(env, handle, uuid);
@@ -189,6 +201,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
 LASSERT(list_empty(&handle->u.chd.chd_head));
 INIT_LIST_HEAD(&handle->u.chd.chd_head);
 llh->llh_size = sizeof(struct llog_logid_rec);
+ llh->llh_flags |= LLOG_F_IS_FIXSIZE;
 } else if (!(flags & LLOG_F_IS_PLAIN)) {
 CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n",
 handle->lgh_ctxt->loc_obd->obd_name,
@@ -198,7 +211,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
 llh->llh_flags |= fmt;
 out:
 if (rc) {
- kfree(llh);
+ kvfree(llh);
 handle->lgh_hdr = NULL;
 }
 return rc;
@@ -212,15 +225,21 @@ static int llog_process_thread(void *arg)
 struct llog_log_hdr *llh = loghandle->lgh_hdr;
 struct llog_process_cat_data *cd = lpi->lpi_catdata;
 char *buf;
- __u64 cur_offset = LLOG_CHUNK_SIZE;
- __u64 last_offset;
+ u64 cur_offset, tmp_offset;
+ int chunk_size;
 int rc = 0, index = 1, last_index;
 int saved_index = 0;
 int last_called_index = 0;

- LASSERT(llh);
+ if (!llh)
+ return -EINVAL;
+
+ cur_offset = llh->llh_hdr.lrh_len;
+ chunk_size = llh->llh_hdr.lrh_len;
+ /* expect chunk_size to be power of two */
+ LASSERT(is_power_of_2(chunk_size));

- buf = kzalloc(LLOG_CHUNK_SIZE, GFP_NOFS);
+ buf = libcfs_kvzalloc(chunk_size, GFP_NOFS);
 if (!buf) {
 lpi->lpi_rc = -ENOMEM;
 return 0;
 }
@@ -233,41 +252,53 @@ static int llog_process_thread(void *arg)
 if (cd && cd->lpcd_last_idx)
 last_index = cd->lpcd_last_idx;
 else
- last_index = LLOG_BITMAP_BYTES * 8 - 1;
-
- /* Record is not in this buffer. */
- if (index > last_index)
- goto out;
+ last_index = LLOG_HDR_BITMAP_SIZE(llh) - 1;

 while (rc == 0) {
+ unsigned int buf_offset = 0;
 struct llog_rec_hdr *rec;
+ bool partial_chunk;
+ off_t chunk_offset;

 /* skip records not set in bitmap */
 while (index <= last_index &&
- !ext2_test_bit(index, llh->llh_bitmap))
+ !ext2_test_bit(index, LLOG_HDR_BITMAP(llh)))
 ++index;

- LASSERT(index <= last_index + 1);
- if (index == last_index + 1)
+ if (index > last_index)
 break;
-repeat:

 CDEBUG(D_OTHER, "index: %d last_index %d\n",
 index, last_index);
-
+repeat:
 /* get the buf with our target record; avoid old garbage */
- memset(buf, 0, LLOG_CHUNK_SIZE);
- last_offset = cur_offset;
+ memset(buf, 0, chunk_size);
 rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index,
- index, &cur_offset, buf, LLOG_CHUNK_SIZE);
+ index, &cur_offset, buf, chunk_size);
 if (rc)
 goto out;

+ /*
+ * NB: after the llog_next_block() call, cur_offset is the
+ * offset of the block following the one just read.
+ * The absolute offset of the current chunk is calculated
+ * from the cur_offset value and stored in the chunk_offset variable.
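Spelled out with example values, the computation that comment describes (and which follows just below) works like this, assuming chunk_size = 8192; this is an annotated excerpt, not new logic:

    bool partial_chunk;
    off_t chunk_offset;
    u64 tmp = cur_offset;   /* cur_offset as returned by llog_next_block() */

    if (do_div(tmp, chunk_size)) {  /* remainder != 0: stopped mid-chunk */
            /* e.g. cur_offset = 20480 -> partial chunk starting at 16384 */
            partial_chunk = true;
            chunk_offset = cur_offset & ~(chunk_size - 1);
    } else {                        /* block aligned: a whole chunk was read */
            /* e.g. cur_offset = 24576 -> that chunk started at 16384 */
            partial_chunk = false;
            chunk_offset = cur_offset - chunk_size;
    }

The mask form in the partial case relies on chunk_size being a power of two, which the LASSERT(is_power_of_2(chunk_size)) earlier in llog_process_thread() guarantees.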
+ */ + tmp_offset = cur_offset; + if (do_div(tmp_offset, chunk_size)) { + partial_chunk = true; + chunk_offset = cur_offset & ~(chunk_size - 1); + } else { + partial_chunk = false; + chunk_offset = cur_offset - chunk_size; + } + /* NB: when rec->lrh_len is accessed it is already swabbed * since it is used at the "end" of the loop and the rec * swabbing is done at the beginning of the loop. */ - for (rec = (struct llog_rec_hdr *)buf; - (char *)rec < buf + LLOG_CHUNK_SIZE; + for (rec = (struct llog_rec_hdr *)(buf + buf_offset); + (char *)rec < buf + chunk_size; rec = llog_rec_hdr_next(rec)) { CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n", rec, rec->lrh_type); @@ -278,15 +309,29 @@ repeat: CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n", rec->lrh_type, rec->lrh_index); - if (rec->lrh_index == 0) { - /* probably another rec just got added? */ - rc = 0; - if (index <= loghandle->lgh_last_idx) - goto repeat; - goto out; /* no more records */ + /* + * for partial chunk the end of it is zeroed, check + * for index 0 to distinguish it. + */ + if (partial_chunk && !rec->lrh_index) { + /* concurrent llog_add() might add new records + * while llog_processing, check this is not + * the case and re-read the current chunk + * otherwise. + */ + if (index > loghandle->lgh_last_idx) { + rc = 0; + goto out; + } + CDEBUG(D_OTHER, "Re-read last llog buffer for new records, index %u, last %u\n", + index, loghandle->lgh_last_idx); + /* save offset inside buffer for the re-read */ + buf_offset = (char *)rec - (char *)buf; + cur_offset = chunk_offset; + goto repeat; } - if (rec->lrh_len == 0 || - rec->lrh_len > LLOG_CHUNK_SIZE) { + + if (!rec->lrh_len || rec->lrh_len > chunk_size) { CWARN("invalid length %d in llog record for index %d/%d\n", rec->lrh_len, rec->lrh_index, index); @@ -300,32 +345,38 @@ repeat: continue; } + if (rec->lrh_index != index) { + CERROR("%s: Invalid record: index %u but expected %u\n", + loghandle->lgh_ctxt->loc_obd->obd_name, + rec->lrh_index, index); + rc = -ERANGE; + goto out; + } + CDEBUG(D_OTHER, "lrh_index: %d lrh_len: %d (%d remains)\n", rec->lrh_index, rec->lrh_len, - (int)(buf + LLOG_CHUNK_SIZE - (char *)rec)); + (int)(buf + chunk_size - (char *)rec)); loghandle->lgh_cur_idx = rec->lrh_index; loghandle->lgh_cur_offset = (char *)rec - (char *)buf + - last_offset; + chunk_offset; /* if set, process the callback on this record */ - if (ext2_test_bit(index, llh->llh_bitmap)) { + if (ext2_test_bit(index, LLOG_HDR_BITMAP(llh))) { rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec, lpi->lpi_cbdata); last_called_index = index; if (rc) goto out; - } else { - CDEBUG(D_OTHER, "Skipped index %d\n", index); } - /* next record, still in buffer? 
*/ - ++index; - if (index > last_index) { + /* exit if the last index is reached */ + if (index >= last_index) { rc = 0; goto out; } + index++; } } diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c index a4277d684614..8574ad401f66 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c +++ b/drivers/staging/lustre/lustre/obdclass/llog_obd.c @@ -158,6 +158,7 @@ int llog_setup(const struct lu_env *env, struct obd_device *obd, mutex_init(&ctxt->loc_mutex); ctxt->loc_exp = class_export_get(disk_obd->obd_self_export); ctxt->loc_flags = LLOG_CTXT_FLAG_UNINITIALIZED; + ctxt->loc_chunk_size = LLOG_MIN_CHUNK_SIZE; rc = llog_group_set_ctxt(olg, ctxt, index); if (rc) { diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c index 8c4c1b3f1b45..723c212c6747 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog_swab.c +++ b/drivers/staging/lustre/lustre/obdclass/llog_swab.c @@ -38,6 +38,7 @@ #define DEBUG_SUBSYSTEM S_LOG +#include "../include/llog_swab.h" #include "../include/lustre_log.h" static void print_llogd_body(struct llogd_body *d) @@ -244,7 +245,7 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec) __swab32s(&llh->llh_flags); __swab32s(&llh->llh_size); __swab32s(&llh->llh_cat_idx); - tail = &llh->llh_tail; + tail = LLOG_HDR_TAIL(llh); break; } case LLOG_LOGID_MAGIC: @@ -290,8 +291,10 @@ static void print_llog_hdr(struct llog_log_hdr *h) CDEBUG(D_OTHER, "\tllh_flags: %#x\n", h->llh_flags); CDEBUG(D_OTHER, "\tllh_size: %#x\n", h->llh_size); CDEBUG(D_OTHER, "\tllh_cat_idx: %#x\n", h->llh_cat_idx); - CDEBUG(D_OTHER, "\tllh_tail.lrt_index: %#x\n", h->llh_tail.lrt_index); - CDEBUG(D_OTHER, "\tllh_tail.lrt_len: %#x\n", h->llh_tail.lrt_len); + CDEBUG(D_OTHER, "\tllh_tail.lrt_index: %#x\n", + LLOG_HDR_TAIL(h)->lrt_index); + CDEBUG(D_OTHER, "\tllh_tail.lrt_len: %#x\n", + LLOG_HDR_TAIL(h)->lrt_len); } void lustre_swab_llog_hdr(struct llog_log_hdr *h) diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c index 852a5acfefab..2c99717b0aba 100644 --- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c +++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c @@ -100,9 +100,13 @@ static const char * const obd_connect_names[] = { "lfsck", "unknown", "unlink_close", - "unknown", + "multi_mod_rpcs", "dir_stripe", - "unknown", + "subtree", + "lock_ahead", + "bulk_mbits", + "compact_obdo", + "second_flags", NULL }; @@ -127,7 +131,7 @@ EXPORT_SYMBOL(obd_connect_flags2str); static void obd_connect_data_seqprint(struct seq_file *m, struct obd_connect_data *ocd) { - int flags; + u64 flags; LASSERT(ocd); flags = ocd->ocd_connect_flags; @@ -172,6 +176,9 @@ static void obd_connect_data_seqprint(struct seq_file *m, if (flags & OBD_CONNECT_MAXBYTES) seq_printf(m, " max_object_bytes: %llx\n", ocd->ocd_maxbytes); + if (flags & OBD_CONNECT_MULTIMODRPCS) + seq_printf(m, " max_mod_rpcs: %hu\n", + ocd->ocd_maxmodrpcs); } int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val, @@ -396,10 +403,17 @@ int lprocfs_wr_uint(struct file *file, const char __user *buffer, char dummy[MAX_STRING_SIZE + 1], *end; unsigned long tmp; - dummy[MAX_STRING_SIZE] = '\0'; - if (copy_from_user(dummy, buffer, MAX_STRING_SIZE)) + if (count >= sizeof(dummy)) + return -EINVAL; + + if (count == 0) + return 0; + + if (copy_from_user(dummy, buffer, count)) return -EFAULT; + dummy[count] = '\0'; + tmp = 
simple_strtoul(dummy, &end, 0); if (dummy == end) return -EINVAL; @@ -1275,7 +1289,8 @@ int ldebugfs_register_stats(struct dentry *parent, const char *name, EXPORT_SYMBOL_GPL(ldebugfs_register_stats); void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned conf, const char *name, const char *units) + unsigned int conf, const char *name, + const char *units) { struct lprocfs_counter_header *header; struct lprocfs_counter *percpu_cntr; diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index 054e567e6c8d..7971562a3efd 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -68,6 +68,7 @@ enum { #define LU_SITE_BITS_MIN 12 #define LU_SITE_BITS_MAX 24 +#define LU_SITE_BITS_MAX_CL 19 /** * total 256 buckets, we don't want too many buckets because: * - consume too much memory @@ -338,7 +339,7 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr) struct cfs_hash_bd bd2; struct list_head dispose; int did_sth; - unsigned int start; + unsigned int start = 0; int count; int bnr; unsigned int i; @@ -351,7 +352,8 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr) * Under LRU list lock, scan LRU list and move unreferenced objects to * the dispose list, removing them from LRU and hash table. */ - start = s->ls_purge_start; + if (nr != ~0) + start = s->ls_purge_start; bnr = (nr == ~0) ? -1 : nr / (int)CFS_HASH_NBKT(s->ls_obj_hash) + 1; again: /* @@ -877,6 +879,9 @@ static unsigned long lu_htable_order(struct lu_device *top) unsigned long cache_size; unsigned long bits; + if (!strcmp(top->ld_type->ldt_name, LUSTRE_VVP_NAME)) + bits_max = LU_SITE_BITS_MAX_CL; + /* * Calculate hash table size, assuming that we want reasonable * performance when 20% of total memory is occupied by cache of @@ -909,8 +914,8 @@ static unsigned long lu_htable_order(struct lu_device *top) return clamp_t(typeof(bits), bits, LU_SITE_BITS_MIN, bits_max); } -static unsigned lu_obj_hop_hash(struct cfs_hash *hs, - const void *key, unsigned mask) +static unsigned int lu_obj_hop_hash(struct cfs_hash *hs, + const void *key, unsigned int mask) { struct lu_fid *fid = (struct lu_fid *)key; __u32 hash; @@ -1311,6 +1316,7 @@ enum { static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, }; static DEFINE_SPINLOCK(lu_keys_guard); +static atomic_t lu_key_initing_cnt = ATOMIC_INIT(0); /** * Global counter incremented whenever key is registered, unregistered, @@ -1318,7 +1324,7 @@ static DEFINE_SPINLOCK(lu_keys_guard); * lu_context_refill(). No locking is provided, as initialization and shutdown * are supposed to be externally serialized. */ -static unsigned key_set_version; +static unsigned int key_set_version; /** * Register new key. @@ -1385,6 +1391,19 @@ void lu_context_key_degister(struct lu_context_key *key) ++key_set_version; spin_lock(&lu_keys_guard); key_fini(&lu_shrink_env.le_ctx, key->lct_index); + + /** + * Wait until all transient contexts referencing this key have + * run lu_context_key::lct_fini() method. + */ + while (atomic_read(&key->lct_used) > 1) { + spin_unlock(&lu_keys_guard); + CDEBUG(D_INFO, "lu_context_key_degister: \"%s\" %p, %d\n", + key->lct_owner ? 
key->lct_owner->name : "", key, + atomic_read(&key->lct_used)); + schedule(); + spin_lock(&lu_keys_guard); + } if (lu_keys[key->lct_index]) { lu_keys[key->lct_index] = NULL; lu_ref_fini(&key->lct_reference); @@ -1507,14 +1526,25 @@ void lu_context_key_quiesce(struct lu_context_key *key) if (!(key->lct_tags & LCT_QUIESCENT)) { /* - * XXX layering violation. - */ - cl_env_cache_purge(~0); - key->lct_tags |= LCT_QUIESCENT; - /* * XXX memory barrier has to go here. */ spin_lock(&lu_keys_guard); + key->lct_tags |= LCT_QUIESCENT; + + /** + * Wait until all lu_context_key::lct_init() methods + * have completed. + */ + while (atomic_read(&lu_key_initing_cnt) > 0) { + spin_unlock(&lu_keys_guard); + CDEBUG(D_INFO, "lu_context_key_quiesce: \"%s\" %p, %d (%d)\n", + key->lct_owner ? key->lct_owner->name : "", + key, atomic_read(&key->lct_used), + atomic_read(&lu_key_initing_cnt)); + schedule(); + spin_lock(&lu_keys_guard); + } + list_for_each_entry(ctx, &lu_context_remembered, lc_remember) key_fini(ctx, key->lct_index); spin_unlock(&lu_keys_guard); @@ -1546,6 +1576,19 @@ static int keys_fill(struct lu_context *ctx) { unsigned int i; + /* + * A serialisation with lu_context_key_quiesce() is needed, but some + * "key->lct_init()" are calling kernel memory allocation routine and + * can't be called while holding a spin_lock. + * "lu_keys_guard" is held while incrementing "lu_key_initing_cnt" + * to ensure the start of the serialisation. + * An atomic_t variable is still used, in order not to reacquire the + * lock when decrementing the counter. + */ + spin_lock(&lu_keys_guard); + atomic_inc(&lu_key_initing_cnt); + spin_unlock(&lu_keys_guard); + LINVRNT(ctx->lc_value); for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) { struct lu_context_key *key; @@ -1563,12 +1606,19 @@ static int keys_fill(struct lu_context *ctx) LINVRNT(key->lct_init); LINVRNT(key->lct_index == i); + LASSERT(key->lct_owner); + if (!(ctx->lc_tags & LCT_NOREF) && + !try_module_get(key->lct_owner)) { + /* module is unloading, skip this key */ + continue; + } + value = key->lct_init(ctx, key); - if (IS_ERR(value)) + if (unlikely(IS_ERR(value))) { + atomic_dec(&lu_key_initing_cnt); return PTR_ERR(value); + } - if (!(ctx->lc_tags & LCT_NOREF)) - try_module_get(key->lct_owner); lu_ref_add_atomic(&key->lct_reference, "ctx", ctx); atomic_inc(&key->lct_used); /* @@ -1582,6 +1632,7 @@ static int keys_fill(struct lu_context *ctx) } ctx->lc_version = key_set_version; } + atomic_dec(&lu_key_initing_cnt); return 0; } @@ -1663,6 +1714,9 @@ void lu_context_exit(struct lu_context *ctx) ctx->lc_state = LCS_LEFT; if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value) { for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) { + /* could race with key quiescency */ + if (ctx->lc_tags & LCT_REMEMBER) + spin_lock(&lu_keys_guard); if (ctx->lc_value[i]) { struct lu_context_key *key; @@ -1671,6 +1725,8 @@ void lu_context_exit(struct lu_context *ctx) key->lct_exit(ctx, key, ctx->lc_value[i]); } + if (ctx->lc_tags & LCT_REMEMBER) + spin_unlock(&lu_keys_guard); } } } @@ -1930,7 +1986,7 @@ int lu_site_stats_print(const struct lu_site *s, struct seq_file *m) memset(&stats, 0, sizeof(stats)); lu_site_stats_get(s->ls_obj_hash, &stats, 1); - seq_printf(m, "%d/%d %d/%d %d %d %d %d %d %d %d %d\n", + seq_printf(m, "%d/%d %d/%ld %d %d %d %d %d %d %d %d\n", stats.lss_busy, stats.lss_total, stats.lss_populated, diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c index bbed1b72d52e..9ca84c7d49de 100644 --- 
a/drivers/staging/lustre/lustre/obdclass/obd_config.c +++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c @@ -35,12 +35,15 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#include "../include/obd_class.h" + #include <linux/string.h> + #include "../include/lustre/lustre_ioctl.h" -#include "../include/lustre_log.h" +#include "../include/llog_swab.h" #include "../include/lprocfs_status.h" +#include "../include/lustre_log.h" #include "../include/lustre_param.h" +#include "../include/obd_class.h" #include "llog_internal.h" @@ -446,7 +449,7 @@ static int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg) LASSERT(obd->obd_self_export); /* Precleanup, we must make sure all exports get destroyed. */ - err = obd_precleanup(obd, OBD_CLEANUP_EXPORTS); + err = obd_precleanup(obd); if (err) CERROR("Precleanup %s returned %d\n", obd->obd_name, err); @@ -585,16 +588,21 @@ static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg) } static LIST_HEAD(lustre_profile_list); +static DEFINE_SPINLOCK(lustre_profile_list_lock); struct lustre_profile *class_get_profile(const char *prof) { struct lustre_profile *lprof; + spin_lock(&lustre_profile_list_lock); list_for_each_entry(lprof, &lustre_profile_list, lp_list) { if (!strcmp(lprof->lp_profile, prof)) { + lprof->lp_refs++; + spin_unlock(&lustre_profile_list_lock); return lprof; } } + spin_unlock(&lustre_profile_list_lock); return NULL; } EXPORT_SYMBOL(class_get_profile); @@ -639,7 +647,11 @@ static int class_add_profile(int proflen, char *prof, int osclen, char *osc, } } + spin_lock(&lustre_profile_list_lock); + lprof->lp_refs = 1; + lprof->lp_list_deleted = false; list_add(&lprof->lp_list, &lustre_profile_list); + spin_unlock(&lustre_profile_list_lock); return err; free_lp_dt: @@ -659,27 +671,59 @@ void class_del_profile(const char *prof) lprof = class_get_profile(prof); if (lprof) { + spin_lock(&lustre_profile_list_lock); + /* because get profile increments the ref counter */ + lprof->lp_refs--; list_del(&lprof->lp_list); - kfree(lprof->lp_profile); - kfree(lprof->lp_dt); - kfree(lprof->lp_md); - kfree(lprof); + lprof->lp_list_deleted = true; + spin_unlock(&lustre_profile_list_lock); + + class_put_profile(lprof); } } EXPORT_SYMBOL(class_del_profile); +void class_put_profile(struct lustre_profile *lprof) +{ + spin_lock(&lustre_profile_list_lock); + if (--lprof->lp_refs > 0) { + LASSERT(lprof->lp_refs > 0); + spin_unlock(&lustre_profile_list_lock); + return; + } + spin_unlock(&lustre_profile_list_lock); + + /* confirm not a negative number */ + LASSERT(!lprof->lp_refs); + + /* + * At least one class_del_profile/profiles must be called + * on the target profile or lustre_profile_list will corrupt + */ + LASSERT(lprof->lp_list_deleted); + kfree(lprof->lp_profile); + kfree(lprof->lp_dt); + kfree(lprof->lp_md); + kfree(lprof); +} +EXPORT_SYMBOL(class_put_profile); + /* COMPAT_146 */ void class_del_profiles(void) { struct lustre_profile *lprof, *n; + spin_lock(&lustre_profile_list_lock); list_for_each_entry_safe(lprof, n, &lustre_profile_list, lp_list) { list_del(&lprof->lp_list); - kfree(lprof->lp_profile); - kfree(lprof->lp_dt); - kfree(lprof->lp_md); - kfree(lprof); + lprof->lp_list_deleted = true; + spin_unlock(&lustre_profile_list_lock); + + class_put_profile(lprof); + + spin_lock(&lustre_profile_list_lock); } + spin_unlock(&lustre_profile_list_lock); } EXPORT_SYMBOL(class_del_profiles); @@ -1406,8 +1450,8 @@ EXPORT_SYMBOL(class_manual_cleanup); * uuid<->export lustre hash operations */ -static unsigned -uuid_hash(struct cfs_hash *hs, 
const void *key, unsigned mask) +static unsigned int +uuid_hash(struct cfs_hash *hs, const void *key, unsigned int mask) { return cfs_hash_djb2_hash(((struct obd_uuid *)key)->uuid, sizeof(((struct obd_uuid *)key)->uuid), mask); diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c index 0d3a3b05a637..2283e920d839 100644 --- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c +++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c @@ -261,7 +261,7 @@ int lustre_start_mgc(struct super_block *sb) rc = obd_get_info(NULL, obd->obd_self_export, strlen(KEY_CONN_DATA), KEY_CONN_DATA, - &vallen, data, NULL); + &vallen, data); LASSERT(rc == 0); has_ir = OCD_HAS_FLAG(data, IMP_RECOV); if (has_ir ^ !(*flags & LMD_FLG_NOIR)) { @@ -382,7 +382,7 @@ int lustre_start_mgc(struct super_block *sb) /* We connect to the MGS at setup, and don't disconnect until cleanup */ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT | OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | - OBD_CONNECT_LVB_TYPE; + OBD_CONNECT_LVB_TYPE | OBD_CONNECT_BULK_MBITS; #if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB; @@ -1216,8 +1216,7 @@ static struct file_system_type lustre_fs_type = { .name = "lustre", .mount = lustre_mount, .kill_sb = lustre_kill_super, - .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV | - FS_RENAME_DOES_D_MOVE, + .fs_flags = FS_REQUIRES_DEV | FS_RENAME_DOES_D_MOVE, }; MODULE_ALIAS_FS("lustre"); diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c index 79104a66da96..c52b9e07d7dd 100644 --- a/drivers/staging/lustre/lustre/obdclass/obdo.c +++ b/drivers/staging/lustre/lustre/obdclass/obdo.c @@ -124,68 +124,3 @@ void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj) ioobj->ioo_max_brw = 0; } EXPORT_SYMBOL(obdo_to_ioobj); - -static void iattr_from_obdo(struct iattr *attr, const struct obdo *oa, - u32 valid) -{ - valid &= oa->o_valid; - - if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid %#llx, new time %llu/%llu\n", - oa->o_valid, oa->o_mtime, oa->o_ctime); - - attr->ia_valid = 0; - if (valid & OBD_MD_FLATIME) { - LTIME_S(attr->ia_atime) = oa->o_atime; - attr->ia_valid |= ATTR_ATIME; - } - if (valid & OBD_MD_FLMTIME) { - LTIME_S(attr->ia_mtime) = oa->o_mtime; - attr->ia_valid |= ATTR_MTIME; - } - if (valid & OBD_MD_FLCTIME) { - LTIME_S(attr->ia_ctime) = oa->o_ctime; - attr->ia_valid |= ATTR_CTIME; - } - if (valid & OBD_MD_FLSIZE) { - attr->ia_size = oa->o_size; - attr->ia_valid |= ATTR_SIZE; - } -#if 0 /* you shouldn't be able to change a file's type with setattr */ - if (valid & OBD_MD_FLTYPE) { - attr->ia_mode = (attr->ia_mode & ~S_IFMT) | - (oa->o_mode & S_IFMT); - attr->ia_valid |= ATTR_MODE; - } -#endif - if (valid & OBD_MD_FLMODE) { - attr->ia_mode = (attr->ia_mode & S_IFMT) | - (oa->o_mode & ~S_IFMT); - attr->ia_valid |= ATTR_MODE; - if (!in_group_p(make_kgid(&init_user_ns, oa->o_gid)) && - !capable(CFS_CAP_FSETID)) - attr->ia_mode &= ~S_ISGID; - } - if (valid & OBD_MD_FLUID) { - attr->ia_uid = make_kuid(&init_user_ns, oa->o_uid); - attr->ia_valid |= ATTR_UID; - } - if (valid & OBD_MD_FLGID) { - attr->ia_gid = make_kgid(&init_user_ns, oa->o_gid); - attr->ia_valid |= ATTR_GID; - } -} - -void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid) -{ - iattr_from_obdo(&op_data->op_attr, oa, valid); - if (valid & OBD_MD_FLBLOCKS) { - op_data->op_attr_blocks = oa->o_blocks; - 
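uuid_hash() above delegates to cfs_hash_djb2_hash() over the fixed-size uuid buffer. For reference, djb2 masked to a power-of-two table size is just the following; this is a standalone sketch, not the libcfs implementation.

    #include <stddef.h>

    /* djb2: hash = hash * 33 + byte, then mask to the table size,
     * as the mask argument of uuid_hash() does */
    static unsigned int djb2_hash(const void *key, size_t len,
                                  unsigned int mask)
    {
            const unsigned char *p = key;
            unsigned int hash = 5381;
            size_t i;

            for (i = 0; i < len; i++)
                    hash = hash * 33 + p[i];

            return hash & mask;
    }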
op_data->op_attr.ia_valid |= ATTR_BLOCKS; - } - if (valid & OBD_MD_FLFLAGS) { - op_data->op_attr_flags = oa->o_flags; - op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; - } -} -EXPORT_SYMBOL(md_from_obdo); diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c index 505582ff4d1e..549076193bde 100644 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c @@ -55,7 +55,7 @@ struct echo_device { struct echo_client_obd *ed_ec; struct cl_site ed_site_myself; - struct cl_site *ed_site; + struct lu_site *ed_site; struct lu_device *ed_next; }; @@ -505,9 +505,6 @@ static const struct lu_device_operations echo_device_lu_ops = { /** @} echo_lu_dev_ops */ -static const struct cl_device_operations echo_device_cl_ops = { -}; - /** \defgroup echo_init Setup and teardown * * Init and fini functions for echo client. @@ -527,17 +524,19 @@ static int echo_site_init(const struct lu_env *env, struct echo_device *ed) } rc = lu_site_init_finish(&site->cs_lu); - if (rc) + if (rc) { + cl_site_fini(site); return rc; + } - ed->ed_site = site; + ed->ed_site = &site->cs_lu; return 0; } static void echo_site_fini(const struct lu_env *env, struct echo_device *ed) { if (ed->ed_site) { - cl_site_fini(ed->ed_site); + lu_site_fini(ed->ed_site); ed->ed_site = NULL; } } @@ -561,16 +560,10 @@ static void echo_thread_key_fini(const struct lu_context *ctx, kmem_cache_free(echo_thread_kmem, info); } -static void echo_thread_key_exit(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ -} - static struct lu_context_key echo_thread_key = { .lct_tags = LCT_CL_THREAD, .lct_init = echo_thread_key_init, .lct_fini = echo_thread_key_fini, - .lct_exit = echo_thread_key_exit }; static void *echo_session_key_init(const struct lu_context *ctx, @@ -592,16 +585,10 @@ static void echo_session_key_fini(const struct lu_context *ctx, kmem_cache_free(echo_session_kmem, session); } -static void echo_session_key_exit(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ -} - static struct lu_context_key echo_session_key = { .lct_tags = LCT_SESSION, .lct_init = echo_session_key_init, .lct_fini = echo_session_key_fini, - .lct_exit = echo_session_key_exit }; LU_TYPE_INIT_FINI(echo, &echo_thread_key, &echo_session_key); @@ -630,7 +617,6 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, goto out_free; cd->cd_lu_dev.ld_ops = &echo_device_lu_ops; - cd->cd_ops = &echo_device_cl_ops; obd = class_name2obd(lustre_cfg_string(cfg, 0)); LASSERT(obd); @@ -674,7 +660,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, goto out_cleanup; } - next->ld_site = &ed->ed_site->cs_lu; + next->ld_site = ed->ed_site; rc = next->ld_type->ldt_ops->ldto_device_init(env, next, next->ld_type->ldt_name, NULL); @@ -741,7 +727,7 @@ static struct lu_device *echo_device_free(const struct lu_env *env, CDEBUG(D_INFO, "echo device:%p is going to be freed, next = %p\n", ed, next); - lu_site_purge(env, &ed->ed_site->cs_lu, -1); + lu_site_purge(env, ed->ed_site, -1); /* check if there are objects still alive. 
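The empty echo_thread_key_exit()/echo_session_key_exit() stubs removed below can go because the exit slot is optional; the deletion implies the framework skips a NULL hook. Roughly, with hypothetical types sketching the convention rather than the lu_context_key API:

    struct key_ops {
            void *(*init)(void);
            void (*fini)(void *data);
            void (*exit)(void *data);   /* optional: NULL means no-op */
    };

    /* callers guard the optional hook, so an empty stub adds nothing
     * over leaving the slot NULL */
    static void key_exit(const struct key_ops *ops, void *data)
    {
            if (ops->exit)
                    ops->exit(data);
    }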
* It shouldn't have any object because lu_site_purge would cleanup @@ -754,7 +740,7 @@ static struct lu_device *echo_device_free(const struct lu_env *env, spin_unlock(&ec->ec_lock); /* purge again */ - lu_site_purge(env, &ed->ed_site->cs_lu, -1); + lu_site_purge(env, ed->ed_site, -1); CDEBUG(D_INFO, "Waiting for the reference of echo object to be dropped\n"); @@ -766,7 +752,7 @@ static struct lu_device *echo_device_free(const struct lu_env *env, CERROR("echo_client still has objects at cleanup time, wait for 1 second\n"); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(cfs_time_seconds(1)); - lu_site_purge(env, &ed->ed_site->cs_lu, -1); + lu_site_purge(env, ed->ed_site, -1); spin_lock(&ec->ec_lock); } spin_unlock(&ec->ec_lock); @@ -780,11 +766,13 @@ static struct lu_device *echo_device_free(const struct lu_env *env, while (next) next = next->ld_type->ldt_ops->ldto_device_free(env, next); - LASSERT(ed->ed_site == lu2cl_site(d->ld_site)); + LASSERT(ed->ed_site == d->ld_site); echo_site_fini(env, ed); cl_device_fini(&ed->ed_cl); kfree(ed); + cl_env_cache_purge(~0); + return NULL; } @@ -1100,7 +1088,7 @@ out: static u64 last_object_id; static int echo_create_object(const struct lu_env *env, struct echo_device *ed, - struct obdo *oa, struct obd_trans_info *oti) + struct obdo *oa) { struct echo_object *eco; struct echo_client_obd *ec = ed->ed_ec; @@ -1117,7 +1105,7 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed, if (!ostid_id(&oa->o_oi)) ostid_set_id(&oa->o_oi, ++last_object_id); - rc = obd_create(env, ec->ec_exp, oa, oti); + rc = obd_create(env, ec->ec_exp, oa); if (rc != 0) { CERROR("Cannot create objects: rc = %d\n", rc); goto failed; @@ -1137,7 +1125,7 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed, failed: if (created && rc) - obd_destroy(env, ec->ec_exp, oa, oti); + obd_destroy(env, ec->ec_exp, oa); if (rc) CERROR("create object failed with: rc = %d\n", rc); return rc; @@ -1237,8 +1225,7 @@ static int echo_client_page_debug_check(struct page *page, u64 id, static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, struct echo_object *eco, u64 offset, - u64 count, int async, - struct obd_trans_info *oti) + u64 count, int async) { u32 npages; struct brw_page *pga; @@ -1332,12 +1319,11 @@ static int echo_client_prep_commit(const struct lu_env *env, struct obd_export *exp, int rw, struct obdo *oa, struct echo_object *eco, u64 offset, u64 count, - u64 batch, struct obd_trans_info *oti, - int async) + u64 batch, int async) { struct obd_ioobj ioo; struct niobuf_local *lnb; - struct niobuf_remote *rnb; + struct niobuf_remote rnb; u64 off; u64 npages, tot_pages; int i, ret = 0, brw_flags = 0; @@ -1349,9 +1335,7 @@ static int echo_client_prep_commit(const struct lu_env *env, tot_pages = count >> PAGE_SHIFT; lnb = kcalloc(npages, sizeof(struct niobuf_local), GFP_NOFS); - rnb = kcalloc(npages, sizeof(struct niobuf_remote), GFP_NOFS); - - if (!lnb || !rnb) { + if (!lnb) { ret = -ENOMEM; goto out; } @@ -1363,26 +1347,22 @@ static int echo_client_prep_commit(const struct lu_env *env, off = offset; - for (; tot_pages; tot_pages -= npages) { + for (; tot_pages > 0; tot_pages -= npages) { int lpages; if (tot_pages < npages) npages = tot_pages; - for (i = 0; i < npages; i++, off += PAGE_SIZE) { - rnb[i].rnb_offset = off; - rnb[i].rnb_len = PAGE_SIZE; - rnb[i].rnb_flags = brw_flags; - } - - ioo.ioo_bufcnt = npages; + rnb.rnb_offset = off; + rnb.rnb_len = npages * PAGE_SIZE; + rnb.rnb_flags = brw_flags; + 
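The rewrite of echo_client_prep_commit() below collapses the per-page array of remote niobufs into a single descriptor covering the whole batch. The loop structure in isolation, as a standalone C sketch; issue_io() and its printf stand in for the obd_preprw()/obd_commitrw() round trip, and batch_pages must be non-zero.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096ULL

    struct remote_buf { uint64_t off, len, flags; };

    static void issue_io(uint64_t offset, uint64_t count,
                         uint64_t batch_pages)
    {
            uint64_t tot_pages = count / PAGE_SIZE;
            uint64_t npages = batch_pages;
            uint64_t off = offset;

            for (; tot_pages > 0; tot_pages -= npages) {
                    struct remote_buf rnb;

                    if (tot_pages < npages)
                            npages = tot_pages;     /* short final batch */

                    rnb.off = off;
                    rnb.len = npages * PAGE_SIZE;   /* one contiguous span */
                    rnb.flags = 0;
                    off += rnb.len;

                    printf("I/O: off=%llu len=%llu\n",
                           (unsigned long long)rnb.off,
                           (unsigned long long)rnb.len);
            }
    }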
ioo.ioo_bufcnt = 1; + off += npages * PAGE_SIZE; lpages = npages; - ret = obd_preprw(env, rw, exp, oa, 1, &ioo, rnb, &lpages, - lnb, oti); + ret = obd_preprw(env, rw, exp, oa, 1, &ioo, &rnb, &lpages, lnb); if (ret != 0) goto out; - LASSERT(lpages == npages); for (i = 0; i < lpages; i++) { struct page *page = lnb[i].lnb_page; @@ -1401,24 +1381,21 @@ static int echo_client_prep_commit(const struct lu_env *env, if (rw == OBD_BRW_WRITE) echo_client_page_debug_setup(page, rw, - ostid_id(&oa->o_oi), - rnb[i].rnb_offset, - rnb[i].rnb_len); + ostid_id(&oa->o_oi), + lnb[i].lnb_file_offset, + lnb[i].lnb_len); else echo_client_page_debug_check(page, - ostid_id(&oa->o_oi), - rnb[i].rnb_offset, - rnb[i].rnb_len); + ostid_id(&oa->o_oi), + lnb[i].lnb_file_offset, + lnb[i].lnb_len); } - ret = obd_commitrw(env, rw, exp, oa, 1, &ioo, - rnb, npages, lnb, oti, ret); + ret = obd_commitrw(env, rw, exp, oa, 1, &ioo, &rnb, npages, lnb, + ret); if (ret != 0) goto out; - /* Reset oti otherwise it would confuse ldiskfs. */ - memset(oti, 0, sizeof(*oti)); - /* Reuse env context. */ lu_context_exit((struct lu_context *)&env->le_ctx); lu_context_enter((struct lu_context *)&env->le_ctx); @@ -1426,14 +1403,12 @@ static int echo_client_prep_commit(const struct lu_env *env, out: kfree(lnb); - kfree(rnb); return ret; } static int echo_client_brw_ioctl(const struct lu_env *env, int rw, struct obd_export *exp, - struct obd_ioctl_data *data, - struct obd_trans_info *dummy_oti) + struct obd_ioctl_data *data) { struct obd_device *obd = class_exp2obd(exp); struct echo_device *ed = obd2echo_dev(obd); @@ -1470,15 +1445,13 @@ static int echo_client_brw_ioctl(const struct lu_env *env, int rw, case 1: /* fall through */ case 2: - rc = echo_client_kbrw(ed, rw, oa, - eco, data->ioc_offset, - data->ioc_count, async, dummy_oti); + rc = echo_client_kbrw(ed, rw, oa, eco, data->ioc_offset, + data->ioc_count, async); break; case 3: - rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa, - eco, data->ioc_offset, - data->ioc_count, data->ioc_plen1, - dummy_oti, async); + rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa, eco, + data->ioc_offset, data->ioc_count, + data->ioc_plen1, async); break; default: rc = -EINVAL; @@ -1496,16 +1469,11 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, struct echo_client_obd *ec = ed->ed_ec; struct echo_object *eco; struct obd_ioctl_data *data = karg; - struct obd_trans_info dummy_oti; struct lu_env *env; - struct oti_req_ack_lock *ack_lock; struct obdo *oa; struct lu_fid fid; int rw = OBD_BRW_READ; int rc = 0; - int i; - - memset(&dummy_oti, 0, sizeof(dummy_oti)); oa = &data->ioc_obdo1; if (!(oa->o_valid & OBD_MD_FLGROUP)) { @@ -1535,7 +1503,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, goto out; } - rc = echo_create_object(env, ed, oa, &dummy_oti); + rc = echo_create_object(env, ed, oa); goto out; case OBD_IOC_DESTROY: @@ -1546,7 +1514,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, rc = echo_get_object(&eco, ed, oa); if (rc == 0) { - rc = obd_destroy(env, ec->ec_exp, oa, &dummy_oti); + rc = obd_destroy(env, ec->ec_exp, oa); if (rc == 0) eco->eo_deleted = 1; echo_put_object(eco); @@ -1556,11 +1524,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_GETATTR: rc = echo_get_object(&eco, ed, oa); if (rc == 0) { - struct obd_info oinfo = { - .oi_oa = oa, - }; - - rc = obd_getattr(env, ec->ec_exp, &oinfo); + rc = obd_getattr(env, ec->ec_exp, oa); echo_put_object(eco); } goto out; @@ 
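echo_client_page_debug_setup()/_check(), called above with each page's file offset and length, implement a stamp-and-verify scheme: writes stamp the page with identifying data, reads verify it survived the round trip. A minimal standalone version of the idea, simplified to a single stamp per page:

    #include <stdint.h>
    #include <string.h>

    /* stamp (id, offset) into the head of a page buffer on write */
    static void page_stamp(unsigned char *page, uint64_t id, uint64_t off)
    {
            memcpy(page, &id, sizeof(id));
            memcpy(page + sizeof(id), &off, sizeof(off));
    }

    /* verify the stamp on read; 0 on match, -1 on corruption */
    static int page_check(const unsigned char *page, uint64_t id,
                          uint64_t off)
    {
            uint64_t got_id, got_off;

            memcpy(&got_id, page, sizeof(got_id));
            memcpy(&got_off, page + sizeof(got_id), sizeof(got_off));
            return (got_id == id && got_off == off) ? 0 : -1;
    }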
-1573,11 +1537,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, rc = echo_get_object(&eco, ed, oa); if (rc == 0) { - struct obd_info oinfo = { - .oi_oa = oa, - }; - - rc = obd_setattr(env, ec->ec_exp, &oinfo, NULL); + rc = obd_setattr(env, ec->ec_exp, oa); echo_put_object(eco); } goto out; @@ -1591,7 +1551,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, rw = OBD_BRW_WRITE; /* fall through */ case OBD_IOC_BRW_READ: - rc = echo_client_brw_ioctl(env, rw, exp, data, &dummy_oti); + rc = echo_client_brw_ioctl(env, rw, exp, data); goto out; default: @@ -1604,14 +1564,6 @@ out: lu_env_fini(env); kfree(env); - /* XXX this should be in a helper also called by target_send_reply */ - for (ack_lock = dummy_oti.oti_ack_locks, i = 0; i < 4; - i++, ack_lock++) { - if (!ack_lock->mode) - break; - ldlm_lock_decref(&ack_lock->lock, ack_lock->mode); - } - return rc; } diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c index f0062d44ee03..575b2969ad83 100644 --- a/drivers/staging/lustre/lustre/osc/lproc_osc.c +++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c @@ -162,7 +162,7 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj, pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */ if (pages_number <= 0 || - pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_SHIFT) || + pages_number >= OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_SHIFT) || pages_number > totalram_pages / 4) /* 1/4 of RAM */ return -ERANGE; @@ -183,10 +183,12 @@ static int osc_cached_mb_seq_show(struct seq_file *m, void *v) seq_printf(m, "used_mb: %ld\n" - "busy_cnt: %ld\n", + "busy_cnt: %ld\n" + "reclaim: %llu\n", (atomic_long_read(&cli->cl_lru_in_list) + atomic_long_read(&cli->cl_lru_busy)) >> shift, - atomic_long_read(&cli->cl_lru_busy)); + atomic_long_read(&cli->cl_lru_busy), + cli->cl_lru_reclaim); return 0; } @@ -585,7 +587,8 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj, chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1); /* max_pages_per_rpc must be chunk aligned */ val = (val + ~chunk_mask) & chunk_mask; - if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) { + if (!val || (ocd->ocd_brw_size && + val > ocd->ocd_brw_size >> PAGE_SHIFT)) { return -ERANGE; } spin_lock(&cli->cl_loi_list_lock); diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index 4bbe219add98..b0f030c6c9c9 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -360,6 +360,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj) RB_CLEAR_NODE(&ext->oe_node); ext->oe_obj = obj; + cl_object_get(osc2cl(obj)); atomic_set(&ext->oe_refc, 1); atomic_set(&ext->oe_users, 0); INIT_LIST_HEAD(&ext->oe_link); @@ -398,6 +399,7 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext) LDLM_LOCK_PUT(ext->oe_dlmlock); ext->oe_dlmlock = NULL; } + cl_object_put(env, osc2cl(ext->oe_obj)); osc_extent_free(ext); } } @@ -959,7 +961,7 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, if (rc == -ETIMEDOUT) { OSC_EXTENT_DUMP(D_ERROR, ext, "%s: wait ext to %u timedout, recovery in progress?\n", - osc_export(obj)->exp_obd->obd_name, state); + cli_name(osc_cli(obj)), state); lwi = LWI_INTR(NULL, NULL); rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), @@ -977,7 +979,6 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, static int 
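max_pages_per_rpc_store() above rounds val up to a chunk multiple with val = (val + ~chunk_mask) & chunk_mask, where chunk_mask clears the low bits. A worked standalone example of that arithmetic:

    #include <assert.h>

    /* round val up to a multiple of (1 << shift) pages, exactly as
     * max_pages_per_rpc_store() does with chunk_mask */
    static unsigned long round_up_chunk(unsigned long val, int shift)
    {
            unsigned long chunk_mask = ~((1UL << shift) - 1);

            return (val + ~chunk_mask) & chunk_mask;
    }

    int main(void)
    {
            /* with shift = 2 (4-page chunks): 5 -> 8, 8 -> 8, 1 -> 4 */
            assert(round_up_chunk(5, 2) == 8);
            assert(round_up_chunk(8, 2) == 8);
            assert(round_up_chunk(1, 2) == 4);
            return 0;
    }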
osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, bool partial) { - struct cl_env_nest nest; struct lu_env *env; struct cl_io *io; struct osc_object *obj = ext->oe_obj; @@ -990,6 +991,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, int grants = 0; int nr_pages = 0; int rc = 0; + int refcheck; LASSERT(sanity_check(ext) == 0); EASSERT(ext->oe_state == OES_TRUNC, ext); @@ -999,7 +1001,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, * We can't use that env from osc_cache_truncate_start() because * it's from lov_io_sub and not fully initialized. */ - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); io = &osc_env_info(env)->oti_io; io->ci_obj = cl_object_top(osc2cl(obj)); rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); @@ -1085,7 +1087,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, out: cl_io_fini(env, io); - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); return rc; } @@ -1327,7 +1329,6 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, { struct osc_page *opg = oap2osc_page(oap); struct cl_page *page = oap2cl_page(oap); - struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); enum cl_req_type crt; int srvlock; @@ -1338,25 +1339,10 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, "cp_state:%u, cmd:%d\n", page->cp_state, cmd); LASSERT(opg->ops_transfer_pinned); - /* - * page->cp_req can be NULL if io submission failed before - * cl_req was allocated. - */ - if (page->cp_req) - cl_req_page_done(env, page); - LASSERT(!page->cp_req); - crt = cmd == OBD_BRW_READ ? CRT_READ : CRT_WRITE; /* Clear opg->ops_transfer_pinned before VM lock is released. */ opg->ops_transfer_pinned = 0; - spin_lock(&obj->oo_seatbelt); - LASSERT(opg->ops_submitter); - LASSERT(!list_empty(&opg->ops_inflight)); - list_del_init(&opg->ops_inflight); - opg->ops_submitter = NULL; - spin_unlock(&obj->oo_seatbelt); - opg->ops_submit_time = 0; srvlock = oap->oap_brw_flags & OBD_BRW_SRVLOCK; @@ -1380,16 +1366,17 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, lu_ref_del(&page->cp_reference, "transfer", page); cl_page_completion(env, page, crt, rc); + cl_page_put(env, page); return 0; } #define OSC_DUMP_GRANT(lvl, cli, fmt, args...) 
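Several functions in this patch swap cl_env_nested_get()/cl_env_nested_put() for cl_env_get()/cl_env_put() with an int refcheck cookie. A toy single-threaded model of what the cookie buys, namely catching unbalanced or misnested put calls; this sketches the shape of the interface, not the real cl_env implementation:

    #include <assert.h>

    struct env { int refs; };

    static struct env global_env;

    static struct env *env_get(int *refcheck)
    {
            *refcheck = ++global_env.refs;  /* remember our nesting depth */
            return &global_env;
    }

    static void env_put(struct env *e, const int *refcheck)
    {
            assert(e->refs == *refcheck);   /* puts must nest like gets */
            e->refs--;
    }

    static void do_work(void)
    {
            int refcheck;
            struct env *e = env_get(&refcheck);

            /* ... use e, possibly calling code that get/puts again ... */
            env_put(e, &refcheck);
    }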
do { \ struct client_obd *__tmp = (cli); \ - CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %ld/%lu " \ + CDEBUG(lvl, "%s: grant { dirty: %lu/%lu dirty_pages: %ld/%lu " \ "dropped: %ld avail: %ld, reserved: %ld, flight: %d }" \ "lru {in list: %ld, left: %ld, waiters: %d }" fmt "\n", \ - __tmp->cl_import->imp_obd->obd_name, \ + cli_name(__tmp), \ __tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages, \ atomic_long_read(&obd_dirty_pages), obd_max_dirty_pages, \ __tmp->cl_lost_grant, __tmp->cl_avail_grant, \ @@ -1627,7 +1614,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, osc_io_unplug_async(env, cli, NULL); CDEBUG(D_CACHE, "%s: sleeping for cache space @ %p for %p\n", - cli->cl_import->imp_obd->obd_name, &ocw, oap); + cli_name(cli), &ocw, oap); rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi); @@ -1671,7 +1658,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, break; default: CDEBUG(D_CACHE, "%s: event for cache space @ %p never arrived due to %d, fall back to sync i/o\n", - cli->cl_import->imp_obd->obd_name, &ocw, rc); + cli_name(cli), &ocw, rc); break; } out: @@ -1931,7 +1918,8 @@ static int try_to_add_extent_for_io(struct client_obd *cli, } if (tmp->oe_srvlock != ext->oe_srvlock || - !tmp->oe_grants != !ext->oe_grants) + !tmp->oe_grants != !ext->oe_grants || + tmp->oe_no_merge || ext->oe_no_merge) return 0; /* remove break for strict check */ @@ -2250,14 +2238,9 @@ static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli, return 0; if (!async) { - /* disable osc_lru_shrink() temporarily to avoid - * potential stack overrun problem. LU-2859 - */ - atomic_inc(&cli->cl_lru_shrinkers); spin_lock(&cli->cl_loi_list_lock); osc_check_rpcs(env, cli); spin_unlock(&cli->cl_loi_list_lock); - atomic_dec(&cli->cl_lru_shrinkers); } else { CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli); LASSERT(cli->cl_writeback_work); @@ -2479,7 +2462,6 @@ int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj, struct osc_page *ops) { struct osc_async_page *oap = &ops->ops_oap; - struct osc_extent *ext = NULL; int rc = 0; LASSERT(oap->oap_magic == OAP_MAGIC); @@ -2487,12 +2469,15 @@ int osc_teardown_async_page(const struct lu_env *env, CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n", oap, ops, osc_index(oap2osc(oap))); - osc_object_lock(obj); if (!list_empty(&oap->oap_rpc_item)) { CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap); rc = -EBUSY; } else if (!list_empty(&oap->oap_pending_item)) { + struct osc_extent *ext = NULL; + + osc_object_lock(obj); ext = osc_extent_lookup(obj, osc_index(oap2osc(oap))); + osc_object_unlock(obj); /* only truncated pages are allowed to be taken out. * See osc_extent_truncate() and osc_cache_truncate_start() * for details. 
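try_to_add_extent_for_io() above now refuses to merge extents into one RPC when either side carries the new oe_no_merge flag, in addition to the existing srvlock and grant checks. The merge gate restated as a standalone predicate:

    #include <stdbool.h>

    struct extent {
            bool srvlock;
            bool has_grants;
            bool no_merge;      /* set for sync extents with partial pages */
    };

    /* extents may share an RPC only if lock mode and grant accounting
     * agree and neither side is flagged unmergeable */
    static bool can_share_rpc(const struct extent *a, const struct extent *b)
    {
            if (a->srvlock != b->srvlock)
                    return false;
            if (a->has_grants != b->has_grants)
                    return false;
            if (a->no_merge || b->no_merge)
                    return false;
            return true;
    }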
@@ -2502,10 +2487,9 @@ int osc_teardown_async_page(const struct lu_env *env, osc_index(oap2osc(oap))); rc = -EBUSY; } + if (ext) + osc_extent_put(env, ext); } - osc_object_unlock(obj); - if (ext) - osc_extent_put(env, ext); return rc; } @@ -2666,11 +2650,13 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, struct osc_async_page *oap, *tmp; int page_count = 0; int mppr = cli->cl_max_pages_per_rpc; + bool can_merge = true; pgoff_t start = CL_PAGE_EOF; pgoff_t end = 0; list_for_each_entry(oap, list, oap_pending_item) { - pgoff_t index = osc_index(oap2osc(oap)); + struct osc_page *opg = oap2osc_page(oap); + pgoff_t index = osc_index(opg); if (index > end) end = index; @@ -2678,6 +2664,9 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, start = index; ++page_count; mppr <<= (page_count > mppr); + + if (unlikely(opg->ops_from > 0 || opg->ops_to < PAGE_SIZE)) + can_merge = false; } ext = osc_extent_alloc(obj); @@ -2691,6 +2680,7 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, ext->oe_rw = !!(cmd & OBD_BRW_READ); ext->oe_sync = 1; + ext->oe_no_merge = !can_merge; ext->oe_urgent = 1; ext->oe_start = start; ext->oe_end = end; @@ -3158,7 +3148,8 @@ static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io, struct cl_page *page = ops->ops_cl.cpl_page; /* refresh non-overlapped index */ - tmp = osc_dlmlock_at_pgoff(env, osc, index, 0, 0); + tmp = osc_dlmlock_at_pgoff(env, osc, index, + OSC_DAP_FL_TEST_LOCK); if (tmp) { __u64 end = tmp->l_policy_data.l_extent.end; /* Cache the first-non-overlapped index so as to skip diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h index 9c8de15c309c..cce55a9689f0 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h +++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h @@ -77,7 +77,6 @@ struct osc_io { /** write osc_lock for this IO, used by osc_extent_find(). */ struct osc_lock *oi_write_osclock; - struct obd_info oi_info; struct obdo oi_oa; struct osc_async_cbargs { bool opc_rpc_sent; @@ -87,13 +86,6 @@ struct osc_io { }; /** - * State of transfer for osc. - */ -struct osc_req { - struct cl_req_slice or_cl; -}; - -/** * State maintained by osc layer for the duration of a system call. */ struct osc_session { @@ -103,7 +95,7 @@ struct osc_session { #define OTI_PVEC_SIZE 256 struct osc_thread_info { struct ldlm_res_id oti_resname; - ldlm_policy_data_t oti_policy; + union ldlm_policy_data oti_policy; struct cl_lock_descr oti_descr; struct cl_attr oti_attr; struct lustre_handle oti_handle; @@ -116,6 +108,7 @@ struct osc_thread_info { pgoff_t oti_next_index; pgoff_t oti_fn_index; /* first non-overlapped index */ struct cl_sync_io oti_anchor; + struct cl_req_attr oti_req_attr; }; struct osc_object { @@ -127,16 +120,6 @@ struct osc_object { int oo_contended; unsigned long oo_contention_time; /** - * List of pages in transfer. - */ - struct list_head oo_inflight[CRT_NR]; - /** - * Lock, protecting osc_page::ops_inflight, because a seat-belt is - * locked during take-off and landing. - */ - spinlock_t oo_seatbelt; - - /** * used by the osc to keep track of what objects to build into rpcs. * Protected by client_obd->cli_loi_list_lock. */ @@ -364,15 +347,6 @@ struct osc_page { */ struct list_head ops_lru; /** - * Linkage into a per-osc_object list of pages in flight. For - * debugging. - */ - struct list_head ops_inflight; - /** - * Thread that submitted this page for transfer. For debugging. 
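The list scan in osc_queue_sync_pages() above computes the covered index range, grows the max-pages-per-RPC value in powers of two, and flags partial pages that make the extent unmergeable. The same loop in isolation; page_req is a stand-in for the osc_async_page/osc_page pair:

    #include <stdbool.h>
    #include <stddef.h>

    #define PAGE_SIZE 4096u

    struct page_req { unsigned long index; unsigned int from, to; };

    static void scan_pages(const struct page_req *pages, size_t n,
                           unsigned long *start, unsigned long *end,
                           int *mppr, bool *can_merge)
    {
            size_t i;
            int count = 0;

            *start = (unsigned long)-1;     /* CL_PAGE_EOF analogue */
            *end = 0;
            *can_merge = true;

            for (i = 0; i < n; i++) {
                    if (pages[i].index > *end)
                            *end = pages[i].index;
                    if (pages[i].index < *start)
                            *start = pages[i].index;
                    ++count;
                    *mppr <<= (count > *mppr);  /* double while exceeded */

                    if (pages[i].from > 0 || pages[i].to < PAGE_SIZE)
                            *can_merge = false; /* partial page present */
            }
    }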
- */ - struct task_struct *ops_submitter; - /** * Submit time - the time when the page is starting RPC. For debugging. */ unsigned long ops_submit_time; @@ -382,7 +356,6 @@ extern struct kmem_cache *osc_lock_kmem; extern struct kmem_cache *osc_object_kmem; extern struct kmem_cache *osc_thread_kmem; extern struct kmem_cache *osc_session_kmem; -extern struct kmem_cache *osc_req_kmem; extern struct kmem_cache *osc_extent_kmem; extern struct lu_device_type osc_device_type; @@ -396,15 +369,14 @@ int osc_lock_init(const struct lu_env *env, const struct cl_io *io); int osc_io_init(const struct lu_env *env, struct cl_object *obj, struct cl_io *io); -int osc_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req); struct lu_object *osc_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); int osc_page_init(const struct lu_env *env, struct cl_object *obj, struct cl_page *page, pgoff_t ind); -void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj, +void osc_index2policy(union ldlm_policy_data *policy, + const struct cl_object *obj, pgoff_t start, pgoff_t end); int osc_lvb_print(const struct lu_env *env, void *cookie, lu_printer_t p, const struct ost_lvb *lvb); @@ -554,6 +526,16 @@ static inline struct osc_page *oap2osc_page(struct osc_async_page *oap) return (struct osc_page *)container_of(oap, struct osc_page, ops_oap); } +static inline struct osc_page * +osc_cl_page_osc(struct cl_page *page, struct osc_object *osc) +{ + const struct cl_page_slice *slice; + + LASSERT(osc); + slice = cl_object_page_slice(&osc->oo_cl, page); + return cl2osc_page(slice); +} + static inline struct osc_lock *cl2osc_lock(const struct cl_lock_slice *slice) { LINVRNT(osc_is_object(&slice->cls_obj->co_lu)); @@ -615,6 +597,10 @@ struct osc_extent { oe_rw:1, /** sync extent, queued by osc_queue_sync_pages() */ oe_sync:1, + /** set if this extent has partial, sync pages. + * Extents with partial page(s) can't merge with others in RPC + */ + oe_no_merge:1, oe_srvlock:1, oe_memalloc:1, /** an ACTIVE extent is going to be truncated, so when this extent diff --git a/drivers/staging/lustre/lustre/osc/osc_dev.c b/drivers/staging/lustre/lustre/osc/osc_dev.c index 83d30c135ba4..c5d62aeaeab5 100644 --- a/drivers/staging/lustre/lustre/osc/osc_dev.c +++ b/drivers/staging/lustre/lustre/osc/osc_dev.c @@ -29,7 +29,7 @@ * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. * - * Implementation of cl_device, cl_req for OSC layer. + * Implementation of cl_device, for OSC layer. * * Author: Nikita Danilov <nikita.danilov@sun.com> */ @@ -49,7 +49,6 @@ struct kmem_cache *osc_lock_kmem; struct kmem_cache *osc_object_kmem; struct kmem_cache *osc_thread_kmem; struct kmem_cache *osc_session_kmem; -struct kmem_cache *osc_req_kmem; struct kmem_cache *osc_extent_kmem; struct kmem_cache *osc_quota_kmem; @@ -75,11 +74,6 @@ struct lu_kmem_descr osc_caches[] = { .ckd_size = sizeof(struct osc_session) }, { - .ckd_cache = &osc_req_kmem, - .ckd_name = "osc_req_kmem", - .ckd_size = sizeof(struct osc_req) - }, - { .ckd_cache = &osc_extent_kmem, .ckd_name = "osc_extent_kmem", .ckd_size = sizeof(struct osc_extent) @@ -94,8 +88,6 @@ struct lu_kmem_descr osc_caches[] = { } }; -struct lock_class_key osc_ast_guard_class; - /***************************************************************************** * * Type conversions. 
@@ -178,10 +170,6 @@ static const struct lu_device_operations osc_lu_ops = { .ldo_recovery_complete = NULL }; -static const struct cl_device_operations osc_cl_ops = { - .cdo_req_init = osc_req_init -}; - static int osc_device_init(const struct lu_env *env, struct lu_device *d, const char *name, struct lu_device *next) { @@ -220,7 +208,6 @@ static struct lu_device *osc_device_alloc(const struct lu_env *env, cl_device_init(&od->od_cl, t); d = osc2lu_dev(od); d->ld_ops = &osc_lu_ops; - od->od_cl.cd_ops = &osc_cl_ops; /* Setup OSC OBD */ obd = class_name2obd(lustre_cfg_string(cfg, 0)); diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h index 67fe0a254991..688783dcc1e4 100644 --- a/drivers/staging/lustre/lustre/osc/osc_internal.h +++ b/drivers/staging/lustre/lustre/osc/osc_internal.h @@ -107,26 +107,24 @@ typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh, int rc); int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u64 *flags, ldlm_policy_data_t *policy, + __u64 *flags, union ldlm_policy_data *policy, struct ost_lvb *lvb, int kms_valid, osc_enqueue_upcall_f upcall, void *cookie, struct ldlm_enqueue_info *einfo, struct ptlrpc_request_set *rqset, int async, int agl); -int osc_cancel_base(struct lustre_handle *lockh, __u32 mode); int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u32 type, ldlm_policy_data_t *policy, __u32 mode, + __u32 type, union ldlm_policy_data *policy, __u32 mode, __u64 *flags, void *data, struct lustre_handle *lockh, int unref); -int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset); -int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo, +int osc_setattr_async(struct obd_export *exp, struct obdo *oa, + obd_enqueue_update_f upcall, void *cookie, + struct ptlrpc_request_set *rqset); +int osc_punch_base(struct obd_export *exp, struct obdo *oa, obd_enqueue_update_f upcall, void *cookie, struct ptlrpc_request_set *rqset); -int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo, +int osc_sync_base(struct osc_object *exp, struct obdo *oa, obd_enqueue_update_f upcall, void *cookie, struct ptlrpc_request_set *rqset); @@ -135,7 +133,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, struct list_head *ext_list, int cmd); long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, long target, bool force); -long osc_lru_reclaim(struct client_obd *cli); +long osc_lru_reclaim(struct client_obd *cli, unsigned long npages); unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock); @@ -157,6 +155,11 @@ static inline unsigned long rpcs_in_flight(struct client_obd *cli) return cli->cl_r_in_flight + cli->cl_w_in_flight; } +static inline char *cli_name(struct client_obd *cli) +{ + return cli->cl_import->imp_obd->obd_name; +} + struct osc_device { struct cl_device od_cl; struct obd_export *od_exp; @@ -192,15 +195,27 @@ int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[], int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]); int osc_quotactl(struct obd_device *unused, struct obd_export *exp, struct obd_quotactl *oqctl); -int osc_quotacheck(struct obd_device *unused, struct obd_export *exp, - struct obd_quotactl *oqctl); -int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk); void osc_inc_unstable_pages(struct 
ptlrpc_request *req); void osc_dec_unstable_pages(struct ptlrpc_request *req); bool osc_over_unstable_soft_limit(struct client_obd *cli); +/** + * Bit flags for osc_dlmlock_at_pgoff(). + */ +enum osc_dap_flags { + /** + * Just check if the desired lock exists; it won't hold a reference + * count on the lock. + */ + OSC_DAP_FL_TEST_LOCK = BIT(0), + /** + * Return the lock even if it is being canceled. + */ + OSC_DAP_FL_CANCELING = BIT(1), +}; + struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, struct osc_object *obj, pgoff_t index, - int pending, int canceling); + enum osc_dap_flags flags); #endif /* OSC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c index 8a559cbcdd0c..228a97c098fe 100644 --- a/drivers/staging/lustre/lustre/osc/osc_io.c +++ b/drivers/staging/lustre/lustre/osc/osc_io.c @@ -49,12 +49,6 @@ * */ -static struct osc_req *cl2osc_req(const struct cl_req_slice *slice) -{ - LINVRNT(slice->crs_dev->cd_lu_dev.ld_type == &osc_device_type); - return container_of0(slice, struct osc_req, or_cl); -} - static struct osc_io *cl2osc_io(const struct lu_env *env, const struct cl_io_slice *slice) { @@ -64,20 +58,6 @@ static struct osc_io *cl2osc_io(const struct lu_env *env, return oio; } -static struct osc_page *osc_cl_page_osc(struct cl_page *page, - struct osc_object *osc) -{ - const struct cl_page_slice *slice; - - if (osc) - slice = cl_object_page_slice(&osc->oo_cl, page); - else - slice = cl_page_at(page, &osc_device_type); - LASSERT(slice); - - return cl2osc_page(slice); -} - /***************************************************************************** * * io operations. @@ -88,6 +68,45 @@ static void osc_io_fini(const struct lu_env *env, const struct cl_io_slice *io) { } +static void osc_read_ahead_release(const struct lu_env *env, void *cbdata) +{ + struct ldlm_lock *dlmlock = cbdata; + struct lustre_handle lockh; + + ldlm_lock2handle(dlmlock, &lockh); + ldlm_lock_decref(&lockh, LCK_PR); + LDLM_LOCK_PUT(dlmlock); +} + +static int osc_io_read_ahead(const struct lu_env *env, + const struct cl_io_slice *ios, + pgoff_t start, struct cl_read_ahead *ra) +{ + struct osc_object *osc = cl2osc(ios->cis_obj); + struct ldlm_lock *dlmlock; + int result = -ENODATA; + + dlmlock = osc_dlmlock_at_pgoff(env, osc, start, 0); + if (dlmlock) { + LASSERT(dlmlock->l_ast_data == osc); + if (dlmlock->l_req_mode != LCK_PR) { + struct lustre_handle lockh; + + ldlm_lock2handle(dlmlock, &lockh); + ldlm_lock_addref(&lockh, LCK_PR); + ldlm_lock_decref(&lockh, dlmlock->l_req_mode); + } + + ra->cra_end = cl_index(osc2cl(osc), + dlmlock->l_policy_data.l_extent.end); + ra->cra_release = osc_read_ahead_release; + ra->cra_cbdata = dlmlock; + result = 0; + } + + return result; +} + /** * An implementation of cl_io_operations::cio_io_submit() method for osc * layer.
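osc_io_read_ahead() above re-pins a non-PR lock in PR mode before dropping the original reference, so the reference count never touches zero mid-swap; the release callback later drops the PR reference. Just the mode handoff, sketched over a hypothetical two-mode lock:

    enum mode { PR, PW };

    struct lock { int refs[2]; };

    static void lock_addref(struct lock *l, enum mode m) { l->refs[m]++; }
    static void lock_decref(struct lock *l, enum mode m) { l->refs[m]--; }

    static void pin_for_readahead(struct lock *l, enum mode req_mode)
    {
            if (req_mode != PR) {
                    lock_addref(l, PR);         /* take PR before ... */
                    lock_decref(l, req_mode);   /* ... releasing PW */
            }
            /* the release callback later does lock_decref(l, PR) */
    }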
Iterates over pages in the in-queue, prepares each for io by calling @@ -334,7 +353,7 @@ static int osc_io_rw_iter_init(const struct lu_env *env, npages = max_pages; c = atomic_long_read(cli->cl_lru_left); - if (c < npages && osc_lru_reclaim(cli) > 0) + if (c < npages && osc_lru_reclaim(cli, npages) > 0) c = atomic_long_read(cli->cl_lru_left); while (c >= npages) { if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) { @@ -343,6 +362,17 @@ static int osc_io_rw_iter_init(const struct lu_env *env, } c = atomic_long_read(cli->cl_lru_left); } + if (atomic_long_read(cli->cl_lru_left) < max_pages) { + /* + * If there aren't enough pages in the per-OSC LRU then + * wake up the LRU thread to try and clear out space, so + * we don't block if pages are being dirtied quickly. + */ + CDEBUG(D_CACHE, "%s: queue LRU, left: %lu/%ld.\n", + cli_name(cli), atomic_long_read(cli->cl_lru_left), + max_pages); + (void)ptlrpcd_queue_work(cli->cl_lru_work); + } return 0; } @@ -446,7 +476,6 @@ static int osc_io_setattr_start(const struct lu_env *env, __u64 size = io->u.ci_setattr.sa_attr.lvb_size; unsigned int ia_valid = io->u.ci_setattr.sa_valid; int result = 0; - struct obd_info oinfo = { }; /* truncate cache dirty pages first */ if (cl_io_is_trunc(io)) @@ -486,11 +515,19 @@ static int osc_io_setattr_start(const struct lu_env *env, oa->o_oi = loi->loi_oi; obdo_set_parent_fid(oa, io->u.ci_setattr.sa_parent_fid); oa->o_stripe_idx = io->u.ci_setattr.sa_stripe_index; - oa->o_mtime = attr->cat_mtime; - oa->o_atime = attr->cat_atime; - oa->o_ctime = attr->cat_ctime; - oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME | - OBD_MD_FLCTIME | OBD_MD_FLMTIME; + oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP; + if (ia_valid & ATTR_CTIME) { + oa->o_valid |= OBD_MD_FLCTIME; + oa->o_ctime = attr->cat_ctime; + } + if (ia_valid & ATTR_ATIME) { + oa->o_valid |= OBD_MD_FLATIME; + oa->o_atime = attr->cat_atime; + } + if (ia_valid & ATTR_MTIME) { + oa->o_valid |= OBD_MD_FLMTIME; + oa->o_mtime = attr->cat_mtime; + } if (ia_valid & ATTR_SIZE) { oa->o_size = size; oa->o_blocks = OBD_OBJECT_EOF; @@ -503,19 +540,21 @@ static int osc_io_setattr_start(const struct lu_env *env, } else { LASSERT(oio->oi_lockless == 0); } + if (ia_valid & ATTR_ATTR_FLAG) { + oa->o_flags = io->u.ci_setattr.sa_attr_flags; + oa->o_valid |= OBD_MD_FLFLAGS; + } - oinfo.oi_oa = oa; init_completion(&cbargs->opc_sync); if (ia_valid & ATTR_SIZE) result = osc_punch_base(osc_export(cl2osc(obj)), - &oinfo, osc_async_upcall, + oa, osc_async_upcall, cbargs, PTLRPCD_SET); else - result = osc_setattr_async_base(osc_export(cl2osc(obj)), - &oinfo, NULL, - osc_async_upcall, - cbargs, PTLRPCD_SET); + result = osc_setattr_async(osc_export(cl2osc(obj)), + oa, osc_async_upcall, + cbargs, PTLRPCD_SET); cbargs->opc_rpc_sent = result == 0; } return result; @@ -557,6 +596,107 @@ static void osc_io_setattr_end(const struct lu_env *env, } } +struct osc_data_version_args { + struct osc_io *dva_oio; +}; + +static int +osc_data_version_interpret(const struct lu_env *env, struct ptlrpc_request *req, + void *arg, int rc) +{ + struct osc_data_version_args *dva = arg; + struct osc_io *oio = dva->dva_oio; + const struct ost_body *body; + + if (rc < 0) + goto out; + + body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); + if (!body) { + rc = -EPROTO; + goto out; + } + + lustre_get_wire_obdo(&req->rq_import->imp_connect_data, &oio->oi_oa, + &body->oa); +out: + oio->oi_cbarg.opc_rc = rc; + complete(&oio->oi_cbarg.opc_sync); + + return 0; +} + +static int 
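The reservation loop in osc_io_rw_iter_init() above is a compare-and-swap carve-out from a shared LRU budget. The same loop in portable C11; note that atomic_compare_exchange_weak() reloads c on failure, which replaces the explicit atomic_long_read() retry in the kernel version.

    #include <stdatomic.h>
    #include <stdbool.h>

    /* atomically carve npages out of a shared budget, retrying on
     * contention; false means the budget is exhausted */
    static bool reserve_lru_slots(atomic_long *left, long npages)
    {
            long c = atomic_load(left);

            while (c >= npages) {
                    if (atomic_compare_exchange_weak(left, &c, c - npages))
                            return true;    /* got our slots */
                    /* c now holds the fresh value; try again */
            }
            return false;
    }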
osc_io_data_version_start(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version; + struct osc_io *oio = cl2osc_io(env, slice); + struct osc_async_cbargs *cbargs = &oio->oi_cbarg; + struct osc_object *obj = cl2osc(slice->cis_obj); + struct obd_export *exp = osc_export(obj); + struct lov_oinfo *loi = obj->oo_oinfo; + struct osc_data_version_args *dva; + struct obdo *oa = &oio->oi_oa; + struct ptlrpc_request *req; + struct ost_body *body; + int rc; + + memset(oa, 0, sizeof(*oa)); + oa->o_oi = loi->loi_oi; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; + + if (dv->dv_flags & (LL_DV_RD_FLUSH | LL_DV_WR_FLUSH)) { + oa->o_valid |= OBD_MD_FLFLAGS; + oa->o_flags |= OBD_FL_SRVLOCK; + if (dv->dv_flags & LL_DV_WR_FLUSH) + oa->o_flags |= OBD_FL_FLUSH; + } + + init_completion(&cbargs->opc_sync); + + req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR); + if (!req) + return -ENOMEM; + + rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR); + if (rc < 0) { + ptlrpc_request_free(req); + return rc; + } + + body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); + lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); + + ptlrpc_request_set_replen(req); + req->rq_interpret_reply = osc_data_version_interpret; + CLASSERT(sizeof(*dva) <= sizeof(req->rq_async_args)); + dva = ptlrpc_req_async_args(req); + dva->dva_oio = oio; + + ptlrpcd_add_req(req); + + return 0; +} + +static void osc_io_data_version_end(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version; + struct osc_io *oio = cl2osc_io(env, slice); + struct osc_async_cbargs *cbargs = &oio->oi_cbarg; + + wait_for_completion(&cbargs->opc_sync); + + if (cbargs->opc_rc) { + slice->cis_io->ci_result = cbargs->opc_rc; + } else if (!(oio->oi_oa.o_valid & OBD_MD_FLDATAVERSION)) { + slice->cis_io->ci_result = -EOPNOTSUPP; + } else { + dv->dv_data_version = oio->oi_oa.o_data_version; + slice->cis_io->ci_result = 0; + } +} + static int osc_io_read_start(const struct lu_env *env, const struct cl_io_slice *slice) { @@ -595,7 +735,6 @@ static int osc_fsync_ost(const struct lu_env *env, struct osc_object *obj, { struct osc_io *oio = osc_env_io(env); struct obdo *oa = &oio->oi_oa; - struct obd_info *oinfo = &oio->oi_info; struct lov_oinfo *loi = obj->oo_oinfo; struct osc_async_cbargs *cbargs = &oio->oi_cbarg; int rc = 0; @@ -611,12 +750,9 @@ static int osc_fsync_ost(const struct lu_env *env, struct osc_object *obj, obdo_set_parent_fid(oa, fio->fi_fid); - memset(oinfo, 0, sizeof(*oinfo)); - oinfo->oi_oa = oa; init_completion(&cbargs->opc_sync); - rc = osc_sync_base(osc_export(obj), oinfo, osc_async_upcall, cbargs, - PTLRPCD_SET); + rc = osc_sync_base(obj, oa, osc_async_upcall, cbargs, PTLRPCD_SET); return rc; } @@ -710,6 +846,10 @@ static const struct cl_io_operations osc_io_ops = { .cio_start = osc_io_setattr_start, .cio_end = osc_io_setattr_end }, + [CIT_DATA_VERSION] = { + .cio_start = osc_io_data_version_start, + .cio_end = osc_io_data_version_end, + }, [CIT_FAULT] = { .cio_start = osc_io_fault_start, .cio_end = osc_io_end, @@ -724,6 +864,7 @@ static const struct cl_io_operations osc_io_ops = { .cio_fini = osc_io_fini } }, + .cio_read_ahead = osc_io_read_ahead, .cio_submit = osc_io_submit, .cio_commit_async = osc_io_commit_async }; @@ -734,103 +875,6 @@ static const struct cl_io_operations osc_io_ops = { * */ -static int osc_req_prep(const struct lu_env *env, - const 
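osc_io_data_version_start() above stashes its per-request context in the request's rq_async_args scratch area and lets the interpret callback recover it at reply time. The shape of that idiom with simplified stand-in types, not the real ptlrpc structures; the CLASSERT becomes a _Static_assert here:

    struct request {
            char async_args[64];        /* rq_async_args analogue */
            int (*interpret)(struct request *req, int rc);
    };

    struct completion_state { int rc; int done; };

    struct dv_args { struct completion_state *state; };

    static int dv_interpret(struct request *req, int rc)
    {
            struct dv_args *args = (struct dv_args *)req->async_args;

            args->state->rc = rc;       /* opc_rc analogue */
            args->state->done = 1;      /* complete(&opc_sync) analogue */
            return 0;
    }

    static void submit(struct request *req, struct completion_state *st)
    {
            struct dv_args *args = (struct dv_args *)req->async_args;

            /* the CLASSERT in the patch: context must fit the area */
            _Static_assert(sizeof(struct dv_args) <= sizeof(req->async_args),
                           "dv_args too large");
            args->state = st;
            req->interpret = dv_interpret;
            /* ... queue req; dv_interpret() runs when the reply arrives ... */
    }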
struct cl_req_slice *slice) -{ - return 0; -} - -static void osc_req_completion(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret) -{ - struct osc_req *or; - - or = cl2osc_req(slice); - kmem_cache_free(osc_req_kmem, or); -} - -/** - * Implementation of struct cl_req_operations::cro_attr_set() for osc - * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq - * fields. - */ -static void osc_req_attr_set(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *attr, u64 flags) -{ - struct lov_oinfo *oinfo; - struct cl_req *clerq; - struct cl_page *apage; /* _some_ page in @clerq */ - struct ldlm_lock *lock; /* _some_ lock protecting @apage */ - struct osc_page *opg; - struct obdo *oa; - struct ost_lvb *lvb; - - oinfo = cl2osc(obj)->oo_oinfo; - lvb = &oinfo->loi_lvb; - oa = attr->cra_oa; - - if ((flags & OBD_MD_FLMTIME) != 0) { - oa->o_mtime = lvb->lvb_mtime; - oa->o_valid |= OBD_MD_FLMTIME; - } - if ((flags & OBD_MD_FLATIME) != 0) { - oa->o_atime = lvb->lvb_atime; - oa->o_valid |= OBD_MD_FLATIME; - } - if ((flags & OBD_MD_FLCTIME) != 0) { - oa->o_ctime = lvb->lvb_ctime; - oa->o_valid |= OBD_MD_FLCTIME; - } - if (flags & OBD_MD_FLGROUP) { - ostid_set_seq(&oa->o_oi, ostid_seq(&oinfo->loi_oi)); - oa->o_valid |= OBD_MD_FLGROUP; - } - if (flags & OBD_MD_FLID) { - ostid_set_id(&oa->o_oi, ostid_id(&oinfo->loi_oi)); - oa->o_valid |= OBD_MD_FLID; - } - if (flags & OBD_MD_FLHANDLE) { - clerq = slice->crs_req; - LASSERT(!list_empty(&clerq->crq_pages)); - apage = container_of(clerq->crq_pages.next, - struct cl_page, cp_flight); - opg = osc_cl_page_osc(apage, NULL); - lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg), - 1, 1); - if (!lock && !opg->ops_srvlock) { - struct ldlm_resource *res; - struct ldlm_res_id *resname; - - CL_PAGE_DEBUG(D_ERROR, env, apage, "uncovered page!\n"); - - resname = &osc_env_info(env)->oti_resname; - ostid_build_res_name(&oinfo->loi_oi, resname); - res = ldlm_resource_get( - osc_export(cl2osc(obj))->exp_obd->obd_namespace, - NULL, resname, LDLM_EXTENT, 0); - ldlm_resource_dump(D_ERROR, res); - - dump_stack(); - LBUG(); - } - - /* check for lockless io. 
*/ - if (lock) { - oa->o_handle = lock->l_remote_handle; - oa->o_valid |= OBD_MD_FLHANDLE; - LDLM_LOCK_PUT(lock); - } - } -} - -static const struct cl_req_operations osc_req_ops = { - .cro_prep = osc_req_prep, - .cro_attr_set = osc_req_attr_set, - .cro_completion = osc_req_completion -}; - int osc_io_init(const struct lu_env *env, struct cl_object *obj, struct cl_io *io) { @@ -841,20 +885,4 @@ int osc_io_init(const struct lu_env *env, return 0; } -int osc_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req) -{ - struct osc_req *or; - int result; - - or = kmem_cache_zalloc(osc_req_kmem, GFP_NOFS); - if (or) { - cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops); - result = 0; - } else { - result = -ENOMEM; - } - return result; -} - /** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c index 39a8a5851603..5f799a4c78f9 100644 --- a/drivers/staging/lustre/lustre/osc/osc_lock.c +++ b/drivers/staging/lustre/lustre/osc/osc_lock.c @@ -145,7 +145,7 @@ static void osc_lock_fini(const struct lu_env *env, static void osc_lock_build_policy(const struct lu_env *env, const struct cl_lock *lock, - ldlm_policy_data_t *policy) + union ldlm_policy_data *policy) { const struct cl_lock_descr *d = &lock->cll_descr; @@ -188,7 +188,7 @@ static void osc_lock_lvb_update(const struct lu_env *env, struct cl_object *obj = osc2cl(osc); struct lov_oinfo *oinfo = osc->oo_oinfo; struct cl_attr *attr = &osc_env_info(env)->oti_attr; - unsigned valid; + unsigned int valid; valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE; if (!lvb) @@ -294,10 +294,10 @@ static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh, struct osc_lock *oscl = cookie; struct cl_lock_slice *slice = &oscl->ols_cl; struct lu_env *env; - struct cl_env_nest nest; int rc; + int refcheck; - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); /* should never happen, similar to osc_ldlm_blocking_ast(). 
*/ LASSERT(!IS_ERR(env)); @@ -336,7 +336,7 @@ static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh, if (oscl->ols_owner) cl_sync_io_note(env, oscl->ols_owner, rc); - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); return rc; } @@ -347,9 +347,9 @@ static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh, struct osc_object *osc = cookie; struct ldlm_lock *dlmlock; struct lu_env *env; - struct cl_env_nest nest; + int refcheck; - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); LASSERT(!IS_ERR(env)); if (errcode == ELDLM_LOCK_MATCHED) { @@ -374,7 +374,7 @@ static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh, out: cl_object_put(env, osc2cl(osc)); - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); return ldlm_error2errno(errcode); } @@ -382,11 +382,11 @@ static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end, enum cl_lock_mode mode, int discard) { struct lu_env *env; - struct cl_env_nest nest; + int refcheck; int rc = 0; int rc2 = 0; - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) return PTR_ERR(env); @@ -404,7 +404,7 @@ static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end, if (rc == 0 && rc2 < 0) rc = rc2; - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); return rc; } @@ -536,7 +536,7 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock, } case LDLM_CB_CANCELING: { struct lu_env *env; - struct cl_env_nest nest; + int refcheck; /* * This can be called in the context of outer IO, e.g., @@ -549,14 +549,14 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock, * new environment has to be created to not corrupt outer * context. */ - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) { result = PTR_ERR(env); break; } result = osc_dlm_blocking_ast0(env, dlmlock, data, flag); - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); break; } default: @@ -568,61 +568,63 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock, static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) { struct ptlrpc_request *req = data; - struct cl_env_nest nest; struct lu_env *env; struct ost_lvb *lvb; struct req_capsule *cap; + struct cl_object *obj = NULL; int result; + int refcheck; LASSERT(lustre_msg_get_opc(req->rq_reqmsg) == LDLM_GL_CALLBACK); - env = cl_env_nested_get(&nest); - if (!IS_ERR(env)) { - struct cl_object *obj = NULL; + env = cl_env_get(&refcheck); + if (IS_ERR(env)) { + result = PTR_ERR(env); + goto out; + } - lock_res_and_lock(dlmlock); - if (dlmlock->l_ast_data) { - obj = osc2cl(dlmlock->l_ast_data); - cl_object_get(obj); - } - unlock_res_and_lock(dlmlock); + lock_res_and_lock(dlmlock); + if (dlmlock->l_ast_data) { + obj = osc2cl(dlmlock->l_ast_data); + cl_object_get(obj); + } + unlock_res_and_lock(dlmlock); - if (obj) { - /* Do not grab the mutex of cl_lock for glimpse. - * See LU-1274 for details. - * BTW, it's okay for cl_lock to be cancelled during - * this period because server can handle this race. - * See ldlm_server_glimpse_ast() for details. 
- * cl_lock_mutex_get(env, lock); - */ - cap = &req->rq_pill; - req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK); - req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER, - sizeof(*lvb)); - result = req_capsule_server_pack(cap); - if (result == 0) { - lvb = req_capsule_server_get(cap, &RMF_DLM_LVB); - result = cl_object_glimpse(env, obj, lvb); - } - if (!exp_connect_lvb_type(req->rq_export)) - req_capsule_shrink(&req->rq_pill, - &RMF_DLM_LVB, - sizeof(struct ost_lvb_v1), - RCL_SERVER); - cl_object_put(env, obj); - } else { - /* - * These errors are normal races, so we don't want to - * fill the console with messages by calling - * ptlrpc_error() - */ - lustre_pack_reply(req, 1, NULL, NULL); - result = -ELDLM_NO_LOCK_DATA; + if (obj) { + /* Do not grab the mutex of cl_lock for glimpse. + * See LU-1274 for details. + * BTW, it's okay for cl_lock to be cancelled during + * this period because server can handle this race. + * See ldlm_server_glimpse_ast() for details. + * cl_lock_mutex_get(env, lock); + */ + cap = &req->rq_pill; + req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK); + req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER, + sizeof(*lvb)); + result = req_capsule_server_pack(cap); + if (result == 0) { + lvb = req_capsule_server_get(cap, &RMF_DLM_LVB); + result = cl_object_glimpse(env, obj, lvb); + } + if (!exp_connect_lvb_type(req->rq_export)) { + req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, + sizeof(struct ost_lvb_v1), + RCL_SERVER); } - cl_env_nested_put(&nest, env); + cl_object_put(env, obj); } else { - result = PTR_ERR(env); + /* + * These errors are normal races, so we don't want to + * fill the console with messages by calling + * ptlrpc_error() + */ + lustre_pack_reply(req, 1, NULL, NULL); + result = -ELDLM_NO_LOCK_DATA; } + cl_env_put(env, &refcheck); + +out: req->rq_status = result; return result; } @@ -677,12 +679,12 @@ static unsigned long osc_lock_weight(const struct lu_env *env, */ unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock) { - struct cl_env_nest nest; struct lu_env *env; struct osc_object *obj; struct osc_lock *oscl; unsigned long weight; bool found = false; + int refcheck; might_sleep(); /* @@ -692,7 +694,7 @@ unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock) * the upper context because cl_lock_put don't modify environment * variables. But just in case .. 
*/ - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) /* Mostly because lack of memory, do not eliminate this lock */ return 1; @@ -722,7 +724,7 @@ unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock) weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent); out: - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); return weight; } @@ -912,7 +914,7 @@ static int osc_lock_enqueue(const struct lu_env *env, struct osc_lock *oscl = cl2osc_lock(slice); struct cl_lock *lock = slice->cls_lock; struct ldlm_res_id *resname = &info->oti_resname; - ldlm_policy_data_t *policy = &info->oti_policy; + union ldlm_policy_data *policy = &info->oti_policy; osc_enqueue_upcall_f upcall = osc_lock_upcall; void *cookie = oscl; bool async = false; @@ -1009,7 +1011,7 @@ static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck) if (olck->ols_hold) { olck->ols_hold = 0; - osc_cancel_base(&olck->ols_handle, olck->ols_einfo.ei_mode); + ldlm_lock_decref(&olck->ols_handle, olck->ols_einfo.ei_mode); olck->ols_handle.cookie = 0ULL; } @@ -1180,11 +1182,11 @@ int osc_lock_init(const struct lu_env *env, */ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, struct osc_object *obj, pgoff_t index, - int pending, int canceling) + enum osc_dap_flags dap_flags) { struct osc_thread_info *info = osc_env_info(env); struct ldlm_res_id *resname = &info->oti_resname; - ldlm_policy_data_t *policy = &info->oti_policy; + union ldlm_policy_data *policy = &info->oti_policy; struct lustre_handle lockh; struct ldlm_lock *lock = NULL; enum ldlm_mode mode; @@ -1194,17 +1196,18 @@ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, osc_index2policy(policy, osc2cl(obj), index, index); policy->l_extent.gid = LDLM_GID_ANY; - flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; - if (pending) - flags |= LDLM_FL_CBPENDING; + flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING; + if (dap_flags & OSC_DAP_FL_TEST_LOCK) + flags |= LDLM_FL_TEST_LOCK; + /* * It is fine to match any group lock since there could be only one * with a uniq gid and it conflicts with all other lock modes too */ again: - mode = ldlm_lock_match(osc_export(obj)->exp_obd->obd_namespace, - flags, resname, LDLM_EXTENT, policy, - LCK_PR | LCK_PW | LCK_GROUP, &lockh, canceling); + mode = osc_match_base(osc_export(obj), resname, LDLM_EXTENT, policy, + LCK_PR | LCK_PW | LCK_GROUP, &flags, obj, &lockh, + dap_flags & OSC_DAP_FL_CANCELING); if (mode != 0) { lock = ldlm_handle2lock(&lockh); /* RACE: the lock is cancelled so let's try again */ diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c index aae3a2d4243f..e0c3324857dd 100644 --- a/drivers/staging/lustre/lustre/osc/osc_object.c +++ b/drivers/staging/lustre/lustre/osc/osc_object.c @@ -71,13 +71,8 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj, { struct osc_object *osc = lu2osc(obj); const struct cl_object_conf *cconf = lu2cl_conf(conf); - int i; osc->oo_oinfo = cconf->u.coc_oinfo; - spin_lock_init(&osc->oo_seatbelt); - for (i = 0; i < CRT_NR; ++i) - INIT_LIST_HEAD(&osc->oo_inflight[i]); - INIT_LIST_HEAD(&osc->oo_ready_item); INIT_LIST_HEAD(&osc->oo_hp_ready_item); INIT_LIST_HEAD(&osc->oo_write_item); @@ -103,10 +98,6 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj, static void osc_object_free(const struct lu_env *env, struct lu_object *obj) { struct osc_object *osc = lu2osc(obj); - int i; - - for (i = 0; i < CRT_NR; ++i) 
- LASSERT(list_empty(&osc->oo_inflight[i])); LASSERT(list_empty(&osc->oo_ready_item)); LASSERT(list_empty(&osc->oo_hp_ready_item)); @@ -218,6 +209,94 @@ static int osc_object_prune(const struct lu_env *env, struct cl_object *obj) return 0; } +static int osc_object_fiemap(const struct lu_env *env, struct cl_object *obj, + struct ll_fiemap_info_key *fmkey, + struct fiemap *fiemap, size_t *buflen) +{ + struct obd_export *exp = osc_export(cl2osc(obj)); + union ldlm_policy_data policy; + struct ptlrpc_request *req; + struct lustre_handle lockh; + struct ldlm_res_id resid; + enum ldlm_mode mode = 0; + struct fiemap *reply; + char *tmp; + int rc; + + fmkey->lfik_oa.o_oi = cl2osc(obj)->oo_oinfo->loi_oi; + if (!(fmkey->lfik_fiemap.fm_flags & FIEMAP_FLAG_SYNC)) + goto skip_locking; + + policy.l_extent.start = fmkey->lfik_fiemap.fm_start & PAGE_MASK; + + if (OBD_OBJECT_EOF - fmkey->lfik_fiemap.fm_length <= + fmkey->lfik_fiemap.fm_start + PAGE_SIZE - 1) + policy.l_extent.end = OBD_OBJECT_EOF; + else + policy.l_extent.end = (fmkey->lfik_fiemap.fm_start + + fmkey->lfik_fiemap.fm_length + + PAGE_SIZE - 1) & PAGE_MASK; + + ostid_build_res_name(&fmkey->lfik_oa.o_oi, &resid); + mode = ldlm_lock_match(exp->exp_obd->obd_namespace, + LDLM_FL_BLOCK_GRANTED | LDLM_FL_LVB_READY, + &resid, LDLM_EXTENT, &policy, + LCK_PR | LCK_PW, &lockh, 0); + if (mode) { /* lock is cached on client */ + if (mode != LCK_PR) { + ldlm_lock_addref(&lockh, LCK_PR); + ldlm_lock_decref(&lockh, LCK_PW); + } + } else { /* no cached lock, needs acquire lock on server side */ + fmkey->lfik_oa.o_valid |= OBD_MD_FLFLAGS; + fmkey->lfik_oa.o_flags |= OBD_FL_SRVLOCK; + } + +skip_locking: + req = ptlrpc_request_alloc(class_exp2cliimp(exp), + &RQF_OST_GET_INFO_FIEMAP); + if (!req) { + rc = -ENOMEM; + goto drop_lock; + } + + req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY, RCL_CLIENT, + sizeof(*fmkey)); + req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_CLIENT, + *buflen); + req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_SERVER, + *buflen); + + rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO); + if (rc) { + ptlrpc_request_free(req); + goto drop_lock; + } + tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY); + memcpy(tmp, fmkey, sizeof(*fmkey)); + tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL); + memcpy(tmp, fiemap, *buflen); + ptlrpc_request_set_replen(req); + + rc = ptlrpc_queue_wait(req); + if (rc) + goto fini_req; + + reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL); + if (!reply) { + rc = -EPROTO; + goto fini_req; + } + + memcpy(fiemap, reply, *buflen); +fini_req: + ptlrpc_req_finished(req); +drop_lock: + if (mode) + ldlm_lock_decref(&lockh, LCK_PR); + return rc; +} + void osc_object_set_contended(struct osc_object *obj) { obj->oo_contention_time = cfs_time_current(); @@ -256,6 +335,76 @@ int osc_object_is_contended(struct osc_object *obj) return 1; } +/** + * Implementation of struct cl_object_operations::coo_req_attr_set() for osc + * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq + * fields. 
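osc_object_fiemap() above page-aligns the lock extent around the requested fiemap range and clamps the end to OBD_OBJECT_EOF whenever fm_start + fm_length would wrap. The rounding and overflow guard as a standalone function:

    #include <stdint.h>

    #define PAGE_SIZE  4096ULL
    #define PAGE_MASK  (~(PAGE_SIZE - 1))
    #define OBJ_EOF    ((uint64_t)-1)   /* OBD_OBJECT_EOF analogue */

    /* align start down and end up to page boundaries, clamping to EOF
     * when the rounded end would overflow */
    static void fiemap_lock_extent(uint64_t start, uint64_t length,
                                   uint64_t *ext_start, uint64_t *ext_end)
    {
            *ext_start = start & PAGE_MASK;

            if (OBJ_EOF - length <= start + PAGE_SIZE - 1)
                    *ext_end = OBJ_EOF;     /* would wrap; lock to EOF */
            else
                    *ext_end = (start + length + PAGE_SIZE - 1) & PAGE_MASK;
    }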
+ */ +static void osc_req_attr_set(const struct lu_env *env, struct cl_object *obj, + struct cl_req_attr *attr) +{ + u64 flags = attr->cra_flags; + struct lov_oinfo *oinfo; + struct ost_lvb *lvb; + struct obdo *oa; + + oinfo = cl2osc(obj)->oo_oinfo; + lvb = &oinfo->loi_lvb; + oa = attr->cra_oa; + + if (flags & OBD_MD_FLMTIME) { + oa->o_mtime = lvb->lvb_mtime; + oa->o_valid |= OBD_MD_FLMTIME; + } + if (flags & OBD_MD_FLATIME) { + oa->o_atime = lvb->lvb_atime; + oa->o_valid |= OBD_MD_FLATIME; + } + if (flags & OBD_MD_FLCTIME) { + oa->o_ctime = lvb->lvb_ctime; + oa->o_valid |= OBD_MD_FLCTIME; + } + if (flags & OBD_MD_FLGROUP) { + ostid_set_seq(&oa->o_oi, ostid_seq(&oinfo->loi_oi)); + oa->o_valid |= OBD_MD_FLGROUP; + } + if (flags & OBD_MD_FLID) { + ostid_set_id(&oa->o_oi, ostid_id(&oinfo->loi_oi)); + oa->o_valid |= OBD_MD_FLID; + } + if (flags & OBD_MD_FLHANDLE) { + struct ldlm_lock *lock; + struct osc_page *opg; + + opg = osc_cl_page_osc(attr->cra_page, cl2osc(obj)); + lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg), + OSC_DAP_FL_TEST_LOCK | OSC_DAP_FL_CANCELING); + if (!lock && !opg->ops_srvlock) { + struct ldlm_resource *res; + struct ldlm_res_id *resname; + + CL_PAGE_DEBUG(D_ERROR, env, attr->cra_page, + "uncovered page!\n"); + + resname = &osc_env_info(env)->oti_resname; + ostid_build_res_name(&oinfo->loi_oi, resname); + res = ldlm_resource_get( + osc_export(cl2osc(obj))->exp_obd->obd_namespace, + NULL, resname, LDLM_EXTENT, 0); + ldlm_resource_dump(D_ERROR, res); + + LBUG(); + } + + /* check for lockless io. */ + if (lock) { + oa->o_handle = lock->l_remote_handle; + oa->o_valid |= OBD_MD_FLHANDLE; + LDLM_LOCK_PUT(lock); + } + } +} + static const struct cl_object_operations osc_ops = { .coo_page_init = osc_page_init, .coo_lock_init = osc_lock_init, @@ -263,7 +412,9 @@ static const struct cl_object_operations osc_ops = { .coo_attr_get = osc_attr_get, .coo_attr_update = osc_attr_update, .coo_glimpse = osc_object_glimpse, - .coo_prune = osc_object_prune + .coo_prune = osc_object_prune, + .coo_fiemap = osc_object_fiemap, + .coo_req_attr_set = osc_req_attr_set }; static const struct lu_object_operations osc_lu_obj_ops = { diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c index 2a7a70aa9e80..e356e4af08e1 100644 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ b/drivers/staging/lustre/lustre/osc/osc_page.c @@ -37,6 +37,7 @@ #define DEBUG_SUBSYSTEM S_OSC +#include <linux/math64.h> #include "osc_cl_internal.h" static void osc_lru_del(struct client_obd *cli, struct osc_page *opg); @@ -86,11 +87,6 @@ static void osc_page_transfer_add(const struct lu_env *env, struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); osc_lru_use(osc_cli(obj), opg); - - spin_lock(&obj->oo_seatbelt); - list_add(&opg->ops_inflight, &obj->oo_inflight[crt]); - opg->ops_submitter = current; - spin_unlock(&obj->oo_seatbelt); } int osc_page_cache_add(const struct lu_env *env, @@ -109,7 +105,8 @@ int osc_page_cache_add(const struct lu_env *env, return result; } -void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj, +void osc_index2policy(union ldlm_policy_data *policy, + const struct cl_object *obj, pgoff_t start, pgoff_t end) { memset(policy, 0, sizeof(*policy)); @@ -117,25 +114,6 @@ void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj, policy->l_extent.end = cl_offset(obj, end + 1) - 1; } -static int osc_page_is_under_lock(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io 
*unused, pgoff_t *max_index) -{ - struct osc_page *opg = cl2osc_page(slice); - struct ldlm_lock *dlmlock; - int result = -ENODATA; - - dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj), - osc_index(opg), 1, 0); - if (dlmlock) { - *max_index = cl_index(slice->cpl_obj, - dlmlock->l_policy_data.l_extent.end); - LDLM_LOCK_PUT(dlmlock); - result = 0; - } - return result; -} - static const char *osc_list(struct list_head *head) { return list_empty(head) ? "-" : "+"; @@ -158,7 +136,7 @@ static int osc_page_print(const struct lu_env *env, struct osc_object *obj = cl2osc(slice->cpl_obj); struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli; - return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n", + return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n", opg, osc_index(opg), /* 1 */ oap->oap_magic, oap->oap_cmd, @@ -170,8 +148,7 @@ static int osc_page_print(const struct lu_env *env, oap->oap_async_flags, oap->oap_brw_flags, oap->oap_request, oap->oap_cli, obj, /* 3 */ - osc_list(&opg->ops_inflight), - opg->ops_submitter, opg->ops_transfer_pinned, + opg->ops_transfer_pinned, osc_submit_duration(opg), opg->ops_srvlock, /* 4 */ cli->cl_r_in_flight, cli->cl_w_in_flight, @@ -210,14 +187,6 @@ static void osc_page_delete(const struct lu_env *env, LASSERT(0); } - spin_lock(&obj->oo_seatbelt); - if (opg->ops_submitter) { - LASSERT(!list_empty(&opg->ops_inflight)); - list_del_init(&opg->ops_inflight); - opg->ops_submitter = NULL; - } - spin_unlock(&obj->oo_seatbelt); - osc_lru_del(osc_cli(obj), opg); if (slice->cpl_page->cp_type == CPT_CACHEABLE) { @@ -276,7 +245,6 @@ static int osc_page_flush(const struct lu_env *env, static const struct cl_page_operations osc_page_ops = { .cpo_print = osc_page_print, .cpo_delete = osc_page_delete, - .cpo_is_under_lock = osc_page_is_under_lock, .cpo_clip = osc_page_clip, .cpo_cancel = osc_page_cancel, .cpo_flush = osc_page_flush @@ -301,10 +269,6 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj, cl_page_slice_add(page, &opg->ops_cl, obj, index, &osc_page_ops); } - /* ops_inflight and ops_lru are the same field, but it doesn't - * hurt to initialize it twice :-) - */ - INIT_LIST_HEAD(&opg->ops_inflight); INIT_LIST_HEAD(&opg->ops_lru); /* reserve an LRU space for this page */ @@ -362,16 +326,27 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, * OSC to free slots voluntarily to maintain a reasonable number of free slots * at any time. */ - static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq); -/* LRU pages are freed in batch mode. OSC should at least free this - * number of pages to avoid running out of LRU budget, and.. + +/** + * LRU pages are freed in batch mode. OSC should at least free this + * number of pages to avoid running out of LRU slots. + */ +static inline int lru_shrink_min(struct client_obd *cli) +{ + return cli->cl_max_pages_per_rpc * 2; +} + +/** + * free this number at most otherwise it will take too long time to finish. */ -static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT); /* 2M */ -/* free this number at most otherwise it will take too long time to finish. 
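As a worked example of the new per-client sizing (a sketch; 256 pages per RPC and 8 RPCs in flight are assumed values, not defaults set by this patch): lru_shrink_min() yields 2 * 256 = 512 pages, i.e. the old 2 MB floor with 4 KB pages, while lru_shrink_max() yields 256 * 8 = 2048 pages (8 MB), so the former compile-time constants now scale with each client's RPC configuration.

#include <stdio.h>

struct client {
	int max_pages_per_rpc;
	int max_rpcs_in_flight;
};

static int shrink_min(const struct client *cli)
{
	return cli->max_pages_per_rpc * 2;
}

static int shrink_max(const struct client *cli)
{
	return cli->max_pages_per_rpc * cli->max_rpcs_in_flight;
}

int main(void)
{
	struct client cli = { 256, 8 };	/* assumed example configuration */

	printf("min=%d pages max=%d pages\n",
	       shrink_min(&cli), shrink_max(&cli));	/* min=512 max=2048 */
	return 0;
}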
*/ -static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */ +static inline int lru_shrink_max(struct client_obd *cli) +{ + return cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight; +} -/* Check if we can free LRU slots from this OSC. If there exists LRU waiters, +/** + * Check if we can free LRU slots from this OSC. If there exists LRU waiters, * we should free slots aggressively. In this way, slots are freed in a steady * step to maintain fairness among OSCs. * @@ -388,13 +363,20 @@ static int osc_cache_too_much(struct client_obd *cli) /* if it's going to run out LRU slots, we should free some, but not * too much to maintain fairness among OSCs. */ - if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) { + if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 2) { if (pages >= budget) - return lru_shrink_max; + return lru_shrink_max(cli); else if (pages >= budget / 2) - return lru_shrink_min; - } else if (pages >= budget * 2) { - return lru_shrink_min; + return lru_shrink_min(cli); + } else { + time64_t duration = ktime_get_real_seconds(); + + /* knock out pages by duration of no IO activity */ + duration -= cli->cl_lru_last_used; + duration >>= 6; /* approximately 1 minute */ + if (duration > 0 && + pages >= div64_s64((s64)budget, duration)) + return lru_shrink_min(cli); } return 0; } @@ -402,11 +384,21 @@ static int osc_cache_too_much(struct client_obd *cli) int lru_queue_work(const struct lu_env *env, void *data) { struct client_obd *cli = data; + int count; - CDEBUG(D_CACHE, "Run LRU work for client obd %p.\n", cli); + CDEBUG(D_CACHE, "%s: run LRU work for client obd\n", cli_name(cli)); - if (osc_cache_too_much(cli)) - osc_lru_shrink(env, cli, lru_shrink_max, true); + count = osc_cache_too_much(cli); + if (count > 0) { + int rc = osc_lru_shrink(env, cli, count, false); + + CDEBUG(D_CACHE, "%s: shrank %d/%d pages from client obd\n", + cli_name(cli), rc, count); + if (rc >= count) { + CDEBUG(D_CACHE, "%s: queue again\n", cli_name(cli)); + ptlrpcd_queue_work(cli->cl_lru_work); + } + } return 0; } @@ -433,10 +425,10 @@ void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist) list_splice_tail(&lru, &cli->cl_lru_list); atomic_long_sub(npages, &cli->cl_lru_busy); atomic_long_add(npages, &cli->cl_lru_in_list); + cli->cl_lru_last_used = ktime_get_real_seconds(); spin_unlock(&cli->cl_lru_list_lock); - /* XXX: May set force to be true for better performance */ - if (osc_cache_too_much(cli)) + if (waitqueue_active(&osc_lru_waitq)) (void)ptlrpcd_queue_work(cli->cl_lru_work); } } @@ -469,8 +461,10 @@ static void osc_lru_del(struct client_obd *cli, struct osc_page *opg) * this osc occupies too many LRU pages and kernel is * stealing one of them. 
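The idle-time branch added to osc_cache_too_much() reads as: divide the seconds since the LRU was last used by 64 (roughly one-minute units) and shrink once budget/duration pages or more remain resident. A stand-alone sketch with assumed numbers, a 4096-page budget and 1024 resident pages:

#include <stdio.h>

int main(void)
{
	long budget = 4096;	/* assumed per-client LRU budget (pages) */
	long pages = 1024;	/* pages currently sitting on the LRU */
	long idle;

	for (idle = 64; idle <= 512; idle *= 2) {
		long duration = idle >> 6;	/* ~1 minute units */
		int shrink = duration > 0 && pages >= budget / duration;

		printf("idle=%3lds -> shrink=%d\n", idle, shrink);
	}
	return 0;	/* shrinks from idle=256s on, with these numbers */
}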
*/ - if (!memory_pressure_get()) + if (osc_cache_too_much(cli)) { + CDEBUG(D_CACHE, "%s: queue LRU work\n", cli_name(cli)); (void)ptlrpcd_queue_work(cli->cl_lru_work); + } wake_up(&osc_lru_waitq); } else { LASSERT(list_empty(&opg->ops_lru)); @@ -502,6 +496,7 @@ static void discard_pagevec(const struct lu_env *env, struct cl_io *io, struct cl_page *page = pvec[i]; LASSERT(cl_page_is_owned(page, io)); + cl_page_delete(env, page); cl_page_discard(env, io, page); cl_page_disown(env, io, page); cl_page_put(env, page); @@ -542,7 +537,6 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, struct cl_object *clobj = NULL; struct cl_page **pvec; struct osc_page *opg; - struct osc_page *temp; int maxscan = 0; long count = 0; int index = 0; @@ -552,6 +546,8 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, if (atomic_long_read(&cli->cl_lru_in_list) == 0 || target <= 0) return 0; + CDEBUG(D_CACHE, "%s: shrinkers: %d, force: %d\n", + cli_name(cli), atomic_read(&cli->cl_lru_shrinkers), force); if (!force) { if (atomic_read(&cli->cl_lru_shrinkers) > 0) return -EBUSY; @@ -568,14 +564,21 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, io = &osc_env_info(env)->oti_io; spin_lock(&cli->cl_lru_list_lock); + if (force) + cli->cl_lru_reclaim++; maxscan = min(target << 1, atomic_long_read(&cli->cl_lru_in_list)); - list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) { + while (!list_empty(&cli->cl_lru_list)) { struct cl_page *page; bool will_free = false; + if (!force && atomic_read(&cli->cl_lru_shrinkers) > 1) + break; + if (--maxscan < 0) break; + opg = list_entry(cli->cl_lru_list.next, struct osc_page, + ops_lru); page = opg->ops_cl.cpl_page; if (lru_page_busy(cli, page)) { list_move_tail(&opg->ops_lru, &cli->cl_lru_list); @@ -662,34 +665,43 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, return count > 0 ? count : rc; } -long osc_lru_reclaim(struct client_obd *cli) +/** + * Reclaim LRU pages by an IO thread. The caller wants to reclaim at least + * \@npages of LRU slots. For performance consideration, it's better to drop + * LRU pages in batch. Therefore, the actual number is adjusted at least + * max_pages_per_rpc. 
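A compressed model of the reclaim order described above (illustrative; an array stands in for the shared cl_lru_osc list and the page counts are invented): shrink the caller's own LRU first, then walk sibling clients on the same cache only while the adjusted target is still unmet.

#include <stdio.h>

#define NR_CLIENTS 3

/* pages each client could free (example numbers); index 0 is the caller */
static long reclaimable[NR_CLIENTS] = { 100, 700, 400 };

static long lru_reclaim(long npages)
{
	long freed = reclaimable[0];	/* phase 1: own LRU first */
	int i;

	for (i = 1; i < NR_CLIENTS && freed < npages; i++)
		freed += reclaimable[i];	/* phase 2: steal from siblings */
	return freed;
}

int main(void)
{
	printf("freed %ld of 600 wanted\n", lru_reclaim(600));	/* 800 */
	return 0;
}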
+ */ +long osc_lru_reclaim(struct client_obd *cli, unsigned long npages) { - struct cl_env_nest nest; struct lu_env *env; struct cl_client_cache *cache = cli->cl_cache; int max_scans; + int refcheck; long rc = 0; LASSERT(cache); - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) return 0; - rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), false); - if (rc != 0) { - if (rc == -EBUSY) - rc = 0; - - CDEBUG(D_CACHE, "%s: Free %ld pages from own LRU: %p.\n", - cli->cl_import->imp_obd->obd_name, rc, cli); + npages = max_t(int, npages, cli->cl_max_pages_per_rpc); + CDEBUG(D_CACHE, "%s: start to reclaim %ld pages from LRU\n", + cli_name(cli), npages); + rc = osc_lru_shrink(env, cli, npages, true); + if (rc >= npages) { + CDEBUG(D_CACHE, "%s: reclaimed %ld/%ld pages from LRU\n", + cli_name(cli), rc, npages); + if (osc_cache_too_much(cli) > 0) + ptlrpcd_queue_work(cli->cl_lru_work); goto out; + } else if (rc > 0) { + npages -= rc; } - CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld, busy: %ld.\n", - cli->cl_import->imp_obd->obd_name, cli, - atomic_long_read(&cli->cl_lru_in_list), - atomic_long_read(&cli->cl_lru_busy)); + CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld/%ld, want: %ld\n", + cli_name(cli), cli, atomic_long_read(&cli->cl_lru_in_list), + atomic_long_read(&cli->cl_lru_busy), npages); /* Reclaim LRU slots from other client_obd as it can't free enough * from its own. This should rarely happen. @@ -706,7 +718,7 @@ long osc_lru_reclaim(struct client_obd *cli) cl_lru_osc); CDEBUG(D_CACHE, "%s: cli %p LRU pages: %ld, busy: %ld.\n", - cli->cl_import->imp_obd->obd_name, cli, + cli_name(cli), cli, atomic_long_read(&cli->cl_lru_in_list), atomic_long_read(&cli->cl_lru_busy)); @@ -714,19 +726,20 @@ long osc_lru_reclaim(struct client_obd *cli) if (osc_cache_too_much(cli) > 0) { spin_unlock(&cache->ccc_lru_lock); - rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), - true); + rc = osc_lru_shrink(env, cli, npages, true); spin_lock(&cache->ccc_lru_lock); - if (rc != 0) + if (rc >= npages) break; + if (rc > 0) + npages -= rc; } } spin_unlock(&cache->ccc_lru_lock); out: - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); CDEBUG(D_CACHE, "%s: cli %p freed %ld pages.\n", - cli->cl_import->imp_obd->obd_name, cli, rc); + cli_name(cli), cli, rc); return rc; } @@ -756,7 +769,7 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, LASSERT(atomic_long_read(cli->cl_lru_left) >= 0); while (!atomic_long_add_unless(cli->cl_lru_left, -1, 0)) { /* run out of LRU spaces, try to drop some by itself */ - rc = osc_lru_reclaim(cli); + rc = osc_lru_reclaim(cli, 1); if (rc < 0) break; if (rc > 0) @@ -796,8 +809,10 @@ static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc, int count = 0; int i; + LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type)); + for (i = 0; i < page_count; i++) { - pg_data_t *pgdat = page_pgdat(desc->bd_iov[i].bv_page); + pg_data_t *pgdat = page_pgdat(BD_GET_KIOV(desc, i).bv_page); if (likely(pgdat == last)) { ++count; @@ -857,7 +872,7 @@ void osc_dec_unstable_pages(struct ptlrpc_request *req) if (!unstable_count) wake_up_all(&cli->cl_cache->ccc_unstable_waitq); - if (osc_cache_too_much(cli)) + if (waitqueue_active(&osc_lru_waitq)) (void)ptlrpcd_queue_work(cli->cl_lru_work); } @@ -913,8 +928,7 @@ bool osc_over_unstable_soft_limit(struct client_obd *cli) CDEBUG(D_CACHE, "%s: cli: %p unstable pages: %lu, osc unstable pages: %lu\n", - cli->cl_import->imp_obd->obd_name, cli, - unstable_nr, 
osc_unstable_count); + cli_name(cli), cli, unstable_nr, osc_unstable_count); /* * If the LRU slots are in shortage - 25% remaining AND this OSC diff --git a/drivers/staging/lustre/lustre/osc/osc_quota.c b/drivers/staging/lustre/lustre/osc/osc_quota.c index 194d8ede40a2..fed4da63ee45 100644 --- a/drivers/staging/lustre/lustre/osc/osc_quota.c +++ b/drivers/staging/lustre/lustre/osc/osc_quota.c @@ -106,7 +106,7 @@ int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[], } CDEBUG(D_QUOTA, "%s: setdq to insert for %s %d (%d)\n", - cli->cl_import->imp_obd->obd_name, + cli_name(cli), type == USRQUOTA ? "user" : "group", qid[type], rc); } else { @@ -122,7 +122,7 @@ int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[], kmem_cache_free(osc_quota_kmem, oqi); CDEBUG(D_QUOTA, "%s: setdq to remove for %s %d (%p)\n", - cli->cl_import->imp_obd->obd_name, + cli_name(cli), type == USRQUOTA ? "user" : "group", qid[type], oqi); } @@ -134,8 +134,8 @@ int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[], /* * Hash operations for uid/gid <-> osc_quota_info */ -static unsigned -oqi_hashfn(struct cfs_hash *hs, const void *key, unsigned mask) +static unsigned int +oqi_hashfn(struct cfs_hash *hs, const void *key, unsigned int mask) { return cfs_hash_u32_hash(*((__u32 *)key), mask); } @@ -281,47 +281,3 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp, return rc; } - -int osc_quotacheck(struct obd_device *unused, struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - struct client_obd *cli = &exp->exp_obd->u.cli; - struct ptlrpc_request *req; - struct obd_quotactl *body; - int rc; - - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_OST_QUOTACHECK, LUSTRE_OST_VERSION, - OST_QUOTACHECK); - if (!req) - return -ENOMEM; - - body = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - *body = *oqctl; - - ptlrpc_request_set_replen(req); - - /* the next poll will find -ENODATA, that means quotacheck is going on - */ - cli->cl_qchk_stat = -ENODATA; - rc = ptlrpc_queue_wait(req); - if (rc) - cli->cl_qchk_stat = rc; - ptlrpc_req_finished(req); - return rc; -} - -int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk) -{ - struct client_obd *cli = &exp->exp_obd->u.cli; - int rc; - - qchk->obd_uuid = cli->cl_target_uuid; - memcpy(qchk->obd_type, LUSTRE_OST_NAME, strlen(LUSTRE_OST_NAME)); - - rc = cli->cl_qchk_stat; - /* the client is not the previous one */ - if (rc == CL_NOT_QUOTACHECKED) - rc = -EINTR; - return rc; -} diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c index 749781f022e2..7143564ae7e7 100644 --- a/drivers/staging/lustre/lustre/osc/osc_request.c +++ b/drivers/staging/lustre/lustre/osc/osc_request.c @@ -68,7 +68,6 @@ struct osc_brw_async_args { struct client_obd *aa_cli; struct list_head aa_oaps; struct list_head aa_exts; - struct cl_req *aa_clerq; }; struct osc_async_args { @@ -82,7 +81,8 @@ struct osc_setattr_args { }; struct osc_fsync_args { - struct obd_info *fa_oi; + struct osc_object *fa_obj; + struct obdo *fa_oa; obd_enqueue_update_f fa_upcall; void *fa_cookie; }; @@ -103,140 +103,19 @@ static void osc_release_ppga(struct brw_page **ppga, u32 count); static int brw_interpret(const struct lu_env *env, struct ptlrpc_request *req, void *data, int rc); -/* Unpack OSC object metadata from disk storage (LE byte order). 
*/ -static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, - struct lov_mds_md *lmm, int lmm_bytes) -{ - int lsm_size; - struct obd_import *imp = class_exp2cliimp(exp); - - if (lmm) { - if (lmm_bytes < sizeof(*lmm)) { - CERROR("%s: lov_mds_md too small: %d, need %d\n", - exp->exp_obd->obd_name, lmm_bytes, - (int)sizeof(*lmm)); - return -EINVAL; - } - /* XXX LOV_MAGIC etc check? */ - - if (unlikely(ostid_id(&lmm->lmm_oi) == 0)) { - CERROR("%s: zero lmm_object_id: rc = %d\n", - exp->exp_obd->obd_name, -EINVAL); - return -EINVAL; - } - } - - lsm_size = lov_stripe_md_size(1); - if (!lsmp) - return lsm_size; - - if (*lsmp && !lmm) { - kfree((*lsmp)->lsm_oinfo[0]); - kfree(*lsmp); - *lsmp = NULL; - return 0; - } - - if (!*lsmp) { - *lsmp = kzalloc(lsm_size, GFP_NOFS); - if (unlikely(!*lsmp)) - return -ENOMEM; - (*lsmp)->lsm_oinfo[0] = kzalloc(sizeof(struct lov_oinfo), - GFP_NOFS); - if (unlikely(!(*lsmp)->lsm_oinfo[0])) { - kfree(*lsmp); - return -ENOMEM; - } - loi_init((*lsmp)->lsm_oinfo[0]); - } else if (unlikely(ostid_id(&(*lsmp)->lsm_oi) == 0)) { - return -EBADF; - } - - if (lmm) - /* XXX zero *lsmp? */ - ostid_le_to_cpu(&lmm->lmm_oi, &(*lsmp)->lsm_oi); - - if (imp && - (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES)) - (*lsmp)->lsm_maxbytes = imp->imp_connect_data.ocd_maxbytes; - else - (*lsmp)->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES; - - return lsm_size; -} - static inline void osc_pack_req_body(struct ptlrpc_request *req, - struct obd_info *oinfo) + struct obdo *oa) { struct ost_body *body; body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); LASSERT(body); - lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, - oinfo->oi_oa); -} - -static int osc_getattr_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - struct osc_async_args *aa, int rc) -{ - struct ost_body *body; - - if (rc != 0) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); - if (body) { - CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); - lustre_get_wire_obdo(&req->rq_import->imp_connect_data, - aa->aa_oi->oi_oa, &body->oa); - - /* This should really be sent by the OST */ - aa->aa_oi->oi_oa->o_blksize = DT_MAX_BRW_SIZE; - aa->aa_oi->oi_oa->o_valid |= OBD_MD_FLBLKSZ; - } else { - CDEBUG(D_INFO, "can't unpack ost_body\n"); - rc = -EPROTO; - aa->aa_oi->oi_oa->o_valid = 0; - } -out: - rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc); - return rc; -} - -static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo, - struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *req; - struct osc_async_args *aa; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - osc_pack_req_body(req, oinfo); - - ptlrpc_request_set_replen(req); - req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_getattr_interpret; - - CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); - aa = ptlrpc_req_async_args(req); - aa->aa_oi = oinfo; - - ptlrpc_set_add_req(set, req); - return 0; + lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); } static int osc_getattr(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo) + struct obdo *oa) { struct ptlrpc_request *req; struct ost_body *body; @@ -252,7 +131,7 @@ static int osc_getattr(const struct lu_env *env, struct obd_export *exp, return rc; } - osc_pack_req_body(req, oinfo); + 
osc_pack_req_body(req, oa); ptlrpc_request_set_replen(req); @@ -267,11 +146,11 @@ static int osc_getattr(const struct lu_env *env, struct obd_export *exp, } CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); - lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa, + lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, &body->oa); - oinfo->oi_oa->o_blksize = cli_brw_size(exp->exp_obd); - oinfo->oi_oa->o_valid |= OBD_MD_FLBLKSZ; + oa->o_blksize = cli_brw_size(exp->exp_obd); + oa->o_valid |= OBD_MD_FLBLKSZ; out: ptlrpc_req_finished(req); @@ -279,13 +158,13 @@ static int osc_getattr(const struct lu_env *env, struct obd_export *exp, } static int osc_setattr(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo, struct obd_trans_info *oti) + struct obdo *oa) { struct ptlrpc_request *req; struct ost_body *body; int rc; - LASSERT(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP); + LASSERT(oa->o_valid & OBD_MD_FLGROUP); req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR); if (!req) @@ -297,7 +176,7 @@ static int osc_setattr(const struct lu_env *env, struct obd_export *exp, return rc; } - osc_pack_req_body(req, oinfo); + osc_pack_req_body(req, oa); ptlrpc_request_set_replen(req); @@ -311,7 +190,7 @@ static int osc_setattr(const struct lu_env *env, struct obd_export *exp, goto out; } - lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa, + lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, &body->oa); out: @@ -341,10 +220,9 @@ out: return rc; } -int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset) +int osc_setattr_async(struct obd_export *exp, struct obdo *oa, + obd_enqueue_update_f upcall, void *cookie, + struct ptlrpc_request_set *rqset) { struct ptlrpc_request *req; struct osc_setattr_args *sa; @@ -360,10 +238,7 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo, return rc; } - if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) - oinfo->oi_oa->o_lcookie = *oti->oti_logcookies; - - osc_pack_req_body(req, oinfo); + osc_pack_req_body(req, oa); ptlrpc_request_set_replen(req); @@ -377,7 +252,7 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo, CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args)); sa = ptlrpc_req_async_args(req); - sa->sa_oa = oinfo->oi_oa; + sa->sa_oa = oa; sa->sa_upcall = upcall; sa->sa_cookie = cookie; @@ -390,16 +265,8 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo, return 0; } -static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - struct ptlrpc_request_set *rqset) -{ - return osc_setattr_async_base(exp, oinfo, oti, - oinfo->oi_cb_up, oinfo, rqset); -} - static int osc_create(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct obd_trans_info *oti) + struct obdo *oa) { struct ptlrpc_request *req; struct ost_body *body; @@ -428,15 +295,6 @@ static int osc_create(const struct lu_env *env, struct obd_export *exp, ptlrpc_request_set_replen(req); - if ((oa->o_valid & OBD_MD_FLFLAGS) && - oa->o_flags == OBD_FL_DELORPHAN) { - DEBUG_REQ(D_HA, req, - "delorphan from OST integration"); - /* Don't resend the delorphan req */ - req->rq_no_resend = 1; - req->rq_no_delay = 1; - } - rc = ptlrpc_queue_wait(req); if (rc) goto out_req; @@ -453,12 +311,6 @@ static int osc_create(const struct lu_env *env, struct obd_export *exp, oa->o_blksize 
= cli_brw_size(exp->exp_obd); oa->o_valid |= OBD_MD_FLBLKSZ; - if (oti && oa->o_valid & OBD_MD_FLCOOKIE) { - if (!oti->oti_logcookies) - oti->oti_logcookies = &oti->oti_onecookie; - *oti->oti_logcookies = oa->o_lcookie; - } - CDEBUG(D_HA, "transno: %lld\n", lustre_msg_get_transno(req->rq_repmsg)); out_req: @@ -467,7 +319,7 @@ out: return rc; } -int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo, +int osc_punch_base(struct obd_export *exp, struct obdo *oa, obd_enqueue_update_f upcall, void *cookie, struct ptlrpc_request_set *rqset) { @@ -491,14 +343,14 @@ int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo, body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); LASSERT(body); lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, - oinfo->oi_oa); + oa); ptlrpc_request_set_replen(req); req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret; CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args)); sa = ptlrpc_req_async_args(req); - sa->sa_oa = oinfo->oi_oa; + sa->sa_oa = oa; sa->sa_upcall = upcall; sa->sa_cookie = cookie; if (rqset == PTLRPCD_SET) @@ -513,8 +365,11 @@ static int osc_sync_interpret(const struct lu_env *env, struct ptlrpc_request *req, void *arg, int rc) { + struct cl_attr *attr = &osc_env_info(env)->oti_attr; struct osc_fsync_args *fa = arg; + unsigned long valid = 0; struct ost_body *body; + struct cl_object *obj; if (rc) goto out; @@ -526,16 +381,30 @@ static int osc_sync_interpret(const struct lu_env *env, goto out; } - *fa->fa_oi->oi_oa = body->oa; + *fa->fa_oa = body->oa; + obj = osc2cl(fa->fa_obj); + + /* Update osc object's blocks attribute */ + cl_object_attr_lock(obj); + if (body->oa.o_valid & OBD_MD_FLBLOCKS) { + attr->cat_blocks = body->oa.o_blocks; + valid |= CAT_BLOCKS; + } + + if (valid) + cl_object_attr_update(env, obj, attr, valid); + cl_object_attr_unlock(obj); + out: rc = fa->fa_upcall(fa->fa_cookie, rc); return rc; } -int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo, +int osc_sync_base(struct osc_object *obj, struct obdo *oa, obd_enqueue_update_f upcall, void *cookie, struct ptlrpc_request_set *rqset) { + struct obd_export *exp = osc_export(obj); struct ptlrpc_request *req; struct ost_body *body; struct osc_fsync_args *fa; @@ -555,14 +424,15 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo, body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); LASSERT(body); lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, - oinfo->oi_oa); + oa); ptlrpc_request_set_replen(req); req->rq_interpret_reply = osc_sync_interpret; CLASSERT(sizeof(*fa) <= sizeof(req->rq_async_args)); fa = ptlrpc_req_async_args(req); - fa->fa_oi = oinfo; + fa->fa_obj = obj; + fa->fa_oa = oa; fa->fa_upcall = upcall; fa->fa_cookie = cookie; @@ -639,19 +509,8 @@ static int osc_can_send_destroy(struct client_obd *cli) return 0; } -/* Destroy requests can be async always on the client, and we don't even really - * care about the return code since the client cannot do anything at all about - * a destroy failure. - * When the MDS is unlinking a filename, it saves the file objects into a - * recovery llog, and these object records are cancelled when the OST reports - * they were destroyed and sync'd to disk (i.e. transaction committed). 
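With the DELORPHAN special case gone, the throttle below runs for every destroy. A single-threaded sketch of the slot check; the real path uses an atomic counter and l_wait_event_exclusive() on cl_destroy_waitq, which this model deliberately omits:

#include <stdio.h>

static int destroy_in_flight;
static const int max_rpcs_in_flight = 8;	/* assumed limit */

static int can_send_destroy(void)
{
	if (destroy_in_flight < max_rpcs_in_flight) {
		destroy_in_flight++;	/* caller owns a slot now */
		return 1;
	}
	return 0;	/* the real code sleeps until a reply frees a slot */
}

int main(void)
{
	int i, sent = 0;

	for (i = 0; i < 10; i++)
		sent += can_send_destroy();
	printf("sent %d of 10 without waiting\n", sent);	/* sent 8 */
	return 0;
}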
- * If the client dies, or the OST is down when the object should be destroyed, - * the records are not cancelled, and when the OST reconnects to the MDS next, - * it will retrieve the llog unlink logs and then sends the log cancellation - * cookies to the MDS after committing destroy transactions. - */ static int osc_destroy(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct obd_trans_info *oti) + struct obdo *oa) { struct client_obd *cli = &exp->exp_obd->u.cli; struct ptlrpc_request *req; @@ -683,32 +542,22 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp, req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ ptlrpc_at_set_req_timeout(req); - if (oti && oa->o_valid & OBD_MD_FLCOOKIE) - oa->o_lcookie = *oti->oti_logcookies; body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); LASSERT(body); lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); ptlrpc_request_set_replen(req); - /* If osc_destroy is for destroying the unlink orphan, - * sent from MDT to OST, which should not be blocked here, - * because the process might be triggered by ptlrpcd, and - * it is not good to block ptlrpcd thread (b=16006 - **/ - if (!(oa->o_flags & OBD_FL_DELORPHAN)) { - req->rq_interpret_reply = osc_destroy_interpret; - if (!osc_can_send_destroy(cli)) { - struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, - NULL); - - /* - * Wait until the number of on-going destroy RPCs drops - * under max_rpc_in_flight - */ - l_wait_event_exclusive(cli->cl_destroy_waitq, - osc_can_send_destroy(cli), &lwi); - } + req->rq_interpret_reply = osc_destroy_interpret; + if (!osc_can_send_destroy(cli)) { + struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + + /* + * Wait until the number of on-going destroy RPCs drops + * under max_rpc_in_flight + */ + l_wait_event_exclusive(cli->cl_destroy_waitq, + osc_can_send_destroy(cli), &lwi); } /* Do not wait for response */ @@ -734,14 +583,13 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, oa->o_undirty = 0; } else if (unlikely(atomic_long_read(&obd_dirty_pages) - atomic_long_read(&obd_dirty_transit_pages) > - (obd_max_dirty_pages + 1))) { + (long)(obd_max_dirty_pages + 1))) { /* The atomic_read() allowing the atomic_inc() are * not covered by a lock thus they may safely race and trip * this CERROR() unless we add in a small fudge factor (+1). 
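The ceiling test that comment guards fits in a few lines; note the hunk also adds a (long) cast so the signed difference is not promoted to unsigned by the comparison. A sketch assuming a 1000-page limit:

#include <stdio.h>

static const long max_dirty_pages = 1000;	/* assumed limit */

/* unlocked readers may race with writers, hence the +1 of slop */
static int over_limit(long dirty, long transit)
{
	return dirty - transit > max_dirty_pages + 1;
}

int main(void)
{
	printf("%d %d %d\n",
	       over_limit(1001, 0),	/* 0: within limit plus fudge */
	       over_limit(1002, 0),	/* 1: genuinely over */
	       over_limit(1003, 2));	/* 0: transit pages excluded */
	return 0;
}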
*/ - CERROR("%s: dirty %ld + %ld > system dirty_max %lu\n", - cli->cl_import->imp_obd->obd_name, - atomic_long_read(&obd_dirty_pages), + CERROR("%s: dirty %ld + %ld > system dirty_max %ld\n", + cli_name(cli), atomic_long_read(&obd_dirty_pages), atomic_long_read(&obd_dirty_transit_pages), obd_max_dirty_pages); oa->o_undirty = 0; @@ -936,12 +784,10 @@ static int osc_add_shrink_grant(struct client_obd *client) osc_grant_shrink_grant_cb, NULL, &client->cl_grant_shrink_list); if (rc) { - CERROR("add grant client %s error %d\n", - client->cl_import->imp_obd->obd_name, rc); + CERROR("add grant client %s error %d\n", cli_name(client), rc); return rc; } - CDEBUG(D_CACHE, "add grant client %s\n", - client->cl_import->imp_obd->obd_name); + CDEBUG(D_CACHE, "add grant client %s\n", cli_name(client)); osc_update_next_shrink(client); return 0; } @@ -970,23 +816,13 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) cli->cl_avail_grant = ocd->ocd_grant - (cli->cl_dirty_pages << PAGE_SHIFT); - if (cli->cl_avail_grant < 0) { - CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n", - cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant, - ocd->ocd_grant, cli->cl_dirty_pages << PAGE_SHIFT); - /* workaround for servers which do not have the patch from - * LU-2679 - */ - cli->cl_avail_grant = ocd->ocd_grant; - } - /* determine the appropriate chunk size used by osc_extent. */ cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize); spin_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n", - cli->cl_import->imp_obd->obd_name, - cli->cl_avail_grant, cli->cl_lost_grant, cli->cl_chunkbits); + cli_name(cli), cli->cl_avail_grant, cli->cl_lost_grant, + cli->cl_chunkbits); if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK && list_empty(&cli->cl_grant_shrink_list)) @@ -1072,9 +908,9 @@ static int check_write_rcs(struct ptlrpc_request *req, static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2) { if (p1->flag != p2->flag) { - unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE | - OBD_BRW_SYNC | OBD_BRW_ASYNC | - OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC); + unsigned int mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE | + OBD_BRW_SYNC | OBD_BRW_ASYNC | + OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC); /* warn if we try to combine flags that we don't know to be * safe to combine @@ -1097,7 +933,6 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count, int i = 0; struct cfs_crypto_hash_desc *hdesc; unsigned int bufsize; - int err; unsigned char cfs_alg = cksum_obd2cfs(cksum_type); LASSERT(pg_count > 0); @@ -1139,7 +974,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count, } bufsize = sizeof(cksum); - err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize); + cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize); /* For sending we only compute the wrong checksum instead * of corrupting the data so it is still correct on a redo @@ -1151,8 +986,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count, } static int osc_brw_prep_request(int cmd, struct client_obd *cli, - struct obdo *oa, - struct lov_stripe_md *lsm, u32 page_count, + struct obdo *oa, u32 page_count, struct brw_page **pga, struct ptlrpc_request **reqp, int reserve, @@ -1210,8 +1044,9 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli, desc = ptlrpc_prep_bulk_imp(req, page_count, cli->cl_import->imp_connect_data.ocd_brw_size >> LNET_MTU_BITS, - opc == OST_WRITE ? 
BULK_GET_SOURCE : BULK_PUT_SINK, - OST_BULK_PORTAL); + (opc == OST_WRITE ? PTLRPC_BULK_GET_SOURCE : + PTLRPC_BULK_PUT_SINK) | PTLRPC_BULK_BUF_KIOV, OST_BULK_PORTAL, + &ptlrpc_bulk_kiov_pin_ops); if (!desc) { rc = -ENOMEM; @@ -1259,7 +1094,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli, LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) == (pg->flag & OBD_BRW_SRVLOCK)); - ptlrpc_prep_bulk_page_pin(desc, pg->pg, poff, pg->count); + desc->bd_frag_ops->add_kiov_frag(desc, pg->pg, poff, pg->count); requested_nob += pg->count; if (i > 0 && can_merge_pages(pg_prev, pg)) { @@ -1569,7 +1404,6 @@ static int osc_brw_redo_request(struct ptlrpc_request *request, rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ, aa->aa_cli, aa->aa_oa, - NULL /* lsm unused by osc currently */, aa->aa_page_count, aa->aa_ppga, &new_req, 0, 1); if (rc) @@ -1764,8 +1598,6 @@ static int brw_interpret(const struct lu_env *env, LASSERT(list_empty(&aa->aa_exts)); LASSERT(list_empty(&aa->aa_oaps)); - cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc : - req->rq_bulk->bd_nob_transferred); osc_release_ppga(aa->aa_ppga, aa->aa_page_count); ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred); @@ -1818,9 +1650,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, struct osc_brw_async_args *aa = NULL; struct obdo *oa = NULL; struct osc_async_page *oap; - struct osc_async_page *tmp; - struct cl_req *clerq = NULL; - enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ; + struct osc_object *obj = NULL; struct cl_req_attr *crattr = NULL; u64 starting_offset = OBD_OBJECT_EOF; u64 ending_offset = 0; @@ -1828,6 +1658,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, int mem_tight = 0; int page_count = 0; bool soft_sync = false; + bool interrupted = false; int i; int rc; struct ost_body *body; @@ -1839,32 +1670,15 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, list_for_each_entry(ext, ext_list, oe_link) { LASSERT(ext->oe_state == OES_RPC); mem_tight |= ext->oe_memalloc; - list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { - ++page_count; - list_add_tail(&oap->oap_rpc_item, &rpc_list); - if (starting_offset > oap->oap_obj_off) - starting_offset = oap->oap_obj_off; - else - LASSERT(oap->oap_page_off == 0); - if (ending_offset < oap->oap_obj_off + oap->oap_count) - ending_offset = oap->oap_obj_off + - oap->oap_count; - else - LASSERT(oap->oap_page_off + oap->oap_count == - PAGE_SIZE); - } + page_count += ext->oe_nr_pages; + if (!obj) + obj = ext->oe_obj; } soft_sync = osc_over_unstable_soft_limit(cli); if (mem_tight) mpflag = cfs_memory_pressure_get_and_set(); - crattr = kzalloc(sizeof(*crattr), GFP_NOFS); - if (!crattr) { - rc = -ENOMEM; - goto out; - } - pga = kcalloc(page_count, sizeof(*pga), GFP_NOFS); if (!pga) { rc = -ENOMEM; @@ -1878,44 +1692,46 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, } i = 0; - list_for_each_entry(oap, &rpc_list, oap_rpc_item) { - struct cl_page *page = oap2cl_page(oap); - - if (!clerq) { - clerq = cl_req_alloc(env, page, crt, - 1 /* only 1-object rpcs for now */); - if (IS_ERR(clerq)) { - rc = PTR_ERR(clerq); - goto out; - } + list_for_each_entry(ext, ext_list, oe_link) { + list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { + if (mem_tight) + oap->oap_brw_flags |= OBD_BRW_MEMALLOC; + if (soft_sync) + oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC; + pga[i] = &oap->oap_brw_page; + pga[i]->off = oap->oap_obj_off + 
oap->oap_page_off; + i++; + + list_add_tail(&oap->oap_rpc_item, &rpc_list); + if (starting_offset == OBD_OBJECT_EOF || + starting_offset > oap->oap_obj_off) + starting_offset = oap->oap_obj_off; + else + LASSERT(!oap->oap_page_off); + if (ending_offset < oap->oap_obj_off + oap->oap_count) + ending_offset = oap->oap_obj_off + + oap->oap_count; + else + LASSERT(oap->oap_page_off + oap->oap_count == + PAGE_SIZE); + if (oap->oap_interrupted) + interrupted = true; } - if (mem_tight) - oap->oap_brw_flags |= OBD_BRW_MEMALLOC; - if (soft_sync) - oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC; - pga[i] = &oap->oap_brw_page; - pga[i]->off = oap->oap_obj_off + oap->oap_page_off; - CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", - pga[i]->pg, oap->oap_page->index, oap, - pga[i]->flag); - i++; - cl_req_page_add(env, clerq, page); } - /* always get the data for the obdo for the rpc */ - LASSERT(clerq); - crattr->cra_oa = oa; - cl_req_attr_set(env, clerq, crattr, ~0ULL); + /* first page in the list */ + oap = list_entry(rpc_list.next, typeof(*oap), oap_rpc_item); - rc = cl_req_prep(env, clerq); - if (rc != 0) { - CERROR("cl_req_prep failed: %d\n", rc); - goto out; - } + crattr = &osc_env_info(env)->oti_req_attr; + memset(crattr, 0, sizeof(*crattr)); + crattr->cra_type = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ; + crattr->cra_flags = ~0ULL; + crattr->cra_page = oap2cl_page(oap); + crattr->cra_oa = oa; + cl_req_attr_set(env, osc2cl(obj), crattr); sort_brw_pages(pga, page_count); - rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count, - pga, &req, 1, 0); + rc = osc_brw_prep_request(cmd, cli, oa, page_count, pga, &req, 1, 0); if (rc != 0) { CERROR("prep_req failed: %d\n", rc); goto out; @@ -1924,8 +1740,10 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, req->rq_commit_cb = brw_commit; req->rq_interpret_reply = brw_interpret; - if (mem_tight != 0) - req->rq_memalloc = 1; + req->rq_memalloc = mem_tight != 0; + oap->oap_request = ptlrpc_request_addref(req); + if (interrupted && !req->rq_intr) + ptlrpc_mark_interrupted(req); /* Need to update the timestamps after the request is built in case * we race with setattr (locally or in queue at OST). 
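The rewritten assembly loop seeds starting_offset with OBD_OBJECT_EOF and narrows it per page; the stand-alone sketch below reproduces that bookkeeping with an array in place of the oap_pending_item list and made-up offsets:

#include <stdint.h>
#include <stdio.h>

#define OBJECT_EOF ((uint64_t)-1)	/* stand-in for OBD_OBJECT_EOF */

struct oap { uint64_t obj_off; unsigned int count; };

int main(void)
{
	struct oap pages[] = { { 8192, 4096 }, { 0, 4096 }, { 4096, 4096 } };
	uint64_t start = OBJECT_EOF, end = 0;
	unsigned int i;

	for (i = 0; i < sizeof(pages) / sizeof(pages[0]); i++) {
		if (start == OBJECT_EOF || start > pages[i].obj_off)
			start = pages[i].obj_off;
		if (end < pages[i].obj_off + pages[i].count)
			end = pages[i].obj_off + pages[i].count;
	}
	printf("rpc covers [%llu, %llu)\n",
	       (unsigned long long)start,
	       (unsigned long long)end);	/* [0, 12288) */
	return 0;
}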
If OST gets @@ -1935,9 +1753,8 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, */ body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); crattr->cra_oa = &body->oa; - cl_req_attr_set(env, clerq, crattr, - OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME); - + crattr->cra_flags = OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME; + cl_req_attr_set(env, osc2cl(obj), crattr); lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid); CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); @@ -1946,24 +1763,6 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, list_splice_init(&rpc_list, &aa->aa_oaps); INIT_LIST_HEAD(&aa->aa_exts); list_splice_init(ext_list, &aa->aa_exts); - aa->aa_clerq = clerq; - - /* queued sync pages can be torn down while the pages - * were between the pending list and the rpc - */ - tmp = NULL; - list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) { - /* only one oap gets a request reference */ - if (!tmp) - tmp = oap; - if (oap->oap_interrupted && !req->rq_intr) { - CDEBUG(D_INODE, "oap %p in req %p interrupted\n", - oap, req); - ptlrpc_mark_interrupted(req); - } - } - if (tmp) - tmp->oap_request = ptlrpc_request_addref(req); spin_lock(&cli->cl_loi_list_lock); starting_offset >>= PAGE_SHIFT; @@ -1985,6 +1784,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %ur/%dw in flight", page_count, aa, cli->cl_r_in_flight, cli->cl_w_in_flight); + OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_IO, cfs_fail_val); ptlrpcd_add_req(req); rc = 0; @@ -1993,8 +1793,6 @@ out: if (mem_tight != 0) cfs_memory_pressure_restore(mpflag); - kfree(crattr); - if (rc != 0) { LASSERT(!req); @@ -2010,22 +1808,15 @@ out: list_del_init(&ext->oe_link); osc_extent_finish(env, ext, 0, rc); } - if (clerq && !IS_ERR(clerq)) - cl_req_completion(env, clerq, rc); } return rc; } -static int osc_set_lock_data_with_check(struct ldlm_lock *lock, - struct ldlm_enqueue_info *einfo) +static int osc_set_lock_data(struct ldlm_lock *lock, void *data) { - void *data = einfo->ei_cbdata; int set = 0; - LASSERT(lock->l_blocking_ast == einfo->ei_cb_bl); - LASSERT(lock->l_resource->lr_type == einfo->ei_type); - LASSERT(lock->l_completion_ast == einfo->ei_cb_cp); - LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl); + LASSERT(lock); lock_res_and_lock(lock); @@ -2039,21 +1830,6 @@ static int osc_set_lock_data_with_check(struct ldlm_lock *lock, return set; } -static int osc_set_data_with_check(struct lustre_handle *lockh, - struct ldlm_enqueue_info *einfo) -{ - struct ldlm_lock *lock = ldlm_handle2lock(lockh); - int set = 0; - - if (lock) { - set = osc_set_lock_data_with_check(lock, einfo); - LDLM_LOCK_PUT(lock); - } else - CERROR("lockh %p, data %p - client evicted?\n", - lockh, einfo->ei_cbdata); - return set; -} - static int osc_enqueue_fini(struct ptlrpc_request *req, osc_enqueue_upcall_f upcall, void *cookie, struct lustre_handle *lockh, enum ldlm_mode mode, @@ -2153,7 +1929,7 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1; * release locks just after they are obtained. 
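osc_set_lock_data(), which replaces the _with_check variants above, reduces to a set-once operation on l_ast_data. A minimal model (no lock_res_and_lock(), so single-threaded only):

#include <stdio.h>

struct lock { void *ast_data; };

/* returns 1 if this caller attached its data, 0 if the lock already has
 * an owner (toy model of osc_set_lock_data(), minus the resource locking)
 */
static int set_lock_data(struct lock *lk, void *data)
{
	if (lk->ast_data)
		return 0;
	lk->ast_data = data;
	return 1;
}

int main(void)
{
	struct lock lk = { 0 };
	int a, b;
	int first = set_lock_data(&lk, &a);
	int second = set_lock_data(&lk, &b);

	printf("%d %d\n", first, second);	/* 1 0 */
	return 0;
}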
*/ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u64 *flags, ldlm_policy_data_t *policy, + __u64 *flags, union ldlm_policy_data *policy, struct ost_lvb *lvb, int kms_valid, osc_enqueue_upcall_f upcall, void *cookie, struct ldlm_enqueue_info *einfo, @@ -2219,7 +1995,7 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, ldlm_lock_decref(&lockh, mode); LDLM_LOCK_PUT(matched); return -ECANCELED; - } else if (osc_set_lock_data_with_check(matched, einfo)) { + } else if (osc_set_lock_data(matched, einfo->ei_cbdata)) { *flags |= LDLM_FL_LVB_READY; /* We already have a lock, and it's referenced. */ (*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED); @@ -2304,7 +2080,7 @@ no_match: } int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u32 type, ldlm_policy_data_t *policy, __u32 mode, + __u32 type, union ldlm_policy_data *policy, __u32 mode, __u64 *flags, void *data, struct lustre_handle *lockh, int unref) { @@ -2331,31 +2107,20 @@ int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, rc |= LCK_PW; rc = ldlm_lock_match(obd->obd_namespace, lflags, res_id, type, policy, rc, lockh, unref); - if (rc) { - if (data) { - if (!osc_set_data_with_check(lockh, data)) { - if (!(lflags & LDLM_FL_TEST_LOCK)) - ldlm_lock_decref(lockh, rc); - return 0; - } - } - if (!(lflags & LDLM_FL_TEST_LOCK) && mode != rc) { - ldlm_lock_addref(lockh, LCK_PR); - ldlm_lock_decref(lockh, LCK_PW); - } + if (!rc || lflags & LDLM_FL_TEST_LOCK) return rc; - } - return rc; -} -int osc_cancel_base(struct lustre_handle *lockh, __u32 mode) -{ - if (unlikely(mode == LCK_GROUP)) - ldlm_lock_decref_and_cancel(lockh, mode); - else - ldlm_lock_decref(lockh, mode); + if (data) { + struct ldlm_lock *lock = ldlm_handle2lock(lockh); - return 0; + LASSERT(lock); + if (!osc_set_lock_data(lock, data)) { + ldlm_lock_decref(lockh, rc); + rc = 0; + } + LDLM_LOCK_PUT(lock); + } + return rc; } static int osc_statfs_interpret(const struct lu_env *env, @@ -2526,9 +2291,6 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, err = ptlrpc_set_import_active(obd->u.cli.cl_import, data->ioc_offset); goto out; - case OBD_IOC_POLL_QUOTACHECK: - err = osc_quota_poll_check(exp, karg); - goto out; case OBD_IOC_PING_TARGET: err = ptlrpc_obd_ping(obd); goto out; @@ -2543,103 +2305,6 @@ out: return err; } -static int osc_get_info(const struct lu_env *env, struct obd_export *exp, - u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm) -{ - if (!vallen || !val) - return -EFAULT; - - if (KEY_IS(KEY_FIEMAP)) { - struct ll_fiemap_info_key *fm_key = key; - struct ldlm_res_id res_id; - ldlm_policy_data_t policy; - struct lustre_handle lockh; - enum ldlm_mode mode = 0; - struct ptlrpc_request *req; - struct ll_user_fiemap *reply; - char *tmp; - int rc; - - if (!(fm_key->fiemap.fm_flags & FIEMAP_FLAG_SYNC)) - goto skip_locking; - - policy.l_extent.start = fm_key->fiemap.fm_start & - PAGE_MASK; - - if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <= - fm_key->fiemap.fm_start + PAGE_SIZE - 1) - policy.l_extent.end = OBD_OBJECT_EOF; - else - policy.l_extent.end = (fm_key->fiemap.fm_start + - fm_key->fiemap.fm_length + - PAGE_SIZE - 1) & PAGE_MASK; - - ostid_build_res_name(&fm_key->oa.o_oi, &res_id); - mode = ldlm_lock_match(exp->exp_obd->obd_namespace, - LDLM_FL_BLOCK_GRANTED | - LDLM_FL_LVB_READY, - &res_id, LDLM_EXTENT, &policy, - LCK_PR | LCK_PW, &lockh, 0); - if (mode) { /* lock is cached on client */ - if (mode != LCK_PR) { - 
ldlm_lock_addref(&lockh, LCK_PR); - ldlm_lock_decref(&lockh, LCK_PW); - } - } else { /* no cached lock, needs acquire lock on server side */ - fm_key->oa.o_valid |= OBD_MD_FLFLAGS; - fm_key->oa.o_flags |= OBD_FL_SRVLOCK; - } - -skip_locking: - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_OST_GET_INFO_FIEMAP); - if (!req) { - rc = -ENOMEM; - goto drop_lock; - } - - req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY, - RCL_CLIENT, keylen); - req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, - RCL_CLIENT, *vallen); - req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, - RCL_SERVER, *vallen); - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO); - if (rc) { - ptlrpc_request_free(req); - goto drop_lock; - } - - tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY); - memcpy(tmp, key, keylen); - tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL); - memcpy(tmp, val, *vallen); - - ptlrpc_request_set_replen(req); - rc = ptlrpc_queue_wait(req); - if (rc) - goto fini_req; - - reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL); - if (!reply) { - rc = -EPROTO; - goto fini_req; - } - - memcpy(val, reply, *vallen); -fini_req: - ptlrpc_req_finished(req); -drop_lock: - if (mode) - ldlm_lock_decref(&lockh, LCK_PR); - return rc; - } - - return -EINVAL; -} - static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, u32 keylen, void *key, u32 vallen, void *val, struct ptlrpc_request_set *set) @@ -2999,47 +2664,33 @@ out_ptlrpcd: return rc; } -static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) +static int osc_precleanup(struct obd_device *obd) { - switch (stage) { - case OBD_CLEANUP_EARLY: { - struct obd_import *imp; - - imp = obd->u.cli.cl_import; - CDEBUG(D_HA, "Deactivating import %s\n", obd->obd_name); - /* ptlrpc_abort_inflight to stop an mds_lov_synchronize */ - ptlrpc_deactivate_import(imp); - spin_lock(&imp->imp_lock); - imp->imp_pingable = 0; - spin_unlock(&imp->imp_lock); - break; + struct client_obd *cli = &obd->u.cli; + + /* LU-464 + * for echo client, export may be on zombie list, wait for + * zombie thread to cull it, because cli.cl_import will be + * cleared in client_disconnect_export(): + * class_export_destroy() -> obd_cleanup() -> + * echo_device_free() -> echo_client_cleanup() -> + * obd_disconnect() -> osc_disconnect() -> + * client_disconnect_export() + */ + obd_zombie_barrier(); + if (cli->cl_writeback_work) { + ptlrpcd_destroy_work(cli->cl_writeback_work); + cli->cl_writeback_work = NULL; } - case OBD_CLEANUP_EXPORTS: { - struct client_obd *cli = &obd->u.cli; - /* LU-464 - * for echo client, export may be on zombie list, wait for - * zombie thread to cull it, because cli.cl_import will be - * cleared in client_disconnect_export(): - * class_export_destroy() -> obd_cleanup() -> - * echo_device_free() -> echo_client_cleanup() -> - * obd_disconnect() -> osc_disconnect() -> - * client_disconnect_export() - */ - obd_zombie_barrier(); - if (cli->cl_writeback_work) { - ptlrpcd_destroy_work(cli->cl_writeback_work); - cli->cl_writeback_work = NULL; - } - if (cli->cl_lru_work) { - ptlrpcd_destroy_work(cli->cl_lru_work); - cli->cl_lru_work = NULL; - } - obd_cleanup_client_import(obd); - ptlrpc_lprocfs_unregister_obd(obd); - lprocfs_obd_cleanup(obd); - break; - } + + if (cli->cl_lru_work) { + ptlrpcd_destroy_work(cli->cl_lru_work); + cli->cl_lru_work = NULL; } + + obd_cleanup_client_import(obd); + ptlrpc_lprocfs_unregister_obd(obd); + lprocfs_obd_cleanup(obd); return 0; } @@ -3104,24 
+2755,18 @@ static struct obd_ops osc_obd_ops = { .disconnect = osc_disconnect, .statfs = osc_statfs, .statfs_async = osc_statfs_async, - .unpackmd = osc_unpackmd, .create = osc_create, .destroy = osc_destroy, .getattr = osc_getattr, - .getattr_async = osc_getattr_async, .setattr = osc_setattr, - .setattr_async = osc_setattr_async, .iocontrol = osc_iocontrol, - .get_info = osc_get_info, .set_info_async = osc_set_info_async, .import_event = osc_import_event, .process_config = osc_process_config, .quotactl = osc_quotactl, - .quotacheck = osc_quotacheck, }; extern struct lu_kmem_descr osc_caches[]; -extern struct lock_class_key osc_ast_guard_class; static int __init osc_init(void) { diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c index 8c51d51a678b..804741362bc0 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/client.c +++ b/drivers/staging/lustre/lustre/ptlrpc/client.c @@ -43,6 +43,18 @@ #include "ptlrpc_internal.h" +const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops = { + .add_kiov_frag = ptlrpc_prep_bulk_page_pin, + .release_frags = ptlrpc_release_bulk_page_pin, +}; +EXPORT_SYMBOL(ptlrpc_bulk_kiov_pin_ops); + +const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops = { + .add_kiov_frag = ptlrpc_prep_bulk_page_nopin, + .release_frags = NULL, +}; +EXPORT_SYMBOL(ptlrpc_bulk_kiov_nopin_ops); + static int ptlrpc_send_new_req(struct ptlrpc_request *req); static int ptlrpcd_check_work(struct ptlrpc_request *req); static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async); @@ -95,24 +107,43 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid) * Allocate and initialize new bulk descriptor on the sender. * Returns pointer to the descriptor or NULL on error. */ -struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw, - unsigned type, unsigned portal) +struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned int nfrags, + unsigned int max_brw, + enum ptlrpc_bulk_op_type type, + unsigned int portal, + const struct ptlrpc_bulk_frag_ops *ops) { struct ptlrpc_bulk_desc *desc; int i; - desc = kzalloc(offsetof(struct ptlrpc_bulk_desc, bd_iov[npages]), - GFP_NOFS); + /* ensure that only one of KIOV or IOVEC is set but not both */ + LASSERT((ptlrpc_is_bulk_desc_kiov(type) && ops->add_kiov_frag) || + (ptlrpc_is_bulk_desc_kvec(type) && ops->add_iov_frag)); + + desc = kzalloc(sizeof(*desc), GFP_NOFS); if (!desc) return NULL; + if (type & PTLRPC_BULK_BUF_KIOV) { + GET_KIOV(desc) = kcalloc(nfrags, sizeof(*GET_KIOV(desc)), + GFP_NOFS); + if (!GET_KIOV(desc)) + goto free_desc; + } else { + GET_KVEC(desc) = kcalloc(nfrags, sizeof(*GET_KVEC(desc)), + GFP_NOFS); + if (!GET_KVEC(desc)) + goto free_desc; + } + spin_lock_init(&desc->bd_lock); init_waitqueue_head(&desc->bd_waitq); - desc->bd_max_iov = npages; + desc->bd_max_iov = nfrags; desc->bd_iov_count = 0; desc->bd_portal = portal; desc->bd_type = type; desc->bd_md_count = 0; + desc->bd_frag_ops = (struct ptlrpc_bulk_frag_ops *)ops; LASSERT(max_brw > 0); desc->bd_md_max_brw = min(max_brw, PTLRPC_BULK_OPS_COUNT); /* @@ -123,24 +154,31 @@ struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw, LNetInvalidateHandle(&desc->bd_mds[i]); return desc; +free_desc: + kfree(desc); + return NULL; } /** * Prepare bulk descriptor for specified outgoing request \a req that - * can fit \a npages * pages. \a type is bulk type. \a portal is where + * can fit \a nfrags * pages. \a type is bulk type. 
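The reworked descriptor holds either a bio_vec array or a kvec array, chosen by the type flag and reached through the GET_KIOV()/GET_KVEC() macros this series introduces. A reduced model of that tagged allocation, with plain calloc()/free() standing in for the kernel allocators:

#include <stdio.h>
#include <stdlib.h>

#define BULK_BUF_KIOV 0x1	/* stand-ins for PTLRPC_BULK_BUF_* */
#define BULK_BUF_KVEC 0x2

struct kiov_frag { void *page; unsigned int off, len; };
struct kvec_frag { void *base; unsigned int len; };

struct bulk_desc {
	unsigned int type;
	union {			/* one array or the other, never both */
		struct kiov_frag *kiov;
		struct kvec_frag *kvec;
	} u;
};

static struct bulk_desc *new_bulk(unsigned int nfrags, unsigned int type)
{
	struct bulk_desc *d = calloc(1, sizeof(*d));

	if (!d)
		return NULL;
	d->type = type;
	if (type & BULK_BUF_KIOV)
		d->u.kiov = calloc(nfrags, sizeof(*d->u.kiov));
	else
		d->u.kvec = calloc(nfrags, sizeof(*d->u.kvec));
	if (!d->u.kiov) {	/* union: either member reports failure */
		free(d);
		return NULL;
	}
	return d;
}

int main(void)
{
	struct bulk_desc *d = new_bulk(4, BULK_BUF_KIOV);

	if (d) {
		printf("allocated kiov descriptor with 4 frags\n");
		free(d->u.kiov);
		free(d);
	}
	return 0;
}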
\a portal is where * the bulk to be sent. Used on client-side. * Returns pointer to newly allocated initialized bulk descriptor or NULL on * error. */ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, - unsigned npages, unsigned max_brw, - unsigned type, unsigned portal) + unsigned int nfrags, + unsigned int max_brw, + unsigned int type, + unsigned int portal, + const struct ptlrpc_bulk_frag_ops *ops) { struct obd_import *imp = req->rq_import; struct ptlrpc_bulk_desc *desc; - LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE); - desc = ptlrpc_new_bulk(npages, max_brw, type, portal); + LASSERT(ptlrpc_is_bulk_op_passive(type)); + + desc = ptlrpc_new_bulk(nfrags, max_brw, type, portal, ops); if (!desc) return NULL; @@ -158,56 +196,82 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, } EXPORT_SYMBOL(ptlrpc_prep_bulk_imp); -/** - * Add a page \a page to the bulk descriptor \a desc. - * Data to transfer in the page starts at offset \a pageoffset and - * amount of data to transfer from the page is \a len - */ void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, int pageoffset, int len, int pin) { + struct bio_vec *kiov; + LASSERT(desc->bd_iov_count < desc->bd_max_iov); LASSERT(page); LASSERT(pageoffset >= 0); LASSERT(len > 0); LASSERT(pageoffset + len <= PAGE_SIZE); + LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type)); + + kiov = &BD_GET_KIOV(desc, desc->bd_iov_count); desc->bd_nob += len; if (pin) get_page(page); - ptlrpc_add_bulk_page(desc, page, pageoffset, len); + kiov->bv_page = page; + kiov->bv_offset = pageoffset; + kiov->bv_len = len; + + desc->bd_iov_count++; } EXPORT_SYMBOL(__ptlrpc_prep_bulk_page); -/** - * Uninitialize and free bulk descriptor \a desc. - * Works on bulk descriptors both from server and client side. - */ -void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin) +int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc, + void *frag, int len) { - int i; + struct kvec *iovec; + + LASSERT(desc->bd_iov_count < desc->bd_max_iov); + LASSERT(frag); + LASSERT(len > 0); + LASSERT(ptlrpc_is_bulk_desc_kvec(desc->bd_type)); + iovec = &BD_GET_KVEC(desc, desc->bd_iov_count); + + desc->bd_nob += len; + + iovec->iov_base = frag; + iovec->iov_len = len; + + desc->bd_iov_count++; + + return desc->bd_nob; +} +EXPORT_SYMBOL(ptlrpc_prep_bulk_frag); + +void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) +{ LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */ LASSERT(desc->bd_md_count == 0); /* network hands off */ LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL)); + LASSERT(desc->bd_frag_ops); - sptlrpc_enc_pool_put_pages(desc); + if (ptlrpc_is_bulk_desc_kiov(desc->bd_type)) + sptlrpc_enc_pool_put_pages(desc); if (desc->bd_export) class_export_put(desc->bd_export); else class_import_put(desc->bd_import); - if (unpin) { - for (i = 0; i < desc->bd_iov_count; i++) - put_page(desc->bd_iov[i].bv_page); - } + if (desc->bd_frag_ops->release_frags) + desc->bd_frag_ops->release_frags(desc); + + if (ptlrpc_is_bulk_desc_kiov(desc->bd_type)) + kfree(GET_KIOV(desc)); + else + kfree(GET_KVEC(desc)); kfree(desc); } -EXPORT_SYMBOL(__ptlrpc_free_bulk); +EXPORT_SYMBOL(ptlrpc_free_bulk); /** * Set server timelimit for this req, i.e. 
how long are we willing to wait @@ -589,6 +653,42 @@ static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request) spin_unlock(&pool->prp_lock); } +void ptlrpc_add_unreplied(struct ptlrpc_request *req) +{ + struct obd_import *imp = req->rq_import; + struct list_head *tmp; + struct ptlrpc_request *iter; + + assert_spin_locked(&imp->imp_lock); + LASSERT(list_empty(&req->rq_unreplied_list)); + + /* unreplied list is sorted by xid in ascending order */ + list_for_each_prev(tmp, &imp->imp_unreplied_list) { + iter = list_entry(tmp, struct ptlrpc_request, + rq_unreplied_list); + + LASSERT(req->rq_xid != iter->rq_xid); + if (req->rq_xid < iter->rq_xid) + continue; + list_add(&req->rq_unreplied_list, &iter->rq_unreplied_list); + return; + } + list_add(&req->rq_unreplied_list, &imp->imp_unreplied_list); +} + +void ptlrpc_assign_next_xid_nolock(struct ptlrpc_request *req) +{ + req->rq_xid = ptlrpc_next_xid(); + ptlrpc_add_unreplied(req); +} + +static inline void ptlrpc_assign_next_xid(struct ptlrpc_request *req) +{ + spin_lock(&req->rq_import->imp_lock); + ptlrpc_assign_next_xid_nolock(req); + spin_unlock(&req->rq_import->imp_lock); +} + int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, __u32 version, int opcode, char **bufs, struct ptlrpc_cli_ctx *ctx) @@ -637,8 +737,8 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, ptlrpc_at_set_req_timeout(request); - request->rq_xid = ptlrpc_next_xid(); lustre_msg_set_opc(request->rq_reqmsg, opcode); + ptlrpc_assign_next_xid(request); /* Let's setup deadline for req/reply/bulk unlink for opcode. */ if (cfs_fail_val == opcode) { @@ -1129,7 +1229,9 @@ static int ptlrpc_check_status(struct ptlrpc_request *req) lnet_nid_t nid = imp->imp_connection->c_peer.nid; __u32 opc = lustre_msg_get_opc(req->rq_reqmsg); - if (ptlrpc_console_allow(req)) + /* -EAGAIN is normal when using POSIX flocks */ + if (ptlrpc_console_allow(req) && + !(opc == LDLM_ENQUEUE && err == -EAGAIN)) LCONSOLE_ERROR_MSG(0x011, "%s: operation %s to node %s failed: rc = %d\n", imp->imp_obd->obd_name, ll_opcode2str(opc), @@ -1166,6 +1268,24 @@ static void ptlrpc_save_versions(struct ptlrpc_request *req) versions[0], versions[1]); } +__u64 ptlrpc_known_replied_xid(struct obd_import *imp) +{ + struct ptlrpc_request *req; + + assert_spin_locked(&imp->imp_lock); + if (list_empty(&imp->imp_unreplied_list)) + return 0; + + req = list_entry(imp->imp_unreplied_list.next, struct ptlrpc_request, + rq_unreplied_list); + LASSERTF(req->rq_xid >= 1, "XID:%llu\n", req->rq_xid); + + if (imp->imp_known_replied_xid < req->rq_xid - 1) + imp->imp_known_replied_xid = req->rq_xid - 1; + + return req->rq_xid - 1; +} + /** * Callback function called when client receives RPC reply for \a req. * Returns 0 on success or error code. @@ -1180,6 +1300,7 @@ static int after_reply(struct ptlrpc_request *req) int rc; struct timespec64 work_start; long timediff; + u64 committed; LASSERT(obd); /* repbuf must be unlinked */ @@ -1206,6 +1327,10 @@ static int after_reply(struct ptlrpc_request *req) return 0; } + ktime_get_real_ts64(&work_start); + timediff = (work_start.tv_sec - req->rq_sent_tv.tv_sec) * USEC_PER_SEC + + (work_start.tv_nsec - req->rq_sent_tv.tv_nsec) / + NSEC_PER_USEC; /* * NB Until this point, the whole of the incoming message, * including buflens, status etc is in the sender's byte order. 
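Illustration (not part of the patch): ptlrpc_add_unreplied() above keeps imp_unreplied_list sorted by XID in ascending order, so the highest XID known to be replied is one less than the smallest outstanding XID, which is exactly what ptlrpc_known_replied_xid() returns. A minimal userspace sketch of the invariant:

#include <assert.h>

/* unreplied[] mirrors imp_unreplied_list: outstanding XIDs, ascending */
static unsigned long long known_replied_xid(const unsigned long long *unreplied,
					    int n)
{
	return n ? unreplied[0] - 1 : 0;
}

int main(void)
{
	/* XIDs 5, 7 and 8 still lack replies, so everything <= 4 is replied */
	unsigned long long xids[] = { 5, 7, 8 };

	assert(known_replied_xid(xids, 3) == 4);
	return 0;
}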
@@ -1235,13 +1360,6 @@ static int after_reply(struct ptlrpc_request *req) spin_unlock(&req->rq_lock); req->rq_nr_resend++; - /* allocate new xid to avoid reply reconstruction */ - if (!req->rq_bulk) { - /* new xid is already allocated for bulk in ptlrpc_check_set() */ - req->rq_xid = ptlrpc_next_xid(); - DEBUG_REQ(D_RPCTRACE, req, "Allocating new xid for resend on EINPROGRESS"); - } - /* Readjust the timeout for current conditions */ ptlrpc_at_set_req_timeout(req); /* @@ -1255,13 +1373,14 @@ static int after_reply(struct ptlrpc_request *req) else req->rq_sent = now + req->rq_nr_resend; + /* Resend for EINPROGRESS will use a new XID */ + spin_lock(&imp->imp_lock); + list_del_init(&req->rq_unreplied_list); + spin_unlock(&imp->imp_lock); + return 0; } - ktime_get_real_ts64(&work_start); - timediff = (work_start.tv_sec - req->rq_sent_tv.tv_sec) * USEC_PER_SEC + - (work_start.tv_nsec - req->rq_sent_tv.tv_nsec) / - NSEC_PER_USEC; if (obd->obd_svc_stats) { lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, timediff); @@ -1338,10 +1457,9 @@ static int after_reply(struct ptlrpc_request *req) } /* Replay-enabled imports return commit-status information. */ - if (lustre_msg_get_last_committed(req->rq_repmsg)) { - imp->imp_peer_committed_transno = - lustre_msg_get_last_committed(req->rq_repmsg); - } + committed = lustre_msg_get_last_committed(req->rq_repmsg); + if (likely(committed > imp->imp_peer_committed_transno)) + imp->imp_peer_committed_transno = committed; ptlrpc_free_committed(imp); @@ -1373,9 +1491,17 @@ static int after_reply(struct ptlrpc_request *req) static int ptlrpc_send_new_req(struct ptlrpc_request *req) { struct obd_import *imp = req->rq_import; + u64 min_xid = 0; int rc; LASSERT(req->rq_phase == RQ_PHASE_NEW); + + /* do not try to go further if there is not enough memory in enc_pool */ + if (req->rq_sent && req->rq_bulk) + if (req->rq_bulk->bd_iov_count > get_free_pages_in_pool() && + pool_is_at_full_capacity()) + return -ENOMEM; + if (req->rq_sent && (req->rq_sent > ktime_get_real_seconds()) && (!req->rq_generation_set || req->rq_import_generation == imp->imp_generation)) @@ -1385,6 +1511,9 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) spin_lock(&imp->imp_lock); + LASSERT(req->rq_xid); + LASSERT(!list_empty(&req->rq_unreplied_list)); + if (!req->rq_generation_set) req->rq_import_generation = imp->imp_generation; @@ -1414,8 +1543,25 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) LASSERT(list_empty(&req->rq_list)); list_add_tail(&req->rq_list, &imp->imp_sending_list); atomic_inc(&req->rq_import->imp_inflight); + + /* Find the known replied XID from the unreplied list; CONNECT + * and DISCONNECT requests are skipped to keep the sanity check + * on the server side happy, see process_req_last_xid(). + * + * For CONNECT: because replay requests have lower XIDs, it would + * break the sanity check if CONNECT bumped the exp_last_xid on + * the server. + * + * For DISCONNECT: since the client aborts inflight RPCs before + * sending DISCONNECT, DISCONNECT may carry an XID that is higher + * than the inflight RPCs.
+ */ + if (!ptlrpc_req_is_connect(req) && !ptlrpc_req_is_disconnect(req)) + min_xid = ptlrpc_known_replied_xid(imp); spin_unlock(&imp->imp_lock); + lustre_msg_set_last_xid(req->rq_reqmsg, min_xid); + lustre_msg_set_status(req->rq_reqmsg, current_pid()); rc = sptlrpc_req_refresh_ctx(req, -1); @@ -1438,6 +1584,16 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) lustre_msg_get_opc(req->rq_reqmsg)); rc = ptl_send_rpc(req, 0); + if (rc == -ENOMEM) { + spin_lock(&imp->imp_lock); + if (!list_empty(&req->rq_list)) { + list_del_init(&req->rq_list); + atomic_dec(&req->rq_import->imp_inflight); + } + spin_unlock(&imp->imp_lock); + ptlrpc_rqphase_move(req, RQ_PHASE_NEW); + return rc; + } if (rc) { DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc); spin_lock(&req->rq_lock); @@ -1688,18 +1844,9 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) spin_lock(&req->rq_lock); req->rq_resend = 1; spin_unlock(&req->rq_lock); - if (req->rq_bulk) { - __u64 old_xid; - - if (!ptlrpc_unregister_bulk(req, 1)) - continue; - - /* ensure previous bulk fails */ - old_xid = req->rq_xid; - req->rq_xid = ptlrpc_next_xid(); - CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n", - old_xid, req->rq_xid); - } + if (req->rq_bulk && + !ptlrpc_unregister_bulk(req, 1)) + continue; } /* * rq_wait_ctx is only touched by ptlrpcd, @@ -1727,6 +1874,14 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) } rc = ptl_send_rpc(req, 0); + if (rc == -ENOMEM) { + spin_lock(&imp->imp_lock); + if (!list_empty(&req->rq_list)) + list_del_init(&req->rq_list); + spin_unlock(&imp->imp_lock); + ptlrpc_rqphase_move(req, RQ_PHASE_NEW); + continue; + } if (rc) { DEBUG_REQ(D_HA, req, "send failed: rc = %d", rc); @@ -1850,6 +2005,7 @@ interpret: list_del_init(&req->rq_list); atomic_dec(&imp->imp_inflight); } + list_del_init(&req->rq_unreplied_list); spin_unlock(&imp->imp_lock); atomic_dec(&set->set_remaining); @@ -2247,6 +2403,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) if (!locked) spin_lock(&request->rq_import->imp_lock); list_del_init(&request->rq_replay_list); + list_del_init(&request->rq_unreplied_list); if (!locked) spin_unlock(&request->rq_import->imp_lock); } @@ -2266,7 +2423,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) request->rq_import = NULL; } if (request->rq_bulk) - ptlrpc_free_bulk_pin(request->rq_bulk); + ptlrpc_free_bulk(request->rq_bulk); if (request->rq_reqbuf || request->rq_clrbuf) sptlrpc_cli_free_reqbuf(request); @@ -2542,14 +2699,6 @@ void ptlrpc_resend_req(struct ptlrpc_request *req) req->rq_resend = 1; req->rq_net_err = 0; req->rq_timedout = 0; - if (req->rq_bulk) { - __u64 old_xid = req->rq_xid; - - /* ensure previous bulk fails */ - req->rq_xid = ptlrpc_next_xid(); - CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n", - old_xid, req->rq_xid); - } ptlrpc_client_wake_req(req); spin_unlock(&req->rq_lock); } @@ -2592,6 +2741,10 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, lustre_msg_add_flags(req->rq_reqmsg, MSG_REPLAY); + spin_lock(&req->rq_lock); + req->rq_resend = 0; + spin_unlock(&req->rq_lock); + LASSERT(imp->imp_replayable); /* Balanced in ptlrpc_free_committed, usually. 
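Illustration (hypothetical helper, not in the patch): the -ENOMEM branches added above in ptlrpc_send_new_req() and ptlrpc_check_set() share one recovery pattern: undo the list bookkeeping and move the request back to RQ_PHASE_NEW so that a later ptlrpcd pass retries the send. Condensed in the patch's own idiom, the ptlrpc_send_new_req() variant reads:

static int send_or_requeue(struct ptlrpc_request *req)
{
	int rc = ptl_send_rpc(req, 0);

	if (rc == -ENOMEM) {
		spin_lock(&req->rq_import->imp_lock);
		if (!list_empty(&req->rq_list)) {
			list_del_init(&req->rq_list);
			atomic_dec(&req->rq_import->imp_inflight);
		}
		spin_unlock(&req->rq_import->imp_lock);
		/* back to NEW: the next ptlrpcd iteration retries the send */
		ptlrpc_rqphase_move(req, RQ_PHASE_NEW);
	}
	return rc;
}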
*/ ptlrpc_request_addref(req); @@ -2667,8 +2820,15 @@ static int ptlrpc_replay_interpret(const struct lu_env *env, atomic_dec(&imp->imp_replay_inflight); - if (!ptlrpc_client_replied(req)) { - CERROR("request replay timed out, restarting recovery\n"); + /* + * Note: for bulk replay (MDS-MDS replay), even if the server got + * the request, if the bulk transfer timed out, replay the bulk + * request again + */ + if (!ptlrpc_client_replied(req) || + (req->rq_bulk && + lustre_msg_get_status(req->rq_repmsg) == -ETIMEDOUT)) { + DEBUG_REQ(D_ERROR, req, "request replay timed out.\n"); rc = -ETIMEDOUT; goto out; } @@ -2939,6 +3099,48 @@ __u64 ptlrpc_next_xid(void) } /** + * If the request has a newly allocated XID (a new request or an EINPROGRESS + * resend), use this XID as the bulk matchbits; otherwise allocate new + * matchbits for the request, to ensure the previous bulk fails and to avoid + * problems with lost replies, and hence several transfers landing in the + * same buffer from different sending attempts. + */ +void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req) +{ + struct ptlrpc_bulk_desc *bd = req->rq_bulk; + + LASSERT(bd); + + if (!req->rq_resend) { + /* this request has a new xid, just use it as bulk matchbits */ + req->rq_mbits = req->rq_xid; + + } else { /* need to generate new matchbits for the resend */ + u64 old_mbits = req->rq_mbits; + + if ((bd->bd_import->imp_connect_data.ocd_connect_flags & + OBD_CONNECT_BULK_MBITS)) { + req->rq_mbits = ptlrpc_next_xid(); + } else { + /* old version transfers rq_xid to peer as matchbits */ + req->rq_mbits = ptlrpc_next_xid(); + req->rq_xid = req->rq_mbits; + } + + CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n", + old_mbits, req->rq_mbits); + } + + /* + * For multi-bulk RPCs, rq_mbits is the last matchbits needed for the + * bulks, so that the server can infer the number of bulks that were + * prepared; see LU-1431 + */ + req->rq_mbits += ((bd->bd_iov_count + LNET_MAX_IOV - 1) / + LNET_MAX_IOV) - 1; +} + +/** * Get a glimpse at what next xid value might have been. * Returns possible next xid.
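Worked example (illustration only; assumes LNET_MAX_IOV == 256, a value not stated in this hunk): the final adjustment in ptlrpc_set_bulk_mbits() makes rq_mbits name the last MD of a multi-bulk RPC, so a 600-fragment bulk, needing ceil(600/256) == 3 MDs, advances rq_mbits by 2:

#include <assert.h>

#define LNET_MAX_IOV 256	/* assumed value, for the sketch only */

static unsigned long long last_mbits(unsigned long long first, int nfrags)
{
	int mds = (nfrags + LNET_MAX_IOV - 1) / LNET_MAX_IOV;

	return first + mds - 1;	/* mirrors req->rq_mbits += ... - 1 */
}

int main(void)
{
	assert(last_mbits(1000, 600) == 1002);	/* three MDs: 1000..1002 */
	return 0;
}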
*/ diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c index 7b020d60c9e5..6c7c8b68a909 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/connection.c +++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c @@ -152,8 +152,8 @@ void ptlrpc_connection_fini(void) /* * Hash operations for net_peer<->connection */ -static unsigned -conn_hashfn(struct cfs_hash *hs, const void *key, unsigned mask) +static unsigned int +conn_hashfn(struct cfs_hash *hs, const void *key, unsigned int mask) { return cfs_hash_djb2_hash(key, sizeof(lnet_process_id_t), mask); } diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c index 283dfb296d35..49f3e6368415 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/events.c +++ b/drivers/staging/lustre/lustre/ptlrpc/events.c @@ -182,9 +182,9 @@ void client_bulk_callback(lnet_event_t *ev) struct ptlrpc_bulk_desc *desc = cbid->cbid_arg; struct ptlrpc_request *req; - LASSERT((desc->bd_type == BULK_PUT_SINK && + LASSERT((ptlrpc_is_bulk_put_sink(desc->bd_type) && ev->type == LNET_EVENT_PUT) || - (desc->bd_type == BULK_GET_SOURCE && + (ptlrpc_is_bulk_get_source(desc->bd_type) && ev->type == LNET_EVENT_GET) || ev->type == LNET_EVENT_UNLINK); LASSERT(ev->unlinked); diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c index a23d0a05b574..e8280194001c 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/import.c +++ b/drivers/staging/lustre/lustre/ptlrpc/import.c @@ -396,7 +396,7 @@ void ptlrpc_activate_import(struct obd_import *imp) } EXPORT_SYMBOL(ptlrpc_activate_import); -static void ptlrpc_pinger_force(struct obd_import *imp) +void ptlrpc_pinger_force(struct obd_import *imp) { CDEBUG(D_HA, "%s: waking up pinger s:%s\n", obd2cli_tgt(imp->imp_obd), ptlrpc_import_state_name(imp->imp_state)); @@ -408,6 +408,7 @@ static void ptlrpc_pinger_force(struct obd_import *imp) if (imp->imp_state != LUSTRE_IMP_CONNECTING) ptlrpc_pinger_wake_up(); } +EXPORT_SYMBOL(ptlrpc_pinger_force); void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt) { @@ -621,7 +622,8 @@ int ptlrpc_connect_import(struct obd_import *imp) spin_unlock(&imp->imp_lock); CERROR("already connected\n"); return 0; - } else if (imp->imp_state == LUSTRE_IMP_CONNECTING) { + } else if (imp->imp_state == LUSTRE_IMP_CONNECTING || + imp->imp_connected) { spin_unlock(&imp->imp_lock); CERROR("already connecting\n"); return -EALREADY; @@ -691,8 +693,6 @@ int ptlrpc_connect_import(struct obd_import *imp) request->rq_timeout = INITIAL_CONNECT_TIMEOUT; lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout); - lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_NEXT_VER); - request->rq_no_resend = 1; request->rq_no_delay = 1; request->rq_send_state = LUSTRE_IMP_CONNECTING; @@ -859,6 +859,17 @@ static int ptlrpc_connect_set_flags(struct obd_import *imp, client_adjust_max_dirty(cli); /* + * Update client max modify RPCs in flight with value returned + * by the server + */ + if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) + cli->cl_max_mod_rpcs_in_flight = min( + cli->cl_max_mod_rpcs_in_flight, + ocd->ocd_maxmodrpcs); + else + cli->cl_max_mod_rpcs_in_flight = 1; + + /* * Reset ns_connect_flags only for initial connect. 
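Illustration (not part of the patch): the OBD_CONNECT_MULTIMODRPCS negotiation above clamps the client's modify-RPCs-in-flight limit to what the server advertises in ocd_maxmodrpcs, falling back to a single modifying RPC when the server lacks the feature:

static unsigned short negotiated_mod_rpcs(int server_supports_multimodrpcs,
					  unsigned short client_max,
					  unsigned short server_max)
{
	if (!server_supports_multimodrpcs)
		return 1;
	return client_max < server_max ? client_max : server_max;
}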
It might be * changed in while using FS and if we reset it in reconnect * this leads to losing user settings done before such as @@ -873,8 +884,7 @@ static int ptlrpc_connect_set_flags(struct obd_import *imp, ocd->ocd_connect_flags; } - if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) && - (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2)) + if (ocd->ocd_connect_flags & OBD_CONNECT_AT) /* * We need a per-message support flag, because * a. we don't know if the incoming connect reply @@ -889,16 +899,45 @@ static int ptlrpc_connect_set_flags(struct obd_import *imp, else imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT; - if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) && - (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2)) - imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18; - else - imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18; + imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18; return 0; } /** + * Add all replay requests back to the unreplied list before starting + * replay, so that the known replied XID always increases monotonically, + * even when replaying requests. + */ +static void ptlrpc_prepare_replay(struct obd_import *imp) +{ + struct ptlrpc_request *req; + + if (imp->imp_state != LUSTRE_IMP_REPLAY || + imp->imp_resend_replay) + return; + + /* + * If the server was restarted during replay, the requests may + * already have been added to the unreplied list by a former replay. + */ + spin_lock(&imp->imp_lock); + + list_for_each_entry(req, &imp->imp_committed_list, rq_replay_list) { + if (list_empty(&req->rq_unreplied_list)) + ptlrpc_add_unreplied(req); + } + + list_for_each_entry(req, &imp->imp_replay_list, rq_replay_list) { + if (list_empty(&req->rq_unreplied_list)) + ptlrpc_add_unreplied(req); + } + + imp->imp_known_replied_xid = ptlrpc_known_replied_xid(imp); + spin_unlock(&imp->imp_lock); +} + +/** * interpret_reply callback for connect RPCs. * Looks into returned status of connect operation and decides * what to do with the import - i.e enter recovery, promote it to @@ -933,6 +972,13 @@ static int ptlrpc_connect_interpret(const struct lu_env *env, ptlrpc_maybe_ping_import_soon(imp); goto out; } + + /* + * LU-7558: indicate that we are interpreting the connect reply; + * ptlrpc_connect_import() will not try to reconnect until the + * interpret callback finishes. + */ + imp->imp_connected = 1; spin_unlock(&imp->imp_lock); LASSERT(imp->imp_conn_current); @@ -967,6 +1013,16 @@ static int ptlrpc_connect_interpret(const struct lu_env *env, spin_unlock(&imp->imp_lock); + if (!exp) { + /* This could happen if the export is cleaned up during + * the connect attempt + */ + CERROR("%s: missing export after connect\n", + imp->imp_obd->obd_name); + rc = -ENODEV; + goto out; + } + /* check that server granted subset of flags we asked for.
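Illustration (not part of the patch): the "granted is a subset of requested" sanity check that follows reduces to a mask test, since a grant is acceptable only if masking it with the requested flags changes nothing:

static int flags_are_subset(unsigned long long requested,
			    unsigned long long granted)
{
	/* mirrors: (ocd_connect_flags & imp_connect_flags_orig) == ocd_connect_flags */
	return (granted & requested) == granted;
}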
*/ if ((ocd->ocd_connect_flags & imp->imp_connect_flags_orig) != ocd->ocd_connect_flags) { @@ -977,15 +1033,6 @@ static int ptlrpc_connect_interpret(const struct lu_env *env, goto out; } - if (!exp) { - /* This could happen if export is cleaned during the - * connect attempt - */ - CERROR("%s: missing export after connect\n", - imp->imp_obd->obd_name); - rc = -ENODEV; - goto out; - } old_connect_flags = exp_connect_flags(exp); exp->exp_connect_data = *ocd; imp->imp_obd->obd_self_export->exp_connect_data = *ocd; @@ -1124,6 +1171,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env, imp->imp_remote_handle = *lustre_msg_get_handle(request->rq_repmsg); imp->imp_last_replay_transno = 0; + imp->imp_replay_cursor = &imp->imp_committed_list; IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY); } else { DEBUG_REQ(D_HA, request, "%s: evicting (reconnect/recover flags not set: %x)", @@ -1147,18 +1195,25 @@ static int ptlrpc_connect_interpret(const struct lu_env *env, } finish: + ptlrpc_prepare_replay(imp); rc = ptlrpc_import_recovery_state_machine(imp); if (rc == -ENOTCONN) { CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n", obd2cli_tgt(imp->imp_obd), imp->imp_connection->c_remote_uuid.uuid); ptlrpc_connect_import(imp); + spin_lock(&imp->imp_lock); + imp->imp_connected = 0; imp->imp_connect_tried = 1; + spin_unlock(&imp->imp_lock); return 0; } out: + spin_lock(&imp->imp_lock); + imp->imp_connected = 0; imp->imp_connect_tried = 1; + spin_unlock(&imp->imp_lock); if (rc != 0) { IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c index 839ef3e80c1a..99d7c667df28 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/layout.c +++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c @@ -48,14 +48,14 @@ #include <linux/module.h> -/* LUSTRE_VERSION_CODE */ -#include "../include/lustre_ver.h" - -#include "../include/obd_support.h" -/* lustre_swab_mdt_body */ #include "../include/lustre/lustre_idl.h" -/* obd2cli_tgt() (required by DEBUG_REQ()) */ + +#include "../include/llog_swab.h" +#include "../include/lustre_debug.h" +#include "../include/lustre_swab.h" +#include "../include/lustre_ver.h" #include "../include/obd.h" +#include "../include/obd_support.h" /* __REQ_LAYOUT_USER__ */ #endif @@ -121,7 +121,7 @@ static const struct req_msg_field *mdt_close_client[] = { &RMF_CAPA1 }; -static const struct req_msg_field *mdt_release_close_client[] = { +static const struct req_msg_field *mdt_intent_close_client[] = { &RMF_PTLRPC_BODY, &RMF_MDT_EPOCH, &RMF_REC_REINT, @@ -257,6 +257,18 @@ static const struct req_msg_field *mds_reint_rename_client[] = { &RMF_DLM_REQ }; +static const struct req_msg_field *mds_reint_migrate_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_NAME, + &RMF_SYMTGT, + &RMF_DLM_REQ, + &RMF_MDT_EPOCH, + &RMF_CLOSE_DATA +}; + static const struct req_msg_field *mds_last_unlink_server[] = { &RMF_PTLRPC_BODY, &RMF_MDT_BODY, @@ -666,10 +678,9 @@ static struct req_format *req_formats[] = { &RQF_MDS_GETXATTR, &RQF_MDS_SYNC, &RQF_MDS_CLOSE, - &RQF_MDS_RELEASE_CLOSE, + &RQF_MDS_INTENT_CLOSE, &RQF_MDS_READPAGE, &RQF_MDS_WRITEPAGE, - &RQF_MDS_DONE_WRITING, &RQF_MDS_REINT, &RQF_MDS_REINT_CREATE, &RQF_MDS_REINT_CREATE_ACL, @@ -679,9 +690,9 @@ static struct req_format *req_formats[] = { &RQF_MDS_REINT_UNLINK, &RQF_MDS_REINT_LINK, &RQF_MDS_REINT_RENAME, + &RQF_MDS_REINT_MIGRATE, &RQF_MDS_REINT_SETATTR, &RQF_MDS_REINT_SETXATTR, - &RQF_MDS_QUOTACHECK, 
&RQF_MDS_QUOTACTL, &RQF_MDS_HSM_PROGRESS, &RQF_MDS_HSM_CT_REGISTER, @@ -691,10 +702,8 @@ static struct req_format *req_formats[] = { &RQF_MDS_HSM_ACTION, &RQF_MDS_HSM_REQUEST, &RQF_MDS_SWAP_LAYOUTS, - &RQF_QC_CALLBACK, &RQF_OST_CONNECT, &RQF_OST_DISCONNECT, - &RQF_OST_QUOTACHECK, &RQF_OST_QUOTACTL, &RQF_OST_GETATTR, &RQF_OST_SETATTR, @@ -1180,14 +1189,6 @@ struct req_format RQF_LOG_CANCEL = DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty); EXPORT_SYMBOL(RQF_LOG_CANCEL); -struct req_format RQF_MDS_QUOTACHECK = - DEFINE_REQ_FMT0("MDS_QUOTACHECK", quotactl_only, empty); -EXPORT_SYMBOL(RQF_MDS_QUOTACHECK); - -struct req_format RQF_OST_QUOTACHECK = - DEFINE_REQ_FMT0("OST_QUOTACHECK", quotactl_only, empty); -EXPORT_SYMBOL(RQF_OST_QUOTACHECK); - struct req_format RQF_MDS_QUOTACTL = DEFINE_REQ_FMT0("MDS_QUOTACTL", quotactl_only, quotactl_only); EXPORT_SYMBOL(RQF_MDS_QUOTACTL); @@ -1196,10 +1197,6 @@ struct req_format RQF_OST_QUOTACTL = DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only); EXPORT_SYMBOL(RQF_OST_QUOTACTL); -struct req_format RQF_QC_CALLBACK = - DEFINE_REQ_FMT0("QC_CALLBACK", quotactl_only, empty); -EXPORT_SYMBOL(RQF_QC_CALLBACK); - struct req_format RQF_MDS_GETSTATUS = DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa); EXPORT_SYMBOL(RQF_MDS_GETSTATUS); @@ -1270,6 +1267,11 @@ struct req_format RQF_MDS_REINT_RENAME = mds_last_unlink_server); EXPORT_SYMBOL(RQF_MDS_REINT_RENAME); +struct req_format RQF_MDS_REINT_MIGRATE = + DEFINE_REQ_FMT0("MDS_REINT_MIGRATE", mds_reint_migrate_client, + mds_last_unlink_server); +EXPORT_SYMBOL(RQF_MDS_REINT_MIGRATE); + struct req_format RQF_MDS_REINT_SETATTR = DEFINE_REQ_FMT0("MDS_REINT_SETATTR", mds_reint_setattr_client, mds_setattr_server); @@ -1381,15 +1383,10 @@ struct req_format RQF_MDS_CLOSE = mdt_close_client, mds_last_unlink_server); EXPORT_SYMBOL(RQF_MDS_CLOSE); -struct req_format RQF_MDS_RELEASE_CLOSE = +struct req_format RQF_MDS_INTENT_CLOSE = DEFINE_REQ_FMT0("MDS_CLOSE", - mdt_release_close_client, mds_last_unlink_server); -EXPORT_SYMBOL(RQF_MDS_RELEASE_CLOSE); - -struct req_format RQF_MDS_DONE_WRITING = - DEFINE_REQ_FMT0("MDS_DONE_WRITING", - mdt_close_client, mdt_body_only); -EXPORT_SYMBOL(RQF_MDS_DONE_WRITING); + mdt_intent_close_client, mds_last_unlink_server); +EXPORT_SYMBOL(RQF_MDS_INTENT_CLOSE); struct req_format RQF_MDS_READPAGE = DEFINE_REQ_FMT0("MDS_READPAGE", @@ -1874,13 +1871,14 @@ static void *__req_capsule_get(struct req_capsule *pill, getter = (field->rmf_flags & RMF_F_STRING) ? (typeof(getter))lustre_msg_string : lustre_msg_buf; - if (field->rmf_flags & RMF_F_STRUCT_ARRAY) { + if (field->rmf_flags & (RMF_F_STRUCT_ARRAY | RMF_F_NO_SIZE_CHECK)) { /* * We've already asserted that field->rmf_size > 0 in * req_layout_init(). 
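Illustration (not part of the patch): the RMF_F_NO_SIZE_CHECK flag handled in the __req_capsule_get() hunk below opts a field out of the usual array-length validation, under which a buffer is well formed only when its length is a whole multiple of the element size:

static int array_len_ok(unsigned int buflen, unsigned int elem_size,
			int no_size_check)
{
	/* elem_size > 0 is asserted in req_layout_init() */
	return no_size_check || (buflen % elem_size) == 0;
}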
*/ len = lustre_msg_buflen(msg, offset); - if ((len % field->rmf_size) != 0) { + if (!(field->rmf_flags & RMF_F_NO_SIZE_CHECK) && + (len % field->rmf_size)) { CERROR("%s: array field size mismatch %d modulo %u != 0 (%d)\n", field->rmf_name, len, field->rmf_size, loc); return NULL; diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c index 0f55c01feba8..110d9f505787 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c +++ b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c @@ -287,8 +287,13 @@ static int llog_client_read_header(const struct lu_env *env, goto out; } - memcpy(handle->lgh_hdr, hdr, sizeof(*hdr)); - handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index; + if (handle->lgh_hdr_size < hdr->llh_hdr.lrh_len) { + rc = -EFAULT; + goto out; + } + + memcpy(handle->lgh_hdr, hdr, hdr->llh_hdr.lrh_len); + handle->lgh_last_idx = LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_index; /* sanity checks */ llh_hdr = &handle->lgh_hdr->llh_hdr; @@ -296,9 +301,14 @@ static int llog_client_read_header(const struct lu_env *env, CERROR("bad log header magic: %#x (expecting %#x)\n", llh_hdr->lrh_type, LLOG_HDR_MAGIC); rc = -EIO; - } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) { - CERROR("incorrectly sized log header: %#x (expecting %#x)\n", - llh_hdr->lrh_len, LLOG_CHUNK_SIZE); + } else if (llh_hdr->lrh_len != + LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_len || + (llh_hdr->lrh_len & (llh_hdr->lrh_len - 1)) || + llh_hdr->lrh_len < LLOG_MIN_CHUNK_SIZE || + llh_hdr->lrh_len > handle->lgh_hdr_size) { + CERROR("incorrectly sized log header: %#x (expecting %#x) (power of two > 8192)\n", + llh_hdr->lrh_len, + LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_len); CERROR("you may need to re-run lconf --write_conf.\n"); rc = -EIO; } diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c index 9bad57d65db4..f87478180013 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c +++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c @@ -479,8 +479,8 @@ static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file *m, void *n) struct ptlrpc_nrs_policy *policy; struct ptlrpc_nrs_pol_info *infos; struct ptlrpc_nrs_pol_info tmp; - unsigned num_pols; - unsigned pol_idx = 0; + unsigned int num_pols; + unsigned int pol_idx = 0; bool hp = false; int i; int rc = 0; diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c index 9c937398a085..da1209e40f03 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c +++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c @@ -114,7 +114,7 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req) int rc2; int posted_md; int total_md; - __u64 xid; + u64 mbits; lnet_handle_me_t me_h; lnet_md_t md; @@ -127,8 +127,7 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req) LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT); LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); LASSERT(desc->bd_req); - LASSERT(desc->bd_type == BULK_PUT_SINK || - desc->bd_type == BULK_GET_SOURCE); + LASSERT(ptlrpc_is_bulk_op_passive(desc->bd_type)); /* cleanup the state of the bulk for it will be reused */ if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY) @@ -143,40 +142,37 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req) LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback); LASSERT(desc->bd_cbid.cbid_arg == desc); - /* An XID is only used for a single request from the client. 
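Illustration (not part of the patch): with the matchbits scheme introduced in the niobuf.c hunk below, bulk registration no longer aligns rq_xid down to a power-of-two boundary; instead it derives the matchbits of the first MD from the final value carried in rq_mbits:

static unsigned long long first_mbits(unsigned long long last_mbits,
				      int total_md)
{
	/* mirrors: mbits = req->rq_mbits - total_md + 1 */
	return last_mbits - total_md + 1;
}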
- * For retried bulk transfers, a new XID will be allocated in - * in ptlrpc_check_set() if it needs to be resent, so it is not - * using the same RDMA match bits after an error. - * - * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The - * first bulk XID is power-of-two aligned before rq_xid. LU-1431 - */ - xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1); + total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV; + /* rq_mbits is matchbits of the final bulk */ + mbits = req->rq_mbits - total_md + 1; + + LASSERTF(mbits == (req->rq_mbits & PTLRPC_BULK_OPS_MASK), + "first mbits = x%llu, last mbits = x%llu\n", + mbits, req->rq_mbits); LASSERTF(!(desc->bd_registered && req->rq_send_state != LUSTRE_IMP_REPLAY) || - xid != desc->bd_last_xid, - "registered: %d rq_xid: %llu bd_last_xid: %llu\n", - desc->bd_registered, xid, desc->bd_last_xid); + mbits != desc->bd_last_mbits, + "registered: %d rq_mbits: %llu bd_last_mbits: %llu\n", + desc->bd_registered, mbits, desc->bd_last_mbits); - total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV; desc->bd_registered = 1; - desc->bd_last_xid = xid; + desc->bd_last_mbits = mbits; desc->bd_md_count = total_md; md.user_ptr = &desc->bd_cbid; md.eq_handle = ptlrpc_eq_h; md.threshold = 1; /* PUT or GET */ - for (posted_md = 0; posted_md < total_md; posted_md++, xid++) { + for (posted_md = 0; posted_md < total_md; posted_md++, mbits++) { md.options = PTLRPC_MD_OPTIONS | - ((desc->bd_type == BULK_GET_SOURCE) ? + (ptlrpc_is_bulk_op_get(desc->bd_type) ? LNET_MD_OP_GET : LNET_MD_OP_PUT); ptlrpc_fill_bulk_md(&md, desc, posted_md); - rc = LNetMEAttach(desc->bd_portal, peer, xid, 0, + rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0, LNET_UNLINK, LNET_INS_AFTER, &me_h); if (rc != 0) { CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n", - desc->bd_import->imp_obd->obd_name, xid, + desc->bd_import->imp_obd->obd_name, mbits, posted_md, rc); break; } @@ -186,7 +182,7 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req) &desc->bd_mds[posted_md]); if (rc != 0) { CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n", - desc->bd_import->imp_obd->obd_name, xid, + desc->bd_import->imp_obd->obd_name, mbits, posted_md, rc); rc2 = LNetMEUnlink(me_h); LASSERT(rc2 == 0); @@ -205,27 +201,19 @@ static int ptlrpc_register_bulk(struct ptlrpc_request *req) return -ENOMEM; } - /* Set rq_xid to matchbits of the final bulk so that server can - * infer the number of bulks that were prepared - */ - req->rq_xid = --xid; - LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK), - "bd_last_xid = x%llu, rq_xid = x%llu\n", - desc->bd_last_xid, req->rq_xid); - spin_lock(&desc->bd_lock); - /* Holler if peer manages to touch buffers before he knows the xid */ + /* Holler if peer manages to touch buffers before he knows the mbits */ if (desc->bd_md_count != total_md) CWARN("%s: Peer %s touched %d buffers while I registered\n", desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer), total_md - desc->bd_md_count); spin_unlock(&desc->bd_lock); - CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n", + CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, mbits x%#llx-%#llx, portal %u\n", desc->bd_md_count, - desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink", + ptlrpc_is_bulk_op_get(desc->bd_type) ? 
"get-source" : "put-sink", desc->bd_iov_count, desc->bd_nob, - desc->bd_last_xid, req->rq_xid, desc->bd_portal); + desc->bd_last_mbits, req->rq_mbits, desc->bd_portal); return 0; } @@ -521,6 +509,39 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) lustre_msg_set_conn_cnt(request->rq_reqmsg, imp->imp_conn_cnt); lustre_msghdr_set_flags(request->rq_reqmsg, imp->imp_msghdr_flags); + /* + * If it's the first time to resend the request for EINPROGRESS, + * we need to allocate a new XID (see after_reply()), it's different + * from the resend for reply timeout. + */ + if (request->rq_nr_resend && list_empty(&request->rq_unreplied_list)) { + __u64 min_xid = 0; + /* + * resend for EINPROGRESS, allocate new xid to avoid reply + * reconstruction + */ + spin_lock(&imp->imp_lock); + ptlrpc_assign_next_xid_nolock(request); + request->rq_mbits = request->rq_xid; + min_xid = ptlrpc_known_replied_xid(imp); + spin_unlock(&imp->imp_lock); + + lustre_msg_set_last_xid(request->rq_reqmsg, min_xid); + DEBUG_REQ(D_RPCTRACE, request, "Allocating new xid for resend on EINPROGRESS"); + } else if (request->rq_bulk) { + ptlrpc_set_bulk_mbits(request); + lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits); + } + + if (list_empty(&request->rq_unreplied_list) || + request->rq_xid <= imp->imp_known_replied_xid) { + DEBUG_REQ(D_ERROR, request, + "xid: %llu, replied: %llu, list_empty:%d\n", + request->rq_xid, imp->imp_known_replied_xid, + list_empty(&request->rq_unreplied_list)); + LBUG(); + } + /** * For enabled AT all request should have AT_SUPPORT in the * FULL import state when OBD_CONNECT_AT is set @@ -537,8 +558,15 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) mpflag = cfs_memory_pressure_get_and_set(); rc = sptlrpc_cli_wrap_request(request); - if (rc) + if (rc) { + /* + * set rq_sent so that this request is treated + * as a delayed send in the upper layers + */ + if (rc == -ENOMEM) + request->rq_sent = ktime_get_seconds(); goto out; + } /* bulk register should be done after wrap_request() */ if (request->rq_bulk) { diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c index d88faf61e740..7b6ffb195834 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/nrs.c +++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c @@ -82,16 +82,9 @@ static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy, static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy) { - struct ptlrpc_nrs *nrs = policy->pol_nrs; - - if (policy->pol_desc->pd_ops->op_policy_stop) { - spin_unlock(&nrs->nrs_lock); - + if (policy->pol_desc->pd_ops->op_policy_stop) policy->pol_desc->pd_ops->op_policy_stop(policy); - spin_lock(&nrs->nrs_lock); - } - LASSERT(list_empty(&policy->pol_list_queued)); LASSERT(policy->pol_req_queued == 0 && policy->pol_req_started == 0); @@ -619,6 +612,12 @@ static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name, goto out; } + if (policy->pol_state != NRS_POL_STATE_STARTED && + policy->pol_state != NRS_POL_STATE_STOPPED) { + rc = -EAGAIN; + goto out; + } + switch (opc) { /** * Unknown opcode, pass it down to the policy-specific control diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c index 871768511e8c..13f00b7cbbe5 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c +++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c @@ -42,11 +42,14 @@ #include "../../include/linux/libcfs/libcfs.h" -#include "../include/obd_support.h" -#include 
"../include/obd_class.h" +#include "../include/lustre/ll_fiemap.h" + +#include "../include/llog_swab.h" #include "../include/lustre_net.h" +#include "../include/lustre_swab.h" #include "../include/obd_cksum.h" -#include "../include/lustre/ll_fiemap.h" +#include "../include/obd_support.h" +#include "../include/obd_class.h" #include "ptlrpc_internal.h" @@ -942,6 +945,25 @@ __u32 lustre_msg_get_opc(struct lustre_msg *msg) } EXPORT_SYMBOL(lustre_msg_get_opc); +__u16 lustre_msg_get_tag(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + } + return pb->pb_tag; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_tag); + __u64 lustre_msg_get_last_committed(struct lustre_msg *msg) { switch (msg->lm_magic) { @@ -1236,6 +1258,37 @@ void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc) } } +void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_last_xid = last_xid; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} + +void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_tag = tag; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_tag); + void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions) { switch (msg->lm_magic) { @@ -1373,6 +1426,21 @@ void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum) } } +void lustre_msg_set_mbits(struct lustre_msg *msg, __u64 mbits) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_mbits = mbits; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} + void ptlrpc_request_set_replen(struct ptlrpc_request *req) { int count = req_capsule_filled_sizes(&req->rq_pill, RCL_SERVER); @@ -1442,7 +1510,7 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *b) __swab32s(&b->pb_opc); __swab32s(&b->pb_status); __swab64s(&b->pb_last_xid); - __swab64s(&b->pb_last_seen); + __swab16s(&b->pb_tag); __swab64s(&b->pb_last_committed); __swab64s(&b->pb_transno); __swab32s(&b->pb_flags); @@ -1456,7 +1524,12 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *b) __swab64s(&b->pb_pre_versions[1]); __swab64s(&b->pb_pre_versions[2]); __swab64s(&b->pb_pre_versions[3]); - CLASSERT(offsetof(typeof(*b), pb_padding) != 0); + __swab64s(&b->pb_mbits); + CLASSERT(offsetof(typeof(*b), pb_padding0) != 0); + CLASSERT(offsetof(typeof(*b), pb_padding1) != 0); + CLASSERT(offsetof(typeof(*b), pb_padding64_0) != 0); + CLASSERT(offsetof(typeof(*b), pb_padding64_1) != 0); + CLASSERT(offsetof(typeof(*b), pb_padding64_2) != 0); /* While we need to maintain compatibility between * clients and servers without ptlrpc_body_v2 (< 2.3) * do not swab any fields beyond pb_jobid, as we are @@ -1492,8 +1565,12 @@ void lustre_swab_connect(struct obd_connect_data *ocd) 
__swab32s(&ocd->ocd_max_easize); if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES) __swab64s(&ocd->ocd_maxbytes); + if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) + __swab16s(&ocd->ocd_maxmodrpcs); + CLASSERT(offsetof(typeof(*ocd), padding0)); CLASSERT(offsetof(typeof(*ocd), padding1) != 0); - CLASSERT(offsetof(typeof(*ocd), padding2) != 0); + if (ocd->ocd_connect_flags & OBD_CONNECT_FLAGS2) + __swab64s(&ocd->ocd_connect_flags2); CLASSERT(offsetof(typeof(*ocd), padding3) != 0); CLASSERT(offsetof(typeof(*ocd), padding4) != 0); CLASSERT(offsetof(typeof(*ocd), padding5) != 0); @@ -1666,7 +1743,7 @@ void lustre_swab_mdt_body(struct mdt_body *b) __swab32s(&b->mbo_eadatasize); __swab32s(&b->mbo_aclsize); __swab32s(&b->mbo_max_mdsize); - __swab32s(&b->mbo_max_cookiesize); + CLASSERT(offsetof(typeof(*b), mbo_unused3)); __swab32s(&b->mbo_uid_h); __swab32s(&b->mbo_gid_h); CLASSERT(offsetof(typeof(*b), mbo_padding_5) != 0); @@ -1675,9 +1752,10 @@ void lustre_swab_mdt_body(struct mdt_body *b) void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b) { /* handle is opaque */ - __swab64s(&b->ioepoch); - __swab32s(&b->flags); - CLASSERT(offsetof(typeof(*b), padding) != 0); + /* mio_handle is opaque */ + CLASSERT(offsetof(typeof(*b), mio_unused1)); + CLASSERT(offsetof(typeof(*b), mio_unused2)); + CLASSERT(offsetof(typeof(*b), mio_padding)); } void lustre_swab_mgs_target_info(struct mgs_target_info *mti) @@ -1772,7 +1850,7 @@ void lustre_swab_fid2path(struct getinfo_fid2path *gf) } EXPORT_SYMBOL(lustre_swab_fid2path); -static void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent) +static void lustre_swab_fiemap_extent(struct fiemap_extent *fm_extent) { __swab64s(&fm_extent->fe_logical); __swab64s(&fm_extent->fe_physical); @@ -1781,7 +1859,7 @@ static void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent) __swab32s(&fm_extent->fe_device); } -void lustre_swab_fiemap(struct ll_user_fiemap *fiemap) +void lustre_swab_fiemap(struct fiemap *fiemap) { __u32 i; @@ -1938,7 +2016,7 @@ static void lustre_swab_ldlm_res_id(struct ldlm_res_id *id) __swab64s(&id->name[i]); } -static void lustre_swab_ldlm_policy_data(ldlm_wire_policy_data_t *d) +static void lustre_swab_ldlm_policy_data(union ldlm_wire_policy_data *d) { /* the lock data is a union and the first two fields are always an * extent so it's ok to process an LDLM_EXTENT and LDLM_FLOCK lock @@ -2062,8 +2140,6 @@ static void dump_obdo(struct obdo *oa) if (valid & OBD_MD_FLHANDLE) CDEBUG(D_RPCTRACE, "obdo: o_handle = %lld\n", oa->o_handle.cookie); - if (valid & OBD_MD_FLCOOKIE) - CDEBUG(D_RPCTRACE, "obdo: o_lcookie = (llog_cookie dumping not yet implemented)\n"); } void dump_ost_body(struct ost_body *ob) diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c index 5b9fb11c0b6b..94e9fa85d774 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/pers.c +++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c @@ -43,6 +43,8 @@ void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc, int mdidx) { + int offset = mdidx * LNET_MAX_IOV; + CLASSERT(PTLRPC_MAX_BRW_PAGES < LI_POISON); LASSERT(mdidx < desc->bd_md_max_brw); @@ -50,23 +52,20 @@ void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc, LASSERT(!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | LNET_MD_PHYS))); - md->options |= LNET_MD_KIOV; md->length = max(0, desc->bd_iov_count - mdidx * LNET_MAX_IOV); md->length = min_t(unsigned int, LNET_MAX_IOV, md->length); - if (desc->bd_enc_iov) - md->start = &desc->bd_enc_iov[mdidx * 
LNET_MAX_IOV]; - else - md->start = &desc->bd_iov[mdidx * LNET_MAX_IOV]; -} - -void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, - int pageoffset, int len) -{ - lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count]; - - kiov->bv_page = page; - kiov->bv_offset = pageoffset; - kiov->bv_len = len; - desc->bd_iov_count++; + if (ptlrpc_is_bulk_desc_kiov(desc->bd_type)) { + md->options |= LNET_MD_KIOV; + if (GET_ENC_KIOV(desc)) + md->start = &BD_GET_ENC_KIOV(desc, offset); + else + md->start = &BD_GET_KIOV(desc, offset); + } else { + md->options |= LNET_MD_IOVEC; + if (GET_ENC_KVEC(desc)) + md->start = &BD_GET_ENC_KVEC(desc, offset); + else + md->start = &BD_GET_KVEC(desc, offset); + } } diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h index f14d193287da..e0f859ca6223 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h @@ -55,8 +55,11 @@ int ptlrpcd_start(struct ptlrpcd_ctl *pc); /* client.c */ void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req, unsigned int service_time); -struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw, - unsigned type, unsigned portal); +struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned int nfrags, + unsigned int max_brw, + enum ptlrpc_bulk_op_type type, + unsigned int portal, + const struct ptlrpc_bulk_frag_ops *ops); int ptlrpc_request_cache_init(void); void ptlrpc_request_cache_fini(void); struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags); @@ -67,6 +70,10 @@ void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc, int ptlrpc_expired_set(void *data); int ptlrpc_set_next_timeout(struct ptlrpc_request_set *); void ptlrpc_resend_req(struct ptlrpc_request *request); +void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req); +void ptlrpc_assign_next_xid_nolock(struct ptlrpc_request *req); +__u64 ptlrpc_known_replied_xid(struct obd_import *imp); +void ptlrpc_add_unreplied(struct ptlrpc_request *req); /* events.c */ int ptlrpc_init_portals(void); @@ -226,8 +233,6 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink); /* pers.c */ void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc, int mdcnt); -void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, - int pageoffset, int len); /* pack_generic.c */ struct ptlrpc_reply_state * @@ -322,6 +327,7 @@ static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req) INIT_LIST_HEAD(&cr->cr_set_chain); INIT_LIST_HEAD(&cr->cr_ctx_chain); + INIT_LIST_HEAD(&cr->cr_unreplied_list); init_waitqueue_head(&cr->cr_reply_waitq); init_waitqueue_head(&cr->cr_set_waitq); } @@ -338,4 +344,24 @@ static inline void ptlrpc_srv_req_init(struct ptlrpc_request *req) INIT_LIST_HEAD(&sr->sr_hist_list); } +static inline bool ptlrpc_req_is_connect(struct ptlrpc_request *req) +{ + if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CONNECT || + lustre_msg_get_opc(req->rq_reqmsg) == OST_CONNECT || + lustre_msg_get_opc(req->rq_reqmsg) == MGS_CONNECT) + return true; + else + return false; +} + +static inline bool ptlrpc_req_is_disconnect(struct ptlrpc_request *req) +{ + if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_DISCONNECT || + lustre_msg_get_opc(req->rq_reqmsg) == OST_DISCONNECT || + lustre_msg_get_opc(req->rq_reqmsg) == MGS_DISCONNECT) + return true; + else + return false; +} + #endif /* PTLRPC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c 
b/drivers/staging/lustre/lustre/ptlrpc/recover.c index 405faf0dc9fc..c00449036884 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/recover.c +++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c @@ -111,7 +111,9 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) * all of it's requests being replayed, it's safe to * use a cursor to accelerate the search */ - imp->imp_replay_cursor = imp->imp_replay_cursor->next; + if (!imp->imp_resend_replay || + imp->imp_replay_cursor == &imp->imp_committed_list) + imp->imp_replay_cursor = imp->imp_replay_cursor->next; while (imp->imp_replay_cursor != &imp->imp_committed_list) { @@ -155,10 +157,24 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT); spin_lock(&imp->imp_lock); + /* The resend replay request may have been removed from the + * unreplied list. + */ + if (req && imp->imp_resend_replay && + list_empty(&req->rq_unreplied_list)) { + ptlrpc_add_unreplied(req); + imp->imp_known_replied_xid = ptlrpc_known_replied_xid(imp); + } + imp->imp_resend_replay = 0; spin_unlock(&imp->imp_lock); if (req) { + /* The request should have been added back to the unreplied + * list by ptlrpc_prepare_replay(). + */ + LASSERT(!list_empty(&req->rq_unreplied_list)); + rc = ptlrpc_replay_req(req); if (rc) { CERROR("recovery replay error %d for req %llu\n", @@ -194,7 +210,13 @@ int ptlrpc_resend(struct obd_import *imp) LASSERTF((long)req > PAGE_SIZE && req != LP_POISON, "req %p bad\n", req); LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req); - if (!ptlrpc_no_resend(req)) + + /* + * If the request is allowed to be sent during replay and it + * has not timed out yet, then it does not need to be resent. + */ + if (!ptlrpc_no_resend(req) && + (req->rq_timedout || !req->rq_allow_replay)) ptlrpc_resend_req(req); } spin_unlock(&imp->imp_lock); diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c index a7416cd9ac71..e860df7c45a2 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -379,7 +379,7 @@ int sptlrpc_req_get_ctx(struct ptlrpc_request *req) if (!req->rq_cli_ctx) { CERROR("req %p: fail to get context\n", req); - return -ENOMEM; + return -ECONNREFUSED; } return 0; @@ -515,6 +515,13 @@ static int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req) set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC)); + } else if (unlikely(!test_bit(PTLRPC_CTX_UPTODATE_BIT, &newctx->cc_flags))) { + /* + * the new ctx is not up to date yet + */ + CDEBUG(D_SEC, + "ctx (%p, fl %lx) doesn't switch, not up to date yet\n", + newctx, newctx->cc_flags); } else { /* * it's possible newctx == oldctx if we're switching diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index b2cc5ea6cb93..2fe9085e2034 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -108,6 +108,7 @@ static struct ptlrpc_enc_page_pool { unsigned long epp_st_lowfree; /* lowest free pages reached */ unsigned int epp_st_max_wqlen; /* highest waitqueue length */ unsigned long epp_st_max_wait; /* in jiffies */ + unsigned long epp_st_outofmem; /* # of out of mem requests */ /* * pointers to pools */ @@ -139,7 +140,8 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) "cache missing: %lu\n" "low free mark: %lu\n" "max waitqueue depth: %u\n" - "max wait time:
%ld/%lu\n" + "out of mem: %lu\n", totalram_pages, PAGES_PER_POOL, page_pools.epp_max_pages, @@ -158,7 +160,8 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) page_pools.epp_st_lowfree, page_pools.epp_st_max_wqlen, page_pools.epp_st_max_wait, - msecs_to_jiffies(MSEC_PER_SEC)); + msecs_to_jiffies(MSEC_PER_SEC), + page_pools.epp_st_outofmem); spin_unlock(&page_pools.epp_lock); @@ -306,12 +309,30 @@ static inline void enc_pools_wakeup(void) } } +/* + * Export the number of free pages in the pool + */ +int get_free_pages_in_pool(void) +{ + return page_pools.epp_free_pages; +} + +/* + * Let outside world know if enc_pool full capacity is reached + */ +int pool_is_at_full_capacity(void) +{ + return (page_pools.epp_total_pages == page_pools.epp_max_pages); +} + void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) { int p_idx, g_idx; int i; - if (!desc->bd_enc_iov) + LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type)); + + if (!GET_ENC_KIOV(desc)) return; LASSERT(desc->bd_iov_count > 0); @@ -326,12 +347,12 @@ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) LASSERT(page_pools.epp_pools[p_idx]); for (i = 0; i < desc->bd_iov_count; i++) { - LASSERT(desc->bd_enc_iov[i].bv_page); + LASSERT(BD_GET_ENC_KIOV(desc, i).bv_page); LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]); LASSERT(!page_pools.epp_pools[p_idx][g_idx]); page_pools.epp_pools[p_idx][g_idx] = - desc->bd_enc_iov[i].bv_page; + BD_GET_ENC_KIOV(desc, i).bv_page; if (++g_idx == PAGES_PER_POOL) { p_idx++; @@ -345,8 +366,8 @@ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) spin_unlock(&page_pools.epp_lock); - kfree(desc->bd_enc_iov); - desc->bd_enc_iov = NULL; + kfree(GET_ENC_KIOV(desc)); + GET_ENC_KIOV(desc) = NULL; } static inline void enc_pools_alloc(void) @@ -404,6 +425,7 @@ int sptlrpc_enc_pool_init(void) page_pools.epp_st_lowfree = 0; page_pools.epp_st_max_wqlen = 0; page_pools.epp_st_max_wait = 0; + page_pools.epp_st_outofmem = 0; enc_pools_alloc(); if (!page_pools.epp_pools) @@ -431,13 +453,14 @@ void sptlrpc_enc_pool_fini(void) if (page_pools.epp_st_access > 0) { CDEBUG(D_SEC, - "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%ld\n", + "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%ld, out of mem %lu\n", page_pools.epp_st_max_pages, page_pools.epp_st_grows, page_pools.epp_st_grow_fails, page_pools.epp_st_shrinks, page_pools.epp_st_access, page_pools.epp_st_missings, page_pools.epp_st_max_wqlen, page_pools.epp_st_max_wait, - msecs_to_jiffies(MSEC_PER_SEC)); + msecs_to_jiffies(MSEC_PER_SEC), + page_pools.epp_st_outofmem); } } @@ -520,10 +543,11 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]); for (i = 0; i < desc->bd_iov_count; i++) { - cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].bv_page, - desc->bd_iov[i].bv_offset & + cfs_crypto_hash_update_page(hdesc, + BD_GET_KIOV(desc, i).bv_page, + BD_GET_KIOV(desc, i).bv_offset & ~PAGE_MASK, - desc->bd_iov[i].bv_len); + BD_GET_KIOV(desc, i).bv_len); } if (hashsize > buflen) { diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c index cd305bcb334a..c5e7a2309fce 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c @@ -153,14 +153,16 @@ static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc) char *ptr; unsigned int 
off, i; + LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type)); + for (i = 0; i < desc->bd_iov_count; i++) { - if (desc->bd_iov[i].bv_len == 0) + if (!BD_GET_KIOV(desc, i).bv_len) continue; - ptr = kmap(desc->bd_iov[i].bv_page); - off = desc->bd_iov[i].bv_offset & ~PAGE_MASK; + ptr = kmap(BD_GET_KIOV(desc, i).bv_page); + off = BD_GET_KIOV(desc, i).bv_offset & ~PAGE_MASK; ptr[off] ^= 0x1; - kunmap(desc->bd_iov[i].bv_page); + kunmap(BD_GET_KIOV(desc, i).bv_page); return; } } @@ -352,11 +354,11 @@ int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx, /* fix the actual data size */ for (i = 0, nob = 0; i < desc->bd_iov_count; i++) { - if (desc->bd_iov[i].bv_len + nob > desc->bd_nob_transferred) { - desc->bd_iov[i].bv_len = - desc->bd_nob_transferred - nob; - } - nob += desc->bd_iov[i].bv_len; + /* truncate in place so the checksum below sees the real size */ + struct bio_vec *bv_desc = &BD_GET_KIOV(desc, i); + + if (bv_desc->bv_len + nob > desc->bd_nob_transferred) + bv_desc->bv_len = desc->bd_nob_transferred - nob; + nob += bv_desc->bv_len; } rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg, diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c index 72f39308eebb..70c70558e177 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/service.c +++ b/drivers/staging/lustre/lustre/ptlrpc/service.c @@ -343,9 +343,9 @@ ptlrpc_server_nthreads_check(struct ptlrpc_service *svc, struct ptlrpc_service_conf *conf) { struct ptlrpc_service_thr_conf *tc = &conf->psc_thr; - unsigned init; - unsigned total; - unsigned nthrs; + unsigned int init; + unsigned int total; + unsigned int nthrs; int weight; /* @@ -2541,8 +2541,9 @@ int ptlrpc_hr_init(void) hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i); hrp->hrp_nthrs /= weight; + if (hrp->hrp_nthrs == 0) + hrp->hrp_nthrs = 1; - LASSERT(hrp->hrp_nthrs > 0); hrp->hrp_thrs = kzalloc_node(hrp->hrp_nthrs * sizeof(*hrt), GFP_NOFS, cfs_cpt_spread_node(ptlrpc_hr.hr_cpt_table, diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c index b05b1f935e4c..a04e36cf6dd4 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c +++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c @@ -195,49 +195,29 @@ void lustre_assert_wire_constants(void) LASSERTF(REINT_MAX == 10, "found %lld\n", (long long)REINT_MAX); LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)DISP_IT_EXECD); + (unsigned int)DISP_IT_EXECD); LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned)DISP_LOOKUP_EXECD); + (unsigned int)DISP_LOOKUP_EXECD); LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)DISP_LOOKUP_NEG); + (unsigned int)DISP_LOOKUP_NEG); LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n", - (unsigned)DISP_LOOKUP_POS); + (unsigned int)DISP_LOOKUP_POS); LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n", - (unsigned)DISP_OPEN_CREATE); + (unsigned int)DISP_OPEN_CREATE); LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n", - (unsigned)DISP_OPEN_OPEN); + (unsigned int)DISP_OPEN_OPEN); LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n", - (unsigned)DISP_ENQ_COMPLETE); + (unsigned int)DISP_ENQ_COMPLETE); LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n", - (unsigned)DISP_ENQ_OPEN_REF); + (unsigned int)DISP_ENQ_OPEN_REF); LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n", - (unsigned)DISP_ENQ_CREATE_REF); + (unsigned int)DISP_ENQ_CREATE_REF); LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n", -
(unsigned)DISP_OPEN_LOCK); + (unsigned int)DISP_OPEN_LOCK); LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n", (long long)MDS_STATUS_CONN); LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n", (long long)MDS_STATUS_LOV); - LASSERTF(LUSTRE_BFLAG_UNCOMMITTED_WRITES == 1, "found %lld\n", - (long long)LUSTRE_BFLAG_UNCOMMITTED_WRITES); - LASSERTF(MF_SOM_CHANGE == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)MF_SOM_CHANGE); - LASSERTF(MF_EPOCH_OPEN == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned)MF_EPOCH_OPEN); - LASSERTF(MF_EPOCH_CLOSE == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)MF_EPOCH_CLOSE); - LASSERTF(MF_MDC_CANCEL_FID1 == 0x00000008UL, "found 0x%.8xUL\n", - (unsigned)MF_MDC_CANCEL_FID1); - LASSERTF(MF_MDC_CANCEL_FID2 == 0x00000010UL, "found 0x%.8xUL\n", - (unsigned)MF_MDC_CANCEL_FID2); - LASSERTF(MF_MDC_CANCEL_FID3 == 0x00000020UL, "found 0x%.8xUL\n", - (unsigned)MF_MDC_CANCEL_FID3); - LASSERTF(MF_MDC_CANCEL_FID4 == 0x00000040UL, "found 0x%.8xUL\n", - (unsigned)MF_MDC_CANCEL_FID4); - LASSERTF(MF_SOM_AU == 0x00000080UL, "found 0x%.8xUL\n", - (unsigned)MF_SOM_AU); - LASSERTF(MF_GETATTR_LOCK == 0x00000100UL, "found 0x%.8xUL\n", - (unsigned)MF_GETATTR_LOCK); LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n", (long long)MDS_ATTR_MODE); LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n", @@ -420,15 +400,13 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n", (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid)); LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)LMAI_RELEASED); + (unsigned int)LMAI_RELEASED); LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)LMAC_HSM); - LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned)LMAC_SOM); + (unsigned int)LMAC_HSM); LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)LMAC_NOT_IN_OI); + (unsigned int)LMAC_NOT_IN_OI); LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n", - (unsigned)LMAC_FID_ON_OST); + (unsigned int)LMAC_FID_ON_OST); /* Checks for struct ost_id */ LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n", @@ -478,11 +456,11 @@ void lustre_assert_wire_constants(void) LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n", (long long)FID_SEQ_LOV_DEFAULT); LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)FID_OID_SPECIAL_BFL); + (unsigned int)FID_OID_SPECIAL_BFL); LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)FID_OID_DOT_LUSTRE); + (unsigned int)FID_OID_DOT_LUSTRE); LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned)FID_OID_DOT_LUSTRE_OBF); + (unsigned int)FID_OID_DOT_LUSTRE_OBF); /* Checks for struct lu_dirent */ LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n", @@ -512,11 +490,11 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n", (long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0])); LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)LUDA_FID); + (unsigned int)LUDA_FID); LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned)LUDA_TYPE); + (unsigned int)LUDA_TYPE); LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)LUDA_64BITHASH); + (unsigned int)LUDA_64BITHASH); /* Checks for struct luda_type */ LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n", @@ -635,10 +613,18 
@@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_xid)); LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == 8, "found %lld\n", (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == 32, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_seen)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_tag) == 32, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_tag)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag) == 2, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding0) == 34, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding0)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0) == 2, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding1) == 36, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding1)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1)); LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == 40, "found %lld\n", (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_committed)); LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == 8, "found %lld\n", @@ -680,10 +666,22 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct ptlrpc_body_v3, pb_pre_versions)); LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == 32, "found %lld\n", (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == 120, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == 32, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_mbits) == 120, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_mbits)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_0) == 128, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_0)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_1) == 136, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_1)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_2) == 144, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_2)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 
*)0)->pb_padding64_2)); CLASSERT(LUSTRE_JOBID_SIZE == 32); LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n", (long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid)); @@ -713,10 +711,18 @@ void lustre_assert_wire_constants(void) (int)offsetof(struct ptlrpc_body_v3, pb_last_xid), (int)offsetof(struct ptlrpc_body_v2, pb_last_xid)); LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid), "%d != %d\n", (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == (int)offsetof(struct ptlrpc_body_v2, pb_last_seen), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_last_seen), (int)offsetof(struct ptlrpc_body_v2, pb_last_seen)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_tag) == (int)offsetof(struct ptlrpc_body_v2, pb_tag), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_tag), (int)offsetof(struct ptlrpc_body_v2, pb_tag)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_tag), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_tag)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding0) == (int)offsetof(struct ptlrpc_body_v2, pb_padding0), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_padding0), (int)offsetof(struct ptlrpc_body_v2, pb_padding0)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding0), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding0)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding1) == (int)offsetof(struct ptlrpc_body_v2, pb_padding1), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_padding1), (int)offsetof(struct ptlrpc_body_v2, pb_padding1)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding1), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding1)); LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == (int)offsetof(struct ptlrpc_body_v2, pb_last_committed), "%d != %d\n", (int)offsetof(struct ptlrpc_body_v3, pb_last_committed), (int)offsetof(struct ptlrpc_body_v2, pb_last_committed)); LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed), "%d != %d\n", @@ -757,10 +763,22 @@ void lustre_assert_wire_constants(void) (int)offsetof(struct ptlrpc_body_v3, pb_pre_versions), (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions)); LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions), "%d != %d\n", (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == (int)offsetof(struct ptlrpc_body_v2, pb_padding), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_padding), 
(int)offsetof(struct ptlrpc_body_v2, pb_padding));
- LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding), "%d != %d\n",
- (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_mbits) == (int)offsetof(struct ptlrpc_body_v2, pb_mbits), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_mbits), (int)offsetof(struct ptlrpc_body_v2, pb_mbits));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_mbits), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_mbits));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_0) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_0), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding64_0), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_0));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_0), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_0));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_1) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_1), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding64_1), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_1));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_1), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_1));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_2) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_2), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding64_2), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_2));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_2), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_2));
LASSERTF(MSG_PTLRPC_BODY_OFF == 0, "found %lld\n",
(long long)MSG_PTLRPC_BODY_OFF);
LASSERTF(REQ_REC_OFF == 1, "found %lld\n",
@@ -802,41 +820,41 @@ void lustre_assert_wire_constants(void)
LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n",
(long long)MSGHDR_CKSUM_INCOMPAT18);
LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n",
- (unsigned)MSG_OP_FLAG_MASK);
+ (unsigned int)MSG_OP_FLAG_MASK);
LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n",
(long long)MSG_OP_FLAG_SHIFT);
LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n",
- (unsigned)MSG_GEN_FLAG_MASK);
+ (unsigned int)MSG_GEN_FLAG_MASK);
LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)MSG_LAST_REPLAY);
+ (unsigned int)MSG_LAST_REPLAY);
LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)MSG_RESENT);
+ (unsigned int)MSG_RESENT);
LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)MSG_REPLAY);
+ (unsigned int)MSG_REPLAY);
LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)MSG_DELAY_REPLAY);
+ (unsigned int)MSG_DELAY_REPLAY);
LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)MSG_VERSION_REPLAY);
+ (unsigned int)MSG_VERSION_REPLAY);
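Two things happen in the hunks above. First, the old catch-all pb_padding is retired in favour of the named pb_mbits and pb_padding64_0/1/2 members, and every member shared by ptlrpc_body_v3 and ptlrpc_body_v2 is cross-checked so the two message versions keep an identical common layout. Second, the bare (unsigned) casts become (unsigned int); the two spellings name the same type, so the printed 0x%.8x values are unchanged, this is simply the checkpatch-preferred form. A minimal compile-time sketch of the cross-version technique, using hypothetical msg_v2/msg_v3 stand-ins (the real structs carry many more members at other offsets) and a made-up CHECK_SAME_MEMBER macro, none of which is part of the patch:

#include <stddef.h>
#include <stdint.h>

/* Cut-down stand-ins for the two wire formats; toy code only. */
struct msg_v2 {
	uint64_t pb_last_xid;
	uint16_t pb_tag;
	uint16_t pb_padding0;
	uint32_t pb_padding1;
	uint64_t pb_mbits;
};

struct msg_v3 {
	uint64_t pb_last_xid;
	uint16_t pb_tag;
	uint16_t pb_padding0;
	uint32_t pb_padding1;
	uint64_t pb_mbits;
	uint8_t  pb_jobid[32];	/* v3-only trailer */
};

/* Build fails if a member shared by both versions ever drifts. */
#define CHECK_SAME_MEMBER(s1, s2, m)					\
	_Static_assert(offsetof(struct s1, m) ==			\
		       offsetof(struct s2, m),				\
		       "offset of " #m " differs");			\
	_Static_assert(sizeof(((struct s1 *)0)->m) ==			\
		       sizeof(((struct s2 *)0)->m),			\
		       "size of " #m " differs")

CHECK_SAME_MEMBER(msg_v3, msg_v2, pb_last_xid);
CHECK_SAME_MEMBER(msg_v3, msg_v2, pb_tag);
CHECK_SAME_MEMBER(msg_v3, msg_v2, pb_padding0);
CHECK_SAME_MEMBER(msg_v3, msg_v2, pb_padding1);
CHECK_SAME_MEMBER(msg_v3, msg_v2, pb_mbits);

int main(void)
{
	return 0;
}

Unlike these _Static_asserts, the LASSERTF checks in lustre_assert_wire_constants() fire at runtime, which lets them report the value actually found; both styles catch the same class of silent wire-format drift.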
LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned)MSG_REQ_REPLAY_DONE);
+ (unsigned int)MSG_REQ_REPLAY_DONE);
LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned)MSG_LOCK_REPLAY_DONE);
+ (unsigned int)MSG_LOCK_REPLAY_DONE);
LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_RECOVERING);
+ (unsigned int)MSG_CONNECT_RECOVERING);
LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_RECONNECT);
+ (unsigned int)MSG_CONNECT_RECONNECT);
LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_REPLAYABLE);
+ (unsigned int)MSG_CONNECT_REPLAYABLE);
LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_LIBCLIENT);
+ (unsigned int)MSG_CONNECT_LIBCLIENT);
LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_INITIAL);
+ (unsigned int)MSG_CONNECT_INITIAL);
LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_ASYNC);
+ (unsigned int)MSG_CONNECT_ASYNC);
LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_NEXT_VER);
+ (unsigned int)MSG_CONNECT_NEXT_VER);
LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n",
- (unsigned)MSG_CONNECT_TRANSNO);
+ (unsigned int)MSG_CONNECT_TRANSNO);

/* Checks for struct obd_connect_data */
LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n",
@@ -905,14 +923,22 @@ void lustre_assert_wire_constants(void)
(long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, "found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes));
- LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 72, "found %lld\n",
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxmodrpcs) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_maxmodrpcs));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxmodrpcs) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxmodrpcs));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding0) == 74, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding0));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding0) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding0));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 76, "found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, padding1));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, "found %lld\n",
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding1));
- LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 80, "found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding2));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags2) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags2));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags2));
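The obd_connect_data hunk above shows the usual way a Lustre wire struct grows: new members are carved out of existing padding at fixed offsets, here a 16-bit ocd_maxmodrpcs at byte 72 and a second 64-bit flag word ocd_connect_flags2 at byte 80, while padding3 stays at 88 so no pre-existing offset moves. Older peers keep sending zeroes in what they still treat as padding, and the OBD_CONNECT_MULTIMODRPCS and OBD_CONNECT_FLAGS2 bits asserted further down presumably tell each side when the new members are meaningful. A toy sketch of the reclaimed tail, with the member types inferred from the asserted sizes (the detached ocd_tail struct is illustrative, not the kernel definition):

#include <stddef.h>
#include <stdint.h>

/* Bytes 72..95 of obd_connect_data as implied by the asserted
 * offsets; everything before byte 72 is elided.  Toy code only. */
struct ocd_tail {
	uint16_t ocd_maxmodrpcs;	/* byte 72: max modify RPCs in flight */
	uint16_t padding0;		/* byte 74 */
	uint32_t padding1;		/* byte 76 */
	uint64_t ocd_connect_flags2;	/* byte 80: second flag word */
	uint64_t padding3;		/* byte 88: still reserved */
};

/* Tail offsets are the full-struct offsets minus 72. */
_Static_assert(offsetof(struct ocd_tail, ocd_connect_flags2) == 80 - 72,
	       "ocd_connect_flags2 moved off byte 80");
_Static_assert(sizeof(struct ocd_tail) == 24,
	       "tail layout drifted");

int main(void)
{
	return 0;
}

Pinning sizeof at 24 alongside the member offsets means any accidental reordering or type change fails the build before it can reach the wire.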
LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 88, "found %lld\n", (long long)(int)offsetof(struct obd_connect_data, padding3)); LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, "found %lld\n", @@ -1075,14 +1101,24 @@ void lustre_assert_wire_constants(void) OBD_CONNECT_LFSCK); LASSERTF(OBD_CONNECT_UNLINK_CLOSE == 0x100000000000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT_UNLINK_CLOSE); + LASSERTF(OBD_CONNECT_MULTIMODRPCS == 0x200000000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_MULTIMODRPCS); LASSERTF(OBD_CONNECT_DIR_STRIPE == 0x400000000000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT_DIR_STRIPE); + LASSERTF(OBD_CONNECT_SUBTREE == 0x800000000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_SUBTREE); + LASSERTF(OBD_CONNECT_LOCK_AHEAD == 0x1000000000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_LOCK_AHEAD); + LASSERTF(OBD_CONNECT_OBDOPACK == 0x4000000000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_OBDOPACK); + LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_FLAGS2); LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)OBD_CKSUM_CRC32); + (unsigned int)OBD_CKSUM_CRC32); LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned)OBD_CKSUM_ADLER); + (unsigned int)OBD_CKSUM_ADLER); LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned)OBD_CKSUM_CRC32C); + (unsigned int)OBD_CKSUM_CRC32C); /* Checks for struct obdo */ LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n", @@ -1239,8 +1275,6 @@ void lustre_assert_wire_constants(void) OBD_MD_FLCKSUM); LASSERTF(OBD_MD_FLQOS == (0x00200000ULL), "found 0x%.16llxULL\n", OBD_MD_FLQOS); - LASSERTF(OBD_MD_FLCOOKIE == (0x00800000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLCOOKIE); LASSERTF(OBD_MD_FLGROUP == (0x01000000ULL), "found 0x%.16llxULL\n", OBD_MD_FLGROUP); LASSERTF(OBD_MD_FLFID == (0x02000000ULL), "found 0x%.16llxULL\n", @@ -1394,13 +1428,13 @@ void lustre_assert_wire_constants(void) (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0])); CLASSERT(LOV_MAGIC_V3 == (0x0BD30000 | 0x0BD0)); LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned)LOV_PATTERN_RAID0); + (unsigned int)LOV_PATTERN_RAID0); LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned)LOV_PATTERN_RAID1); + (unsigned int)LOV_PATTERN_RAID1); LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n", - (unsigned)LOV_PATTERN_FIRST); + (unsigned int)LOV_PATTERN_FIRST); LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n", - (unsigned)LOV_PATTERN_CMOBD); + (unsigned int)LOV_PATTERN_CMOBD); /* Checks for struct lmv_mds_md_v1 */ LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n", @@ -1542,6 +1576,8 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obd_ioobj, ioo_bufcnt)); LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, "found %lld\n", (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt)); + LASSERTF(IOOBJ_MAX_BRW_BITS == 16, "found %lld\n", + (long long)IOOBJ_MAX_BRW_BITS); /* Checks for union lquota_id */ LASSERTF((int)sizeof(union lquota_id) == 16, "found %lld\n", @@ -1817,10 +1853,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct mdt_body, mbo_max_mdsize)); LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize) == 4, "found %lld\n", (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize)); - LASSERTF((int)offsetof(struct mdt_body, mbo_max_cookiesize) == 160, "found %lld\n", - 
(long long)(int)offsetof(struct mdt_body, mbo_max_cookiesize)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize)); + LASSERTF((int)offsetof(struct mdt_body, mbo_unused3) == 160, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, mbo_unused3)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_unused3) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->mbo_unused3)); LASSERTF((int)offsetof(struct mdt_body, mbo_uid_h) == 164, "found %lld\n", (long long)(int)offsetof(struct mdt_body, mbo_uid_h)); LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid_h) == 4, "found %lld\n", @@ -1857,12 +1893,6 @@ void lustre_assert_wire_constants(void) MDS_FMODE_CLOSED); LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n", MDS_FMODE_EXEC); - LASSERTF(MDS_FMODE_EPOCH == 000001000000UL, "found 0%.11oUL\n", - MDS_FMODE_EPOCH); - LASSERTF(MDS_FMODE_TRUNC == 000002000000UL, "found 0%.11oUL\n", - MDS_FMODE_TRUNC); - LASSERTF(MDS_FMODE_SOM == 000004000000UL, "found 0%.11oUL\n", - MDS_FMODE_SOM); LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n", MDS_OPEN_CREATED); LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n", @@ -1905,10 +1935,20 @@ void lustre_assert_wire_constants(void) LUSTRE_IMMUTABLE_FL); LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n", LUSTRE_APPEND_FL); + LASSERTF(LUSTRE_NODUMP_FL == 0x00000040, "found 0x%.8x\n", + LUSTRE_NODUMP_FL); LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n", LUSTRE_NOATIME_FL); + LASSERTF(LUSTRE_INDEX_FL == 0x00001000, "found 0x%.8x\n", + LUSTRE_INDEX_FL); LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n", LUSTRE_DIRSYNC_FL); + LASSERTF(LUSTRE_TOPDIR_FL == 0x00020000, "found 0x%.8x\n", + LUSTRE_TOPDIR_FL); + LASSERTF(LUSTRE_DIRECTIO_FL == 0x00100000, "found 0x%.8x\n", + LUSTRE_DIRECTIO_FL); + LASSERTF(LUSTRE_INLINE_DATA_FL == 0x10000000, "found 0x%.8x\n", + LUSTRE_INLINE_DATA_FL); LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n", MDS_INODELOCK_LOOKUP); LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n", @@ -1921,22 +1961,22 @@ void lustre_assert_wire_constants(void) /* Checks for struct mdt_ioepoch */ LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n", (long long)(int)sizeof(struct mdt_ioepoch)); - LASSERTF((int)offsetof(struct mdt_ioepoch, handle) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_ioepoch, handle)); - LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->handle) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_ioepoch *)0)->handle)); - LASSERTF((int)offsetof(struct mdt_ioepoch, ioepoch) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_ioepoch, ioepoch)); - LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->ioepoch) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_ioepoch *)0)->ioepoch)); - LASSERTF((int)offsetof(struct mdt_ioepoch, flags) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_ioepoch, flags)); - LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_ioepoch *)0)->flags)); - LASSERTF((int)offsetof(struct mdt_ioepoch, padding) == 20, "found %lld\n", - (long long)(int)offsetof(struct mdt_ioepoch, padding)); - LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_ioepoch *)0)->padding)); + LASSERTF((int)offsetof(struct mdt_ioepoch, mio_handle) == 0, "found 
%lld\n", + (long long)(int)offsetof(struct mdt_ioepoch, mio_handle)); + LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_handle) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_handle)); + LASSERTF((int)offsetof(struct mdt_ioepoch, mio_unused1) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_ioepoch, mio_unused1)); + LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_unused1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_unused1)); + LASSERTF((int)offsetof(struct mdt_ioepoch, mio_unused2) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_ioepoch, mio_unused2)); + LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_unused2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_unused2)); + LASSERTF((int)offsetof(struct mdt_ioepoch, mio_padding) == 20, "found %lld\n", + (long long)(int)offsetof(struct mdt_ioepoch, mio_padding)); + LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_padding)); /* Checks for struct mdt_rec_setattr */ LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n", @@ -3520,21 +3560,21 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, "found %lld\n", (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx)); - /* Checks for struct ll_fiemap_info_key */ + /* Checks for struct fiemap_info_key */ LASSERTF((int)sizeof(struct ll_fiemap_info_key) == 248, "found %lld\n", (long long)(int)sizeof(struct ll_fiemap_info_key)); - LASSERTF((int)offsetof(struct ll_fiemap_info_key, name[8]) == 8, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_info_key, name[8])); - LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]) == 1, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->name[8])); - LASSERTF((int)offsetof(struct ll_fiemap_info_key, oa) == 8, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_info_key, oa)); - LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->oa) == 208, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->oa)); - LASSERTF((int)offsetof(struct ll_fiemap_info_key, fiemap) == 216, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_info_key, fiemap)); - LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap) == 32, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap)); + LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_name[8]) == 8, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_name[8])); + LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_name[8]) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_name[8])); + LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_oa) == 8, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_oa)); + LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_oa) == 208, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_oa)); + LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_fiemap) == 216, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_fiemap)); + LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_fiemap) == 32, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_fiemap)); /* Checks for struct mgs_target_info */ 
LASSERTF((int)sizeof(struct mgs_target_info) == 4544, "found %lld\n", @@ -3670,64 +3710,64 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]) == 1, "found %lld\n", (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0])); - /* Checks for struct ll_user_fiemap */ - LASSERTF((int)sizeof(struct ll_user_fiemap) == 32, "found %lld\n", - (long long)(int)sizeof(struct ll_user_fiemap)); - LASSERTF((int)offsetof(struct ll_user_fiemap, fm_start) == 0, "found %lld\n", - (long long)(int)offsetof(struct ll_user_fiemap, fm_start)); - LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_start) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_start)); - LASSERTF((int)offsetof(struct ll_user_fiemap, fm_length) == 8, "found %lld\n", - (long long)(int)offsetof(struct ll_user_fiemap, fm_length)); - LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_length) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_length)); - LASSERTF((int)offsetof(struct ll_user_fiemap, fm_flags) == 16, "found %lld\n", - (long long)(int)offsetof(struct ll_user_fiemap, fm_flags)); - LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_flags)); - LASSERTF((int)offsetof(struct ll_user_fiemap, fm_mapped_extents) == 20, "found %lld\n", - (long long)(int)offsetof(struct ll_user_fiemap, fm_mapped_extents)); - LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents)); - LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extent_count) == 24, "found %lld\n", - (long long)(int)offsetof(struct ll_user_fiemap, fm_extent_count)); - LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count)); - LASSERTF((int)offsetof(struct ll_user_fiemap, fm_reserved) == 28, "found %lld\n", - (long long)(int)offsetof(struct ll_user_fiemap, fm_reserved)); - LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved)); - LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extents) == 32, "found %lld\n", - (long long)(int)offsetof(struct ll_user_fiemap, fm_extents)); - LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extents) == 0, "found %lld\n", - (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extents)); + /* Checks for struct fiemap */ + LASSERTF((int)sizeof(struct fiemap) == 32, "found %lld\n", + (long long)(int)sizeof(struct fiemap)); + LASSERTF((int)offsetof(struct fiemap, fm_start) == 0, "found %lld\n", + (long long)(int)offsetof(struct fiemap, fm_start)); + LASSERTF((int)sizeof(((struct fiemap *)0)->fm_start) == 8, "found %lld\n", + (long long)(int)sizeof(((struct fiemap *)0)->fm_start)); + LASSERTF((int)offsetof(struct fiemap, fm_length) == 8, "found %lld\n", + (long long)(int)offsetof(struct fiemap, fm_length)); + LASSERTF((int)sizeof(((struct fiemap *)0)->fm_length) == 8, "found %lld\n", + (long long)(int)sizeof(((struct fiemap *)0)->fm_length)); + LASSERTF((int)offsetof(struct fiemap, fm_flags) == 16, "found %lld\n", + (long long)(int)offsetof(struct fiemap, fm_flags)); + LASSERTF((int)sizeof(((struct fiemap *)0)->fm_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct fiemap *)0)->fm_flags)); + 
LASSERTF((int)offsetof(struct fiemap, fm_mapped_extents) == 20, "found %lld\n", + (long long)(int)offsetof(struct fiemap, fm_mapped_extents)); + LASSERTF((int)sizeof(((struct fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n", + (long long)(int)sizeof(((struct fiemap *)0)->fm_mapped_extents)); + LASSERTF((int)offsetof(struct fiemap, fm_extent_count) == 24, "found %lld\n", + (long long)(int)offsetof(struct fiemap, fm_extent_count)); + LASSERTF((int)sizeof(((struct fiemap *)0)->fm_extent_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct fiemap *)0)->fm_extent_count)); + LASSERTF((int)offsetof(struct fiemap, fm_reserved) == 28, "found %lld\n", + (long long)(int)offsetof(struct fiemap, fm_reserved)); + LASSERTF((int)sizeof(((struct fiemap *)0)->fm_reserved) == 4, "found %lld\n", + (long long)(int)sizeof(((struct fiemap *)0)->fm_reserved)); + LASSERTF((int)offsetof(struct fiemap, fm_extents) == 32, "found %lld\n", + (long long)(int)offsetof(struct fiemap, fm_extents)); + LASSERTF((int)sizeof(((struct fiemap *)0)->fm_extents) == 0, "found %lld\n", + (long long)(int)sizeof(((struct fiemap *)0)->fm_extents)); CLASSERT(FIEMAP_FLAG_SYNC == 0x00000001); CLASSERT(FIEMAP_FLAG_XATTR == 0x00000002); CLASSERT(FIEMAP_FLAG_DEVICE_ORDER == 0x40000000); - /* Checks for struct ll_fiemap_extent */ - LASSERTF((int)sizeof(struct ll_fiemap_extent) == 56, "found %lld\n", - (long long)(int)sizeof(struct ll_fiemap_extent)); - LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_logical) == 0, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_extent, fe_logical)); - LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical)); - LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_physical) == 8, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_extent, fe_physical)); - LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical)); - LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_length) == 16, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_extent, fe_length)); - LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_length) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_length)); - LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 40, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_extent, fe_flags)); - LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags)); - LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 44, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_extent, fe_device)); - LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_device) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_device)); + /* Checks for struct fiemap_extent */ + LASSERTF((int)sizeof(struct fiemap_extent) == 56, "found %lld\n", + (long long)(int)sizeof(struct fiemap_extent)); + LASSERTF((int)offsetof(struct fiemap_extent, fe_logical) == 0, "found %lld\n", + (long long)(int)offsetof(struct fiemap_extent, fe_logical)); + LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_logical) == 8, "found %lld\n", + (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_logical)); + LASSERTF((int)offsetof(struct fiemap_extent, fe_physical) == 8, "found %lld\n", + (long long)(int)offsetof(struct 
fiemap_extent, fe_physical));
+ LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_physical) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_physical));
+ LASSERTF((int)offsetof(struct fiemap_extent, fe_length) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap_extent, fe_length));
+ LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_length) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_length));
+ LASSERTF((int)offsetof(struct fiemap_extent, fe_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap_extent, fe_flags));
+ LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_flags));
+ LASSERTF((int)offsetof(struct fiemap_extent, fe_reserved[0]) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct fiemap_extent, fe_reserved[0]));
+ LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_reserved[0]) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_reserved[0]));
CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001);
CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002);
CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004);
@@ -4093,9 +4133,9 @@ void lustre_assert_wire_constants(void)
LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n",
(long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len));
LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n",
- (unsigned)HSM_FORCE_ACTION);
+ (unsigned int)HSM_FORCE_ACTION);
LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n",
- (unsigned)HSM_GHOST_COPY);
+ (unsigned int)HSM_GHOST_COPY);

/* Checks for struct hsm_user_request */
LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n",
diff --git a/drivers/staging/lustre/sysfs-fs-lustre b/drivers/staging/lustre/sysfs-fs-lustre
index 20206ba965af..8691c6543a9c 100644
--- a/drivers/staging/lustre/sysfs-fs-lustre
+++ b/drivers/staging/lustre/sysfs-fs-lustre
@@ -11,7 +11,7 @@ Description:
Shows if the lustre module has pinger support.
"on" means yes and "off" means no.

-What: /sys/fs/lustre/health
+What: /sys/fs/lustre/health_check
Date: May 2015
Contact: "Oleg Drokin" <oleg.drokin@intel.com>
Description: |