Diffstat (limited to 'drivers/staging/lustre')
118 files changed, 1291 insertions, 2124 deletions
diff --git a/drivers/staging/lustre/TODO b/drivers/staging/lustre/TODO
index f194417d0af7..94446487748a 100644
--- a/drivers/staging/lustre/TODO
+++ b/drivers/staging/lustre/TODO
@@ -1,12 +1,302 @@
-* Possible remaining coding style fix.
-* Remove deadcode.
-* Separate client/server functionality. Functions only used by server can be
-  removed from client.
-* Clean up libcfs layer. Ideally we can remove include/linux/libcfs entirely.
-* Clean up CLIO layer. Lustre client readahead/writeback control needs to better
-  suit kernel providings.
-* Add documents in Documentation.
-* Other minor misc cleanups...
+Currently all the work directed toward the lustre upstream client is tracked
+at the following link:
+
+https://jira.hpdd.intel.com/browse/LU-9679
+
+Under this ticket you will see the following work items that need to be
+addressed:
+
+******************************************************************************
+* libcfs cleanup
+*
+* https://jira.hpdd.intel.com/browse/LU-9859
+*
+* Track all the cleanups and simplification of the libcfs module. Remove
+* functions the kernel provides. Possibly integrate some of the functionality
+* into the kernel proper.
+*
+******************************************************************************
+
+https://jira.hpdd.intel.com/browse/LU-100086
+
+LNET_MINOR conflicts with USERIO_MINOR
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-8130
+
+Fix and simplify libcfs hash handling
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-8703
+
+The current way we handle SMP is wrong. Platforms like ARM and KNL can have
+core and NUMA setups with things like NUMA nodes with no cores. We need to
+handle such cases. This work also greatly simplified the lustre SMP code.
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9019
+
+Replace the libcfs time API with standard kernel APIs. Also migrate away from
+jiffies. We found that jiffies can vary across nodes, which can lead to corner
+cases that break the file system due to nodes having inconsistent behavior.
+So move to time64_t and ktime_t as much as possible (a sketch of the
+direction follows this diff).
+
+******************************************************************************
+* Proper IB support for ko2iblnd
+******************************************************************************
+https://jira.hpdd.intel.com/browse/LU-9179
+
+Poor performance for the ko2iblnd driver. This is related to many of the
+patches below that are missing from the linux client.
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9886
+
+Crash in upstream kiblnd_handle_early_rxs()
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-10394 / LU-10526 / LU-10089
+
+Default to using MEM_REG
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-10459
+
+throttle tx based on queue depth
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9943
+
+correct WR fast reg accounting
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-10291
+
+remove concurrent_sends tunable
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-10213
+
+calculate qp max_send_wrs properly
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9810
+
+use less CQ entries for each connection
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-10129 / LU-9180
+
+rework map_on_demand behavior
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-10129
+
+query device capabilities
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-10015
+
+fix race at kiblnd_connect_peer
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9983
+
+allow for discontiguous fragments
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9500
+
+Don't Page Align remote_addr with FastReg
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9448
+
+handle empty CPTs
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9507
+
+Don't Assert On Reconnect with MultiQP
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9472
+
+Fix FastReg map/unmap for MLX5
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9425
+
+Turn on 2 sges by default
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-8943
+
+Enable Multiple OPA Endpoints between Nodes
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-5718
+
+multiple sges for work request
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9094
+
+kill timedout txs from ibp_tx_queue
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9094
+
+reconnect peer for REJ_INVALID_SERVICE_ID
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-8752
+
+Stop MLX5 triggering a dump_cqe
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-8874
+
+Move ko2iblnd to latest RDMA changes
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-8875 / LU-8874
+
+Change to new RDMA done callback mechanism
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9164 / LU-8874
+
+Incorporate RDMA map/unmap APIs into ko2iblnd
+
+******************************************************************************
+* sysfs/debugfs fixes
+*
+* https://jira.hpdd.intel.com/browse/LU-8066
+*
+* The original migration to sysfs was done in haste without properly working
+* utilities to test the changes. This covers the work to restore the proper
+* behavior. Huge project to make this right.
+*
+******************************************************************************
+
+https://jira.hpdd.intel.com/browse/LU-9431
+
+The function class_process_proc_param was used for our mass updates of proc
+tunables. It didn't work with sysfs and it was just ugly, so it was removed.
+In the process the ability to mass update thousands of clients was lost. This
+work restores it in a sane way.
+
+------------------------------------------------------------------------------
+https://jira.hpdd.intel.com/browse/LU-9091
+
+One of the major requests from users is the ability to pass parameters into a
+sysfs file in various different units. For example, we can set
+max_pages_per_rpc, but its value can vary across platforms due to different
+page sizes. So you can set this like max_pages_per_rpc=16MiB. The original
+code to handle this was written before the string helpers were created, so it
+doesn't follow that format, but it would be easy to convert. Currently the
+string helpers do the reverse of what we need: they convert bytes to a string.
+We need to convert a string to bytes (a parser sketch follows this diff).
+
+******************************************************************************
+* Proper user land to kernel space interface for Lustre
+*
+* https://jira.hpdd.intel.com/browse/LU-9680
+*
+******************************************************************************
+
+https://jira.hpdd.intel.com/browse/LU-8915
+
+Don't use the Linux list structure as a user-land argument for lnet selftest.
+This code is pretty poor quality and really needs to be reworked.
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-8834
+
+The lustre ioctl LL_IOC_FUTIMES_3 is very generic. We need to either work with
+other file systems with similar functionality to create a common syscall
+interface, or rework our server code to automagically do it for us.
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-6202
+
+Clean up ioctl handling. We have many obsolete ioctls. Also the way we do
+ioctls can be changed over to netlink. This also has the benefit of working
+better with HPC systems that do IO forwarding. Such systems don't handle
+ioctls very well.
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9667
+
+More cleanups by making our utilities use sysfs instead of ioctls for LNet.
+It has also been requested to move the remaining ioctls to the netlink API.
+
+******************************************************************************
+* Misc
+******************************************************************************
+
+------------------------------------------------------------------------------
+https://jira.hpdd.intel.com/browse/LU-9855
+
+Clean up obdclass preprocessor code. One of the major eyesores is the various
+pointer indirections and macros used by obdclass. These make the code very
+difficult to understand. Al Viro requested that this be cleaned up before we
+leave staging.
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9633
+
+Migrate to sphinx kernel-doc style comments. Add documents in Documentation.
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-6142
+
+Fix any remaining coding style issues. Remove dead code. Enforce kernel
+coding style. Other minor misc cleanups...
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-8837
+
+Separate client/server functionality. Functions only used by the server can
+be removed from the client. Most of this has been done, but we need an
+inspection of the code to make sure.
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-8964
+
+Lustre client readahead/writeback control needs to better suit what the
+kernel provides. This is currently being explored. We could end up replacing
+the CLIO readahead abstraction with the kernel's own version.
+
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9862
+
+Patch that landed for LU-7890 leads to static checker errors
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-9868
+
+dcache/namei fixes for lustre
+------------------------------------------------------------------------------
+
+https://jira.hpdd.intel.com/browse/LU-10467
+
+Use the standard Linux wait_event macros (work by Neil Brown).
+
+------------------------------------------------------------------------------
 Please send any patches to Greg Kroah-Hartman <greg@kroah.com>, Andreas Dilger
-<andreas.dilger@intel.com>, and Oleg Drokin <oleg.drokin@intel.com>.
+<andreas.dilger@intel.com>, James Simmons <jsimmons@infradead.org> and
+Oleg Drokin <oleg.drokin@intel.com>.
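As a rough, hypothetical illustration of the LU-9019 direction mentioned in
the TODO above (none of this is code from the series, and the names are made
up), a deadline that used to be derived from jiffies can be tracked with
ktime_t instead, so its value does not depend on the local HZ setting:

    /* Hypothetical sketch of the LU-9019 direction: keep deadlines in
     * ktime_t rather than jiffies.
     */
    #include <linux/ktime.h>

    static ktime_t example_deadline;

    static void example_set_deadline(int timeout_sec)
    {
            example_deadline = ktime_add(ktime_get(),
                                         ktime_set(timeout_sec, 0));
    }

    static bool example_deadline_expired(void)
    {
            return ktime_after(ktime_get(), example_deadline);
    }
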
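For the LU-9091 item, the kernel already provides memparse() for bare binary
suffixes (16K, 16M, ...); the missing piece is tolerating the "MiB" spelling.
A minimal sketch, assuming memparse()'s existing semantics and a hypothetical
wrapper name:

    /* Minimal sketch only, not code from the patch set: turn a string
     * such as "16MiB" into a byte count.  memparse() already handles the
     * bare K/M/G/T suffix; just allow an optional "iB"/"B" tail.
     */
    #include <linux/kernel.h>

    static int example_string_to_bytes(const char *buf, u64 *bytes)
    {
            char *end;
            u64 val;

            val = memparse(buf, &end);
            if (end == buf)                 /* no digits at all */
                    return -EINVAL;
            if (*end == 'i')                /* "16MiB" -> skip 'i' */
                    end++;
            if (*end == 'B')                /* "16MB"/"16MiB" -> skip 'B' */
                    end++;
            if (*end && *end != '\n')
                    return -EINVAL;
            *bytes = val;
            return 0;
    }
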
diff --git a/drivers/staging/lustre/include/linux/libcfs/curproc.h b/drivers/staging/lustre/include/linux/libcfs/curproc.h index 3cb3f086148e..4702956805a6 100644 --- a/drivers/staging/lustre/include/linux/libcfs/curproc.h +++ b/drivers/staging/lustre/include/linux/libcfs/curproc.h @@ -56,30 +56,21 @@ typedef u32 cfs_cap_t; -#define CFS_CAP_CHOWN 0 -#define CFS_CAP_DAC_OVERRIDE 1 -#define CFS_CAP_DAC_READ_SEARCH 2 -#define CFS_CAP_FOWNER 3 -#define CFS_CAP_FSETID 4 -#define CFS_CAP_LINUX_IMMUTABLE 9 -#define CFS_CAP_SYS_ADMIN 21 -#define CFS_CAP_SYS_BOOT 23 -#define CFS_CAP_SYS_RESOURCE 24 +#define CFS_CAP_FS_MASK (BIT(CAP_CHOWN) | \ + BIT(CAP_DAC_OVERRIDE) | \ + BIT(CAP_DAC_READ_SEARCH) | \ + BIT(CAP_FOWNER) | \ + BIT(CAP_FSETID) | \ + BIT(CAP_LINUX_IMMUTABLE) | \ + BIT(CAP_SYS_ADMIN) | \ + BIT(CAP_SYS_BOOT) | \ + BIT(CAP_SYS_RESOURCE)) -#define CFS_CAP_FS_MASK (BIT(CFS_CAP_CHOWN) | \ - BIT(CFS_CAP_DAC_OVERRIDE) | \ - BIT(CFS_CAP_DAC_READ_SEARCH) | \ - BIT(CFS_CAP_FOWNER) | \ - BIT(CFS_CAP_FSETID) | \ - BIT(CFS_CAP_LINUX_IMMUTABLE) | \ - BIT(CFS_CAP_SYS_ADMIN) | \ - BIT(CFS_CAP_SYS_BOOT) | \ - BIT(CFS_CAP_SYS_RESOURCE)) - -void cfs_cap_raise(cfs_cap_t cap); -void cfs_cap_lower(cfs_cap_t cap); -int cfs_cap_raised(cfs_cap_t cap); -cfs_cap_t cfs_curproc_cap_pack(void); +static inline cfs_cap_t cfs_curproc_cap_pack(void) +{ + /* cfs_cap_t is only the first word of kernel_cap_t */ + return (cfs_cap_t)(current_cap().cap[0]); +} /* __LIBCFS_CURPROC_H__ */ #endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h index ca3472cc952f..392793582956 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h @@ -64,14 +64,21 @@ #define LNET_ACCEPTOR_MIN_RESERVED_PORT 512 #define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023 -/* - * Defined by platform - */ -sigset_t cfs_block_allsigs(void); -sigset_t cfs_block_sigs(unsigned long sigs); -sigset_t cfs_block_sigsinv(unsigned long sigs); -void cfs_restore_sigs(sigset_t sigset); -void cfs_clear_sigpending(void); +/* Block all signals except for the @sigs */ +static inline void cfs_block_sigsinv(unsigned long sigs, sigset_t *old) +{ + sigset_t new; + + siginitsetinv(&new, sigs); + sigorsets(&new, ¤t->blocked, &new); + sigprocmask(SIG_BLOCK, &new, old); +} + +static inline void +cfs_restore_sigs(sigset_t *old) +{ + sigprocmask(SIG_SETMASK, old, NULL); +} struct libcfs_ioctl_handler { struct list_head item; @@ -105,10 +112,6 @@ static inline void *__container_of(void *ptr, unsigned long shift) #define _LIBCFS_H -void *libcfs_kvzalloc(size_t size, gfp_t flags); -void *libcfs_kvzalloc_cpt(struct cfs_cpt_table *cptab, int cpt, size_t size, - gfp_t flags); - extern struct miscdevice libcfs_dev; /** * The path of debug log dump upcall script. 
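Since cfs_block_sigsinv() and cfs_restore_sigs() above are now inlines taking
an explicit sigset_t, a caller keeps the old mask on its own stack. A
hypothetical caller, not part of this patch (waitq and done are made-up
names):

    /* Hypothetical use of the new helpers above: block everything except
     * SIGINT/SIGTERM around an interruptible wait, then restore the
     * previous mask.
     */
    #include <linux/sched/signal.h>
    #include <linux/wait.h>

    static int example_wait(wait_queue_head_t *waitq, bool *done)
    {
            sigset_t old;
            int rc;

            cfs_block_sigsinv(sigmask(SIGINT) | sigmask(SIGTERM), &old);
            rc = wait_event_interruptible(*waitq, *done);
            cfs_restore_sigs(&old);
            return rc;
    }
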
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h index e5c156e9d907..3a72117140ed 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h @@ -189,18 +189,15 @@ int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg, unsigned char *key, unsigned int key_len, unsigned char *hash, unsigned int *hash_len); -/* cfs crypto hash descriptor */ -struct cfs_crypto_hash_desc; - -struct cfs_crypto_hash_desc * +struct ahash_request * cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg, unsigned char *key, unsigned int key_len); -int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *desc, +int cfs_crypto_hash_update_page(struct ahash_request *desc, struct page *page, unsigned int offset, unsigned int len); -int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *desc, const void *buf, +int cfs_crypto_hash_update(struct ahash_request *desc, const void *buf, unsigned int buf_len); -int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *desc, +int cfs_crypto_hash_final(struct ahash_request *desc, unsigned char *hash, unsigned int *hash_len); int cfs_crypto_register(void); void cfs_crypto_unregister(void); diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h index 1b98f0953afb..9290a19429e7 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h @@ -66,8 +66,8 @@ extern unsigned int libcfs_panic_on_lbug; # define DEBUG_SUBSYSTEM S_UNDEFINED #endif -#define CDEBUG_DEFAULT_MAX_DELAY (cfs_time_seconds(600)) /* jiffies */ -#define CDEBUG_DEFAULT_MIN_DELAY ((cfs_time_seconds(1) + 1) / 2) /* jiffies */ +#define CDEBUG_DEFAULT_MAX_DELAY (600 * HZ) /* jiffies */ +#define CDEBUG_DEFAULT_MIN_DELAY ((HZ + 1) / 2) /* jiffies */ #define CDEBUG_DEFAULT_BACKOFF 2 struct cfs_debug_limit_state { unsigned long cdls_next; diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h index 9699646decb9..c4f25be78268 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_time.h @@ -62,7 +62,7 @@ static inline int cfs_time_aftereq(unsigned long t1, unsigned long t2) static inline unsigned long cfs_time_shift(int seconds) { - return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds)); + return cfs_time_add(cfs_time_current(), seconds * HZ); } /* diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h index aece13698eb4..805cb326af86 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h @@ -65,11 +65,6 @@ static inline unsigned long cfs_time_current(void) return jiffies; } -static inline long cfs_time_seconds(int seconds) -{ - return ((long)seconds) * msecs_to_jiffies(MSEC_PER_SEC); -} - static inline long cfs_duration_sec(long d) { return d / msecs_to_jiffies(MSEC_PER_SEC); @@ -85,7 +80,7 @@ static inline u64 cfs_time_add_64(u64 t, u64 d) static inline u64 cfs_time_shift_64(int seconds) { return cfs_time_add_64(cfs_time_current_64(), - cfs_time_seconds(seconds)); + seconds * HZ); } static inline int cfs_time_before_64(u64 t1, u64 t2) diff --git 
a/drivers/staging/lustre/include/linux/lnet/api.h b/drivers/staging/lustre/include/linux/lnet/api.h index 31fcd33171b4..dae2e4f0056c 100644 --- a/drivers/staging/lustre/include/linux/lnet/api.h +++ b/drivers/staging/lustre/include/linux/lnet/api.h @@ -169,6 +169,7 @@ int LNetEQFree(struct lnet_handle_eq eventq_in); int LNetEQPoll(struct lnet_handle_eq *eventqs_in, int neq_in, int timeout_ms, + int interruptible, struct lnet_event *event_out, int *which_eq_out); /** @} lnet_eq */ diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig index 6bcb53d0c6f4..ad049e6f24e4 100644 --- a/drivers/staging/lustre/lnet/Kconfig +++ b/drivers/staging/lustre/lnet/Kconfig @@ -1,6 +1,6 @@ config LNET tristate "Lustre networking subsystem (LNet)" - depends on INET && m + depends on INET help The Lustre network layer, also known as LNet, is a networking abstaction level API that was initially created to allow Lustre Filesystem to utilize diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index ec84edfda271..7ae2955c4db6 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -1211,7 +1211,7 @@ static struct kib_hca_dev *kiblnd_current_hdev(struct kib_dev *dev) CDEBUG(D_NET, "%s: Wait for failover\n", dev->ibd_ifname); set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1) / 100); + schedule_timeout(HZ / 100); read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); } @@ -1921,7 +1921,7 @@ struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps) set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(interval); - if (interval < cfs_time_seconds(1)) + if (interval < HZ) interval *= 2; goto again; @@ -2541,7 +2541,7 @@ static void kiblnd_base_shutdown(void) "Waiting for %d threads to terminate\n", atomic_read(&kiblnd_data.kib_nthreads)); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); } /* fall through */ @@ -2592,7 +2592,7 @@ static void kiblnd_shutdown(struct lnet_ni *ni) libcfs_nid2str(ni->ni_nid), atomic_read(&net->ibn_npeers)); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); } kiblnd_net_fini_pools(net); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index b3e7f28eb978..6690a6cd4e34 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -3288,8 +3288,6 @@ kiblnd_connd(void *arg) int peer_index = 0; unsigned long deadline = jiffies; - cfs_block_allsigs(); - init_waitqueue_entry(&wait, current); kiblnd_data.kib_connd = current; @@ -3542,8 +3540,6 @@ kiblnd_scheduler(void *arg) int busy_loops = 0; int rc; - cfs_block_allsigs(); - init_waitqueue_entry(&wait, current); sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)]; @@ -3676,8 +3672,6 @@ kiblnd_failover_thread(void *arg) LASSERT(*kiblnd_tunables.kib_dev_failover); - cfs_block_allsigs(); - init_waitqueue_entry(&wait, current); write_lock_irqsave(glock, flags); @@ -3728,8 +3722,8 @@ kiblnd_failover_thread(void *arg) add_wait_queue(&kiblnd_data.kib_failover_waitq, &wait); write_unlock_irqrestore(glock, flags); - rc = schedule_timeout(long_sleep ? cfs_time_seconds(10) : - cfs_time_seconds(1)); + rc = schedule_timeout(long_sleep ? 
10 * HZ : + HZ); remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait); write_lock_irqsave(glock, flags); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index ff292216290d..7086678e1c3e 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -1677,7 +1677,7 @@ ksocknal_destroy_conn(struct ksock_conn *conn) switch (conn->ksnc_rx_state) { case SOCKNAL_RX_LNET_PAYLOAD: last_rcv = conn->ksnc_rx_deadline - - cfs_time_seconds(*ksocknal_tunables.ksnd_timeout); + *ksocknal_tunables.ksnd_timeout * HZ; CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n", libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, &conn->ksnc_ipaddr, conn->ksnc_port, @@ -2356,7 +2356,7 @@ ksocknal_base_shutdown(void) ksocknal_data.ksnd_nthreads); read_unlock(&ksocknal_data.ksnd_global_lock); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); read_lock(&ksocknal_data.ksnd_global_lock); } read_unlock(&ksocknal_data.ksnd_global_lock); @@ -2599,7 +2599,7 @@ ksocknal_shutdown(struct lnet_ni *ni) "waiting for %d peers to disconnect\n", net->ksnn_npeers); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); ksocknal_debug_peerhash(ni); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h index d50ebdf863fa..570f54ed57b1 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h @@ -304,15 +304,6 @@ struct ksock_tx { /* transmit packet */ /* network zero copy callback descriptor embedded in struct ksock_tx */ -/* - * space for the rx frag descriptors; we either read a single contiguous - * header, or up to LNET_MAX_IOV frags of payload of either type. 
- */ -union ksock_rxiovspace { - struct kvec iov[LNET_MAX_IOV]; - struct bio_vec kiov[LNET_MAX_IOV]; -}; - #define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */ #define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */ #define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */ @@ -359,7 +350,7 @@ struct ksock_conn { __u8 ksnc_rx_state; /* what is being read */ int ksnc_rx_nob_left; /* # bytes to next hdr/body */ struct iov_iter ksnc_rx_to; /* copy destination */ - union ksock_rxiovspace ksnc_rx_iov_space; /* space for frag descriptors */ + struct kvec ksnc_rx_iov_space[LNET_MAX_IOV]; /* space for frag descriptors */ __u32 ksnc_rx_csum; /* partial checksum for incoming * data */ diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c index 11fd3a36424f..036fecbcede8 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c @@ -189,7 +189,7 @@ ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx) if (ksocknal_data.ksnd_stall_tx) { set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(ksocknal_data.ksnd_stall_tx)); + schedule_timeout(ksocknal_data.ksnd_stall_tx * HZ); } LASSERT(tx->tx_resid); @@ -294,7 +294,7 @@ ksocknal_receive(struct ksock_conn *conn) if (ksocknal_data.ksnd_stall_rx) { set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(ksocknal_data.ksnd_stall_rx)); + schedule_timeout(ksocknal_data.ksnd_stall_rx * HZ); } rc = ksocknal_connsock_addref(conn); @@ -986,7 +986,7 @@ int ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) { static char ksocknal_slop_buffer[4096]; - struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space; + struct kvec *kvec = conn->ksnc_rx_iov_space; int nob; unsigned int niov; @@ -1059,7 +1059,7 @@ ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) static int ksocknal_process_receive(struct ksock_conn *conn) { - struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space; + struct kvec *kvec = conn->ksnc_rx_iov_space; struct lnet_hdr *lhdr; struct lnet_process_id *id; int rc; @@ -1324,8 +1324,6 @@ int ksocknal_scheduler(void *arg) info = ksocknal_data.ksnd_sched_info[KSOCK_THREAD_CPT(id)]; sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)]; - cfs_block_allsigs(); - rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt); if (rc) { CWARN("Can't set CPU partition affinity to %d: %d\n", @@ -1780,7 +1778,7 @@ ksocknal_connect(struct ksock_route *route) int rc = 0; deadline = cfs_time_add(cfs_time_current(), - cfs_time_seconds(*ksocknal_tunables.ksnd_timeout)); + *ksocknal_tunables.ksnd_timeout * HZ); write_lock_bh(&ksocknal_data.ksnd_global_lock); @@ -1878,7 +1876,7 @@ ksocknal_connect(struct ksock_route *route) * so min_reconnectms should be good heuristic */ route->ksnr_retry_interval = - cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms) / 1000; + *ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000; route->ksnr_timeout = cfs_time_add(cfs_time_current(), route->ksnr_retry_interval); } @@ -1899,10 +1897,10 @@ ksocknal_connect(struct ksock_route *route) route->ksnr_retry_interval *= 2; route->ksnr_retry_interval = max(route->ksnr_retry_interval, - cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms) / 1000); + (long)*ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000); route->ksnr_retry_interval = min(route->ksnr_retry_interval, - cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms) / 1000); + 
(long)*ksocknal_tunables.ksnd_max_reconnectms * HZ / 1000); LASSERT(route->ksnr_retry_interval); route->ksnr_timeout = cfs_time_add(cfs_time_current(), @@ -1972,7 +1970,7 @@ ksocknal_connd_check_start(time64_t sec, long *timeout) if (sec - ksocknal_data.ksnd_connd_failed_stamp <= 1) { /* may run out of resource, retry later */ - *timeout = cfs_time_seconds(1); + *timeout = HZ; return 0; } @@ -2031,8 +2029,8 @@ ksocknal_connd_check_stop(time64_t sec, long *timeout) val = (int)(ksocknal_data.ksnd_connd_starting_stamp + SOCKNAL_CONND_TIMEOUT - sec); - *timeout = (val > 0) ? cfs_time_seconds(val) : - cfs_time_seconds(SOCKNAL_CONND_TIMEOUT); + *timeout = (val > 0) ? val * HZ : + SOCKNAL_CONND_TIMEOUT * HZ; if (val > 0) return 0; @@ -2078,8 +2076,6 @@ ksocknal_connd(void *arg) int nloops = 0; int cons_retry = 0; - cfs_block_allsigs(); - init_waitqueue_entry(&wait, current); spin_lock_bh(connd_lock); @@ -2307,7 +2303,7 @@ ksocknal_send_keepalive_locked(struct ksock_peer *peer) if (*ksocknal_tunables.ksnd_keepalive <= 0 || time_before(cfs_time_current(), cfs_time_add(peer->ksnp_last_alive, - cfs_time_seconds(*ksocknal_tunables.ksnd_keepalive)))) + *ksocknal_tunables.ksnd_keepalive * HZ))) return 0; if (time_before(cfs_time_current(), peer->ksnp_send_keepalive)) @@ -2472,8 +2468,6 @@ ksocknal_reaper(void *arg) int peer_index = 0; unsigned long deadline = cfs_time_current(); - cfs_block_allsigs(); - INIT_LIST_HEAD(&enomem_conns); init_waitqueue_entry(&wait, current); @@ -2563,7 +2557,7 @@ ksocknal_reaper(void *arg) ksocknal_data.ksnd_peer_hash_size; } - deadline = cfs_time_add(deadline, cfs_time_seconds(p)); + deadline = cfs_time_add(deadline, p * HZ); } if (nenomem_conns) { diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index cb28dd2baf2f..7941cfa526bc 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -189,7 +189,7 @@ ksocknal_lib_recv(struct ksock_conn *conn) if (!(conn->ksnc_rx_to.type & ITER_BVEC) && conn->ksnc_proto != &ksocknal_protocol_v2x) return rc; - + /* accumulate checksum */ conn->ksnc_msg.ksm_csum = 0; iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn); diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile index 730f2c675047..b7dc7ac11cc5 100644 --- a/drivers/staging/lustre/lnet/libcfs/Makefile +++ b/drivers/staging/lustre/lnet/libcfs/Makefile @@ -5,12 +5,10 @@ subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include obj-$(CONFIG_LNET) += libcfs.o libcfs-linux-objs := linux-tracefile.o linux-debug.o -libcfs-linux-objs += linux-prim.o linux-cpu.o -libcfs-linux-objs += linux-curproc.o +libcfs-linux-objs += linux-cpu.o libcfs-linux-objs += linux-module.o libcfs-linux-objs += linux-crypto.o libcfs-linux-objs += linux-crypto-adler.o -libcfs-linux-objs += linux-mem.o libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c index 551c45bf4108..1371224a8cb9 100644 --- a/drivers/staging/lustre/lnet/libcfs/debug.c +++ b/drivers/staging/lustre/lnet/libcfs/debug.c @@ -113,7 +113,7 @@ static int param_set_delay_minmax(const char *val, if (rc) return -EINVAL; - d = cfs_time_seconds(sec) / 100; + d = sec * HZ / 100; if (d < min || d > max) return -EINVAL; @@ -440,7 +440,7 @@ int libcfs_debug_clear_buffer(void) return 0; } -/* Debug markers, although printed 
by S_LNET should not be be marked as such. */ +/* Debug markers, although printed by S_LNET should not be marked as such. */ #undef DEBUG_SUBSYSTEM #define DEBUG_SUBSYSTEM S_UNDEFINED int libcfs_debug_mark_buffer(const char *text) diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c index 39439b303d65..d3f1e866c6a7 100644 --- a/drivers/staging/lustre/lnet/libcfs/fail.c +++ b/drivers/staging/lustre/lnet/libcfs/fail.c @@ -134,7 +134,7 @@ int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set) CERROR("cfs_fail_timeout id %x sleeping for %dms\n", id, ms); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(ms) / 1000); + schedule_timeout(ms * HZ / 1000); CERROR("cfs_fail_timeout id %x awake\n", id); } return ret; diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c index c07165e0ad95..388521e4e354 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c @@ -743,7 +743,7 @@ cfs_cpt_table_create(int ncpt) goto failed; } - if (!zalloc_cpumask_var(&mask, GFP_NOFS)){ + if (!zalloc_cpumask_var(&mask, GFP_NOFS)) { CERROR("Failed to allocate scratch cpumask\n"); goto failed; } diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c index 80072b2a443c..b55006264155 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c @@ -42,7 +42,7 @@ static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX]; /** * Initialize the state descriptor for the specified hash algorithm. * - * An internal routine to allocate the hash-specific state in \a hdesc for + * An internal routine to allocate the hash-specific state in \a req for * use with cfs_crypto_hash_digest() to compute the hash of a single message, * though possibly in multiple chunks. The descriptor internal state should * be freed with cfs_crypto_hash_final(). 
@@ -50,7 +50,7 @@ static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX]; * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) * \param[out] type pointer to the hash description in hash_types[] * array - * \param[in,out] hdesc hash state descriptor to be initialized + * \param[in,out] req hash state descriptor to be initialized * \param[in] key initial hash value/state, NULL to use default * value * \param[in] key_len length of \a key @@ -194,7 +194,7 @@ EXPORT_SYMBOL(cfs_crypto_hash_digest); * \retval pointer to descriptor of hash instance * \retval ERR_PTR(errno) in case of error */ -struct cfs_crypto_hash_desc * +struct ahash_request * cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg, unsigned char *key, unsigned int key_len) { @@ -206,14 +206,14 @@ cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg, if (err) return ERR_PTR(err); - return (struct cfs_crypto_hash_desc *)req; + return req; } EXPORT_SYMBOL(cfs_crypto_hash_init); /** * Update hash digest computed on data within the given \a page * - * \param[in] hdesc hash state descriptor + * \param[in] hreq hash state descriptor * \param[in] page data page on which to compute the hash * \param[in] offset offset within \a page at which to start hash * \param[in] len length of data on which to compute hash @@ -221,11 +221,10 @@ EXPORT_SYMBOL(cfs_crypto_hash_init); * \retval 0 for success * \retval negative errno on failure */ -int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc, +int cfs_crypto_hash_update_page(struct ahash_request *req, struct page *page, unsigned int offset, unsigned int len) { - struct ahash_request *req = (void *)hdesc; struct scatterlist sl; sg_init_table(&sl, 1); @@ -239,17 +238,16 @@ EXPORT_SYMBOL(cfs_crypto_hash_update_page); /** * Update hash digest computed on the specified data * - * \param[in] hdesc hash state descriptor + * \param[in] req hash state descriptor * \param[in] buf data buffer on which to compute the hash * \param[in] buf_len length of \buf on which to compute hash * * \retval 0 for success * \retval negative errno on failure */ -int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc, +int cfs_crypto_hash_update(struct ahash_request *req, const void *buf, unsigned int buf_len) { - struct ahash_request *req = (void *)hdesc; struct scatterlist sl; sg_init_one(&sl, buf, buf_len); @@ -262,20 +260,19 @@ EXPORT_SYMBOL(cfs_crypto_hash_update); /** * Finish hash calculation, copy hash digest to buffer, clean up hash descriptor * - * \param[in] hdesc hash descriptor + * \param[in] req hash descriptor * \param[out] hash pointer to hash buffer to store hash digest - * \param[in,out] hash_len pointer to hash buffer size, if \a hdesc = NULL - * only free \a hdesc instead of computing the hash + * \param[in,out] hash_len pointer to hash buffer size, if \a req = NULL + * only free \a req instead of computing the hash * * \retval 0 for success * \retval -EOVERFLOW if hash_len is too small for the hash digest * \retval negative errno for other errors from lower layers */ -int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc, +int cfs_crypto_hash_final(struct ahash_request *req, unsigned char *hash, unsigned int *hash_len) { int err; - struct ahash_request *req = (void *)hdesc; int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); if (!hash || !hash_len) { @@ -331,7 +328,7 @@ static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg) for (start = jiffies, end = start + msecs_to_jiffies(MSEC_PER_SEC), bcount = 0; time_before(jiffies, end); bcount++) { - 
struct cfs_crypto_hash_desc *hdesc; + struct ahash_request *hdesc; int i; hdesc = cfs_crypto_hash_init(hash_alg, NULL, 0); diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c deleted file mode 100644 index 1d8949f1a4fa..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-curproc.c +++ /dev/null @@ -1,108 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/linux/linux-curproc.c - * - * Lustre curproc API implementation for Linux kernel - * - * Author: Nikita Danilov <nikita@clusterfs.com> - */ - -#include <linux/sched.h> -#include <linux/fs_struct.h> - -#include <linux/compat.h> -#include <linux/thread_info.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/libcfs/libcfs.h> - -/* - * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) - * for Linux kernel. - */ - -void cfs_cap_raise(cfs_cap_t cap) -{ - struct cred *cred; - - cred = prepare_creds(); - if (cred) { - cap_raise(cred->cap_effective, cap); - commit_creds(cred); - } -} -EXPORT_SYMBOL(cfs_cap_raise); - -void cfs_cap_lower(cfs_cap_t cap) -{ - struct cred *cred; - - cred = prepare_creds(); - if (cred) { - cap_lower(cred->cap_effective, cap); - commit_creds(cred); - } -} -EXPORT_SYMBOL(cfs_cap_lower); - -int cfs_cap_raised(cfs_cap_t cap) -{ - return cap_raised(current_cap(), cap); -} -EXPORT_SYMBOL(cfs_cap_raised); - -static void cfs_kernel_cap_pack(kernel_cap_t kcap, cfs_cap_t *cap) -{ - /* XXX lost high byte */ - *cap = kcap.cap[0]; -} - -cfs_cap_t cfs_curproc_cap_pack(void) -{ - cfs_cap_t cap; - - cfs_kernel_cap_pack(current_cap(), &cap); - return cap; -} -EXPORT_SYMBOL(cfs_curproc_cap_pack); - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c deleted file mode 100644 index 963df0ef4afb..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - */ -/* - * This file creates a memory allocation primitive for Lustre, that - * allows to fallback to vmalloc allocations should regular kernel allocations - * fail due to size or system memory fragmentation. - * - * Author: Oleg Drokin <green@linuxhacker.ru> - * - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate Technology. - */ -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include <linux/libcfs/libcfs.h> - -void *libcfs_kvzalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kzalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); - return ret; -} -EXPORT_SYMBOL(libcfs_kvzalloc); - -void *libcfs_kvzalloc_cpt(struct cfs_cpt_table *cptab, int cpt, size_t size, - gfp_t flags) -{ - return kvzalloc_node(size, flags, cfs_cpt_spread_node(cptab, cpt)); -} -EXPORT_SYMBOL(libcfs_kvzalloc_cpt); diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c deleted file mode 100644 index 6f92ea272186..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c +++ /dev/null @@ -1,113 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/fs_struct.h> -#include <linux/sched/signal.h> - -#include <linux/libcfs/libcfs.h> - -#if defined(CONFIG_KGDB) -#include <linux/kgdb.h> -#endif - -sigset_t -cfs_block_allsigs(void) -{ - unsigned long flags; - sigset_t old; - - spin_lock_irqsave(¤t->sighand->siglock, flags); - old = current->blocked; - sigfillset(¤t->blocked); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - - return old; -} -EXPORT_SYMBOL(cfs_block_allsigs); - -sigset_t cfs_block_sigs(unsigned long sigs) -{ - unsigned long flags; - sigset_t old; - - spin_lock_irqsave(¤t->sighand->siglock, flags); - old = current->blocked; - sigaddsetmask(¤t->blocked, sigs); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - return old; -} -EXPORT_SYMBOL(cfs_block_sigs); - -/* Block all signals except for the @sigs */ -sigset_t cfs_block_sigsinv(unsigned long sigs) -{ - unsigned long flags; - sigset_t old; - - spin_lock_irqsave(¤t->sighand->siglock, flags); - old = current->blocked; - sigaddsetmask(¤t->blocked, ~sigs); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - - return old; -} -EXPORT_SYMBOL(cfs_block_sigsinv); - -void -cfs_restore_sigs(sigset_t old) -{ - unsigned long flags; - - spin_lock_irqsave(¤t->sighand->siglock, flags); - current->blocked = old; - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); -} -EXPORT_SYMBOL(cfs_restore_sigs); - -void -cfs_clear_sigpending(void) -{ - unsigned long flags; - - spin_lock_irqsave(¤t->sighand->siglock, flags); - clear_tsk_thread_flag(current, TIF_SIGPENDING); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); -} -EXPORT_SYMBOL(cfs_clear_sigpending); diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c index 57913aae1d88..4affca750bc5 100644 --- a/drivers/staging/lustre/lnet/libcfs/tracefile.c +++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c @@ -441,7 +441,7 @@ console: if (cfs_time_after(cfs_time_current(), cdls->cdls_next + libcfs_console_max_delay + - cfs_time_seconds(10))) { + 10 * HZ)) { /* last timeout was a long time ago */ cdls->cdls_delay /= libcfs_console_backoff * 4; } else { @@ -1071,7 +1071,7 @@ end_loop: init_waitqueue_entry(&__wait, current); add_wait_queue(&tctl->tctl_waitq, &__wait); set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); remove_wait_queue(&tctl->tctl_waitq, &__wait); } complete(&tctl->tctl_stop); diff --git a/drivers/staging/lustre/lnet/lnet/acceptor.c b/drivers/staging/lustre/lnet/lnet/acceptor.c index ee85cab6f437..5648f17eddc0 100644 --- a/drivers/staging/lustre/lnet/lnet/acceptor.c +++ b/drivers/staging/lustre/lnet/lnet/acceptor.c @@ -240,7 +240,7 @@ lnet_accept(struct socket *sock, __u32 magic) return -EPROTO; } - if (magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) + if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC)) str = "'old' socknal/tcpnal"; else str = "unrecognised"; @@ -335,8 +335,6 @@ lnet_acceptor(void *arg) LASSERT(!lnet_acceptor_state.pta_sock); - cfs_block_allsigs(); - rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port, accept_backlog); if (rc) { @@ -365,7 +363,7 @@ lnet_acceptor(void *arg) if (rc != -EAGAIN) { CWARN("Accept error %d: pausing...\n", rc); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); } continue; } diff --git 
a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c index 2c7abad57104..90266be0132d 100644 --- a/drivers/staging/lustre/lnet/lnet/api-ni.c +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c @@ -961,19 +961,15 @@ static void lnet_ping_md_unlink(struct lnet_ping_info *pinfo, struct lnet_handle_md *md_handle) { - sigset_t blocked = cfs_block_allsigs(); - LNetMDUnlink(*md_handle); LNetInvalidateMDHandle(md_handle); /* NB md could be busy; this just starts the unlink */ while (pinfo->pi_features != LNET_PING_FEAT_INVAL) { CDEBUG(D_NET, "Still waiting for ping MD to unlink\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + set_current_state(TASK_NOLOAD); + schedule_timeout(HZ); } - - cfs_restore_sigs(blocked); } static void @@ -1109,7 +1105,7 @@ lnet_clear_zombies_nis_locked(void) libcfs_nid2str(ni->ni_nid)); } set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); lnet_net_lock(LNET_LOCK_EX); continue; } @@ -1218,6 +1214,7 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf) struct lnet_lnd *lnd; struct lnet_tx_queue *tq; int i; + u32 seed; lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); @@ -1356,6 +1353,12 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf) tq->tq_credits = lnet_ni_tq_credits(ni); } + /* Nodes with small feet have little entropy. The NID for this + * node gives the most entropy in the low bits. + */ + seed = LNET_NIDADDR(ni->ni_nid); + add_device_randomness(&seed, sizeof(seed)); + CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER, @@ -2141,7 +2144,6 @@ static int lnet_ping(struct lnet_process_id id, int timeout_ms, int nob; int rc; int rc2; - sigset_t blocked; infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]); @@ -2197,13 +2199,9 @@ static int lnet_ping(struct lnet_process_id id, int timeout_ms, do { /* MUST block for unlink to complete */ - if (unlinked) - blocked = cfs_block_allsigs(); - - rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which); - if (unlinked) - cfs_restore_sigs(blocked); + rc2 = LNetEQPoll(&eqh, 1, timeout_ms, !unlinked, + &event, &which); CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2, (rc2 <= 0) ? -1 : event.type, diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c index a173b69e2f92..ea53b5cb3f72 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-eq.c +++ b/drivers/staging/lustre/lnet/lnet/lib-eq.c @@ -308,7 +308,7 @@ lnet_eq_dequeue_event(struct lnet_eq *eq, struct lnet_event *ev) */ static int -lnet_eq_wait_locked(int *timeout_ms) +lnet_eq_wait_locked(int *timeout_ms, long state) __must_hold(&the_lnet.ln_eq_wait_lock) { int tms = *timeout_ms; @@ -320,7 +320,7 @@ __must_hold(&the_lnet.ln_eq_wait_lock) return -ENXIO; /* don't want to wait and no new event */ init_waitqueue_entry(&wl, current); - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(state); add_wait_queue(&the_lnet.ln_eq_waitq, &wl); lnet_eq_wait_unlock(); @@ -359,6 +359,7 @@ __must_hold(&the_lnet.ln_eq_wait_lock) * \param timeout_ms Time in milliseconds to wait for an event to occur on * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an * infinite timeout. 
+ * \param interruptible, if true, use TASK_INTERRUPTIBLE, else TASK_NOLOAD * \param event,which On successful return (1 or -EOVERFLOW), \a event will * hold the next event in the EQs, and \a which will contain the index of the * EQ from which the event was taken. @@ -372,6 +373,7 @@ __must_hold(&the_lnet.ln_eq_wait_lock) */ int LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms, + int interruptible, struct lnet_event *event, int *which) { int wait = 1; @@ -412,7 +414,9 @@ LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms, * 0 : don't want to wait anymore, but might have new event * so need to call dequeue again */ - wait = lnet_eq_wait_locked(&timeout_ms); + wait = lnet_eq_wait_locked(&timeout_ms, + interruptible ? TASK_INTERRUPTIBLE + : TASK_NOLOAD); if (wait < 0) /* no new event */ break; } diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index c673037dbce4..ed43b3f4b114 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -524,7 +524,7 @@ lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now) return 0; deadline = cfs_time_add(lp->lp_last_alive, - cfs_time_seconds(lp->lp_ni->ni_peertimeout)); + lp->lp_ni->ni_peertimeout * HZ); alive = cfs_time_after(deadline, now); /* Update obsolete lp_alive except for routers assumed to be dead @@ -562,7 +562,7 @@ lnet_peer_alive_locked(struct lnet_peer *lp) unsigned long next_query = cfs_time_add(lp->lp_last_query, - cfs_time_seconds(lnet_queryinterval)); + lnet_queryinterval * HZ); if (time_before(now, next_query)) { if (lp->lp_alive) diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c index 471f2f6c86f4..fc47379c5938 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-ptl.c +++ b/drivers/staging/lustre/lnet/lnet/lib-ptl.c @@ -841,6 +841,7 @@ lnet_portals_destroy(void) cfs_array_free(the_lnet.ln_portals); the_lnet.ln_portals = NULL; + the_lnet.ln_nportals = 0; } int @@ -851,12 +852,12 @@ lnet_portals_create(void) size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]); - the_lnet.ln_nportals = MAX_PORTALS; - the_lnet.ln_portals = cfs_array_alloc(the_lnet.ln_nportals, size); + the_lnet.ln_portals = cfs_array_alloc(MAX_PORTALS, size); if (!the_lnet.ln_portals) { CERROR("Failed to allocate portals table\n"); return -ENOMEM; } + the_lnet.ln_nportals = MAX_PORTALS; for (i = 0; i < the_lnet.ln_nportals; i++) { if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) { diff --git a/drivers/staging/lustre/lnet/lnet/net_fault.c b/drivers/staging/lustre/lnet/lnet/net_fault.c index e3468cef273b..a63b7941d435 100644 --- a/drivers/staging/lustre/lnet/lnet/net_fault.c +++ b/drivers/staging/lustre/lnet/lnet/net_fault.c @@ -315,9 +315,8 @@ drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src, rule->dr_time_base = now; rule->dr_drop_time = rule->dr_time_base + - cfs_time_seconds( - prandom_u32_max(attr->u.drop.da_interval)); - rule->dr_time_base += cfs_time_seconds(attr->u.drop.da_interval); + prandom_u32_max(attr->u.drop.da_interval) * HZ; + rule->dr_time_base += attr->u.drop.da_interval * HZ; CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lu\n", libcfs_nid2str(attr->fa_src), @@ -440,8 +439,7 @@ static struct delay_daemon_data delay_dd; static unsigned long round_timeout(unsigned long timeout) { - return cfs_time_seconds((unsigned int) - cfs_duration_sec(cfs_time_sub(timeout, 0)) + 1); + return (unsigned int)rounddown(timeout, HZ) + HZ; } static void @@ -483,10 
+481,8 @@ delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src, rule->dl_time_base = now; rule->dl_delay_time = rule->dl_time_base + - cfs_time_seconds( - prandom_u32_max( - attr->u.delay.la_interval)); - rule->dl_time_base += cfs_time_seconds(attr->u.delay.la_interval); + prandom_u32_max(attr->u.delay.la_interval) * HZ; + rule->dl_time_base += attr->u.delay.la_interval * HZ; CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lu\n", libcfs_nid2str(attr->fa_src), diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c index 3e157c10fec4..3d4caa609c83 100644 --- a/drivers/staging/lustre/lnet/lnet/peer.c +++ b/drivers/staging/lustre/lnet/lnet/peer.c @@ -137,7 +137,7 @@ lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable, ptable->pt_zombies); } set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1) >> 1); + schedule_timeout(HZ >> 1); lnet_net_lock(cpt_locked); } } diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c index 6504761ca598..a3c3f4959f46 100644 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ b/drivers/staging/lustre/lnet/lnet/router.c @@ -808,7 +808,7 @@ lnet_wait_known_routerstate(void) return; set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); } } @@ -1011,7 +1011,7 @@ lnet_ping_router_locked(struct lnet_peer *rtr) if (secs && !rtr->lp_ping_notsent && cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp, - cfs_time_seconds(secs)))) { + secs * HZ))) { int rc; struct lnet_process_id id; struct lnet_handle_md mdh; @@ -1185,7 +1185,7 @@ lnet_prune_rc_data(int wait_unlink) CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, "Waiting for rc buffers to unlink\n"); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1) / 4); + schedule_timeout(HZ / 4); lnet_net_lock(LNET_LOCK_EX); } @@ -1226,8 +1226,6 @@ lnet_router_checker(void *arg) struct lnet_peer *rtr; struct list_head *entry; - cfs_block_allsigs(); - while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) { __u64 version; int cpt; @@ -1282,7 +1280,7 @@ rescan: else wait_event_interruptible_timeout(the_lnet.ln_rc_waitq, false, - cfs_time_seconds(1)); + HZ); } lnet_prune_rc_data(1); /* wait for UNLINK */ diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c index 34ba440b3c02..a2d8092bdeb7 100644 --- a/drivers/staging/lustre/lnet/selftest/conctl.c +++ b/drivers/staging/lustre/lnet/selftest/conctl.c @@ -648,14 +648,10 @@ static int lst_test_add_ioctl(struct lstio_test_args *args) return -EINVAL; if (args->lstio_tes_param) { - param = kmalloc(args->lstio_tes_param_len, GFP_KERNEL); - if (!param) - goto out; - if (copy_from_user(param, args->lstio_tes_param, - args->lstio_tes_param_len)) { - rc = -EFAULT; - goto out; - } + param = memdup_user(args->lstio_tes_param, + args->lstio_tes_param_len); + if (IS_ERR(param)) + return PTR_ERR(param); } rc = -EFAULT; @@ -674,7 +670,7 @@ static int lst_test_add_ioctl(struct lstio_test_args *args) args->lstio_tes_param_len, &ret, args->lstio_tes_resultp); - if (ret) + if (!rc && ret) rc = (copy_to_user(args->lstio_tes_retp, &ret, sizeof(ret))) ? 
-EFAULT : 0; out: diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c index 7aa515c34594..6dcc966b293b 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ b/drivers/staging/lustre/lnet/selftest/conrpc.c @@ -359,7 +359,7 @@ lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout) rc = wait_event_interruptible_timeout(trans->tas_waitq, lstcon_rpc_trans_check(trans), - cfs_time_seconds(timeout)); + timeout * HZ); rc = (rc > 0) ? 0 : ((rc < 0) ? -EINTR : -ETIMEDOUT); mutex_lock(&console_session.ses_mutex); @@ -1350,7 +1350,7 @@ lstcon_rpc_cleanup_wait(void) CWARN("Session is shutting down, waiting for termination of transactions\n"); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); mutex_lock(&console_session.ses_mutex); } diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c index c7697f66f663..0ca1e3a780ca 100644 --- a/drivers/staging/lustre/lnet/selftest/framework.c +++ b/drivers/staging/lustre/lnet/selftest/framework.c @@ -187,7 +187,7 @@ sfw_del_session_timer(void) return 0; } - return EBUSY; /* racing with sfw_session_expired() */ + return -EBUSY; /* racing with sfw_session_expired() */ } static void diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index f8198ad1046e..9613b0a77007 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -1604,7 +1604,7 @@ srpc_startup(void) /* 1 second pause to avoid timestamp reuse */ set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); srpc_data.rpc_matchbits = ((__u64)ktime_get_real_seconds()) << 48; srpc_data.rpc_state = SRPC_STATE_NONE; diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h index ad04534f000c..05466b85e1c0 100644 --- a/drivers/staging/lustre/lnet/selftest/selftest.h +++ b/drivers/staging/lustre/lnet/selftest/selftest.h @@ -575,7 +575,7 @@ swi_state2str(int state) #define selftest_wait_events() \ do { \ set_current_state(TASK_UNINTERRUPTIBLE); \ - schedule_timeout(cfs_time_seconds(1) / 10); \ + schedule_timeout(HZ / 10); \ } while (0) #define lst_wait_until(cond, lock, fmt, ...) 
\ diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c index ab125a8524c5..1b2c5fc81358 100644 --- a/drivers/staging/lustre/lnet/selftest/timer.c +++ b/drivers/staging/lustre/lnet/selftest/timer.c @@ -170,14 +170,12 @@ stt_timer_main(void *arg) { int rc = 0; - cfs_block_allsigs(); - while (!stt_data.stt_shuttingdown) { stt_check_timers(&stt_data.stt_prev_slot); rc = wait_event_timeout(stt_data.stt_waitq, stt_data.stt_shuttingdown, - cfs_time_seconds(STTIMER_SLOTTIME)); + STTIMER_SLOTTIME * HZ); } spin_lock(&stt_data.stt_lock); diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig index 90d826946c6a..ccb78a945995 100644 --- a/drivers/staging/lustre/lustre/Kconfig +++ b/drivers/staging/lustre/lustre/Kconfig @@ -1,6 +1,5 @@ config LUSTRE_FS tristate "Lustre file system client support" - depends on m && !MIPS && !XTENSA && !SUPERH depends on LNET select CRYPTO select CRYPTO_CRC32 diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c index 009c2367f74e..030680f37c79 100644 --- a/drivers/staging/lustre/lustre/fid/fid_request.c +++ b/drivers/staging/lustre/lustre/fid/fid_request.c @@ -118,22 +118,22 @@ static int seq_client_rpc(struct lu_client_seq *seq, goto out_req; out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE); - *output = *out; - if (!lu_seq_range_is_sane(output)) { + if (!lu_seq_range_is_sane(out)) { CERROR("%s: Invalid range received from server: " - DRANGE "\n", seq->lcs_name, PRANGE(output)); + DRANGE "\n", seq->lcs_name, PRANGE(out)); rc = -EINVAL; goto out_req; } - if (lu_seq_range_is_exhausted(output)) { + if (lu_seq_range_is_exhausted(out)) { CERROR("%s: Range received from server is exhausted: " - DRANGE "]\n", seq->lcs_name, PRANGE(output)); + DRANGE "]\n", seq->lcs_name, PRANGE(out)); rc = -EINVAL; goto out_req; } + *output = *out; CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence " DRANGE "]\n", seq->lcs_name, opcname, PRANGE(output)); @@ -174,6 +174,7 @@ static int seq_client_alloc_seq(const struct lu_env *env, if (rc) { CERROR("%s: Can't allocate new meta-sequence, rc %d\n", seq->lcs_name, rc); + *seqnr = U64_MAX; return rc; } CDEBUG(D_INFO, "%s: New range - " DRANGE "\n", @@ -192,71 +193,49 @@ static int seq_client_alloc_seq(const struct lu_env *env, return rc; } -static int seq_fid_alloc_prep(struct lu_client_seq *seq, - wait_queue_entry_t *link) -{ - if (seq->lcs_update) { - add_wait_queue(&seq->lcs_waitq, link); - set_current_state(TASK_UNINTERRUPTIBLE); - mutex_unlock(&seq->lcs_mutex); - - schedule(); - - mutex_lock(&seq->lcs_mutex); - remove_wait_queue(&seq->lcs_waitq, link); - set_current_state(TASK_RUNNING); - return -EAGAIN; - } - ++seq->lcs_update; - mutex_unlock(&seq->lcs_mutex); - return 0; -} - -static void seq_fid_alloc_fini(struct lu_client_seq *seq) -{ - LASSERT(seq->lcs_update == 1); - mutex_lock(&seq->lcs_mutex); - --seq->lcs_update; - wake_up(&seq->lcs_waitq); -} - /* Allocate new fid on passed client @seq and save it to @fid. 
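The seq_client_rpc() change above reorders validation and assignment: the reply range is sanity-checked first and only copied into the caller's *output once it passes, so a corrupt or exhausted range from the server can no longer clobber previously cached state. A minimal sketch of the pattern, using the names from the patch:

        out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE);
        if (!lu_seq_range_is_sane(out))
                return -EINVAL;         /* reject before touching *output */
        if (lu_seq_range_is_exhausted(out))
                return -EINVAL;
        *output = *out;                 /* commit only validated data */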
*/ int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq, struct lu_fid *fid) { - wait_queue_entry_t link; int rc; LASSERT(seq); LASSERT(fid); - init_waitqueue_entry(&link, current); - mutex_lock(&seq->lcs_mutex); + spin_lock(&seq->lcs_lock); if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST)) seq->lcs_fid.f_oid = seq->lcs_width; - while (1) { + wait_event_cmd(seq->lcs_waitq, + (!fid_is_zero(&seq->lcs_fid) && + fid_oid(&seq->lcs_fid) < seq->lcs_width) || + !seq->lcs_update, + spin_unlock(&seq->lcs_lock), + spin_lock(&seq->lcs_lock)); + + if (!fid_is_zero(&seq->lcs_fid) && + fid_oid(&seq->lcs_fid) < seq->lcs_width) { + /* Just bump last allocated fid and return to caller. */ + seq->lcs_fid.f_oid += 1; + rc = 0; + } else { u64 seqnr; - if (!fid_is_zero(&seq->lcs_fid) && - fid_oid(&seq->lcs_fid) < seq->lcs_width) { - /* Just bump last allocated fid and return to caller. */ - seq->lcs_fid.f_oid += 1; - rc = 0; - break; - } - - rc = seq_fid_alloc_prep(seq, &link); - if (rc) - continue; + LASSERT(seq->lcs_update == 0); + seq->lcs_update = 1; + spin_unlock(&seq->lcs_lock); rc = seq_client_alloc_seq(env, seq, &seqnr); + + spin_lock(&seq->lcs_lock); + seq->lcs_update = 0; + wake_up(&seq->lcs_waitq); + if (rc) { CERROR("%s: Can't allocate new sequence, rc %d\n", seq->lcs_name, rc); - seq_fid_alloc_fini(seq); - mutex_unlock(&seq->lcs_mutex); + spin_unlock(&seq->lcs_lock); return rc; } @@ -272,13 +251,10 @@ int seq_client_alloc_fid(const struct lu_env *env, * to setup FLD for it. */ rc = 1; - - seq_fid_alloc_fini(seq); - break; } *fid = seq->lcs_fid; - mutex_unlock(&seq->lcs_mutex); + spin_unlock(&seq->lcs_lock); CDEBUG(D_INFO, "%s: Allocated FID " DFID "\n", seq->lcs_name, PFID(fid)); @@ -292,23 +268,14 @@ EXPORT_SYMBOL(seq_client_alloc_fid); */ void seq_client_flush(struct lu_client_seq *seq) { - wait_queue_entry_t link; LASSERT(seq); - init_waitqueue_entry(&link, current); - mutex_lock(&seq->lcs_mutex); - - while (seq->lcs_update) { - add_wait_queue(&seq->lcs_waitq, &link); - set_current_state(TASK_UNINTERRUPTIBLE); - mutex_unlock(&seq->lcs_mutex); + spin_lock(&seq->lcs_lock); - schedule(); - - mutex_lock(&seq->lcs_mutex); - remove_wait_queue(&seq->lcs_waitq, &link); - set_current_state(TASK_RUNNING); - } + wait_event_cmd(seq->lcs_waitq, + !seq->lcs_update, + spin_unlock(&seq->lcs_lock), + spin_lock(&seq->lcs_lock)); fid_zero(&seq->lcs_fid); /** @@ -319,7 +286,7 @@ void seq_client_flush(struct lu_client_seq *seq) seq->lcs_space.lsr_index = -1; lu_seq_range_init(&seq->lcs_space); - mutex_unlock(&seq->lcs_mutex); + spin_unlock(&seq->lcs_lock); } EXPORT_SYMBOL(seq_client_flush); @@ -382,7 +349,7 @@ static int seq_client_init(struct lu_client_seq *seq, seq->lcs_type = type; - mutex_init(&seq->lcs_mutex); + spin_lock_init(&seq->lcs_lock); if (type == LUSTRE_SEQ_METADATA) seq->lcs_width = LUSTRE_METADATA_SEQ_MAX_WIDTH; else diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c index 083419f77697..a1e5bf9f36ec 100644 --- a/drivers/staging/lustre/lustre/fid/lproc_fid.c +++ b/drivers/staging/lustre/lustre/fid/lproc_fid.c @@ -98,33 +98,43 @@ ldebugfs_fid_space_seq_write(struct file *file, size_t count, loff_t *off) { struct lu_client_seq *seq; + struct lu_seq_range range; int rc; seq = ((struct seq_file *)file->private_data)->private; - mutex_lock(&seq->lcs_mutex); - rc = ldebugfs_fid_write_common(buffer, count, &seq->lcs_space); + rc = ldebugfs_fid_write_common(buffer, count, &range); - if (rc == 0) { + spin_lock(&seq->lcs_lock); + if 
(seq->lcs_update) + /* An RPC call is active to update lcs_space */ + rc = -EBUSY; + if (rc > 0) + seq->lcs_space = range; + spin_unlock(&seq->lcs_lock); + + if (rc > 0) { CDEBUG(D_INFO, "%s: Space: " DRANGE "\n", - seq->lcs_name, PRANGE(&seq->lcs_space)); + seq->lcs_name, PRANGE(&range)); } - mutex_unlock(&seq->lcs_mutex); - - return count; + return rc; } static int ldebugfs_fid_space_seq_show(struct seq_file *m, void *unused) { struct lu_client_seq *seq = (struct lu_client_seq *)m->private; + int rc = 0; - mutex_lock(&seq->lcs_mutex); - seq_printf(m, "[%#llx - %#llx]:%x:%s\n", PRANGE(&seq->lcs_space)); - mutex_unlock(&seq->lcs_mutex); + spin_lock(&seq->lcs_lock); + if (seq->lcs_update) + rc = -EBUSY; + else + seq_printf(m, "[%#llx - %#llx]:%x:%s\n", PRANGE(&seq->lcs_space)); + spin_unlock(&seq->lcs_lock); - return 0; + return rc; } static ssize_t @@ -142,7 +152,7 @@ ldebugfs_fid_width_seq_write(struct file *file, if (rc) return rc; - mutex_lock(&seq->lcs_mutex); + spin_lock(&seq->lcs_lock); if (seq->lcs_type == LUSTRE_SEQ_DATA) max = LUSTRE_DATA_SEQ_MAX_WIDTH; else @@ -155,7 +165,7 @@ ldebugfs_fid_width_seq_write(struct file *file, seq->lcs_width); } - mutex_unlock(&seq->lcs_mutex); + spin_unlock(&seq->lcs_lock); return count; } @@ -165,9 +175,9 @@ ldebugfs_fid_width_seq_show(struct seq_file *m, void *unused) { struct lu_client_seq *seq = (struct lu_client_seq *)m->private; - mutex_lock(&seq->lcs_mutex); + spin_lock(&seq->lcs_lock); seq_printf(m, "%llu\n", seq->lcs_width); - mutex_unlock(&seq->lcs_mutex); + spin_unlock(&seq->lcs_lock); return 0; } @@ -177,9 +187,9 @@ ldebugfs_fid_fid_seq_show(struct seq_file *m, void *unused) { struct lu_client_seq *seq = (struct lu_client_seq *)m->private; - mutex_lock(&seq->lcs_mutex); + spin_lock(&seq->lcs_lock); seq_printf(m, DFID "\n", PFID(&seq->lcs_fid)); - mutex_unlock(&seq->lcs_mutex); + spin_unlock(&seq->lcs_lock); return 0; } diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c index ecf8b9e1ed5c..2d61ca4e51cf 100644 --- a/drivers/staging/lustre/lustre/fld/fld_cache.c +++ b/drivers/staging/lustre/lustre/fld/fld_cache.c @@ -263,7 +263,7 @@ static void fld_cache_punch_hole(struct fld_cache *cache, fldt = kzalloc(sizeof(*fldt), GFP_ATOMIC); if (!fldt) { kfree(f_new); - /* overlap is not allowed, so dont mess up list. */ + /* overlap is not allowed, so don't mess up list. */ return; } /* break f_curr RANGE into three RANGES: diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h index 90419dca2e1e..341a145c3331 100644 --- a/drivers/staging/lustre/lustre/include/cl_object.h +++ b/drivers/staging/lustre/lustre/include/cl_object.h @@ -1833,7 +1833,7 @@ struct cl_io { */ ci_verify_layout:1, /** - * file is released, restore has to to be triggered by vvp layer + * file is released, restore has to be triggered by vvp layer */ ci_restore_needed:1, /** diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h index 835a729dd8d0..426e8f3c9809 100644 --- a/drivers/staging/lustre/lustre/include/lprocfs_status.h +++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h @@ -114,7 +114,7 @@ struct rename_stats { * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter samples, * (i.e. counter can be incremented by more than "1"). When specified, * the counter maintains min, max and sum in addition to a simple - * invocation count. This allows averages to be be computed. 
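The seq_client_alloc_fid()/seq_client_flush() rewrite above relies on wait_event_cmd(), which runs one command before sleeping and another after waking; that is what lets the caller keep lcs_lock held across the condition check. The shape of the idiom, as a sketch under the patch's locking scheme:

        spin_lock(&seq->lcs_lock);
        wait_event_cmd(seq->lcs_waitq,
                       !seq->lcs_update,               /* checked with lcs_lock held */
                       spin_unlock(&seq->lcs_lock),    /* executed before sleeping */
                       spin_lock(&seq->lcs_lock));     /* executed after waking */
        /* condition is true here and lcs_lock is held again */
        spin_unlock(&seq->lcs_lock);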
+ * invocation count. This allows averages to be computed. * If not specified, the counter is an increment-by-1 counter. * min, max, sum, etc. are not maintained. * diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h index 34e35fbff978..35c7b582f36d 100644 --- a/drivers/staging/lustre/lustre/include/lu_object.h +++ b/drivers/staging/lustre/lustre/include/lu_object.h @@ -1328,13 +1328,6 @@ struct lu_kmem_descr { int lu_kmem_init(struct lu_kmem_descr *caches); void lu_kmem_fini(struct lu_kmem_descr *caches); -void lu_buf_free(struct lu_buf *buf); -void lu_buf_alloc(struct lu_buf *buf, size_t size); -void lu_buf_realloc(struct lu_buf *buf, size_t size); - -int lu_buf_check_and_grow(struct lu_buf *buf, size_t len); -struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len); - extern __u32 lu_context_tags_default; extern __u32 lu_session_tags_default; diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h index e0b17052b2ea..239aa2b1268f 100644 --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h +++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h @@ -60,7 +60,7 @@ struct obd_device; #define OBD_LDLM_DEVICENAME "ldlm" #define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) -#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(3900)) /* 65 min */ +#define LDLM_DEFAULT_MAX_ALIVE (65 * 60 * HZ) /* 65 min */ #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024 /** diff --git a/drivers/staging/lustre/lustre/include/lustre_export.h b/drivers/staging/lustre/lustre/include/lustre_export.h index 66ac9dc7302a..40cd168ed2ea 100644 --- a/drivers/staging/lustre/lustre/include/lustre_export.h +++ b/drivers/staging/lustre/lustre/include/lustre_export.h @@ -87,6 +87,8 @@ struct obd_export { struct obd_uuid exp_client_uuid; /** To link all exports on an obd device */ struct list_head exp_obd_chain; + /** work_struct for destruction of export */ + struct work_struct exp_zombie_work; struct hlist_node exp_uuid_hash; /** uuid-export hash*/ /** Obd device of this export */ struct obd_device *exp_obd; diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h index d19c7a27ee48..094ad282de2c 100644 --- a/drivers/staging/lustre/lustre/include/lustre_fid.h +++ b/drivers/staging/lustre/lustre/include/lustre_fid.h @@ -324,7 +324,7 @@ enum lu_mgr_type { struct lu_client_seq { /* Sequence-controller export. */ struct obd_export *lcs_exp; - struct mutex lcs_mutex; + spinlock_t lcs_lock; /* * Range of allowed for allocation sequences. 
When using lu_client_seq on diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h index ea158e0630e2..1731048f1ff2 100644 --- a/drivers/staging/lustre/lustre/include/lustre_import.h +++ b/drivers/staging/lustre/lustre/include/lustre_import.h @@ -162,8 +162,8 @@ struct obd_import { struct ptlrpc_client *imp_client; /** List element for linking into pinger chain */ struct list_head imp_pinger_chain; - /** List element for linking into chain for destruction */ - struct list_head imp_zombie_chain; + /** work struct for destruction of import */ + struct work_struct imp_zombie_work; /** * Lists of requests that are retained for replay, waiting for a reply, diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index ca1dce15337e..0053eafc1c10 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -76,281 +76,49 @@ int do_set_info_async(struct obd_import *imp, void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); -/* - * l_wait_event is a flexible sleeping function, permitting simple caller - * configuration of interrupt and timeout sensitivity along with actions to - * be performed in the event of either exception. - * - * The first form of usage looks like this: - * - * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler, - * intr_handler, callback_data); - * rc = l_wait_event(waitq, condition, &lwi); - * - * l_wait_event() makes the current process wait on 'waitq' until 'condition' - * is TRUE or a "killable" signal (SIGTERM, SIKGILL, SIGINT) is pending. It - * returns 0 to signify 'condition' is TRUE, but if a signal wakes it before - * 'condition' becomes true, it optionally calls the specified 'intr_handler' - * if not NULL, and returns -EINTR. - * - * If a non-zero timeout is specified, signals are ignored until the timeout - * has expired. At this time, if 'timeout_handler' is not NULL it is called. - * If it returns FALSE l_wait_event() continues to wait as described above with - * signals enabled. Otherwise it returns -ETIMEDOUT. - * - * LWI_INTR(intr_handler, callback_data) is shorthand for - * LWI_TIMEOUT_INTR(0, NULL, intr_handler, callback_data) - * - * The second form of usage looks like this: - * - * struct l_wait_info lwi = LWI_TIMEOUT(timeout, timeout_handler); - * rc = l_wait_event(waitq, condition, &lwi); - * - * This form is the same as the first except that it COMPLETELY IGNORES - * SIGNALS. The caller must therefore beware that if 'timeout' is zero, or if - * 'timeout_handler' is not NULL and returns FALSE, then the ONLY thing that - * can unblock the current process is 'condition' becoming TRUE. - * - * Another form of usage is: - * struct l_wait_info lwi = LWI_TIMEOUT_INTERVAL(timeout, interval, - * timeout_handler); - * rc = l_wait_event(waitq, condition, &lwi); - * This is the same as previous case, but condition is checked once every - * 'interval' jiffies (if non-zero). 
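The exp_zombie_work/imp_zombie_work members added above replace the old "zombie list plus reaper thread" scheme: each export or import now carries its own work item that is queued for deferred destruction. The generic idiom looks roughly like this (a sketch; obd_zombie_imp_cull and zombie_wq are illustrative names, not necessarily the ones this series uses):

        INIT_WORK(&imp->imp_zombie_work, obd_zombie_imp_cull);

        /* instead of list_add() plus waking a private reaper thread: */
        queue_work(zombie_wq, &imp->imp_zombie_work);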
- * - * Subtle synchronization point: this macro does *not* necessary takes - * wait-queue spin-lock before returning, and, hence, following idiom is safe - * ONLY when caller provides some external locking: - * - * Thread1 Thread2 - * - * l_wait_event(&obj->wq, ....); (1) - * - * wake_up(&obj->wq): (2) - * spin_lock(&q->lock); (2.1) - * __wake_up_common(q, ...); (2.2) - * spin_unlock(&q->lock, flags); (2.3) - * - * kfree(obj); (3) - * - * As l_wait_event() may "short-cut" execution and return without taking - * wait-queue spin-lock, some additional synchronization is necessary to - * guarantee that step (3) can begin only after (2.3) finishes. - * - * XXX nikita: some ptlrpc daemon threads have races of that sort. - * - */ -static inline int back_to_sleep(void *arg) -{ - return 0; -} - -#define LWI_ON_SIGNAL_NOOP ((void (*)(void *))(-1)) - -struct l_wait_info { - long lwi_timeout; - long lwi_interval; - int lwi_allow_intr; - int (*lwi_on_timeout)(void *); - void (*lwi_on_signal)(void *); - void *lwi_cb_data; -}; - -/* NB: LWI_TIMEOUT ignores signals completely */ -#define LWI_TIMEOUT(time, cb, data) \ -((struct l_wait_info) { \ - .lwi_timeout = time, \ - .lwi_on_timeout = cb, \ - .lwi_cb_data = data, \ - .lwi_interval = 0, \ - .lwi_allow_intr = 0 \ -}) - -#define LWI_TIMEOUT_INTERVAL(time, interval, cb, data) \ -((struct l_wait_info) { \ - .lwi_timeout = time, \ - .lwi_on_timeout = cb, \ - .lwi_cb_data = data, \ - .lwi_interval = interval, \ - .lwi_allow_intr = 0 \ -}) - -#define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data) \ -((struct l_wait_info) { \ - .lwi_timeout = time, \ - .lwi_on_timeout = time_cb, \ - .lwi_on_signal = sig_cb, \ - .lwi_cb_data = data, \ - .lwi_interval = 0, \ - .lwi_allow_intr = 0 \ -}) - -#define LWI_TIMEOUT_INTR_ALL(time, time_cb, sig_cb, data) \ -((struct l_wait_info) { \ - .lwi_timeout = time, \ - .lwi_on_timeout = time_cb, \ - .lwi_on_signal = sig_cb, \ - .lwi_cb_data = data, \ - .lwi_interval = 0, \ - .lwi_allow_intr = 1 \ -}) - -#define LWI_INTR(cb, data) LWI_TIMEOUT_INTR(0, NULL, cb, data) - #define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \ sigmask(SIGTERM) | sigmask(SIGQUIT) | \ sigmask(SIGALRM)) - -/** - * wait_queue_entry_t of Linux (version < 2.6.34) is a FIFO list for exclusively - * waiting threads, which is not always desirable because all threads will - * be waken up again and again, even user only needs a few of them to be - * active most time. This is not good for performance because cache can - * be polluted by different threads. - * - * LIFO list can resolve this problem because we always wakeup the most - * recent active thread by default. - * - * NB: please don't call non-exclusive & exclusive wait on the same - * waitq if add_wait_queue_exclusive_head is used. - */ -#define add_wait_queue_exclusive_head(waitq, link) \ -{ \ - unsigned long flags; \ - \ - spin_lock_irqsave(&((waitq)->lock), flags); \ - __add_wait_queue_exclusive(waitq, link); \ - spin_unlock_irqrestore(&((waitq)->lock), flags); \ +static inline int l_fatal_signal_pending(struct task_struct *p) +{ + return signal_pending(p) && sigtestsetmask(&p->pending.signal, LUSTRE_FATAL_SIGS); } -/* - * wait for @condition to become true, but no longer than timeout, specified - * by @info. 
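With the LWI_* constructors gone, the remaining callers map onto stock wait primitives plus the l_wait_event_abortable() helpers defined below. Roughly, as a sketch of the correspondence rather than an exact drop-in:

        /* l_wait_event(wq, cond, &lwi), lwi = { 0 }  -> wait_event_idle(wq, cond)
         * LWI_TIMEOUT(t, NULL, NULL)                 -> wait_event_idle_timeout(wq, cond, t)
         * LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL)         -> l_wait_event_abortable(wq, cond)
         * LWI_TIMEOUT_INTR(t, ...)                   -> wait_event_idle_timeout() followed by
         *   (timeout first, then signals)               l_wait_event_abortable(), as done in
         *                                               ldlm_completion_ast() further down
         */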
- */ -#define __l_wait_event(wq, condition, info, ret, l_add_wait) \ -do { \ - wait_queue_entry_t __wait; \ - long __timeout = info->lwi_timeout; \ - sigset_t __blocked; \ - int __allow_intr = info->lwi_allow_intr; \ - \ - ret = 0; \ - if (condition) \ - break; \ - \ - init_waitqueue_entry(&__wait, current); \ - l_add_wait(&wq, &__wait); \ - \ - /* Block all signals (just the non-fatal ones if no timeout). */ \ - if (info->lwi_on_signal && (__timeout == 0 || __allow_intr)) \ - __blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS); \ - else \ - __blocked = cfs_block_sigsinv(0); \ - \ - for (;;) { \ - if (condition) \ - break; \ - \ - set_current_state(TASK_INTERRUPTIBLE); \ - \ - if (__timeout == 0) { \ - schedule(); \ - } else { \ - long interval = info->lwi_interval ? \ - min_t(long, \ - info->lwi_interval, __timeout) : \ - __timeout; \ - long remaining = schedule_timeout(interval);\ - __timeout = cfs_time_sub(__timeout, \ - cfs_time_sub(interval, remaining));\ - if (__timeout == 0) { \ - if (!info->lwi_on_timeout || \ - info->lwi_on_timeout(info->lwi_cb_data)) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - /* Take signals after the timeout expires. */ \ - if (info->lwi_on_signal) \ - (void)cfs_block_sigsinv(LUSTRE_FATAL_SIGS);\ - } \ - } \ - \ - set_current_state(TASK_RUNNING); \ - \ - if (condition) \ - break; \ - if (signal_pending(current)) { \ - if (info->lwi_on_signal && \ - (__timeout == 0 || __allow_intr)) { \ - if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \ - info->lwi_on_signal(info->lwi_cb_data);\ - ret = -EINTR; \ - break; \ - } \ - /* We have to do this here because some signals */ \ - /* are not blockable - ie from strace(1). */ \ - /* In these cases we want to schedule_timeout() */ \ - /* again, because we don't want that to return */ \ - /* -EINTR when the RPC actually succeeded. */ \ - /* the recalc_sigpending() below will deliver the */ \ - /* signal properly. 
*/ \ - cfs_clear_sigpending(); \ - } \ - } \ - \ - cfs_restore_sigs(__blocked); \ - \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define l_wait_event(wq, condition, info) \ -({ \ - int __ret; \ - struct l_wait_info *__info = (info); \ - \ - __l_wait_event(wq, condition, __info, \ - __ret, add_wait_queue); \ - __ret; \ -}) +/** @} lib */ -#define l_wait_event_exclusive(wq, condition, info) \ -({ \ - int __ret; \ - struct l_wait_info *__info = (info); \ - \ - __l_wait_event(wq, condition, __info, \ - __ret, add_wait_queue_exclusive); \ - __ret; \ -}) -#define l_wait_event_exclusive_head(wq, condition, info) \ -({ \ - int __ret; \ - struct l_wait_info *__info = (info); \ - \ - __l_wait_event(wq, condition, __info, \ - __ret, add_wait_queue_exclusive_head); \ - __ret; \ -}) -#define l_wait_condition(wq, condition) \ -({ \ - struct l_wait_info lwi = { 0 }; \ - l_wait_event(wq, condition, &lwi); \ +/* l_wait_event_abortable() is a bit like wait_event_killable() + * except there is a fixed set of signals which will abort: + * LUSTRE_FATAL_SIGS + */ +#define l_wait_event_abortable(wq, condition) \ +({ \ + sigset_t __old_blocked; \ + int __ret = 0; \ + cfs_block_sigsinv(LUSTRE_FATAL_SIGS, &__old_blocked); \ + __ret = wait_event_interruptible(wq, condition); \ + cfs_restore_sigs(&__old_blocked); \ + __ret; \ }) -#define l_wait_condition_exclusive(wq, condition) \ -({ \ - struct l_wait_info lwi = { 0 }; \ - l_wait_event_exclusive(wq, condition, &lwi); \ +#define l_wait_event_abortable_timeout(wq, condition, timeout) \ +({ \ + sigset_t __old_blocked; \ + int __ret = 0; \ + cfs_block_sigsinv(LUSTRE_FATAL_SIGS, &__old_blocked); \ + __ret = wait_event_interruptible_timeout(wq, condition, timeout);\ + cfs_restore_sigs(&__old_blocked); \ + __ret; \ }) -#define l_wait_condition_exclusive_head(wq, condition) \ -({ \ - struct l_wait_info lwi = { 0 }; \ - l_wait_event_exclusive_head(wq, condition, &lwi); \ +#define l_wait_event_abortable_exclusive(wq, condition) \ +({ \ + sigset_t __old_blocked; \ + int __ret = 0; \ + cfs_block_sigsinv(LUSTRE_FATAL_SIGS, &__old_blocked); \ + __ret = wait_event_interruptible_exclusive(wq, condition); \ + cfs_restore_sigs(&__old_blocked); \ + __ret; \ }) - -/** @} lib */ - #endif /* _LUSTRE_LIB_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h index f4298e5f7543..080ec1f8e19f 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lmv.h +++ b/drivers/staging/lustre/lustre/include/lustre_lmv.h @@ -63,7 +63,7 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2) lsm1->lsm_md_master_mdt_index != lsm2->lsm_md_master_mdt_index || lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type || lsm1->lsm_md_layout_version != lsm2->lsm_md_layout_version || - !strcmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name)) + strcmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name) != 0) return false; for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) { diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h index 007e1ec3f0f4..a9c9992a2502 100644 --- a/drivers/staging/lustre/lustre/include/lustre_mdc.h +++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h @@ -124,7 +124,7 @@ static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, */ while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) { mutex_unlock(&lck->rpcl_mutex); - schedule_timeout(cfs_time_seconds(1) / 4); + schedule_timeout(HZ / 4); goto again; } diff --git 
a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index 3ff5de4770e8..d35ae0cda8d2 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -1259,8 +1259,6 @@ enum { SVC_STOPPING = 1 << 1, SVC_STARTING = 1 << 2, SVC_RUNNING = 1 << 3, - SVC_EVENT = 1 << 4, - SVC_SIGNAL = 1 << 5, }; #define PTLRPC_THR_NAME_LEN 32 @@ -1303,11 +1301,6 @@ struct ptlrpc_thread { char t_name[PTLRPC_THR_NAME_LEN]; }; -static inline int thread_is_init(struct ptlrpc_thread *thread) -{ - return thread->t_flags == 0; -} - static inline int thread_is_stopped(struct ptlrpc_thread *thread) { return !!(thread->t_flags & SVC_STOPPED); @@ -1328,16 +1321,6 @@ static inline int thread_is_running(struct ptlrpc_thread *thread) return !!(thread->t_flags & SVC_RUNNING); } -static inline int thread_is_event(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_EVENT); -} - -static inline int thread_is_signal(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_SIGNAL); -} - static inline void thread_clear_flags(struct ptlrpc_thread *thread, __u32 flags) { thread->t_flags &= ~flags; @@ -1821,6 +1804,9 @@ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd); */ void ptlrpc_request_committed(struct ptlrpc_request *req, int force); +int ptlrpc_inc_ref(void); +void ptlrpc_dec_ref(void); + void ptlrpc_init_client(int req_portal, int rep_portal, char *name, struct ptlrpc_client *); struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid); @@ -2268,7 +2254,7 @@ static inline int ptlrpc_send_limit_expired(struct ptlrpc_request *req) { if (req->rq_delay_limit != 0 && time_before(cfs_time_add(req->rq_queued_time, - cfs_time_seconds(req->rq_delay_limit)), + req->rq_delay_limit * HZ), cfs_time_current())) { return 1; } diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h index 64b6fd4fed8f..c5cb07acd0da 100644 --- a/drivers/staging/lustre/lustre/include/lustre_sec.h +++ b/drivers/staging/lustre/lustre/include/lustre_sec.h @@ -1058,9 +1058,6 @@ int sptlrpc_current_user_desc_size(void); int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset); int sptlrpc_unpack_user_desc(struct lustre_msg *req, int offset, int swabbed); -#define CFS_CAP_CHOWN_MASK (1 << CFS_CAP_CHOWN) -#define CFS_CAP_SYS_RESOURCE_MASK (1 << CFS_CAP_SYS_RESOURCE) - enum { LUSTRE_SEC_NONE = 0, LUSTRE_SEC_REMOTE = 1, diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h index 4368f4e9f208..f1233ca7d337 100644 --- a/drivers/staging/lustre/lustre/include/obd.h +++ b/drivers/staging/lustre/lustre/include/obd.h @@ -191,7 +191,7 @@ struct client_obd { struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ /* the grant values are protected by loi_list_lock below */ - unsigned long cl_dirty_pages; /* all _dirty_ in pahges */ + unsigned long cl_dirty_pages; /* all _dirty_ in pages */ unsigned long cl_dirty_max_pages; /* allowed w/o rpc */ unsigned long cl_dirty_transit; /* dirty synchronous */ unsigned long cl_avail_grant; /* bytes of credit for ost */ diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h index 531e8ddfa9e5..f24dd74ffa09 100644 --- a/drivers/staging/lustre/lustre/include/obd_class.h +++ b/drivers/staging/lustre/lustre/include/obd_class.h @@ -294,10 +294,10 @@ struct obdo; void obdo_to_ioobj(const struct 
obdo *oa, struct obd_ioobj *ioobj); -#define OBT(dev) (dev)->obd_type -#define OBP(dev, op) (dev)->obd_type->typ_dt_ops->op -#define MDP(dev, op) (dev)->obd_type->typ_md_ops->op -#define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op +#define OBT(dev) ((dev)->obd_type) +#define OBP(dev, op) ((dev)->obd_type->typ_dt_ops->op) +#define MDP(dev, op) ((dev)->obd_type->typ_md_ops->op) +#define CTXTP(ctxt, op) ((ctxt)->loc_logops->lop_##op) /* * Ensure obd_setup: used for cleanup which must be called diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index 3f4fe290f6ea..8595091b8b86 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -516,7 +516,7 @@ extern char obd_jobid_var[]; #define POISON_PTR(ptr) ((void)0) #else #define POISON(ptr, c, s) memset(ptr, c, s) -#define POISON_PTR(ptr) (ptr) = (void *)0xdeadbeef +#define POISON_PTR(ptr) ((ptr) = (void *)0xdeadbeef) #endif #ifdef POISON_BULK diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c index 657ab95091a0..411b540b96d9 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c @@ -310,24 +310,6 @@ reprocess: return LDLM_ITER_CONTINUE; } -struct ldlm_flock_wait_data { - struct ldlm_lock *fwd_lock; -}; - -static void -ldlm_flock_interrupted_wait(void *data) -{ - struct ldlm_lock *lock; - - lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock; - - lock_res_and_lock(lock); - - /* client side - set flag to prevent lock from being put on LRU list */ - ldlm_set_cbpending(lock); - unlock_res_and_lock(lock); -} - /** * Flock completion callback function. * @@ -342,8 +324,6 @@ int ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) { struct file_lock *getlk = lock->l_ast_data; - struct ldlm_flock_wait_data fwd; - struct l_wait_info lwi; int rc = 0; OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT2, 4); @@ -372,13 +352,17 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, sleeping"); - fwd.fwd_lock = lock; - lwi = LWI_TIMEOUT_INTR(0, NULL, ldlm_flock_interrupted_wait, &fwd); /* Go to sleep until the lock is granted. 
*/ - rc = l_wait_event(lock->l_waitq, is_granted_or_cancelled(lock), &lwi); + rc = l_wait_event_abortable(lock->l_waitq, is_granted_or_cancelled(lock)); if (rc) { + lock_res_and_lock(lock); + + /* client side - set flag to prevent lock from being put on LRU list */ + ldlm_set_cbpending(lock); + unlock_res_and_lock(lock); + LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", rc); return rc; diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c index 7cbc6a06afec..95bea351d21d 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c @@ -1349,7 +1349,6 @@ out: if ((flags & LDLM_FL_LVB_READY) && !ldlm_is_lvb_ready(lock)) { __u64 wait_flags = LDLM_FL_LVB_READY | LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED; - struct l_wait_info lwi; if (lock->l_completion_ast) { int err = lock->l_completion_ast(lock, @@ -1366,13 +1365,10 @@ out: } } - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(obd_timeout), - NULL, LWI_ON_SIGNAL_NOOP, NULL); - /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */ - l_wait_event(lock->l_waitq, - lock->l_flags & wait_flags, - &lwi); + wait_event_idle_timeout(lock->l_waitq, + lock->l_flags & wait_flags, + obd_timeout * HZ); if (!ldlm_is_lvb_ready(lock)) { if (flags & LDLM_FL_TEST_LOCK) LDLM_LOCK_RELEASE(lock); @@ -1913,14 +1909,12 @@ void ldlm_cancel_callback(struct ldlm_lock *lock) ldlm_set_bl_done(lock); wake_up_all(&lock->l_waitq); } else if (!ldlm_is_bl_done(lock)) { - struct l_wait_info lwi = { 0 }; - /* * The lock is guaranteed to have been canceled once * returning from this function. */ unlock_res_and_lock(lock); - l_wait_event(lock->l_waitq, is_bl_done(lock), &lwi); + wait_event_idle(lock->l_waitq, is_bl_done(lock)); lock_res_and_lock(lock); } } diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c index 5f6e7c933b81..c772c68e5a49 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c @@ -163,7 +163,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, LDLM_DEBUG(lock, "client completion callback handler START"); if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) { - int to = cfs_time_seconds(1); + int to = HZ; while (to > 0) { set_current_state(TASK_INTERRUPTIBLE); @@ -327,7 +327,7 @@ static void ldlm_handle_gl_callback(struct ptlrpc_request *req, !lock->l_readers && !lock->l_writers && cfs_time_after(cfs_time_current(), cfs_time_add(lock->l_last_used, - cfs_time_seconds(10)))) { + 10 * HZ))) { unlock_res_and_lock(lock); if (ldlm_bl_to_thread_lock(ns, NULL, lock)) ldlm_handle_bl_callback(ns, NULL, lock); @@ -833,17 +833,15 @@ static int ldlm_bl_thread_main(void *arg) /* cannot use bltd after this, it is only on caller's stack */ while (1) { - struct l_wait_info lwi = { 0 }; struct ldlm_bl_work_item *blwi = NULL; struct obd_export *exp = NULL; int rc; rc = ldlm_bl_get_work(blp, &blwi, &exp); if (!rc) - l_wait_event_exclusive(blp->blp_waitq, - ldlm_bl_get_work(blp, &blwi, - &exp), - &lwi); + wait_event_idle_exclusive(blp->blp_waitq, + ldlm_bl_get_work(blp, &blwi, + &exp)); atomic_inc(&blp->blp_busy_threads); if (ldlm_bl_thread_need_create(blp, blwi)) @@ -871,6 +869,10 @@ int ldlm_get_ref(void) { int rc = 0; + rc = ptlrpc_inc_ref(); + if (rc) + return rc; + mutex_lock(&ldlm_ref_mutex); if (++ldlm_refcount == 1) { rc = ldlm_setup(); @@ -879,14 +881,18 @@ int ldlm_get_ref(void) } mutex_unlock(&ldlm_ref_mutex); + if (rc) + 
ptlrpc_dec_ref(); + return rc; } void ldlm_put_ref(void) { + int rc = 0; mutex_lock(&ldlm_ref_mutex); if (ldlm_refcount == 1) { - int rc = ldlm_cleanup(); + rc = ldlm_cleanup(); if (rc) CERROR("ldlm_cleanup failed: %d\n", rc); @@ -896,6 +902,8 @@ void ldlm_put_ref(void) ldlm_refcount--; } mutex_unlock(&ldlm_ref_mutex); + if (!rc) + ptlrpc_dec_ref(); } static ssize_t cancel_unused_locks_before_replay_show(struct kobject *kobj, diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index 8563bd32befa..53b8f33e54b5 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c @@ -784,9 +784,6 @@ static int ldlm_pool_granted(struct ldlm_pool *pl) return atomic_read(&pl->pl_granted); } -static struct ptlrpc_thread *ldlm_pools_thread; -static struct completion ldlm_pools_comp; - /* * count locks from all namespaces (if possible). Returns number of * cached locks. @@ -899,8 +896,12 @@ static unsigned long ldlm_pools_cli_scan(struct shrinker *s, sc->gfp_mask); } -static int ldlm_pools_recalc(enum ldlm_side client) +static void ldlm_pools_recalc(struct work_struct *ws); +static DECLARE_DELAYED_WORK(ldlm_recalc_pools, ldlm_pools_recalc); + +static void ldlm_pools_recalc(struct work_struct *ws) { + enum ldlm_side client = LDLM_NAMESPACE_CLIENT; struct ldlm_namespace *ns; struct ldlm_namespace *ns_old = NULL; /* seconds of sleep if no active namespaces */ @@ -982,97 +983,19 @@ static int ldlm_pools_recalc(enum ldlm_side client) /* Wake up the blocking threads from time to time. */ ldlm_bl_thread_wakeup(); - return time; -} - -static int ldlm_pools_thread_main(void *arg) -{ - struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg; - int c_time; - - thread_set_flags(thread, SVC_RUNNING); - wake_up(&thread->t_ctl_waitq); - - CDEBUG(D_DLMTRACE, "%s: pool thread starting, process %d\n", - "ldlm_poold", current_pid()); - - while (1) { - struct l_wait_info lwi; - - /* - * Recal all pools on this tick. - */ - c_time = ldlm_pools_recalc(LDLM_NAMESPACE_CLIENT); - - /* - * Wait until the next check time, or until we're - * stopped. 
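The ldlm_pools thread removal above converts the periodic recalc into a self-rearming delayed work item. Skeleton of that idiom (do_recalc() is an illustrative stand-in for the real body):

        static void recalc_fn(struct work_struct *ws);
        static DECLARE_DELAYED_WORK(recalc_work, recalc_fn);

        static void recalc_fn(struct work_struct *ws)
        {
                int secs = do_recalc();          /* seconds until the next pass */

                schedule_delayed_work(&recalc_work, secs * HZ);  /* re-arm */
        }

        /* start: schedule_delayed_work(&recalc_work, 0);
         * stop:  cancel_delayed_work_sync(&recalc_work); waits for a running pass
         */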
- */ - lwi = LWI_TIMEOUT(cfs_time_seconds(c_time), - NULL, NULL); - l_wait_event(thread->t_ctl_waitq, - thread_is_stopping(thread) || - thread_is_event(thread), - &lwi); - - if (thread_test_and_clear_flags(thread, SVC_STOPPING)) - break; - thread_test_and_clear_flags(thread, SVC_EVENT); - } - - thread_set_flags(thread, SVC_STOPPED); - wake_up(&thread->t_ctl_waitq); - - CDEBUG(D_DLMTRACE, "%s: pool thread exiting, process %d\n", - "ldlm_poold", current_pid()); - - complete_and_exit(&ldlm_pools_comp, 0); + schedule_delayed_work(&ldlm_recalc_pools, time * HZ); } static int ldlm_pools_thread_start(void) { - struct l_wait_info lwi = { 0 }; - struct task_struct *task; - - if (ldlm_pools_thread) - return -EALREADY; - - ldlm_pools_thread = kzalloc(sizeof(*ldlm_pools_thread), GFP_NOFS); - if (!ldlm_pools_thread) - return -ENOMEM; - - init_completion(&ldlm_pools_comp); - init_waitqueue_head(&ldlm_pools_thread->t_ctl_waitq); + schedule_delayed_work(&ldlm_recalc_pools, 0); - task = kthread_run(ldlm_pools_thread_main, ldlm_pools_thread, - "ldlm_poold"); - if (IS_ERR(task)) { - CERROR("Can't start pool thread, error %ld\n", PTR_ERR(task)); - kfree(ldlm_pools_thread); - ldlm_pools_thread = NULL; - return PTR_ERR(task); - } - l_wait_event(ldlm_pools_thread->t_ctl_waitq, - thread_is_running(ldlm_pools_thread), &lwi); return 0; } static void ldlm_pools_thread_stop(void) { - if (!ldlm_pools_thread) - return; - - thread_set_flags(ldlm_pools_thread, SVC_STOPPING); - wake_up(&ldlm_pools_thread->t_ctl_waitq); - - /* - * Make sure that pools thread is finished before freeing @thread. - * This fixes possible race and oops due to accessing freed memory - * in pools thread. - */ - wait_for_completion(&ldlm_pools_comp); - kfree(ldlm_pools_thread); - ldlm_pools_thread = NULL; + cancel_delayed_work_sync(&ldlm_recalc_pools); } static struct shrinker ldlm_pools_cli_shrinker = { @@ -1086,20 +1009,15 @@ int ldlm_pools_init(void) int rc; rc = ldlm_pools_thread_start(); - if (rc) - return rc; - - rc = register_shrinker(&ldlm_pools_cli_shrinker); - if (rc) - ldlm_pools_thread_stop(); + if (!rc) + rc = register_shrinker(&ldlm_pools_cli_shrinker); return rc; } void ldlm_pools_fini(void) { - if (ldlm_pools_thread) - unregister_shrinker(&ldlm_pools_cli_shrinker); + unregister_shrinker(&ldlm_pools_cli_shrinker); ldlm_pools_thread_stop(); } diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c index 6aa37463db46..c3c9186b74ce 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c @@ -72,15 +72,6 @@ MODULE_PARM_DESC(ldlm_enqueue_min, "lock enqueue timeout minimum"); /* in client side, whether the cached locks will be canceled before replay */ unsigned int ldlm_cancel_unused_locks_before_replay = 1; -static void interrupted_completion_wait(void *data) -{ -} - -struct lock_wait_data { - struct ldlm_lock *lwd_lock; - __u32 lwd_conn_cnt; -}; - struct ldlm_async_args { struct lustre_handle lock_handle; }; @@ -112,10 +103,8 @@ static int ldlm_request_bufsize(int count, int type) return sizeof(struct ldlm_request) + avail; } -static int ldlm_expired_completion_wait(void *data) +static void ldlm_expired_completion_wait(struct ldlm_lock *lock, __u32 conn_cnt) { - struct lock_wait_data *lwd = data; - struct ldlm_lock *lock = lwd->lwd_lock; struct obd_import *imp; struct obd_device *obd; @@ -135,19 +124,17 @@ static int ldlm_expired_completion_wait(void *data) if (last_dump == 0) libcfs_debug_dumplog(); } - return 0; + 
return; } obd = lock->l_conn_export->exp_obd; imp = obd->u.cli.cl_import; - ptlrpc_fail_import(imp, lwd->lwd_conn_cnt); + ptlrpc_fail_import(imp, conn_cnt); LDLM_ERROR(lock, "lock timed out (enqueued at %lld, %llds ago), entering recovery for %s@%s", (s64)lock->l_last_activity, (s64)(ktime_get_real_seconds() - lock->l_last_activity), obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid); - - return 0; } /** @@ -251,11 +238,10 @@ EXPORT_SYMBOL(ldlm_completion_ast_async); int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) { /* XXX ALLOCATE - 160 bytes */ - struct lock_wait_data lwd; struct obd_device *obd; struct obd_import *imp = NULL; - struct l_wait_info lwi; __u32 timeout; + __u32 conn_cnt = 0; int rc = 0; if (flags == LDLM_FL_WAIT_NOREPROC) { @@ -281,32 +267,33 @@ noreproc: timeout = ldlm_cp_timeout(lock); - lwd.lwd_lock = lock; lock->l_last_activity = ktime_get_real_seconds(); - if (ldlm_is_no_timeout(lock)) { - LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT"); - lwi = LWI_INTR(interrupted_completion_wait, &lwd); - } else { - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout), - ldlm_expired_completion_wait, - interrupted_completion_wait, &lwd); - } - if (imp) { spin_lock(&imp->imp_lock); - lwd.lwd_conn_cnt = imp->imp_conn_cnt; + conn_cnt = imp->imp_conn_cnt; spin_unlock(&imp->imp_lock); } - if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST, OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) { ldlm_set_fail_loc(lock); rc = -EINTR; } else { - /* Go to sleep until the lock is granted or cancelled. */ - rc = l_wait_event(lock->l_waitq, - is_granted_or_cancelled(lock), &lwi); + /* Go to sleep until the lock is granted or canceled. */ + if (!ldlm_is_no_timeout(lock)) { + /* Wait uninterruptible for a while first */ + rc = wait_event_idle_timeout(lock->l_waitq, + is_granted_or_cancelled(lock), + timeout * HZ); + if (rc == 0) + ldlm_expired_completion_wait(lock, conn_cnt); + } + /* Now wait abortable */ + if (rc == 0) + rc = l_wait_event_abortable(lock->l_waitq, + is_granted_or_cancelled(lock)); + else + rc = 0; } if (rc) { diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c index 9958533cc227..4c44603ab6f9 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c @@ -799,7 +799,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY"); if (lock->l_flags & LDLM_FL_FAIL_LOC) { set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(4)); + schedule_timeout(4 * HZ); set_current_state(TASK_RUNNING); } if (lock->l_completion_ast) @@ -879,7 +879,6 @@ static int __ldlm_namespace_free(struct ldlm_namespace *ns, int force) ldlm_namespace_cleanup(ns, force ? LDLM_FL_LOCAL_ONLY : 0); if (atomic_read(&ns->ns_bref) > 0) { - struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); int rc; CDEBUG(D_DLMTRACE, @@ -887,11 +886,12 @@ static int __ldlm_namespace_free(struct ldlm_namespace *ns, int force) ldlm_ns_name(ns), atomic_read(&ns->ns_bref)); force_wait: if (force) - lwi = LWI_TIMEOUT(msecs_to_jiffies(obd_timeout * - MSEC_PER_SEC) / 4, NULL, NULL); - - rc = l_wait_event(ns->ns_waitq, - atomic_read(&ns->ns_bref) == 0, &lwi); + rc = wait_event_idle_timeout(ns->ns_waitq, + atomic_read(&ns->ns_bref) == 0, + obd_timeout * HZ / 4) ? 
0 : -ETIMEDOUT; + else + rc = l_wait_event_abortable(ns->ns_waitq, + atomic_read(&ns->ns_bref) == 0); /* Forced cleanups should be able to reclaim all references, * so it's safe to wait forever... we can't leak locks... diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index 6cd0318062e8..11b82c639bfe 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -74,6 +74,12 @@ static void ll_release(struct dentry *de) * an AST before calling d_revalidate_it(). The dentry still exists (marked * INVALID) so d_lookup() matches it, but we have no lock on it (so * lock_match() fails) and we spin around real_lookup(). + * + * This race doesn't apply to lookups in d_alloc_parallel(), and for + * those we want to ensure that only one dentry with a given name is + * in ll_lookup_nd() at a time. So allow invalid dentries to match + * while d_in_lookup(). We will be called again when the lookup + * completes, and can give a different answer then. */ static int ll_dcompare(const struct dentry *dentry, unsigned int len, const char *str, @@ -93,6 +99,10 @@ static int ll_dcompare(const struct dentry *dentry, if (d_mountpoint(dentry)) return 0; + /* ensure exclusion against parallel lookup of the same name */ + if (d_in_lookup((struct dentry *)dentry)) + return 0; + if (d_lustre_invalid(dentry)) return 1; diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index 99b0b77c75f5..d10d27268323 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -885,7 +885,7 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) switch (cmd) { case Q_SETQUOTA: case Q_SETINFO: - if (!capable(CFS_CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; break; case Q_GETQUOTA: @@ -893,7 +893,7 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) || (type == GRPQUOTA && !in_egroup_p(make_kgid(&init_user_ns, id)))) && - !capable(CFS_CAP_SYS_ADMIN)) + !capable(CAP_SYS_ADMIN)) return -EPERM; break; case Q_GETINFO: @@ -1452,7 +1452,7 @@ out_quotactl: } case OBD_IOC_CHANGELOG_SEND: case OBD_IOC_CHANGELOG_CLEAR: - if (!capable(CFS_CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg, @@ -1497,7 +1497,7 @@ out_quotactl: if (totalsize >= MDS_MAXREQSIZE / 3) return -E2BIG; - hur = libcfs_kvzalloc(totalsize, GFP_NOFS); + hur = kzalloc(totalsize, GFP_NOFS); if (!hur) return -ENOMEM; @@ -1556,7 +1556,7 @@ out_quotactl: return rc; } case LL_IOC_HSM_CT_START: - if (!capable(CFS_CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg, diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 938b859b6650..ca5faea13b7e 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -1315,10 +1315,10 @@ static int ll_lov_setea(struct inode *inode, struct file *file, sizeof(struct lov_user_ost_data); int rc; - if (!capable(CFS_CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; - lump = libcfs_kvzalloc(lum_size, GFP_NOFS); + lump = kzalloc(lum_size, GFP_NOFS); if (!lump) return -ENOMEM; @@ -1570,7 +1570,7 @@ int ll_fid2path(struct inode *inode, void __user *arg) size_t outsize; int rc; - if 
(!capable(CFS_CAP_DAC_READ_SEARCH) && + if (!capable(CAP_DAC_READ_SEARCH) && !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) return -EPERM; @@ -1840,7 +1840,7 @@ int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss) * NOT defined in HSM_USER_MASK. */ if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) && - !capable(CFS_CAP_SYS_ADMIN)) + !capable(CAP_SYS_ADMIN)) return -EPERM; /* Detect out-of range archive id */ @@ -2998,7 +2998,7 @@ static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, num_bytes = sizeof(*fiemap) + (extent_count * sizeof(struct fiemap_extent)); - fiemap = libcfs_kvzalloc(num_bytes, GFP_NOFS); + fiemap = kvzalloc(num_bytes, GFP_KERNEL); if (!fiemap) return -ENOMEM; @@ -3361,7 +3361,7 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) goto out; } - lvbdata = libcfs_kvzalloc(lmmsize, GFP_NOFS); + lvbdata = kvzalloc(lmmsize, GFP_NOFS); if (!lvbdata) { rc = -ENOMEM; goto out; diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index f68c2e88f12b..d46bcf71b273 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -1070,8 +1070,8 @@ struct ll_statahead_info { sai_agl_valid:1,/* AGL is valid for the dir */ sai_in_readpage:1;/* statahead in readdir() */ wait_queue_head_t sai_waitq; /* stat-ahead wait queue */ - struct ptlrpc_thread sai_thread; /* stat-ahead thread */ - struct ptlrpc_thread sai_agl_thread; /* AGL thread */ + struct task_struct *sai_task; /* stat-ahead thread */ + struct task_struct *sai_agl_task; /* AGL thread */ struct list_head sai_interim_entries; /* entries which got async * stat reply, but not * instantiated diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 6735a6f006d2..e7500c53fafc 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -879,9 +879,15 @@ int ll_fill_super(struct super_block *sb) CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); + err = ptlrpc_inc_ref(); + if (err) + return err; + cfg = kzalloc(sizeof(*cfg), GFP_NOFS); - if (!cfg) - return -ENOMEM; + if (!cfg) { + err = -ENOMEM; + goto out_put; + } try_module_get(THIS_MODULE); @@ -891,7 +897,8 @@ int ll_fill_super(struct super_block *sb) if (!sbi) { module_put(THIS_MODULE); kfree(cfg); - return -ENOMEM; + err = -ENOMEM; + goto out_put; } err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags); @@ -958,6 +965,9 @@ out_free: LCONSOLE_WARN("Mounted %s\n", profilenm); kfree(cfg); +out_put: + if (err) + ptlrpc_dec_ref(); return err; } /* ll_fill_super */ @@ -986,16 +996,12 @@ void ll_put_super(struct super_block *sb) } /* Wait for unstable pages to be committed to stable storage */ - if (!force) { - struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); - - rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq, - !atomic_long_read(&sbi->ll_cache->ccc_unstable_nr), - &lwi); - } + if (!force) + rc = l_wait_event_abortable(sbi->ll_cache->ccc_unstable_waitq, + !atomic_long_read(&sbi->ll_cache->ccc_unstable_nr)); ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr); - if (!force && rc != -EINTR) + if (!force && rc != -ERESTARTSYS) LASSERTF(!ccc_count, "count: %li\n", ccc_count); /* We need to set force before the lov_disconnect in @@ -1032,6 +1038,8 @@ void ll_put_super(struct super_block *sb) cl_env_cache_purge(~0); module_put(THIS_MODULE); + + 
ptlrpc_dec_ref(); } /* client_put_super */ struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock) @@ -1197,13 +1205,12 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) lmv_free_memmd(lli->lli_lsm_md); lli->lli_lsm_md = NULL; return 0; - } else { - /* - * The lustre_md from req does not include stripeEA, - * see ll_md_setattr - */ - return 0; } + /* + * The lustre_md from req does not include stripeEA, + * see ll_md_setattr + */ + return 0; } /* set the directory layout */ @@ -1454,7 +1461,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) /* POSIX: check before ATTR_*TIME_SET set (from setattr_prepare) */ if (attr->ia_valid & TIMES_SET_FLAGS) { if ((!uid_eq(current_fsuid(), inode->i_uid)) && - !capable(CFS_CAP_FOWNER)) + !capable(CAP_FOWNER)) return -EPERM; } @@ -1988,8 +1995,7 @@ void ll_umount_begin(struct super_block *sb) struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_device *obd; struct obd_ioctl_data *ioc_data; - wait_queue_head_t waitq; - struct l_wait_info lwi; + int cnt = 0; CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb, sb->s_count, atomic_read(&sb->s_active)); @@ -2025,10 +2031,10 @@ void ll_umount_begin(struct super_block *sb) * and then continue. For now, we just periodically checking for vfs * to decrement mnt_cnt and hope to finish it within 10sec. */ - init_waitqueue_head(&waitq); - lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(10), - cfs_time_seconds(1), NULL, NULL); - l_wait_event(waitq, may_umount(sbi->ll_mnt.mnt), &lwi); + while (cnt < 10 && !may_umount(sbi->ll_mnt.mnt)) { + schedule_timeout_uninterruptible(HZ); + cnt++; + } schedule(); } @@ -2143,7 +2149,7 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, md.posix_acl = NULL; } #endif - rc = -ENOMEM; + rc = PTR_ERR(*inode); CERROR("new_inode -fatal: rc %d\n", rc); goto out; } @@ -2602,7 +2608,7 @@ int ll_getparent(struct file *file, struct getparent __user *arg) u32 linkno; int rc; - if (!capable(CFS_CAP_DAC_READ_SEARCH) && + if (!capable(CAP_DAC_READ_SEARCH) && !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) return -EPERM; @@ -2653,7 +2659,7 @@ int ll_getparent(struct file *file, struct getparent __user *arg) } lb_free: - lu_buf_free(&buf); + kvfree(buf.lb_buf); ldata_free: kfree(ldata); return rc; diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index c0533bd6f352..214b07554e62 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -177,14 +177,14 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage, vio->u.fault.ft_vma = vma; vio->u.fault.ft_vmpage = vmpage; - set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM)); + cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM), &set); inode = vvp_object_inode(io->ci_obj); lli = ll_i2info(inode); result = cl_io_loop(env, io); - cfs_restore_sigs(set); + cfs_restore_sigs(&set); if (result == 0) { struct inode *inode = file_inode(vma->vm_file); @@ -334,7 +334,7 @@ static int ll_fault(struct vm_fault *vmf) * so that it can be killed by admin but not cause segfault by * other signals. 
*/ - set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM)); + cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM), &set); restart: result = ll_fault0(vmf->vma, vmf); @@ -360,7 +360,7 @@ restart: result = VM_FAULT_LOCKED; } - cfs_restore_sigs(set); + cfs_restore_sigs(&set); return result; } diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index a2687f46a16d..6c9ec462eb41 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -380,52 +380,45 @@ void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) } /* - * try to reuse three types of dentry: - * 1. unhashed alias, this one is unhashed by d_invalidate (but it may be valid - * by concurrent .revalidate). - * 2. INVALID alias (common case for no valid ldlm lock held, but this flag may - * be cleared by others calling d_lustre_revalidate). - * 3. DISCONNECTED alias. + * Try to reuse unhashed or invalidated dentries. + * This is very similar to d_exact_alias(), and any changes in one should be + * considered for inclusion in the other. The differences are that we don't + * need an unhashed alias, and we don't want d_compare to be used for + * comparison. */ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry) { - struct dentry *alias, *discon_alias, *invalid_alias; + struct dentry *alias; if (hlist_empty(&inode->i_dentry)) return NULL; - discon_alias = NULL; - invalid_alias = NULL; - spin_lock(&inode->i_lock); hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { LASSERT(alias != dentry); + /* + * Don't need alias->d_lock here, because aliases with + * d_parent == entry->d_parent are not subject to name or + * parent changes, because the parent inode i_mutex is held. 
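Note the changed cfs_block_sigsinv() calling convention used in llite_mmap.c above: the previous signal mask is now returned through a sigset_t out-parameter rather than by value. Callers follow this shape (sketch, names as in the patch):

        sigset_t old;

        cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM), &old);
        /* only SIGKILL/SIGTERM can interrupt this region */
        result = cl_io_loop(env, io);
        cfs_restore_sigs(&old);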
+ */ - spin_lock(&alias->d_lock); - if ((alias->d_flags & DCACHE_DISCONNECTED) && - S_ISDIR(inode->i_mode)) - /* LASSERT(last_discon == NULL); LU-405, bz 20055 */ - discon_alias = alias; - else if (alias->d_parent == dentry->d_parent && - alias->d_name.hash == dentry->d_name.hash && - alias->d_name.len == dentry->d_name.len && - memcmp(alias->d_name.name, dentry->d_name.name, - dentry->d_name.len) == 0) - invalid_alias = alias; - spin_unlock(&alias->d_lock); - - if (invalid_alias) - break; - } - alias = invalid_alias ?: discon_alias ?: NULL; - if (alias) { + if (alias->d_parent != dentry->d_parent) + continue; + if (alias->d_name.hash != dentry->d_name.hash) + continue; + if (alias->d_name.len != dentry->d_name.len || + memcmp(alias->d_name.name, dentry->d_name.name, + dentry->d_name.len) != 0) + continue; spin_lock(&alias->d_lock); dget_dlock(alias); spin_unlock(&alias->d_lock); + spin_unlock(&inode->i_lock); + return alias; } spin_unlock(&inode->i_lock); - return alias; + return NULL; } /* @@ -434,7 +427,7 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry) */ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de) { - if (inode) { + if (inode && !S_ISDIR(inode->i_mode)) { struct dentry *new = ll_find_alias(inode, de); if (new) { @@ -445,8 +438,13 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de) new, d_inode(new), d_count(new), new->d_flags); return new; } + d_add(de, inode); + } else { + struct dentry *new = d_splice_alias(inode, de); + + if (new) + de = new; } - d_add(de, inode); CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n", de, d_inode(de), d_count(de), de->d_flags); return de; diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c index 90c7324575e4..155ce3cf6f60 100644 --- a/drivers/staging/lustre/lustre/llite/statahead.c +++ b/drivers/staging/lustre/lustre/llite/statahead.c @@ -267,7 +267,7 @@ sa_kill(struct ll_statahead_info *sai, struct sa_entry *entry) /* called by scanner after use, sa_entry will be killed */ static void -sa_put(struct ll_statahead_info *sai, struct sa_entry *entry) +sa_put(struct ll_statahead_info *sai, struct sa_entry *entry, struct ll_inode_info *lli) { struct sa_entry *tmp, *next; @@ -295,7 +295,11 @@ sa_put(struct ll_statahead_info *sai, struct sa_entry *entry) sa_kill(sai, tmp); } - wake_up(&sai->sai_thread.t_ctl_waitq); + spin_lock(&lli->lli_sa_lock); + if (sai->sai_task) + wake_up_process(sai->sai_task); + spin_unlock(&lli->lli_sa_lock); + } /* @@ -383,7 +387,7 @@ static void ll_agl_add(struct ll_statahead_info *sai, } if (added > 0) - wake_up(&sai->sai_agl_thread.t_ctl_waitq); + wake_up_process(sai->sai_agl_task); } /* allocate sai */ @@ -403,8 +407,6 @@ static struct ll_statahead_info *ll_sai_alloc(struct dentry *dentry) sai->sai_max = LL_SA_RPC_MIN; sai->sai_index = 1; init_waitqueue_head(&sai->sai_waitq); - init_waitqueue_head(&sai->sai_thread.t_ctl_waitq); - init_waitqueue_head(&sai->sai_agl_thread.t_ctl_waitq); INIT_LIST_HEAD(&sai->sai_interim_entries); INIT_LIST_HEAD(&sai->sai_entries); @@ -466,8 +468,8 @@ static void ll_sai_put(struct ll_statahead_info *sai) lli->lli_sai = NULL; spin_unlock(&lli->lli_sa_lock); - LASSERT(thread_is_stopped(&sai->sai_thread)); - LASSERT(thread_is_stopped(&sai->sai_agl_thread)); + LASSERT(sai->sai_task == NULL); + LASSERT(sai->sai_agl_task == NULL); LASSERT(sai->sai_sent == sai->sai_replied); LASSERT(!sa_has_callback(sai)); @@ -647,7 +649,6 @@ static int 
ll_statahead_interpret(struct ptlrpc_request *req, struct ll_inode_info *lli = ll_i2info(dir); struct ll_statahead_info *sai = lli->lli_sai; struct sa_entry *entry = (struct sa_entry *)minfo->mi_cbdata; - wait_queue_head_t *waitq = NULL; __u64 handle = 0; if (it_disposition(it, DISP_LOOKUP_NEG)) @@ -658,7 +659,6 @@ static int ll_statahead_interpret(struct ptlrpc_request *req, * sai should be always valid, no need to refcount */ LASSERT(sai); - LASSERT(!thread_is_stopped(&sai->sai_thread)); LASSERT(entry); CDEBUG(D_READA, "sa_entry %.*s rc %d\n", @@ -682,8 +682,9 @@ static int ll_statahead_interpret(struct ptlrpc_request *req, spin_lock(&lli->lli_sa_lock); if (rc) { if (__sa_make_ready(sai, entry, rc)) - waitq = &sai->sai_waitq; + wake_up(&sai->sai_waitq); } else { + int first = 0; entry->se_minfo = minfo; entry->se_req = ptlrpc_request_addref(req); /* @@ -694,14 +695,15 @@ static int ll_statahead_interpret(struct ptlrpc_request *req, */ entry->se_handle = handle; if (!sa_has_callback(sai)) - waitq = &sai->sai_thread.t_ctl_waitq; + first = 1; list_add_tail(&entry->se_list, &sai->sai_interim_entries); + + if (first && sai->sai_task) + wake_up_process(sai->sai_task); } sai->sai_replied++; - if (waitq) - wake_up(waitq); spin_unlock(&lli->lli_sa_lock); return rc; @@ -861,37 +863,13 @@ static int ll_agl_thread(void *arg) struct inode *dir = d_inode(parent); struct ll_inode_info *plli = ll_i2info(dir); struct ll_inode_info *clli; - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct ll_statahead_info *sai; - struct ptlrpc_thread *thread; - struct l_wait_info lwi = { 0 }; + /* We already own this reference, so it is safe to take it without a lock. */ + struct ll_statahead_info *sai = plli->lli_sai; - sai = ll_sai_get(dir); - thread = &sai->sai_agl_thread; - thread->t_pid = current_pid(); CDEBUG(D_READA, "agl thread started: sai %p, parent %pd\n", sai, parent); - atomic_inc(&sbi->ll_agl_total); - spin_lock(&plli->lli_agl_lock); - sai->sai_agl_valid = 1; - if (thread_is_init(thread)) - /* If someone else has changed the thread state - * (e.g. already changed to SVC_STOPPING), we can't just - * blindly overwrite that setting. 
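
An editorial sketch of the producer side of the wakeup pattern used in ll_statahead_interpret() above: wake_up_process() is called only on the empty-to-non-empty transition of the interim list, and only while the task pointer is still non-NULL under the same spinlock the exiting thread uses to clear it, so a task that has already gone away is never woken. The names here (work_list, work_lock, worker_task, queue_item) are hypothetical, not from the patch:

    #include <linux/list.h>
    #include <linux/sched.h>
    #include <linux/spinlock.h>

    static LIST_HEAD(work_list);
    static DEFINE_SPINLOCK(work_lock);
    static struct task_struct *worker_task; /* cleared when the worker exits */

    static void queue_item(struct list_head *item)
    {
            bool first;

            spin_lock(&work_lock);
            first = list_empty(&work_list);
            list_add_tail(item, &work_list);
            /* wake only on empty -> non-empty, and only while the consumer
             * is known (under the lock) to still exist
             */
            if (first && worker_task)
                    wake_up_process(worker_task);
            spin_unlock(&work_lock);
    }
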
- */ - thread_set_flags(thread, SVC_RUNNING); - spin_unlock(&plli->lli_agl_lock); - wake_up(&thread->t_ctl_waitq); - - while (1) { - l_wait_event(thread->t_ctl_waitq, - !list_empty(&sai->sai_agls) || - !thread_is_running(thread), - &lwi); - - if (!thread_is_running(thread)) - break; + while (!kthread_should_stop()) { spin_lock(&plli->lli_agl_lock); /* The statahead thread maybe help to process AGL entries, @@ -906,6 +884,12 @@ static int ll_agl_thread(void *arg) } else { spin_unlock(&plli->lli_agl_lock); } + + set_current_state(TASK_IDLE); + if (list_empty(&sai->sai_agls) && + !kthread_should_stop()) + schedule(); + __set_current_state(TASK_RUNNING); } spin_lock(&plli->lli_agl_lock); @@ -919,20 +903,16 @@ static int ll_agl_thread(void *arg) iput(&clli->lli_vfs_inode); spin_lock(&plli->lli_agl_lock); } - thread_set_flags(thread, SVC_STOPPED); spin_unlock(&plli->lli_agl_lock); - wake_up(&thread->t_ctl_waitq); - ll_sai_put(sai); CDEBUG(D_READA, "agl thread stopped: sai %p, parent %pd\n", sai, parent); + ll_sai_put(sai); return 0; } /* start agl thread */ static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai) { - struct ptlrpc_thread *thread = &sai->sai_agl_thread; - struct l_wait_info lwi = { 0 }; struct ll_inode_info *plli; struct task_struct *task; @@ -940,17 +920,22 @@ static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai) sai, parent); plli = ll_i2info(d_inode(parent)); - task = kthread_run(ll_agl_thread, parent, "ll_agl_%u", - plli->lli_opendir_pid); + task = kthread_create(ll_agl_thread, parent, "ll_agl_%u", + plli->lli_opendir_pid); if (IS_ERR(task)) { CERROR("can't start ll_agl thread, rc: %ld\n", PTR_ERR(task)); - thread_set_flags(thread, SVC_STOPPED); return; } - l_wait_event(thread->t_ctl_waitq, - thread_is_running(thread) || thread_is_stopped(thread), - &lwi); + sai->sai_agl_task = task; + atomic_inc(&ll_i2sbi(d_inode(parent))->ll_agl_total); + spin_lock(&plli->lli_agl_lock); + sai->sai_agl_valid = 1; + spin_unlock(&plli->lli_agl_lock); + /* Get an extra reference that the thread holds */ + ll_sai_get(d_inode(parent)); + + wake_up_process(task); } /* statahead thread main function */ @@ -960,20 +945,13 @@ static int ll_statahead_thread(void *arg) struct inode *dir = d_inode(parent); struct ll_inode_info *lli = ll_i2info(dir); struct ll_sb_info *sbi = ll_i2sbi(dir); - struct ll_statahead_info *sai; - struct ptlrpc_thread *sa_thread; - struct ptlrpc_thread *agl_thread; + struct ll_statahead_info *sai = lli->lli_sai; struct page *page = NULL; __u64 pos = 0; int first = 0; int rc = 0; struct md_op_data *op_data; - struct l_wait_info lwi = { 0 }; - sai = ll_sai_get(dir); - sa_thread = &sai->sai_thread; - agl_thread = &sai->sai_agl_thread; - sa_thread->t_pid = current_pid(); CDEBUG(D_READA, "statahead thread starting: sai %p, parent %pd\n", sai, parent); @@ -986,21 +964,7 @@ static int ll_statahead_thread(void *arg) op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages; - if (sbi->ll_flags & LL_SBI_AGL_ENABLED) - ll_start_agl(parent, sai); - - atomic_inc(&sbi->ll_sa_total); - spin_lock(&lli->lli_sa_lock); - if (thread_is_init(sa_thread)) - /* If someone else has changed the thread state - * (e.g. already changed to SVC_STOPPING), we can't just - * blindly overwrite that setting. 
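
The agl loop above is the template for every thread conversion in this series: park in TASK_IDLE (uninterruptible, but excluded from the load average), and re-check both the work condition and kthread_should_stop() after setting the task state, so a wakeup arriving between the check and schedule() cannot be lost. A self-contained sketch, with have_work() as a hypothetical stand-in for the real predicate:

    #include <linux/kthread.h>
    #include <linux/sched.h>

    static bool have_work(void); /* hypothetical work-available predicate */

    static int worker_fn(void *arg)
    {
            while (!kthread_should_stop()) {
                    /* ... drain whatever work is queued ... */

                    set_current_state(TASK_IDLE);
                    /* the state is set before the final check, otherwise a
                     * wake_up_process() issued here would be missed
                     */
                    if (!have_work() && !kthread_should_stop())
                            schedule();
                    __set_current_state(TASK_RUNNING);
            }
            return 0;
    }
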
- */ - thread_set_flags(sa_thread, SVC_RUNNING); - spin_unlock(&lli->lli_sa_lock); - wake_up(&sa_thread->t_ctl_waitq); - - while (pos != MDS_DIR_END_OFF && thread_is_running(sa_thread)) { + while (pos != MDS_DIR_END_OFF && sai->sai_task) { struct lu_dirpage *dp; struct lu_dirent *ent; @@ -1017,7 +981,7 @@ static int ll_statahead_thread(void *arg) dp = page_address(page); for (ent = lu_dirent_start(dp); - ent && thread_is_running(sa_thread) && !sa_low_hit(sai); + ent && sai->sai_task && !sa_low_hit(sai); ent = lu_dirent_next(ent)) { struct lu_fid fid; __u64 hash; @@ -1067,14 +1031,7 @@ static int ll_statahead_thread(void *arg) fid_le_to_cpu(&fid, &ent->lde_fid); - /* wait for spare statahead window */ do { - l_wait_event(sa_thread->t_ctl_waitq, - !sa_sent_full(sai) || - sa_has_callback(sai) || - !list_empty(&sai->sai_agls) || - !thread_is_running(sa_thread), - &lwi); sa_handle_callback(sai); spin_lock(&lli->lli_agl_lock); @@ -1094,8 +1051,16 @@ static int ll_statahead_thread(void *arg) spin_lock(&lli->lli_agl_lock); } spin_unlock(&lli->lli_agl_lock); - } while (sa_sent_full(sai) && - thread_is_running(sa_thread)); + + set_current_state(TASK_IDLE); + if (sa_sent_full(sai) && + !sa_has_callback(sai) && + agl_list_empty(sai) && + sai->sai_task) + /* wait for spare statahead window */ + schedule(); + __set_current_state(TASK_RUNNING); + } while (sa_sent_full(sai) && sai->sai_task); sa_statahead(parent, name, namelen, &fid); } @@ -1118,7 +1083,7 @@ static int ll_statahead_thread(void *arg) if (rc < 0) { spin_lock(&lli->lli_sa_lock); - thread_set_flags(sa_thread, SVC_STOPPING); + sai->sai_task = NULL; lli->lli_sa_enabled = 0; spin_unlock(&lli->lli_sa_lock); } @@ -1127,59 +1092,46 @@ static int ll_statahead_thread(void *arg) * statahead is finished, but statahead entries need to be cached, wait * for file release to stop me. */ - while (thread_is_running(sa_thread)) { - l_wait_event(sa_thread->t_ctl_waitq, - sa_has_callback(sai) || - !agl_list_empty(sai) || - !thread_is_running(sa_thread), - &lwi); - + while (sai->sai_task) { sa_handle_callback(sai); + + set_current_state(TASK_IDLE); + if (!sa_has_callback(sai) && + sai->sai_task) + schedule(); + __set_current_state(TASK_RUNNING); } out: - if (sai->sai_agl_valid) { - spin_lock(&lli->lli_agl_lock); - thread_set_flags(agl_thread, SVC_STOPPING); - spin_unlock(&lli->lli_agl_lock); - wake_up(&agl_thread->t_ctl_waitq); + if (sai->sai_agl_task) { + kthread_stop(sai->sai_agl_task); CDEBUG(D_READA, "stop agl thread: sai %p pid %u\n", - sai, (unsigned int)agl_thread->t_pid); - l_wait_event(agl_thread->t_ctl_waitq, - thread_is_stopped(agl_thread), - &lwi); - } else { - /* Set agl_thread flags anyway. 
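
ll_start_agl() above moves from kthread_run() to kthread_create() plus an explicit wake_up_process(). The point of the two-step form is that the parent can publish everything the new thread will read (the task pointer, the extra sai reference, the counters) before the thread executes its first instruction, which is what makes the old SVC_INIT/SVC_RUNNING handshake unnecessary. Sketched with the patch's sai_task field; start_worker() and its argument types are illustrative:

    #include <linux/err.h>
    #include <linux/kthread.h>

    static int start_worker(struct ll_statahead_info *sai, void *arg, u32 pid)
    {
            struct task_struct *task;

            task = kthread_create(worker_fn, arg, "ll_sa_%u", pid);
            if (IS_ERR(task))
                    return PTR_ERR(task);

            sai->sai_task = task;   /* published before the thread ever runs */
            wake_up_process(task);
            return 0;
    }
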
*/ - thread_set_flags(agl_thread, SVC_STOPPED); + sai, (unsigned int)sai->sai_agl_task->pid); + sai->sai_agl_task = NULL; } - /* * wait for inflight statahead RPCs to finish, and then we can free sai * safely because statahead RPC will access sai data */ while (sai->sai_sent != sai->sai_replied) { /* in case we're not woken up, timeout wait */ - lwi = LWI_TIMEOUT(msecs_to_jiffies(MSEC_PER_SEC >> 3), - NULL, NULL); - l_wait_event(sa_thread->t_ctl_waitq, - sai->sai_sent == sai->sai_replied, &lwi); + schedule_timeout_idle(HZ>>3); } /* release resources held by statahead RPCs */ sa_handle_callback(sai); - spin_lock(&lli->lli_sa_lock); - thread_set_flags(sa_thread, SVC_STOPPED); - spin_unlock(&lli->lli_sa_lock); - CDEBUG(D_READA, "statahead thread stopped: sai %p, parent %pd\n", sai, parent); + spin_lock(&lli->lli_sa_lock); + sai->sai_task = NULL; + spin_unlock(&lli->lli_sa_lock); + wake_up(&sai->sai_waitq); - wake_up(&sa_thread->t_ctl_waitq); ll_sai_put(sai); - return rc; + do_exit(rc); } /* authorize opened dir handle @key to statahead */ @@ -1221,13 +1173,13 @@ void ll_deauthorize_statahead(struct inode *dir, void *key) lli->lli_opendir_pid = 0; lli->lli_sa_enabled = 0; sai = lli->lli_sai; - if (sai && thread_is_running(&sai->sai_thread)) { + if (sai && sai->sai_task) { /* * statahead thread may not quit yet because it needs to cache * entries, now it's time to tell it to quit. */ - thread_set_flags(&sai->sai_thread, SVC_STOPPING); - wake_up(&sai->sai_thread.t_ctl_waitq); + wake_up_process(sai->sai_task); + sai->sai_task = NULL; } spin_unlock(&lli->lli_sa_lock); } @@ -1382,7 +1334,6 @@ static int revalidate_statahead_dentry(struct inode *dir, { struct ll_inode_info *lli = ll_i2info(dir); struct sa_entry *entry = NULL; - struct l_wait_info lwi = { 0 }; struct ll_dentry_data *ldd; int rc = 0; @@ -1432,10 +1383,8 @@ static int revalidate_statahead_dentry(struct inode *dir, spin_lock(&lli->lli_sa_lock); sai->sai_index_wait = entry->se_index; spin_unlock(&lli->lli_sa_lock); - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(30), NULL, - LWI_ON_SIGNAL_NOOP, NULL); - rc = l_wait_event(sai->sai_waitq, sa_ready(entry), &lwi); - if (rc < 0) { + if (0 == wait_event_idle_timeout(sai->sai_waitq, + sa_ready(entry), 30 * HZ)) { /* * entry may not be ready, so it may be used by inflight * statahead RPC, don't free it. 
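
The revalidate path above leans on the return convention of wait_event_idle_timeout(): 0 means the timeout elapsed, otherwise it returns the remaining jiffies (at least 1), which is why the old rc < 0 test becomes a == 0 test. In sketch form (the -EAGAIN fallback is illustrative, matching the "don't free it" comment):

            long rc;

            rc = wait_event_idle_timeout(sai->sai_waitq, sa_ready(entry),
                                         30 * HZ);
            if (rc == 0) {
                    /* timed out: the entry may still be used by an inflight
                     * statahead RPC, so it must not be freed here
                     */
                    return -EAGAIN;
            }
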
@@ -1500,7 +1449,7 @@ out_unplug: */ ldd = ll_d2d(*dentryp); ldd->lld_sa_generation = lli->lli_sa_generation; - sa_put(sai, entry); + sa_put(sai, entry, lli); return rc; } @@ -1520,8 +1469,6 @@ static int start_statahead_thread(struct inode *dir, struct dentry *dentry) { struct ll_inode_info *lli = ll_i2info(dir); struct ll_statahead_info *sai = NULL; - struct l_wait_info lwi = { 0 }; - struct ptlrpc_thread *thread; struct task_struct *task; struct dentry *parent = dentry->d_parent; int rc; @@ -1561,19 +1508,21 @@ static int start_statahead_thread(struct inode *dir, struct dentry *dentry) CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %pd]\n", current_pid(), parent); - task = kthread_run(ll_statahead_thread, parent, "ll_sa_%u", - lli->lli_opendir_pid); - thread = &sai->sai_thread; + task = kthread_create(ll_statahead_thread, parent, "ll_sa_%u", + lli->lli_opendir_pid); if (IS_ERR(task)) { rc = PTR_ERR(task); CERROR("can't start ll_sa thread, rc : %d\n", rc); goto out; } - l_wait_event(thread->t_ctl_waitq, - thread_is_running(thread) || thread_is_stopped(thread), - &lwi); - ll_sai_put(sai); + if (ll_i2sbi(parent->d_inode)->ll_flags & LL_SBI_AGL_ENABLED) + ll_start_agl(parent, sai); + + atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_total); + sai->sai_task = task; + + wake_up_process(task); /* * We don't stat-ahead for the first dirent since we are already in diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c index 9b0bb3541a84..861e7a60f408 100644 --- a/drivers/staging/lustre/lustre/llite/super25.c +++ b/drivers/staging/lustre/lustre/llite/super25.c @@ -85,8 +85,7 @@ MODULE_ALIAS_FS("lustre"); static int __init lustre_init(void) { - struct lnet_process_id lnet_id; - int i, rc; + int rc; BUILD_BUG_ON(sizeof(LUSTRE_VOLATILE_HDR) != LUSTRE_VOLATILE_HDR_LEN + 1); @@ -125,20 +124,6 @@ static int __init lustre_init(void) goto out_debugfs; } - /* Nodes with small feet have little entropy. 
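
A note on the xattr conversion that follows: the handlers replace variable-length arrays on the kernel stack (sized by strlen(prefix) + strlen(name)) with kasprintf(), which bounds stack usage and fails cleanly with NULL; the patch itself keeps the historical trailing "\n" inside the format string. The shape of the change, as a sketch with a hypothetical do_setxattr() helper:

    #include <linux/slab.h>

    static int do_setxattr(const char *fullname); /* hypothetical */

    static int set_xattr_by_name(const char *prefix, const char *name)
    {
            char *fullname = kasprintf(GFP_KERNEL, "%s%s", prefix, name);
            int rc;

            if (!fullname)
                    return -ENOMEM;
            rc = do_setxattr(fullname);
            kfree(fullname);        /* kasprintf() buffers are kfree()d */
            return rc;
    }
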
The NID for this - * node gives the most entropy in the low bits - */ - for (i = 0;; i++) { - u32 seed; - - if (LNetGetId(i, &lnet_id) == -ENOENT) - break; - if (LNET_NETTYP(LNET_NIDNET(lnet_id.nid)) != LOLND) { - seed = LNET_NIDADDR(lnet_id.nid); - add_device_randomness(&seed, sizeof(seed)); - } - } - rc = vvp_global_init(); if (rc != 0) goto out_sysfs; diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index 532384c91447..2d78432963dc 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -75,7 +75,7 @@ static int xattr_type_filter(struct ll_sb_info *sbi, return -EOPNOTSUPP; if (handler->flags == XATTR_TRUSTED_T && - !capable(CFS_CAP_SYS_ADMIN)) + !capable(CAP_SYS_ADMIN)) return -EPERM; return 0; @@ -87,10 +87,10 @@ ll_xattr_set_common(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { - char fullname[strlen(handler->prefix) + strlen(name) + 1]; struct ll_sb_info *sbi = ll_i2sbi(inode); struct ptlrpc_request *req = NULL; const char *pv = value; + char *fullname; __u64 valid; int rc; @@ -141,10 +141,13 @@ ll_xattr_set_common(const struct xattr_handler *handler, return -EPERM; } - sprintf(fullname, "%s%s\n", handler->prefix, name); + fullname = kasprintf(GFP_KERNEL, "%s%s\n", handler->prefix, name); + if (!fullname) + return -ENOMEM; rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname, pv, size, 0, flags, ll_i2suppgid(inode), &req); + kfree(fullname); if (rc) { if (rc == -EOPNOTSUPP && handler->flags == XATTR_USER_T) { LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n"); @@ -364,11 +367,11 @@ static int ll_xattr_get_common(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - char fullname[strlen(handler->prefix) + strlen(name) + 1]; struct ll_sb_info *sbi = ll_i2sbi(inode); #ifdef CONFIG_FS_POSIX_ACL struct ll_inode_info *lli = ll_i2info(inode); #endif + char *fullname; int rc; CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n", @@ -411,9 +414,13 @@ static int ll_xattr_get_common(const struct xattr_handler *handler, if (handler->flags == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode)) return -ENODATA; #endif - sprintf(fullname, "%s%s\n", handler->prefix, name); - return ll_xattr_list(inode, fullname, handler->flags, buffer, size, - OBD_MD_FLXATTR); + fullname = kasprintf(GFP_KERNEL, "%s%s\n", handler->prefix, name); + if (!fullname) + return -ENOMEM; + rc = ll_xattr_list(inode, fullname, handler->flags, buffer, size, + OBD_MD_FLXATTR); + kfree(fullname); + return rc; } static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size) diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c index c2c57f65431e..e8a9b9902c37 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c @@ -1035,7 +1035,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, reqlen = offsetof(typeof(*hur), hur_user_item[nr]) + hur->hur_request.hr_data_len; - req = libcfs_kvzalloc(reqlen, GFP_NOFS); + req = kvzalloc(reqlen, GFP_NOFS); if (!req) return -ENOMEM; @@ -2695,7 +2695,7 @@ static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp, if (lsm && !lmm) { int i; - for (i = 1; i < lsm->lsm_md_stripe_count; i++) { + for (i = 0; i < lsm->lsm_md_stripe_count; i++) { /* * For migrating inode, 
the master stripe and master * object will be the same, so do not need iput, see @@ -2733,7 +2733,7 @@ static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp, lsm_size = lmv_stripe_md_size(0); if (!lsm) { - lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS); + lsm = kvzalloc(lsm_size, GFP_NOFS); if (!lsm) return -ENOMEM; allocated = true; diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c index d563dd73343a..c56a971745e8 100644 --- a/drivers/staging/lustre/lustre/lov/lov_ea.c +++ b/drivers/staging/lustre/lustre/lov/lov_ea.c @@ -89,7 +89,7 @@ struct lov_stripe_md *lsm_alloc_plain(u16 stripe_count) oinfo_ptrs_size = sizeof(struct lov_oinfo *) * stripe_count; lsm_size = sizeof(*lsm) + oinfo_ptrs_size; - lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS); + lsm = kvzalloc(lsm_size, GFP_NOFS); if (!lsm) return NULL; diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c index c5f5d1b106dc..b823f8a21856 100644 --- a/drivers/staging/lustre/lustre/lov/lov_io.c +++ b/drivers/staging/lustre/lustre/lov/lov_io.c @@ -243,7 +243,7 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, * when writing a page. -jay */ lio->lis_subs = - libcfs_kvzalloc(lsm->lsm_stripe_count * + kvzalloc(lsm->lsm_stripe_count * sizeof(lio->lis_subs[0]), GFP_NOFS); if (lio->lis_subs) { @@ -483,7 +483,7 @@ lov_io_data_version_end(const struct lu_env *env, const struct cl_io_slice *ios) struct lov_io_sub *sub; list_for_each_entry(sub, &lio->lis_active, sub_linkage) { - lov_io_end_wrapper(env, sub->sub_io); + lov_io_end_wrapper(sub->sub_env, sub->sub_io); parent->u.ci_data_version.dv_data_version += sub->sub_io->u.ci_data_version.dv_data_version; diff --git a/drivers/staging/lustre/lustre/lov/lov_lock.c b/drivers/staging/lustre/lustre/lov/lov_lock.c index 2fcdeb707ff9..b0292100bf26 100644 --- a/drivers/staging/lustre/lustre/lov/lov_lock.c +++ b/drivers/staging/lustre/lustre/lov/lov_lock.c @@ -145,7 +145,7 @@ static struct lov_lock *lov_lock_sub_init(const struct lu_env *env, nr++; } LASSERT(nr > 0); - lovlck = libcfs_kvzalloc(offsetof(struct lov_lock, lls_sub[nr]), + lovlck = kvzalloc(offsetof(struct lov_lock, lls_sub[nr]), GFP_NOFS); if (!lovlck) return ERR_PTR(-ENOMEM); diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c index 897cf2cd4a24..f7c69680cb7d 100644 --- a/drivers/staging/lustre/lustre/lov/lov_object.c +++ b/drivers/staging/lustre/lustre/lov/lov_object.c @@ -242,7 +242,7 @@ static int lov_init_raid0(const struct lu_env *env, struct lov_device *dev, r0->lo_nr = lsm->lsm_stripe_count; LASSERT(r0->lo_nr <= lov_targets_nr(dev)); - r0->lo_sub = libcfs_kvzalloc(r0->lo_nr * sizeof(r0->lo_sub[0]), + r0->lo_sub = kvzalloc(r0->lo_nr * sizeof(r0->lo_sub[0]), GFP_NOFS); if (r0->lo_sub) { int psz = 0; @@ -723,15 +723,13 @@ static void lov_conf_unlock(struct lov_object *lov) static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov) { - struct l_wait_info lwi = { 0 }; - while (atomic_read(&lov->lo_active_ios) > 0) { CDEBUG(D_INODE, "file:" DFID " wait for active IO, now: %d.\n", PFID(lu_object_fid(lov2lu(lov))), atomic_read(&lov->lo_active_ios)); - l_wait_event(lov->lo_waitq, - atomic_read(&lov->lo_active_ios) == 0, &lwi); + wait_event_idle(lov->lo_waitq, + atomic_read(&lov->lo_active_ios) == 0); } return 0; } @@ -1175,7 +1173,8 @@ static int fiemap_for_stripe(const struct lu_env *env, struct cl_object *obj, /* If this is a continuation 
FIEMAP call and we are on * starting stripe then lun_start needs to be set to - * end_offset */ + * end_offset + */ if (fs->fs_end_offset != 0 && stripeno == fs->fs_start_stripe) lun_start = fs->fs_end_offset; @@ -1200,7 +1199,8 @@ static int fiemap_for_stripe(const struct lu_env *env, struct cl_object *obj, if (IS_ERR(subobj)) return PTR_ERR(subobj); /* If the output buffer is very large and the objects have many - * extents we may need to loop on a single OST repeatedly */ + * extents we may need to loop on a single OST repeatedly + */ do { if (fiemap->fm_extent_count > 0) { /* Don't get too many extents. */ @@ -1250,7 +1250,8 @@ inactive_tgt: ost_done = true; fs->fs_device_done = true; /* If last stripe has hold at the end, - * we need to return */ + * we need to return + */ if (stripeno == fs->fs_last_stripe) { fiemap->fm_mapped_extents = 0; fs->fs_finish = true; @@ -1284,7 +1285,8 @@ inactive_tgt: } /* Clear the EXTENT_LAST flag which can be present on - * the last extent */ + * the last extent + */ if (fm_ext[ext_count - 1].fe_flags & FIEMAP_EXTENT_LAST) fm_ext[ext_count - 1].fe_flags &= ~FIEMAP_EXTENT_LAST; if (lov_stripe_size(lsm, fm_ext[ext_count - 1].fe_logical + @@ -1377,7 +1379,7 @@ static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj, if (fiemap_count_to_size(fiemap->fm_extent_count) < buffer_size) buffer_size = fiemap_count_to_size(fiemap->fm_extent_count); - fm_local = libcfs_kvzalloc(buffer_size, GFP_NOFS); + fm_local = kvzalloc(buffer_size, GFP_NOFS); if (!fm_local) { rc = -ENOMEM; goto out; diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c index e5b11c4085a9..b1060d02a164 100644 --- a/drivers/staging/lustre/lustre/lov/lov_pack.c +++ b/drivers/staging/lustre/lustre/lov/lov_pack.c @@ -333,7 +333,7 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm, lmmk_size = lov_mds_md_size(stripe_count, lsm->lsm_magic); - lmmk = libcfs_kvzalloc(lmmk_size, GFP_NOFS); + lmmk = kvzalloc(lmmk_size, GFP_NOFS); if (!lmmk) { rc = -ENOMEM; goto out; diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c index cfa1d7f92b0f..051450d67524 100644 --- a/drivers/staging/lustre/lustre/lov/lov_request.c +++ b/drivers/staging/lustre/lustre/lov/lov_request.c @@ -99,8 +99,7 @@ static int lov_check_set(struct lov_obd *lov, int idx) */ static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx) { - wait_queue_head_t waitq; - struct l_wait_info lwi; + int cnt = 0; struct lov_tgt_desc *tgt; int rc = 0; @@ -125,11 +124,10 @@ static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx) mutex_unlock(&lov->lov_lock); - init_waitqueue_head(&waitq); - lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(obd_timeout), - cfs_time_seconds(1), NULL, NULL); - - rc = l_wait_event(waitq, lov_check_set(lov, ost_idx), &lwi); + while (cnt < obd_timeout && !lov_check_set(lov, ost_idx)) { + schedule_timeout_uninterruptible(HZ); + cnt++; + } if (tgt->ltd_active) return 1; diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c index 3114907ac5ff..695ef44532cf 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c @@ -660,7 +660,7 @@ static int mdc_finish_enqueue(struct obd_export *exp, LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d", ldlm_it2str(it->it_op), lvb_len); - lmm = libcfs_kvzalloc(lvb_len, GFP_NOFS); + lmm = kvzalloc(lvb_len, GFP_NOFS); 
if (!lmm) { LDLM_LOCK_PUT(lock); return -ENOMEM; diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c index 03e55bca4ada..3b1c8e5a3053 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_request.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c @@ -838,7 +838,6 @@ static int mdc_getpage(struct obd_export *exp, const struct lu_fid *fid, struct ptlrpc_bulk_desc *desc; struct ptlrpc_request *req; wait_queue_head_t waitq; - struct l_wait_info lwi; int resends = 0; int rc; int i; @@ -888,9 +887,7 @@ restart_bulk: exp->exp_obd->obd_name, -EIO); return -EIO; } - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, - NULL); - l_wait_event(waitq, 0, &lwi); + wait_event_idle_timeout(waitq, 0, resends * HZ); goto restart_bulk; } @@ -1058,13 +1055,14 @@ static void mdc_adjust_dirpages(struct page **pages, int cfs_pgs, int lu_pgs) __u64 hash_end = le64_to_cpu(dp->ldp_hash_end); __u32 flags = le32_to_cpu(dp->ldp_flags); struct lu_dirpage *first = dp; - struct lu_dirent *end_dirent = NULL; - struct lu_dirent *ent; while (--lu_pgs > 0) { - ent = lu_dirent_start(dp); - for (end_dirent = ent; ent; - end_dirent = ent, ent = lu_dirent_next(ent)); + struct lu_dirent *end_dirent = NULL; + struct lu_dirent *ent; + + for (ent = lu_dirent_start(dp); ent; + ent = lu_dirent_next(ent)) + end_dirent = ent; /* Advance dp to next lu_dirpage. */ dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE); diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c index 79ff85feab64..c61cd23a96df 100644 --- a/drivers/staging/lustre/lustre/mgc/mgc_request.c +++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c @@ -535,7 +535,6 @@ static int mgc_requeue_thread(void *data) spin_lock(&config_list_lock); rq_state |= RQ_RUNNING; while (!(rq_state & RQ_STOP)) { - struct l_wait_info lwi; struct config_llog_data *cld, *cld_prev; int rand = prandom_u32_max(MGC_TIMEOUT_RAND_CENTISEC); int to; @@ -556,9 +555,9 @@ static int mgc_requeue_thread(void *data) to = msecs_to_jiffies(MGC_TIMEOUT_MIN_SECONDS * MSEC_PER_SEC); /* rand is centi-seconds */ to += msecs_to_jiffies(rand * MSEC_PER_SEC / 100); - lwi = LWI_TIMEOUT(to, NULL, NULL); - l_wait_event(rq_waitq, rq_state & (RQ_STOP | RQ_PRECLEANUP), - &lwi); + wait_event_idle_timeout(rq_waitq, + rq_state & (RQ_STOP | RQ_PRECLEANUP), + to); /* * iterate & processing through the list. 
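
One wrinkle in the mdc_getpage() hunk above: the replacement wait, wait_event_idle_timeout(waitq, 0, resends * HZ), has a constant-false condition, so it is nothing more than a timed idle sleep on a private waitqueue. A more direct equivalent (a sketch, not what the patch does) is schedule_timeout_idle():

    #include <linux/sched.h>

    static void resend_backoff(int resends)
    {
            /* sleep 'resends' seconds in TASK_IDLE: not signal-interruptible,
             * and not counted into the load average
             */
            schedule_timeout_idle(resends * HZ);
    }
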
for each cld, process @@ -601,9 +600,7 @@ static int mgc_requeue_thread(void *data) config_log_put(cld_prev); /* Wait a bit to see if anyone else needs a requeue */ - lwi = (struct l_wait_info) { 0 }; - l_wait_event(rq_waitq, rq_state & (RQ_NOW | RQ_STOP), - &lwi); + wait_event_idle(rq_waitq, rq_state & (RQ_NOW | RQ_STOP)); spin_lock(&config_list_lock); } @@ -1630,9 +1627,7 @@ restart: if (rcl == -ESHUTDOWN && atomic_read(&mgc->u.cli.cl_mgc_refcount) > 0 && !retry) { - int secs = cfs_time_seconds(obd_timeout); struct obd_import *imp; - struct l_wait_info lwi; mutex_unlock(&cld->cld_lock); imp = class_exp2cliimp(mgc->u.cli.cl_mgc_mgsexp); @@ -1647,9 +1642,9 @@ restart: */ ptlrpc_pinger_force(imp); - lwi = LWI_TIMEOUT(secs, NULL, NULL); - l_wait_event(imp->imp_recovery_waitq, - !mgc_import_in_recovery(imp), &lwi); + wait_event_idle_timeout(imp->imp_recovery_waitq, + !mgc_import_in_recovery(imp), + obd_timeout * HZ); if (imp->imp_state == LUSTRE_IMP_FULL) { retry = true; diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c index 6ec5218a18c1..ab84e011b560 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_io.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c @@ -1097,23 +1097,24 @@ EXPORT_SYMBOL(cl_sync_io_init); int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor, long timeout) { - struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout), - NULL, NULL, NULL); - int rc; + int rc = 1; LASSERT(timeout >= 0); - rc = l_wait_event(anchor->csi_waitq, - atomic_read(&anchor->csi_sync_nr) == 0, - &lwi); - if (rc < 0) { + if (timeout == 0) + wait_event_idle(anchor->csi_waitq, + atomic_read(&anchor->csi_sync_nr) == 0); + else + rc = wait_event_idle_timeout(anchor->csi_waitq, + atomic_read(&anchor->csi_sync_nr) == 0, + timeout * HZ); + if (rc == 0) { + rc = -ETIMEDOUT; CERROR("IO failed: %d, still wait for %d remaining entries\n", rc, atomic_read(&anchor->csi_sync_nr)); - lwi = (struct l_wait_info) { 0 }; - (void)l_wait_event(anchor->csi_waitq, - atomic_read(&anchor->csi_sync_nr) == 0, - &lwi); + wait_event_idle(anchor->csi_waitq, + atomic_read(&anchor->csi_sync_nr) == 0); } else { rc = anchor->csi_sync_rc; } diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c index 3b683b774fef..9ca29a26a38b 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_lock.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_lock.c @@ -224,7 +224,7 @@ EXPORT_SYMBOL(cl_lock_release); const char *cl_lock_mode_name(const enum cl_lock_mode mode) { - static const char *names[] = { + static const char * const names[] = { [CLM_READ] = "R", [CLM_WRITE] = "W", [CLM_GROUP] = "G" diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c index 7b18d775b001..7809f6ae1809 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_object.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c @@ -495,7 +495,7 @@ static struct cache_stats cl_env_stats = { int cl_site_stats_print(const struct cl_site *site, struct seq_file *m) { size_t i; - static const char *pstate[] = { + static const char * const pstate[] = { [CPS_CACHED] = "c", [CPS_OWNED] = "o", [CPS_PAGEOUT] = "w", diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c index b1d6ba4a3190..63ccbabb4c5a 100644 --- a/drivers/staging/lustre/lustre/obdclass/genops.c +++ b/drivers/staging/lustre/lustre/obdclass/genops.c @@ -48,10 
+48,7 @@ struct kmem_cache *obdo_cachep; EXPORT_SYMBOL(obdo_cachep); static struct kmem_cache *import_cachep; -static struct list_head obd_zombie_imports; -static struct list_head obd_zombie_exports; -static spinlock_t obd_zombie_impexp_lock; -static void obd_zombie_impexp_notify(void); +static struct workqueue_struct *zombie_wq; static void obd_zombie_export_add(struct obd_export *exp); static void obd_zombie_import_add(struct obd_import *imp); @@ -701,6 +698,13 @@ void class_export_put(struct obd_export *exp) } EXPORT_SYMBOL(class_export_put); +static void obd_zombie_exp_cull(struct work_struct *ws) +{ + struct obd_export *export = container_of(ws, struct obd_export, exp_zombie_work); + + class_export_destroy(export); +} + /* Creates a new export, adds it to the hash table, and returns a * pointer to it. The refcount is 2: one for the hash reference, and * one for the pointer returned by this function. @@ -741,6 +745,7 @@ struct obd_export *class_new_export(struct obd_device *obd, INIT_HLIST_NODE(&export->exp_uuid_hash); spin_lock_init(&export->exp_bl_list_lock); INIT_LIST_HEAD(&export->exp_bl_list); + INIT_WORK(&export->exp_zombie_work, obd_zombie_exp_cull); export->exp_sp_peer = LUSTRE_SP_ANY; export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID; @@ -862,7 +867,6 @@ EXPORT_SYMBOL(class_import_get); void class_import_put(struct obd_import *imp) { - LASSERT(list_empty(&imp->imp_zombie_chain)); LASSERT_ATOMIC_GT_LT(&imp->imp_refcount, 0, LI_POISON); CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp, @@ -894,6 +898,13 @@ static void init_imp_at(struct imp_at *at) } } +static void obd_zombie_imp_cull(struct work_struct *ws) +{ + struct obd_import *import = container_of(ws, struct obd_import, imp_zombie_work); + + class_import_destroy(import); +} + struct obd_import *class_new_import(struct obd_device *obd) { struct obd_import *imp; @@ -903,7 +914,6 @@ struct obd_import *class_new_import(struct obd_device *obd) return NULL; INIT_LIST_HEAD(&imp->imp_pinger_chain); - INIT_LIST_HEAD(&imp->imp_zombie_chain); INIT_LIST_HEAD(&imp->imp_replay_list); INIT_LIST_HEAD(&imp->imp_sending_list); INIT_LIST_HEAD(&imp->imp_delayed_list); @@ -917,6 +927,7 @@ struct obd_import *class_new_import(struct obd_device *obd) imp->imp_obd = class_incref(obd, "import", imp); mutex_init(&imp->imp_sec_mutex); init_waitqueue_head(&imp->imp_recovery_waitq); + INIT_WORK(&imp->imp_zombie_work, obd_zombie_imp_cull); atomic_set(&imp->imp_refcount, 2); atomic_set(&imp->imp_unregistering, 0); @@ -1098,81 +1109,6 @@ EXPORT_SYMBOL(class_fail_export); void (*class_export_dump_hook)(struct obd_export *) = NULL; #endif -/* Total amount of zombies to be destroyed */ -static int zombies_count; - -/** - * kill zombie imports and exports - */ -static void obd_zombie_impexp_cull(void) -{ - struct obd_import *import; - struct obd_export *export; - - do { - spin_lock(&obd_zombie_impexp_lock); - - import = NULL; - if (!list_empty(&obd_zombie_imports)) { - import = list_entry(obd_zombie_imports.next, - struct obd_import, - imp_zombie_chain); - list_del_init(&import->imp_zombie_chain); - } - - export = NULL; - if (!list_empty(&obd_zombie_exports)) { - export = list_entry(obd_zombie_exports.next, - struct obd_export, - exp_obd_chain); - list_del_init(&export->exp_obd_chain); - } - - spin_unlock(&obd_zombie_impexp_lock); - - if (import) { - class_import_destroy(import); - spin_lock(&obd_zombie_impexp_lock); - zombies_count--; - spin_unlock(&obd_zombie_impexp_lock); - } - - if (export) { - class_export_destroy(export); - 
spin_lock(&obd_zombie_impexp_lock); - zombies_count--; - spin_unlock(&obd_zombie_impexp_lock); - } - - cond_resched(); - } while (import || export); -} - -static struct completion obd_zombie_start; -static struct completion obd_zombie_stop; -static unsigned long obd_zombie_flags; -static wait_queue_head_t obd_zombie_waitq; -static pid_t obd_zombie_pid; - -enum { - OBD_ZOMBIE_STOP = 0x0001, -}; - -/** - * check for work for kill zombie import/export thread. - */ -static int obd_zombie_impexp_check(void *arg) -{ - int rc; - - spin_lock(&obd_zombie_impexp_lock); - rc = (zombies_count == 0) && - !test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); - spin_unlock(&obd_zombie_impexp_lock); - - return rc; -} - /** * Add export to the obd_zombie thread and notify it. */ @@ -1182,12 +1118,7 @@ static void obd_zombie_export_add(struct obd_export *exp) LASSERT(!list_empty(&exp->exp_obd_chain)); list_del_init(&exp->exp_obd_chain); spin_unlock(&exp->exp_obd->obd_dev_lock); - spin_lock(&obd_zombie_impexp_lock); - zombies_count++; - list_add(&exp->exp_obd_chain, &obd_zombie_exports); - spin_unlock(&obd_zombie_impexp_lock); - - obd_zombie_impexp_notify(); + queue_work(zombie_wq, &exp->exp_zombie_work); } /** @@ -1196,40 +1127,7 @@ static void obd_zombie_export_add(struct obd_export *exp) static void obd_zombie_import_add(struct obd_import *imp) { LASSERT(!imp->imp_sec); - spin_lock(&obd_zombie_impexp_lock); - LASSERT(list_empty(&imp->imp_zombie_chain)); - zombies_count++; - list_add(&imp->imp_zombie_chain, &obd_zombie_imports); - spin_unlock(&obd_zombie_impexp_lock); - - obd_zombie_impexp_notify(); -} - -/** - * notify import/export destroy thread about new zombie. - */ -static void obd_zombie_impexp_notify(void) -{ - /* - * Make sure obd_zombie_impexp_thread get this notification. - * It is possible this signal only get by obd_zombie_barrier, and - * barrier gulps this notification and sleeps away and hangs ensues - */ - wake_up_all(&obd_zombie_waitq); -} - -/** - * check whether obd_zombie is idle - */ -static int obd_zombie_is_idle(void) -{ - int rc; - - LASSERT(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)); - spin_lock(&obd_zombie_impexp_lock); - rc = (zombies_count == 0); - spin_unlock(&obd_zombie_impexp_lock); - return rc; + queue_work(zombie_wq, &imp->imp_zombie_work); } /** @@ -1237,64 +1135,19 @@ static int obd_zombie_is_idle(void) */ void obd_zombie_barrier(void) { - struct l_wait_info lwi = { 0 }; - - if (obd_zombie_pid == current_pid()) - /* don't wait for myself */ - return; - l_wait_event(obd_zombie_waitq, obd_zombie_is_idle(), &lwi); + flush_workqueue(zombie_wq); } EXPORT_SYMBOL(obd_zombie_barrier); /** - * destroy zombie export/import thread. - */ -static int obd_zombie_impexp_thread(void *unused) -{ - unshare_fs_struct(); - complete(&obd_zombie_start); - - obd_zombie_pid = current_pid(); - - while (!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) { - struct l_wait_info lwi = { 0 }; - - l_wait_event(obd_zombie_waitq, - !obd_zombie_impexp_check(NULL), &lwi); - obd_zombie_impexp_cull(); - - /* - * Notify obd_zombie_barrier callers that queues - * may be empty. 
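
The genops.c rework here is the largest structural change in this section: the dedicated obd_zombid thread, its two zombie lists, the count, and the notify/idle-check protocol all collapse into one workqueue plus a work_struct embedded in each export and import. Reduced to a sketch (struct victim and victim_cull() are illustrative names):

    #include <linux/slab.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *zombie_wq;

    struct victim {
            struct work_struct zombie_work;
            /* ... payload ... */
    };

    static void victim_cull(struct work_struct *ws)
    {
            struct victim *v = container_of(ws, struct victim, zombie_work);

            kfree(v);       /* the destructor now runs in workqueue context */
    }

    /* init:    zombie_wq = alloc_workqueue("obd_zombid", 0, 0);
     *          INIT_WORK(&v->zombie_work, victim_cull);
     * release: queue_work(zombie_wq, &v->zombie_work);
     * barrier: flush_workqueue(zombie_wq);    -- replaces obd_zombie_barrier()
     * stop:    destroy_workqueue(zombie_wq);  -- flushes, then frees
     */
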
- */ - wake_up(&obd_zombie_waitq); - } - - complete(&obd_zombie_stop); - - return 0; -} - -/** * start destroy zombie import/export thread */ int obd_zombie_impexp_init(void) { - struct task_struct *task; - - INIT_LIST_HEAD(&obd_zombie_imports); - INIT_LIST_HEAD(&obd_zombie_exports); - spin_lock_init(&obd_zombie_impexp_lock); - init_completion(&obd_zombie_start); - init_completion(&obd_zombie_stop); - init_waitqueue_head(&obd_zombie_waitq); - obd_zombie_pid = 0; - - task = kthread_run(obd_zombie_impexp_thread, NULL, "obd_zombid"); - if (IS_ERR(task)) - return PTR_ERR(task); + zombie_wq = alloc_workqueue("obd_zombid", 0, 0); + if (!zombie_wq) + return -ENOMEM; - wait_for_completion(&obd_zombie_start); return 0; } @@ -1303,9 +1156,7 @@ int obd_zombie_impexp_init(void) */ void obd_zombie_impexp_stop(void) { - set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags); - obd_zombie_impexp_notify(); - wait_for_completion(&obd_zombie_stop); + destroy_workqueue(zombie_wq); } struct obd_request_slot_waiter { @@ -1336,7 +1187,6 @@ static bool obd_request_slot_avail(struct client_obd *cli, int obd_get_request_slot(struct client_obd *cli) { struct obd_request_slot_waiter orsw; - struct l_wait_info lwi; int rc; spin_lock(&cli->cl_loi_list_lock); @@ -1351,11 +1201,9 @@ int obd_get_request_slot(struct client_obd *cli) orsw.orsw_signaled = false; spin_unlock(&cli->cl_loi_list_lock); - lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); - rc = l_wait_event(orsw.orsw_waitq, - obd_request_slot_avail(cli, &orsw) || - orsw.orsw_signaled, - &lwi); + rc = l_wait_event_abortable(orsw.orsw_waitq, + obd_request_slot_avail(cli, &orsw) || + orsw.orsw_signaled); /* * Here, we must take the lock to avoid the on-stack 'orsw' to be @@ -1593,7 +1441,6 @@ static inline bool obd_mod_rpc_slot_avail(struct client_obd *cli, u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc, struct lookup_intent *it) { - struct l_wait_info lwi = LWI_INTR(NULL, NULL); bool close_req = false; u16 i, max; @@ -1631,8 +1478,8 @@ u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc, CDEBUG(D_RPCTRACE, "%s: sleeping for a modify RPC slot opc %u, max %hu\n", cli->cl_import->imp_obd->obd_name, opc, max); - l_wait_event(cli->cl_mod_rpcs_waitq, - obd_mod_rpc_slot_avail(cli, close_req), &lwi); + wait_event_idle(cli->cl_mod_rpcs_waitq, + obd_mod_rpc_slot_avail(cli, close_req)); } while (true); } EXPORT_SYMBOL(obd_get_mod_rpc_slot); diff --git a/drivers/staging/lustre/lustre/obdclass/linkea.c b/drivers/staging/lustre/lustre/obdclass/linkea.c index fe1638b0916e..74c99ee216bb 100644 --- a/drivers/staging/lustre/lustre/obdclass/linkea.c +++ b/drivers/staging/lustre/lustre/obdclass/linkea.c @@ -33,9 +33,11 @@ int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf) { - ldata->ld_buf = lu_buf_check_and_alloc(buf, PAGE_SIZE); - if (!ldata->ld_buf->lb_buf) + buf->lb_buf = kzalloc(PAGE_SIZE, GFP_NOFS); + if (!buf->lb_buf) return -ENOMEM; + buf->lb_len = PAGE_SIZE; + ldata->ld_buf = buf; ldata->ld_leh = ldata->ld_buf->lb_buf; ldata->ld_leh->leh_magic = LINK_EA_MAGIC; ldata->ld_leh->leh_len = sizeof(struct link_ea_header); @@ -158,11 +160,15 @@ int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname, } if (leh->leh_len + reclen > ldata->ld_buf->lb_len) { - if (lu_buf_check_and_grow(ldata->ld_buf, - leh->leh_len + reclen) < 0) + /* Note: this never happens as MAX_LINKEA_SIZE is 4096, while + * the initial allocation is PAGE_SIZE. 
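
The linkea growth path this comment belongs to relies on a property of krealloc(): the old contents are copied into the larger allocation, and on failure the original buffer is left intact, so the error path needs no unwinding. The idiom, sketched against the lu_buf fields used here:

    #include <linux/slab.h>

    static int lu_buf_grow(struct lu_buf *buf, size_t newlen)
    {
            void *b = krealloc(buf->lb_buf, newlen, GFP_NOFS);

            if (!b)
                    return -ENOMEM; /* buf->lb_buf is still valid */
            buf->lb_buf = b;
            buf->lb_len = newlen;
            return 0;
    }
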
+ */ + void *b = krealloc(ldata->ld_buf->lb_buf, leh->leh_len + reclen, GFP_NOFS); + if (!b) return -ENOMEM; - leh = ldata->ld_leh = ldata->ld_buf->lb_buf; + ldata->ld_buf->lb_len = leh->leh_len + reclen; + leh = ldata->ld_leh = ldata->ld_buf->lb_buf = b; } ldata->ld_lee = ldata->ld_buf->lb_buf + leh->leh_len; diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c index 57951237def2..7bceee7f121e 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c @@ -180,7 +180,7 @@ int obd_ioctl_getdata(char **buf, int *len, void __user *arg) * obdfilter-survey is an example, which relies on ioctl. So we'd * better avoid vmalloc on ioctl path. LU-66 */ - *buf = libcfs_kvzalloc(hdr.ioc_len, GFP_NOFS); + *buf = kvzalloc(hdr.ioc_len, GFP_KERNEL); if (!*buf) { CERROR("Cannot allocate control buffer of len %d\n", hdr.ioc_len); @@ -251,7 +251,7 @@ static long obd_class_ioctl(struct file *filp, unsigned int cmd, int err = 0; /* Allow non-root access for OBD_IOC_PING_TARGET - used by lfs check */ - if (!capable(CFS_CAP_SYS_ADMIN) && (cmd != OBD_IOC_PING_TARGET)) + if (!capable(CAP_SYS_ADMIN) && (cmd != OBD_IOC_PING_TARGET)) return err = -EACCES; if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */ return err = -ENOTTY; diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c index cd051e31233e..693e1129f1f9 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog.c +++ b/drivers/staging/lustre/lustre/obdclass/llog.c @@ -155,7 +155,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle, LASSERT(!handle->lgh_hdr); LASSERT(chunk_size >= LLOG_MIN_CHUNK_SIZE); - llh = libcfs_kvzalloc(sizeof(*llh), GFP_NOFS); + llh = kvzalloc(sizeof(*llh), GFP_KERNEL); if (!llh) return -ENOMEM; handle->lgh_hdr = llh; @@ -240,7 +240,7 @@ static int llog_process_thread(void *arg) /* expect chunk_size to be power of two */ LASSERT(is_power_of_2(chunk_size)); - buf = libcfs_kvzalloc(chunk_size, GFP_NOFS); + buf = kvzalloc(chunk_size, GFP_NOFS); if (!buf) { lpi->lpi_rc = -ENOMEM; return 0; @@ -466,7 +466,7 @@ int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt, struct llog_handle **lgh, struct llog_logid *logid, char *name, enum llog_open_param open_param) { - int raised; + const struct cred *old_cred = NULL; int rc; LASSERT(ctxt); @@ -483,12 +483,18 @@ int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt, (*lgh)->lgh_ctxt = ctxt; (*lgh)->lgh_logops = ctxt->loc_logops; - raised = cfs_cap_raised(CFS_CAP_SYS_RESOURCE); - if (!raised) - cfs_cap_raise(CFS_CAP_SYS_RESOURCE); + if (!cap_raised(current_cap(), CAP_SYS_RESOURCE)) { + struct cred *cred = prepare_creds(); + + if (cred) { + cap_raise(cred->cap_effective, CAP_SYS_RESOURCE); + old_cred = override_creds(cred); + } + } rc = ctxt->loc_logops->lop_open(env, *lgh, logid, name, open_param); - if (!raised) - cfs_cap_lower(CFS_CAP_SYS_RESOURCE); + if (old_cred) + revert_creds(old_cred); + if (rc) { llog_free_handle(*lgh); *lgh = NULL; diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c index 28bbaa2136ac..26aea114a29b 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c +++ b/drivers/staging/lustre/lustre/obdclass/llog_obd.c @@ -104,7 +104,6 @@ EXPORT_SYMBOL(__llog_ctxt_put); int llog_cleanup(const struct lu_env *env, struct llog_ctxt *ctxt) { -
struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); struct obd_llog_group *olg; int rc, idx; @@ -129,8 +128,8 @@ int llog_cleanup(const struct lu_env *env, struct llog_ctxt *ctxt) CERROR("Error %d while cleaning up ctxt %p\n", rc, ctxt); - l_wait_event(olg->olg_waitq, - llog_group_ctxt_null(olg, idx), &lwi); + l_wait_event_abortable(olg->olg_waitq, + llog_group_ctxt_null(olg, idx)); return rc; } diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c index e1f4ef2bddd4..2ed350527398 100644 --- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c +++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c @@ -1467,7 +1467,7 @@ int lprocfs_write_frac_u64_helper(const char __user *buffer, { char kernbuf[22], *end, *pbuf; __u64 whole, frac = 0, units; - unsigned frac_d = 1; + unsigned int frac_d = 1; int sign = 1; if (count > (sizeof(kernbuf) - 1)) @@ -1585,7 +1585,7 @@ int ldebugfs_seq_create(struct dentry *parent, const char *name, struct dentry *entry; /* Disallow secretly (un)writable entries. */ - LASSERT((seq_fops->write == NULL) == ((mode & 0222) == 0)); + LASSERT((!seq_fops->write) == ((mode & 0222) == 0)); entry = debugfs_create_file(name, mode, parent, data, seq_fops); if (IS_ERR_OR_NULL(entry)) diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index 2719abbff85f..3ae16e8501c2 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -1380,12 +1380,8 @@ static void key_fini(struct lu_context *ctx, int index) lu_ref_del(&key->lct_reference, "ctx", ctx); atomic_dec(&key->lct_used); - if ((ctx->lc_tags & LCT_NOREF) == 0) { -#ifdef CONFIG_MODULE_UNLOAD - LINVRNT(module_refcount(key->lct_owner) > 0); -#endif + if ((ctx->lc_tags & LCT_NOREF) == 0) module_put(key->lct_owner); - } ctx->lc_value[index] = NULL; } } @@ -1411,7 +1407,7 @@ void lu_context_key_degister(struct lu_context_key *key) while (atomic_read(&key->lct_used) > 1) { spin_unlock(&lu_keys_guard); CDEBUG(D_INFO, "%s: \"%s\" %p, %d\n", - __func__, key->lct_owner ? key->lct_owner->name : "", + __func__, module_name(key->lct_owner), key, atomic_read(&key->lct_used)); schedule(); spin_lock(&lu_keys_guard); @@ -1551,7 +1547,7 @@ void lu_context_key_quiesce(struct lu_context_key *key) spin_unlock(&lu_keys_guard); CDEBUG(D_INFO, "%s: \"%s\" %p, %d (%d)\n", __func__, - key->lct_owner ? 
key->lct_owner->name : "", + module_name(key->lct_owner), key, atomic_read(&key->lct_used), atomic_read(&lu_key_initing_cnt)); schedule(); @@ -1619,7 +1615,6 @@ static int keys_fill(struct lu_context *ctx) LINVRNT(key->lct_init); LINVRNT(key->lct_index == i); - LASSERT(key->lct_owner); if (!(ctx->lc_tags & LCT_NOREF) && !try_module_get(key->lct_owner)) { /* module is unloading, skip this key */ @@ -1797,10 +1792,10 @@ int lu_env_refill(struct lu_env *env) EXPORT_SYMBOL(lu_env_refill); struct lu_site_stats { - unsigned lss_populated; - unsigned lss_max_search; - unsigned lss_total; - unsigned lss_busy; + unsigned int lss_populated; + unsigned int lss_max_search; + unsigned int lss_total; + unsigned int lss_busy; }; static void lu_site_stats_get(struct cfs_hash *hs, @@ -2061,73 +2056,3 @@ void lu_kmem_fini(struct lu_kmem_descr *caches) } } EXPORT_SYMBOL(lu_kmem_fini); - -void lu_buf_free(struct lu_buf *buf) -{ - LASSERT(buf); - if (buf->lb_buf) { - LASSERT(buf->lb_len > 0); - kvfree(buf->lb_buf); - buf->lb_buf = NULL; - buf->lb_len = 0; - } -} -EXPORT_SYMBOL(lu_buf_free); - -void lu_buf_alloc(struct lu_buf *buf, size_t size) -{ - LASSERT(buf); - LASSERT(!buf->lb_buf); - LASSERT(!buf->lb_len); - buf->lb_buf = libcfs_kvzalloc(size, GFP_NOFS); - if (likely(buf->lb_buf)) - buf->lb_len = size; -} -EXPORT_SYMBOL(lu_buf_alloc); - -void lu_buf_realloc(struct lu_buf *buf, size_t size) -{ - lu_buf_free(buf); - lu_buf_alloc(buf, size); -} -EXPORT_SYMBOL(lu_buf_realloc); - -struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len) -{ - if (!buf->lb_buf && !buf->lb_len) - lu_buf_alloc(buf, len); - - if ((len > buf->lb_len) && buf->lb_buf) - lu_buf_realloc(buf, len); - - return buf; -} -EXPORT_SYMBOL(lu_buf_check_and_alloc); - -/** - * Increase the size of the \a buf. 
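
With lu_buf_alloc() and friends removed below in favour of direct allocation, callers across this series use the stock kvzalloc()/kvfree() pair (mainline since 4.12): attempt kmalloc first, fall back to vmalloc for large sizes, and free through the one helper that handles both backings. The hunks above also pick the GFP mask case by case: GFP_KERNEL on module-init and ioctl paths, GFP_NOFS where reclaim could re-enter the filesystem. Sketch:

    #include <linux/mm.h>

    static void *alloc_reply_buffer(size_t size)
    {
            /* GFP_NOFS: may be called while holding filesystem locks */
            return kvzalloc(size, GFP_NOFS);
    }

    static void free_reply_buffer(void *buf)
    {
            kvfree(buf);    /* correct for both kmalloc and vmalloc backing */
    }
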
- * preserves old data in buffer - * old buffer remains unchanged on error - * \retval 0 or -ENOMEM - */ -int lu_buf_check_and_grow(struct lu_buf *buf, size_t len) -{ - char *ptr; - - if (len <= buf->lb_len) - return 0; - - ptr = libcfs_kvzalloc(len, GFP_NOFS); - if (!ptr) - return -ENOMEM; - - /* Free the old buf */ - if (buf->lb_buf) { - memcpy(ptr, buf->lb_buf, buf->lb_len); - kvfree(buf->lb_buf); - } - - buf->lb_buf = ptr; - buf->lb_len = len; - return 0; -} diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c index 2d6da2431a09..f53b1a3c342e 100644 --- a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c +++ b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c @@ -184,8 +184,8 @@ int class_handle_init(void) LASSERT(!handle_hash); - handle_hash = libcfs_kvzalloc(sizeof(*bucket) * HANDLE_HASH_SIZE, - GFP_NOFS); + handle_hash = kvzalloc(sizeof(*bucket) * HANDLE_HASH_SIZE, + GFP_KERNEL); if (!handle_hash) return -ENOMEM; diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c index 997c0f9aafb5..277576b586db 100644 --- a/drivers/staging/lustre/lustre/obdclass/obd_config.c +++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c @@ -455,7 +455,7 @@ static int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg) spin_unlock(&obd->obd_dev_lock); while (obd->obd_conn_inprogress > 0) - yield(); + cond_resched(); smp_rmb(); if (lcfg->lcfg_bufcount >= 2 && LUSTRE_CFG_BUFLEN(lcfg, 1) > 0) { diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c index acc1ea773c9c..f5e8214ac37b 100644 --- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c +++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c @@ -243,7 +243,7 @@ int lustre_start_mgc(struct super_block *sb) libcfs_nid2str_r(nid, nidstr, sizeof(nidstr)); mgcname = kasprintf(GFP_NOFS, "%s%s", LUSTRE_MGC_OBDNAME, nidstr); - niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, i); + niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, 0); if (!mgcname || !niduuid) { rc = -ENOMEM; goto out_free; diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c index b9c1dc7e61b0..99a76db51ae0 100644 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c @@ -752,7 +752,7 @@ static struct lu_device *echo_device_free(const struct lu_env *env, spin_unlock(&ec->ec_lock); CERROR("echo_client still has objects at cleanup time, wait for 1 second\n"); set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + schedule_timeout(HZ); lu_site_purge(env, ed->ed_site, -1); spin_lock(&ec->ec_lock); } @@ -1502,7 +1502,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, switch (cmd) { case OBD_IOC_CREATE: /* may create echo object */ - if (!capable(CFS_CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_ADMIN)) { rc = -EPERM; goto out; } @@ -1511,7 +1511,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, goto out; case OBD_IOC_DESTROY: - if (!capable(CFS_CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_ADMIN)) { rc = -EPERM; goto out; } @@ -1534,7 +1534,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, goto out; case OBD_IOC_SETATTR: - if (!capable(CFS_CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_ADMIN)) { rc = -EPERM; goto out; } @@ -1547,7 +1547,7 @@ 
echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, goto out; case OBD_IOC_BRW_WRITE: - if (!capable(CFS_CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_ADMIN)) { rc = -EPERM; goto out; } diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index 5767ac2a7d16..459503727ce3 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -934,8 +934,6 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, enum osc_extent_state state) { struct osc_object *obj = ext->oe_obj; - struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL, - LWI_ON_SIGNAL_NOOP, NULL); int rc = 0; osc_object_lock(obj); @@ -958,18 +956,19 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, osc_extent_release(env, ext); /* wait for the extent until its state becomes @state */ - rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), &lwi); - if (rc == -ETIMEDOUT) { + rc = wait_event_idle_timeout(ext->oe_waitq, + extent_wait_cb(ext, state), 600 * HZ); + if (rc == 0) { OSC_EXTENT_DUMP(D_ERROR, ext, "%s: wait ext to %u timedout, recovery in progress?\n", cli_name(osc_cli(obj)), state); - lwi = LWI_INTR(NULL, NULL); - rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), - &lwi); + wait_event_idle(ext->oe_waitq, extent_wait_cb(ext, state)); } - if (rc == 0 && ext->oe_rc < 0) + if (ext->oe_rc < 0) rc = ext->oe_rc; + else + rc = 0; return rc; } @@ -1530,7 +1529,7 @@ static int osc_enter_cache_try(struct client_obd *cli, if (rc < 0) return 0; - if (cli->cl_dirty_pages <= cli->cl_dirty_max_pages && + if (cli->cl_dirty_pages < cli->cl_dirty_max_pages && atomic_long_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) { osc_consume_write_grant(cli, &oap->oap_brw_page); if (transient) { @@ -1569,12 +1568,9 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, struct osc_object *osc = oap->oap_obj; struct lov_oinfo *loi = osc->oo_oinfo; struct osc_cache_waiter ocw; - struct l_wait_info lwi; + unsigned long timeout = (AT_OFF ? obd_timeout : at_max) * HZ; int rc = -EDQUOT; - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(AT_OFF ? obd_timeout : at_max), - NULL, LWI_ON_SIGNAL_NOOP, NULL); - OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes); spin_lock(&cli->cl_loi_list_lock); @@ -1617,13 +1613,15 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, CDEBUG(D_CACHE, "%s: sleeping for cache space @ %p for %p\n", cli_name(cli), &ocw, oap); - rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi); + rc = wait_event_idle_timeout(ocw.ocw_waitq, + ocw_granted(cli, &ocw), timeout); spin_lock(&cli->cl_loi_list_lock); - if (rc < 0) { - /* l_wait_event is interrupted by signal, or timed out */ + if (rc == 0) { + /* the idle wait timed out; it cannot be interrupted by a signal */ list_del_init(&ocw.ocw_entry); + rc = -ETIMEDOUT; break; } LASSERT(list_empty(&ocw.ocw_entry)); @@ -2347,7 +2345,7 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops, oap->oap_obj_off = offset; LASSERT(!(offset & ~PAGE_MASK)); - if (capable(CFS_CAP_SYS_RESOURCE)) + if (capable(CAP_SYS_RESOURCE)) oap->oap_brw_flags = OBD_BRW_NOQUOTA; INIT_LIST_HEAD(&oap->oap_pending_item); @@ -2386,7 +2384,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, /* Set the OBD_BRW_SRVLOCK before the page is queued. */ brw_flags |= ops->ops_srvlock ?
OBD_BRW_SRVLOCK : 0; - if (capable(CFS_CAP_SYS_RESOURCE)) { + if (capable(CAP_SYS_RESOURCE)) { brw_flags |= OBD_BRW_NOQUOTA; cmd |= OBD_BRW_NOQUOTA; } diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c index f82c87a77550..6baa8e2e00c9 100644 --- a/drivers/staging/lustre/lustre/osc/osc_object.c +++ b/drivers/staging/lustre/lustre/osc/osc_object.c @@ -328,7 +328,7 @@ int osc_object_is_contended(struct osc_object *obj) * ll_file_is_contended. */ retry_time = cfs_time_add(obj->oo_contention_time, - cfs_time_seconds(osc_contention_time)); + osc_contention_time * HZ); if (cfs_time_after(cur_time, retry_time)) { osc_object_clear_contended(obj); return 0; @@ -454,12 +454,10 @@ struct lu_object *osc_object_alloc(const struct lu_env *env, int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc) { - struct l_wait_info lwi = { 0 }; - CDEBUG(D_INODE, "Invalidate osc object: %p, # of active IOs: %d\n", osc, atomic_read(&osc->oo_nr_ios)); - l_wait_event(osc->oo_io_waitq, !atomic_read(&osc->oo_nr_ios), &lwi); + wait_event_idle(osc->oo_io_waitq, !atomic_read(&osc->oo_nr_ios)); /* Discard all dirty pages of this object. */ osc_cache_truncate_start(env, osc, 0, NULL); diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c index 20094b6309f9..01a930dbbf64 100644 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ b/drivers/staging/lustre/lustre/osc/osc_page.c @@ -307,7 +307,7 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, oap->oap_count = opg->ops_to - opg->ops_from; oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC; - if (capable(CFS_CAP_SYS_RESOURCE)) { + if (capable(CAP_SYS_RESOURCE)) { oap->oap_brw_flags |= OBD_BRW_NOQUOTA; oap->oap_cmd |= OBD_BRW_NOQUOTA; } @@ -759,7 +759,6 @@ out: static int osc_lru_alloc(const struct lu_env *env, struct client_obd *cli, struct osc_page *opg) { - struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); struct osc_io *oio = osc_env_io(env); int rc = 0; @@ -782,9 +781,8 @@ static int osc_lru_alloc(const struct lu_env *env, struct client_obd *cli, cond_resched(); - rc = l_wait_event(osc_lru_waitq, - atomic_long_read(cli->cl_lru_left) > 0, - &lwi); + rc = l_wait_event_abortable(osc_lru_waitq, + atomic_long_read(cli->cl_lru_left) > 0); if (rc < 0) break; diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c index 45b1ebf33363..1c2bbbf5d864 100644 --- a/drivers/staging/lustre/lustre/osc/osc_request.c +++ b/drivers/staging/lustre/lustre/osc/osc_request.c @@ -552,14 +552,12 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp, req->rq_interpret_reply = osc_destroy_interpret; if (!osc_can_send_destroy(cli)) { - struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); - /* * Wait until the number of on-going destroy RPCs drops * under max_rpc_in_flight */ - l_wait_event_exclusive(cli->cl_destroy_waitq, - osc_can_send_destroy(cli), &lwi); + l_wait_event_abortable_exclusive(cli->cl_destroy_waitq, + osc_can_send_destroy(cli)); } /* Do not wait for response */ @@ -933,7 +931,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count, { __u32 cksum; int i = 0; - struct cfs_crypto_hash_desc *hdesc; + struct ahash_request *hdesc; unsigned int bufsize; unsigned char cfs_alg = cksum_obd2cfs(cksum_type); diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c index bac4b2304bad..ca096fadb9c0 100644 --- 
a/drivers/staging/lustre/lustre/ptlrpc/client.c +++ b/drivers/staging/lustre/lustre/ptlrpc/client.c @@ -504,19 +504,16 @@ void ptlrpc_request_cache_free(struct ptlrpc_request *req) */ void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool) { - struct list_head *l, *tmp; struct ptlrpc_request *req; - spin_lock(&pool->prp_lock); - list_for_each_safe(l, tmp, &pool->prp_req_list) { - req = list_entry(l, struct ptlrpc_request, rq_list); + while ((req = list_first_entry_or_null(&pool->prp_req_list, + struct ptlrpc_request, rq_list))) { list_del(&req->rq_list); LASSERT(req->rq_reqbuf); LASSERT(req->rq_reqbuf_len == pool->prp_rq_size); kvfree(req->rq_reqbuf); ptlrpc_request_cache_free(req); } - spin_unlock(&pool->prp_lock); kfree(pool); } EXPORT_SYMBOL(ptlrpc_free_rq_pool); @@ -544,10 +541,10 @@ int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq) struct lustre_msg *msg; spin_unlock(&pool->prp_lock); - req = ptlrpc_request_cache_alloc(GFP_NOFS); + req = ptlrpc_request_cache_alloc(GFP_KERNEL); if (!req) return i; - msg = libcfs_kvzalloc(size, GFP_NOFS); + msg = kvzalloc(size, GFP_KERNEL); if (!msg) { ptlrpc_request_cache_free(req); return i; @@ -656,16 +653,13 @@ static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request) void ptlrpc_add_unreplied(struct ptlrpc_request *req) { struct obd_import *imp = req->rq_import; - struct list_head *tmp; struct ptlrpc_request *iter; assert_spin_locked(&imp->imp_lock); LASSERT(list_empty(&req->rq_unreplied_list)); /* unreplied list is sorted by xid in ascending order */ - list_for_each_prev(tmp, &imp->imp_unreplied_list) { - iter = list_entry(tmp, struct ptlrpc_request, - rq_unreplied_list); + list_for_each_entry_reverse(iter, &imp->imp_unreplied_list, rq_unreplied_list) { LASSERT(req->rq_xid != iter->rq_xid); if (req->rq_xid < iter->rq_xid) @@ -766,7 +760,7 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, * fail_loc */ set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(2)); + schedule_timeout(2 * HZ); set_current_state(TASK_RUNNING); } } @@ -1001,18 +995,14 @@ struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func, */ void ptlrpc_set_destroy(struct ptlrpc_request_set *set) { - struct list_head *tmp; - struct list_head *next; + struct ptlrpc_request *req; int expected_phase; int n = 0; /* Requests on the set should either all be completed, or all be new */ expected_phase = (atomic_read(&set->set_remaining) == 0) ? 
RQ_PHASE_COMPLETE : RQ_PHASE_NEW; - list_for_each(tmp, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); - + list_for_each_entry(req, &set->set_requests, rq_set_chain) { LASSERT(req->rq_phase == expected_phase); n++; } @@ -1021,9 +1011,9 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set) atomic_read(&set->set_remaining) == n, "%d / %d\n", atomic_read(&set->set_remaining), n); - list_for_each_safe(tmp, next, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); + while ((req = list_first_entry_or_null(&set->set_requests, + struct ptlrpc_request, + rq_set_chain))) { list_del_init(&req->rq_set_chain); LASSERT(req->rq_phase == expected_phase); @@ -1588,7 +1578,8 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) spin_lock(&imp->imp_lock); if (!list_empty(&req->rq_list)) { list_del_init(&req->rq_list); - atomic_dec(&req->rq_import->imp_inflight); + if (atomic_dec_and_test(&req->rq_import->imp_inflight)) + wake_up_all(&req->rq_import->imp_recovery_waitq); } spin_unlock(&imp->imp_lock); ptlrpc_rqphase_move(req, RQ_PHASE_NEW); @@ -1639,7 +1630,7 @@ static inline int ptlrpc_set_producer(struct ptlrpc_request_set *set) */ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) { - struct list_head *tmp, *next; + struct ptlrpc_request *req, *next; struct list_head comp_reqs; int force_timer_recalc = 0; @@ -1647,9 +1638,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) return 1; INIT_LIST_HEAD(&comp_reqs); - list_for_each_safe(tmp, next, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); + list_for_each_entry_safe(req, next, &set->set_requests, rq_set_chain) { struct obd_import *imp = req->rq_import; int unregistered = 0; int rc = 0; @@ -1773,7 +1762,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) } /* - * ptlrpc_set_wait->l_wait_event sets lwi_allow_intr + * ptlrpc_set_wait allow signal to abort the timeout * so it sets rq_intr regardless of individual rpc * timeouts. The synchronous IO waiting path sets * rq_intr irrespective of whether ptlrpcd @@ -2121,19 +2110,15 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink) /** * Time out all uncompleted requests in request set pointed by \a data - * Callback used when waiting on sets with l_wait_event. - * Always returns 1. + * Called when wait_event_idle_timeout times out. */ -int ptlrpc_expired_set(void *data) +void ptlrpc_expired_set(struct ptlrpc_request_set *set) { - struct ptlrpc_request_set *set = data; - struct list_head *tmp; + struct ptlrpc_request *req; time64_t now = ktime_get_real_seconds(); /* A timeout expired. See which reqs it applies to... */ - list_for_each(tmp, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); + list_for_each_entry(req, &set->set_requests, rq_set_chain) { /* don't expire request waiting for context */ if (req->rq_wait_ctx) @@ -2155,13 +2140,6 @@ int ptlrpc_expired_set(void *data) */ ptlrpc_expire_one_request(req, 1); } - - /* - * When waiting for a whole set, we always break out of the - * sleep so we can recalculate the timeout, or enable interrupts - * if everyone's timed out. - */ - return 1; } /** @@ -2177,18 +2155,14 @@ EXPORT_SYMBOL(ptlrpc_mark_interrupted); /** * Interrupts (sets interrupted flag) all uncompleted requests in - * a set \a data. 
Callback for l_wait_event for interruptible waits. + * a set \a data. Called when l_wait_event_abortable_timeout receives signal. */ -static void ptlrpc_interrupted_set(void *data) +static void ptlrpc_interrupted_set(struct ptlrpc_request_set *set) { - struct ptlrpc_request_set *set = data; - struct list_head *tmp; - + struct ptlrpc_request *req; CDEBUG(D_RPCTRACE, "INTERRUPTED SET %p\n", set); - list_for_each(tmp, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); + list_for_each_entry(req, &set->set_requests, rq_set_chain) { if (req->rq_phase != RQ_PHASE_RPC && req->rq_phase != RQ_PHASE_UNREG_RPC) @@ -2203,14 +2177,12 @@ static void ptlrpc_interrupted_set(void *data) */ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) { - struct list_head *tmp; time64_t now = ktime_get_real_seconds(); int timeout = 0; struct ptlrpc_request *req; time64_t deadline; - list_for_each(tmp, &set->set_requests) { - req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); + list_for_each_entry(req, &set->set_requests, rq_set_chain) { /* Request in-flight? */ if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) || @@ -2249,17 +2221,13 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) */ int ptlrpc_set_wait(struct ptlrpc_request_set *set) { - struct list_head *tmp; struct ptlrpc_request *req; - struct l_wait_info lwi; int rc, timeout; if (set->set_producer) (void)ptlrpc_set_producer(set); else - list_for_each(tmp, &set->set_requests) { - req = list_entry(tmp, struct ptlrpc_request, - rq_set_chain); + list_for_each_entry(req, &set->set_requests, rq_set_chain) { if (req->rq_phase == RQ_PHASE_NEW) (void)ptlrpc_send_new_req(req); } @@ -2277,46 +2245,47 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n", set, timeout); - if (timeout == 0 && !signal_pending(current)) + if (timeout == 0 && !signal_pending(current)) { /* * No requests are in-flight (ether timed out * or delayed), so we can allow interrupts. * We still want to block for a limited time, * so we allow interrupts during the timeout. */ - lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1), - ptlrpc_expired_set, - ptlrpc_interrupted_set, set); - else + rc = l_wait_event_abortable_timeout(set->set_waitq, + ptlrpc_check_set(NULL, set), + HZ); + if (rc == 0) { + rc = -ETIMEDOUT; + ptlrpc_expired_set(set); + } else if (rc < 0) { + rc = -EINTR; + ptlrpc_interrupted_set(set); + } else + rc = 0; + } else { /* * At least one request is in flight, so no * interrupts are allowed. Wait until all * complete, or an in-flight req times out. */ - lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1), - ptlrpc_expired_set, set); - - rc = l_wait_event(set->set_waitq, ptlrpc_check_set(NULL, set), &lwi); - - /* - * LU-769 - if we ignored the signal because it was already - * pending when we started, we need to handle it now or we risk - * it being ignored forever - */ - if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr && - signal_pending(current)) { - sigset_t blocked_sigs = - cfs_block_sigsinv(LUSTRE_FATAL_SIGS); - - /* - * In fact we only interrupt for the "fatal" signals - * like SIGINT or SIGKILL. We still ignore less - * important signals since ptlrpc set is not easily - * reentrant from userspace again - */ - if (signal_pending(current)) - ptlrpc_interrupted_set(set); - cfs_restore_sigs(blocked_sigs); + rc = wait_event_idle_timeout(set->set_waitq, + ptlrpc_check_set(NULL, set), + (timeout ? 
timeout : 1) * HZ); + if (rc == 0) { + ptlrpc_expired_set(set); + rc = -ETIMEDOUT; + /* + * LU-769 - if we ignored the signal + * because it was already pending when + * we started, we need to handle it + * now or we risk it being ignored + * forever + */ + if (l_fatal_signal_pending(current)) + ptlrpc_interrupted_set(set); + } else + rc = 0; } LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT); @@ -2331,9 +2300,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) * the error cases -eeb. */ if (rc == 0 && atomic_read(&set->set_remaining) == 0) { - list_for_each(tmp, &set->set_requests) { - req = list_entry(tmp, struct ptlrpc_request, - rq_set_chain); + list_for_each_entry(req, &set->set_requests, rq_set_chain) { spin_lock(&req->rq_lock); req->rq_invalid_rqset = 1; spin_unlock(&req->rq_lock); @@ -2344,9 +2311,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) LASSERT(atomic_read(&set->set_remaining) == 0); rc = set->set_rc; /* rq_status of already freed requests if any */ - list_for_each(tmp, &set->set_requests) { - req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); - + list_for_each_entry(req, &set->set_requests, rq_set_chain) { LASSERT(req->rq_phase == RQ_PHASE_COMPLETE); if (req->rq_status != 0) rc = req->rq_status; @@ -2495,7 +2460,6 @@ static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) { int rc; wait_queue_head_t *wq; - struct l_wait_info lwi; /* Might sleep. */ LASSERT(!in_interrupt()); @@ -2524,7 +2488,7 @@ static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) return 0; /* - * We have to l_wait_event() whatever the result, to give liblustre + * We have to wait_event_idle_timeout() whatever the result, to give liblustre * a chance to run reply_in_callback(), and to make sure we've * unlinked before returning a req to the pool. */ @@ -2538,16 +2502,17 @@ static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) * Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish NALs */ - lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK), - cfs_time_seconds(1), NULL, NULL); - rc = l_wait_event(*wq, !ptlrpc_client_recv_or_unlink(request), - &lwi); - if (rc == 0) { + int cnt = 0; + while (cnt < LONG_UNLINK && + (rc = wait_event_idle_timeout(*wq, + !ptlrpc_client_recv_or_unlink(request), + HZ)) == 0) + cnt += 1; + if (rc > 0) { ptlrpc_rqphase_move(request, request->rq_next_phase); return 1; } - LASSERT(rc == -ETIMEDOUT); DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout receiving_reply=%d req_ulinked=%d reply_unlinked=%d", request->rq_receiving_reply, @@ -2725,8 +2690,7 @@ EXPORT_SYMBOL(ptlrpc_request_addref); void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, struct obd_import *imp) { - struct list_head *tmp; - + struct ptlrpc_request *iter; assert_spin_locked(&imp->imp_lock); if (req->rq_transno == 0) { @@ -2753,10 +2717,7 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, LASSERT(imp->imp_replayable); /* Balanced in ptlrpc_free_committed, usually. 
*/ ptlrpc_request_addref(req); - list_for_each_prev(tmp, &imp->imp_replay_list) { - struct ptlrpc_request *iter = - list_entry(tmp, struct ptlrpc_request, rq_replay_list); - + list_for_each_entry_reverse(iter, &imp->imp_replay_list, rq_replay_list) { /* * We may have duplicate transnos if we create and then * open a file, or for closes retained if to match creating @@ -2964,7 +2925,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) */ void ptlrpc_abort_inflight(struct obd_import *imp) { - struct list_head *tmp, *n; + struct ptlrpc_request *req, *n; /* * Make sure that no new requests get processed for this import. @@ -2978,10 +2939,7 @@ void ptlrpc_abort_inflight(struct obd_import *imp) * locked? Also, how do we know if the requests on the list are * being freed at this time? */ - list_for_each_safe(tmp, n, &imp->imp_sending_list) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_list); - + list_for_each_entry_safe(req, n, &imp->imp_sending_list, rq_list) { DEBUG_REQ(D_RPCTRACE, req, "inflight"); spin_lock(&req->rq_lock); @@ -2993,10 +2951,7 @@ void ptlrpc_abort_inflight(struct obd_import *imp) spin_unlock(&req->rq_lock); } - list_for_each_safe(tmp, n, &imp->imp_delayed_list) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_list); - + list_for_each_entry_safe(req, n, &imp->imp_delayed_list, rq_list) { DEBUG_REQ(D_RPCTRACE, req, "aborting waiting req"); spin_lock(&req->rq_lock); @@ -3023,12 +2978,9 @@ void ptlrpc_abort_inflight(struct obd_import *imp) */ void ptlrpc_abort_set(struct ptlrpc_request_set *set) { - struct list_head *tmp, *pos; - - list_for_each_safe(pos, tmp, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(pos, struct ptlrpc_request, rq_set_chain); + struct ptlrpc_request *req, *tmp; + list_for_each_entry_safe(req, tmp, &set->set_requests, rq_set_chain) { spin_lock(&req->rq_lock); if (req->rq_phase != RQ_PHASE_RPC) { spin_unlock(&req->rq_lock); diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c index 811b7ab3a582..130bacc2c891 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/events.c +++ b/drivers/staging/lustre/lustre/ptlrpc/events.c @@ -490,8 +490,6 @@ int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, static void ptlrpc_ni_fini(void) { - wait_queue_head_t waitq; - struct l_wait_info lwi; int rc; int retries; @@ -515,10 +513,7 @@ static void ptlrpc_ni_fini(void) if (retries != 0) CWARN("Event queue still busy\n"); - /* Wait for a bit */ - init_waitqueue_head(&waitq); - lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL); - l_wait_event(waitq, 0, &lwi); + schedule_timeout_uninterruptible(2 * HZ); break; } } diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c index 5b0f65536c29..a2c4fc3488b1 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/import.c +++ b/drivers/staging/lustre/lustre/ptlrpc/import.c @@ -242,15 +242,13 @@ ptlrpc_inflight_deadline(struct ptlrpc_request *req, time64_t now) static unsigned int ptlrpc_inflight_timeout(struct obd_import *imp) { time64_t now = ktime_get_real_seconds(); - struct list_head *tmp, *n; - struct ptlrpc_request *req; + struct ptlrpc_request *req, *n; unsigned int timeout = 0; spin_lock(&imp->imp_lock); - list_for_each_safe(tmp, n, &imp->imp_sending_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); + list_for_each_entry_safe(req, n, &imp->imp_sending_list, rq_list) timeout = max(ptlrpc_inflight_deadline(req, now), timeout); - } + 
spin_unlock(&imp->imp_lock); return timeout; } @@ -263,9 +261,7 @@ static unsigned int ptlrpc_inflight_timeout(struct obd_import *imp) */ void ptlrpc_invalidate_import(struct obd_import *imp) { - struct list_head *tmp, *n; - struct ptlrpc_request *req; - struct l_wait_info lwi; + struct ptlrpc_request *req, *n; unsigned int timeout; int rc; @@ -306,19 +302,15 @@ void ptlrpc_invalidate_import(struct obd_import *imp) * callbacks. Cap it at obd_timeout -- these should all * have been locally cancelled by ptlrpc_abort_inflight. */ - lwi = LWI_TIMEOUT_INTERVAL( - cfs_timeout_cap(cfs_time_seconds(timeout)), - (timeout > 1) ? cfs_time_seconds(1) : - cfs_time_seconds(1) / 2, - NULL, NULL); - rc = l_wait_event(imp->imp_recovery_waitq, - (atomic_read(&imp->imp_inflight) == 0), - &lwi); - if (rc) { + rc = wait_event_idle_timeout(imp->imp_recovery_waitq, + atomic_read(&imp->imp_inflight) == 0, + obd_timeout * HZ); + + if (rc == 0) { const char *cli_tgt = obd2cli_tgt(imp->imp_obd); - CERROR("%s: rc = %d waiting for callback (%d != 0)\n", - cli_tgt, rc, + CERROR("%s: timeout waiting for callback (%d != 0)\n", + cli_tgt, atomic_read(&imp->imp_inflight)); spin_lock(&imp->imp_lock); @@ -341,19 +333,13 @@ void ptlrpc_invalidate_import(struct obd_import *imp) */ rc = 0; } else { - list_for_each_safe(tmp, n, - &imp->imp_sending_list) { - req = list_entry(tmp, - struct ptlrpc_request, - rq_list); + list_for_each_entry_safe(req, n, + &imp->imp_sending_list, rq_list) { DEBUG_REQ(D_ERROR, req, "still on sending list"); } - list_for_each_safe(tmp, n, - &imp->imp_delayed_list) { - req = list_entry(tmp, - struct ptlrpc_request, - rq_list); + list_for_each_entry_safe(req, n, + &imp->imp_delayed_list, rq_list) { DEBUG_REQ(D_ERROR, req, "still on delayed list"); } @@ -365,7 +351,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp) } spin_unlock(&imp->imp_lock); } - } while (rc != 0); + } while (rc == 0); /* * Let's additionally check that no new rpcs added to import in @@ -430,21 +416,19 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt) int ptlrpc_reconnect_import(struct obd_import *imp) { - struct l_wait_info lwi; - int secs = cfs_time_seconds(obd_timeout); int rc; ptlrpc_pinger_force(imp); CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n", - obd2cli_tgt(imp->imp_obd), secs); + obd2cli_tgt(imp->imp_obd), obd_timeout); - lwi = LWI_TIMEOUT(secs, NULL, NULL); - rc = l_wait_event(imp->imp_recovery_waitq, - !ptlrpc_import_in_recovery(imp), &lwi); + rc = wait_event_idle_timeout(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp), + obd_timeout * HZ); CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd), ptlrpc_import_state_name(imp->imp_state)); - return rc; + return rc == 0 ? 
-ETIMEDOUT : 0; } EXPORT_SYMBOL(ptlrpc_reconnect_import); @@ -564,14 +548,13 @@ static int import_select_connection(struct obd_import *imp) static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno) { struct ptlrpc_request *req; - struct list_head *tmp; /* The requests in committed_list always have smaller transnos than * the requests in replay_list */ if (!list_empty(&imp->imp_committed_list)) { - tmp = imp->imp_committed_list.next; - req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); + req = list_first_entry(&imp->imp_committed_list, + struct ptlrpc_request, rq_replay_list); *transno = req->rq_transno; if (req->rq_transno == 0) { DEBUG_REQ(D_ERROR, req, @@ -581,8 +564,8 @@ static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno) return 1; } if (!list_empty(&imp->imp_replay_list)) { - tmp = imp->imp_replay_list.next; - req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); + req = list_first_entry(&imp->imp_replay_list, + struct ptlrpc_request, rq_replay_list); *transno = req->rq_transno; if (req->rq_transno == 0) { DEBUG_REQ(D_ERROR, req, "zero transno in replay_list"); @@ -1503,25 +1486,25 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) } if (ptlrpc_import_in_recovery(imp)) { - struct l_wait_info lwi; long timeout; if (AT_OFF) { if (imp->imp_server_timeout) - timeout = cfs_time_seconds(obd_timeout / 2); + timeout = obd_timeout * HZ / 2; else - timeout = cfs_time_seconds(obd_timeout); + timeout = obd_timeout * HZ; } else { int idx = import_at_get_index(imp, imp->imp_client->cli_request_portal); - timeout = cfs_time_seconds( - at_get(&imp->imp_at.iat_service_estimate[idx])); + timeout = at_get(&imp->imp_at.iat_service_estimate[idx]) * HZ; } - lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(timeout), - back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL); - rc = l_wait_event(imp->imp_recovery_waitq, - !ptlrpc_import_in_recovery(imp), &lwi); + if (wait_event_idle_timeout(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp), + cfs_timeout_cap(timeout)) == 0) + l_wait_event_abortable( + imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp)); } spin_lock(&imp->imp_lock); diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c index 18769d335751..2855f38c8190 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/layout.c +++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c @@ -1555,7 +1555,7 @@ struct req_format RQF_OST_GET_INFO_FIEMAP = EXPORT_SYMBOL(RQF_OST_GET_INFO_FIEMAP); /* Convenience macro */ -#define FMT_FIELD(fmt, i, j) (fmt)->rf_fields[(i)].d[(j)] +#define FMT_FIELD(fmt, i, j) ((fmt)->rf_fields[(i)].d[(j)]) /** * Initializes the capsule abstraction by computing and setting the \a rf_idx diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c index 047d712e850c..86883abaad2c 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c +++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c @@ -229,7 +229,6 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) { struct ptlrpc_bulk_desc *desc = req->rq_bulk; wait_queue_head_t *wq; - struct l_wait_info lwi; int rc; LASSERT(!in_interrupt()); /* might sleep */ @@ -246,7 +245,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) /* the unlink ensures the callback happens ASAP and is the last * one. 
If it fails, it must be because completion just happened, - * but we must still l_wait_event() in this case to give liblustre + * but we must still wait_event() in this case to give liblustre * a chance to run client_bulk_callback() */ mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); @@ -270,15 +269,17 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) /* Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish LNDs */ - lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK), - cfs_time_seconds(1), NULL, NULL); - rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi); - if (rc == 0) { + int cnt = 0; + while (cnt < LONG_UNLINK && + (rc = wait_event_idle_timeout(*wq, + !ptlrpc_client_bulk_active(req), + HZ)) == 0) + cnt += 1; + if (rc > 0) { ptlrpc_rqphase_move(req, req->rq_next_phase); return 1; } - LASSERT(rc == -ETIMEDOUT); DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p", desc); } diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c index a64e125df95f..f73463ac401f 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c +++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c @@ -260,17 +260,16 @@ lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt) /* See if we have anything in a pool, and wait if nothing */ while (list_empty(&svcpt->scp_rep_idle)) { - struct l_wait_info lwi; int rc; spin_unlock(&svcpt->scp_rep_lock); /* If we cannot get anything for some long time, we better * bail out instead of waiting infinitely */ - lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL); - rc = l_wait_event(svcpt->scp_rep_waitq, - !list_empty(&svcpt->scp_rep_idle), &lwi); - if (rc != 0) + rc = wait_event_idle_timeout(svcpt->scp_rep_waitq, + !list_empty(&svcpt->scp_rep_idle), + 10 * HZ); + if (rc == 0) goto out; spin_lock(&svcpt->scp_rep_lock); } diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c index fe6b47bfe8be..0775b7a048bb 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c +++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c @@ -141,7 +141,7 @@ static long pinger_check_timeout(unsigned long time) } mutex_unlock(&pinger_mutex); - return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)), + return cfs_time_sub(cfs_time_add(time, timeout * HZ), cfs_time_current()); } @@ -217,37 +217,29 @@ static void ptlrpc_pinger_process_import(struct obd_import *imp, } } -static int ptlrpc_pinger_main(void *arg) -{ - struct ptlrpc_thread *thread = arg; - - /* Record that the thread is running */ - thread_set_flags(thread, SVC_RUNNING); - wake_up(&thread->t_ctl_waitq); +static struct workqueue_struct *pinger_wq; +static void ptlrpc_pinger_main(struct work_struct *ws); +static DECLARE_DELAYED_WORK(ping_work, ptlrpc_pinger_main); - /* And now, loop forever, pinging as needed. 
*/ - while (1) { - unsigned long this_ping = cfs_time_current(); - struct l_wait_info lwi; - long time_to_next_wake; - struct timeout_item *item; - struct list_head *iter; +static void ptlrpc_pinger_main(struct work_struct *ws) +{ + unsigned long this_ping = cfs_time_current(); + long time_to_next_wake; + struct timeout_item *item; + struct obd_import *imp; + do { mutex_lock(&pinger_mutex); list_for_each_entry(item, &timeout_list, ti_chain) { item->ti_cb(item, item->ti_cb_data); } - list_for_each(iter, &pinger_imports) { - struct obd_import *imp = - list_entry(iter, struct obd_import, - imp_pinger_chain); - + list_for_each_entry(imp, &pinger_imports, imp_pinger_chain) { ptlrpc_pinger_process_import(imp, this_ping); /* obd_timeout might have changed */ if (imp->imp_pingable && imp->imp_next_ping && cfs_time_after(imp->imp_next_ping, cfs_time_add(this_ping, - cfs_time_seconds(PING_INTERVAL)))) + PING_INTERVAL * HZ))) ptlrpc_update_next_ping(imp, 0); } mutex_unlock(&pinger_mutex); @@ -264,55 +256,25 @@ static int ptlrpc_pinger_main(void *arg) CDEBUG(D_INFO, "next wakeup in " CFS_DURATION_T " (%ld)\n", time_to_next_wake, cfs_time_add(this_ping, - cfs_time_seconds(PING_INTERVAL))); - if (time_to_next_wake > 0) { - lwi = LWI_TIMEOUT(max_t(long, time_to_next_wake, - cfs_time_seconds(1)), - NULL, NULL); - l_wait_event(thread->t_ctl_waitq, - thread_is_stopping(thread) || - thread_is_event(thread), - &lwi); - if (thread_test_and_clear_flags(thread, SVC_STOPPING)) - break; - /* woken after adding import to reset timer */ - thread_test_and_clear_flags(thread, SVC_EVENT); - } - } - - thread_set_flags(thread, SVC_STOPPED); - wake_up(&thread->t_ctl_waitq); + PING_INTERVAL * HZ)); + } while (time_to_next_wake <= 0); - CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid()); - return 0; + queue_delayed_work(pinger_wq, &ping_work, + round_jiffies_up_relative(time_to_next_wake)); } -static struct ptlrpc_thread pinger_thread; - int ptlrpc_start_pinger(void) { - struct l_wait_info lwi = { 0 }; - struct task_struct *task; - int rc; - - if (!thread_is_init(&pinger_thread) && - !thread_is_stopped(&pinger_thread)) + if (pinger_wq) return -EALREADY; - init_waitqueue_head(&pinger_thread.t_ctl_waitq); - - strcpy(pinger_thread.t_name, "ll_ping"); - - task = kthread_run(ptlrpc_pinger_main, &pinger_thread, - pinger_thread.t_name); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - CERROR("cannot start pinger thread: rc = %d\n", rc); - return rc; + pinger_wq = alloc_workqueue("ptlrpc_pinger", WQ_MEM_RECLAIM, 1); + if (!pinger_wq) { + CERROR("cannot start pinger workqueue\n"); + return -ENOMEM; } - l_wait_event(pinger_thread.t_ctl_waitq, - thread_is_running(&pinger_thread), &lwi); + queue_delayed_work(pinger_wq, &ping_work, 0); return 0; } @@ -320,19 +282,15 @@ static int ptlrpc_pinger_remove_timeouts(void); int ptlrpc_stop_pinger(void) { - struct l_wait_info lwi = { 0 }; int rc = 0; - if (thread_is_init(&pinger_thread) || - thread_is_stopped(&pinger_thread)) + if (!pinger_wq) return -EALREADY; ptlrpc_pinger_remove_timeouts(); - thread_set_flags(&pinger_thread, SVC_STOPPING); - wake_up(&pinger_thread.t_ctl_waitq); - - l_wait_event(pinger_thread.t_ctl_waitq, - thread_is_stopped(&pinger_thread), &lwi); + cancel_delayed_work_sync(&ping_work); + destroy_workqueue(pinger_wq); + pinger_wq = NULL; return rc; } @@ -515,6 +473,5 @@ static int ptlrpc_pinger_remove_timeouts(void) void ptlrpc_pinger_wake_up(void) { - thread_add_flags(&pinger_thread, SVC_EVENT); - wake_up(&pinger_thread.t_ctl_waitq); + 
mod_delayed_work(pinger_wq, &ping_work, 0); } diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h index f9decbd1459d..b7a8d7537a66 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h @@ -68,7 +68,7 @@ void ptlrpc_request_cache_free(struct ptlrpc_request *req); void ptlrpc_init_xid(void); void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc, struct ptlrpc_request *req); -int ptlrpc_expired_set(void *data); +void ptlrpc_expired_set(struct ptlrpc_request_set *set); int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set); void ptlrpc_resend_req(struct ptlrpc_request *request); void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req); diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c index 131fc6d9646e..38923418669f 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c @@ -45,6 +45,42 @@ extern spinlock_t ptlrpc_last_xid_lock; extern spinlock_t ptlrpc_rs_debug_lock; #endif +DEFINE_MUTEX(ptlrpc_startup); +static int ptlrpc_active = 0; + +int ptlrpc_inc_ref(void) +{ + int rc = 0; + + mutex_lock(&ptlrpc_startup); + if (ptlrpc_active++ == 0) { + ptlrpc_put_connection_superhack = ptlrpc_connection_put; + + rc = ptlrpc_init_portals(); + if (!rc) { + rc= ptlrpc_start_pinger(); + if (rc) + ptlrpc_exit_portals(); + } + if (rc) + ptlrpc_active--; + } + mutex_unlock(&ptlrpc_startup); + return rc; +} +EXPORT_SYMBOL(ptlrpc_inc_ref); + +void ptlrpc_dec_ref(void) +{ + mutex_lock(&ptlrpc_startup); + if (--ptlrpc_active == 0) { + ptlrpc_stop_pinger(); + ptlrpc_exit_portals(); + } + mutex_unlock(&ptlrpc_startup); +} +EXPORT_SYMBOL(ptlrpc_dec_ref); + static int __init ptlrpc_init(void) { int rc, cleanup_phase = 0; @@ -71,24 +107,12 @@ static int __init ptlrpc_init(void) if (rc) goto cleanup; - cleanup_phase = 2; - rc = ptlrpc_init_portals(); - if (rc) - goto cleanup; - cleanup_phase = 3; rc = ptlrpc_connection_init(); if (rc) goto cleanup; - cleanup_phase = 4; - ptlrpc_put_connection_superhack = ptlrpc_connection_put; - - rc = ptlrpc_start_pinger(); - if (rc) - goto cleanup; - cleanup_phase = 5; rc = ldlm_init(); if (rc) @@ -122,15 +146,9 @@ cleanup: ldlm_exit(); /* Fall through */ case 5: - ptlrpc_stop_pinger(); - /* Fall through */ - case 4: ptlrpc_connection_fini(); /* Fall through */ case 3: - ptlrpc_exit_portals(); - /* Fall through */ - case 2: ptlrpc_request_cache_fini(); /* Fall through */ case 1: @@ -150,8 +168,6 @@ static void __exit ptlrpc_exit(void) ptlrpc_nrs_fini(); sptlrpc_fini(); ldlm_exit(); - ptlrpc_stop_pinger(); - ptlrpc_exit_portals(); ptlrpc_request_cache_fini(); ptlrpc_hr_fini(); ptlrpc_connection_fini(); diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c index 8b865294d933..c0fa13942bd8 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c @@ -197,17 +197,14 @@ ptlrpcd_select_pc(struct ptlrpc_request *req) static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des, struct ptlrpc_request_set *src) { - struct list_head *tmp, *pos; - struct ptlrpc_request *req; + struct ptlrpc_request *req, *tmp; int rc = 0; spin_lock(&src->set_new_req_lock); if (likely(!list_empty(&src->set_new_requests))) { - list_for_each_safe(pos, tmp, &src->set_new_requests) { - req = list_entry(pos, struct 
ptlrpc_request, - rq_set_chain); + list_for_each_entry_safe(req, tmp, &src->set_new_requests, rq_set_chain) req->rq_set = des; - } + list_splice_init(&src->set_new_requests, &des->set_requests); rc = atomic_read(&src->set_new_count); atomic_add(rc, &des->set_remaining); @@ -230,12 +227,13 @@ void ptlrpcd_add_req(struct ptlrpc_request *req) spin_lock(&req->rq_lock); if (req->rq_invalid_rqset) { - struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5), - back_to_sleep, NULL); - req->rq_invalid_rqset = 0; spin_unlock(&req->rq_lock); - l_wait_event(req->rq_set_waitq, !req->rq_set, &lwi); + if (wait_event_idle_timeout(req->rq_set_waitq, + !req->rq_set, + 5 * HZ) == 0) + wait_event_idle(req->rq_set_waitq, + !req->rq_set); } else if (req->rq_set) { /* If we have a valid "rq_set", just reuse it to avoid double * linked. @@ -272,8 +270,7 @@ static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set) */ static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc) { - struct list_head *tmp, *pos; - struct ptlrpc_request *req; + struct ptlrpc_request *req, *tmp; struct ptlrpc_request_set *set = pc->pc_set; int rc = 0; int rc2; @@ -319,8 +316,7 @@ static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc) /* NB: ptlrpc_check_set has already moved completed request at the * head of seq::set_requests */ - list_for_each_safe(pos, tmp, &set->set_requests) { - req = list_entry(pos, struct ptlrpc_request, rq_set_chain); + list_for_each_entry_safe(req, tmp, &set->set_requests, rq_set_chain) { if (req->rq_phase != RQ_PHASE_COMPLETE) break; @@ -434,16 +430,17 @@ static int ptlrpcd(void *arg) * new_req_list and ptlrpcd_check() moves them into the set. */ do { - struct l_wait_info lwi; int timeout; timeout = ptlrpc_set_next_timeout(set); - lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1), - ptlrpc_expired_set, set); lu_context_enter(&env.le_ctx); lu_context_enter(env.le_ses); - l_wait_event(set->set_waitq, ptlrpcd_check(&env, pc), &lwi); + if (wait_event_idle_timeout(set->set_waitq, + ptlrpcd_check(&env, pc), + (timeout ? 
timeout : 1) * HZ) == 0) + ptlrpc_expired_set(set); + lu_context_exit(&env.le_ctx); lu_context_exit(env.le_ses); diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c index e4d3f23e9f3a..2ea0a7ff87dd 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/recover.c +++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c @@ -66,8 +66,7 @@ void ptlrpc_initiate_recovery(struct obd_import *imp) int ptlrpc_replay_next(struct obd_import *imp, int *inflight) { int rc = 0; - struct list_head *tmp, *pos; - struct ptlrpc_request *req = NULL; + struct ptlrpc_request *req = NULL, *pos; __u64 last_transno; *inflight = 0; @@ -86,8 +85,8 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) /* Replay all the committed open requests on committed_list first */ if (!list_empty(&imp->imp_committed_list)) { - tmp = imp->imp_committed_list.prev; - req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); + req = list_last_entry(&imp->imp_committed_list, + struct ptlrpc_request, rq_replay_list); /* The last request on committed_list hasn't been replayed */ if (req->rq_transno > last_transno) { @@ -119,13 +118,13 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) * the imp_replay_list */ if (!req) { - list_for_each_safe(tmp, pos, &imp->imp_replay_list) { - req = list_entry(tmp, struct ptlrpc_request, - rq_replay_list); - - if (req->rq_transno > last_transno) + struct ptlrpc_request *tmp; + list_for_each_entry_safe(tmp, pos, &imp->imp_replay_list, + rq_replay_list) { + if (tmp->rq_transno > last_transno) { + req = tmp; break; - req = NULL; + } } } @@ -211,13 +210,10 @@ int ptlrpc_resend(struct obd_import *imp) */ void ptlrpc_wake_delayed(struct obd_import *imp) { - struct list_head *tmp, *pos; - struct ptlrpc_request *req; + struct ptlrpc_request *req, *pos; spin_lock(&imp->imp_lock); - list_for_each_safe(tmp, pos, &imp->imp_delayed_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - + list_for_each_entry_safe(req, pos, &imp->imp_delayed_list, rq_list) { DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set); ptlrpc_client_wake_req(req); } @@ -346,17 +342,15 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async) goto out; if (!async) { - struct l_wait_info lwi; - int secs = cfs_time_seconds(obd_timeout); - CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n", - obd2cli_tgt(imp->imp_obd), secs); + obd2cli_tgt(imp->imp_obd), obd_timeout); - lwi = LWI_TIMEOUT(secs, NULL, NULL); - rc = l_wait_event(imp->imp_recovery_waitq, - !ptlrpc_import_in_recovery(imp), &lwi); + rc = wait_event_idle_timeout(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp), + obd_timeout * HZ); CDEBUG(D_HA, "%s: recovery finished\n", obd2cli_tgt(imp->imp_obd)); + rc = rc ? 
0 : -ETIMEDOUT; } out: diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c index 617e004d00f8..3cb1e075f077 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -339,11 +339,9 @@ static int import_sec_validate_get(struct obd_import *imp, } *sec = sptlrpc_import_sec_ref(imp); - /* Only output an error when the import is still active */ if (!*sec) { - if (list_empty(&imp->imp_zombie_chain)) - CERROR("import %p (%s) with no sec\n", - imp, ptlrpc_import_state_name(imp->imp_state)); + CERROR("import %p (%s) with no sec\n", + imp, ptlrpc_import_state_name(imp->imp_state)); return -EACCES; } @@ -442,7 +440,7 @@ int sptlrpc_req_ctx_switch(struct ptlrpc_request *req, /* save request message */ reqmsg_size = req->rq_reqlen; if (reqmsg_size != 0) { - reqmsg = libcfs_kvzalloc(reqmsg_size, GFP_NOFS); + reqmsg = kvzalloc(reqmsg_size, GFP_NOFS); if (!reqmsg) return -ENOMEM; memcpy(reqmsg, req->rq_reqmsg, reqmsg_size); @@ -554,9 +552,8 @@ int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx) } static -int ctx_refresh_timeout(void *data) +int ctx_refresh_timeout(struct ptlrpc_request *req) { - struct ptlrpc_request *req = data; int rc; /* conn_cnt is needed in expire_one_request */ @@ -575,10 +572,8 @@ int ctx_refresh_timeout(void *data) } static -void ctx_refresh_interrupt(void *data) +void ctx_refresh_interrupt(struct ptlrpc_request *req) { - struct ptlrpc_request *req = data; - spin_lock(&req->rq_lock); req->rq_intr = 1; spin_unlock(&req->rq_lock); @@ -611,7 +606,6 @@ int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout) { struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; struct ptlrpc_sec *sec; - struct l_wait_info lwi; int rc; LASSERT(ctx); @@ -743,10 +737,28 @@ again: req->rq_restart = 0; spin_unlock(&req->rq_lock); - lwi = LWI_TIMEOUT_INTR(msecs_to_jiffies(timeout * MSEC_PER_SEC), - ctx_refresh_timeout, ctx_refresh_interrupt, - req); - rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi); + rc = wait_event_idle_timeout(req->rq_reply_waitq, + ctx_check_refresh(ctx), + timeout * HZ); + if (rc == 0 && ctx_refresh_timeout(req) == 0) { + /* Keep waiting, but enable some signals */ + rc = l_wait_event_abortable(req->rq_reply_waitq, + ctx_check_refresh(ctx)); + if (rc == 0) + rc = 1; + } + + if (rc > 0) + /* condition is true */ + rc = 0; + else if (rc == 0) + /* Timed out */ + rc = -ETIMEDOUT; + else { + /* Aborted by signal */ + rc = -EINTR; + ctx_refresh_interrupt(req); + } /* * following cases could lead us here: @@ -1075,7 +1087,7 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, early_size = req->rq_nob_received; early_bufsz = size_roundup_power2(early_size); - early_buf = libcfs_kvzalloc(early_bufsz, GFP_NOFS); + early_buf = kvzalloc(early_bufsz, GFP_NOFS); if (!early_buf) { rc = -ENOMEM; goto err_req; diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index 134ee727e8b7..625b9520d78f 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -375,9 +375,9 @@ static inline void enc_pools_alloc(void) { LASSERT(page_pools.epp_max_pools); page_pools.epp_pools = - libcfs_kvzalloc(page_pools.epp_max_pools * + kvzalloc(page_pools.epp_max_pools * sizeof(*page_pools.epp_pools), - GFP_NOFS); + GFP_KERNEL); } static inline void enc_pools_free(void) @@ -530,7 +530,7 @@ EXPORT_SYMBOL(bulk_sec_desc_unpack); int sptlrpc_get_bulk_checksum(struct 
ptlrpc_bulk_desc *desc, __u8 alg, void *buf, int buflen) { - struct cfs_crypto_hash_desc *hdesc; + struct ahash_request *hdesc; int hashsize; unsigned int bufsize; int i, err; diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c index 8d1e0edfcede..2c8bad7b7877 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c @@ -55,7 +55,6 @@ static spinlock_t sec_gc_list_lock; static LIST_HEAD(sec_gc_ctx_list); static spinlock_t sec_gc_ctx_list_lock; -static struct ptlrpc_thread sec_gc_thread; static atomic_t sec_gc_wait_del = ATOMIC_INIT(0); void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec) @@ -139,95 +138,53 @@ static void sec_do_gc(struct ptlrpc_sec *sec) sec->ps_gc_next = ktime_get_real_seconds() + sec->ps_gc_interval; } -static int sec_gc_main(void *arg) -{ - struct ptlrpc_thread *thread = arg; - struct l_wait_info lwi; - - unshare_fs_struct(); +static void sec_gc_main(struct work_struct *ws); +static DECLARE_DELAYED_WORK(sec_gc_work, sec_gc_main); - /* Record that the thread is running */ - thread_set_flags(thread, SVC_RUNNING); - wake_up(&thread->t_ctl_waitq); - - while (1) { - struct ptlrpc_sec *sec; +static void sec_gc_main(struct work_struct *ws) +{ + struct ptlrpc_sec *sec; - thread_clear_flags(thread, SVC_SIGNAL); - sec_process_ctx_list(); + sec_process_ctx_list(); again: - /* go through sec list do gc. - * FIXME here we iterate through the whole list each time which - * is not optimal. we perhaps want to use balanced binary tree - * to trace each sec as order of expiry time. - * another issue here is we wakeup as fixed interval instead of - * according to each sec's expiry time + /* go through sec list do gc. + * FIXME here we iterate through the whole list each time which + * is not optimal. we perhaps want to use balanced binary tree + * to trace each sec as order of expiry time. + * another issue here is we wakeup as fixed interval instead of + * according to each sec's expiry time + */ + mutex_lock(&sec_gc_mutex); + list_for_each_entry(sec, &sec_gc_list, ps_gc_list) { + /* if someone is waiting to be deleted, let it + * proceed as soon as possible. */ - mutex_lock(&sec_gc_mutex); - list_for_each_entry(sec, &sec_gc_list, ps_gc_list) { - /* if someone is waiting to be deleted, let it - * proceed as soon as possible. 
- */ - if (atomic_read(&sec_gc_wait_del)) { - CDEBUG(D_SEC, "deletion pending, start over\n"); - mutex_unlock(&sec_gc_mutex); - goto again; - } - - sec_do_gc(sec); + if (atomic_read(&sec_gc_wait_del)) { + CDEBUG(D_SEC, "deletion pending, start over\n"); + mutex_unlock(&sec_gc_mutex); + goto again; } - mutex_unlock(&sec_gc_mutex); - - /* check ctx list again before sleep */ - sec_process_ctx_list(); - lwi = LWI_TIMEOUT(msecs_to_jiffies(SEC_GC_INTERVAL * MSEC_PER_SEC), - NULL, NULL); - l_wait_event(thread->t_ctl_waitq, - thread_is_stopping(thread) || - thread_is_signal(thread), - &lwi); - - if (thread_test_and_clear_flags(thread, SVC_STOPPING)) - break; + sec_do_gc(sec); } + mutex_unlock(&sec_gc_mutex); - thread_set_flags(thread, SVC_STOPPED); - wake_up(&thread->t_ctl_waitq); - return 0; + /* check ctx list again before sleep */ + sec_process_ctx_list(); + schedule_delayed_work(&sec_gc_work, SEC_GC_INTERVAL * HZ); } int sptlrpc_gc_init(void) { - struct l_wait_info lwi = { 0 }; - struct task_struct *task; - mutex_init(&sec_gc_mutex); spin_lock_init(&sec_gc_list_lock); spin_lock_init(&sec_gc_ctx_list_lock); - /* initialize thread control */ - memset(&sec_gc_thread, 0, sizeof(sec_gc_thread)); - init_waitqueue_head(&sec_gc_thread.t_ctl_waitq); - - task = kthread_run(sec_gc_main, &sec_gc_thread, "sptlrpc_gc"); - if (IS_ERR(task)) { - CERROR("can't start gc thread: %ld\n", PTR_ERR(task)); - return PTR_ERR(task); - } - - l_wait_event(sec_gc_thread.t_ctl_waitq, - thread_is_running(&sec_gc_thread), &lwi); + schedule_delayed_work(&sec_gc_work, 0); return 0; } void sptlrpc_gc_fini(void) { - struct l_wait_info lwi = { 0 }; - - thread_set_flags(&sec_gc_thread, SVC_STOPPING); - wake_up(&sec_gc_thread.t_ctl_waitq); - - l_wait_event(sec_gc_thread.t_ctl_waitq, - thread_is_stopped(&sec_gc_thread), &lwi); + cancel_delayed_work_sync(&sec_gc_work); } diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c index 80cea0b24693..ecc387d1b9b4 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c @@ -158,7 +158,7 @@ int null_alloc_reqbuf(struct ptlrpc_sec *sec, int alloc_size = size_roundup_power2(msgsize); LASSERT(!req->rq_pool); - req->rq_reqbuf = libcfs_kvzalloc(alloc_size, GFP_NOFS); + req->rq_reqbuf = kvzalloc(alloc_size, GFP_NOFS); if (!req->rq_reqbuf) return -ENOMEM; @@ -201,7 +201,7 @@ int null_alloc_repbuf(struct ptlrpc_sec *sec, msgsize = size_roundup_power2(msgsize); - req->rq_repbuf = libcfs_kvzalloc(msgsize, GFP_NOFS); + req->rq_repbuf = kvzalloc(msgsize, GFP_NOFS); if (!req->rq_repbuf) return -ENOMEM; @@ -246,7 +246,7 @@ int null_enlarge_reqbuf(struct ptlrpc_sec *sec, if (req->rq_reqbuf_len < newmsg_size) { alloc_size = size_roundup_power2(newmsg_size); - newbuf = libcfs_kvzalloc(alloc_size, GFP_NOFS); + newbuf = kvzalloc(alloc_size, GFP_NOFS); if (!newbuf) return -ENOMEM; @@ -317,7 +317,7 @@ int null_alloc_rs(struct ptlrpc_request *req, int msgsize) /* pre-allocated */ LASSERT(rs->rs_size >= rs_size); } else { - rs = libcfs_kvzalloc(rs_size, GFP_NOFS); + rs = kvzalloc(rs_size, GFP_NOFS); if (!rs) return -ENOMEM; diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c index 44e34056515b..ec3d9af76b17 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c @@ -562,7 +562,7 @@ int plain_alloc_reqbuf(struct ptlrpc_sec *sec, LASSERT(!req->rq_pool); alloc_len = 
size_roundup_power2(alloc_len); - req->rq_reqbuf = libcfs_kvzalloc(alloc_len, GFP_NOFS); + req->rq_reqbuf = kvzalloc(alloc_len, GFP_NOFS); if (!req->rq_reqbuf) return -ENOMEM; @@ -620,7 +620,7 @@ int plain_alloc_repbuf(struct ptlrpc_sec *sec, alloc_len = size_roundup_power2(alloc_len); - req->rq_repbuf = libcfs_kvzalloc(alloc_len, GFP_NOFS); + req->rq_repbuf = kvzalloc(alloc_len, GFP_NOFS); if (!req->rq_repbuf) return -ENOMEM; @@ -671,7 +671,7 @@ int plain_enlarge_reqbuf(struct ptlrpc_sec *sec, if (req->rq_reqbuf_len < newbuf_size) { newbuf_size = size_roundup_power2(newbuf_size); - newbuf = libcfs_kvzalloc(newbuf_size, GFP_NOFS); + newbuf = kvzalloc(newbuf_size, GFP_NOFS); if (!newbuf) return -ENOMEM; @@ -808,7 +808,7 @@ int plain_alloc_rs(struct ptlrpc_request *req, int msgsize) /* pre-allocated */ LASSERT(rs->rs_size >= rs_size); } else { - rs = libcfs_kvzalloc(rs_size, GFP_NOFS); + rs = kvzalloc(rs_size, GFP_NOFS); if (!rs) return -ENOMEM; diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c index 63be6e7273f3..f37364e00dfe 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/service.c +++ b/drivers/staging/lustre/lustre/ptlrpc/service.c @@ -83,10 +83,10 @@ ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt) rqbd->rqbd_cbid.cbid_fn = request_in_callback; rqbd->rqbd_cbid.cbid_arg = rqbd; INIT_LIST_HEAD(&rqbd->rqbd_reqs); - rqbd->rqbd_buffer = libcfs_kvzalloc_cpt(svc->srv_cptable, - svcpt->scp_cpt, - svc->srv_buf_size, - GFP_KERNEL); + rqbd->rqbd_buffer = kvzalloc_node(svc->srv_buf_size, GFP_KERNEL, + cfs_cpt_spread_node(svc->srv_cptable, + svcpt->scp_cpt)); + if (!rqbd->rqbd_buffer) { kfree(rqbd); return NULL; @@ -726,8 +726,6 @@ static void ptlrpc_server_drop_request(struct ptlrpc_request *req) struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; struct ptlrpc_service *svc = svcpt->scp_service; int refcount; - struct list_head *tmp; - struct list_head *nxt; if (!atomic_dec_and_test(&req->rq_refcount)) return; @@ -776,9 +774,7 @@ static void ptlrpc_server_drop_request(struct ptlrpc_request *req) /* remove rqbd's reqs from svc's req history while * I've got the service lock */ - list_for_each(tmp, &rqbd->rqbd_reqs) { - req = list_entry(tmp, struct ptlrpc_request, - rq_list); + list_for_each_entry(req, &rqbd->rqbd_reqs, rq_list) { /* Track the highest culled req seq */ if (req->rq_history_seq > svcpt->scp_hist_seq_culled) { @@ -790,10 +786,9 @@ static void ptlrpc_server_drop_request(struct ptlrpc_request *req) spin_unlock(&svcpt->scp_lock); - list_for_each_safe(tmp, nxt, &rqbd->rqbd_reqs) { - req = list_entry(rqbd->rqbd_reqs.next, - struct ptlrpc_request, - rq_list); + while ((req = list_first_entry_or_null( + &rqbd->rqbd_reqs, + struct ptlrpc_request, rq_list))) { list_del(&req->rq_list); ptlrpc_server_free_request(req); } @@ -1068,7 +1063,7 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS); if (!reqcopy) return -ENOMEM; - reqmsg = libcfs_kvzalloc(req->rq_reqlen, GFP_NOFS); + reqmsg = kvzalloc(req->rq_reqlen, GFP_NOFS); if (!reqmsg) { rc = -ENOMEM; goto out_free; @@ -1897,15 +1892,6 @@ ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt) } } -static int -ptlrpc_retry_rqbds(void *arg) -{ - struct ptlrpc_service_part *svcpt = arg; - - svcpt->scp_rqbd_timeout = 0; - return -ETIMEDOUT; -} - static inline int ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt) { @@ -1968,13 +1954,17 @@ ptlrpc_server_request_incoming(struct ptlrpc_service_part *svcpt) return 
!list_empty(&svcpt->scp_req_incoming); } +/* We prefer lifo queuing, but kernel doesn't provide that yet. */ +#ifndef wait_event_idle_exclusive_lifo +#define wait_event_idle_exclusive_lifo wait_event_idle_exclusive +#define wait_event_idle_exclusive_lifo_timeout wait_event_idle_exclusive_timeout +#endif + static __attribute__((__noinline__)) int ptlrpc_wait_event(struct ptlrpc_service_part *svcpt, struct ptlrpc_thread *thread) { /* Don't exit while there are replies to be handled */ - struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout, - ptlrpc_retry_rqbds, svcpt); /* XXX: Add this back when libcfs watchdog is merged upstream lc_watchdog_disable(thread->t_watchdog); @@ -1982,13 +1972,25 @@ ptlrpc_wait_event(struct ptlrpc_service_part *svcpt, cond_resched(); - l_wait_event_exclusive_head(svcpt->scp_waitq, - ptlrpc_thread_stopping(thread) || - ptlrpc_server_request_incoming(svcpt) || - ptlrpc_server_request_pending(svcpt, - false) || - ptlrpc_rqbd_pending(svcpt) || - ptlrpc_at_check(svcpt), &lwi); + if (svcpt->scp_rqbd_timeout == 0) + wait_event_idle_exclusive_lifo( + svcpt->scp_waitq, + ptlrpc_thread_stopping(thread) || + ptlrpc_server_request_incoming(svcpt) || + ptlrpc_server_request_pending(svcpt, + false) || + ptlrpc_rqbd_pending(svcpt) || + ptlrpc_at_check(svcpt)); + else if (0 == wait_event_idle_exclusive_lifo_timeout( + svcpt->scp_waitq, + ptlrpc_thread_stopping(thread) || + ptlrpc_server_request_incoming(svcpt) || + ptlrpc_server_request_pending(svcpt, + false) || + ptlrpc_rqbd_pending(svcpt) || + ptlrpc_at_check(svcpt), + svcpt->scp_rqbd_timeout)) + svcpt->scp_rqbd_timeout = 0; if (ptlrpc_thread_stopping(thread)) return -EINTR; @@ -2044,7 +2046,7 @@ static int ptlrpc_main(void *arg) goto out; } - env = kzalloc(sizeof(*env), GFP_NOFS); + env = kzalloc(sizeof(*env), GFP_KERNEL); if (!env) { rc = -ENOMEM; goto out_srv_fini; @@ -2070,7 +2072,7 @@ static int ptlrpc_main(void *arg) } /* Alloc reply state structure for this one */ - rs = libcfs_kvzalloc(svc->srv_max_reply_size, GFP_NOFS); + rs = kvzalloc(svc->srv_max_reply_size, GFP_KERNEL); if (!rs) { rc = -ENOMEM; goto out_srv_fini; @@ -2149,7 +2151,7 @@ static int ptlrpc_main(void *arg) * Wait for a timeout (unless something else * happens) before I try again */ - svcpt->scp_rqbd_timeout = cfs_time_seconds(1) / 10; + svcpt->scp_rqbd_timeout = HZ / 10; CDEBUG(D_RPCTRACE, "Posted buffers: %d\n", svcpt->scp_nrqbds_posted); } @@ -2233,7 +2235,7 @@ static int ptlrpc_hr_main(void *arg) wake_up(&ptlrpc_hr.hr_waitq); while (!ptlrpc_hr.hr_stopping) { - l_wait_condition(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies)); + wait_event_idle(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies)); while (!list_empty(&replies)) { struct ptlrpc_reply_state *rs; @@ -2312,7 +2314,6 @@ static int ptlrpc_start_hr_threads(void) static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt) { - struct l_wait_info lwi = { 0 }; struct ptlrpc_thread *thread; LIST_HEAD(zombie); @@ -2341,8 +2342,8 @@ static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt) CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n", svcpt->scp_service->srv_thread_name, thread->t_id); - l_wait_event(thread->t_ctl_waitq, - thread_is_stopped(thread), &lwi); + wait_event_idle(thread->t_ctl_waitq, - thread_is_stopped(thread)); + thread_is_stopped(thread)); spin_lock(&svcpt->scp_lock); } @@ -2403,7 +2404,6 @@ int ptlrpc_start_threads(struct ptlrpc_service *svc) int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait) { - struct l_wait_info lwi = { 0 }; struct ptlrpc_thread *thread;
struct ptlrpc_service *svc; struct task_struct *task; @@ -2499,9 +2499,8 @@ int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait) if (!wait) return 0; - l_wait_event(thread->t_ctl_waitq, - thread_is_running(thread) || thread_is_stopped(thread), - &lwi); + wait_event_idle(thread->t_ctl_waitq, + thread_is_running(thread) || thread_is_stopped(thread)); rc = thread_is_stopped(thread) ? thread->t_id : 0; return rc; @@ -2591,13 +2590,12 @@ static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt) { while (1) { int rc; - struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10), - NULL, NULL); - rc = l_wait_event(svcpt->scp_waitq, - atomic_read(&svcpt->scp_nreps_difficult) == 0, - &lwi); - if (rc == 0) + rc = wait_event_idle_timeout( + svcpt->scp_waitq, + atomic_read(&svcpt->scp_nreps_difficult) == 0, + 10 * HZ); + if (rc > 0) break; CWARN("Unexpectedly long timeout %s %p\n", svcpt->scp_service->srv_name, svcpt->scp_service); @@ -2622,7 +2620,7 @@ ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc) { struct ptlrpc_service_part *svcpt; struct ptlrpc_request_buffer_desc *rqbd; - struct l_wait_info lwi; + int cnt; int rc; int i; @@ -2662,12 +2660,13 @@ ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc) * the HUGE timeout lets us CWARN for visibility * of sluggish LNDs */ - lwi = LWI_TIMEOUT_INTERVAL( - cfs_time_seconds(LONG_UNLINK), - cfs_time_seconds(1), NULL, NULL); - rc = l_wait_event(svcpt->scp_waitq, - svcpt->scp_nrqbds_posted == 0, &lwi); - if (rc == -ETIMEDOUT) { + cnt = 0; + while (cnt < LONG_UNLINK && + (rc = wait_event_idle_timeout(svcpt->scp_waitq, + svcpt->scp_nrqbds_posted == 0, + HZ)) == 0) + cnt++; + if (rc == 0) { CWARN("Service %s waiting for request buffers\n", svcpt->scp_service->srv_name); }
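------------------------------------------------------------------------------

The conversions in this patch follow a handful of recurring shapes, sketched
below with placeholder names rather than the real Lustre symbols.

Most of the l_wait_event() removals rest on the wait_event_idle_timeout()
convention: the sleep is TASK_IDLE (signals are ignored and the sleeper does
not count toward the load average), and the macro returns 0 when the timeout
elapsed with the condition still false, or the remaining jiffies (at least 1)
when the condition fired. That is why the old "rc == -ETIMEDOUT" tests become
"rc == 0" and the error code is now filled in by the caller. A minimal sketch,
with obd_waitq/obd_done standing in for the real wait queue and condition:

#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

static DECLARE_WAIT_QUEUE_HEAD(obd_waitq);      /* placeholder wait queue */
static bool obd_done;                           /* placeholder condition */

static int wait_for_done(void)
{
        long rc;

        rc = wait_event_idle_timeout(obd_waitq, obd_done, 600 * HZ);
        if (rc == 0)                    /* timeout elapsed, condition false */
                return -ETIMEDOUT;
        return 0;                       /* rc > 0: condition became true */
}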
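------------------------------------------------------------------------------

The LWI_TIMEOUT_INTERVAL() waits around LNet unlink (reply and bulk
unregistration, and the service rqbd teardown above) become an explicit loop:
sleep in one-second slices for up to LONG_UNLINK seconds so sluggish LNDs
still earn a CWARN, with rc > 0 meaning the condition fired and rc == 0 after
the loop meaning the huge timeout really expired. The same shape, reduced to
placeholders (DEMO_LONG_UNLINK is illustrative, not the Lustre constant):

#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

#define DEMO_LONG_UNLINK 300    /* seconds; stands in for LONG_UNLINK */

static DECLARE_WAIT_QUEUE_HEAD(unlink_waitq);   /* placeholder */
static bool unlink_done;                        /* placeholder */

static int wait_for_unlink(void)
{
        long rc = 0;
        int cnt = 0;

        /* re-arm a one second idle wait until done or the bound is hit */
        while (cnt < DEMO_LONG_UNLINK &&
               (rc = wait_event_idle_timeout(unlink_waitq, unlink_done,
                                             HZ)) == 0)
                cnt += 1;
        if (rc > 0)
                return 0;       /* unlink completed */
        return -ETIMEDOUT;      /* unexpectedly long; caller warns */
}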
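------------------------------------------------------------------------------

Waits that used to allow interruption (LWI_INTR / LWI_ON_SIGNAL_NOOP) now go
through l_wait_event_abortable() and its _exclusive variant, which block all
signals except Lustre's fatal set before sleeping interruptibly; osc_lru_alloc()
and osc_destroy() above use it directly, and sptlrpc_req_refresh_ctx() falls
back to it after an idle timeout. The nearest mainline equivalent is
wait_event_killable(), which this sketch uses (my_waitq/my_cond are
placeholders):

#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_waitq);       /* placeholder */
static bool my_cond;                            /* placeholder */

static int wait_abortable(void)
{
        int rc;

        /* sleeps until my_cond is true or a fatal signal is pending */
        rc = wait_event_killable(my_waitq, my_cond);
        return rc;      /* 0 on success, -ERESTARTSYS when aborted */
}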
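------------------------------------------------------------------------------

ptlrpc_set_wait() combines both behaviours through
l_wait_event_abortable_timeout(), whose result is three-way, as the call site
above shows: > 0 condition true, 0 timeout, < 0 aborted by a fatal signal. A
sketch of the dispatch, assuming that helper from the Lustre headers (it is
not a mainline macro):

/* assumes lustre_lib.h for l_wait_event_abortable_timeout() */
#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

static DECLARE_WAIT_QUEUE_HEAD(set_waitq);      /* placeholder */
static bool set_done;                           /* placeholder */

static int wait_one_interval(void)
{
        long rc;

        rc = l_wait_event_abortable_timeout(set_waitq, set_done, HZ);
        if (rc > 0)
                return 0;               /* condition became true */
        if (rc == 0)
                return -ETIMEDOUT;      /* recalculate timeout and retry */
        return -EINTR;                  /* fatal signal: abort the set */
}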
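------------------------------------------------------------------------------

The open-coded list_for_each()/list_entry() pairs collapse into
list_for_each_entry(), _safe, and _reverse, and the free-everything loops use
list_first_entry_or_null(), which reads directly as "pop until empty". The
drain pattern, with a throwaway item type:

#include <linux/list.h>
#include <linux/slab.h>

struct demo_item {
        struct list_head link;
};

static LIST_HEAD(demo_list);    /* placeholder list */

static void demo_drain(void)
{
        struct demo_item *it;

        /* pop the head entry until the list is empty */
        while ((it = list_first_entry_or_null(&demo_list,
                                              struct demo_item, link))) {
                list_del(&it->link);
                kfree(it);
        }
}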
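------------------------------------------------------------------------------

The pinger and the security GC lose their dedicated kthreads and the
SVC_RUNNING/SVC_STOPPING handshakes; each becomes a delayed work item that
re-queues itself, started with queue_delayed_work()/schedule_delayed_work()
and stopped with cancel_delayed_work_sync(). ptlrpc_pinger_wake_up() turns
into mod_delayed_work(..., 0) to run the next pass immediately. A compressed
sketch of that lifecycle (all names are placeholders):

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static void demo_ping(struct work_struct *ws);
static DECLARE_DELAYED_WORK(demo_ping_work, demo_ping);
static struct workqueue_struct *demo_wq;        /* placeholder */

static void demo_ping(struct work_struct *ws)
{
        /* ... one pass of the periodic work ... */

        /* re-arm ourselves; replaces the kthread's sleep/wake loop */
        queue_delayed_work(demo_wq, &demo_ping_work, 30 * HZ);
}

static int demo_start(void)
{
        demo_wq = alloc_workqueue("demo_ping", WQ_MEM_RECLAIM, 1);
        if (!demo_wq)
                return -ENOMEM;
        queue_delayed_work(demo_wq, &demo_ping_work, 0);
        return 0;
}

static void demo_stop(void)
{
        cancel_delayed_work_sync(&demo_ping_work);  /* waits for a pass */
        destroy_workqueue(demo_wq);
        demo_wq = NULL;
}

static void demo_wake_up(void)
{
        /* fire the next pass now instead of at its scheduled time */
        mod_delayed_work(demo_wq, &demo_ping_work, 0);
}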
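------------------------------------------------------------------------------

ptlrpc_module.c stops bringing up portals and the pinger at module load;
ptlrpc_inc_ref()/ptlrpc_dec_ref() let the first real user start them and the
last user tear them down, all under a mutex so a failed setup can be unwound.
The essential shape, with example_setup()/example_teardown() standing in for
the portals and pinger calls:

#include <linux/mutex.h>

static DEFINE_MUTEX(startup_lock);
static int active_count;        /* users of the shared resource */

static int example_setup(void) { return 0; }    /* placeholder */
static void example_teardown(void) { }          /* placeholder */

int example_inc_ref(void)
{
        int rc = 0;

        mutex_lock(&startup_lock);
        if (active_count++ == 0) {
                rc = example_setup();
                if (rc)
                        active_count--; /* first user failed: undo */
        }
        mutex_unlock(&startup_lock);
        return rc;
}

void example_dec_ref(void)
{
        mutex_lock(&startup_lock);
        if (--active_count == 0)
                example_teardown();
        mutex_unlock(&startup_lock);
}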
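------------------------------------------------------------------------------

Finally, libcfs_kvzalloc(size, flags) is just kvzalloc(), and
libcfs_kvzalloc_cpt() becomes kvzalloc_node() with cfs_cpt_spread_node()
translating the CPT into a NUMA node; several call sites also relax GFP_NOFS
to GFP_KERNEL where recursion into the filesystem is not a concern. The
replacement pattern:

#include <linux/mm.h>
#include <linux/slab.h>

/* placeholder helper; size and node come from the caller */
static void *demo_alloc(size_t size, int node)
{
        /* tries kmalloc first, falls back to vmalloc, zeroed either way */
        return kvzalloc_node(size, GFP_KERNEL, node);
}

static void demo_free(void *buf)
{
        kvfree(buf);    /* correct for both kmalloc and vmalloc memory */
}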