From 38aa8b0c59c35d10d15ebf00ceee641f9ed7acba Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 13 Dec 2006 10:37:16 -0600 Subject: [DLM] fix old rcom messages A reply to a recovery message will often be received after the relevant recovery sequence has aborted and the next recovery sequence has begun. We need to ignore replies to these old messages from the previous recovery. There's already a way to do this for synchronous recovery requests using the rc_id number, but not for async. Each recovery sequence already has a locally unique sequence number associated with it. This patch adds a field to the rcom (recovery message) structure where this recovery sequence number can be placed, rc_seq. When a node sends a reply to a recovery request, it copies the rc_seq number it received into rc_seq_reply. When the first node receives the reply to its recovery message, it will check whether rc_seq_reply matches the current recovery sequence number, ls_recover_seq, and if not then it ignores the old reply. An old, inadequate approach to filtering out old replies (checking if the current stage of recovery has moved back to the start) has been removed from two spots. The protocol version number is changed to reflect the different rcom structures. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/dlm_internal.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/dlm/dlm_internal.h') diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 1ee8195e6fc0..7185a132a8b5 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -309,8 +309,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag) /* dlm_header is first element of all structs sent between nodes */ -#define DLM_HEADER_MAJOR 0x00020000 -#define DLM_HEADER_MINOR 0x00000001 +#define DLM_HEADER_MAJOR 0x00030000 +#define DLM_HEADER_MINOR 0x00000000 #define DLM_MSG 1 #define DLM_RCOM 2 @@ -386,6 +386,8 @@ struct dlm_rcom { uint32_t rc_type; /* DLM_RCOM_ */ int rc_result; /* multi-purpose */ uint64_t rc_id; /* match reply with request */ + uint64_t rc_seq; /* sender's ls_recover_seq */ + uint64_t rc_seq_reply; /* remote ls_recover_seq */ char rc_buf[0]; }; -- cgit v1.2.3-59-g8ed1b From 99fc64874aad1ee0aea5c4d8c07e3529f9d03497 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 9 Jan 2007 09:44:01 -0600 Subject: [DLM] add config entry to enable log_debug Add a new dlm_config_info field to enable log_debug output and change log_debug() to use it. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/config.c | 4 +++- fs/dlm/config.h | 1 + fs/dlm/dlm_internal.h | 13 +++++++------ 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'fs/dlm/dlm_internal.h') diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 958021f91486..7cf20209b127 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -775,6 +775,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_RECOVER_TIMER 5 #define DEFAULT_TOSS_SECS 10 #define DEFAULT_SCAN_SECS 5 +#define DEFAULT_LOG_DEBUG 0 struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, @@ -784,6 +785,7 @@ struct dlm_config_info dlm_config = { .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, .ci_recover_timer = DEFAULT_RECOVER_TIMER, .ci_toss_secs = DEFAULT_TOSS_SECS, - .ci_scan_secs = DEFAULT_SCAN_SECS + .ci_scan_secs = DEFAULT_SCAN_SECS, + .ci_log_debug = DEFAULT_LOG_DEBUG }; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index ce603e1c3bac..1e978611a96e 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -25,6 +25,7 @@ struct dlm_config_info { int ci_recover_timer; int ci_toss_secs; int ci_scan_secs; + int ci_log_debug; }; extern struct dlm_config_info dlm_config; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 7185a132a8b5..ee993c5c2307 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -41,6 +41,7 @@ #include #include +#include "config.h" #define DLM_LOCKSPACE_LEN 64 @@ -69,12 +70,12 @@ struct dlm_mhandle; #define log_error(ls, fmt, args...) \ printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) -#define DLM_LOG_DEBUG -#ifdef DLM_LOG_DEBUG -#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args) -#else -#define log_debug(ls, fmt, args...) -#endif +#define log_debug(ls, fmt, args...) \ +do { \ + if (dlm_config.ci_log_debug) \ + printk(KERN_DEBUG "dlm: %s: " fmt "\n", \ + (ls)->ls_name , ##args); \ +} while (0) #define DLM_ASSERT(x, do) \ { \ -- cgit v1.2.3-59-g8ed1b From a1bc86e6bddd34362ca08a3a4d898eb4b5c15215 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 15 Jan 2007 10:34:52 -0600 Subject: [DLM] fix user unlocking When a user process exits, we clear all the locks it holds. There is a problem, though, with locks that the process had begun unlocking before it exited. We couldn't find the lkb's that were in the process of being unlocked remotely, to flag that they are DEAD. To solve this, we move lkb's being unlocked onto a new list in the per-process structure that tracks what locks the process is holding. We can then go through this list to flag the necessary lkb's when clearing locks for a process when it exits. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/dlm_internal.h | 1 + fs/dlm/lock.c | 30 ++++++++++++++++++------------ fs/dlm/user.c | 9 +++++++++ 3 files changed, 28 insertions(+), 12 deletions(-) (limited to 'fs/dlm/dlm_internal.h') diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index ee993c5c2307..61d93201e1b2 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -526,6 +526,7 @@ struct dlm_user_proc { spinlock_t asts_spin; struct list_head locks; spinlock_t locks_spin; + struct list_head unlocking; wait_queue_head_t wait; }; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 5bac9827ded3..6ad2b8eb96a5 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3772,12 +3772,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, goto out_put; spin_lock(&ua->proc->locks_spin); - list_del_init(&lkb->lkb_ownqueue); + /* dlm_user_add_ast() may have already taken lkb off the proc list */ + if (!list_empty(&lkb->lkb_ownqueue)) + list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); spin_unlock(&ua->proc->locks_spin); - - /* this removes the reference for the proc->locks list added by - dlm_user_request */ - unhold_lkb(lkb); out_put: dlm_put_lkb(lkb); out: @@ -3817,9 +3815,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, /* this lkb was removed from the WAITING queue */ if (lkb->lkb_grmode == DLM_LOCK_IV) { spin_lock(&ua->proc->locks_spin); - list_del_init(&lkb->lkb_ownqueue); + list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); spin_unlock(&ua->proc->locks_spin); - unhold_lkb(lkb); } out_put: dlm_put_lkb(lkb); @@ -3880,11 +3877,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) mutex_lock(&ls->ls_clear_proc_locks); list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) { - if (lkb->lkb_ast_type) { - list_del(&lkb->lkb_astqueue); - unhold_lkb(lkb); - } - list_del_init(&lkb->lkb_ownqueue); if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { @@ -3901,6 +3893,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) dlm_put_lkb(lkb); } + + /* in-progress unlocks */ + list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) { + list_del_init(&lkb->lkb_ownqueue); + lkb->lkb_flags |= DLM_IFL_DEAD; + dlm_put_lkb(lkb); + } + + list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { + list_del(&lkb->lkb_astqueue); + dlm_put_lkb(lkb); + } + mutex_unlock(&ls->ls_clear_proc_locks); unlock_recovery(ls); } + diff --git a/fs/dlm/user.c b/fs/dlm/user.c index c37e93e4f2df..d378b7fe2a1e 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) remove_ownqueue = 1; + /* unlocks or cancels of waiting requests need to be removed from the + proc's unlocking list, again there must be a better way... */ + + if (ua->lksb.sb_status == -DLM_EUNLOCK || + (ua->lksb.sb_status == -DLM_ECANCEL && + lkb->lkb_grmode == DLM_LOCK_IV)) + remove_ownqueue = 1; + /* We want to copy the lvb to userspace when the completion ast is read if the status is 0, the lock has an lvb and lvb_ops says we should. We could probably have set_lvb_lock() @@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file) proc->lockspace = ls->ls_local_handle; INIT_LIST_HEAD(&proc->asts); INIT_LIST_HEAD(&proc->locks); + INIT_LIST_HEAD(&proc->unlocking); spin_lock_init(&proc->asts_spin); spin_lock_init(&proc->locks_spin); init_waitqueue_head(&proc->wait); -- cgit v1.2.3-59-g8ed1b