diff options
Diffstat (limited to 'fs/dlm')
-rw-r--r-- | fs/dlm/Kconfig | 9 | ||||
-rw-r--r-- | fs/dlm/Makefile | 2 | ||||
-rw-r--r-- | fs/dlm/ast.c | 31 | ||||
-rw-r--r-- | fs/dlm/ast.h | 1 | ||||
-rw-r--r-- | fs/dlm/config.c | 21 | ||||
-rw-r--r-- | fs/dlm/config.h | 3 | ||||
-rw-r--r-- | fs/dlm/debug_fs.c | 96 | ||||
-rw-r--r-- | fs/dlm/dir.c | 5 | ||||
-rw-r--r-- | fs/dlm/dlm_internal.h | 112 | ||||
-rw-r--r-- | fs/dlm/lock.c | 1057 | ||||
-rw-r--r-- | fs/dlm/lock.h | 23 | ||||
-rw-r--r-- | fs/dlm/lockspace.c | 116 | ||||
-rw-r--r-- | fs/dlm/lockspace.h | 14 | ||||
-rw-r--r-- | fs/dlm/lowcomms.c | 229 | ||||
-rw-r--r-- | fs/dlm/lowcomms.h | 6 | ||||
-rw-r--r-- | fs/dlm/main.c | 3 | ||||
-rw-r--r-- | fs/dlm/member.c | 44 | ||||
-rw-r--r-- | fs/dlm/memory.c | 68 | ||||
-rw-r--r-- | fs/dlm/memory.h | 6 | ||||
-rw-r--r-- | fs/dlm/midcomms.c | 144 | ||||
-rw-r--r-- | fs/dlm/midcomms.h | 3 | ||||
-rw-r--r-- | fs/dlm/netlink.c | 1 | ||||
-rw-r--r-- | fs/dlm/plock.c | 213 | ||||
-rw-r--r-- | fs/dlm/rcom.c | 122 | ||||
-rw-r--r-- | fs/dlm/recover.c | 49 | ||||
-rw-r--r-- | fs/dlm/recoverd.c | 38 | ||||
-rw-r--r-- | fs/dlm/requestqueue.c | 37 | ||||
-rw-r--r-- | fs/dlm/user.c | 54 | ||||
-rw-r--r-- | fs/dlm/util.c | 92 | ||||
-rw-r--r-- | fs/dlm/util.h | 8 |
30 files changed, 1555 insertions, 1052 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index ee92634196a8..1105ce3c80cb 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -9,6 +9,15 @@ menuconfig DLM A general purpose distributed lock manager for kernel or userspace applications. +config DLM_DEPRECATED_API + bool "DLM deprecated API" + depends on DLM + help + Enables deprecated DLM timeout features that will be removed in + later Linux kernel releases. + + If you are unsure, say N. + config DLM_DEBUG bool "DLM debugging" depends on DLM diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index 3545fdafc6fb..71dab733cf9a 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile @@ -9,7 +9,6 @@ dlm-y := ast.o \ member.o \ memory.o \ midcomms.o \ - netlink.o \ lowcomms.o \ plock.o \ rcom.o \ @@ -18,5 +17,6 @@ dlm-y := ast.o \ requestqueue.o \ user.o \ util.o +dlm-$(CONFIG_DLM_DEPRECATED_API) += netlink.o dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 283c7b94edda..d60a8d8f109d 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -9,6 +9,8 @@ ******************************************************************************* ******************************************************************************/ +#include <trace/events/dlm.h> + #include "dlm_internal.h" #include "lock.h" #include "user.h" @@ -198,13 +200,13 @@ void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, if (!prev_seq) { kref_get(&lkb->lkb_ref); + mutex_lock(&ls->ls_cb_mutex); if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) { - mutex_lock(&ls->ls_cb_mutex); list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay); - mutex_unlock(&ls->ls_cb_mutex); } else { queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work); } + mutex_unlock(&ls->ls_cb_mutex); } out: mutex_unlock(&lkb->lkb_cb_mutex); @@ -253,10 +255,12 @@ void dlm_callback_work(struct work_struct *work) if (callbacks[i].flags & DLM_CB_SKIP) { continue; } else if (callbacks[i].flags & DLM_CB_BAST) { + trace_dlm_bast(ls, lkb, callbacks[i].mode); bastfn(lkb->lkb_astparam, callbacks[i].mode); } else if (callbacks[i].flags & DLM_CB_CAST) { lkb->lkb_lksb->sb_status = callbacks[i].sb_status; lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; + trace_dlm_ast(ls, lkb); castfn(lkb->lkb_astparam); } } @@ -284,10 +288,13 @@ void dlm_callback_stop(struct dlm_ls *ls) void dlm_callback_suspend(struct dlm_ls *ls) { - set_bit(LSFL_CB_DELAY, &ls->ls_flags); + if (ls->ls_callback_wq) { + mutex_lock(&ls->ls_cb_mutex); + set_bit(LSFL_CB_DELAY, &ls->ls_flags); + mutex_unlock(&ls->ls_cb_mutex); - if (ls->ls_callback_wq) flush_workqueue(ls->ls_callback_wq); + } } #define MAX_CB_QUEUE 25 @@ -295,13 +302,14 @@ void dlm_callback_suspend(struct dlm_ls *ls) void dlm_callback_resume(struct dlm_ls *ls) { struct dlm_lkb *lkb, *safe; - int count = 0; - - clear_bit(LSFL_CB_DELAY, &ls->ls_flags); + int count = 0, sum = 0; + bool empty; if (!ls->ls_callback_wq) return; + clear_bit(LSFL_CB_DELAY, &ls->ls_flags); + more: mutex_lock(&ls->ls_cb_mutex); list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) { @@ -311,14 +319,17 @@ more: if (count == MAX_CB_QUEUE) break; } + empty = list_empty(&ls->ls_cb_delay); mutex_unlock(&ls->ls_cb_mutex); - if (count) - log_rinfo(ls, "dlm_callback_resume %d", count); - if (count == MAX_CB_QUEUE) { + sum += count; + if (!empty) { count = 0; cond_resched(); goto more; } + + if (sum) + log_rinfo(ls, "%s %d", __func__, sum); } diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h index 181ad7d20c4d..e5e05fcc5813 100644 --- a/fs/dlm/ast.h +++ b/fs/dlm/ast.h @@ -11,7 +11,6 @@ #ifndef __ASTD_DOT_H__ #define __ASTD_DOT_H__ -void dlm_del_ast(struct dlm_lkb *lkb); int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, uint32_t sbflags, uint64_t seq); int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 42eee2783756..ac8b62106ce0 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -75,8 +75,9 @@ struct dlm_cluster { unsigned int cl_log_info; unsigned int cl_protocol; unsigned int cl_mark; +#ifdef CONFIG_DLM_DEPRECATED_API unsigned int cl_timewarn_cs; - unsigned int cl_waitwarn_us; +#endif unsigned int cl_new_rsb_count; unsigned int cl_recover_callbacks; char cl_cluster_name[DLM_LOCKSPACE_LEN]; @@ -102,8 +103,9 @@ enum { CLUSTER_ATTR_LOG_INFO, CLUSTER_ATTR_PROTOCOL, CLUSTER_ATTR_MARK, +#ifdef CONFIG_DLM_DEPRECATED_API CLUSTER_ATTR_TIMEWARN_CS, - CLUSTER_ATTR_WAITWARN_US, +#endif CLUSTER_ATTR_NEW_RSB_COUNT, CLUSTER_ATTR_RECOVER_CALLBACKS, CLUSTER_ATTR_CLUSTER_NAME, @@ -224,8 +226,9 @@ CLUSTER_ATTR(log_debug, NULL); CLUSTER_ATTR(log_info, NULL); CLUSTER_ATTR(protocol, dlm_check_protocol_and_dlm_running); CLUSTER_ATTR(mark, NULL); +#ifdef CONFIG_DLM_DEPRECATED_API CLUSTER_ATTR(timewarn_cs, dlm_check_zero); -CLUSTER_ATTR(waitwarn_us, NULL); +#endif CLUSTER_ATTR(new_rsb_count, NULL); CLUSTER_ATTR(recover_callbacks, NULL); @@ -240,8 +243,9 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_LOG_INFO] = &cluster_attr_log_info, [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol, [CLUSTER_ATTR_MARK] = &cluster_attr_mark, +#ifdef CONFIG_DLM_DEPRECATED_API [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs, - [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us, +#endif [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count, [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks, [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name, @@ -432,8 +436,9 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_log_debug = dlm_config.ci_log_debug; cl->cl_log_info = dlm_config.ci_log_info; cl->cl_protocol = dlm_config.ci_protocol; +#ifdef CONFIG_DLM_DEPRECATED_API cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; - cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; +#endif cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks; memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name, @@ -954,8 +959,9 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_LOG_INFO 1 #define DEFAULT_PROTOCOL DLM_PROTO_TCP #define DEFAULT_MARK 0 +#ifdef CONFIG_DLM_DEPRECATED_API #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ -#define DEFAULT_WAITWARN_US 0 +#endif #define DEFAULT_NEW_RSB_COUNT 128 #define DEFAULT_RECOVER_CALLBACKS 0 #define DEFAULT_CLUSTER_NAME "" @@ -971,8 +977,9 @@ struct dlm_config_info dlm_config = { .ci_log_info = DEFAULT_LOG_INFO, .ci_protocol = DEFAULT_PROTOCOL, .ci_mark = DEFAULT_MARK, +#ifdef CONFIG_DLM_DEPRECATED_API .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, - .ci_waitwarn_us = DEFAULT_WAITWARN_US, +#endif .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT, .ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS, .ci_cluster_name = DEFAULT_CLUSTER_NAME diff --git a/fs/dlm/config.h b/fs/dlm/config.h index df92b0a07fc6..55c5f2c13ebd 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -37,8 +37,9 @@ struct dlm_config_info { int ci_log_info; int ci_protocol; int ci_mark; +#ifdef CONFIG_DLM_DEPRECATED_API int ci_timewarn_cs; - int ci_waitwarn_us; +#endif int ci_new_rsb_count; int ci_recover_callbacks; char ci_cluster_name[DLM_LOCKSPACE_LEN]; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 47e9d57e4cae..8fb04ebbafb5 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -635,6 +635,35 @@ static int table_open2(struct inode *inode, struct file *file) return 0; } +static ssize_t table_write2(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + int n, len, lkb_nodeid, lkb_status, error; + char name[DLM_RESNAME_MAXLEN + 1] = {}; + struct dlm_ls *ls = seq->private; + unsigned int lkb_flags; + char buf[256] = {}; + uint32_t lkb_id; + + if (copy_from_user(buf, user_buf, + min_t(size_t, sizeof(buf) - 1, count))) + return -EFAULT; + + n = sscanf(buf, "%x %" __stringify(DLM_RESNAME_MAXLEN) "s %x %d %d", + &lkb_id, name, &lkb_flags, &lkb_nodeid, &lkb_status); + if (n != 5) + return -EINVAL; + + len = strnlen(name, DLM_RESNAME_MAXLEN); + error = dlm_debug_add_lkb(ls, lkb_id, name, len, lkb_flags, + lkb_nodeid, lkb_status); + if (error) + return error; + + return count; +} + static int table_open3(struct inode *inode, struct file *file) { struct seq_file *seq; @@ -675,6 +704,7 @@ static const struct file_operations format2_fops = { .owner = THIS_MODULE, .open = table_open2, .read = seq_read, + .write = table_write2, .llseek = seq_lseek, .release = seq_release }; @@ -724,10 +754,35 @@ static ssize_t waiters_read(struct file *file, char __user *userbuf, return rv; } +static ssize_t waiters_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct dlm_ls *ls = file->private_data; + int mstype, to_nodeid; + char buf[128] = {}; + uint32_t lkb_id; + int n, error; + + if (copy_from_user(buf, user_buf, + min_t(size_t, sizeof(buf) - 1, count))) + return -EFAULT; + + n = sscanf(buf, "%x %d %d", &lkb_id, &mstype, &to_nodeid); + if (n != 3) + return -EINVAL; + + error = dlm_debug_add_lkb_to_waiters(ls, lkb_id, mstype, to_nodeid); + if (error) + return error; + + return count; +} + static const struct file_operations waiters_fops = { .owner = THIS_MODULE, .open = simple_open, .read = waiters_read, + .write = waiters_write, .llseek = default_llseek, }; @@ -768,6 +823,42 @@ static int dlm_version_show(struct seq_file *file, void *offset) } DEFINE_SHOW_ATTRIBUTE(dlm_version); +static ssize_t dlm_rawmsg_write(struct file *fp, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + void *buf; + int ret; + + if (count > PAGE_SIZE || count < sizeof(struct dlm_header)) + return -EINVAL; + + buf = kmalloc(PAGE_SIZE, GFP_NOFS); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, user_buf, count)) { + ret = -EFAULT; + goto out; + } + + ret = dlm_midcomms_rawmsg_send(fp->private_data, buf, count); + if (ret) + goto out; + + kfree(buf); + return count; + +out: + kfree(buf); + return ret; +} + +static const struct file_operations dlm_rawmsg_fops = { + .open = simple_open, + .write = dlm_rawmsg_write, + .llseek = no_llseek, +}; + void *dlm_create_debug_comms_file(int nodeid, void *data) { struct dentry *d_node; @@ -782,6 +873,7 @@ void *dlm_create_debug_comms_file(int nodeid, void *data) debugfs_create_file("send_queue_count", 0444, d_node, data, &dlm_send_queue_cnt_fops); debugfs_create_file("version", 0444, d_node, data, &dlm_version_fops); + debugfs_create_file("rawmsg", 0200, d_node, data, &dlm_rawmsg_fops); return d_node; } @@ -809,7 +901,7 @@ void dlm_create_debug_file(struct dlm_ls *ls) snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_locks", ls->ls_name); ls->ls_debug_locks_dentry = debugfs_create_file(name, - S_IFREG | S_IRUGO, + 0644, dlm_root, ls, &format2_fops); @@ -840,7 +932,7 @@ void dlm_create_debug_file(struct dlm_ls *ls) snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_waiters", ls->ls_name); ls->ls_debug_waiters_dentry = debugfs_create_file(name, - S_IFREG | S_IRUGO, + 0644, dlm_root, ls, &waiters_fops); diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 45ebbe602bbf..fb1981654bb2 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -84,8 +84,7 @@ int dlm_recover_directory(struct dlm_ls *ls) for (;;) { int left; - error = dlm_recovery_stopped(ls); - if (error) { + if (dlm_recovery_stopped(ls)) { error = -EINTR; goto out_free; } @@ -102,7 +101,7 @@ int dlm_recover_directory(struct dlm_ls *ls) */ b = ls->ls_recover_buf->rc_buf; - left = ls->ls_recover_buf->rc_header.h_length; + left = le16_to_cpu(ls->ls_recover_buf->rc_header.h_length); left -= sizeof(struct dlm_rcom); for (;;) { diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 5f57538b5d45..e34c3d2639a5 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -41,12 +41,6 @@ #include <linux/dlm.h> #include "config.h" -/* Size of the temp buffer midcomms allocates on the stack. - We try to make this large enough so most messages fit. - FIXME: should sctp make this unnecessary? */ - -#define DLM_INBUF_LEN 148 - struct dlm_ls; struct dlm_lkb; struct dlm_rsb; @@ -151,7 +145,9 @@ struct dlm_args { void (*bastfn) (void *astparam, int mode); int mode; struct dlm_lksb *lksb; +#ifdef CONFIG_DLM_DEPRECATED_API unsigned long timeout; +#endif }; @@ -209,10 +205,20 @@ struct dlm_args { #define DLM_IFL_OVERLAP_UNLOCK 0x00080000 #define DLM_IFL_OVERLAP_CANCEL 0x00100000 #define DLM_IFL_ENDOFLIFE 0x00200000 +#ifdef CONFIG_DLM_DEPRECATED_API #define DLM_IFL_WATCH_TIMEWARN 0x00400000 #define DLM_IFL_TIMEOUT_CANCEL 0x00800000 +#endif #define DLM_IFL_DEADLOCK_CANCEL 0x01000000 #define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */ +/* least significant 2 bytes are message changed, they are full transmitted + * but at receive side only the 2 bytes LSB will be set. + * + * Even wireshark dlm dissector does only evaluate the lower bytes and note + * that they may not be used on transceiver side, we assume the higher bytes + * are for internal use or reserved so long they are not parsed on receiver + * side. + */ #define DLM_IFL_USER 0x00000001 #define DLM_IFL_ORPHAN 0x00000002 @@ -255,10 +261,12 @@ struct dlm_lkb { struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */ struct list_head lkb_wait_reply; /* waiting for remote reply */ struct list_head lkb_ownqueue; /* list of locks for a process */ - struct list_head lkb_time_list; ktime_t lkb_timestamp; - ktime_t lkb_wait_time; + +#ifdef CONFIG_DLM_DEPRECATED_API + struct list_head lkb_time_list; unsigned long lkb_timeout_cs; +#endif struct mutex lkb_cb_mutex; struct work_struct lkb_cb_work; @@ -385,15 +393,15 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag) #define DLM_FIN 5 struct dlm_header { - uint32_t h_version; + __le32 h_version; union { /* for DLM_MSG and DLM_RCOM */ - uint32_t h_lockspace; + __le32 h_lockspace; /* for DLM_ACK and DLM_OPTS */ - uint32_t h_seq; + __le32 h_seq; } u; - uint32_t h_nodeid; /* nodeid of sender */ - uint16_t h_length; + __le32 h_nodeid; /* nodeid of sender */ + __le16 h_length; uint8_t h_cmd; /* DLM_MSG, DLM_RCOM */ uint8_t h_pad; }; @@ -415,24 +423,24 @@ struct dlm_header { struct dlm_message { struct dlm_header m_header; - uint32_t m_type; /* DLM_MSG_ */ - uint32_t m_nodeid; - uint32_t m_pid; - uint32_t m_lkid; /* lkid on sender */ - uint32_t m_remid; /* lkid on receiver */ - uint32_t m_parent_lkid; - uint32_t m_parent_remid; - uint32_t m_exflags; - uint32_t m_sbflags; - uint32_t m_flags; - uint32_t m_lvbseq; - uint32_t m_hash; - int m_status; - int m_grmode; - int m_rqmode; - int m_bastmode; - int m_asts; - int m_result; /* 0 or -EXXX */ + __le32 m_type; /* DLM_MSG_ */ + __le32 m_nodeid; + __le32 m_pid; + __le32 m_lkid; /* lkid on sender */ + __le32 m_remid; /* lkid on receiver */ + __le32 m_parent_lkid; + __le32 m_parent_remid; + __le32 m_exflags; + __le32 m_sbflags; + __le32 m_flags; + __le32 m_lvbseq; + __le32 m_hash; + __le32 m_status; + __le32 m_grmode; + __le32 m_rqmode; + __le32 m_bastmode; + __le32 m_asts; + __le32 m_result; /* 0 or -EXXX */ char m_extra[]; /* name or lvb */ }; @@ -457,18 +465,18 @@ struct dlm_message { struct dlm_rcom { struct dlm_header rc_header; - uint32_t rc_type; /* DLM_RCOM_ */ - int rc_result; /* multi-purpose */ - uint64_t rc_id; /* match reply with request */ - uint64_t rc_seq; /* sender's ls_recover_seq */ - uint64_t rc_seq_reply; /* remote ls_recover_seq */ + __le32 rc_type; /* DLM_RCOM_ */ + __le32 rc_result; /* multi-purpose */ + __le64 rc_id; /* match reply with request */ + __le64 rc_seq; /* sender's ls_recover_seq */ + __le64 rc_seq_reply; /* remote ls_recover_seq */ char rc_buf[]; }; struct dlm_opt_header { - uint16_t t_type; - uint16_t t_length; - uint32_t t_pad; + __le16 t_type; + __le16 t_length; + __le32 t_pad; /* need to be 8 byte aligned */ char t_value[]; }; @@ -478,8 +486,8 @@ struct dlm_opts { struct dlm_header o_header; uint8_t o_nextcmd; uint8_t o_pad; - uint16_t o_optlen; - uint32_t o_pad2; + __le16 o_optlen; + __le32 o_pad2; char o_opts[]; }; @@ -554,8 +562,9 @@ struct dlm_ls { uint32_t ls_generation; uint32_t ls_exflags; int ls_lvblen; - int ls_count; /* refcount of processes in + atomic_t ls_count; /* refcount of processes in the dlm using this ls */ + wait_queue_head_t ls_count_wait; int ls_create_count; /* create/release refcount */ unsigned long ls_flags; /* LSFL_ */ unsigned long ls_scan_time; @@ -573,14 +582,17 @@ struct dlm_ls { struct mutex ls_orphans_mutex; struct list_head ls_orphans; +#ifdef CONFIG_DLM_DEPRECATED_API struct mutex ls_timeout_mutex; struct list_head ls_timeout; +#endif spinlock_t ls_new_rsb_spin; int ls_new_rsb_count; struct list_head ls_new_rsb; /* new rsb structs */ spinlock_t ls_remove_spin; + wait_queue_head_t ls_remove_wait; char ls_remove_name[DLM_RESNAME_MAXLEN+1]; char *ls_remove_names[DLM_REMOVE_NAMES_MAX]; int ls_remove_len; @@ -610,8 +622,8 @@ struct dlm_ls { wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ int ls_uevent_result; - struct completion ls_members_done; - int ls_members_result; + struct completion ls_recovery_done; + int ls_recovery_result; struct miscdevice ls_device; @@ -632,6 +644,8 @@ struct dlm_ls { struct rw_semaphore ls_in_recovery; /* block local requests */ struct rw_semaphore ls_recv_active; /* block dlm_recv */ struct list_head ls_requestqueue;/* queue remote requests */ + atomic_t ls_requestqueue_cnt; + wait_queue_head_t ls_requestqueue_wait; struct mutex ls_requestqueue_mutex; struct dlm_rcom *ls_recover_buf; int ls_recover_nodeid; /* for debugging */ @@ -647,7 +661,7 @@ struct dlm_ls { spinlock_t ls_recover_idr_lock; wait_queue_head_t ls_wait_general; wait_queue_head_t ls_recover_lock_wait; - struct mutex ls_clear_proc_locks; + spinlock_t ls_clear_proc_locks; struct list_head ls_root_list; /* root resources */ struct rw_semaphore ls_root_sem; /* protect root_list */ @@ -690,7 +704,9 @@ struct dlm_ls { #define LSFL_RCOM_READY 5 #define LSFL_RCOM_WAIT 6 #define LSFL_UEVENT_WAIT 7 +#ifdef CONFIG_DLM_DEPRECATED_API #define LSFL_TIMEWARN 8 +#endif #define LSFL_CB_DELAY 9 #define LSFL_NODIR 10 @@ -743,9 +759,15 @@ static inline int dlm_no_directory(struct dlm_ls *ls) return test_bit(LSFL_NODIR, &ls->ls_flags); } +#ifdef CONFIG_DLM_DEPRECATED_API int dlm_netlink_init(void); void dlm_netlink_exit(void); void dlm_timeout_warn(struct dlm_lkb *lkb); +#else +static inline int dlm_netlink_init(void) { return 0; } +static inline void dlm_netlink_exit(void) { }; +static inline void dlm_timeout_warn(struct dlm_lkb *lkb) { }; +#endif int dlm_plock_init(void); void dlm_plock_exit(void); diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index c502c065d007..94a72ede5764 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -53,6 +53,8 @@ R: do_xxxx() L: receive_xxxx_reply() <- R: send_xxxx_reply() */ +#include <trace/events/dlm.h> + #include <linux/types.h> #include <linux/rbtree.h> #include <linux/slab.h> @@ -294,12 +296,14 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); +#ifdef CONFIG_DLM_DEPRECATED_API /* if the operation was a cancel, then return -DLM_ECANCEL, if a timeout caused the cancel then return -ETIMEDOUT */ if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) { lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL; rv = -ETIMEDOUT; } +#endif if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) { lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL; @@ -348,10 +352,12 @@ static void put_rsb(struct dlm_rsb *r) { struct dlm_ls *ls = r->res_ls; uint32_t bucket = r->res_bucket; + int rv; - spin_lock(&ls->ls_rsbtbl[bucket].lock); - kref_put(&r->res_ref, toss_rsb); - spin_unlock(&ls->ls_rsbtbl[bucket].lock); + rv = kref_put_lock(&r->res_ref, toss_rsb, + &ls->ls_rsbtbl[bucket].lock); + if (rv) + spin_unlock(&ls->ls_rsbtbl[bucket].lock); } void dlm_put_rsb(struct dlm_rsb *r) @@ -395,7 +401,7 @@ static int pre_rsb_struct(struct dlm_ls *ls) unlock any spinlocks, go back and call pre_rsb_struct again. Otherwise, take an rsb off the list and return it. */ -static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, +static int get_rsb_struct(struct dlm_ls *ls, const void *name, int len, struct dlm_rsb **r_ret) { struct dlm_rsb *r; @@ -406,7 +412,8 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, count = ls->ls_new_rsb_count; spin_unlock(&ls->ls_new_rsb_spin); log_debug(ls, "find_rsb retry %d %d %s", - count, dlm_config.ci_new_rsb_count, name); + count, dlm_config.ci_new_rsb_count, + (const char *)name); return -EAGAIN; } @@ -442,7 +449,7 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); } -int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, +int dlm_search_rsb_tree(struct rb_root *tree, const void *name, int len, struct dlm_rsb **r_ret) { struct rb_node *node = tree->rb_node; @@ -540,7 +547,7 @@ static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) * while that rsb has a potentially stale master.) */ -static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, +static int find_rsb_dir(struct dlm_ls *ls, const void *name, int len, uint32_t hash, uint32_t b, int dir_nodeid, int from_nodeid, unsigned int flags, struct dlm_rsb **r_ret) @@ -600,7 +607,6 @@ static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, */ kref_get(&r->res_ref); - error = 0; goto out_unlock; @@ -719,7 +725,7 @@ static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, dlm_recover_locks) before we've made ourself master (in dlm_recover_masters). */ -static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len, +static int find_rsb_nodir(struct dlm_ls *ls, const void *name, int len, uint32_t hash, uint32_t b, int dir_nodeid, int from_nodeid, unsigned int flags, struct dlm_rsb **r_ret) @@ -813,8 +819,9 @@ static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len, return error; } -static int find_rsb(struct dlm_ls *ls, char *name, int len, int from_nodeid, - unsigned int flags, struct dlm_rsb **r_ret) +static int find_rsb(struct dlm_ls *ls, const void *name, int len, + int from_nodeid, unsigned int flags, + struct dlm_rsb **r_ret) { uint32_t hash, b; int dir_nodeid; @@ -878,6 +885,88 @@ static int validate_master_nodeid(struct dlm_ls *ls, struct dlm_rsb *r, } } +static void __dlm_master_lookup(struct dlm_ls *ls, struct dlm_rsb *r, int our_nodeid, + int from_nodeid, bool toss_list, unsigned int flags, + int *r_nodeid, int *result) +{ + int fix_master = (flags & DLM_LU_RECOVER_MASTER); + int from_master = (flags & DLM_LU_RECOVER_DIR); + + if (r->res_dir_nodeid != our_nodeid) { + /* should not happen, but may as well fix it and carry on */ + log_error(ls, "%s res_dir %d our %d %s", __func__, + r->res_dir_nodeid, our_nodeid, r->res_name); + r->res_dir_nodeid = our_nodeid; + } + + if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) { + /* Recovery uses this function to set a new master when + * the previous master failed. Setting NEW_MASTER will + * force dlm_recover_masters to call recover_master on this + * rsb even though the res_nodeid is no longer removed. + */ + + r->res_master_nodeid = from_nodeid; + r->res_nodeid = from_nodeid; + rsb_set_flag(r, RSB_NEW_MASTER); + + if (toss_list) { + /* I don't think we should ever find it on toss list. */ + log_error(ls, "%s fix_master on toss", __func__); + dlm_dump_rsb(r); + } + } + + if (from_master && (r->res_master_nodeid != from_nodeid)) { + /* this will happen if from_nodeid became master during + * a previous recovery cycle, and we aborted the previous + * cycle before recovering this master value + */ + + log_limit(ls, "%s from_master %d master_nodeid %d res_nodeid %d first %x %s", + __func__, from_nodeid, r->res_master_nodeid, + r->res_nodeid, r->res_first_lkid, r->res_name); + + if (r->res_master_nodeid == our_nodeid) { + log_error(ls, "from_master %d our_master", from_nodeid); + dlm_dump_rsb(r); + goto ret_assign; + } + + r->res_master_nodeid = from_nodeid; + r->res_nodeid = from_nodeid; + rsb_set_flag(r, RSB_NEW_MASTER); + } + + if (!r->res_master_nodeid) { + /* this will happen if recovery happens while we're looking + * up the master for this rsb + */ + + log_debug(ls, "%s master 0 to %d first %x %s", __func__, + from_nodeid, r->res_first_lkid, r->res_name); + r->res_master_nodeid = from_nodeid; + r->res_nodeid = from_nodeid; + } + + if (!from_master && !fix_master && + (r->res_master_nodeid == from_nodeid)) { + /* this can happen when the master sends remove, the dir node + * finds the rsb on the keep list and ignores the remove, + * and the former master sends a lookup + */ + + log_limit(ls, "%s from master %d flags %x first %x %s", + __func__, from_nodeid, flags, r->res_first_lkid, + r->res_name); + } + + ret_assign: + *r_nodeid = r->res_master_nodeid; + if (result) + *result = DLM_LU_MATCH; +} + /* * We're the dir node for this res and another node wants to know the * master nodeid. During normal operation (non recovery) this is only @@ -912,10 +1001,8 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len, { struct dlm_rsb *r = NULL; uint32_t hash, b; - int from_master = (flags & DLM_LU_RECOVER_DIR); - int fix_master = (flags & DLM_LU_RECOVER_MASTER); int our_nodeid = dlm_our_nodeid(); - int dir_nodeid, error, toss_list = 0; + int dir_nodeid, error; if (len > DLM_RESNAME_MAXLEN) return -EINVAL; @@ -947,12 +1034,21 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len, error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); if (!error) { /* because the rsb is active, we need to lock_rsb before - checking/changing re_master_nodeid */ + * checking/changing re_master_nodeid + */ hold_rsb(r); spin_unlock(&ls->ls_rsbtbl[b].lock); lock_rsb(r); - goto found; + + __dlm_master_lookup(ls, r, our_nodeid, from_nodeid, false, + flags, r_nodeid, result); + + /* the rsb was active */ + unlock_rsb(r); + put_rsb(r); + + return 0; } error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); @@ -960,90 +1056,16 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len, goto not_found; /* because the rsb is inactive (on toss list), it's not refcounted - and lock_rsb is not used, but is protected by the rsbtbl lock */ - - toss_list = 1; - found: - if (r->res_dir_nodeid != our_nodeid) { - /* should not happen, but may as well fix it and carry on */ - log_error(ls, "dlm_master_lookup res_dir %d our %d %s", - r->res_dir_nodeid, our_nodeid, r->res_name); - r->res_dir_nodeid = our_nodeid; - } - - if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) { - /* Recovery uses this function to set a new master when - the previous master failed. Setting NEW_MASTER will - force dlm_recover_masters to call recover_master on this - rsb even though the res_nodeid is no longer removed. */ - - r->res_master_nodeid = from_nodeid; - r->res_nodeid = from_nodeid; - rsb_set_flag(r, RSB_NEW_MASTER); - - if (toss_list) { - /* I don't think we should ever find it on toss list. */ - log_error(ls, "dlm_master_lookup fix_master on toss"); - dlm_dump_rsb(r); - } - } - - if (from_master && (r->res_master_nodeid != from_nodeid)) { - /* this will happen if from_nodeid became master during - a previous recovery cycle, and we aborted the previous - cycle before recovering this master value */ - - log_limit(ls, "dlm_master_lookup from_master %d " - "master_nodeid %d res_nodeid %d first %x %s", - from_nodeid, r->res_master_nodeid, r->res_nodeid, - r->res_first_lkid, r->res_name); - - if (r->res_master_nodeid == our_nodeid) { - log_error(ls, "from_master %d our_master", from_nodeid); - dlm_dump_rsb(r); - goto out_found; - } - - r->res_master_nodeid = from_nodeid; - r->res_nodeid = from_nodeid; - rsb_set_flag(r, RSB_NEW_MASTER); - } - - if (!r->res_master_nodeid) { - /* this will happen if recovery happens while we're looking - up the master for this rsb */ - - log_debug(ls, "dlm_master_lookup master 0 to %d first %x %s", - from_nodeid, r->res_first_lkid, r->res_name); - r->res_master_nodeid = from_nodeid; - r->res_nodeid = from_nodeid; - } - - if (!from_master && !fix_master && - (r->res_master_nodeid == from_nodeid)) { - /* this can happen when the master sends remove, the dir node - finds the rsb on the keep list and ignores the remove, - and the former master sends a lookup */ + * and lock_rsb is not used, but is protected by the rsbtbl lock + */ - log_limit(ls, "dlm_master_lookup from master %d flags %x " - "first %x %s", from_nodeid, flags, - r->res_first_lkid, r->res_name); - } + __dlm_master_lookup(ls, r, our_nodeid, from_nodeid, true, flags, + r_nodeid, result); - out_found: - *r_nodeid = r->res_master_nodeid; - if (result) - *result = DLM_LU_MATCH; + r->res_toss_time = jiffies; + /* the rsb was inactive (on toss list) */ + spin_unlock(&ls->ls_rsbtbl[b].lock); - if (toss_list) { - r->res_toss_time = jiffies; - /* the rsb was inactive (on toss list) */ - spin_unlock(&ls->ls_rsbtbl[b].lock); - } else { - /* the rsb was active */ - unlock_rsb(r); - put_rsb(r); - } return 0; not_found: @@ -1074,7 +1096,6 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len, if (result) *result = DLM_LU_ADD; *r_nodeid = from_nodeid; - error = 0; out_unlock: spin_unlock(&ls->ls_rsbtbl[b].lock); return error; @@ -1178,7 +1199,8 @@ static void detach_lkb(struct dlm_lkb *lkb) } } -static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) +static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret, + int start, int end) { struct dlm_lkb *lkb; int rv; @@ -1192,14 +1214,16 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) kref_init(&lkb->lkb_ref); INIT_LIST_HEAD(&lkb->lkb_ownqueue); INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); +#ifdef CONFIG_DLM_DEPRECATED_API INIT_LIST_HEAD(&lkb->lkb_time_list); +#endif INIT_LIST_HEAD(&lkb->lkb_cb_list); mutex_init(&lkb->lkb_cb_mutex); INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work); idr_preload(GFP_NOFS); spin_lock(&ls->ls_lkbidr_spin); - rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT); + rv = idr_alloc(&ls->ls_lkbidr, lkb, start, end, GFP_NOWAIT); if (rv >= 0) lkb->lkb_id = rv; spin_unlock(&ls->ls_lkbidr_spin); @@ -1215,6 +1239,11 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) return 0; } +static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) +{ + return _create_lkb(ls, lkb_ret, 1, 0); +} + static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) { struct dlm_lkb *lkb; @@ -1245,9 +1274,11 @@ static void kill_lkb(struct kref *kref) static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) { uint32_t lkid = lkb->lkb_id; + int rv; - spin_lock(&ls->ls_lkbidr_spin); - if (kref_put(&lkb->lkb_ref, kill_lkb)) { + rv = kref_put_lock(&lkb->lkb_ref, kill_lkb, + &ls->ls_lkbidr_spin); + if (rv) { idr_remove(&ls->ls_lkbidr, lkid); spin_unlock(&ls->ls_lkbidr_spin); @@ -1257,11 +1288,9 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) if (lkb->lkb_lvbptr && is_master_copy(lkb)) dlm_free_lvb(lkb->lkb_lvbptr); dlm_free_lkb(lkb); - return 1; - } else { - spin_unlock(&ls->ls_lkbidr_spin); - return 0; } + + return rv; } int dlm_put_lkb(struct dlm_lkb *lkb) @@ -1283,6 +1312,13 @@ static inline void hold_lkb(struct dlm_lkb *lkb) kref_get(&lkb->lkb_ref); } +static void unhold_lkb_assert(struct kref *kref) +{ + struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref); + + DLM_ASSERT(false, dlm_print_lkb(lkb);); +} + /* This is called when we need to remove a reference and are certain it's not the last ref. e.g. del_lkb is always called between a find_lkb/put_lkb and is always the inverse of a previous add_lkb. @@ -1290,21 +1326,23 @@ static inline void hold_lkb(struct dlm_lkb *lkb) static inline void unhold_lkb(struct dlm_lkb *lkb) { - int rv; - rv = kref_put(&lkb->lkb_ref, kill_lkb); - DLM_ASSERT(!rv, dlm_print_lkb(lkb);); + kref_put(&lkb->lkb_ref, unhold_lkb_assert); } static void lkb_add_ordered(struct list_head *new, struct list_head *head, int mode) { - struct dlm_lkb *lkb = NULL; + struct dlm_lkb *lkb = NULL, *iter; - list_for_each_entry(lkb, head, lkb_statequeue) - if (lkb->lkb_rqmode < mode) + list_for_each_entry(iter, head, lkb_statequeue) + if (iter->lkb_rqmode < mode) { + lkb = iter; + list_add_tail(new, &iter->lkb_statequeue); break; + } - __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue); + if (!lkb) + list_add_tail(new, head); } /* add/remove lkb to rsb's grant/convert/wait queue */ @@ -1375,75 +1413,6 @@ static int msg_reply_type(int mstype) return -1; } -static int nodeid_warned(int nodeid, int num_nodes, int *warned) -{ - int i; - - for (i = 0; i < num_nodes; i++) { - if (!warned[i]) { - warned[i] = nodeid; - return 0; - } - if (warned[i] == nodeid) - return 1; - } - return 0; -} - -void dlm_scan_waiters(struct dlm_ls *ls) -{ - struct dlm_lkb *lkb; - s64 us; - s64 debug_maxus = 0; - u32 debug_scanned = 0; - u32 debug_expired = 0; - int num_nodes = 0; - int *warned = NULL; - - if (!dlm_config.ci_waitwarn_us) - return; - - mutex_lock(&ls->ls_waiters_mutex); - - list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { - if (!lkb->lkb_wait_time) - continue; - - debug_scanned++; - - us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time)); - - if (us < dlm_config.ci_waitwarn_us) - continue; - - lkb->lkb_wait_time = 0; - - debug_expired++; - if (us > debug_maxus) - debug_maxus = us; - - if (!num_nodes) { - num_nodes = ls->ls_num_nodes; - warned = kcalloc(num_nodes, sizeof(int), GFP_KERNEL); - } - if (!warned) - continue; - if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned)) - continue; - - log_error(ls, "waitwarn %x %lld %d us check connection to " - "node %d", lkb->lkb_id, (long long)us, - dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid); - } - mutex_unlock(&ls->ls_waiters_mutex); - kfree(warned); - - if (debug_expired) - log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us", - debug_scanned, debug_expired, - dlm_config.ci_waitwarn_us, (long long)debug_maxus); -} - /* add/remove lkb from global waiters list of lkb's waiting for a reply from a remote node */ @@ -1487,7 +1456,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid) lkb->lkb_wait_count++; lkb->lkb_wait_type = mstype; - lkb->lkb_wait_time = ktime_get(); lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */ hold_lkb(lkb); list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); @@ -1551,6 +1519,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, lkb->lkb_wait_type = 0; lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; lkb->lkb_wait_count--; + unhold_lkb(lkb); goto out_del; } @@ -1563,8 +1532,8 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, } log_error(ls, "remwait error %x remote %d %x msg %d flags %x no wait", - lkb->lkb_id, ms ? ms->m_header.h_nodeid : 0, lkb->lkb_remid, - mstype, lkb->lkb_flags); + lkb->lkb_id, ms ? le32_to_cpu(ms->m_header.h_nodeid) : 0, + lkb->lkb_remid, mstype, lkb->lkb_flags); return -1; out_del: @@ -1577,6 +1546,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, log_error(ls, "remwait error %x reply %d wait_type %d overlap", lkb->lkb_id, mstype, lkb->lkb_wait_type); lkb->lkb_wait_count--; + unhold_lkb(lkb); lkb->lkb_wait_type = 0; } @@ -1609,30 +1579,33 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) struct dlm_ls *ls = lkb->lkb_resource->res_ls; int error; - if (ms->m_flags != DLM_IFL_STUB_MS) + if (ms->m_flags != cpu_to_le32(DLM_IFL_STUB_MS)) mutex_lock(&ls->ls_waiters_mutex); - error = _remove_from_waiters(lkb, ms->m_type, ms); - if (ms->m_flags != DLM_IFL_STUB_MS) + error = _remove_from_waiters(lkb, le32_to_cpu(ms->m_type), ms); + if (ms->m_flags != cpu_to_le32(DLM_IFL_STUB_MS)) mutex_unlock(&ls->ls_waiters_mutex); return error; } /* If there's an rsb for the same resource being removed, ensure - that the remove message is sent before the new lookup message. - It should be rare to need a delay here, but if not, then it may - be worthwhile to add a proper wait mechanism rather than a delay. */ + * that the remove message is sent before the new lookup message. + */ + +#define DLM_WAIT_PENDING_COND(ls, r) \ + (ls->ls_remove_len && \ + !rsb_cmp(r, ls->ls_remove_name, \ + ls->ls_remove_len)) static void wait_pending_remove(struct dlm_rsb *r) { struct dlm_ls *ls = r->res_ls; restart: spin_lock(&ls->ls_remove_spin); - if (ls->ls_remove_len && - !rsb_cmp(r, ls->ls_remove_name, ls->ls_remove_len)) { + if (DLM_WAIT_PENDING_COND(ls, r)) { log_debug(ls, "delay lookup for remove dir %d %s", - r->res_dir_nodeid, r->res_name); + r->res_dir_nodeid, r->res_name); spin_unlock(&ls->ls_remove_spin); - msleep(1); + wait_event(ls->ls_remove_wait, !DLM_WAIT_PENDING_COND(ls, r)); goto restart; } spin_unlock(&ls->ls_remove_spin); @@ -1792,6 +1765,7 @@ static void shrink_bucket(struct dlm_ls *ls, int b) ls->ls_remove_len = 0; memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); spin_unlock(&ls->ls_remove_spin); + wake_up(&ls->ls_remove_wait); dlm_free_rsb(r); } @@ -1809,6 +1783,7 @@ void dlm_scan_rsbs(struct dlm_ls *ls) } } +#ifdef CONFIG_DLM_DEPRECATED_API static void add_timeout(struct dlm_lkb *lkb) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; @@ -1854,7 +1829,7 @@ static void del_timeout(struct dlm_lkb *lkb) void dlm_scan_timeout(struct dlm_ls *ls) { struct dlm_rsb *r; - struct dlm_lkb *lkb; + struct dlm_lkb *lkb = NULL, *iter; int do_cancel, do_warn; s64 wait_us; @@ -1865,27 +1840,28 @@ void dlm_scan_timeout(struct dlm_ls *ls) do_cancel = 0; do_warn = 0; mutex_lock(&ls->ls_timeout_mutex); - list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) { + list_for_each_entry(iter, &ls->ls_timeout, lkb_time_list) { wait_us = ktime_to_us(ktime_sub(ktime_get(), - lkb->lkb_timestamp)); + iter->lkb_timestamp)); - if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) && - wait_us >= (lkb->lkb_timeout_cs * 10000)) + if ((iter->lkb_exflags & DLM_LKF_TIMEOUT) && + wait_us >= (iter->lkb_timeout_cs * 10000)) do_cancel = 1; - if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) && + if ((iter->lkb_flags & DLM_IFL_WATCH_TIMEWARN) && wait_us >= dlm_config.ci_timewarn_cs * 10000) do_warn = 1; if (!do_cancel && !do_warn) continue; - hold_lkb(lkb); + hold_lkb(iter); + lkb = iter; break; } mutex_unlock(&ls->ls_timeout_mutex); - if (!do_cancel && !do_warn) + if (!lkb) break; r = lkb->lkb_resource; @@ -1928,17 +1904,11 @@ void dlm_adjust_timeouts(struct dlm_ls *ls) list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us); mutex_unlock(&ls->ls_timeout_mutex); - - if (!dlm_config.ci_waitwarn_us) - return; - - mutex_lock(&ls->ls_waiters_mutex); - list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { - if (ktime_to_us(lkb->lkb_wait_time)) - lkb->lkb_wait_time = ktime_get(); - } - mutex_unlock(&ls->ls_waiters_mutex); } +#else +static void add_timeout(struct dlm_lkb *lkb) { } +static void del_timeout(struct dlm_lkb *lkb) { } +#endif /* lkb is master or local copy */ @@ -2039,7 +2009,7 @@ static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, if (len > r->res_ls->ls_lvblen) len = r->res_ls->ls_lvblen; memcpy(lkb->lkb_lvbptr, ms->m_extra, len); - lkb->lkb_lvbseq = ms->m_lvbseq; + lkb->lkb_lvbseq = le32_to_cpu(ms->m_lvbseq); } } @@ -2170,10 +2140,10 @@ static void munge_demoted(struct dlm_lkb *lkb) static void munge_altmode(struct dlm_lkb *lkb, struct dlm_message *ms) { - if (ms->m_type != DLM_MSG_REQUEST_REPLY && - ms->m_type != DLM_MSG_GRANT) { + if (ms->m_type != cpu_to_le32(DLM_MSG_REQUEST_REPLY) && + ms->m_type != cpu_to_le32(DLM_MSG_GRANT)) { log_print("munge_altmode %x invalid reply type %d", - lkb->lkb_id, ms->m_type); + lkb->lkb_id, le32_to_cpu(ms->m_type)); return; } @@ -2803,12 +2773,20 @@ static void confirm_master(struct dlm_rsb *r, int error) } } +#ifdef CONFIG_DLM_DEPRECATED_API static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, int namelen, unsigned long timeout_cs, void (*ast) (void *astparam), void *astparam, void (*bast) (void *astparam, int mode), struct dlm_args *args) +#else +static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, + int namelen, void (*ast)(void *astparam), + void *astparam, + void (*bast)(void *astparam, int mode), + struct dlm_args *args) +#endif { int rv = -EINVAL; @@ -2861,7 +2839,9 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, args->astfn = ast; args->astparam = astparam; args->bastfn = bast; +#ifdef CONFIG_DLM_DEPRECATED_API args->timeout = timeout_cs; +#endif args->mode = mode; args->lksb = lksb; rv = 0; @@ -2886,24 +2866,25 @@ static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args) static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_args *args) { - int rv = -EINVAL; + int rv = -EBUSY; if (args->flags & DLM_LKF_CONVERT) { - if (lkb->lkb_flags & DLM_IFL_MSTCPY) + if (lkb->lkb_status != DLM_LKSTS_GRANTED) goto out; - if (args->flags & DLM_LKF_QUECVT && - !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1]) + /* lock not allowed if there's any op in progress */ + if (lkb->lkb_wait_type || lkb->lkb_wait_count) goto out; - rv = -EBUSY; - if (lkb->lkb_status != DLM_LKSTS_GRANTED) + if (is_overlap(lkb)) goto out; - if (lkb->lkb_wait_type) + rv = -EINVAL; + if (lkb->lkb_flags & DLM_IFL_MSTCPY) goto out; - if (is_overlap(lkb)) + if (args->flags & DLM_LKF_QUECVT && + !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1]) goto out; } @@ -2916,14 +2897,30 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_lksb = args->lksb; lkb->lkb_lvbptr = args->lksb->sb_lvbptr; lkb->lkb_ownpid = (int) current->pid; +#ifdef CONFIG_DLM_DEPRECATED_API lkb->lkb_timeout_cs = args->timeout; +#endif rv = 0; out: - if (rv) - log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s", + switch (rv) { + case 0: + break; + case -EINVAL: + /* annoy the user because dlm usage is wrong */ + WARN_ON(1); + log_error(ls, "%s %d %x %x %x %d %d %s", __func__, rv, lkb->lkb_id, lkb->lkb_flags, args->flags, lkb->lkb_status, lkb->lkb_wait_type, lkb->lkb_resource->res_name); + break; + default: + log_debug(ls, "%s %d %x %x %x %d %d %s", __func__, + rv, lkb->lkb_id, lkb->lkb_flags, args->flags, + lkb->lkb_status, lkb->lkb_wait_type, + lkb->lkb_resource->res_name); + break; + } + return rv; } @@ -2937,23 +2934,12 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; - int rv = -EINVAL; - - if (lkb->lkb_flags & DLM_IFL_MSTCPY) { - log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id); - dlm_print_lkb(lkb); - goto out; - } + int rv = -EBUSY; - /* an lkb may still exist even though the lock is EOL'ed due to a - cancel, unlock or failed noqueue request; an app can't use these - locks; return same error as if the lkid had not been found at all */ - - if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) { - log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id); - rv = -ENOENT; + /* normal unlock not allowed if there's any op in progress */ + if (!(args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) && + (lkb->lkb_wait_type || lkb->lkb_wait_count)) goto out; - } /* an lkb may be waiting for an rsb lookup to complete where the lookup was initiated by another lock */ @@ -2968,7 +2954,24 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) unhold_lkb(lkb); /* undoes create_lkb() */ } /* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */ - rv = -EBUSY; + goto out; + } + + rv = -EINVAL; + if (lkb->lkb_flags & DLM_IFL_MSTCPY) { + log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id); + dlm_print_lkb(lkb); + goto out; + } + + /* an lkb may still exist even though the lock is EOL'ed due to a + * cancel, unlock or failed noqueue request; an app can't use these + * locks; return same error as if the lkid had not been found at all + */ + + if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) { + log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id); + rv = -ENOENT; goto out; } @@ -3041,14 +3044,8 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) goto out; } /* add_to_waiters() will set OVERLAP_UNLOCK */ - goto out_ok; } - /* normal unlock not allowed if there's any op in progress */ - rv = -EBUSY; - if (lkb->lkb_wait_type || lkb->lkb_wait_count) - goto out; - out_ok: /* an overlapping op shouldn't blow away exflags from other op */ lkb->lkb_exflags |= args->flags; @@ -3056,11 +3053,25 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) lkb->lkb_astparam = args->astparam; rv = 0; out: - if (rv) - log_debug(ls, "validate_unlock_args %d %x %x %x %x %d %s", rv, + switch (rv) { + case 0: + break; + case -EINVAL: + /* annoy the user because dlm usage is wrong */ + WARN_ON(1); + log_error(ls, "%s %d %x %x %x %x %d %s", __func__, rv, lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags, args->flags, lkb->lkb_wait_type, lkb->lkb_resource->res_name); + break; + default: + log_debug(ls, "%s %d %x %x %x %x %d %s", __func__, rv, + lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags, + args->flags, lkb->lkb_wait_type, + lkb->lkb_resource->res_name); + break; + } + return rv; } @@ -3311,8 +3322,9 @@ static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) * request_lock(), convert_lock(), unlock_lock(), cancel_lock() */ -static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name, - int len, struct dlm_args *args) +static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, + const void *name, int len, + struct dlm_args *args) { struct dlm_rsb *r; int error; @@ -3411,7 +3423,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, int mode, struct dlm_lksb *lksb, uint32_t flags, - void *name, + const void *name, unsigned int namelen, uint32_t parent_lkid, void (*ast) (void *astarg), @@ -3437,8 +3449,15 @@ int dlm_lock(dlm_lockspace_t *lockspace, if (error) goto out; + trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags); + +#ifdef CONFIG_DLM_DEPRECATED_API error = set_lock_args(mode, lksb, flags, namelen, 0, ast, astarg, bast, &args); +#else + error = set_lock_args(mode, lksb, flags, namelen, ast, astarg, bast, + &args); +#endif if (error) goto out_put; @@ -3450,6 +3469,8 @@ int dlm_lock(dlm_lockspace_t *lockspace, if (error == -EINPROGRESS) error = 0; out_put: + trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, true); + if (convert || error) __put_lkb(ls, lkb); if (error == -EAGAIN || error == -EDEADLK) @@ -3481,6 +3502,8 @@ int dlm_unlock(dlm_lockspace_t *lockspace, if (error) goto out; + trace_dlm_unlock_start(ls, lkb, flags); + error = set_unlock_args(flags, astarg, &args); if (error) goto out_put; @@ -3495,6 +3518,8 @@ int dlm_unlock(dlm_lockspace_t *lockspace, if (error == -EBUSY && (flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK))) error = 0; out_put: + trace_dlm_unlock_end(ls, lkb, flags, error); + dlm_put_lkb(lkb); out: dlm_unlock_recovery(ls); @@ -3543,13 +3568,13 @@ static int _create_message(struct dlm_ls *ls, int mb_len, ms = (struct dlm_message *) mb; - ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); - ms->m_header.u.h_lockspace = ls->ls_global_id; - ms->m_header.h_nodeid = dlm_our_nodeid(); - ms->m_header.h_length = mb_len; + ms->m_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR); + ms->m_header.u.h_lockspace = cpu_to_le32(ls->ls_global_id); + ms->m_header.h_nodeid = cpu_to_le32(dlm_our_nodeid()); + ms->m_header.h_length = cpu_to_le16(mb_len); ms->m_header.h_cmd = DLM_MSG; - ms->m_type = mstype; + ms->m_type = cpu_to_le32(mstype); *mh_ret = mh; *ms_ret = ms; @@ -3588,7 +3613,6 @@ static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb, static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms) { - dlm_message_out(ms); dlm_midcomms_commit_mhandle(mh); return 0; } @@ -3596,41 +3620,41 @@ static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms) static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb, struct dlm_message *ms) { - ms->m_nodeid = lkb->lkb_nodeid; - ms->m_pid = lkb->lkb_ownpid; - ms->m_lkid = lkb->lkb_id; - ms->m_remid = lkb->lkb_remid; - ms->m_exflags = lkb->lkb_exflags; - ms->m_sbflags = lkb->lkb_sbflags; - ms->m_flags = lkb->lkb_flags; - ms->m_lvbseq = lkb->lkb_lvbseq; - ms->m_status = lkb->lkb_status; - ms->m_grmode = lkb->lkb_grmode; - ms->m_rqmode = lkb->lkb_rqmode; - ms->m_hash = r->res_hash; + ms->m_nodeid = cpu_to_le32(lkb->lkb_nodeid); + ms->m_pid = cpu_to_le32(lkb->lkb_ownpid); + ms->m_lkid = cpu_to_le32(lkb->lkb_id); + ms->m_remid = cpu_to_le32(lkb->lkb_remid); + ms->m_exflags = cpu_to_le32(lkb->lkb_exflags); + ms->m_sbflags = cpu_to_le32(lkb->lkb_sbflags); + ms->m_flags = cpu_to_le32(lkb->lkb_flags); + ms->m_lvbseq = cpu_to_le32(lkb->lkb_lvbseq); + ms->m_status = cpu_to_le32(lkb->lkb_status); + ms->m_grmode = cpu_to_le32(lkb->lkb_grmode); + ms->m_rqmode = cpu_to_le32(lkb->lkb_rqmode); + ms->m_hash = cpu_to_le32(r->res_hash); /* m_result and m_bastmode are set from function args, not from lkb fields */ if (lkb->lkb_bastfn) - ms->m_asts |= DLM_CB_BAST; + ms->m_asts |= cpu_to_le32(DLM_CB_BAST); if (lkb->lkb_astfn) - ms->m_asts |= DLM_CB_CAST; + ms->m_asts |= cpu_to_le32(DLM_CB_CAST); /* compare with switch in create_message; send_remove() doesn't use send_args() */ switch (ms->m_type) { - case DLM_MSG_REQUEST: - case DLM_MSG_LOOKUP: + case cpu_to_le32(DLM_MSG_REQUEST): + case cpu_to_le32(DLM_MSG_LOOKUP): memcpy(ms->m_extra, r->res_name, r->res_length); break; - case DLM_MSG_CONVERT: - case DLM_MSG_UNLOCK: - case DLM_MSG_REQUEST_REPLY: - case DLM_MSG_CONVERT_REPLY: - case DLM_MSG_GRANT: - if (!lkb->lkb_lvbptr) + case cpu_to_le32(DLM_MSG_CONVERT): + case cpu_to_le32(DLM_MSG_UNLOCK): + case cpu_to_le32(DLM_MSG_REQUEST_REPLY): + case cpu_to_le32(DLM_MSG_CONVERT_REPLY): + case cpu_to_le32(DLM_MSG_GRANT): + if (!lkb->lkb_lvbptr || !(lkb->lkb_exflags & DLM_LKF_VALBLK)) break; memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); break; @@ -3679,8 +3703,8 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) /* down conversions go without a reply from the master */ if (!error && down_conversion(lkb)) { remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); - r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS; - r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; + r->res_ls->ls_stub_ms.m_flags = cpu_to_le32(DLM_IFL_STUB_MS); + r->res_ls->ls_stub_ms.m_type = cpu_to_le32(DLM_MSG_CONVERT_REPLY); r->res_ls->ls_stub_ms.m_result = 0; __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); } @@ -3737,7 +3761,7 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode) send_args(r, lkb, ms); - ms->m_bastmode = mode; + ms->m_bastmode = cpu_to_le32(mode); error = send_message(mh, ms); out: @@ -3785,7 +3809,7 @@ static int send_remove(struct dlm_rsb *r) goto out; memcpy(ms->m_extra, r->res_name, r->res_length); - ms->m_hash = r->res_hash; + ms->m_hash = cpu_to_le32(r->res_hash); error = send_message(mh, ms); out: @@ -3807,7 +3831,7 @@ static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, send_args(r, lkb, ms); - ms->m_result = rv; + ms->m_result = cpu_to_le32(to_dlm_errno(rv)); error = send_message(mh, ms); out: @@ -3840,15 +3864,15 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in, struct dlm_rsb *r = &ls->ls_stub_rsb; struct dlm_message *ms; struct dlm_mhandle *mh; - int error, nodeid = ms_in->m_header.h_nodeid; + int error, nodeid = le32_to_cpu(ms_in->m_header.h_nodeid); error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh); if (error) goto out; ms->m_lkid = ms_in->m_lkid; - ms->m_result = rv; - ms->m_nodeid = ret_nodeid; + ms->m_result = cpu_to_le32(to_dlm_errno(rv)); + ms->m_nodeid = cpu_to_le32(ret_nodeid); error = send_message(mh, ms); out: @@ -3861,25 +3885,26 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in, static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms) { - lkb->lkb_exflags = ms->m_exflags; - lkb->lkb_sbflags = ms->m_sbflags; + lkb->lkb_exflags = le32_to_cpu(ms->m_exflags); + lkb->lkb_sbflags = le32_to_cpu(ms->m_sbflags); lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | - (ms->m_flags & 0x0000FFFF); + (le32_to_cpu(ms->m_flags) & 0x0000FFFF); } static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) { - if (ms->m_flags == DLM_IFL_STUB_MS) + if (ms->m_flags == cpu_to_le32(DLM_IFL_STUB_MS)) return; - lkb->lkb_sbflags = ms->m_sbflags; + lkb->lkb_sbflags = le32_to_cpu(ms->m_sbflags); lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | - (ms->m_flags & 0x0000FFFF); + (le32_to_cpu(ms->m_flags) & 0x0000FFFF); } static int receive_extralen(struct dlm_message *ms) { - return (ms->m_header.h_length - sizeof(struct dlm_message)); + return (le16_to_cpu(ms->m_header.h_length) - + sizeof(struct dlm_message)); } static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, @@ -3913,14 +3938,14 @@ static void fake_astfn(void *astparam) static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_message *ms) { - lkb->lkb_nodeid = ms->m_header.h_nodeid; - lkb->lkb_ownpid = ms->m_pid; - lkb->lkb_remid = ms->m_lkid; + lkb->lkb_nodeid = le32_to_cpu(ms->m_header.h_nodeid); + lkb->lkb_ownpid = le32_to_cpu(ms->m_pid); + lkb->lkb_remid = le32_to_cpu(ms->m_lkid); lkb->lkb_grmode = DLM_LOCK_IV; - lkb->lkb_rqmode = ms->m_rqmode; + lkb->lkb_rqmode = le32_to_cpu(ms->m_rqmode); - lkb->lkb_bastfn = (ms->m_asts & DLM_CB_BAST) ? &fake_bastfn : NULL; - lkb->lkb_astfn = (ms->m_asts & DLM_CB_CAST) ? &fake_astfn : NULL; + lkb->lkb_bastfn = (ms->m_asts & cpu_to_le32(DLM_CB_BAST)) ? &fake_bastfn : NULL; + lkb->lkb_astfn = (ms->m_asts & cpu_to_le32(DLM_CB_CAST)) ? &fake_astfn : NULL; if (lkb->lkb_exflags & DLM_LKF_VALBLK) { /* lkb was just created so there won't be an lvb yet */ @@ -3941,8 +3966,8 @@ static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, if (receive_lvb(ls, lkb, ms)) return -ENOMEM; - lkb->lkb_rqmode = ms->m_rqmode; - lkb->lkb_lvbseq = ms->m_lvbseq; + lkb->lkb_rqmode = le32_to_cpu(ms->m_rqmode); + lkb->lkb_lvbseq = le32_to_cpu(ms->m_lvbseq); return 0; } @@ -3961,8 +3986,8 @@ static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms) { struct dlm_lkb *lkb = &ls->ls_stub_lkb; - lkb->lkb_nodeid = ms->m_header.h_nodeid; - lkb->lkb_remid = ms->m_lkid; + lkb->lkb_nodeid = le32_to_cpu(ms->m_header.h_nodeid); + lkb->lkb_remid = le32_to_cpu(ms->m_lkid); } /* This is called after the rsb is locked so that we can safely inspect @@ -3970,27 +3995,36 @@ static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms) static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) { - int from = ms->m_header.h_nodeid; + int from = le32_to_cpu(ms->m_header.h_nodeid); int error = 0; + /* currently mixing of user/kernel locks are not supported */ + if (ms->m_flags & cpu_to_le32(DLM_IFL_USER) && + ~lkb->lkb_flags & DLM_IFL_USER) { + log_error(lkb->lkb_resource->res_ls, + "got user dlm message for a kernel lock"); + error = -EINVAL; + goto out; + } + switch (ms->m_type) { - case DLM_MSG_CONVERT: - case DLM_MSG_UNLOCK: - case DLM_MSG_CANCEL: + case cpu_to_le32(DLM_MSG_CONVERT): + case cpu_to_le32(DLM_MSG_UNLOCK): + case cpu_to_le32(DLM_MSG_CANCEL): if (!is_master_copy(lkb) || lkb->lkb_nodeid != from) error = -EINVAL; break; - case DLM_MSG_CONVERT_REPLY: - case DLM_MSG_UNLOCK_REPLY: - case DLM_MSG_CANCEL_REPLY: - case DLM_MSG_GRANT: - case DLM_MSG_BAST: + case cpu_to_le32(DLM_MSG_CONVERT_REPLY): + case cpu_to_le32(DLM_MSG_UNLOCK_REPLY): + case cpu_to_le32(DLM_MSG_CANCEL_REPLY): + case cpu_to_le32(DLM_MSG_GRANT): + case cpu_to_le32(DLM_MSG_BAST): if (!is_process_copy(lkb) || lkb->lkb_nodeid != from) error = -EINVAL; break; - case DLM_MSG_REQUEST_REPLY: + case cpu_to_le32(DLM_MSG_REQUEST_REPLY): if (!is_process_copy(lkb)) error = -EINVAL; else if (lkb->lkb_nodeid != -1 && lkb->lkb_nodeid != from) @@ -4001,11 +4035,12 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) error = -EINVAL; } +out: if (error) log_error(lkb->lkb_resource->res_ls, "ignore invalid message %d from %d %x %x %x %d", - ms->m_type, from, lkb->lkb_id, lkb->lkb_remid, - lkb->lkb_flags, lkb->lkb_nodeid); + le32_to_cpu(ms->m_type), from, lkb->lkb_id, + lkb->lkb_remid, lkb->lkb_flags, lkb->lkb_nodeid); return error; } @@ -4054,17 +4089,19 @@ static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len) rv = _create_message(ls, sizeof(struct dlm_message) + len, dir_nodeid, DLM_MSG_REMOVE, &ms, &mh); if (rv) - return; + goto out; memcpy(ms->m_extra, name, len); - ms->m_hash = hash; + ms->m_hash = cpu_to_le32(hash); send_message(mh, ms); +out: spin_lock(&ls->ls_remove_spin); ls->ls_remove_len = 0; memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); spin_unlock(&ls->ls_remove_spin); + wake_up(&ls->ls_remove_wait); } static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) @@ -4074,7 +4111,7 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) int from_nodeid; int error, namelen = 0; - from_nodeid = ms->m_header.h_nodeid; + from_nodeid = le32_to_cpu(ms->m_header.h_nodeid); error = create_lkb(ls, &lkb); if (error) @@ -4147,7 +4184,7 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) if (error != -ENOTBLK) { log_limit(ls, "receive_request %x from %d %d", - ms->m_lkid, from_nodeid, error); + le32_to_cpu(ms->m_lkid), from_nodeid, error); } if (namelen && error == -EBADR) { @@ -4166,15 +4203,16 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_rsb *r; int error, reply = 1; - error = find_lkb(ls, ms->m_remid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb); if (error) goto fail; - if (lkb->lkb_remid != ms->m_lkid) { + if (lkb->lkb_remid != le32_to_cpu(ms->m_lkid)) { log_error(ls, "receive_convert %x remid %x recover_seq %llu " "remote %d %x", lkb->lkb_id, lkb->lkb_remid, (unsigned long long)lkb->lkb_recover_seq, - ms->m_header.h_nodeid, ms->m_lkid); + le32_to_cpu(ms->m_header.h_nodeid), + le32_to_cpu(ms->m_lkid)); error = -ENOENT; dlm_put_lkb(lkb); goto fail; @@ -4221,14 +4259,15 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_rsb *r; int error; - error = find_lkb(ls, ms->m_remid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb); if (error) goto fail; - if (lkb->lkb_remid != ms->m_lkid) { + if (lkb->lkb_remid != le32_to_cpu(ms->m_lkid)) { log_error(ls, "receive_unlock %x remid %x remote %d %x", lkb->lkb_id, lkb->lkb_remid, - ms->m_header.h_nodeid, ms->m_lkid); + le32_to_cpu(ms->m_header.h_nodeid), + le32_to_cpu(ms->m_lkid)); error = -ENOENT; dlm_put_lkb(lkb); goto fail; @@ -4272,7 +4311,7 @@ static int receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_rsb *r; int error; - error = find_lkb(ls, ms->m_remid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb); if (error) goto fail; @@ -4308,7 +4347,7 @@ static int receive_grant(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_rsb *r; int error; - error = find_lkb(ls, ms->m_remid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb); if (error) return error; @@ -4339,7 +4378,7 @@ static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_rsb *r; int error; - error = find_lkb(ls, ms->m_remid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb); if (error) return error; @@ -4352,8 +4391,8 @@ static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms) if (error) goto out; - queue_bast(r, lkb, ms->m_bastmode); - lkb->lkb_highbast = ms->m_bastmode; + queue_bast(r, lkb, le32_to_cpu(ms->m_bastmode)); + lkb->lkb_highbast = le32_to_cpu(ms->m_bastmode); out: unlock_rsb(r); put_rsb(r); @@ -4365,7 +4404,7 @@ static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) { int len, error, ret_nodeid, from_nodeid, our_nodeid; - from_nodeid = ms->m_header.h_nodeid; + from_nodeid = le32_to_cpu(ms->m_header.h_nodeid); our_nodeid = dlm_our_nodeid(); len = receive_extralen(ms); @@ -4388,7 +4427,7 @@ static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) uint32_t hash, b; int rv, len, dir_nodeid, from_nodeid; - from_nodeid = ms->m_header.h_nodeid; + from_nodeid = le32_to_cpu(ms->m_header.h_nodeid); len = receive_extralen(ms); @@ -4398,7 +4437,7 @@ static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) return; } - dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); + dir_nodeid = dlm_hash2nodeid(ls, le32_to_cpu(ms->m_hash)); if (dir_nodeid != dlm_our_nodeid()) { log_error(ls, "receive_remove from %d bad nodeid %d", from_nodeid, dir_nodeid); @@ -4471,7 +4510,7 @@ static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms) { - do_purge(ls, ms->m_nodeid, ms->m_pid); + do_purge(ls, le32_to_cpu(ms->m_nodeid), le32_to_cpu(ms->m_pid)); } static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) @@ -4479,9 +4518,9 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_lkb *lkb; struct dlm_rsb *r; int error, mstype, result; - int from_nodeid = ms->m_header.h_nodeid; + int from_nodeid = le32_to_cpu(ms->m_header.h_nodeid); - error = find_lkb(ls, ms->m_remid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb); if (error) return error; @@ -4497,7 +4536,8 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY); if (error) { log_error(ls, "receive_request_reply %x remote %d %x result %d", - lkb->lkb_id, from_nodeid, ms->m_lkid, ms->m_result); + lkb->lkb_id, from_nodeid, le32_to_cpu(ms->m_lkid), + from_dlm_errno(le32_to_cpu(ms->m_result))); dlm_dump_rsb(r); goto out; } @@ -4511,7 +4551,7 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) } /* this is the value returned from do_request() on the master */ - result = ms->m_result; + result = from_dlm_errno(le32_to_cpu(ms->m_result)); switch (result) { case -EAGAIN: @@ -4525,7 +4565,7 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) case 0: /* request was queued or granted on remote master */ receive_flags_reply(lkb, ms); - lkb->lkb_remid = ms->m_lkid; + lkb->lkb_remid = le32_to_cpu(ms->m_lkid); if (is_altmode(lkb)) munge_altmode(lkb, ms); if (result) { @@ -4598,7 +4638,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, struct dlm_message *ms) { /* this is the value returned from do_convert() on the master */ - switch (ms->m_result) { + switch (from_dlm_errno(le32_to_cpu(ms->m_result))) { case -EAGAIN: /* convert would block (be queued) on remote master */ queue_cast(r, lkb, -EAGAIN); @@ -4631,8 +4671,9 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, default: log_error(r->res_ls, "receive_convert_reply %x remote %d %x %d", - lkb->lkb_id, ms->m_header.h_nodeid, ms->m_lkid, - ms->m_result); + lkb->lkb_id, le32_to_cpu(ms->m_header.h_nodeid), + le32_to_cpu(ms->m_lkid), + from_dlm_errno(le32_to_cpu(ms->m_result))); dlm_print_rsb(r); dlm_print_lkb(lkb); } @@ -4666,7 +4707,7 @@ static int receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_lkb *lkb; int error; - error = find_lkb(ls, ms->m_remid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb); if (error) return error; @@ -4694,7 +4735,7 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) /* this is the value returned from do_unlock() on the master */ - switch (ms->m_result) { + switch (from_dlm_errno(le32_to_cpu(ms->m_result))) { case -DLM_EUNLOCK: receive_flags_reply(lkb, ms); remove_lock_pc(r, lkb); @@ -4704,7 +4745,7 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) break; default: log_error(r->res_ls, "receive_unlock_reply %x error %d", - lkb->lkb_id, ms->m_result); + lkb->lkb_id, from_dlm_errno(le32_to_cpu(ms->m_result))); } out: unlock_rsb(r); @@ -4716,7 +4757,7 @@ static int receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_lkb *lkb; int error; - error = find_lkb(ls, ms->m_remid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb); if (error) return error; @@ -4744,7 +4785,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) /* this is the value returned from do_cancel() on the master */ - switch (ms->m_result) { + switch (from_dlm_errno(le32_to_cpu(ms->m_result))) { case -DLM_ECANCEL: receive_flags_reply(lkb, ms); revert_lock_pc(r, lkb); @@ -4754,7 +4795,8 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) break; default: log_error(r->res_ls, "receive_cancel_reply %x error %d", - lkb->lkb_id, ms->m_result); + lkb->lkb_id, + from_dlm_errno(le32_to_cpu(ms->m_result))); } out: unlock_rsb(r); @@ -4766,7 +4808,7 @@ static int receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) struct dlm_lkb *lkb; int error; - error = find_lkb(ls, ms->m_remid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb); if (error) return error; @@ -4782,9 +4824,10 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) int error, ret_nodeid; int do_lookup_list = 0; - error = find_lkb(ls, ms->m_lkid, &lkb); + error = find_lkb(ls, le32_to_cpu(ms->m_lkid), &lkb); if (error) { - log_error(ls, "receive_lookup_reply no lkid %x", ms->m_lkid); + log_error(ls, "%s no lkid %x", __func__, + le32_to_cpu(ms->m_lkid)); return; } @@ -4799,7 +4842,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) if (error) goto out; - ret_nodeid = ms->m_nodeid; + ret_nodeid = le32_to_cpu(ms->m_nodeid); /* We sometimes receive a request from the dir node for this rsb before we've received the dir node's loookup_reply for it. @@ -4811,8 +4854,8 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) /* This should never happen */ log_error(ls, "receive_lookup_reply %x from %d ret %d " "master %d dir %d our %d first %x %s", - lkb->lkb_id, ms->m_header.h_nodeid, ret_nodeid, - r->res_master_nodeid, r->res_dir_nodeid, + lkb->lkb_id, le32_to_cpu(ms->m_header.h_nodeid), + ret_nodeid, r->res_master_nodeid, r->res_dir_nodeid, dlm_our_nodeid(), r->res_first_lkid, r->res_name); } @@ -4824,7 +4867,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) } else if (ret_nodeid == -1) { /* the remote node doesn't believe it's the dir node */ log_error(ls, "receive_lookup_reply %x from %d bad ret_nodeid", - lkb->lkb_id, ms->m_header.h_nodeid); + lkb->lkb_id, le32_to_cpu(ms->m_header.h_nodeid)); r->res_master_nodeid = 0; r->res_nodeid = -1; lkb->lkb_nodeid = -1; @@ -4858,10 +4901,12 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms, { int error = 0, noent = 0; - if (!dlm_is_member(ls, ms->m_header.h_nodeid)) { + if (!dlm_is_member(ls, le32_to_cpu(ms->m_header.h_nodeid))) { log_limit(ls, "receive %d from non-member %d %x %x %d", - ms->m_type, ms->m_header.h_nodeid, ms->m_lkid, - ms->m_remid, ms->m_result); + le32_to_cpu(ms->m_type), + le32_to_cpu(ms->m_header.h_nodeid), + le32_to_cpu(ms->m_lkid), le32_to_cpu(ms->m_remid), + from_dlm_errno(le32_to_cpu(ms->m_result))); return; } @@ -4869,77 +4914,78 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms, /* messages sent to a master node */ - case DLM_MSG_REQUEST: + case cpu_to_le32(DLM_MSG_REQUEST): error = receive_request(ls, ms); break; - case DLM_MSG_CONVERT: + case cpu_to_le32(DLM_MSG_CONVERT): error = receive_convert(ls, ms); break; - case DLM_MSG_UNLOCK: + case cpu_to_le32(DLM_MSG_UNLOCK): error = receive_unlock(ls, ms); break; - case DLM_MSG_CANCEL: + case cpu_to_le32(DLM_MSG_CANCEL): noent = 1; error = receive_cancel(ls, ms); break; /* messages sent from a master node (replies to above) */ - case DLM_MSG_REQUEST_REPLY: + case cpu_to_le32(DLM_MSG_REQUEST_REPLY): error = receive_request_reply(ls, ms); break; - case DLM_MSG_CONVERT_REPLY: + case cpu_to_le32(DLM_MSG_CONVERT_REPLY): error = receive_convert_reply(ls, ms); break; - case DLM_MSG_UNLOCK_REPLY: + case cpu_to_le32(DLM_MSG_UNLOCK_REPLY): error = receive_unlock_reply(ls, ms); break; - case DLM_MSG_CANCEL_REPLY: + case cpu_to_le32(DLM_MSG_CANCEL_REPLY): error = receive_cancel_reply(ls, ms); break; /* messages sent from a master node (only two types of async msg) */ - case DLM_MSG_GRANT: + case cpu_to_le32(DLM_MSG_GRANT): noent = 1; error = receive_grant(ls, ms); break; - case DLM_MSG_BAST: + case cpu_to_le32(DLM_MSG_BAST): noent = 1; error = receive_bast(ls, ms); break; /* messages sent to a dir node */ - case DLM_MSG_LOOKUP: + case cpu_to_le32(DLM_MSG_LOOKUP): receive_lookup(ls, ms); break; - case DLM_MSG_REMOVE: + case cpu_to_le32(DLM_MSG_REMOVE): receive_remove(ls, ms); break; /* messages sent from a dir node (remove has no reply) */ - case DLM_MSG_LOOKUP_REPLY: + case cpu_to_le32(DLM_MSG_LOOKUP_REPLY): receive_lookup_reply(ls, ms); break; /* other messages */ - case DLM_MSG_PURGE: + case cpu_to_le32(DLM_MSG_PURGE): receive_purge(ls, ms); break; default: - log_error(ls, "unknown message type %d", ms->m_type); + log_error(ls, "unknown message type %d", + le32_to_cpu(ms->m_type)); } /* @@ -4955,22 +5001,26 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms, if (error == -ENOENT && noent) { log_debug(ls, "receive %d no %x remote %d %x saved_seq %u", - ms->m_type, ms->m_remid, ms->m_header.h_nodeid, - ms->m_lkid, saved_seq); + le32_to_cpu(ms->m_type), le32_to_cpu(ms->m_remid), + le32_to_cpu(ms->m_header.h_nodeid), + le32_to_cpu(ms->m_lkid), saved_seq); } else if (error == -ENOENT) { log_error(ls, "receive %d no %x remote %d %x saved_seq %u", - ms->m_type, ms->m_remid, ms->m_header.h_nodeid, - ms->m_lkid, saved_seq); + le32_to_cpu(ms->m_type), le32_to_cpu(ms->m_remid), + le32_to_cpu(ms->m_header.h_nodeid), + le32_to_cpu(ms->m_lkid), saved_seq); - if (ms->m_type == DLM_MSG_CONVERT) - dlm_dump_rsb_hash(ls, ms->m_hash); + if (ms->m_type == cpu_to_le32(DLM_MSG_CONVERT)) + dlm_dump_rsb_hash(ls, le32_to_cpu(ms->m_hash)); } if (error == -EINVAL) { log_error(ls, "receive %d inval from %d lkid %x remid %x " "saved_seq %u", - ms->m_type, ms->m_header.h_nodeid, - ms->m_lkid, ms->m_remid, saved_seq); + le32_to_cpu(ms->m_type), + le32_to_cpu(ms->m_header.h_nodeid), + le32_to_cpu(ms->m_lkid), le32_to_cpu(ms->m_remid), + saved_seq); } } @@ -4991,7 +5041,7 @@ static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms, lockspace generation before we left. */ if (!ls->ls_generation) { log_limit(ls, "receive %d from %d ignore old gen", - ms->m_type, nodeid); + le32_to_cpu(ms->m_type), nodeid); return; } @@ -5024,30 +5074,30 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid) switch (hd->h_cmd) { case DLM_MSG: - dlm_message_in(&p->message); - type = p->message.m_type; + type = le32_to_cpu(p->message.m_type); break; case DLM_RCOM: - dlm_rcom_in(&p->rcom); - type = p->rcom.rc_type; + type = le32_to_cpu(p->rcom.rc_type); break; default: log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid); return; } - if (hd->h_nodeid != nodeid) { + if (le32_to_cpu(hd->h_nodeid) != nodeid) { log_print("invalid h_nodeid %d from %d lockspace %x", - hd->h_nodeid, nodeid, hd->u.h_lockspace); + le32_to_cpu(hd->h_nodeid), nodeid, + le32_to_cpu(hd->u.h_lockspace)); return; } - ls = dlm_find_lockspace_global(hd->u.h_lockspace); + ls = dlm_find_lockspace_global(le32_to_cpu(hd->u.h_lockspace)); if (!ls) { if (dlm_config.ci_log_debug) { printk_ratelimited(KERN_DEBUG "dlm: invalid lockspace " "%u from %d cmd %d type %d\n", - hd->u.h_lockspace, nodeid, hd->h_cmd, type); + le32_to_cpu(hd->u.h_lockspace), nodeid, + hd->h_cmd, type); } if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) @@ -5061,8 +5111,11 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid) down_read(&ls->ls_recv_active); if (hd->h_cmd == DLM_MSG) dlm_receive_message(ls, &p->message, nodeid); - else + else if (hd->h_cmd == DLM_RCOM) dlm_receive_rcom(ls, &p->rcom, nodeid); + else + log_error(ls, "invalid h_cmd %d from %d lockspace %x", + hd->h_cmd, nodeid, le32_to_cpu(hd->u.h_lockspace)); up_read(&ls->ls_recv_active); dlm_put_lockspace(ls); @@ -5074,10 +5127,10 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb, if (middle_conversion(lkb)) { hold_lkb(lkb); memset(ms_stub, 0, sizeof(struct dlm_message)); - ms_stub->m_flags = DLM_IFL_STUB_MS; - ms_stub->m_type = DLM_MSG_CONVERT_REPLY; - ms_stub->m_result = -EINPROGRESS; - ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; + ms_stub->m_flags = cpu_to_le32(DLM_IFL_STUB_MS); + ms_stub->m_type = cpu_to_le32(DLM_MSG_CONVERT_REPLY); + ms_stub->m_result = cpu_to_le32(to_dlm_errno(-EINPROGRESS)); + ms_stub->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid); _receive_convert_reply(lkb, ms_stub); /* Same special case as in receive_rcom_lock_args() */ @@ -5196,10 +5249,10 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) case DLM_MSG_UNLOCK: hold_lkb(lkb); memset(ms_stub, 0, sizeof(struct dlm_message)); - ms_stub->m_flags = DLM_IFL_STUB_MS; - ms_stub->m_type = DLM_MSG_UNLOCK_REPLY; - ms_stub->m_result = stub_unlock_result; - ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; + ms_stub->m_flags = cpu_to_le32(DLM_IFL_STUB_MS); + ms_stub->m_type = cpu_to_le32(DLM_MSG_UNLOCK_REPLY); + ms_stub->m_result = cpu_to_le32(to_dlm_errno(stub_unlock_result)); + ms_stub->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid); _receive_unlock_reply(lkb, ms_stub); dlm_put_lkb(lkb); break; @@ -5207,10 +5260,10 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) case DLM_MSG_CANCEL: hold_lkb(lkb); memset(ms_stub, 0, sizeof(struct dlm_message)); - ms_stub->m_flags = DLM_IFL_STUB_MS; - ms_stub->m_type = DLM_MSG_CANCEL_REPLY; - ms_stub->m_result = stub_cancel_result; - ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; + ms_stub->m_flags = cpu_to_le32(DLM_IFL_STUB_MS); + ms_stub->m_type = cpu_to_le32(DLM_MSG_CANCEL_REPLY); + ms_stub->m_result = cpu_to_le32(to_dlm_errno(stub_cancel_result)); + ms_stub->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid); _receive_cancel_reply(lkb, ms_stub); dlm_put_lkb(lkb); break; @@ -5227,21 +5280,18 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) { - struct dlm_lkb *lkb; - int found = 0; + struct dlm_lkb *lkb = NULL, *iter; mutex_lock(&ls->ls_waiters_mutex); - list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { - if (lkb->lkb_flags & DLM_IFL_RESEND) { - hold_lkb(lkb); - found = 1; + list_for_each_entry(iter, &ls->ls_waiters, lkb_wait_reply) { + if (iter->lkb_flags & DLM_IFL_RESEND) { + hold_lkb(iter); + lkb = iter; break; } } mutex_unlock(&ls->ls_waiters_mutex); - if (!found) - lkb = NULL; return lkb; } @@ -5301,11 +5351,16 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; lkb->lkb_wait_type = 0; - lkb->lkb_wait_count = 0; + /* drop all wait_count references we still + * hold a reference for this iteration. + */ + while (lkb->lkb_wait_count) { + lkb->lkb_wait_count--; + unhold_lkb(lkb); + } mutex_lock(&ls->ls_waiters_mutex); list_del_init(&lkb->lkb_wait_reply); mutex_unlock(&ls->ls_waiters_mutex); - unhold_lkb(lkb); /* for waiters list */ if (oc || ou) { /* do an unlock or cancel instead of resending */ @@ -5575,7 +5630,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, { struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; - lkb->lkb_nodeid = rc->rc_header.h_nodeid; + lkb->lkb_nodeid = le32_to_cpu(rc->rc_header.h_nodeid); lkb->lkb_ownpid = le32_to_cpu(rl->rl_ownpid); lkb->lkb_remid = le32_to_cpu(rl->rl_lkid); lkb->lkb_exflags = le32_to_cpu(rl->rl_exflags); @@ -5590,8 +5645,8 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_astfn = (rl->rl_asts & DLM_CB_CAST) ? &fake_astfn : NULL; if (lkb->lkb_exflags & DLM_LKF_VALBLK) { - int lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - - sizeof(struct rcom_lock); + int lvblen = le16_to_cpu(rc->rc_header.h_length) - + sizeof(struct dlm_rcom) - sizeof(struct rcom_lock); if (lvblen > ls->ls_lvblen) return -EINVAL; lkb->lkb_lvbptr = dlm_allocate_lvb(ls); @@ -5627,7 +5682,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) struct dlm_rsb *r; struct dlm_lkb *lkb; uint32_t remid = 0; - int from_nodeid = rc->rc_header.h_nodeid; + int from_nodeid = le32_to_cpu(rc->rc_header.h_nodeid); int error; if (rl->rl_parent_lkid) { @@ -5677,7 +5732,6 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) attach_lkb(r, lkb); add_lkb(r, lkb, rl->rl_status); - error = 0; ls->ls_recover_locks_in++; if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue)) @@ -5717,7 +5771,8 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) error = find_lkb(ls, lkid, &lkb); if (error) { log_error(ls, "dlm_recover_process_copy no %x remote %d %x %d", - lkid, rc->rc_header.h_nodeid, remid, result); + lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid, + result); return error; } @@ -5727,7 +5782,8 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) if (!is_process_copy(lkb)) { log_error(ls, "dlm_recover_process_copy bad %x remote %d %x %d", - lkid, rc->rc_header.h_nodeid, remid, result); + lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid, + result); dlm_dump_rsb(r); unlock_rsb(r); put_rsb(r); @@ -5742,7 +5798,8 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) a barrier between recover_masters and recover_locks. */ log_debug(ls, "dlm_recover_process_copy %x remote %d %x %d", - lkid, rc->rc_header.h_nodeid, remid, result); + lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid, + result); dlm_send_rcom_lock(r, lkb); goto out; @@ -5752,7 +5809,8 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) break; default: log_error(ls, "dlm_recover_process_copy %x remote %d %x %d unk", - lkid, rc->rc_header.h_nodeid, remid, result); + lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid, + result); } /* an ack for dlm_recover_locks() which waits for replies from @@ -5766,12 +5824,18 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) return 0; } +#ifdef CONFIG_DLM_DEPRECATED_API int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, uint32_t flags, void *name, unsigned int namelen, unsigned long timeout_cs) +#else +int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, + int mode, uint32_t flags, void *name, unsigned int namelen) +#endif { struct dlm_lkb *lkb; struct dlm_args args; + bool do_put = true; int error; dlm_lock_recovery(ls); @@ -5782,23 +5846,28 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, goto out; } + trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags); + if (flags & DLM_LKF_VALBLK) { ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS); if (!ua->lksb.sb_lvbptr) { kfree(ua); - __put_lkb(ls, lkb); error = -ENOMEM; - goto out; + goto out_put; } } +#ifdef CONFIG_DLM_DEPRECATED_API error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs, fake_astfn, ua, fake_bastfn, &args); +#else + error = set_lock_args(mode, &ua->lksb, flags, namelen, fake_astfn, ua, + fake_bastfn, &args); +#endif if (error) { kfree(ua->lksb.sb_lvbptr); ua->lksb.sb_lvbptr = NULL; kfree(ua); - __put_lkb(ls, lkb); - goto out; + goto out_put; } /* After ua is attached to lkb it will be freed by dlm_free_lkb(). @@ -5817,8 +5886,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, error = 0; fallthrough; default: - __put_lkb(ls, lkb); - goto out; + goto out_put; } /* add this new lkb to the per-process list of locks */ @@ -5826,14 +5894,24 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, hold_lkb(lkb); list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); spin_unlock(&ua->proc->locks_spin); + do_put = false; + out_put: + trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, false); + if (do_put) + __put_lkb(ls, lkb); out: dlm_unlock_recovery(ls); return error; } +#ifdef CONFIG_DLM_DEPRECATED_API int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, int mode, uint32_t flags, uint32_t lkid, char *lvb_in, unsigned long timeout_cs) +#else +int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + int mode, uint32_t flags, uint32_t lkid, char *lvb_in) +#endif { struct dlm_lkb *lkb; struct dlm_args args; @@ -5846,6 +5924,8 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error) goto out; + trace_dlm_lock_start(ls, lkb, NULL, 0, mode, flags); + /* user can change the params on its lock when it converts it, or add an lvb that didn't exist before */ @@ -5868,8 +5948,13 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, ua->bastaddr = ua_tmp->bastaddr; ua->user_lksb = ua_tmp->user_lksb; +#ifdef CONFIG_DLM_DEPRECATED_API error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs, fake_astfn, ua, fake_bastfn, &args); +#else + error = set_lock_args(mode, &ua->lksb, flags, 0, fake_astfn, ua, + fake_bastfn, &args); +#endif if (error) goto out_put; @@ -5878,6 +5963,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK) error = 0; out_put: + trace_dlm_lock_end(ls, lkb, NULL, 0, mode, flags, error, false); dlm_put_lkb(lkb); out: dlm_unlock_recovery(ls); @@ -5893,39 +5979,38 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, int mode, uint32_t flags, void *name, unsigned int namelen, - unsigned long timeout_cs, uint32_t *lkid) + uint32_t *lkid) { - struct dlm_lkb *lkb; + struct dlm_lkb *lkb = NULL, *iter; struct dlm_user_args *ua; int found_other_mode = 0; - int found = 0; int rv = 0; mutex_lock(&ls->ls_orphans_mutex); - list_for_each_entry(lkb, &ls->ls_orphans, lkb_ownqueue) { - if (lkb->lkb_resource->res_length != namelen) + list_for_each_entry(iter, &ls->ls_orphans, lkb_ownqueue) { + if (iter->lkb_resource->res_length != namelen) continue; - if (memcmp(lkb->lkb_resource->res_name, name, namelen)) + if (memcmp(iter->lkb_resource->res_name, name, namelen)) continue; - if (lkb->lkb_grmode != mode) { + if (iter->lkb_grmode != mode) { found_other_mode = 1; continue; } - found = 1; - list_del_init(&lkb->lkb_ownqueue); - lkb->lkb_flags &= ~DLM_IFL_ORPHAN; - *lkid = lkb->lkb_id; + lkb = iter; + list_del_init(&iter->lkb_ownqueue); + iter->lkb_flags &= ~DLM_IFL_ORPHAN; + *lkid = iter->lkb_id; break; } mutex_unlock(&ls->ls_orphans_mutex); - if (!found && found_other_mode) { + if (!lkb && found_other_mode) { rv = -EAGAIN; goto out; } - if (!found) { + if (!lkb) { rv = -ENOENT; goto out; } @@ -5971,6 +6056,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error) goto out; + trace_dlm_unlock_start(ls, lkb, flags); + ua = lkb->lkb_ua; if (lvb_in && ua->lksb.sb_lvbptr) @@ -5999,6 +6086,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); spin_unlock(&ua->proc->locks_spin); out_put: + trace_dlm_unlock_end(ls, lkb, flags, error); dlm_put_lkb(lkb); out: dlm_unlock_recovery(ls); @@ -6020,6 +6108,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error) goto out; + trace_dlm_unlock_start(ls, lkb, flags); + ua = lkb->lkb_ua; if (ua_tmp->castparam) ua->castparam = ua_tmp->castparam; @@ -6037,6 +6127,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (error == -EBUSY) error = 0; out_put: + trace_dlm_unlock_end(ls, lkb, flags, error); dlm_put_lkb(lkb); out: dlm_unlock_recovery(ls); @@ -6058,6 +6149,8 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid) if (error) goto out; + trace_dlm_unlock_start(ls, lkb, flags); + ua = lkb->lkb_ua; error = set_unlock_args(flags, ua, &args); @@ -6086,6 +6179,7 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid) if (error == -EBUSY) error = 0; out_put: + trace_dlm_unlock_end(ls, lkb, flags, error); dlm_put_lkb(lkb); out: dlm_unlock_recovery(ls); @@ -6141,7 +6235,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, { struct dlm_lkb *lkb = NULL; - mutex_lock(&ls->ls_clear_proc_locks); + spin_lock(&ls->ls_clear_proc_locks); if (list_empty(&proc->locks)) goto out; @@ -6153,7 +6247,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, else lkb->lkb_flags |= DLM_IFL_DEAD; out: - mutex_unlock(&ls->ls_clear_proc_locks); + spin_unlock(&ls->ls_clear_proc_locks); return lkb; } @@ -6190,7 +6284,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) dlm_put_lkb(lkb); } - mutex_lock(&ls->ls_clear_proc_locks); + spin_lock(&ls->ls_clear_proc_locks); /* in-progress unlocks */ list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) { @@ -6206,7 +6300,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) dlm_put_lkb(lkb); } - mutex_unlock(&ls->ls_clear_proc_locks); + spin_unlock(&ls->ls_clear_proc_locks); dlm_unlock_recovery(ls); } @@ -6277,8 +6371,8 @@ static int send_purge(struct dlm_ls *ls, int nodeid, int pid) DLM_MSG_PURGE, &ms, &mh); if (error) return error; - ms->m_nodeid = nodeid; - ms->m_pid = pid; + ms->m_nodeid = cpu_to_le32(nodeid); + ms->m_pid = cpu_to_le32(pid); return send_message(mh, ms); } @@ -6301,3 +6395,64 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, return error; } +/* debug functionality */ +int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len, + int lkb_nodeid, unsigned int lkb_flags, int lkb_status) +{ + struct dlm_lksb *lksb; + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error; + + /* we currently can't set a valid user lock */ + if (lkb_flags & DLM_IFL_USER) + return -EOPNOTSUPP; + + lksb = kzalloc(sizeof(*lksb), GFP_NOFS); + if (!lksb) + return -ENOMEM; + + error = _create_lkb(ls, &lkb, lkb_id, lkb_id + 1); + if (error) { + kfree(lksb); + return error; + } + + lkb->lkb_flags = lkb_flags; + lkb->lkb_nodeid = lkb_nodeid; + lkb->lkb_lksb = lksb; + /* user specific pointer, just don't have it NULL for kernel locks */ + if (~lkb_flags & DLM_IFL_USER) + lkb->lkb_astparam = (void *)0xDEADBEEF; + + error = find_rsb(ls, name, len, 0, R_REQUEST, &r); + if (error) { + kfree(lksb); + __put_lkb(ls, lkb); + return error; + } + + lock_rsb(r); + attach_lkb(r, lkb); + add_lkb(r, lkb, lkb_status); + unlock_rsb(r); + put_rsb(r); + + return 0; +} + +int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id, + int mstype, int to_nodeid) +{ + struct dlm_lkb *lkb; + int error; + + error = find_lkb(ls, lkb_id, &lkb); + if (error) + return error; + + error = add_to_waiters(lkb, mstype, to_nodeid); + dlm_put_lkb(lkb); + return error; +} + diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 456c6ec3ef6f..40c76b5544da 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -24,13 +24,19 @@ int dlm_put_lkb(struct dlm_lkb *lkb); void dlm_scan_rsbs(struct dlm_ls *ls); int dlm_lock_recovery_try(struct dlm_ls *ls); void dlm_unlock_recovery(struct dlm_ls *ls); -void dlm_scan_waiters(struct dlm_ls *ls); + +#ifdef CONFIG_DLM_DEPRECATED_API void dlm_scan_timeout(struct dlm_ls *ls); void dlm_adjust_timeouts(struct dlm_ls *ls); +#else +static inline void dlm_scan_timeout(struct dlm_ls *ls) { } +static inline void dlm_adjust_timeouts(struct dlm_ls *ls) { } +#endif + int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len, unsigned int flags, int *r_nodeid, int *result); -int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, +int dlm_search_rsb_tree(struct rb_root *tree, const void *name, int len, struct dlm_rsb **r_ret); void dlm_recover_purge(struct dlm_ls *ls); @@ -41,15 +47,22 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls); int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc); int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc); +#ifdef CONFIG_DLM_DEPRECATED_API int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, uint32_t flags, void *name, unsigned int namelen, unsigned long timeout_cs); int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, int mode, uint32_t flags, uint32_t lkid, char *lvb_in, unsigned long timeout_cs); +#else +int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, + uint32_t flags, void *name, unsigned int namelen); +int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + int mode, uint32_t flags, uint32_t lkid, char *lvb_in); +#endif int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, int mode, uint32_t flags, void *name, unsigned int namelen, - unsigned long timeout_cs, uint32_t *lkid); + uint32_t *lkid); int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, uint32_t flags, uint32_t lkid, char *lvb_in); int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, @@ -58,6 +71,10 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, int nodeid, int pid); int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid); void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); +int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len, + int lkb_nodeid, unsigned int lkb_flags, int lkb_status); +int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id, + int mstype, int to_nodeid); static inline int is_master(struct dlm_rsb *r) { diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 10eddfa6c3d7..bae050df7abf 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -216,8 +216,7 @@ static int do_uevent(struct dlm_ls *ls, int in) return ls->ls_uevent_result; } -static int dlm_uevent(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env) +static int dlm_uevent(struct kobject *kobj, struct kobj_uevent_env *env) { struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); @@ -276,7 +275,6 @@ static int dlm_scand(void *data) ls->ls_scan_time = jiffies; dlm_scan_rsbs(ls); dlm_scan_timeout(ls); - dlm_scan_waiters(ls); dlm_unlock_recovery(ls); } else { ls->ls_scan_time += HZ; @@ -314,7 +312,7 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id) list_for_each_entry(ls, &lslist, ls_list) { if (ls->ls_global_id == id) { - ls->ls_count++; + atomic_inc(&ls->ls_count); goto out; } } @@ -331,7 +329,7 @@ struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace) spin_lock(&lslist_lock); list_for_each_entry(ls, &lslist, ls_list) { if (ls->ls_local_handle == lockspace) { - ls->ls_count++; + atomic_inc(&ls->ls_count); goto out; } } @@ -348,7 +346,7 @@ struct dlm_ls *dlm_find_lockspace_device(int minor) spin_lock(&lslist_lock); list_for_each_entry(ls, &lslist, ls_list) { if (ls->ls_device.minor == minor) { - ls->ls_count++; + atomic_inc(&ls->ls_count); goto out; } } @@ -360,24 +358,24 @@ struct dlm_ls *dlm_find_lockspace_device(int minor) void dlm_put_lockspace(struct dlm_ls *ls) { - spin_lock(&lslist_lock); - ls->ls_count--; - spin_unlock(&lslist_lock); + if (atomic_dec_and_test(&ls->ls_count)) + wake_up(&ls->ls_count_wait); } static void remove_lockspace(struct dlm_ls *ls) { - for (;;) { - spin_lock(&lslist_lock); - if (ls->ls_count == 0) { - WARN_ON(ls->ls_create_count != 0); - list_del(&ls->ls_list); - spin_unlock(&lslist_lock); - return; - } +retry: + wait_event(ls->ls_count_wait, atomic_read(&ls->ls_count) == 0); + + spin_lock(&lslist_lock); + if (atomic_read(&ls->ls_count) != 0) { spin_unlock(&lslist_lock); - ssleep(1); + goto retry; } + + WARN_ON(ls->ls_create_count != 0); + list_del(&ls->ls_list); + spin_unlock(&lslist_lock); } static int threads_start(void) @@ -418,7 +416,7 @@ static int new_lockspace(const char *name, const char *cluster, if (namelen > DLM_LOCKSPACE_LEN || namelen == 0) return -EINVAL; - if (!lvblen || (lvblen % 8)) + if (lvblen % 8) return -EINVAL; if (!try_module_get(THIS_MODULE)) @@ -481,7 +479,8 @@ static int new_lockspace(const char *name, const char *cluster, memcpy(ls->ls_name, name, namelen); ls->ls_namelen = namelen; ls->ls_lvblen = lvblen; - ls->ls_count = 0; + atomic_set(&ls->ls_count, 0); + init_waitqueue_head(&ls->ls_count_wait); ls->ls_flags = 0; ls->ls_scan_time = jiffies; @@ -490,13 +489,28 @@ static int new_lockspace(const char *name, const char *cluster, ls->ls_ops_arg = ops_arg; } - if (flags & DLM_LSFL_TIMEWARN) +#ifdef CONFIG_DLM_DEPRECATED_API + if (flags & DLM_LSFL_TIMEWARN) { + pr_warn_once("===============================================================\n" + "WARNING: the dlm DLM_LSFL_TIMEWARN flag is being deprecated and\n" + " will be removed in v6.2!\n" + " Inclusive DLM_LSFL_TIMEWARN define in UAPI header!\n" + "===============================================================\n"); + set_bit(LSFL_TIMEWARN, &ls->ls_flags); + } /* ls_exflags are forced to match among nodes, and we don't - need to require all nodes to have some flags set */ + * need to require all nodes to have some flags set + */ ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS | DLM_LSFL_NEWEXCL)); +#else + /* ls_exflags are forced to match among nodes, and we don't + * need to require all nodes to have some flags set + */ + ls->ls_exflags = (flags & ~(DLM_LSFL_FS | DLM_LSFL_NEWEXCL)); +#endif size = READ_ONCE(dlm_config.ci_rsbtbl_size); ls->ls_rsbtbl_size = size; @@ -511,6 +525,7 @@ static int new_lockspace(const char *name, const char *cluster, } spin_lock_init(&ls->ls_remove_spin); + init_waitqueue_head(&ls->ls_remove_wait); for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) { ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1, @@ -526,8 +541,10 @@ static int new_lockspace(const char *name, const char *cluster, mutex_init(&ls->ls_waiters_mutex); INIT_LIST_HEAD(&ls->ls_orphans); mutex_init(&ls->ls_orphans_mutex); +#ifdef CONFIG_DLM_DEPRECATED_API INIT_LIST_HEAD(&ls->ls_timeout); mutex_init(&ls->ls_timeout_mutex); +#endif INIT_LIST_HEAD(&ls->ls_new_rsb); spin_lock_init(&ls->ls_new_rsb_spin); @@ -547,8 +564,8 @@ static int new_lockspace(const char *name, const char *cluster, init_waitqueue_head(&ls->ls_uevent_wait); ls->ls_uevent_result = 0; - init_completion(&ls->ls_members_done); - ls->ls_members_result = -1; + init_completion(&ls->ls_recovery_done); + ls->ls_recovery_result = -1; mutex_init(&ls->ls_cb_mutex); INIT_LIST_HEAD(&ls->ls_cb_delay); @@ -564,8 +581,10 @@ static int new_lockspace(const char *name, const char *cluster, init_rwsem(&ls->ls_in_recovery); init_rwsem(&ls->ls_recv_active); INIT_LIST_HEAD(&ls->ls_requestqueue); + atomic_set(&ls->ls_requestqueue_cnt, 0); + init_waitqueue_head(&ls->ls_requestqueue_wait); mutex_init(&ls->ls_requestqueue_mutex); - mutex_init(&ls->ls_clear_proc_locks); + spin_lock_init(&ls->ls_clear_proc_locks); /* Due backwards compatibility with 3.1 we need to use maximum * possible dlm message size to be sure the message will fit and @@ -642,8 +661,9 @@ static int new_lockspace(const char *name, const char *cluster, if (error) goto out_recoverd; - wait_for_completion(&ls->ls_members_done); - error = ls->ls_members_result; + /* wait until recovery is successful or failed */ + wait_for_completion(&ls->ls_recovery_done); + error = ls->ls_recovery_result; if (error) goto out_members; @@ -683,10 +703,11 @@ static int new_lockspace(const char *name, const char *cluster, return error; } -int dlm_new_lockspace(const char *name, const char *cluster, - uint32_t flags, int lvblen, - const struct dlm_lockspace_ops *ops, void *ops_arg, - int *ops_result, dlm_lockspace_t **lockspace) +static int __dlm_new_lockspace(const char *name, const char *cluster, + uint32_t flags, int lvblen, + const struct dlm_lockspace_ops *ops, + void *ops_arg, int *ops_result, + dlm_lockspace_t **lockspace) { int error = 0; @@ -712,6 +733,25 @@ int dlm_new_lockspace(const char *name, const char *cluster, return error; } +int dlm_new_lockspace(const char *name, const char *cluster, uint32_t flags, + int lvblen, const struct dlm_lockspace_ops *ops, + void *ops_arg, int *ops_result, + dlm_lockspace_t **lockspace) +{ + return __dlm_new_lockspace(name, cluster, flags | DLM_LSFL_FS, lvblen, + ops, ops_arg, ops_result, lockspace); +} + +int dlm_new_user_lockspace(const char *name, const char *cluster, + uint32_t flags, int lvblen, + const struct dlm_lockspace_ops *ops, + void *ops_arg, int *ops_result, + dlm_lockspace_t **lockspace) +{ + return __dlm_new_lockspace(name, cluster, flags, lvblen, ops, + ops_arg, ops_result, lockspace); +} + static int lkb_idr_is_local(int id, void *p, void *data) { struct dlm_lkb *lkb = p; @@ -868,7 +908,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) * until this returns. * * Force has 4 possible values: - * 0 - don't destroy locksapce if it has any LKBs + * 0 - don't destroy lockspace if it has any LKBs * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs * 2 - destroy lockspace regardless of LKBs * 3 - destroy lockspace as part of a forced shutdown @@ -919,3 +959,15 @@ void dlm_stop_lockspaces(void) log_print("dlm user daemon left %d lockspaces", count); } +void dlm_stop_lockspaces_check(void) +{ + struct dlm_ls *ls; + + spin_lock(&lslist_lock); + list_for_each_entry(ls, &lslist, ls_list) { + if (WARN_ON(!rwsem_is_locked(&ls->ls_in_recovery) || + !dlm_locking_stopped(ls))) + break; + } + spin_unlock(&lslist_lock); +} diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h index a78d853b9342..03f4a4a3a871 100644 --- a/fs/dlm/lockspace.h +++ b/fs/dlm/lockspace.h @@ -12,6 +12,14 @@ #ifndef __LOCKSPACE_DOT_H__ #define __LOCKSPACE_DOT_H__ +/* DLM_LSFL_FS + * The lockspace user is in the kernel (i.e. filesystem). Enables + * direct bast/cast callbacks. + * + * internal lockspace flag - will be removed in future + */ +#define DLM_LSFL_FS 0x00000004 + int dlm_lockspace_init(void); void dlm_lockspace_exit(void); struct dlm_ls *dlm_find_lockspace_global(uint32_t id); @@ -19,6 +27,12 @@ struct dlm_ls *dlm_find_lockspace_local(void *id); struct dlm_ls *dlm_find_lockspace_device(int minor); void dlm_put_lockspace(struct dlm_ls *ls); void dlm_stop_lockspaces(void); +void dlm_stop_lockspaces_check(void); +int dlm_new_user_lockspace(const char *name, const char *cluster, + uint32_t flags, int lvblen, + const struct dlm_lockspace_ops *ops, + void *ops_arg, int *ops_result, + dlm_lockspace_t **lockspace); #endif /* __LOCKSPACE_DOT_H__ */ diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 8f715c620e1f..59f64c596233 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -53,9 +53,12 @@ #include <net/sctp/sctp.h> #include <net/ipv6.h> +#include <trace/events/dlm.h> + #include "dlm_internal.h" #include "lowcomms.h" #include "midcomms.h" +#include "memory.h" #include "config.h" #define NEEDED_RMEM (4*1024*1024) @@ -84,7 +87,6 @@ struct connection { struct list_head writequeue; /* List of outgoing writequeue_entries */ spinlock_t writequeue_lock; atomic_t writequeue_cnt; - struct mutex wq_alloc; int retries; #define MAX_CONNECT_RETRIES 3 struct hlist_node list; @@ -189,6 +191,24 @@ static const struct dlm_proto_ops *dlm_proto_ops; static void process_recv_sockets(struct work_struct *work); static void process_send_sockets(struct work_struct *work); +static void writequeue_entry_ctor(void *data) +{ + struct writequeue_entry *entry = data; + + INIT_LIST_HEAD(&entry->msgs); +} + +struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void) +{ + return kmem_cache_create("dlm_writequeue", sizeof(struct writequeue_entry), + 0, 0, writequeue_entry_ctor); +} + +struct kmem_cache *dlm_lowcomms_msg_cache_create(void) +{ + return kmem_cache_create("dlm_msg", sizeof(struct dlm_msg), 0, 0, NULL); +} + /* need to held writequeue_lock */ static struct writequeue_entry *con_next_wq(struct connection *con) { @@ -199,7 +219,10 @@ static struct writequeue_entry *con_next_wq(struct connection *con) e = list_first_entry(&con->writequeue, struct writequeue_entry, list); - if (e->len == 0) + /* if len is zero nothing is to send, if there are users filling + * buffers we wait until the users are done so we can send more. + */ + if (e->users || e->len == 0) return NULL; return e; @@ -265,8 +288,6 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc) return NULL; } - mutex_init(&con->wq_alloc); - spin_lock(&connections_lock); /* Because multiple workqueues/threads calls this function it can * race on multiple cpu's. Instead of locking hot path __find_con() @@ -486,11 +507,9 @@ static void lowcomms_data_ready(struct sock *sk) { struct connection *con; - read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags)) queue_work(recv_workqueue, &con->rwork); - read_unlock_bh(&sk->sk_callback_lock); } static void lowcomms_listen_data_ready(struct sock *sk) @@ -505,15 +524,14 @@ static void lowcomms_write_space(struct sock *sk) { struct connection *con; - read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); if (!con) - goto out; + return; if (!test_and_set_bit(CF_CONNECTED, &con->flags)) { - log_print("successful connected to node %d", con->nodeid); + log_print("connected to node %d", con->nodeid); queue_work(send_workqueue, &con->swork); - goto out; + return; } clear_bit(SOCK_NOSPACE, &con->sock->flags); @@ -524,8 +542,6 @@ static void lowcomms_write_space(struct sock *sk) } queue_work(send_workqueue, &con->swork); -out: - read_unlock_bh(&sk->sk_callback_lock); } static inline void lowcomms_connect_sock(struct connection *con) @@ -592,42 +608,41 @@ int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark) static void lowcomms_error_report(struct sock *sk) { struct connection *con; - struct sockaddr_storage saddr; void (*orig_report)(struct sock *) = NULL; + struct inet_sock *inet; - read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); if (con == NULL) goto out; orig_report = listen_sock.sk_error_report; - if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) { - printk_ratelimited(KERN_ERR "dlm: node %d: socket error " - "sending to node %d, port %d, " - "sk_err=%d/%d\n", dlm_our_nodeid(), - con->nodeid, dlm_config.ci_tcp_port, - sk->sk_err, sk->sk_err_soft); - } else if (saddr.ss_family == AF_INET) { - struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr; + inet = inet_sk(sk); + switch (sk->sk_family) { + case AF_INET: printk_ratelimited(KERN_ERR "dlm: node %d: socket error " - "sending to node %d at %pI4, port %d, " + "sending to node %d at %pI4, dport %d, " "sk_err=%d/%d\n", dlm_our_nodeid(), - con->nodeid, &sin4->sin_addr.s_addr, - dlm_config.ci_tcp_port, sk->sk_err, + con->nodeid, &inet->inet_daddr, + ntohs(inet->inet_dport), sk->sk_err, sk->sk_err_soft); - } else { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr; - + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: printk_ratelimited(KERN_ERR "dlm: node %d: socket error " - "sending to node %d at %u.%u.%u.%u, " - "port %d, sk_err=%d/%d\n", dlm_our_nodeid(), - con->nodeid, sin6->sin6_addr.s6_addr32[0], - sin6->sin6_addr.s6_addr32[1], - sin6->sin6_addr.s6_addr32[2], - sin6->sin6_addr.s6_addr32[3], - dlm_config.ci_tcp_port, sk->sk_err, + "sending to node %d at %pI6c, " + "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(), + con->nodeid, &sk->sk_v6_daddr, + ntohs(inet->inet_dport), sk->sk_err, sk->sk_err_soft); + break; +#endif + default: + printk_ratelimited(KERN_ERR "dlm: node %d: socket error " + "invalid socket family %d set, " + "sk_err=%d/%d\n", dlm_our_nodeid(), + sk->sk_family, sk->sk_err, sk->sk_err_soft); + goto out; } /* below sendcon only handling */ @@ -646,7 +661,6 @@ static void lowcomms_error_report(struct sock *sk) queue_work(send_workqueue, &con->swork); out: - read_unlock_bh(&sk->sk_callback_lock); if (orig_report) orig_report(sk); } @@ -666,20 +680,20 @@ static void restore_callbacks(struct socket *sock) { struct sock *sk = sock->sk; - write_lock_bh(&sk->sk_callback_lock); + lock_sock(sk); sk->sk_user_data = NULL; sk->sk_data_ready = listen_sock.sk_data_ready; sk->sk_state_change = listen_sock.sk_state_change; sk->sk_write_space = listen_sock.sk_write_space; sk->sk_error_report = listen_sock.sk_error_report; - write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); } static void add_listen_sock(struct socket *sock, struct listen_connection *con) { struct sock *sk = sock->sk; - write_lock_bh(&sk->sk_callback_lock); + lock_sock(sk); save_listen_callbacks(sock); con->sock = sock; @@ -687,7 +701,7 @@ static void add_listen_sock(struct socket *sock, struct listen_connection *con) sk->sk_allocation = GFP_NOFS; /* Install a data_ready callback */ sk->sk_data_ready = lowcomms_listen_data_ready; - write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); } /* Make a socket active */ @@ -695,7 +709,7 @@ static void add_sock(struct socket *sock, struct connection *con) { struct sock *sk = sock->sk; - write_lock_bh(&sk->sk_callback_lock); + lock_sock(sk); con->sock = sock; sk->sk_user_data = con; @@ -705,7 +719,7 @@ static void add_sock(struct socket *sock, struct connection *con) sk->sk_state_change = lowcomms_state_change; sk->sk_allocation = GFP_NOFS; sk->sk_error_report = lowcomms_error_report; - write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); } /* Add the port number to an IPv6 or 4 sockaddr and return the address @@ -733,7 +747,7 @@ static void dlm_page_release(struct kref *kref) ref); __free_page(e->page); - kfree(e); + dlm_free_writequeue(e); } static void dlm_msg_release(struct kref *kref) @@ -741,7 +755,7 @@ static void dlm_msg_release(struct kref *kref) struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref); kref_put(&msg->entry->ref, dlm_page_release); - kfree(msg); + dlm_free_msg(msg); } static void free_entry(struct writequeue_entry *e) @@ -925,6 +939,7 @@ static int receive_from_sock(struct connection *con) msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); + trace_dlm_recv(con->nodeid, ret); if (ret == -EAGAIN) break; else if (ret <= 0) @@ -1013,10 +1028,28 @@ static int accept_from_sock(struct listen_connection *con) /* Get the new node's NODEID */ make_sockaddr(&peeraddr, 0, &len); if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) { - unsigned char *b=(unsigned char *)&peeraddr; - log_print("connect from non cluster node"); - print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, - b, sizeof(struct sockaddr_storage)); + switch (peeraddr.ss_family) { + case AF_INET: { + struct sockaddr_in *sin = (struct sockaddr_in *)&peeraddr; + + log_print("connect from non cluster IPv4 node %pI4", + &sin->sin_addr); + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&peeraddr; + + log_print("connect from non cluster IPv6 node %pI6c", + &sin6->sin6_addr); + break; + } +#endif + default: + log_print("invalid family from non cluster node"); + break; + } + sock_release(newsock); return -1; } @@ -1177,33 +1210,33 @@ static void deinit_local(void) kfree(dlm_local_addr[i]); } -static struct writequeue_entry *new_writequeue_entry(struct connection *con, - gfp_t allocation) +static struct writequeue_entry *new_writequeue_entry(struct connection *con) { struct writequeue_entry *entry; - entry = kzalloc(sizeof(*entry), allocation); + entry = dlm_allocate_writequeue(); if (!entry) return NULL; - entry->page = alloc_page(allocation | __GFP_ZERO); + entry->page = alloc_page(GFP_ATOMIC | __GFP_ZERO); if (!entry->page) { - kfree(entry); + dlm_free_writequeue(entry); return NULL; } + entry->offset = 0; + entry->len = 0; + entry->end = 0; + entry->dirty = false; entry->con = con; entry->users = 1; kref_init(&entry->ref); - INIT_LIST_HEAD(&entry->msgs); - return entry; } static struct writequeue_entry *new_wq_entry(struct connection *con, int len, - gfp_t allocation, char **ppc, - void (*cb)(struct dlm_mhandle *mh), - struct dlm_mhandle *mh) + char **ppc, void (*cb)(void *data), + void *data) { struct writequeue_entry *e; @@ -1215,74 +1248,54 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len, *ppc = page_address(e->page) + e->end; if (cb) - cb(mh); + cb(data); e->end += len; e->users++; - spin_unlock(&con->writequeue_lock); - - return e; + goto out; } } - spin_unlock(&con->writequeue_lock); - e = new_writequeue_entry(con, allocation); + e = new_writequeue_entry(con); if (!e) - return NULL; + goto out; kref_get(&e->ref); *ppc = page_address(e->page); e->end += len; atomic_inc(&con->writequeue_cnt); - - spin_lock(&con->writequeue_lock); if (cb) - cb(mh); + cb(data); list_add_tail(&e->list, &con->writequeue); - spin_unlock(&con->writequeue_lock); +out: + spin_unlock(&con->writequeue_lock); return e; }; static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len, gfp_t allocation, char **ppc, - void (*cb)(struct dlm_mhandle *mh), - struct dlm_mhandle *mh) + void (*cb)(void *data), + void *data) { struct writequeue_entry *e; struct dlm_msg *msg; - bool sleepable; - msg = kzalloc(sizeof(*msg), allocation); + msg = dlm_allocate_msg(allocation); if (!msg) return NULL; - /* this mutex is being used as a wait to avoid multiple "fast" - * new writequeue page list entry allocs in new_wq_entry in - * normal operation which is sleepable context. Without it - * we could end in multiple writequeue entries with one - * dlm message because multiple callers were waiting at - * the writequeue_lock in new_wq_entry(). - */ - sleepable = gfpflags_normal_context(allocation); - if (sleepable) - mutex_lock(&con->wq_alloc); - kref_init(&msg->ref); - e = new_wq_entry(con, len, allocation, ppc, cb, mh); + e = new_wq_entry(con, len, ppc, cb, data); if (!e) { - if (sleepable) - mutex_unlock(&con->wq_alloc); - - kfree(msg); + dlm_free_msg(msg); return NULL; } - if (sleepable) - mutex_unlock(&con->wq_alloc); - + msg->retransmit = false; + msg->orig_msg = NULL; msg->ppc = *ppc; msg->len = len; msg->entry = e; @@ -1290,9 +1303,13 @@ static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len, return msg; } +/* avoid false positive for nodes_srcu, unlock happens in + * dlm_lowcomms_commit_msg which is a must call if success + */ +#ifndef __CHECKER__ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation, - char **ppc, void (*cb)(struct dlm_mhandle *mh), - struct dlm_mhandle *mh) + char **ppc, void (*cb)(void *data), + void *data) { struct connection *con; struct dlm_msg *msg; @@ -1313,16 +1330,19 @@ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation, return NULL; } - msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, mh); + msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, data); if (!msg) { srcu_read_unlock(&connections_srcu, idx); return NULL; } + /* for dlm_lowcomms_commit_msg() */ + kref_get(&msg->ref); /* we assume if successful commit must called */ msg->idx = idx; return msg; } +#endif static void _dlm_lowcomms_commit_msg(struct dlm_msg *msg) { @@ -1349,11 +1369,18 @@ out: return; } +/* avoid false positive for nodes_srcu, lock was happen in + * dlm_lowcomms_new_msg + */ +#ifndef __CHECKER__ void dlm_lowcomms_commit_msg(struct dlm_msg *msg) { _dlm_lowcomms_commit_msg(msg); srcu_read_unlock(&connections_srcu, msg->idx); + /* because dlm_lowcomms_new_msg() */ + kref_put(&msg->ref, dlm_msg_release); } +#endif void dlm_lowcomms_put_msg(struct dlm_msg *msg) { @@ -1403,7 +1430,6 @@ static void send_to_sock(struct connection *con) if (!e) break; - e = list_first_entry(&con->writequeue, struct writequeue_entry, list); len = e->len; offset = e->offset; BUG_ON(len == 0 && e->users == 0); @@ -1411,6 +1437,7 @@ static void send_to_sock(struct connection *con) ret = kernel_sendpage(con->sock, e->page, offset, len, msg_flags); + trace_dlm_send(con->nodeid, ret); if (ret == -EAGAIN || ret == 0) { if (ret == -EAGAIN && test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) && @@ -1680,9 +1707,9 @@ static void _stop_conn(struct connection *con, bool and_other) set_bit(CF_READ_PENDING, &con->flags); set_bit(CF_WRITE_PENDING, &con->flags); if (con->sock && con->sock->sk) { - write_lock_bh(&con->sock->sk->sk_callback_lock); + lock_sock(con->sock->sk); con->sock->sk->sk_user_data = NULL; - write_unlock_bh(&con->sock->sk->sk_callback_lock); + release_sock(con->sock->sk); } if (con->othercon && and_other) _stop_conn(con->othercon, false); @@ -1775,8 +1802,8 @@ static int dlm_listen_for_all(void) result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, SOCK_STREAM, dlm_proto_ops->proto, &sock); if (result < 0) { - log_print("Can't create comms socket, check SCTP is loaded"); - goto out; + log_print("Can't create comms socket: %d", result); + return result; } sock_set_mark(sock->sk, dlm_config.ci_mark); @@ -1908,7 +1935,7 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock, return ret; if (!test_and_set_bit(CF_CONNECTED, &con->flags)) - log_print("successful connected to node %d", con->nodeid); + log_print("connected to node %d", con->nodeid); return 0; } diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index 4ccae07cf005..29369feea991 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -38,8 +38,8 @@ void dlm_lowcomms_stop(void); void dlm_lowcomms_exit(void); int dlm_lowcomms_close(int nodeid); struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation, - char **ppc, void (*cb)(struct dlm_mhandle *mh), - struct dlm_mhandle *mh); + char **ppc, void (*cb)(void *data), + void *data); void dlm_lowcomms_commit_msg(struct dlm_msg *msg); void dlm_lowcomms_put_msg(struct dlm_msg *msg); int dlm_lowcomms_resend_msg(struct dlm_msg *msg); @@ -47,6 +47,8 @@ int dlm_lowcomms_connect_node(int nodeid); int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark); int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); void dlm_midcomms_receive_done(int nodeid); +struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void); +struct kmem_cache *dlm_lowcomms_msg_cache_create(void); #endif /* __LOWCOMMS_DOT_H__ */ diff --git a/fs/dlm/main.c b/fs/dlm/main.c index afc66a1346d3..1c5be4b70ac1 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -19,6 +19,9 @@ #include "config.h" #include "lowcomms.h" +#define CREATE_TRACE_POINTS +#include <trace/events/dlm.h> + static int __init init_dlm(void) { int error; diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 731d489aa323..2af2ccfe43a9 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -20,7 +20,7 @@ int dlm_slots_version(struct dlm_header *h) { - if ((h->h_version & 0x0000FFFF) < DLM_HEADER_SLOTS) + if ((le32_to_cpu(h->h_version) & 0x0000FFFF) < DLM_HEADER_SLOTS) return 0; return 1; } @@ -120,18 +120,13 @@ int dlm_slots_copy_in(struct dlm_ls *ls) ro0 = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config)); - for (i = 0, ro = ro0; i < num_slots; i++, ro++) { - ro->ro_nodeid = le32_to_cpu(ro->ro_nodeid); - ro->ro_slot = le16_to_cpu(ro->ro_slot); - } - log_slots(ls, gen, num_slots, ro0, NULL, 0); list_for_each_entry(memb, &ls->ls_nodes, list) { for (i = 0, ro = ro0; i < num_slots; i++, ro++) { - if (ro->ro_nodeid != memb->nodeid) + if (le32_to_cpu(ro->ro_nodeid) != memb->nodeid) continue; - memb->slot = ro->ro_slot; + memb->slot = le16_to_cpu(ro->ro_slot); memb->slot_prev = memb->slot; break; } @@ -442,8 +437,7 @@ static int ping_members(struct dlm_ls *ls) int error = 0; list_for_each_entry(memb, &ls->ls_nodes, list) { - error = dlm_recovery_stopped(ls); - if (error) { + if (dlm_recovery_stopped(ls)) { error = -EINTR; break; } @@ -540,7 +534,11 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) int i, error, neg = 0, low = -1; /* previously removed members that we've not finished removing need to - count as a negative change so the "neg" recovery steps will happen */ + * count as a negative change so the "neg" recovery steps will happen + * + * This functionality must report all member changes to lsops or + * midcomms layer and must never return before. + */ list_for_each_entry(memb, &ls->ls_nodes_gone, list) { log_rinfo(ls, "prev removed member %d", memb->nodeid); @@ -589,19 +587,6 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) *neg_out = neg; error = ping_members(ls); - /* error -EINTR means that a new recovery action is triggered. - * We ignore this recovery action and let run the new one which might - * have new member configuration. - */ - if (error == -EINTR) - error = 0; - - /* new_lockspace() may be waiting to know if the config - * is good or bad - */ - ls->ls_members_result = error; - complete(&ls->ls_members_done); - log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); return error; } @@ -681,7 +666,16 @@ int dlm_ls_stop(struct dlm_ls *ls) if (!ls->ls_recover_begin) ls->ls_recover_begin = jiffies; - dlm_lsop_recover_prep(ls); + /* call recover_prep ops only once and not multiple times + * for each possible dlm_ls_stop() when recovery is already + * stopped. + * + * If we successful was able to clear LSFL_RUNNING bit and + * it was set we know it is the first dlm_ls_stop() call. + */ + if (new) + dlm_lsop_recover_prep(ls); + return 0; } diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index 5918f4d39586..ce35c3c19aeb 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -10,32 +10,61 @@ ******************************************************************************/ #include "dlm_internal.h" +#include "midcomms.h" +#include "lowcomms.h" #include "config.h" #include "memory.h" +static struct kmem_cache *writequeue_cache; +static struct kmem_cache *mhandle_cache; +static struct kmem_cache *msg_cache; static struct kmem_cache *lkb_cache; static struct kmem_cache *rsb_cache; int __init dlm_memory_init(void) { + writequeue_cache = dlm_lowcomms_writequeue_cache_create(); + if (!writequeue_cache) + goto out; + + mhandle_cache = dlm_midcomms_cache_create(); + if (!mhandle_cache) + goto mhandle; + lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb), __alignof__(struct dlm_lkb), 0, NULL); if (!lkb_cache) - return -ENOMEM; + goto lkb; + + msg_cache = dlm_lowcomms_msg_cache_create(); + if (!msg_cache) + goto msg; rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb), __alignof__(struct dlm_rsb), 0, NULL); - if (!rsb_cache) { - kmem_cache_destroy(lkb_cache); - return -ENOMEM; - } + if (!rsb_cache) + goto rsb; return 0; + +rsb: + kmem_cache_destroy(msg_cache); +msg: + kmem_cache_destroy(lkb_cache); +lkb: + kmem_cache_destroy(mhandle_cache); +mhandle: + kmem_cache_destroy(writequeue_cache); +out: + return -ENOMEM; } void dlm_memory_exit(void) { + kmem_cache_destroy(writequeue_cache); + kmem_cache_destroy(mhandle_cache); + kmem_cache_destroy(msg_cache); kmem_cache_destroy(lkb_cache); kmem_cache_destroy(rsb_cache); } @@ -89,3 +118,32 @@ void dlm_free_lkb(struct dlm_lkb *lkb) kmem_cache_free(lkb_cache, lkb); } +struct dlm_mhandle *dlm_allocate_mhandle(void) +{ + return kmem_cache_alloc(mhandle_cache, GFP_NOFS); +} + +void dlm_free_mhandle(struct dlm_mhandle *mhandle) +{ + kmem_cache_free(mhandle_cache, mhandle); +} + +struct writequeue_entry *dlm_allocate_writequeue(void) +{ + return kmem_cache_alloc(writequeue_cache, GFP_ATOMIC); +} + +void dlm_free_writequeue(struct writequeue_entry *writequeue) +{ + kmem_cache_free(writequeue_cache, writequeue); +} + +struct dlm_msg *dlm_allocate_msg(gfp_t allocation) +{ + return kmem_cache_alloc(msg_cache, allocation); +} + +void dlm_free_msg(struct dlm_msg *msg) +{ + kmem_cache_free(msg_cache, msg); +} diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h index 4f218ea4b187..7bd3f1a391ca 100644 --- a/fs/dlm/memory.h +++ b/fs/dlm/memory.h @@ -20,6 +20,12 @@ struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls); void dlm_free_lkb(struct dlm_lkb *l); char *dlm_allocate_lvb(struct dlm_ls *ls); void dlm_free_lvb(char *l); +struct dlm_mhandle *dlm_allocate_mhandle(void); +void dlm_free_mhandle(struct dlm_mhandle *mhandle); +struct writequeue_entry *dlm_allocate_writequeue(void); +void dlm_free_writequeue(struct writequeue_entry *writequeue); +struct dlm_msg *dlm_allocate_msg(gfp_t allocation); +void dlm_free_msg(struct dlm_msg *msg); #endif /* __MEMORY_DOT_H__ */ diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index 7ae39ec8d9b0..6489bc22ad61 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -135,8 +135,10 @@ #include <net/tcp.h> #include "dlm_internal.h" +#include "lockspace.h" #include "lowcomms.h" #include "config.h" +#include "memory.h" #include "lock.h" #include "util.h" #include "midcomms.h" @@ -220,6 +222,12 @@ DEFINE_STATIC_SRCU(nodes_srcu); */ static DEFINE_MUTEX(close_lock); +struct kmem_cache *dlm_midcomms_cache_create(void) +{ + return kmem_cache_create("dlm_mhandle", sizeof(struct dlm_mhandle), + 0, 0, NULL); +} + static inline const char *dlm_state_str(int state) { switch (state) { @@ -279,7 +287,7 @@ static void dlm_mhandle_release(struct rcu_head *rcu) struct dlm_mhandle *mh = container_of(rcu, struct dlm_mhandle, rcu); dlm_lowcomms_put_msg(mh->msg); - kfree(mh); + dlm_free_mhandle(mh); } static void dlm_mhandle_delete(struct midcomms_node *node, @@ -373,13 +381,12 @@ static int dlm_send_ack(int nodeid, uint32_t seq) m_header = (struct dlm_header *)ppc; - m_header->h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); - m_header->h_nodeid = dlm_our_nodeid(); - m_header->h_length = mb_len; + m_header->h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR); + m_header->h_nodeid = cpu_to_le32(dlm_our_nodeid()); + m_header->h_length = cpu_to_le16(mb_len); m_header->h_cmd = DLM_ACK; - m_header->u.h_seq = seq; + m_header->u.h_seq = cpu_to_le32(seq); - header_out(m_header); dlm_lowcomms_commit_msg(msg); dlm_lowcomms_put_msg(msg); @@ -402,13 +409,11 @@ static int dlm_send_fin(struct midcomms_node *node, m_header = (struct dlm_header *)ppc; - m_header->h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); - m_header->h_nodeid = dlm_our_nodeid(); - m_header->h_length = mb_len; + m_header->h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR); + m_header->h_nodeid = cpu_to_le32(dlm_our_nodeid()); + m_header->h_length = cpu_to_le16(mb_len); m_header->h_cmd = DLM_FIN; - header_out(m_header); - pr_debug("sending fin msg to node %d\n", node->nodeid); dlm_midcomms_commit_mhandle(mh); set_bit(DLM_NODE_FLAG_STOP_TX, &node->flags); @@ -567,14 +572,14 @@ dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p, return NULL; } - switch (le32_to_cpu(p->rcom.rc_type)) { - case DLM_RCOM_NAMES: + switch (p->rcom.rc_type) { + case cpu_to_le32(DLM_RCOM_NAMES): fallthrough; - case DLM_RCOM_NAMES_REPLY: + case cpu_to_le32(DLM_RCOM_NAMES_REPLY): fallthrough; - case DLM_RCOM_STATUS: + case cpu_to_le32(DLM_RCOM_STATUS): fallthrough; - case DLM_RCOM_STATUS_REPLY: + case cpu_to_le32(DLM_RCOM_STATUS_REPLY): node = nodeid2node(nodeid, 0); if (node) { spin_lock(&node->state_lock); @@ -734,14 +739,14 @@ static void dlm_midcomms_receive_buffer_3_2(union dlm_packet *p, int nodeid) * * length already checked. */ - switch (le32_to_cpu(p->rcom.rc_type)) { - case DLM_RCOM_NAMES: + switch (p->rcom.rc_type) { + case cpu_to_le32(DLM_RCOM_NAMES): fallthrough; - case DLM_RCOM_NAMES_REPLY: + case cpu_to_le32(DLM_RCOM_NAMES_REPLY): fallthrough; - case DLM_RCOM_STATUS: + case cpu_to_le32(DLM_RCOM_STATUS): fallthrough; - case DLM_RCOM_STATUS_REPLY: + case cpu_to_le32(DLM_RCOM_STATUS_REPLY): break; default: log_print("unsupported rcom type received: %u, will skip this message from node %d", @@ -909,11 +914,11 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len) if (msglen > len) break; - switch (le32_to_cpu(hd->h_version)) { - case DLM_VERSION_3_1: + switch (hd->h_version) { + case cpu_to_le32(DLM_VERSION_3_1): dlm_midcomms_receive_buffer_3_1((union dlm_packet *)ptr, nodeid); break; - case DLM_VERSION_3_2: + case cpu_to_le32(DLM_VERSION_3_2): dlm_midcomms_receive_buffer_3_2((union dlm_packet *)ptr, nodeid); break; default: @@ -969,7 +974,7 @@ void dlm_midcomms_receive_done(int nodeid) spin_unlock(&node->state_lock); /* do nothing FIN has it's own ack send */ break; - }; + } srcu_read_unlock(&nodes_srcu, idx); } @@ -1013,15 +1018,16 @@ static void dlm_fill_opts_header(struct dlm_opts *opts, uint16_t inner_len, uint32_t seq) { opts->o_header.h_cmd = DLM_OPTS; - opts->o_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); - opts->o_header.h_nodeid = dlm_our_nodeid(); - opts->o_header.h_length = DLM_MIDCOMMS_OPT_LEN + inner_len; - opts->o_header.u.h_seq = seq; - header_out(&opts->o_header); + opts->o_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR); + opts->o_header.h_nodeid = cpu_to_le32(dlm_our_nodeid()); + opts->o_header.h_length = cpu_to_le16(DLM_MIDCOMMS_OPT_LEN + inner_len); + opts->o_header.u.h_seq = cpu_to_le32(seq); } -static void midcomms_new_msg_cb(struct dlm_mhandle *mh) +static void midcomms_new_msg_cb(void *data) { + struct dlm_mhandle *mh = data; + atomic_inc(&mh->node->send_queue_cnt); spin_lock(&mh->node->send_queue_lock); @@ -1053,6 +1059,10 @@ static struct dlm_msg *dlm_midcomms_get_msg_3_2(struct dlm_mhandle *mh, int node return msg; } +/* avoid false positive for nodes_srcu, unlock happens in + * dlm_midcomms_commit_mhandle which is a must call if success + */ +#ifndef __CHECKER__ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, gfp_t allocation, char **ppc) { @@ -1071,10 +1081,12 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, /* this is a bug, however we going on and hope it will be resolved */ WARN_ON(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags)); - mh = kzalloc(sizeof(*mh), GFP_NOFS); + mh = dlm_allocate_mhandle(); if (!mh) goto err; + mh->committed = false; + mh->ack_rcv = NULL; mh->idx = idx; mh->node = node; @@ -1083,7 +1095,7 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, msg = dlm_lowcomms_new_msg(nodeid, len, allocation, ppc, NULL, NULL); if (!msg) { - kfree(mh); + dlm_free_mhandle(mh); goto err; } @@ -1092,13 +1104,13 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len, msg = dlm_midcomms_get_msg_3_2(mh, nodeid, len, allocation, ppc); if (!msg) { - kfree(mh); + dlm_free_mhandle(mh); goto err; } break; default: - kfree(mh); + dlm_free_mhandle(mh); WARN_ON(1); goto err; } @@ -1116,6 +1128,7 @@ err: srcu_read_unlock(&nodes_srcu, idx); return NULL; } +#endif static void dlm_midcomms_commit_msg_3_2(struct dlm_mhandle *mh) { @@ -1125,6 +1138,10 @@ static void dlm_midcomms_commit_msg_3_2(struct dlm_mhandle *mh) dlm_lowcomms_commit_msg(mh->msg); } +/* avoid false positive for nodes_srcu, lock was happen in + * dlm_midcomms_get_mhandle + */ +#ifndef __CHECKER__ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh) { switch (mh->node->version) { @@ -1134,7 +1151,7 @@ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh) dlm_lowcomms_commit_msg(mh->msg); dlm_lowcomms_put_msg(mh->msg); /* mh is not part of rcu list in this case */ - kfree(mh); + dlm_free_mhandle(mh); break; case DLM_VERSION_3_2: dlm_midcomms_commit_msg_3_2(mh); @@ -1146,6 +1163,7 @@ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh) break; } } +#endif int dlm_midcomms_start(void) { @@ -1231,7 +1249,7 @@ void dlm_midcomms_add_member(int nodeid) } node->users++; - pr_debug("users inc count %d\n", node->users); + pr_debug("node %d users inc count %d\n", nodeid, node->users); spin_unlock(&node->state_lock); srcu_read_unlock(&nodes_srcu, idx); @@ -1254,7 +1272,7 @@ void dlm_midcomms_remove_member(int nodeid) spin_lock(&node->state_lock); node->users--; - pr_debug("users dec count %d\n", node->users); + pr_debug("node %d users dec count %d\n", nodeid, node->users); /* hitting users count to zero means the * other side is running dlm_midcomms_stop() @@ -1395,6 +1413,8 @@ int dlm_midcomms_close(int nodeid) if (nodeid == dlm_our_nodeid()) return 0; + dlm_stop_lockspaces_check(); + idx = srcu_read_lock(&nodes_srcu); /* Abort pending close/remove operation */ node = nodeid2node(nodeid, 0); @@ -1425,3 +1445,51 @@ int dlm_midcomms_close(int nodeid) return ret; } + +/* debug functionality to send raw dlm msg from user space */ +struct dlm_rawmsg_data { + struct midcomms_node *node; + void *buf; +}; + +static void midcomms_new_rawmsg_cb(void *data) +{ + struct dlm_rawmsg_data *rd = data; + struct dlm_header *h = rd->buf; + + switch (h->h_version) { + case cpu_to_le32(DLM_VERSION_3_1): + break; + default: + switch (h->h_cmd) { + case DLM_OPTS: + if (!h->u.h_seq) + h->u.h_seq = cpu_to_le32(rd->node->seq_send++); + break; + default: + break; + } + break; + } +} + +int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf, + int buflen) +{ + struct dlm_rawmsg_data rd; + struct dlm_msg *msg; + char *msgbuf; + + rd.node = node; + rd.buf = buf; + + msg = dlm_lowcomms_new_msg(node->nodeid, buflen, GFP_NOFS, + &msgbuf, midcomms_new_rawmsg_cb, &rd); + if (!msg) + return -ENOMEM; + + memcpy(msgbuf, buf, buflen); + dlm_lowcomms_commit_msg(msg); + return 0; +} + diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h index 579abc6929be..82bcd9661922 100644 --- a/fs/dlm/midcomms.h +++ b/fs/dlm/midcomms.h @@ -28,6 +28,9 @@ const char *dlm_midcomms_state(struct midcomms_node *node); unsigned long dlm_midcomms_flags(struct midcomms_node *node); int dlm_midcomms_send_queue_cnt(struct midcomms_node *node); uint32_t dlm_midcomms_version(struct midcomms_node *node); +int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf, + int buflen); +struct kmem_cache *dlm_midcomms_cache_create(void); #endif /* __MIDCOMMS_DOT_H__ */ diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 67f68d48d60c..4de4b8651c6c 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -75,6 +75,7 @@ static struct genl_family family __ro_after_init = { .version = DLM_GENL_VERSION, .small_ops = dlm_nl_ops, .n_small_ops = ARRAY_SIZE(dlm_nl_ops), + .resv_start_op = DLM_CMD_HELLO + 1, .module = THIS_MODULE, }; diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index c38b2b8ffd1d..737f185aad8d 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -13,26 +13,28 @@ #include "dlm_internal.h" #include "lockspace.h" -static spinlock_t ops_lock; -static struct list_head send_list; -static struct list_head recv_list; -static wait_queue_head_t send_wq; -static wait_queue_head_t recv_wq; +static DEFINE_SPINLOCK(ops_lock); +static LIST_HEAD(send_list); +static LIST_HEAD(recv_list); +static DECLARE_WAIT_QUEUE_HEAD(send_wq); +static DECLARE_WAIT_QUEUE_HEAD(recv_wq); -struct plock_op { - struct list_head list; - int done; - struct dlm_plock_info info; -}; - -struct plock_xop { - struct plock_op xop; - int (*callback)(struct file_lock *fl, int result); +struct plock_async_data { void *fl; void *file; struct file_lock flc; + int (*callback)(struct file_lock *fl, int result); }; +struct plock_op { + struct list_head list; + int done; + /* if lock op got interrupted while waiting dlm_controld reply */ + bool sigint; + struct dlm_plock_info info; + /* if set indicates async handling */ + struct plock_async_data *data; +}; static inline void set_version(struct dlm_plock_info *info) { @@ -58,10 +60,15 @@ static int check_version(struct dlm_plock_info *info) return 0; } +static void dlm_release_plock_op(struct plock_op *op) +{ + kfree(op->data); + kfree(op); +} + static void send_op(struct plock_op *op) { set_version(&op->info); - INIT_LIST_HEAD(&op->list); spin_lock(&ops_lock); list_add_tail(&op->list, &send_list); spin_unlock(&ops_lock); @@ -74,8 +81,7 @@ static void send_op(struct plock_op *op) abandoned waiter. So, we have to insert the unlock-close when the lock call is interrupted. */ -static void do_unlock_close(struct dlm_ls *ls, u64 number, - struct file *file, struct file_lock *fl) +static void do_unlock_close(const struct dlm_plock_info *info) { struct plock_op *op; @@ -84,15 +90,12 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number, return; op->info.optype = DLM_PLOCK_OP_UNLOCK; - op->info.pid = fl->fl_pid; - op->info.fsid = ls->ls_global_id; - op->info.number = number; + op->info.pid = info->pid; + op->info.fsid = info->fsid; + op->info.number = info->number; op->info.start = 0; op->info.end = OFFSET_MAX; - if (fl->fl_lmops && fl->fl_lmops->lm_grant) - op->info.owner = (__u64) fl->fl_pid; - else - op->info.owner = (__u64)(long) fl->fl_owner; + op->info.owner = info->owner; op->info.flags |= DLM_PLOCK_FL_CLOSE; send_op(op); @@ -101,22 +104,21 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number, int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, int cmd, struct file_lock *fl) { + struct plock_async_data *op_data; struct dlm_ls *ls; struct plock_op *op; - struct plock_xop *xop; int rv; ls = dlm_find_lockspace_local(lockspace); if (!ls) return -EINVAL; - xop = kzalloc(sizeof(*xop), GFP_NOFS); - if (!xop) { + op = kzalloc(sizeof(*op), GFP_NOFS); + if (!op) { rv = -ENOMEM; goto out; } - op = &xop->xop; op->info.optype = DLM_PLOCK_OP_LOCK; op->info.pid = fl->fl_pid; op->info.ex = (fl->fl_type == F_WRLCK); @@ -125,46 +127,57 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; + /* async handling */ if (fl->fl_lmops && fl->fl_lmops->lm_grant) { + op_data = kzalloc(sizeof(*op_data), GFP_NOFS); + if (!op_data) { + dlm_release_plock_op(op); + rv = -ENOMEM; + goto out; + } + /* fl_owner is lockd which doesn't distinguish processes on the nfs client */ op->info.owner = (__u64) fl->fl_pid; - xop->callback = fl->fl_lmops->lm_grant; - locks_init_lock(&xop->flc); - locks_copy_lock(&xop->flc, fl); - xop->fl = fl; - xop->file = file; + op_data->callback = fl->fl_lmops->lm_grant; + locks_init_lock(&op_data->flc); + locks_copy_lock(&op_data->flc, fl); + op_data->fl = fl; + op_data->file = file; + + op->data = op_data; + + send_op(op); + rv = FILE_LOCK_DEFERRED; + goto out; } else { op->info.owner = (__u64)(long) fl->fl_owner; - xop->callback = NULL; } send_op(op); - if (xop->callback == NULL) { - rv = wait_event_interruptible(recv_wq, (op->done != 0)); - if (rv == -ERESTARTSYS) { - log_debug(ls, "dlm_posix_lock: wait killed %llx", - (unsigned long long)number); - spin_lock(&ops_lock); - list_del(&op->list); + rv = wait_event_interruptible(recv_wq, (op->done != 0)); + if (rv == -ERESTARTSYS) { + spin_lock(&ops_lock); + /* recheck under ops_lock if we got a done != 0, + * if so this interrupt case should be ignored + */ + if (op->done != 0) { spin_unlock(&ops_lock); - kfree(xop); - do_unlock_close(ls, number, file, fl); - goto out; + goto do_lock_wait; } - } else { - rv = FILE_LOCK_DEFERRED; + + op->sigint = true; + spin_unlock(&ops_lock); + log_debug(ls, "%s: wait interrupted %x %llx pid %d", + __func__, ls->ls_global_id, + (unsigned long long)number, op->info.pid); goto out; } - spin_lock(&ops_lock); - if (!list_empty(&op->list)) { - log_error(ls, "dlm_posix_lock: op on list %llx", - (unsigned long long)number); - list_del(&op->list); - } - spin_unlock(&ops_lock); +do_lock_wait: + + WARN_ON(!list_empty(&op->list)); rv = op->info.rv; @@ -174,7 +187,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, (unsigned long long)number); } - kfree(xop); + dlm_release_plock_op(op); out: dlm_put_lockspace(ls); return rv; @@ -184,26 +197,20 @@ EXPORT_SYMBOL_GPL(dlm_posix_lock); /* Returns failure iff a successful lock operation should be canceled */ static int dlm_plock_callback(struct plock_op *op) { + struct plock_async_data *op_data = op->data; struct file *file; struct file_lock *fl; struct file_lock *flc; int (*notify)(struct file_lock *fl, int result) = NULL; - struct plock_xop *xop = (struct plock_xop *)op; int rv = 0; - spin_lock(&ops_lock); - if (!list_empty(&op->list)) { - log_print("dlm_plock_callback: op on list %llx", - (unsigned long long)op->info.number); - list_del(&op->list); - } - spin_unlock(&ops_lock); + WARN_ON(!list_empty(&op->list)); /* check if the following 2 are still valid or make a copy */ - file = xop->file; - flc = &xop->flc; - fl = xop->fl; - notify = xop->callback; + file = op_data->file; + flc = &op_data->flc; + fl = op_data->fl; + notify = op_data->callback; if (op->info.rv) { notify(fl, op->info.rv); @@ -234,7 +241,7 @@ static int dlm_plock_callback(struct plock_op *op) } out: - kfree(xop); + dlm_release_plock_op(op); return rv; } @@ -290,13 +297,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, send_op(op); wait_event(recv_wq, (op->done != 0)); - spin_lock(&ops_lock); - if (!list_empty(&op->list)) { - log_error(ls, "dlm_posix_unlock: op on list %llx", - (unsigned long long)number); - list_del(&op->list); - } - spin_unlock(&ops_lock); + WARN_ON(!list_empty(&op->list)); rv = op->info.rv; @@ -304,7 +305,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, rv = 0; out_free: - kfree(op); + dlm_release_plock_op(op); out: dlm_put_lockspace(ls); fl->fl_flags = fl_flags; @@ -344,13 +345,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, send_op(op); wait_event(recv_wq, (op->done != 0)); - spin_lock(&ops_lock); - if (!list_empty(&op->list)) { - log_error(ls, "dlm_posix_get: op on list %llx", - (unsigned long long)number); - list_del(&op->list); - } - spin_unlock(&ops_lock); + WARN_ON(!list_empty(&op->list)); /* info.rv from userspace is 1 for conflict, 0 for no-conflict, -ENOENT if there are no locks on the file */ @@ -370,7 +365,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, rv = 0; } - kfree(op); + dlm_release_plock_op(op); out: dlm_put_lockspace(ls); return rv; @@ -389,7 +384,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, spin_lock(&ops_lock); if (!list_empty(&send_list)) { - op = list_entry(send_list.next, struct plock_op, list); + op = list_first_entry(&send_list, struct plock_op, list); if (op->info.flags & DLM_PLOCK_FL_CLOSE) list_del(&op->list); else @@ -406,7 +401,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, (the process did not make an unlock call). */ if (op->info.flags & DLM_PLOCK_FL_CLOSE) - kfree(op); + dlm_release_plock_op(op); if (copy_to_user(u, &info, sizeof(info))) return -EFAULT; @@ -418,9 +413,9 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, static ssize_t dev_write(struct file *file, const char __user *u, size_t count, loff_t *ppos) { + struct plock_op *op = NULL, *iter; struct dlm_plock_info info; - struct plock_op *op; - int found = 0, do_callback = 0; + int do_callback = 0; if (count != sizeof(info)) return -EINVAL; @@ -432,31 +427,43 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, return -EINVAL; spin_lock(&ops_lock); - list_for_each_entry(op, &recv_list, list) { - if (op->info.fsid == info.fsid && - op->info.number == info.number && - op->info.owner == info.owner) { - struct plock_xop *xop = (struct plock_xop *)op; - list_del_init(&op->list); - memcpy(&op->info, &info, sizeof(info)); - if (xop->callback) + list_for_each_entry(iter, &recv_list, list) { + if (iter->info.fsid == info.fsid && + iter->info.number == info.number && + iter->info.owner == info.owner) { + if (iter->sigint) { + list_del(&iter->list); + spin_unlock(&ops_lock); + + pr_debug("%s: sigint cleanup %x %llx pid %d", + __func__, iter->info.fsid, + (unsigned long long)iter->info.number, + iter->info.pid); + do_unlock_close(&iter->info); + memcpy(&iter->info, &info, sizeof(info)); + dlm_release_plock_op(iter); + return count; + } + list_del_init(&iter->list); + memcpy(&iter->info, &info, sizeof(info)); + if (iter->data) do_callback = 1; else - op->done = 1; - found = 1; + iter->done = 1; + op = iter; break; } } spin_unlock(&ops_lock); - if (found) { + if (op) { if (do_callback) dlm_plock_callback(op); else wake_up(&recv_wq); } else - log_print("dev_write no op %x %llx", info.fsid, - (unsigned long long)info.number); + log_print("%s: no op %x %llx", __func__, + info.fsid, (unsigned long long)info.number); return count; } @@ -492,12 +499,6 @@ int dlm_plock_init(void) { int rv; - spin_lock_init(&ops_lock); - INIT_LIST_HEAD(&send_list); - INIT_LIST_HEAD(&recv_list); - init_waitqueue_head(&send_wq); - init_waitqueue_head(&recv_wq); - rv = misc_register(&plock_dev_misc); if (rv) log_print("dlm_plock_init: misc_register failed %d", rv); diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 6cba86470278..f19860315043 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -34,16 +34,16 @@ static void _create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len, rc = (struct dlm_rcom *) mb; - rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); - rc->rc_header.u.h_lockspace = ls->ls_global_id; - rc->rc_header.h_nodeid = dlm_our_nodeid(); - rc->rc_header.h_length = mb_len; + rc->rc_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR); + rc->rc_header.u.h_lockspace = cpu_to_le32(ls->ls_global_id); + rc->rc_header.h_nodeid = cpu_to_le32(dlm_our_nodeid()); + rc->rc_header.h_length = cpu_to_le16(mb_len); rc->rc_header.h_cmd = DLM_RCOM; - rc->rc_type = type; + rc->rc_type = cpu_to_le32(type); spin_lock(&ls->ls_recover_lock); - rc->rc_seq = ls->ls_recover_seq; + rc->rc_seq = cpu_to_le64(ls->ls_recover_seq); spin_unlock(&ls->ls_recover_lock); *rc_ret = rc; @@ -91,13 +91,11 @@ static int create_rcom_stateless(struct dlm_ls *ls, int to_nodeid, int type, static void send_rcom(struct dlm_mhandle *mh, struct dlm_rcom *rc) { - dlm_rcom_out(rc); dlm_midcomms_commit_mhandle(mh); } static void send_rcom_stateless(struct dlm_msg *msg, struct dlm_rcom *rc) { - dlm_rcom_out(rc); dlm_lowcomms_commit_msg(msg); dlm_lowcomms_put_msg(msg); } @@ -127,10 +125,10 @@ static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) { struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; - if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { + if ((le32_to_cpu(rc->rc_header.h_version) & 0xFFFF0000) != DLM_HEADER_MAJOR) { log_error(ls, "version mismatch: %x nodeid %d: %x", DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, - rc->rc_header.h_version); + le32_to_cpu(rc->rc_header.h_version)); return -EPROTO; } @@ -145,10 +143,10 @@ static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) return 0; } -static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq) +static void allow_sync_reply(struct dlm_ls *ls, __le64 *new_seq) { spin_lock(&ls->ls_rcom_spin); - *new_seq = ++ls->ls_rcom_seq; + *new_seq = cpu_to_le64(++ls->ls_rcom_seq); set_bit(LSFL_RCOM_WAIT, &ls->ls_flags); spin_unlock(&ls->ls_rcom_spin); } @@ -182,7 +180,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) if (nodeid == dlm_our_nodeid()) { rc = ls->ls_recover_buf; - rc->rc_result = dlm_recover_status(ls); + rc->rc_result = cpu_to_le32(dlm_recover_status(ls)); goto out; } @@ -208,7 +206,7 @@ retry: rc = ls->ls_recover_buf; - if (rc->rc_result == -ESRCH) { + if (rc->rc_result == cpu_to_le32(-ESRCH)) { /* we pretend the remote lockspace exists with 0 status */ log_debug(ls, "remote node %d not ready", nodeid); rc->rc_result = 0; @@ -227,7 +225,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) struct dlm_rcom *rc; struct rcom_status *rs; uint32_t status; - int nodeid = rc_in->rc_header.h_nodeid; + int nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid); int len = sizeof(struct rcom_config); struct dlm_msg *msg; int num_slots = 0; @@ -259,7 +257,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) rc->rc_id = rc_in->rc_id; rc->rc_seq_reply = rc_in->rc_seq; - rc->rc_result = status; + rc->rc_result = cpu_to_le32(status); set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, num_slots); @@ -287,14 +285,16 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) { spin_lock(&ls->ls_rcom_spin); if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) || - rc_in->rc_id != ls->ls_rcom_seq) { + le64_to_cpu(rc_in->rc_id) != ls->ls_rcom_seq) { log_debug(ls, "reject reply %d from %d seq %llx expect %llx", - rc_in->rc_type, rc_in->rc_header.h_nodeid, - (unsigned long long)rc_in->rc_id, + le32_to_cpu(rc_in->rc_type), + le32_to_cpu(rc_in->rc_header.h_nodeid), + (unsigned long long)le64_to_cpu(rc_in->rc_id), (unsigned long long)ls->ls_rcom_seq); goto out; } - memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length); + memcpy(ls->ls_recover_buf, rc_in, + le16_to_cpu(rc_in->rc_header.h_length)); set_bit(LSFL_RCOM_READY, &ls->ls_flags); clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags); wake_up(&ls->ls_wait_general); @@ -336,8 +336,9 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) int error, inlen, outlen, nodeid; struct dlm_msg *msg; - nodeid = rc_in->rc_header.h_nodeid; - inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); + nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid); + inlen = le16_to_cpu(rc_in->rc_header.h_length) - + sizeof(struct dlm_rcom); outlen = DLM_MAX_APP_BUFSIZE - sizeof(struct dlm_rcom); error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, @@ -364,7 +365,7 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) if (error) goto out; memcpy(rc->rc_buf, r->res_name, r->res_length); - rc->rc_id = (unsigned long) r->res_id; + rc->rc_id = cpu_to_le64(r->res_id); send_rcom(mh, rc); out: @@ -375,11 +376,12 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) { struct dlm_rcom *rc; struct dlm_mhandle *mh; - int error, ret_nodeid, nodeid = rc_in->rc_header.h_nodeid; - int len = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); + int error, ret_nodeid, nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid); + int len = le16_to_cpu(rc_in->rc_header.h_length) - + sizeof(struct dlm_rcom); /* Old code would send this special id to trigger a debug dump. */ - if (rc_in->rc_id == 0xFFFFFFFF) { + if (rc_in->rc_id == cpu_to_le64(0xFFFFFFFF)) { log_error(ls, "receive_rcom_lookup dump from %d", nodeid); dlm_dump_rsb_name(ls, rc_in->rc_buf, len); return; @@ -393,7 +395,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) DLM_LU_RECOVER_MASTER, &ret_nodeid, NULL); if (error) ret_nodeid = error; - rc->rc_result = ret_nodeid; + rc->rc_result = cpu_to_le32(ret_nodeid); rc->rc_id = rc_in->rc_id; rc->rc_seq_reply = rc_in->rc_seq; @@ -452,7 +454,7 @@ int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) rl = (struct rcom_lock *) rc->rc_buf; pack_rcom_lock(r, lkb, rl); - rc->rc_id = (unsigned long) r; + rc->rc_id = cpu_to_le64((uintptr_t)r); send_rcom(mh, rc); out: @@ -464,7 +466,7 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in) { struct dlm_rcom *rc; struct dlm_mhandle *mh; - int error, nodeid = rc_in->rc_header.h_nodeid; + int error, nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid); dlm_recover_master_copy(ls, rc_in); @@ -500,21 +502,20 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) rc = (struct dlm_rcom *) mb; - rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); + rc->rc_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR); rc->rc_header.u.h_lockspace = rc_in->rc_header.u.h_lockspace; - rc->rc_header.h_nodeid = dlm_our_nodeid(); - rc->rc_header.h_length = mb_len; + rc->rc_header.h_nodeid = cpu_to_le32(dlm_our_nodeid()); + rc->rc_header.h_length = cpu_to_le16(mb_len); rc->rc_header.h_cmd = DLM_RCOM; - rc->rc_type = DLM_RCOM_STATUS_REPLY; + rc->rc_type = cpu_to_le32(DLM_RCOM_STATUS_REPLY); rc->rc_id = rc_in->rc_id; rc->rc_seq_reply = rc_in->rc_seq; - rc->rc_result = -ESRCH; + rc->rc_result = cpu_to_le32(-ESRCH); rf = (struct rcom_config *) rc->rc_buf; rf->rf_lvblen = cpu_to_le32(~0U); - dlm_rcom_out(rc); dlm_midcomms_commit_mhandle(mh); return 0; @@ -573,27 +574,27 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) uint64_t seq; switch (rc->rc_type) { - case DLM_RCOM_STATUS_REPLY: + case cpu_to_le32(DLM_RCOM_STATUS_REPLY): reply = 1; break; - case DLM_RCOM_NAMES: + case cpu_to_le32(DLM_RCOM_NAMES): names = 1; break; - case DLM_RCOM_NAMES_REPLY: + case cpu_to_le32(DLM_RCOM_NAMES_REPLY): names = 1; reply = 1; break; - case DLM_RCOM_LOOKUP: + case cpu_to_le32(DLM_RCOM_LOOKUP): lookup = 1; break; - case DLM_RCOM_LOOKUP_REPLY: + case cpu_to_le32(DLM_RCOM_LOOKUP_REPLY): lookup = 1; reply = 1; break; - case DLM_RCOM_LOCK: + case cpu_to_le32(DLM_RCOM_LOCK): lock = 1; break; - case DLM_RCOM_LOCK_REPLY: + case cpu_to_le32(DLM_RCOM_LOCK_REPLY): lock = 1; reply = 1; break; @@ -601,14 +602,14 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) spin_lock(&ls->ls_recover_lock); status = ls->ls_recover_status; - stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); + stop = dlm_recovery_stopped(ls); seq = ls->ls_recover_seq; spin_unlock(&ls->ls_recover_lock); - if (stop && (rc->rc_type != DLM_RCOM_STATUS)) + if (stop && (rc->rc_type != cpu_to_le32(DLM_RCOM_STATUS))) goto ignore; - if (reply && (rc->rc_seq_reply != seq)) + if (reply && (le64_to_cpu(rc->rc_seq_reply) != seq)) goto ignore; if (!(status & DLM_RS_NODES) && (names || lookup || lock)) @@ -618,59 +619,60 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) goto ignore; switch (rc->rc_type) { - case DLM_RCOM_STATUS: + case cpu_to_le32(DLM_RCOM_STATUS): receive_rcom_status(ls, rc); break; - case DLM_RCOM_NAMES: + case cpu_to_le32(DLM_RCOM_NAMES): receive_rcom_names(ls, rc); break; - case DLM_RCOM_LOOKUP: + case cpu_to_le32(DLM_RCOM_LOOKUP): receive_rcom_lookup(ls, rc); break; - case DLM_RCOM_LOCK: - if (rc->rc_header.h_length < lock_size) + case cpu_to_le32(DLM_RCOM_LOCK): + if (le16_to_cpu(rc->rc_header.h_length) < lock_size) goto Eshort; receive_rcom_lock(ls, rc); break; - case DLM_RCOM_STATUS_REPLY: + case cpu_to_le32(DLM_RCOM_STATUS_REPLY): receive_sync_reply(ls, rc); break; - case DLM_RCOM_NAMES_REPLY: + case cpu_to_le32(DLM_RCOM_NAMES_REPLY): receive_sync_reply(ls, rc); break; - case DLM_RCOM_LOOKUP_REPLY: + case cpu_to_le32(DLM_RCOM_LOOKUP_REPLY): receive_rcom_lookup_reply(ls, rc); break; - case DLM_RCOM_LOCK_REPLY: - if (rc->rc_header.h_length < lock_size) + case cpu_to_le32(DLM_RCOM_LOCK_REPLY): + if (le16_to_cpu(rc->rc_header.h_length) < lock_size) goto Eshort; dlm_recover_process_copy(ls, rc); break; default: - log_error(ls, "receive_rcom bad type %d", rc->rc_type); + log_error(ls, "receive_rcom bad type %d", + le32_to_cpu(rc->rc_type)); } return; ignore: log_limit(ls, "dlm_receive_rcom ignore msg %d " "from %d %llu %llu recover seq %llu sts %x gen %u", - rc->rc_type, + le32_to_cpu(rc->rc_type), nodeid, - (unsigned long long)rc->rc_seq, - (unsigned long long)rc->rc_seq_reply, + (unsigned long long)le64_to_cpu(rc->rc_seq), + (unsigned long long)le64_to_cpu(rc->rc_seq_reply), (unsigned long long)seq, status, ls->ls_generation); return; Eshort: log_error(ls, "recovery message %d from %d is too short", - rc->rc_type, nodeid); + le32_to_cpu(rc->rc_type), nodeid); } diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 8928e99dfd47..ccff1791803f 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -114,7 +114,7 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, if (save_slots) dlm_slot_save(ls, rc, memb); - if (rc->rc_result & wait_status) + if (le32_to_cpu(rc->rc_result) & wait_status) break; if (delay < 1000) delay += 20; @@ -141,7 +141,7 @@ static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, if (error) break; - if (rc->rc_result & wait_status) + if (le32_to_cpu(rc->rc_result) & wait_status) break; if (delay < 1000) delay += 20; @@ -568,14 +568,14 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) struct dlm_rsb *r; int ret_nodeid, new_master; - r = recover_idr_find(ls, rc->rc_id); + r = recover_idr_find(ls, le64_to_cpu(rc->rc_id)); if (!r) { log_error(ls, "dlm_recover_master_reply no id %llx", - (unsigned long long)rc->rc_id); + (unsigned long long)le64_to_cpu(rc->rc_id)); goto out; } - ret_nodeid = rc->rc_result; + ret_nodeid = le32_to_cpu(rc->rc_result); if (ret_nodeid == dlm_our_nodeid()) new_master = 0; @@ -732,10 +732,9 @@ void dlm_recovered_lock(struct dlm_rsb *r) static void recover_lvb(struct dlm_rsb *r) { - struct dlm_lkb *lkb, *high_lkb = NULL; + struct dlm_lkb *big_lkb = NULL, *iter, *high_lkb = NULL; uint32_t high_seq = 0; int lock_lvb_exists = 0; - int big_lock_exists = 0; int lvblen = r->res_ls->ls_lvblen; if (!rsb_flag(r, RSB_NEW_MASTER2) && @@ -751,37 +750,37 @@ static void recover_lvb(struct dlm_rsb *r) /* we are the new master, so figure out if VALNOTVALID should be set, and set the rsb lvb from the best lkb available. */ - list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { - if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + list_for_each_entry(iter, &r->res_grantqueue, lkb_statequeue) { + if (!(iter->lkb_exflags & DLM_LKF_VALBLK)) continue; lock_lvb_exists = 1; - if (lkb->lkb_grmode > DLM_LOCK_CR) { - big_lock_exists = 1; + if (iter->lkb_grmode > DLM_LOCK_CR) { + big_lkb = iter; goto setflag; } - if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) { - high_lkb = lkb; - high_seq = lkb->lkb_lvbseq; + if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) { + high_lkb = iter; + high_seq = iter->lkb_lvbseq; } } - list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { - if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + list_for_each_entry(iter, &r->res_convertqueue, lkb_statequeue) { + if (!(iter->lkb_exflags & DLM_LKF_VALBLK)) continue; lock_lvb_exists = 1; - if (lkb->lkb_grmode > DLM_LOCK_CR) { - big_lock_exists = 1; + if (iter->lkb_grmode > DLM_LOCK_CR) { + big_lkb = iter; goto setflag; } - if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) { - high_lkb = lkb; - high_seq = lkb->lkb_lvbseq; + if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) { + high_lkb = iter; + high_seq = iter->lkb_lvbseq; } } @@ -790,7 +789,7 @@ static void recover_lvb(struct dlm_rsb *r) goto out; /* lvb is invalidated if only NL/CR locks remain */ - if (!big_lock_exists) + if (!big_lkb) rsb_set_flag(r, RSB_VALNOTVALID); if (!r->res_lvbptr) { @@ -799,9 +798,9 @@ static void recover_lvb(struct dlm_rsb *r) goto out; } - if (big_lock_exists) { - r->res_lvbseq = lkb->lkb_lvbseq; - memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen); + if (big_lkb) { + r->res_lvbseq = big_lkb->lkb_lvbseq; + memcpy(r->res_lvbptr, big_lkb->lkb_lvbptr, lvblen); } else if (high_lkb) { r->res_lvbseq = high_lkb->lkb_lvbseq; memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen); diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 97d052cea5a9..e15eb511b04b 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -70,6 +70,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) /* * Add or remove nodes from the lockspace's ls_nodes list. + * + * Due to the fact that we must report all membership changes to lsops + * or midcomms layer, it is not permitted to abort ls_recover() until + * this is done. */ error = dlm_recover_members(ls, rv, &neg); @@ -124,8 +128,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_recover_waiters_pre(ls); - error = dlm_recovery_stopped(ls); - if (error) { + if (dlm_recovery_stopped(ls)) { error = -EINTR; goto fail; } @@ -240,14 +243,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) jiffies_to_msecs(jiffies - start)); mutex_unlock(&ls->ls_recoverd_active); - dlm_lsop_recover_done(ls); return 0; fail: dlm_release_root_list(ls); - log_rinfo(ls, "dlm_recover %llu error %d", - (unsigned long long)rv->seq, error); mutex_unlock(&ls->ls_recoverd_active); + return error; } @@ -258,6 +259,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) static void do_ls_recovery(struct dlm_ls *ls) { struct dlm_recover *rv = NULL; + int error; spin_lock(&ls->ls_recover_lock); rv = ls->ls_recover_args; @@ -267,7 +269,31 @@ static void do_ls_recovery(struct dlm_ls *ls) spin_unlock(&ls->ls_recover_lock); if (rv) { - ls_recover(ls, rv); + error = ls_recover(ls, rv); + switch (error) { + case 0: + ls->ls_recovery_result = 0; + complete(&ls->ls_recovery_done); + + dlm_lsop_recover_done(ls); + break; + case -EINTR: + /* if recovery was interrupted -EINTR we wait for the next + * ls_recover() iteration until it hopefully succeeds. + */ + log_rinfo(ls, "%s %llu interrupted and should be queued to run again", + __func__, (unsigned long long)rv->seq); + break; + default: + log_rinfo(ls, "%s %llu error %d", __func__, + (unsigned long long)rv->seq, error); + + /* let new_lockspace() get aware of critical error */ + ls->ls_recovery_result = error; + complete(&ls->ls_recovery_done); + break; + } + kfree(rv->nodes); kfree(rv); } diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index e89e0ff8bfa3..036a9a0078f6 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -14,6 +14,7 @@ #include "dir.h" #include "config.h" #include "requestqueue.h" +#include "util.h" struct rq_entry { struct list_head list; @@ -32,7 +33,8 @@ struct rq_entry { void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) { struct rq_entry *e; - int length = ms->m_header.h_length - sizeof(struct dlm_message); + int length = le16_to_cpu(ms->m_header.h_length) - + sizeof(struct dlm_message); e = kmalloc(sizeof(struct rq_entry) + length, GFP_NOFS); if (!e) { @@ -42,8 +44,9 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) e->recover_seq = ls->ls_recover_seq & 0xFFFFFFFF; e->nodeid = nodeid; - memcpy(&e->request, ms, ms->m_header.h_length); + memcpy(&e->request, ms, le16_to_cpu(ms->m_header.h_length)); + atomic_inc(&ls->ls_requestqueue_cnt); mutex_lock(&ls->ls_requestqueue_mutex); list_add_tail(&e->list, &ls->ls_requestqueue); mutex_unlock(&ls->ls_requestqueue_mutex); @@ -81,14 +84,18 @@ int dlm_process_requestqueue(struct dlm_ls *ls) log_limit(ls, "dlm_process_requestqueue msg %d from %d " "lkid %x remid %x result %d seq %u", - ms->m_type, ms->m_header.h_nodeid, - ms->m_lkid, ms->m_remid, ms->m_result, + le32_to_cpu(ms->m_type), + le32_to_cpu(ms->m_header.h_nodeid), + le32_to_cpu(ms->m_lkid), le32_to_cpu(ms->m_remid), + from_dlm_errno(le32_to_cpu(ms->m_result)), e->recover_seq); dlm_receive_message_saved(ls, &e->request, e->recover_seq); mutex_lock(&ls->ls_requestqueue_mutex); list_del(&e->list); + if (atomic_dec_and_test(&ls->ls_requestqueue_cnt)) + wake_up(&ls->ls_requestqueue_wait); kfree(e); if (dlm_locking_stopped(ls)) { @@ -115,22 +122,16 @@ int dlm_process_requestqueue(struct dlm_ls *ls) void dlm_wait_requestqueue(struct dlm_ls *ls) { - for (;;) { - mutex_lock(&ls->ls_requestqueue_mutex); - if (list_empty(&ls->ls_requestqueue)) - break; - mutex_unlock(&ls->ls_requestqueue_mutex); - schedule(); - } - mutex_unlock(&ls->ls_requestqueue_mutex); + wait_event(ls->ls_requestqueue_wait, + atomic_read(&ls->ls_requestqueue_cnt) == 0); } static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) { - uint32_t type = ms->m_type; + __le32 type = ms->m_type; /* the ls is being cleaned up and freed by release_lockspace */ - if (!ls->ls_count) + if (!atomic_read(&ls->ls_count)) return 1; if (dlm_is_removed(ls, nodeid)) @@ -139,9 +140,9 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) /* directory operations are always purged because the directory is always rebuilt during recovery and the lookups resent */ - if (type == DLM_MSG_REMOVE || - type == DLM_MSG_LOOKUP || - type == DLM_MSG_LOOKUP_REPLY) + if (type == cpu_to_le32(DLM_MSG_REMOVE) || + type == cpu_to_le32(DLM_MSG_LOOKUP) || + type == cpu_to_le32(DLM_MSG_LOOKUP_REPLY)) return 1; if (!dlm_no_directory(ls)) @@ -161,6 +162,8 @@ void dlm_purge_requestqueue(struct dlm_ls *ls) if (purge_request(ls, ms, e->nodeid)) { list_del(&e->list); + if (atomic_dec_and_test(&ls->ls_requestqueue_cnt)) + wake_up(&ls->ls_requestqueue_wait); kfree(e); } } diff --git a/fs/dlm/user.c b/fs/dlm/user.c index e5cefa90b1ce..c5d27bccc3dc 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -16,6 +16,8 @@ #include <linux/slab.h> #include <linux/sched/signal.h> +#include <trace/events/dlm.h> + #include "dlm_internal.h" #include "lockspace.h" #include "lock.h" @@ -108,11 +110,11 @@ static void compat_input(struct dlm_write_request *kb, kb->i.lock.parent = kb32->i.lock.parent; kb->i.lock.xid = kb32->i.lock.xid; kb->i.lock.timeout = kb32->i.lock.timeout; - kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam; - kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr; - kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam; - kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; - kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; + kb->i.lock.castparam = (__user void *)(long)kb32->i.lock.castparam; + kb->i.lock.castaddr = (__user void *)(long)kb32->i.lock.castaddr; + kb->i.lock.bastparam = (__user void *)(long)kb32->i.lock.bastparam; + kb->i.lock.bastaddr = (__user void *)(long)kb32->i.lock.bastaddr; + kb->i.lock.lksb = (__user void *)(long)kb32->i.lock.lksb; memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); memcpy(kb->i.lock.name, kb32->i.lock.name, namelen); } @@ -127,9 +129,9 @@ static void compat_output(struct dlm_lock_result *res, res32->version[1] = res->version[1]; res32->version[2] = res->version[2]; - res32->user_astaddr = (__u32)(long)res->user_astaddr; - res32->user_astparam = (__u32)(long)res->user_astparam; - res32->user_lksb = (__u32)(long)res->user_lksb; + res32->user_astaddr = (__u32)(__force long)res->user_astaddr; + res32->user_astparam = (__u32)(__force long)res->user_astparam; + res32->user_lksb = (__u32)(__force long)res->user_lksb; res32->bast_mode = res->bast_mode; res32->lvb_offset = res->lvb_offset; @@ -184,7 +186,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, return; ls = lkb->lkb_resource->res_ls; - mutex_lock(&ls->ls_clear_proc_locks); + spin_lock(&ls->ls_clear_proc_locks); /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed @@ -230,7 +232,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, spin_unlock(&proc->locks_spin); } out: - mutex_unlock(&ls->ls_clear_proc_locks); + spin_unlock(&ls->ls_clear_proc_locks); } static int device_user_lock(struct dlm_user_proc *proc, @@ -250,6 +252,14 @@ static int device_user_lock(struct dlm_user_proc *proc, goto out; } +#ifdef CONFIG_DLM_DEPRECATED_API + if (params->timeout) + pr_warn_once("========================================================\n" + "WARNING: the lkb timeout feature is being deprecated and\n" + " will be removed in v6.2!\n" + "========================================================\n"); +#endif + ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS); if (!ua) goto out; @@ -262,23 +272,34 @@ static int device_user_lock(struct dlm_user_proc *proc, ua->xid = params->xid; if (params->flags & DLM_LKF_CONVERT) { +#ifdef CONFIG_DLM_DEPRECATED_API error = dlm_user_convert(ls, ua, params->mode, params->flags, params->lkid, params->lvb, (unsigned long) params->timeout); +#else + error = dlm_user_convert(ls, ua, + params->mode, params->flags, + params->lkid, params->lvb); +#endif } else if (params->flags & DLM_LKF_ORPHAN) { error = dlm_user_adopt_orphan(ls, ua, params->mode, params->flags, params->name, params->namelen, - (unsigned long) params->timeout, &lkid); if (!error) error = lkid; } else { +#ifdef CONFIG_DLM_DEPRECATED_API error = dlm_user_request(ls, ua, params->mode, params->flags, params->name, params->namelen, (unsigned long) params->timeout); +#else + error = dlm_user_request(ls, ua, + params->mode, params->flags, + params->name, params->namelen); +#endif if (!error) error = ua->lksb.sb_lkid; } @@ -402,9 +423,9 @@ static int device_create_lockspace(struct dlm_lspace_params *params) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - error = dlm_new_lockspace(params->name, dlm_config.ci_cluster_name, params->flags, - DLM_USER_LVB_LEN, NULL, NULL, NULL, - &lockspace); + error = dlm_new_user_lockspace(params->name, dlm_config.ci_cluster_name, + params->flags, DLM_USER_LVB_LEN, NULL, + NULL, NULL, &lockspace); if (error) return error; @@ -863,7 +884,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, goto try_another; } - if (cb.flags & DLM_CB_CAST) { + if (cb.flags & DLM_CB_BAST) { + trace_dlm_bast(lkb->lkb_resource->res_ls, lkb, cb.mode); + } else if (cb.flags & DLM_CB_CAST) { new_mode = cb.mode; if (!cb.sb_status && lkb->lkb_lksb->sb_lvbptr && @@ -872,6 +895,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, lkb->lkb_lksb->sb_status = cb.sb_status; lkb->lkb_lksb->sb_flags = cb.sb_flags; + trace_dlm_ast(lkb->lkb_resource->res_ls, lkb); } rv = copy_result_to_user(lkb->lkb_ua, diff --git a/fs/dlm/util.c b/fs/dlm/util.c index 58acbcc2081a..f2bc401f312f 100644 --- a/fs/dlm/util.c +++ b/fs/dlm/util.c @@ -20,28 +20,10 @@ #define DLM_ERRNO_ETIMEDOUT 110 #define DLM_ERRNO_EINPROGRESS 115 -void header_out(struct dlm_header *hd) -{ - hd->h_version = cpu_to_le32(hd->h_version); - /* does it for others u32 in union as well */ - hd->u.h_lockspace = cpu_to_le32(hd->u.h_lockspace); - hd->h_nodeid = cpu_to_le32(hd->h_nodeid); - hd->h_length = cpu_to_le16(hd->h_length); -} - -void header_in(struct dlm_header *hd) -{ - hd->h_version = le32_to_cpu(hd->h_version); - /* does it for others u32 in union as well */ - hd->u.h_lockspace = le32_to_cpu(hd->u.h_lockspace); - hd->h_nodeid = le32_to_cpu(hd->h_nodeid); - hd->h_length = le16_to_cpu(hd->h_length); -} - /* higher errno values are inconsistent across architectures, so select one set of values for on the wire */ -static int to_dlm_errno(int err) +int to_dlm_errno(int err) { switch (err) { case -EDEADLK: @@ -62,7 +44,7 @@ static int to_dlm_errno(int err) return err; } -static int from_dlm_errno(int err) +int from_dlm_errno(int err) { switch (err) { case -DLM_ERRNO_EDEADLK: @@ -82,73 +64,3 @@ static int from_dlm_errno(int err) } return err; } - -void dlm_message_out(struct dlm_message *ms) -{ - header_out(&ms->m_header); - - ms->m_type = cpu_to_le32(ms->m_type); - ms->m_nodeid = cpu_to_le32(ms->m_nodeid); - ms->m_pid = cpu_to_le32(ms->m_pid); - ms->m_lkid = cpu_to_le32(ms->m_lkid); - ms->m_remid = cpu_to_le32(ms->m_remid); - ms->m_parent_lkid = cpu_to_le32(ms->m_parent_lkid); - ms->m_parent_remid = cpu_to_le32(ms->m_parent_remid); - ms->m_exflags = cpu_to_le32(ms->m_exflags); - ms->m_sbflags = cpu_to_le32(ms->m_sbflags); - ms->m_flags = cpu_to_le32(ms->m_flags); - ms->m_lvbseq = cpu_to_le32(ms->m_lvbseq); - ms->m_hash = cpu_to_le32(ms->m_hash); - ms->m_status = cpu_to_le32(ms->m_status); - ms->m_grmode = cpu_to_le32(ms->m_grmode); - ms->m_rqmode = cpu_to_le32(ms->m_rqmode); - ms->m_bastmode = cpu_to_le32(ms->m_bastmode); - ms->m_asts = cpu_to_le32(ms->m_asts); - ms->m_result = cpu_to_le32(to_dlm_errno(ms->m_result)); -} - -void dlm_message_in(struct dlm_message *ms) -{ - header_in(&ms->m_header); - - ms->m_type = le32_to_cpu(ms->m_type); - ms->m_nodeid = le32_to_cpu(ms->m_nodeid); - ms->m_pid = le32_to_cpu(ms->m_pid); - ms->m_lkid = le32_to_cpu(ms->m_lkid); - ms->m_remid = le32_to_cpu(ms->m_remid); - ms->m_parent_lkid = le32_to_cpu(ms->m_parent_lkid); - ms->m_parent_remid = le32_to_cpu(ms->m_parent_remid); - ms->m_exflags = le32_to_cpu(ms->m_exflags); - ms->m_sbflags = le32_to_cpu(ms->m_sbflags); - ms->m_flags = le32_to_cpu(ms->m_flags); - ms->m_lvbseq = le32_to_cpu(ms->m_lvbseq); - ms->m_hash = le32_to_cpu(ms->m_hash); - ms->m_status = le32_to_cpu(ms->m_status); - ms->m_grmode = le32_to_cpu(ms->m_grmode); - ms->m_rqmode = le32_to_cpu(ms->m_rqmode); - ms->m_bastmode = le32_to_cpu(ms->m_bastmode); - ms->m_asts = le32_to_cpu(ms->m_asts); - ms->m_result = from_dlm_errno(le32_to_cpu(ms->m_result)); -} - -void dlm_rcom_out(struct dlm_rcom *rc) -{ - header_out(&rc->rc_header); - - rc->rc_type = cpu_to_le32(rc->rc_type); - rc->rc_result = cpu_to_le32(rc->rc_result); - rc->rc_id = cpu_to_le64(rc->rc_id); - rc->rc_seq = cpu_to_le64(rc->rc_seq); - rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply); -} - -void dlm_rcom_in(struct dlm_rcom *rc) -{ - header_in(&rc->rc_header); - - rc->rc_type = le32_to_cpu(rc->rc_type); - rc->rc_result = le32_to_cpu(rc->rc_result); - rc->rc_id = le64_to_cpu(rc->rc_id); - rc->rc_seq = le64_to_cpu(rc->rc_seq); - rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply); -} diff --git a/fs/dlm/util.h b/fs/dlm/util.h index d46f23c7a6a0..b6a4b8adca8d 100644 --- a/fs/dlm/util.h +++ b/fs/dlm/util.h @@ -11,12 +11,8 @@ #ifndef __UTIL_DOT_H__ #define __UTIL_DOT_H__ -void dlm_message_out(struct dlm_message *ms); -void dlm_message_in(struct dlm_message *ms); -void dlm_rcom_out(struct dlm_rcom *rc); -void dlm_rcom_in(struct dlm_rcom *rc); -void header_out(struct dlm_header *hd); -void header_in(struct dlm_header *hd); +int to_dlm_errno(int err); +int from_dlm_errno(int err); #endif |