about summary refs log tree commit diff stats
path: root/fs/dlm
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm')
-rw-r--r-- fs/dlm/Kconfig | 9
-rw-r--r-- fs/dlm/Makefile | 2
-rw-r--r-- fs/dlm/ast.c | 31
-rw-r--r-- fs/dlm/ast.h | 1
-rw-r--r-- fs/dlm/config.c | 21
-rw-r--r-- fs/dlm/config.h | 3
-rw-r--r-- fs/dlm/debug_fs.c | 96
-rw-r--r-- fs/dlm/dir.c | 5
-rw-r--r-- fs/dlm/dlm_internal.h | 112
-rw-r--r-- fs/dlm/lock.c | 1057
-rw-r--r-- fs/dlm/lock.h | 23
-rw-r--r-- fs/dlm/lockspace.c | 116
-rw-r--r-- fs/dlm/lockspace.h | 14
-rw-r--r-- fs/dlm/lowcomms.c | 229
-rw-r--r-- fs/dlm/lowcomms.h | 6
-rw-r--r-- fs/dlm/main.c | 3
-rw-r--r-- fs/dlm/member.c | 44
-rw-r--r-- fs/dlm/memory.c | 68
-rw-r--r-- fs/dlm/memory.h | 6
-rw-r--r-- fs/dlm/midcomms.c | 144
-rw-r--r-- fs/dlm/midcomms.h | 3
-rw-r--r-- fs/dlm/netlink.c | 1
-rw-r--r-- fs/dlm/plock.c | 213
-rw-r--r-- fs/dlm/rcom.c | 122
-rw-r--r-- fs/dlm/recover.c | 49
-rw-r--r-- fs/dlm/recoverd.c | 38
-rw-r--r-- fs/dlm/requestqueue.c | 37
-rw-r--r-- fs/dlm/user.c | 54
-rw-r--r-- fs/dlm/util.c | 92
-rw-r--r-- fs/dlm/util.h | 8
30 files changed, 1555 insertions, 1052 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index ee92634196a8..1105ce3c80cb 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -9,6 +9,15 @@ menuconfig DLM
A general purpose distributed lock manager for kernel or userspace
applications.
+config DLM_DEPRECATED_API
+ bool "DLM deprecated API"
+ depends on DLM
+ help
+ Enables deprecated DLM timeout features that will be removed in
+ later Linux kernel releases.
+
+ If you are unsure, say N.
+
config DLM_DEBUG
bool "DLM debugging"
depends on DLM
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 3545fdafc6fb..71dab733cf9a 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -9,7 +9,6 @@ dlm-y := ast.o \
member.o \
memory.o \
midcomms.o \
- netlink.o \
lowcomms.o \
plock.o \
rcom.o \
@@ -18,5 +17,6 @@ dlm-y := ast.o \
requestqueue.o \
user.o \
util.o
+dlm-$(CONFIG_DLM_DEPRECATED_API) += netlink.o
dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 283c7b94edda..d60a8d8f109d 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -9,6 +9,8 @@
*******************************************************************************
******************************************************************************/
+#include <trace/events/dlm.h>
+
#include "dlm_internal.h"
#include "lock.h"
#include "user.h"
@@ -198,13 +200,13 @@ void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
if (!prev_seq) {
kref_get(&lkb->lkb_ref);
+ mutex_lock(&ls->ls_cb_mutex);
if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
- mutex_lock(&ls->ls_cb_mutex);
list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
- mutex_unlock(&ls->ls_cb_mutex);
} else {
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
}
+ mutex_unlock(&ls->ls_cb_mutex);
}
out:
mutex_unlock(&lkb->lkb_cb_mutex);
@@ -253,10 +255,12 @@ void dlm_callback_work(struct work_struct *work)
if (callbacks[i].flags & DLM_CB_SKIP) {
continue;
} else if (callbacks[i].flags & DLM_CB_BAST) {
+ trace_dlm_bast(ls, lkb, callbacks[i].mode);
bastfn(lkb->lkb_astparam, callbacks[i].mode);
} else if (callbacks[i].flags & DLM_CB_CAST) {
lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
+ trace_dlm_ast(ls, lkb);
castfn(lkb->lkb_astparam);
}
}
@@ -284,10 +288,13 @@ void dlm_callback_stop(struct dlm_ls *ls)
void dlm_callback_suspend(struct dlm_ls *ls)
{
- set_bit(LSFL_CB_DELAY, &ls->ls_flags);
+ if (ls->ls_callback_wq) {
+ mutex_lock(&ls->ls_cb_mutex);
+ set_bit(LSFL_CB_DELAY, &ls->ls_flags);
+ mutex_unlock(&ls->ls_cb_mutex);
- if (ls->ls_callback_wq)
flush_workqueue(ls->ls_callback_wq);
+ }
}
#define MAX_CB_QUEUE 25
@@ -295,13 +302,14 @@ void dlm_callback_suspend(struct dlm_ls *ls)
void dlm_callback_resume(struct dlm_ls *ls)
{
struct dlm_lkb *lkb, *safe;
- int count = 0;
-
- clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
+ int count = 0, sum = 0;
+ bool empty;
if (!ls->ls_callback_wq)
return;
+ clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
+
more:
mutex_lock(&ls->ls_cb_mutex);
list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
@@ -311,14 +319,17 @@ more:
if (count == MAX_CB_QUEUE)
break;
}
+ empty = list_empty(&ls->ls_cb_delay);
mutex_unlock(&ls->ls_cb_mutex);
- if (count)
- log_rinfo(ls, "dlm_callback_resume %d", count);
- if (count == MAX_CB_QUEUE) {
+ sum += count;
+ if (!empty) {
count = 0;
cond_resched();
goto more;
}
+
+ if (sum)
+ log_rinfo(ls, "%s %d", __func__, sum);
}
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index 181ad7d20c4d..e5e05fcc5813 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -11,7 +11,6 @@
#ifndef __ASTD_DOT_H__
#define __ASTD_DOT_H__
-void dlm_del_ast(struct dlm_lkb *lkb);
int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
int status, uint32_t sbflags, uint64_t seq);
int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 42eee2783756..ac8b62106ce0 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -75,8 +75,9 @@ struct dlm_cluster {
unsigned int cl_log_info;
unsigned int cl_protocol;
unsigned int cl_mark;
+#ifdef CONFIG_DLM_DEPRECATED_API
unsigned int cl_timewarn_cs;
- unsigned int cl_waitwarn_us;
+#endif
unsigned int cl_new_rsb_count;
unsigned int cl_recover_callbacks;
char cl_cluster_name[DLM_LOCKSPACE_LEN];
@@ -102,8 +103,9 @@ enum {
CLUSTER_ATTR_LOG_INFO,
CLUSTER_ATTR_PROTOCOL,
CLUSTER_ATTR_MARK,
+#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR_TIMEWARN_CS,
- CLUSTER_ATTR_WAITWARN_US,
+#endif
CLUSTER_ATTR_NEW_RSB_COUNT,
CLUSTER_ATTR_RECOVER_CALLBACKS,
CLUSTER_ATTR_CLUSTER_NAME,
@@ -224,8 +226,9 @@ CLUSTER_ATTR(log_debug, NULL);
CLUSTER_ATTR(log_info, NULL);
CLUSTER_ATTR(protocol, dlm_check_protocol_and_dlm_running);
CLUSTER_ATTR(mark, NULL);
+#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR(timewarn_cs, dlm_check_zero);
-CLUSTER_ATTR(waitwarn_us, NULL);
+#endif
CLUSTER_ATTR(new_rsb_count, NULL);
CLUSTER_ATTR(recover_callbacks, NULL);
@@ -240,8 +243,9 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_LOG_INFO] = &cluster_attr_log_info,
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol,
[CLUSTER_ATTR_MARK] = &cluster_attr_mark,
+#ifdef CONFIG_DLM_DEPRECATED_API
[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs,
- [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us,
+#endif
[CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count,
[CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks,
[CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name,
@@ -432,8 +436,9 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_log_debug = dlm_config.ci_log_debug;
cl->cl_log_info = dlm_config.ci_log_info;
cl->cl_protocol = dlm_config.ci_protocol;
+#ifdef CONFIG_DLM_DEPRECATED_API
cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
- cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
+#endif
cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks;
memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name,
@@ -954,8 +959,9 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_LOG_INFO 1
#define DEFAULT_PROTOCOL DLM_PROTO_TCP
#define DEFAULT_MARK 0
+#ifdef CONFIG_DLM_DEPRECATED_API
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
-#define DEFAULT_WAITWARN_US 0
+#endif
#define DEFAULT_NEW_RSB_COUNT 128
#define DEFAULT_RECOVER_CALLBACKS 0
#define DEFAULT_CLUSTER_NAME ""
@@ -971,8 +977,9 @@ struct dlm_config_info dlm_config = {
.ci_log_info = DEFAULT_LOG_INFO,
.ci_protocol = DEFAULT_PROTOCOL,
.ci_mark = DEFAULT_MARK,
+#ifdef CONFIG_DLM_DEPRECATED_API
.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
- .ci_waitwarn_us = DEFAULT_WAITWARN_US,
+#endif
.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT,
.ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS,
.ci_cluster_name = DEFAULT_CLUSTER_NAME
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index df92b0a07fc6..55c5f2c13ebd 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -37,8 +37,9 @@ struct dlm_config_info {
int ci_log_info;
int ci_protocol;
int ci_mark;
+#ifdef CONFIG_DLM_DEPRECATED_API
int ci_timewarn_cs;
- int ci_waitwarn_us;
+#endif
int ci_new_rsb_count;
int ci_recover_callbacks;
char ci_cluster_name[DLM_LOCKSPACE_LEN];
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 47e9d57e4cae..8fb04ebbafb5 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -635,6 +635,35 @@ static int table_open2(struct inode *inode, struct file *file)
return 0;
}
+static ssize_t table_write2(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *seq = file->private_data;
+ int n, len, lkb_nodeid, lkb_status, error;
+ char name[DLM_RESNAME_MAXLEN + 1] = {};
+ struct dlm_ls *ls = seq->private;
+ unsigned int lkb_flags;
+ char buf[256] = {};
+ uint32_t lkb_id;
+
+ if (copy_from_user(buf, user_buf,
+ min_t(size_t, sizeof(buf) - 1, count)))
+ return -EFAULT;
+
+ n = sscanf(buf, "%x %" __stringify(DLM_RESNAME_MAXLEN) "s %x %d %d",
+ &lkb_id, name, &lkb_flags, &lkb_nodeid, &lkb_status);
+ if (n != 5)
+ return -EINVAL;
+
+ len = strnlen(name, DLM_RESNAME_MAXLEN);
+ error = dlm_debug_add_lkb(ls, lkb_id, name, len, lkb_flags,
+ lkb_nodeid, lkb_status);
+ if (error)
+ return error;
+
+ return count;
+}
+
static int table_open3(struct inode *inode, struct file *file)
{
struct seq_file *seq;
@@ -675,6 +704,7 @@ static const struct file_operations format2_fops = {
.owner = THIS_MODULE,
.open = table_open2,
.read = seq_read,
+ .write = table_write2,
.llseek = seq_lseek,
.release = seq_release
};
@@ -724,10 +754,35 @@ static ssize_t waiters_read(struct file *file, char __user *userbuf,
return rv;
}
+static ssize_t waiters_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct dlm_ls *ls = file->private_data;
+ int mstype, to_nodeid;
+ char buf[128] = {};
+ uint32_t lkb_id;
+ int n, error;
+
+ if (copy_from_user(buf, user_buf,
+ min_t(size_t, sizeof(buf) - 1, count)))
+ return -EFAULT;
+
+ n = sscanf(buf, "%x %d %d", &lkb_id, &mstype, &to_nodeid);
+ if (n != 3)
+ return -EINVAL;
+
+ error = dlm_debug_add_lkb_to_waiters(ls, lkb_id, mstype, to_nodeid);
+ if (error)
+ return error;
+
+ return count;
+}
+
static const struct file_operations waiters_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.read = waiters_read,
+ .write = waiters_write,
.llseek = default_llseek,
};
@@ -768,6 +823,42 @@ static int dlm_version_show(struct seq_file *file, void *offset)
}
DEFINE_SHOW_ATTRIBUTE(dlm_version);
+static ssize_t dlm_rawmsg_write(struct file *fp, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ void *buf;
+ int ret;
+
+ if (count > PAGE_SIZE || count < sizeof(struct dlm_header))
+ return -EINVAL;
+
+ buf = kmalloc(PAGE_SIZE, GFP_NOFS);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, user_buf, count)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = dlm_midcomms_rawmsg_send(fp->private_data, buf, count);
+ if (ret)
+ goto out;
+
+ kfree(buf);
+ return count;
+
+out:
+ kfree(buf);
+ return ret;
+}
+
+static const struct file_operations dlm_rawmsg_fops = {
+ .open = simple_open,
+ .write = dlm_rawmsg_write,
+ .llseek = no_llseek,
+};
+
void *dlm_create_debug_comms_file(int nodeid, void *data)
{
struct dentry *d_node;
@@ -782,6 +873,7 @@ void *dlm_create_debug_comms_file(int nodeid, void *data)
debugfs_create_file("send_queue_count", 0444, d_node, data,
&dlm_send_queue_cnt_fops);
debugfs_create_file("version", 0444, d_node, data, &dlm_version_fops);
+ debugfs_create_file("rawmsg", 0200, d_node, data, &dlm_rawmsg_fops);
return d_node;
}
@@ -809,7 +901,7 @@ void dlm_create_debug_file(struct dlm_ls *ls)
snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_locks", ls->ls_name);
ls->ls_debug_locks_dentry = debugfs_create_file(name,
- S_IFREG | S_IRUGO,
+ 0644,
dlm_root,
ls,
&format2_fops);
@@ -840,7 +932,7 @@ void dlm_create_debug_file(struct dlm_ls *ls)
snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_waiters", ls->ls_name);
ls->ls_debug_waiters_dentry = debugfs_create_file(name,
- S_IFREG | S_IRUGO,
+ 0644,
dlm_root,
ls,
&waiters_fops);
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 45ebbe602bbf..fb1981654bb2 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -84,8 +84,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
for (;;) {
int left;
- error = dlm_recovery_stopped(ls);
- if (error) {
+ if (dlm_recovery_stopped(ls)) {
error = -EINTR;
goto out_free;
}
@@ -102,7 +101,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
*/
b = ls->ls_recover_buf->rc_buf;
- left = ls->ls_recover_buf->rc_header.h_length;
+ left = le16_to_cpu(ls->ls_recover_buf->rc_header.h_length);
left -= sizeof(struct dlm_rcom);
for (;;) {
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 5f57538b5d45..e34c3d2639a5 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -41,12 +41,6 @@
#include <linux/dlm.h>
#include "config.h"
-/* Size of the temp buffer midcomms allocates on the stack.
- We try to make this large enough so most messages fit.
- FIXME: should sctp make this unnecessary? */
-
-#define DLM_INBUF_LEN 148
-
struct dlm_ls;
struct dlm_lkb;
struct dlm_rsb;
@@ -151,7 +145,9 @@ struct dlm_args {
void (*bastfn) (void *astparam, int mode);
int mode;
struct dlm_lksb *lksb;
+#ifdef CONFIG_DLM_DEPRECATED_API
unsigned long timeout;
+#endif
};
@@ -209,10 +205,20 @@ struct dlm_args {
#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
+#ifdef CONFIG_DLM_DEPRECATED_API
#define DLM_IFL_WATCH_TIMEWARN 0x00400000
#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
+#endif
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
+/* Only the least significant 2 bytes are exchanged in messages: all bytes
+ * are transmitted, but the receive side only sets the 2 least significant.
+ *
+ * Even the wireshark dlm dissector only evaluates the lower bytes and notes
+ * that they may not be used on the transceiver side. We assume the higher
+ * bytes are for internal use or reserved, as long as they are not parsed on
+ * the receiver side.
+ */
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
@@ -255,10 +261,12 @@ struct dlm_lkb {
struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
struct list_head lkb_wait_reply; /* waiting for remote reply */
struct list_head lkb_ownqueue; /* list of locks for a process */
- struct list_head lkb_time_list;
ktime_t lkb_timestamp;
- ktime_t lkb_wait_time;
+
+#ifdef CONFIG_DLM_DEPRECATED_API
+ struct list_head lkb_time_list;
unsigned long lkb_timeout_cs;
+#endif
struct mutex lkb_cb_mutex;
struct work_struct lkb_cb_work;
@@ -385,15 +393,15 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
#define DLM_FIN 5
struct dlm_header {
- uint32_t h_version;
+ __le32 h_version;
union {
/* for DLM_MSG and DLM_RCOM */
- uint32_t h_lockspace;
+ __le32 h_lockspace;
/* for DLM_ACK and DLM_OPTS */
- uint32_t h_seq;
+ __le32 h_seq;
} u;
- uint32_t h_nodeid; /* nodeid of sender */
- uint16_t h_length;
+ __le32 h_nodeid; /* nodeid of sender */
+ __le16 h_length;
uint8_t h_cmd; /* DLM_MSG, DLM_RCOM */
uint8_t h_pad;
};
@@ -415,24 +423,24 @@ struct dlm_header {
struct dlm_message {
struct dlm_header m_header;
- uint32_t m_type; /* DLM_MSG_ */
- uint32_t m_nodeid;
- uint32_t m_pid;
- uint32_t m_lkid; /* lkid on sender */
- uint32_t m_remid; /* lkid on receiver */
- uint32_t m_parent_lkid;
- uint32_t m_parent_remid;
- uint32_t m_exflags;
- uint32_t m_sbflags;
- uint32_t m_flags;
- uint32_t m_lvbseq;
- uint32_t m_hash;
- int m_status;
- int m_grmode;
- int m_rqmode;
- int m_bastmode;
- int m_asts;
- int m_result; /* 0 or -EXXX */
+ __le32 m_type; /* DLM_MSG_ */
+ __le32 m_nodeid;
+ __le32 m_pid;
+ __le32 m_lkid; /* lkid on sender */
+ __le32 m_remid; /* lkid on receiver */
+ __le32 m_parent_lkid;
+ __le32 m_parent_remid;
+ __le32 m_exflags;
+ __le32 m_sbflags;
+ __le32 m_flags;
+ __le32 m_lvbseq;
+ __le32 m_hash;
+ __le32 m_status;
+ __le32 m_grmode;
+ __le32 m_rqmode;
+ __le32 m_bastmode;
+ __le32 m_asts;
+ __le32 m_result; /* 0 or -EXXX */
char m_extra[]; /* name or lvb */
};
@@ -457,18 +465,18 @@ struct dlm_message {
struct dlm_rcom {
struct dlm_header rc_header;
- uint32_t rc_type; /* DLM_RCOM_ */
- int rc_result; /* multi-purpose */
- uint64_t rc_id; /* match reply with request */
- uint64_t rc_seq; /* sender's ls_recover_seq */
- uint64_t rc_seq_reply; /* remote ls_recover_seq */
+ __le32 rc_type; /* DLM_RCOM_ */
+ __le32 rc_result; /* multi-purpose */
+ __le64 rc_id; /* match reply with request */
+ __le64 rc_seq; /* sender's ls_recover_seq */
+ __le64 rc_seq_reply; /* remote ls_recover_seq */
char rc_buf[];
};
struct dlm_opt_header {
- uint16_t t_type;
- uint16_t t_length;
- uint32_t t_pad;
+ __le16 t_type;
+ __le16 t_length;
+ __le32 t_pad;
/* need to be 8 byte aligned */
char t_value[];
};
@@ -478,8 +486,8 @@ struct dlm_opts {
struct dlm_header o_header;
uint8_t o_nextcmd;
uint8_t o_pad;
- uint16_t o_optlen;
- uint32_t o_pad2;
+ __le16 o_optlen;
+ __le32 o_pad2;
char o_opts[];
};
@@ -554,8 +562,9 @@ struct dlm_ls {
uint32_t ls_generation;
uint32_t ls_exflags;
int ls_lvblen;
- int ls_count; /* refcount of processes in
+ atomic_t ls_count; /* refcount of processes in
the dlm using this ls */
+ wait_queue_head_t ls_count_wait;
int ls_create_count; /* create/release refcount */
unsigned long ls_flags; /* LSFL_ */
unsigned long ls_scan_time;
@@ -573,14 +582,17 @@ struct dlm_ls {
struct mutex ls_orphans_mutex;
struct list_head ls_orphans;
+#ifdef CONFIG_DLM_DEPRECATED_API
struct mutex ls_timeout_mutex;
struct list_head ls_timeout;
+#endif
spinlock_t ls_new_rsb_spin;
int ls_new_rsb_count;
struct list_head ls_new_rsb; /* new rsb structs */
spinlock_t ls_remove_spin;
+ wait_queue_head_t ls_remove_wait;
char ls_remove_name[DLM_RESNAME_MAXLEN+1];
char *ls_remove_names[DLM_REMOVE_NAMES_MAX];
int ls_remove_len;
@@ -610,8 +622,8 @@ struct dlm_ls {
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
- struct completion ls_members_done;
- int ls_members_result;
+ struct completion ls_recovery_done;
+ int ls_recovery_result;
struct miscdevice ls_device;
@@ -632,6 +644,8 @@ struct dlm_ls {
struct rw_semaphore ls_in_recovery; /* block local requests */
struct rw_semaphore ls_recv_active; /* block dlm_recv */
struct list_head ls_requestqueue;/* queue remote requests */
+ atomic_t ls_requestqueue_cnt;
+ wait_queue_head_t ls_requestqueue_wait;
struct mutex ls_requestqueue_mutex;
struct dlm_rcom *ls_recover_buf;
int ls_recover_nodeid; /* for debugging */
@@ -647,7 +661,7 @@ struct dlm_ls {
spinlock_t ls_recover_idr_lock;
wait_queue_head_t ls_wait_general;
wait_queue_head_t ls_recover_lock_wait;
- struct mutex ls_clear_proc_locks;
+ spinlock_t ls_clear_proc_locks;
struct list_head ls_root_list; /* root resources */
struct rw_semaphore ls_root_sem; /* protect root_list */
@@ -690,7 +704,9 @@ struct dlm_ls {
#define LSFL_RCOM_READY 5
#define LSFL_RCOM_WAIT 6
#define LSFL_UEVENT_WAIT 7
+#ifdef CONFIG_DLM_DEPRECATED_API
#define LSFL_TIMEWARN 8
+#endif
#define LSFL_CB_DELAY 9
#define LSFL_NODIR 10
@@ -743,9 +759,15 @@ static inline int dlm_no_directory(struct dlm_ls *ls)
return test_bit(LSFL_NODIR, &ls->ls_flags);
}
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_netlink_init(void);
void dlm_netlink_exit(void);
void dlm_timeout_warn(struct dlm_lkb *lkb);
+#else
+static inline int dlm_netlink_init(void) { return 0; }
+static inline void dlm_netlink_exit(void) { };
+static inline void dlm_timeout_warn(struct dlm_lkb *lkb) { };
+#endif
int dlm_plock_init(void);
void dlm_plock_exit(void);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index c502c065d007..94a72ede5764 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -53,6 +53,8 @@
R: do_xxxx()
L: receive_xxxx_reply() <- R: send_xxxx_reply()
*/
+#include <trace/events/dlm.h>
+
#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
@@ -294,12 +296,14 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
+#ifdef CONFIG_DLM_DEPRECATED_API
/* if the operation was a cancel, then return -DLM_ECANCEL, if a
timeout caused the cancel then return -ETIMEDOUT */
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
rv = -ETIMEDOUT;
}
+#endif
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
@@ -348,10 +352,12 @@ static void put_rsb(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
uint32_t bucket = r->res_bucket;
+ int rv;
- spin_lock(&ls->ls_rsbtbl[bucket].lock);
- kref_put(&r->res_ref, toss_rsb);
- spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+ rv = kref_put_lock(&r->res_ref, toss_rsb,
+ &ls->ls_rsbtbl[bucket].lock);
+ if (rv)
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
}
void dlm_put_rsb(struct dlm_rsb *r)
@@ -395,7 +401,7 @@ static int pre_rsb_struct(struct dlm_ls *ls)
unlock any spinlocks, go back and call pre_rsb_struct again.
Otherwise, take an rsb off the list and return it. */
-static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
+static int get_rsb_struct(struct dlm_ls *ls, const void *name, int len,
struct dlm_rsb **r_ret)
{
struct dlm_rsb *r;
@@ -406,7 +412,8 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
count = ls->ls_new_rsb_count;
spin_unlock(&ls->ls_new_rsb_spin);
log_debug(ls, "find_rsb retry %d %d %s",
- count, dlm_config.ci_new_rsb_count, name);
+ count, dlm_config.ci_new_rsb_count,
+ (const char *)name);
return -EAGAIN;
}
@@ -442,7 +449,7 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN);
}
-int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
+int dlm_search_rsb_tree(struct rb_root *tree, const void *name, int len,
struct dlm_rsb **r_ret)
{
struct rb_node *node = tree->rb_node;
@@ -540,7 +547,7 @@ static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree)
* while that rsb has a potentially stale master.)
*/
-static int find_rsb_dir(struct dlm_ls *ls, char *name, int len,
+static int find_rsb_dir(struct dlm_ls *ls, const void *name, int len,
uint32_t hash, uint32_t b,
int dir_nodeid, int from_nodeid,
unsigned int flags, struct dlm_rsb **r_ret)
@@ -600,7 +607,6 @@ static int find_rsb_dir(struct dlm_ls *ls, char *name, int len,
*/
kref_get(&r->res_ref);
- error = 0;
goto out_unlock;
@@ -719,7 +725,7 @@ static int find_rsb_dir(struct dlm_ls *ls, char *name, int len,
dlm_recover_locks) before we've made ourself master (in
dlm_recover_masters). */
-static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len,
+static int find_rsb_nodir(struct dlm_ls *ls, const void *name, int len,
uint32_t hash, uint32_t b,
int dir_nodeid, int from_nodeid,
unsigned int flags, struct dlm_rsb **r_ret)
@@ -813,8 +819,9 @@ static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len,
return error;
}
-static int find_rsb(struct dlm_ls *ls, char *name, int len, int from_nodeid,
- unsigned int flags, struct dlm_rsb **r_ret)
+static int find_rsb(struct dlm_ls *ls, const void *name, int len,
+ int from_nodeid, unsigned int flags,
+ struct dlm_rsb **r_ret)
{
uint32_t hash, b;
int dir_nodeid;
@@ -878,6 +885,88 @@ static int validate_master_nodeid(struct dlm_ls *ls, struct dlm_rsb *r,
}
}
+static void __dlm_master_lookup(struct dlm_ls *ls, struct dlm_rsb *r, int our_nodeid,
+ int from_nodeid, bool toss_list, unsigned int flags,
+ int *r_nodeid, int *result)
+{
+ int fix_master = (flags & DLM_LU_RECOVER_MASTER);
+ int from_master = (flags & DLM_LU_RECOVER_DIR);
+
+ if (r->res_dir_nodeid != our_nodeid) {
+ /* should not happen, but may as well fix it and carry on */
+ log_error(ls, "%s res_dir %d our %d %s", __func__,
+ r->res_dir_nodeid, our_nodeid, r->res_name);
+ r->res_dir_nodeid = our_nodeid;
+ }
+
+ if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) {
+ /* Recovery uses this function to set a new master when
+ * the previous master failed. Setting NEW_MASTER will
+ * force dlm_recover_masters to call recover_master on this
+ * rsb even though the res_nodeid is no longer removed.
+ */
+
+ r->res_master_nodeid = from_nodeid;
+ r->res_nodeid = from_nodeid;
+ rsb_set_flag(r, RSB_NEW_MASTER);
+
+ if (toss_list) {
+ /* I don't think we should ever find it on toss list. */
+ log_error(ls, "%s fix_master on toss", __func__);
+ dlm_dump_rsb(r);
+ }
+ }
+
+ if (from_master && (r->res_master_nodeid != from_nodeid)) {
+ /* this will happen if from_nodeid became master during
+ * a previous recovery cycle, and we aborted the previous
+ * cycle before recovering this master value
+ */
+
+ log_limit(ls, "%s from_master %d master_nodeid %d res_nodeid %d first %x %s",
+ __func__, from_nodeid, r->res_master_nodeid,
+ r->res_nodeid, r->res_first_lkid, r->res_name);
+
+ if (r->res_master_nodeid == our_nodeid) {
+ log_error(ls, "from_master %d our_master", from_nodeid);
+ dlm_dump_rsb(r);
+ goto ret_assign;
+ }
+
+ r->res_master_nodeid = from_nodeid;
+ r->res_nodeid = from_nodeid;
+ rsb_set_flag(r, RSB_NEW_MASTER);
+ }
+
+ if (!r->res_master_nodeid) {
+ /* this will happen if recovery happens while we're looking
+ * up the master for this rsb
+ */
+
+ log_debug(ls, "%s master 0 to %d first %x %s", __func__,
+ from_nodeid, r->res_first_lkid, r->res_name);
+ r->res_master_nodeid = from_nodeid;
+ r->res_nodeid = from_nodeid;
+ }
+
+ if (!from_master && !fix_master &&
+ (r->res_master_nodeid == from_nodeid)) {
+ /* this can happen when the master sends remove, the dir node
+ * finds the rsb on the keep list and ignores the remove,
+ * and the former master sends a lookup
+ */
+
+ log_limit(ls, "%s from master %d flags %x first %x %s",
+ __func__, from_nodeid, flags, r->res_first_lkid,
+ r->res_name);
+ }
+
+ ret_assign:
+ *r_nodeid = r->res_master_nodeid;
+ if (result)
+ *result = DLM_LU_MATCH;
+}
+
/*
* We're the dir node for this res and another node wants to know the
* master nodeid. During normal operation (non recovery) this is only
@@ -912,10 +1001,8 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
{
struct dlm_rsb *r = NULL;
uint32_t hash, b;
- int from_master = (flags & DLM_LU_RECOVER_DIR);
- int fix_master = (flags & DLM_LU_RECOVER_MASTER);
int our_nodeid = dlm_our_nodeid();
- int dir_nodeid, error, toss_list = 0;
+ int dir_nodeid, error;
if (len > DLM_RESNAME_MAXLEN)
return -EINVAL;
@@ -947,12 +1034,21 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
if (!error) {
/* because the rsb is active, we need to lock_rsb before
- checking/changing re_master_nodeid */
+ * checking/changing res_master_nodeid
+ */
hold_rsb(r);
spin_unlock(&ls->ls_rsbtbl[b].lock);
lock_rsb(r);
- goto found;
+
+ __dlm_master_lookup(ls, r, our_nodeid, from_nodeid, false,
+ flags, r_nodeid, result);
+
+ /* the rsb was active */
+ unlock_rsb(r);
+ put_rsb(r);
+
+ return 0;
}
error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
@@ -960,90 +1056,16 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
goto not_found;
/* because the rsb is inactive (on toss list), it's not refcounted
- and lock_rsb is not used, but is protected by the rsbtbl lock */
-
- toss_list = 1;
- found:
- if (r->res_dir_nodeid != our_nodeid) {
- /* should not happen, but may as well fix it and carry on */
- log_error(ls, "dlm_master_lookup res_dir %d our %d %s",
- r->res_dir_nodeid, our_nodeid, r->res_name);
- r->res_dir_nodeid = our_nodeid;
- }
-
- if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) {
- /* Recovery uses this function to set a new master when
- the previous master failed. Setting NEW_MASTER will
- force dlm_recover_masters to call recover_master on this
- rsb even though the res_nodeid is no longer removed. */
-
- r->res_master_nodeid = from_nodeid;
- r->res_nodeid = from_nodeid;
- rsb_set_flag(r, RSB_NEW_MASTER);
-
- if (toss_list) {
- /* I don't think we should ever find it on toss list. */
- log_error(ls, "dlm_master_lookup fix_master on toss");
- dlm_dump_rsb(r);
- }
- }
-
- if (from_master && (r->res_master_nodeid != from_nodeid)) {
- /* this will happen if from_nodeid became master during
- a previous recovery cycle, and we aborted the previous
- cycle before recovering this master value */
-
- log_limit(ls, "dlm_master_lookup from_master %d "
- "master_nodeid %d res_nodeid %d first %x %s",
- from_nodeid, r->res_master_nodeid, r->res_nodeid,
- r->res_first_lkid, r->res_name);
-
- if (r->res_master_nodeid == our_nodeid) {
- log_error(ls, "from_master %d our_master", from_nodeid);
- dlm_dump_rsb(r);
- goto out_found;
- }
-
- r->res_master_nodeid = from_nodeid;
- r->res_nodeid = from_nodeid;
- rsb_set_flag(r, RSB_NEW_MASTER);
- }
-
- if (!r->res_master_nodeid) {
- /* this will happen if recovery happens while we're looking
- up the master for this rsb */
-
- log_debug(ls, "dlm_master_lookup master 0 to %d first %x %s",
- from_nodeid, r->res_first_lkid, r->res_name);
- r->res_master_nodeid = from_nodeid;
- r->res_nodeid = from_nodeid;
- }
-
- if (!from_master && !fix_master &&
- (r->res_master_nodeid == from_nodeid)) {
- /* this can happen when the master sends remove, the dir node
- finds the rsb on the keep list and ignores the remove,
- and the former master sends a lookup */
+ * and lock_rsb is not used, but is protected by the rsbtbl lock
+ */
- log_limit(ls, "dlm_master_lookup from master %d flags %x "
- "first %x %s", from_nodeid, flags,
- r->res_first_lkid, r->res_name);
- }
+ __dlm_master_lookup(ls, r, our_nodeid, from_nodeid, true, flags,
+ r_nodeid, result);
- out_found:
- *r_nodeid = r->res_master_nodeid;
- if (result)
- *result = DLM_LU_MATCH;
+ r->res_toss_time = jiffies;
+ /* the rsb was inactive (on toss list) */
+ spin_unlock(&ls->ls_rsbtbl[b].lock);
- if (toss_list) {
- r->res_toss_time = jiffies;
- /* the rsb was inactive (on toss list) */
- spin_unlock(&ls->ls_rsbtbl[b].lock);
- } else {
- /* the rsb was active */
- unlock_rsb(r);
- put_rsb(r);
- }
return 0;
not_found:
@@ -1074,7 +1096,6 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
if (result)
*result = DLM_LU_ADD;
*r_nodeid = from_nodeid;
- error = 0;
out_unlock:
spin_unlock(&ls->ls_rsbtbl[b].lock);
return error;
@@ -1178,7 +1199,8 @@ static void detach_lkb(struct dlm_lkb *lkb)
}
}
-static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
+static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
+ int start, int end)
{
struct dlm_lkb *lkb;
int rv;
@@ -1192,14 +1214,16 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
kref_init(&lkb->lkb_ref);
INIT_LIST_HEAD(&lkb->lkb_ownqueue);
INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&lkb->lkb_time_list);
+#endif
INIT_LIST_HEAD(&lkb->lkb_cb_list);
mutex_init(&lkb->lkb_cb_mutex);
INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
idr_preload(GFP_NOFS);
spin_lock(&ls->ls_lkbidr_spin);
- rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT);
+ rv = idr_alloc(&ls->ls_lkbidr, lkb, start, end, GFP_NOWAIT);
if (rv >= 0)
lkb->lkb_id = rv;
spin_unlock(&ls->ls_lkbidr_spin);
@@ -1215,6 +1239,11 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
return 0;
}
+static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
+{
+ return _create_lkb(ls, lkb_ret, 1, 0);
+}
+
static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
{
struct dlm_lkb *lkb;
@@ -1245,9 +1274,11 @@ static void kill_lkb(struct kref *kref)
static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
uint32_t lkid = lkb->lkb_id;
+ int rv;
- spin_lock(&ls->ls_lkbidr_spin);
- if (kref_put(&lkb->lkb_ref, kill_lkb)) {
+ rv = kref_put_lock(&lkb->lkb_ref, kill_lkb,
+ &ls->ls_lkbidr_spin);
+ if (rv) {
idr_remove(&ls->ls_lkbidr, lkid);
spin_unlock(&ls->ls_lkbidr_spin);
@@ -1257,11 +1288,9 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
if (lkb->lkb_lvbptr && is_master_copy(lkb))
dlm_free_lvb(lkb->lkb_lvbptr);
dlm_free_lkb(lkb);
- return 1;
- } else {
- spin_unlock(&ls->ls_lkbidr_spin);
- return 0;
}
+
+ return rv;
}
int dlm_put_lkb(struct dlm_lkb *lkb)
@@ -1283,6 +1312,13 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
kref_get(&lkb->lkb_ref);
}
+static void unhold_lkb_assert(struct kref *kref)
+{
+ struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref);
+
+ DLM_ASSERT(false, dlm_print_lkb(lkb););
+}
+
/* This is called when we need to remove a reference and are certain
it's not the last ref. e.g. del_lkb is always called between a
find_lkb/put_lkb and is always the inverse of a previous add_lkb.
@@ -1290,21 +1326,23 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
static inline void unhold_lkb(struct dlm_lkb *lkb)
{
- int rv;
- rv = kref_put(&lkb->lkb_ref, kill_lkb);
- DLM_ASSERT(!rv, dlm_print_lkb(lkb););
+ kref_put(&lkb->lkb_ref, unhold_lkb_assert);
}
static void lkb_add_ordered(struct list_head *new, struct list_head *head,
int mode)
{
- struct dlm_lkb *lkb = NULL;
+ struct dlm_lkb *lkb = NULL, *iter;
- list_for_each_entry(lkb, head, lkb_statequeue)
- if (lkb->lkb_rqmode < mode)
+ list_for_each_entry(iter, head, lkb_statequeue)
+ if (iter->lkb_rqmode < mode) {
+ lkb = iter;
+ list_add_tail(new, &iter->lkb_statequeue);
break;
+ }
- __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
+ if (!lkb)
+ list_add_tail(new, head);
}
/* add/remove lkb to rsb's grant/convert/wait queue */
@@ -1375,75 +1413,6 @@ static int msg_reply_type(int mstype)
return -1;
}
-static int nodeid_warned(int nodeid, int num_nodes, int *warned)
-{
- int i;
-
- for (i = 0; i < num_nodes; i++) {
- if (!warned[i]) {
- warned[i] = nodeid;
- return 0;
- }
- if (warned[i] == nodeid)
- return 1;
- }
- return 0;
-}
-
-void dlm_scan_waiters(struct dlm_ls *ls)
-{
- struct dlm_lkb *lkb;
- s64 us;
- s64 debug_maxus = 0;
- u32 debug_scanned = 0;
- u32 debug_expired = 0;
- int num_nodes = 0;
- int *warned = NULL;
-
- if (!dlm_config.ci_waitwarn_us)
- return;
-
- mutex_lock(&ls->ls_waiters_mutex);
-
- list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
- if (!lkb->lkb_wait_time)
- continue;
-
- debug_scanned++;
-
- us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
-
- if (us < dlm_config.ci_waitwarn_us)
- continue;
-
- lkb->lkb_wait_time = 0;
-
- debug_expired++;
- if (us > debug_maxus)
- debug_maxus = us;
-
- if (!num_nodes) {
- num_nodes = ls->ls_num_nodes;
- warned = kcalloc(num_nodes, sizeof(int), GFP_KERNEL);
- }
- if (!warned)
- continue;
- if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
- continue;
-
- log_error(ls, "waitwarn %x %lld %d us check connection to "
- "node %d", lkb->lkb_id, (long long)us,
- dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
- }
- mutex_unlock(&ls->ls_waiters_mutex);
- kfree(warned);
-
- if (debug_expired)
- log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
- debug_scanned, debug_expired,
- dlm_config.ci_waitwarn_us, (long long)debug_maxus);
-}
-
/* add/remove lkb from global waiters list of lkb's waiting for
a reply from a remote node */
@@ -1487,7 +1456,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
lkb->lkb_wait_count++;
lkb->lkb_wait_type = mstype;
- lkb->lkb_wait_time = ktime_get();
lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
hold_lkb(lkb);
list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
@@ -1551,6 +1519,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
lkb->lkb_wait_type = 0;
lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
lkb->lkb_wait_count--;
+ unhold_lkb(lkb);
goto out_del;
}
@@ -1563,8 +1532,8 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
}
log_error(ls, "remwait error %x remote %d %x msg %d flags %x no wait",
- lkb->lkb_id, ms ? ms->m_header.h_nodeid : 0, lkb->lkb_remid,
- mstype, lkb->lkb_flags);
+ lkb->lkb_id, ms ? le32_to_cpu(ms->m_header.h_nodeid) : 0,
+ lkb->lkb_remid, mstype, lkb->lkb_flags);
return -1;
out_del:
@@ -1577,6 +1546,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
log_error(ls, "remwait error %x reply %d wait_type %d overlap",
lkb->lkb_id, mstype, lkb->lkb_wait_type);
lkb->lkb_wait_count--;
+ unhold_lkb(lkb);
lkb->lkb_wait_type = 0;
}
@@ -1609,30 +1579,33 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
int error;
- if (ms->m_flags != DLM_IFL_STUB_MS)
+ if (ms->m_flags != cpu_to_le32(DLM_IFL_STUB_MS))
mutex_lock(&ls->ls_waiters_mutex);
- error = _remove_from_waiters(lkb, ms->m_type, ms);
- if (ms->m_flags != DLM_IFL_STUB_MS)
+ error = _remove_from_waiters(lkb, le32_to_cpu(ms->m_type), ms);
+ if (ms->m_flags != cpu_to_le32(DLM_IFL_STUB_MS))
mutex_unlock(&ls->ls_waiters_mutex);
return error;
}
/* If there's an rsb for the same resource being removed, ensure
- that the remove message is sent before the new lookup message.
- It should be rare to need a delay here, but if not, then it may
- be worthwhile to add a proper wait mechanism rather than a delay. */
+ * that the remove message is sent before the new lookup message.
+ */
+
+#define DLM_WAIT_PENDING_COND(ls, r) \
+ (ls->ls_remove_len && \
+ !rsb_cmp(r, ls->ls_remove_name, \
+ ls->ls_remove_len))
static void wait_pending_remove(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
restart:
spin_lock(&ls->ls_remove_spin);
- if (ls->ls_remove_len &&
- !rsb_cmp(r, ls->ls_remove_name, ls->ls_remove_len)) {
+ if (DLM_WAIT_PENDING_COND(ls, r)) {
log_debug(ls, "delay lookup for remove dir %d %s",
- r->res_dir_nodeid, r->res_name);
+ r->res_dir_nodeid, r->res_name);
spin_unlock(&ls->ls_remove_spin);
- msleep(1);
+ wait_event(ls->ls_remove_wait, !DLM_WAIT_PENDING_COND(ls, r));
goto restart;
}
spin_unlock(&ls->ls_remove_spin);
@@ -1792,6 +1765,7 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
ls->ls_remove_len = 0;
memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN);
spin_unlock(&ls->ls_remove_spin);
+ wake_up(&ls->ls_remove_wait);
dlm_free_rsb(r);
}
@@ -1809,6 +1783,7 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
}
}
+#ifdef CONFIG_DLM_DEPRECATED_API
static void add_timeout(struct dlm_lkb *lkb)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
@@ -1854,7 +1829,7 @@ static void del_timeout(struct dlm_lkb *lkb)
void dlm_scan_timeout(struct dlm_ls *ls)
{
struct dlm_rsb *r;
- struct dlm_lkb *lkb;
+ struct dlm_lkb *lkb = NULL, *iter;
int do_cancel, do_warn;
s64 wait_us;
@@ -1865,27 +1840,28 @@ void dlm_scan_timeout(struct dlm_ls *ls)
do_cancel = 0;
do_warn = 0;
mutex_lock(&ls->ls_timeout_mutex);
- list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+ list_for_each_entry(iter, &ls->ls_timeout, lkb_time_list) {
wait_us = ktime_to_us(ktime_sub(ktime_get(),
- lkb->lkb_timestamp));
+ iter->lkb_timestamp));
- if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
- wait_us >= (lkb->lkb_timeout_cs * 10000))
+ if ((iter->lkb_exflags & DLM_LKF_TIMEOUT) &&
+ wait_us >= (iter->lkb_timeout_cs * 10000))
do_cancel = 1;
- if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+ if ((iter->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
wait_us >= dlm_config.ci_timewarn_cs * 10000)
do_warn = 1;
if (!do_cancel && !do_warn)
continue;
- hold_lkb(lkb);
+ hold_lkb(iter);
+ lkb = iter;
break;
}
mutex_unlock(&ls->ls_timeout_mutex);
- if (!do_cancel && !do_warn)
+ if (!lkb)
break;
r = lkb->lkb_resource;
@@ -1928,17 +1904,11 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
mutex_unlock(&ls->ls_timeout_mutex);
-
- if (!dlm_config.ci_waitwarn_us)
- return;
-
- mutex_lock(&ls->ls_waiters_mutex);
- list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
- if (ktime_to_us(lkb->lkb_wait_time))
- lkb->lkb_wait_time = ktime_get();
- }
- mutex_unlock(&ls->ls_waiters_mutex);
}
+#else
+static void add_timeout(struct dlm_lkb *lkb) { }
+static void del_timeout(struct dlm_lkb *lkb) { }
+#endif
/* lkb is master or local copy */
@@ -2039,7 +2009,7 @@ static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
if (len > r->res_ls->ls_lvblen)
len = r->res_ls->ls_lvblen;
memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
- lkb->lkb_lvbseq = ms->m_lvbseq;
+ lkb->lkb_lvbseq = le32_to_cpu(ms->m_lvbseq);
}
}
@@ -2170,10 +2140,10 @@ static void munge_demoted(struct dlm_lkb *lkb)
static void munge_altmode(struct dlm_lkb *lkb, struct dlm_message *ms)
{
- if (ms->m_type != DLM_MSG_REQUEST_REPLY &&
- ms->m_type != DLM_MSG_GRANT) {
+ if (ms->m_type != cpu_to_le32(DLM_MSG_REQUEST_REPLY) &&
+ ms->m_type != cpu_to_le32(DLM_MSG_GRANT)) {
log_print("munge_altmode %x invalid reply type %d",
- lkb->lkb_id, ms->m_type);
+ lkb->lkb_id, le32_to_cpu(ms->m_type));
return;
}
@@ -2803,12 +2773,20 @@ static void confirm_master(struct dlm_rsb *r, int error)
}
}
+#ifdef CONFIG_DLM_DEPRECATED_API
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
int namelen, unsigned long timeout_cs,
void (*ast) (void *astparam),
void *astparam,
void (*bast) (void *astparam, int mode),
struct dlm_args *args)
+#else
+static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
+ int namelen, void (*ast)(void *astparam),
+ void *astparam,
+ void (*bast)(void *astparam, int mode),
+ struct dlm_args *args)
+#endif
{
int rv = -EINVAL;
@@ -2861,7 +2839,9 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
args->astfn = ast;
args->astparam = astparam;
args->bastfn = bast;
+#ifdef CONFIG_DLM_DEPRECATED_API
args->timeout = timeout_cs;
+#endif
args->mode = mode;
args->lksb = lksb;
rv = 0;
@@ -2886,24 +2866,25 @@ static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args)
static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_args *args)
{
- int rv = -EINVAL;
+ int rv = -EBUSY;
if (args->flags & DLM_LKF_CONVERT) {
- if (lkb->lkb_flags & DLM_IFL_MSTCPY)
+ if (lkb->lkb_status != DLM_LKSTS_GRANTED)
goto out;
- if (args->flags & DLM_LKF_QUECVT &&
- !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
+ /* lock not allowed if there's any op in progress */
+ if (lkb->lkb_wait_type || lkb->lkb_wait_count)
goto out;
- rv = -EBUSY;
- if (lkb->lkb_status != DLM_LKSTS_GRANTED)
+ if (is_overlap(lkb))
goto out;
- if (lkb->lkb_wait_type)
+ rv = -EINVAL;
+ if (lkb->lkb_flags & DLM_IFL_MSTCPY)
goto out;
- if (is_overlap(lkb))
+ if (args->flags & DLM_LKF_QUECVT &&
+ !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
goto out;
}
@@ -2916,14 +2897,30 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
lkb->lkb_lksb = args->lksb;
lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
lkb->lkb_ownpid = (int) current->pid;
+#ifdef CONFIG_DLM_DEPRECATED_API
lkb->lkb_timeout_cs = args->timeout;
+#endif
rv = 0;
out:
- if (rv)
- log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s",
+ switch (rv) {
+ case 0:
+ break;
+ case -EINVAL:
+ /* annoy the user because dlm usage is wrong */
+ WARN_ON(1);
+ log_error(ls, "%s %d %x %x %x %d %d %s", __func__,
rv, lkb->lkb_id, lkb->lkb_flags, args->flags,
lkb->lkb_status, lkb->lkb_wait_type,
lkb->lkb_resource->res_name);
+ break;
+ default:
+ log_debug(ls, "%s %d %x %x %x %d %d %s", __func__,
+ rv, lkb->lkb_id, lkb->lkb_flags, args->flags,
+ lkb->lkb_status, lkb->lkb_wait_type,
+ lkb->lkb_resource->res_name);
+ break;
+ }
+
return rv;
}
@@ -2937,23 +2934,12 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
- int rv = -EINVAL;
-
- if (lkb->lkb_flags & DLM_IFL_MSTCPY) {
- log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id);
- dlm_print_lkb(lkb);
- goto out;
- }
+ int rv = -EBUSY;
- /* an lkb may still exist even though the lock is EOL'ed due to a
- cancel, unlock or failed noqueue request; an app can't use these
- locks; return same error as if the lkid had not been found at all */
-
- if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) {
- log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id);
- rv = -ENOENT;
+ /* normal unlock not allowed if there's any op in progress */
+ if (!(args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) &&
+ (lkb->lkb_wait_type || lkb->lkb_wait_count))
goto out;
- }
/* an lkb may be waiting for an rsb lookup to complete where the
lookup was initiated by another lock */
@@ -2968,7 +2954,24 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
unhold_lkb(lkb); /* undoes create_lkb() */
}
/* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */
- rv = -EBUSY;
+ goto out;
+ }
+
+ rv = -EINVAL;
+ if (lkb->lkb_flags & DLM_IFL_MSTCPY) {
+ log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id);
+ dlm_print_lkb(lkb);
+ goto out;
+ }
+
+ /* an lkb may still exist even though the lock is EOL'ed due to a
+ * cancel, unlock or failed noqueue request; an app can't use these
+ * locks; return same error as if the lkid had not been found at all
+ */
+
+ if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) {
+ log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id);
+ rv = -ENOENT;
goto out;
}
@@ -3041,14 +3044,8 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
goto out;
}
/* add_to_waiters() will set OVERLAP_UNLOCK */
- goto out_ok;
}
- /* normal unlock not allowed if there's any op in progress */
- rv = -EBUSY;
- if (lkb->lkb_wait_type || lkb->lkb_wait_count)
- goto out;
-
out_ok:
/* an overlapping op shouldn't blow away exflags from other op */
lkb->lkb_exflags |= args->flags;
@@ -3056,11 +3053,25 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
lkb->lkb_astparam = args->astparam;
rv = 0;
out:
- if (rv)
- log_debug(ls, "validate_unlock_args %d %x %x %x %x %d %s", rv,
+ switch (rv) {
+ case 0:
+ break;
+ case -EINVAL:
+ /* annoy the user because dlm usage is wrong */
+ WARN_ON(1);
+ log_error(ls, "%s %d %x %x %x %x %d %s", __func__, rv,
lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags,
args->flags, lkb->lkb_wait_type,
lkb->lkb_resource->res_name);
+ break;
+ default:
+ log_debug(ls, "%s %d %x %x %x %x %d %s", __func__, rv,
+ lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags,
+ args->flags, lkb->lkb_wait_type,
+ lkb->lkb_resource->res_name);
+ break;
+ }
+
return rv;
}
@@ -3311,8 +3322,9 @@ static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
* request_lock(), convert_lock(), unlock_lock(), cancel_lock()
*/
-static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name,
- int len, struct dlm_args *args)
+static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
+ const void *name, int len,
+ struct dlm_args *args)
{
struct dlm_rsb *r;
int error;
@@ -3411,7 +3423,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
int mode,
struct dlm_lksb *lksb,
uint32_t flags,
- void *name,
+ const void *name,
unsigned int namelen,
uint32_t parent_lkid,
void (*ast) (void *astarg),
@@ -3437,8 +3449,15 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error)
goto out;
+ trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags);
+
+#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
astarg, bast, &args);
+#else
+ error = set_lock_args(mode, lksb, flags, namelen, ast, astarg, bast,
+ &args);
+#endif
if (error)
goto out_put;
@@ -3450,6 +3469,8 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error == -EINPROGRESS)
error = 0;
out_put:
+ trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, true);
+
if (convert || error)
__put_lkb(ls, lkb);
if (error == -EAGAIN || error == -EDEADLK)
@@ -3481,6 +3502,8 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
if (error)
goto out;
+ trace_dlm_unlock_start(ls, lkb, flags);
+
error = set_unlock_args(flags, astarg, &args);
if (error)
goto out_put;
@@ -3495,6 +3518,8 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
if (error == -EBUSY && (flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)))
error = 0;
out_put:
+ trace_dlm_unlock_end(ls, lkb, flags, error);
+
dlm_put_lkb(lkb);
out:
dlm_unlock_recovery(ls);
@@ -3543,13 +3568,13 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
ms = (struct dlm_message *) mb;
- ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
- ms->m_header.u.h_lockspace = ls->ls_global_id;
- ms->m_header.h_nodeid = dlm_our_nodeid();
- ms->m_header.h_length = mb_len;
+ ms->m_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+ ms->m_header.u.h_lockspace = cpu_to_le32(ls->ls_global_id);
+ ms->m_header.h_nodeid = cpu_to_le32(dlm_our_nodeid());
+ ms->m_header.h_length = cpu_to_le16(mb_len);
ms->m_header.h_cmd = DLM_MSG;
- ms->m_type = mstype;
+ ms->m_type = cpu_to_le32(mstype);
*mh_ret = mh;
*ms_ret = ms;
@@ -3588,7 +3613,6 @@ static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
{
- dlm_message_out(ms);
dlm_midcomms_commit_mhandle(mh);
return 0;
}
@@ -3596,41 +3620,41 @@ static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
struct dlm_message *ms)
{
- ms->m_nodeid = lkb->lkb_nodeid;
- ms->m_pid = lkb->lkb_ownpid;
- ms->m_lkid = lkb->lkb_id;
- ms->m_remid = lkb->lkb_remid;
- ms->m_exflags = lkb->lkb_exflags;
- ms->m_sbflags = lkb->lkb_sbflags;
- ms->m_flags = lkb->lkb_flags;
- ms->m_lvbseq = lkb->lkb_lvbseq;
- ms->m_status = lkb->lkb_status;
- ms->m_grmode = lkb->lkb_grmode;
- ms->m_rqmode = lkb->lkb_rqmode;
- ms->m_hash = r->res_hash;
+ ms->m_nodeid = cpu_to_le32(lkb->lkb_nodeid);
+ ms->m_pid = cpu_to_le32(lkb->lkb_ownpid);
+ ms->m_lkid = cpu_to_le32(lkb->lkb_id);
+ ms->m_remid = cpu_to_le32(lkb->lkb_remid);
+ ms->m_exflags = cpu_to_le32(lkb->lkb_exflags);
+ ms->m_sbflags = cpu_to_le32(lkb->lkb_sbflags);
+ ms->m_flags = cpu_to_le32(lkb->lkb_flags);
+ ms->m_lvbseq = cpu_to_le32(lkb->lkb_lvbseq);
+ ms->m_status = cpu_to_le32(lkb->lkb_status);
+ ms->m_grmode = cpu_to_le32(lkb->lkb_grmode);
+ ms->m_rqmode = cpu_to_le32(lkb->lkb_rqmode);
+ ms->m_hash = cpu_to_le32(r->res_hash);
/* m_result and m_bastmode are set from function args,
not from lkb fields */
if (lkb->lkb_bastfn)
- ms->m_asts |= DLM_CB_BAST;
+ ms->m_asts |= cpu_to_le32(DLM_CB_BAST);
if (lkb->lkb_astfn)
- ms->m_asts |= DLM_CB_CAST;
+ ms->m_asts |= cpu_to_le32(DLM_CB_CAST);
/* compare with switch in create_message; send_remove() doesn't
use send_args() */
switch (ms->m_type) {
- case DLM_MSG_REQUEST:
- case DLM_MSG_LOOKUP:
+ case cpu_to_le32(DLM_MSG_REQUEST):
+ case cpu_to_le32(DLM_MSG_LOOKUP):
memcpy(ms->m_extra, r->res_name, r->res_length);
break;
- case DLM_MSG_CONVERT:
- case DLM_MSG_UNLOCK:
- case DLM_MSG_REQUEST_REPLY:
- case DLM_MSG_CONVERT_REPLY:
- case DLM_MSG_GRANT:
- if (!lkb->lkb_lvbptr)
+ case cpu_to_le32(DLM_MSG_CONVERT):
+ case cpu_to_le32(DLM_MSG_UNLOCK):
+ case cpu_to_le32(DLM_MSG_REQUEST_REPLY):
+ case cpu_to_le32(DLM_MSG_CONVERT_REPLY):
+ case cpu_to_le32(DLM_MSG_GRANT):
+ if (!lkb->lkb_lvbptr || !(lkb->lkb_exflags & DLM_LKF_VALBLK))
break;
memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
break;
@@ -3679,8 +3703,8 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
/* down conversions go without a reply from the master */
if (!error && down_conversion(lkb)) {
remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
- r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
- r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
+ r->res_ls->ls_stub_ms.m_flags = cpu_to_le32(DLM_IFL_STUB_MS);
+ r->res_ls->ls_stub_ms.m_type = cpu_to_le32(DLM_MSG_CONVERT_REPLY);
r->res_ls->ls_stub_ms.m_result = 0;
__receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
}
@@ -3737,7 +3761,7 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode)
send_args(r, lkb, ms);
- ms->m_bastmode = mode;
+ ms->m_bastmode = cpu_to_le32(mode);
error = send_message(mh, ms);
out:
@@ -3785,7 +3809,7 @@ static int send_remove(struct dlm_rsb *r)
goto out;
memcpy(ms->m_extra, r->res_name, r->res_length);
- ms->m_hash = r->res_hash;
+ ms->m_hash = cpu_to_le32(r->res_hash);
error = send_message(mh, ms);
out:
@@ -3807,7 +3831,7 @@ static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
send_args(r, lkb, ms);
- ms->m_result = rv;
+ ms->m_result = cpu_to_le32(to_dlm_errno(rv));
error = send_message(mh, ms);
out:
@@ -3840,15 +3864,15 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
struct dlm_rsb *r = &ls->ls_stub_rsb;
struct dlm_message *ms;
struct dlm_mhandle *mh;
- int error, nodeid = ms_in->m_header.h_nodeid;
+ int error, nodeid = le32_to_cpu(ms_in->m_header.h_nodeid);
error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh);
if (error)
goto out;
ms->m_lkid = ms_in->m_lkid;
- ms->m_result = rv;
- ms->m_nodeid = ret_nodeid;
+ ms->m_result = cpu_to_le32(to_dlm_errno(rv));
+ ms->m_nodeid = cpu_to_le32(ret_nodeid);
error = send_message(mh, ms);
out:
@@ -3861,25 +3885,26 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
{
- lkb->lkb_exflags = ms->m_exflags;
- lkb->lkb_sbflags = ms->m_sbflags;
+ lkb->lkb_exflags = le32_to_cpu(ms->m_exflags);
+ lkb->lkb_sbflags = le32_to_cpu(ms->m_sbflags);
lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
- (ms->m_flags & 0x0000FFFF);
+ (le32_to_cpu(ms->m_flags) & 0x0000FFFF);
}
static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
{
- if (ms->m_flags == DLM_IFL_STUB_MS)
+ if (ms->m_flags == cpu_to_le32(DLM_IFL_STUB_MS))
return;
- lkb->lkb_sbflags = ms->m_sbflags;
+ lkb->lkb_sbflags = le32_to_cpu(ms->m_sbflags);
lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
- (ms->m_flags & 0x0000FFFF);
+ (le32_to_cpu(ms->m_flags) & 0x0000FFFF);
}
static int receive_extralen(struct dlm_message *ms)
{
- return (ms->m_header.h_length - sizeof(struct dlm_message));
+ return (le16_to_cpu(ms->m_header.h_length) -
+ sizeof(struct dlm_message));
}
static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb,
@@ -3913,14 +3938,14 @@ static void fake_astfn(void *astparam)
static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_message *ms)
{
- lkb->lkb_nodeid = ms->m_header.h_nodeid;
- lkb->lkb_ownpid = ms->m_pid;
- lkb->lkb_remid = ms->m_lkid;
+ lkb->lkb_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
+ lkb->lkb_ownpid = le32_to_cpu(ms->m_pid);
+ lkb->lkb_remid = le32_to_cpu(ms->m_lkid);
lkb->lkb_grmode = DLM_LOCK_IV;
- lkb->lkb_rqmode = ms->m_rqmode;
+ lkb->lkb_rqmode = le32_to_cpu(ms->m_rqmode);
- lkb->lkb_bastfn = (ms->m_asts & DLM_CB_BAST) ? &fake_bastfn : NULL;
- lkb->lkb_astfn = (ms->m_asts & DLM_CB_CAST) ? &fake_astfn : NULL;
+ lkb->lkb_bastfn = (ms->m_asts & cpu_to_le32(DLM_CB_BAST)) ? &fake_bastfn : NULL;
+ lkb->lkb_astfn = (ms->m_asts & cpu_to_le32(DLM_CB_CAST)) ? &fake_astfn : NULL;
if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
/* lkb was just created so there won't be an lvb yet */
@@ -3941,8 +3966,8 @@ static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
if (receive_lvb(ls, lkb, ms))
return -ENOMEM;
- lkb->lkb_rqmode = ms->m_rqmode;
- lkb->lkb_lvbseq = ms->m_lvbseq;
+ lkb->lkb_rqmode = le32_to_cpu(ms->m_rqmode);
+ lkb->lkb_lvbseq = le32_to_cpu(ms->m_lvbseq);
return 0;
}
@@ -3961,8 +3986,8 @@ static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms)
{
struct dlm_lkb *lkb = &ls->ls_stub_lkb;
- lkb->lkb_nodeid = ms->m_header.h_nodeid;
- lkb->lkb_remid = ms->m_lkid;
+ lkb->lkb_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
+ lkb->lkb_remid = le32_to_cpu(ms->m_lkid);
}
/* This is called after the rsb is locked so that we can safely inspect
@@ -3970,27 +3995,36 @@ static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms)
static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
{
- int from = ms->m_header.h_nodeid;
+ int from = le32_to_cpu(ms->m_header.h_nodeid);
int error = 0;
+ /* currently mixing of user/kernel locks is not supported */
+ if (ms->m_flags & cpu_to_le32(DLM_IFL_USER) &&
+ ~lkb->lkb_flags & DLM_IFL_USER) {
+ log_error(lkb->lkb_resource->res_ls,
+ "got user dlm message for a kernel lock");
+ error = -EINVAL;
+ goto out;
+ }
+
switch (ms->m_type) {
- case DLM_MSG_CONVERT:
- case DLM_MSG_UNLOCK:
- case DLM_MSG_CANCEL:
+ case cpu_to_le32(DLM_MSG_CONVERT):
+ case cpu_to_le32(DLM_MSG_UNLOCK):
+ case cpu_to_le32(DLM_MSG_CANCEL):
if (!is_master_copy(lkb) || lkb->lkb_nodeid != from)
error = -EINVAL;
break;
- case DLM_MSG_CONVERT_REPLY:
- case DLM_MSG_UNLOCK_REPLY:
- case DLM_MSG_CANCEL_REPLY:
- case DLM_MSG_GRANT:
- case DLM_MSG_BAST:
+ case cpu_to_le32(DLM_MSG_CONVERT_REPLY):
+ case cpu_to_le32(DLM_MSG_UNLOCK_REPLY):
+ case cpu_to_le32(DLM_MSG_CANCEL_REPLY):
+ case cpu_to_le32(DLM_MSG_GRANT):
+ case cpu_to_le32(DLM_MSG_BAST):
if (!is_process_copy(lkb) || lkb->lkb_nodeid != from)
error = -EINVAL;
break;
- case DLM_MSG_REQUEST_REPLY:
+ case cpu_to_le32(DLM_MSG_REQUEST_REPLY):
if (!is_process_copy(lkb))
error = -EINVAL;
else if (lkb->lkb_nodeid != -1 && lkb->lkb_nodeid != from)
@@ -4001,11 +4035,12 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
error = -EINVAL;
}
+out:
if (error)
log_error(lkb->lkb_resource->res_ls,
"ignore invalid message %d from %d %x %x %x %d",
- ms->m_type, from, lkb->lkb_id, lkb->lkb_remid,
- lkb->lkb_flags, lkb->lkb_nodeid);
+ le32_to_cpu(ms->m_type), from, lkb->lkb_id,
+ lkb->lkb_remid, lkb->lkb_flags, lkb->lkb_nodeid);
return error;
}
@@ -4054,17 +4089,19 @@ static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len)
rv = _create_message(ls, sizeof(struct dlm_message) + len,
dir_nodeid, DLM_MSG_REMOVE, &ms, &mh);
if (rv)
- return;
+ goto out;
memcpy(ms->m_extra, name, len);
- ms->m_hash = hash;
+ ms->m_hash = cpu_to_le32(hash);
send_message(mh, ms);
+out:
spin_lock(&ls->ls_remove_spin);
ls->ls_remove_len = 0;
memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN);
spin_unlock(&ls->ls_remove_spin);
+ wake_up(&ls->ls_remove_wait);
}
static int receive_request(struct dlm_ls *ls, struct dlm_message *ms)
@@ -4074,7 +4111,7 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms)
int from_nodeid;
int error, namelen = 0;
- from_nodeid = ms->m_header.h_nodeid;
+ from_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
error = create_lkb(ls, &lkb);
if (error)
@@ -4147,7 +4184,7 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms)
if (error != -ENOTBLK) {
log_limit(ls, "receive_request %x from %d %d",
- ms->m_lkid, from_nodeid, error);
+ le32_to_cpu(ms->m_lkid), from_nodeid, error);
}
if (namelen && error == -EBADR) {
@@ -4166,15 +4203,16 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
struct dlm_rsb *r;
int error, reply = 1;
- error = find_lkb(ls, ms->m_remid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
if (error)
goto fail;
- if (lkb->lkb_remid != ms->m_lkid) {
+ if (lkb->lkb_remid != le32_to_cpu(ms->m_lkid)) {
log_error(ls, "receive_convert %x remid %x recover_seq %llu "
"remote %d %x", lkb->lkb_id, lkb->lkb_remid,
(unsigned long long)lkb->lkb_recover_seq,
- ms->m_header.h_nodeid, ms->m_lkid);
+ le32_to_cpu(ms->m_header.h_nodeid),
+ le32_to_cpu(ms->m_lkid));
error = -ENOENT;
dlm_put_lkb(lkb);
goto fail;
@@ -4221,14 +4259,15 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
struct dlm_rsb *r;
int error;
- error = find_lkb(ls, ms->m_remid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
if (error)
goto fail;
- if (lkb->lkb_remid != ms->m_lkid) {
+ if (lkb->lkb_remid != le32_to_cpu(ms->m_lkid)) {
log_error(ls, "receive_unlock %x remid %x remote %d %x",
lkb->lkb_id, lkb->lkb_remid,
- ms->m_header.h_nodeid, ms->m_lkid);
+ le32_to_cpu(ms->m_header.h_nodeid),
+ le32_to_cpu(ms->m_lkid));
error = -ENOENT;
dlm_put_lkb(lkb);
goto fail;
@@ -4272,7 +4311,7 @@ static int receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
struct dlm_rsb *r;
int error;
- error = find_lkb(ls, ms->m_remid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
if (error)
goto fail;
@@ -4308,7 +4347,7 @@ static int receive_grant(struct dlm_ls *ls, struct dlm_message *ms)
struct dlm_rsb *r;
int error;
- error = find_lkb(ls, ms->m_remid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
if (error)
return error;
@@ -4339,7 +4378,7 @@ static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms)
struct dlm_rsb *r;
int error;
- error = find_lkb(ls, ms->m_remid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
if (error)
return error;
@@ -4352,8 +4391,8 @@ static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms)
if (error)
goto out;
- queue_bast(r, lkb, ms->m_bastmode);
- lkb->lkb_highbast = ms->m_bastmode;
+ queue_bast(r, lkb, le32_to_cpu(ms->m_bastmode));
+ lkb->lkb_highbast = le32_to_cpu(ms->m_bastmode);
out:
unlock_rsb(r);
put_rsb(r);
@@ -4365,7 +4404,7 @@ static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms)
{
int len, error, ret_nodeid, from_nodeid, our_nodeid;
- from_nodeid = ms->m_header.h_nodeid;
+ from_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
our_nodeid = dlm_our_nodeid();
len = receive_extralen(ms);
@@ -4388,7 +4427,7 @@ static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms)
uint32_t hash, b;
int rv, len, dir_nodeid, from_nodeid;
- from_nodeid = ms->m_header.h_nodeid;
+ from_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
len = receive_extralen(ms);
@@ -4398,7 +4437,7 @@ static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms)
return;
}
- dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash);
+ dir_nodeid = dlm_hash2nodeid(ls, le32_to_cpu(ms->m_hash));
if (dir_nodeid != dlm_our_nodeid()) {
log_error(ls, "receive_remove from %d bad nodeid %d",
from_nodeid, dir_nodeid);
@@ -4471,7 +4510,7 @@ static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms)
static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms)
{
- do_purge(ls, ms->m_nodeid, ms->m_pid);
+ do_purge(ls, le32_to_cpu(ms->m_nodeid), le32_to_cpu(ms->m_pid));
}
static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
@@ -4479,9 +4518,9 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
struct dlm_lkb *lkb;
struct dlm_rsb *r;
int error, mstype, result;
- int from_nodeid = ms->m_header.h_nodeid;
+ int from_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
- error = find_lkb(ls, ms->m_remid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
if (error)
return error;
@@ -4497,7 +4536,8 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY);
if (error) {
log_error(ls, "receive_request_reply %x remote %d %x result %d",
- lkb->lkb_id, from_nodeid, ms->m_lkid, ms->m_result);
+ lkb->lkb_id, from_nodeid, le32_to_cpu(ms->m_lkid),
+ from_dlm_errno(le32_to_cpu(ms->m_result)));
dlm_dump_rsb(r);
goto out;
}
@@ -4511,7 +4551,7 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
}
/* this is the value returned from do_request() on the master */
- result = ms->m_result;
+ result = from_dlm_errno(le32_to_cpu(ms->m_result));
switch (result) {
case -EAGAIN:
@@ -4525,7 +4565,7 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
case 0:
/* request was queued or granted on remote master */
receive_flags_reply(lkb, ms);
- lkb->lkb_remid = ms->m_lkid;
+ lkb->lkb_remid = le32_to_cpu(ms->m_lkid);
if (is_altmode(lkb))
munge_altmode(lkb, ms);
if (result) {
@@ -4598,7 +4638,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
struct dlm_message *ms)
{
/* this is the value returned from do_convert() on the master */
- switch (ms->m_result) {
+ switch (from_dlm_errno(le32_to_cpu(ms->m_result))) {
case -EAGAIN:
/* convert would block (be queued) on remote master */
queue_cast(r, lkb, -EAGAIN);
@@ -4631,8 +4671,9 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
default:
log_error(r->res_ls, "receive_convert_reply %x remote %d %x %d",
- lkb->lkb_id, ms->m_header.h_nodeid, ms->m_lkid,
- ms->m_result);
+ lkb->lkb_id, le32_to_cpu(ms->m_header.h_nodeid),
+ le32_to_cpu(ms->m_lkid),
+ from_dlm_errno(le32_to_cpu(ms->m_result)));
dlm_print_rsb(r);
dlm_print_lkb(lkb);
}
@@ -4666,7 +4707,7 @@ static int receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms)
struct dlm_lkb *lkb;
int error;
- error = find_lkb(ls, ms->m_remid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
if (error)
return error;
@@ -4694,7 +4735,7 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
/* this is the value returned from do_unlock() on the master */
- switch (ms->m_result) {
+ switch (from_dlm_errno(le32_to_cpu(ms->m_result))) {
case -DLM_EUNLOCK:
receive_flags_reply(lkb, ms);
remove_lock_pc(r, lkb);
@@ -4704,7 +4745,7 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
break;
default:
log_error(r->res_ls, "receive_unlock_reply %x error %d",
- lkb->lkb_id, ms->m_result);
+ lkb->lkb_id, from_dlm_errno(le32_to_cpu(ms->m_result)));
}
out:
unlock_rsb(r);
@@ -4716,7 +4757,7 @@ static int receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms)
struct dlm_lkb *lkb;
int error;
- error = find_lkb(ls, ms->m_remid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
if (error)
return error;
@@ -4744,7 +4785,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
/* this is the value returned from do_cancel() on the master */
- switch (ms->m_result) {
+ switch (from_dlm_errno(le32_to_cpu(ms->m_result))) {
case -DLM_ECANCEL:
receive_flags_reply(lkb, ms);
revert_lock_pc(r, lkb);
@@ -4754,7 +4795,8 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
break;
default:
log_error(r->res_ls, "receive_cancel_reply %x error %d",
- lkb->lkb_id, ms->m_result);
+ lkb->lkb_id,
+ from_dlm_errno(le32_to_cpu(ms->m_result)));
}
out:
unlock_rsb(r);
@@ -4766,7 +4808,7 @@ static int receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms)
struct dlm_lkb *lkb;
int error;
- error = find_lkb(ls, ms->m_remid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
if (error)
return error;
@@ -4782,9 +4824,10 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
int error, ret_nodeid;
int do_lookup_list = 0;
- error = find_lkb(ls, ms->m_lkid, &lkb);
+ error = find_lkb(ls, le32_to_cpu(ms->m_lkid), &lkb);
if (error) {
- log_error(ls, "receive_lookup_reply no lkid %x", ms->m_lkid);
+ log_error(ls, "%s no lkid %x", __func__,
+ le32_to_cpu(ms->m_lkid));
return;
}
@@ -4799,7 +4842,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
if (error)
goto out;
- ret_nodeid = ms->m_nodeid;
+ ret_nodeid = le32_to_cpu(ms->m_nodeid);
/* We sometimes receive a request from the dir node for this
rsb before we've received the dir node's loookup_reply for it.
@@ -4811,8 +4854,8 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
/* This should never happen */
log_error(ls, "receive_lookup_reply %x from %d ret %d "
"master %d dir %d our %d first %x %s",
- lkb->lkb_id, ms->m_header.h_nodeid, ret_nodeid,
- r->res_master_nodeid, r->res_dir_nodeid,
+ lkb->lkb_id, le32_to_cpu(ms->m_header.h_nodeid),
+ ret_nodeid, r->res_master_nodeid, r->res_dir_nodeid,
dlm_our_nodeid(), r->res_first_lkid, r->res_name);
}
@@ -4824,7 +4867,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
} else if (ret_nodeid == -1) {
/* the remote node doesn't believe it's the dir node */
log_error(ls, "receive_lookup_reply %x from %d bad ret_nodeid",
- lkb->lkb_id, ms->m_header.h_nodeid);
+ lkb->lkb_id, le32_to_cpu(ms->m_header.h_nodeid));
r->res_master_nodeid = 0;
r->res_nodeid = -1;
lkb->lkb_nodeid = -1;
@@ -4858,10 +4901,12 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms,
{
int error = 0, noent = 0;
- if (!dlm_is_member(ls, ms->m_header.h_nodeid)) {
+ if (!dlm_is_member(ls, le32_to_cpu(ms->m_header.h_nodeid))) {
log_limit(ls, "receive %d from non-member %d %x %x %d",
- ms->m_type, ms->m_header.h_nodeid, ms->m_lkid,
- ms->m_remid, ms->m_result);
+ le32_to_cpu(ms->m_type),
+ le32_to_cpu(ms->m_header.h_nodeid),
+ le32_to_cpu(ms->m_lkid), le32_to_cpu(ms->m_remid),
+ from_dlm_errno(le32_to_cpu(ms->m_result)));
return;
}
@@ -4869,77 +4914,78 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms,
/* messages sent to a master node */
- case DLM_MSG_REQUEST:
+ case cpu_to_le32(DLM_MSG_REQUEST):
error = receive_request(ls, ms);
break;
- case DLM_MSG_CONVERT:
+ case cpu_to_le32(DLM_MSG_CONVERT):
error = receive_convert(ls, ms);
break;
- case DLM_MSG_UNLOCK:
+ case cpu_to_le32(DLM_MSG_UNLOCK):
error = receive_unlock(ls, ms);
break;
- case DLM_MSG_CANCEL:
+ case cpu_to_le32(DLM_MSG_CANCEL):
noent = 1;
error = receive_cancel(ls, ms);
break;
/* messages sent from a master node (replies to above) */
- case DLM_MSG_REQUEST_REPLY:
+ case cpu_to_le32(DLM_MSG_REQUEST_REPLY):
error = receive_request_reply(ls, ms);
break;
- case DLM_MSG_CONVERT_REPLY:
+ case cpu_to_le32(DLM_MSG_CONVERT_REPLY):
error = receive_convert_reply(ls, ms);
break;
- case DLM_MSG_UNLOCK_REPLY:
+ case cpu_to_le32(DLM_MSG_UNLOCK_REPLY):
error = receive_unlock_reply(ls, ms);
break;
- case DLM_MSG_CANCEL_REPLY:
+ case cpu_to_le32(DLM_MSG_CANCEL_REPLY):
error = receive_cancel_reply(ls, ms);
break;
/* messages sent from a master node (only two types of async msg) */
- case DLM_MSG_GRANT:
+ case cpu_to_le32(DLM_MSG_GRANT):
noent = 1;
error = receive_grant(ls, ms);
break;
- case DLM_MSG_BAST:
+ case cpu_to_le32(DLM_MSG_BAST):
noent = 1;
error = receive_bast(ls, ms);
break;
/* messages sent to a dir node */
- case DLM_MSG_LOOKUP:
+ case cpu_to_le32(DLM_MSG_LOOKUP):
receive_lookup(ls, ms);
break;
- case DLM_MSG_REMOVE:
+ case cpu_to_le32(DLM_MSG_REMOVE):
receive_remove(ls, ms);
break;
/* messages sent from a dir node (remove has no reply) */
- case DLM_MSG_LOOKUP_REPLY:
+ case cpu_to_le32(DLM_MSG_LOOKUP_REPLY):
receive_lookup_reply(ls, ms);
break;
/* other messages */
- case DLM_MSG_PURGE:
+ case cpu_to_le32(DLM_MSG_PURGE):
receive_purge(ls, ms);
break;
default:
- log_error(ls, "unknown message type %d", ms->m_type);
+ log_error(ls, "unknown message type %d",
+ le32_to_cpu(ms->m_type));
}
/*
@@ -4955,22 +5001,26 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms,
if (error == -ENOENT && noent) {
log_debug(ls, "receive %d no %x remote %d %x saved_seq %u",
- ms->m_type, ms->m_remid, ms->m_header.h_nodeid,
- ms->m_lkid, saved_seq);
+ le32_to_cpu(ms->m_type), le32_to_cpu(ms->m_remid),
+ le32_to_cpu(ms->m_header.h_nodeid),
+ le32_to_cpu(ms->m_lkid), saved_seq);
} else if (error == -ENOENT) {
log_error(ls, "receive %d no %x remote %d %x saved_seq %u",
- ms->m_type, ms->m_remid, ms->m_header.h_nodeid,
- ms->m_lkid, saved_seq);
+ le32_to_cpu(ms->m_type), le32_to_cpu(ms->m_remid),
+ le32_to_cpu(ms->m_header.h_nodeid),
+ le32_to_cpu(ms->m_lkid), saved_seq);
- if (ms->m_type == DLM_MSG_CONVERT)
- dlm_dump_rsb_hash(ls, ms->m_hash);
+ if (ms->m_type == cpu_to_le32(DLM_MSG_CONVERT))
+ dlm_dump_rsb_hash(ls, le32_to_cpu(ms->m_hash));
}
if (error == -EINVAL) {
log_error(ls, "receive %d inval from %d lkid %x remid %x "
"saved_seq %u",
- ms->m_type, ms->m_header.h_nodeid,
- ms->m_lkid, ms->m_remid, saved_seq);
+ le32_to_cpu(ms->m_type),
+ le32_to_cpu(ms->m_header.h_nodeid),
+ le32_to_cpu(ms->m_lkid), le32_to_cpu(ms->m_remid),
+ saved_seq);
}
}
@@ -4991,7 +5041,7 @@ static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
lockspace generation before we left. */
if (!ls->ls_generation) {
log_limit(ls, "receive %d from %d ignore old gen",
- ms->m_type, nodeid);
+ le32_to_cpu(ms->m_type), nodeid);
return;
}
@@ -5024,30 +5074,30 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
switch (hd->h_cmd) {
case DLM_MSG:
- dlm_message_in(&p->message);
- type = p->message.m_type;
+ type = le32_to_cpu(p->message.m_type);
break;
case DLM_RCOM:
- dlm_rcom_in(&p->rcom);
- type = p->rcom.rc_type;
+ type = le32_to_cpu(p->rcom.rc_type);
break;
default:
log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid);
return;
}
- if (hd->h_nodeid != nodeid) {
+ if (le32_to_cpu(hd->h_nodeid) != nodeid) {
log_print("invalid h_nodeid %d from %d lockspace %x",
- hd->h_nodeid, nodeid, hd->u.h_lockspace);
+ le32_to_cpu(hd->h_nodeid), nodeid,
+ le32_to_cpu(hd->u.h_lockspace));
return;
}
- ls = dlm_find_lockspace_global(hd->u.h_lockspace);
+ ls = dlm_find_lockspace_global(le32_to_cpu(hd->u.h_lockspace));
if (!ls) {
if (dlm_config.ci_log_debug) {
printk_ratelimited(KERN_DEBUG "dlm: invalid lockspace "
"%u from %d cmd %d type %d\n",
- hd->u.h_lockspace, nodeid, hd->h_cmd, type);
+ le32_to_cpu(hd->u.h_lockspace), nodeid,
+ hd->h_cmd, type);
}
if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
@@ -5061,8 +5111,11 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
down_read(&ls->ls_recv_active);
if (hd->h_cmd == DLM_MSG)
dlm_receive_message(ls, &p->message, nodeid);
- else
+ else if (hd->h_cmd == DLM_RCOM)
dlm_receive_rcom(ls, &p->rcom, nodeid);
+ else
+ log_error(ls, "invalid h_cmd %d from %d lockspace %x",
+ hd->h_cmd, nodeid, le32_to_cpu(hd->u.h_lockspace));
up_read(&ls->ls_recv_active);
dlm_put_lockspace(ls);
@@ -5074,10 +5127,10 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
if (middle_conversion(lkb)) {
hold_lkb(lkb);
memset(ms_stub, 0, sizeof(struct dlm_message));
- ms_stub->m_flags = DLM_IFL_STUB_MS;
- ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
- ms_stub->m_result = -EINPROGRESS;
- ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+ ms_stub->m_flags = cpu_to_le32(DLM_IFL_STUB_MS);
+ ms_stub->m_type = cpu_to_le32(DLM_MSG_CONVERT_REPLY);
+ ms_stub->m_result = cpu_to_le32(to_dlm_errno(-EINPROGRESS));
+ ms_stub->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid);
_receive_convert_reply(lkb, ms_stub);
/* Same special case as in receive_rcom_lock_args() */
@@ -5196,10 +5249,10 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_UNLOCK:
hold_lkb(lkb);
memset(ms_stub, 0, sizeof(struct dlm_message));
- ms_stub->m_flags = DLM_IFL_STUB_MS;
- ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
- ms_stub->m_result = stub_unlock_result;
- ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+ ms_stub->m_flags = cpu_to_le32(DLM_IFL_STUB_MS);
+ ms_stub->m_type = cpu_to_le32(DLM_MSG_UNLOCK_REPLY);
+ ms_stub->m_result = cpu_to_le32(to_dlm_errno(stub_unlock_result));
+ ms_stub->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid);
_receive_unlock_reply(lkb, ms_stub);
dlm_put_lkb(lkb);
break;
@@ -5207,10 +5260,10 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_CANCEL:
hold_lkb(lkb);
memset(ms_stub, 0, sizeof(struct dlm_message));
- ms_stub->m_flags = DLM_IFL_STUB_MS;
- ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
- ms_stub->m_result = stub_cancel_result;
- ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+ ms_stub->m_flags = cpu_to_le32(DLM_IFL_STUB_MS);
+ ms_stub->m_type = cpu_to_le32(DLM_MSG_CANCEL_REPLY);
+ ms_stub->m_result = cpu_to_le32(to_dlm_errno(stub_cancel_result));
+ ms_stub->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid);
_receive_cancel_reply(lkb, ms_stub);
dlm_put_lkb(lkb);
break;
@@ -5227,21 +5280,18 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
{
- struct dlm_lkb *lkb;
- int found = 0;
+ struct dlm_lkb *lkb = NULL, *iter; /* lkb stays NULL unless a waiter with DLM_IFL_RESEND is found */
mutex_lock(&ls->ls_waiters_mutex);
- list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
- if (lkb->lkb_flags & DLM_IFL_RESEND) {
- hold_lkb(lkb);
- found = 1;
+ list_for_each_entry(iter, &ls->ls_waiters, lkb_wait_reply) {
+ if (iter->lkb_flags & DLM_IFL_RESEND) {
+ hold_lkb(iter); /* reference is taken while ls_waiters_mutex is still held */
+ lkb = iter; /* only the first match is returned */
break;
}
}
mutex_unlock(&ls->ls_waiters_mutex);
- if (!found)
- lkb = NULL;
return lkb;
}
@@ -5301,11 +5351,16 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
lkb->lkb_wait_type = 0;
- lkb->lkb_wait_count = 0;
+ /* drop all wait_count references; we still
+ * hold a reference for this iteration.
+ */
+ while (lkb->lkb_wait_count) {
+ lkb->lkb_wait_count--;
+ unhold_lkb(lkb);
+ }
mutex_lock(&ls->ls_waiters_mutex);
list_del_init(&lkb->lkb_wait_reply);
mutex_unlock(&ls->ls_waiters_mutex);
- unhold_lkb(lkb); /* for waiters list */
if (oc || ou) {
/* do an unlock or cancel instead of resending */
@@ -5575,7 +5630,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
{
struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
- lkb->lkb_nodeid = rc->rc_header.h_nodeid;
+ lkb->lkb_nodeid = le32_to_cpu(rc->rc_header.h_nodeid);
lkb->lkb_ownpid = le32_to_cpu(rl->rl_ownpid);
lkb->lkb_remid = le32_to_cpu(rl->rl_lkid);
lkb->lkb_exflags = le32_to_cpu(rl->rl_exflags);
@@ -5590,8 +5645,8 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
lkb->lkb_astfn = (rl->rl_asts & DLM_CB_CAST) ? &fake_astfn : NULL;
if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
- int lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
- sizeof(struct rcom_lock);
+ int lvblen = le16_to_cpu(rc->rc_header.h_length) -
+ sizeof(struct dlm_rcom) - sizeof(struct rcom_lock);
if (lvblen > ls->ls_lvblen)
return -EINVAL;
lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
@@ -5627,7 +5682,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
struct dlm_rsb *r;
struct dlm_lkb *lkb;
uint32_t remid = 0;
- int from_nodeid = rc->rc_header.h_nodeid;
+ int from_nodeid = le32_to_cpu(rc->rc_header.h_nodeid);
int error;
if (rl->rl_parent_lkid) {
@@ -5677,7 +5732,6 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
attach_lkb(r, lkb);
add_lkb(r, lkb, rl->rl_status);
- error = 0;
ls->ls_recover_locks_in++;
if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
@@ -5717,7 +5771,8 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
error = find_lkb(ls, lkid, &lkb);
if (error) {
log_error(ls, "dlm_recover_process_copy no %x remote %d %x %d",
- lkid, rc->rc_header.h_nodeid, remid, result);
+ lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid,
+ result);
return error;
}
@@ -5727,7 +5782,8 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
if (!is_process_copy(lkb)) {
log_error(ls, "dlm_recover_process_copy bad %x remote %d %x %d",
- lkid, rc->rc_header.h_nodeid, remid, result);
+ lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid,
+ result);
dlm_dump_rsb(r);
unlock_rsb(r);
put_rsb(r);
@@ -5742,7 +5798,8 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
a barrier between recover_masters and recover_locks. */
log_debug(ls, "dlm_recover_process_copy %x remote %d %x %d",
- lkid, rc->rc_header.h_nodeid, remid, result);
+ lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid,
+ result);
dlm_send_rcom_lock(r, lkb);
goto out;
@@ -5752,7 +5809,8 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
break;
default:
log_error(ls, "dlm_recover_process_copy %x remote %d %x %d unk",
- lkid, rc->rc_header.h_nodeid, remid, result);
+ lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid,
+ result);
}
/* an ack for dlm_recover_locks() which waits for replies from
@@ -5766,12 +5824,18 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
return 0;
}
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs)
+#else
+int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
+ int mode, uint32_t flags, void *name, unsigned int namelen)
+#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
+ bool do_put = true;
int error;
dlm_lock_recovery(ls);
@@ -5782,23 +5846,28 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
goto out;
}
+ trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags);
+
if (flags & DLM_LKF_VALBLK) {
ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
if (!ua->lksb.sb_lvbptr) {
kfree(ua);
- __put_lkb(ls, lkb);
error = -ENOMEM;
- goto out;
+ goto out_put;
}
}
+#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
+#else
+ error = set_lock_args(mode, &ua->lksb, flags, namelen, fake_astfn, ua,
+ fake_bastfn, &args);
+#endif
if (error) {
kfree(ua->lksb.sb_lvbptr);
ua->lksb.sb_lvbptr = NULL;
kfree(ua);
- __put_lkb(ls, lkb);
- goto out;
+ goto out_put;
}
/* After ua is attached to lkb it will be freed by dlm_free_lkb().
@@ -5817,8 +5886,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
error = 0;
fallthrough;
default:
- __put_lkb(ls, lkb);
- goto out;
+ goto out_put;
}
/* add this new lkb to the per-process list of locks */
@@ -5826,14 +5894,24 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
hold_lkb(lkb);
list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
spin_unlock(&ua->proc->locks_spin);
+ do_put = false;
+ out_put:
+ trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error, false);
+ if (do_put)
+ __put_lkb(ls, lkb);
out:
dlm_unlock_recovery(ls);
return error;
}
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs)
+#else
+int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
@@ -5846,6 +5924,8 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error)
goto out;
+ trace_dlm_lock_start(ls, lkb, NULL, 0, mode, flags);
+
/* user can change the params on its lock when it converts it, or
add an lvb that didn't exist before */
@@ -5868,8 +5948,13 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
ua->bastaddr = ua_tmp->bastaddr;
ua->user_lksb = ua_tmp->user_lksb;
+#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
+#else
+ error = set_lock_args(mode, &ua->lksb, flags, 0, fake_astfn, ua,
+ fake_bastfn, &args);
+#endif
if (error)
goto out_put;
@@ -5878,6 +5963,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
error = 0;
out_put:
+ trace_dlm_lock_end(ls, lkb, NULL, 0, mode, flags, error, false);
dlm_put_lkb(lkb);
out:
dlm_unlock_recovery(ls);
@@ -5893,39 +5979,38 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
- unsigned long timeout_cs, uint32_t *lkid)
+ uint32_t *lkid)
{
- struct dlm_lkb *lkb;
+ struct dlm_lkb *lkb = NULL, *iter;
struct dlm_user_args *ua;
int found_other_mode = 0;
- int found = 0;
int rv = 0;
mutex_lock(&ls->ls_orphans_mutex);
- list_for_each_entry(lkb, &ls->ls_orphans, lkb_ownqueue) {
- if (lkb->lkb_resource->res_length != namelen)
+ list_for_each_entry(iter, &ls->ls_orphans, lkb_ownqueue) {
+ if (iter->lkb_resource->res_length != namelen)
continue;
- if (memcmp(lkb->lkb_resource->res_name, name, namelen))
+ if (memcmp(iter->lkb_resource->res_name, name, namelen))
continue;
- if (lkb->lkb_grmode != mode) {
+ if (iter->lkb_grmode != mode) {
found_other_mode = 1;
continue;
}
- found = 1;
- list_del_init(&lkb->lkb_ownqueue);
- lkb->lkb_flags &= ~DLM_IFL_ORPHAN;
- *lkid = lkb->lkb_id;
+ lkb = iter;
+ list_del_init(&iter->lkb_ownqueue);
+ iter->lkb_flags &= ~DLM_IFL_ORPHAN;
+ *lkid = iter->lkb_id;
break;
}
mutex_unlock(&ls->ls_orphans_mutex);
- if (!found && found_other_mode) {
+ if (!lkb && found_other_mode) {
rv = -EAGAIN;
goto out;
}
- if (!found) {
+ if (!lkb) {
rv = -ENOENT;
goto out;
}
@@ -5971,6 +6056,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error)
goto out;
+ trace_dlm_unlock_start(ls, lkb, flags);
+
ua = lkb->lkb_ua;
if (lvb_in && ua->lksb.sb_lvbptr)
@@ -5999,6 +6086,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
out_put:
+ trace_dlm_unlock_end(ls, lkb, flags, error);
dlm_put_lkb(lkb);
out:
dlm_unlock_recovery(ls);
@@ -6020,6 +6108,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error)
goto out;
+ trace_dlm_unlock_start(ls, lkb, flags);
+
ua = lkb->lkb_ua;
if (ua_tmp->castparam)
ua->castparam = ua_tmp->castparam;
@@ -6037,6 +6127,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
if (error == -EBUSY)
error = 0;
out_put:
+ trace_dlm_unlock_end(ls, lkb, flags, error);
dlm_put_lkb(lkb);
out:
dlm_unlock_recovery(ls);
@@ -6058,6 +6149,8 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
if (error)
goto out;
+ trace_dlm_unlock_start(ls, lkb, flags);
+
ua = lkb->lkb_ua;
error = set_unlock_args(flags, ua, &args);
@@ -6086,6 +6179,7 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
if (error == -EBUSY)
error = 0;
out_put:
+ trace_dlm_unlock_end(ls, lkb, flags, error);
dlm_put_lkb(lkb);
out:
dlm_unlock_recovery(ls);
@@ -6141,7 +6235,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
{
struct dlm_lkb *lkb = NULL;
- mutex_lock(&ls->ls_clear_proc_locks);
+ spin_lock(&ls->ls_clear_proc_locks);
if (list_empty(&proc->locks))
goto out;
@@ -6153,7 +6247,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
else
lkb->lkb_flags |= DLM_IFL_DEAD;
out:
- mutex_unlock(&ls->ls_clear_proc_locks);
+ spin_unlock(&ls->ls_clear_proc_locks);
return lkb;
}
@@ -6190,7 +6284,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
dlm_put_lkb(lkb);
}
- mutex_lock(&ls->ls_clear_proc_locks);
+ spin_lock(&ls->ls_clear_proc_locks);
/* in-progress unlocks */
list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
@@ -6206,7 +6300,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
dlm_put_lkb(lkb);
}
- mutex_unlock(&ls->ls_clear_proc_locks);
+ spin_unlock(&ls->ls_clear_proc_locks);
dlm_unlock_recovery(ls);
}
@@ -6277,8 +6371,8 @@ static int send_purge(struct dlm_ls *ls, int nodeid, int pid)
DLM_MSG_PURGE, &ms, &mh);
if (error)
return error;
- ms->m_nodeid = nodeid;
- ms->m_pid = pid;
+ ms->m_nodeid = cpu_to_le32(nodeid);
+ ms->m_pid = cpu_to_le32(pid);
return send_message(mh, ms);
}
@@ -6301,3 +6395,64 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
return error;
}
+/* debug functionality
+ *
+ * dlm_debug_add_lkb() creates an lkb from caller-supplied values and adds
+ * it, with status lkb_status, to the rsb that find_rsb() returns for
+ * name/len.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if lkb_flags contains DLM_IFL_USER,
+ * -ENOMEM if the lksb allocation fails, or the error from _create_lkb()
+ * or find_rsb().
+ */
+int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
+ int lkb_nodeid, unsigned int lkb_flags, int lkb_status)
+{
+ struct dlm_lksb *lksb;
+ struct dlm_lkb *lkb;
+ struct dlm_rsb *r;
+ int error;
+
+ /* we currently can't set a valid user lock */
+ if (lkb_flags & DLM_IFL_USER)
+ return -EOPNOTSUPP;
+
+ lksb = kzalloc(sizeof(*lksb), GFP_NOFS);
+ if (!lksb)
+ return -ENOMEM;
+
+ error = _create_lkb(ls, &lkb, lkb_id, lkb_id + 1); /* NOTE(review): presumably the one-wide id range pins the new lkb to lkb_id - confirm */
+ if (error) {
+ kfree(lksb);
+ return error;
+ }
+
+ lkb->lkb_flags = lkb_flags;
+ lkb->lkb_nodeid = lkb_nodeid;
+ lkb->lkb_lksb = lksb;
+ /* user specific pointer, just don't have it NULL for kernel locks.
+ * NOTE(review): DLM_IFL_USER was rejected above, so this test is
+ * always true here.
+ */
+ if (~lkb_flags & DLM_IFL_USER)
+ lkb->lkb_astparam = (void *)0xDEADBEEF;
+
+ error = find_rsb(ls, name, len, 0, R_REQUEST, &r);
+ if (error) {
+ kfree(lksb); /* lkb->lkb_lksb still points here; NOTE(review): confirm __put_lkb() does not touch it */
+ __put_lkb(ls, lkb);
+ return error;
+ }
+
+ lock_rsb(r);
+ attach_lkb(r, lkb);
+ add_lkb(r, lkb, lkb_status);
+ unlock_rsb(r);
+ put_rsb(r);
+
+ return 0;
+}
+/* dlm_debug_add_lkb_to_waiters() looks up the lkb with id lkb_id and calls
+ * add_to_waiters() on it with mstype and to_nodeid.
+ *
+ * Returns the error from find_lkb() if the lookup fails, otherwise the
+ * result of add_to_waiters().
+ */
+int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id,
+ int mstype, int to_nodeid)
+{
+ struct dlm_lkb *lkb;
+ int error;
+
+ error = find_lkb(ls, lkb_id, &lkb);
+ if (error)
+ return error;
+
+ error = add_to_waiters(lkb, mstype, to_nodeid);
+ dlm_put_lkb(lkb); /* presumably drops the reference taken by find_lkb() - confirm */
+ return error;
+}
+
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 456c6ec3ef6f..40c76b5544da 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,13 +24,19 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);
int dlm_lock_recovery_try(struct dlm_ls *ls);
void dlm_unlock_recovery(struct dlm_ls *ls);
-void dlm_scan_waiters(struct dlm_ls *ls);
+
+#ifdef CONFIG_DLM_DEPRECATED_API
void dlm_scan_timeout(struct dlm_ls *ls);
void dlm_adjust_timeouts(struct dlm_ls *ls);
+#else
+static inline void dlm_scan_timeout(struct dlm_ls *ls) { }
+static inline void dlm_adjust_timeouts(struct dlm_ls *ls) { }
+#endif
+
int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len,
unsigned int flags, int *r_nodeid, int *result);
-int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
+int dlm_search_rsb_tree(struct rb_root *tree, const void *name, int len,
struct dlm_rsb **r_ret);
void dlm_recover_purge(struct dlm_ls *ls);
@@ -41,15 +47,22 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls);
int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
+#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs);
+#else
+int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
+ uint32_t flags, void *name, unsigned int namelen);
+int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+#endif
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
- unsigned long timeout_cs, uint32_t *lkid);
+ uint32_t *lkid);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
@@ -58,6 +71,10 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
int nodeid, int pid);
int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
+int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
+ int lkb_nodeid, unsigned int lkb_flags, int lkb_status);
+int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id,
+ int mstype, int to_nodeid);
static inline int is_master(struct dlm_rsb *r)
{
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 10eddfa6c3d7..bae050df7abf 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -216,8 +216,7 @@ static int do_uevent(struct dlm_ls *ls, int in)
return ls->ls_uevent_result;
}
-static int dlm_uevent(struct kset *kset, struct kobject *kobj,
- struct kobj_uevent_env *env)
+static int dlm_uevent(struct kobject *kobj, struct kobj_uevent_env *env)
{
struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
@@ -276,7 +275,6 @@ static int dlm_scand(void *data)
ls->ls_scan_time = jiffies;
dlm_scan_rsbs(ls);
dlm_scan_timeout(ls);
- dlm_scan_waiters(ls);
dlm_unlock_recovery(ls);
} else {
ls->ls_scan_time += HZ;
@@ -314,7 +312,7 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_global_id == id) {
- ls->ls_count++;
+ atomic_inc(&ls->ls_count);
goto out;
}
}
@@ -331,7 +329,7 @@ struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_local_handle == lockspace) {
- ls->ls_count++;
+ atomic_inc(&ls->ls_count);
goto out;
}
}
@@ -348,7 +346,7 @@ struct dlm_ls *dlm_find_lockspace_device(int minor)
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_device.minor == minor) {
- ls->ls_count++;
+ atomic_inc(&ls->ls_count);
goto out;
}
}
@@ -360,24 +358,24 @@ struct dlm_ls *dlm_find_lockspace_device(int minor)
void dlm_put_lockspace(struct dlm_ls *ls)
{
- spin_lock(&lslist_lock);
- ls->ls_count--;
- spin_unlock(&lslist_lock);
+ if (atomic_dec_and_test(&ls->ls_count)) /* true only when this call drops the count to zero */
+ wake_up(&ls->ls_count_wait); /* remove_lockspace() waits on ls_count_wait for a zero count */
}
static void remove_lockspace(struct dlm_ls *ls)
{
- for (;;) {
- spin_lock(&lslist_lock);
- if (ls->ls_count == 0) {
- WARN_ON(ls->ls_create_count != 0);
- list_del(&ls->ls_list);
- spin_unlock(&lslist_lock);
- return;
- }
+retry:
+ wait_event(ls->ls_count_wait, atomic_read(&ls->ls_count) == 0); /* woken by dlm_put_lockspace() when ls_count reaches zero */
+ /* re-check under lslist_lock: lookups such as
+ * dlm_find_lockspace_local() increment ls_count while holding it,
+ * so the count may have been raised again after the wait returned
+ */
+ spin_lock(&lslist_lock);
+ if (atomic_read(&ls->ls_count) != 0) {
spin_unlock(&lslist_lock);
- ssleep(1);
+ goto retry;
}
+
+ WARN_ON(ls->ls_create_count != 0);
+ list_del(&ls->ls_list); /* unlinked while lslist_lock is held and the count is zero */
+ spin_unlock(&lslist_lock);
}
static int threads_start(void)
@@ -418,7 +416,7 @@ static int new_lockspace(const char *name, const char *cluster,
if (namelen > DLM_LOCKSPACE_LEN || namelen == 0)
return -EINVAL;
- if (!lvblen || (lvblen % 8))
+ if (lvblen % 8)
return -EINVAL;
if (!try_module_get(THIS_MODULE))
@@ -481,7 +479,8 @@ static int new_lockspace(const char *name, const char *cluster,
memcpy(ls->ls_name, name, namelen);
ls->ls_namelen = namelen;
ls->ls_lvblen = lvblen;
- ls->ls_count = 0;
+ atomic_set(&ls->ls_count, 0);
+ init_waitqueue_head(&ls->ls_count_wait);
ls->ls_flags = 0;
ls->ls_scan_time = jiffies;
@@ -490,13 +489,28 @@ static int new_lockspace(const char *name, const char *cluster,
ls->ls_ops_arg = ops_arg;
}
- if (flags & DLM_LSFL_TIMEWARN)
+#ifdef CONFIG_DLM_DEPRECATED_API
+ if (flags & DLM_LSFL_TIMEWARN) {
+ pr_warn_once("===============================================================\n"
+ "WARNING: the dlm DLM_LSFL_TIMEWARN flag is being deprecated and\n"
+ " will be removed in v6.2!\n"
+ " Inclusive DLM_LSFL_TIMEWARN define in UAPI header!\n"
+ "===============================================================\n");
+
set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+ }
/* ls_exflags are forced to match among nodes, and we don't
- need to require all nodes to have some flags set */
+ * need to require all nodes to have some flags set
+ */
ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
DLM_LSFL_NEWEXCL));
+#else
+ /* ls_exflags are forced to match among nodes, and we don't
+ * need to require all nodes to have some flags set
+ */
+ ls->ls_exflags = (flags & ~(DLM_LSFL_FS | DLM_LSFL_NEWEXCL));
+#endif
size = READ_ONCE(dlm_config.ci_rsbtbl_size);
ls->ls_rsbtbl_size = size;
@@ -511,6 +525,7 @@ static int new_lockspace(const char *name, const char *cluster,
}
spin_lock_init(&ls->ls_remove_spin);
+ init_waitqueue_head(&ls->ls_remove_wait);
for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1,
@@ -526,8 +541,10 @@ static int new_lockspace(const char *name, const char *cluster,
mutex_init(&ls->ls_waiters_mutex);
INIT_LIST_HEAD(&ls->ls_orphans);
mutex_init(&ls->ls_orphans_mutex);
+#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&ls->ls_timeout);
mutex_init(&ls->ls_timeout_mutex);
+#endif
INIT_LIST_HEAD(&ls->ls_new_rsb);
spin_lock_init(&ls->ls_new_rsb_spin);
@@ -547,8 +564,8 @@ static int new_lockspace(const char *name, const char *cluster,
init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0;
- init_completion(&ls->ls_members_done);
- ls->ls_members_result = -1;
+ init_completion(&ls->ls_recovery_done);
+ ls->ls_recovery_result = -1;
mutex_init(&ls->ls_cb_mutex);
INIT_LIST_HEAD(&ls->ls_cb_delay);
@@ -564,8 +581,10 @@ static int new_lockspace(const char *name, const char *cluster,
init_rwsem(&ls->ls_in_recovery);
init_rwsem(&ls->ls_recv_active);
INIT_LIST_HEAD(&ls->ls_requestqueue);
+ atomic_set(&ls->ls_requestqueue_cnt, 0);
+ init_waitqueue_head(&ls->ls_requestqueue_wait);
mutex_init(&ls->ls_requestqueue_mutex);
- mutex_init(&ls->ls_clear_proc_locks);
+ spin_lock_init(&ls->ls_clear_proc_locks);
/* Due backwards compatibility with 3.1 we need to use maximum
* possible dlm message size to be sure the message will fit and
@@ -642,8 +661,9 @@ static int new_lockspace(const char *name, const char *cluster,
if (error)
goto out_recoverd;
- wait_for_completion(&ls->ls_members_done);
- error = ls->ls_members_result;
+ /* wait until recovery is successful or failed */
+ wait_for_completion(&ls->ls_recovery_done);
+ error = ls->ls_recovery_result;
if (error)
goto out_members;
@@ -683,10 +703,11 @@ static int new_lockspace(const char *name, const char *cluster,
return error;
}
-int dlm_new_lockspace(const char *name, const char *cluster,
- uint32_t flags, int lvblen,
- const struct dlm_lockspace_ops *ops, void *ops_arg,
- int *ops_result, dlm_lockspace_t **lockspace)
+static int __dlm_new_lockspace(const char *name, const char *cluster,
+ uint32_t flags, int lvblen,
+ const struct dlm_lockspace_ops *ops,
+ void *ops_arg, int *ops_result,
+ dlm_lockspace_t **lockspace)
{
int error = 0;
@@ -712,6 +733,25 @@ int dlm_new_lockspace(const char *name, const char *cluster,
return error;
}
+int dlm_new_lockspace(const char *name, const char *cluster, uint32_t flags,
+ int lvblen, const struct dlm_lockspace_ops *ops,
+ void *ops_arg, int *ops_result,
+ dlm_lockspace_t **lockspace)
+{
+ return __dlm_new_lockspace(name, cluster, flags | DLM_LSFL_FS, lvblen,
+ ops, ops_arg, ops_result, lockspace);
+}
+
+int dlm_new_user_lockspace(const char *name, const char *cluster,
+ uint32_t flags, int lvblen,
+ const struct dlm_lockspace_ops *ops,
+ void *ops_arg, int *ops_result,
+ dlm_lockspace_t **lockspace)
+{
+ return __dlm_new_lockspace(name, cluster, flags, lvblen, ops,
+ ops_arg, ops_result, lockspace);
+}
+
static int lkb_idr_is_local(int id, void *p, void *data)
{
struct dlm_lkb *lkb = p;
@@ -868,7 +908,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
* until this returns.
*
* Force has 4 possible values:
- * 0 - don't destroy locksapce if it has any LKBs
+ * 0 - don't destroy lockspace if it has any LKBs
* 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
* 2 - destroy lockspace regardless of LKBs
* 3 - destroy lockspace as part of a forced shutdown
@@ -919,3 +959,15 @@ void dlm_stop_lockspaces(void)
log_print("dlm user daemon left %d lockspaces", count);
}
+void dlm_stop_lockspaces_check(void)
+{
+ struct dlm_ls *ls;
+
+ spin_lock(&lslist_lock);
+ list_for_each_entry(ls, &lslist, ls_list) {
+ if (WARN_ON(!rwsem_is_locked(&ls->ls_in_recovery) ||
+ !dlm_locking_stopped(ls)))
+ break;
+ }
+ spin_unlock(&lslist_lock);
+}
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h
index a78d853b9342..03f4a4a3a871 100644
--- a/fs/dlm/lockspace.h
+++ b/fs/dlm/lockspace.h
@@ -12,6 +12,14 @@
#ifndef __LOCKSPACE_DOT_H__
#define __LOCKSPACE_DOT_H__
+/* DLM_LSFL_FS
+ * The lockspace user is in the kernel (i.e. filesystem). Enables
+ * direct bast/cast callbacks.
+ *
+ * internal lockspace flag - will be removed in future
+ */
+#define DLM_LSFL_FS 0x00000004
+
int dlm_lockspace_init(void);
void dlm_lockspace_exit(void);
struct dlm_ls *dlm_find_lockspace_global(uint32_t id);
@@ -19,6 +27,12 @@ struct dlm_ls *dlm_find_lockspace_local(void *id);
struct dlm_ls *dlm_find_lockspace_device(int minor);
void dlm_put_lockspace(struct dlm_ls *ls);
void dlm_stop_lockspaces(void);
+void dlm_stop_lockspaces_check(void);
+int dlm_new_user_lockspace(const char *name, const char *cluster,
+ uint32_t flags, int lvblen,
+ const struct dlm_lockspace_ops *ops,
+ void *ops_arg, int *ops_result,
+ dlm_lockspace_t **lockspace);
#endif /* __LOCKSPACE_DOT_H__ */
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 8f715c620e1f..59f64c596233 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -53,9 +53,12 @@
#include <net/sctp/sctp.h>
#include <net/ipv6.h>
+#include <trace/events/dlm.h>
+
#include "dlm_internal.h"
#include "lowcomms.h"
#include "midcomms.h"
+#include "memory.h"
#include "config.h"
#define NEEDED_RMEM (4*1024*1024)
@@ -84,7 +87,6 @@ struct connection {
struct list_head writequeue; /* List of outgoing writequeue_entries */
spinlock_t writequeue_lock;
atomic_t writequeue_cnt;
- struct mutex wq_alloc;
int retries;
#define MAX_CONNECT_RETRIES 3
struct hlist_node list;
@@ -189,6 +191,24 @@ static const struct dlm_proto_ops *dlm_proto_ops;
static void process_recv_sockets(struct work_struct *work);
static void process_send_sockets(struct work_struct *work);
+static void writequeue_entry_ctor(void *data)
+{
+ struct writequeue_entry *entry = data;
+
+ INIT_LIST_HEAD(&entry->msgs);
+}
+
+struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void)
+{
+ return kmem_cache_create("dlm_writequeue", sizeof(struct writequeue_entry),
+ 0, 0, writequeue_entry_ctor);
+}
+
+struct kmem_cache *dlm_lowcomms_msg_cache_create(void)
+{
+ return kmem_cache_create("dlm_msg", sizeof(struct dlm_msg), 0, 0, NULL);
+}
+
/* need to held writequeue_lock */
static struct writequeue_entry *con_next_wq(struct connection *con)
{
@@ -199,7 +219,10 @@ static struct writequeue_entry *con_next_wq(struct connection *con)
e = list_first_entry(&con->writequeue, struct writequeue_entry,
list);
- if (e->len == 0)
+ /* if len is zero nothing is to send, if there are users filling
+ * buffers we wait until the users are done so we can send more.
+ */
+ if (e->users || e->len == 0)
return NULL;
return e;
@@ -265,8 +288,6 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc)
return NULL;
}
- mutex_init(&con->wq_alloc);
-
spin_lock(&connections_lock);
/* Because multiple workqueues/threads calls this function it can
* race on multiple cpu's. Instead of locking hot path __find_con()
@@ -486,11 +507,9 @@ static void lowcomms_data_ready(struct sock *sk)
{
struct connection *con;
- read_lock_bh(&sk->sk_callback_lock);
con = sock2con(sk);
if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
queue_work(recv_workqueue, &con->rwork);
- read_unlock_bh(&sk->sk_callback_lock);
}
static void lowcomms_listen_data_ready(struct sock *sk)
@@ -505,15 +524,14 @@ static void lowcomms_write_space(struct sock *sk)
{
struct connection *con;
- read_lock_bh(&sk->sk_callback_lock);
con = sock2con(sk);
if (!con)
- goto out;
+ return;
if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
- log_print("successful connected to node %d", con->nodeid);
+ log_print("connected to node %d", con->nodeid);
queue_work(send_workqueue, &con->swork);
- goto out;
+ return;
}
clear_bit(SOCK_NOSPACE, &con->sock->flags);
@@ -524,8 +542,6 @@ static void lowcomms_write_space(struct sock *sk)
}
queue_work(send_workqueue, &con->swork);
-out:
- read_unlock_bh(&sk->sk_callback_lock);
}
static inline void lowcomms_connect_sock(struct connection *con)
@@ -592,42 +608,41 @@ int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
static void lowcomms_error_report(struct sock *sk)
{
struct connection *con;
- struct sockaddr_storage saddr;
void (*orig_report)(struct sock *) = NULL;
+ struct inet_sock *inet;
- read_lock_bh(&sk->sk_callback_lock);
con = sock2con(sk);
if (con == NULL)
goto out;
orig_report = listen_sock.sk_error_report;
- if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) {
- printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
- "sending to node %d, port %d, "
- "sk_err=%d/%d\n", dlm_our_nodeid(),
- con->nodeid, dlm_config.ci_tcp_port,
- sk->sk_err, sk->sk_err_soft);
- } else if (saddr.ss_family == AF_INET) {
- struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
+ inet = inet_sk(sk);
+ switch (sk->sk_family) {
+ case AF_INET:
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
- "sending to node %d at %pI4, port %d, "
+ "sending to node %d at %pI4, dport %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
- con->nodeid, &sin4->sin_addr.s_addr,
- dlm_config.ci_tcp_port, sk->sk_err,
+ con->nodeid, &inet->inet_daddr,
+ ntohs(inet->inet_dport), sk->sk_err,
sk->sk_err_soft);
- } else {
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;
-
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
- "sending to node %d at %u.%u.%u.%u, "
- "port %d, sk_err=%d/%d\n", dlm_our_nodeid(),
- con->nodeid, sin6->sin6_addr.s6_addr32[0],
- sin6->sin6_addr.s6_addr32[1],
- sin6->sin6_addr.s6_addr32[2],
- sin6->sin6_addr.s6_addr32[3],
- dlm_config.ci_tcp_port, sk->sk_err,
+ "sending to node %d at %pI6c, "
+ "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(),
+ con->nodeid, &sk->sk_v6_daddr,
+ ntohs(inet->inet_dport), sk->sk_err,
sk->sk_err_soft);
+ break;
+#endif
+ default:
+ printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
+ "invalid socket family %d set, "
+ "sk_err=%d/%d\n", dlm_our_nodeid(),
+ sk->sk_family, sk->sk_err, sk->sk_err_soft);
+ goto out;
}
/* below sendcon only handling */
@@ -646,7 +661,6 @@ static void lowcomms_error_report(struct sock *sk)
queue_work(send_workqueue, &con->swork);
out:
- read_unlock_bh(&sk->sk_callback_lock);
if (orig_report)
orig_report(sk);
}
@@ -666,20 +680,20 @@ static void restore_callbacks(struct socket *sock)
{
struct sock *sk = sock->sk;
- write_lock_bh(&sk->sk_callback_lock);
+ lock_sock(sk);
sk->sk_user_data = NULL;
sk->sk_data_ready = listen_sock.sk_data_ready;
sk->sk_state_change = listen_sock.sk_state_change;
sk->sk_write_space = listen_sock.sk_write_space;
sk->sk_error_report = listen_sock.sk_error_report;
- write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
}
static void add_listen_sock(struct socket *sock, struct listen_connection *con)
{
struct sock *sk = sock->sk;
- write_lock_bh(&sk->sk_callback_lock);
+ lock_sock(sk);
save_listen_callbacks(sock);
con->sock = sock;
@@ -687,7 +701,7 @@ static void add_listen_sock(struct socket *sock, struct listen_connection *con)
sk->sk_allocation = GFP_NOFS;
/* Install a data_ready callback */
sk->sk_data_ready = lowcomms_listen_data_ready;
- write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
}
/* Make a socket active */
@@ -695,7 +709,7 @@ static void add_sock(struct socket *sock, struct connection *con)
{
struct sock *sk = sock->sk;
- write_lock_bh(&sk->sk_callback_lock);
+ lock_sock(sk);
con->sock = sock;
sk->sk_user_data = con;
@@ -705,7 +719,7 @@ static void add_sock(struct socket *sock, struct connection *con)
sk->sk_state_change = lowcomms_state_change;
sk->sk_allocation = GFP_NOFS;
sk->sk_error_report = lowcomms_error_report;
- write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
}
/* Add the port number to an IPv6 or 4 sockaddr and return the address
@@ -733,7 +747,7 @@ static void dlm_page_release(struct kref *kref)
ref);
__free_page(e->page);
- kfree(e);
+ dlm_free_writequeue(e);
}
static void dlm_msg_release(struct kref *kref)
@@ -741,7 +755,7 @@ static void dlm_msg_release(struct kref *kref)
struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);
kref_put(&msg->entry->ref, dlm_page_release);
- kfree(msg);
+ dlm_free_msg(msg);
}
static void free_entry(struct writequeue_entry *e)
@@ -925,6 +939,7 @@ static int receive_from_sock(struct connection *con)
msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
msg.msg_flags);
+ trace_dlm_recv(con->nodeid, ret);
if (ret == -EAGAIN)
break;
else if (ret <= 0)
@@ -1013,10 +1028,28 @@ static int accept_from_sock(struct listen_connection *con)
/* Get the new node's NODEID */
make_sockaddr(&peeraddr, 0, &len);
if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) {
- unsigned char *b=(unsigned char *)&peeraddr;
- log_print("connect from non cluster node");
- print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
- b, sizeof(struct sockaddr_storage));
+ switch (peeraddr.ss_family) {
+ case AF_INET: {
+ struct sockaddr_in *sin = (struct sockaddr_in *)&peeraddr;
+
+ log_print("connect from non cluster IPv4 node %pI4",
+ &sin->sin_addr);
+ break;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6: {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&peeraddr;
+
+ log_print("connect from non cluster IPv6 node %pI6c",
+ &sin6->sin6_addr);
+ break;
+ }
+#endif
+ default:
+ log_print("invalid family from non cluster node");
+ break;
+ }
+
sock_release(newsock);
return -1;
}
@@ -1177,33 +1210,33 @@ static void deinit_local(void)
kfree(dlm_local_addr[i]);
}
-static struct writequeue_entry *new_writequeue_entry(struct connection *con,
- gfp_t allocation)
+static struct writequeue_entry *new_writequeue_entry(struct connection *con)
{
struct writequeue_entry *entry;
- entry = kzalloc(sizeof(*entry), allocation);
+ entry = dlm_allocate_writequeue();
if (!entry)
return NULL;
- entry->page = alloc_page(allocation | __GFP_ZERO);
+ entry->page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
if (!entry->page) {
- kfree(entry);
+ dlm_free_writequeue(entry);
return NULL;
}
+ entry->offset = 0;
+ entry->len = 0;
+ entry->end = 0;
+ entry->dirty = false;
entry->con = con;
entry->users = 1;
kref_init(&entry->ref);
- INIT_LIST_HEAD(&entry->msgs);
-
return entry;
}
static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
- gfp_t allocation, char **ppc,
- void (*cb)(struct dlm_mhandle *mh),
- struct dlm_mhandle *mh)
+ char **ppc, void (*cb)(void *data),
+ void *data)
{
struct writequeue_entry *e;
@@ -1215,74 +1248,54 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
*ppc = page_address(e->page) + e->end;
if (cb)
- cb(mh);
+ cb(data);
e->end += len;
e->users++;
- spin_unlock(&con->writequeue_lock);
-
- return e;
+ goto out;
}
}
- spin_unlock(&con->writequeue_lock);
- e = new_writequeue_entry(con, allocation);
+ e = new_writequeue_entry(con);
if (!e)
- return NULL;
+ goto out;
kref_get(&e->ref);
*ppc = page_address(e->page);
e->end += len;
atomic_inc(&con->writequeue_cnt);
-
- spin_lock(&con->writequeue_lock);
if (cb)
- cb(mh);
+ cb(data);
list_add_tail(&e->list, &con->writequeue);
- spin_unlock(&con->writequeue_lock);
+out:
+ spin_unlock(&con->writequeue_lock);
return e;
};
static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
gfp_t allocation, char **ppc,
- void (*cb)(struct dlm_mhandle *mh),
- struct dlm_mhandle *mh)
+ void (*cb)(void *data),
+ void *data)
{
struct writequeue_entry *e;
struct dlm_msg *msg;
- bool sleepable;
- msg = kzalloc(sizeof(*msg), allocation);
+ msg = dlm_allocate_msg(allocation);
if (!msg)
return NULL;
- /* this mutex is being used as a wait to avoid multiple "fast"
- * new writequeue page list entry allocs in new_wq_entry in
- * normal operation which is sleepable context. Without it
- * we could end in multiple writequeue entries with one
- * dlm message because multiple callers were waiting at
- * the writequeue_lock in new_wq_entry().
- */
- sleepable = gfpflags_normal_context(allocation);
- if (sleepable)
- mutex_lock(&con->wq_alloc);
-
kref_init(&msg->ref);
- e = new_wq_entry(con, len, allocation, ppc, cb, mh);
+ e = new_wq_entry(con, len, ppc, cb, data);
if (!e) {
- if (sleepable)
- mutex_unlock(&con->wq_alloc);
-
- kfree(msg);
+ dlm_free_msg(msg);
return NULL;
}
- if (sleepable)
- mutex_unlock(&con->wq_alloc);
-
+ msg->retransmit = false;
+ msg->orig_msg = NULL;
msg->ppc = *ppc;
msg->len = len;
msg->entry = e;
@@ -1290,9 +1303,13 @@ static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
return msg;
}
+/* avoid false positive for connections_srcu, unlock happens in
+ * dlm_lowcomms_commit_msg which must be called on success
+ */
+#ifndef __CHECKER__
struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
- char **ppc, void (*cb)(struct dlm_mhandle *mh),
- struct dlm_mhandle *mh)
+ char **ppc, void (*cb)(void *data),
+ void *data)
{
struct connection *con;
struct dlm_msg *msg;
@@ -1313,16 +1330,19 @@ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
return NULL;
}
- msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, mh);
+ msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, data);
if (!msg) {
srcu_read_unlock(&connections_srcu, idx);
return NULL;
}
+ /* for dlm_lowcomms_commit_msg() */
+ kref_get(&msg->ref);
/* we assume if successful commit must called */
msg->idx = idx;
return msg;
}
+#endif
static void _dlm_lowcomms_commit_msg(struct dlm_msg *msg)
{
@@ -1349,11 +1369,18 @@ out:
return;
}
+/* avoid false positive for connections_srcu, lock was taken in
+ * dlm_lowcomms_new_msg
+ */
+#ifndef __CHECKER__
void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
{
_dlm_lowcomms_commit_msg(msg);
srcu_read_unlock(&connections_srcu, msg->idx);
+ /* because dlm_lowcomms_new_msg() */
+ kref_put(&msg->ref, dlm_msg_release);
}
+#endif
void dlm_lowcomms_put_msg(struct dlm_msg *msg)
{
@@ -1403,7 +1430,6 @@ static void send_to_sock(struct connection *con)
if (!e)
break;
- e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
len = e->len;
offset = e->offset;
BUG_ON(len == 0 && e->users == 0);
@@ -1411,6 +1437,7 @@ static void send_to_sock(struct connection *con)
ret = kernel_sendpage(con->sock, e->page, offset, len,
msg_flags);
+ trace_dlm_send(con->nodeid, ret);
if (ret == -EAGAIN || ret == 0) {
if (ret == -EAGAIN &&
test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
@@ -1680,9 +1707,9 @@ static void _stop_conn(struct connection *con, bool and_other)
set_bit(CF_READ_PENDING, &con->flags);
set_bit(CF_WRITE_PENDING, &con->flags);
if (con->sock && con->sock->sk) {
- write_lock_bh(&con->sock->sk->sk_callback_lock);
+ lock_sock(con->sock->sk);
con->sock->sk->sk_user_data = NULL;
- write_unlock_bh(&con->sock->sk->sk_callback_lock);
+ release_sock(con->sock->sk);
}
if (con->othercon && and_other)
_stop_conn(con->othercon, false);
@@ -1775,8 +1802,8 @@ static int dlm_listen_for_all(void)
result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
SOCK_STREAM, dlm_proto_ops->proto, &sock);
if (result < 0) {
- log_print("Can't create comms socket, check SCTP is loaded");
- goto out;
+ log_print("Can't create comms socket: %d", result);
+ return result;
}
sock_set_mark(sock->sk, dlm_config.ci_mark);
@@ -1908,7 +1935,7 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
return ret;
if (!test_and_set_bit(CF_CONNECTED, &con->flags))
- log_print("successful connected to node %d", con->nodeid);
+ log_print("connected to node %d", con->nodeid);
return 0;
}
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 4ccae07cf005..29369feea991 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -38,8 +38,8 @@ void dlm_lowcomms_stop(void);
void dlm_lowcomms_exit(void);
int dlm_lowcomms_close(int nodeid);
struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
- char **ppc, void (*cb)(struct dlm_mhandle *mh),
- struct dlm_mhandle *mh);
+ char **ppc, void (*cb)(void *data),
+ void *data);
void dlm_lowcomms_commit_msg(struct dlm_msg *msg);
void dlm_lowcomms_put_msg(struct dlm_msg *msg);
int dlm_lowcomms_resend_msg(struct dlm_msg *msg);
@@ -47,6 +47,8 @@ int dlm_lowcomms_connect_node(int nodeid);
int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
void dlm_midcomms_receive_done(int nodeid);
+struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void);
+struct kmem_cache *dlm_lowcomms_msg_cache_create(void);
#endif /* __LOWCOMMS_DOT_H__ */
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index afc66a1346d3..1c5be4b70ac1 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -19,6 +19,9 @@
#include "config.h"
#include "lowcomms.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/dlm.h>
+
static int __init init_dlm(void)
{
int error;
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 731d489aa323..2af2ccfe43a9 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -20,7 +20,7 @@
int dlm_slots_version(struct dlm_header *h)
{
- if ((h->h_version & 0x0000FFFF) < DLM_HEADER_SLOTS)
+ if ((le32_to_cpu(h->h_version) & 0x0000FFFF) < DLM_HEADER_SLOTS)
return 0;
return 1;
}
@@ -120,18 +120,13 @@ int dlm_slots_copy_in(struct dlm_ls *ls)
ro0 = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config));
- for (i = 0, ro = ro0; i < num_slots; i++, ro++) {
- ro->ro_nodeid = le32_to_cpu(ro->ro_nodeid);
- ro->ro_slot = le16_to_cpu(ro->ro_slot);
- }
-
log_slots(ls, gen, num_slots, ro0, NULL, 0);
list_for_each_entry(memb, &ls->ls_nodes, list) {
for (i = 0, ro = ro0; i < num_slots; i++, ro++) {
- if (ro->ro_nodeid != memb->nodeid)
+ if (le32_to_cpu(ro->ro_nodeid) != memb->nodeid)
continue;
- memb->slot = ro->ro_slot;
+ memb->slot = le16_to_cpu(ro->ro_slot);
memb->slot_prev = memb->slot;
break;
}
@@ -442,8 +437,7 @@ static int ping_members(struct dlm_ls *ls)
int error = 0;
list_for_each_entry(memb, &ls->ls_nodes, list) {
- error = dlm_recovery_stopped(ls);
- if (error) {
+ if (dlm_recovery_stopped(ls)) {
error = -EINTR;
break;
}
@@ -540,7 +534,11 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
int i, error, neg = 0, low = -1;
/* previously removed members that we've not finished removing need to
- count as a negative change so the "neg" recovery steps will happen */
+ * count as a negative change so the "neg" recovery steps will happen
+ *
+ * This function must report all member changes to lsops or the
+ * midcomms layer and must never return before doing so.
+ */
list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
log_rinfo(ls, "prev removed member %d", memb->nodeid);
@@ -589,19 +587,6 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
*neg_out = neg;
error = ping_members(ls);
- /* error -EINTR means that a new recovery action is triggered.
- * We ignore this recovery action and let run the new one which might
- * have new member configuration.
- */
- if (error == -EINTR)
- error = 0;
-
- /* new_lockspace() may be waiting to know if the config
- * is good or bad
- */
- ls->ls_members_result = error;
- complete(&ls->ls_members_done);
-
log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes);
return error;
}
@@ -681,7 +666,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
if (!ls->ls_recover_begin)
ls->ls_recover_begin = jiffies;
- dlm_lsop_recover_prep(ls);
+ /* call recover_prep ops only once and not multiple times
+ * for each possible dlm_ls_stop() when recovery is already
+ * stopped.
+ *
+ * If we were able to clear the LSFL_RUNNING bit and it was
+ * set, we know this is the first dlm_ls_stop() call.
+ */
+ if (new)
+ dlm_lsop_recover_prep(ls);
+
return 0;
}
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 5918f4d39586..ce35c3c19aeb 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -10,32 +10,61 @@
******************************************************************************/
#include "dlm_internal.h"
+#include "midcomms.h"
+#include "lowcomms.h"
#include "config.h"
#include "memory.h"
+static struct kmem_cache *writequeue_cache;
+static struct kmem_cache *mhandle_cache;
+static struct kmem_cache *msg_cache;
static struct kmem_cache *lkb_cache;
static struct kmem_cache *rsb_cache;
int __init dlm_memory_init(void)
{
+ writequeue_cache = dlm_lowcomms_writequeue_cache_create();
+ if (!writequeue_cache)
+ goto out;
+
+ mhandle_cache = dlm_midcomms_cache_create();
+ if (!mhandle_cache)
+ goto mhandle;
+
lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb),
__alignof__(struct dlm_lkb), 0, NULL);
if (!lkb_cache)
- return -ENOMEM;
+ goto lkb;
+
+ msg_cache = dlm_lowcomms_msg_cache_create();
+ if (!msg_cache)
+ goto msg;
rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
__alignof__(struct dlm_rsb), 0, NULL);
- if (!rsb_cache) {
- kmem_cache_destroy(lkb_cache);
- return -ENOMEM;
- }
+ if (!rsb_cache)
+ goto rsb;
return 0;
+
+rsb:
+ kmem_cache_destroy(msg_cache);
+msg:
+ kmem_cache_destroy(lkb_cache);
+lkb:
+ kmem_cache_destroy(mhandle_cache);
+mhandle:
+ kmem_cache_destroy(writequeue_cache);
+out:
+ return -ENOMEM;
}
void dlm_memory_exit(void)
{
+ kmem_cache_destroy(writequeue_cache);
+ kmem_cache_destroy(mhandle_cache);
+ kmem_cache_destroy(msg_cache);
kmem_cache_destroy(lkb_cache);
kmem_cache_destroy(rsb_cache);
}
@@ -89,3 +118,32 @@ void dlm_free_lkb(struct dlm_lkb *lkb)
kmem_cache_free(lkb_cache, lkb);
}
+struct dlm_mhandle *dlm_allocate_mhandle(void)
+{
+ return kmem_cache_alloc(mhandle_cache, GFP_NOFS);
+}
+
+void dlm_free_mhandle(struct dlm_mhandle *mhandle)
+{
+ kmem_cache_free(mhandle_cache, mhandle);
+}
+
+struct writequeue_entry *dlm_allocate_writequeue(void)
+{
+ return kmem_cache_alloc(writequeue_cache, GFP_ATOMIC);
+}
+
+void dlm_free_writequeue(struct writequeue_entry *writequeue)
+{
+ kmem_cache_free(writequeue_cache, writequeue);
+}
+
+struct dlm_msg *dlm_allocate_msg(gfp_t allocation)
+{
+ return kmem_cache_alloc(msg_cache, allocation);
+}
+
+void dlm_free_msg(struct dlm_msg *msg)
+{
+ kmem_cache_free(msg_cache, msg);
+}
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h
index 4f218ea4b187..7bd3f1a391ca 100644
--- a/fs/dlm/memory.h
+++ b/fs/dlm/memory.h
@@ -20,6 +20,12 @@ struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls);
void dlm_free_lkb(struct dlm_lkb *l);
char *dlm_allocate_lvb(struct dlm_ls *ls);
void dlm_free_lvb(char *l);
+struct dlm_mhandle *dlm_allocate_mhandle(void);
+void dlm_free_mhandle(struct dlm_mhandle *mhandle);
+struct writequeue_entry *dlm_allocate_writequeue(void);
+void dlm_free_writequeue(struct writequeue_entry *writequeue);
+struct dlm_msg *dlm_allocate_msg(gfp_t allocation);
+void dlm_free_msg(struct dlm_msg *msg);
#endif /* __MEMORY_DOT_H__ */
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 7ae39ec8d9b0..6489bc22ad61 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -135,8 +135,10 @@
#include <net/tcp.h>
#include "dlm_internal.h"
+#include "lockspace.h"
#include "lowcomms.h"
#include "config.h"
+#include "memory.h"
#include "lock.h"
#include "util.h"
#include "midcomms.h"
@@ -220,6 +222,12 @@ DEFINE_STATIC_SRCU(nodes_srcu);
*/
static DEFINE_MUTEX(close_lock);
+struct kmem_cache *dlm_midcomms_cache_create(void)
+{
+ return kmem_cache_create("dlm_mhandle", sizeof(struct dlm_mhandle),
+ 0, 0, NULL);
+}
+
static inline const char *dlm_state_str(int state)
{
switch (state) {
@@ -279,7 +287,7 @@ static void dlm_mhandle_release(struct rcu_head *rcu)
struct dlm_mhandle *mh = container_of(rcu, struct dlm_mhandle, rcu);
dlm_lowcomms_put_msg(mh->msg);
- kfree(mh);
+ dlm_free_mhandle(mh);
}
static void dlm_mhandle_delete(struct midcomms_node *node,
@@ -373,13 +381,12 @@ static int dlm_send_ack(int nodeid, uint32_t seq)
m_header = (struct dlm_header *)ppc;
- m_header->h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
- m_header->h_nodeid = dlm_our_nodeid();
- m_header->h_length = mb_len;
+ m_header->h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+ m_header->h_nodeid = cpu_to_le32(dlm_our_nodeid());
+ m_header->h_length = cpu_to_le16(mb_len);
m_header->h_cmd = DLM_ACK;
- m_header->u.h_seq = seq;
+ m_header->u.h_seq = cpu_to_le32(seq);
- header_out(m_header);
dlm_lowcomms_commit_msg(msg);
dlm_lowcomms_put_msg(msg);
@@ -402,13 +409,11 @@ static int dlm_send_fin(struct midcomms_node *node,
m_header = (struct dlm_header *)ppc;
- m_header->h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
- m_header->h_nodeid = dlm_our_nodeid();
- m_header->h_length = mb_len;
+ m_header->h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+ m_header->h_nodeid = cpu_to_le32(dlm_our_nodeid());
+ m_header->h_length = cpu_to_le16(mb_len);
m_header->h_cmd = DLM_FIN;
- header_out(m_header);
-
pr_debug("sending fin msg to node %d\n", node->nodeid);
dlm_midcomms_commit_mhandle(mh);
set_bit(DLM_NODE_FLAG_STOP_TX, &node->flags);
@@ -567,14 +572,14 @@ dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p,
return NULL;
}
- switch (le32_to_cpu(p->rcom.rc_type)) {
- case DLM_RCOM_NAMES:
+ switch (p->rcom.rc_type) {
+ case cpu_to_le32(DLM_RCOM_NAMES):
fallthrough;
- case DLM_RCOM_NAMES_REPLY:
+ case cpu_to_le32(DLM_RCOM_NAMES_REPLY):
fallthrough;
- case DLM_RCOM_STATUS:
+ case cpu_to_le32(DLM_RCOM_STATUS):
fallthrough;
- case DLM_RCOM_STATUS_REPLY:
+ case cpu_to_le32(DLM_RCOM_STATUS_REPLY):
node = nodeid2node(nodeid, 0);
if (node) {
spin_lock(&node->state_lock);
@@ -734,14 +739,14 @@ static void dlm_midcomms_receive_buffer_3_2(union dlm_packet *p, int nodeid)
*
* length already checked.
*/
- switch (le32_to_cpu(p->rcom.rc_type)) {
- case DLM_RCOM_NAMES:
+ switch (p->rcom.rc_type) {
+ case cpu_to_le32(DLM_RCOM_NAMES):
fallthrough;
- case DLM_RCOM_NAMES_REPLY:
+ case cpu_to_le32(DLM_RCOM_NAMES_REPLY):
fallthrough;
- case DLM_RCOM_STATUS:
+ case cpu_to_le32(DLM_RCOM_STATUS):
fallthrough;
- case DLM_RCOM_STATUS_REPLY:
+ case cpu_to_le32(DLM_RCOM_STATUS_REPLY):
break;
default:
log_print("unsupported rcom type received: %u, will skip this message from node %d",
@@ -909,11 +914,11 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
if (msglen > len)
break;
- switch (le32_to_cpu(hd->h_version)) {
- case DLM_VERSION_3_1:
+ switch (hd->h_version) {
+ case cpu_to_le32(DLM_VERSION_3_1):
dlm_midcomms_receive_buffer_3_1((union dlm_packet *)ptr, nodeid);
break;
- case DLM_VERSION_3_2:
+ case cpu_to_le32(DLM_VERSION_3_2):
dlm_midcomms_receive_buffer_3_2((union dlm_packet *)ptr, nodeid);
break;
default:
@@ -969,7 +974,7 @@ void dlm_midcomms_receive_done(int nodeid)
spin_unlock(&node->state_lock);
/* do nothing FIN has it's own ack send */
break;
- };
+ }
srcu_read_unlock(&nodes_srcu, idx);
}
@@ -1013,15 +1018,16 @@ static void dlm_fill_opts_header(struct dlm_opts *opts, uint16_t inner_len,
uint32_t seq)
{
opts->o_header.h_cmd = DLM_OPTS;
- opts->o_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
- opts->o_header.h_nodeid = dlm_our_nodeid();
- opts->o_header.h_length = DLM_MIDCOMMS_OPT_LEN + inner_len;
- opts->o_header.u.h_seq = seq;
- header_out(&opts->o_header);
+ opts->o_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+ opts->o_header.h_nodeid = cpu_to_le32(dlm_our_nodeid());
+ opts->o_header.h_length = cpu_to_le16(DLM_MIDCOMMS_OPT_LEN + inner_len);
+ opts->o_header.u.h_seq = cpu_to_le32(seq);
}
-static void midcomms_new_msg_cb(struct dlm_mhandle *mh)
+static void midcomms_new_msg_cb(void *data)
{
+ struct dlm_mhandle *mh = data;
+
atomic_inc(&mh->node->send_queue_cnt);
spin_lock(&mh->node->send_queue_lock);
@@ -1053,6 +1059,10 @@ static struct dlm_msg *dlm_midcomms_get_msg_3_2(struct dlm_mhandle *mh, int node
return msg;
}
+/* avoid false positive for nodes_srcu, unlock happens in
+ * dlm_midcomms_commit_mhandle which must be called on success
+ */
+#ifndef __CHECKER__
struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
gfp_t allocation, char **ppc)
{
@@ -1071,10 +1081,12 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
/* this is a bug, however we going on and hope it will be resolved */
WARN_ON(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags));
- mh = kzalloc(sizeof(*mh), GFP_NOFS);
+ mh = dlm_allocate_mhandle();
if (!mh)
goto err;
+ mh->committed = false;
+ mh->ack_rcv = NULL;
mh->idx = idx;
mh->node = node;
@@ -1083,7 +1095,7 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
msg = dlm_lowcomms_new_msg(nodeid, len, allocation, ppc,
NULL, NULL);
if (!msg) {
- kfree(mh);
+ dlm_free_mhandle(mh);
goto err;
}
@@ -1092,13 +1104,13 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
msg = dlm_midcomms_get_msg_3_2(mh, nodeid, len, allocation,
ppc);
if (!msg) {
- kfree(mh);
+ dlm_free_mhandle(mh);
goto err;
}
break;
default:
- kfree(mh);
+ dlm_free_mhandle(mh);
WARN_ON(1);
goto err;
}
@@ -1116,6 +1128,7 @@ err:
srcu_read_unlock(&nodes_srcu, idx);
return NULL;
}
+#endif
static void dlm_midcomms_commit_msg_3_2(struct dlm_mhandle *mh)
{
@@ -1125,6 +1138,10 @@ static void dlm_midcomms_commit_msg_3_2(struct dlm_mhandle *mh)
dlm_lowcomms_commit_msg(mh->msg);
}
+/* avoid false positive for nodes_srcu, lock was taken in
+ * dlm_midcomms_get_mhandle
+ */
+#ifndef __CHECKER__
void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
{
switch (mh->node->version) {
@@ -1134,7 +1151,7 @@ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
dlm_lowcomms_commit_msg(mh->msg);
dlm_lowcomms_put_msg(mh->msg);
/* mh is not part of rcu list in this case */
- kfree(mh);
+ dlm_free_mhandle(mh);
break;
case DLM_VERSION_3_2:
dlm_midcomms_commit_msg_3_2(mh);
@@ -1146,6 +1163,7 @@ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
break;
}
}
+#endif
int dlm_midcomms_start(void)
{
@@ -1231,7 +1249,7 @@ void dlm_midcomms_add_member(int nodeid)
}
node->users++;
- pr_debug("users inc count %d\n", node->users);
+ pr_debug("node %d users inc count %d\n", nodeid, node->users);
spin_unlock(&node->state_lock);
srcu_read_unlock(&nodes_srcu, idx);
@@ -1254,7 +1272,7 @@ void dlm_midcomms_remove_member(int nodeid)
spin_lock(&node->state_lock);
node->users--;
- pr_debug("users dec count %d\n", node->users);
+ pr_debug("node %d users dec count %d\n", nodeid, node->users);
/* hitting users count to zero means the
* other side is running dlm_midcomms_stop()
@@ -1395,6 +1413,8 @@ int dlm_midcomms_close(int nodeid)
if (nodeid == dlm_our_nodeid())
return 0;
+ dlm_stop_lockspaces_check();
+
idx = srcu_read_lock(&nodes_srcu);
/* Abort pending close/remove operation */
node = nodeid2node(nodeid, 0);
@@ -1425,3 +1445,51 @@ int dlm_midcomms_close(int nodeid)
return ret;
}
+
+/* debug functionality to send a raw dlm msg from user space: callback context */
+struct dlm_rawmsg_data {
+ struct midcomms_node *node; /* node whose seq_send counter the callback may consume */
+ void *buf; /* caller's raw message; read as a struct dlm_header by the callback */
+};
+
+static void midcomms_new_rawmsg_cb(void *data)
+{
+ struct dlm_rawmsg_data *rd = data; /* context set up by dlm_midcomms_rawmsg_send() */
+ struct dlm_header *h = rd->buf; /* the raw message is interpreted as starting with a dlm header */
+
+ switch (h->h_version) {
+ case cpu_to_le32(DLM_VERSION_3_1):
+ break; /* version 3.1 headers are left untouched */
+ default:
+ switch (h->h_cmd) {
+ case DLM_OPTS:
+ if (!h->u.h_seq) /* seq still zero: assign the node's next send sequence number */
+ h->u.h_seq = cpu_to_le32(rd->node->seq_send++);
+ break;
+ default:
+ break; /* other commands are not modified */
+ }
+ break;
+ }
+}
+
+int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf,
+ int buflen)
+{
+ struct dlm_rawmsg_data rd; /* on-stack context handed to midcomms_new_rawmsg_cb() */
+ struct dlm_msg *msg;
+ char *msgbuf; /* presumably set by dlm_lowcomms_new_msg() through &msgbuf */
+
+ rd.node = node;
+ rd.buf = buf;
+
+ msg = dlm_lowcomms_new_msg(node->nodeid, buflen, GFP_NOFS,
+ &msgbuf, midcomms_new_rawmsg_cb, &rd);
+ if (!msg)
+ return -ENOMEM; /* dlm_lowcomms_new_msg() returned NULL */
+
+ memcpy(msgbuf, buf, buflen); /* copy the caller's raw message into the message buffer */
+ dlm_lowcomms_commit_msg(msg); /* NOTE(review): send_rcom_stateless() also calls dlm_lowcomms_put_msg() after commit; confirm none is needed here */
+ return 0;
+}
+
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
index 579abc6929be..82bcd9661922 100644
--- a/fs/dlm/midcomms.h
+++ b/fs/dlm/midcomms.h
@@ -28,6 +28,9 @@ const char *dlm_midcomms_state(struct midcomms_node *node);
unsigned long dlm_midcomms_flags(struct midcomms_node *node);
int dlm_midcomms_send_queue_cnt(struct midcomms_node *node);
uint32_t dlm_midcomms_version(struct midcomms_node *node);
+int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf,
+ int buflen);
+struct kmem_cache *dlm_midcomms_cache_create(void);
#endif /* __MIDCOMMS_DOT_H__ */
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 67f68d48d60c..4de4b8651c6c 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -75,6 +75,7 @@ static struct genl_family family __ro_after_init = {
.version = DLM_GENL_VERSION,
.small_ops = dlm_nl_ops,
.n_small_ops = ARRAY_SIZE(dlm_nl_ops),
+ .resv_start_op = DLM_CMD_HELLO + 1,
.module = THIS_MODULE,
};
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index c38b2b8ffd1d..737f185aad8d 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -13,26 +13,28 @@
#include "dlm_internal.h"
#include "lockspace.h"
-static spinlock_t ops_lock;
-static struct list_head send_list;
-static struct list_head recv_list;
-static wait_queue_head_t send_wq;
-static wait_queue_head_t recv_wq;
+static DEFINE_SPINLOCK(ops_lock);
+static LIST_HEAD(send_list);
+static LIST_HEAD(recv_list);
+static DECLARE_WAIT_QUEUE_HEAD(send_wq);
+static DECLARE_WAIT_QUEUE_HEAD(recv_wq);
-struct plock_op {
- struct list_head list;
- int done;
- struct dlm_plock_info info;
-};
-
-struct plock_xop {
- struct plock_op xop;
- int (*callback)(struct file_lock *fl, int result);
+struct plock_async_data {
void *fl;
void *file;
struct file_lock flc;
+ int (*callback)(struct file_lock *fl, int result);
};
+struct plock_op {
+ struct list_head list;
+ int done;
+ /* set if the lock op was interrupted while waiting for the dlm_controld reply */
+ bool sigint;
+ struct dlm_plock_info info;
+ /* if set, the op is handled asynchronously; freed by dlm_release_plock_op() */
+ struct plock_async_data *data;
+};
static inline void set_version(struct dlm_plock_info *info)
{
@@ -58,10 +60,15 @@ static int check_version(struct dlm_plock_info *info)
return 0;
}
+static void dlm_release_plock_op(struct plock_op *op)
+{
+ kfree(op->data); /* async data, if any; kfree(NULL) is a no-op */
+ kfree(op);
+}
+
static void send_op(struct plock_op *op)
{
set_version(&op->info);
- INIT_LIST_HEAD(&op->list);
spin_lock(&ops_lock);
list_add_tail(&op->list, &send_list);
spin_unlock(&ops_lock);
@@ -74,8 +81,7 @@ static void send_op(struct plock_op *op)
abandoned waiter. So, we have to insert the unlock-close when the
lock call is interrupted. */
-static void do_unlock_close(struct dlm_ls *ls, u64 number,
- struct file *file, struct file_lock *fl)
+static void do_unlock_close(const struct dlm_plock_info *info)
{
struct plock_op *op;
@@ -84,15 +90,12 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
return;
op->info.optype = DLM_PLOCK_OP_UNLOCK;
- op->info.pid = fl->fl_pid;
- op->info.fsid = ls->ls_global_id;
- op->info.number = number;
+ op->info.pid = info->pid;
+ op->info.fsid = info->fsid;
+ op->info.number = info->number;
op->info.start = 0;
op->info.end = OFFSET_MAX;
- if (fl->fl_lmops && fl->fl_lmops->lm_grant)
- op->info.owner = (__u64) fl->fl_pid;
- else
- op->info.owner = (__u64)(long) fl->fl_owner;
+ op->info.owner = info->owner;
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
@@ -101,22 +104,21 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
int cmd, struct file_lock *fl)
{
+ struct plock_async_data *op_data;
struct dlm_ls *ls;
struct plock_op *op;
- struct plock_xop *xop;
int rv;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
- xop = kzalloc(sizeof(*xop), GFP_NOFS);
- if (!xop) {
+ op = kzalloc(sizeof(*op), GFP_NOFS);
+ if (!op) {
rv = -ENOMEM;
goto out;
}
- op = &xop->xop;
op->info.optype = DLM_PLOCK_OP_LOCK;
op->info.pid = fl->fl_pid;
op->info.ex = (fl->fl_type == F_WRLCK);
@@ -125,46 +127,57 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
+ /* async handling */
if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
+ op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
+ if (!op_data) {
+ dlm_release_plock_op(op);
+ rv = -ENOMEM;
+ goto out;
+ }
+
/* fl_owner is lockd which doesn't distinguish
processes on the nfs client */
op->info.owner = (__u64) fl->fl_pid;
- xop->callback = fl->fl_lmops->lm_grant;
- locks_init_lock(&xop->flc);
- locks_copy_lock(&xop->flc, fl);
- xop->fl = fl;
- xop->file = file;
+ op_data->callback = fl->fl_lmops->lm_grant;
+ locks_init_lock(&op_data->flc);
+ locks_copy_lock(&op_data->flc, fl);
+ op_data->fl = fl;
+ op_data->file = file;
+
+ op->data = op_data;
+
+ send_op(op);
+ rv = FILE_LOCK_DEFERRED;
+ goto out;
} else {
op->info.owner = (__u64)(long) fl->fl_owner;
- xop->callback = NULL;
}
send_op(op);
- if (xop->callback == NULL) {
- rv = wait_event_interruptible(recv_wq, (op->done != 0));
- if (rv == -ERESTARTSYS) {
- log_debug(ls, "dlm_posix_lock: wait killed %llx",
- (unsigned long long)number);
- spin_lock(&ops_lock);
- list_del(&op->list);
+ rv = wait_event_interruptible(recv_wq, (op->done != 0));
+ if (rv == -ERESTARTSYS) {
+ spin_lock(&ops_lock);
+ /* recheck under ops_lock if we got a done != 0,
+ * if so this interrupt case should be ignored
+ */
+ if (op->done != 0) {
spin_unlock(&ops_lock);
- kfree(xop);
- do_unlock_close(ls, number, file, fl);
- goto out;
+ goto do_lock_wait;
}
- } else {
- rv = FILE_LOCK_DEFERRED;
+
+ op->sigint = true;
+ spin_unlock(&ops_lock);
+ log_debug(ls, "%s: wait interrupted %x %llx pid %d",
+ __func__, ls->ls_global_id,
+ (unsigned long long)number, op->info.pid);
goto out;
}
- spin_lock(&ops_lock);
- if (!list_empty(&op->list)) {
- log_error(ls, "dlm_posix_lock: op on list %llx",
- (unsigned long long)number);
- list_del(&op->list);
- }
- spin_unlock(&ops_lock);
+do_lock_wait:
+
+ WARN_ON(!list_empty(&op->list));
rv = op->info.rv;
@@ -174,7 +187,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
(unsigned long long)number);
}
- kfree(xop);
+ dlm_release_plock_op(op);
out:
dlm_put_lockspace(ls);
return rv;
@@ -184,26 +197,20 @@ EXPORT_SYMBOL_GPL(dlm_posix_lock);
/* Returns failure iff a successful lock operation should be canceled */
static int dlm_plock_callback(struct plock_op *op)
{
+ struct plock_async_data *op_data = op->data;
struct file *file;
struct file_lock *fl;
struct file_lock *flc;
int (*notify)(struct file_lock *fl, int result) = NULL;
- struct plock_xop *xop = (struct plock_xop *)op;
int rv = 0;
- spin_lock(&ops_lock);
- if (!list_empty(&op->list)) {
- log_print("dlm_plock_callback: op on list %llx",
- (unsigned long long)op->info.number);
- list_del(&op->list);
- }
- spin_unlock(&ops_lock);
+ WARN_ON(!list_empty(&op->list));
/* check if the following 2 are still valid or make a copy */
- file = xop->file;
- flc = &xop->flc;
- fl = xop->fl;
- notify = xop->callback;
+ file = op_data->file;
+ flc = &op_data->flc;
+ fl = op_data->fl;
+ notify = op_data->callback;
if (op->info.rv) {
notify(fl, op->info.rv);
@@ -234,7 +241,7 @@ static int dlm_plock_callback(struct plock_op *op)
}
out:
- kfree(xop);
+ dlm_release_plock_op(op);
return rv;
}
@@ -290,13 +297,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
send_op(op);
wait_event(recv_wq, (op->done != 0));
- spin_lock(&ops_lock);
- if (!list_empty(&op->list)) {
- log_error(ls, "dlm_posix_unlock: op on list %llx",
- (unsigned long long)number);
- list_del(&op->list);
- }
- spin_unlock(&ops_lock);
+ WARN_ON(!list_empty(&op->list));
rv = op->info.rv;
@@ -304,7 +305,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
rv = 0;
out_free:
- kfree(op);
+ dlm_release_plock_op(op);
out:
dlm_put_lockspace(ls);
fl->fl_flags = fl_flags;
@@ -344,13 +345,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
send_op(op);
wait_event(recv_wq, (op->done != 0));
- spin_lock(&ops_lock);
- if (!list_empty(&op->list)) {
- log_error(ls, "dlm_posix_get: op on list %llx",
- (unsigned long long)number);
- list_del(&op->list);
- }
- spin_unlock(&ops_lock);
+ WARN_ON(!list_empty(&op->list));
/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
-ENOENT if there are no locks on the file */
@@ -370,7 +365,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
rv = 0;
}
- kfree(op);
+ dlm_release_plock_op(op);
out:
dlm_put_lockspace(ls);
return rv;
@@ -389,7 +384,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
- op = list_entry(send_list.next, struct plock_op, list);
+ op = list_first_entry(&send_list, struct plock_op, list);
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
list_del(&op->list);
else
@@ -406,7 +401,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
(the process did not make an unlock call). */
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
- kfree(op);
+ dlm_release_plock_op(op);
if (copy_to_user(u, &info, sizeof(info)))
return -EFAULT;
@@ -418,9 +413,9 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
loff_t *ppos)
{
+ struct plock_op *op = NULL, *iter;
struct dlm_plock_info info;
- struct plock_op *op;
- int found = 0, do_callback = 0;
+ int do_callback = 0;
if (count != sizeof(info))
return -EINVAL;
@@ -432,31 +427,43 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
return -EINVAL;
spin_lock(&ops_lock);
- list_for_each_entry(op, &recv_list, list) {
- if (op->info.fsid == info.fsid &&
- op->info.number == info.number &&
- op->info.owner == info.owner) {
- struct plock_xop *xop = (struct plock_xop *)op;
- list_del_init(&op->list);
- memcpy(&op->info, &info, sizeof(info));
- if (xop->callback)
+ list_for_each_entry(iter, &recv_list, list) {
+ if (iter->info.fsid == info.fsid &&
+ iter->info.number == info.number &&
+ iter->info.owner == info.owner) {
+ if (iter->sigint) {
+ list_del(&iter->list);
+ spin_unlock(&ops_lock);
+
+ pr_debug("%s: sigint cleanup %x %llx pid %d",
+ __func__, iter->info.fsid,
+ (unsigned long long)iter->info.number,
+ iter->info.pid);
+ do_unlock_close(&iter->info);
+ memcpy(&iter->info, &info, sizeof(info));
+ dlm_release_plock_op(iter);
+ return count;
+ }
+ list_del_init(&iter->list);
+ memcpy(&iter->info, &info, sizeof(info));
+ if (iter->data)
do_callback = 1;
else
- op->done = 1;
- found = 1;
+ iter->done = 1;
+ op = iter;
break;
}
}
spin_unlock(&ops_lock);
- if (found) {
+ if (op) {
if (do_callback)
dlm_plock_callback(op);
else
wake_up(&recv_wq);
} else
- log_print("dev_write no op %x %llx", info.fsid,
- (unsigned long long)info.number);
+ log_print("%s: no op %x %llx", __func__,
+ info.fsid, (unsigned long long)info.number);
return count;
}
@@ -492,12 +499,6 @@ int dlm_plock_init(void)
{
int rv;
- spin_lock_init(&ops_lock);
- INIT_LIST_HEAD(&send_list);
- INIT_LIST_HEAD(&recv_list);
- init_waitqueue_head(&send_wq);
- init_waitqueue_head(&recv_wq);
-
rv = misc_register(&plock_dev_misc);
if (rv)
log_print("dlm_plock_init: misc_register failed %d", rv);
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6cba86470278..f19860315043 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -34,16 +34,16 @@ static void _create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
rc = (struct dlm_rcom *) mb;
- rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
- rc->rc_header.u.h_lockspace = ls->ls_global_id;
- rc->rc_header.h_nodeid = dlm_our_nodeid();
- rc->rc_header.h_length = mb_len;
+ rc->rc_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+ rc->rc_header.u.h_lockspace = cpu_to_le32(ls->ls_global_id);
+ rc->rc_header.h_nodeid = cpu_to_le32(dlm_our_nodeid());
+ rc->rc_header.h_length = cpu_to_le16(mb_len);
rc->rc_header.h_cmd = DLM_RCOM;
- rc->rc_type = type;
+ rc->rc_type = cpu_to_le32(type);
spin_lock(&ls->ls_recover_lock);
- rc->rc_seq = ls->ls_recover_seq;
+ rc->rc_seq = cpu_to_le64(ls->ls_recover_seq);
spin_unlock(&ls->ls_recover_lock);
*rc_ret = rc;
@@ -91,13 +91,11 @@ static int create_rcom_stateless(struct dlm_ls *ls, int to_nodeid, int type,
static void send_rcom(struct dlm_mhandle *mh, struct dlm_rcom *rc)
{
- dlm_rcom_out(rc);
dlm_midcomms_commit_mhandle(mh);
}
static void send_rcom_stateless(struct dlm_msg *msg, struct dlm_rcom *rc)
{
- dlm_rcom_out(rc);
dlm_lowcomms_commit_msg(msg);
dlm_lowcomms_put_msg(msg);
}
@@ -127,10 +125,10 @@ static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
- if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
+ if ((le32_to_cpu(rc->rc_header.h_version) & 0xFFFF0000) != DLM_HEADER_MAJOR) {
log_error(ls, "version mismatch: %x nodeid %d: %x",
DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
- rc->rc_header.h_version);
+ le32_to_cpu(rc->rc_header.h_version));
return -EPROTO;
}
@@ -145,10 +143,10 @@ static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
return 0;
}
-static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq)
+static void allow_sync_reply(struct dlm_ls *ls, __le64 *new_seq)
{
spin_lock(&ls->ls_rcom_spin);
- *new_seq = ++ls->ls_rcom_seq;
+ *new_seq = cpu_to_le64(++ls->ls_rcom_seq);
set_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
spin_unlock(&ls->ls_rcom_spin);
}
@@ -182,7 +180,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
if (nodeid == dlm_our_nodeid()) {
rc = ls->ls_recover_buf;
- rc->rc_result = dlm_recover_status(ls);
+ rc->rc_result = cpu_to_le32(dlm_recover_status(ls));
goto out;
}
@@ -208,7 +206,7 @@ retry:
rc = ls->ls_recover_buf;
- if (rc->rc_result == -ESRCH) {
+ if (rc->rc_result == cpu_to_le32(-ESRCH)) {
/* we pretend the remote lockspace exists with 0 status */
log_debug(ls, "remote node %d not ready", nodeid);
rc->rc_result = 0;
@@ -227,7 +225,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
struct dlm_rcom *rc;
struct rcom_status *rs;
uint32_t status;
- int nodeid = rc_in->rc_header.h_nodeid;
+ int nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid);
int len = sizeof(struct rcom_config);
struct dlm_msg *msg;
int num_slots = 0;
@@ -259,7 +257,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
- rc->rc_result = status;
+ rc->rc_result = cpu_to_le32(status);
set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, num_slots);
@@ -287,14 +285,16 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
spin_lock(&ls->ls_rcom_spin);
if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) ||
- rc_in->rc_id != ls->ls_rcom_seq) {
+ le64_to_cpu(rc_in->rc_id) != ls->ls_rcom_seq) {
log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
- rc_in->rc_type, rc_in->rc_header.h_nodeid,
- (unsigned long long)rc_in->rc_id,
+ le32_to_cpu(rc_in->rc_type),
+ le32_to_cpu(rc_in->rc_header.h_nodeid),
+ (unsigned long long)le64_to_cpu(rc_in->rc_id),
(unsigned long long)ls->ls_rcom_seq);
goto out;
}
- memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
+ memcpy(ls->ls_recover_buf, rc_in,
+ le16_to_cpu(rc_in->rc_header.h_length));
set_bit(LSFL_RCOM_READY, &ls->ls_flags);
clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
wake_up(&ls->ls_wait_general);
@@ -336,8 +336,9 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
int error, inlen, outlen, nodeid;
struct dlm_msg *msg;
- nodeid = rc_in->rc_header.h_nodeid;
- inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
+ nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid);
+ inlen = le16_to_cpu(rc_in->rc_header.h_length) -
+ sizeof(struct dlm_rcom);
outlen = DLM_MAX_APP_BUFSIZE - sizeof(struct dlm_rcom);
error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen,
@@ -364,7 +365,7 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
if (error)
goto out;
memcpy(rc->rc_buf, r->res_name, r->res_length);
- rc->rc_id = (unsigned long) r->res_id;
+ rc->rc_id = cpu_to_le64(r->res_id);
send_rcom(mh, rc);
out:
@@ -375,11 +376,12 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
- int error, ret_nodeid, nodeid = rc_in->rc_header.h_nodeid;
- int len = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
+ int error, ret_nodeid, nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid);
+ int len = le16_to_cpu(rc_in->rc_header.h_length) -
+ sizeof(struct dlm_rcom);
/* Old code would send this special id to trigger a debug dump. */
- if (rc_in->rc_id == 0xFFFFFFFF) {
+ if (rc_in->rc_id == cpu_to_le64(0xFFFFFFFF)) {
log_error(ls, "receive_rcom_lookup dump from %d", nodeid);
dlm_dump_rsb_name(ls, rc_in->rc_buf, len);
return;
@@ -393,7 +395,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
DLM_LU_RECOVER_MASTER, &ret_nodeid, NULL);
if (error)
ret_nodeid = error;
- rc->rc_result = ret_nodeid;
+ rc->rc_result = cpu_to_le32(ret_nodeid);
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
@@ -452,7 +454,7 @@ int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
rl = (struct rcom_lock *) rc->rc_buf;
pack_rcom_lock(r, lkb, rl);
- rc->rc_id = (unsigned long) r;
+ rc->rc_id = cpu_to_le64((uintptr_t)r);
send_rcom(mh, rc);
out:
@@ -464,7 +466,7 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
- int error, nodeid = rc_in->rc_header.h_nodeid;
+ int error, nodeid = le32_to_cpu(rc_in->rc_header.h_nodeid);
dlm_recover_master_copy(ls, rc_in);
@@ -500,21 +502,20 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
rc = (struct dlm_rcom *) mb;
- rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+ rc->rc_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
rc->rc_header.u.h_lockspace = rc_in->rc_header.u.h_lockspace;
- rc->rc_header.h_nodeid = dlm_our_nodeid();
- rc->rc_header.h_length = mb_len;
+ rc->rc_header.h_nodeid = cpu_to_le32(dlm_our_nodeid());
+ rc->rc_header.h_length = cpu_to_le16(mb_len);
rc->rc_header.h_cmd = DLM_RCOM;
- rc->rc_type = DLM_RCOM_STATUS_REPLY;
+ rc->rc_type = cpu_to_le32(DLM_RCOM_STATUS_REPLY);
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
- rc->rc_result = -ESRCH;
+ rc->rc_result = cpu_to_le32(-ESRCH);
rf = (struct rcom_config *) rc->rc_buf;
rf->rf_lvblen = cpu_to_le32(~0U);
- dlm_rcom_out(rc);
dlm_midcomms_commit_mhandle(mh);
return 0;
@@ -573,27 +574,27 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
uint64_t seq;
switch (rc->rc_type) {
- case DLM_RCOM_STATUS_REPLY:
+ case cpu_to_le32(DLM_RCOM_STATUS_REPLY):
reply = 1;
break;
- case DLM_RCOM_NAMES:
+ case cpu_to_le32(DLM_RCOM_NAMES):
names = 1;
break;
- case DLM_RCOM_NAMES_REPLY:
+ case cpu_to_le32(DLM_RCOM_NAMES_REPLY):
names = 1;
reply = 1;
break;
- case DLM_RCOM_LOOKUP:
+ case cpu_to_le32(DLM_RCOM_LOOKUP):
lookup = 1;
break;
- case DLM_RCOM_LOOKUP_REPLY:
+ case cpu_to_le32(DLM_RCOM_LOOKUP_REPLY):
lookup = 1;
reply = 1;
break;
- case DLM_RCOM_LOCK:
+ case cpu_to_le32(DLM_RCOM_LOCK):
lock = 1;
break;
- case DLM_RCOM_LOCK_REPLY:
+ case cpu_to_le32(DLM_RCOM_LOCK_REPLY):
lock = 1;
reply = 1;
break;
@@ -601,14 +602,14 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
spin_lock(&ls->ls_recover_lock);
status = ls->ls_recover_status;
- stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
+ stop = dlm_recovery_stopped(ls);
seq = ls->ls_recover_seq;
spin_unlock(&ls->ls_recover_lock);
- if (stop && (rc->rc_type != DLM_RCOM_STATUS))
+ if (stop && (rc->rc_type != cpu_to_le32(DLM_RCOM_STATUS)))
goto ignore;
- if (reply && (rc->rc_seq_reply != seq))
+ if (reply && (le64_to_cpu(rc->rc_seq_reply) != seq))
goto ignore;
if (!(status & DLM_RS_NODES) && (names || lookup || lock))
@@ -618,59 +619,60 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
goto ignore;
switch (rc->rc_type) {
- case DLM_RCOM_STATUS:
+ case cpu_to_le32(DLM_RCOM_STATUS):
receive_rcom_status(ls, rc);
break;
- case DLM_RCOM_NAMES:
+ case cpu_to_le32(DLM_RCOM_NAMES):
receive_rcom_names(ls, rc);
break;
- case DLM_RCOM_LOOKUP:
+ case cpu_to_le32(DLM_RCOM_LOOKUP):
receive_rcom_lookup(ls, rc);
break;
- case DLM_RCOM_LOCK:
- if (rc->rc_header.h_length < lock_size)
+ case cpu_to_le32(DLM_RCOM_LOCK):
+ if (le16_to_cpu(rc->rc_header.h_length) < lock_size)
goto Eshort;
receive_rcom_lock(ls, rc);
break;
- case DLM_RCOM_STATUS_REPLY:
+ case cpu_to_le32(DLM_RCOM_STATUS_REPLY):
receive_sync_reply(ls, rc);
break;
- case DLM_RCOM_NAMES_REPLY:
+ case cpu_to_le32(DLM_RCOM_NAMES_REPLY):
receive_sync_reply(ls, rc);
break;
- case DLM_RCOM_LOOKUP_REPLY:
+ case cpu_to_le32(DLM_RCOM_LOOKUP_REPLY):
receive_rcom_lookup_reply(ls, rc);
break;
- case DLM_RCOM_LOCK_REPLY:
- if (rc->rc_header.h_length < lock_size)
+ case cpu_to_le32(DLM_RCOM_LOCK_REPLY):
+ if (le16_to_cpu(rc->rc_header.h_length) < lock_size)
goto Eshort;
dlm_recover_process_copy(ls, rc);
break;
default:
- log_error(ls, "receive_rcom bad type %d", rc->rc_type);
+ log_error(ls, "receive_rcom bad type %d",
+ le32_to_cpu(rc->rc_type));
}
return;
ignore:
log_limit(ls, "dlm_receive_rcom ignore msg %d "
"from %d %llu %llu recover seq %llu sts %x gen %u",
- rc->rc_type,
+ le32_to_cpu(rc->rc_type),
nodeid,
- (unsigned long long)rc->rc_seq,
- (unsigned long long)rc->rc_seq_reply,
+ (unsigned long long)le64_to_cpu(rc->rc_seq),
+ (unsigned long long)le64_to_cpu(rc->rc_seq_reply),
(unsigned long long)seq,
status, ls->ls_generation);
return;
Eshort:
log_error(ls, "recovery message %d from %d is too short",
- rc->rc_type, nodeid);
+ le32_to_cpu(rc->rc_type), nodeid);
}
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index 8928e99dfd47..ccff1791803f 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -114,7 +114,7 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status,
if (save_slots)
dlm_slot_save(ls, rc, memb);
- if (rc->rc_result & wait_status)
+ if (le32_to_cpu(rc->rc_result) & wait_status)
break;
if (delay < 1000)
delay += 20;
@@ -141,7 +141,7 @@ static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status,
if (error)
break;
- if (rc->rc_result & wait_status)
+ if (le32_to_cpu(rc->rc_result) & wait_status)
break;
if (delay < 1000)
delay += 20;
@@ -568,14 +568,14 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
struct dlm_rsb *r;
int ret_nodeid, new_master;
- r = recover_idr_find(ls, rc->rc_id);
+ r = recover_idr_find(ls, le64_to_cpu(rc->rc_id));
if (!r) {
log_error(ls, "dlm_recover_master_reply no id %llx",
- (unsigned long long)rc->rc_id);
+ (unsigned long long)le64_to_cpu(rc->rc_id));
goto out;
}
- ret_nodeid = rc->rc_result;
+ ret_nodeid = le32_to_cpu(rc->rc_result);
if (ret_nodeid == dlm_our_nodeid())
new_master = 0;
@@ -732,10 +732,9 @@ void dlm_recovered_lock(struct dlm_rsb *r)
static void recover_lvb(struct dlm_rsb *r)
{
- struct dlm_lkb *lkb, *high_lkb = NULL;
+ struct dlm_lkb *big_lkb = NULL, *iter, *high_lkb = NULL;
uint32_t high_seq = 0;
int lock_lvb_exists = 0;
- int big_lock_exists = 0;
int lvblen = r->res_ls->ls_lvblen;
if (!rsb_flag(r, RSB_NEW_MASTER2) &&
@@ -751,37 +750,37 @@ static void recover_lvb(struct dlm_rsb *r)
/* we are the new master, so figure out if VALNOTVALID should
be set, and set the rsb lvb from the best lkb available. */
- list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
- if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
+ list_for_each_entry(iter, &r->res_grantqueue, lkb_statequeue) {
+ if (!(iter->lkb_exflags & DLM_LKF_VALBLK))
continue;
lock_lvb_exists = 1;
- if (lkb->lkb_grmode > DLM_LOCK_CR) {
- big_lock_exists = 1;
+ if (iter->lkb_grmode > DLM_LOCK_CR) {
+ big_lkb = iter;
goto setflag;
}
- if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
- high_lkb = lkb;
- high_seq = lkb->lkb_lvbseq;
+ if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) {
+ high_lkb = iter;
+ high_seq = iter->lkb_lvbseq;
}
}
- list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
- if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
+ list_for_each_entry(iter, &r->res_convertqueue, lkb_statequeue) {
+ if (!(iter->lkb_exflags & DLM_LKF_VALBLK))
continue;
lock_lvb_exists = 1;
- if (lkb->lkb_grmode > DLM_LOCK_CR) {
- big_lock_exists = 1;
+ if (iter->lkb_grmode > DLM_LOCK_CR) {
+ big_lkb = iter;
goto setflag;
}
- if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
- high_lkb = lkb;
- high_seq = lkb->lkb_lvbseq;
+ if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) {
+ high_lkb = iter;
+ high_seq = iter->lkb_lvbseq;
}
}
@@ -790,7 +789,7 @@ static void recover_lvb(struct dlm_rsb *r)
goto out;
/* lvb is invalidated if only NL/CR locks remain */
- if (!big_lock_exists)
+ if (!big_lkb)
rsb_set_flag(r, RSB_VALNOTVALID);
if (!r->res_lvbptr) {
@@ -799,9 +798,9 @@ static void recover_lvb(struct dlm_rsb *r)
goto out;
}
- if (big_lock_exists) {
- r->res_lvbseq = lkb->lkb_lvbseq;
- memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
+ if (big_lkb) {
+ r->res_lvbseq = big_lkb->lkb_lvbseq;
+ memcpy(r->res_lvbptr, big_lkb->lkb_lvbptr, lvblen);
} else if (high_lkb) {
r->res_lvbseq = high_lkb->lkb_lvbseq;
memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 97d052cea5a9..e15eb511b04b 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -70,6 +70,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
/*
* Add or remove nodes from the lockspace's ls_nodes list.
+ *
+ * Due to the fact that we must report all membership changes to lsops
+ * or midcomms layer, it is not permitted to abort ls_recover() until
+ * this is done.
*/
error = dlm_recover_members(ls, rv, &neg);
@@ -124,8 +128,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
dlm_recover_waiters_pre(ls);
- error = dlm_recovery_stopped(ls);
- if (error) {
+ if (dlm_recovery_stopped(ls)) {
error = -EINTR;
goto fail;
}
@@ -240,14 +243,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
jiffies_to_msecs(jiffies - start));
mutex_unlock(&ls->ls_recoverd_active);
- dlm_lsop_recover_done(ls);
return 0;
fail:
dlm_release_root_list(ls);
- log_rinfo(ls, "dlm_recover %llu error %d",
- (unsigned long long)rv->seq, error);
mutex_unlock(&ls->ls_recoverd_active);
+
return error;
}
@@ -258,6 +259,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
static void do_ls_recovery(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL;
+ int error;
spin_lock(&ls->ls_recover_lock);
rv = ls->ls_recover_args;
@@ -267,7 +269,31 @@ static void do_ls_recovery(struct dlm_ls *ls)
spin_unlock(&ls->ls_recover_lock);
if (rv) {
- ls_recover(ls, rv);
+ error = ls_recover(ls, rv);
+ switch (error) {
+ case 0:
+ ls->ls_recovery_result = 0;
+ complete(&ls->ls_recovery_done);
+
+ dlm_lsop_recover_done(ls);
+ break;
+ case -EINTR:
+ /* if recovery was interrupted -EINTR we wait for the next
+ * ls_recover() iteration until it hopefully succeeds.
+ */
+ log_rinfo(ls, "%s %llu interrupted and should be queued to run again",
+ __func__, (unsigned long long)rv->seq);
+ break;
+ default:
+ log_rinfo(ls, "%s %llu error %d", __func__,
+ (unsigned long long)rv->seq, error);
+
+ /* let new_lockspace() get aware of critical error */
+ ls->ls_recovery_result = error;
+ complete(&ls->ls_recovery_done);
+ break;
+ }
+
kfree(rv->nodes);
kfree(rv);
}
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index e89e0ff8bfa3..036a9a0078f6 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -14,6 +14,7 @@
#include "dir.h"
#include "config.h"
#include "requestqueue.h"
+#include "util.h"
struct rq_entry {
struct list_head list;
@@ -32,7 +33,8 @@ struct rq_entry {
void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
{
struct rq_entry *e;
- int length = ms->m_header.h_length - sizeof(struct dlm_message);
+ int length = le16_to_cpu(ms->m_header.h_length) -
+ sizeof(struct dlm_message);
e = kmalloc(sizeof(struct rq_entry) + length, GFP_NOFS);
if (!e) {
@@ -42,8 +44,9 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
e->recover_seq = ls->ls_recover_seq & 0xFFFFFFFF;
e->nodeid = nodeid;
- memcpy(&e->request, ms, ms->m_header.h_length);
+ memcpy(&e->request, ms, le16_to_cpu(ms->m_header.h_length));
+ atomic_inc(&ls->ls_requestqueue_cnt);
mutex_lock(&ls->ls_requestqueue_mutex);
list_add_tail(&e->list, &ls->ls_requestqueue);
mutex_unlock(&ls->ls_requestqueue_mutex);
@@ -81,14 +84,18 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
log_limit(ls, "dlm_process_requestqueue msg %d from %d "
"lkid %x remid %x result %d seq %u",
- ms->m_type, ms->m_header.h_nodeid,
- ms->m_lkid, ms->m_remid, ms->m_result,
+ le32_to_cpu(ms->m_type),
+ le32_to_cpu(ms->m_header.h_nodeid),
+ le32_to_cpu(ms->m_lkid), le32_to_cpu(ms->m_remid),
+ from_dlm_errno(le32_to_cpu(ms->m_result)),
e->recover_seq);
dlm_receive_message_saved(ls, &e->request, e->recover_seq);
mutex_lock(&ls->ls_requestqueue_mutex);
list_del(&e->list);
+ if (atomic_dec_and_test(&ls->ls_requestqueue_cnt))
+ wake_up(&ls->ls_requestqueue_wait);
kfree(e);
if (dlm_locking_stopped(ls)) {
@@ -115,22 +122,16 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
void dlm_wait_requestqueue(struct dlm_ls *ls)
{
- for (;;) {
- mutex_lock(&ls->ls_requestqueue_mutex);
- if (list_empty(&ls->ls_requestqueue))
- break;
- mutex_unlock(&ls->ls_requestqueue_mutex);
- schedule();
- }
- mutex_unlock(&ls->ls_requestqueue_mutex);
+ wait_event(ls->ls_requestqueue_wait,
+ atomic_read(&ls->ls_requestqueue_cnt) == 0);
}
static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
{
- uint32_t type = ms->m_type;
+ __le32 type = ms->m_type;
/* the ls is being cleaned up and freed by release_lockspace */
- if (!ls->ls_count)
+ if (!atomic_read(&ls->ls_count))
return 1;
if (dlm_is_removed(ls, nodeid))
@@ -139,9 +140,9 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
/* directory operations are always purged because the directory is
always rebuilt during recovery and the lookups resent */
- if (type == DLM_MSG_REMOVE ||
- type == DLM_MSG_LOOKUP ||
- type == DLM_MSG_LOOKUP_REPLY)
+ if (type == cpu_to_le32(DLM_MSG_REMOVE) ||
+ type == cpu_to_le32(DLM_MSG_LOOKUP) ||
+ type == cpu_to_le32(DLM_MSG_LOOKUP_REPLY))
return 1;
if (!dlm_no_directory(ls))
@@ -161,6 +162,8 @@ void dlm_purge_requestqueue(struct dlm_ls *ls)
if (purge_request(ls, ms, e->nodeid)) {
list_del(&e->list);
+ if (atomic_dec_and_test(&ls->ls_requestqueue_cnt))
+ wake_up(&ls->ls_requestqueue_wait);
kfree(e);
}
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index e5cefa90b1ce..c5d27bccc3dc 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -16,6 +16,8 @@
#include <linux/slab.h>
#include <linux/sched/signal.h>
+#include <trace/events/dlm.h>
+
#include "dlm_internal.h"
#include "lockspace.h"
#include "lock.h"
@@ -108,11 +110,11 @@ static void compat_input(struct dlm_write_request *kb,
kb->i.lock.parent = kb32->i.lock.parent;
kb->i.lock.xid = kb32->i.lock.xid;
kb->i.lock.timeout = kb32->i.lock.timeout;
- kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
- kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
- kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
- kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr;
- kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb;
+ kb->i.lock.castparam = (__user void *)(long)kb32->i.lock.castparam;
+ kb->i.lock.castaddr = (__user void *)(long)kb32->i.lock.castaddr;
+ kb->i.lock.bastparam = (__user void *)(long)kb32->i.lock.bastparam;
+ kb->i.lock.bastaddr = (__user void *)(long)kb32->i.lock.bastaddr;
+ kb->i.lock.lksb = (__user void *)(long)kb32->i.lock.lksb;
memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
memcpy(kb->i.lock.name, kb32->i.lock.name, namelen);
}
@@ -127,9 +129,9 @@ static void compat_output(struct dlm_lock_result *res,
res32->version[1] = res->version[1];
res32->version[2] = res->version[2];
- res32->user_astaddr = (__u32)(long)res->user_astaddr;
- res32->user_astparam = (__u32)(long)res->user_astparam;
- res32->user_lksb = (__u32)(long)res->user_lksb;
+ res32->user_astaddr = (__u32)(__force long)res->user_astaddr;
+ res32->user_astparam = (__u32)(__force long)res->user_astparam;
+ res32->user_lksb = (__u32)(__force long)res->user_lksb;
res32->bast_mode = res->bast_mode;
res32->lvb_offset = res->lvb_offset;
@@ -184,7 +186,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
return;
ls = lkb->lkb_resource->res_ls;
- mutex_lock(&ls->ls_clear_proc_locks);
+ spin_lock(&ls->ls_clear_proc_locks);
/* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed
@@ -230,7 +232,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
spin_unlock(&proc->locks_spin);
}
out:
- mutex_unlock(&ls->ls_clear_proc_locks);
+ spin_unlock(&ls->ls_clear_proc_locks);
}
static int device_user_lock(struct dlm_user_proc *proc,
@@ -250,6 +252,14 @@ static int device_user_lock(struct dlm_user_proc *proc,
goto out;
}
+#ifdef CONFIG_DLM_DEPRECATED_API
+ if (params->timeout)
+ pr_warn_once("========================================================\n"
+ "WARNING: the lkb timeout feature is being deprecated and\n"
+ " will be removed in v6.2!\n"
+ "========================================================\n");
+#endif
+
ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
if (!ua)
goto out;
@@ -262,23 +272,34 @@ static int device_user_lock(struct dlm_user_proc *proc,
ua->xid = params->xid;
if (params->flags & DLM_LKF_CONVERT) {
+#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
params->lkid, params->lvb,
(unsigned long) params->timeout);
+#else
+ error = dlm_user_convert(ls, ua,
+ params->mode, params->flags,
+ params->lkid, params->lvb);
+#endif
} else if (params->flags & DLM_LKF_ORPHAN) {
error = dlm_user_adopt_orphan(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
- (unsigned long) params->timeout,
&lkid);
if (!error)
error = lkid;
} else {
+#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
(unsigned long) params->timeout);
+#else
+ error = dlm_user_request(ls, ua,
+ params->mode, params->flags,
+ params->name, params->namelen);
+#endif
if (!error)
error = ua->lksb.sb_lkid;
}
@@ -402,9 +423,9 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- error = dlm_new_lockspace(params->name, dlm_config.ci_cluster_name, params->flags,
- DLM_USER_LVB_LEN, NULL, NULL, NULL,
- &lockspace);
+ error = dlm_new_user_lockspace(params->name, dlm_config.ci_cluster_name,
+ params->flags, DLM_USER_LVB_LEN, NULL,
+ NULL, NULL, &lockspace);
if (error)
return error;
@@ -863,7 +884,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
goto try_another;
}
- if (cb.flags & DLM_CB_CAST) {
+ if (cb.flags & DLM_CB_BAST) {
+ trace_dlm_bast(lkb->lkb_resource->res_ls, lkb, cb.mode);
+ } else if (cb.flags & DLM_CB_CAST) {
new_mode = cb.mode;
if (!cb.sb_status && lkb->lkb_lksb->sb_lvbptr &&
@@ -872,6 +895,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
lkb->lkb_lksb->sb_status = cb.sb_status;
lkb->lkb_lksb->sb_flags = cb.sb_flags;
+ trace_dlm_ast(lkb->lkb_resource->res_ls, lkb);
}
rv = copy_result_to_user(lkb->lkb_ua,
diff --git a/fs/dlm/util.c b/fs/dlm/util.c
index 58acbcc2081a..f2bc401f312f 100644
--- a/fs/dlm/util.c
+++ b/fs/dlm/util.c
@@ -20,28 +20,10 @@
#define DLM_ERRNO_ETIMEDOUT 110
#define DLM_ERRNO_EINPROGRESS 115
-void header_out(struct dlm_header *hd)
-{
- hd->h_version = cpu_to_le32(hd->h_version);
- /* does it for others u32 in union as well */
- hd->u.h_lockspace = cpu_to_le32(hd->u.h_lockspace);
- hd->h_nodeid = cpu_to_le32(hd->h_nodeid);
- hd->h_length = cpu_to_le16(hd->h_length);
-}
-
-void header_in(struct dlm_header *hd)
-{
- hd->h_version = le32_to_cpu(hd->h_version);
- /* does it for others u32 in union as well */
- hd->u.h_lockspace = le32_to_cpu(hd->u.h_lockspace);
- hd->h_nodeid = le32_to_cpu(hd->h_nodeid);
- hd->h_length = le16_to_cpu(hd->h_length);
-}
-
/* higher errno values are inconsistent across architectures, so select
one set of values for on the wire */
-static int to_dlm_errno(int err)
+int to_dlm_errno(int err)
{
switch (err) {
case -EDEADLK:
@@ -62,7 +44,7 @@ static int to_dlm_errno(int err)
return err;
}
-static int from_dlm_errno(int err)
+int from_dlm_errno(int err)
{
switch (err) {
case -DLM_ERRNO_EDEADLK:
@@ -82,73 +64,3 @@ static int from_dlm_errno(int err)
}
return err;
}
-
-void dlm_message_out(struct dlm_message *ms)
-{
- header_out(&ms->m_header);
-
- ms->m_type = cpu_to_le32(ms->m_type);
- ms->m_nodeid = cpu_to_le32(ms->m_nodeid);
- ms->m_pid = cpu_to_le32(ms->m_pid);
- ms->m_lkid = cpu_to_le32(ms->m_lkid);
- ms->m_remid = cpu_to_le32(ms->m_remid);
- ms->m_parent_lkid = cpu_to_le32(ms->m_parent_lkid);
- ms->m_parent_remid = cpu_to_le32(ms->m_parent_remid);
- ms->m_exflags = cpu_to_le32(ms->m_exflags);
- ms->m_sbflags = cpu_to_le32(ms->m_sbflags);
- ms->m_flags = cpu_to_le32(ms->m_flags);
- ms->m_lvbseq = cpu_to_le32(ms->m_lvbseq);
- ms->m_hash = cpu_to_le32(ms->m_hash);
- ms->m_status = cpu_to_le32(ms->m_status);
- ms->m_grmode = cpu_to_le32(ms->m_grmode);
- ms->m_rqmode = cpu_to_le32(ms->m_rqmode);
- ms->m_bastmode = cpu_to_le32(ms->m_bastmode);
- ms->m_asts = cpu_to_le32(ms->m_asts);
- ms->m_result = cpu_to_le32(to_dlm_errno(ms->m_result));
-}
-
-void dlm_message_in(struct dlm_message *ms)
-{
- header_in(&ms->m_header);
-
- ms->m_type = le32_to_cpu(ms->m_type);
- ms->m_nodeid = le32_to_cpu(ms->m_nodeid);
- ms->m_pid = le32_to_cpu(ms->m_pid);
- ms->m_lkid = le32_to_cpu(ms->m_lkid);
- ms->m_remid = le32_to_cpu(ms->m_remid);
- ms->m_parent_lkid = le32_to_cpu(ms->m_parent_lkid);
- ms->m_parent_remid = le32_to_cpu(ms->m_parent_remid);
- ms->m_exflags = le32_to_cpu(ms->m_exflags);
- ms->m_sbflags = le32_to_cpu(ms->m_sbflags);
- ms->m_flags = le32_to_cpu(ms->m_flags);
- ms->m_lvbseq = le32_to_cpu(ms->m_lvbseq);
- ms->m_hash = le32_to_cpu(ms->m_hash);
- ms->m_status = le32_to_cpu(ms->m_status);
- ms->m_grmode = le32_to_cpu(ms->m_grmode);
- ms->m_rqmode = le32_to_cpu(ms->m_rqmode);
- ms->m_bastmode = le32_to_cpu(ms->m_bastmode);
- ms->m_asts = le32_to_cpu(ms->m_asts);
- ms->m_result = from_dlm_errno(le32_to_cpu(ms->m_result));
-}
-
-void dlm_rcom_out(struct dlm_rcom *rc)
-{
- header_out(&rc->rc_header);
-
- rc->rc_type = cpu_to_le32(rc->rc_type);
- rc->rc_result = cpu_to_le32(rc->rc_result);
- rc->rc_id = cpu_to_le64(rc->rc_id);
- rc->rc_seq = cpu_to_le64(rc->rc_seq);
- rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
-}
-
-void dlm_rcom_in(struct dlm_rcom *rc)
-{
- header_in(&rc->rc_header);
-
- rc->rc_type = le32_to_cpu(rc->rc_type);
- rc->rc_result = le32_to_cpu(rc->rc_result);
- rc->rc_id = le64_to_cpu(rc->rc_id);
- rc->rc_seq = le64_to_cpu(rc->rc_seq);
- rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
-}
diff --git a/fs/dlm/util.h b/fs/dlm/util.h
index d46f23c7a6a0..b6a4b8adca8d 100644
--- a/fs/dlm/util.h
+++ b/fs/dlm/util.h
@@ -11,12 +11,8 @@
#ifndef __UTIL_DOT_H__
#define __UTIL_DOT_H__
-void dlm_message_out(struct dlm_message *ms);
-void dlm_message_in(struct dlm_message *ms);
-void dlm_rcom_out(struct dlm_rcom *rc);
-void dlm_rcom_in(struct dlm_rcom *rc);
-void header_out(struct dlm_header *hd);
-void header_in(struct dlm_header *hd);
+int to_dlm_errno(int err);
+int from_dlm_errno(int err);
#endif