aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dlm/lowcomms.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm/lowcomms.c')
-rw-r--r--fs/dlm/lowcomms.c770
1 files changed, 386 insertions, 384 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0ea9ae35da0b..8f715c620e1f 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -84,9 +84,7 @@ struct connection {
struct list_head writequeue; /* List of outgoing writequeue_entries */
spinlock_t writequeue_lock;
atomic_t writequeue_cnt;
- void (*connect_action) (struct connection *); /* What to do to connect */
- void (*shutdown_action)(struct connection *con); /* What to do to shutdown */
- bool (*eof_condition)(struct connection *con); /* What to do to eof check */
+ struct mutex wq_alloc;
int retries;
#define MAX_CONNECT_RETRIES 3
struct hlist_node list;
@@ -145,6 +143,24 @@ struct dlm_node_addr {
struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
};
+struct dlm_proto_ops {
+ bool try_new_addr;
+ const char *name;
+ int proto;
+
+ int (*connect)(struct connection *con, struct socket *sock,
+ struct sockaddr *addr, int addr_len);
+ void (*sockopts)(struct socket *sock);
+ int (*bind)(struct socket *sock);
+ int (*listen_validate)(void);
+ void (*listen_sockopts)(struct socket *sock);
+ int (*listen_bind)(struct socket *sock);
+ /* What to do to shutdown */
+ void (*shutdown_action)(struct connection *con);
+ /* What to do to eof check */
+ bool (*eof_condition)(struct connection *con);
+};
+
static struct listen_sock_callbacks {
void (*sk_error_report)(struct sock *);
void (*sk_data_ready)(struct sock *);
@@ -168,12 +184,26 @@ static struct hlist_head connection_hash[CONN_HASH_SIZE];
static DEFINE_SPINLOCK(connections_lock);
DEFINE_STATIC_SRCU(connections_srcu);
+static const struct dlm_proto_ops *dlm_proto_ops;
+
static void process_recv_sockets(struct work_struct *work);
static void process_send_sockets(struct work_struct *work);
-static void sctp_connect_to_sock(struct connection *con);
-static void tcp_connect_to_sock(struct connection *con);
-static void dlm_tcp_shutdown(struct connection *con);
+/* need to held writequeue_lock */
+static struct writequeue_entry *con_next_wq(struct connection *con)
+{
+ struct writequeue_entry *e;
+
+ if (list_empty(&con->writequeue))
+ return NULL;
+
+ e = list_first_entry(&con->writequeue, struct writequeue_entry,
+ list);
+ if (e->len == 0)
+ return NULL;
+
+ return e;
+}
static struct connection *__find_con(int nodeid, int r)
{
@@ -208,20 +238,6 @@ static int dlm_con_init(struct connection *con, int nodeid)
INIT_WORK(&con->rwork, process_recv_sockets);
init_waitqueue_head(&con->shutdown_wait);
- switch (dlm_config.ci_protocol) {
- case DLM_PROTO_TCP:
- con->connect_action = tcp_connect_to_sock;
- con->shutdown_action = dlm_tcp_shutdown;
- con->eof_condition = tcp_eof_condition;
- break;
- case DLM_PROTO_SCTP:
- con->connect_action = sctp_connect_to_sock;
- break;
- default:
- kfree(con->rx_buf);
- return -EINVAL;
- }
-
return 0;
}
@@ -249,6 +265,8 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc)
return NULL;
}
+ mutex_init(&con->wq_alloc);
+
spin_lock(&connections_lock);
/* Because multiple workqueues/threads calls this function it can
* race on multiple cpu's. Instead of locking hot path __find_con()
@@ -583,8 +601,7 @@ static void lowcomms_error_report(struct sock *sk)
goto out;
orig_report = listen_sock.sk_error_report;
- if (con->sock == NULL ||
- kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
+ if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) {
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d, port %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
@@ -801,6 +818,7 @@ static void close_connection(struct connection *con, bool and_other,
con->rx_leftover = 0;
con->retries = 0;
+ clear_bit(CF_APP_LIMITED, &con->flags);
clear_bit(CF_CONNECTED, &con->flags);
clear_bit(CF_DELAY_CONNECT, &con->flags);
clear_bit(CF_RECONNECT, &con->flags);
@@ -877,7 +895,6 @@ static int con_realloc_receive_buf(struct connection *con, int newlen)
/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
- int call_again_soon = 0;
struct msghdr msg;
struct kvec iov;
int ret, buflen;
@@ -897,41 +914,40 @@ static int receive_from_sock(struct connection *con)
goto out_resched;
}
- /* calculate new buffer parameter regarding last receive and
- * possible leftover bytes
- */
- iov.iov_base = con->rx_buf + con->rx_leftover;
- iov.iov_len = con->rx_buflen - con->rx_leftover;
-
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
- ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
- msg.msg_flags);
- if (ret <= 0)
- goto out_close;
- else if (ret == iov.iov_len)
- call_again_soon = 1;
-
- /* new buflen according readed bytes and leftover from last receive */
- buflen = ret + con->rx_leftover;
- ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
- if (ret < 0)
- goto out_close;
+ for (;;) {
+ /* calculate new buffer parameter regarding last receive and
+ * possible leftover bytes
+ */
+ iov.iov_base = con->rx_buf + con->rx_leftover;
+ iov.iov_len = con->rx_buflen - con->rx_leftover;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
+ ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
+ msg.msg_flags);
+ if (ret == -EAGAIN)
+ break;
+ else if (ret <= 0)
+ goto out_close;
- /* calculate leftover bytes from process and put it into begin of
- * the receive buffer, so next receive we have the full message
- * at the start address of the receive buffer.
- */
- con->rx_leftover = buflen - ret;
- if (con->rx_leftover) {
- memmove(con->rx_buf, con->rx_buf + ret,
- con->rx_leftover);
- call_again_soon = true;
+ /* new buflen according readed bytes and leftover from last receive */
+ buflen = ret + con->rx_leftover;
+ ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
+ if (ret < 0)
+ goto out_close;
+
+ /* calculate leftover bytes from process and put it into begin of
+ * the receive buffer, so next receive we have the full message
+ * at the start address of the receive buffer.
+ */
+ con->rx_leftover = buflen - ret;
+ if (con->rx_leftover) {
+ memmove(con->rx_buf, con->rx_buf + ret,
+ con->rx_leftover);
+ }
}
- if (call_again_soon)
- goto out_resched;
-
+ dlm_midcomms_receive_done(con->nodeid);
mutex_unlock(&con->sock_mutex);
return 0;
@@ -946,7 +962,8 @@ out_close:
log_print("connection %p got EOF from %d",
con, con->nodeid);
- if (con->eof_condition && con->eof_condition(con)) {
+ if (dlm_proto_ops->eof_condition &&
+ dlm_proto_ops->eof_condition(con)) {
set_bit(CF_EOF, &con->flags);
mutex_unlock(&con->sock_mutex);
} else {
@@ -1134,242 +1151,6 @@ static int sctp_bind_addrs(struct socket *sock, uint16_t port)
return result;
}
-/* Initiate an SCTP association.
- This is a special case of send_to_sock() in that we don't yet have a
- peeled-off socket for this association, so we use the listening socket
- and add the primary IP address of the remote node.
- */
-static void sctp_connect_to_sock(struct connection *con)
-{
- struct sockaddr_storage daddr;
- int result;
- int addr_len;
- struct socket *sock;
- unsigned int mark;
-
- mutex_lock(&con->sock_mutex);
-
- /* Some odd races can cause double-connects, ignore them */
- if (con->retries++ > MAX_CONNECT_RETRIES)
- goto out;
-
- if (con->sock) {
- log_print("node %d already connected.", con->nodeid);
- goto out;
- }
-
- memset(&daddr, 0, sizeof(daddr));
- result = nodeid_to_addr(con->nodeid, &daddr, NULL, true, &mark);
- if (result < 0) {
- log_print("no address for nodeid %d", con->nodeid);
- goto out;
- }
-
- /* Create a socket to communicate with */
- result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
- SOCK_STREAM, IPPROTO_SCTP, &sock);
- if (result < 0)
- goto socket_err;
-
- sock_set_mark(sock->sk, mark);
-
- add_sock(sock, con);
-
- /* Bind to all addresses. */
- if (sctp_bind_addrs(con->sock, 0))
- goto bind_err;
-
- make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len);
-
- log_print_ratelimited("connecting to %d", con->nodeid);
-
- /* Turn off Nagle's algorithm */
- sctp_sock_set_nodelay(sock->sk);
-
- /*
- * Make sock->ops->connect() function return in specified time,
- * since O_NONBLOCK argument in connect() function does not work here,
- * then, we should restore the default value of this attribute.
- */
- sock_set_sndtimeo(sock->sk, 5);
- result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
- 0);
- sock_set_sndtimeo(sock->sk, 0);
-
- if (result == -EINPROGRESS)
- result = 0;
- if (result == 0) {
- if (!test_and_set_bit(CF_CONNECTED, &con->flags))
- log_print("successful connected to node %d", con->nodeid);
- goto out;
- }
-
-bind_err:
- con->sock = NULL;
- sock_release(sock);
-
-socket_err:
- /*
- * Some errors are fatal and this list might need adjusting. For other
- * errors we try again until the max number of retries is reached.
- */
- if (result != -EHOSTUNREACH &&
- result != -ENETUNREACH &&
- result != -ENETDOWN &&
- result != -EINVAL &&
- result != -EPROTONOSUPPORT) {
- log_print("connect %d try %d error %d", con->nodeid,
- con->retries, result);
- mutex_unlock(&con->sock_mutex);
- msleep(1000);
- lowcomms_connect_sock(con);
- return;
- }
-
-out:
- mutex_unlock(&con->sock_mutex);
-}
-
-/* Connect a new socket to its peer */
-static void tcp_connect_to_sock(struct connection *con)
-{
- struct sockaddr_storage saddr, src_addr;
- unsigned int mark;
- int addr_len;
- struct socket *sock = NULL;
- int result;
-
- mutex_lock(&con->sock_mutex);
- if (con->retries++ > MAX_CONNECT_RETRIES)
- goto out;
-
- /* Some odd races can cause double-connects, ignore them */
- if (con->sock)
- goto out;
-
- /* Create a socket to communicate with */
- result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
- SOCK_STREAM, IPPROTO_TCP, &sock);
- if (result < 0)
- goto out_err;
-
- memset(&saddr, 0, sizeof(saddr));
- result = nodeid_to_addr(con->nodeid, &saddr, NULL, false, &mark);
- if (result < 0) {
- log_print("no address for nodeid %d", con->nodeid);
- goto out_err;
- }
-
- sock_set_mark(sock->sk, mark);
-
- add_sock(sock, con);
-
- /* Bind to our cluster-known address connecting to avoid
- routing problems */
- memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
- make_sockaddr(&src_addr, 0, &addr_len);
- result = sock->ops->bind(sock, (struct sockaddr *) &src_addr,
- addr_len);
- if (result < 0) {
- log_print("could not bind for connect: %d", result);
- /* This *may* not indicate a critical error */
- }
-
- make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
-
- log_print_ratelimited("connecting to %d", con->nodeid);
-
- /* Turn off Nagle's algorithm */
- tcp_sock_set_nodelay(sock->sk);
-
- result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
- O_NONBLOCK);
- if (result == -EINPROGRESS)
- result = 0;
- if (result == 0)
- goto out;
-
-out_err:
- if (con->sock) {
- sock_release(con->sock);
- con->sock = NULL;
- } else if (sock) {
- sock_release(sock);
- }
- /*
- * Some errors are fatal and this list might need adjusting. For other
- * errors we try again until the max number of retries is reached.
- */
- if (result != -EHOSTUNREACH &&
- result != -ENETUNREACH &&
- result != -ENETDOWN &&
- result != -EINVAL &&
- result != -EPROTONOSUPPORT) {
- log_print("connect %d try %d error %d", con->nodeid,
- con->retries, result);
- mutex_unlock(&con->sock_mutex);
- msleep(1000);
- lowcomms_connect_sock(con);
- return;
- }
-out:
- mutex_unlock(&con->sock_mutex);
- return;
-}
-
-/* On error caller must run dlm_close_sock() for the
- * listen connection socket.
- */
-static int tcp_create_listen_sock(struct listen_connection *con,
- struct sockaddr_storage *saddr)
-{
- struct socket *sock = NULL;
- int result = 0;
- int addr_len;
-
- if (dlm_local_addr[0]->ss_family == AF_INET)
- addr_len = sizeof(struct sockaddr_in);
- else
- addr_len = sizeof(struct sockaddr_in6);
-
- /* Create a socket to communicate with */
- result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
- SOCK_STREAM, IPPROTO_TCP, &sock);
- if (result < 0) {
- log_print("Can't create listening comms socket");
- goto create_out;
- }
-
- sock_set_mark(sock->sk, dlm_config.ci_mark);
-
- /* Turn off Nagle's algorithm */
- tcp_sock_set_nodelay(sock->sk);
-
- sock_set_reuseaddr(sock->sk);
-
- add_listen_sock(sock, con);
-
- /* Bind to our port */
- make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
- result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
- if (result < 0) {
- log_print("Can't bind to port %d", dlm_config.ci_tcp_port);
- goto create_out;
- }
- sock_set_keepalive(sock->sk);
-
- result = sock->ops->listen(sock, 5);
- if (result < 0) {
- log_print("Can't listen on port %d", dlm_config.ci_tcp_port);
- goto create_out;
- }
-
- return 0;
-
-create_out:
- return result;
-}
-
/* Get local addresses */
static void init_local(void)
{
@@ -1396,63 +1177,6 @@ static void deinit_local(void)
kfree(dlm_local_addr[i]);
}
-/* Initialise SCTP socket and bind to all interfaces
- * On error caller must run dlm_close_sock() for the
- * listen connection socket.
- */
-static int sctp_listen_for_all(struct listen_connection *con)
-{
- struct socket *sock = NULL;
- int result = -EINVAL;
-
- log_print("Using SCTP for communications");
-
- result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
- SOCK_STREAM, IPPROTO_SCTP, &sock);
- if (result < 0) {
- log_print("Can't create comms socket, check SCTP is loaded");
- goto out;
- }
-
- sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
- sock_set_mark(sock->sk, dlm_config.ci_mark);
- sctp_sock_set_nodelay(sock->sk);
-
- add_listen_sock(sock, con);
-
- /* Bind to all addresses. */
- result = sctp_bind_addrs(con->sock, dlm_config.ci_tcp_port);
- if (result < 0)
- goto out;
-
- result = sock->ops->listen(sock, 5);
- if (result < 0) {
- log_print("Can't set socket listening");
- goto out;
- }
-
- return 0;
-
-out:
- return result;
-}
-
-static int tcp_listen_for_all(void)
-{
- /* We don't support multi-homed hosts */
- if (dlm_local_count > 1) {
- log_print("TCP protocol can't handle multi-homed hosts, "
- "try SCTP");
- return -EINVAL;
- }
-
- log_print("Using TCP for communications");
-
- return tcp_create_listen_sock(&listen_con, dlm_local_addr[0]);
-}
-
-
-
static struct writequeue_entry *new_writequeue_entry(struct connection *con,
gfp_t allocation)
{
@@ -1528,19 +1252,37 @@ static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
{
struct writequeue_entry *e;
struct dlm_msg *msg;
+ bool sleepable;
msg = kzalloc(sizeof(*msg), allocation);
if (!msg)
return NULL;
+ /* this mutex is being used as a wait to avoid multiple "fast"
+ * new writequeue page list entry allocs in new_wq_entry in
+ * normal operation which is sleepable context. Without it
+ * we could end in multiple writequeue entries with one
+ * dlm message because multiple callers were waiting at
+ * the writequeue_lock in new_wq_entry().
+ */
+ sleepable = gfpflags_normal_context(allocation);
+ if (sleepable)
+ mutex_lock(&con->wq_alloc);
+
kref_init(&msg->ref);
e = new_wq_entry(con, len, allocation, ppc, cb, mh);
if (!e) {
+ if (sleepable)
+ mutex_unlock(&con->wq_alloc);
+
kfree(msg);
return NULL;
}
+ if (sleepable)
+ mutex_unlock(&con->wq_alloc);
+
msg->ppc = *ppc;
msg->len = len;
msg->entry = e;
@@ -1646,10 +1388,9 @@ int dlm_lowcomms_resend_msg(struct dlm_msg *msg)
/* Send a message */
static void send_to_sock(struct connection *con)
{
- int ret = 0;
const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
struct writequeue_entry *e;
- int len, offset;
+ int len, offset, ret;
int count = 0;
mutex_lock(&con->sock_mutex);
@@ -1658,7 +1399,8 @@ static void send_to_sock(struct connection *con)
spin_lock(&con->writequeue_lock);
for (;;) {
- if (list_empty(&con->writequeue))
+ e = con_next_wq(con);
+ if (!e)
break;
e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
@@ -1667,25 +1409,22 @@ static void send_to_sock(struct connection *con)
BUG_ON(len == 0 && e->users == 0);
spin_unlock(&con->writequeue_lock);
- ret = 0;
- if (len) {
- ret = kernel_sendpage(con->sock, e->page, offset, len,
- msg_flags);
- if (ret == -EAGAIN || ret == 0) {
- if (ret == -EAGAIN &&
- test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
- !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
- /* Notify TCP that we're limited by the
- * application window size.
- */
- set_bit(SOCK_NOSPACE, &con->sock->flags);
- con->sock->sk->sk_write_pending++;
- }
- cond_resched();
- goto out;
- } else if (ret < 0)
- goto out;
- }
+ ret = kernel_sendpage(con->sock, e->page, offset, len,
+ msg_flags);
+ if (ret == -EAGAIN || ret == 0) {
+ if (ret == -EAGAIN &&
+ test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
+ !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
+ /* Notify TCP that we're limited by the
+ * application window size.
+ */
+ set_bit(SOCK_NOSPACE, &con->sock->flags);
+ con->sock->sk->sk_write_pending++;
+ }
+ cond_resched();
+ goto out;
+ } else if (ret < 0)
+ goto out;
/* Don't starve people filling buffers */
if (++count >= MAX_SEND_MSG_COUNT) {
@@ -1770,12 +1509,9 @@ int dlm_lowcomms_close(int nodeid)
static void process_recv_sockets(struct work_struct *work)
{
struct connection *con = container_of(work, struct connection, rwork);
- int err;
clear_bit(CF_READ_PENDING, &con->flags);
- do {
- err = receive_from_sock(con);
- } while (!err);
+ receive_from_sock(con);
}
static void process_listen_recv_socket(struct work_struct *work)
@@ -1783,6 +1519,74 @@ static void process_listen_recv_socket(struct work_struct *work)
accept_from_sock(&listen_con);
}
+static void dlm_connect(struct connection *con)
+{
+ struct sockaddr_storage addr;
+ int result, addr_len;
+ struct socket *sock;
+ unsigned int mark;
+
+ /* Some odd races can cause double-connects, ignore them */
+ if (con->retries++ > MAX_CONNECT_RETRIES)
+ return;
+
+ if (con->sock) {
+ log_print("node %d already connected.", con->nodeid);
+ return;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ result = nodeid_to_addr(con->nodeid, &addr, NULL,
+ dlm_proto_ops->try_new_addr, &mark);
+ if (result < 0) {
+ log_print("no address for nodeid %d", con->nodeid);
+ return;
+ }
+
+ /* Create a socket to communicate with */
+ result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+ SOCK_STREAM, dlm_proto_ops->proto, &sock);
+ if (result < 0)
+ goto socket_err;
+
+ sock_set_mark(sock->sk, mark);
+ dlm_proto_ops->sockopts(sock);
+
+ add_sock(sock, con);
+
+ result = dlm_proto_ops->bind(sock);
+ if (result < 0)
+ goto add_sock_err;
+
+ log_print_ratelimited("connecting to %d", con->nodeid);
+ make_sockaddr(&addr, dlm_config.ci_tcp_port, &addr_len);
+ result = dlm_proto_ops->connect(con, sock, (struct sockaddr *)&addr,
+ addr_len);
+ if (result < 0)
+ goto add_sock_err;
+
+ return;
+
+add_sock_err:
+ dlm_close_sock(&con->sock);
+
+socket_err:
+ /*
+ * Some errors are fatal and this list might need adjusting. For other
+ * errors we try again until the max number of retries is reached.
+ */
+ if (result != -EHOSTUNREACH &&
+ result != -ENETUNREACH &&
+ result != -ENETDOWN &&
+ result != -EINVAL &&
+ result != -EPROTONOSUPPORT) {
+ log_print("connect %d try %d error %d", con->nodeid,
+ con->retries, result);
+ msleep(1000);
+ lowcomms_connect_sock(con);
+ }
+}
+
/* Send workqueue function */
static void process_send_sockets(struct work_struct *work)
{
@@ -1797,11 +1601,15 @@ static void process_send_sockets(struct work_struct *work)
dlm_midcomms_unack_msg_resend(con->nodeid);
}
- if (con->sock == NULL) { /* not mutex protected so check it inside too */
+ if (con->sock == NULL) {
if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
msleep(1000);
- con->connect_action(con);
+
+ mutex_lock(&con->sock_mutex);
+ dlm_connect(con);
+ mutex_unlock(&con->sock_mutex);
}
+
if (!list_empty(&con->writequeue))
send_to_sock(con);
}
@@ -1840,8 +1648,8 @@ static int work_start(void)
static void shutdown_conn(struct connection *con)
{
- if (con->shutdown_action)
- con->shutdown_action(con);
+ if (dlm_proto_ops->shutdown_action)
+ dlm_proto_ops->shutdown_action(con);
}
void dlm_lowcomms_shutdown(void)
@@ -1948,8 +1756,198 @@ void dlm_lowcomms_stop(void)
srcu_read_unlock(&connections_srcu, idx);
work_stop();
deinit_local();
+
+ dlm_proto_ops = NULL;
}
+static int dlm_listen_for_all(void)
+{
+ struct socket *sock;
+ int result;
+
+ log_print("Using %s for communications",
+ dlm_proto_ops->name);
+
+ result = dlm_proto_ops->listen_validate();
+ if (result < 0)
+ return result;
+
+ result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+ SOCK_STREAM, dlm_proto_ops->proto, &sock);
+ if (result < 0) {
+ log_print("Can't create comms socket, check SCTP is loaded");
+ goto out;
+ }
+
+ sock_set_mark(sock->sk, dlm_config.ci_mark);
+ dlm_proto_ops->listen_sockopts(sock);
+
+ result = dlm_proto_ops->listen_bind(sock);
+ if (result < 0)
+ goto out;
+
+ save_listen_callbacks(sock);
+ add_listen_sock(sock, &listen_con);
+
+ INIT_WORK(&listen_con.rwork, process_listen_recv_socket);
+ result = sock->ops->listen(sock, 5);
+ if (result < 0) {
+ dlm_close_sock(&listen_con.sock);
+ goto out;
+ }
+
+ return 0;
+
+out:
+ sock_release(sock);
+ return result;
+}
+
+static int dlm_tcp_bind(struct socket *sock)
+{
+ struct sockaddr_storage src_addr;
+ int result, addr_len;
+
+ /* Bind to our cluster-known address connecting to avoid
+ * routing problems.
+ */
+ memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
+ make_sockaddr(&src_addr, 0, &addr_len);
+
+ result = sock->ops->bind(sock, (struct sockaddr *)&src_addr,
+ addr_len);
+ if (result < 0) {
+ /* This *may* not indicate a critical error */
+ log_print("could not bind for connect: %d", result);
+ }
+
+ return 0;
+}
+
+static int dlm_tcp_connect(struct connection *con, struct socket *sock,
+ struct sockaddr *addr, int addr_len)
+{
+ int ret;
+
+ ret = sock->ops->connect(sock, addr, addr_len, O_NONBLOCK);
+ switch (ret) {
+ case -EINPROGRESS:
+ fallthrough;
+ case 0:
+ return 0;
+ }
+
+ return ret;
+}
+
+static int dlm_tcp_listen_validate(void)
+{
+ /* We don't support multi-homed hosts */
+ if (dlm_local_count > 1) {
+ log_print("TCP protocol can't handle multi-homed hosts, try SCTP");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void dlm_tcp_sockopts(struct socket *sock)
+{
+ /* Turn off Nagle's algorithm */
+ tcp_sock_set_nodelay(sock->sk);
+}
+
+static void dlm_tcp_listen_sockopts(struct socket *sock)
+{
+ dlm_tcp_sockopts(sock);
+ sock_set_reuseaddr(sock->sk);
+}
+
+static int dlm_tcp_listen_bind(struct socket *sock)
+{
+ int addr_len;
+
+ /* Bind to our port */
+ make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
+ return sock->ops->bind(sock, (struct sockaddr *)dlm_local_addr[0],
+ addr_len);
+}
+
+static const struct dlm_proto_ops dlm_tcp_ops = {
+ .name = "TCP",
+ .proto = IPPROTO_TCP,
+ .connect = dlm_tcp_connect,
+ .sockopts = dlm_tcp_sockopts,
+ .bind = dlm_tcp_bind,
+ .listen_validate = dlm_tcp_listen_validate,
+ .listen_sockopts = dlm_tcp_listen_sockopts,
+ .listen_bind = dlm_tcp_listen_bind,
+ .shutdown_action = dlm_tcp_shutdown,
+ .eof_condition = tcp_eof_condition,
+};
+
+static int dlm_sctp_bind(struct socket *sock)
+{
+ return sctp_bind_addrs(sock, 0);
+}
+
+static int dlm_sctp_connect(struct connection *con, struct socket *sock,
+ struct sockaddr *addr, int addr_len)
+{
+ int ret;
+
+ /*
+ * Make sock->ops->connect() function return in specified time,
+ * since O_NONBLOCK argument in connect() function does not work here,
+ * then, we should restore the default value of this attribute.
+ */
+ sock_set_sndtimeo(sock->sk, 5);
+ ret = sock->ops->connect(sock, addr, addr_len, 0);
+ sock_set_sndtimeo(sock->sk, 0);
+ if (ret < 0)
+ return ret;
+
+ if (!test_and_set_bit(CF_CONNECTED, &con->flags))
+ log_print("successful connected to node %d", con->nodeid);
+
+ return 0;
+}
+
+static int dlm_sctp_listen_validate(void)
+{
+ if (!IS_ENABLED(CONFIG_IP_SCTP)) {
+ log_print("SCTP is not enabled by this kernel");
+ return -EOPNOTSUPP;
+ }
+
+ request_module("sctp");
+ return 0;
+}
+
+static int dlm_sctp_bind_listen(struct socket *sock)
+{
+ return sctp_bind_addrs(sock, dlm_config.ci_tcp_port);
+}
+
+static void dlm_sctp_sockopts(struct socket *sock)
+{
+ /* Turn off Nagle's algorithm */
+ sctp_sock_set_nodelay(sock->sk);
+ sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
+}
+
+static const struct dlm_proto_ops dlm_sctp_ops = {
+ .name = "SCTP",
+ .proto = IPPROTO_SCTP,
+ .try_new_addr = true,
+ .connect = dlm_sctp_connect,
+ .sockopts = dlm_sctp_sockopts,
+ .bind = dlm_sctp_bind,
+ .listen_validate = dlm_sctp_listen_validate,
+ .listen_sockopts = dlm_sctp_sockopts,
+ .listen_bind = dlm_sctp_bind_listen,
+};
+
int dlm_lowcomms_start(void)
{
int error = -EINVAL;
@@ -1976,23 +1974,27 @@ int dlm_lowcomms_start(void)
/* Start listening */
switch (dlm_config.ci_protocol) {
case DLM_PROTO_TCP:
- error = tcp_listen_for_all();
+ dlm_proto_ops = &dlm_tcp_ops;
break;
case DLM_PROTO_SCTP:
- error = sctp_listen_for_all(&listen_con);
+ dlm_proto_ops = &dlm_sctp_ops;
break;
default:
log_print("Invalid protocol identifier %d set",
dlm_config.ci_protocol);
error = -EINVAL;
- break;
+ goto fail_proto_ops;
}
+
+ error = dlm_listen_for_all();
if (error)
- goto fail_unlisten;
+ goto fail_listen;
return 0;
-fail_unlisten:
+fail_listen:
+ dlm_proto_ops = NULL;
+fail_proto_ops:
dlm_allow_conn = 0;
dlm_close_sock(&listen_con.sock);
work_stop();