aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds/tcp_listen.c
diff options
context:
space:
mode:
authorSowmini Varadhan <sowmini.varadhan@oracle.com>2016-07-14 03:51:03 -0700
committerDavid S. Miller <davem@davemloft.net>2016-07-15 11:36:58 -0700
commit5916e2c1554f3e36f770401c989c3c7fadf619ca (patch)
treee2f85d0d6ad83d6835b131956324d6e30c4eda3e /net/rds/tcp_listen.c
parentRDS: TCP: Reduce code duplication in rds_tcp_reset_callbacks() (diff)
downloadlinux-dev-5916e2c1554f3e36f770401c989c3c7fadf619ca.tar.xz
linux-dev-5916e2c1554f3e36f770401c989c3c7fadf619ca.zip
RDS: TCP: Enable multipath RDS for TCP
Use RDS probe-ping to compute how many paths may be used with the peer, and to synchronously start the multiple paths. When multipath RDS (mprds) is supported by the transport, hash outgoing traffic to one of the multiple paths in rds_sendmsg(). CC: Santosh Shilimkar <santosh.shilimkar@oracle.com> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/tcp_listen.c')
-rw-r--r--net/rds/tcp_listen.c63
1 files changed, 55 insertions, 8 deletions
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 73040e319e4b..e0b23fb5b8d5 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -35,7 +35,6 @@
#include <linux/in.h>
#include <net/tcp.h>
-#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
@@ -71,6 +70,52 @@ bail:
return ret;
}
+/* rds_tcp_accept_one_path(): choose the conn path that an incoming accept
+ * on @conn should be bound to.
+ *
+ * If accepting on cp_index > 0, make sure the
+ * client's ipaddr < server's ipaddr. Otherwise, return NULL so that the
+ * caller can close the accepted socket and force a reconnect from
+ * smaller -> larger ip addr. The reason
+ * we special case cp_index 0 is to allow the rds probe ping itself to itself
+ * get through efficiently.
+ * Since reconnects are only initiated from the node with the numerically
+ * smaller ip address, we recycle conns in RDS_CONN_ERROR on the passive side
+ * by moving them to CONNECTING in this function.
+ *
+ * Return value:
+ *  - conn->c_npaths <= 1: the cp_transport_data of c_path[0]. The
+ *    DOWN -> CONNECTING transition is attempted first and, only if it
+ *    returns 0, ERROR -> CONNECTING is attempted. The result of the
+ *    transitions is not checked here; path 0 is returned either way and
+ *    the caller inspects the path state itself.
+ *  - conn->c_npaths > 1 and the peer is not the smaller address: NULL.
+ *  - conn->c_npaths > 1 and the peer is the smaller address: the
+ *    cp_transport_data of the first path in 1..npaths-1 for which a
+ *    DOWN -> CONNECTING or ERROR -> CONNECTING transition returns nonzero,
+ *    or NULL if no such path exists.
+ *
+ * NOTE(review): peer_is_smaller compares c_faddr and c_laddr directly,
+ * whereas the duelling-SYN check in rds_tcp_accept_one() wraps the socket
+ * addresses in ntohl() before comparing. If c_faddr/c_laddr are kept in
+ * network byte order, the ordering computed here is not the numeric one
+ * on little-endian hosts -- TODO confirm, and confirm that the side which
+ * initiates the cp_index > 0 connections uses the same comparison.
+ */
+struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
+{
+ int i;
+ bool peer_is_smaller = (conn->c_faddr < conn->c_laddr);
+ int npaths = conn->c_npaths;
+
+ if (npaths <= 1) {
+ struct rds_conn_path *cp = &conn->c_path[0];
+ int ret;
+
+ ret = rds_conn_path_transition(cp, RDS_CONN_DOWN,
+ RDS_CONN_CONNECTING);
+ if (!ret)
+ rds_conn_path_transition(cp, RDS_CONN_ERROR,
+ RDS_CONN_CONNECTING);
+ return cp->cp_transport_data;
+ }
+
+ /* for mprds, paths with cp_index > 0 MUST be initiated by the peer
+ * with the smaller address. Returning NULL here makes the caller,
+ * rds_tcp_accept_one(), jump to its rst_nsk label instead of using
+ * the newly accepted socket.
+ */
+ if (!peer_is_smaller)
+ return NULL;
+
+ for (i = 1; i < npaths; i++) {
+ struct rds_conn_path *cp = &conn->c_path[i];
+
+ if (rds_conn_path_transition(cp, RDS_CONN_DOWN,
+ RDS_CONN_CONNECTING) ||
+ rds_conn_path_transition(cp, RDS_CONN_ERROR,
+ RDS_CONN_CONNECTING)) {
+ return cp->cp_transport_data;
+ }
+ }
+ return NULL;
+}
+
int rds_tcp_accept_one(struct socket *sock)
{
struct socket *new_sock = NULL;
@@ -120,12 +165,14 @@ int rds_tcp_accept_one(struct socket *sock)
* If the client reboots, this conn will need to be cleaned up.
* rds_tcp_state_change() will do that cleanup
*/
- rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
- cp = &conn->c_path[0];
- rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
+ rs_tcp = rds_tcp_accept_one_path(conn);
+ if (!rs_tcp)
+ goto rst_nsk;
mutex_lock(&rs_tcp->t_conn_path_lock);
- conn_state = rds_conn_state(conn);
- if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
+ cp = rs_tcp->t_cpath;
+ conn_state = rds_conn_path_state(cp);
+ if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP &&
+ conn_state != RDS_CONN_ERROR)
goto rst_nsk;
if (rs_tcp->t_sock) {
/* Need to resolve a duelling SYN between peers.
@@ -135,11 +182,11 @@ int rds_tcp_accept_one(struct socket *sock)
* c_transport_data.
*/
if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) ||
- !conn->c_path[0].cp_outgoing) {
+ !cp->cp_outgoing) {
goto rst_nsk;
} else {
rds_tcp_reset_callbacks(new_sock, cp);
- conn->c_path[0].cp_outgoing = 0;
+ cp->cp_outgoing = 0;
/* rds_connect_path_complete() marks RDS_CONN_UP */
rds_connect_path_complete(cp, RDS_CONN_RESETTING);
}