From eee2fa6ab3225192d6d894c54a6fb02ac9efdff6 Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Mon, 23 Jul 2018 20:51:21 -0700 Subject: rds: Changing IP address internal representation to struct in6_addr This patch changes the internal representation of an IP address to use struct in6_addr. IPv4 address is stored as an IPv4 mapped address. All the functions which take an IP address as argument are also changed to use struct in6_addr. But RDS socket layer is not modified such that it still does not accept IPv6 address from an application. And RDS layer does not accept nor initiate IPv6 connections. v2: Fixed sparse warnings. Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/af_rds.c | 138 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 99 insertions(+), 39 deletions(-) (limited to 'net/rds/af_rds.c') diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index ab751a150f70..fc1a5c63b783 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -113,26 +114,63 @@ void rds_wake_sk_sleep(struct rds_sock *rs) static int rds_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; struct rds_sock *rs = rds_sk_to_rs(sock->sk); - - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + int uaddr_len; /* racey, don't care */ if (peer) { - if (!rs->rs_conn_addr) + if (ipv6_addr_any(&rs->rs_conn_addr)) return -ENOTCONN; - sin->sin_port = rs->rs_conn_port; - sin->sin_addr.s_addr = rs->rs_conn_addr; + if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + sin->sin_family = AF_INET; + sin->sin_port = rs->rs_conn_port; + sin->sin_addr.s_addr = rs->rs_conn_addr_v4; + uaddr_len = sizeof(*sin); + } else { + sin6 = (struct sockaddr_in6 *)uaddr; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = rs->rs_conn_port; + sin6->sin6_addr = rs->rs_conn_addr; + sin6->sin6_flowinfo = 0; + /* scope_id is the same as in the bound address. */ + sin6->sin6_scope_id = rs->rs_bound_scope_id; + uaddr_len = sizeof(*sin6); + } } else { - sin->sin_port = rs->rs_bound_port; - sin->sin_addr.s_addr = rs->rs_bound_addr; + /* If socket is not yet bound, set the return address family + * to be AF_UNSPEC (value 0) and the address size to be that + * of an IPv4 address. + */ + if (ipv6_addr_any(&rs->rs_bound_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_UNSPEC; + return sizeof(*sin); + } + if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + sin->sin_family = AF_INET; + sin->sin_port = rs->rs_bound_port; + sin->sin_addr.s_addr = rs->rs_bound_addr_v4; + uaddr_len = sizeof(*sin); + } else { + sin6 = (struct sockaddr_in6 *)uaddr; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = rs->rs_bound_port; + sin6->sin6_addr = rs->rs_bound_addr; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = rs->rs_bound_scope_id; + uaddr_len = sizeof(*sin6); + } } - sin->sin_family = AF_INET; - - return sizeof(*sin); + return uaddr_len; } /* @@ -203,11 +241,12 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, int len) { + struct sockaddr_in6 sin6; struct sockaddr_in sin; int ret = 0; /* racing with another thread binding seems ok here */ - if (rs->rs_bound_addr == 0) { + if (ipv6_addr_any(&rs->rs_bound_addr)) { ret = -ENOTCONN; /* XXX not a great errno */ goto out; } @@ -215,14 +254,23 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, if (len < sizeof(struct sockaddr_in)) { ret = -EINVAL; goto out; + } else if (len < sizeof(struct sockaddr_in6)) { + /* Assume IPv4 */ + if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) { + ret = -EFAULT; + goto out; + } + ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr); + sin6.sin6_port = sin.sin_port; + } else { + if (copy_from_user(&sin6, optval, + sizeof(struct sockaddr_in6))) { + ret = -EFAULT; + goto out; + } } - if (copy_from_user(&sin, optval, sizeof(sin))) { - ret = -EFAULT; - goto out; - } - - rds_send_drop_to(rs, &sin); + rds_send_drop_to(rs, &sin6); out: return ret; } @@ -435,31 +483,41 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + struct sockaddr_in *sin; struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; lock_sock(sk); - if (addr_len != sizeof(struct sockaddr_in)) { - ret = -EINVAL; - goto out; - } + switch (addr_len) { + case sizeof(struct sockaddr_in): + sin = (struct sockaddr_in *)uaddr; + if (sin->sin_family != AF_INET) { + ret = -EAFNOSUPPORT; + break; + } + if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { + ret = -EDESTADDRREQ; + break; + } + if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) || + sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) { + ret = -EINVAL; + break; + } + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr); + rs->rs_conn_port = sin->sin_port; + break; - if (sin->sin_family != AF_INET) { - ret = -EAFNOSUPPORT; - goto out; - } + case sizeof(struct sockaddr_in6): + ret = -EPROTONOSUPPORT; + break; - if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { - ret = -EDESTADDRREQ; - goto out; + default: + ret = -EINVAL; + break; } - rs->rs_conn_addr = sin->sin_addr.s_addr; - rs->rs_conn_port = sin->sin_port; - -out: release_sock(sk); return ret; } @@ -578,8 +636,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { total++; if (total <= len) - rds_inc_info_copy(inc, iter, inc->i_saddr, - rs->rs_bound_addr, 1); + rds_inc_info_copy(inc, iter, + inc->i_saddr.s6_addr32[3], + rs->rs_bound_addr_v4, + 1); } read_unlock(&rs->rs_recv_lock); @@ -608,8 +668,8 @@ static void rds_sock_info(struct socket *sock, unsigned int len, list_for_each_entry(rs, &rds_sock_list, rs_item) { sinfo.sndbuf = rds_sk_sndbuf(rs); sinfo.rcvbuf = rds_sk_rcvbuf(rs); - sinfo.bound_addr = rs->rs_bound_addr; - sinfo.connected_addr = rs->rs_conn_addr; + sinfo.bound_addr = rs->rs_bound_addr_v4; + sinfo.connected_addr = rs->rs_conn_addr_v4; sinfo.bound_port = rs->rs_bound_port; sinfo.connected_port = rs->rs_conn_port; sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); -- cgit v1.2.3-59-g8ed1b From 1e2b44e78eead7bcadfbf96f70d95773191541c9 Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Mon, 23 Jul 2018 20:51:22 -0700 Subject: rds: Enable RDS IPv6 support This patch enables RDS to use IPv6 addresses. For RDS/TCP, the listener is now an IPv6 endpoint which accepts both IPv4 and IPv6 connection requests. RDS/RDMA/IB uses a private data (struct rds_ib_connect_private) exchange between endpoints at RDS connection establishment time to support RDMA. This private data exchange uses a 32 bit integer to represent an IP address. This needs to be changed in order to support IPv6. A new private data struct rds6_ib_connect_private is introduced to handle this. To ensure backward compatibility, an IPv6 capable RDS stack uses another RDMA listener port (RDS_CM_PORT) to accept IPv6 connection. And it continues to use the original RDS_PORT for IPv4 RDS connections. When it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to send the connection set up request. v5: Fixed syntax problem (David Miller). v4: Changed port history comments in rds.h (Sowmini Varadhan). v3: Added support to set up IPv4 connection using mapped address (David Miller). Added support to set up connection between link local and non-link addresses. Various review comments from Santosh Shilimkar and Sowmini Varadhan. v2: Fixed bound and peer address scope mismatched issue. Added back rds_connect() IPv6 changes. Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/af_rds.c | 91 ++++++++++++++++++++++++++++++++++++++++-------- net/rds/bind.c | 59 ++++++++++++++++++++++++++----- net/rds/connection.c | 54 ++++++++++++++++++++-------- net/rds/ib.c | 55 ++++++++++++++++++++++++----- net/rds/ib_cm.c | 20 ++++++++--- net/rds/rdma_transport.c | 30 +++++++++++++++- net/rds/rdma_transport.h | 5 +++ net/rds/rds.h | 22 +++++++----- net/rds/recv.c | 2 +- net/rds/send.c | 61 ++++++++++++++++++++++++++++---- net/rds/tcp.c | 54 +++++++++++++++++----------- net/rds/tcp.h | 2 +- net/rds/tcp_connect.c | 54 +++++++++++++++++++++------- net/rds/tcp_listen.c | 64 +++++++++++++++++++++++++++------- 14 files changed, 459 insertions(+), 114 deletions(-) (limited to 'net/rds/af_rds.c') diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index fc1a5c63b783..fc5c48b248fe 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -142,15 +142,32 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, uaddr_len = sizeof(*sin6); } } else { - /* If socket is not yet bound, set the return address family - * to be AF_UNSPEC (value 0) and the address size to be that - * of an IPv4 address. + /* If socket is not yet bound and the socket is connected, + * set the return address family to be the same as the + * connected address, but with 0 address value. If it is not + * connected, set the family to be AF_UNSPEC (value 0) and + * the address size to be that of an IPv4 address. */ if (ipv6_addr_any(&rs->rs_bound_addr)) { - sin = (struct sockaddr_in *)uaddr; - memset(sin, 0, sizeof(*sin)); - sin->sin_family = AF_UNSPEC; - return sizeof(*sin); + if (ipv6_addr_any(&rs->rs_conn_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_UNSPEC; + return sizeof(*sin); + } + + if (ipv6_addr_type(&rs->rs_conn_addr) & + IPV6_ADDR_MAPPED) { + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + return sizeof(*sin); + } + + sin6 = (struct sockaddr_in6 *)uaddr; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + return sizeof(*sin6); } if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { sin = (struct sockaddr_in *)uaddr; @@ -484,16 +501,18 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct rds_sock *rs = rds_sk_to_rs(sk); + int addr_type; int ret = 0; lock_sock(sk); - switch (addr_len) { - case sizeof(struct sockaddr_in): + switch (uaddr->sa_family) { + case AF_INET: sin = (struct sockaddr_in *)uaddr; - if (sin->sin_family != AF_INET) { - ret = -EAFNOSUPPORT; + if (addr_len < sizeof(struct sockaddr_in)) { + ret = -EINVAL; break; } if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { @@ -509,12 +528,56 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, rs->rs_conn_port = sin->sin_port; break; - case sizeof(struct sockaddr_in6): - ret = -EPROTONOSUPPORT; + case AF_INET6: + sin6 = (struct sockaddr_in6 *)uaddr; + if (addr_len < sizeof(struct sockaddr_in6)) { + ret = -EINVAL; + break; + } + addr_type = ipv6_addr_type(&sin6->sin6_addr); + if (!(addr_type & IPV6_ADDR_UNICAST)) { + __be32 addr4; + + if (!(addr_type & IPV6_ADDR_MAPPED)) { + ret = -EPROTOTYPE; + break; + } + + /* It is a mapped address. Need to do some sanity + * checks. + */ + addr4 = sin6->sin6_addr.s6_addr32[3]; + if (addr4 == htonl(INADDR_ANY) || + addr4 == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(addr4))) { + ret = -EPROTOTYPE; + break; + } + } + + if (addr_type & IPV6_ADDR_LINKLOCAL) { + /* If socket is arleady bound to a link local address, + * the peer address must be on the same link. + */ + if (sin6->sin6_scope_id == 0 || + (!ipv6_addr_any(&rs->rs_bound_addr) && + rs->rs_bound_scope_id && + sin6->sin6_scope_id != rs->rs_bound_scope_id)) { + ret = -EINVAL; + break; + } + /* Remember the connected address scope ID. It will + * be checked against the binding local address when + * the socket is bound. + */ + rs->rs_bound_scope_id = sin6->sin6_scope_id; + } + rs->rs_conn_addr = sin6->sin6_addr; + rs->rs_conn_port = sin6->sin6_port; break; default: - ret = -EINVAL; + ret = -EAFNOSUPPORT; break; } diff --git a/net/rds/bind.c b/net/rds/bind.c index c401776ad938..ba778760cbc2 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -127,9 +127,10 @@ static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr, if (!rhashtable_insert_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms)) { *port = rs->rs_bound_port; + rs->rs_bound_scope_id = scope_id; ret = 0; - rdsdebug("rs %p binding to %pI4:%d\n", - rs, &addr, (int)ntohs(*port)); + rdsdebug("rs %p binding to %pI6c:%d\n", + rs, addr, (int)ntohs(*port)); break; } else { rs->rs_bound_addr = in6addr_any; @@ -164,23 +165,53 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct in6_addr v6addr, *binding_addr; struct rds_transport *trans; __u32 scope_id = 0; + int addr_type; int ret = 0; __be16 port; - /* We only allow an RDS socket to be bound to an IPv4 address. IPv6 - * address support will be added later. + /* We allow an RDS socket to be bound to either IPv4 or IPv6 + * address. */ - if (addr_len == sizeof(struct sockaddr_in)) { + if (uaddr->sa_family == AF_INET) { struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; - if (sin->sin_family != AF_INET || - sin->sin_addr.s_addr == htonl(INADDR_ANY)) + if (addr_len < sizeof(struct sockaddr_in) || + sin->sin_addr.s_addr == htonl(INADDR_ANY) || + sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return -EINVAL; ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr); binding_addr = &v6addr; port = sin->sin_port; - } else if (addr_len == sizeof(struct sockaddr_in6)) { - return -EPROTONOSUPPORT; + } else if (uaddr->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr; + + if (addr_len < sizeof(struct sockaddr_in6)) + return -EINVAL; + addr_type = ipv6_addr_type(&sin6->sin6_addr); + if (!(addr_type & IPV6_ADDR_UNICAST)) { + __be32 addr4; + + if (!(addr_type & IPV6_ADDR_MAPPED)) + return -EINVAL; + + /* It is a mapped address. Need to do some sanity + * checks. + */ + addr4 = sin6->sin6_addr.s6_addr32[3]; + if (addr4 == htonl(INADDR_ANY) || + addr4 == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(addr4))) + return -EINVAL; + } + /* The scope ID must be specified for link local address. */ + if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (sin6->sin6_scope_id == 0) + return -EINVAL; + scope_id = sin6->sin6_scope_id; + } + binding_addr = &sin6->sin6_addr; + port = sin6->sin6_port; } else { return -EINVAL; } @@ -191,6 +222,16 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ret = -EINVAL; goto out; } + /* Socket is connected. The binding address should have the same + * scope ID as the connected address, except the case when one is + * non-link local address (scope_id is 0). + */ + if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id && + rs->rs_bound_scope_id && + scope_id != rs->rs_bound_scope_id) { + ret = -EINVAL; + goto out; + } ret = rds_add_bound(rs, binding_addr, &port, scope_id); if (ret) diff --git a/net/rds/connection.c b/net/rds/connection.c index 3176ead0ab4d..5c9ceed55dae 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,6 +36,7 @@ #include #include #include +#include #include "rds.h" #include "loop.h" @@ -200,6 +201,15 @@ static struct rds_connection *__rds_conn_create(struct net *net, conn->c_isv6 = !ipv6_addr_v4mapped(laddr); conn->c_faddr = *faddr; conn->c_dev_if = dev_if; + /* If the local address is link local, set c_bound_if to be the + * index used for this connection. Otherwise, set it to 0 as + * the socket is not bound to an interface. c_bound_if is used + * to look up a socket when a packet is received + */ + if (ipv6_addr_type(laddr) & IPV6_ADDR_LINKLOCAL) + conn->c_bound_if = dev_if; + else + conn->c_bound_if = 0; rds_conn_net_set(conn, net); @@ -486,10 +496,18 @@ void rds_conn_destroy(struct rds_connection *conn) } EXPORT_SYMBOL_GPL(rds_conn_destroy); -static void rds_conn_message_info(struct socket *sock, unsigned int len, - struct rds_info_iterator *iter, - struct rds_info_lengths *lens, - int want_send) +static void __rds_inc_msg_cp(struct rds_incoming *inc, + struct rds_info_iterator *iter, + void *saddr, void *daddr, int flip) +{ + rds_inc_info_copy(inc, iter, *(__be32 *)saddr, + *(__be32 *)daddr, flip); +} + +static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens, + int want_send) { struct hlist_head *head; struct list_head *list; @@ -524,18 +542,13 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, /* XXX too lazy to maintain counts.. */ list_for_each_entry(rm, list, m_conn_item) { - __be32 laddr; - __be32 faddr; - total++; - laddr = conn->c_laddr.s6_addr32[3]; - faddr = conn->c_faddr.s6_addr32[3]; if (total <= len) - rds_inc_info_copy(&rm->m_inc, - iter, - laddr, - faddr, - 0); + __rds_inc_msg_cp(&rm->m_inc, + iter, + &conn->c_laddr, + &conn->c_faddr, + 0); } spin_unlock_irqrestore(&cp->cp_lock, flags); @@ -548,6 +561,14 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, lens->each = sizeof(struct rds_info_message); } +static void rds_conn_message_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens, + int want_send) +{ + rds_conn_message_info_cmn(sock, len, iter, lens, want_send); +} + static void rds_conn_message_info_send(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) @@ -655,6 +676,9 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) struct rds_info_connection *cinfo = buffer; struct rds_connection *conn = cp->cp_conn; + if (conn->c_isv6) + return 0; + cinfo->next_tx_seq = cp->cp_next_tx_seq; cinfo->next_rx_seq = cp->cp_next_rx_seq; cinfo->laddr = conn->c_laddr.s6_addr32[3]; diff --git a/net/rds/ib.c b/net/rds/ib.c index c712a848957d..756225c5540f 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -39,6 +39,7 @@ #include #include #include +#include #include "rds_single_path.h" #include "rds.h" @@ -295,6 +296,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) return 0; + if (conn->c_isv6) + return 0; iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; @@ -330,7 +333,6 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, sizeof(struct rds_info_rdma_connection)); } - /* * Early RDS/IB was built to only bind to an address if there is an IPoIB * device with that address set. @@ -346,8 +348,12 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, { int ret; struct rdma_cm_id *cm_id; + struct sockaddr_in6 sin6; struct sockaddr_in sin; + struct sockaddr *sa; + bool isv4; + isv4 = ipv6_addr_v4mapped(addr); /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ @@ -356,20 +362,53 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, if (IS_ERR(cm_id)) return PTR_ERR(cm_id); - memset(&sin, 0, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = addr->s6_addr32[3]; + if (isv4) { + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = addr->s6_addr32[3]; + sa = (struct sockaddr *)&sin; + } else { + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = *addr; + sin6.sin6_scope_id = scope_id; + sa = (struct sockaddr *)&sin6; + + /* XXX Do a special IPv6 link local address check here. The + * reason is that rdma_bind_addr() always succeeds with IPv6 + * link local address regardless it is indeed configured in a + * system. + */ + if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) { + struct net_device *dev; + + if (scope_id == 0) + return -EADDRNOTAVAIL; + + /* Use init_net for now as RDS is not network + * name space aware. + */ + dev = dev_get_by_index(&init_net, scope_id); + if (!dev) + return -EADDRNOTAVAIL; + if (!ipv6_chk_addr(&init_net, addr, dev, 1)) { + dev_put(dev); + return -EADDRNOTAVAIL; + } + dev_put(dev); + } + } /* rdma_bind_addr will only succeed for IB & iWARP devices */ - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); + ret = rdma_bind_addr(cm_id, sa); /* due to this, we will claim to support iWARP devices unless we check node_type. */ if (ret || !cm_id->device || cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; - rdsdebug("addr %pI6c ret %d node type %d\n", - addr, ret, + rdsdebug("addr %pI6c%%%u ret %d node type %d\n", + addr, scope_id, ret, cm_id->device ? cm_id->device->node_type : -1); rdma_destroy_id(cm_id); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index dd8a867e5a9c..a33b82dc0804 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -678,7 +678,7 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) return version; } -/* Given an IPv6 address, find the IB net_device which hosts that address and +/* Given an IPv6 address, find the net_device which hosts that address and * return its index. This is used by the rds_ib_cm_handle_connect() code to * find the interface index of where an incoming request comes from when * the request is using a link local address. @@ -695,8 +695,7 @@ static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr) rcu_read_lock(); for_each_netdev_rcu(net, dev) { - if (dev->type == ARPHRD_INFINIBAND && - ipv6_chk_addr(net, addr, dev, 0)) { + if (ipv6_chk_addr(net, addr, dev, 1)) { idx = dev->ifindex; break; } @@ -736,7 +735,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, dp_cmn = &dp->ricp_v6.dp_cmn; saddr6 = &dp->ricp_v6.dp_saddr; daddr6 = &dp->ricp_v6.dp_daddr; - /* If the local address is link local, need to find the + /* If either address is link local, need to find the * interface index in order to create a proper RDS * connection. */ @@ -748,6 +747,14 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, err = -EOPNOTSUPP; goto out; } + } else if (ipv6_addr_type(saddr6) & IPV6_ADDR_LINKLOCAL) { + /* Use our address to find the correct index. */ + ifindex = __rds_find_ifindex(&init_net, daddr6); + /* No index found... Need to bail out. */ + if (ifindex == 0) { + err = -EOPNOTSUPP; + goto out; + } } } else { dp_cmn = &dp->ricp_v4.dp_cmn; @@ -886,7 +893,10 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ - handler = rds_rdma_cm_event_handler; + if (conn->c_isv6) + handler = rds6_rdma_cm_event_handler; + else + handler = rds_rdma_cm_event_handler; ic->i_cm_id = rdma_create_id(&init_net, handler, conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index f49abef69550..bd67e55354f4 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -37,7 +37,9 @@ #include "rdma_transport.h" #include "ib.h" +/* Global IPv4 and IPv6 RDS RDMA listener cm_id */ static struct rdma_cm_id *rds_rdma_listen_id; +static struct rdma_cm_id *rds6_rdma_listen_id; static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, struct rdma_cm_event *event, @@ -153,6 +155,12 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, return rds_rdma_cm_event_handler_cmn(cm_id, event, false); } +int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + return rds_rdma_cm_event_handler_cmn(cm_id, event, true); +} + static int rds_rdma_listen_init_common(rdma_cm_event_handler handler, struct sockaddr *sa, struct rdma_cm_id **ret_cm_id) @@ -206,6 +214,7 @@ out: static int rds_rdma_listen_init(void) { int ret; + struct sockaddr_in6 sin6; struct sockaddr_in sin; sin.sin_family = PF_INET; @@ -214,7 +223,21 @@ static int rds_rdma_listen_init(void) ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler, (struct sockaddr *)&sin, &rds_rdma_listen_id); - return ret; + if (ret != 0) + return ret; + + sin6.sin6_family = PF_INET6; + sin6.sin6_addr = in6addr_any; + sin6.sin6_port = htons(RDS_CM_PORT); + sin6.sin6_scope_id = 0; + sin6.sin6_flowinfo = 0; + ret = rds_rdma_listen_init_common(rds6_rdma_cm_event_handler, + (struct sockaddr *)&sin6, + &rds6_rdma_listen_id); + /* Keep going even when IPv6 is not enabled in the system. */ + if (ret != 0) + rdsdebug("Cannot set up IPv6 RDMA listener\n"); + return 0; } static void rds_rdma_listen_stop(void) @@ -224,6 +247,11 @@ static void rds_rdma_listen_stop(void) rdma_destroy_id(rds_rdma_listen_id); rds_rdma_listen_id = NULL; } + if (rds6_rdma_listen_id) { + rdsdebug("cm %p\n", rds6_rdma_listen_id); + rdma_destroy_id(rds6_rdma_listen_id); + rds6_rdma_listen_id = NULL; + } } static int rds_rdma_init(void) diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h index d309c4430124..200d3134aaae 100644 --- a/net/rds/rdma_transport.h +++ b/net/rds/rdma_transport.h @@ -6,11 +6,16 @@ #include #include "rds.h" +/* RDMA_CM also uses 16385 as the listener port. */ +#define RDS_CM_PORT 16385 + #define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 int rds_rdma_conn_connect(struct rds_connection *conn); int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); +int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event); /* from ib.c */ extern struct rds_transport rds_ib_transport; diff --git a/net/rds/rds.h b/net/rds/rds.h index 1bff26988a5e..ff537bb11411 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -24,14 +24,15 @@ #define RDS_PROTOCOL_MINOR(v) ((v) & 255) #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) -/* - * XXX randomly chosen, but at least seems to be unused: - * # 18464-18768 Unassigned - * We should do better. We want a reserved port to discourage unpriv'ed - * userspace from listening. +/* The following ports, 16385, 18634, 18635, are registered with IANA as + * the ports to be used for RDS over TCP and UDP. Currently, only RDS over + * TCP and RDS over IB/RDMA are implemented. 18634 is the historical value + * used for the RDMA_CM listener port. RDS/TCP uses port 16385. After + * IPv6 work, RDMA_CM also uses 16385 as the listener port. 18634 is kept + * to ensure compatibility with older RDS modules. Those ports are defined + * in each transport's header file. */ #define RDS_PORT 18634 -#define RDS_CM_PORT 16385 #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 @@ -140,7 +141,8 @@ struct rds_connection { struct hlist_node c_hash_node; struct in6_addr c_laddr; struct in6_addr c_faddr; - int c_dev_if; /* c_laddrs's interface index */ + int c_dev_if; /* ifindex used for this conn */ + int c_bound_if; /* ifindex of c_laddr */ unsigned int c_loopback:1, c_isv6:1, c_ping_triggered:1, @@ -736,7 +738,7 @@ void rds_cong_remove_socket(struct rds_sock *); void rds_cong_exit(void); struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); -/* conn.c */ +/* connection.c */ extern u32 rds_gen_num; int rds_conn_init(void); void rds_conn_exit(void); @@ -874,6 +876,10 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); void rds_inc_info_copy(struct rds_incoming *inc, struct rds_info_iterator *iter, __be32 saddr, __be32 daddr, int flip); +void rds6_inc_info_copy(struct rds_incoming *inc, + struct rds_info_iterator *iter, + struct in6_addr *saddr, struct in6_addr *daddr, + int flip); /* send.c */ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len); diff --git a/net/rds/recv.c b/net/rds/recv.c index 4217961fd130..1402c21210b1 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -364,7 +364,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, goto out; } - rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_dev_if); + rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_bound_if); if (!rs) { rds_stats_inc(s_recv_drop_no_sock); goto out; diff --git a/net/rds/send.c b/net/rds/send.c index 6ed2e925c36a..9604e1faa564 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1091,10 +1091,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) ret = -EINVAL; goto out; } - switch (namelen) { - case sizeof(*usin): - if (usin->sin_family != AF_INET || - usin->sin_addr.s_addr == htonl(INADDR_ANY) || + switch (usin->sin_family) { + case AF_INET: + if (usin->sin_addr.s_addr == htonl(INADDR_ANY) || usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) { ret = -EINVAL; @@ -1104,9 +1103,44 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) dport = usin->sin_port; break; - case sizeof(*sin6): { - ret = -EPROTONOSUPPORT; - goto out; + case AF_INET6: { + int addr_type; + + if (namelen < sizeof(*sin6)) { + ret = -EINVAL; + goto out; + } + addr_type = ipv6_addr_type(&sin6->sin6_addr); + if (!(addr_type & IPV6_ADDR_UNICAST)) { + __be32 addr4; + + if (!(addr_type & IPV6_ADDR_MAPPED)) { + ret = -EINVAL; + goto out; + } + + /* It is a mapped address. Need to do some + * sanity checks. + */ + addr4 = sin6->sin6_addr.s6_addr32[3]; + if (addr4 == htonl(INADDR_ANY) || + addr4 == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(addr4))) { + return -EINVAL; + goto out; + } + } + if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (sin6->sin6_scope_id == 0) { + ret = -EINVAL; + goto out; + } + scope_id = sin6->sin6_scope_id; + } + + daddr = sin6->sin6_addr; + dport = sin6->sin6_port; + break; } default: @@ -1138,6 +1172,19 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) ret = -EOPNOTSUPP; goto out; } + /* If the socket is already bound to a link local address, + * it can only send to peers on the same link. But allow + * communicating beween link local and non-link local address. + */ + if (scope_id != rs->rs_bound_scope_id) { + if (!scope_id) { + scope_id = rs->rs_bound_scope_id; + } else if (rs->rs_bound_scope_id) { + release_sock(sk); + ret = -EINVAL; + goto out; + } + } } release_sock(sk); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index dadb33790333..890d0e1d8908 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -46,7 +46,12 @@ /* only for info exporting */ static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); static LIST_HEAD(rds_tcp_tc_list); + +/* rds_tcp_tc_count counts only IPv4 connections. + * rds6_tcp_tc_count counts both IPv4 and IPv6 connections. + */ static unsigned int rds_tcp_tc_count; +static unsigned int rds6_tcp_tc_count; /* Track rds_tcp_connection structs so they can be cleaned up */ static DEFINE_SPINLOCK(rds_tcp_conn_lock); @@ -113,7 +118,9 @@ void rds_tcp_restore_callbacks(struct socket *sock, /* done under the callback_lock to serialize with write_space */ spin_lock(&rds_tcp_tc_list_lock); list_del_init(&tc->t_list_item); - rds_tcp_tc_count--; + rds6_tcp_tc_count--; + if (!tc->t_cpath->cp_conn->c_isv6) + rds_tcp_tc_count--; spin_unlock(&rds_tcp_tc_list_lock); tc->t_sock = NULL; @@ -200,7 +207,9 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) /* done under the callback_lock to serialize with write_space */ spin_lock(&rds_tcp_tc_list_lock); list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); - rds_tcp_tc_count++; + rds6_tcp_tc_count++; + if (!tc->t_cpath->cp_conn->c_isv6) + rds_tcp_tc_count++; spin_unlock(&rds_tcp_tc_list_lock); /* accepted sockets need our listen data ready undone */ @@ -221,6 +230,9 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) write_unlock_bh(&sock->sk->sk_callback_lock); } +/* Handle RDS_INFO_TCP_SOCKETS socket option. It only returns IPv4 + * connections for backward compatibility. + */ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) @@ -228,8 +240,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, struct rds_info_tcp_socket tsinfo; struct rds_tcp_connection *tc; unsigned long flags; - struct sockaddr_in sin; - struct socket *sock; spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); @@ -237,16 +247,15 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, goto out; list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { + struct inet_sock *inet = inet_sk(tc->t_sock->sk); - sock = tc->t_sock; - if (sock) { - sock->ops->getname(sock, (struct sockaddr *)&sin, 0); - tsinfo.local_addr = sin.sin_addr.s_addr; - tsinfo.local_port = sin.sin_port; - sock->ops->getname(sock, (struct sockaddr *)&sin, 1); - tsinfo.peer_addr = sin.sin_addr.s_addr; - tsinfo.peer_port = sin.sin_port; - } + if (tc->t_cpath->cp_conn->c_isv6) + continue; + + tsinfo.local_addr = inet->inet_saddr; + tsinfo.local_port = inet->inet_sport; + tsinfo.peer_addr = inet->inet_daddr; + tsinfo.peer_port = inet->inet_dport; tsinfo.hdr_rem = tc->t_tinc_hdr_rem; tsinfo.data_rem = tc->t_tinc_data_rem; @@ -494,13 +503,18 @@ static __net_init int rds_tcp_init_net(struct net *net) err = -ENOMEM; goto fail; } - rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net); + rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true); if (!rtn->rds_tcp_listen_sock) { - pr_warn("could not set up listen sock\n"); - unregister_net_sysctl_table(rtn->rds_tcp_sysctl); - rtn->rds_tcp_sysctl = NULL; - err = -EAFNOSUPPORT; - goto fail; + pr_warn("could not set up IPv6 listen sock\n"); + + /* Try IPv4 as some systems disable IPv6 */ + rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false); + if (!rtn->rds_tcp_listen_sock) { + unregister_net_sysctl_table(rtn->rds_tcp_sysctl); + rtn->rds_tcp_sysctl = NULL; + err = -EAFNOSUPPORT; + goto fail; + } } INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); return 0; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index c6fa080e9b6d..3c69361d21c7 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -67,7 +67,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn); void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ -struct socket *rds_tcp_listen_init(struct net *); +struct socket *rds_tcp_listen_init(struct net *net, bool isv6); void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 231ae927858e..008f50fb25dd 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -89,9 +89,11 @@ out: int rds_tcp_conn_path_connect(struct rds_conn_path *cp) { struct socket *sock = NULL; + struct sockaddr_in6 sin6; struct sockaddr_in sin; struct sockaddr *addr; int addrlen; + bool isv6; int ret; struct rds_connection *conn = cp->cp_conn; struct rds_tcp_connection *tc = cp->cp_transport_data; @@ -108,18 +110,36 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) mutex_unlock(&tc->t_conn_path_lock); return 0; } - ret = sock_create_kern(rds_conn_net(conn), PF_INET, - SOCK_STREAM, IPPROTO_TCP, &sock); + if (ipv6_addr_v4mapped(&conn->c_laddr)) { + ret = sock_create_kern(rds_conn_net(conn), PF_INET, + SOCK_STREAM, IPPROTO_TCP, &sock); + isv6 = false; + } else { + ret = sock_create_kern(rds_conn_net(conn), PF_INET6, + SOCK_STREAM, IPPROTO_TCP, &sock); + isv6 = true; + } + if (ret < 0) goto out; rds_tcp_tune(sock); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3]; - sin.sin_port = 0; - addr = (struct sockaddr *)&sin; - addrlen = sizeof(sin); + if (isv6) { + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = conn->c_laddr; + sin6.sin6_port = 0; + sin6.sin6_flowinfo = 0; + sin6.sin6_scope_id = conn->c_dev_if; + addr = (struct sockaddr *)&sin6; + addrlen = sizeof(sin6); + } else { + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3]; + sin.sin_port = 0; + addr = (struct sockaddr *)&sin; + addrlen = sizeof(sin); + } ret = sock->ops->bind(sock, addr, addrlen); if (ret) { @@ -128,11 +148,21 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) goto out; } - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3]; - sin.sin_port = htons(RDS_TCP_PORT); - addr = (struct sockaddr *)&sin; - addrlen = sizeof(sin); + if (isv6) { + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = conn->c_faddr; + sin6.sin6_port = htons(RDS_TCP_PORT); + sin6.sin6_flowinfo = 0; + sin6.sin6_scope_id = conn->c_dev_if; + addr = (struct sockaddr *)&sin6; + addrlen = sizeof(sin6); + } else { + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3]; + sin.sin_port = htons(RDS_TCP_PORT); + addr = (struct sockaddr *)&sin; + addrlen = sizeof(sin); + } /* * once we call connect() we can start getting callbacks and they diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 4fdf5b3a47df..0cf0147117d8 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -131,6 +131,8 @@ int rds_tcp_accept_one(struct socket *sock) struct rds_tcp_connection *rs_tcp = NULL; int conn_state; struct rds_conn_path *cp; + struct in6_addr *my_addr, *peer_addr; + int dev_if; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -163,15 +165,29 @@ int rds_tcp_accept_one(struct socket *sock) inet = inet_sk(new_sock->sk); + my_addr = &new_sock->sk->sk_v6_rcv_saddr; + peer_addr = &new_sock->sk->sk_v6_daddr; rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n", - &new_sock->sk->sk_v6_rcv_saddr, ntohs(inet->inet_sport), - &new_sock->sk->sk_v6_daddr, ntohs(inet->inet_dport)); + my_addr, ntohs(inet->inet_sport), + peer_addr, ntohs(inet->inet_dport)); + /* sk_bound_dev_if is not set if the peer address is not link local + * address. In this case, it happens that mcast_oif is set. So + * just use it. + */ + if ((ipv6_addr_type(my_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(peer_addr) & IPV6_ADDR_LINKLOCAL)) { + struct ipv6_pinfo *inet6; + + inet6 = inet6_sk(new_sock->sk); + dev_if = inet6->mcast_oif; + } else { + dev_if = new_sock->sk->sk_bound_dev_if; + } conn = rds_conn_create(sock_net(sock->sk), &new_sock->sk->sk_v6_rcv_saddr, &new_sock->sk->sk_v6_daddr, - &rds_tcp_transport, GFP_KERNEL, - new_sock->sk->sk_bound_dev_if); + &rds_tcp_transport, GFP_KERNEL, dev_if); if (IS_ERR(conn)) { ret = PTR_ERR(conn); @@ -256,15 +272,22 @@ out: ready(sk); } -struct socket *rds_tcp_listen_init(struct net *net) +struct socket *rds_tcp_listen_init(struct net *net, bool isv6) { - struct sockaddr_in sin; struct socket *sock = NULL; + struct sockaddr_storage ss; + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + int addr_len; int ret; - ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); - if (ret < 0) + ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM, + IPPROTO_TCP, &sock); + if (ret < 0) { + rdsdebug("could not create %s listener socket: %d\n", + isv6 ? "IPv6" : "IPv4", ret); goto out; + } sock->sk->sk_reuse = SK_CAN_REUSE; rds_tcp_nonagle(sock); @@ -274,13 +297,28 @@ struct socket *rds_tcp_listen_init(struct net *net) sock->sk->sk_data_ready = rds_tcp_listen_data_ready; write_unlock_bh(&sock->sk->sk_callback_lock); - sin.sin_family = PF_INET; - sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); - sin.sin_port = (__force u16)htons(RDS_TCP_PORT); + if (isv6) { + sin6 = (struct sockaddr_in6 *)&ss; + sin6->sin6_family = PF_INET6; + sin6->sin6_addr = in6addr_any; + sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT); + sin6->sin6_scope_id = 0; + sin6->sin6_flowinfo = 0; + addr_len = sizeof(*sin6); + } else { + sin = (struct sockaddr_in *)&ss; + sin->sin_family = PF_INET; + sin->sin_addr.s_addr = INADDR_ANY; + sin->sin_port = (__force u16)htons(RDS_TCP_PORT); + addr_len = sizeof(*sin); + } - ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); - if (ret < 0) + ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len); + if (ret < 0) { + rdsdebug("could not bind %s listener socket: %d\n", + isv6 ? "IPv6" : "IPv4", ret); goto out; + } ret = sock->ops->listen(sock, 64); if (ret < 0) -- cgit v1.2.3-59-g8ed1b From e65d4d96334e3ff4fe0064612a93a51c63de08de Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Mon, 30 Jul 2018 22:48:42 -0700 Subject: rds: Remove IPv6 dependency This patch removes the IPv6 dependency from RDS. Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/Kconfig | 2 +- net/rds/af_rds.c | 32 +++++++++++++++++++------------- net/rds/bind.c | 4 +++- net/rds/connection.c | 26 ++++++++++++++++++++++++-- net/rds/ib.c | 31 ++++++++++++++++++++++++++----- net/rds/ib_cm.c | 9 +++++++++ net/rds/ib_rdma.c | 2 ++ net/rds/rdma_transport.c | 10 ++++++++++ net/rds/recv.c | 2 ++ net/rds/send.c | 2 ++ net/rds/tcp.c | 25 +++++++++++++++++++++++++ net/rds/tcp_listen.c | 21 +++++++++++++++++---- 12 files changed, 140 insertions(+), 26 deletions(-) (limited to 'net/rds/af_rds.c') diff --git a/net/rds/Kconfig b/net/rds/Kconfig index 4c7f2595d919..41f75563b54b 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -1,7 +1,7 @@ config RDS tristate "The RDS Protocol" - depends on INET && IPV6 + depends on INET ---help--- The RDS (Reliable Datagram Sockets) protocol provides reliable, sequenced delivery of datagrams over Infiniband or TCP. diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index fc5c48b248fe..65387e1e6964 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -156,18 +156,20 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, return sizeof(*sin); } - if (ipv6_addr_type(&rs->rs_conn_addr) & - IPV6_ADDR_MAPPED) { - sin = (struct sockaddr_in *)uaddr; - memset(sin, 0, sizeof(*sin)); - sin->sin_family = AF_INET; - return sizeof(*sin); +#if IS_ENABLED(CONFIG_IPV6) + if (!(ipv6_addr_type(&rs->rs_conn_addr) & + IPV6_ADDR_MAPPED)) { + sin6 = (struct sockaddr_in6 *)uaddr; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + return sizeof(*sin6); } +#endif - sin6 = (struct sockaddr_in6 *)uaddr; - memset(sin6, 0, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - return sizeof(*sin6); + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + return sizeof(*sin); } if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { sin = (struct sockaddr_in *)uaddr; @@ -501,9 +503,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; struct sockaddr_in *sin; - struct sockaddr_in6 *sin6; struct rds_sock *rs = rds_sk_to_rs(sk); - int addr_type; int ret = 0; lock_sock(sk); @@ -528,7 +528,11 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, rs->rs_conn_port = sin->sin_port; break; - case AF_INET6: +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: { + struct sockaddr_in6 *sin6; + int addr_type; + sin6 = (struct sockaddr_in6 *)uaddr; if (addr_len < sizeof(struct sockaddr_in6)) { ret = -EINVAL; @@ -575,6 +579,8 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, rs->rs_conn_addr = sin6->sin6_addr; rs->rs_conn_port = sin6->sin6_port; break; + } +#endif default: ret = -EAFNOSUPPORT; diff --git a/net/rds/bind.c b/net/rds/bind.c index ba778760cbc2..3ab55784b637 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -165,7 +165,6 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct in6_addr v6addr, *binding_addr; struct rds_transport *trans; __u32 scope_id = 0; - int addr_type; int ret = 0; __be16 port; @@ -183,8 +182,10 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr); binding_addr = &v6addr; port = sin->sin_port; +#if IS_ENABLED(CONFIG_IPV6) } else if (uaddr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr; + int addr_type; if (addr_len < sizeof(struct sockaddr_in6)) return -EINVAL; @@ -212,6 +213,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } binding_addr = &sin6->sin6_addr; port = sin6->sin6_port; +#endif } else { return -EINVAL; } diff --git a/net/rds/connection.c b/net/rds/connection.c index 051e35c1e7c6..3bd2f4a5a30d 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -63,8 +63,12 @@ static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr, net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret)); lhash = (__force u32)laddr->s6_addr32[3]; +#if IS_ENABLED(CONFIG_IPV6) fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret); - hash = __inet6_ehashfn(lhash, 0, fhash, 0, rds_hash_secret); +#else + fhash = (__force u32)faddr->s6_addr32[3]; +#endif + hash = __inet_ehashfn(lhash, 0, fhash, 0, rds_hash_secret); return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; } @@ -201,6 +205,8 @@ static struct rds_connection *__rds_conn_create(struct net *net, conn->c_isv6 = !ipv6_addr_v4mapped(laddr); conn->c_faddr = *faddr; conn->c_dev_if = dev_if; + +#if IS_ENABLED(CONFIG_IPV6) /* If the local address is link local, set c_bound_if to be the * index used for this connection. Otherwise, set it to 0 as * the socket is not bound to an interface. c_bound_if is used @@ -209,6 +215,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, if (ipv6_addr_type(laddr) & IPV6_ADDR_LINKLOCAL) conn->c_bound_if = dev_if; else +#endif conn->c_bound_if = 0; rds_conn_net_set(conn, net); @@ -500,9 +507,11 @@ static void __rds_inc_msg_cp(struct rds_incoming *inc, struct rds_info_iterator *iter, void *saddr, void *daddr, int flip, bool isv6) { +#if IS_ENABLED(CONFIG_IPV6) if (isv6) rds6_inc_info_copy(inc, iter, saddr, daddr, flip); else +#endif rds_inc_info_copy(inc, iter, *(__be32 *)saddr, *(__be32 *)daddr, flip); } @@ -581,6 +590,7 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, rds_conn_message_info_cmn(sock, len, iter, lens, want_send, false); } +#if IS_ENABLED(CONFIG_IPV6) static void rds6_conn_message_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, @@ -588,6 +598,7 @@ static void rds6_conn_message_info(struct socket *sock, unsigned int len, { rds_conn_message_info_cmn(sock, len, iter, lens, want_send, true); } +#endif static void rds_conn_message_info_send(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, @@ -596,12 +607,14 @@ static void rds_conn_message_info_send(struct socket *sock, unsigned int len, rds_conn_message_info(sock, len, iter, lens, 1); } +#if IS_ENABLED(CONFIG_IPV6) static void rds6_conn_message_info_send(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { rds6_conn_message_info(sock, len, iter, lens, 1); } +#endif static void rds_conn_message_info_retrans(struct socket *sock, unsigned int len, @@ -611,6 +624,7 @@ static void rds_conn_message_info_retrans(struct socket *sock, rds_conn_message_info(sock, len, iter, lens, 0); } +#if IS_ENABLED(CONFIG_IPV6) static void rds6_conn_message_info_retrans(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, @@ -618,6 +632,7 @@ static void rds6_conn_message_info_retrans(struct socket *sock, { rds6_conn_message_info(sock, len, iter, lens, 0); } +#endif void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, @@ -734,6 +749,7 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) return 1; } +#if IS_ENABLED(CONFIG_IPV6) static int rds6_conn_info_visitor(struct rds_conn_path *cp, void *buffer) { struct rds6_info_connection *cinfo6 = buffer; @@ -761,6 +777,7 @@ static int rds6_conn_info_visitor(struct rds_conn_path *cp, void *buffer) */ return 1; } +#endif static void rds_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, @@ -774,6 +791,7 @@ static void rds_conn_info(struct socket *sock, unsigned int len, sizeof(struct rds_info_connection)); } +#if IS_ENABLED(CONFIG_IPV6) static void rds6_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) @@ -785,6 +803,7 @@ static void rds6_conn_info(struct socket *sock, unsigned int len, buffer, sizeof(struct rds6_info_connection)); } +#endif int rds_conn_init(void) { @@ -807,12 +826,13 @@ int rds_conn_init(void) rds_conn_message_info_send); rds_info_register_func(RDS_INFO_RETRANS_MESSAGES, rds_conn_message_info_retrans); +#if IS_ENABLED(CONFIG_IPV6) rds_info_register_func(RDS6_INFO_CONNECTIONS, rds6_conn_info); rds_info_register_func(RDS6_INFO_SEND_MESSAGES, rds6_conn_message_info_send); rds_info_register_func(RDS6_INFO_RETRANS_MESSAGES, rds6_conn_message_info_retrans); - +#endif return 0; } @@ -830,11 +850,13 @@ void rds_conn_exit(void) rds_conn_message_info_send); rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES, rds_conn_message_info_retrans); +#if IS_ENABLED(CONFIG_IPV6) rds_info_deregister_func(RDS6_INFO_CONNECTIONS, rds6_conn_info); rds_info_deregister_func(RDS6_INFO_SEND_MESSAGES, rds6_conn_message_info_send); rds_info_deregister_func(RDS6_INFO_RETRANS_MESSAGES, rds6_conn_message_info_retrans); +#endif } /* diff --git a/net/rds/ib.c b/net/rds/ib.c index a4245c42d43b..89c6333ecd39 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -321,6 +321,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, return 1; } +#if IS_ENABLED(CONFIG_IPV6) /* IPv6 version of rds_ib_conn_info_visitor(). */ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) @@ -357,6 +358,7 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, } return 1; } +#endif static void rds_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, @@ -370,6 +372,7 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, sizeof(struct rds_info_rdma_connection)); } +#if IS_ENABLED(CONFIG_IPV6) /* IPv6 version of rds_ib_ic_info(). */ static void rds6_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, @@ -382,6 +385,7 @@ static void rds6_ib_ic_info(struct socket *sock, unsigned int len, buffer, sizeof(struct rds6_info_rdma_connection)); } +#endif /* * Early RDS/IB was built to only bind to an address if there is an IPoIB @@ -398,7 +402,9 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, { int ret; struct rdma_cm_id *cm_id; +#if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 sin6; +#endif struct sockaddr_in sin; struct sockaddr *sa; bool isv4; @@ -418,6 +424,7 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, sin.sin_addr.s_addr = addr->s6_addr32[3]; sa = (struct sockaddr *)&sin; } else { +#if IS_ENABLED(CONFIG_IPV6) memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr; @@ -432,21 +439,30 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) { struct net_device *dev; - if (scope_id == 0) - return -EADDRNOTAVAIL; + if (scope_id == 0) { + ret = -EADDRNOTAVAIL; + goto out; + } /* Use init_net for now as RDS is not network * name space aware. */ dev = dev_get_by_index(&init_net, scope_id); - if (!dev) - return -EADDRNOTAVAIL; + if (!dev) { + ret = -EADDRNOTAVAIL; + goto out; + } if (!ipv6_chk_addr(&init_net, addr, dev, 1)) { dev_put(dev); - return -EADDRNOTAVAIL; + ret = -EADDRNOTAVAIL; + goto out; } dev_put(dev); } +#else + ret = -EADDRNOTAVAIL; + goto out; +#endif } /* rdma_bind_addr will only succeed for IB & iWARP devices */ @@ -461,6 +477,7 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, addr, scope_id, ret, cm_id->device ? cm_id->device->node_type : -1); +out: rdma_destroy_id(cm_id); return ret; @@ -491,7 +508,9 @@ void rds_ib_exit(void) rds_ib_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); +#if IS_ENABLED(CONFIG_IPV6) rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); +#endif rds_ib_unregister_client(); rds_ib_destroy_nodev_conns(); rds_ib_sysctl_exit(); @@ -553,7 +572,9 @@ int rds_ib_init(void) rds_trans_register(&rds_ib_transport); rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); +#if IS_ENABLED(CONFIG_IPV6) rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); +#endif goto out; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0d654d99fe41..bfbb31f0c7fd 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -678,6 +678,7 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) return version; } +#if IS_ENABLED(CONFIG_IPV6) /* Given an IPv6 address, find the net_device which hosts that address and * return its index. This is used by the rds_ib_cm_handle_connect() code to * find the interface index of where an incoming request comes from when @@ -704,6 +705,7 @@ static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr) return idx; } +#endif int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event, bool isv6) @@ -732,6 +734,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, dp = event->param.conn.private_data; if (isv6) { +#if IS_ENABLED(CONFIG_IPV6) dp_cmn = &dp->ricp_v6.dp_cmn; saddr6 = &dp->ricp_v6.dp_saddr; daddr6 = &dp->ricp_v6.dp_daddr; @@ -756,6 +759,10 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, goto out; } } +#else + err = -EOPNOTSUPP; + goto out; +#endif } else { dp_cmn = &dp->ricp_v4.dp_cmn; ipv6_addr_set_v4mapped(dp->ricp_v4.dp_saddr, &s_mapped_addr); @@ -893,9 +900,11 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ +#if IS_ENABLED(CONFIG_IPV6) if (conn->c_isv6) handler = rds6_rdma_cm_event_handler; else +#endif handler = rds_rdma_cm_event_handler; ic->i_cm_id = rdma_create_id(&init_net, handler, conn, RDMA_PS_TCP, IB_QPT_RC); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index e3c8bbbdb43f..99ccafb90410 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -180,6 +180,7 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; } +#if IS_ENABLED(CONFIG_IPV6) void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds6_info_rdma_connection *iinfo6) { @@ -188,6 +189,7 @@ void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, iinfo6->rdma_mr_max = pool_1m->max_items; iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages; } +#endif struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) { diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index ad78929036ef..6b0f57c83a2a 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -39,7 +39,9 @@ /* Global IPv4 and IPv6 RDS RDMA listener cm_id */ static struct rdma_cm_id *rds_rdma_listen_id; +#if IS_ENABLED(CONFIG_IPV6) static struct rdma_cm_id *rds6_rdma_listen_id; +#endif static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, struct rdma_cm_event *event, @@ -155,11 +157,13 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, return rds_rdma_cm_event_handler_cmn(cm_id, event, false); } +#if IS_ENABLED(CONFIG_IPV6) int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { return rds_rdma_cm_event_handler_cmn(cm_id, event, true); } +#endif static int rds_rdma_listen_init_common(rdma_cm_event_handler handler, struct sockaddr *sa, @@ -214,7 +218,9 @@ out: static int rds_rdma_listen_init(void) { int ret; +#if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 sin6; +#endif struct sockaddr_in sin; sin.sin_family = PF_INET; @@ -226,6 +232,7 @@ static int rds_rdma_listen_init(void) if (ret != 0) return ret; +#if IS_ENABLED(CONFIG_IPV6) sin6.sin6_family = PF_INET6; sin6.sin6_addr = in6addr_any; sin6.sin6_port = htons(RDS_CM_PORT); @@ -237,6 +244,7 @@ static int rds_rdma_listen_init(void) /* Keep going even when IPv6 is not enabled in the system. */ if (ret != 0) rdsdebug("Cannot set up IPv6 RDMA listener\n"); +#endif return 0; } @@ -247,11 +255,13 @@ static void rds_rdma_listen_stop(void) rdma_destroy_id(rds_rdma_listen_id); rds_rdma_listen_id = NULL; } +#if IS_ENABLED(CONFIG_IPV6) if (rds6_rdma_listen_id) { rdsdebug("cm %p\n", rds6_rdma_listen_id); rdma_destroy_id(rds6_rdma_listen_id); rds6_rdma_listen_id = NULL; } +#endif } static int rds_rdma_init(void) diff --git a/net/rds/recv.c b/net/rds/recv.c index 03cd8df54c26..504cd6bcc54c 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -793,6 +793,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, rds_info_copy(iter, &minfo, sizeof(minfo)); } +#if IS_ENABLED(CONFIG_IPV6) void rds6_inc_info_copy(struct rds_incoming *inc, struct rds_info_iterator *iter, struct in6_addr *saddr, struct in6_addr *daddr, @@ -817,3 +818,4 @@ void rds6_inc_info_copy(struct rds_incoming *inc, rds_info_copy(iter, &minfo6, sizeof(minfo6)); } +#endif diff --git a/net/rds/send.c b/net/rds/send.c index 18e2b4d3931f..36a5dba56a43 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1103,6 +1103,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) dport = usin->sin_port; break; +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { int addr_type; @@ -1142,6 +1143,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) dport = sin6->sin6_port; break; } +#endif default: ret = -EINVAL; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index f23925af0b8d..2c7b7c352d3e 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -51,7 +51,9 @@ static LIST_HEAD(rds_tcp_tc_list); * rds6_tcp_tc_count counts both IPv4 and IPv6 connections. */ static unsigned int rds_tcp_tc_count; +#if IS_ENABLED(CONFIG_IPV6) static unsigned int rds6_tcp_tc_count; +#endif /* Track rds_tcp_connection structs so they can be cleaned up */ static DEFINE_SPINLOCK(rds_tcp_conn_lock); @@ -118,7 +120,9 @@ void rds_tcp_restore_callbacks(struct socket *sock, /* done under the callback_lock to serialize with write_space */ spin_lock(&rds_tcp_tc_list_lock); list_del_init(&tc->t_list_item); +#if IS_ENABLED(CONFIG_IPV6) rds6_tcp_tc_count--; +#endif if (!tc->t_cpath->cp_conn->c_isv6) rds_tcp_tc_count--; spin_unlock(&rds_tcp_tc_list_lock); @@ -207,7 +211,9 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) /* done under the callback_lock to serialize with write_space */ spin_lock(&rds_tcp_tc_list_lock); list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); +#if IS_ENABLED(CONFIG_IPV6) rds6_tcp_tc_count++; +#endif if (!tc->t_cpath->cp_conn->c_isv6) rds_tcp_tc_count++; spin_unlock(&rds_tcp_tc_list_lock); @@ -273,6 +279,7 @@ out: spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); } +#if IS_ENABLED(CONFIG_IPV6) /* Handle RDS6_INFO_TCP_SOCKETS socket option. It returns both IPv4 and * IPv6 connections. IPv4 connection address is returned in an IPv4 mapped * address. @@ -314,12 +321,15 @@ out: spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); } +#endif static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, __u32 scope_id) { struct net_device *dev = NULL; +#if IS_ENABLED(CONFIG_IPV6) int ret; +#endif if (ipv6_addr_v4mapped(addr)) { if (inet_addr_type(net, addr->s6_addr32[3]) == RTN_LOCAL) @@ -340,9 +350,11 @@ static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, } rcu_read_unlock(); } +#if IS_ENABLED(CONFIG_IPV6) ret = ipv6_chk_addr(net, addr, dev, 0); if (ret) return 0; +#endif return -EADDRNOTAVAIL; } @@ -545,18 +557,27 @@ static __net_init int rds_tcp_init_net(struct net *net) err = -ENOMEM; goto fail; } + +#if IS_ENABLED(CONFIG_IPV6) rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true); +#else + rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false); +#endif if (!rtn->rds_tcp_listen_sock) { pr_warn("could not set up IPv6 listen sock\n"); +#if IS_ENABLED(CONFIG_IPV6) /* Try IPv4 as some systems disable IPv6 */ rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false); if (!rtn->rds_tcp_listen_sock) { +#endif unregister_net_sysctl_table(rtn->rds_tcp_sysctl); rtn->rds_tcp_sysctl = NULL; err = -EAFNOSUPPORT; goto fail; +#if IS_ENABLED(CONFIG_IPV6) } +#endif } INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); return 0; @@ -670,7 +691,9 @@ static void rds_tcp_exit(void) rds_tcp_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); +#if IS_ENABLED(CONFIG_IPV6) rds_info_deregister_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); +#endif unregister_pernet_device(&rds_tcp_net_ops); rds_tcp_destroy_conns(); rds_trans_unregister(&rds_tcp_transport); @@ -702,7 +725,9 @@ static int rds_tcp_init(void) rds_trans_register(&rds_tcp_transport); rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); +#if IS_ENABLED(CONFIG_IPV6) rds_info_register_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); +#endif goto out; out_recv: diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 0cf0147117d8..c12203f646da 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -132,7 +132,10 @@ int rds_tcp_accept_one(struct socket *sock) int conn_state; struct rds_conn_path *cp; struct in6_addr *my_addr, *peer_addr; - int dev_if; +#if !IS_ENABLED(CONFIG_IPV6) + struct in6_addr saddr, daddr; +#endif + int dev_if = 0; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -165,12 +168,21 @@ int rds_tcp_accept_one(struct socket *sock) inet = inet_sk(new_sock->sk); +#if IS_ENABLED(CONFIG_IPV6) my_addr = &new_sock->sk->sk_v6_rcv_saddr; peer_addr = &new_sock->sk->sk_v6_daddr; - rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n", +#else + ipv6_addr_set_v4mapped(inet->inet_saddr, &saddr); + ipv6_addr_set_v4mapped(inet->inet_daddr, &daddr); + my_addr = &saddr; + peer_addr = &daddr; +#endif + rdsdebug("accepted family %d tcp %pI6c:%u -> %pI6c:%u\n", + sock->sk->sk_family, my_addr, ntohs(inet->inet_sport), peer_addr, ntohs(inet->inet_dport)); +#if IS_ENABLED(CONFIG_IPV6) /* sk_bound_dev_if is not set if the peer address is not link local * address. In this case, it happens that mcast_oif is set. So * just use it. @@ -184,9 +196,10 @@ int rds_tcp_accept_one(struct socket *sock) } else { dev_if = new_sock->sk->sk_bound_dev_if; } +#endif + conn = rds_conn_create(sock_net(sock->sk), - &new_sock->sk->sk_v6_rcv_saddr, - &new_sock->sk->sk_v6_daddr, + my_addr, peer_addr, &rds_tcp_transport, GFP_KERNEL, dev_if); if (IS_ERR(conn)) { -- cgit v1.2.3-59-g8ed1b