diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-15 15:04:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-15 15:04:25 -0700 |
commit | 9a76aba02a37718242d7cdc294f0a3901928aa57 (patch) | |
tree | 2040d038f85d2120f21af83b0793efd5af1864e3 /net/rds/af_rds.c | |
parent | x86: i8259: Add missing include file (diff) | |
parent | bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT" (diff) | |
download | linux-dev-9a76aba02a37718242d7cdc294f0a3901928aa57.tar.xz linux-dev-9a76aba02a37718242d7cdc294f0a3901928aa57.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
- Gustavo A. R. Silva keeps working on the implicit switch fallthru
changes.
- Support 802.11ax High-Efficiency wireless in cfg80211 et al, From
Luca Coelho.
- Re-enable ASPM in r8169, from Kai-Heng Feng.
- Add virtual XFRM interfaces, which avoids all of the limitations of
existing IPSEC tunnels. From Steffen Klassert.
- Convert GRO over to use a hash table, so that when we have many
flows active we don't traverse a long list during accumluation.
- Many new self tests for routing, TC, tunnels, etc. Too many
contributors to mention them all, but I'm really happy to keep
seeing this stuff.
- Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu.
- Lots of cleanups and fixes in L2TP code from Guillaume Nault.
- Add IPSEC offload support to netdevsim, from Shannon Nelson.
- Add support for slotting with non-uniform distribution to netem
packet scheduler, from Yousuk Seung.
- Add UDP GSO support to mlx5e, from Boris Pismenny.
- Support offloading of Team LAG in NFP, from John Hurley.
- Allow to configure TX queue selection based upon RX queue, from
Amritha Nambiar.
- Support ethtool ring size configuration in aquantia, from Anton
Mikaev.
- Support DSCP and flowlabel per-transport in SCTP, from Xin Long.
- Support list based batching and stack traversal of SKBs, this is
very exciting work. From Edward Cree.
- Busyloop optimizations in vhost_net, from Toshiaki Makita.
- Introduce the ETF qdisc, which allows time based transmissions. IGB
can offload this in hardware. From Vinicius Costa Gomes.
- Add parameter support to devlink, from Moshe Shemesh.
- Several multiplication and division optimizations for BPF JIT in
nfp driver, from Jiong Wang.
- Lots of prepatory work to make more of the packet scheduler layer
lockless, when possible, from Vlad Buslov.
- Add ACK filter and NAT awareness to sch_cake packet scheduler, from
Toke Høiland-Jørgensen.
- Support regions and region snapshots in devlink, from Alex Vesker.
- Allow to attach XDP programs to both HW and SW at the same time on
a given device, with initial support in nfp. From Jakub Kicinski.
- Add TLS RX offload and support in mlx5, from Ilya Lesokhin.
- Use PHYLIB in r8169 driver, from Heiner Kallweit.
- All sorts of changes to support Spectrum 2 in mlxsw driver, from
Ido Schimmel.
- PTP support in mv88e6xxx DSA driver, from Andrew Lunn.
- Make TCP_USER_TIMEOUT socket option more accurate, from Jon
Maxwell.
- Support for templates in packet scheduler classifier, from Jiri
Pirko.
- IPV6 support in RDS, from Ka-Cheong Poon.
- Native tproxy support in nf_tables, from Máté Eckl.
- Maintain IP fragment queue in an rbtree, but optimize properly for
in-order frags. From Peter Oskolkov.
- Improvde handling of ACKs on hole repairs, from Yuchung Cheng"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits)
bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT"
hv/netvsc: Fix NULL dereference at single queue mode fallback
net: filter: mark expected switch fall-through
xen-netfront: fix warn message as irq device name has '/'
cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0
net: dsa: mv88e6xxx: missing unlock on error path
rds: fix building with IPV6=m
inet/connection_sock: prefer _THIS_IP_ to current_text_addr
net: dsa: mv88e6xxx: bitwise vs logical bug
net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd()
ieee802154: hwsim: using right kind of iteration
net: hns3: Add vlan filter setting by ethtool command -K
net: hns3: Set tx ring' tc info when netdev is up
net: hns3: Remove tx ring BD len register in hns3_enet
net: hns3: Fix desc num set to default when setting channel
net: hns3: Fix for phy link issue when using marvell phy driver
net: hns3: Fix for information of phydev lost problem when down/up
net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero
net: hns3: Add support for serdes loopback selftest
bnxt_en: take coredump_record structure off stack
...
Diffstat (limited to 'net/rds/af_rds.c')
-rw-r--r-- | net/rds/af_rds.c | 205 |
1 files changed, 167 insertions, 38 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index ab751a150f70..65387e1e6964 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,6 +35,7 @@ #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/in.h> +#include <linux/ipv6.h> #include <linux/poll.h> #include <net/sock.h> @@ -113,26 +114,82 @@ void rds_wake_sk_sleep(struct rds_sock *rs) static int rds_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; struct rds_sock *rs = rds_sk_to_rs(sock->sk); - - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + int uaddr_len; /* racey, don't care */ if (peer) { - if (!rs->rs_conn_addr) + if (ipv6_addr_any(&rs->rs_conn_addr)) return -ENOTCONN; - sin->sin_port = rs->rs_conn_port; - sin->sin_addr.s_addr = rs->rs_conn_addr; + if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + sin->sin_family = AF_INET; + sin->sin_port = rs->rs_conn_port; + sin->sin_addr.s_addr = rs->rs_conn_addr_v4; + uaddr_len = sizeof(*sin); + } else { + sin6 = (struct sockaddr_in6 *)uaddr; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = rs->rs_conn_port; + sin6->sin6_addr = rs->rs_conn_addr; + sin6->sin6_flowinfo = 0; + /* scope_id is the same as in the bound address. */ + sin6->sin6_scope_id = rs->rs_bound_scope_id; + uaddr_len = sizeof(*sin6); + } } else { - sin->sin_port = rs->rs_bound_port; - sin->sin_addr.s_addr = rs->rs_bound_addr; + /* If socket is not yet bound and the socket is connected, + * set the return address family to be the same as the + * connected address, but with 0 address value. If it is not + * connected, set the family to be AF_UNSPEC (value 0) and + * the address size to be that of an IPv4 address. + */ + if (ipv6_addr_any(&rs->rs_bound_addr)) { + if (ipv6_addr_any(&rs->rs_conn_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_UNSPEC; + return sizeof(*sin); + } + +#if IS_ENABLED(CONFIG_IPV6) + if (!(ipv6_addr_type(&rs->rs_conn_addr) & + IPV6_ADDR_MAPPED)) { + sin6 = (struct sockaddr_in6 *)uaddr; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + return sizeof(*sin6); + } +#endif + + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + return sizeof(*sin); + } + if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + sin->sin_family = AF_INET; + sin->sin_port = rs->rs_bound_port; + sin->sin_addr.s_addr = rs->rs_bound_addr_v4; + uaddr_len = sizeof(*sin); + } else { + sin6 = (struct sockaddr_in6 *)uaddr; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = rs->rs_bound_port; + sin6->sin6_addr = rs->rs_bound_addr; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = rs->rs_bound_scope_id; + uaddr_len = sizeof(*sin6); + } } - sin->sin_family = AF_INET; - - return sizeof(*sin); + return uaddr_len; } /* @@ -203,11 +260,12 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, int len) { + struct sockaddr_in6 sin6; struct sockaddr_in sin; int ret = 0; /* racing with another thread binding seems ok here */ - if (rs->rs_bound_addr == 0) { + if (ipv6_addr_any(&rs->rs_bound_addr)) { ret = -ENOTCONN; /* XXX not a great errno */ goto out; } @@ -215,14 +273,23 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, if (len < sizeof(struct sockaddr_in)) { ret = -EINVAL; goto out; + } else if (len < sizeof(struct sockaddr_in6)) { + /* Assume IPv4 */ + if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) { + ret = -EFAULT; + goto out; + } + ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr); + sin6.sin6_port = sin.sin_port; + } else { + if (copy_from_user(&sin6, optval, + sizeof(struct sockaddr_in6))) { + ret = -EFAULT; + goto out; + } } - if (copy_from_user(&sin, optval, sizeof(sin))) { - ret = -EFAULT; - goto out; - } - - rds_send_drop_to(rs, &sin); + rds_send_drop_to(rs, &sin6); out: return ret; } @@ -435,31 +502,91 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + struct sockaddr_in *sin; struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; lock_sock(sk); - if (addr_len != sizeof(struct sockaddr_in)) { - ret = -EINVAL; - goto out; - } + switch (uaddr->sa_family) { + case AF_INET: + sin = (struct sockaddr_in *)uaddr; + if (addr_len < sizeof(struct sockaddr_in)) { + ret = -EINVAL; + break; + } + if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { + ret = -EDESTADDRREQ; + break; + } + if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) || + sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) { + ret = -EINVAL; + break; + } + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr); + rs->rs_conn_port = sin->sin_port; + break; - if (sin->sin_family != AF_INET) { - ret = -EAFNOSUPPORT; - goto out; - } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: { + struct sockaddr_in6 *sin6; + int addr_type; - if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { - ret = -EDESTADDRREQ; - goto out; + sin6 = (struct sockaddr_in6 *)uaddr; + if (addr_len < sizeof(struct sockaddr_in6)) { + ret = -EINVAL; + break; + } + addr_type = ipv6_addr_type(&sin6->sin6_addr); + if (!(addr_type & IPV6_ADDR_UNICAST)) { + __be32 addr4; + + if (!(addr_type & IPV6_ADDR_MAPPED)) { + ret = -EPROTOTYPE; + break; + } + + /* It is a mapped address. Need to do some sanity + * checks. + */ + addr4 = sin6->sin6_addr.s6_addr32[3]; + if (addr4 == htonl(INADDR_ANY) || + addr4 == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(addr4))) { + ret = -EPROTOTYPE; + break; + } + } + + if (addr_type & IPV6_ADDR_LINKLOCAL) { + /* If socket is arleady bound to a link local address, + * the peer address must be on the same link. + */ + if (sin6->sin6_scope_id == 0 || + (!ipv6_addr_any(&rs->rs_bound_addr) && + rs->rs_bound_scope_id && + sin6->sin6_scope_id != rs->rs_bound_scope_id)) { + ret = -EINVAL; + break; + } + /* Remember the connected address scope ID. It will + * be checked against the binding local address when + * the socket is bound. + */ + rs->rs_bound_scope_id = sin6->sin6_scope_id; + } + rs->rs_conn_addr = sin6->sin6_addr; + rs->rs_conn_port = sin6->sin6_port; + break; } +#endif - rs->rs_conn_addr = sin->sin_addr.s_addr; - rs->rs_conn_port = sin->sin_port; + default: + ret = -EAFNOSUPPORT; + break; + } -out: release_sock(sk); return ret; } @@ -578,8 +705,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { total++; if (total <= len) - rds_inc_info_copy(inc, iter, inc->i_saddr, - rs->rs_bound_addr, 1); + rds_inc_info_copy(inc, iter, + inc->i_saddr.s6_addr32[3], + rs->rs_bound_addr_v4, + 1); } read_unlock(&rs->rs_recv_lock); @@ -608,8 +737,8 @@ static void rds_sock_info(struct socket *sock, unsigned int len, list_for_each_entry(rs, &rds_sock_list, rs_item) { sinfo.sndbuf = rds_sk_sndbuf(rs); sinfo.rcvbuf = rds_sk_rcvbuf(rs); - sinfo.bound_addr = rs->rs_bound_addr; - sinfo.connected_addr = rs->rs_conn_addr; + sinfo.bound_addr = rs->rs_bound_addr_v4; + sinfo.connected_addr = rs->rs_conn_addr_v4; sinfo.bound_port = rs->rs_bound_port; sinfo.connected_port = rs->rs_conn_port; sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); |