diff options
Diffstat (limited to 'drivers/staging/lustre/lnet/lnet/lib-socket.c')
-rw-r--r-- | drivers/staging/lustre/lnet/lnet/lib-socket.c | 594 |
1 files changed, 594 insertions, 0 deletions
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c new file mode 100644 index 000000000000..6f7ef4c737cd --- /dev/null +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -0,0 +1,594 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2015 Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Seagate, Inc. + */ +#define DEBUG_SUBSYSTEM S_LNET + +#include <linux/if.h> +#include <linux/in.h> +#include <linux/net.h> +#include <linux/file.h> +#include <linux/pagemap.h> +/* For sys_open & sys_close */ +#include <linux/syscalls.h> +#include <net/sock.h> + +#include "../../include/linux/libcfs/libcfs.h" +#include "../../include/linux/lnet/lib-lnet.h" + +static int +kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + err = filp->f_op->unlocked_ioctl(filp, cmd, arg); + set_fs(oldfs); + + return err; +} + +static int +lnet_sock_ioctl(int cmd, unsigned long arg) +{ + struct file *sock_filp; + struct socket *sock; + int rc; + + rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock); + if (rc != 0) { + CERROR("Can't create socket: %d\n", rc); + return rc; + } + + sock_filp = sock_alloc_file(sock, 0, NULL); + if (IS_ERR(sock_filp)) { + sock_release(sock); + rc = PTR_ERR(sock_filp); + goto out; + } + + rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg); + + fput(sock_filp); +out: + return rc; +} + +int +lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) +{ + struct ifreq ifr; + int nob; + int rc; + __u32 val; + + nob = strnlen(name, IFNAMSIZ); + if (nob == IFNAMSIZ) { + CERROR("Interface name %s too long\n", name); + return -EINVAL; + } + + CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ); + + strcpy(ifr.ifr_name, name); + rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); + if (rc != 0) { + CERROR("Can't get flags for interface %s\n", name); + return rc; + } + + if ((ifr.ifr_flags & IFF_UP) == 0) { + CDEBUG(D_NET, "Interface %s down\n", name); + *up = 0; + *ip = *mask = 0; + return 0; + } + *up = 1; + + strcpy(ifr.ifr_name, name); + ifr.ifr_addr.sa_family = AF_INET; + rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); + if (rc != 0) { + CERROR("Can't get IP address for interface %s\n", name); + return rc; + } + + val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; + *ip = ntohl(val); + + strcpy(ifr.ifr_name, name); + ifr.ifr_addr.sa_family = AF_INET; + rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr); + if (rc != 0) { + CERROR("Can't get netmask for interface %s\n", name); + return rc; + } + + val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr; + *mask = ntohl(val); + + return 0; +} +EXPORT_SYMBOL(lnet_ipif_query); + +int +lnet_ipif_enumerate(char ***namesp) +{ + /* Allocate and fill in 'names', returning # interfaces/error */ + char **names; + int toobig; + int nalloc; + int nfound; + struct ifreq *ifr; + struct ifconf ifc; + int rc; + int nob; + int i; + + nalloc = 16; /* first guess at max interfaces */ + toobig = 0; + for (;;) { + if (nalloc * sizeof(*ifr) > PAGE_CACHE_SIZE) { + toobig = 1; + nalloc = PAGE_CACHE_SIZE/sizeof(*ifr); + CWARN("Too many interfaces: only enumerating first %d\n", + nalloc); + } + + LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); + if (ifr == NULL) { + CERROR("ENOMEM enumerating up to %d interfaces\n", + nalloc); + rc = -ENOMEM; + goto out0; + } + + ifc.ifc_buf = (char *)ifr; + ifc.ifc_len = nalloc * sizeof(*ifr); + + rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc); + if (rc < 0) { + CERROR("Error %d enumerating interfaces\n", rc); + goto out1; + } + + LASSERT(rc == 0); + + nfound = ifc.ifc_len/sizeof(*ifr); + LASSERT(nfound <= nalloc); + + if (nfound < nalloc || toobig) + break; + + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); + nalloc *= 2; + } + + if (nfound == 0) + goto out1; + + LIBCFS_ALLOC(names, nfound * sizeof(*names)); + if (names == NULL) { + rc = -ENOMEM; + goto out1; + } + + for (i = 0; i < nfound; i++) { + nob = strnlen(ifr[i].ifr_name, IFNAMSIZ); + if (nob == IFNAMSIZ) { + /* no space for terminating NULL */ + CERROR("interface name %.*s too long (%d max)\n", + nob, ifr[i].ifr_name, IFNAMSIZ); + rc = -ENAMETOOLONG; + goto out2; + } + + LIBCFS_ALLOC(names[i], IFNAMSIZ); + if (names[i] == NULL) { + rc = -ENOMEM; + goto out2; + } + + memcpy(names[i], ifr[i].ifr_name, nob); + names[i][nob] = 0; + } + + *namesp = names; + rc = nfound; + +out2: + if (rc < 0) + lnet_ipif_free_enumeration(names, nfound); +out1: + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); +out0: + return rc; +} +EXPORT_SYMBOL(lnet_ipif_enumerate); + +void +lnet_ipif_free_enumeration(char **names, int n) +{ + int i; + + LASSERT(n > 0); + + for (i = 0; i < n && names[i] != NULL; i++) + LIBCFS_FREE(names[i], IFNAMSIZ); + + LIBCFS_FREE(names, n * sizeof(*names)); +} +EXPORT_SYMBOL(lnet_ipif_free_enumeration); + +int +lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) +{ + int rc; + long ticks = timeout * HZ; + unsigned long then; + struct timeval tv; + + LASSERT(nob > 0); + /* Caller may pass a zero timeout if she thinks the socket buffer is + * empty enough to take the whole message immediately */ + + for (;;) { + struct kvec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct msghdr msg = { + .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0 + }; + + if (timeout != 0) { + /* Set send timeout to remaining time */ + tv = (struct timeval) { + .tv_sec = ticks / HZ, + .tv_usec = ((ticks % HZ) * 1000000) / HZ + }; + rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, + (char *)&tv, sizeof(tv)); + if (rc != 0) { + CERROR("Can't set socket send timeout %ld.%06d: %d\n", + (long)tv.tv_sec, (int)tv.tv_usec, rc); + return rc; + } + } + + then = jiffies; + rc = kernel_sendmsg(sock, &msg, &iov, 1, nob); + ticks -= jiffies - then; + + if (rc == nob) + return 0; + + if (rc < 0) + return rc; + + if (rc == 0) { + CERROR("Unexpected zero rc\n"); + return -ECONNABORTED; + } + + if (ticks <= 0) + return -EAGAIN; + + buffer = ((char *)buffer) + rc; + nob -= rc; + } + return 0; +} +EXPORT_SYMBOL(lnet_sock_write); + +int +lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) +{ + int rc; + long ticks = timeout * HZ; + unsigned long then; + struct timeval tv; + + LASSERT(nob > 0); + LASSERT(ticks > 0); + + for (;;) { + struct kvec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct msghdr msg = { + .msg_flags = 0 + }; + + /* Set receive timeout to remaining time */ + tv = (struct timeval) { + .tv_sec = ticks / HZ, + .tv_usec = ((ticks % HZ) * 1000000) / HZ + }; + rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, + (char *)&tv, sizeof(tv)); + if (rc != 0) { + CERROR("Can't set socket recv timeout %ld.%06d: %d\n", + (long)tv.tv_sec, (int)tv.tv_usec, rc); + return rc; + } + + then = jiffies; + rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0); + ticks -= jiffies - then; + + if (rc < 0) + return rc; + + if (rc == 0) + return -ECONNRESET; + + buffer = ((char *)buffer) + rc; + nob -= rc; + + if (nob == 0) + return 0; + + if (ticks <= 0) + return -ETIMEDOUT; + } +} +EXPORT_SYMBOL(lnet_sock_read); + +static int +lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip, + int local_port) +{ + struct sockaddr_in locaddr; + struct socket *sock; + int rc; + int option; + + /* All errors are fatal except bind failure if the port is in use */ + *fatal = 1; + + rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock); + *sockp = sock; + if (rc != 0) { + CERROR("Can't create socket: %d\n", rc); + return rc; + } + + option = 1; + rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, + (char *)&option, sizeof(option)); + if (rc != 0) { + CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); + goto failed; + } + + if (local_ip != 0 || local_port != 0) { + memset(&locaddr, 0, sizeof(locaddr)); + locaddr.sin_family = AF_INET; + locaddr.sin_port = htons(local_port); + locaddr.sin_addr.s_addr = (local_ip == 0) ? + INADDR_ANY : htonl(local_ip); + + rc = kernel_bind(sock, (struct sockaddr *)&locaddr, + sizeof(locaddr)); + if (rc == -EADDRINUSE) { + CDEBUG(D_NET, "Port %d already in use\n", local_port); + *fatal = 0; + goto failed; + } + if (rc != 0) { + CERROR("Error trying to bind to port %d: %d\n", + local_port, rc); + goto failed; + } + } + return 0; + +failed: + sock_release(sock); + return rc; +} + +int +lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize) +{ + int option; + int rc; + + if (txbufsize != 0) { + option = txbufsize; + rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, + (char *)&option, sizeof(option)); + if (rc != 0) { + CERROR("Can't set send buffer %d: %d\n", + option, rc); + return rc; + } + } + + if (rxbufsize != 0) { + option = rxbufsize; + rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, + (char *)&option, sizeof(option)); + if (rc != 0) { + CERROR("Can't set receive buffer %d: %d\n", + option, rc); + return rc; + } + } + return 0; +} +EXPORT_SYMBOL(lnet_sock_setbuf); + +int +lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port) +{ + struct sockaddr_in sin; + int len = sizeof(sin); + int rc; + + if (remote) + rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len); + else + rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len); + if (rc != 0) { + CERROR("Error %d getting sock %s IP/port\n", + rc, remote ? "peer" : "local"); + return rc; + } + + if (ip != NULL) + *ip = ntohl(sin.sin_addr.s_addr); + + if (port != NULL) + *port = ntohs(sin.sin_port); + + return 0; +} +EXPORT_SYMBOL(lnet_sock_getaddr); + +int +lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize) +{ + if (txbufsize != NULL) + *txbufsize = sock->sk->sk_sndbuf; + + if (rxbufsize != NULL) + *rxbufsize = sock->sk->sk_rcvbuf; + + return 0; +} +EXPORT_SYMBOL(lnet_sock_getbuf); + +int +lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port, + int backlog) +{ + int fatal; + int rc; + + rc = lnet_sock_create(sockp, &fatal, local_ip, local_port); + if (rc != 0) { + if (!fatal) + CERROR("Can't create socket: port %d already in use\n", + local_port); + return rc; + } + + rc = kernel_listen(*sockp, backlog); + if (rc == 0) + return 0; + + CERROR("Can't set listen backlog %d: %d\n", backlog, rc); + sock_release(*sockp); + return rc; +} +EXPORT_SYMBOL(lnet_sock_listen); + +int +lnet_sock_accept(struct socket **newsockp, struct socket *sock) +{ + wait_queue_t wait; + struct socket *newsock; + int rc; + + init_waitqueue_entry(&wait, current); + + /* XXX this should add a ref to sock->ops->owner, if + * TCP could be a module */ + rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock); + if (rc) { + CERROR("Can't allocate socket\n"); + return rc; + } + + newsock->ops = sock->ops; + + rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + if (rc == -EAGAIN) { + /* Nothing ready, so wait for activity */ + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sock->sk), &wait); + schedule(); + remove_wait_queue(sk_sleep(sock->sk), &wait); + set_current_state(TASK_RUNNING); + rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + } + + if (rc != 0) + goto failed; + + *newsockp = newsock; + return 0; + +failed: + sock_release(newsock); + return rc; +} +EXPORT_SYMBOL(lnet_sock_accept); + +int +lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip, + int local_port, __u32 peer_ip, int peer_port) +{ + struct sockaddr_in srvaddr; + int rc; + + rc = lnet_sock_create(sockp, fatal, local_ip, local_port); + if (rc != 0) + return rc; + + memset(&srvaddr, 0, sizeof(srvaddr)); + srvaddr.sin_family = AF_INET; + srvaddr.sin_port = htons(peer_port); + srvaddr.sin_addr.s_addr = htonl(peer_ip); + + rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr, + sizeof(srvaddr), 0); + if (rc == 0) + return 0; + + /* EADDRNOTAVAIL probably means we're already connected to the same + * peer/port on the same local port on a differently typed + * connection. Let our caller retry with a different local + * port... */ + *fatal = !(rc == -EADDRNOTAVAIL); + + CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET, + "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc, + &local_ip, local_port, &peer_ip, peer_port); + + sock_release(*sockp); + return rc; +} +EXPORT_SYMBOL(lnet_sock_connect); |