aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc/svcsock.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc/svcsock.c')
-rw-r--r--net/sunrpc/svcsock.c407
1 files changed, 279 insertions, 128 deletions
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ff1f8bf680aa..63ae94771b8e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -36,11 +36,13 @@
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
+#include <net/ipv6.h>
#include <net/tcp_states.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>
@@ -58,10 +60,16 @@
* providing that certain rules are followed:
*
* SK_CONN, SK_DATA, can be set or cleared at any time.
- * after a set, svc_sock_enqueue must be called.
+ * after a set, svc_sock_enqueue must be called.
* after a clear, the socket must be read/accepted
* if this succeeds, it must be set again.
* SK_CLOSE can set at any time. It is never cleared.
+ * sk_inuse contains a bias of '1' until SK_DEAD is set.
+ * so when sk_inuse hits zero, we know the socket is dead
+ * and no-one is using it.
+ * SK_DEAD can only be set while SK_BUSY is held which ensures
+ * no other thread will be using the socket or will try to
+ * set SK_DEAD.
*
*/
@@ -69,7 +77,8 @@
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
- int *errp, int pmap_reg);
+ int *errp, int flags);
+static void svc_delete_socket(struct svc_sock *svsk);
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
@@ -114,6 +123,41 @@ static inline void svc_reclassify_socket(struct socket *sock)
}
#endif
+static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
+{
+ switch (addr->sa_family) {
+ case AF_INET:
+ snprintf(buf, len, "%u.%u.%u.%u, port=%u",
+ NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
+ htons(((struct sockaddr_in *) addr)->sin_port));
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case AF_INET6:
+ snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
+ NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
+ htons(((struct sockaddr_in6 *) addr)->sin6_port));
+ break;
+#endif
+ default:
+ snprintf(buf, len, "unknown address type: %d", addr->sa_family);
+ break;
+ }
+ return buf;
+}
+
+/**
+ * svc_print_addr - Format rq_addr field for printing
+ * @rqstp: svc_rqst struct containing address to print
+ * @buf: target buffer for formatted address
+ * @len: length of target buffer
+ *
+ */
+char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
+{
+ return __svc_print_addr(svc_addr(rqstp), buf, len);
+}
+EXPORT_SYMBOL_GPL(svc_print_addr);
+
/*
* Queue up an idle server thread. Must have pool->sp_lock held.
* Note: this is really a stack rather than a queue, so that we only
@@ -245,7 +289,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
svsk->sk_sk, rqstp);
svc_thread_dequeue(pool, rqstp);
if (rqstp->rq_sock)
- printk(KERN_ERR
+ printk(KERN_ERR
"svc_sock_enqueue: server %p, rq_sock=%p!\n",
rqstp, rqstp->rq_sock);
rqstp->rq_sock = svsk;
@@ -329,8 +373,9 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
static inline void
svc_sock_put(struct svc_sock *svsk)
{
- if (atomic_dec_and_test(&svsk->sk_inuse) &&
- test_bit(SK_DEAD, &svsk->sk_flags)) {
+ if (atomic_dec_and_test(&svsk->sk_inuse)) {
+ BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));
+
dprintk("svc: releasing dead socket\n");
if (svsk->sk_sock->file)
sockfd_put(svsk->sk_sock);
@@ -402,6 +447,43 @@ svc_wake_up(struct svc_serv *serv)
}
}
+union svc_pktinfo_u {
+ struct in_pktinfo pkti;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct in6_pktinfo pkti6;
+#endif
+};
+
+static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
+{
+ switch (rqstp->rq_sock->sk_sk->sk_family) {
+ case AF_INET: {
+ struct in_pktinfo *pki = CMSG_DATA(cmh);
+
+ cmh->cmsg_level = SOL_IP;
+ cmh->cmsg_type = IP_PKTINFO;
+ pki->ipi_ifindex = 0;
+ pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr;
+ cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
+ }
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case AF_INET6: {
+ struct in6_pktinfo *pki = CMSG_DATA(cmh);
+
+ cmh->cmsg_level = SOL_IPV6;
+ cmh->cmsg_type = IPV6_PKTINFO;
+ pki->ipi6_ifindex = 0;
+ ipv6_addr_copy(&pki->ipi6_addr,
+ &rqstp->rq_daddr.addr6);
+ cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
+ }
+ break;
+#endif
+ }
+ return;
+}
+
/*
* Generic sendto routine
*/
@@ -411,9 +493,8 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
struct svc_sock *svsk = rqstp->rq_sock;
struct socket *sock = svsk->sk_sock;
int slen;
- char buffer[CMSG_SPACE(sizeof(struct in_pktinfo))];
+ char buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
struct cmsghdr *cmh = (struct cmsghdr *)buffer;
- struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh);
int len = 0;
int result;
int size;
@@ -421,25 +502,20 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
size_t base = xdr->page_base;
unsigned int pglen = xdr->page_len;
unsigned int flags = MSG_MORE;
+ char buf[RPC_MAX_ADDRBUFLEN];
slen = xdr->len;
if (rqstp->rq_prot == IPPROTO_UDP) {
- /* set the source and destination */
- struct msghdr msg;
- msg.msg_name = &rqstp->rq_addr;
- msg.msg_namelen = sizeof(rqstp->rq_addr);
- msg.msg_iov = NULL;
- msg.msg_iovlen = 0;
- msg.msg_flags = MSG_MORE;
-
- msg.msg_control = cmh;
- msg.msg_controllen = sizeof(buffer);
- cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
- cmh->cmsg_level = SOL_IP;
- cmh->cmsg_type = IP_PKTINFO;
- pki->ipi_ifindex = 0;
- pki->ipi_spec_dst.s_addr = rqstp->rq_daddr;
+ struct msghdr msg = {
+ .msg_name = &rqstp->rq_addr,
+ .msg_namelen = rqstp->rq_addrlen,
+ .msg_control = cmh,
+ .msg_controllen = sizeof(buffer),
+ .msg_flags = MSG_MORE,
+ };
+
+ svc_set_cmsg_data(rqstp, cmh);
if (sock_sendmsg(sock, &msg, 0) < 0)
goto out;
@@ -476,16 +552,16 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
if (xdr->tail[0].iov_len) {
result = kernel_sendpage(sock, rqstp->rq_respages[0],
((unsigned long)xdr->tail[0].iov_base)
- & (PAGE_SIZE-1),
+ & (PAGE_SIZE-1),
xdr->tail[0].iov_len, 0);
if (result > 0)
len += result;
}
out:
- dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %x)\n",
- rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
- rqstp->rq_addr.sin_addr.s_addr);
+ dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
+ rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len,
+ xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
return len;
}
@@ -520,7 +596,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
if (!serv)
return 0;
- spin_lock(&serv->sv_lock);
+ spin_lock_bh(&serv->sv_lock);
list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) {
int onelen = one_sock_name(buf+len, svsk);
if (toclose && strcmp(toclose, buf+len) == 0)
@@ -528,12 +604,12 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
else
len += onelen;
}
- spin_unlock(&serv->sv_lock);
+ spin_unlock_bh(&serv->sv_lock);
if (closesk)
/* Should unregister with portmap, but you cannot
* unregister just one protocol...
*/
- svc_delete_socket(closesk);
+ svc_close_socket(closesk);
else if (toclose)
return -ENOENT;
return len;
@@ -560,31 +636,22 @@ svc_recv_available(struct svc_sock *svsk)
static int
svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
{
- struct msghdr msg;
- struct socket *sock;
- int len, alen;
-
- rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
- sock = rqstp->rq_sock->sk_sock;
-
- msg.msg_name = &rqstp->rq_addr;
- msg.msg_namelen = sizeof(rqstp->rq_addr);
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
-
- msg.msg_flags = MSG_DONTWAIT;
+ struct svc_sock *svsk = rqstp->rq_sock;
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT,
+ };
+ int len;
- len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
+ len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
+ msg.msg_flags);
/* sock_recvmsg doesn't fill in the name/namelen, so we must..
- * possibly we should cache this in the svc_sock structure
- * at accept time. FIXME
*/
- alen = sizeof(rqstp->rq_addr);
- kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen);
+ memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
+ rqstp->rq_addrlen = svsk->sk_remotelen;
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
- rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
+ svsk, iov[0].iov_base, iov[0].iov_len, len);
return len;
}
@@ -654,6 +721,47 @@ svc_write_space(struct sock *sk)
}
}
+static void svc_udp_get_sender_address(struct svc_rqst *rqstp,
+ struct sk_buff *skb)
+{
+ switch (rqstp->rq_sock->sk_sk->sk_family) {
+ case AF_INET: {
+ /* this seems to come from net/ipv4/udp.c:udp_recvmsg */
+ struct sockaddr_in *sin = svc_addr_in(rqstp);
+
+ sin->sin_family = AF_INET;
+ sin->sin_port = skb->h.uh->source;
+ sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ rqstp->rq_addrlen = sizeof(struct sockaddr_in);
+ /* Remember which interface received this request */
+ rqstp->rq_daddr.addr.s_addr = skb->nh.iph->daddr;
+ }
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case AF_INET6: {
+ /* this is derived from net/ipv6/udp.c:udpv6_recvmesg */
+ struct sockaddr_in6 *sin6 = svc_addr_in6(rqstp);
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = skb->h.uh->source;
+ sin6->sin6_flowinfo = 0;
+ sin6->sin6_scope_id = 0;
+ if (ipv6_addr_type(&sin6->sin6_addr) &
+ IPV6_ADDR_LINKLOCAL)
+ sin6->sin6_scope_id = IP6CB(skb)->iif;
+ ipv6_addr_copy(&sin6->sin6_addr,
+ &skb->nh.ipv6h->saddr);
+ rqstp->rq_addrlen = sizeof(struct sockaddr_in);
+ /* Remember which interface received this request */
+ ipv6_addr_copy(&rqstp->rq_daddr.addr6,
+ &skb->nh.ipv6h->saddr);
+ }
+ break;
+#endif
+ }
+ return;
+}
+
/*
* Receive a datagram from a UDP socket.
*/
@@ -683,6 +791,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
return svc_deferred_recv(rqstp);
}
+ if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
+ svc_delete_socket(svsk);
+ return 0;
+ }
+
clear_bit(SK_DATA, &svsk->sk_flags);
while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
if (err == -EAGAIN) {
@@ -698,7 +811,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
tv.tv_sec = xtime.tv_sec;
tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
skb_set_timestamp(skb, &tv);
- /* Don't enable netstamp, sunrpc doesn't
+ /* Don't enable netstamp, sunrpc doesn't
need that much accuracy */
}
skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
@@ -712,13 +825,9 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
len = skb->len - sizeof(struct udphdr);
rqstp->rq_arg.len = len;
- rqstp->rq_prot = IPPROTO_UDP;
+ rqstp->rq_prot = IPPROTO_UDP;
- /* Get sender address */
- rqstp->rq_addr.sin_family = AF_INET;
- rqstp->rq_addr.sin_port = skb->h.uh->source;
- rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
- rqstp->rq_daddr = skb->nh.iph->daddr;
+ svc_udp_get_sender_address(rqstp, skb);
if (skb_is_nonlinear(skb)) {
/* we have to copy */
@@ -730,7 +839,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
return 0;
}
local_bh_enable();
- skb_free_datagram(svsk->sk_sk, skb);
+ skb_free_datagram(svsk->sk_sk, skb);
} else {
/* we can use it in-place */
rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr);
@@ -781,7 +890,7 @@ svc_udp_init(struct svc_sock *svsk)
svsk->sk_sendto = svc_udp_sendto;
/* initialise setting must have enough space to
- * receive and respond to one request.
+ * receive and respond to one request.
* svc_udp_recvfrom will re-adjust if necessary
*/
svc_sock_setbufsize(svsk->sk_sock,
@@ -862,18 +971,36 @@ svc_tcp_data_ready(struct sock *sk, int count)
wake_up_interruptible(sk->sk_sleep);
}
+static inline int svc_port_is_privileged(struct sockaddr *sin)
+{
+ switch (sin->sa_family) {
+ case AF_INET:
+ return ntohs(((struct sockaddr_in *)sin)->sin_port)
+ < PROT_SOCK;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case AF_INET6:
+ return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
+ < PROT_SOCK;
+#endif
+ default:
+ return 0;
+ }
+}
+
/*
* Accept a TCP connection
*/
static void
svc_tcp_accept(struct svc_sock *svsk)
{
- struct sockaddr_in sin;
+ struct sockaddr_storage addr;
+ struct sockaddr *sin = (struct sockaddr *) &addr;
struct svc_serv *serv = svsk->sk_server;
struct socket *sock = svsk->sk_sock;
struct socket *newsock;
struct svc_sock *newsvsk;
int err, slen;
+ char buf[RPC_MAX_ADDRBUFLEN];
dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
if (!sock)
@@ -894,8 +1021,7 @@ svc_tcp_accept(struct svc_sock *svsk)
set_bit(SK_CONN, &svsk->sk_flags);
svc_sock_enqueue(svsk);
- slen = sizeof(sin);
- err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen);
+ err = kernel_getpeername(newsock, sin, &slen);
if (err < 0) {
if (net_ratelimit())
printk(KERN_WARNING "%s: peername failed (err %d)!\n",
@@ -904,27 +1030,30 @@ svc_tcp_accept(struct svc_sock *svsk)
}
/* Ideally, we would want to reject connections from unauthorized
- * hosts here, but when we get encription, the IP of the host won't
- * tell us anything. For now just warn about unpriv connections.
+ * hosts here, but when we get encryption, the IP of the host won't
+ * tell us anything. For now just warn about unpriv connections.
*/
- if (ntohs(sin.sin_port) >= 1024) {
+ if (!svc_port_is_privileged(sin)) {
dprintk(KERN_WARNING
- "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
- serv->sv_name,
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+ "%s: connect from unprivileged port: %s\n",
+ serv->sv_name,
+ __svc_print_addr(sin, buf, sizeof(buf)));
}
-
- dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name,
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+ dprintk("%s: connect from %s\n", serv->sv_name,
+ __svc_print_addr(sin, buf, sizeof(buf)));
/* make sure that a write doesn't block forever when
* low on memory
*/
newsock->sk->sk_sndtimeo = HZ*30;
- if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0)))
+ if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
+ (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
goto failed;
+ memcpy(&newsvsk->sk_remote, sin, slen);
+ newsvsk->sk_remotelen = slen;
+ svc_sock_received(newsvsk);
/* make sure that we don't have too many active connections.
* If we have, something must be dropped.
@@ -947,11 +1076,9 @@ svc_tcp_accept(struct svc_sock *svsk)
"sockets, consider increasing the "
"number of nfsd threads\n",
serv->sv_name);
- printk(KERN_NOTICE "%s: last TCP connect from "
- "%u.%u.%u.%u:%d\n",
- serv->sv_name,
- NIPQUAD(sin.sin_addr.s_addr),
- ntohs(sin.sin_port));
+ printk(KERN_NOTICE
+ "%s: last TCP connect from %s\n",
+ serv->sv_name, buf);
}
/*
* Always select the oldest socket. It's not fair,
@@ -1025,7 +1152,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
* on the number of threads which will access the socket.
*
* rcvbuf just needs to be able to hold a few requests.
- * Normally they will be removed from the queue
+ * Normally they will be removed from the queue
* as soon a a complete request arrives.
*/
svc_sock_setbufsize(svsk->sk_sock,
@@ -1050,7 +1177,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (len < want) {
dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",
- len, want);
+ len, want);
svc_sock_received(svsk);
return -EAGAIN; /* record header not complete */
}
@@ -1176,7 +1303,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
rqstp->rq_sock->sk_server->sv_name,
(sent<0)?"got error":"sent only",
sent, xbufp->len);
- svc_delete_socket(rqstp->rq_sock);
+ set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags);
+ svc_sock_enqueue(rqstp->rq_sock);
sent = -EAGAIN;
}
return sent;
@@ -1207,7 +1335,7 @@ svc_tcp_init(struct svc_sock *svsk)
tp->nonagle = 1; /* disable Nagle's algorithm */
/* initialise setting must have enough space to
- * receive and respond to one request.
+ * receive and respond to one request.
* svc_tcp_recvfrom will re-adjust if necessary
*/
svc_sock_setbufsize(svsk->sk_sock,
@@ -1216,7 +1344,7 @@ svc_tcp_init(struct svc_sock *svsk)
set_bit(SK_CHNGBUF, &svsk->sk_flags);
set_bit(SK_DATA, &svsk->sk_flags);
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (sk->sk_state != TCP_ESTABLISHED)
set_bit(SK_CLOSE, &svsk->sk_flags);
}
}
@@ -1232,7 +1360,7 @@ svc_sock_update_bufs(struct svc_serv *serv)
spin_lock_bh(&serv->sv_lock);
list_for_each(le, &serv->sv_permsocks) {
- struct svc_sock *svsk =
+ struct svc_sock *svsk =
list_entry(le, struct svc_sock, sk_list);
set_bit(SK_CHNGBUF, &svsk->sk_flags);
}
@@ -1252,7 +1380,7 @@ svc_sock_update_bufs(struct svc_serv *serv)
int
svc_recv(struct svc_rqst *rqstp, long timeout)
{
- struct svc_sock *svsk =NULL;
+ struct svc_sock *svsk = NULL;
struct svc_serv *serv = rqstp->rq_server;
struct svc_pool *pool = rqstp->rq_pool;
int len, i;
@@ -1264,11 +1392,11 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
rqstp, timeout);
if (rqstp->rq_sock)
- printk(KERN_ERR
+ printk(KERN_ERR
"svc_recv: service %p, socket not NULL!\n",
rqstp);
if (waitqueue_active(&rqstp->rq_wait))
- printk(KERN_ERR
+ printk(KERN_ERR
"svc_recv: service %p, wait queue active!\n",
rqstp);
@@ -1349,7 +1477,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
svsk->sk_lastrecv = get_seconds();
clear_bit(SK_OLD, &svsk->sk_flags);
- rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024;
+ rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
rqstp->rq_chandle.defer = svc_defer;
if (serv->sv_stats)
@@ -1357,7 +1485,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
return len;
}
-/*
+/*
* Drop request
*/
void
@@ -1462,12 +1590,14 @@ svc_age_temp_sockets(unsigned long closure)
* Initialize socket for RPC use and create svc_sock struct
* XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
*/
-static struct svc_sock *
-svc_setup_socket(struct svc_serv *serv, struct socket *sock,
- int *errp, int pmap_register)
+static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
+ struct socket *sock,
+ int *errp, int flags)
{
struct svc_sock *svsk;
struct sock *inet;
+ int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
+ int is_temporary = flags & SVC_SOCK_TEMPORARY;
dprintk("svc: svc_setup_socket %p\n", sock);
if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1495,7 +1625,7 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
svsk->sk_odata = inet->sk_data_ready;
svsk->sk_owspace = inet->sk_write_space;
svsk->sk_server = serv;
- atomic_set(&svsk->sk_inuse, 0);
+ atomic_set(&svsk->sk_inuse, 1);
svsk->sk_lastrecv = get_seconds();
spin_lock_init(&svsk->sk_defer_lock);
INIT_LIST_HEAD(&svsk->sk_deferred);
@@ -1509,7 +1639,7 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
svc_tcp_init(svsk);
spin_lock_bh(&serv->sv_lock);
- if (!pmap_register) {
+ if (is_temporary) {
set_bit(SK_TEMP, &svsk->sk_flags);
list_add(&svsk->sk_list, &serv->sv_tempsocks);
serv->sv_tmpcnt++;
@@ -1529,8 +1659,6 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
- clear_bit(SK_BUSY, &svsk->sk_flags);
- svc_sock_enqueue(svsk);
return svsk;
}
@@ -1553,9 +1681,11 @@ int svc_addsock(struct svc_serv *serv,
else if (so->state > SS_UNCONNECTED)
err = -EISCONN;
else {
- svsk = svc_setup_socket(serv, so, &err, 1);
- if (svsk)
+ svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
+ if (svsk) {
+ svc_sock_received(svsk);
err = 0;
+ }
}
if (err) {
sockfd_put(so);
@@ -1569,18 +1699,18 @@ EXPORT_SYMBOL_GPL(svc_addsock);
/*
* Create socket for RPC service.
*/
-static int
-svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
+static int svc_create_socket(struct svc_serv *serv, int protocol,
+ struct sockaddr *sin, int len, int flags)
{
struct svc_sock *svsk;
struct socket *sock;
int error;
int type;
+ char buf[RPC_MAX_ADDRBUFLEN];
- dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
- serv->sv_program->pg_name, protocol,
- NIPQUAD(sin->sin_addr.s_addr),
- ntohs(sin->sin_port));
+ dprintk("svc: svc_create_socket(%s, %d, %s)\n",
+ serv->sv_program->pg_name, protocol,
+ __svc_print_addr(sin, buf, sizeof(buf)));
if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only UDP and TCP "
@@ -1589,15 +1719,15 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
}
type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
- if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
+ error = sock_create_kern(sin->sa_family, type, protocol, &sock);
+ if (error < 0)
return error;
svc_reclassify_socket(sock);
if (type == SOCK_STREAM)
- sock->sk->sk_reuse = 1; /* allow address reuse */
- error = kernel_bind(sock, (struct sockaddr *) sin,
- sizeof(*sin));
+ sock->sk->sk_reuse = 1; /* allow address reuse */
+ error = kernel_bind(sock, sin, len);
if (error < 0)
goto bummer;
@@ -1606,8 +1736,10 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
goto bummer;
}
- if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
- return 0;
+ if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
+ svc_sock_received(svsk);
+ return ntohs(inet_sk(svsk->sk_sk)->sport);
+ }
bummer:
dprintk("svc: svc_create_socket error = %d\n", -error);
@@ -1618,7 +1750,7 @@ bummer:
/*
* Remove a dead socket
*/
-void
+static void
svc_delete_socket(struct svc_sock *svsk)
{
struct svc_serv *serv;
@@ -1637,43 +1769,60 @@ svc_delete_socket(struct svc_sock *svsk)
if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
list_del_init(&svsk->sk_list);
- /*
+ /*
* We used to delete the svc_sock from whichever list
* it's sk_ready node was on, but we don't actually
* need to. This is because the only time we're called
* while still attached to a queue, the queue itself
* is about to be destroyed (in svc_destroy).
*/
- if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
+ if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
+ BUG_ON(atomic_read(&svsk->sk_inuse)<2);
+ atomic_dec(&svsk->sk_inuse);
if (test_bit(SK_TEMP, &svsk->sk_flags))
serv->sv_tmpcnt--;
+ }
- /* This atomic_inc should be needed - svc_delete_socket
- * should have the semantic of dropping a reference.
- * But it doesn't yet....
- */
- atomic_inc(&svsk->sk_inuse);
spin_unlock_bh(&serv->sv_lock);
+}
+
+void svc_close_socket(struct svc_sock *svsk)
+{
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
+ /* someone else will have to effect the close */
+ return;
+
+ atomic_inc(&svsk->sk_inuse);
+ svc_delete_socket(svsk);
+ clear_bit(SK_BUSY, &svsk->sk_flags);
svc_sock_put(svsk);
}
-/*
- * Make a socket for nfsd and lockd
+/**
+ * svc_makesock - Make a socket for nfsd and lockd
+ * @serv: RPC server structure
+ * @protocol: transport protocol to use
+ * @port: port to use
+ * @flags: requested socket characteristics
+ *
*/
-int
-svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
+int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port,
+ int flags)
{
- struct sockaddr_in sin;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = INADDR_ANY,
+ .sin_port = htons(port),
+ };
dprintk("svc: creating socket proto = %d\n", protocol);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = INADDR_ANY;
- sin.sin_port = htons(port);
- return svc_create_socket(serv, protocol, &sin);
+ return svc_create_socket(serv, protocol, (struct sockaddr *) &sin,
+ sizeof(sin), flags);
}
/*
- * Handle defer and revisit of requests
+ * Handle defer and revisit of requests
*/
static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
@@ -1718,7 +1867,8 @@ svc_defer(struct cache_req *req)
dr->handle.owner = rqstp->rq_server;
dr->prot = rqstp->rq_prot;
- dr->addr = rqstp->rq_addr;
+ memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
+ dr->addrlen = rqstp->rq_addrlen;
dr->daddr = rqstp->rq_daddr;
dr->argslen = rqstp->rq_arg.len >> 2;
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
@@ -1742,7 +1892,8 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
rqstp->rq_arg.page_len = 0;
rqstp->rq_arg.len = dr->argslen<<2;
rqstp->rq_prot = dr->prot;
- rqstp->rq_addr = dr->addr;
+ memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
+ rqstp->rq_addrlen = dr->addrlen;
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
return dr->argslen<<2;
@@ -1752,7 +1903,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
{
struct svc_deferred_req *dr = NULL;
-
+
if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
return NULL;
spin_lock_bh(&svsk->sk_defer_lock);