diff options
Diffstat (limited to 'net/vmw_vsock')
-rw-r--r-- | net/vmw_vsock/af_vsock.c | 672 | ||||
-rw-r--r-- | net/vmw_vsock/hyperv_transport.c | 46 | ||||
-rw-r--r-- | net/vmw_vsock/virtio_transport.c | 238 | ||||
-rw-r--r-- | net/vmw_vsock/virtio_transport_common.c | 247 | ||||
-rw-r--r-- | net/vmw_vsock/vmci_transport.c | 25 | ||||
-rw-r--r-- | net/vmw_vsock/vmci_transport_notify.c | 10 | ||||
-rw-r--r-- | net/vmw_vsock/vmci_transport_notify_qstate.c | 12 | ||||
-rw-r--r-- | net/vmw_vsock/vsock_addr.c | 4 | ||||
-rw-r--r-- | net/vmw_vsock/vsock_loopback.c | 12 |
9 files changed, 891 insertions, 375 deletions
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index a5f28708e0e7..884eca7f6743 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -85,6 +85,7 @@ * TCP_LISTEN - listening */ +#include <linux/compat.h> #include <linux/types.h> #include <linux/bitops.h> #include <linux/cred.h> @@ -333,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_remove_sock); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)) { int i; @@ -342,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, &vsock_connected_table[i], - connected_table) + connected_table) { + if (vsk->transport != transport) + continue; + fn(sk_vsock(vsk)); + } } spin_unlock_bh(&vsock_table_lock); @@ -415,13 +421,14 @@ static void vsock_deassign_transport(struct vsock_sock *vsk) /* Assign a transport to a socket and call the .init transport callback. * - * Note: for stream socket this must be called when vsk->remote_addr is set - * (e.g. during the connect() or when a connection request on a listener + * Note: for connection oriented socket this must be called when vsk->remote_addr + * is set (e.g. during the connect() or when a connection request on a listener * socket is received). * The vsk->remote_addr is used to decide which transport to use: * - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if * g2h is not loaded, will use local transport; - * - remote CID <= VMADDR_CID_HOST will use guest->host transport; + * - remote CID <= VMADDR_CID_HOST or h2g is not loaded or remote flags field + * includes VMADDR_FLAG_TO_HOST flag value, will use guest->host transport; * - remote CID > VMADDR_CID_HOST will use host->guest transport; */ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) @@ -429,16 +436,33 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) const struct vsock_transport *new_transport; struct sock *sk = sk_vsock(vsk); unsigned int remote_cid = vsk->remote_addr.svm_cid; + __u8 remote_flags; int ret; + /* If the packet is coming with the source and destination CIDs higher + * than VMADDR_CID_HOST, then a vsock channel where all the packets are + * forwarded to the host should be established. Then the host will + * need to forward the packets to the guest. + * + * The flag is set on the (listen) receive path (psk is not NULL). On + * the connect path the flag can be set by the user space application. + */ + if (psk && vsk->local_addr.svm_cid > VMADDR_CID_HOST && + vsk->remote_addr.svm_cid > VMADDR_CID_HOST) + vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST; + + remote_flags = vsk->remote_addr.svm_flags; + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; break; case SOCK_STREAM: + case SOCK_SEQPACKET: if (vsock_use_local_transport(remote_cid)) new_transport = transport_local; - else if (remote_cid <= VMADDR_CID_HOST) + else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g || + (remote_flags & VMADDR_FLAG_TO_HOST)) new_transport = transport_g2h; else new_transport = transport_h2g; @@ -452,10 +476,10 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) return 0; /* transport->release() must be called with sock lock acquired. - * This path can only be taken during vsock_stream_connect(), - * where we have already held the sock lock. - * In the other cases, this function is called on a new socket - * which is not assigned to any transport. + * This path can only be taken during vsock_connect(), where we + * have already held the sock lock. In the other cases, this + * function is called on a new socket which is not assigned to + * any transport. */ vsk->transport->release(vsk); vsock_deassign_transport(vsk); @@ -467,6 +491,14 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) if (!new_transport || !try_module_get(new_transport->module)) return -ENODEV; + if (sk->sk_type == SOCK_SEQPACKET) { + if (!new_transport->seqpacket_allow || + !new_transport->seqpacket_allow(remote_cid)) { + module_put(new_transport->module); + return -ESOCKTNOSUPPORT; + } + } + ret = new_transport->init(vsk, psk); if (ret) { module_put(new_transport->module); @@ -587,8 +619,8 @@ out: /**** SOCKET OPERATIONS ****/ -static int __vsock_bind_stream(struct vsock_sock *vsk, - struct sockaddr_vm *addr) +static int __vsock_bind_connectible(struct vsock_sock *vsk, + struct sockaddr_vm *addr) { static u32 port; struct sockaddr_vm new_addr; @@ -632,9 +664,10 @@ static int __vsock_bind_stream(struct vsock_sock *vsk, vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); - /* Remove stream sockets from the unbound list and add them to the hash - * table for easy lookup by its address. The unbound list is simply an - * extra entry at the end of the hash table, a trick used by AF_UNIX. + /* Remove connection oriented sockets from the unbound list and add them + * to the hash table for easy lookup by its address. The unbound list + * is simply an extra entry at the end of the hash table, a trick used + * by AF_UNIX. */ __vsock_remove_bound(vsk); __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); @@ -667,8 +700,9 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) switch (sk->sk_socket->type) { case SOCK_STREAM: + case SOCK_SEQPACKET: spin_lock_bh(&vsock_table_lock); - retval = __vsock_bind_stream(vsk, addr); + retval = __vsock_bind_connectible(vsk, addr); spin_unlock_bh(&vsock_table_lock); break; @@ -738,8 +772,9 @@ static struct sock *__vsock_create(struct net *net, vsk->buffer_size = psk->buffer_size; vsk->buffer_min_size = psk->buffer_min_size; vsk->buffer_max_size = psk->buffer_max_size; + security_sk_clone(parent, sk); } else { - vsk->trusted = capable(CAP_NET_ADMIN); + vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN); vsk->owner = get_current_cred(); vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE; @@ -750,6 +785,11 @@ static struct sock *__vsock_create(struct net *net, return sk; } +static bool sock_type_connectible(u16 type) +{ + return (type == SOCK_STREAM) || (type == SOCK_SEQPACKET); +} + static void __vsock_release(struct sock *sk, int level) { if (sk) { @@ -768,7 +808,7 @@ static void __vsock_release(struct sock *sk, int level) if (vsk->transport) vsk->transport->release(vsk); - else if (sk->sk_type == SOCK_STREAM) + else if (sock_type_connectible(sk->sk_type)) vsock_remove_sock(vsk); sock_orphan(sk); @@ -826,12 +866,32 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_stream_has_data); +static s64 vsock_connectible_has_data(struct vsock_sock *vsk) +{ + struct sock *sk = sk_vsock(vsk); + + if (sk->sk_type == SOCK_SEQPACKET) + return vsk->transport->seqpacket_has_data(vsk); + else + return vsock_stream_has_data(vsk); +} + s64 vsock_stream_has_space(struct vsock_sock *vsk) { return vsk->transport->stream_has_space(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_space); +void vsock_data_ready(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat || + sock_flag(sk, SOCK_DONE)) + sk->sk_data_ready(sk); +} +EXPORT_SYMBOL_GPL(vsock_data_ready); + static int vsock_release(struct socket *sock) { __vsock_release(sock->sk, 0); @@ -919,17 +979,19 @@ static int vsock_shutdown(struct socket *sock, int mode) if ((mode & ~SHUTDOWN_MASK) || !mode) return -EINVAL; - /* If this is a STREAM socket and it is not connected then bail out - * immediately. If it is a DGRAM socket then we must first kick the - * socket so that it wakes up from any sleeping calls, for example - * recv(), and then afterwards return the error. + /* If this is a connection oriented socket and it is not connected then + * bail out immediately. If it is a DGRAM socket then we must first + * kick the socket so that it wakes up from any sleeping calls, for + * example recv(), and then afterwards return the error. */ sk = sock->sk; + + lock_sock(sk); if (sock->state == SS_UNCONNECTED) { err = -ENOTCONN; - if (sk->sk_type == SOCK_STREAM) - return err; + if (sock_type_connectible(sk->sk_type)) + goto out; } else { sock->state = SS_DISCONNECTING; err = 0; @@ -938,17 +1000,17 @@ static int vsock_shutdown(struct socket *sock, int mode) /* Receive and send shutdowns are treated alike. */ mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); if (mode) { - lock_sock(sk); sk->sk_shutdown |= mode; sk->sk_state_change(sk); - release_sock(sk); - if (sk->sk_type == SOCK_STREAM) { + if (sock_type_connectible(sk->sk_type)) { sock_reset_flag(sk, SOCK_DONE); vsock_send_shutdown(sk, mode); } } +out: + release_sock(sk); return err; } @@ -996,10 +1058,13 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, if (!(sk->sk_shutdown & SEND_SHUTDOWN)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; - } else if (sock->type == SOCK_STREAM) { - const struct vsock_transport *transport = vsk->transport; + } else if (sock_type_connectible(sk->sk_type)) { + const struct vsock_transport *transport; + lock_sock(sk); + transport = vsk->transport; + /* Listening sockets that have connections in their accept * queue can be read. */ @@ -1011,8 +1076,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, if (transport && transport->stream_is_active(vsk) && !(sk->sk_shutdown & RCV_SHUTDOWN)) { bool data_ready_now = false; + int target = sock_rcvlowat(sk, 0, INT_MAX); int ret = transport->notify_poll_in( - vsk, 1, &data_ready_now); + vsk, target, &data_ready_now); if (ret < 0) { mask |= EPOLLERR; } else { @@ -1032,7 +1098,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, } /* Connected sockets that can produce data can be written. */ - if (sk->sk_state == TCP_ESTABLISHED) { + if (transport && sk->sk_state == TCP_ESTABLISHED) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { bool space_avail_now = false; int ret = transport->notify_poll_out( @@ -1082,10 +1148,11 @@ static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = 0; sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; lock_sock(sk); + transport = vsk->transport; + err = vsock_auto_bind(vsk); if (err) goto out; @@ -1202,8 +1269,6 @@ static const struct proto_ops vsock_dgram_ops = { .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = vsock_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = vsock_dgram_sendmsg, .recvmsg = vsock_dgram_recvmsg, .mmap = sock_no_mmap, @@ -1214,7 +1279,7 @@ static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) { const struct vsock_transport *transport = vsk->transport; - if (!transport->cancel_pkt) + if (!transport || !transport->cancel_pkt) return -EOPNOTSUPP; return transport->cancel_pkt(vsk); @@ -1224,7 +1289,6 @@ static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; - int cancel = 0; vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); @@ -1233,19 +1297,18 @@ static void vsock_connect_timeout(struct work_struct *work) if (sk->sk_state == TCP_SYN_SENT && (sk->sk_shutdown != SHUTDOWN_MASK)) { sk->sk_state = TCP_CLOSE; + sk->sk_socket->state = SS_UNCONNECTED; sk->sk_err = ETIMEDOUT; - sk->sk_error_report(sk); - cancel = 1; + sk_error_report(sk); + vsock_transport_cancel_pkt(vsk); } release_sock(sk); - if (cancel) - vsock_transport_cancel_pkt(vsk); sock_put(sk); } -static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, - int addr_len, int flags) +static int vsock_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) { int err; struct sock *sk; @@ -1277,6 +1340,8 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, * non-blocking call. */ err = -EALREADY; + if (flags & O_NONBLOCK) + goto out; break; default: if ((sk->sk_state == TCP_LISTEN) || @@ -1338,7 +1403,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, * timeout fires. */ sock_hold(sk); - schedule_delayed_work(&vsk->connect_work, timeout); + + /* If the timeout function is already scheduled, + * reschedule it, then ungrab the socket refcount to + * keep it balanced. + */ + if (mod_delayed_work(system_wq, &vsk->connect_work, + timeout)) + sock_put(sk); /* Skip ahead to preserve error code set above. */ goto out_wait; @@ -1350,9 +1422,10 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (signal_pending(current)) { err = sock_intr_errno(timeout); - sk->sk_state = TCP_CLOSE; + sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); + vsock_remove_connected(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; @@ -1395,7 +1468,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, lock_sock(listener); - if (sock->type != SOCK_STREAM) { + if (!sock_type_connectible(sock->type)) { err = -EOPNOTSUPP; goto out; } @@ -1408,7 +1481,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, /* Wait for children sockets to appear; these are the new sockets * created upon connection establishment. */ - timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK); prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); while ((connected = vsock_dequeue_accept(listener)) == NULL && @@ -1472,7 +1545,7 @@ static int vsock_listen(struct socket *sock, int backlog) lock_sock(sk); - if (sock->type != SOCK_STREAM) { + if (!sock_type_connectible(sk->sk_type)) { err = -EOPNOTSUPP; goto out; } @@ -1516,11 +1589,11 @@ static void vsock_update_buffer_size(struct vsock_sock *vsk, vsk->buffer_size = val; } -static int vsock_stream_setsockopt(struct socket *sock, - int level, - int optname, - char __user *optval, - unsigned int optlen) +static int vsock_connectible_setsockopt(struct socket *sock, + int level, + int optname, + sockptr_t optval, + unsigned int optlen) { int err; struct sock *sk; @@ -1537,7 +1610,7 @@ static int vsock_stream_setsockopt(struct socket *sock, err = -EINVAL; \ goto exit; \ } \ - if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + if (copy_from_sockptr(&_v, optval, sizeof(_v)) != 0) { \ err = -EFAULT; \ goto exit; \ } \ @@ -1546,10 +1619,11 @@ static int vsock_stream_setsockopt(struct socket *sock, err = 0; sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; lock_sock(sk); + transport = vsk->transport; + switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: COPY_IN(val); @@ -1568,13 +1642,18 @@ static int vsock_stream_setsockopt(struct socket *sock, vsock_update_buffer_size(vsk, transport, vsk->buffer_size); break; - case SO_VM_SOCKETS_CONNECT_TIMEOUT: { - struct __kernel_old_timeval tv; - COPY_IN(tv); + case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: + case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: { + struct __kernel_sock_timeval tv; + + err = sock_copy_user_timeval(&tv, optval, optlen, + optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); + if (err) + break; if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { vsk->connect_timeout = tv.tv_sec * HZ + - DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + DIV_ROUND_UP((unsigned long)tv.tv_usec, (USEC_PER_SEC / HZ)); if (vsk->connect_timeout == 0) vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; @@ -1597,79 +1676,70 @@ exit: return err; } -static int vsock_stream_getsockopt(struct socket *sock, - int level, int optname, - char __user *optval, - int __user *optlen) +static int vsock_connectible_getsockopt(struct socket *sock, + int level, int optname, + char __user *optval, + int __user *optlen) { - int err; + struct sock *sk = sock->sk; + struct vsock_sock *vsk = vsock_sk(sk); + + union { + u64 val64; + struct old_timeval32 tm32; + struct __kernel_old_timeval tm; + struct __kernel_sock_timeval stm; + } v; + + int lv = sizeof(v.val64); int len; - struct sock *sk; - struct vsock_sock *vsk; - u64 val; if (level != AF_VSOCK) return -ENOPROTOOPT; - err = get_user(len, optlen); - if (err != 0) - return err; - -#define COPY_OUT(_v) \ - do { \ - if (len < sizeof(_v)) \ - return -EINVAL; \ - \ - len = sizeof(_v); \ - if (copy_to_user(optval, &_v, len) != 0) \ - return -EFAULT; \ - \ - } while (0) + if (get_user(len, optlen)) + return -EFAULT; - err = 0; - sk = sock->sk; - vsk = vsock_sk(sk); + memset(&v, 0, sizeof(v)); switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: - val = vsk->buffer_size; - COPY_OUT(val); + v.val64 = vsk->buffer_size; break; case SO_VM_SOCKETS_BUFFER_MAX_SIZE: - val = vsk->buffer_max_size; - COPY_OUT(val); + v.val64 = vsk->buffer_max_size; break; case SO_VM_SOCKETS_BUFFER_MIN_SIZE: - val = vsk->buffer_min_size; - COPY_OUT(val); + v.val64 = vsk->buffer_min_size; break; - case SO_VM_SOCKETS_CONNECT_TIMEOUT: { - struct __kernel_old_timeval tv; - tv.tv_sec = vsk->connect_timeout / HZ; - tv.tv_usec = - (vsk->connect_timeout - - tv.tv_sec * HZ) * (1000000 / HZ); - COPY_OUT(tv); + case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: + case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: + lv = sock_get_timeout(vsk->connect_timeout, &v, + optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); break; - } + default: return -ENOPROTOOPT; } - err = put_user(len, optlen); - if (err != 0) + if (len < lv) + return -EINVAL; + if (len > lv) + len = lv; + if (copy_to_user(optval, &v, len)) return -EFAULT; -#undef COPY_OUT + if (put_user(len, optlen)) + return -EFAULT; return 0; } -static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - size_t len) +static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk; struct vsock_sock *vsk; @@ -1682,7 +1752,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; total_written = 0; err = 0; @@ -1691,7 +1760,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); - /* Callers should not provide a destination with stream sockets. */ + transport = vsk->transport; + + /* Callers should not provide a destination with connection oriented + * sockets. + */ if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out; @@ -1782,9 +1855,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, * responsibility to check how many bytes we were able to send. */ - written = transport->stream_enqueue( - vsk, msg, - len - total_written); + if (sk->sk_type == SOCK_SEQPACKET) { + written = transport->seqpacket_enqueue(vsk, + msg, len - total_written); + } else { + written = transport->stream_enqueue(vsk, + msg, len - total_written); + } if (written < 0) { err = -ENOMEM; goto out_err; @@ -1800,71 +1877,101 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, } out_err: - if (total_written > 0) - err = total_written; + if (total_written > 0) { + /* Return number of written bytes only if: + * 1) SOCK_STREAM socket. + * 2) SOCK_SEQPACKET socket when whole buffer is sent. + */ + if (sk->sk_type == SOCK_STREAM || total_written == len) + err = total_written; + } out: release_sock(sk); return err; } - -static int -vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +static int vsock_connectible_wait_data(struct sock *sk, + struct wait_queue_entry *wait, + long timeout, + struct vsock_transport_recv_notify_data *recv_data, + size_t target) { - struct sock *sk; - struct vsock_sock *vsk; const struct vsock_transport *transport; + struct vsock_sock *vsk; + s64 data; int err; - size_t target; - ssize_t copied; - long timeout; - struct vsock_transport_recv_notify_data recv_data; - DEFINE_WAIT(wait); - - sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; err = 0; + transport = vsk->transport; - lock_sock(sk); + while (1) { + prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE); + data = vsock_connectible_has_data(vsk); + if (data != 0) + break; + + if (sk->sk_err != 0 || + (sk->sk_shutdown & RCV_SHUTDOWN) || + (vsk->peer_shutdown & SEND_SHUTDOWN)) { + break; + } - if (!transport || sk->sk_state != TCP_ESTABLISHED) { - /* Recvmsg is supposed to return 0 if a peer performs an - * orderly shutdown. Differentiate between that case and when a - * peer has not connected or a local shutdown occured with the - * SOCK_DONE flag. - */ - if (sock_flag(sk, SOCK_DONE)) - err = 0; - else - err = -ENOTCONN; + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + break; + } - goto out; - } + if (recv_data) { + err = transport->notify_recv_pre_block(vsk, target, recv_data); + if (err < 0) + break; + } - if (flags & MSG_OOB) { - err = -EOPNOTSUPP; - goto out; - } + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); - /* We don't check peer_shutdown flag here since peer may actually shut - * down, but there can be data in the queue that a local socket can - * receive. - */ - if (sk->sk_shutdown & RCV_SHUTDOWN) { - err = 0; - goto out; + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + break; + } else if (timeout == 0) { + err = -EAGAIN; + break; + } } - /* It is valid on Linux to pass in a zero-length receive buffer. This - * is not an error. We may as well bail out now. + finish_wait(sk_sleep(sk), wait); + + if (err) + return err; + + /* Internal transport error when checking for available + * data. XXX This should be changed to a connection + * reset in a later change. */ - if (!len) { - err = 0; - goto out; - } + if (data < 0) + return -ENOMEM; + + return data; +} + +static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags) +{ + struct vsock_transport_recv_notify_data recv_data; + const struct vsock_transport *transport; + struct vsock_sock *vsk; + ssize_t copied; + size_t target; + long timeout; + int err; + + DEFINE_WAIT(wait); + + vsk = vsock_sk(sk); + transport = vsk->transport; /* We must not copy less than target bytes into the user's buffer * before returning successfully, so we wait for the consume queue to @@ -1886,106 +1993,187 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, while (1) { - s64 ready; + ssize_t read; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - ready = vsock_stream_has_data(vsk); + err = vsock_connectible_wait_data(sk, &wait, timeout, + &recv_data, target); + if (err <= 0) + break; - if (ready == 0) { - if (sk->sk_err != 0 || - (sk->sk_shutdown & RCV_SHUTDOWN) || - (vsk->peer_shutdown & SEND_SHUTDOWN)) { - finish_wait(sk_sleep(sk), &wait); - break; - } - /* Don't wait for non-blocking sockets. */ - if (timeout == 0) { - err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); - break; - } + err = transport->notify_recv_pre_dequeue(vsk, target, + &recv_data); + if (err < 0) + break; - err = transport->notify_recv_pre_block( - vsk, target, &recv_data); - if (err < 0) { - finish_wait(sk_sleep(sk), &wait); - break; - } - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); + read = transport->stream_dequeue(vsk, msg, len - copied, flags); + if (read < 0) { + err = -ENOMEM; + break; + } - if (signal_pending(current)) { - err = sock_intr_errno(timeout); - finish_wait(sk_sleep(sk), &wait); - break; - } else if (timeout == 0) { - err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); - break; - } - } else { - ssize_t read; + copied += read; - finish_wait(sk_sleep(sk), &wait); + err = transport->notify_recv_post_dequeue(vsk, target, read, + !(flags & MSG_PEEK), &recv_data); + if (err < 0) + goto out; - if (ready < 0) { - /* Invalid queue pair content. XXX This should - * be changed to a connection reset in a later - * change. - */ + if (read >= target || flags & MSG_PEEK) + break; - err = -ENOMEM; - goto out; - } + target -= read; + } - err = transport->notify_recv_pre_dequeue( - vsk, target, &recv_data); - if (err < 0) - break; + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; - read = transport->stream_dequeue( - vsk, msg, - len - copied, flags); - if (read < 0) { - err = -ENOMEM; - break; - } + if (copied > 0) + err = copied; - copied += read; +out: + return err; +} - err = transport->notify_recv_post_dequeue( - vsk, target, read, - !(flags & MSG_PEEK), &recv_data); - if (err < 0) - goto out; +static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags) +{ + const struct vsock_transport *transport; + struct vsock_sock *vsk; + ssize_t msg_len; + long timeout; + int err = 0; + DEFINE_WAIT(wait); - if (read >= target || flags & MSG_PEEK) - break; + vsk = vsock_sk(sk); + transport = vsk->transport; - target -= read; - } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0); + if (err <= 0) + goto out; + + msg_len = transport->seqpacket_dequeue(vsk, msg, flags); + + if (msg_len < 0) { + err = -ENOMEM; + goto out; } - if (sk->sk_err) + if (sk->sk_err) { err = -sk->sk_err; - else if (sk->sk_shutdown & RCV_SHUTDOWN) + } else if (sk->sk_shutdown & RCV_SHUTDOWN) { err = 0; + } else { + /* User sets MSG_TRUNC, so return real length of + * packet. + */ + if (flags & MSG_TRUNC) + err = msg_len; + else + err = len - msg_data_left(msg); - if (copied > 0) - err = copied; + /* Always set MSG_TRUNC if real length of packet is + * bigger than user's buffer. + */ + if (msg_len > len) + msg->msg_flags |= MSG_TRUNC; + } + +out: + return err; +} + +static int +vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ + struct sock *sk; + struct vsock_sock *vsk; + const struct vsock_transport *transport; + int err; + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + transport = vsk->transport; + + if (!transport || sk->sk_state != TCP_ESTABLISHED) { + /* Recvmsg is supposed to return 0 if a peer performs an + * orderly shutdown. Differentiate between that case and when a + * peer has not connected or a local shutdown occurred with the + * SOCK_DONE flag. + */ + if (sock_flag(sk, SOCK_DONE)) + err = 0; + else + err = -ENOTCONN; + + goto out; + } + + if (flags & MSG_OOB) { + err = -EOPNOTSUPP; + goto out; + } + + /* We don't check peer_shutdown flag here since peer may actually shut + * down, but there can be data in the queue that a local socket can + * receive. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = 0; + goto out; + } + + /* It is valid on Linux to pass in a zero-length receive buffer. This + * is not an error. We may as well bail out now. + */ + if (!len) { + err = 0; + goto out; + } + + if (sk->sk_type == SOCK_STREAM) + err = __vsock_stream_recvmsg(sk, msg, len, flags); + else + err = __vsock_seqpacket_recvmsg(sk, msg, len, flags); out: release_sock(sk); return err; } +static int vsock_set_rcvlowat(struct sock *sk, int val) +{ + const struct vsock_transport *transport; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + if (val > vsk->buffer_size) + return -EINVAL; + + transport = vsk->transport; + + if (transport && transport->set_rcvlowat) + return transport->set_rcvlowat(vsk, val); + + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); + return 0; +} + static const struct proto_ops vsock_stream_ops = { .family = PF_VSOCK, .owner = THIS_MODULE, .release = vsock_release, .bind = vsock_bind, - .connect = vsock_stream_connect, + .connect = vsock_connect, .socketpair = sock_no_socketpair, .accept = vsock_accept, .getname = vsock_getname, @@ -1993,10 +2181,32 @@ static const struct proto_ops vsock_stream_ops = { .ioctl = sock_no_ioctl, .listen = vsock_listen, .shutdown = vsock_shutdown, - .setsockopt = vsock_stream_setsockopt, - .getsockopt = vsock_stream_getsockopt, - .sendmsg = vsock_stream_sendmsg, - .recvmsg = vsock_stream_recvmsg, + .setsockopt = vsock_connectible_setsockopt, + .getsockopt = vsock_connectible_getsockopt, + .sendmsg = vsock_connectible_sendmsg, + .recvmsg = vsock_connectible_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, + .set_rcvlowat = vsock_set_rcvlowat, +}; + +static const struct proto_ops vsock_seqpacket_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_connect, + .socketpair = sock_no_socketpair, + .accept = vsock_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = vsock_listen, + .shutdown = vsock_shutdown, + .setsockopt = vsock_connectible_setsockopt, + .getsockopt = vsock_connectible_getsockopt, + .sendmsg = vsock_connectible_sendmsg, + .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -2021,6 +2231,9 @@ static int vsock_create(struct net *net, struct socket *sock, case SOCK_STREAM: sock->ops = &vsock_stream_ops; break; + case SOCK_SEQPACKET: + sock->ops = &vsock_seqpacket_ops; + break; default: return -ESOCKTNOSUPPORT; } @@ -2074,8 +2287,7 @@ static long vsock_dev_do_ioctl(struct file *filp, break; default: - pr_err("Unknown ioctl %d\n", cmd); - retval = -EINVAL; + retval = -ENOIOCTLCMD; } return retval; diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 630b851f8150..59c3e2697069 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -78,6 +78,9 @@ struct hvs_send_buf { ALIGN((payload_len), 8) + \ VMBUS_PKT_TRAILER_SIZE) +/* Upper bound on the size of a VMbus packet for hv_sock */ +#define HVS_MAX_PKT_SIZE HVS_PKT_LEN(HVS_MTU_SIZE) + union hvs_service_id { guid_t srv_id; @@ -225,14 +228,20 @@ static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan) return round_down(ret, 8); } +static int __hvs_send_data(struct vmbus_channel *chan, + struct vmpipe_proto_header *hdr, + size_t to_write) +{ + hdr->pkt_type = 1; + hdr->data_size = to_write; + return vmbus_sendpacket(chan, hdr, sizeof(*hdr) + to_write, + 0, VM_PKT_DATA_INBAND, 0); +} + static int hvs_send_data(struct vmbus_channel *chan, struct hvs_send_buf *send_buf, size_t to_write) { - send_buf->hdr.pkt_type = 1; - send_buf->hdr.data_size = to_write; - return vmbus_sendpacket(chan, &send_buf->hdr, - sizeof(send_buf->hdr) + to_write, - 0, VM_PKT_DATA_INBAND, 0); + return __hvs_send_data(chan, &send_buf->hdr, to_write); } static void hvs_channel_cb(void *ctx) @@ -372,6 +381,8 @@ static void hvs_open_connection(struct vmbus_channel *chan) rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE); } + chan->max_pkt_size = HVS_MAX_PKT_SIZE; + ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb, conn_from_host ? new : sk); if (ret != 0) { @@ -468,20 +479,16 @@ static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode) return; /* It can't fail: see hvs_channel_writable_bytes(). */ - (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0); + (void)__hvs_send_data(hvs->chan, &hdr, 0); hvs->fin_sent = true; } static int hvs_shutdown(struct vsock_sock *vsk, int mode) { - struct sock *sk = sk_vsock(vsk); - if (!(mode & SEND_SHUTDOWN)) return 0; - lock_sock(sk); hvs_shutdown_lock_held(vsk->trans, mode); - release_sock(sk); return 0; } @@ -570,12 +577,18 @@ static bool hvs_dgram_allow(u32 cid, u32 port) static int hvs_update_recv_data(struct hvsock *hvs) { struct hvs_recv_buf *recv_buf; - u32 payload_len; + u32 pkt_len, payload_len; + + pkt_len = hv_pkt_len(hvs->recv_desc); + + if (pkt_len < HVS_HEADER_LEN) + return -EIO; recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1); payload_len = recv_buf->hdr.data_size; - if (payload_len > HVS_MTU_SIZE) + if (payload_len > pkt_len - HVS_HEADER_LEN || + payload_len > HVS_MTU_SIZE) return -EIO; if (payload_len == 0) @@ -601,6 +614,8 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg, if (need_refill) { hvs->recv_desc = hv_pkt_iter_first(hvs->chan); + if (!hvs->recv_desc) + return -ENOBUFS; ret = hvs_update_recv_data(hvs); if (ret) return ret; @@ -800,6 +815,12 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written, return 0; } +static +int hvs_set_rcvlowat(struct vsock_sock *vsk, int val) +{ + return -EOPNOTSUPP; +} + static struct vsock_transport hvs_transport = { .module = THIS_MODULE, @@ -835,6 +856,7 @@ static struct vsock_transport hvs_transport = { .notify_send_pre_enqueue = hvs_notify_send_pre_enqueue, .notify_send_post_enqueue = hvs_notify_send_post_enqueue, + .set_rcvlowat = hvs_set_rcvlowat }; static bool hvs_check_transport(struct vsock_sock *vsk) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index dfbaf6bd8b1c..ad64f403536a 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -22,8 +22,9 @@ #include <net/af_vsock.h> static struct workqueue_struct *virtio_vsock_workqueue; -static struct virtio_vsock *the_virtio_vsock; +static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ +static struct virtio_transport virtio_transport; /* forward declaration */ struct virtio_vsock { struct virtio_device *vdev; @@ -62,6 +63,7 @@ struct virtio_vsock { struct virtio_vsock_event event_list[8]; u32 guest_cid; + bool seqpacket_allow; }; static u32 virtio_transport_get_local_cid(void) @@ -356,11 +358,14 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock) static void virtio_vsock_reset_sock(struct sock *sk) { - lock_sock(sk); + /* vmci_transport.c doesn't take sk_lock here either. At least we're + * under vsock_table_lock so the sock cannot disappear while we're + * executing. + */ + sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; - sk->sk_error_report(sk); - release_sock(sk); + sk_error_report(sk); } static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) @@ -380,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock, switch (le32_to_cpu(event->id)) { case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: virtio_vsock_update_guest_cid(vsock); - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); break; } } @@ -443,6 +449,8 @@ static void virtio_vsock_rx_done(struct virtqueue *vq) queue_work(virtio_vsock_workqueue, &vsock->rx_work); } +static bool virtio_transport_seqpacket_allow(u32 remote_cid); + static struct virtio_transport virtio_transport = { .transport = { .module = THIS_MODULE, @@ -469,6 +477,11 @@ static struct virtio_transport virtio_transport = { .stream_is_active = virtio_transport_stream_is_active, .stream_allow = virtio_transport_stream_allow, + .seqpacket_dequeue = virtio_transport_seqpacket_dequeue, + .seqpacket_enqueue = virtio_transport_seqpacket_enqueue, + .seqpacket_allow = virtio_transport_seqpacket_allow, + .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, @@ -485,6 +498,21 @@ static struct virtio_transport virtio_transport = { .send_pkt = virtio_transport_send_pkt, }; +static bool virtio_transport_seqpacket_allow(u32 remote_cid) +{ + struct virtio_vsock *vsock; + bool seqpacket_allow; + + seqpacket_allow = false; + rcu_read_lock(); + vsock = rcu_dereference(the_virtio_vsock); + if (vsock) + seqpacket_allow = vsock->seqpacket_allow; + rcu_read_unlock(); + + return seqpacket_allow; +} + static void virtio_transport_rx_work(struct work_struct *work) { struct virtio_vsock *vsock = @@ -538,61 +566,29 @@ out: mutex_unlock(&vsock->rx_lock); } -static int virtio_vsock_probe(struct virtio_device *vdev) +static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) { - vq_callback_t *callbacks[] = { - virtio_vsock_rx_done, - virtio_vsock_tx_done, - virtio_vsock_event_done, - }; + struct virtio_device *vdev = vsock->vdev; static const char * const names[] = { "rx", "tx", "event", }; - struct virtio_vsock *vsock = NULL; + vq_callback_t *callbacks[] = { + virtio_vsock_rx_done, + virtio_vsock_tx_done, + virtio_vsock_event_done, + }; int ret; - ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); - if (ret) - return ret; - - /* Only one virtio-vsock device per guest is supported */ - if (rcu_dereference_protected(the_virtio_vsock, - lockdep_is_held(&the_virtio_vsock_mutex))) { - ret = -EBUSY; - goto out; - } - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) { - ret = -ENOMEM; - goto out; - } - - vsock->vdev = vdev; - - ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names, + ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, callbacks, names, NULL); if (ret < 0) - goto out; + return ret; virtio_vsock_update_guest_cid(vsock); - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - atomic_set(&vsock->queued_replies, 0); - - mutex_init(&vsock->tx_lock); - mutex_init(&vsock->rx_lock); - mutex_init(&vsock->event_lock); - spin_lock_init(&vsock->send_pkt_list_lock); - INIT_LIST_HEAD(&vsock->send_pkt_list); - INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); - INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); - INIT_WORK(&vsock->event_work, virtio_transport_event_work); - INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); + virtio_device_ready(vdev); mutex_lock(&vsock->tx_lock); vsock->tx_run = true; @@ -608,34 +604,20 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->event_run = true; mutex_unlock(&vsock->event_lock); - vdev->priv = vsock; - rcu_assign_pointer(the_virtio_vsock, vsock); - - mutex_unlock(&the_virtio_vsock_mutex); return 0; - -out: - kfree(vsock); - mutex_unlock(&the_virtio_vsock_mutex); - return ret; } -static void virtio_vsock_remove(struct virtio_device *vdev) +static void virtio_vsock_vqs_del(struct virtio_vsock *vsock) { - struct virtio_vsock *vsock = vdev->priv; + struct virtio_device *vdev = vsock->vdev; struct virtio_vsock_pkt *pkt; - mutex_lock(&the_virtio_vsock_mutex); - - vdev->priv = NULL; - rcu_assign_pointer(the_virtio_vsock, NULL); - synchronize_rcu(); - - /* Reset all connected sockets when the device disappear */ - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + /* Reset all connected sockets when the VQs disappear */ + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); /* Stop all work handlers to make sure no one is accessing the device, - * so we can safely call vdev->config->reset(). + * so we can safely call virtio_reset_device(). */ mutex_lock(&vsock->rx_lock); vsock->rx_run = false; @@ -652,7 +634,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev) /* Flush all device writes and interrupts, device will not use any * more buffers. */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); mutex_lock(&vsock->rx_lock); while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) @@ -675,6 +657,78 @@ static void virtio_vsock_remove(struct virtio_device *vdev) /* Delete virtqueues and flush outstanding callbacks if any */ vdev->config->del_vqs(vdev); +} + +static int virtio_vsock_probe(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = NULL; + int ret; + + ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); + if (ret) + return ret; + + /* Only one virtio-vsock device per guest is supported */ + if (rcu_dereference_protected(the_virtio_vsock, + lockdep_is_held(&the_virtio_vsock_mutex))) { + ret = -EBUSY; + goto out; + } + + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); + if (!vsock) { + ret = -ENOMEM; + goto out; + } + + vsock->vdev = vdev; + + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + atomic_set(&vsock->queued_replies, 0); + + mutex_init(&vsock->tx_lock); + mutex_init(&vsock->rx_lock); + mutex_init(&vsock->event_lock); + spin_lock_init(&vsock->send_pkt_list_lock); + INIT_LIST_HEAD(&vsock->send_pkt_list); + INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); + INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); + INIT_WORK(&vsock->event_work, virtio_transport_event_work); + INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); + + if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) + vsock->seqpacket_allow = true; + + vdev->priv = vsock; + + ret = virtio_vsock_vqs_init(vsock); + if (ret < 0) + goto out; + + rcu_assign_pointer(the_virtio_vsock, vsock); + + mutex_unlock(&the_virtio_vsock_mutex); + + return 0; + +out: + kfree(vsock); + mutex_unlock(&the_virtio_vsock_mutex); + return ret; +} + +static void virtio_vsock_remove(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + + mutex_lock(&the_virtio_vsock_mutex); + + vdev->priv = NULL; + rcu_assign_pointer(the_virtio_vsock, NULL); + synchronize_rcu(); + + virtio_vsock_vqs_del(vsock); /* Other works can be queued before 'config->del_vqs()', so we flush * all works before to free the vsock object to avoid use after free. @@ -689,12 +743,56 @@ static void virtio_vsock_remove(struct virtio_device *vdev) kfree(vsock); } +#ifdef CONFIG_PM_SLEEP +static int virtio_vsock_freeze(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + + mutex_lock(&the_virtio_vsock_mutex); + + rcu_assign_pointer(the_virtio_vsock, NULL); + synchronize_rcu(); + + virtio_vsock_vqs_del(vsock); + + mutex_unlock(&the_virtio_vsock_mutex); + + return 0; +} + +static int virtio_vsock_restore(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + int ret; + + mutex_lock(&the_virtio_vsock_mutex); + + /* Only one virtio-vsock device per guest is supported */ + if (rcu_dereference_protected(the_virtio_vsock, + lockdep_is_held(&the_virtio_vsock_mutex))) { + ret = -EBUSY; + goto out; + } + + ret = virtio_vsock_vqs_init(vsock); + if (ret < 0) + goto out; + + rcu_assign_pointer(the_virtio_vsock, vsock); + +out: + mutex_unlock(&the_virtio_vsock_mutex); + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + static struct virtio_device_id id_table[] = { { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, { 0 }, }; static unsigned int features[] = { + VIRTIO_VSOCK_F_SEQPACKET }; static struct virtio_driver virtio_vsock_driver = { @@ -705,6 +803,10 @@ static struct virtio_driver virtio_vsock_driver = { .id_table = id_table, .probe = virtio_vsock_probe, .remove = virtio_vsock_remove, +#ifdef CONFIG_PM_SLEEP + .freeze = virtio_vsock_freeze, + .restore = virtio_vsock_restore, +#endif }; static int __init virtio_vsock_init(void) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index f3c4bab2f737..a9980e9b9304 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -74,6 +74,14 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, err = memcpy_from_msg(pkt->buf, info->msg, len); if (err) goto out; + + if (msg_data_left(info->msg) == 0 && + info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { + pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + + if (info->msg->msg_flags & MSG_EOR) + pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + } } trace_virtio_transport_alloc_pkt(src_cid, src_port, @@ -157,10 +165,22 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt) { + if (pkt->tap_delivered) + return; + vsock_deliver_tap(virtio_transport_build_skb, pkt); + pkt->tap_delivered = true; } EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); +static u16 virtio_transport_get_type(struct sock *sk) +{ + if (sk->sk_type == SOCK_STREAM) + return VIRTIO_VSOCK_TYPE_STREAM; + else + return VIRTIO_VSOCK_TYPE_SEQPACKET; +} + /* This function can only be used on connecting/connected sockets, * since a socket assigned to a transport is required. * @@ -175,6 +195,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, struct virtio_vsock_pkt *pkt; u32 pkt_len = info->pkt_len; + info->type = virtio_transport_get_type(sk_vsock(vsk)); + t_ops = virtio_transport_get_ops(vsk); if (unlikely(!t_ops)) return -EFAULT; @@ -265,13 +287,10 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) } EXPORT_SYMBOL_GPL(virtio_transport_put_credit); -static int virtio_transport_send_credit_update(struct vsock_sock *vsk, - int type, - struct virtio_vsock_hdr *hdr) +static int virtio_transport_send_credit_update(struct vsock_sock *vsk) { struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, - .type = type, .vsk = vsk, }; @@ -379,11 +398,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, * messages, we set the limit to a high value. TODO: experiment * with different values. */ - if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { - virtio_transport_send_credit_update(vsk, - VIRTIO_VSOCK_TYPE_STREAM, - NULL); - } + if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) + virtio_transport_send_credit_update(vsk); return total; @@ -393,6 +409,78 @@ out: return err; } +static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + int flags) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + struct virtio_vsock_pkt *pkt; + int dequeued_len = 0; + size_t user_buf_len = msg_data_left(msg); + bool msg_ready = false; + + spin_lock_bh(&vvs->rx_lock); + + if (vvs->msg_count == 0) { + spin_unlock_bh(&vvs->rx_lock); + return 0; + } + + while (!msg_ready) { + pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list); + + if (dequeued_len >= 0) { + size_t pkt_len; + size_t bytes_to_copy; + + pkt_len = (size_t)le32_to_cpu(pkt->hdr.len); + bytes_to_copy = min(user_buf_len, pkt_len); + + if (bytes_to_copy) { + int err; + + /* sk_lock is held by caller so no one else can dequeue. + * Unlock rx_lock since memcpy_to_msg() may sleep. + */ + spin_unlock_bh(&vvs->rx_lock); + + err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy); + if (err) { + /* Copy of message failed. Rest of + * fragments will be freed without copy. + */ + dequeued_len = err; + } else { + user_buf_len -= bytes_to_copy; + } + + spin_lock_bh(&vvs->rx_lock); + } + + if (dequeued_len >= 0) + dequeued_len += pkt_len; + } + + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) { + msg_ready = true; + vvs->msg_count--; + + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) + msg->msg_flags |= MSG_EOR; + } + + virtio_transport_dec_rx_pkt(vvs, pkt); + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + spin_unlock_bh(&vvs->rx_lock); + + virtio_transport_send_credit_update(vsk); + + return dequeued_len; +} + ssize_t virtio_transport_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg, @@ -405,6 +493,38 @@ virtio_transport_stream_dequeue(struct vsock_sock *vsk, } EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); +ssize_t +virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + int flags) +{ + if (flags & MSG_PEEK) + return -EOPNOTSUPP; + + return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags); +} +EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); + +int +virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + + spin_lock_bh(&vvs->tx_lock); + + if (len > vvs->peer_buf_alloc) { + spin_unlock_bh(&vvs->tx_lock); + return -EMSGSIZE; + } + + spin_unlock_bh(&vvs->tx_lock); + + return virtio_transport_stream_enqueue(vsk, msg, len); +} +EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue); + int virtio_transport_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, @@ -427,6 +547,19 @@ s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); +u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + u32 msg_count; + + spin_lock_bh(&vvs->rx_lock); + msg_count = vvs->msg_count; + spin_unlock_bh(&vvs->rx_lock); + + return msg_count; +} +EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data); + static s64 virtio_transport_has_space(struct vsock_sock *vsk) { struct virtio_vsock_sock *vvs = vsk->trans; @@ -492,8 +625,7 @@ void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val) vvs->buf_alloc = *val; - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, - NULL); + virtio_transport_send_credit_update(vsk); } EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size); @@ -502,10 +634,7 @@ virtio_transport_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *data_ready_now) { - if (vsock_stream_has_data(vsk)) - *data_ready_now = true; - else - *data_ready_now = false; + *data_ready_now = vsock_stream_has_data(vsk) >= target; return 0; } @@ -620,7 +749,6 @@ int virtio_transport_connect(struct vsock_sock *vsk) { struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_REQUEST, - .type = VIRTIO_VSOCK_TYPE_STREAM, .vsk = vsk, }; @@ -632,7 +760,6 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) { struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_SHUTDOWN, - .type = VIRTIO_VSOCK_TYPE_STREAM, .flags = (mode & RCV_SHUTDOWN ? VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | (mode & SEND_SHUTDOWN ? @@ -661,7 +788,6 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk, { struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RW, - .type = VIRTIO_VSOCK_TYPE_STREAM, .msg = msg, .pkt_len = len, .vsk = vsk, @@ -684,7 +810,6 @@ static int virtio_transport_reset(struct vsock_sock *vsk, { struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RST, - .type = VIRTIO_VSOCK_TYPE_STREAM, .reply = !!pkt, .vsk = vsk, }; @@ -729,6 +854,23 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, return t->send_pkt(reply); } +/* This function should be called with sk_lock held and SOCK_DONE set */ +static void virtio_transport_remove_sock(struct vsock_sock *vsk) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + struct virtio_vsock_pkt *pkt, *tmp; + + /* We don't need to take rx_lock, as the socket is closing and we are + * removing it. + */ + list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) { + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + vsock_remove_sock(vsk); +} + static void virtio_transport_wait_close(struct sock *sk, long timeout) { if (timeout) { @@ -761,7 +903,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { vsk->close_work_scheduled = false; - vsock_remove_sock(vsk); + virtio_transport_remove_sock(vsk); /* Release refcnt obtained when we scheduled the timeout */ sock_put(sk); @@ -824,21 +966,16 @@ static bool virtio_transport_close(struct vsock_sock *vsk) void virtio_transport_release(struct vsock_sock *vsk) { - struct virtio_vsock_sock *vvs = vsk->trans; - struct virtio_vsock_pkt *pkt, *tmp; struct sock *sk = &vsk->sk; bool remove_sock = true; - if (sk->sk_type == SOCK_STREAM) + if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) remove_sock = virtio_transport_close(vsk); - list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) { - list_del(&pkt->list); - virtio_transport_free_pkt(pkt); + if (remove_sock) { + sock_set_flag(sk, SOCK_DONE); + virtio_transport_remove_sock(vsk); } - - if (remove_sock) - vsock_remove_sock(vsk); } EXPORT_SYMBOL_GPL(virtio_transport_release); @@ -874,7 +1011,7 @@ destroy: virtio_transport_reset(vsk, pkt); sk->sk_state = TCP_CLOSE; sk->sk_err = skerr; - sk->sk_error_report(sk); + sk_error_report(sk); return err; } @@ -896,6 +1033,9 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, goto out; } + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) + vvs->msg_count++; + /* Try to copy small packets into the buffer of last packet queued, * to avoid wasting memory queueing the entire buffer with a small * payload. @@ -907,13 +1047,18 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, struct virtio_vsock_pkt, list); /* If there is space in the last packet queued, we copy the - * new packet in its buffer. + * new packet in its buffer. We avoid this if the last packet + * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is + * delimiter of SEQPACKET message, so 'pkt' is the first packet + * of a new message. */ - if (pkt->len <= last_pkt->buf_len - last_pkt->len) { + if ((pkt->len <= last_pkt->buf_len - last_pkt->len) && + !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) { memcpy(last_pkt->buf + last_pkt->len, pkt->buf, pkt->len); last_pkt->len += pkt->len; free_pkt = true; + last_pkt->hdr.flags |= pkt->hdr.flags; goto out; } } @@ -936,8 +1081,11 @@ virtio_transport_recv_connected(struct sock *sk, switch (le16_to_cpu(pkt->hdr.op)) { case VIRTIO_VSOCK_OP_RW: virtio_transport_recv_enqueue(vsk, pkt); - sk->sk_data_ready(sk); + vsock_data_ready(sk); return err; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk); + break; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; @@ -984,7 +1132,6 @@ virtio_transport_send_response(struct vsock_sock *vsk, { struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RESPONSE, - .type = VIRTIO_VSOCK_TYPE_STREAM, .remote_cid = le64_to_cpu(pkt->hdr.src_cid), .remote_port = le32_to_cpu(pkt->hdr.src_port), .reply = true, @@ -1080,6 +1227,12 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, return 0; } +static bool virtio_transport_valid_type(u16 type) +{ + return (type == VIRTIO_VSOCK_TYPE_STREAM) || + (type == VIRTIO_VSOCK_TYPE_SEQPACKET); +} + /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex * lock. */ @@ -1105,7 +1258,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, le32_to_cpu(pkt->hdr.buf_alloc), le32_to_cpu(pkt->hdr.fwd_cnt)); - if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) { + if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) { (void)virtio_transport_reset_no_sock(t, pkt); goto free_pkt; } @@ -1122,14 +1275,29 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, } } - vsk = vsock_sk(sk); + if (virtio_transport_get_type(sk) != le16_to_cpu(pkt->hdr.type)) { + (void)virtio_transport_reset_no_sock(t, pkt); + sock_put(sk); + goto free_pkt; + } - space_available = virtio_transport_space_update(sk, pkt); + vsk = vsock_sk(sk); lock_sock(sk); + /* Check if sk has been closed before lock_sock */ + if (sock_flag(sk, SOCK_DONE)) { + (void)virtio_transport_reset_no_sock(t, pkt); + release_sock(sk); + sock_put(sk); + goto free_pkt; + } + + space_available = virtio_transport_space_update(sk, pkt); + /* Update CID in case it has changed after a transport reset event */ - vsk->local_addr.svm_cid = dst.svm_cid; + if (vsk->local_addr.svm_cid != VMADDR_CID_ANY) + vsk->local_addr.svm_cid = dst.svm_cid; if (space_available) sk->sk_write_space(sk); @@ -1151,6 +1319,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, virtio_transport_free_pkt(pkt); break; default: + (void)virtio_transport_reset_no_sock(t, pkt); virtio_transport_free_pkt(pkt); break; } @@ -1170,7 +1339,7 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) { - kfree(pkt->buf); + kvfree(pkt->buf); kfree(pkt); } EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 4b8b1150a738..842c94286d31 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; static int PROTOCOL_OVERRIDE = -1; +static struct vsock_transport vmci_transport; /* forward declaration */ + /* Helper function to convert from a VMCI error code to a VSock error code. */ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) @@ -568,8 +570,7 @@ vmci_transport_queue_pair_alloc(struct vmci_qp **qpair, peer, flags, VMCI_NO_PRIVILEGE_FLAGS); out: if (err < 0) { - pr_err("Could not attach to queue pair with %d\n", - err); + pr_err_once("Could not attach to queue pair with %d\n", err); err = vmci_transport_error_to_vsock_error(err); } @@ -832,7 +833,7 @@ static void vmci_transport_handle_detach(struct sock *sk) sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; - sk->sk_error_report(sk); + sk_error_report(sk); return; } sk->sk_state = TCP_CLOSE; @@ -883,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id, const struct vmci_event_data *e_data, void *client_data) { - vsock_for_each_connected_socket(vmci_transport_handle_detach); + vsock_for_each_connected_socket(&vmci_transport, + vmci_transport_handle_detach); } static void vmci_transport_recv_pkt_work(struct work_struct *work) @@ -945,13 +947,11 @@ static int vmci_transport_recv_listen(struct sock *sk, bool old_request = false; bool old_pkt_proto = false; - err = 0; - /* Because we are in the listen state, we could be receiving a packet * for ourself or any previous connection requests that we received. * If it's the latter, we try to find a socket in our list of pending * connections and, if we do, call the appropriate handler for the - * state that that socket is in. Otherwise we try to service the + * state that socket is in. Otherwise we try to service the * connection request. */ pending = vmci_transport_get_pending(sk, pkt); @@ -1251,7 +1251,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, vsock_remove_pending(listener, pending); vsock_enqueue_accept(listener, pending); - /* Callers of accept() will be be waiting on the listening socket, not + /* Callers of accept() will be waiting on the listening socket, not * the pending socket. */ listener->sk_data_ready(listener); @@ -1368,7 +1368,7 @@ destroy: sk->sk_state = TCP_CLOSE; sk->sk_err = skerr; - sk->sk_error_report(sk); + sk_error_report(sk); return err; } @@ -1732,19 +1732,16 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, int flags) { int err; - int noblock; struct vmci_datagram *dg; size_t payload_len; struct sk_buff *skb; - noblock = flags & MSG_DONTWAIT; - if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; - skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + skb = skb_recv_datagram(&vsk->sk, flags, &err); if (!skb) return err; @@ -2055,7 +2052,7 @@ static bool vmci_check_transport(struct vsock_sock *vsk) return vsk->transport == &vmci_transport; } -void vmci_vsock_transport_cb(bool is_host) +static void vmci_vsock_transport_cb(bool is_host) { int features; diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index d69fc4b595ad..7c3a7db134b2 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -307,7 +307,7 @@ vmci_transport_handle_wrote(struct sock *sk, struct vsock_sock *vsk = vsock_sk(sk); PKT_FIELD(vsk, sent_waiting_read) = false; #endif - sk->sk_data_ready(sk); + vsock_data_ready(sk); } static void vmci_transport_notify_pkt_socket_init(struct sock *sk) @@ -340,12 +340,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, { struct vsock_sock *vsk = vsock_sk(sk); - if (vsock_stream_has_data(vsk)) { + if (vsock_stream_has_data(vsk) >= target) { *data_ready_now = true; } else { - /* We can't read right now because there is nothing in the - * queue. Ask for notifications when there is something to - * read. + /* We can't read right now because there is not enough data + * in the queue. Ask for notifications when there is something + * to read. */ if (sk->sk_state == TCP_ESTABLISHED) { if (!send_waiting_read(sk, 1)) diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index 0f36d7c45db3..e96a88d850a8 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -84,7 +84,7 @@ vmci_transport_handle_wrote(struct sock *sk, bool bottom_half, struct sockaddr_vm *dst, struct sockaddr_vm *src) { - sk->sk_data_ready(sk); + vsock_data_ready(sk); } static void vsock_block_update_write_window(struct sock *sk) @@ -161,12 +161,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, { struct vsock_sock *vsk = vsock_sk(sk); - if (vsock_stream_has_data(vsk)) { + if (vsock_stream_has_data(vsk) >= target) { *data_ready_now = true; } else { - /* We can't read right now because there is nothing in the - * queue. Ask for notifications when there is something to - * read. + /* We can't read right now because there is not enough data + * in the queue. Ask for notifications when there is something + * to read. */ if (sk->sk_state == TCP_ESTABLISHED) vsock_block_update_write_window(sk); @@ -282,7 +282,7 @@ vmci_transport_notify_pkt_recv_post_dequeue( /* See the comment in * vmci_transport_notify_pkt_send_post_enqueue(). */ - sk->sk_data_ready(sk); + vsock_data_ready(sk); } return err; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c index 909de26cb0e7..223b9660a759 100644 --- a/net/vmw_vsock/vsock_addr.c +++ b/net/vmw_vsock/vsock_addr.c @@ -22,13 +22,15 @@ EXPORT_SYMBOL_GPL(vsock_addr_init); int vsock_addr_validate(const struct sockaddr_vm *addr) { + __u8 svm_valid_flags = VMADDR_FLAG_TO_HOST; + if (!addr) return -EFAULT; if (addr->svm_family != AF_VSOCK) return -EAFNOSUPPORT; - if (addr->svm_zero[0] != 0) + if (addr->svm_flags & ~svm_valid_flags) return -EINVAL; return 0; diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index a45f7ffca8c5..169a8cf65b39 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -63,6 +63,8 @@ static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk) return 0; } +static bool vsock_loopback_seqpacket_allow(u32 remote_cid); + static struct virtio_transport loopback_transport = { .transport = { .module = THIS_MODULE, @@ -89,6 +91,11 @@ static struct virtio_transport loopback_transport = { .stream_is_active = virtio_transport_stream_is_active, .stream_allow = virtio_transport_stream_allow, + .seqpacket_dequeue = virtio_transport_seqpacket_dequeue, + .seqpacket_enqueue = virtio_transport_seqpacket_enqueue, + .seqpacket_allow = vsock_loopback_seqpacket_allow, + .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, @@ -105,6 +112,11 @@ static struct virtio_transport loopback_transport = { .send_pkt = vsock_loopback_send_pkt, }; +static bool vsock_loopback_seqpacket_allow(u32 remote_cid) +{ + return true; +} + static void vsock_loopback_work(struct work_struct *work) { struct vsock_loopback *vsock = |