diff options
Diffstat (limited to 'tools/testing/vsock')
-rw-r--r-- | tools/testing/vsock/.gitignore | 2 | ||||
-rw-r--r-- | tools/testing/vsock/Makefile | 25 | ||||
-rw-r--r-- | tools/testing/vsock/README | 49 | ||||
-rw-r--r-- | tools/testing/vsock/control.c | 37 | ||||
-rw-r--r-- | tools/testing/vsock/control.h | 2 | ||||
-rw-r--r-- | tools/testing/vsock/msg_zerocopy_common.c | 77 | ||||
-rw-r--r-- | tools/testing/vsock/msg_zerocopy_common.h | 17 | ||||
-rw-r--r-- | tools/testing/vsock/timeout.c | 18 | ||||
-rw-r--r-- | tools/testing/vsock/timeout.h | 1 | ||||
-rw-r--r-- | tools/testing/vsock/util.c | 647 | ||||
-rw-r--r-- | tools/testing/vsock/util.h | 35 | ||||
-rw-r--r-- | tools/testing/vsock/vsock_diag_test.c | 25 | ||||
-rw-r--r-- | tools/testing/vsock/vsock_perf.c | 499 | ||||
-rw-r--r-- | tools/testing/vsock/vsock_test.c | 1828 | ||||
-rw-r--r-- | tools/testing/vsock/vsock_test_zerocopy.c | 358 | ||||
-rw-r--r-- | tools/testing/vsock/vsock_test_zerocopy.h | 15 | ||||
-rw-r--r-- | tools/testing/vsock/vsock_uring_test.c | 353 |
17 files changed, 3849 insertions, 139 deletions
diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore index 87ca2731cff9..d9f798713cd7 100644 --- a/tools/testing/vsock/.gitignore +++ b/tools/testing/vsock/.gitignore @@ -2,3 +2,5 @@ *.d vsock_test vsock_diag_test +vsock_perf +vsock_uring_test diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index f8293c6910c9..6e0b4e95e230 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -1,11 +1,28 @@ # SPDX-License-Identifier: GPL-2.0-only -all: test -test: vsock_test vsock_diag_test -vsock_test: vsock_test.o timeout.o control.o util.o +all: test vsock_perf +test: vsock_test vsock_diag_test vsock_uring_test +vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_zerocopy_common.o vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o +vsock_perf: vsock_perf.o msg_zerocopy_common.o + +vsock_uring_test: LDLIBS = -luring +vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean clean: - ${RM} *.o *.d vsock_test vsock_diag_test + ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf vsock_uring_test -include *.d + +VSOCK_INSTALL_PATH ?= + +install: all +ifdef VSOCK_INSTALL_PATH + mkdir -p $(VSOCK_INSTALL_PATH) + install -m 744 vsock_test $(VSOCK_INSTALL_PATH) + install -m 744 vsock_perf $(VSOCK_INSTALL_PATH) + install -m 744 vsock_diag_test $(VSOCK_INSTALL_PATH) + install -m 744 vsock_uring_test $(VSOCK_INSTALL_PATH) +else + $(error Error: set VSOCK_INSTALL_PATH to use install) +endif diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 4d5045e7d2c3..680ce666ceb5 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -35,3 +35,52 @@ Invoke test binaries in both directions as follows: --control-port=$GUEST_IP \ --control-port=1234 \ --peer-cid=3 + +Some tests are designed to produce kernel memory leaks. Leaks detection, +however, is deferred to Kernel Memory Leak Detector. It is recommended to enable +kmemleak (CONFIG_DEBUG_KMEMLEAK=y) and explicitly trigger a scan after each test +suite run, e.g. + + # echo clear > /sys/kernel/debug/kmemleak + # $TEST_BINARY ... + # echo "wait for any grace periods" && sleep 2 + # echo scan > /sys/kernel/debug/kmemleak + # echo "wait for kmemleak" && sleep 5 + # echo scan > /sys/kernel/debug/kmemleak + # cat /sys/kernel/debug/kmemleak + +For more information see Documentation/dev-tools/kmemleak.rst. + +vsock_perf utility +------------------- +'vsock_perf' is a simple tool to measure vsock performance. It works in +sender/receiver modes: sender connect to peer at the specified port and +starts data transmission to the receiver. After data processing is done, +it prints several metrics(see below). + +Usage: +# run as sender +# connect to CID 2, port 1234, send 1G of data, tx buf size is 1M +./vsock_perf --sender 2 --port 1234 --bytes 1G --buf-size 1M + +Output: +tx performance: A Gbits/s + +Output explanation: +A is calculated as "number of bits to send" / "time in tx loop" + +# run as receiver +# listen port 1234, rx buf size is 1M, socket buf size is 1G, SO_RCVLOWAT is 64K +./vsock_perf --port 1234 --buf-size 1M --vsk-size 1G --rcvlowat 64K + +Output: +rx performance: A Gbits/s +total in 'read()': B sec +POLLIN wakeups: C +average in 'read()': D ns + +Output explanation: +A is calculated as "number of received bits" / "time in rx loop". +B is time, spent in 'read()' system call(excluding 'poll()') +C is number of 'poll()' wake ups with POLLIN bit set. +D is B / C, e.g. average amount of time, spent in single 'read()'. diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c index 4874872fc5a3..0066e0324d35 100644 --- a/tools/testing/vsock/control.c +++ b/tools/testing/vsock/control.c @@ -27,6 +27,7 @@ #include "timeout.h" #include "control.h" +#include "util.h" static int control_fd = -1; @@ -50,7 +51,6 @@ void control_init(const char *control_host, for (ai = result; ai; ai = ai->ai_next) { int fd; - int val = 1; fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); if (fd < 0) @@ -65,11 +65,8 @@ void control_init(const char *control_host, break; } - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &val, sizeof(val)) < 0) { - perror("setsockopt"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_REUSEADDR, 1, + "setsockopt SO_REUSEADDR"); if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0) goto next; @@ -141,6 +138,34 @@ void control_writeln(const char *str) timeout_end(); } +void control_writeulong(unsigned long value) +{ + char str[32]; + + if (snprintf(str, sizeof(str), "%lu", value) >= sizeof(str)) { + perror("snprintf"); + exit(EXIT_FAILURE); + } + + control_writeln(str); +} + +unsigned long control_readulong(void) +{ + unsigned long value; + char *str; + + str = control_readln(); + + if (!str) + exit(EXIT_FAILURE); + + value = strtoul(str, NULL, 10); + free(str); + + return value; +} + /* Return the next line from the control socket (without the trailing newline). * * The program terminates if a timeout occurs. diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h index 51814b4f9ac1..c1f77fdb2c7a 100644 --- a/tools/testing/vsock/control.h +++ b/tools/testing/vsock/control.h @@ -9,7 +9,9 @@ void control_init(const char *control_host, const char *control_port, void control_cleanup(void); void control_writeln(const char *str); char *control_readln(void); +unsigned long control_readulong(void); void control_expectln(const char *str); bool control_cmpln(char *line, const char *str, bool fail); +void control_writeulong(unsigned long value); #endif /* CONTROL_H */ diff --git a/tools/testing/vsock/msg_zerocopy_common.c b/tools/testing/vsock/msg_zerocopy_common.c new file mode 100644 index 000000000000..8622e5a0f8b7 --- /dev/null +++ b/tools/testing/vsock/msg_zerocopy_common.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Some common code for MSG_ZEROCOPY logic + * + * Copyright (C) 2023 SberDevices. + * + * Author: Arseniy Krasnov <avkrasnov@salutedevices.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <linux/errqueue.h> + +#include "msg_zerocopy_common.h" + +void vsock_recv_completion(int fd, const bool *zerocopied) +{ + struct sock_extended_err *serr; + struct msghdr msg = { 0 }; + char cmsg_data[128]; + struct cmsghdr *cm; + ssize_t res; + + msg.msg_control = cmsg_data; + msg.msg_controllen = sizeof(cmsg_data); + + res = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (res) { + fprintf(stderr, "failed to read error queue: %zi\n", res); + exit(EXIT_FAILURE); + } + + cm = CMSG_FIRSTHDR(&msg); + if (!cm) { + fprintf(stderr, "cmsg: no cmsg\n"); + exit(EXIT_FAILURE); + } + + if (cm->cmsg_level != SOL_VSOCK) { + fprintf(stderr, "cmsg: unexpected 'cmsg_level'\n"); + exit(EXIT_FAILURE); + } + + if (cm->cmsg_type != VSOCK_RECVERR) { + fprintf(stderr, "cmsg: unexpected 'cmsg_type'\n"); + exit(EXIT_FAILURE); + } + + serr = (void *)CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + fprintf(stderr, "serr: wrong origin: %u\n", serr->ee_origin); + exit(EXIT_FAILURE); + } + + if (serr->ee_errno) { + fprintf(stderr, "serr: wrong error code: %u\n", serr->ee_errno); + exit(EXIT_FAILURE); + } + + /* This flag is used for tests, to check that transmission was + * performed as expected: zerocopy or fallback to copy. If NULL + * - don't care. + */ + if (!zerocopied) + return; + + if (*zerocopied && (serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)) { + fprintf(stderr, "serr: was copy instead of zerocopy\n"); + exit(EXIT_FAILURE); + } + + if (!*zerocopied && !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)) { + fprintf(stderr, "serr: was zerocopy instead of copy\n"); + exit(EXIT_FAILURE); + } +} diff --git a/tools/testing/vsock/msg_zerocopy_common.h b/tools/testing/vsock/msg_zerocopy_common.h new file mode 100644 index 000000000000..ad14139e93ca --- /dev/null +++ b/tools/testing/vsock/msg_zerocopy_common.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef MSG_ZEROCOPY_COMMON_H +#define MSG_ZEROCOPY_COMMON_H + +#include <stdbool.h> + +#ifndef SOL_VSOCK +#define SOL_VSOCK 287 +#endif + +#ifndef VSOCK_RECVERR +#define VSOCK_RECVERR 1 +#endif + +void vsock_recv_completion(int fd, const bool *zerocopied); + +#endif /* MSG_ZEROCOPY_COMMON_H */ diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c index 44aee49b6cee..1453d38e08bb 100644 --- a/tools/testing/vsock/timeout.c +++ b/tools/testing/vsock/timeout.c @@ -21,6 +21,7 @@ #include <stdbool.h> #include <unistd.h> #include <stdio.h> +#include <time.h> #include "timeout.h" static volatile bool timeout; @@ -28,6 +29,8 @@ static volatile bool timeout; /* SIGALRM handler function. Do not use sleep(2), alarm(2), or * setitimer(2) while using this API - they may interfere with each * other. + * + * If you need to sleep, please use timeout_sleep() provided by this API. */ void sigalrm(int signo) { @@ -58,3 +61,18 @@ void timeout_end(void) alarm(0); timeout = false; } + +/* Sleep in a timeout section. + * + * nanosleep(2) can be used with this API since POSIX.1 explicitly + * specifies that it does not interact with signals. + */ +int timeout_usleep(useconds_t usec) +{ + struct timespec ts = { + .tv_sec = usec / 1000000, + .tv_nsec = (usec % 1000000) * 1000, + }; + + return nanosleep(&ts, NULL); +} diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h index ecb7c840e65a..1c3fcad87a49 100644 --- a/tools/testing/vsock/timeout.h +++ b/tools/testing/vsock/timeout.h @@ -11,5 +11,6 @@ void sigalrm(int signo); void timeout_begin(unsigned int seconds); void timeout_check(const char *operation); void timeout_end(void); +int timeout_usleep(useconds_t usec); #endif /* TIMEOUT_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 93cbd6f603f9..0c7e9cbcbc85 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -11,10 +11,13 @@ #include <stdio.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> #include <signal.h> #include <unistd.h> #include <assert.h> #include <sys/epoll.h> +#include <sys/mman.h> +#include <linux/sockios.h> #include "timeout.h" #include "control.h" @@ -31,8 +34,7 @@ void init_signals(void) signal(SIGPIPE, SIG_IGN); } -/* Parse a CID in string representation */ -unsigned int parse_cid(const char *str) +static unsigned int parse_uint(const char *str, const char *err_str) { char *endptr = NULL; unsigned long n; @@ -40,12 +42,24 @@ unsigned int parse_cid(const char *str) errno = 0; n = strtoul(str, &endptr, 10); if (errno || *endptr != '\0') { - fprintf(stderr, "malformed CID \"%s\"\n", str); + fprintf(stderr, "malformed %s \"%s\"\n", err_str, str); exit(EXIT_FAILURE); } return n; } +/* Parse a CID in string representation */ +unsigned int parse_cid(const char *str) +{ + return parse_uint(str, "CID"); +} + +/* Parse a port in string representation */ +unsigned int parse_port(const char *str) +{ + return parse_uint(str, "port"); +} + /* Wait for the remote to close the connection */ void vsock_wait_remote_close(int fd) { @@ -83,79 +97,151 @@ void vsock_wait_remote_close(int fd) close(epollfd); } -/* Connect to <cid, port> and return the file descriptor. */ -int vsock_stream_connect(unsigned int cid, unsigned int port) +/* Wait until transport reports no data left to be sent. + * Return false if transport does not implement the unsent_bytes() callback. + */ +bool vsock_wait_sent(int fd) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, + int ret, sock_bytes_unsent; + + timeout_begin(TIMEOUT); + do { + ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + if (ret < 0) { + if (errno == EOPNOTSUPP) + break; + + perror("ioctl(SIOCOUTQ)"); + exit(EXIT_FAILURE); + } + timeout_check("SIOCOUTQ"); + } while (sock_bytes_unsent != 0); + timeout_end(); + + return !ret; +} + +/* Create socket <type>, bind to <cid, port> and return the file descriptor. */ +int vsock_bind(unsigned int cid, unsigned int port, int type) +{ + struct sockaddr_vm sa = { + .svm_family = AF_VSOCK, + .svm_cid = cid, + .svm_port = port, }; - int ret; int fd; - control_expectln("LISTENING"); + fd = socket(AF_VSOCK, type, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) { + perror("bind"); + exit(EXIT_FAILURE); + } + + return fd; +} - fd = socket(AF_VSOCK, SOCK_STREAM, 0); +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port) +{ + struct sockaddr_vm sa = { + .svm_family = AF_VSOCK, + .svm_cid = cid, + .svm_port = port, + }; + int ret; timeout_begin(TIMEOUT); do { - ret = connect(fd, &addr.sa, sizeof(addr.svm)); + ret = connect(fd, (struct sockaddr *)&sa, sizeof(sa)); timeout_check("connect"); } while (ret < 0 && errno == EINTR); timeout_end(); - if (ret < 0) { + return ret; +} + +/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */ +int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type) +{ + int client_fd; + + client_fd = vsock_bind(VMADDR_CID_ANY, bind_port, type); + + if (vsock_connect_fd(client_fd, cid, port)) { + perror("connect"); + exit(EXIT_FAILURE); + } + + return client_fd; +} + +/* Connect to <cid, port> and return the file descriptor. */ +int vsock_connect(unsigned int cid, unsigned int port, int type) +{ + int fd; + + control_expectln("LISTENING"); + + fd = socket(AF_VSOCK, type, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (vsock_connect_fd(fd, cid, port)) { int old_errno = errno; close(fd); fd = -1; errno = old_errno; } + + return fd; +} + +int vsock_stream_connect(unsigned int cid, unsigned int port) +{ + return vsock_connect(cid, port, SOCK_STREAM); +} + +int vsock_seqpacket_connect(unsigned int cid, unsigned int port) +{ + return vsock_connect(cid, port, SOCK_SEQPACKET); +} + +/* Listen on <cid, port> and return the file descriptor. */ +static int vsock_listen(unsigned int cid, unsigned int port, int type) +{ + int fd; + + fd = vsock_bind(cid, port, type); + + if (listen(fd, 1) < 0) { + perror("listen"); + exit(EXIT_FAILURE); + } + return fd; } /* Listen on <cid, port> and return the first incoming connection. The remote * address is stored to clientaddrp. clientaddrp may be NULL. */ -int vsock_stream_accept(unsigned int cid, unsigned int port, - struct sockaddr_vm *clientaddrp) +int vsock_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp, int type) { union { struct sockaddr sa; struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, - }; - union { - struct sockaddr sa; - struct sockaddr_vm svm; } clientaddr; socklen_t clientaddr_len = sizeof(clientaddr.svm); - int fd; - int client_fd; - int old_errno; - - fd = socket(AF_VSOCK, SOCK_STREAM, 0); + int fd, client_fd, old_errno; - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } - - if (listen(fd, 1) < 0) { - perror("listen"); - exit(EXIT_FAILURE); - } + fd = vsock_listen(cid, port, type); control_writeln("LISTENING"); @@ -189,104 +275,157 @@ int vsock_stream_accept(unsigned int cid, unsigned int port, return client_fd; } -/* Transmit one byte and check the return value. +int vsock_stream_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp) +{ + return vsock_accept(cid, port, clientaddrp, SOCK_STREAM); +} + +int vsock_stream_listen(unsigned int cid, unsigned int port) +{ + return vsock_listen(cid, port, SOCK_STREAM); +} + +int vsock_seqpacket_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp) +{ + return vsock_accept(cid, port, clientaddrp, SOCK_SEQPACKET); +} + +/* Transmit bytes from a buffer and check the return value. * * expected_ret: * <0 Negative errno (for testing errors) * 0 End-of-file - * 1 Success + * >0 Success (bytes successfully written) */ -void send_byte(int fd, int expected_ret, int flags) +void send_buf(int fd, const void *buf, size_t len, int flags, + ssize_t expected_ret) { - const uint8_t byte = 'A'; - ssize_t nwritten; + ssize_t nwritten = 0; + ssize_t ret; timeout_begin(TIMEOUT); do { - nwritten = send(fd, &byte, sizeof(byte), flags); - timeout_check("write"); - } while (nwritten < 0 && errno == EINTR); + ret = send(fd, buf + nwritten, len - nwritten, flags); + timeout_check("send"); + + if (ret == 0 || (ret < 0 && errno != EINTR)) + break; + + nwritten += ret; + } while (nwritten < len); timeout_end(); if (expected_ret < 0) { - if (nwritten != -1) { - fprintf(stderr, "bogus send(2) return value %zd\n", - nwritten); + if (ret != -1) { + fprintf(stderr, "bogus send(2) return value %zd (expected %zd)\n", + ret, expected_ret); exit(EXIT_FAILURE); } if (errno != -expected_ret) { - perror("write"); + perror("send"); exit(EXIT_FAILURE); } return; } - if (nwritten < 0) { - perror("write"); + if (ret < 0) { + perror("send"); exit(EXIT_FAILURE); } - if (nwritten == 0) { - if (expected_ret == 0) - return; - fprintf(stderr, "unexpected EOF while sending byte\n"); - exit(EXIT_FAILURE); - } - if (nwritten != sizeof(byte)) { - fprintf(stderr, "bogus send(2) return value %zd\n", nwritten); + if (nwritten != expected_ret) { + if (ret == 0) + fprintf(stderr, "unexpected EOF while sending bytes\n"); + + fprintf(stderr, "bogus send(2) bytes written %zd (expected %zd)\n", + nwritten, expected_ret); exit(EXIT_FAILURE); } } -/* Receive one byte and check the return value. +/* Receive bytes in a buffer and check the return value. * * expected_ret: * <0 Negative errno (for testing errors) * 0 End-of-file - * 1 Success + * >0 Success (bytes successfully read) */ -void recv_byte(int fd, int expected_ret, int flags) +void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret) { - uint8_t byte; - ssize_t nread; + ssize_t nread = 0; + ssize_t ret; timeout_begin(TIMEOUT); do { - nread = recv(fd, &byte, sizeof(byte), flags); - timeout_check("read"); - } while (nread < 0 && errno == EINTR); + ret = recv(fd, buf + nread, len - nread, flags); + timeout_check("recv"); + + if (ret == 0 || (ret < 0 && errno != EINTR)) + break; + + nread += ret; + } while (nread < len); timeout_end(); if (expected_ret < 0) { - if (nread != -1) { - fprintf(stderr, "bogus recv(2) return value %zd\n", - nread); + if (ret != -1) { + fprintf(stderr, "bogus recv(2) return value %zd (expected %zd)\n", + ret, expected_ret); exit(EXIT_FAILURE); } if (errno != -expected_ret) { - perror("read"); + perror("recv"); exit(EXIT_FAILURE); } return; } - if (nread < 0) { - perror("read"); + if (ret < 0) { + perror("recv"); exit(EXIT_FAILURE); } - if (nread == 0) { - if (expected_ret == 0) - return; - fprintf(stderr, "unexpected EOF while receiving byte\n"); - exit(EXIT_FAILURE); - } - if (nread != sizeof(byte)) { - fprintf(stderr, "bogus recv(2) return value %zd\n", nread); + if (nread != expected_ret) { + if (ret == 0) + fprintf(stderr, "unexpected EOF while receiving bytes\n"); + + fprintf(stderr, "bogus recv(2) bytes read %zd (expected %zd)\n", + nread, expected_ret); exit(EXIT_FAILURE); } +} + +/* Transmit one byte and check the return value. + * + * expected_ret: + * <0 Negative errno (for testing errors) + * 0 End-of-file + * 1 Success + */ +void send_byte(int fd, int expected_ret, int flags) +{ + static const uint8_t byte = 'A'; + + send_buf(fd, &byte, sizeof(byte), flags, expected_ret); +} + +/* Receive one byte and check the return value. + * + * expected_ret: + * <0 Negative errno (for testing errors) + * 0 End-of-file + * 1 Success + */ +void recv_byte(int fd, int expected_ret, int flags) +{ + uint8_t byte; + + recv_buf(fd, &byte, sizeof(byte), flags, expected_ret); + if (byte != 'A') { - fprintf(stderr, "unexpected byte read %c\n", byte); + fprintf(stderr, "unexpected byte read 0x%02x\n", byte); exit(EXIT_FAILURE); } } @@ -352,8 +491,7 @@ void list_tests(const struct test_case *test_cases) exit(EXIT_FAILURE); } -void skip_test(struct test_case *test_cases, size_t test_cases_len, - const char *test_id_str) +static unsigned long parse_test_id(const char *test_id_str, size_t test_cases_len) { unsigned long test_id; char *endptr = NULL; @@ -371,5 +509,330 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, exit(EXIT_FAILURE); } + return test_id; +} + +void skip_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + unsigned long test_id = parse_test_id(test_id_str, test_cases_len); test_cases[test_id].skip = true; } + +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + static bool skip_all = true; + unsigned long test_id; + + if (skip_all) { + unsigned long i; + + for (i = 0; i < test_cases_len; ++i) + test_cases[i].skip = true; + + skip_all = false; + } + + test_id = parse_test_id(test_id_str, test_cases_len); + test_cases[test_id].skip = false; +} + +unsigned long hash_djb2(const void *data, size_t len) +{ + unsigned long hash = 5381; + int i = 0; + + while (i < len) { + hash = ((hash << 5) + hash) + ((unsigned char *)data)[i]; + i++; + } + + return hash; +} + +size_t iovec_bytes(const struct iovec *iov, size_t iovnum) +{ + size_t bytes; + int i; + + for (bytes = 0, i = 0; i < iovnum; i++) + bytes += iov[i].iov_len; + + return bytes; +} + +unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum) +{ + unsigned long hash; + size_t iov_bytes; + size_t offs; + void *tmp; + int i; + + iov_bytes = iovec_bytes(iov, iovnum); + + tmp = malloc(iov_bytes); + if (!tmp) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + for (offs = 0, i = 0; i < iovnum; i++) { + memcpy(tmp + offs, iov[i].iov_base, iov[i].iov_len); + offs += iov[i].iov_len; + } + + hash = hash_djb2(tmp, iov_bytes); + free(tmp); + + return hash; +} + +/* Allocates and returns new 'struct iovec *' according pattern + * in the 'test_iovec'. For each element in the 'test_iovec' it + * allocates new element in the resulting 'iovec'. 'iov_len' + * of the new element is copied from 'test_iovec'. 'iov_base' is + * allocated depending on the 'iov_base' of 'test_iovec': + * + * 'iov_base' == NULL -> valid buf: mmap('iov_len'). + * + * 'iov_base' == MAP_FAILED -> invalid buf: + * mmap('iov_len'), then munmap('iov_len'). + * 'iov_base' still contains result of + * mmap(). + * + * 'iov_base' == number -> unaligned valid buf: + * mmap('iov_len') + number. + * + * 'iovnum' is number of elements in 'test_iovec'. + * + * Returns new 'iovec' or calls 'exit()' on error. + */ +struct iovec *alloc_test_iovec(const struct iovec *test_iovec, int iovnum) +{ + struct iovec *iovec; + int i; + + iovec = malloc(sizeof(*iovec) * iovnum); + if (!iovec) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < iovnum; i++) { + iovec[i].iov_len = test_iovec[i].iov_len; + + iovec[i].iov_base = mmap(NULL, iovec[i].iov_len, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, + -1, 0); + if (iovec[i].iov_base == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + + if (test_iovec[i].iov_base != MAP_FAILED) + iovec[i].iov_base += (uintptr_t)test_iovec[i].iov_base; + } + + /* Unmap "invalid" elements. */ + for (i = 0; i < iovnum; i++) { + if (test_iovec[i].iov_base == MAP_FAILED) { + if (munmap(iovec[i].iov_base, iovec[i].iov_len)) { + perror("munmap"); + exit(EXIT_FAILURE); + } + } + } + + for (i = 0; i < iovnum; i++) { + int j; + + if (test_iovec[i].iov_base == MAP_FAILED) + continue; + + for (j = 0; j < iovec[i].iov_len; j++) + ((uint8_t *)iovec[i].iov_base)[j] = rand() & 0xff; + } + + return iovec; +} + +/* Frees 'iovec *', previously allocated by 'alloc_test_iovec()'. + * On error calls 'exit()'. + */ +void free_test_iovec(const struct iovec *test_iovec, + struct iovec *iovec, int iovnum) +{ + int i; + + for (i = 0; i < iovnum; i++) { + if (test_iovec[i].iov_base != MAP_FAILED) { + if (test_iovec[i].iov_base) + iovec[i].iov_base -= (uintptr_t)test_iovec[i].iov_base; + + if (munmap(iovec[i].iov_base, iovec[i].iov_len)) { + perror("munmap"); + exit(EXIT_FAILURE); + } + } + } + + free(iovec); +} + +/* Set "unsigned long long" socket option and check that it's indeed set */ +void setsockopt_ull_check(int fd, int level, int optname, + unsigned long long val, char const *errmsg) +{ + unsigned long long chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + chkval = ~val; /* just make storage != val */ + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (chkval != val) { + fprintf(stderr, "value mismatch: set %llu got %llu\n", val, + chkval); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %llu\n", errmsg, val); + exit(EXIT_FAILURE); +; +} + +/* Set "int" socket option and check that it's indeed set */ +void setsockopt_int_check(int fd, int level, int optname, int val, + char const *errmsg) +{ + int chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + chkval = ~val; /* just make storage != val */ + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (chkval != val) { + fprintf(stderr, "value mismatch: set %d got %d\n", val, chkval); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %d\n", errmsg, val); + exit(EXIT_FAILURE); +} + +static void mem_invert(unsigned char *mem, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + mem[i] = ~mem[i]; +} + +/* Set "timeval" socket option and check that it's indeed set */ +void setsockopt_timeval_check(int fd, int level, int optname, + struct timeval val, char const *errmsg) +{ + struct timeval chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + /* just make storage != val */ + chkval = val; + mem_invert((unsigned char *)&chkval, sizeof(chkval)); + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (memcmp(&chkval, &val, sizeof(val)) != 0) { + fprintf(stderr, "value mismatch: set %ld:%ld got %ld:%ld\n", + val.tv_sec, val.tv_usec, chkval.tv_sec, chkval.tv_usec); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %ld:%ld\n", errmsg, val.tv_sec, val.tv_usec); + exit(EXIT_FAILURE); +} + +void enable_so_zerocopy_check(int fd) +{ + setsockopt_int_check(fd, SOL_SOCKET, SO_ZEROCOPY, 1, + "setsockopt SO_ZEROCOPY"); +} + +void enable_so_linger(int fd, int timeout) +{ + struct linger optval = { + .l_onoff = 1, + .l_linger = timeout + }; + + if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) { + perror("setsockopt(SO_LINGER)"); + exit(EXIT_FAILURE); + } +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index e53dd09d26d9..5e2db67072d5 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -12,10 +12,13 @@ enum test_mode { TEST_MODE_SERVER }; +#define DEFAULT_PEER_PORT 1234 + /* Test runner options */ struct test_opts { enum test_mode mode; unsigned int peer_cid; + unsigned int peer_port; }; /* A test case definition. Test functions must print failures to stderr and @@ -35,10 +38,26 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); +unsigned int parse_port(const char *str); +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port); +int vsock_connect(unsigned int cid, unsigned int port, int type); +int vsock_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp, int type); int vsock_stream_connect(unsigned int cid, unsigned int port); +int vsock_bind(unsigned int cid, unsigned int port, int type); +int vsock_bind_connect(unsigned int cid, unsigned int port, + unsigned int bind_port, int type); +int vsock_seqpacket_connect(unsigned int cid, unsigned int port); int vsock_stream_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp); +int vsock_stream_listen(unsigned int cid, unsigned int port); +int vsock_seqpacket_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp); void vsock_wait_remote_close(int fd); +bool vsock_wait_sent(int fd); +void send_buf(int fd, const void *buf, size_t len, int flags, + ssize_t expected_ret); +void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret); void send_byte(int fd, int expected_ret, int flags); void recv_byte(int fd, int expected_ret, int flags); void run_tests(const struct test_case *test_cases, @@ -46,4 +65,20 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str); +unsigned long hash_djb2(const void *data, size_t len); +size_t iovec_bytes(const struct iovec *iov, size_t iovnum); +unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); +struct iovec *alloc_test_iovec(const struct iovec *test_iovec, int iovnum); +void free_test_iovec(const struct iovec *test_iovec, + struct iovec *iovec, int iovnum); +void setsockopt_ull_check(int fd, int level, int optname, + unsigned long long val, char const *errmsg); +void setsockopt_int_check(int fd, int level, int optname, int val, + char const *errmsg); +void setsockopt_timeval_check(int fd, int level, int optname, + struct timeval val, char const *errmsg); +void enable_so_zerocopy_check(int fd); +void enable_so_linger(int fd, int timeout); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c index cec6f5a738e1..081e045f4696 100644 --- a/tools/testing/vsock/vsock_diag_test.c +++ b/tools/testing/vsock/vsock_diag_test.c @@ -39,6 +39,8 @@ static const char *sock_type_str(int type) return "DGRAM"; case SOCK_STREAM: return "STREAM"; + case SOCK_SEQPACKET: + return "SEQPACKET"; default: return "INVALID TYPE"; } @@ -332,8 +334,6 @@ static void test_no_sockets(const struct test_opts *opts) read_vsock_stat(&sockets); check_no_sockets(&sockets); - - free_sock_stat(&sockets); } static void test_listen_socket_server(const struct test_opts *opts) @@ -344,7 +344,7 @@ static void test_listen_socket_server(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = VMADDR_CID_ANY, }, }; @@ -380,7 +380,7 @@ static void test_connect_client(const struct test_opts *opts) LIST_HEAD(sockets); struct vsock_stat *st; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -405,7 +405,7 @@ static void test_connect_server(const struct test_opts *opts) LIST_HEAD(sockets); int client_fd; - client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + client_fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (client_fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -464,6 +464,11 @@ static const struct option longopts[] = { .val = 'p', }, { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, + { .name = "list", .has_arg = no_argument, .val = 'l', @@ -483,7 +488,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n" + fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>] [--list] [--skip=<test_id>]\n" "\n" " Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n" " Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" @@ -505,9 +510,11 @@ static void usage(void) " --control-port <port> Server port to listen on/connect to\n" " --mode client|server Server or client mode\n" " --peer-cid <cid> CID of the other side\n" + " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" " --skip <test_id> Test ID to skip;\n" - " use multiple --skip options to skip more tests\n" + " use multiple --skip options to skip more tests\n", + DEFAULT_PEER_PORT ); exit(EXIT_FAILURE); } @@ -519,6 +526,7 @@ int main(int argc, char **argv) struct test_opts opts = { .mode = TEST_MODE_UNSET, .peer_cid = VMADDR_CID_ANY, + .peer_port = DEFAULT_PEER_PORT, }; init_signals(); @@ -546,6 +554,9 @@ int main(int argc, char **argv) case 'p': opts.peer_cid = parse_cid(optarg); break; + case 'q': + opts.peer_port = parse_port(optarg); + break; case 'P': control_port = optarg; break; diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c new file mode 100644 index 000000000000..75971ac708c9 --- /dev/null +++ b/tools/testing/vsock/vsock_perf.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vsock_perf - benchmark utility for vsock. + * + * Copyright (C) 2022 SberDevices. + * + * Author: Arseniy Krasnov <AVKrasnov@sberdevices.ru> + */ +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <stdint.h> +#include <poll.h> +#include <sys/socket.h> +#include <linux/vm_sockets.h> +#include <sys/mman.h> + +#include "msg_zerocopy_common.h" + +#define DEFAULT_BUF_SIZE_BYTES (128 * 1024) +#define DEFAULT_TO_SEND_BYTES (64 * 1024) +#define DEFAULT_VSOCK_BUF_BYTES (256 * 1024) +#define DEFAULT_RCVLOWAT_BYTES 1 +#define DEFAULT_PORT 1234 + +#define BYTES_PER_GB (1024 * 1024 * 1024ULL) +#define NSEC_PER_SEC (1000000000ULL) + +static unsigned int port = DEFAULT_PORT; +static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; +static unsigned long long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; +static bool zerocopy; + +static void error(const char *s) +{ + perror(s); + exit(EXIT_FAILURE); +} + +static time_t current_nsec(void) +{ + struct timespec ts; + + if (clock_gettime(CLOCK_REALTIME, &ts)) + error("clock_gettime"); + + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; +} + +/* From lib/cmdline.c. */ +static unsigned long memparse(const char *ptr) +{ + char *endptr; + + unsigned long long ret = strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + return ret; +} + +static void vsock_increase_buf_size(int fd) +{ + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + &vsock_buf_bytes, sizeof(vsock_buf_bytes))) + error("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &vsock_buf_bytes, sizeof(vsock_buf_bytes))) + error("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); +} + +static int vsock_connect(unsigned int cid, unsigned int port) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = cid, + }, + }; + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (fd < 0) { + perror("socket"); + return -1; + } + + if (connect(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("connect"); + close(fd); + return -1; + } + + return fd; +} + +static float get_gbps(unsigned long bits, time_t ns_delta) +{ + return ((float)bits / 1000000000ULL) / + ((float)ns_delta / NSEC_PER_SEC); +} + +static void run_receiver(int rcvlowat_bytes) +{ + unsigned int read_cnt; + time_t rx_begin_ns; + time_t in_read_ns; + size_t total_recv; + int client_fd; + char *data; + int fd; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = VMADDR_CID_ANY, + }, + }; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } clientaddr; + + socklen_t clientaddr_len = sizeof(clientaddr.svm); + + printf("Run as receiver\n"); + printf("Listen port %u\n", port); + printf("RX buffer %lu bytes\n", buf_size_bytes); + printf("vsock buffer %llu bytes\n", vsock_buf_bytes); + printf("SO_RCVLOWAT %d bytes\n", rcvlowat_bytes); + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (fd < 0) + error("socket"); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) + error("bind"); + + if (listen(fd, 1) < 0) + error("listen"); + + client_fd = accept(fd, &clientaddr.sa, &clientaddr_len); + + if (client_fd < 0) + error("accept"); + + vsock_increase_buf_size(client_fd); + + if (setsockopt(client_fd, SOL_SOCKET, SO_RCVLOWAT, + &rcvlowat_bytes, + sizeof(rcvlowat_bytes))) + error("setsockopt(SO_RCVLOWAT)"); + + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } + + read_cnt = 0; + in_read_ns = 0; + total_recv = 0; + rx_begin_ns = current_nsec(); + + while (1) { + struct pollfd fds = { 0 }; + + fds.fd = client_fd; + fds.events = POLLIN | POLLERR | + POLLHUP | POLLRDHUP; + + if (poll(&fds, 1, -1) < 0) + error("poll"); + + if (fds.revents & POLLERR) { + fprintf(stderr, "'poll()' error\n"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLIN) { + ssize_t bytes_read; + time_t t; + + t = current_nsec(); + bytes_read = read(fds.fd, data, buf_size_bytes); + in_read_ns += (current_nsec() - t); + read_cnt++; + + if (!bytes_read) + break; + + if (bytes_read < 0) { + perror("read"); + exit(EXIT_FAILURE); + } + + total_recv += bytes_read; + } + + if (fds.revents & (POLLHUP | POLLRDHUP)) + break; + } + + printf("total bytes received: %zu\n", total_recv); + printf("rx performance: %f Gbits/s\n", + get_gbps(total_recv * 8, current_nsec() - rx_begin_ns)); + printf("total time in 'read()': %f sec\n", (float)in_read_ns / NSEC_PER_SEC); + printf("average time in 'read()': %f ns\n", (float)in_read_ns / read_cnt); + printf("POLLIN wakeups: %i\n", read_cnt); + + free(data); + close(client_fd); + close(fd); +} + +static void enable_so_zerocopy(int fd) +{ + int val = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { + perror("setsockopt"); + exit(EXIT_FAILURE); + } +} + +static void run_sender(int peer_cid, unsigned long to_send_bytes) +{ + time_t tx_begin_ns; + time_t tx_total_ns; + size_t total_send; + time_t time_in_send; + void *data; + int fd; + + if (zerocopy) + printf("Run as sender MSG_ZEROCOPY\n"); + else + printf("Run as sender\n"); + + printf("Connect to %i:%u\n", peer_cid, port); + printf("Send %lu bytes\n", to_send_bytes); + printf("TX buffer %lu bytes\n", buf_size_bytes); + + fd = vsock_connect(peer_cid, port); + + if (fd < 0) + exit(EXIT_FAILURE); + + if (zerocopy) { + enable_so_zerocopy(fd); + + data = mmap(NULL, buf_size_bytes, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (data == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + } else { + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } + } + + memset(data, 0, buf_size_bytes); + total_send = 0; + time_in_send = 0; + tx_begin_ns = current_nsec(); + + while (total_send < to_send_bytes) { + ssize_t sent; + size_t rest_bytes; + time_t before; + + rest_bytes = to_send_bytes - total_send; + + before = current_nsec(); + sent = send(fd, data, (rest_bytes > buf_size_bytes) ? + buf_size_bytes : rest_bytes, + zerocopy ? MSG_ZEROCOPY : 0); + time_in_send += (current_nsec() - before); + + if (sent <= 0) + error("write"); + + total_send += sent; + + if (zerocopy) { + struct pollfd fds = { 0 }; + + fds.fd = fd; + + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (!(fds.revents & POLLERR)) { + fprintf(stderr, "POLLERR expected\n"); + exit(EXIT_FAILURE); + } + + vsock_recv_completion(fd, NULL); + } + } + + tx_total_ns = current_nsec() - tx_begin_ns; + + printf("total bytes sent: %zu\n", total_send); + printf("tx performance: %f Gbits/s\n", + get_gbps(total_send * 8, time_in_send)); + printf("total time in tx loop: %f sec\n", + (float)tx_total_ns / NSEC_PER_SEC); + printf("time in 'send()': %f sec\n", + (float)time_in_send / NSEC_PER_SEC); + + close(fd); + + if (zerocopy) + munmap(data, buf_size_bytes); + else + free(data); +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "help", + .has_arg = no_argument, + .val = 'H', + }, + { + .name = "sender", + .has_arg = required_argument, + .val = 'S', + }, + { + .name = "port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "bytes", + .has_arg = required_argument, + .val = 'M', + }, + { + .name = "buf-size", + .has_arg = required_argument, + .val = 'B', + }, + { + .name = "vsk-size", + .has_arg = required_argument, + .val = 'V', + }, + { + .name = "rcvlowat", + .has_arg = required_argument, + .val = 'R', + }, + { + .name = "zerocopy", + .has_arg = no_argument, + .val = 'Z', + }, + {}, +}; + +static void usage(void) +{ + printf("Usage: ./vsock_perf [--help] [options]\n" + "\n" + "This is benchmarking utility, to test vsock performance.\n" + "It runs in two modes: sender or receiver. In sender mode, it\n" + "connects to the specified CID and starts data transmission.\n" + "\n" + "Options:\n" + " --help This message\n" + " --sender <cid> Sender mode (receiver default)\n" + " <cid> of the receiver to connect to\n" + " --zerocopy Enable zerocopy (for sender mode only)\n" + " --port <port> Port (default %d)\n" + " --bytes <bytes>KMG Bytes to send (default %d)\n" + " --buf-size <bytes>KMG Data buffer size (default %d). In sender mode\n" + " it is the buffer size, passed to 'write()'. In\n" + " receiver mode it is the buffer size passed to 'read()'.\n" + " --vsk-size <bytes>KMG Socket buffer size (default %d)\n" + " --rcvlowat <bytes>KMG SO_RCVLOWAT value (default %d)\n" + "\n", DEFAULT_PORT, DEFAULT_TO_SEND_BYTES, + DEFAULT_BUF_SIZE_BYTES, DEFAULT_VSOCK_BUF_BYTES, + DEFAULT_RCVLOWAT_BYTES); + exit(EXIT_FAILURE); +} + +static long strtolx(const char *arg) +{ + long value; + char *end; + + value = strtol(arg, &end, 10); + + if (end != arg + strlen(arg)) + usage(); + + return value; +} + +int main(int argc, char **argv) +{ + unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES; + int rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; + int peer_cid = -1; + bool sender = false; + + while (1) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'V': /* Peer buffer size. */ + vsock_buf_bytes = memparse(optarg); + break; + case 'R': /* SO_RCVLOWAT value. */ + rcvlowat_bytes = memparse(optarg); + break; + case 'P': /* Port to connect to. */ + port = strtolx(optarg); + break; + case 'M': /* Bytes to send. */ + to_send_bytes = memparse(optarg); + break; + case 'B': /* Size of rx/tx buffer. */ + buf_size_bytes = memparse(optarg); + break; + case 'S': /* Sender mode. CID to connect to. */ + peer_cid = strtolx(optarg); + sender = true; + break; + case 'H': /* Help. */ + usage(); + break; + case 'Z': /* Zerocopy. */ + zerocopy = true; + break; + default: + usage(); + } + } + + if (!sender) + run_receiver(rcvlowat_bytes); + else + run_sender(peer_cid, to_send_bytes); + + return 0; +} diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 5a4fb80fa832..f669baaa0dca 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -14,11 +14,24 @@ #include <errno.h> #include <unistd.h> #include <linux/kernel.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <time.h> +#include <sys/mman.h> +#include <poll.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <linux/time64.h> +#include "vsock_test_zerocopy.h" #include "timeout.h" #include "control.h" #include "util.h" +/* Basic messages for control_writeulong(), control_readulong() */ +#define CONTROL_CONTINUE 1 +#define CONTROL_DONE 0 + static void test_stream_connection_reset(const struct test_opts *opts) { union { @@ -27,7 +40,7 @@ static void test_stream_connection_reset(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = opts->peer_cid, }, }; @@ -63,7 +76,7 @@ static void test_stream_bind_only_client(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = opts->peer_cid, }, }; @@ -99,24 +112,9 @@ static void test_stream_bind_only_client(const struct test_opts *opts) static void test_stream_bind_only_server(const struct test_opts *opts) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = 1234, - .svm_cid = VMADDR_CID_ANY, - }, - }; int fd; - fd = socket(AF_VSOCK, SOCK_STREAM, 0); - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } + fd = vsock_bind(VMADDR_CID_ANY, opts->peer_port, SOCK_STREAM); /* Notify the client that the server is ready */ control_writeln("BIND"); @@ -131,7 +129,7 @@ static void test_stream_client_close_client(const struct test_opts *opts) { int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -145,7 +143,7 @@ static void test_stream_client_close_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -166,7 +164,7 @@ static void test_stream_server_close_client(const struct test_opts *opts) { int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -187,7 +185,7 @@ static void test_stream_server_close_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -208,7 +206,7 @@ static void test_stream_multiconn_client(const struct test_opts *opts) int i; for (i = 0; i < MULTICONN_NFDS; i++) { - fds[i] = vsock_stream_connect(opts->peer_cid, 1234); + fds[i] = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fds[i] < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -232,7 +230,7 @@ static void test_stream_multiconn_server(const struct test_opts *opts) int i; for (i = 0; i < MULTICONN_NFDS; i++) { - fds[i] = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fds[i] = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fds[i] < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -250,32 +248,1641 @@ static void test_stream_multiconn_server(const struct test_opts *opts) close(fds[i]); } +#define MSG_PEEK_BUF_LEN 64 + +static void test_msg_peek_client(const struct test_opts *opts, + bool seqpacket) +{ + unsigned char buf[MSG_PEEK_BUF_LEN]; + int fd; + int i; + + if (seqpacket) + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); + else + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < sizeof(buf); i++) + buf[i] = rand() & 0xFF; + + control_expectln("SRVREADY"); + + send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); + + close(fd); +} + +static void test_msg_peek_server(const struct test_opts *opts, + bool seqpacket) +{ + unsigned char buf_half[MSG_PEEK_BUF_LEN / 2]; + unsigned char buf_normal[MSG_PEEK_BUF_LEN]; + unsigned char buf_peek[MSG_PEEK_BUF_LEN]; + int fd; + + if (seqpacket) + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + else + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + /* Peek from empty socket. */ + recv_buf(fd, buf_peek, sizeof(buf_peek), MSG_PEEK | MSG_DONTWAIT, + -EAGAIN); + + control_writeln("SRVREADY"); + + /* Peek part of data. */ + recv_buf(fd, buf_half, sizeof(buf_half), MSG_PEEK, sizeof(buf_half)); + + /* Peek whole data. */ + recv_buf(fd, buf_peek, sizeof(buf_peek), MSG_PEEK, sizeof(buf_peek)); + + /* Compare partial and full peek. */ + if (memcmp(buf_half, buf_peek, sizeof(buf_half))) { + fprintf(stderr, "Partial peek data mismatch\n"); + exit(EXIT_FAILURE); + } + + if (seqpacket) { + /* This type of socket supports MSG_TRUNC flag, + * so check it with MSG_PEEK. We must get length + * of the message. + */ + recv_buf(fd, buf_half, sizeof(buf_half), MSG_PEEK | MSG_TRUNC, + sizeof(buf_peek)); + } + + recv_buf(fd, buf_normal, sizeof(buf_normal), 0, sizeof(buf_normal)); + + /* Compare full peek and normal read. */ + if (memcmp(buf_peek, buf_normal, sizeof(buf_peek))) { + fprintf(stderr, "Full peek data mismatch\n"); + exit(EXIT_FAILURE); + } + + close(fd); +} + static void test_stream_msg_peek_client(const struct test_opts *opts) { + return test_msg_peek_client(opts, false); +} + +static void test_stream_msg_peek_server(const struct test_opts *opts) +{ + return test_msg_peek_server(opts, false); +} + +#define SOCK_BUF_SIZE (2 * 1024 * 1024) +#define MAX_MSG_PAGES 4 + +static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) +{ + unsigned long curr_hash; + size_t max_msg_size; + int page_size; + int msg_count; + int fd; + + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Wait, until receiver sets buffer size. */ + control_expectln("SRVREADY"); + + curr_hash = 0; + page_size = getpagesize(); + max_msg_size = MAX_MSG_PAGES * page_size; + msg_count = SOCK_BUF_SIZE / max_msg_size; + + for (int i = 0; i < msg_count; i++) { + size_t buf_size; + int flags; + void *buf; + + /* Use "small" buffers and "big" buffers. */ + if (i & 1) + buf_size = page_size + + (rand() % (max_msg_size - page_size)); + else + buf_size = 1 + (rand() % page_size); + + buf = malloc(buf_size); + + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + memset(buf, rand() & 0xff, buf_size); + /* Set at least one MSG_EOR + some random. */ + if (i == (msg_count / 2) || (rand() & 1)) { + flags = MSG_EOR; + curr_hash++; + } else { + flags = 0; + } + + send_buf(fd, buf, buf_size, flags, buf_size); + + /* + * Hash sum is computed at both client and server in + * the same way: + * H += hash('message data') + * Such hash "controls" both data integrity and message + * bounds. After data exchange, both sums are compared + * using control socket, and if message bounds wasn't + * broken - two values must be equal. + */ + curr_hash += hash_djb2(buf, buf_size); + free(buf); + } + + control_writeln("SENDDONE"); + control_writeulong(curr_hash); + close(fd); +} + +static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) +{ + unsigned long long sock_buf_size; + unsigned long remote_hash; + unsigned long curr_hash; + int fd; + struct msghdr msg = {0}; + struct iovec iov = {0}; + + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + sock_buf_size = SOCK_BUF_SIZE; + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + + /* Ready to receive data. */ + control_writeln("SRVREADY"); + /* Wait, until peer sends whole data. */ + control_expectln("SENDDONE"); + iov.iov_len = MAX_MSG_PAGES * getpagesize(); + iov.iov_base = malloc(iov.iov_len); + if (!iov.iov_base) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + curr_hash = 0; + + while (1) { + ssize_t recv_size; + + recv_size = recvmsg(fd, &msg, 0); + + if (!recv_size) + break; + + if (recv_size < 0) { + perror("recvmsg"); + exit(EXIT_FAILURE); + } + + if (msg.msg_flags & MSG_EOR) + curr_hash++; + + curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size); + } + + free(iov.iov_base); + close(fd); + remote_hash = control_readulong(); + + if (curr_hash != remote_hash) { + fprintf(stderr, "Message bounds broken\n"); + exit(EXIT_FAILURE); + } +} + +#define MESSAGE_TRUNC_SZ 32 +static void test_seqpacket_msg_trunc_client(const struct test_opts *opts) +{ + int fd; + char buf[MESSAGE_TRUNC_SZ]; + + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); + + control_writeln("SENDDONE"); + close(fd); +} + +static void test_seqpacket_msg_trunc_server(const struct test_opts *opts) +{ + int fd; + char buf[MESSAGE_TRUNC_SZ / 2]; + struct msghdr msg = {0}; + struct iovec iov = {0}; + + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("SENDDONE"); + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ssize_t ret = recvmsg(fd, &msg, MSG_TRUNC); + + if (ret != MESSAGE_TRUNC_SZ) { + printf("%zi\n", ret); + perror("MSG_TRUNC doesn't work"); + exit(EXIT_FAILURE); + } + + if (!(msg.msg_flags & MSG_TRUNC)) { + fprintf(stderr, "MSG_TRUNC expected\n"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static time_t current_nsec(void) +{ + struct timespec ts; + + if (clock_gettime(CLOCK_REALTIME, &ts)) { + perror("clock_gettime(3) failed"); + exit(EXIT_FAILURE); + } + + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; +} + +#define RCVTIMEO_TIMEOUT_SEC 1 +#define READ_OVERHEAD_NSEC 250000000 /* 0.25 sec */ + +static void test_seqpacket_timeout_client(const struct test_opts *opts) +{ + int fd; + struct timeval tv; + char dummy; + time_t read_enter_ns; + time_t read_overhead_ns; + + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + tv.tv_sec = RCVTIMEO_TIMEOUT_SEC; + tv.tv_usec = 0; + + setsockopt_timeval_check(fd, SOL_SOCKET, SO_RCVTIMEO, tv, + "setsockopt(SO_RCVTIMEO)"); + + read_enter_ns = current_nsec(); + + if (read(fd, &dummy, sizeof(dummy)) != -1) { + fprintf(stderr, + "expected 'dummy' read(2) failure\n"); + exit(EXIT_FAILURE); + } + + if (errno != EAGAIN) { + perror("EAGAIN expected"); + exit(EXIT_FAILURE); + } + + read_overhead_ns = current_nsec() - read_enter_ns - + NSEC_PER_SEC * RCVTIMEO_TIMEOUT_SEC; + + if (read_overhead_ns > READ_OVERHEAD_NSEC) { + fprintf(stderr, + "too much time in read(2), %lu > %i ns\n", + read_overhead_ns, READ_OVERHEAD_NSEC); + exit(EXIT_FAILURE); + } + + control_writeln("WAITDONE"); + close(fd); +} + +static void test_seqpacket_timeout_server(const struct test_opts *opts) +{ int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("WAITDONE"); + close(fd); +} + +static void test_seqpacket_bigmsg_client(const struct test_opts *opts) +{ + unsigned long long sock_buf_size; + size_t buf_size; + socklen_t len; + void *data; + int fd; + + len = sizeof(sock_buf_size); + + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); } + if (getsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &sock_buf_size, &len)) { + perror("getsockopt"); + exit(EXIT_FAILURE); + } + + sock_buf_size++; + + /* size_t can be < unsigned long long */ + buf_size = (size_t)sock_buf_size; + if (buf_size != sock_buf_size) { + fprintf(stderr, "Returned BUFFER_SIZE too large\n"); + exit(EXIT_FAILURE); + } + + data = malloc(buf_size); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + send_buf(fd, data, buf_size, 0, -EMSGSIZE); + + control_writeln("CLISENT"); + + free(data); + close(fd); +} + +static void test_seqpacket_bigmsg_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("CLISENT"); + + close(fd); +} + +#define BUF_PATTERN_1 'a' +#define BUF_PATTERN_2 'b' + +static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts *opts) +{ + int fd; + unsigned char *buf1; + unsigned char *buf2; + int buf_size = getpagesize() * 3; + + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + buf1 = malloc(buf_size); + if (!buf1) { + perror("'malloc()' for 'buf1'"); + exit(EXIT_FAILURE); + } + + buf2 = malloc(buf_size); + if (!buf2) { + perror("'malloc()' for 'buf2'"); + exit(EXIT_FAILURE); + } + + memset(buf1, BUF_PATTERN_1, buf_size); + memset(buf2, BUF_PATTERN_2, buf_size); + + send_buf(fd, buf1, buf_size, 0, buf_size); + + send_buf(fd, buf2, buf_size, 0, buf_size); + + close(fd); +} + +static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opts) +{ + int fd; + unsigned char *broken_buf; + unsigned char *valid_buf; + int page_size = getpagesize(); + int buf_size = page_size * 3; + ssize_t res; + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + int i; + + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + /* Setup first buffer. */ + broken_buf = mmap(NULL, buf_size, prot, flags, -1, 0); + if (broken_buf == MAP_FAILED) { + perror("mmap for 'broken_buf'"); + exit(EXIT_FAILURE); + } + + /* Unmap "hole" in buffer. */ + if (munmap(broken_buf + page_size, page_size)) { + perror("'broken_buf' setup"); + exit(EXIT_FAILURE); + } + + valid_buf = mmap(NULL, buf_size, prot, flags, -1, 0); + if (valid_buf == MAP_FAILED) { + perror("mmap for 'valid_buf'"); + exit(EXIT_FAILURE); + } + + /* Try to fill buffer with unmapped middle. */ + res = read(fd, broken_buf, buf_size); + if (res != -1) { + fprintf(stderr, + "expected 'broken_buf' read(2) failure, got %zi\n", + res); + exit(EXIT_FAILURE); + } + + if (errno != EFAULT) { + perror("unexpected errno of 'broken_buf'"); + exit(EXIT_FAILURE); + } + + /* Try to fill valid buffer. */ + res = read(fd, valid_buf, buf_size); + if (res < 0) { + perror("unexpected 'valid_buf' read(2) failure"); + exit(EXIT_FAILURE); + } + + if (res != buf_size) { + fprintf(stderr, + "invalid 'valid_buf' read(2), expected %i, got %zi\n", + buf_size, res); + exit(EXIT_FAILURE); + } + + for (i = 0; i < buf_size; i++) { + if (valid_buf[i] != BUF_PATTERN_2) { + fprintf(stderr, + "invalid pattern for 'valid_buf' at %i, expected %hhX, got %hhX\n", + i, BUF_PATTERN_2, valid_buf[i]); + exit(EXIT_FAILURE); + } + } + + /* Unmap buffers. */ + munmap(broken_buf, page_size); + munmap(broken_buf + page_size * 2, page_size); + munmap(valid_buf, buf_size); + close(fd); +} + +#define RCVLOWAT_BUF_SIZE 128 + +static void test_stream_poll_rcvlowat_server(const struct test_opts *opts) +{ + int fd; + int i; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + /* Send 1 byte. */ send_byte(fd, 1, 0); + + control_writeln("SRVSENT"); + + /* Wait until client is ready to receive rest of data. */ + control_expectln("CLNSENT"); + + for (i = 0; i < RCVLOWAT_BUF_SIZE - 1; i++) + send_byte(fd, 1, 0); + + /* Keep socket in active state. */ + control_expectln("POLLDONE"); + close(fd); } -static void test_stream_msg_peek_server(const struct test_opts *opts) +static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) +{ + int lowat_val = RCVLOWAT_BUF_SIZE; + char buf[RCVLOWAT_BUF_SIZE]; + struct pollfd fds; + short poll_flags; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + lowat_val, "setsockopt(SO_RCVLOWAT)"); + + control_expectln("SRVSENT"); + + /* At this point, server sent 1 byte. */ + fds.fd = fd; + poll_flags = POLLIN | POLLRDNORM; + fds.events = poll_flags; + + /* Try to wait for 1 sec. */ + if (poll(&fds, 1, 1000) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + /* poll() must return nothing. */ + if (fds.revents) { + fprintf(stderr, "Unexpected poll result %hx\n", + fds.revents); + exit(EXIT_FAILURE); + } + + /* Tell server to send rest of data. */ + control_writeln("CLNSENT"); + + /* Poll for data. */ + if (poll(&fds, 1, 10000) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + /* Only these two bits are expected. */ + if (fds.revents != poll_flags) { + fprintf(stderr, "Unexpected poll result %hx\n", + fds.revents); + exit(EXIT_FAILURE); + } + + /* Use MSG_DONTWAIT, if call is going to wait, EAGAIN + * will be returned. + */ + recv_buf(fd, buf, sizeof(buf), MSG_DONTWAIT, RCVLOWAT_BUF_SIZE); + + control_writeln("POLLDONE"); + + close(fd); +} + +#define INV_BUF_TEST_DATA_LEN 512 + +static void test_inv_buf_client(const struct test_opts *opts, bool stream) +{ + unsigned char data[INV_BUF_TEST_DATA_LEN] = {0}; + ssize_t expected_ret; + int fd; + + if (stream) + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + else + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); + + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + control_expectln("SENDDONE"); + + /* Use invalid buffer here. */ + recv_buf(fd, NULL, sizeof(data), 0, -EFAULT); + + if (stream) { + /* For SOCK_STREAM we must continue reading. */ + expected_ret = sizeof(data); + } else { + /* For SOCK_SEQPACKET socket's queue must be empty. */ + expected_ret = -EAGAIN; + } + + recv_buf(fd, data, sizeof(data), MSG_DONTWAIT, expected_ret); + + control_writeln("DONE"); + + close(fd); +} + +static void test_inv_buf_server(const struct test_opts *opts, bool stream) +{ + unsigned char data[INV_BUF_TEST_DATA_LEN] = {0}; + int fd; + + if (stream) + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + else + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + send_buf(fd, data, sizeof(data), 0, sizeof(data)); + + control_writeln("SENDDONE"); + + control_expectln("DONE"); + + close(fd); +} + +static void test_stream_inv_buf_client(const struct test_opts *opts) +{ + test_inv_buf_client(opts, true); +} + +static void test_stream_inv_buf_server(const struct test_opts *opts) +{ + test_inv_buf_server(opts, true); +} + +static void test_seqpacket_inv_buf_client(const struct test_opts *opts) +{ + test_inv_buf_client(opts, false); +} + +static void test_seqpacket_inv_buf_server(const struct test_opts *opts) +{ + test_inv_buf_server(opts, false); +} + +#define HELLO_STR "HELLO" +#define WORLD_STR "WORLD" + +static void test_stream_virtio_skb_merge_client(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Send first skbuff. */ + send_buf(fd, HELLO_STR, strlen(HELLO_STR), 0, strlen(HELLO_STR)); + + control_writeln("SEND0"); + /* Peer reads part of first skbuff. */ + control_expectln("REPLY0"); + + /* Send second skbuff, it will be appended to the first. */ + send_buf(fd, WORLD_STR, strlen(WORLD_STR), 0, strlen(WORLD_STR)); + + control_writeln("SEND1"); + /* Peer reads merged skbuff packet. */ + control_expectln("REPLY1"); + + close(fd); +} + +static void test_stream_virtio_skb_merge_server(const struct test_opts *opts) +{ + size_t read = 0, to_read; + unsigned char buf[64]; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("SEND0"); + + /* Read skbuff partially. */ + to_read = 2; + recv_buf(fd, buf + read, to_read, 0, to_read); + read += to_read; + + control_writeln("REPLY0"); + control_expectln("SEND1"); + + /* Read the rest of both buffers */ + to_read = strlen(HELLO_STR WORLD_STR) - read; + recv_buf(fd, buf + read, to_read, 0, to_read); + read += to_read; + + /* No more bytes should be there */ + to_read = sizeof(buf) - read; + recv_buf(fd, buf + read, to_read, MSG_DONTWAIT, -EAGAIN); + + if (memcmp(buf, HELLO_STR WORLD_STR, strlen(HELLO_STR WORLD_STR))) { + fprintf(stderr, "pattern mismatch\n"); + exit(EXIT_FAILURE); + } + + control_writeln("REPLY1"); + + close(fd); +} + +static void test_seqpacket_msg_peek_client(const struct test_opts *opts) +{ + return test_msg_peek_client(opts, true); +} + +static void test_seqpacket_msg_peek_server(const struct test_opts *opts) +{ + return test_msg_peek_server(opts, true); +} + +static sig_atomic_t have_sigpipe; + +static void sigpipe(int signo) +{ + have_sigpipe = 1; +} + +#define SEND_SLEEP_USEC (10 * 1000) + +static void test_stream_check_sigpipe(int fd) +{ + ssize_t res; + + have_sigpipe = 0; + + /* When the other peer calls shutdown(SHUT_RD), there is a chance that + * the send() call could occur before the message carrying the close + * information arrives over the transport. In such cases, the send() + * might still succeed. To avoid this race, let's retry the send() call + * a few times, ensuring the test is more reliable. + */ + timeout_begin(TIMEOUT); + while(1) { + res = send(fd, "A", 1, 0); + if (res == -1 && errno != EINTR) + break; + + /* Sleep a little before trying again to avoid flooding the + * other peer and filling its receive buffer, causing + * false-negative. + */ + timeout_usleep(SEND_SLEEP_USEC); + timeout_check("send"); + } + timeout_end(); + + if (errno != EPIPE) { + fprintf(stderr, "unexpected send(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } + if (!have_sigpipe) { + fprintf(stderr, "SIGPIPE expected\n"); + exit(EXIT_FAILURE); + } + + have_sigpipe = 0; + + timeout_begin(TIMEOUT); + while(1) { + res = send(fd, "A", 1, MSG_NOSIGNAL); + if (res == -1 && errno != EINTR) + break; + + timeout_usleep(SEND_SLEEP_USEC); + timeout_check("send"); + } + timeout_end(); + + if (errno != EPIPE) { + fprintf(stderr, "unexpected send(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } + if (have_sigpipe) { + fprintf(stderr, "SIGPIPE not expected\n"); + exit(EXIT_FAILURE); + } +} + +static void test_stream_shutwr_client(const struct test_opts *opts) +{ + int fd; + + struct sigaction act = { + .sa_handler = sigpipe, + }; + + sigaction(SIGPIPE, &act, NULL); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (shutdown(fd, SHUT_WR)) { + perror("shutdown"); + exit(EXIT_FAILURE); + } + + test_stream_check_sigpipe(fd); + + control_writeln("CLIENTDONE"); + + close(fd); +} + +static void test_stream_shutwr_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("CLIENTDONE"); + + close(fd); +} + +static void test_stream_shutrd_client(const struct test_opts *opts) +{ + int fd; + + struct sigaction act = { + .sa_handler = sigpipe, + }; + + sigaction(SIGPIPE, &act, NULL); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + control_expectln("SHUTRDDONE"); + + test_stream_check_sigpipe(fd); + + control_writeln("CLIENTDONE"); + + close(fd); +} + +static void test_stream_shutrd_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + if (shutdown(fd, SHUT_RD)) { + perror("shutdown"); + exit(EXIT_FAILURE); + } + + control_writeln("SHUTRDDONE"); + control_expectln("CLIENTDONE"); + + close(fd); +} + +static void test_double_bind_connect_server(const struct test_opts *opts) +{ + int listen_fd, client_fd, i; + struct sockaddr_vm sa_client; + socklen_t socklen_client = sizeof(sa_client); + + listen_fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + + for (i = 0; i < 2; i++) { + control_writeln("LISTENING"); + + timeout_begin(TIMEOUT); + do { + client_fd = accept(listen_fd, (struct sockaddr *)&sa_client, + &socklen_client); + timeout_check("accept"); + } while (client_fd < 0 && errno == EINTR); + timeout_end(); + + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + /* Waiting for remote peer to close connection */ + vsock_wait_remote_close(client_fd); + } + + close(listen_fd); +} + +static void test_double_bind_connect_client(const struct test_opts *opts) +{ + int i, client_fd; + + for (i = 0; i < 2; i++) { + /* Wait until server is ready to accept a new connection */ + control_expectln("LISTENING"); + + /* We use 'peer_port + 1' as "some" port for the 'bind()' + * call. It is safe for overflow, but must be considered, + * when running multiple test applications simultaneously + * where 'peer-port' argument differs by 1. + */ + client_fd = vsock_bind_connect(opts->peer_cid, opts->peer_port, + opts->peer_port + 1, SOCK_STREAM); + + close(client_fd); + } +} + +#define MSG_BUF_IOCTL_LEN 64 +static void test_unsent_bytes_server(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int client_fd; + + client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type); + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + recv_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf)); + control_writeln("RECEIVED"); + + close(client_fd); +} + +static void test_unsent_bytes_client(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int fd; + + fd = vsock_connect(opts->peer_cid, opts->peer_port, type); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < sizeof(buf); i++) + buf[i] = rand() & 0xFF; + + send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); + control_expectln("RECEIVED"); + + /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though + * the "RECEIVED" message means that the other side has received the + * data, there can be a delay in our kernel before updating the "unsent + * bytes" counter. vsock_wait_sent() will repeat SIOCOUTQ until it + * returns 0. + */ + if (!vsock_wait_sent(fd)) + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + + close(fd); +} + +static void test_stream_unsent_bytes_client(const struct test_opts *opts) +{ + test_unsent_bytes_client(opts, SOCK_STREAM); +} + +static void test_stream_unsent_bytes_server(const struct test_opts *opts) +{ + test_unsent_bytes_server(opts, SOCK_STREAM); +} + +static void test_seqpacket_unsent_bytes_client(const struct test_opts *opts) +{ + test_unsent_bytes_client(opts, SOCK_SEQPACKET); +} + +static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts) +{ + test_unsent_bytes_server(opts, SOCK_SEQPACKET); +} + +#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) +/* This define is the same as in 'include/linux/virtio_vsock.h': + * it is used to decide when to send credit update message during + * reading from rx queue of a socket. Value and its usage in + * kernel is important for this test. + */ +#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) + +static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opts) +{ + size_t buf_size; + void *buf; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Send 1 byte more than peer's buffer size. */ + buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE + 1; + + buf = malloc(buf_size); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + /* Wait until peer sets needed buffer size. */ + recv_byte(fd, 1, 0); + + if (send(fd, buf, buf_size, 0) != buf_size) { + perror("send failed"); + exit(EXIT_FAILURE); + } + + free(buf); + close(fd); +} + +static void test_stream_credit_update_test(const struct test_opts *opts, + bool low_rx_bytes_test) +{ + int recv_buf_size; + struct pollfd fds; + size_t buf_size; + unsigned long long sock_buf_size; + void *buf; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE; + + /* size_t can be < unsigned long long */ + sock_buf_size = buf_size; + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + + if (low_rx_bytes_test) { + /* Set new SO_RCVLOWAT here. This enables sending credit + * update when number of bytes if our rx queue become < + * SO_RCVLOWAT value. + */ + recv_buf_size = 1 + VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + recv_buf_size, "setsockopt(SO_RCVLOWAT)"); + } + + /* Send one dummy byte here, because 'setsockopt()' above also + * sends special packet which tells sender to update our buffer + * size. This 'send_byte()' will serialize such packet with data + * reads in a loop below. Sender starts transmission only when + * it receives this single byte. + */ + send_byte(fd, 1, 0); + + buf = malloc(buf_size); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + /* Wait until there will be 128KB of data in rx queue. */ + while (1) { + ssize_t res; + + res = recv(fd, buf, buf_size, MSG_PEEK); + if (res == buf_size) + break; + + if (res <= 0) { + fprintf(stderr, "unexpected 'recv()' return: %zi\n", res); + exit(EXIT_FAILURE); + } + } + + /* There is 128KB of data in the socket's rx queue, dequeue first + * 64KB, credit update is sent if 'low_rx_bytes_test' == true. + * Otherwise, credit update is sent in 'if (!low_rx_bytes_test)'. + */ + recv_buf_size = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + recv_buf(fd, buf, recv_buf_size, 0, recv_buf_size); + + if (!low_rx_bytes_test) { + recv_buf_size++; + + /* Updating SO_RCVLOWAT will send credit update. */ + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + recv_buf_size, "setsockopt(SO_RCVLOWAT)"); + } + + fds.fd = fd; + fds.events = POLLIN | POLLRDNORM | POLLERR | + POLLRDHUP | POLLHUP; + + /* This 'poll()' will return once we receive last byte + * sent by client. + */ + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLERR) { + fprintf(stderr, "'poll()' error\n"); + exit(EXIT_FAILURE); + } + + if (fds.revents & (POLLIN | POLLRDNORM)) { + recv_buf(fd, buf, recv_buf_size, MSG_DONTWAIT, recv_buf_size); + } else { + /* These flags must be set, as there is at + * least 64KB of data ready to read. + */ + fprintf(stderr, "POLLIN | POLLRDNORM expected\n"); + exit(EXIT_FAILURE); + } + + free(buf); + close(fd); +} + +static void test_stream_cred_upd_on_low_rx_bytes(const struct test_opts *opts) +{ + test_stream_credit_update_test(opts, true); +} + +static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts) +{ + test_stream_credit_update_test(opts, false); +} + +/* The goal of test leak_acceptq is to stress the race between connect() and + * close(listener). Implementation of client/server loops boils down to: + * + * client server + * ------ ------ + * write(CONTINUE) + * expect(CONTINUE) + * listen() + * write(LISTENING) + * expect(LISTENING) + * connect() close() + */ +#define ACCEPTQ_LEAK_RACE_TIMEOUT 2 /* seconds */ + +static void test_stream_leak_acceptq_client(const struct test_opts *opts) +{ + time_t tout; + int fd; + + tout = current_nsec() + ACCEPTQ_LEAK_RACE_TIMEOUT * NSEC_PER_SEC; + do { + control_writeulong(CONTROL_CONTINUE); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd >= 0) + close(fd); + } while (current_nsec() < tout); + + control_writeulong(CONTROL_DONE); +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_leak_acceptq_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + while (control_readulong() == CONTROL_CONTINUE) { + fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + control_writeln("LISTENING"); + close(fd); + } +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_msgzcopy_leak_errq_client(const struct test_opts *opts) +{ + struct pollfd fds = { 0 }; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + send_byte(fd, 1, MSG_ZEROCOPY); + + fds.fd = fd; + fds.events = 0; + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); } - recv_byte(fd, 1, MSG_PEEK); recv_byte(fd, 1, 0); + vsock_wait_remote_close(fd); + close(fd); +} + +/* Test msgzcopy_leak_zcskb is meant to exercise sendmsg() error handling path, + * that might leak an skb. The idea is to fail virtio_transport_init_zcopy_skb() + * by hitting net.core.optmem_max limit in sock_omalloc(), specifically + * + * vsock_connectible_sendmsg + * virtio_transport_stream_enqueue + * virtio_transport_send_pkt_info + * virtio_transport_init_zcopy_skb + * . msg_zerocopy_realloc + * . msg_zerocopy_alloc + * . sock_omalloc + * . sk_omem_alloc + size > sysctl_optmem_max + * return -ENOMEM + * + * We abuse the implementation detail of net/socket.c:____sys_sendmsg(). + * sk_omem_alloc can be precisely bumped by sock_kmalloc(), as it is used to + * fetch user-provided control data. + * + * While this approach works for now, it relies on assumptions regarding the + * implementation and configuration (for example, order of net.core.optmem_max + * can not exceed MAX_PAGE_ORDER), which may not hold in the future. A more + * resilient testing could be implemented by leveraging the Fault injection + * framework (CONFIG_FAULT_INJECTION), e.g. + * + * client# echo N > /sys/kernel/debug/failslab/ignore-gfp-wait + * client# echo 0 > /sys/kernel/debug/failslab/verbose + * + * void client(const struct test_opts *opts) + * { + * char buf[16]; + * int f, s, i; + * + * f = open("/proc/self/fail-nth", O_WRONLY); + * + * for (i = 1; i < 32; i++) { + * control_writeulong(CONTROL_CONTINUE); + * + * s = vsock_stream_connect(opts->peer_cid, opts->peer_port); + * enable_so_zerocopy_check(s); + * + * sprintf(buf, "%d", i); + * write(f, buf, strlen(buf)); + * + * send(s, &(char){ 0 }, 1, MSG_ZEROCOPY); + * + * write(f, "0", 1); + * close(s); + * } + * + * control_writeulong(CONTROL_DONE); + * close(f); + * } + * + * void server(const struct test_opts *opts) + * { + * int fd; + * + * while (control_readulong() == CONTROL_CONTINUE) { + * fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + * vsock_wait_remote_close(fd); + * close(fd); + * } + * } + * + * Refer to Documentation/fault-injection/fault-injection.rst. + */ +#define MAX_PAGE_ORDER 10 /* usually */ +#define PAGE_SIZE 4096 + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) +{ + size_t optmem_max, ctl_len, chunk_size; + struct msghdr msg = { 0 }; + struct iovec iov; + char *chunk; + int fd, res; + FILE *f; + + f = fopen("/proc/sys/net/core/optmem_max", "r"); + if (!f) { + perror("fopen(optmem_max)"); + exit(EXIT_FAILURE); + } + + if (fscanf(f, "%zu", &optmem_max) != 1) { + fprintf(stderr, "fscanf(optmem_max) failed\n"); + exit(EXIT_FAILURE); + } + + fclose(f); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + + ctl_len = optmem_max - 1; + if (ctl_len > PAGE_SIZE << MAX_PAGE_ORDER) { + fprintf(stderr, "Try with net.core.optmem_max = 100000\n"); + exit(EXIT_FAILURE); + } + + chunk_size = CMSG_SPACE(ctl_len); + chunk = malloc(chunk_size); + if (!chunk) { + perror("malloc"); + exit(EXIT_FAILURE); + } + memset(chunk, 0, chunk_size); + + iov.iov_base = &(char){ 0 }; + iov.iov_len = 1; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = chunk; + msg.msg_controllen = ctl_len; + + errno = 0; + res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (res >= 0 || errno != ENOMEM) { + fprintf(stderr, "Expected ENOMEM, got errno=%d res=%d\n", + errno, res); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + +#define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */ + +/* Test attempts to trigger a transport release for an unbound socket. This can + * lead to a reference count mishandling. + */ +static void test_stream_transport_uaf_client(const struct test_opts *opts) +{ + int sockets[MAX_PORT_RETRIES]; + struct sockaddr_vm addr; + int fd, i, alen; + + fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM); + + alen = sizeof(addr); + if (getsockname(fd, (struct sockaddr *)&addr, &alen)) { + perror("getsockname"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < MAX_PORT_RETRIES; ++i) + sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port, + SOCK_STREAM); + + close(fd); + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) { + perror("Unexpected connect() #1 success"); + exit(EXIT_FAILURE); + } + + /* Vulnerable system may crash now. */ + if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) { + perror("Unexpected connect() #2 success"); + exit(EXIT_FAILURE); + } + + close(fd); + while (i--) + close(sockets[i]); + + control_writeln("DONE"); +} + +static void test_stream_transport_uaf_server(const struct test_opts *opts) +{ + control_expectln("DONE"); +} + +static void test_stream_connect_retry_client(const struct test_opts *opts) +{ + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (!vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + fprintf(stderr, "Unexpected connect() #1 success\n"); + exit(EXIT_FAILURE); + } + + control_writeln("LISTEN"); + control_expectln("LISTENING"); + + if (vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + perror("connect() #2"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_connect_retry_server(const struct test_opts *opts) +{ + int fd; + + control_expectln("LISTEN"); + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + +static void test_stream_linger_client(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_linger(fd, 1); + close(fd); +} + +static void test_stream_linger_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + +/* Half of the default to not risk timing out the control channel */ +#define LINGER_TIMEOUT (TIMEOUT / 2) + +static void test_stream_nolinger_client(const struct test_opts *opts) +{ + bool waited; + time_t ns; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_linger(fd, LINGER_TIMEOUT); + send_byte(fd, 1, 0); /* Left unread to expose incorrect behaviour. */ + waited = vsock_wait_sent(fd); + + ns = current_nsec(); + close(fd); + ns = current_nsec() - ns; + + if (!waited) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + } else if (DIV_ROUND_UP(ns, NSEC_PER_SEC) >= LINGER_TIMEOUT) { + fprintf(stderr, "Unexpected lingering\n"); + exit(EXIT_FAILURE); + } + + control_writeln("DONE"); +} + +static void test_stream_nolinger_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); close(fd); } @@ -309,6 +1916,141 @@ static struct test_case test_cases[] = { .run_client = test_stream_msg_peek_client, .run_server = test_stream_msg_peek_server, }, + { + .name = "SOCK_SEQPACKET msg bounds", + .run_client = test_seqpacket_msg_bounds_client, + .run_server = test_seqpacket_msg_bounds_server, + }, + { + .name = "SOCK_SEQPACKET MSG_TRUNC flag", + .run_client = test_seqpacket_msg_trunc_client, + .run_server = test_seqpacket_msg_trunc_server, + }, + { + .name = "SOCK_SEQPACKET timeout", + .run_client = test_seqpacket_timeout_client, + .run_server = test_seqpacket_timeout_server, + }, + { + .name = "SOCK_SEQPACKET invalid receive buffer", + .run_client = test_seqpacket_invalid_rec_buffer_client, + .run_server = test_seqpacket_invalid_rec_buffer_server, + }, + { + .name = "SOCK_STREAM poll() + SO_RCVLOWAT", + .run_client = test_stream_poll_rcvlowat_client, + .run_server = test_stream_poll_rcvlowat_server, + }, + { + .name = "SOCK_SEQPACKET big message", + .run_client = test_seqpacket_bigmsg_client, + .run_server = test_seqpacket_bigmsg_server, + }, + { + .name = "SOCK_STREAM test invalid buffer", + .run_client = test_stream_inv_buf_client, + .run_server = test_stream_inv_buf_server, + }, + { + .name = "SOCK_SEQPACKET test invalid buffer", + .run_client = test_seqpacket_inv_buf_client, + .run_server = test_seqpacket_inv_buf_server, + }, + { + .name = "SOCK_STREAM virtio skb merge", + .run_client = test_stream_virtio_skb_merge_client, + .run_server = test_stream_virtio_skb_merge_server, + }, + { + .name = "SOCK_SEQPACKET MSG_PEEK", + .run_client = test_seqpacket_msg_peek_client, + .run_server = test_seqpacket_msg_peek_server, + }, + { + .name = "SOCK_STREAM SHUT_WR", + .run_client = test_stream_shutwr_client, + .run_server = test_stream_shutwr_server, + }, + { + .name = "SOCK_STREAM SHUT_RD", + .run_client = test_stream_shutrd_client, + .run_server = test_stream_shutrd_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY", + .run_client = test_stream_msgzcopy_client, + .run_server = test_stream_msgzcopy_server, + }, + { + .name = "SOCK_SEQPACKET MSG_ZEROCOPY", + .run_client = test_seqpacket_msgzcopy_client, + .run_server = test_seqpacket_msgzcopy_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY empty MSG_ERRQUEUE", + .run_client = test_stream_msgzcopy_empty_errq_client, + .run_server = test_stream_msgzcopy_empty_errq_server, + }, + { + .name = "SOCK_STREAM double bind connect", + .run_client = test_double_bind_connect_client, + .run_server = test_double_bind_connect_server, + }, + { + .name = "SOCK_STREAM virtio credit update + SO_RCVLOWAT", + .run_client = test_stream_rcvlowat_def_cred_upd_client, + .run_server = test_stream_cred_upd_on_set_rcvlowat, + }, + { + .name = "SOCK_STREAM virtio credit update + low rx_bytes", + .run_client = test_stream_rcvlowat_def_cred_upd_client, + .run_server = test_stream_cred_upd_on_low_rx_bytes, + }, + { + .name = "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes", + .run_client = test_stream_unsent_bytes_client, + .run_server = test_stream_unsent_bytes_server, + }, + { + .name = "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes", + .run_client = test_seqpacket_unsent_bytes_client, + .run_server = test_seqpacket_unsent_bytes_server, + }, + { + .name = "SOCK_STREAM leak accept queue", + .run_client = test_stream_leak_acceptq_client, + .run_server = test_stream_leak_acceptq_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak MSG_ERRQUEUE", + .run_client = test_stream_msgzcopy_leak_errq_client, + .run_server = test_stream_msgzcopy_leak_errq_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", + .run_client = test_stream_msgzcopy_leak_zcskb_client, + .run_server = test_stream_msgzcopy_leak_zcskb_server, + }, + { + .name = "SOCK_STREAM transport release use-after-free", + .run_client = test_stream_transport_uaf_client, + .run_server = test_stream_transport_uaf_server, + }, + { + .name = "SOCK_STREAM retry failed connect()", + .run_client = test_stream_connect_retry_client, + .run_server = test_stream_connect_retry_server, + }, + { + .name = "SOCK_STREAM SO_LINGER null-ptr-deref", + .run_client = test_stream_linger_client, + .run_server = test_stream_linger_server, + }, + { + .name = "SOCK_STREAM SO_LINGER close() on unread", + .run_client = test_stream_nolinger_client, + .run_server = test_stream_nolinger_server, + }, {}, }; @@ -335,6 +2077,11 @@ static const struct option longopts[] = { .val = 'p', }, { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, + { .name = "list", .has_arg = no_argument, .val = 'l', @@ -345,6 +2092,11 @@ static const struct option longopts[] = { .val = 's', }, { + .name = "pick", + .has_arg = required_argument, + .val = 't', + }, + { .name = "help", .has_arg = no_argument, .val = '?', @@ -354,7 +2106,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n" + fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>] [--list] [--skip=<test_id>]\n" "\n" " Server: vsock_test --control-port=1234 --mode=server --peer-cid=3\n" " Client: vsock_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" @@ -369,6 +2121,9 @@ static void usage(void) "connect to.\n" "\n" "The CID of the other side must be given with --peer-cid=<cid>.\n" + "During the test, two AF_VSOCK ports will be used: the port\n" + "specified with --peer-port=<port> (or the default port)\n" + "and the next one.\n" "\n" "Options:\n" " --help This help message\n" @@ -376,9 +2131,13 @@ static void usage(void) " --control-port <port> Server port to listen on/connect to\n" " --mode client|server Server or client mode\n" " --peer-cid <cid> CID of the other side\n" + " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" + " --pick <test_id> Test ID to execute selectively;\n" + " use multiple --pick options to select more tests\n" " --skip <test_id> Test ID to skip;\n" - " use multiple --skip options to skip more tests\n" + " use multiple --skip options to skip more tests\n", + DEFAULT_PEER_PORT ); exit(EXIT_FAILURE); } @@ -390,8 +2149,10 @@ int main(int argc, char **argv) struct test_opts opts = { .mode = TEST_MODE_UNSET, .peer_cid = VMADDR_CID_ANY, + .peer_port = DEFAULT_PEER_PORT, }; + srand(time(NULL)); init_signals(); for (;;) { @@ -417,6 +2178,9 @@ int main(int argc, char **argv) case 'p': opts.peer_cid = parse_cid(optarg); break; + case 'q': + opts.peer_port = parse_port(optarg); + break; case 'P': control_port = optarg; break; @@ -427,6 +2191,10 @@ int main(int argc, char **argv) skip_test(test_cases, ARRAY_SIZE(test_cases) - 1, optarg); break; + case 't': + pick_test(test_cases, ARRAY_SIZE(test_cases) - 1, + optarg); + break; case '?': default: usage(); diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c new file mode 100644 index 000000000000..9d9a6cb9614a --- /dev/null +++ b/tools/testing/vsock/vsock_test_zerocopy.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* MSG_ZEROCOPY feature tests for vsock + * + * Copyright (C) 2023 SberDevices. + * + * Author: Arseniy Krasnov <avkrasnov@salutedevices.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> +#include <poll.h> +#include <linux/errqueue.h> +#include <linux/kernel.h> +#include <errno.h> + +#include "control.h" +#include "vsock_test_zerocopy.h" +#include "msg_zerocopy_common.h" + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#define VSOCK_TEST_DATA_MAX_IOV 3 + +struct vsock_test_data { + /* This test case if for SOCK_STREAM only. */ + bool stream_only; + /* Data must be zerocopied. This field is checked against + * field 'ee_code' of the 'struct sock_extended_err', which + * contains bit to detect that zerocopy transmission was + * fallbacked to copy mode. + */ + bool zerocopied; + /* Enable SO_ZEROCOPY option on the socket. Without enabled + * SO_ZEROCOPY, every MSG_ZEROCOPY transmission will behave + * like without MSG_ZEROCOPY flag. + */ + bool so_zerocopy; + /* 'errno' after 'sendmsg()' call. */ + int sendmsg_errno; + /* Number of valid elements in 'vecs'. */ + int vecs_cnt; + struct iovec vecs[VSOCK_TEST_DATA_MAX_IOV]; +}; + +static struct vsock_test_data test_data_array[] = { + /* Last element has non-page aligned size. */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { NULL, PAGE_SIZE }, + { NULL, 200 } + } + }, + /* All elements have page aligned base and size. */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { NULL, PAGE_SIZE * 2 }, + { NULL, PAGE_SIZE * 3 } + } + }, + /* All elements have page aligned base and size. But + * data length is bigger than 64Kb. + */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE * 16 }, + { NULL, PAGE_SIZE * 16 }, + { NULL, PAGE_SIZE * 16 } + } + }, + /* Middle element has both non-page aligned base and size. */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { (void *)1, 100 }, + { NULL, PAGE_SIZE } + } + }, + /* Middle element is unmapped. */ + { + .zerocopied = false, + .so_zerocopy = true, + .sendmsg_errno = ENOMEM, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { MAP_FAILED, PAGE_SIZE }, + { NULL, PAGE_SIZE } + } + }, + /* Valid data, but SO_ZEROCOPY is off. This + * will trigger fallback to copy. + */ + { + .zerocopied = false, + .so_zerocopy = false, + .sendmsg_errno = 0, + .vecs_cnt = 1, + { + { NULL, PAGE_SIZE } + } + }, + /* Valid data, but message is bigger than peer's + * buffer, so this will trigger fallback to copy. + * This test is for SOCK_STREAM only, because + * for SOCK_SEQPACKET, 'sendmsg()' returns EMSGSIZE. + */ + { + .stream_only = true, + .zerocopied = false, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 1, + { + { NULL, 100 * PAGE_SIZE } + } + }, +}; + +#define POLL_TIMEOUT_MS 100 + +static void test_client(const struct test_opts *opts, + const struct vsock_test_data *test_data, + bool sock_seqpacket) +{ + struct pollfd fds = { 0 }; + struct msghdr msg = { 0 }; + ssize_t sendmsg_res; + struct iovec *iovec; + int fd; + + if (sock_seqpacket) + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); + else + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (test_data->so_zerocopy) + enable_so_zerocopy_check(fd); + + iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); + + msg.msg_iov = iovec; + msg.msg_iovlen = test_data->vecs_cnt; + + errno = 0; + + sendmsg_res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (errno != test_data->sendmsg_errno) { + fprintf(stderr, "expected 'errno' == %i, got %i\n", + test_data->sendmsg_errno, errno); + exit(EXIT_FAILURE); + } + + if (!errno) { + if (sendmsg_res != iovec_bytes(iovec, test_data->vecs_cnt)) { + fprintf(stderr, "expected 'sendmsg()' == %li, got %li\n", + iovec_bytes(iovec, test_data->vecs_cnt), + sendmsg_res); + exit(EXIT_FAILURE); + } + } + + fds.fd = fd; + fds.events = 0; + + if (poll(&fds, 1, POLL_TIMEOUT_MS) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLERR) { + vsock_recv_completion(fd, &test_data->zerocopied); + } else if (test_data->so_zerocopy && !test_data->sendmsg_errno) { + /* If we don't have data in the error queue, but + * SO_ZEROCOPY was enabled and 'sendmsg()' was + * successful - this is an error. + */ + fprintf(stderr, "POLLERR expected\n"); + exit(EXIT_FAILURE); + } + + if (!test_data->sendmsg_errno) + control_writeulong(iovec_hash_djb2(iovec, test_data->vecs_cnt)); + else + control_writeulong(0); + + control_writeln("DONE"); + free_test_iovec(test_data->vecs, iovec, test_data->vecs_cnt); + close(fd); +} + +void test_stream_msgzcopy_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + test_client(opts, &test_data_array[i], false); +} + +void test_seqpacket_msgzcopy_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) { + if (test_data_array[i].stream_only) + continue; + + test_client(opts, &test_data_array[i], true); + } +} + +static void test_server(const struct test_opts *opts, + const struct vsock_test_data *test_data, + bool sock_seqpacket) +{ + unsigned long remote_hash; + unsigned long local_hash; + ssize_t total_bytes_rec; + unsigned char *data; + size_t data_len; + int fd; + + if (sock_seqpacket) + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + else + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + data_len = iovec_bytes(test_data->vecs, test_data->vecs_cnt); + + data = malloc(data_len); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + total_bytes_rec = 0; + + while (total_bytes_rec != data_len) { + ssize_t bytes_rec; + + bytes_rec = read(fd, data + total_bytes_rec, + data_len - total_bytes_rec); + if (bytes_rec <= 0) + break; + + total_bytes_rec += bytes_rec; + } + + if (test_data->sendmsg_errno == 0) + local_hash = hash_djb2(data, data_len); + else + local_hash = 0; + + free(data); + + /* Waiting for some result. */ + remote_hash = control_readulong(); + if (remote_hash != local_hash) { + fprintf(stderr, "hash mismatch\n"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + close(fd); +} + +void test_stream_msgzcopy_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + test_server(opts, &test_data_array[i], false); +} + +void test_seqpacket_msgzcopy_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) { + if (test_data_array[i].stream_only) + continue; + + test_server(opts, &test_data_array[i], true); + } +} + +void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts) +{ + struct msghdr msg = { 0 }; + char cmsg_data[128]; + ssize_t res; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + msg.msg_control = cmsg_data; + msg.msg_controllen = sizeof(cmsg_data); + + res = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (res != -1) { + fprintf(stderr, "expected 'recvmsg(2)' failure, got %zi\n", + res); + exit(EXIT_FAILURE); + } + + control_writeln("DONE"); + close(fd); +} + +void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + close(fd); +} diff --git a/tools/testing/vsock/vsock_test_zerocopy.h b/tools/testing/vsock/vsock_test_zerocopy.h new file mode 100644 index 000000000000..3ef2579e024d --- /dev/null +++ b/tools/testing/vsock/vsock_test_zerocopy.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef VSOCK_TEST_ZEROCOPY_H +#define VSOCK_TEST_ZEROCOPY_H +#include "util.h" + +void test_stream_msgzcopy_client(const struct test_opts *opts); +void test_stream_msgzcopy_server(const struct test_opts *opts); + +void test_seqpacket_msgzcopy_client(const struct test_opts *opts); +void test_seqpacket_msgzcopy_server(const struct test_opts *opts); + +void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts); +void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts); + +#endif /* VSOCK_TEST_ZEROCOPY_H */ diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c new file mode 100644 index 000000000000..5c3078969659 --- /dev/null +++ b/tools/testing/vsock/vsock_uring_test.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* io_uring tests for vsock + * + * Copyright (C) 2023 SberDevices. + * + * Author: Arseniy Krasnov <avkrasnov@salutedevices.com> + */ + +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <liburing.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/kernel.h> +#include <error.h> + +#include "util.h" +#include "control.h" +#include "msg_zerocopy_common.h" + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#define RING_ENTRIES_NUM 4 + +#define VSOCK_TEST_DATA_MAX_IOV 3 + +struct vsock_io_uring_test { + /* Number of valid elements in 'vecs'. */ + int vecs_cnt; + struct iovec vecs[VSOCK_TEST_DATA_MAX_IOV]; +}; + +static struct vsock_io_uring_test test_data_array[] = { + /* All elements have page aligned base and size. */ + { + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { NULL, 2 * PAGE_SIZE }, + { NULL, 3 * PAGE_SIZE }, + } + }, + /* Middle element has both non-page aligned base and size. */ + { + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { (void *)1, 200 }, + { NULL, 3 * PAGE_SIZE }, + } + } +}; + +static void vsock_io_uring_client(const struct test_opts *opts, + const struct vsock_io_uring_test *test_data, + bool msg_zerocopy) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + struct iovec *iovec; + struct msghdr msg; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (msg_zerocopy) + enable_so_zerocopy_check(fd); + + iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); + + if (io_uring_queue_init(RING_ENTRIES_NUM, &ring, 0)) + error(1, errno, "io_uring_queue_init"); + + if (io_uring_register_buffers(&ring, iovec, test_data->vecs_cnt)) + error(1, errno, "io_uring_register_buffers"); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iovec; + msg.msg_iovlen = test_data->vecs_cnt; + sqe = io_uring_get_sqe(&ring); + + if (msg_zerocopy) + io_uring_prep_sendmsg_zc(sqe, fd, &msg, 0); + else + io_uring_prep_sendmsg(sqe, fd, &msg, 0); + + if (io_uring_submit(&ring) != 1) + error(1, errno, "io_uring_submit"); + + if (io_uring_wait_cqe(&ring, &cqe)) + error(1, errno, "io_uring_wait_cqe"); + + io_uring_cqe_seen(&ring, cqe); + + control_writeulong(iovec_hash_djb2(iovec, test_data->vecs_cnt)); + + control_writeln("DONE"); + io_uring_queue_exit(&ring); + free_test_iovec(test_data->vecs, iovec, test_data->vecs_cnt); + close(fd); +} + +static void vsock_io_uring_server(const struct test_opts *opts, + const struct vsock_io_uring_test *test_data) +{ + unsigned long remote_hash; + unsigned long local_hash; + struct io_uring ring; + size_t data_len; + size_t recv_len; + void *data; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + data_len = iovec_bytes(test_data->vecs, test_data->vecs_cnt); + + data = malloc(data_len); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + if (io_uring_queue_init(RING_ENTRIES_NUM, &ring, 0)) + error(1, errno, "io_uring_queue_init"); + + recv_len = 0; + + while (recv_len < data_len) { + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec iovec; + + sqe = io_uring_get_sqe(&ring); + iovec.iov_base = data + recv_len; + iovec.iov_len = data_len; + + io_uring_prep_readv(sqe, fd, &iovec, 1, 0); + + if (io_uring_submit(&ring) != 1) + error(1, errno, "io_uring_submit"); + + if (io_uring_wait_cqe(&ring, &cqe)) + error(1, errno, "io_uring_wait_cqe"); + + recv_len += cqe->res; + io_uring_cqe_seen(&ring, cqe); + } + + if (recv_len != data_len) { + fprintf(stderr, "expected %zu, got %zu\n", data_len, + recv_len); + exit(EXIT_FAILURE); + } + + local_hash = hash_djb2(data, data_len); + + remote_hash = control_readulong(); + if (remote_hash != local_hash) { + fprintf(stderr, "hash mismatch\n"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + io_uring_queue_exit(&ring); + free(data); +} + +void test_stream_uring_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_server(opts, &test_data_array[i]); +} + +void test_stream_uring_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_client(opts, &test_data_array[i], false); +} + +void test_stream_uring_msg_zc_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_server(opts, &test_data_array[i]); +} + +void test_stream_uring_msg_zc_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_client(opts, &test_data_array[i], true); +} + +static struct test_case test_cases[] = { + { + .name = "SOCK_STREAM io_uring test", + .run_server = test_stream_uring_server, + .run_client = test_stream_uring_client, + }, + { + .name = "SOCK_STREAM io_uring MSG_ZEROCOPY test", + .run_server = test_stream_uring_msg_zc_server, + .run_client = test_stream_uring_msg_zc_client, + }, + {}, +}; + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "control-host", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "control-port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "mode", + .has_arg = required_argument, + .val = 'm', + }, + { + .name = "peer-cid", + .has_arg = required_argument, + .val = 'p', + }, + { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, + { + .name = "help", + .has_arg = no_argument, + .val = '?', + }, + {}, +}; + +static void usage(void) +{ + fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>]\n" + "\n" + " Server: vsock_uring_test --control-port=1234 --mode=server --peer-cid=3\n" + " Client: vsock_uring_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" + "\n" + "Run transmission tests using io_uring. Usage is the same as\n" + "in ./vsock_test\n" + "\n" + "Options:\n" + " --help This help message\n" + " --control-host <host> Server IP address to connect to\n" + " --control-port <port> Server port to listen on/connect to\n" + " --mode client|server Server or client mode\n" + " --peer-cid <cid> CID of the other side\n" + " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n", + DEFAULT_PEER_PORT + ); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + const char *control_host = NULL; + const char *control_port = NULL; + struct test_opts opts = { + .mode = TEST_MODE_UNSET, + .peer_cid = VMADDR_CID_ANY, + .peer_port = DEFAULT_PEER_PORT, + }; + + init_signals(); + + for (;;) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'H': + control_host = optarg; + break; + case 'm': + if (strcmp(optarg, "client") == 0) { + opts.mode = TEST_MODE_CLIENT; + } else if (strcmp(optarg, "server") == 0) { + opts.mode = TEST_MODE_SERVER; + } else { + fprintf(stderr, "--mode must be \"client\" or \"server\"\n"); + return EXIT_FAILURE; + } + break; + case 'p': + opts.peer_cid = parse_cid(optarg); + break; + case 'q': + opts.peer_port = parse_port(optarg); + break; + case 'P': + control_port = optarg; + break; + case '?': + default: + usage(); + } + } + + if (!control_port) + usage(); + if (opts.mode == TEST_MODE_UNSET) + usage(); + if (opts.peer_cid == VMADDR_CID_ANY) + usage(); + + if (!control_host) { + if (opts.mode != TEST_MODE_SERVER) + usage(); + control_host = "0.0.0.0"; + } + + control_init(control_host, control_port, + opts.mode == TEST_MODE_SERVER); + + run_tests(test_cases, &opts); + + control_cleanup(); + + return 0; +} |