diff options
| author | 2018-02-16 16:04:18 -0500 | |
|---|---|---|
| committer | 2018-02-16 16:04:18 -0500 | |
| commit | 80c6d2b8d83b6f2f72bd17d69e2e9fca76e4637a (patch) | |
| tree | 484a63f508f2d92be4c2dd7796382748e7f75f57 /tools | |
| parent | net: Revert sched action extack support series. (diff) | |
| parent | selftests/net: add zerocopy support for PF_RDS test case (diff) | |
Merge branch 'RDS-zerocopy-support'
Sowmini Varadhan says:
====================
RDS: zerocopy support
This is version 3 of the series, following up on review comments for
http://patchwork.ozlabs.org/project/netdev/list/?series=28530
Review comments addressed
Patch 4
- fix fragile use of skb->cb[], do not set ee_code incorrectly.
Patch 5:
- remove needless bzero of skb->cb[], consolidate err cleanup
A brief overview of this feature follows.
This patch series provides support for MSG_ZEROCOPY
on a PF_RDS socket based on the APIs and infrastructure added
by commit f214f915e7db ("tcp: enable MSG_ZEROCOPY").
For single threaded rds-stress testing using rds-tcp with the
ixgbe driver using 1M message sizes (-a 1M -q 1M) preliminary
results show that there is a significant reduction in latency: about
90 usec with zerocopy, compared with 200 usec without zerocopy.
This patchset modifies the above for zerocopy in the following manner.
- if the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and,
- if the SO_ZEROCOPY socket option has been set on the PF_RDS socket,
application pages sent down with rds_sendmsg are pinned. The pinning
uses the accounting infrastructure added by a91dbff551a6 ("sock: ulimit
on MSG_ZEROCOPY pages"). The message is unpinned when all references
to the message go down to 0, and the message is freed by rds_message_purge.
A multithreaded application using this infrastructure must send down
a unique 32 bit cookie as ancillary data with each sendmsg invocation.
The format of this ancillary data is described in Patch 5 of the series.
The cookie is passed up to the application on the sk_error_queue when
the message is unpinned, indicating to the application that it is now
safe to free/reuse the message buffer. The details of the completion
notification are provided in Patch 4 of this series.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/testing/selftests/net/msg_zerocopy.c | 133 |
1 files changed, 128 insertions, 5 deletions
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index e11fe84de0fd..5cc2a53bb71c 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -14,6 +14,9 @@ * - SOCK_DGRAM * - SOCK_RAW * + * PF_RDS + * - SOCK_SEQPACKET + * * Start this program on two connected hosts, one in send mode and * the other with option '-r' to put it in receiver mode. * @@ -53,6 +56,7 @@ #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> +#include <linux/rds.h> #ifndef SO_EE_ORIGIN_ZEROCOPY #define SO_EE_ORIGIN_ZEROCOPY 5 @@ -164,17 +168,39 @@ static int do_accept(int fd) return fd; } -static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy) +static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie) +{ + struct cmsghdr *cm; + + if (!msg->msg_control) + error(1, errno, "NULL cookie"); + cm = (void *)msg->msg_control; + cm->cmsg_len = CMSG_LEN(sizeof(cookie)); + cm->cmsg_level = SOL_RDS; + cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE; + memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie)); +} + +static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain) { int ret, len, i, flags; + static uint32_t cookie; + char ckbuf[CMSG_SPACE(sizeof(cookie))]; len = 0; for (i = 0; i < msg->msg_iovlen; i++) len += msg->msg_iov[i].iov_len; flags = MSG_DONTWAIT; - if (do_zerocopy) + if (do_zerocopy) { flags |= MSG_ZEROCOPY; + if (domain == PF_RDS) { + memset(&msg->msg_control, 0, sizeof(msg->msg_control)); + msg->msg_controllen = CMSG_SPACE(sizeof(cookie)); + msg->msg_control = (struct cmsghdr *)ckbuf; + add_zcopy_cookie(msg, ++cookie); + } + } ret = sendmsg(fd, msg, flags); if (ret == -1 && errno == EAGAIN) @@ -190,6 +216,10 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy) if (do_zerocopy && ret) expected_completions++; } + if (do_zerocopy && domain == PF_RDS) { + msg->msg_control = NULL; + msg->msg_controllen = 0; + } return true; } @@ -216,7 
+246,9 @@ static void do_sendmsg_corked(int fd, struct msghdr *msg) msg->msg_iov[0].iov_len = payload_len + extra_len; extra_len = 0; - do_sendmsg(fd, msg, do_zerocopy); + do_sendmsg(fd, msg, do_zerocopy, + (cfg_dst_addr.ss_family == AF_INET ? + PF_INET : PF_INET6)); } do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); @@ -300,13 +332,38 @@ static int do_setup_tx(int domain, int type, int protocol) if (cfg_zerocopy) do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1); - if (domain != PF_PACKET) + if (domain != PF_PACKET && domain != PF_RDS) if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) error(1, errno, "connect"); + if (domain == PF_RDS) { + if (bind(fd, (void *) &cfg_src_addr, cfg_alen)) + error(1, errno, "bind"); + } + return fd; } +static int do_process_zerocopy_cookies(struct sock_extended_err *serr, + uint32_t *ckbuf, size_t nbytes) +{ + int ncookies, i; + + if (serr->ee_errno != 0) + error(1, 0, "serr: wrong error code: %u", serr->ee_errno); + ncookies = serr->ee_data; + if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES) + error(1, 0, "Returned %d cookies, max expected %d\n", + ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES); + if (nbytes != ncookies * sizeof(uint32_t)) + error(1, 0, "Expected %d cookies, got %ld\n", + ncookies, nbytes/sizeof(uint32_t)); + for (i = 0; i < ncookies; i++) + if (cfg_verbose >= 2) + fprintf(stderr, "%d\n", ckbuf[i]); + return ncookies; +} + static bool do_recv_completion(int fd) { struct sock_extended_err *serr; @@ -315,10 +372,17 @@ static bool do_recv_completion(int fd) uint32_t hi, lo, range; int ret, zerocopy; char control[100]; + uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES]; + struct iovec iov; msg.msg_control = control; msg.msg_controllen = sizeof(control); + iov.iov_base = ckbuf; + iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0])); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) return false; @@ -337,6 +401,11 @@ static bool do_recv_completion(int fd) cm->cmsg_level, 
cm->cmsg_type); serr = (void *) CMSG_DATA(cm); + + if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) { + completions += do_process_zerocopy_cookies(serr, ckbuf, ret); + return true; + } if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) error(1, 0, "serr: wrong origin: %u", serr->ee_origin); if (serr->ee_errno != 0) @@ -444,6 +513,13 @@ static void do_tx(int domain, int type, int protocol) msg.msg_iovlen++; } + if (domain == PF_RDS) { + msg.msg_name = &cfg_dst_addr; + msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + } + iov[2].iov_base = payload; iov[2].iov_len = cfg_payload_len; msg.msg_iovlen++; @@ -454,7 +530,7 @@ static void do_tx(int domain, int type, int protocol) if (cfg_cork) do_sendmsg_corked(fd, &msg); else - do_sendmsg(fd, &msg, cfg_zerocopy); + do_sendmsg(fd, &msg, cfg_zerocopy, domain); while (!do_poll(fd, POLLOUT)) { if (cfg_zerocopy) @@ -555,6 +631,40 @@ static void do_flush_datagram(int fd, int type) bytes += cfg_payload_len; } + +static void do_recvmsg(int fd) +{ + int ret, off = 0; + char *buf; + struct iovec iov; + struct msghdr msg; + struct sockaddr_storage din; + + buf = calloc(cfg_payload_len, sizeof(char)); + iov.iov_base = buf; + iov.iov_len = cfg_payload_len; + + memset(&msg, 0, sizeof(msg)); + msg.msg_name = &din; + msg.msg_namelen = sizeof(din); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ret = recvmsg(fd, &msg, MSG_TRUNC); + + if (ret == -1) + error(1, errno, "recv"); + if (ret != cfg_payload_len) + error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len); + + if (memcmp(buf + off, payload, ret)) + error(1, 0, "recv: data mismatch"); + + free(buf); + packets++; + bytes += cfg_payload_len; +} + static void do_rx(int domain, int type, int protocol) { uint64_t tstop; @@ -566,6 +676,8 @@ static void do_rx(int domain, int type, int protocol) do { if (type == SOCK_STREAM) do_flush_tcp(fd); + else if (domain == PF_RDS) + do_recvmsg(fd); else do_flush_datagram(fd, type); @@ -610,6 
+722,7 @@ static void parse_opts(int argc, char **argv) 40 /* max tcp options */; int c; char *daddr = NULL, *saddr = NULL; + char *cfg_test; cfg_payload_len = max_payload_len; @@ -667,6 +780,14 @@ static void parse_opts(int argc, char **argv) break; } } + + cfg_test = argv[argc - 1]; + if (strcmp(cfg_test, "rds") == 0) { + if (!daddr) + error(1, 0, "-D <server addr> required for PF_RDS\n"); + if (!cfg_rx && !saddr) + error(1, 0, "-S <client addr> required for PF_RDS\n"); + } setup_sockaddr(cfg_family, daddr, &cfg_dst_addr); setup_sockaddr(cfg_family, saddr, &cfg_src_addr); @@ -699,6 +820,8 @@ int main(int argc, char **argv) do_test(cfg_family, SOCK_STREAM, 0); else if (!strcmp(cfg_test, "udp")) do_test(cfg_family, SOCK_DGRAM, 0); + else if (!strcmp(cfg_test, "rds")) + do_test(PF_RDS, SOCK_SEQPACKET, 0); else error(1, 0, "unknown cfg_test %s", cfg_test); |
