From patchwork Fri Mar 16 21:34:37 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Michael Tokarev X-Patchwork-Id: 147281 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id E584FB6EF4 for ; Sat, 17 Mar 2012 08:39:27 +1100 (EST) Received: from localhost ([::1]:48904 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1S8ert-0004kj-No for incoming@patchwork.ozlabs.org; Fri, 16 Mar 2012 17:39:25 -0400 Received: from eggs.gnu.org ([208.118.235.92]:60967) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1S8erF-0003JX-9L for qemu-devel@nongnu.org; Fri, 16 Mar 2012 17:38:48 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1S8equ-0005EY-1l for qemu-devel@nongnu.org; Fri, 16 Mar 2012 17:38:44 -0400 Received: from isrv.corpit.ru ([86.62.121.231]:43258) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1S8eqt-0005E9-Im for qemu-devel@nongnu.org; Fri, 16 Mar 2012 17:38:23 -0400 Received: from gandalf.tls.msk.ru (mjt.vpn.tls.msk.ru [192.168.177.99]) by isrv.corpit.ru (Postfix) with ESMTP id 14F12A0B8E; Sat, 17 Mar 2012 01:38:22 +0400 (MSK) Received: by gandalf.tls.msk.ru (Postfix, from userid 1000) id CA9F35DC; Sat, 17 Mar 2012 01:34:47 +0400 (MSK) From: Michael Tokarev To: Anthony Liguori , qemu-devel@nongnu.org Date: Sat, 17 Mar 2012 01:34:37 +0400 Message-Id: <1331933677-29628-12-git-send-email-mjt@msgid.tls.msk.ru> X-Mailer: git-send-email 1.7.9.1 In-Reply-To: <1331933677-29628-1-git-send-email-mjt@msgid.tls.msk.ru> References: <1331933677-29628-1-git-send-email-mjt@msgid.tls.msk.ru> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 86.62.121.231 Cc: Paolo Bonzini , Michael Tokarev Subject: [Qemu-devel] [PATCHv5 11/11] rewrite iov_send_recv() and move it to iov.c X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Make it much more understandable, add a missing iov_cnt argument (number of iovs in the iov), and add comments to it. The new implementation has been extensively tested by splitting a large buffer into many small randomly-sized chunks, sending it over socket to another, slow process and verifying the receiving data is the same. Also add a unit test for iov_send_recv(), sending/ receiving data between two processes over a socketpair using random vectors and random sizes. Signed-off-by: Michael Tokarev --- cutils.c | 83 ----------------------------------------- iov.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++ iov.h | 15 ++++--- qemu-coroutine-io.c | 2 +- test-iov.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 216 insertions(+), 90 deletions(-) diff --git a/cutils.c b/cutils.c index cb6f638..e2bc1b8 100644 --- a/cutils.c +++ b/cutils.c @@ -375,86 +375,3 @@ int qemu_parse_fd(const char *param) } return fd; } - -ssize_t iov_send_recv(int sockfd, struct iovec *iov, - size_t offset, size_t bytes, - bool do_sendv) -{ - int iovlen; - ssize_t ret; - size_t diff; - struct iovec *last_iov; - - /* last_iov is inclusive, so count from one. */ - iovlen = 1; - last_iov = iov; - bytes += offset; - - while (last_iov->iov_len < bytes) { - bytes -= last_iov->iov_len; - - last_iov++; - iovlen++; - } - - diff = last_iov->iov_len - bytes; - last_iov->iov_len -= diff; - - while (iov->iov_len <= offset) { - offset -= iov->iov_len; - - iov++; - iovlen--; - } - - iov->iov_base = (char *) iov->iov_base + offset; - iov->iov_len -= offset; - - { -#if defined CONFIG_IOVEC && defined CONFIG_POSIX - struct msghdr msg; - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = iov; - msg.msg_iovlen = iovlen; - - do { - if (do_sendv) { - ret = sendmsg(sockfd, &msg, 0); - } else { - ret = recvmsg(sockfd, &msg, 0); - } - } while (ret == -1 && errno == EINTR); -#else - struct iovec *p = iov; - ret = 0; - while (iovlen > 0) { - int rc; - if (do_sendv) { - rc = send(sockfd, p->iov_base, p->iov_len, 0); - } else { - rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0); - } - if (rc == -1) { - if (errno == EINTR) { - continue; - } - if (ret == 0) { - ret = -1; - } - break; - } - if (rc == 0) { - break; - } - ret += rc; - iovlen--, p++; - } -#endif - } - - /* Undo the changes above */ - iov->iov_base = (char *) iov->iov_base - offset; - iov->iov_len += offset; - last_iov->iov_len += diff; - return ret; -} diff --git a/iov.c b/iov.c index 9657d28..7cc08f0 100644 --- a/iov.c +++ b/iov.c @@ -18,6 +18,14 @@ #include "iov.h" +#ifdef _WIN32 +# include +# include +#else +# include +# include +#endif + size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t offset, const void *buf, size_t bytes) { @@ -87,6 +95,101 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) return len; } +/* helper function for iov_send_recv() */ +static ssize_t +do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) +{ +#if defined CONFIG_IOVEC && defined CONFIG_POSIX + ssize_t ret; + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = iov_cnt; + do { + ret = do_send + ? sendmsg(sockfd, &msg, 0) + : recvmsg(sockfd, &msg, 0); + } while (ret < 0 && errno == EINTR); + return ret; +#else + /* else send piece-by-piece */ + /*XXX Note: windows has WSASend() and WSARecv() */ + unsigned i; + size_t count = 0; + for (i = 0; i < iov_cnt; ++i) { + ssize_t r = do_send + ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0) + : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0); + if (r > 0) { + ret += r; + } else if (!r) { + break; + } else if (errno == EINTR) { + continue; + } else { + /* else it is some "other" error, + * only return if there was no data processed. */ + if (ret == 0) { + return -1; + } + break; + } + } + return count; +#endif +} + +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, + bool do_send) +{ + ssize_t ret; + unsigned si, ei; /* start and end indexes */ + + /* Find the start position, skipping `offset' bytes: + * first, skip all full-sized vector elements, */ + for (si = 0; si < iov_cnt && offset >= iov[si].iov_len; ++si) { + offset -= iov[si].iov_len; + } + if (offset) { + assert(si < iov_cnt); + /* second, skip `offset' bytes from the (now) first element, + * undo it on exit */ + iov[si].iov_base += offset; + iov[si].iov_len -= offset; + } + /* Find the end position skipping `bytes' bytes: */ + /* first, skip all full-sized elements */ + for (ei = si; ei < iov_cnt && iov[ei].iov_len <= bytes; ++ei) { + bytes -= iov[ei].iov_len; + } + if (bytes) { + /* second, fixup the last element, and remember + * the length we've cut from the end of it in `bytes' */ + size_t tail; + assert(ei < iov_cnt); + assert(iov[ei].iov_len > bytes); + tail = iov[ei].iov_len - bytes; + iov[ei].iov_len = bytes; + bytes = tail; /* bytes is now equal to the tail size */ + ++ei; + } + + ret = do_send_recv(sockfd, iov + si, ei - si, do_send); + + /* Undo the changes above */ + if (offset) { + iov[si].iov_base -= offset; + iov[si].iov_len += offset; + } + if (bytes) { + iov[ei-1].iov_len += bytes; + } + + return ret; +} + + void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, FILE *fp, const char *prefix, size_t limit) { diff --git a/iov.h b/iov.h index 9b6a883..381f37a 100644 --- a/iov.h +++ b/iov.h @@ -60,7 +60,7 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * `offset' bytes in the beginning of iovec buffer are skipped and * next `bytes' bytes are used, which must be within data of iovec. * - * r = iov_send_recv(sockfd, iov, offset, bytes, true); + * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true); * * is logically equivalent to * @@ -68,13 +68,16 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * iov_to_buf(iov, iovcnt, offset, buf, bytes); * r = send(sockfd, buf, bytes, 0); * free(buf); + * + * For iov_send_recv() _whole_ area being sent or received + * should be within the iovec, not only beginning of it. */ -ssize_t iov_send_recv(int sockfd, struct iovec *iov, +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, size_t offset, size_t bytes, bool do_send); -#define iov_recv(sockfd, iov, offset, bytes) \ - iov_send_recv(sockfd, iov, offset, bytes, false) -#define iov_send(sockfd, iov, offset, bytes) \ - iov_send_recv(sockfd, iov, offset, bytes, true) +#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false) +#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true) /** * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c index 6693c78..5734965 100644 --- a/qemu-coroutine-io.c +++ b/qemu-coroutine-io.c @@ -34,7 +34,7 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, size_t done = 0; ssize_t ret; while (done < bytes) { - ret = iov_send_recv(sockfd, iov, + ret = iov_send_recv(sockfd, iov, iov_cnt, offset + done, bytes - done, do_send); if (ret > 0) { done += ret; diff --git a/test-iov.c b/test-iov.c index fd8f9fe..8a05e04 100644 --- a/test-iov.c +++ b/test-iov.c @@ -1,6 +1,7 @@ #include #include "qemu-common.h" #include "iov.h" +#include "qemu_socket.h" /* return a (pseudo-)random number in [min, max] range */ @@ -149,10 +150,112 @@ static void test_to_from_buf(void) } } +static void test_io(void) +{ + int sv[2]; + int r; + unsigned i, j, k, s, t; + fd_set fds; + unsigned niov; + struct iovec *iov, *siov; + unsigned char *buf; + size_t sz; + + iov_random(&iov, &niov); + sz = iov_size(iov, niov); + buf = g_malloc(sz); + for (i = 0; i < sz; ++i) { + buf[i] = i & 255; + } + iov_from_buf(iov, niov, 0, buf, sz); + + siov = g_malloc(sizeof(*iov) * niov); + memcpy(siov, iov, sizeof(*iov) * niov); + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) < 0) { + perror("socketpair"); + exit(1); + } + + FD_ZERO(&fds); + + t = 0; + if (fork() == 0) { + /* writer */ + + close(sv[0]); + FD_SET(sv[1], &fds); + fcntl(sv[1], F_SETFL, O_RDWR|O_NONBLOCK); + r = rannum(sz / 2, sz); + setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &r, sizeof(r)); + + for (i = 0; i <= sz; ++i) { + for (j = i; j <= sz; ++j) { + k = i; + do { + s = rannum(0, j - k); + r = iov_send(sv[1], iov, niov, k, s); + g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0); + if (r >= 0) { + k += r; + t += r; + usleep(rannum(0, 30)); + } else if (errno == EAGAIN) { + select(sv[1]+1, NULL, &fds, NULL, NULL); + continue; + } else { + perror("send"); + exit(1); + } + } while(k < j); + } + } + exit(0); + + } else { + /* reader & verifier */ + + close(sv[1]); + FD_SET(sv[0], &fds); + fcntl(sv[0], F_SETFL, O_RDWR|O_NONBLOCK); + r = rannum(sz / 2, sz); + setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &r, sizeof(r)); + usleep(500000); + + for (i = 0; i <= sz; ++i) { + for (j = i; j <= sz; ++j) { + k = i; + iov_memset(iov, niov, 0, j & 255, -1); + do { + s = rannum(0, j - k); + r = iov_recv(sv[0], iov, niov, k, s); + g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0); + if (r > 0) { + k += r; + t += r; + } else if (!r) { + if (s) { + break; + } + } else if (errno == EAGAIN) { + select(sv[0]+1, &fds, NULL, NULL, NULL); + continue; + } else { + perror("recv"); + exit(1); + } + } while(k < j); + test_iov_bytes(iov, niov, i, j - i); + } + } + } +} + int main(int argc, char **argv) { srandom(getpid()); g_test_init(&argc, &argv, NULL); g_test_add_func("/basic/iov/from-to-buf", test_to_from_buf); + g_test_add_func("/basic/iov/io", test_io); return g_test_run(); }