Message ID | 1331845217-21705-12-git-send-email-mjt@msgid.tls.msk.ru |
---|---|
State | New |
Headers | show |
On 03/15/2012 04:00 PM, Michael Tokarev wrote: > Make it much more understandable, add a missing > iov_cnt argument (number of iovs in the iov), and > add comments to it. > > The new implementation has been extensively tested > by splitting a large buffer into many small > randomly-sized chunks, sending it over socket to > another, slow process and verifying the receiving > data is the same. > > Signed-off-by: Michael Tokarev<mjt@tls.msk.ru> > --- > cutils.c | 83 ---------------------------------------- > iov.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++ > iov.h | 15 ++++--- > qemu-coroutine-io.c | 2 +- > 4 files changed, 115 insertions(+), 90 deletions(-) > > diff --git a/cutils.c b/cutils.c > index cb6f638..e2bc1b8 100644 > --- a/cutils.c > +++ b/cutils.c > @@ -375,86 +375,3 @@ int qemu_parse_fd(const char *param) > } > return fd; > } > - > -ssize_t iov_send_recv(int sockfd, struct iovec *iov, > - size_t offset, size_t bytes, > - bool do_sendv) > -{ > - int iovlen; > - ssize_t ret; > - size_t diff; > - struct iovec *last_iov; > - > - /* last_iov is inclusive, so count from one. */ > - iovlen = 1; > - last_iov = iov; > - bytes += offset; > - > - while (last_iov->iov_len< bytes) { > - bytes -= last_iov->iov_len; > - > - last_iov++; > - iovlen++; > - } > - > - diff = last_iov->iov_len - bytes; > - last_iov->iov_len -= diff; > - > - while (iov->iov_len<= offset) { > - offset -= iov->iov_len; > - > - iov++; > - iovlen--; > - } > - > - iov->iov_base = (char *) iov->iov_base + offset; > - iov->iov_len -= offset; > - > - { > -#if defined CONFIG_IOVEC&& defined CONFIG_POSIX > - struct msghdr msg; > - memset(&msg, 0, sizeof(msg)); > - msg.msg_iov = iov; > - msg.msg_iovlen = iovlen; > - > - do { > - if (do_sendv) { > - ret = sendmsg(sockfd,&msg, 0); > - } else { > - ret = recvmsg(sockfd,&msg, 0); > - } > - } while (ret == -1&& errno == EINTR); > -#else > - struct iovec *p = iov; > - ret = 0; > - while (iovlen> 0) { > - int rc; > - if (do_sendv) { > - rc = send(sockfd, p->iov_base, p->iov_len, 0); > - } else { > - rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0); > - } > - if (rc == -1) { > - if (errno == EINTR) { > - continue; > - } > - if (ret == 0) { > - ret = -1; > - } > - break; > - } > - if (rc == 0) { > - break; > - } > - ret += rc; > - iovlen--, p++; > - } > -#endif > - } > - > - /* Undo the changes above */ > - iov->iov_base = (char *) iov->iov_base - offset; > - iov->iov_len += offset; > - last_iov->iov_len += diff; > - return ret; > -} > diff --git a/iov.c b/iov.c > index fd518dd..6c79b68 100644 > --- a/iov.c > +++ b/iov.c > @@ -18,6 +18,14 @@ > > #include "iov.h" > > +#ifdef _WIN32 > +# include<windows.h> > +# include<winsock2.h> > +#else > +# include<sys/types.h> > +# include<sys/socket.h> > +#endif > + > size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, > size_t offset, const void *buf, size_t bytes) > { > @@ -87,6 +95,103 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) > return len; > } > > +/* helper function for iov_send_recv() */ > +static ssize_t > +do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) > +{ > +#if defined CONFIG_IOVEC&& defined CONFIG_POSIX > + ssize_t ret; > + struct msghdr msg; > + memset(&msg, 0, sizeof(msg)); > + msg.msg_iov = iov; > + msg.msg_iovlen = iov_cnt; > + do { > + ret = do_send > + ? sendmsg(sockfd,&msg, 0) > + : recvmsg(sockfd,&msg, 0); > + } while (ret< 0&& errno == EINTR); > + return ret; > +#else > + /* else send piece-by-piece */ > + /*XXX Note: windows has WSASend() and WSARecv() */ > + unsigned i; > + size_t count = 0; > + for(i = 0; i< iov_cnt; ++i) { > + ssize_t r = do_send > + ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0) > + : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0); > + if (r> 0) { > + ret += r; > + } else if (!r) { > + break; > + } else if (errno == EINTR) { > + continue; > + } else { > + /* else it is some "other" error, > + * only return if there was no data processed. */ > + if (ret == 0) { > + return -1; > + } > + break; > + } > + } > + return count; > +#endif > +} > + > +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, > + size_t offset, size_t bytes, > + bool do_send) > +{ > + ssize_t ret; > + size_t lastlen; > + unsigned si, ei; /* start and end indexes */ > + > + /* Find the start position, skipping `offset' bytes: > + * first, skip all full-sized vector elements, */ > + for(si = 0; si< iov_cnt&& offset>= iov[si].iov_len; ++si) { > + offset -= iov[si].iov_len; > + } > + if (offset) { > + assert(si< iov_cnt); > + /* second, skip `offset' bytes from the (now) first element, > + * undo it on exit */ > + iov[si].iov_base += offset; > + iov[si].iov_len -= offset; > + } > + /* Find the end position skipping `bytes' bytes: */ > + /* first, skip all full-sized elements */ > + for(ei = si; ei< iov_cnt&& iov[ei].iov_len<= bytes; ++ei) { > + bytes -= iov[ei].iov_len; > + } > + if (bytes) { > + /* second, fixup the last element, and remember > + * the length we've cut from the end of it in `bytes' */ > + assert(ei< iov_cnt); > + assert(iov[ei].iov_len> bytes); > + lastlen = iov[ei].iov_len; > + iov[ei].iov_len = bytes; > + ++ei; > + } else { > + lastlen = 0; > + } > + > + ret = do_send_recv(sockfd, iov + si, ei - si, do_send); > + > + /* Undo the changes above */ > + if (offset) { > + iov[si].iov_base -= offset; > + iov[si].iov_len += offset; > + } > + if (lastlen) { > + --ei; > + iov[ei].iov_len = lastlen; > + } > + > + return ret; > +} > + > + Coding style is off in this code. I'd really like to see unit tests if we're rewriting core functions... Regards, Anthony Liguori > void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, > FILE *fp, const char *prefix, size_t limit) > { > diff --git a/iov.h b/iov.h > index 9b6a883..381f37a 100644 > --- a/iov.h > +++ b/iov.h > @@ -60,7 +60,7 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, > * `offset' bytes in the beginning of iovec buffer are skipped and > * next `bytes' bytes are used, which must be within data of iovec. > * > - * r = iov_send_recv(sockfd, iov, offset, bytes, true); > + * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true); > * > * is logically equivalent to > * > @@ -68,13 +68,16 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, > * iov_to_buf(iov, iovcnt, offset, buf, bytes); > * r = send(sockfd, buf, bytes, 0); > * free(buf); > + * > + * For iov_send_recv() _whole_ area being sent or received > + * should be within the iovec, not only beginning of it. > */ > -ssize_t iov_send_recv(int sockfd, struct iovec *iov, > +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, > size_t offset, size_t bytes, bool do_send); > -#define iov_recv(sockfd, iov, offset, bytes) \ > - iov_send_recv(sockfd, iov, offset, bytes, false) > -#define iov_send(sockfd, iov, offset, bytes) \ > - iov_send_recv(sockfd, iov, offset, bytes, true) > +#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \ > + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false) > +#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \ > + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true) > > /** > * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements > diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c > index 6693c78..5734965 100644 > --- a/qemu-coroutine-io.c > +++ b/qemu-coroutine-io.c > @@ -34,7 +34,7 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, > size_t done = 0; > ssize_t ret; > while (done< bytes) { > - ret = iov_send_recv(sockfd, iov, > + ret = iov_send_recv(sockfd, iov, iov_cnt, > offset + done, bytes - done, do_send); > if (ret> 0) { > done += ret;
diff --git a/cutils.c b/cutils.c index cb6f638..e2bc1b8 100644 --- a/cutils.c +++ b/cutils.c @@ -375,86 +375,3 @@ int qemu_parse_fd(const char *param) } return fd; } - -ssize_t iov_send_recv(int sockfd, struct iovec *iov, - size_t offset, size_t bytes, - bool do_sendv) -{ - int iovlen; - ssize_t ret; - size_t diff; - struct iovec *last_iov; - - /* last_iov is inclusive, so count from one. */ - iovlen = 1; - last_iov = iov; - bytes += offset; - - while (last_iov->iov_len < bytes) { - bytes -= last_iov->iov_len; - - last_iov++; - iovlen++; - } - - diff = last_iov->iov_len - bytes; - last_iov->iov_len -= diff; - - while (iov->iov_len <= offset) { - offset -= iov->iov_len; - - iov++; - iovlen--; - } - - iov->iov_base = (char *) iov->iov_base + offset; - iov->iov_len -= offset; - - { -#if defined CONFIG_IOVEC && defined CONFIG_POSIX - struct msghdr msg; - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = iov; - msg.msg_iovlen = iovlen; - - do { - if (do_sendv) { - ret = sendmsg(sockfd, &msg, 0); - } else { - ret = recvmsg(sockfd, &msg, 0); - } - } while (ret == -1 && errno == EINTR); -#else - struct iovec *p = iov; - ret = 0; - while (iovlen > 0) { - int rc; - if (do_sendv) { - rc = send(sockfd, p->iov_base, p->iov_len, 0); - } else { - rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0); - } - if (rc == -1) { - if (errno == EINTR) { - continue; - } - if (ret == 0) { - ret = -1; - } - break; - } - if (rc == 0) { - break; - } - ret += rc; - iovlen--, p++; - } -#endif - } - - /* Undo the changes above */ - iov->iov_base = (char *) iov->iov_base - offset; - iov->iov_len += offset; - last_iov->iov_len += diff; - return ret; -} diff --git a/iov.c b/iov.c index fd518dd..6c79b68 100644 --- a/iov.c +++ b/iov.c @@ -18,6 +18,14 @@ #include "iov.h" +#ifdef _WIN32 +# include <windows.h> +# include <winsock2.h> +#else +# include <sys/types.h> +# include <sys/socket.h> +#endif + size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t offset, const void *buf, size_t bytes) { @@ -87,6 +95,103 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) return len; } +/* helper function for iov_send_recv() */ +static ssize_t +do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) +{ +#if defined CONFIG_IOVEC && defined CONFIG_POSIX + ssize_t ret; + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = iov_cnt; + do { + ret = do_send + ? sendmsg(sockfd, &msg, 0) + : recvmsg(sockfd, &msg, 0); + } while (ret < 0 && errno == EINTR); + return ret; +#else + /* else send piece-by-piece */ + /*XXX Note: windows has WSASend() and WSARecv() */ + unsigned i; + size_t count = 0; + for(i = 0; i < iov_cnt; ++i) { + ssize_t r = do_send + ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0) + : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0); + if (r > 0) { + ret += r; + } else if (!r) { + break; + } else if (errno == EINTR) { + continue; + } else { + /* else it is some "other" error, + * only return if there was no data processed. */ + if (ret == 0) { + return -1; + } + break; + } + } + return count; +#endif +} + +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, + bool do_send) +{ + ssize_t ret; + size_t lastlen; + unsigned si, ei; /* start and end indexes */ + + /* Find the start position, skipping `offset' bytes: + * first, skip all full-sized vector elements, */ + for(si = 0; si < iov_cnt && offset >= iov[si].iov_len; ++si) { + offset -= iov[si].iov_len; + } + if (offset) { + assert(si < iov_cnt); + /* second, skip `offset' bytes from the (now) first element, + * undo it on exit */ + iov[si].iov_base += offset; + iov[si].iov_len -= offset; + } + /* Find the end position skipping `bytes' bytes: */ + /* first, skip all full-sized elements */ + for(ei = si; ei < iov_cnt && iov[ei].iov_len <= bytes; ++ei) { + bytes -= iov[ei].iov_len; + } + if (bytes) { + /* second, fixup the last element, and remember + * the length we've cut from the end of it in `bytes' */ + assert(ei < iov_cnt); + assert(iov[ei].iov_len > bytes); + lastlen = iov[ei].iov_len; + iov[ei].iov_len = bytes; + ++ei; + } else { + lastlen = 0; + } + + ret = do_send_recv(sockfd, iov + si, ei - si, do_send); + + /* Undo the changes above */ + if (offset) { + iov[si].iov_base -= offset; + iov[si].iov_len += offset; + } + if (lastlen) { + --ei; + iov[ei].iov_len = lastlen; + } + + return ret; +} + + void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, FILE *fp, const char *prefix, size_t limit) { diff --git a/iov.h b/iov.h index 9b6a883..381f37a 100644 --- a/iov.h +++ b/iov.h @@ -60,7 +60,7 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * `offset' bytes in the beginning of iovec buffer are skipped and * next `bytes' bytes are used, which must be within data of iovec. * - * r = iov_send_recv(sockfd, iov, offset, bytes, true); + * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true); * * is logically equivalent to * @@ -68,13 +68,16 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * iov_to_buf(iov, iovcnt, offset, buf, bytes); * r = send(sockfd, buf, bytes, 0); * free(buf); + * + * For iov_send_recv() _whole_ area being sent or received + * should be within the iovec, not only beginning of it. */ -ssize_t iov_send_recv(int sockfd, struct iovec *iov, +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, size_t offset, size_t bytes, bool do_send); -#define iov_recv(sockfd, iov, offset, bytes) \ - iov_send_recv(sockfd, iov, offset, bytes, false) -#define iov_send(sockfd, iov, offset, bytes) \ - iov_send_recv(sockfd, iov, offset, bytes, true) +#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false) +#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true) /** * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c index 6693c78..5734965 100644 --- a/qemu-coroutine-io.c +++ b/qemu-coroutine-io.c @@ -34,7 +34,7 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, size_t done = 0; ssize_t ret; while (done < bytes) { - ret = iov_send_recv(sockfd, iov, + ret = iov_send_recv(sockfd, iov, iov_cnt, offset + done, bytes - done, do_send); if (ret > 0) { done += ret;
Make it much more understandable, add a missing iov_cnt argument (number of iovs in the iov), and add comments to it. The new implementation has been extensively tested by splitting a large buffer into many small randomly-sized chunks, sending it over socket to another, slow process and verifying the receiving data is the same. Signed-off-by: Michael Tokarev <mjt@tls.msk.ru> --- cutils.c | 83 ---------------------------------------- iov.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++ iov.h | 15 ++++--- qemu-coroutine-io.c | 2 +- 4 files changed, 115 insertions(+), 90 deletions(-)