@@ -40,6 +40,7 @@ typedef struct NetSocketState {
int state; /* 0 = getting length, 1 = getting data */
unsigned int index;
unsigned int packet_len;
+ unsigned int send_index; /* number of bytes sent (only SOCK_STREAM) */
uint8_t buf[4096];
struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */
IOHandler *send_fn; /* differs between SOCK_STREAM/SOCK_DGRAM */
@@ -88,15 +89,54 @@ static void net_socket_writable(void *opaque)
qemu_flush_queued_packets(&s->nc);
}
-/* XXX: we consider we can send the whole packet without blocking */
static ssize_t net_socket_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc);
- uint32_t len;
- len = htonl(size);
+ struct iovec iov[2]; /* [0] = unsent part of length header (if any), then payload */
+ int iovcnt = 0;
+ ssize_t ret;
+ ssize_t total_size = 0; /* bytes still to be written for this packet */
+ uint32_t len = htonl(size); /* length header, network byte order */
+ unsigned int buf_index; /* payload bytes already written by earlier calls */
+
+ /* Length header: s->send_index counts header bytes first, then payload bytes */
+ if (s->send_index < sizeof(len)) {
+ iov[iovcnt].iov_base = (uint8_t *)&len + s->send_index;
+ iov[iovcnt].iov_len = sizeof(len) - s->send_index;
+ total_size += iov[iovcnt].iov_len;
+ iovcnt++;
+
+ buf_index = 0;
+ } else {
+ buf_index = s->send_index - sizeof(len);
+ }
+
+ assert(buf_index < size);
+
+ /* Payload buffer: resume after the bytes that were already written */
+ iov[iovcnt].iov_base = (uint8_t *)buf + buf_index;
+ iov[iovcnt].iov_len = size - buf_index;
+ total_size += iov[iovcnt].iov_len;
+ iovcnt++;
+
+ do {
+ ret = writev(s->fd, iov, iovcnt);
+ } while (ret == -1 && errno == EINTR); /* retry if interrupted; EINTR is reported via errno, not ret */
- send_all(s->fd, (const uint8_t *)&len, sizeof(len));
- return send_all(s->fd, buf, size);
+ if (ret == -1 && errno == EAGAIN) {
+ ret = 0; /* nothing written; handled as a short write further down */
+ }
+ if (ret == -1) {
+ s->send_index = 0; /* hard error: drop any partial-send progress */
+ return -errno;
+ }
+ if (ret < total_size) {
+ s->send_index += ret; /* short write: remember how far we got */
+ net_socket_write_poll(s, true);
+ return 0;
+ }
+ s->send_index = 0; /* whole packet written; reset for the next one */
+ return size;
}
static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size)
Replace spinning send_all() with a proper non-blocking send. When the socket write buffer limit is reached, we should stop trying to send and wait for the socket to become writable again. Non-blocking TCP sockets can return in two different ways when the write buffer limit is reached: 1. ret = -1 and errno = EAGAIN/EWOULDBLOCK. No data has been written. 2. ret < total_size. Short write, only part of the message was transmitted. Handle both cases and keep track of how many bytes have been written in s->send_index. (This includes the 'length' header before the actual payload buffer.) Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> --- net/socket.c | 50 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-)