diff mbox

[v1,RFC,23/34] io: add QIOChannelSocket class

Message ID 1429280557-8887-24-git-send-email-berrange@redhat.com
State New
Headers show

Commit Message

Daniel P. Berrangé April 17, 2015, 2:22 p.m. UTC
Implement a QIOChannel subclass that supports sockets I/O

TBD check errno handling of windows port & fix watch impl

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
---
 include/io/channel-socket.h | 168 +++++++++++++
 io/Makefile.objs            |   1 +
 io/channel-socket.c         | 572 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 741 insertions(+)
 create mode 100644 include/io/channel-socket.h
 create mode 100644 io/channel-socket.c

Comments

Paolo Bonzini April 17, 2015, 3:28 p.m. UTC | #1
On 17/04/2015 16:22, Daniel P. Berrange wrote:
> Implement a QIOChannel subclass that supports sockets I/O
> 
> TBD check errno handling of windows port & fix watch impl
> 
> Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
> ---
>  include/io/channel-socket.h | 168 +++++++++++++
>  io/Makefile.objs            |   1 +
>  io/channel-socket.c         | 572 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 741 insertions(+)
>  create mode 100644 include/io/channel-socket.h
>  create mode 100644 io/channel-socket.c
> 
> diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
> new file mode 100644
> index 0000000..b95349b
> --- /dev/null
> +++ b/include/io/channel-socket.h
> @@ -0,0 +1,168 @@
> +/*
> + * QEMU I/O channels sockets driver
> + *
> + * Copyright (c) 2015 Red Hat, Inc.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +#ifndef QIO_CHANNEL_SOCKET_H__
> +#define QIO_CHANNEL_SOCKET_H__
> +
> +#include "io/channel.h"
> +#include "qemu/sockets.h"
> +
> +#define TYPE_QIO_CHANNEL_SOCKET "qemu:io-channel-socket"
> +#define QIO_CHANNEL_SOCKET(obj)                                     \
> +    OBJECT_CHECK(QIOChannelSocket, (obj), TYPE_QIO_CHANNEL_SOCKET)
> +
> +typedef struct QIOChannelSocket QIOChannelSocket;
> +
> +/**
> + * QIOChannelSocket:
> + *
> + * The QIOChannelSocket class provides a channel implementation
> + * that can transport data over a UNIX socket or TCP socket.
> + * Beyond the core channel API, it also provides functionality
> + * for accepting client connections, tuning some socket
> + * parameters and getting socket address strings.
> + */
> +
> +struct QIOChannelSocket {
> +    QIOChannel parent;
> +    int fd;
> +    struct sockaddr_storage localAddr;
> +    socklen_t localAddrLen;
> +    struct sockaddr_storage remoteAddr;
> +    socklen_t remoteAddrLen;
> +};
> +
> +
> +/**
> + * qio_channel_socket_new_fd:
> + * @fd: the socket file descriptor
> + * @errp: pointer to an uninitialized error object
> + *
> + * Create a channel for performing I/O on the socket
> + * connection represented by the file descriptor @fd.
> + *
> + * Returns: the socket channel object, or NULL on error
> + */
> +QIOChannelSocket *
> +qio_channel_socket_new_fd(int fd,
> +                          Error **errp);
> +
> +/**
> + * qio_channel_socket_get_local_addr_string:
> + * @ioc: the socket channel object
> + * @hostname: pointer to be filled with hostname string
> + * @servicename: pointer to be filled with servicename string
> + * @family: pointer to be filled with network address family
> + * @errp: pointer to an uninitialized error object
> + *
> + * Get the string representation of the local socket
> + * address. The address information will be stored in
> + * the @hostname, @servicename and @family parameters.
> + * The @hostname and @servicename strings will be
> + * allocated to the size required and should be free
> + * with g_free() when no longer required
> + *
> + * Returns: 0 on success, -1 on error
> + */
> +int
> +qio_channel_socket_get_local_addr_string(QIOChannelSocket *ioc,
> +                                         char **hostname,
> +                                         char **servicename,
> +                                         NetworkAddressFamily *family,
> +                                         Error **errp);
> +
> +/**
> + * qio_channel_socket_get_remote_addr_string:
> + * @ioc: the socket channel object
> + * @hostname: pointer to be filled with hostname string
> + * @servicename: pointer to be filled with servicename string
> + * @family: pointer to be filled with network address family
> + * @errp: pointer to an uninitialized error object
> + *
> + * Get the string representation of the remote socket
> + * address. The address information will be stored in
> + * the @hostname, @servicename and @family parameters.
> + * The @hostname and @servicename strings will be
> + * allocated to the size required and should be free
> + * with g_free() when no longer required
> + *
> + * Returns: 0 on success, -1 on error
> + */
> +int
> +qio_channel_socket_get_remote_addr_string(QIOChannelSocket *ioc,
> +                                          char **hostname,
> +                                          char **servicename,
> +                                          NetworkAddressFamily *family,
> +                                          Error **errp);

Would it be possible to change these to use a SocketAddress* type?

> +
> +/**
> + * qio_channel_socket_set_nodelay:
> + * @ioc: the socket channel object
> + * @enabled: the new flag state
> + *
> + * Set the state of the NODELAY socket flag. If the

The function name is okay, but please write TCP_NODELAY in the
documentation, or talk about Nagle's algorithm instead of mentioning the
flag.

> + * @enabled parameter is true, then NODELAY will be
> + * set and data will be transmitted immediately. If
> + * @enabled is false, then data may be temporarily
> + * held for transmission to enable writes to be
> + * coallesced.
> + */
> +void
> +qio_channel_socket_set_nodelay(QIOChannelSocket *ioc,
> +                               bool enabled);
> +
> +/**
> + * qio_channel_socket_accept:
> + * @ioc: the socket channel object
> + * @errp: pointer to an uninitialized error object
> + *
> + * If the socket represents a server, then this accepts
> + * a new client connection. The returned channel will
> + * represent the connected client socket.
> + *
> + * Returns: the new client channel, or NULL on error
> + */
> +QIOChannelSocket *
> +qio_channel_socket_accept(QIOChannelSocket *ioc,
> +                          Error **errp);

Does it make sense for a passive socket to be a QIOChannelSocket?  We
have already a pretty decent API in util/qemu-sockets.c, and
QIOChannelSocket will become more similar to qemu-sockets if you switch
to SocketAddress.  Perhaps this function can just take a file descriptor?

Paolo

> +
> +typedef enum {
> +    QIO_CHANNEL_SOCKET_SHUTDOWN_BOTH,
> +    QIO_CHANNEL_SOCKET_SHUTDOWN_READ,
> +    QIO_CHANNEL_SOCKET_SHUTDOWN_WRITE,
> +} QIOChannelSocketShutdown;
> +
> +/**
> + * qio_channel_socket_shutdown:
> + * @ioc: the socket channel object
> + * @how: the direction to shutdown
> + * @errp: pointer to an uninitialized error object
> + *
> + * Shutdowns transmission or receiving on a socket
> + * without closing the socket file descriptor.
> + *
> + * Returns: 0 on success, -1 on error
> + */
> +int
> +qio_channel_socket_shutdown(QIOChannelSocket *ioc,
> +                            QIOChannelSocketShutdown how,
> +                            Error **errp);
> +
> +#endif /* QIO_CHANNEL_SOCKET_H__ */
> diff --git a/io/Makefile.objs b/io/Makefile.objs
> index a776676..4f5e276 100644
> --- a/io/Makefile.objs
> +++ b/io/Makefile.objs
> @@ -1,2 +1,3 @@
>  util-obj-y += channel.o
>  util-obj-y += channel-unix.o
> +util-obj-y += channel-socket.o
> diff --git a/io/channel-socket.c b/io/channel-socket.c
> new file mode 100644
> index 0000000..5eacf38
> --- /dev/null
> +++ b/io/channel-socket.c
> @@ -0,0 +1,572 @@
> +/*
> + * QEMU I/O channels sockets driver
> + *
> + * Copyright (c) 2015 Red Hat, Inc.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +#include <glib/gi18n.h>
> +
> +#include "io/channel-socket.h"
> +#include "io/channel-unix.h"
> +
> +#define SOCKET_MAX_FDS 16
> +
> +static int
> +qio_channel_socket_get_addr_string(struct sockaddr_storage *sa,
> +                                   socklen_t salen,
> +                                   char **hostname,
> +                                   char **servicename,
> +                                   NetworkAddressFamily *family,
> +                                   Error **errp)
> +{
> +    char host[NI_MAXHOST];
> +    char serv[NI_MAXSERV];
> +    int ret;
> +
> +    ret = getnameinfo((struct sockaddr *)sa, salen,
> +                      host, sizeof(host),
> +                      serv, sizeof(serv),
> +                      NI_NUMERICHOST | NI_NUMERICSERV);
> +    if (ret != 0) {
> +        error_setg(errp, "Cannot format numeric socket address: %s\n",
> +                   gai_strerror(ret));
> +        return -1;
> +    }
> +
> +    *hostname = g_strdup(host);
> +    *servicename = g_strdup(serv);
> +    *family = inet_netfamily(sa->ss_family);
> +    return 0;
> +}
> +
> +int
> +qio_channel_socket_get_local_addr_string(QIOChannelSocket *ioc,
> +                                         char **hostname,
> +                                         char **servicename,
> +                                         NetworkAddressFamily *family,
> +                                         Error **errp)
> +{
> +    return qio_channel_socket_get_addr_string(&ioc->localAddr,
> +                                              ioc->localAddrLen,
> +                                              hostname,
> +                                              servicename,
> +                                              family,
> +                                              errp);
> +}
> +
> +int
> +qio_channel_socket_get_remote_addr_string(QIOChannelSocket *ioc,
> +                                          char **hostname,
> +                                          char **servicename,
> +                                          NetworkAddressFamily *family,
> +                                          Error **errp)
> +{
> +    return qio_channel_socket_get_addr_string(&ioc->remoteAddr,
> +                                              ioc->remoteAddrLen,
> +                                              hostname,
> +                                              servicename,
> +                                              family,
> +                                              errp);
> +}
> +
> +QIOChannelSocket *
> +qio_channel_socket_new_fd(int fd,
> +                          Error **errp)
> +{
> +    QIOChannelSocket *ioc;
> +
> +    ioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
> +
> +    ioc->fd = fd;
> +    ioc->remoteAddrLen = sizeof(ioc->remoteAddr);
> +    ioc->localAddrLen = sizeof(ioc->localAddr);
> +
> +    if (getpeername(fd, (struct sockaddr *)&ioc->remoteAddr,
> +                    &ioc->remoteAddrLen) < 0) {
> +        if (socket_error() == ENOTCONN) {
> +            memset(&ioc->remoteAddr, 0, sizeof(ioc->remoteAddr));
> +            ioc->remoteAddrLen = sizeof(ioc->remoteAddr);
> +        } else {
> +            error_setg_errno(errp, socket_error(), "%s",
> +                             _("Unable to query remote socket address"));
> +            goto error;
> +        }
> +    }
> +
> +    if (getsockname(fd, (struct sockaddr *)&ioc->localAddr,
> +                    &ioc->localAddrLen) < 0) {
> +        error_setg_errno(errp, socket_error(), "%s",
> +                         _("Unable to query local socket address"));
> +        goto error;
> +    }
> +
> +    return ioc;
> +
> + error:
> +    ioc->fd = -1; /* Let the caller close FD on failure */
> +    object_unref(OBJECT(ioc));
> +    return NULL;
> +}
> +
> +QIOChannelSocket *
> +qio_channel_socket_accept(QIOChannelSocket *ioc,
> +                          Error **errp)
> +{
> +    QIOChannelSocket *cioc;
> +
> +    cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
> +    cioc->fd = -1;
> +    cioc->remoteAddrLen = sizeof(ioc->remoteAddr);
> +    cioc->localAddrLen = sizeof(ioc->localAddr);
> +
> + retry:
> +    cioc->fd = accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr,
> +                      &cioc->remoteAddrLen);
> +    if (cioc->fd < 0) {
> +        if (socket_error() == EINTR) {
> +            goto retry;
> +        }
> +        goto error;
> +    }
> +
> +    if (getsockname(cioc->fd, (struct sockaddr *)&ioc->localAddr,
> +                    &ioc->localAddrLen) < 0) {
> +        error_setg_errno(errp, socket_error(), "%s",
> +                         _("Unable to query local socket address"));
> +        goto error;
> +    }
> +
> +    return cioc;
> +
> + error:
> +    object_unref(OBJECT(cioc));
> +    return NULL;
> +}
> +
> +static void qio_channel_socket_init(Object *obj)
> +{
> +    QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj);
> +    ioc->fd = -1;
> +}
> +
> +static void qio_channel_socket_finalize(Object *obj)
> +{
> +    QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj);
> +    if (ioc->fd != -1) {
> +        close(ioc->fd);
> +        ioc->fd = -1;
> +    }
> +}
> +
> +static bool qio_channel_socket_has_feature(QIOChannel *ioc,
> +                                           QIOChannelFeature feature)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +
> +    switch (feature) {
> +    case QIO_CHANNEL_FEATURE_FD_PASS:
> +#ifndef WIN32
> +        if (sioc->localAddr.ss_family == AF_UNIX) {
> +            return true;
> +        } else {
> +#endif /* WIN32 */
> +            return false;
> +#ifndef WIN32
> +        }
> +#endif /* WIN32 */
> +    default:
> +        return false;
> +    }
> +}
> +
> +
> +#ifndef WIN32
> +static void qio_channel_socket_copy_fds(struct msghdr *msg,
> +                                        int **fds, size_t *nfds)
> +{
> +    struct cmsghdr *cmsg;
> +
> +    *nfds = 0;
> +    *fds = NULL;
> +
> +    for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
> +        int fd_size, i;
> +        int gotfds;
> +
> +        if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) ||
> +            cmsg->cmsg_level != SOL_SOCKET ||
> +            cmsg->cmsg_type != SCM_RIGHTS) {
> +            continue;
> +        }
> +
> +        fd_size = cmsg->cmsg_len - CMSG_LEN(0);
> +
> +        if (!fd_size) {
> +            continue;
> +        }
> +
> +        gotfds = fd_size / sizeof(int);
> +        *fds = g_renew(int, *fds, *nfds + gotfds);
> +        memcpy(*fds + *nfds, CMSG_DATA(cmsg), fd_size);
> +
> +        for (i = 0; i < gotfds; i++) {
> +            int fd = (*fds)[*nfds + i];
> +            if (fd < 0) {
> +                continue;
> +            }
> +
> +            /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
> +            qemu_set_block(fd);
> +
> +#ifndef MSG_CMSG_CLOEXEC
> +            qemu_set_cloexec(fd);
> +#endif
> +        }
> +        *nfds += gotfds;
> +    }
> +}
> +
> +
> +static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
> +                                        const struct iovec *iov,
> +                                        size_t niov,
> +                                        int **fds,
> +                                        size_t *nfds,
> +                                        int flags,
> +                                        Error **errp)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +    ssize_t ret;
> +    struct msghdr msg = { NULL, };
> +    char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
> +    int sflags = 0;
> +
> +    if (flags & ~QIO_CHANNEL_READ_PEEK) {
> +        error_setg_errno(errp, EINVAL,
> +                         _("Flags %x are not supported"),
> +                         flags & ~QIO_CHANNEL_READ_PEEK);
> +        return -1;
> +    }
> +
> +#ifdef MSG_CMSG_CLOEXEC
> +    sflags |= MSG_CMSG_CLOEXEC;
> +#endif
> +    if (flags & QIO_CHANNEL_READ_PEEK) {
> +        sflags |= MSG_PEEK;
> +    }
> +
> +    msg.msg_iov = (struct iovec *)iov;
> +    msg.msg_iovlen = niov;
> +    if (fds && nfds) {
> +        msg.msg_control = control;
> +        msg.msg_controllen = sizeof(control);
> +    }
> +
> + retry:
> +    ret = recvmsg(sioc->fd, &msg, sflags);
> +    if (ret < 0) {
> +        if (socket_error() == EAGAIN) {
> +            return -2;
> +        }
> +        if (socket_error() == EINTR) {
> +            goto retry;
> +        }
> +
> +        error_setg_errno(errp, socket_error(), "%s",
> +                         _("Unable to read from socket"));
> +        return -1;
> +    }
> +
> +    if (fds && nfds) {
> +        qio_channel_socket_copy_fds(&msg, fds, nfds);
> +    }
> +
> +    return ret;
> +}
> +
> +static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> +                                         const struct iovec *iov,
> +                                         size_t niov,
> +                                         int *fds,
> +                                         size_t nfds,
> +                                         int flags G_GNUC_UNUSED,
> +                                         Error **errp)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +    ssize_t ret;
> +    struct msghdr msg = { NULL, };
> +
> +    if (flags) {
> +        error_setg_errno(errp, EINVAL,
> +                         _("Flags %x are not supported"),
> +                         flags);
> +        return -1;
> +    }
> +
> +    msg.msg_iov = (struct iovec *)iov;
> +    msg.msg_iovlen = niov;
> +
> +    if (nfds) {
> +        char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
> +        size_t fdsize = sizeof(int) * nfds;
> +        struct cmsghdr *cmsg;
> +
> +        if (nfds > SOCKET_MAX_FDS) {
> +            error_setg_errno(errp, -EINVAL,
> +                             _("Only %d FDs can be sent, got %zu"),
> +                             SOCKET_MAX_FDS, nfds);
> +            return -1;
> +        }
> +
> +        msg.msg_control = control;
> +        msg.msg_controllen = CMSG_SPACE(sizeof(int) * nfds);
> +
> +        cmsg = CMSG_FIRSTHDR(&msg);
> +        cmsg->cmsg_len = CMSG_LEN(fdsize);
> +        cmsg->cmsg_level = SOL_SOCKET;
> +        cmsg->cmsg_type = SCM_RIGHTS;
> +        memcpy(CMSG_DATA(cmsg), fds, fdsize);
> +    }
> +
> + retry:
> +    ret = sendmsg(sioc->fd, &msg, 0);
> +    if (ret <= 0) {
> +        if (socket_error() == EAGAIN) {
> +            return -2;
> +        }
> +        if (socket_error() == EINTR) {
> +            goto retry;
> +        }
> +        error_setg_errno(errp, socket_error(), "%s",
> +                         _("Unable to write to socket"));
> +        return -1;
> +    }
> +    return ret;
> +}
> +#else /* WIN32 */
> +static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
> +                                        const struct iovec *iov,
> +                                        size_t niov,
> +                                        int **fds,
> +                                        size_t *nfds,
> +                                        int flags,
> +                                        Error **errp)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +    ssize_t done = 0;
> +    ssize_t i;
> +
> +    if (flags) {
> +        error_setg_errno(errp, EINVAL,
> +                         _("Flags %x are not supported"),
> +                         flags);
> +        return -1;
> +    }
> +    if (fds || nfds) {
> +        error_setg_errno(errp, EINVAL, "%s",
> +                         _("Channel does not support file descriptor passing"));
> +        return -1;
> +    }
> +
> +    for (i = 0; i < niov; i++) {
> +        ssize_t ret;
> +    retry:
> +        ret = recv(sioc->fd,
> +                   iov[i].iov_base,
> +                   iov[i].iov_len,
> +                   0);
> +        if (ret < 0) {
> +            if (socket_error() == EAGAIN) {
> +                if (done) {
> +                    return done;
> +                } else {
> +                    return QIO_CHANNEL_ERR_BLOCK;
> +                }
> +            } else if (socket_error() == EINTR) {
> +                goto retry;
> +            } else {
> +                error_setg_errno(errp, socket_error(), "%s",
> +                                 _("Unable to write to socket"));
> +                return -1;
> +            }
> +        }
> +        done += ret;
> +        if (ret < iov[i].iov_len) {
> +            return done;
> +        }
> +    }
> +
> +    return done;
> +}
> +
> +static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> +                                         const struct iovec *iov,
> +                                         size_t niov,
> +                                         int *fds,
> +                                         size_t nfds,
> +                                         int flags G_GNUC_UNUSED,
> +                                         Error **errp)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +    ssize_t done = 0;
> +    ssize_t i;
> +
> +    if (flags) {
> +        error_setg_errno(errp, EINVAL,
> +                         _("Flags %x are not supported"),
> +                         flags);
> +        return -1;
> +    }
> +    if (fds || nfds) {
> +        error_setg_errno(errp, EINVAL, "%s",
> +                         _("Channel does not support file descriptor passing"));
> +        return -1;
> +    }
> +
> +    for (i = 0; i < niov; i++) {
> +        ssize_t ret;
> +    retry:
> +        ret = send(sioc->fd,
> +                   iov[i].iov_base,
> +                   iov[i].iov_len,
> +                   0);
> +        if (ret < 0) {
> +            if (socket_error() == EAGAIN) {
> +                if (done) {
> +                    return done;
> +                } else {
> +                    return QIO_CHANNEL_ERR_BLOCK;
> +                }
> +            } else if (socket_error() == EINTR) {
> +                goto retry;
> +            } else {
> +                error_setg_errno(errp, socket_error(), "%s",
> +                                 _("Unable to write to socket"));
> +                return -1;
> +            }
> +        }
> +        done += ret;
> +        if (ret < iov[i].iov_len) {
> +            return done;
> +        }
> +    }
> +
> +    return done;
> +}
> +#endif /* WIN32 */
> +
> +static void qio_channel_socket_set_blocking(QIOChannel *ioc,
> +                                            bool enabled)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +
> +    if (enabled) {
> +        qemu_set_block(sioc->fd);
> +    } else {
> +        qemu_set_nonblock(sioc->fd);
> +    }
> +}
> +
> +void
> +qio_channel_socket_set_nodelay(QIOChannelSocket *ioc,
> +                               bool enabled)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +    int v = enabled ? 1 : 0;
> +
> +    qemu_setsockopt(sioc->fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
> +}
> +
> +static int qio_channel_socket_close(QIOChannel *ioc,
> +                                    Error **errp)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +
> +    if (closesocket(sioc->fd) < 0) {
> +        sioc->fd = -1;
> +        error_setg_errno(errp, socket_error(), "%s",
> +                         _("Unable to close socket"));
> +        return -1;
> +    }
> +    sioc->fd = -1;
> +    return 0;
> +}
> +
> +int
> +qio_channel_socket_shutdown(QIOChannelSocket *ioc,
> +                            QIOChannelSocketShutdown how,
> +                            Error **errp)
> +{
> +    int sockhow;
> +    switch (how) {
> +    case QIO_CHANNEL_SOCKET_SHUTDOWN_READ:
> +        sockhow = SHUT_RD;
> +        break;
> +    case QIO_CHANNEL_SOCKET_SHUTDOWN_WRITE:
> +        sockhow = SHUT_WR;
> +        break;
> +    case QIO_CHANNEL_SOCKET_SHUTDOWN_BOTH:
> +    default:
> +        sockhow = SHUT_RDWR;
> +        break;
> +    }
> +
> +    if (shutdown(ioc->fd, sockhow) < 0) {
> +        error_setg_errno(errp, socket_error(), "%s",
> +                         _("Unable to shutdown socket"));
> +        return -1;
> +    }
> +    return 0;
> +}
> +
> +static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
> +                                                GIOCondition condition)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +    return qio_channel_unix_create_fd_watch(ioc,
> +                                            sioc->fd,
> +                                            condition);
> +}
> +
> +static void qio_channel_socket_class_init(ObjectClass *klass,
> +                                          void *class_data G_GNUC_UNUSED)
> +{
> +    QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
> +
> +    ioc_klass->io_has_feature = qio_channel_socket_has_feature;
> +    ioc_klass->io_writev = qio_channel_socket_writev;
> +    ioc_klass->io_readv = qio_channel_socket_readv;
> +    ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
> +    ioc_klass->io_close = qio_channel_socket_close;
> +    ioc_klass->io_create_watch = qio_channel_socket_create_watch;
> +}
> +
> +static const TypeInfo qio_channel_socket_info = {
> +    .parent = TYPE_QIO_CHANNEL,
> +    .name = TYPE_QIO_CHANNEL_SOCKET,
> +    .instance_size = sizeof(QIOChannelSocket),
> +    .instance_init = qio_channel_socket_init,
> +    .instance_finalize = qio_channel_socket_finalize,
> +    .class_init = qio_channel_socket_class_init,
> +};
> +
> +static void qio_channel_socket_register_types(void)
> +{
> +    type_register_static(&qio_channel_socket_info);
> +}
> +
> +type_init(qio_channel_socket_register_types);
>
Daniel P. Berrangé April 17, 2015, 3:52 p.m. UTC | #2
On Fri, Apr 17, 2015 at 05:28:09PM +0200, Paolo Bonzini wrote:
> 
> 
> On 17/04/2015 16:22, Daniel P. Berrange wrote:
> > Implement a QIOChannel subclass that supports sockets I/O
> > 
> > TBD check errno handling of windows port & fix watch impl
> > 
> > Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
> > ---
> >  include/io/channel-socket.h | 168 +++++++++++++
> >  io/Makefile.objs            |   1 +
> >  io/channel-socket.c         | 572 ++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 741 insertions(+)
> >  create mode 100644 include/io/channel-socket.h
> >  create mode 100644 io/channel-socket.c
> > 
> > diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
> > new file mode 100644
> > index 0000000..b95349b
> > --- /dev/null
> > +++ b/include/io/channel-socket.h


> > +/**
> > + * qio_channel_socket_get_local_addr_string:
> > + * @ioc: the socket channel object
> > + * @hostname: pointer to be filled with hostname string
> > + * @servicename: pointer to be filled with servicename string
> > + * @family: pointer to be filled with network address family
> > + * @errp: pointer to an uninitialized error object
> > + *
> > + * Get the string representation of the local socket
> > + * address. The address information will be stored in
> > + * the @hostname, @servicename and @family parameters.
> > + * The @hostname and @servicename strings will be
> > + * allocated to the size required and should be free
> > + * with g_free() when no longer required
> > + *
> > + * Returns: 0 on success, -1 on error
> > + */
> > +int
> > +qio_channel_socket_get_local_addr_string(QIOChannelSocket *ioc,
> > +                                         char **hostname,
> > +                                         char **servicename,
> > +                                         NetworkAddressFamily *family,
> > +                                         Error **errp);
> > +
> > +/**
> > + * qio_channel_socket_get_remote_addr_string:
> > + * @ioc: the socket channel object
> > + * @hostname: pointer to be filled with hostname string
> > + * @servicename: pointer to be filled with servicename string
> > + * @family: pointer to be filled with network address family
> > + * @errp: pointer to an uninitialized error object
> > + *
> > + * Get the string representation of the remote socket
> > + * address. The address information will be stored in
> > + * the @hostname, @servicename and @family parameters.
> > + * The @hostname and @servicename strings will be
> > + * allocated to the size required and should be free
> > + * with g_free() when no longer required
> > + *
> > + * Returns: 0 on success, -1 on error
> > + */
> > +int
> > +qio_channel_socket_get_remote_addr_string(QIOChannelSocket *ioc,
> > +                                          char **hostname,
> > +                                          char **servicename,
> > +                                          NetworkAddressFamily *family,
> > +                                          Error **errp);
> 
> Would it be possible to change these to use a SocketAddress* type?

Yeah, that looks like it is a viable option.

> > +
> > +/**
> > + * qio_channel_socket_set_nodelay:
> > + * @ioc: the socket channel object
> > + * @enabled: the new flag state
> > + *
> > + * Set the state of the NODELAY socket flag. If the
> 
> The function name is okay, but please write TCP_NODELAY in the
> documentation, or talk about Nagle's algorithm instead of mentioning the
> flag.

Sure will do.

> > + * @enabled parameter is true, then NODELAY will be
> > + * set and data will be transmitted immediately. If
> > + * @enabled is false, then data may be temporarily
> > + * held for transmission to enable writes to be
> > + * coallesced.
> > + */
> > +void
> > +qio_channel_socket_set_nodelay(QIOChannelSocket *ioc,
> > +                               bool enabled);
> > +
> > +/**
> > + * qio_channel_socket_accept:
> > + * @ioc: the socket channel object
> > + * @errp: pointer to an uninitialized error object
> > + *
> > + * If the socket represents a server, then this accepts
> > + * a new client connection. The returned channel will
> > + * represent the connected client socket.
> > + *
> > + * Returns: the new client channel, or NULL on error
> > + */
> > +QIOChannelSocket *
> > +qio_channel_socket_accept(QIOChannelSocket *ioc,
> > +                          Error **errp);
> 
> Does it make sense for a passive socket to be a QIOChannelSocket?  We
> have already a pretty decent API in util/qemu-sockets.c, and
> QIOChannelSocket will become more similar to qemu-sockets if you switch
> to SocketAddress.  Perhaps this function can just take a file descriptor?

I was somewhat undecided about that really - One of my todos is to see
about better integrating with qemu-sockets for the connection facilities,
so will consider this problem too.

Regards,
Daniel
Paolo Bonzini April 17, 2015, 4 p.m. UTC | #3
On 17/04/2015 17:52, Daniel P. Berrange wrote:
>>> > > +QIOChannelSocket *
>>> > > +qio_channel_socket_accept(QIOChannelSocket *ioc,
>>> > > +                          Error **errp);
>> > 
>> > Does it make sense for a passive socket to be a QIOChannelSocket?  We
>> > have already a pretty decent API in util/qemu-sockets.c, and
>> > QIOChannelSocket will become more similar to qemu-sockets if you switch
>> > to SocketAddress.  Perhaps this function can just take a file descriptor?
> I was somewhat undecided about that really - One of my todos is to see
> about better integrating with qemu-sockets for the connection facilities,
> so will consider this problem too.

Hmm, I guess it makes sense to have the passive socket as a QOM object,
so it is okay.

Paolo
Gerd Hoffmann April 20, 2015, 7:18 a.m. UTC | #4
On Fr, 2015-04-17 at 15:22 +0100, Daniel P. Berrange wrote:
> Implement a QIOChannel subclass that supports sockets I/O
> 
> TBD check errno handling of windows port & fix watch impl

> +struct QIOChannelSocket {
> +    QIOChannel parent;
> +    int fd;
> +    struct sockaddr_storage localAddr;
> +    socklen_t localAddrLen;
> +    struct sockaddr_storage remoteAddr;
> +    socklen_t remoteAddrLen;
> +};

Looks like this supports a single listening socket only, correct?  That
is a long-standing issue we have in qemu.  Listening on both ipv4 and
ipv6 doesn't work, except when binding the socket to the ipv6 wildcard
address which can accept ipv4 connects too on most systems.

Would be nice to tackle that while putting the socket code upside down
anyway.

cheers,
  Gerd
Daniel P. Berrangé April 23, 2015, 12:31 p.m. UTC | #5
On Mon, Apr 20, 2015 at 09:18:49AM +0200, Gerd Hoffmann wrote:
> On Fr, 2015-04-17 at 15:22 +0100, Daniel P. Berrange wrote:
> > Implement a QIOChannel subclass that supports sockets I/O
> > 
> > TBD check errno handling of windows port & fix watch impl
> 
> > +struct QIOChannelSocket {
> > +    QIOChannel parent;
> > +    int fd;
> > +    struct sockaddr_storage localAddr;
> > +    socklen_t localAddrLen;
> > +    struct sockaddr_storage remoteAddr;
> > +    socklen_t remoteAddrLen;
> > +};
> 
> Looks like this supports a single listening socket only, correct?  That
> is a long-standing issue we have in qemu.  Listening on both ipv4 and
> ipv6 doesn't work, except when binding the socket to the ipv6 wildcard
> address which can accept ipv4 connects too on most systems.
> 
> Would be nice to tackle that while putting the socket code upside down
> anyway.

Thanks for reminding me about this limitation - I'll have a think about
how we can solve this problem at the same time too.

Regards,
Daniel
diff mbox

Patch

diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
new file mode 100644
index 0000000..b95349b
--- /dev/null
+++ b/include/io/channel-socket.h
@@ -0,0 +1,168 @@ 
+/*
+ * QEMU I/O channels sockets driver
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QIO_CHANNEL_SOCKET_H__
+#define QIO_CHANNEL_SOCKET_H__
+
+#include "io/channel.h"
+#include "qemu/sockets.h"
+
+#define TYPE_QIO_CHANNEL_SOCKET "qemu:io-channel-socket"
+#define QIO_CHANNEL_SOCKET(obj)                                     \
+    OBJECT_CHECK(QIOChannelSocket, (obj), TYPE_QIO_CHANNEL_SOCKET)
+
+typedef struct QIOChannelSocket QIOChannelSocket;
+
+/**
+ * QIOChannelSocket:
+ *
+ * The QIOChannelSocket class provides a channel implementation
+ * that can transport data over a UNIX socket or TCP socket.
+ * Beyond the core channel API, it also provides functionality
+ * for accepting client connections, tuning some socket
+ * parameters and getting socket address strings.
+ */
+
+struct QIOChannelSocket {
+    QIOChannel parent;
+    int fd;
+    struct sockaddr_storage localAddr;
+    socklen_t localAddrLen;
+    struct sockaddr_storage remoteAddr;
+    socklen_t remoteAddrLen;
+};
+
+
+/**
+ * qio_channel_socket_new_fd:
+ * @fd: the socket file descriptor
+ * @errp: pointer to an uninitialized error object
+ *
+ * Create a channel for performing I/O on the socket
+ * connection represented by the file descriptor @fd.
+ *
+ * Returns: the socket channel object, or NULL on error
+ */
+QIOChannelSocket *
+qio_channel_socket_new_fd(int fd,
+                          Error **errp);
+
+/**
+ * qio_channel_socket_get_local_addr_string:
+ * @ioc: the socket channel object
+ * @hostname: pointer to be filled with hostname string
+ * @servicename: pointer to be filled with servicename string
+ * @family: pointer to be filled with network address family
+ * @errp: pointer to an uninitialized error object
+ *
+ * Get the string representation of the local socket
+ * address. The address information will be stored in
+ * the @hostname, @servicename and @family parameters.
+ * The @hostname and @servicename strings will be
+ * allocated to the size required and should be free
+ * with g_free() when no longer required
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+qio_channel_socket_get_local_addr_string(QIOChannelSocket *ioc,
+                                         char **hostname,
+                                         char **servicename,
+                                         NetworkAddressFamily *family,
+                                         Error **errp);
+
+/**
+ * qio_channel_socket_get_remote_addr_string:
+ * @ioc: the socket channel object
+ * @hostname: pointer to be filled with hostname string
+ * @servicename: pointer to be filled with servicename string
+ * @family: pointer to be filled with network address family
+ * @errp: pointer to an uninitialized error object
+ *
+ * Get the string representation of the remote socket
+ * address. The address information will be stored in
+ * the @hostname, @servicename and @family parameters.
+ * The @hostname and @servicename strings will be
+ * allocated to the size required and should be free
+ * with g_free() when no longer required
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+qio_channel_socket_get_remote_addr_string(QIOChannelSocket *ioc,
+                                          char **hostname,
+                                          char **servicename,
+                                          NetworkAddressFamily *family,
+                                          Error **errp);
+
+/**
+ * qio_channel_socket_set_nodelay:
+ * @ioc: the socket channel object
+ * @enabled: the new flag state
+ *
+ * Set the state of the NODELAY socket flag. If the
+ * @enabled parameter is true, then NODELAY will be
+ * set and data will be transmitted immediately. If
+ * @enabled is false, then data may be temporarily
+ * held for transmission to enable writes to be
+ * coallesced.
+ */
+void
+qio_channel_socket_set_nodelay(QIOChannelSocket *ioc,
+                               bool enabled);
+
+/**
+ * qio_channel_socket_accept:
+ * @ioc: the socket channel object
+ * @errp: pointer to an uninitialized error object
+ *
+ * If the socket represents a server, then this accepts
+ * a new client connection. The returned channel will
+ * represent the connected client socket.
+ *
+ * Returns: the new client channel, or NULL on error
+ */
+QIOChannelSocket *
+qio_channel_socket_accept(QIOChannelSocket *ioc,
+                          Error **errp);
+
+typedef enum {
+    QIO_CHANNEL_SOCKET_SHUTDOWN_BOTH,
+    QIO_CHANNEL_SOCKET_SHUTDOWN_READ,
+    QIO_CHANNEL_SOCKET_SHUTDOWN_WRITE,
+} QIOChannelSocketShutdown;
+
+/**
+ * qio_channel_socket_shutdown:
+ * @ioc: the socket channel object
+ * @how: the direction to shutdown
+ * @errp: pointer to an uninitialized error object
+ *
+ * Shutdowns transmission or receiving on a socket
+ * without closing the socket file descriptor.
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+qio_channel_socket_shutdown(QIOChannelSocket *ioc,
+                            QIOChannelSocketShutdown how,
+                            Error **errp);
+
+#endif /* QIO_CHANNEL_SOCKET_H__ */
diff --git a/io/Makefile.objs b/io/Makefile.objs
index a776676..4f5e276 100644
--- a/io/Makefile.objs
+++ b/io/Makefile.objs
@@ -1,2 +1,3 @@ 
 util-obj-y += channel.o
 util-obj-y += channel-unix.o
+util-obj-y += channel-socket.o
diff --git a/io/channel-socket.c b/io/channel-socket.c
new file mode 100644
index 0000000..5eacf38
--- /dev/null
+++ b/io/channel-socket.c
@@ -0,0 +1,572 @@ 
+/*
+ * QEMU I/O channels sockets driver
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <glib/gi18n.h>
+
+#include "io/channel-socket.h"
+#include "io/channel-unix.h"
+
+#define SOCKET_MAX_FDS 16
+
+static int
+qio_channel_socket_get_addr_string(struct sockaddr_storage *sa,
+                                   socklen_t salen,
+                                   char **hostname,
+                                   char **servicename,
+                                   NetworkAddressFamily *family,
+                                   Error **errp)
+{
+    char host[NI_MAXHOST];
+    char serv[NI_MAXSERV];
+    int ret;
+
+    ret = getnameinfo((struct sockaddr *)sa, salen,
+                      host, sizeof(host),
+                      serv, sizeof(serv),
+                      NI_NUMERICHOST | NI_NUMERICSERV);
+    if (ret != 0) {
+        error_setg(errp, "Cannot format numeric socket address: %s\n",
+                   gai_strerror(ret));
+        return -1;
+    }
+
+    *hostname = g_strdup(host);
+    *servicename = g_strdup(serv);
+    *family = inet_netfamily(sa->ss_family);
+    return 0;
+}
+
+int
+qio_channel_socket_get_local_addr_string(QIOChannelSocket *ioc,
+                                         char **hostname,
+                                         char **servicename,
+                                         NetworkAddressFamily *family,
+                                         Error **errp)
+{
+    return qio_channel_socket_get_addr_string(&ioc->localAddr,
+                                              ioc->localAddrLen,
+                                              hostname,
+                                              servicename,
+                                              family,
+                                              errp);
+}
+
+int
+qio_channel_socket_get_remote_addr_string(QIOChannelSocket *ioc,
+                                          char **hostname,
+                                          char **servicename,
+                                          NetworkAddressFamily *family,
+                                          Error **errp)
+{
+    return qio_channel_socket_get_addr_string(&ioc->remoteAddr,
+                                              ioc->remoteAddrLen,
+                                              hostname,
+                                              servicename,
+                                              family,
+                                              errp);
+}
+
+QIOChannelSocket *
+qio_channel_socket_new_fd(int fd,
+                          Error **errp)
+{
+    QIOChannelSocket *ioc;
+
+    ioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
+
+    ioc->fd = fd;
+    ioc->remoteAddrLen = sizeof(ioc->remoteAddr);
+    ioc->localAddrLen = sizeof(ioc->localAddr);
+
+    if (getpeername(fd, (struct sockaddr *)&ioc->remoteAddr,
+                    &ioc->remoteAddrLen) < 0) {
+        if (socket_error() == ENOTCONN) {
+            memset(&ioc->remoteAddr, 0, sizeof(ioc->remoteAddr));
+            ioc->remoteAddrLen = sizeof(ioc->remoteAddr);
+        } else {
+            error_setg_errno(errp, socket_error(), "%s",
+                             _("Unable to query remote socket address"));
+            goto error;
+        }
+    }
+
+    if (getsockname(fd, (struct sockaddr *)&ioc->localAddr,
+                    &ioc->localAddrLen) < 0) {
+        error_setg_errno(errp, socket_error(), "%s",
+                         _("Unable to query local socket address"));
+        goto error;
+    }
+
+    return ioc;
+
+ error:
+    ioc->fd = -1; /* Let the caller close FD on failure */
+    object_unref(OBJECT(ioc));
+    return NULL;
+}
+
+QIOChannelSocket *
+qio_channel_socket_accept(QIOChannelSocket *ioc,
+                          Error **errp)
+{
+    QIOChannelSocket *cioc;
+
+    cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
+    cioc->fd = -1;
+    cioc->remoteAddrLen = sizeof(ioc->remoteAddr);
+    cioc->localAddrLen = sizeof(ioc->localAddr);
+
+ retry:
+    cioc->fd = accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr,
+                      &cioc->remoteAddrLen);
+    if (cioc->fd < 0) {
+        if (socket_error() == EINTR) {
+            goto retry;
+        }
+        goto error;
+    }
+
+    if (getsockname(cioc->fd, (struct sockaddr *)&ioc->localAddr,
+                    &ioc->localAddrLen) < 0) {
+        error_setg_errno(errp, socket_error(), "%s",
+                         _("Unable to query local socket address"));
+        goto error;
+    }
+
+    return cioc;
+
+ error:
+    object_unref(OBJECT(cioc));
+    return NULL;
+}
+
+static void qio_channel_socket_init(Object *obj)
+{
+    QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj);
+    ioc->fd = -1;
+}
+
+static void qio_channel_socket_finalize(Object *obj)
+{
+    QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj);
+    if (ioc->fd != -1) {
+        close(ioc->fd);
+        ioc->fd = -1;
+    }
+}
+
+static bool qio_channel_socket_has_feature(QIOChannel *ioc,
+                                           QIOChannelFeature feature)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+
+    switch (feature) {
+    case QIO_CHANNEL_FEATURE_FD_PASS:
+#ifndef WIN32
+        if (sioc->localAddr.ss_family == AF_UNIX) {
+            return true;
+        } else {
+#endif /* WIN32 */
+            return false;
+#ifndef WIN32
+        }
+#endif /* WIN32 */
+    default:
+        return false;
+    }
+}
+
+
+#ifndef WIN32
+static void qio_channel_socket_copy_fds(struct msghdr *msg,
+                                        int **fds, size_t *nfds)
+{
+    struct cmsghdr *cmsg;
+
+    *nfds = 0;
+    *fds = NULL;
+
+    for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+        int fd_size, i;
+        int gotfds;
+
+        if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) ||
+            cmsg->cmsg_level != SOL_SOCKET ||
+            cmsg->cmsg_type != SCM_RIGHTS) {
+            continue;
+        }
+
+        fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+
+        if (!fd_size) {
+            continue;
+        }
+
+        gotfds = fd_size / sizeof(int);
+        *fds = g_renew(int, *fds, *nfds + gotfds);
+        memcpy(*fds + *nfds, CMSG_DATA(cmsg), fd_size);
+
+        for (i = 0; i < gotfds; i++) {
+            int fd = (*fds)[*nfds + i];
+            if (fd < 0) {
+                continue;
+            }
+
+            /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
+            qemu_set_block(fd);
+
+#ifndef MSG_CMSG_CLOEXEC
+            qemu_set_cloexec(fd);
+#endif
+        }
+        *nfds += gotfds;
+    }
+}
+
+
+static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
+                                        const struct iovec *iov,
+                                        size_t niov,
+                                        int **fds,
+                                        size_t *nfds,
+                                        int flags,
+                                        Error **errp)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    ssize_t ret;
+    struct msghdr msg = { NULL, };
+    char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
+    int sflags = 0;
+
+    if (flags & ~QIO_CHANNEL_READ_PEEK) {
+        error_setg_errno(errp, EINVAL,
+                         _("Flags %x are not supported"),
+                         flags & ~QIO_CHANNEL_READ_PEEK);
+        return -1;
+    }
+
+#ifdef MSG_CMSG_CLOEXEC
+    sflags |= MSG_CMSG_CLOEXEC;
+#endif
+    if (flags & QIO_CHANNEL_READ_PEEK) {
+        sflags |= MSG_PEEK;
+    }
+
+    msg.msg_iov = (struct iovec *)iov;
+    msg.msg_iovlen = niov;
+    if (fds && nfds) {
+        msg.msg_control = control;
+        msg.msg_controllen = sizeof(control);
+    }
+
+ retry:
+    ret = recvmsg(sioc->fd, &msg, sflags);
+    if (ret < 0) {
+        if (socket_error() == EAGAIN) {
+            return -2;
+        }
+        if (socket_error() == EINTR) {
+            goto retry;
+        }
+
+        error_setg_errno(errp, socket_error(), "%s",
+                         _("Unable to read from socket"));
+        return -1;
+    }
+
+    if (fds && nfds) {
+        qio_channel_socket_copy_fds(&msg, fds, nfds);
+    }
+
+    return ret;
+}
+
+static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+                                         const struct iovec *iov,
+                                         size_t niov,
+                                         int *fds,
+                                         size_t nfds,
+                                         int flags G_GNUC_UNUSED,
+                                         Error **errp)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    ssize_t ret;
+    struct msghdr msg = { NULL, };
+
+    if (flags) {
+        error_setg_errno(errp, EINVAL,
+                         _("Flags %x are not supported"),
+                         flags);
+        return -1;
+    }
+
+    msg.msg_iov = (struct iovec *)iov;
+    msg.msg_iovlen = niov;
+
+    if (nfds) {
+        char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
+        size_t fdsize = sizeof(int) * nfds;
+        struct cmsghdr *cmsg;
+
+        if (nfds > SOCKET_MAX_FDS) {
+            error_setg_errno(errp, -EINVAL,
+                             _("Only %d FDs can be sent, got %zu"),
+                             SOCKET_MAX_FDS, nfds);
+            return -1;
+        }
+
+        msg.msg_control = control;
+        msg.msg_controllen = CMSG_SPACE(sizeof(int) * nfds);
+
+        cmsg = CMSG_FIRSTHDR(&msg);
+        cmsg->cmsg_len = CMSG_LEN(fdsize);
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+        memcpy(CMSG_DATA(cmsg), fds, fdsize);
+    }
+
+ retry:
+    ret = sendmsg(sioc->fd, &msg, 0);
+    if (ret <= 0) {
+        if (socket_error() == EAGAIN) {
+            return -2;
+        }
+        if (socket_error() == EINTR) {
+            goto retry;
+        }
+        error_setg_errno(errp, socket_error(), "%s",
+                         _("Unable to write to socket"));
+        return -1;
+    }
+    return ret;
+}
+#else /* WIN32 */
+static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
+                                        const struct iovec *iov,
+                                        size_t niov,
+                                        int **fds,
+                                        size_t *nfds,
+                                        int flags,
+                                        Error **errp)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    ssize_t done = 0;
+    ssize_t i;
+
+    if (flags) {
+        error_setg_errno(errp, EINVAL,
+                         _("Flags %x are not supported"),
+                         flags);
+        return -1;
+    }
+    if (fds || nfds) {
+        error_setg_errno(errp, EINVAL, "%s",
+                         _("Channel does not support file descriptor passing"));
+        return -1;
+    }
+
+    for (i = 0; i < niov; i++) {
+        ssize_t ret;
+    retry:
+        ret = recv(sioc->fd,
+                   iov[i].iov_base,
+                   iov[i].iov_len,
+                   0);
+        if (ret < 0) {
+            if (socket_error() == EAGAIN) {
+                if (done) {
+                    return done;
+                } else {
+                    return QIO_CHANNEL_ERR_BLOCK;
+                }
+            } else if (socket_error() == EINTR) {
+                goto retry;
+            } else {
+                error_setg_errno(errp, socket_error(), "%s",
+                                 _("Unable to write to socket"));
+                return -1;
+            }
+        }
+        done += ret;
+        if (ret < iov[i].iov_len) {
+            return done;
+        }
+    }
+
+    return done;
+}
+
+static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+                                         const struct iovec *iov,
+                                         size_t niov,
+                                         int *fds,
+                                         size_t nfds,
+                                         int flags G_GNUC_UNUSED,
+                                         Error **errp)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    ssize_t done = 0;
+    ssize_t i;
+
+    if (flags) {
+        error_setg_errno(errp, EINVAL,
+                         _("Flags %x are not supported"),
+                         flags);
+        return -1;
+    }
+    if (fds || nfds) {
+        error_setg_errno(errp, EINVAL, "%s",
+                         _("Channel does not support file descriptor passing"));
+        return -1;
+    }
+
+    for (i = 0; i < niov; i++) {
+        ssize_t ret;
+    retry:
+        ret = send(sioc->fd,
+                   iov[i].iov_base,
+                   iov[i].iov_len,
+                   0);
+        if (ret < 0) {
+            if (socket_error() == EAGAIN) {
+                if (done) {
+                    return done;
+                } else {
+                    return QIO_CHANNEL_ERR_BLOCK;
+                }
+            } else if (socket_error() == EINTR) {
+                goto retry;
+            } else {
+                error_setg_errno(errp, socket_error(), "%s",
+                                 _("Unable to write to socket"));
+                return -1;
+            }
+        }
+        done += ret;
+        if (ret < iov[i].iov_len) {
+            return done;
+        }
+    }
+
+    return done;
+}
+#endif /* WIN32 */
+
+static void qio_channel_socket_set_blocking(QIOChannel *ioc,
+                                            bool enabled)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+
+    if (enabled) {
+        qemu_set_block(sioc->fd);
+    } else {
+        qemu_set_nonblock(sioc->fd);
+    }
+}
+
+void
+qio_channel_socket_set_nodelay(QIOChannelSocket *ioc,
+                               bool enabled)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    int v = enabled ? 1 : 0;
+
+    qemu_setsockopt(sioc->fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
+}
+
+static int qio_channel_socket_close(QIOChannel *ioc,
+                                    Error **errp)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+
+    if (closesocket(sioc->fd) < 0) {
+        sioc->fd = -1;
+        error_setg_errno(errp, socket_error(), "%s",
+                         _("Unable to close socket"));
+        return -1;
+    }
+    sioc->fd = -1;
+    return 0;
+}
+
+int
+qio_channel_socket_shutdown(QIOChannelSocket *ioc,
+                            QIOChannelSocketShutdown how,
+                            Error **errp)
+{
+    int sockhow;
+    switch (how) {
+    case QIO_CHANNEL_SOCKET_SHUTDOWN_READ:
+        sockhow = SHUT_RD;
+        break;
+    case QIO_CHANNEL_SOCKET_SHUTDOWN_WRITE:
+        sockhow = SHUT_WR;
+        break;
+    case QIO_CHANNEL_SOCKET_SHUTDOWN_BOTH:
+    default:
+        sockhow = SHUT_RDWR;
+        break;
+    }
+
+    if (shutdown(ioc->fd, sockhow) < 0) {
+        error_setg_errno(errp, socket_error(), "%s",
+                         _("Unable to shutdown socket"));
+        return -1;
+    }
+    return 0;
+}
+
+static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
+                                                GIOCondition condition)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    return qio_channel_unix_create_fd_watch(ioc,
+                                            sioc->fd,
+                                            condition);
+}
+
+static void qio_channel_socket_class_init(ObjectClass *klass,
+                                          void *class_data G_GNUC_UNUSED)
+{
+    QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
+
+    ioc_klass->io_has_feature = qio_channel_socket_has_feature;
+    ioc_klass->io_writev = qio_channel_socket_writev;
+    ioc_klass->io_readv = qio_channel_socket_readv;
+    ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
+    ioc_klass->io_close = qio_channel_socket_close;
+    ioc_klass->io_create_watch = qio_channel_socket_create_watch;
+}
+
+static const TypeInfo qio_channel_socket_info = {
+    .parent = TYPE_QIO_CHANNEL,
+    .name = TYPE_QIO_CHANNEL_SOCKET,
+    .instance_size = sizeof(QIOChannelSocket),
+    .instance_init = qio_channel_socket_init,
+    .instance_finalize = qio_channel_socket_finalize,
+    .class_init = qio_channel_socket_class_init,
+};
+
+static void qio_channel_socket_register_types(void)
+{
+    type_register_static(&qio_channel_socket_info);
+}
+
+type_init(qio_channel_socket_register_types);