diff mbox

[1/3] Unified Datagram Socket Transport

Message ID 20170718170819.28494-2-anton.ivanov@cambridgegreys.com
State New
Headers show

Commit Message

Anton Ivanov July 18, 2017, 5:08 p.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

1. Creates a common backend for socket transports using
recvmmsg().
2. Migrates L2TPv3 to the new backend

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 configure         |  10 +-
 net/Makefile.objs |   2 +-
 net/l2tpv3.c      | 531 +++++++++---------------------------------------------
 net/net.c         |   4 +-
 net/unified.c     | 406 +++++++++++++++++++++++++++++++++++++++++
 net/unified.h     | 118 ++++++++++++
 6 files changed, 613 insertions(+), 458 deletions(-)
 create mode 100644 net/unified.c
 create mode 100644 net/unified.h

Comments

Jason Wang July 19, 2017, 5:39 a.m. UTC | #1
On 2017年07月19日 01:08, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>
> 1. Creates a common backend for socket transports using
> recvmmsg().
> 2. Migrates L2TPv3 to the new backend

It would be better if you could further split out 2 from this patch.

>
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   configure         |  10 +-
>   net/Makefile.objs |   2 +-
>   net/l2tpv3.c      | 531 +++++++++---------------------------------------------
>   net/net.c         |   4 +-
>   net/unified.c     | 406 +++++++++++++++++++++++++++++++++++++++++
>   net/unified.h     | 118 ++++++++++++
>   6 files changed, 613 insertions(+), 458 deletions(-)
>   create mode 100644 net/unified.c
>   create mode 100644 net/unified.h
>
> diff --git a/configure b/configure
> index a3f0522e8f..99a60b723c 100755
> --- a/configure
> +++ b/configure
> @@ -1862,7 +1862,7 @@ if ! compile_object -Werror ; then
>   fi
>   
>   ##########################################
> -# L2TPV3 probe
> +# UNIFIED probe
>   
>   cat > $TMPC <<EOF
>   #include <sys/socket.h>
> @@ -1870,9 +1870,9 @@ cat > $TMPC <<EOF
>   int main(void) { return sizeof(struct mmsghdr); }
>   EOF
>   if compile_prog "" "" ; then
> -  l2tpv3=yes
> +  unified=yes
>   else
> -  l2tpv3=no
> +  unified=no
>   fi
>   
>   ##########################################
> @@ -5458,8 +5458,8 @@ fi
>   if test "$netmap" = "yes" ; then
>     echo "CONFIG_NETMAP=y" >> $config_host_mak
>   fi
> -if test "$l2tpv3" = "yes" ; then
> -  echo "CONFIG_L2TPV3=y" >> $config_host_mak
> +if test "$unified" = "yes" ; then
> +  echo "CONFIG_UNIFIED=y" >> $config_host_mak
>   fi

Could we keep l2tpv3 option?

>   if test "$cap_ng" = "yes" ; then
>     echo "CONFIG_LIBCAP=y" >> $config_host_mak
> diff --git a/net/Makefile.objs b/net/Makefile.objs
> index 67ba5e26fb..8026ad778a 100644
> --- a/net/Makefile.objs
> +++ b/net/Makefile.objs
> @@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
>   common-obj-y += socket.o
>   common-obj-y += dump.o
>   common-obj-y += eth.o
> -common-obj-$(CONFIG_L2TPV3) += l2tpv3.o
> +common-obj-$(CONFIG_UNIFIED) += l2tpv3.o unified.o
>   common-obj-$(CONFIG_POSIX) += vhost-user.o
>   common-obj-$(CONFIG_SLIRP) += slirp.o
>   common-obj-$(CONFIG_VDE) += vde.o
> diff --git a/net/l2tpv3.c b/net/l2tpv3.c
> index 6745b78990..05413c9cbd 100644
> --- a/net/l2tpv3.c
> +++ b/net/l2tpv3.c
> @@ -1,6 +1,7 @@
>   /*
>    * QEMU System Emulator
>    *
> + * Copyright (c) 2015-2017 Cambridge Greys Limited
>    * Copyright (c) 2003-2008 Fabrice Bellard
>    * Copyright (c) 2012-2014 Cisco Systems
>    *
> @@ -34,19 +35,9 @@
>   #include "qemu/sockets.h"
>   #include "qemu/iov.h"
>   #include "qemu/main-loop.h"
> +#include "unified.h"
>   
>   
> -/* The buffer size needs to be investigated for optimum numbers and
> - * optimum means of paging in on different systems. This size is
> - * chosen to be sufficient to accommodate one packet with some headers
> - */
> -
> -#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
> -#define BUFFER_SIZE 2048
> -#define IOVSIZE 2
> -#define MAX_L2TPV3_MSGCNT 64
> -#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
> -
>   /* Header set to 0x30000 signifies a data packet */
>   
>   #define L2TPV3_DATA_PACKET 0x30000
> @@ -57,31 +48,7 @@
>   #define IPPROTO_L2TP 0x73
>   #endif
>   
> -typedef struct NetL2TPV3State {
> -    NetClientState nc;
> -    int fd;
> -
> -    /*
> -     * these are used for xmit - that happens packet a time
> -     * and for first sign of life packet (easier to parse that once)
> -     */
> -
> -    uint8_t *header_buf;
> -    struct iovec *vec;
> -
> -    /*
> -     * these are used for receive - try to "eat" up to 32 packets at a time
> -     */
> -
> -    struct mmsghdr *msgvec;
> -
> -    /*
> -     * peer address
> -     */
> -
> -    struct sockaddr_storage *dgram_dst;
> -    uint32_t dst_size;
> -
> +typedef struct L2TPV3TunnelParams {
>       /*
>        * L2TPv3 parameters
>        */
> @@ -90,37 +57,8 @@ typedef struct NetL2TPV3State {
>       uint64_t tx_cookie;
>       uint32_t rx_session;
>       uint32_t tx_session;
> -    uint32_t header_size;
>       uint32_t counter;
>   
> -    /*
> -    * DOS avoidance in error handling
> -    */
> -
> -    bool header_mismatch;
> -
> -    /*
> -     * Ring buffer handling
> -     */
> -
> -    int queue_head;
> -    int queue_tail;
> -    int queue_depth;
> -
> -    /*
> -     * Precomputed offsets
> -     */
> -
> -    uint32_t offset;
> -    uint32_t cookie_offset;
> -    uint32_t counter_offset;
> -    uint32_t session_offset;
> -
> -    /* Poll Control */
> -
> -    bool read_poll;
> -    bool write_poll;
> -
>       /* Flags */
>   
>       bool ipv6;
> @@ -130,189 +68,62 @@ typedef struct NetL2TPV3State {
>       bool cookie;
>       bool cookie_is_64;
>   
> -} NetL2TPV3State;
> -
> -static void net_l2tpv3_send(void *opaque);
> -static void l2tpv3_writable(void *opaque);
> -
> -static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
> -{
> -    qemu_set_fd_handler(s->fd,
> -                        s->read_poll ? net_l2tpv3_send : NULL,
> -                        s->write_poll ? l2tpv3_writable : NULL,
> -                        s);
> -}
> -
> -static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
> -{
> -    if (s->read_poll != enable) {
> -        s->read_poll = enable;
> -        l2tpv3_update_fd_handler(s);
> -    }
> -}
> +    /* Precomputed L2TPV3 specific offsets */
> +    uint32_t cookie_offset;
> +    uint32_t counter_offset;
> +    uint32_t session_offset;
>   
> -static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
> -{
> -    if (s->write_poll != enable) {
> -        s->write_poll = enable;
> -        l2tpv3_update_fd_handler(s);
> -    }
> -}
> +} L2TPV3TunnelParams;
>   
> -static void l2tpv3_writable(void *opaque)
> -{
> -    NetL2TPV3State *s = opaque;
> -    l2tpv3_write_poll(s, false);
> -    qemu_flush_queued_packets(&s->nc);
> -}
>   
> -static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
> -{
> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
> -    l2tpv3_read_poll(s, true);
> -}
>   
> -static void l2tpv3_poll(NetClientState *nc, bool enable)
> +static void l2tpv3_form_header(void *us)
>   {
> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
> -    l2tpv3_write_poll(s, enable);
> -    l2tpv3_read_poll(s, enable);
> -}
> +    NetUnifiedState *s = (NetUnifiedState *) us;
> +    L2TPV3TunnelParams *p = (L2TPV3TunnelParams *) s->params;

How about embedding NetUnifiedState into this structure and keep using 
NetL2TPV3State? Then:

-  's' could be kept and lots of lines of changes could be saved here 
and l2tpv3_verify_header()
-  each transport could have their own type instead of using 
NET_CLIENT_DRIVER_L2TPV3

?

>   
> -static void l2tpv3_form_header(NetL2TPV3State *s)
> -{
>       uint32_t *counter;
>   
> -    if (s->udp) {
> +    if (p->udp) {
>           stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
>       }
>       stl_be_p(
> -            (uint32_t *) (s->header_buf + s->session_offset),
> -            s->tx_session
> +            (uint32_t *) (s->header_buf + p->session_offset),
> +            p->tx_session
>           );
> -    if (s->cookie) {
> -        if (s->cookie_is_64) {
> +    if (p->cookie) {
> +        if (p->cookie_is_64) {
>               stq_be_p(
> -                (uint64_t *)(s->header_buf + s->cookie_offset),
> -                s->tx_cookie
> +                (uint64_t *)(s->header_buf + p->cookie_offset),
> +                p->tx_cookie
>               );
>           } else {
>               stl_be_p(
> -                (uint32_t *) (s->header_buf + s->cookie_offset),
> -                s->tx_cookie
> +                (uint32_t *) (s->header_buf + p->cookie_offset),
> +                p->tx_cookie
>               );
>           }
>       }
> -    if (s->has_counter) {
> -        counter = (uint32_t *)(s->header_buf + s->counter_offset);
> -        if (s->pin_counter) {
> +    if (p->has_counter) {
> +        counter = (uint32_t *)(s->header_buf + p->counter_offset);
> +        if (p->pin_counter) {
>               *counter = 0;
>           } else {
> -            stl_be_p(counter, ++s->counter);
> -        }
> -    }
> -}
> -
> -static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
> -                    const struct iovec *iov,
> -                    int iovcnt)
> -{
> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
> -
> -    struct msghdr message;
> -    int ret;
> -
> -    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
> -        error_report(
> -            "iovec too long %d > %d, change l2tpv3.h",
> -            iovcnt, MAX_L2TPV3_IOVCNT
> -        );
> -        return -1;
> -    }
> -    l2tpv3_form_header(s);
> -    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
> -    s->vec->iov_base = s->header_buf;
> -    s->vec->iov_len = s->offset;
> -    message.msg_name = s->dgram_dst;
> -    message.msg_namelen = s->dst_size;
> -    message.msg_iov = s->vec;
> -    message.msg_iovlen = iovcnt + 1;
> -    message.msg_control = NULL;
> -    message.msg_controllen = 0;
> -    message.msg_flags = 0;
> -    do {
> -        ret = sendmsg(s->fd, &message, 0);
> -    } while ((ret == -1) && (errno == EINTR));
> -    if (ret > 0) {
> -        ret -= s->offset;
> -    } else if (ret == 0) {
> -        /* belt and braces - should not occur on DGRAM
> -        * we should get an error and never a 0 send
> -        */
> -        ret = iov_size(iov, iovcnt);
> -    } else {
> -        /* signal upper layer that socket buffer is full */
> -        ret = -errno;
> -        if (ret == -EAGAIN || ret == -ENOBUFS) {
> -            l2tpv3_write_poll(s, true);
> -            ret = 0;
> +            stl_be_p(counter, ++p->counter);
>           }
>       }
> -    return ret;
>   }
>   
> -static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
> -                    const uint8_t *buf,
> -                    size_t size)
> -{
> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
> -
> -    struct iovec *vec;
> -    struct msghdr message;
> -    ssize_t ret = 0;
> -
> -    l2tpv3_form_header(s);
> -    vec = s->vec;
> -    vec->iov_base = s->header_buf;
> -    vec->iov_len = s->offset;
> -    vec++;
> -    vec->iov_base = (void *) buf;
> -    vec->iov_len = size;
> -    message.msg_name = s->dgram_dst;
> -    message.msg_namelen = s->dst_size;
> -    message.msg_iov = s->vec;
> -    message.msg_iovlen = 2;
> -    message.msg_control = NULL;
> -    message.msg_controllen = 0;
> -    message.msg_flags = 0;
> -    do {
> -        ret = sendmsg(s->fd, &message, 0);
> -    } while ((ret == -1) && (errno == EINTR));
> -    if (ret > 0) {
> -        ret -= s->offset;
> -    } else if (ret == 0) {
> -        /* belt and braces - should not occur on DGRAM
> -        * we should get an error and never a 0 send
> -        */
> -        ret = size;
> -    } else {
> -        ret = -errno;
> -        if (ret == -EAGAIN || ret == -ENOBUFS) {
> -            /* signal upper layer that socket buffer is full */
> -            l2tpv3_write_poll(s, true);
> -            ret = 0;
> -        }
> -    }
> -    return ret;
> -}
>   
> -static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
> +static int l2tpv3_verify_header(void *us, uint8_t *buf)
>   {
>   
> +    NetUnifiedState *s = (NetUnifiedState *) us;
> +    L2TPV3TunnelParams *p = (L2TPV3TunnelParams *) s->params;
>       uint32_t *session;
>       uint64_t cookie;
>   
> -    if ((!s->udp) && (!s->ipv6)) {
> +    if ((!p->udp) && (!p->ipv6)) {
>           buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
>       }
>   
> @@ -321,21 +132,21 @@ static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
>       * that anyway.
>       */
>   
> -    if (s->cookie) {
> -        if (s->cookie_is_64) {
> -            cookie = ldq_be_p(buf + s->cookie_offset);
> +    if (p->cookie) {
> +        if (p->cookie_is_64) {
> +            cookie = ldq_be_p(buf + p->cookie_offset);
>           } else {
> -            cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
> +            cookie = ldl_be_p(buf + p->cookie_offset) & 0xffffffffULL;
>           }
> -        if (cookie != s->rx_cookie) {
> +        if (cookie != p->rx_cookie) {
>               if (!s->header_mismatch) {
>                   error_report("unknown cookie id");
>               }
>               return -1;
>           }
>       }
> -    session = (uint32_t *) (buf + s->session_offset);
> -    if (ldl_be_p(session) != s->rx_session) {
> +    session = (uint32_t *) (buf + p->session_offset);
> +    if (ldl_be_p(session) != p->rx_session) {
>           if (!s->header_mismatch) {
>               error_report("session mismatch");
>           }
> @@ -344,203 +155,31 @@ static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
>       return 0;
>   }
>   
> -static void net_l2tpv3_process_queue(NetL2TPV3State *s)
> -{
> -    int size = 0;
> -    struct iovec *vec;
> -    bool bad_read;
> -    int data_size;
> -    struct mmsghdr *msgvec;
> -
> -    /* go into ring mode only if there is a "pending" tail */
> -    if (s->queue_depth > 0) {
> -        do {
> -            msgvec = s->msgvec + s->queue_tail;
> -            if (msgvec->msg_len > 0) {
> -                data_size = msgvec->msg_len - s->header_size;
> -                vec = msgvec->msg_hdr.msg_iov;
> -                if ((data_size > 0) &&
> -                    (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
> -                    vec++;
> -                    /* Use the legacy delivery for now, we will
> -                     * switch to using our own ring as a queueing mechanism
> -                     * at a later date
> -                     */
> -                    size = qemu_send_packet_async(
> -                            &s->nc,
> -                            vec->iov_base,
> -                            data_size,
> -                            l2tpv3_send_completed
> -                        );
> -                    if (size == 0) {
> -                        l2tpv3_read_poll(s, false);
> -                    }
> -                    bad_read = false;
> -                } else {
> -                    bad_read = true;
> -                    if (!s->header_mismatch) {
> -                        /* report error only once */
> -                        error_report("l2tpv3 header verification failed");
> -                        s->header_mismatch = true;
> -                    }
> -                }
> -            } else {
> -                bad_read = true;
> -            }
> -            s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
> -            s->queue_depth--;
> -        } while (
> -                (s->queue_depth > 0) &&
> -                 qemu_can_send_packet(&s->nc) &&
> -                ((size > 0) || bad_read)
> -            );
> -    }
> -}
> -
> -static void net_l2tpv3_send(void *opaque)
> -{
> -    NetL2TPV3State *s = opaque;
> -    int target_count, count;
> -    struct mmsghdr *msgvec;
> -
> -    /* go into ring mode only if there is a "pending" tail */
> -
> -    if (s->queue_depth) {
> -
> -        /* The ring buffer we use has variable intake
> -         * count of how much we can read varies - adjust accordingly
> -         */
> -
> -        target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
> -
> -        /* Ensure we do not overrun the ring when we have
> -         * a lot of enqueued packets
> -         */
> -
> -        if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
> -            target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
> -        }
> -    } else {
> -
> -        /* we do not have any pending packets - we can use
> -        * the whole message vector linearly instead of using
> -        * it as a ring
> -        */
> -
> -        s->queue_head = 0;
> -        s->queue_tail = 0;
> -        target_count = MAX_L2TPV3_MSGCNT;
> -    }
> -
> -    msgvec = s->msgvec + s->queue_head;
> -    if (target_count > 0) {
> -        do {
> -            count = recvmmsg(
> -                s->fd,
> -                msgvec,
> -                target_count, MSG_DONTWAIT, NULL);
> -        } while ((count == -1) && (errno == EINTR));
> -        if (count < 0) {
> -            /* Recv error - we still need to flush packets here,
> -             * (re)set queue head to current position
> -             */
> -            count = 0;
> -        }
> -        s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
> -        s->queue_depth += count;
> -    }
> -    net_l2tpv3_process_queue(s);
> -}
> -
> -static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
> -{
> -    int i, j;
> -    struct iovec *iov;
> -    struct mmsghdr *cleanup = msgvec;
> -    if (cleanup) {
> -        for (i = 0; i < count; i++) {
> -            if (cleanup->msg_hdr.msg_iov) {
> -                iov = cleanup->msg_hdr.msg_iov;
> -                for (j = 0; j < iovcount; j++) {
> -                    g_free(iov->iov_base);
> -                    iov++;
> -                }
> -                g_free(cleanup->msg_hdr.msg_iov);
> -            }
> -            cleanup++;
> -        }
> -        g_free(msgvec);
> -    }
> -}
> -
> -static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
> -{
> -    int i;
> -    struct iovec *iov;
> -    struct mmsghdr *msgvec, *result;
> -
> -    msgvec = g_new(struct mmsghdr, count);
> -    result = msgvec;
> -    for (i = 0; i < count ; i++) {
> -        msgvec->msg_hdr.msg_name = NULL;
> -        msgvec->msg_hdr.msg_namelen = 0;
> -        iov =  g_new(struct iovec, IOVSIZE);
> -        msgvec->msg_hdr.msg_iov = iov;
> -        iov->iov_base = g_malloc(s->header_size);
> -        iov->iov_len = s->header_size;
> -        iov++ ;
> -        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
> -        iov->iov_len = BUFFER_SIZE;
> -        msgvec->msg_hdr.msg_iovlen = 2;
> -        msgvec->msg_hdr.msg_control = NULL;
> -        msgvec->msg_hdr.msg_controllen = 0;
> -        msgvec->msg_hdr.msg_flags = 0;
> -        msgvec++;
> -    }
> -    return result;
> -}
> -
> -static void net_l2tpv3_cleanup(NetClientState *nc)
> -{
> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
> -    qemu_purge_queued_packets(nc);
> -    l2tpv3_read_poll(s, false);
> -    l2tpv3_write_poll(s, false);
> -    if (s->fd >= 0) {
> -        close(s->fd);
> -    }
> -    destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
> -    g_free(s->vec);
> -    g_free(s->header_buf);
> -    g_free(s->dgram_dst);
> -}
> -
> -static NetClientInfo net_l2tpv3_info = {
> -    .type = NET_CLIENT_DRIVER_L2TPV3,
> -    .size = sizeof(NetL2TPV3State),
> -    .receive = net_l2tpv3_receive_dgram,
> -    .receive_iov = net_l2tpv3_receive_dgram_iov,
> -    .poll = l2tpv3_poll,
> -    .cleanup = net_l2tpv3_cleanup,
> -};
> -
>   int net_init_l2tpv3(const Netdev *netdev,
>                       const char *name,
>                       NetClientState *peer, Error **errp)
>   {
>       /* FIXME error_setg(errp, ...) on failure */
>       const NetdevL2TPv3Options *l2tpv3;
> -    NetL2TPV3State *s;
> +    NetUnifiedState *s;
>       NetClientState *nc;
> +    L2TPV3TunnelParams *p;
> +
>       int fd = -1, gairet;
>       struct addrinfo hints;
>       struct addrinfo *result = NULL;
>       char *srcport, *dstport;
>   
> -    nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
> +    nc = qemu_new_unified_net_client(name, peer);
> +
> +    s = DO_UPCAST(NetUnifiedState, nc, nc);
> +
> +    p = g_malloc(sizeof(L2TPV3TunnelParams));

Where was this freed?

>   
> -    s = DO_UPCAST(NetL2TPV3State, nc, nc);
> +    s->params = p;
>   
> +    s->form_header = &l2tpv3_form_header;
> +    s->verify_header = &l2tpv3_verify_header;
>       s->queue_head = 0;
>       s->queue_tail = 0;
>       s->header_mismatch = false;

Why not move all above into qemu_new_unified_net()?

> @@ -549,9 +188,9 @@ int net_init_l2tpv3(const Netdev *netdev,
>       l2tpv3 = &netdev->u.l2tpv3;
>   
>       if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
> -        s->ipv6 = l2tpv3->ipv6;
> +        p->ipv6 = l2tpv3->ipv6;
>       } else {
> -        s->ipv6 = false;
> +        p->ipv6 = false;
>       }
>   
>       if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
> @@ -561,22 +200,22 @@ int net_init_l2tpv3(const Netdev *netdev,
>   
>       if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
>           if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
> -            s->cookie = true;
> +            p->cookie = true;
>           } else {
>               goto outerr;
>           }
>       } else {
> -        s->cookie = false;
> +        p->cookie = false;
>       }
>   
>       if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
> -        s->cookie_is_64  = true;
> +        p->cookie_is_64  = true;
>       } else {
> -        s->cookie_is_64  = false;
> +        p->cookie_is_64  = false;
>       }
>   
>       if (l2tpv3->has_udp && l2tpv3->udp) {
> -        s->udp = true;
> +        p->udp = true;
>           if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
>               error_report("l2tpv3_open : need both src and dst port for udp");
>               goto outerr;
> @@ -585,52 +224,52 @@ int net_init_l2tpv3(const Netdev *netdev,
>               dstport = l2tpv3->dstport;
>           }
>       } else {
> -        s->udp = false;
> +        p->udp = false;
>           srcport = NULL;
>           dstport = NULL;
>       }
>   
>   
>       s->offset = 4;
> -    s->session_offset = 0;
> -    s->cookie_offset = 4;
> -    s->counter_offset = 4;
> +    p->session_offset = 0;
> +    p->cookie_offset = 4;
> +    p->counter_offset = 4;
>   
> -    s->tx_session = l2tpv3->txsession;
> +    p->tx_session = l2tpv3->txsession;
>       if (l2tpv3->has_rxsession) {
> -        s->rx_session = l2tpv3->rxsession;
> +        p->rx_session = l2tpv3->rxsession;
>       } else {
> -        s->rx_session = s->tx_session;
> +        p->rx_session = p->tx_session;
>       }
>   
> -    if (s->cookie) {
> -        s->rx_cookie = l2tpv3->rxcookie;
> -        s->tx_cookie = l2tpv3->txcookie;
> -        if (s->cookie_is_64 == true) {
> +    if (p->cookie) {
> +        p->rx_cookie = l2tpv3->rxcookie;
> +        p->tx_cookie = l2tpv3->txcookie;
> +        if (p->cookie_is_64 == true) {
>               /* 64 bit cookie */
>               s->offset += 8;
> -            s->counter_offset += 8;
> +            p->counter_offset += 8;
>           } else {
>               /* 32 bit cookie */
>               s->offset += 4;
> -            s->counter_offset += 4;
> +            p->counter_offset += 4;
>           }
>       }
>   
>       memset(&hints, 0, sizeof(hints));
>   
> -    if (s->ipv6) {
> +    if (p->ipv6) {
>           hints.ai_family = AF_INET6;
>       } else {
>           hints.ai_family = AF_INET;
>       }
> -    if (s->udp) {
> +    if (p->udp) {
>           hints.ai_socktype = SOCK_DGRAM;
>           hints.ai_protocol = 0;
>           s->offset += 4;
> -        s->counter_offset += 4;
> -        s->session_offset += 4;
> -        s->cookie_offset += 4;
> +        p->counter_offset += 4;
> +        p->session_offset += 4;
> +        p->cookie_offset += 4;
>       } else {
>           hints.ai_socktype = SOCK_RAW;
>           hints.ai_protocol = IPPROTO_L2TP;
> @@ -661,12 +300,12 @@ int net_init_l2tpv3(const Netdev *netdev,
>   
>       memset(&hints, 0, sizeof(hints));
>   
> -    if (s->ipv6) {
> +    if (p->ipv6) {
>           hints.ai_family = AF_INET6;
>       } else {
>           hints.ai_family = AF_INET;
>       }
> -    if (s->udp) {
> +    if (p->udp) {
>           hints.ai_socktype = SOCK_DGRAM;
>           hints.ai_protocol = 0;
>       } else {
> @@ -693,17 +332,17 @@ int net_init_l2tpv3(const Netdev *netdev,
>       }
>   
>       if (l2tpv3->has_counter && l2tpv3->counter) {
> -        s->has_counter = true;
> +        p->has_counter = true;
>           s->offset += 4;
>       } else {
> -        s->has_counter = false;
> +        p->has_counter = false;
>       }
>   
>       if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
> -        s->has_counter = true;  /* pin counter implies that there is counter */
> -        s->pin_counter = true;
> +        p->has_counter = true;  /* pin counter implies that there is counter */
> +        p->pin_counter = true;
>       } else {
> -        s->pin_counter = false;
> +        p->pin_counter = false;
>       }
>   
>       if (l2tpv3->has_offset) {
> @@ -711,22 +350,14 @@ int net_init_l2tpv3(const Netdev *netdev,
>           s->offset += l2tpv3->offset;
>       }
>   
> -    if ((s->ipv6) || (s->udp)) {
> +    if ((p->ipv6) || (p->udp)) {
>           s->header_size = s->offset;
>       } else {
>           s->header_size = s->offset + sizeof(struct iphdr);
>       }
>   
> -    s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
> -    s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
> -    s->header_buf = g_malloc(s->header_size);
> -
> -    qemu_set_nonblock(fd);
> -
> -    s->fd = fd;
> -    s->counter = 0;
> -
> -    l2tpv3_read_poll(s, true);
> +    qemu_net_finalize_unified_init(s, fd);
> +    p->counter = 0;
>   
>       snprintf(s->nc.info_str, sizeof(s->nc.info_str),
>                "l2tpv3: connected");
> diff --git a/net/net.c b/net/net.c
> index 6235aabed8..9270b52ac8 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -959,8 +959,8 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
>   #ifdef CONFIG_VHOST_NET_USED
>           [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
>   #endif
> -#ifdef CONFIG_L2TPV3
> -        [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
> +#ifdef CONFIG_UNIFIED
> +        [NET_CLIENT_DRIVER_L2TPV3] = net_init_l2tpv3,
>   #endif
>   };
>   
> diff --git a/net/unified.c b/net/unified.c

Not a native speaker, but I think we need a better name here e.g udst 
which is short for Unified Datagram Socket Transport?

> new file mode 100644
> index 0000000000..f15d1e1eed
> --- /dev/null
> +++ b/net/unified.c
> @@ -0,0 +1,406 @@
> +/*
> + * QEMU System Emulator
> + *
> + * Copyright (c) 2015-2017 Cambridge Greys Limited
> + * Copyright (c) 2012-2014 Cisco Systems
> + * Copyright (c) 2003-2008 Fabrice Bellard
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu/osdep.h"
> +#include <linux/ip.h>
> +#include <netdb.h>
> +#include "net/net.h"
> +#include "clients.h"
> +#include "qemu-common.h"
> +#include "qemu/error-report.h"
> +#include "qemu/option.h"
> +#include "qemu/sockets.h"
> +#include "qemu/iov.h"
> +#include "qemu/main-loop.h"
> +#include "unified.h"
> +
> +static void net_unified_send(void *opaque);
> +static void unified_writable(void *opaque);
> +
> +static void unified_update_fd_handler(NetUnifiedState *s)
> +{
> +    qemu_set_fd_handler(s->fd,
> +                        s->read_poll ? net_unified_send : NULL,
> +                        s->write_poll ? unified_writable : NULL,
> +                        s);
> +}
> +
> +static void unified_read_poll(NetUnifiedState *s, bool enable)
> +{
> +    if (s->read_poll != enable) {
> +        s->read_poll = enable;
> +        unified_update_fd_handler(s);
> +    }
> +}
> +
> +static void unified_write_poll(NetUnifiedState *s, bool enable)
> +{
> +    if (s->write_poll != enable) {
> +        s->write_poll = enable;
> +        unified_update_fd_handler(s);
> +    }
> +}
> +
> +static void unified_writable(void *opaque)
> +{
> +    NetUnifiedState *s = opaque;
> +    unified_write_poll(s, false);
> +    qemu_flush_queued_packets(&s->nc);
> +}
> +
> +static void unified_send_completed(NetClientState *nc, ssize_t len)
> +{
> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
> +    unified_read_poll(s, true);
> +}
> +
> +static void unified_poll(NetClientState *nc, bool enable)
> +{
> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
> +    unified_write_poll(s, enable);
> +    unified_read_poll(s, enable);
> +}
> +
> +static ssize_t net_unified_receive_dgram_iov(NetClientState *nc,
> +                    const struct iovec *iov,
> +                    int iovcnt)
> +{
> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
> +
> +    struct msghdr message;
> +    int ret;
> +
> +    if (iovcnt > MAX_UNIFIED_IOVCNT - 1) {
> +        error_report(
> +            "iovec too long %d > %d, change unified.h",
> +            iovcnt, MAX_UNIFIED_IOVCNT
> +        );
> +        return -1;
> +    }
> +    if (s->offset > 0) {

net_l2tpv3_receive_dgram_iov() does not have this check. I guess it 
s->offset=0 will be used by other transport. Maybe it's better to delay 
this change until is has a real user or add a comment here.

> +        s->form_header(s);
> +        memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
> +        s->vec->iov_base = s->header_buf;
> +        s->vec->iov_len = s->offset;
> +        message.msg_iovlen = iovcnt + 1;
> +    } else {
> +        memcpy(s->vec, iov, iovcnt * sizeof(struct iovec));
> +        message.msg_iovlen = iovcnt;
> +    }
> +    message.msg_name = s->dgram_dst;
> +    message.msg_namelen = s->dst_size;
> +    message.msg_iov = s->vec;
> +    message.msg_control = NULL;
> +    message.msg_controllen = 0;
> +    message.msg_flags = 0;
> +    do {
> +        ret = sendmsg(s->fd, &message, 0);
> +    } while ((ret == -1) && (errno == EINTR));
> +    if (ret > 0) {
> +        ret -= s->offset;
> +    } else if (ret == 0) {
> +        /* belt and braces - should not occur on DGRAM
> +        * we should get an error and never a 0 send
> +        */
> +        ret = iov_size(iov, iovcnt);
> +    } else {
> +        /* signal upper layer that socket buffer is full */
> +        ret = -errno;
> +        if (ret == -EAGAIN || ret == -ENOBUFS) {
> +            unified_write_poll(s, true);
> +            ret = 0;
> +        }
> +    }
> +    return ret;
> +}
> +
> +static ssize_t net_unified_receive_dgram(NetClientState *nc,
> +                    const uint8_t *buf,
> +                    size_t size)
> +{
> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
> +
> +    struct iovec *vec;
> +    struct msghdr message;
> +    ssize_t ret = 0;
> +
> +    vec = s->vec;
> +    if (s->offset > 0) {
> +        s->form_header(s);
> +        vec->iov_base = s->header_buf;
> +        vec->iov_len = s->offset;
> +        message.msg_iovlen = 2;
> +        vec++;
> +    } else {
> +        message.msg_iovlen = 1;
> +    }
> +    vec->iov_base = (void *) buf;
> +    vec->iov_len = size;
> +    message.msg_name = s->dgram_dst;
> +    message.msg_namelen = s->dst_size;
> +    message.msg_iov = s->vec;
> +    message.msg_control = NULL;
> +    message.msg_controllen = 0;
> +    message.msg_flags = 0;
> +    do {
> +        ret = sendmsg(s->fd, &message, 0);
> +    } while ((ret == -1) && (errno == EINTR));
> +    if (ret > 0) {
> +        ret -= s->offset;
> +    } else if (ret == 0) {
> +        /* belt and braces - should not occur on DGRAM
> +        * we should get an error and never a 0 send
> +        */
> +        ret = size;
> +    } else {
> +        ret = -errno;
> +        if (ret == -EAGAIN || ret == -ENOBUFS) {
> +            /* signal upper layer that socket buffer is full */
> +            unified_write_poll(s, true);
> +            ret = 0;
> +        }
> +    }
> +    return ret;
> +}
> +
> +
> +static void net_unified_process_queue(NetUnifiedState *s)
> +{
> +    int size = 0;
> +    struct iovec *vec;
> +    bool bad_read;
> +    int data_size;
> +    struct mmsghdr *msgvec;
> +
> +    /* go into ring mode only if there is a "pending" tail */
> +    if (s->queue_depth > 0) {
> +        do {
> +            msgvec = s->msgvec + s->queue_tail;
> +            if (msgvec->msg_len > 0) {
> +                data_size = msgvec->msg_len - s->header_size;
> +                vec = msgvec->msg_hdr.msg_iov;
> +                if ((data_size > 0) &&
> +                    (s->verify_header(s, vec->iov_base) == 0)) {
> +                    if (s->header_size > 0) {
> +                        vec++;
> +                    }
> +                    /* Use the legacy delivery for now, we will
> +                     * switch to using our own ring as a queueing mechanism
> +                     * at a later date
> +                     */
> +                    size = qemu_send_packet_async(
> +                            &s->nc,
> +                            vec->iov_base,
> +                            data_size,
> +                            unified_send_completed
> +                        );
> +                    if (size == 0) {
> +                        unified_read_poll(s, false);
> +                    }
> +                    bad_read = false;
> +                } else {
> +                    bad_read = true;
> +                    if (!s->header_mismatch) {
> +                        /* report error only once */
> +                        error_report("unified header verification failed");
> +                        s->header_mismatch = true;
> +                    }
> +                }
> +            } else {
> +                bad_read = true;
> +            }
> +            s->queue_tail = (s->queue_tail + 1) % MAX_UNIFIED_MSGCNT;
> +            s->queue_depth--;
> +        } while (
> +                (s->queue_depth > 0) &&
> +                 qemu_can_send_packet(&s->nc) &&
> +                ((size > 0) || bad_read)
> +            );
> +    }
> +}
> +
> +static void net_unified_send(void *opaque)
> +{
> +    NetUnifiedState *s = opaque;
> +    int target_count, count;
> +    struct mmsghdr *msgvec;
> +
> +    /* go into ring mode only if there is a "pending" tail */
> +
> +    if (s->queue_depth) {
> +
> +        /* The ring buffer we use has variable intake
> +         * count of how much we can read varies - adjust accordingly
> +         */
> +
> +        target_count = MAX_UNIFIED_MSGCNT - s->queue_depth;
> +
> +        /* Ensure we do not overrun the ring when we have
> +         * a lot of enqueued packets
> +         */
> +
> +        if (s->queue_head + target_count > MAX_UNIFIED_MSGCNT) {
> +            target_count = MAX_UNIFIED_MSGCNT - s->queue_head;
> +        }
> +    } else {
> +
> +        /* we do not have any pending packets - we can use
> +        * the whole message vector linearly instead of using
> +        * it as a ring
> +        */
> +
> +        s->queue_head = 0;
> +        s->queue_tail = 0;
> +        target_count = MAX_UNIFIED_MSGCNT;
> +    }
> +
> +    msgvec = s->msgvec + s->queue_head;
> +    if (target_count > 0) {
> +        do {
> +            count = recvmmsg(
> +                s->fd,
> +                msgvec,
> +                target_count, MSG_DONTWAIT, NULL);
> +        } while ((count == -1) && (errno == EINTR));
> +        if (count < 0) {
> +            /* Recv error - we still need to flush packets here,
> +             * (re)set queue head to current position
> +             */
> +            count = 0;
> +        }
> +        s->queue_head = (s->queue_head + count) % MAX_UNIFIED_MSGCNT;
> +        s->queue_depth += count;
> +    }
> +    net_unified_process_queue(s);
> +}
> +
> +static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
> +{
> +    int i, j;
> +    struct iovec *iov;
> +    struct mmsghdr *cleanup = msgvec;
> +    if (cleanup) {
> +        for (i = 0; i < count; i++) {
> +            if (cleanup->msg_hdr.msg_iov) {
> +                iov = cleanup->msg_hdr.msg_iov;
> +                for (j = 0; j < iovcount; j++) {
> +                    g_free(iov->iov_base);
> +                    iov++;
> +                }
> +                g_free(cleanup->msg_hdr.msg_iov);
> +            }
> +            cleanup++;
> +        }
> +        g_free(msgvec);
> +    }
> +}
> +
> +
> +
> +static struct mmsghdr *build_unified_vector(NetUnifiedState *s, int count)
> +{
> +    int i;
> +    struct iovec *iov;
> +    struct mmsghdr *msgvec, *result;
> +
> +    msgvec = g_new(struct mmsghdr, count);
> +    result = msgvec;
> +    for (i = 0; i < count ; i++) {
> +        msgvec->msg_hdr.msg_name = NULL;
> +        msgvec->msg_hdr.msg_namelen = 0;
> +        iov =  g_new(struct iovec, IOVSIZE);
> +        msgvec->msg_hdr.msg_iov = iov;
> +        if (s->header_size > 0) {

Same here.

> +            iov->iov_base = g_malloc(s->header_size);
> +            iov->iov_len = s->header_size;
> +            iov++ ;
> +        }
> +        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
> +        iov->iov_len = BUFFER_SIZE;
> +        msgvec->msg_hdr.msg_iovlen = 2;
> +        msgvec->msg_hdr.msg_control = NULL;
> +        msgvec->msg_hdr.msg_controllen = 0;
> +        msgvec->msg_hdr.msg_flags = 0;
> +        msgvec++;
> +    }
> +    return result;
> +}
> +
> +static void net_unified_cleanup(NetClientState *nc)
> +{
> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
> +    qemu_purge_queued_packets(nc);
> +    unified_read_poll(s, false);
> +    unified_write_poll(s, false);
> +    if (s->fd >= 0) {
> +        close(s->fd);
> +    }
> +    if (s->header_size > 0) {
> +        destroy_vector(s->msgvec, MAX_UNIFIED_MSGCNT, IOVSIZE);
> +    } else {
> +        destroy_vector(s->msgvec, MAX_UNIFIED_MSGCNT, 1);
> +    }
> +    g_free(s->vec);
> +    if (s->header_buf != NULL) {
> +        g_free(s->header_buf);
> +    }
> +    if (s->dgram_dst != NULL) {
> +        g_free(s->dgram_dst);
> +    }
> +}
> +
> +static NetClientInfo net_unified_info = {
> +    /* we share this one for all types for now, wrong I know :) */
> +    .type = NET_CLIENT_DRIVER_L2TPV3,

Like I said above, better to have transport specific type.

Thanks

> +    .size = sizeof(NetUnifiedState),
> +    .receive = net_unified_receive_dgram,
> +    .receive_iov = net_unified_receive_dgram_iov,
> +    .poll = unified_poll,
> +    .cleanup = net_unified_cleanup,
> +};
> +
> +NetClientState *qemu_new_unified_net_client(const char *name,
> +                    NetClientState *peer) {
> +    return qemu_new_net_client(&net_unified_info, peer, "unified", name);
> +}
> +
> +void qemu_net_finalize_unified_init(NetUnifiedState *s, int fd)
> +{
> +
> +    s->msgvec = build_unified_vector(s, MAX_UNIFIED_MSGCNT);
> +    s->vec = g_new(struct iovec, MAX_UNIFIED_IOVCNT);
> +    if (s->header_size > 0) {
> +        s->header_buf = g_malloc(s->header_size);
> +    } else {
> +        s->header_buf = NULL;
> +    }
> +    qemu_set_nonblock(fd);
> +
> +    s->fd = fd;
> +    unified_read_poll(s, true);
> +
> +}
> +
> diff --git a/net/unified.h b/net/unified.h
> new file mode 100644
> index 0000000000..97ec743f0e
> --- /dev/null
> +++ b/net/unified.h
> @@ -0,0 +1,118 @@
> +/*
> + * QEMU System Emulator
> + *
> + * Copyright (c) 2015-2017 Cambridge Greys Limited
> + * Copyright (c) 2012-2014 Cisco Systems
> + * Copyright (c) 2003-2008 Fabrice Bellard
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu/osdep.h"
> +
> +
> +#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
> +#define BUFFER_SIZE 2048
> +#define IOVSIZE 2
> +#define MAX_UNIFIED_MSGCNT 64
> +#define MAX_UNIFIED_IOVCNT (MAX_UNIFIED_MSGCNT * IOVSIZE)
> +
> +#ifndef QEMU_NET_UNIFIED_H
> +#define QEMU_NET_UNIFIED_H
> +
> +typedef struct NetUnifiedState {
> +    NetClientState nc;
> +
> +    int fd;
> +
> +    /*
> +     * these are used for xmit - that happens packet a time
> +     * and for first sign of life packet (easier to parse that once)
> +     */
> +
> +    uint8_t *header_buf;
> +    struct iovec *vec;
> +
> +    /*
> +     * these are used for receive - try to "eat" up to 32 packets at a time
> +     */
> +
> +    struct mmsghdr *msgvec;
> +
> +    /*
> +     * peer address
> +     */
> +
> +    struct sockaddr_storage *dgram_dst;
> +    uint32_t dst_size;
> +
> +    /*
> +     * Internal Queue
> +     */
> +
> +    /*
> +    * DOS avoidance in error handling
> +    */
> +
> +    /* Easier to keep l2tpv3 specific */
> +
> +    bool header_mismatch;
> +
> +    /*
> +     *
> +     * Ring buffer handling
> +     *
> +     */
> +
> +    int queue_head;
> +    int queue_tail;
> +    int queue_depth;
> +
> +    /*
> +     * Offset to data - common for all protocols
> +     */
> +
> +    uint32_t offset;
> +
> +    /*
> +     * Header size - common for all protocols
> +     */
> +
> +    uint32_t header_size;
> +    /* Poll Control */
> +
> +    bool read_poll;
> +    bool write_poll;
> +
> +    /* Parameters */
> +
> +    void *params;
> +
> +    /* header forming functions */
> +
> +    int (*verify_header)(void *s, uint8_t *buf);
> +    void (*form_header)(void *s);
> +
> +} NetUnifiedState;
> +
> +extern NetClientState *qemu_new_unified_net_client(const char *name,
> +                    NetClientState *peer);
> +
> +extern void qemu_net_finalize_unified_init(NetUnifiedState *s, int fd);
> +#endif
Anton Ivanov July 19, 2017, 5:48 a.m. UTC | #2
On 19/07/17 06:39, Jason Wang wrote:
>
>
> On 2017年07月19日 01:08, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> 1. Creates a common backend for socket transports using
>> recvmmsg().
>> 2. Migrates L2TPv3 to the new backend
>
> It would be better if you could further split out 2 from this patch.
>
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   configure         |  10 +-
>>   net/Makefile.objs |   2 +-
>>   net/l2tpv3.c      | 531 
>> +++++++++---------------------------------------------
>>   net/net.c         |   4 +-
>>   net/unified.c     | 406 +++++++++++++++++++++++++++++++++++++++++
>>   net/unified.h     | 118 ++++++++++++
>>   6 files changed, 613 insertions(+), 458 deletions(-)
>>   create mode 100644 net/unified.c
>>   create mode 100644 net/unified.h
>>
>> diff --git a/configure b/configure
>> index a3f0522e8f..99a60b723c 100755
>> --- a/configure
>> +++ b/configure
>> @@ -1862,7 +1862,7 @@ if ! compile_object -Werror ; then
>>   fi
>>     ##########################################
>> -# L2TPV3 probe
>> +# UNIFIED probe
>>     cat > $TMPC <<EOF
>>   #include <sys/socket.h>
>> @@ -1870,9 +1870,9 @@ cat > $TMPC <<EOF
>>   int main(void) { return sizeof(struct mmsghdr); }
>>   EOF
>>   if compile_prog "" "" ; then
>> -  l2tpv3=yes
>> +  unified=yes
>>   else
>> -  l2tpv3=no
>> +  unified=no
>>   fi
>>     ##########################################
>> @@ -5458,8 +5458,8 @@ fi
>>   if test "$netmap" = "yes" ; then
>>     echo "CONFIG_NETMAP=y" >> $config_host_mak
>>   fi
>> -if test "$l2tpv3" = "yes" ; then
>> -  echo "CONFIG_L2TPV3=y" >> $config_host_mak
>> +if test "$unified" = "yes" ; then
>> +  echo "CONFIG_UNIFIED=y" >> $config_host_mak
>>   fi
>
> Could we keep l2tpv3 option?

The l2tpv3 test is actually a test for recvmmsg. If you can do one 
recvmmsg transport you can do all of them.

>
>>   if test "$cap_ng" = "yes" ; then
>>     echo "CONFIG_LIBCAP=y" >> $config_host_mak
>> diff --git a/net/Makefile.objs b/net/Makefile.objs
>> index 67ba5e26fb..8026ad778a 100644
>> --- a/net/Makefile.objs
>> +++ b/net/Makefile.objs
>> @@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
>>   common-obj-y += socket.o
>>   common-obj-y += dump.o
>>   common-obj-y += eth.o
>> -common-obj-$(CONFIG_L2TPV3) += l2tpv3.o
>> +common-obj-$(CONFIG_UNIFIED) += l2tpv3.o unified.o
>>   common-obj-$(CONFIG_POSIX) += vhost-user.o
>>   common-obj-$(CONFIG_SLIRP) += slirp.o
>>   common-obj-$(CONFIG_VDE) += vde.o
>> diff --git a/net/l2tpv3.c b/net/l2tpv3.c
>> index 6745b78990..05413c9cbd 100644
>> --- a/net/l2tpv3.c
>> +++ b/net/l2tpv3.c
>> @@ -1,6 +1,7 @@
>>   /*
>>    * QEMU System Emulator
>>    *
>> + * Copyright (c) 2015-2017 Cambridge Greys Limited
>>    * Copyright (c) 2003-2008 Fabrice Bellard
>>    * Copyright (c) 2012-2014 Cisco Systems
>>    *
>> @@ -34,19 +35,9 @@
>>   #include "qemu/sockets.h"
>>   #include "qemu/iov.h"
>>   #include "qemu/main-loop.h"
>> +#include "unified.h"
>>     -/* The buffer size needs to be investigated for optimum numbers and
>> - * optimum means of paging in on different systems. This size is
>> - * chosen to be sufficient to accommodate one packet with some headers
>> - */
>> -
>> -#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
>> -#define BUFFER_SIZE 2048
>> -#define IOVSIZE 2
>> -#define MAX_L2TPV3_MSGCNT 64
>> -#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
>> -
>>   /* Header set to 0x30000 signifies a data packet */
>>     #define L2TPV3_DATA_PACKET 0x30000
>> @@ -57,31 +48,7 @@
>>   #define IPPROTO_L2TP 0x73
>>   #endif
>>   -typedef struct NetL2TPV3State {
>> -    NetClientState nc;
>> -    int fd;
>> -
>> -    /*
>> -     * these are used for xmit - that happens packet a time
>> -     * and for first sign of life packet (easier to parse that once)
>> -     */
>> -
>> -    uint8_t *header_buf;
>> -    struct iovec *vec;
>> -
>> -    /*
>> -     * these are used for receive - try to "eat" up to 32 packets at 
>> a time
>> -     */
>> -
>> -    struct mmsghdr *msgvec;
>> -
>> -    /*
>> -     * peer address
>> -     */
>> -
>> -    struct sockaddr_storage *dgram_dst;
>> -    uint32_t dst_size;
>> -
>> +typedef struct L2TPV3TunnelParams {
>>       /*
>>        * L2TPv3 parameters
>>        */
>> @@ -90,37 +57,8 @@ typedef struct NetL2TPV3State {
>>       uint64_t tx_cookie;
>>       uint32_t rx_session;
>>       uint32_t tx_session;
>> -    uint32_t header_size;
>>       uint32_t counter;
>>   -    /*
>> -    * DOS avoidance in error handling
>> -    */
>> -
>> -    bool header_mismatch;
>> -
>> -    /*
>> -     * Ring buffer handling
>> -     */
>> -
>> -    int queue_head;
>> -    int queue_tail;
>> -    int queue_depth;
>> -
>> -    /*
>> -     * Precomputed offsets
>> -     */
>> -
>> -    uint32_t offset;
>> -    uint32_t cookie_offset;
>> -    uint32_t counter_offset;
>> -    uint32_t session_offset;
>> -
>> -    /* Poll Control */
>> -
>> -    bool read_poll;
>> -    bool write_poll;
>> -
>>       /* Flags */
>>         bool ipv6;
>> @@ -130,189 +68,62 @@ typedef struct NetL2TPV3State {
>>       bool cookie;
>>       bool cookie_is_64;
>>   -} NetL2TPV3State;
>> -
>> -static void net_l2tpv3_send(void *opaque);
>> -static void l2tpv3_writable(void *opaque);
>> -
>> -static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
>> -{
>> -    qemu_set_fd_handler(s->fd,
>> -                        s->read_poll ? net_l2tpv3_send : NULL,
>> -                        s->write_poll ? l2tpv3_writable : NULL,
>> -                        s);
>> -}
>> -
>> -static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
>> -{
>> -    if (s->read_poll != enable) {
>> -        s->read_poll = enable;
>> -        l2tpv3_update_fd_handler(s);
>> -    }
>> -}
>> +    /* Precomputed L2TPV3 specific offsets */
>> +    uint32_t cookie_offset;
>> +    uint32_t counter_offset;
>> +    uint32_t session_offset;
>>   -static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
>> -{
>> -    if (s->write_poll != enable) {
>> -        s->write_poll = enable;
>> -        l2tpv3_update_fd_handler(s);
>> -    }
>> -}
>> +} L2TPV3TunnelParams;
>>   -static void l2tpv3_writable(void *opaque)
>> -{
>> -    NetL2TPV3State *s = opaque;
>> -    l2tpv3_write_poll(s, false);
>> -    qemu_flush_queued_packets(&s->nc);
>> -}
>>   -static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
>> -{
>> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
>> -    l2tpv3_read_poll(s, true);
>> -}
>>   -static void l2tpv3_poll(NetClientState *nc, bool enable)
>> +static void l2tpv3_form_header(void *us)
>>   {
>> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
>> -    l2tpv3_write_poll(s, enable);
>> -    l2tpv3_read_poll(s, enable);
>> -}
>> +    NetUnifiedState *s = (NetUnifiedState *) us;
>> +    L2TPV3TunnelParams *p = (L2TPV3TunnelParams *) s->params;
>
> How about embedding NetUnifiedState into this structure and keep using 
> NetL2TPV3State? Then:
>
> -  's' could be kept and lots of lines of changes could be saved here 
> and l2tpv3_verify_header()
> -  each transport could have their own type instead of using 
> NET_CLIENT_DRIVER_L2TPV3

Good idea. I will try it and see how it pans out.

>
> ?
>
>>   -static void l2tpv3_form_header(NetL2TPV3State *s)
>> -{
>>       uint32_t *counter;
>>   -    if (s->udp) {
>> +    if (p->udp) {
>>           stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
>>       }
>>       stl_be_p(
>> -            (uint32_t *) (s->header_buf + s->session_offset),
>> -            s->tx_session
>> +            (uint32_t *) (s->header_buf + p->session_offset),
>> +            p->tx_session
>>           );
>> -    if (s->cookie) {
>> -        if (s->cookie_is_64) {
>> +    if (p->cookie) {
>> +        if (p->cookie_is_64) {
>>               stq_be_p(
>> -                (uint64_t *)(s->header_buf + s->cookie_offset),
>> -                s->tx_cookie
>> +                (uint64_t *)(s->header_buf + p->cookie_offset),
>> +                p->tx_cookie
>>               );
>>           } else {
>>               stl_be_p(
>> -                (uint32_t *) (s->header_buf + s->cookie_offset),
>> -                s->tx_cookie
>> +                (uint32_t *) (s->header_buf + p->cookie_offset),
>> +                p->tx_cookie
>>               );
>>           }
>>       }
>> -    if (s->has_counter) {
>> -        counter = (uint32_t *)(s->header_buf + s->counter_offset);
>> -        if (s->pin_counter) {
>> +    if (p->has_counter) {
>> +        counter = (uint32_t *)(s->header_buf + p->counter_offset);
>> +        if (p->pin_counter) {
>>               *counter = 0;
>>           } else {
>> -            stl_be_p(counter, ++s->counter);
>> -        }
>> -    }
>> -}
>> -
>> -static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
>> -                    const struct iovec *iov,
>> -                    int iovcnt)
>> -{
>> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
>> -
>> -    struct msghdr message;
>> -    int ret;
>> -
>> -    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
>> -        error_report(
>> -            "iovec too long %d > %d, change l2tpv3.h",
>> -            iovcnt, MAX_L2TPV3_IOVCNT
>> -        );
>> -        return -1;
>> -    }
>> -    l2tpv3_form_header(s);
>> -    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
>> -    s->vec->iov_base = s->header_buf;
>> -    s->vec->iov_len = s->offset;
>> -    message.msg_name = s->dgram_dst;
>> -    message.msg_namelen = s->dst_size;
>> -    message.msg_iov = s->vec;
>> -    message.msg_iovlen = iovcnt + 1;
>> -    message.msg_control = NULL;
>> -    message.msg_controllen = 0;
>> -    message.msg_flags = 0;
>> -    do {
>> -        ret = sendmsg(s->fd, &message, 0);
>> -    } while ((ret == -1) && (errno == EINTR));
>> -    if (ret > 0) {
>> -        ret -= s->offset;
>> -    } else if (ret == 0) {
>> -        /* belt and braces - should not occur on DGRAM
>> -        * we should get an error and never a 0 send
>> -        */
>> -        ret = iov_size(iov, iovcnt);
>> -    } else {
>> -        /* signal upper layer that socket buffer is full */
>> -        ret = -errno;
>> -        if (ret == -EAGAIN || ret == -ENOBUFS) {
>> -            l2tpv3_write_poll(s, true);
>> -            ret = 0;
>> +            stl_be_p(counter, ++p->counter);
>>           }
>>       }
>> -    return ret;
>>   }
>>   -static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
>> -                    const uint8_t *buf,
>> -                    size_t size)
>> -{
>> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
>> -
>> -    struct iovec *vec;
>> -    struct msghdr message;
>> -    ssize_t ret = 0;
>> -
>> -    l2tpv3_form_header(s);
>> -    vec = s->vec;
>> -    vec->iov_base = s->header_buf;
>> -    vec->iov_len = s->offset;
>> -    vec++;
>> -    vec->iov_base = (void *) buf;
>> -    vec->iov_len = size;
>> -    message.msg_name = s->dgram_dst;
>> -    message.msg_namelen = s->dst_size;
>> -    message.msg_iov = s->vec;
>> -    message.msg_iovlen = 2;
>> -    message.msg_control = NULL;
>> -    message.msg_controllen = 0;
>> -    message.msg_flags = 0;
>> -    do {
>> -        ret = sendmsg(s->fd, &message, 0);
>> -    } while ((ret == -1) && (errno == EINTR));
>> -    if (ret > 0) {
>> -        ret -= s->offset;
>> -    } else if (ret == 0) {
>> -        /* belt and braces - should not occur on DGRAM
>> -        * we should get an error and never a 0 send
>> -        */
>> -        ret = size;
>> -    } else {
>> -        ret = -errno;
>> -        if (ret == -EAGAIN || ret == -ENOBUFS) {
>> -            /* signal upper layer that socket buffer is full */
>> -            l2tpv3_write_poll(s, true);
>> -            ret = 0;
>> -        }
>> -    }
>> -    return ret;
>> -}
>>   -static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
>> +static int l2tpv3_verify_header(void *us, uint8_t *buf)
>>   {
>>   +    NetUnifiedState *s = (NetUnifiedState *) us;
>> +    L2TPV3TunnelParams *p = (L2TPV3TunnelParams *) s->params;
>>       uint32_t *session;
>>       uint64_t cookie;
>>   -    if ((!s->udp) && (!s->ipv6)) {
>> +    if ((!p->udp) && (!p->ipv6)) {
>>           buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
>>       }
>>   @@ -321,21 +132,21 @@ static int 
>> l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
>>       * that anyway.
>>       */
>>   -    if (s->cookie) {
>> -        if (s->cookie_is_64) {
>> -            cookie = ldq_be_p(buf + s->cookie_offset);
>> +    if (p->cookie) {
>> +        if (p->cookie_is_64) {
>> +            cookie = ldq_be_p(buf + p->cookie_offset);
>>           } else {
>> -            cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
>> +            cookie = ldl_be_p(buf + p->cookie_offset) & 0xffffffffULL;
>>           }
>> -        if (cookie != s->rx_cookie) {
>> +        if (cookie != p->rx_cookie) {
>>               if (!s->header_mismatch) {
>>                   error_report("unknown cookie id");
>>               }
>>               return -1;
>>           }
>>       }
>> -    session = (uint32_t *) (buf + s->session_offset);
>> -    if (ldl_be_p(session) != s->rx_session) {
>> +    session = (uint32_t *) (buf + p->session_offset);
>> +    if (ldl_be_p(session) != p->rx_session) {
>>           if (!s->header_mismatch) {
>>               error_report("session mismatch");
>>           }
>> @@ -344,203 +155,31 @@ static int l2tpv3_verify_header(NetL2TPV3State 
>> *s, uint8_t *buf)
>>       return 0;
>>   }
>>   -static void net_l2tpv3_process_queue(NetL2TPV3State *s)
>> -{
>> -    int size = 0;
>> -    struct iovec *vec;
>> -    bool bad_read;
>> -    int data_size;
>> -    struct mmsghdr *msgvec;
>> -
>> -    /* go into ring mode only if there is a "pending" tail */
>> -    if (s->queue_depth > 0) {
>> -        do {
>> -            msgvec = s->msgvec + s->queue_tail;
>> -            if (msgvec->msg_len > 0) {
>> -                data_size = msgvec->msg_len - s->header_size;
>> -                vec = msgvec->msg_hdr.msg_iov;
>> -                if ((data_size > 0) &&
>> -                    (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
>> -                    vec++;
>> -                    /* Use the legacy delivery for now, we will
>> -                     * switch to using our own ring as a queueing 
>> mechanism
>> -                     * at a later date
>> -                     */
>> -                    size = qemu_send_packet_async(
>> -                            &s->nc,
>> -                            vec->iov_base,
>> -                            data_size,
>> -                            l2tpv3_send_completed
>> -                        );
>> -                    if (size == 0) {
>> -                        l2tpv3_read_poll(s, false);
>> -                    }
>> -                    bad_read = false;
>> -                } else {
>> -                    bad_read = true;
>> -                    if (!s->header_mismatch) {
>> -                        /* report error only once */
>> -                        error_report("l2tpv3 header verification 
>> failed");
>> -                        s->header_mismatch = true;
>> -                    }
>> -                }
>> -            } else {
>> -                bad_read = true;
>> -            }
>> -            s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
>> -            s->queue_depth--;
>> -        } while (
>> -                (s->queue_depth > 0) &&
>> -                 qemu_can_send_packet(&s->nc) &&
>> -                ((size > 0) || bad_read)
>> -            );
>> -    }
>> -}
>> -
>> -static void net_l2tpv3_send(void *opaque)
>> -{
>> -    NetL2TPV3State *s = opaque;
>> -    int target_count, count;
>> -    struct mmsghdr *msgvec;
>> -
>> -    /* go into ring mode only if there is a "pending" tail */
>> -
>> -    if (s->queue_depth) {
>> -
>> -        /* The ring buffer we use has variable intake
>> -         * count of how much we can read varies - adjust accordingly
>> -         */
>> -
>> -        target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
>> -
>> -        /* Ensure we do not overrun the ring when we have
>> -         * a lot of enqueued packets
>> -         */
>> -
>> -        if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
>> -            target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
>> -        }
>> -    } else {
>> -
>> -        /* we do not have any pending packets - we can use
>> -        * the whole message vector linearly instead of using
>> -        * it as a ring
>> -        */
>> -
>> -        s->queue_head = 0;
>> -        s->queue_tail = 0;
>> -        target_count = MAX_L2TPV3_MSGCNT;
>> -    }
>> -
>> -    msgvec = s->msgvec + s->queue_head;
>> -    if (target_count > 0) {
>> -        do {
>> -            count = recvmmsg(
>> -                s->fd,
>> -                msgvec,
>> -                target_count, MSG_DONTWAIT, NULL);
>> -        } while ((count == -1) && (errno == EINTR));
>> -        if (count < 0) {
>> -            /* Recv error - we still need to flush packets here,
>> -             * (re)set queue head to current position
>> -             */
>> -            count = 0;
>> -        }
>> -        s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
>> -        s->queue_depth += count;
>> -    }
>> -    net_l2tpv3_process_queue(s);
>> -}
>> -
>> -static void destroy_vector(struct mmsghdr *msgvec, int count, int 
>> iovcount)
>> -{
>> -    int i, j;
>> -    struct iovec *iov;
>> -    struct mmsghdr *cleanup = msgvec;
>> -    if (cleanup) {
>> -        for (i = 0; i < count; i++) {
>> -            if (cleanup->msg_hdr.msg_iov) {
>> -                iov = cleanup->msg_hdr.msg_iov;
>> -                for (j = 0; j < iovcount; j++) {
>> -                    g_free(iov->iov_base);
>> -                    iov++;
>> -                }
>> -                g_free(cleanup->msg_hdr.msg_iov);
>> -            }
>> -            cleanup++;
>> -        }
>> -        g_free(msgvec);
>> -    }
>> -}
>> -
>> -static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int 
>> count)
>> -{
>> -    int i;
>> -    struct iovec *iov;
>> -    struct mmsghdr *msgvec, *result;
>> -
>> -    msgvec = g_new(struct mmsghdr, count);
>> -    result = msgvec;
>> -    for (i = 0; i < count ; i++) {
>> -        msgvec->msg_hdr.msg_name = NULL;
>> -        msgvec->msg_hdr.msg_namelen = 0;
>> -        iov =  g_new(struct iovec, IOVSIZE);
>> -        msgvec->msg_hdr.msg_iov = iov;
>> -        iov->iov_base = g_malloc(s->header_size);
>> -        iov->iov_len = s->header_size;
>> -        iov++ ;
>> -        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
>> -        iov->iov_len = BUFFER_SIZE;
>> -        msgvec->msg_hdr.msg_iovlen = 2;
>> -        msgvec->msg_hdr.msg_control = NULL;
>> -        msgvec->msg_hdr.msg_controllen = 0;
>> -        msgvec->msg_hdr.msg_flags = 0;
>> -        msgvec++;
>> -    }
>> -    return result;
>> -}
>> -
>> -static void net_l2tpv3_cleanup(NetClientState *nc)
>> -{
>> -    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
>> -    qemu_purge_queued_packets(nc);
>> -    l2tpv3_read_poll(s, false);
>> -    l2tpv3_write_poll(s, false);
>> -    if (s->fd >= 0) {
>> -        close(s->fd);
>> -    }
>> -    destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
>> -    g_free(s->vec);
>> -    g_free(s->header_buf);
>> -    g_free(s->dgram_dst);
>> -}
>> -
>> -static NetClientInfo net_l2tpv3_info = {
>> -    .type = NET_CLIENT_DRIVER_L2TPV3,
>> -    .size = sizeof(NetL2TPV3State),
>> -    .receive = net_l2tpv3_receive_dgram,
>> -    .receive_iov = net_l2tpv3_receive_dgram_iov,
>> -    .poll = l2tpv3_poll,
>> -    .cleanup = net_l2tpv3_cleanup,
>> -};
>> -
>>   int net_init_l2tpv3(const Netdev *netdev,
>>                       const char *name,
>>                       NetClientState *peer, Error **errp)
>>   {
>>       /* FIXME error_setg(errp, ...) on failure */
>>       const NetdevL2TPv3Options *l2tpv3;
>> -    NetL2TPV3State *s;
>> +    NetUnifiedState *s;
>>       NetClientState *nc;
>> +    L2TPV3TunnelParams *p;
>> +
>>       int fd = -1, gairet;
>>       struct addrinfo hints;
>>       struct addrinfo *result = NULL;
>>       char *srcport, *dstport;
>>   -    nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
>> +    nc = qemu_new_unified_net_client(name, peer);
>> +
>> +    s = DO_UPCAST(NetUnifiedState, nc, nc);
>> +
>> +    p = g_malloc(sizeof(L2TPV3TunnelParams));
>
> Where was this freed?
>
>>   -    s = DO_UPCAST(NetL2TPV3State, nc, nc);
>> +    s->params = p;
>>   +    s->form_header = &l2tpv3_form_header;
>> +    s->verify_header = &l2tpv3_verify_header;
>>       s->queue_head = 0;
>>       s->queue_tail = 0;
>>       s->header_mismatch = false;
>
> Why not move all above into qemu_new_unified_net()?

Only queue head/tail assignment can move.

raw which uses same backend does not use header_mismatch. Form/verify 
header are different for each sub-transport. F.e. for gre you need the 
gre one, for raw you need the raw one, etc.

>
>> @@ -549,9 +188,9 @@ int net_init_l2tpv3(const Netdev *netdev,
>>       l2tpv3 = &netdev->u.l2tpv3;
>>         if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
>> -        s->ipv6 = l2tpv3->ipv6;
>> +        p->ipv6 = l2tpv3->ipv6;
>>       } else {
>> -        s->ipv6 = false;
>> +        p->ipv6 = false;
>>       }
>>         if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
>> @@ -561,22 +200,22 @@ int net_init_l2tpv3(const Netdev *netdev,
>>         if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
>>           if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
>> -            s->cookie = true;
>> +            p->cookie = true;
>>           } else {
>>               goto outerr;
>>           }
>>       } else {
>> -        s->cookie = false;
>> +        p->cookie = false;
>>       }
>>         if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
>> -        s->cookie_is_64  = true;
>> +        p->cookie_is_64  = true;
>>       } else {
>> -        s->cookie_is_64  = false;
>> +        p->cookie_is_64  = false;
>>       }
>>         if (l2tpv3->has_udp && l2tpv3->udp) {
>> -        s->udp = true;
>> +        p->udp = true;
>>           if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
>>               error_report("l2tpv3_open : need both src and dst port 
>> for udp");
>>               goto outerr;
>> @@ -585,52 +224,52 @@ int net_init_l2tpv3(const Netdev *netdev,
>>               dstport = l2tpv3->dstport;
>>           }
>>       } else {
>> -        s->udp = false;
>> +        p->udp = false;
>>           srcport = NULL;
>>           dstport = NULL;
>>       }
>>           s->offset = 4;
>> -    s->session_offset = 0;
>> -    s->cookie_offset = 4;
>> -    s->counter_offset = 4;
>> +    p->session_offset = 0;
>> +    p->cookie_offset = 4;
>> +    p->counter_offset = 4;
>>   -    s->tx_session = l2tpv3->txsession;
>> +    p->tx_session = l2tpv3->txsession;
>>       if (l2tpv3->has_rxsession) {
>> -        s->rx_session = l2tpv3->rxsession;
>> +        p->rx_session = l2tpv3->rxsession;
>>       } else {
>> -        s->rx_session = s->tx_session;
>> +        p->rx_session = p->tx_session;
>>       }
>>   -    if (s->cookie) {
>> -        s->rx_cookie = l2tpv3->rxcookie;
>> -        s->tx_cookie = l2tpv3->txcookie;
>> -        if (s->cookie_is_64 == true) {
>> +    if (p->cookie) {
>> +        p->rx_cookie = l2tpv3->rxcookie;
>> +        p->tx_cookie = l2tpv3->txcookie;
>> +        if (p->cookie_is_64 == true) {
>>               /* 64 bit cookie */
>>               s->offset += 8;
>> -            s->counter_offset += 8;
>> +            p->counter_offset += 8;
>>           } else {
>>               /* 32 bit cookie */
>>               s->offset += 4;
>> -            s->counter_offset += 4;
>> +            p->counter_offset += 4;
>>           }
>>       }
>>         memset(&hints, 0, sizeof(hints));
>>   -    if (s->ipv6) {
>> +    if (p->ipv6) {
>>           hints.ai_family = AF_INET6;
>>       } else {
>>           hints.ai_family = AF_INET;
>>       }
>> -    if (s->udp) {
>> +    if (p->udp) {
>>           hints.ai_socktype = SOCK_DGRAM;
>>           hints.ai_protocol = 0;
>>           s->offset += 4;
>> -        s->counter_offset += 4;
>> -        s->session_offset += 4;
>> -        s->cookie_offset += 4;
>> +        p->counter_offset += 4;
>> +        p->session_offset += 4;
>> +        p->cookie_offset += 4;
>>       } else {
>>           hints.ai_socktype = SOCK_RAW;
>>           hints.ai_protocol = IPPROTO_L2TP;
>> @@ -661,12 +300,12 @@ int net_init_l2tpv3(const Netdev *netdev,
>>         memset(&hints, 0, sizeof(hints));
>>   -    if (s->ipv6) {
>> +    if (p->ipv6) {
>>           hints.ai_family = AF_INET6;
>>       } else {
>>           hints.ai_family = AF_INET;
>>       }
>> -    if (s->udp) {
>> +    if (p->udp) {
>>           hints.ai_socktype = SOCK_DGRAM;
>>           hints.ai_protocol = 0;
>>       } else {
>> @@ -693,17 +332,17 @@ int net_init_l2tpv3(const Netdev *netdev,
>>       }
>>         if (l2tpv3->has_counter && l2tpv3->counter) {
>> -        s->has_counter = true;
>> +        p->has_counter = true;
>>           s->offset += 4;
>>       } else {
>> -        s->has_counter = false;
>> +        p->has_counter = false;
>>       }
>>         if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
>> -        s->has_counter = true;  /* pin counter implies that there is 
>> counter */
>> -        s->pin_counter = true;
>> +        p->has_counter = true;  /* pin counter implies that there is 
>> counter */
>> +        p->pin_counter = true;
>>       } else {
>> -        s->pin_counter = false;
>> +        p->pin_counter = false;
>>       }
>>         if (l2tpv3->has_offset) {
>> @@ -711,22 +350,14 @@ int net_init_l2tpv3(const Netdev *netdev,
>>           s->offset += l2tpv3->offset;
>>       }
>>   -    if ((s->ipv6) || (s->udp)) {
>> +    if ((p->ipv6) || (p->udp)) {
>>           s->header_size = s->offset;
>>       } else {
>>           s->header_size = s->offset + sizeof(struct iphdr);
>>       }
>>   -    s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
>> -    s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
>> -    s->header_buf = g_malloc(s->header_size);
>> -
>> -    qemu_set_nonblock(fd);
>> -
>> -    s->fd = fd;
>> -    s->counter = 0;
>> -
>> -    l2tpv3_read_poll(s, true);
>> +    qemu_net_finalize_unified_init(s, fd);
>> +    p->counter = 0;
>>         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
>>                "l2tpv3: connected");
>> diff --git a/net/net.c b/net/net.c
>> index 6235aabed8..9270b52ac8 100644
>> --- a/net/net.c
>> +++ b/net/net.c
>> @@ -959,8 +959,8 @@ static int (* const 
>> net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
>>   #ifdef CONFIG_VHOST_NET_USED
>>           [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
>>   #endif
>> -#ifdef CONFIG_L2TPV3
>> -        [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
>> +#ifdef CONFIG_UNIFIED
>> +        [NET_CLIENT_DRIVER_L2TPV3] = net_init_l2tpv3,
>>   #endif
>>   };
>>   diff --git a/net/unified.c b/net/unified.c
>
> Not a native speaker, but I think we need a better name here e.g udst 
> which is short for Unified Datagram Socket Transport?

I am not a native speaker either :)

I am OK - let's call it udst as this is more descriptive and this 
clearly delineates that you cannot
migrate tcp/socket to it.

>
>> new file mode 100644
>> index 0000000000..f15d1e1eed
>> --- /dev/null
>> +++ b/net/unified.c
>> @@ -0,0 +1,406 @@
>> +/*
>> + * QEMU System Emulator
>> + *
>> + * Copyright (c) 2015-2017 Cambridge Greys Limited
>> + * Copyright (c) 2012-2014 Cisco Systems
>> + * Copyright (c) 2003-2008 Fabrice Bellard
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a copy
>> + * of this software and associated documentation files (the 
>> "Software"), to deal
>> + * in the Software without restriction, including without limitation 
>> the rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, 
>> and/or sell
>> + * copies of the Software, and to permit persons to whom the 
>> Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be 
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 
>> OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
>> ARISING FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
>> DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include <linux/ip.h>
>> +#include <netdb.h>
>> +#include "net/net.h"
>> +#include "clients.h"
>> +#include "qemu-common.h"
>> +#include "qemu/error-report.h"
>> +#include "qemu/option.h"
>> +#include "qemu/sockets.h"
>> +#include "qemu/iov.h"
>> +#include "qemu/main-loop.h"
>> +#include "unified.h"
>> +
>> +static void net_unified_send(void *opaque);
>> +static void unified_writable(void *opaque);
>> +
>> +static void unified_update_fd_handler(NetUnifiedState *s)
>> +{
>> +    qemu_set_fd_handler(s->fd,
>> +                        s->read_poll ? net_unified_send : NULL,
>> +                        s->write_poll ? unified_writable : NULL,
>> +                        s);
>> +}
>> +
>> +static void unified_read_poll(NetUnifiedState *s, bool enable)
>> +{
>> +    if (s->read_poll != enable) {
>> +        s->read_poll = enable;
>> +        unified_update_fd_handler(s);
>> +    }
>> +}
>> +
>> +static void unified_write_poll(NetUnifiedState *s, bool enable)
>> +{
>> +    if (s->write_poll != enable) {
>> +        s->write_poll = enable;
>> +        unified_update_fd_handler(s);
>> +    }
>> +}
>> +
>> +static void unified_writable(void *opaque)
>> +{
>> +    NetUnifiedState *s = opaque;
>> +    unified_write_poll(s, false);
>> +    qemu_flush_queued_packets(&s->nc);
>> +}
>> +
>> +static void unified_send_completed(NetClientState *nc, ssize_t len)
>> +{
>> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
>> +    unified_read_poll(s, true);
>> +}
>> +
>> +static void unified_poll(NetClientState *nc, bool enable)
>> +{
>> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
>> +    unified_write_poll(s, enable);
>> +    unified_read_poll(s, enable);
>> +}
>> +
>> +static ssize_t net_unified_receive_dgram_iov(NetClientState *nc,
>> +                    const struct iovec *iov,
>> +                    int iovcnt)
>> +{
>> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
>> +
>> +    struct msghdr message;
>> +    int ret;
>> +
>> +    if (iovcnt > MAX_UNIFIED_IOVCNT - 1) {
>> +        error_report(
>> +            "iovec too long %d > %d, change unified.h",
>> +            iovcnt, MAX_UNIFIED_IOVCNT
>> +        );
>> +        return -1;
>> +    }
>> +    if (s->offset > 0) {
>
> net_l2tpv3_receive_dgram_iov() does not have this check. I guess it 
> s->offset=0 will be used by other transport. Maybe it's better to 
> delay this change until is has a real user or add a comment here.

The real user is in patch No 2. Raw.

>
>> +        s->form_header(s);
>> +        memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
>> +        s->vec->iov_base = s->header_buf;
>> +        s->vec->iov_len = s->offset;
>> +        message.msg_iovlen = iovcnt + 1;
>> +    } else {
>> +        memcpy(s->vec, iov, iovcnt * sizeof(struct iovec));
>> +        message.msg_iovlen = iovcnt;
>> +    }
>> +    message.msg_name = s->dgram_dst;
>> +    message.msg_namelen = s->dst_size;
>> +    message.msg_iov = s->vec;
>> +    message.msg_control = NULL;
>> +    message.msg_controllen = 0;
>> +    message.msg_flags = 0;
>> +    do {
>> +        ret = sendmsg(s->fd, &message, 0);
>> +    } while ((ret == -1) && (errno == EINTR));
>> +    if (ret > 0) {
>> +        ret -= s->offset;
>> +    } else if (ret == 0) {
>> +        /* belt and braces - should not occur on DGRAM
>> +        * we should get an error and never a 0 send
>> +        */
>> +        ret = iov_size(iov, iovcnt);
>> +    } else {
>> +        /* signal upper layer that socket buffer is full */
>> +        ret = -errno;
>> +        if (ret == -EAGAIN || ret == -ENOBUFS) {
>> +            unified_write_poll(s, true);
>> +            ret = 0;
>> +        }
>> +    }
>> +    return ret;
>> +}
>> +
>> +static ssize_t net_unified_receive_dgram(NetClientState *nc,
>> +                    const uint8_t *buf,
>> +                    size_t size)
>> +{
>> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
>> +
>> +    struct iovec *vec;
>> +    struct msghdr message;
>> +    ssize_t ret = 0;
>> +
>> +    vec = s->vec;
>> +    if (s->offset > 0) {
>> +        s->form_header(s);
>> +        vec->iov_base = s->header_buf;
>> +        vec->iov_len = s->offset;
>> +        message.msg_iovlen = 2;
>> +        vec++;
>> +    } else {
>> +        message.msg_iovlen = 1;
>> +    }
>> +    vec->iov_base = (void *) buf;
>> +    vec->iov_len = size;
>> +    message.msg_name = s->dgram_dst;
>> +    message.msg_namelen = s->dst_size;
>> +    message.msg_iov = s->vec;
>> +    message.msg_control = NULL;
>> +    message.msg_controllen = 0;
>> +    message.msg_flags = 0;
>> +    do {
>> +        ret = sendmsg(s->fd, &message, 0);
>> +    } while ((ret == -1) && (errno == EINTR));
>> +    if (ret > 0) {
>> +        ret -= s->offset;
>> +    } else if (ret == 0) {
>> +        /* belt and braces - should not occur on DGRAM
>> +        * we should get an error and never a 0 send
>> +        */
>> +        ret = size;
>> +    } else {
>> +        ret = -errno;
>> +        if (ret == -EAGAIN || ret == -ENOBUFS) {
>> +            /* signal upper layer that socket buffer is full */
>> +            unified_write_poll(s, true);
>> +            ret = 0;
>> +        }
>> +    }
>> +    return ret;
>> +}
>> +
>> +
>> +static void net_unified_process_queue(NetUnifiedState *s)
>> +{
>> +    int size = 0;
>> +    struct iovec *vec;
>> +    bool bad_read;
>> +    int data_size;
>> +    struct mmsghdr *msgvec;
>> +
>> +    /* go into ring mode only if there is a "pending" tail */
>> +    if (s->queue_depth > 0) {
>> +        do {
>> +            msgvec = s->msgvec + s->queue_tail;
>> +            if (msgvec->msg_len > 0) {
>> +                data_size = msgvec->msg_len - s->header_size;
>> +                vec = msgvec->msg_hdr.msg_iov;
>> +                if ((data_size > 0) &&
>> +                    (s->verify_header(s, vec->iov_base) == 0)) {
>> +                    if (s->header_size > 0) {
>> +                        vec++;
>> +                    }
>> +                    /* Use the legacy delivery for now, we will
>> +                     * switch to using our own ring as a queueing 
>> mechanism
>> +                     * at a later date
>> +                     */
>> +                    size = qemu_send_packet_async(
>> +                            &s->nc,
>> +                            vec->iov_base,
>> +                            data_size,
>> +                            unified_send_completed
>> +                        );
>> +                    if (size == 0) {
>> +                        unified_read_poll(s, false);
>> +                    }
>> +                    bad_read = false;
>> +                } else {
>> +                    bad_read = true;
>> +                    if (!s->header_mismatch) {
>> +                        /* report error only once */
>> +                        error_report("unified header verification 
>> failed");
>> +                        s->header_mismatch = true;
>> +                    }
>> +                }
>> +            } else {
>> +                bad_read = true;
>> +            }
>> +            s->queue_tail = (s->queue_tail + 1) % MAX_UNIFIED_MSGCNT;
>> +            s->queue_depth--;
>> +        } while (
>> +                (s->queue_depth > 0) &&
>> +                 qemu_can_send_packet(&s->nc) &&
>> +                ((size > 0) || bad_read)
>> +            );
>> +    }
>> +}
>> +
>> +static void net_unified_send(void *opaque)
>> +{
>> +    NetUnifiedState *s = opaque;
>> +    int target_count, count;
>> +    struct mmsghdr *msgvec;
>> +
>> +    /* go into ring mode only if there is a "pending" tail */
>> +
>> +    if (s->queue_depth) {
>> +
>> +        /* The ring buffer we use has variable intake
>> +         * count of how much we can read varies - adjust accordingly
>> +         */
>> +
>> +        target_count = MAX_UNIFIED_MSGCNT - s->queue_depth;
>> +
>> +        /* Ensure we do not overrun the ring when we have
>> +         * a lot of enqueued packets
>> +         */
>> +
>> +        if (s->queue_head + target_count > MAX_UNIFIED_MSGCNT) {
>> +            target_count = MAX_UNIFIED_MSGCNT - s->queue_head;
>> +        }
>> +    } else {
>> +
>> +        /* we do not have any pending packets - we can use
>> +        * the whole message vector linearly instead of using
>> +        * it as a ring
>> +        */
>> +
>> +        s->queue_head = 0;
>> +        s->queue_tail = 0;
>> +        target_count = MAX_UNIFIED_MSGCNT;
>> +    }
>> +
>> +    msgvec = s->msgvec + s->queue_head;
>> +    if (target_count > 0) {
>> +        do {
>> +            count = recvmmsg(
>> +                s->fd,
>> +                msgvec,
>> +                target_count, MSG_DONTWAIT, NULL);
>> +        } while ((count == -1) && (errno == EINTR));
>> +        if (count < 0) {
>> +            /* Recv error - we still need to flush packets here,
>> +             * (re)set queue head to current position
>> +             */
>> +            count = 0;
>> +        }
>> +        s->queue_head = (s->queue_head + count) % MAX_UNIFIED_MSGCNT;
>> +        s->queue_depth += count;
>> +    }
>> +    net_unified_process_queue(s);
>> +}
>> +
>> +static void destroy_vector(struct mmsghdr *msgvec, int count, int 
>> iovcount)
>> +{
>> +    int i, j;
>> +    struct iovec *iov;
>> +    struct mmsghdr *cleanup = msgvec;
>> +    if (cleanup) {
>> +        for (i = 0; i < count; i++) {
>> +            if (cleanup->msg_hdr.msg_iov) {
>> +                iov = cleanup->msg_hdr.msg_iov;
>> +                for (j = 0; j < iovcount; j++) {
>> +                    g_free(iov->iov_base);
>> +                    iov++;
>> +                }
>> +                g_free(cleanup->msg_hdr.msg_iov);
>> +            }
>> +            cleanup++;
>> +        }
>> +        g_free(msgvec);
>> +    }
>> +}
>> +
>> +
>> +
>> +static struct mmsghdr *build_unified_vector(NetUnifiedState *s, int 
>> count)
>> +{
>> +    int i;
>> +    struct iovec *iov;
>> +    struct mmsghdr *msgvec, *result;
>> +
>> +    msgvec = g_new(struct mmsghdr, count);
>> +    result = msgvec;
>> +    for (i = 0; i < count ; i++) {
>> +        msgvec->msg_hdr.msg_name = NULL;
>> +        msgvec->msg_hdr.msg_namelen = 0;
>> +        iov =  g_new(struct iovec, IOVSIZE);
>> +        msgvec->msg_hdr.msg_iov = iov;
>> +        if (s->header_size > 0) {
>
> Same here.
>
>> +            iov->iov_base = g_malloc(s->header_size);
>> +            iov->iov_len = s->header_size;
>> +            iov++ ;
>> +        }
>> +        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
>> +        iov->iov_len = BUFFER_SIZE;
>> +        msgvec->msg_hdr.msg_iovlen = 2;
>> +        msgvec->msg_hdr.msg_control = NULL;
>> +        msgvec->msg_hdr.msg_controllen = 0;
>> +        msgvec->msg_hdr.msg_flags = 0;
>> +        msgvec++;
>> +    }
>> +    return result;
>> +}
>> +
>> +static void net_unified_cleanup(NetClientState *nc)
>> +{
>> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
>> +    qemu_purge_queued_packets(nc);
>> +    unified_read_poll(s, false);
>> +    unified_write_poll(s, false);
>> +    if (s->fd >= 0) {
>> +        close(s->fd);
>> +    }
>> +    if (s->header_size > 0) {
>> +        destroy_vector(s->msgvec, MAX_UNIFIED_MSGCNT, IOVSIZE);
>> +    } else {
>> +        destroy_vector(s->msgvec, MAX_UNIFIED_MSGCNT, 1);
>> +    }
>> +    g_free(s->vec);
>> +    if (s->header_buf != NULL) {
>> +        g_free(s->header_buf);
>> +    }
>> +    if (s->dgram_dst != NULL) {
>> +        g_free(s->dgram_dst);
>> +    }
>> +}
>> +
>> +static NetClientInfo net_unified_info = {
>> +    /* we share this one for all types for now, wrong I know :) */
>> +    .type = NET_CLIENT_DRIVER_L2TPV3,
>
> Like I said above, better to have transport specific type.

Agree. I will get on with it. I may need some help on how to introduce a 
transport which is not selectable by users (just used as backend for 
other transports) into the json schema.

It is now designed to produce "end-user-visible" options.

A.

>
> Thanks
>
>> +    .size = sizeof(NetUnifiedState),
>> +    .receive = net_unified_receive_dgram,
>> +    .receive_iov = net_unified_receive_dgram_iov,
>> +    .poll = unified_poll,
>> +    .cleanup = net_unified_cleanup,
>> +};
>> +
>> +NetClientState *qemu_new_unified_net_client(const char *name,
>> +                    NetClientState *peer) {
>> +    return qemu_new_net_client(&net_unified_info, peer, "unified", 
>> name);
>> +}
>> +
>> +void qemu_net_finalize_unified_init(NetUnifiedState *s, int fd)
>> +{
>> +
>> +    s->msgvec = build_unified_vector(s, MAX_UNIFIED_MSGCNT);
>> +    s->vec = g_new(struct iovec, MAX_UNIFIED_IOVCNT);
>> +    if (s->header_size > 0) {
>> +        s->header_buf = g_malloc(s->header_size);
>> +    } else {
>> +        s->header_buf = NULL;
>> +    }
>> +    qemu_set_nonblock(fd);
>> +
>> +    s->fd = fd;
>> +    unified_read_poll(s, true);
>> +
>> +}
>> +
>> diff --git a/net/unified.h b/net/unified.h
>> new file mode 100644
>> index 0000000000..97ec743f0e
>> --- /dev/null
>> +++ b/net/unified.h
>> @@ -0,0 +1,118 @@
>> +/*
>> + * QEMU System Emulator
>> + *
>> + * Copyright (c) 2015-2017 Cambridge Greys Limited
>> + * Copyright (c) 2012-2014 Cisco Systems
>> + * Copyright (c) 2003-2008 Fabrice Bellard
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a copy
>> + * of this software and associated documentation files (the 
>> "Software"), to deal
>> + * in the Software without restriction, including without limitation 
>> the rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, 
>> and/or sell
>> + * copies of the Software, and to permit persons to whom the 
>> Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be 
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 
>> OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
>> ARISING FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
>> DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +
>> +
>> +#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
>> +#define BUFFER_SIZE 2048
>> +#define IOVSIZE 2
>> +#define MAX_UNIFIED_MSGCNT 64
>> +#define MAX_UNIFIED_IOVCNT (MAX_UNIFIED_MSGCNT * IOVSIZE)
>> +
>> +#ifndef QEMU_NET_UNIFIED_H
>> +#define QEMU_NET_UNIFIED_H
>> +
>> +typedef struct NetUnifiedState {
>> +    NetClientState nc;
>> +
>> +    int fd;
>> +
>> +    /*
>> +     * these are used for xmit - that happens packet a time
>> +     * and for first sign of life packet (easier to parse that once)
>> +     */
>> +
>> +    uint8_t *header_buf;
>> +    struct iovec *vec;
>> +
>> +    /*
>> +     * these are used for receive - try to "eat" up to 32 packets at 
>> a time
>> +     */
>> +
>> +    struct mmsghdr *msgvec;
>> +
>> +    /*
>> +     * peer address
>> +     */
>> +
>> +    struct sockaddr_storage *dgram_dst;
>> +    uint32_t dst_size;
>> +
>> +    /*
>> +     * Internal Queue
>> +     */
>> +
>> +    /*
>> +    * DOS avoidance in error handling
>> +    */
>> +
>> +    /* Easier to keep l2tpv3 specific */
>> +
>> +    bool header_mismatch;
>> +
>> +    /*
>> +     *
>> +     * Ring buffer handling
>> +     *
>> +     */
>> +
>> +    int queue_head;
>> +    int queue_tail;
>> +    int queue_depth;
>> +
>> +    /*
>> +     * Offset to data - common for all protocols
>> +     */
>> +
>> +    uint32_t offset;
>> +
>> +    /*
>> +     * Header size - common for all protocols
>> +     */
>> +
>> +    uint32_t header_size;
>> +    /* Poll Control */
>> +
>> +    bool read_poll;
>> +    bool write_poll;
>> +
>> +    /* Parameters */
>> +
>> +    void *params;
>> +
>> +    /* header forming functions */
>> +
>> +    int (*verify_header)(void *s, uint8_t *buf);
>> +    void (*form_header)(void *s);
>> +
>> +} NetUnifiedState;
>> +
>> +extern NetClientState *qemu_new_unified_net_client(const char *name,
>> +                    NetClientState *peer);
>> +
>> +extern void qemu_net_finalize_unified_init(NetUnifiedState *s, int fd);
>> +#endif
>
>
Jason Wang July 19, 2017, 6:07 a.m. UTC | #3
On 2017年07月19日 13:48, Anton Ivanov wrote:
>
>
> On 19/07/17 06:39, Jason Wang wrote:
>>
>>
>> On 2017年07月19日 01:08, anton.ivanov@cambridgegreys.com wrote:
>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>
>>> 1. Creates a common backend for socket transports using
>>> recvmmsg().
>>> 2. Migrates L2TPv3 to the new backend
>>
>> It would be better if you could further split out 2 from this patch.
>>
>>>
>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>> ---
>>>   configure         |  10 +-
>>>   net/Makefile.objs |   2 +-
>>>   net/l2tpv3.c      | 531 
>>> +++++++++---------------------------------------------
>>>   net/net.c         |   4 +-
>>>   net/unified.c     | 406 +++++++++++++++++++++++++++++++++++++++++
>>>   net/unified.h     | 118 ++++++++++++
>>>   6 files changed, 613 insertions(+), 458 deletions(-)
>>>   create mode 100644 net/unified.c
>>>   create mode 100644 net/unified.h
>>>
>>> diff --git a/configure b/configure
>>> index a3f0522e8f..99a60b723c 100755
>>> --- a/configure
>>> +++ b/configure
>>> @@ -1862,7 +1862,7 @@ if ! compile_object -Werror ; then
>>>   fi
>>>     ##########################################
>>> -# L2TPV3 probe
>>> +# UNIFIED probe
>>>     cat > $TMPC <<EOF
>>>   #include <sys/socket.h>
>>> @@ -1870,9 +1870,9 @@ cat > $TMPC <<EOF
>>>   int main(void) { return sizeof(struct mmsghdr); }
>>>   EOF
>>>   if compile_prog "" "" ; then
>>> -  l2tpv3=yes
>>> +  unified=yes
>>>   else
>>> -  l2tpv3=no
>>> +  unified=no
>>>   fi
>>>     ##########################################
>>> @@ -5458,8 +5458,8 @@ fi
>>>   if test "$netmap" = "yes" ; then
>>>     echo "CONFIG_NETMAP=y" >> $config_host_mak
>>>   fi
>>> -if test "$l2tpv3" = "yes" ; then
>>> -  echo "CONFIG_L2TPV3=y" >> $config_host_mak
>>> +if test "$unified" = "yes" ; then
>>> +  echo "CONFIG_UNIFIED=y" >> $config_host_mak
>>>   fi
>>
>> Could we keep l2tpv3 option?
>
> The l2tpv3 test is actually a test for recvmmsg. If you can do one 
> recvmmsg transport you can do all of them.

Yes, but I wonder whether or not the check for recvmmsg is too simple. 
We probably want something like what AV_VSOCK did, test the support of 
each transport through socket().

>
>>
>>>   if test "$cap_ng" = "yes" ; then
>>>     echo "CONFIG_LIBCAP=y" >> $config_host_mak
>>> diff --git a/net/Makefile.objs b/net/Makefile.objs
>>> index 67ba5e26fb..8026ad778a 100644
>>> --- a/net/Makefile.objs
>>> +++ b/net/Makefile.objs
>>> @@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
>>>   common-obj-y += socket.o
>>>   common-obj-y += dump.o
>>>   common-obj-y += eth.o
>>> -common-obj-$(CONFIG_L2TPV3) += l2tpv3.o
>>> +common-obj-$(CONFIG_UNIFIED) += l2tpv3.o unified.o
>>>   common-obj-$(CONFIG_POSIX) += vhost-user.o
>>>   common-obj-$(CONFIG_SLIRP) += slirp.o
>>>   common-obj-$(CONFIG_VDE) += vde.o

[...]

>>>>   -    s = DO_UPCAST(NetL2TPV3State, nc, nc);
>>>> +    s->params = p;
>>>>   +    s->form_header = &l2tpv3_form_header;
>>>> +    s->verify_header = &l2tpv3_verify_header;
>>>>       s->queue_head = 0;
>>>>       s->queue_tail = 0;
>>>>       s->header_mismatch = false;
>>>
>>> Why not move all above into qemu_new_unified_net()?
>
> Only queue head/tail assignment can move.
>
> raw which uses same backend does not use header_mismatch. Form/verify 
> header are different for each sub-transport. F.e. for gre you need the 
> gre one, for raw you need the raw one, etc.

Right, I mean pass function pointer to qemu_new_unified_net().

>
>>
>>> @@ -549,9 +188,9 @@ int net_init_l2tpv3(const Netdev *netdev,
>>>       l2tpv3 = &netdev->u.l2tpv3;
>>>         if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
>>> -        s->ipv6 = l2tpv3->ipv6;
>>> +        p->ipv6 = l2tpv3->ipv6;
>>>       } else {
>>> -        s->ipv6 = false;
>>> +        p->ipv6 = false;

[...]

>>>   diff --git a/net/unified.c b/net/unified.c
>>
>> Not a native speaker, but I think we need a better name here e.g udst 
>> which is short for Unified Datagram Socket Transport?
>
> I am not a native speaker either :)
>
> I am OK - let's call it udst as this is more descriptive and this 
> clearly delineates that you cannot
> migrate tcp/socket to it.

Ok.

>
>>
>>>

[...]

>>> +
>>> +static ssize_t net_unified_receive_dgram_iov(NetClientState *nc,
>>> +                    const struct iovec *iov,
>>> +                    int iovcnt)
>>> +{
>>> +    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
>>> +
>>> +    struct msghdr message;
>>> +    int ret;
>>> +
>>> +    if (iovcnt > MAX_UNIFIED_IOVCNT - 1) {
>>> +        error_report(
>>> +            "iovec too long %d > %d, change unified.h",
>>> +            iovcnt, MAX_UNIFIED_IOVCNT
>>> +        );
>>> +        return -1;
>>> +    }
>>> +    if (s->offset > 0) {
>>
>> net_l2tpv3_receive_dgram_iov() does not have this check. I guess it 
>> s->offset=0 will be used by other transport. Maybe it's better to 
>> delay this change until is has a real user or add a comment here.
>
> The real user is in patch No 2. Raw.

Ok.

Thanks.
Anton Ivanov July 19, 2017, 6:48 a.m. UTC | #4
[snip]

>>> Could we keep l2tpv3 option?
>>
>> The l2tpv3 test is actually a test for recvmmsg. If you can do one 
>> recvmmsg transport you can do all of them.
>
> Yes, but I wonder whether or not the check for recvmmsg is too simple. 
> We probably want something like what AV_VSOCK did, test the support of 
> each transport through socket().

We may need this in the future.

I do not think we need it for the first 3 transports lined up for this - 
l2tpv3, gre and raw.  The only reqs are recvmmsg (and sendmmsg in the 
future) and raw sockets. They are very simple :)

So unless we try to fold all of raw initialization (on/off for offloads, 
etc) into the driver we should not need more tests for now. We will need 
them once we add more transports.

By the way - on raw, in addition to cost of timestamps, recvmmsg and 
especially sendmmsg in most cases will have lower number of copies 
compared to tpacket. IMHO there is still a very important use case for 
tpacket, but it will require hw/ work - vm used as a forensic tap. We 
will need to emulate one of the drivers which convey the timestamp so 
that a pcap/tpacket implementation in the VM can get a precise timestamp 
"at real capture".

>
>>
>>>
>>>>   if test "$cap_ng" = "yes" ; then
>>>>     echo "CONFIG_LIBCAP=y" >> $config_host_mak
>>>> diff --git a/net/Makefile.objs b/net/Makefile.objs
>>>> index 67ba5e26fb..8026ad778a 100644
>>>> --- a/net/Makefile.objs
>>>> +++ b/net/Makefile.objs
>>>> @@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
>>>>   common-obj-y += socket.o
>>>>   common-obj-y += dump.o
>>>>   common-obj-y += eth.o
>>>> -common-obj-$(CONFIG_L2TPV3) += l2tpv3.o
>>>> +common-obj-$(CONFIG_UNIFIED) += l2tpv3.o unified.o
>>>>   common-obj-$(CONFIG_POSIX) += vhost-user.o
>>>>   common-obj-$(CONFIG_SLIRP) += slirp.o
>>>>   common-obj-$(CONFIG_VDE) += vde.o
>
> [...]
>
>>>>>   -    s = DO_UPCAST(NetL2TPV3State, nc, nc);
>>>>> +    s->params = p;
>>>>>   +    s->form_header = &l2tpv3_form_header;
>>>>> +    s->verify_header = &l2tpv3_verify_header;
>>>>>       s->queue_head = 0;
>>>>>       s->queue_tail = 0;
>>>>>       s->header_mismatch = false;
>>>>
>>>> Why not move all above into qemu_new_unified_net()?
>>
>> Only queue head/tail assignment can move.
>>
>> raw which uses same backend does not use header_mismatch. Form/verify 
>> header are different for each sub-transport. F.e. for gre you need 
>> the gre one, for raw you need the raw one, etc.
>
> Right, I mean pass function pointer to qemu_new_unified_net().

Ack - will do in the next revision.


[snip]
Anton Ivanov July 21, 2017, 5:50 p.m. UTC | #5
[snip]

>> +    NetUnifiedState *s = (NetUnifiedState *) us;
>> +    L2TPV3TunnelParams *p = (L2TPV3TunnelParams *) s->params;
>
> How about embedding NetUnifiedState into this structure and keep using 
> NetL2TPV3State? Then:
>
> -  's' could be kept and lots of lines of changes could be saved here 
> and l2tpv3_verify_header()
> -  each transport could have their own type instead of using 
> NET_CLIENT_DRIVER_L2TPV3

That means each of them having their own read/write functions in each 
transport, destroy functions, etc.

I am trying to achieve exactly the opposite which across all transports 
should save more code. There should be nothing in a transport which 
leverages the common datagram processing backend except:

1. Init and parse arguments
2. Form Header
3. Verify Header

All the rest can be common for a large family of datagram based 
transports - L2TPv3, GRE, RAW (both full interface and just pulling a 
specific vlan out of it), etc.

It is trivial to do that for fixed size headers (as in the current 
patchset family). It is a bit more difficult to that for variable 
headers, but still datagram (GUE, Geneve, etc).

These may also add 4 - I/O to control plane, but it remains to be seen 
if that is needed.

This also makes any improvements to the backend - f.e. switching from 
send() to sendmmsg() automatically available for all transports.

What cannot be done is to shoehorn into this stream based. I believe we 
have only one of those - the original socket.c in tcp mode and we can 
leave it to stay that way and switch only the datagram mode to a better 
backend.

I am going through the other comments in the meantime to see if I missed 
something else and fixing the omissions.

A.

[snip]
Jason Wang July 24, 2017, 3:51 a.m. UTC | #6
On 2017年07月22日 01:50, Anton Ivanov wrote:
> [snip]
>
>>> +    NetUnifiedState *s = (NetUnifiedState *) us;
>>> +    L2TPV3TunnelParams *p = (L2TPV3TunnelParams *) s->params;
>>
>> How about embedding NetUnifiedState into this structure and keep 
>> using NetL2TPV3State? Then:
>>
>> -  's' could be kept and lots of lines of changes could be saved here 
>> and l2tpv3_verify_header()
>> -  each transport could have their own type instead of using 
>> NET_CLIENT_DRIVER_L2TPV3
>
> That means each of them having their own read/write functions in each 
> transport, destroy functions, etc.

Looks not? Just something like

typedef struct L2TPV3State {
     NetUDSTState udst;
     /* L2TPV3 specific data */
     ....
};

static NetClientInfo l2tpv3_info = {
     /* we share this one for all types for now, wrong I know :) */
     .type = NET_CLIENT_DRIVER_L2TPV3,
     .size = sizeof(L2TPV3State),
     .receive = net_udst_receive_dgram,
     .receive_iov = net_udst_receive_dgram_iov,
     .poll = udst_poll,
     .cleanup = net_udst_cleanup,
};

Thanks

>
> I am trying to achieve exactly the opposite which across all 
> transports should save more code. There should be nothing in a 
> transport which leverages the common datagram processing backend except:
>
> 1. Init and parse arguments
> 2. Form Header
> 3. Verify Header
>
> All the rest can be common for a large family of datagram based 
> transports - L2TPv3, GRE, RAW (both full interface and just pulling a 
> specific vlan out of it), etc.
>
> It is trivial to do that for fixed size headers (as in the current 
> patchset family). It is a bit more difficult to that for variable 
> headers, but still datagram (GUE, Geneve, etc).
>
> These may also add 4 - I/O to control plane, but it remains to be seen 
> if that is needed.
>
> This also makes any improvements to the backend - f.e. switching from 
> send() to sendmmsg() automatically available for all transports.
>
> What cannot be done is to shoehorn into this stream based. I believe 
> we have only one of those - the original socket.c in tcp mode and we 
> can leave it to stay that way and switch only the datagram mode to a 
> better backend.
>
> I am going through the other comments in the meantime to see if I 
> missed something else and fixing the omissions.
>
> A.
>
> [snip]
>
diff mbox

Patch

diff --git a/configure b/configure
index a3f0522e8f..99a60b723c 100755
--- a/configure
+++ b/configure
@@ -1862,7 +1862,7 @@  if ! compile_object -Werror ; then
 fi
 
 ##########################################
-# L2TPV3 probe
+# UNIFIED probe
 
 cat > $TMPC <<EOF
 #include <sys/socket.h>
@@ -1870,9 +1870,9 @@  cat > $TMPC <<EOF
 int main(void) { return sizeof(struct mmsghdr); }
 EOF
 if compile_prog "" "" ; then
-  l2tpv3=yes
+  unified=yes
 else
-  l2tpv3=no
+  unified=no
 fi
 
 ##########################################
@@ -5458,8 +5458,8 @@  fi
 if test "$netmap" = "yes" ; then
   echo "CONFIG_NETMAP=y" >> $config_host_mak
 fi
-if test "$l2tpv3" = "yes" ; then
-  echo "CONFIG_L2TPV3=y" >> $config_host_mak
+if test "$unified" = "yes" ; then
+  echo "CONFIG_UNIFIED=y" >> $config_host_mak
 fi
 if test "$cap_ng" = "yes" ; then
   echo "CONFIG_LIBCAP=y" >> $config_host_mak
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 67ba5e26fb..8026ad778a 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,7 +2,7 @@  common-obj-y = net.o queue.o checksum.o util.o hub.o
 common-obj-y += socket.o
 common-obj-y += dump.o
 common-obj-y += eth.o
-common-obj-$(CONFIG_L2TPV3) += l2tpv3.o
+common-obj-$(CONFIG_UNIFIED) += l2tpv3.o unified.o
 common-obj-$(CONFIG_POSIX) += vhost-user.o
 common-obj-$(CONFIG_SLIRP) += slirp.o
 common-obj-$(CONFIG_VDE) += vde.o
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
index 6745b78990..05413c9cbd 100644
--- a/net/l2tpv3.c
+++ b/net/l2tpv3.c
@@ -1,6 +1,7 @@ 
 /*
  * QEMU System Emulator
  *
+ * Copyright (c) 2015-2017 Cambridge Greys Limited
  * Copyright (c) 2003-2008 Fabrice Bellard
  * Copyright (c) 2012-2014 Cisco Systems
  *
@@ -34,19 +35,9 @@ 
 #include "qemu/sockets.h"
 #include "qemu/iov.h"
 #include "qemu/main-loop.h"
+#include "unified.h"
 
 
-/* The buffer size needs to be investigated for optimum numbers and
- * optimum means of paging in on different systems. This size is
- * chosen to be sufficient to accommodate one packet with some headers
- */
-
-#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
-#define BUFFER_SIZE 2048
-#define IOVSIZE 2
-#define MAX_L2TPV3_MSGCNT 64
-#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
-
 /* Header set to 0x30000 signifies a data packet */
 
 #define L2TPV3_DATA_PACKET 0x30000
@@ -57,31 +48,7 @@ 
 #define IPPROTO_L2TP 0x73
 #endif
 
-typedef struct NetL2TPV3State {
-    NetClientState nc;
-    int fd;
-
-    /*
-     * these are used for xmit - that happens packet a time
-     * and for first sign of life packet (easier to parse that once)
-     */
-
-    uint8_t *header_buf;
-    struct iovec *vec;
-
-    /*
-     * these are used for receive - try to "eat" up to 32 packets at a time
-     */
-
-    struct mmsghdr *msgvec;
-
-    /*
-     * peer address
-     */
-
-    struct sockaddr_storage *dgram_dst;
-    uint32_t dst_size;
-
+typedef struct L2TPV3TunnelParams {
     /*
      * L2TPv3 parameters
      */
@@ -90,37 +57,8 @@  typedef struct NetL2TPV3State {
     uint64_t tx_cookie;
     uint32_t rx_session;
     uint32_t tx_session;
-    uint32_t header_size;
     uint32_t counter;
 
-    /*
-    * DOS avoidance in error handling
-    */
-
-    bool header_mismatch;
-
-    /*
-     * Ring buffer handling
-     */
-
-    int queue_head;
-    int queue_tail;
-    int queue_depth;
-
-    /*
-     * Precomputed offsets
-     */
-
-    uint32_t offset;
-    uint32_t cookie_offset;
-    uint32_t counter_offset;
-    uint32_t session_offset;
-
-    /* Poll Control */
-
-    bool read_poll;
-    bool write_poll;
-
     /* Flags */
 
     bool ipv6;
@@ -130,189 +68,62 @@  typedef struct NetL2TPV3State {
     bool cookie;
     bool cookie_is_64;
 
-} NetL2TPV3State;
-
-static void net_l2tpv3_send(void *opaque);
-static void l2tpv3_writable(void *opaque);
-
-static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
-{
-    qemu_set_fd_handler(s->fd,
-                        s->read_poll ? net_l2tpv3_send : NULL,
-                        s->write_poll ? l2tpv3_writable : NULL,
-                        s);
-}
-
-static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
-{
-    if (s->read_poll != enable) {
-        s->read_poll = enable;
-        l2tpv3_update_fd_handler(s);
-    }
-}
+    /* Precomputed L2TPV3 specific offsets */
+    uint32_t cookie_offset;
+    uint32_t counter_offset;
+    uint32_t session_offset;
 
-static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
-{
-    if (s->write_poll != enable) {
-        s->write_poll = enable;
-        l2tpv3_update_fd_handler(s);
-    }
-}
+} L2TPV3TunnelParams;
 
-static void l2tpv3_writable(void *opaque)
-{
-    NetL2TPV3State *s = opaque;
-    l2tpv3_write_poll(s, false);
-    qemu_flush_queued_packets(&s->nc);
-}
 
-static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
-{
-    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
-    l2tpv3_read_poll(s, true);
-}
 
-static void l2tpv3_poll(NetClientState *nc, bool enable)
+static void l2tpv3_form_header(void *us)
 {
-    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
-    l2tpv3_write_poll(s, enable);
-    l2tpv3_read_poll(s, enable);
-}
+    NetUnifiedState *s = (NetUnifiedState *) us;
+    L2TPV3TunnelParams *p = (L2TPV3TunnelParams *) s->params;
 
-static void l2tpv3_form_header(NetL2TPV3State *s)
-{
     uint32_t *counter;
 
-    if (s->udp) {
+    if (p->udp) {
         stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
     }
     stl_be_p(
-            (uint32_t *) (s->header_buf + s->session_offset),
-            s->tx_session
+            (uint32_t *) (s->header_buf + p->session_offset),
+            p->tx_session
         );
-    if (s->cookie) {
-        if (s->cookie_is_64) {
+    if (p->cookie) {
+        if (p->cookie_is_64) {
             stq_be_p(
-                (uint64_t *)(s->header_buf + s->cookie_offset),
-                s->tx_cookie
+                (uint64_t *)(s->header_buf + p->cookie_offset),
+                p->tx_cookie
             );
         } else {
             stl_be_p(
-                (uint32_t *) (s->header_buf + s->cookie_offset),
-                s->tx_cookie
+                (uint32_t *) (s->header_buf + p->cookie_offset),
+                p->tx_cookie
             );
         }
     }
-    if (s->has_counter) {
-        counter = (uint32_t *)(s->header_buf + s->counter_offset);
-        if (s->pin_counter) {
+    if (p->has_counter) {
+        counter = (uint32_t *)(s->header_buf + p->counter_offset);
+        if (p->pin_counter) {
             *counter = 0;
         } else {
-            stl_be_p(counter, ++s->counter);
-        }
-    }
-}
-
-static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
-                    const struct iovec *iov,
-                    int iovcnt)
-{
-    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
-
-    struct msghdr message;
-    int ret;
-
-    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
-        error_report(
-            "iovec too long %d > %d, change l2tpv3.h",
-            iovcnt, MAX_L2TPV3_IOVCNT
-        );
-        return -1;
-    }
-    l2tpv3_form_header(s);
-    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
-    s->vec->iov_base = s->header_buf;
-    s->vec->iov_len = s->offset;
-    message.msg_name = s->dgram_dst;
-    message.msg_namelen = s->dst_size;
-    message.msg_iov = s->vec;
-    message.msg_iovlen = iovcnt + 1;
-    message.msg_control = NULL;
-    message.msg_controllen = 0;
-    message.msg_flags = 0;
-    do {
-        ret = sendmsg(s->fd, &message, 0);
-    } while ((ret == -1) && (errno == EINTR));
-    if (ret > 0) {
-        ret -= s->offset;
-    } else if (ret == 0) {
-        /* belt and braces - should not occur on DGRAM
-        * we should get an error and never a 0 send
-        */
-        ret = iov_size(iov, iovcnt);
-    } else {
-        /* signal upper layer that socket buffer is full */
-        ret = -errno;
-        if (ret == -EAGAIN || ret == -ENOBUFS) {
-            l2tpv3_write_poll(s, true);
-            ret = 0;
+            stl_be_p(counter, ++p->counter);
         }
     }
-    return ret;
 }
 
-static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
-                    const uint8_t *buf,
-                    size_t size)
-{
-    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
-
-    struct iovec *vec;
-    struct msghdr message;
-    ssize_t ret = 0;
-
-    l2tpv3_form_header(s);
-    vec = s->vec;
-    vec->iov_base = s->header_buf;
-    vec->iov_len = s->offset;
-    vec++;
-    vec->iov_base = (void *) buf;
-    vec->iov_len = size;
-    message.msg_name = s->dgram_dst;
-    message.msg_namelen = s->dst_size;
-    message.msg_iov = s->vec;
-    message.msg_iovlen = 2;
-    message.msg_control = NULL;
-    message.msg_controllen = 0;
-    message.msg_flags = 0;
-    do {
-        ret = sendmsg(s->fd, &message, 0);
-    } while ((ret == -1) && (errno == EINTR));
-    if (ret > 0) {
-        ret -= s->offset;
-    } else if (ret == 0) {
-        /* belt and braces - should not occur on DGRAM
-        * we should get an error and never a 0 send
-        */
-        ret = size;
-    } else {
-        ret = -errno;
-        if (ret == -EAGAIN || ret == -ENOBUFS) {
-            /* signal upper layer that socket buffer is full */
-            l2tpv3_write_poll(s, true);
-            ret = 0;
-        }
-    }
-    return ret;
-}
 
-static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
+static int l2tpv3_verify_header(void *us, uint8_t *buf)
 {
 
+    NetUnifiedState *s = (NetUnifiedState *) us;
+    L2TPV3TunnelParams *p = (L2TPV3TunnelParams *) s->params;
     uint32_t *session;
     uint64_t cookie;
 
-    if ((!s->udp) && (!s->ipv6)) {
+    if ((!p->udp) && (!p->ipv6)) {
         buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
     }
 
@@ -321,21 +132,21 @@  static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
     * that anyway.
     */
 
-    if (s->cookie) {
-        if (s->cookie_is_64) {
-            cookie = ldq_be_p(buf + s->cookie_offset);
+    if (p->cookie) {
+        if (p->cookie_is_64) {
+            cookie = ldq_be_p(buf + p->cookie_offset);
         } else {
-            cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
+            cookie = ldl_be_p(buf + p->cookie_offset) & 0xffffffffULL;
         }
-        if (cookie != s->rx_cookie) {
+        if (cookie != p->rx_cookie) {
             if (!s->header_mismatch) {
                 error_report("unknown cookie id");
             }
             return -1;
         }
     }
-    session = (uint32_t *) (buf + s->session_offset);
-    if (ldl_be_p(session) != s->rx_session) {
+    session = (uint32_t *) (buf + p->session_offset);
+    if (ldl_be_p(session) != p->rx_session) {
         if (!s->header_mismatch) {
             error_report("session mismatch");
         }
@@ -344,203 +155,31 @@  static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
     return 0;
 }
 
-static void net_l2tpv3_process_queue(NetL2TPV3State *s)
-{
-    int size = 0;
-    struct iovec *vec;
-    bool bad_read;
-    int data_size;
-    struct mmsghdr *msgvec;
-
-    /* go into ring mode only if there is a "pending" tail */
-    if (s->queue_depth > 0) {
-        do {
-            msgvec = s->msgvec + s->queue_tail;
-            if (msgvec->msg_len > 0) {
-                data_size = msgvec->msg_len - s->header_size;
-                vec = msgvec->msg_hdr.msg_iov;
-                if ((data_size > 0) &&
-                    (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
-                    vec++;
-                    /* Use the legacy delivery for now, we will
-                     * switch to using our own ring as a queueing mechanism
-                     * at a later date
-                     */
-                    size = qemu_send_packet_async(
-                            &s->nc,
-                            vec->iov_base,
-                            data_size,
-                            l2tpv3_send_completed
-                        );
-                    if (size == 0) {
-                        l2tpv3_read_poll(s, false);
-                    }
-                    bad_read = false;
-                } else {
-                    bad_read = true;
-                    if (!s->header_mismatch) {
-                        /* report error only once */
-                        error_report("l2tpv3 header verification failed");
-                        s->header_mismatch = true;
-                    }
-                }
-            } else {
-                bad_read = true;
-            }
-            s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
-            s->queue_depth--;
-        } while (
-                (s->queue_depth > 0) &&
-                 qemu_can_send_packet(&s->nc) &&
-                ((size > 0) || bad_read)
-            );
-    }
-}
-
-static void net_l2tpv3_send(void *opaque)
-{
-    NetL2TPV3State *s = opaque;
-    int target_count, count;
-    struct mmsghdr *msgvec;
-
-    /* go into ring mode only if there is a "pending" tail */
-
-    if (s->queue_depth) {
-
-        /* The ring buffer we use has variable intake
-         * count of how much we can read varies - adjust accordingly
-         */
-
-        target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
-
-        /* Ensure we do not overrun the ring when we have
-         * a lot of enqueued packets
-         */
-
-        if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
-            target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
-        }
-    } else {
-
-        /* we do not have any pending packets - we can use
-        * the whole message vector linearly instead of using
-        * it as a ring
-        */
-
-        s->queue_head = 0;
-        s->queue_tail = 0;
-        target_count = MAX_L2TPV3_MSGCNT;
-    }
-
-    msgvec = s->msgvec + s->queue_head;
-    if (target_count > 0) {
-        do {
-            count = recvmmsg(
-                s->fd,
-                msgvec,
-                target_count, MSG_DONTWAIT, NULL);
-        } while ((count == -1) && (errno == EINTR));
-        if (count < 0) {
-            /* Recv error - we still need to flush packets here,
-             * (re)set queue head to current position
-             */
-            count = 0;
-        }
-        s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
-        s->queue_depth += count;
-    }
-    net_l2tpv3_process_queue(s);
-}
-
-static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
-{
-    int i, j;
-    struct iovec *iov;
-    struct mmsghdr *cleanup = msgvec;
-    if (cleanup) {
-        for (i = 0; i < count; i++) {
-            if (cleanup->msg_hdr.msg_iov) {
-                iov = cleanup->msg_hdr.msg_iov;
-                for (j = 0; j < iovcount; j++) {
-                    g_free(iov->iov_base);
-                    iov++;
-                }
-                g_free(cleanup->msg_hdr.msg_iov);
-            }
-            cleanup++;
-        }
-        g_free(msgvec);
-    }
-}
-
-static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
-{
-    int i;
-    struct iovec *iov;
-    struct mmsghdr *msgvec, *result;
-
-    msgvec = g_new(struct mmsghdr, count);
-    result = msgvec;
-    for (i = 0; i < count ; i++) {
-        msgvec->msg_hdr.msg_name = NULL;
-        msgvec->msg_hdr.msg_namelen = 0;
-        iov =  g_new(struct iovec, IOVSIZE);
-        msgvec->msg_hdr.msg_iov = iov;
-        iov->iov_base = g_malloc(s->header_size);
-        iov->iov_len = s->header_size;
-        iov++ ;
-        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
-        iov->iov_len = BUFFER_SIZE;
-        msgvec->msg_hdr.msg_iovlen = 2;
-        msgvec->msg_hdr.msg_control = NULL;
-        msgvec->msg_hdr.msg_controllen = 0;
-        msgvec->msg_hdr.msg_flags = 0;
-        msgvec++;
-    }
-    return result;
-}
-
-static void net_l2tpv3_cleanup(NetClientState *nc)
-{
-    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
-    qemu_purge_queued_packets(nc);
-    l2tpv3_read_poll(s, false);
-    l2tpv3_write_poll(s, false);
-    if (s->fd >= 0) {
-        close(s->fd);
-    }
-    destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
-    g_free(s->vec);
-    g_free(s->header_buf);
-    g_free(s->dgram_dst);
-}
-
-static NetClientInfo net_l2tpv3_info = {
-    .type = NET_CLIENT_DRIVER_L2TPV3,
-    .size = sizeof(NetL2TPV3State),
-    .receive = net_l2tpv3_receive_dgram,
-    .receive_iov = net_l2tpv3_receive_dgram_iov,
-    .poll = l2tpv3_poll,
-    .cleanup = net_l2tpv3_cleanup,
-};
-
 int net_init_l2tpv3(const Netdev *netdev,
                     const char *name,
                     NetClientState *peer, Error **errp)
 {
     /* FIXME error_setg(errp, ...) on failure */
     const NetdevL2TPv3Options *l2tpv3;
-    NetL2TPV3State *s;
+    NetUnifiedState *s;
     NetClientState *nc;
+    L2TPV3TunnelParams *p;
+
     int fd = -1, gairet;
     struct addrinfo hints;
     struct addrinfo *result = NULL;
     char *srcport, *dstport;
 
-    nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
+    nc = qemu_new_unified_net_client(name, peer);
+
+    s = DO_UPCAST(NetUnifiedState, nc, nc);
+
+    p = g_malloc(sizeof(L2TPV3TunnelParams));
 
-    s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    s->params = p;
 
+    s->form_header = &l2tpv3_form_header;
+    s->verify_header = &l2tpv3_verify_header;
     s->queue_head = 0;
     s->queue_tail = 0;
     s->header_mismatch = false;
@@ -549,9 +188,9 @@  int net_init_l2tpv3(const Netdev *netdev,
     l2tpv3 = &netdev->u.l2tpv3;
 
     if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
-        s->ipv6 = l2tpv3->ipv6;
+        p->ipv6 = l2tpv3->ipv6;
     } else {
-        s->ipv6 = false;
+        p->ipv6 = false;
     }
 
     if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
@@ -561,22 +200,22 @@  int net_init_l2tpv3(const Netdev *netdev,
 
     if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
         if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
-            s->cookie = true;
+            p->cookie = true;
         } else {
             goto outerr;
         }
     } else {
-        s->cookie = false;
+        p->cookie = false;
     }
 
     if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
-        s->cookie_is_64  = true;
+        p->cookie_is_64  = true;
     } else {
-        s->cookie_is_64  = false;
+        p->cookie_is_64  = false;
     }
 
     if (l2tpv3->has_udp && l2tpv3->udp) {
-        s->udp = true;
+        p->udp = true;
         if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
             error_report("l2tpv3_open : need both src and dst port for udp");
             goto outerr;
@@ -585,52 +224,52 @@  int net_init_l2tpv3(const Netdev *netdev,
             dstport = l2tpv3->dstport;
         }
     } else {
-        s->udp = false;
+        p->udp = false;
         srcport = NULL;
         dstport = NULL;
     }
 
 
     s->offset = 4;
-    s->session_offset = 0;
-    s->cookie_offset = 4;
-    s->counter_offset = 4;
+    p->session_offset = 0;
+    p->cookie_offset = 4;
+    p->counter_offset = 4;
 
-    s->tx_session = l2tpv3->txsession;
+    p->tx_session = l2tpv3->txsession;
     if (l2tpv3->has_rxsession) {
-        s->rx_session = l2tpv3->rxsession;
+        p->rx_session = l2tpv3->rxsession;
     } else {
-        s->rx_session = s->tx_session;
+        p->rx_session = p->tx_session;
     }
 
-    if (s->cookie) {
-        s->rx_cookie = l2tpv3->rxcookie;
-        s->tx_cookie = l2tpv3->txcookie;
-        if (s->cookie_is_64 == true) {
+    if (p->cookie) {
+        p->rx_cookie = l2tpv3->rxcookie;
+        p->tx_cookie = l2tpv3->txcookie;
+        if (p->cookie_is_64 == true) {
             /* 64 bit cookie */
             s->offset += 8;
-            s->counter_offset += 8;
+            p->counter_offset += 8;
         } else {
             /* 32 bit cookie */
             s->offset += 4;
-            s->counter_offset += 4;
+            p->counter_offset += 4;
         }
     }
 
     memset(&hints, 0, sizeof(hints));
 
-    if (s->ipv6) {
+    if (p->ipv6) {
         hints.ai_family = AF_INET6;
     } else {
         hints.ai_family = AF_INET;
     }
-    if (s->udp) {
+    if (p->udp) {
         hints.ai_socktype = SOCK_DGRAM;
         hints.ai_protocol = 0;
         s->offset += 4;
-        s->counter_offset += 4;
-        s->session_offset += 4;
-        s->cookie_offset += 4;
+        p->counter_offset += 4;
+        p->session_offset += 4;
+        p->cookie_offset += 4;
     } else {
         hints.ai_socktype = SOCK_RAW;
         hints.ai_protocol = IPPROTO_L2TP;
@@ -661,12 +300,12 @@  int net_init_l2tpv3(const Netdev *netdev,
 
     memset(&hints, 0, sizeof(hints));
 
-    if (s->ipv6) {
+    if (p->ipv6) {
         hints.ai_family = AF_INET6;
     } else {
         hints.ai_family = AF_INET;
     }
-    if (s->udp) {
+    if (p->udp) {
         hints.ai_socktype = SOCK_DGRAM;
         hints.ai_protocol = 0;
     } else {
@@ -693,17 +332,17 @@  int net_init_l2tpv3(const Netdev *netdev,
     }
 
     if (l2tpv3->has_counter && l2tpv3->counter) {
-        s->has_counter = true;
+        p->has_counter = true;
         s->offset += 4;
     } else {
-        s->has_counter = false;
+        p->has_counter = false;
     }
 
     if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
-        s->has_counter = true;  /* pin counter implies that there is counter */
-        s->pin_counter = true;
+        p->has_counter = true;  /* pin counter implies that there is counter */
+        p->pin_counter = true;
     } else {
-        s->pin_counter = false;
+        p->pin_counter = false;
     }
 
     if (l2tpv3->has_offset) {
@@ -711,22 +350,14 @@  int net_init_l2tpv3(const Netdev *netdev,
         s->offset += l2tpv3->offset;
     }
 
-    if ((s->ipv6) || (s->udp)) {
+    if ((p->ipv6) || (p->udp)) {
         s->header_size = s->offset;
     } else {
         s->header_size = s->offset + sizeof(struct iphdr);
     }
 
-    s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
-    s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
-    s->header_buf = g_malloc(s->header_size);
-
-    qemu_set_nonblock(fd);
-
-    s->fd = fd;
-    s->counter = 0;
-
-    l2tpv3_read_poll(s, true);
+    qemu_net_finalize_unified_init(s, fd);
+    p->counter = 0;
 
     snprintf(s->nc.info_str, sizeof(s->nc.info_str),
              "l2tpv3: connected");
diff --git a/net/net.c b/net/net.c
index 6235aabed8..9270b52ac8 100644
--- a/net/net.c
+++ b/net/net.c
@@ -959,8 +959,8 @@  static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
 #ifdef CONFIG_VHOST_NET_USED
         [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
 #endif
-#ifdef CONFIG_L2TPV3
-        [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
+#ifdef CONFIG_UNIFIED
+        [NET_CLIENT_DRIVER_L2TPV3] = net_init_l2tpv3,
 #endif
 };
 
diff --git a/net/unified.c b/net/unified.c
new file mode 100644
index 0000000000..f15d1e1eed
--- /dev/null
+++ b/net/unified.c
@@ -0,0 +1,406 @@ 
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2015-2017 Cambridge Greys Limited
+ * Copyright (c) 2012-2014 Cisco Systems
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include <linux/ip.h>
+#include <netdb.h>
+#include "net/net.h"
+#include "clients.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+#include "unified.h"
+
+static void net_unified_send(void *opaque);
+static void unified_writable(void *opaque);
+
+static void unified_update_fd_handler(NetUnifiedState *s)
+{
+    qemu_set_fd_handler(s->fd,
+                        s->read_poll ? net_unified_send : NULL,
+                        s->write_poll ? unified_writable : NULL,
+                        s);
+}
+
+static void unified_read_poll(NetUnifiedState *s, bool enable)
+{
+    if (s->read_poll != enable) {
+        s->read_poll = enable;
+        unified_update_fd_handler(s);
+    }
+}
+
+static void unified_write_poll(NetUnifiedState *s, bool enable)
+{
+    if (s->write_poll != enable) {
+        s->write_poll = enable;
+        unified_update_fd_handler(s);
+    }
+}
+
+static void unified_writable(void *opaque)
+{
+    NetUnifiedState *s = opaque;
+    unified_write_poll(s, false);
+    qemu_flush_queued_packets(&s->nc);
+}
+
+static void unified_send_completed(NetClientState *nc, ssize_t len)
+{
+    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
+    unified_read_poll(s, true);
+}
+
+static void unified_poll(NetClientState *nc, bool enable)
+{
+    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
+    unified_write_poll(s, enable);
+    unified_read_poll(s, enable);
+}
+
+static ssize_t net_unified_receive_dgram_iov(NetClientState *nc,
+                    const struct iovec *iov,
+                    int iovcnt)
+{
+    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
+
+    struct msghdr message;
+    int ret;
+
+    if (iovcnt > MAX_UNIFIED_IOVCNT - 1) {
+        error_report(
+            "iovec too long %d > %d, change unified.h",
+            iovcnt, MAX_UNIFIED_IOVCNT
+        );
+        return -1;
+    }
+    if (s->offset > 0) {
+        s->form_header(s);
+        memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
+        s->vec->iov_base = s->header_buf;
+        s->vec->iov_len = s->offset;
+        message.msg_iovlen = iovcnt + 1;
+    } else {
+        memcpy(s->vec, iov, iovcnt * sizeof(struct iovec));
+        message.msg_iovlen = iovcnt;
+    }
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_control = NULL;
+    message.msg_controllen = 0;
+    message.msg_flags = 0;
+    do {
+        ret = sendmsg(s->fd, &message, 0);
+    } while ((ret == -1) && (errno == EINTR));
+    if (ret > 0) {
+        ret -= s->offset;
+    } else if (ret == 0) {
+        /* belt and braces - should not occur on DGRAM
+        * we should get an error and never a 0 send
+        */
+        ret = iov_size(iov, iovcnt);
+    } else {
+        /* signal upper layer that socket buffer is full */
+        ret = -errno;
+        if (ret == -EAGAIN || ret == -ENOBUFS) {
+            unified_write_poll(s, true);
+            ret = 0;
+        }
+    }
+    return ret;
+}
+
+static ssize_t net_unified_receive_dgram(NetClientState *nc,
+                    const uint8_t *buf,
+                    size_t size)
+{
+    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
+
+    struct iovec *vec;
+    struct msghdr message;
+    ssize_t ret = 0;
+
+    vec = s->vec;
+    if (s->offset > 0) {
+        s->form_header(s);
+        vec->iov_base = s->header_buf;
+        vec->iov_len = s->offset;
+        message.msg_iovlen = 2;
+        vec++;
+    } else {
+        message.msg_iovlen = 1;
+    }
+    vec->iov_base = (void *) buf;
+    vec->iov_len = size;
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_control = NULL;
+    message.msg_controllen = 0;
+    message.msg_flags = 0;
+    do {
+        ret = sendmsg(s->fd, &message, 0);
+    } while ((ret == -1) && (errno == EINTR));
+    if (ret > 0) {
+        ret -= s->offset;
+    } else if (ret == 0) {
+        /* belt and braces - should not occur on DGRAM
+        * we should get an error and never a 0 send
+        */
+        ret = size;
+    } else {
+        ret = -errno;
+        if (ret == -EAGAIN || ret == -ENOBUFS) {
+            /* signal upper layer that socket buffer is full */
+            unified_write_poll(s, true);
+            ret = 0;
+        }
+    }
+    return ret;
+}
+
+
+static void net_unified_process_queue(NetUnifiedState *s)
+{
+    int size = 0;
+    struct iovec *vec;
+    bool bad_read;
+    int data_size;
+    struct mmsghdr *msgvec;
+
+    /* go into ring mode only if there is a "pending" tail */
+    if (s->queue_depth > 0) {
+        do {
+            msgvec = s->msgvec + s->queue_tail;
+            if (msgvec->msg_len > 0) {
+                data_size = msgvec->msg_len - s->header_size;
+                vec = msgvec->msg_hdr.msg_iov;
+                if ((data_size > 0) &&
+                    (s->verify_header(s, vec->iov_base) == 0)) {
+                    if (s->header_size > 0) {
+                        vec++;
+                    }
+                    /* Use the legacy delivery for now, we will
+                     * switch to using our own ring as a queueing mechanism
+                     * at a later date
+                     */
+                    size = qemu_send_packet_async(
+                            &s->nc,
+                            vec->iov_base,
+                            data_size,
+                            unified_send_completed
+                        );
+                    if (size == 0) {
+                        unified_read_poll(s, false);
+                    }
+                    bad_read = false;
+                } else {
+                    bad_read = true;
+                    if (!s->header_mismatch) {
+                        /* report error only once */
+                        error_report("unified header verification failed");
+                        s->header_mismatch = true;
+                    }
+                }
+            } else {
+                bad_read = true;
+            }
+            s->queue_tail = (s->queue_tail + 1) % MAX_UNIFIED_MSGCNT;
+            s->queue_depth--;
+        } while (
+                (s->queue_depth > 0) &&
+                 qemu_can_send_packet(&s->nc) &&
+                ((size > 0) || bad_read)
+            );
+    }
+}
+
+static void net_unified_send(void *opaque)
+{
+    NetUnifiedState *s = opaque;
+    int target_count, count;
+    struct mmsghdr *msgvec;
+
+    /* go into ring mode only if there is a "pending" tail */
+
+    if (s->queue_depth) {
+
+        /* The ring buffer we use has variable intake
+         * count of how much we can read varies - adjust accordingly
+         */
+
+        target_count = MAX_UNIFIED_MSGCNT - s->queue_depth;
+
+        /* Ensure we do not overrun the ring when we have
+         * a lot of enqueued packets
+         */
+
+        if (s->queue_head + target_count > MAX_UNIFIED_MSGCNT) {
+            target_count = MAX_UNIFIED_MSGCNT - s->queue_head;
+        }
+    } else {
+
+        /* we do not have any pending packets - we can use
+        * the whole message vector linearly instead of using
+        * it as a ring
+        */
+
+        s->queue_head = 0;
+        s->queue_tail = 0;
+        target_count = MAX_UNIFIED_MSGCNT;
+    }
+
+    msgvec = s->msgvec + s->queue_head;
+    if (target_count > 0) {
+        do {
+            count = recvmmsg(
+                s->fd,
+                msgvec,
+                target_count, MSG_DONTWAIT, NULL);
+        } while ((count == -1) && (errno == EINTR));
+        if (count < 0) {
+            /* Recv error - we still need to flush packets here,
+             * (re)set queue head to current position
+             */
+            count = 0;
+        }
+        s->queue_head = (s->queue_head + count) % MAX_UNIFIED_MSGCNT;
+        s->queue_depth += count;
+    }
+    net_unified_process_queue(s);
+}
+
+static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
+{
+    int i, j;
+    struct iovec *iov;
+    struct mmsghdr *cleanup = msgvec;
+    if (cleanup) {
+        for (i = 0; i < count; i++) {
+            if (cleanup->msg_hdr.msg_iov) {
+                iov = cleanup->msg_hdr.msg_iov;
+                for (j = 0; j < iovcount; j++) {
+                    g_free(iov->iov_base);
+                    iov++;
+                }
+                g_free(cleanup->msg_hdr.msg_iov);
+            }
+            cleanup++;
+        }
+        g_free(msgvec);
+    }
+}
+
+
+
+static struct mmsghdr *build_unified_vector(NetUnifiedState *s, int count)
+{
+    int i;
+    struct iovec *iov;
+    struct mmsghdr *msgvec, *result;
+
+    msgvec = g_new(struct mmsghdr, count);
+    result = msgvec;
+    for (i = 0; i < count ; i++) {
+        msgvec->msg_hdr.msg_name = NULL;
+        msgvec->msg_hdr.msg_namelen = 0;
+        iov =  g_new(struct iovec, IOVSIZE);
+        msgvec->msg_hdr.msg_iov = iov;
+        if (s->header_size > 0) {
+            iov->iov_base = g_malloc(s->header_size);
+            iov->iov_len = s->header_size;
+            iov++ ;
+        }
+        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
+        iov->iov_len = BUFFER_SIZE;
+        msgvec->msg_hdr.msg_iovlen = 2;
+        msgvec->msg_hdr.msg_control = NULL;
+        msgvec->msg_hdr.msg_controllen = 0;
+        msgvec->msg_hdr.msg_flags = 0;
+        msgvec++;
+    }
+    return result;
+}
+
+static void net_unified_cleanup(NetClientState *nc)
+{
+    NetUnifiedState *s = DO_UPCAST(NetUnifiedState, nc, nc);
+    qemu_purge_queued_packets(nc);
+    unified_read_poll(s, false);
+    unified_write_poll(s, false);
+    if (s->fd >= 0) {
+        close(s->fd);
+    }
+    if (s->header_size > 0) {
+        destroy_vector(s->msgvec, MAX_UNIFIED_MSGCNT, IOVSIZE);
+    } else {
+        destroy_vector(s->msgvec, MAX_UNIFIED_MSGCNT, 1);
+    }
+    g_free(s->vec);
+    if (s->header_buf != NULL) {
+        g_free(s->header_buf);
+    }
+    if (s->dgram_dst != NULL) {
+        g_free(s->dgram_dst);
+    }
+}
+
+static NetClientInfo net_unified_info = {
+    /* we share this one for all types for now, wrong I know :) */
+    .type = NET_CLIENT_DRIVER_L2TPV3,
+    .size = sizeof(NetUnifiedState),
+    .receive = net_unified_receive_dgram,
+    .receive_iov = net_unified_receive_dgram_iov,
+    .poll = unified_poll,
+    .cleanup = net_unified_cleanup,
+};
+
+NetClientState *qemu_new_unified_net_client(const char *name,
+                    NetClientState *peer) {
+    return qemu_new_net_client(&net_unified_info, peer, "unified", name);
+}
+
+void qemu_net_finalize_unified_init(NetUnifiedState *s, int fd)
+{
+
+    s->msgvec = build_unified_vector(s, MAX_UNIFIED_MSGCNT);
+    s->vec = g_new(struct iovec, MAX_UNIFIED_IOVCNT);
+    if (s->header_size > 0) {
+        s->header_buf = g_malloc(s->header_size);
+    } else {
+        s->header_buf = NULL;
+    }
+    qemu_set_nonblock(fd);
+
+    s->fd = fd;
+    unified_read_poll(s, true);
+
+}
+
diff --git a/net/unified.h b/net/unified.h
new file mode 100644
index 0000000000..97ec743f0e
--- /dev/null
+++ b/net/unified.h
@@ -0,0 +1,118 @@ 
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2015-2017 Cambridge Greys Limited
+ * Copyright (c) 2012-2014 Cisco Systems
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+
+#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
+#define BUFFER_SIZE 2048
+#define IOVSIZE 2
+#define MAX_UNIFIED_MSGCNT 64
+#define MAX_UNIFIED_IOVCNT (MAX_UNIFIED_MSGCNT * IOVSIZE)
+
+#ifndef QEMU_NET_UNIFIED_H
+#define QEMU_NET_UNIFIED_H
+
+typedef struct NetUnifiedState {
+    NetClientState nc;
+
+    int fd;
+
+    /*
+     * these are used for xmit - that happens packet a time
+     * and for first sign of life packet (easier to parse that once)
+     */
+
+    uint8_t *header_buf;
+    struct iovec *vec;
+
+    /*
+     * these are used for receive - try to "eat" up to 32 packets at a time
+     */
+
+    struct mmsghdr *msgvec;
+
+    /*
+     * peer address
+     */
+
+    struct sockaddr_storage *dgram_dst;
+    uint32_t dst_size;
+
+    /*
+     * Internal Queue
+     */
+
+    /*
+    * DOS avoidance in error handling
+    */
+
+    /* Easier to keep l2tpv3 specific */
+
+    bool header_mismatch;
+
+    /*
+     *
+     * Ring buffer handling
+     *
+     */
+
+    int queue_head;
+    int queue_tail;
+    int queue_depth;
+
+    /*
+     * Offset to data - common for all protocols
+     */
+
+    uint32_t offset;
+
+    /*
+     * Header size - common for all protocols
+     */
+
+    uint32_t header_size;
+    /* Poll Control */
+
+    bool read_poll;
+    bool write_poll;
+
+    /* Parameters */
+
+    void *params;
+
+    /* header forming functions */
+
+    int (*verify_header)(void *s, uint8_t *buf);
+    void (*form_header)(void *s);
+
+} NetUnifiedState;
+
+extern NetClientState *qemu_new_unified_net_client(const char *name,
+                    NetClientState *peer);
+
+extern void qemu_net_finalize_unified_init(NetUnifiedState *s, int fd);
+#endif