Patchwork [6/9] virtio net: introduce dataplane for virtio net

login
register
mail settings
Submitter pingfan liu
Date Feb. 21, 2013, 12:54 p.m.
Message ID <1361451293-5181-7-git-send-email-qemulist@gmail.com>
Download mbox | patch
Permalink /patch/222289/
State New
Headers show

Comments

pingfan liu - Feb. 21, 2013, 12:54 p.m.
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

This is an emulation of virtio-blk dataplane, which pushes the data
handling out of the big lock. It is an attempt to implement this process
in userspace, whereas vhost-net does it in the kernel.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 hw/dataplane/virtio-net.c |  422 +++++++++++++++++++++++++++++++++++++++++++++
 hw/dataplane/virtio-net.h |   26 +++
 hw/virtio-net.c           |   56 +-----
 hw/virtio-net.h           |   61 +++++++
 4 files changed, 517 insertions(+), 48 deletions(-)
 create mode 100644 hw/dataplane/virtio-net.c
 create mode 100644 hw/dataplane/virtio-net.h
Michael Roth - Feb. 21, 2013, 8:55 p.m.
On Thu, Feb 21, 2013 at 08:54:50PM +0800, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> This is a emulation to virtio-blk dataplane, which push the data
> handling out of biglock. And it is a try to implement this process
> in userspace, while vhost-net in kernel.
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  hw/dataplane/virtio-net.c |  422 +++++++++++++++++++++++++++++++++++++++++++++
>  hw/dataplane/virtio-net.h |   26 +++
>  hw/virtio-net.c           |   56 +-----
>  hw/virtio-net.h           |   61 +++++++
>  4 files changed, 517 insertions(+), 48 deletions(-)
>  create mode 100644 hw/dataplane/virtio-net.c
>  create mode 100644 hw/dataplane/virtio-net.h
> 
> diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
> new file mode 100644
> index 0000000..9a1795d
> --- /dev/null
> +++ b/hw/dataplane/virtio-net.c
> @@ -0,0 +1,422 @@
> +/* Copyright IBM, Corp. 2013
> + *
> + * Based on vhost-net and virtio-blk dataplane code
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + */
> +#include "hw/virtio.h"
> +#include "qemu/iov.h"
> +#include "vring.h"
> +#include <linux/virtio_ring.h>
> +#include "net/net.h"
> +#include "net/checksum.h"
> +#include "net/tap.h"
> +#include "virtio-net.h"
> +#include "qemu/error-report.h"
> +
> +typedef struct VirtIONetDataPlane {
> +    int async_tx_head;
> +    Vring *rx_vring;
> +    Vring *tx_vring;
> +    EventHandler *rx_handler;
> +    EventHandler *tx_handler;
> +    bool stop;
> +} VirtIONetDataPlane;
> +
> +WorkThread virt_net_thread;
> +
> +#define VRING_MAX 128
> +
> +static int32_t virtnet_tx(VirtIONet *n, VirtQueue *vq);
> +
> +static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
> +{
> +    int ret;
> +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> +
> +    vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
> +    ret = virtnet_tx(n, n->tx_vq);
> +    if (ret != -EBUSY) {
> +        vring_enable_notification(&n->vdev, n->dp->tx_vring);
> +    }
> +}
> +
> +static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
> +{
> +    struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
> +    struct iovec *snd, *end = &out_iov[VRING_MAX];
> +    int head;
> +    unsigned int out_num, in_num, sg_num;
> +    int ret;
> +    int num_packets = 0;
> +
> +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
> +        return num_packets;
> +    }
> +
> +    assert(n->vdev.vm_running);
> +
> +    if (n->async_tx.elem.out_num) {
> +        return num_packets;
> +    }
> +
> +    while (true) {
> +        head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
> +                            &in_num);
> +        if (head < 0) {
> +            break;
> +        }
> +        snd = out_iov;
> +        assert(n->host_hdr_len <= n->guest_hdr_len);
> +        if (n->host_hdr_len != n->guest_hdr_len) {
> +            sg_num = iov_copy(sg, ARRAY_SIZE(sg),
> +                                       out_iov, out_num,
> +                                       0, n->host_hdr_len);
> +            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
> +                             out_iov, out_num,
> +                             n->guest_hdr_len, -1);
> +            out_num = sg_num;
> +            snd = sg;
> +        }
> +
> +        ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
> +                    virtnet_tx_complete);
> +        if (ret == 0) {
> +            n->dp->async_tx_head = head;
> +            return -EBUSY;
> +        }
> +        vring_push(n->dp->tx_vring, head, 0);
> +        if (num_packets++ > n->tx_burst) {
> +            break;
> +        }

I'm not sure why we'd break here: if we're sending out lots of packets
should we keep notifications disabled and continue sending them till
we'd block? Is it to avoid starving the rx side?

> +    }
> +
> +    return num_packets;
> +}
> +
> +static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +    int32 ret;
> +    VirtIONet *n = (VirtIONet *)vdev;
> +
> +    /* This happens when device was stopped but VCPU wasn't. */
> +    if (!n->vdev.vm_running) {
> +        return;
> +    }
> +    vring_disable_notification(vdev, n->dp->tx_vring);
> +    ret = virtnet_tx(n, vq);
> +    if (ret != -EBUSY) {
> +        vring_enable_notification(vdev, n->dp->tx_vring);
> +    }
> +}
> +
> +
> +static int virtio_net_can_receive(NetClientState *nc)
> +{
> +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> +    if (!n->vdev.vm_running) {
> +        return 0;
> +    }
> +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
> +        return 0;
> +    }
> +
> +    return 1;
> +}
> +
> +/* peek but not use */
> +static int rx_mergeable_buf_sz(VirtIONet *n)
> +{
> +    uint16_t start, idx, head;
> +    int total = 0;
> +    Vring *vring = n->dp->rx_vring;
> +    struct vring_desc *dsc;
> +    struct vring_desc *base;
> +
> +    for (start = vring->last_avail_idx; start != vring->vr.avail->idx;
> +            start++) {
> +        head = start%vring->vr.num;
> +        idx = vring->vr.avail->ring[head];
> +        if (vring->vr.desc[idx].flags & VRING_DESC_F_INDIRECT) {
> +            base = hostmem_lookup(&vring->hostmem, vring->vr.desc[idx].addr,
> +                    vring->vr.desc[idx].len, 0);
> +        } else {
> +            base = vring->vr.desc;
> +        }
> +        dsc = base;
> +        do {
> +            total += dsc->len;
> +            if (!(dsc->flags & VRING_DESC_F_NEXT)) {
> +                break;
> +            }
> +            dsc = &base[dsc->next];
> +        } while (true);
> +    }
> +    return total;
> +}

NetClients usually have a set-size buffer they'll pass in, or at least
will tell us how much they have to give us, so instead of mapping in and
summing all the descriptor heads we can just return true if we have at
least that much?

> +
> +static bool virtnet_has_buffers(VirtIONet *n, int bufsize)
> +{
> +    if (!vring_more_avail(n->dp->rx_vring)) {
> +        return false;
> +    }
> +    if (n->mergeable_rx_bufs) {
> +        if (rx_mergeable_buf_sz(n) <  bufsize) {
> +            return false;
> +        }
> +    }
> +    return true;
> +}
> +
> +static ssize_t virtnet_rx(NetClientState *nc, const uint8_t *buf, size_t size)
> +{
> +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> +    struct iovec in_vec[VIRTQUEUE_MAX_SIZE], head_iov[2], *end;
> +    unsigned int in_num, out_num, vnet_hdr_sz;
> +    int head;
> +    size_t len, total, offset = 0;
> +    uint16_t numbuff = 0;
> +
> +    total = offset = 0;
> +
> +    end = &in_vec[VIRTQUEUE_MAX_SIZE];
> +    if (!virtio_net_receive_filter(n, buf, size)) {
> +        return size;
> +    }
> +
> +    /* enough buff ? */
> +    if (!virtnet_has_buffers(n, size)) {
> +        vring_enable_notification(&n->vdev, n->dp->rx_vring);
> +        return 0;
> +    }
> +
> +    while (size > offset) {
> +        head = vring_pop(&n->vdev, n->dp->rx_vring, in_vec, end, &out_num,
> +                &in_num);
> +        if (head < 0) {
> +            return 0;
> +        }
> +        len = 0;
> +        if (numbuff == 0) {
> +            virtio_net_receive_header(n, in_vec, in_num, buf, size);
> +
> +            if (n->mergeable_rx_bufs) {
> +                vnet_hdr_sz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> +            } else {
> +                vnet_hdr_sz = sizeof(struct virtio_net_hdr);
> +            }
> +            iov_copy(head_iov, 2, in_vec, in_num, 0, vnet_hdr_sz);
> +            offset += n->host_hdr_len;
> +            total += vnet_hdr_sz;
> +            len += vnet_hdr_sz;
> +        }
> +        len += iov_from_buf(in_vec, in_num, vnet_hdr_sz, buf+offset,
> +                        size-offset);
> +        offset += len;
> +        total += len;
> +        numbuff++;
> +        /* Guest wont see used->idx until we are ready */
> +        vring_fill(n->dp->rx_vring, head, len);
> +    }
> +
> +    if (n->mergeable_rx_bufs) {
> +        iov_from_buf(head_iov, 2,
> +            offsetof(struct virtio_net_hdr_mrg_rxbuf, num_buffers), &numbuff,
> +                sizeof(numbuff));
> +    }
> +    vring_flush(n->dp->rx_vring);
> +
> +    if (vring_should_notify(&n->vdev, n->dp->rx_vring)) {
> +        virtio_irq(n->rx_vq);
> +    }
> +
> +    return size;
> +}
> +
> +static void tx_cb(EventHandler *handler, uint32_t events)
> +{
> +    VirtIONet *n = handler->opaque;
> +
> +    event_notifier_test_and_clear(handler->notifier);
> +    virtnet_handle_tx(&n->vdev, n->tx_vq);
> +}
> +
> +/* rvq has buffer again, push tap to fill in */
> +static void rx_cb(EventHandler *handler, uint32_t events)
> +{
> +    VirtIONet *n = handler->opaque;
> +
> +    event_notifier_test_and_clear(handler->notifier);
> +    qemu_flush_queued_packets(&n->nic->nc);
> +}
> +
> +static NetClientInfo net_dp_info = {
> +    .type = NET_CLIENT_OPTIONS_KIND_NIC,
> +    .size = sizeof(NICState),
> +    .can_receive = virtio_net_can_receive,
> +    .receive = virtnet_rx,
> +    .cleanup = virtio_net_cleanup,
> +    .link_status_changed = virtio_net_set_link_status,
> +};
> +
> +void virtnet_dataplane_create(VirtIONet *n)
> +{
> +    EventHandler *tx_handler, *rx_handler;
> +
> +    n->dp = g_malloc(sizeof(VirtIONetDataPlane));
> +    n->dp->stop = false;
> +    n->dp->rx_vring = g_malloc(sizeof(Vring));
> +    n->dp->tx_vring = g_malloc(sizeof(Vring));
> +    rx_handler = n->dp->rx_handler = g_malloc(sizeof(EventHandler));
> +    tx_handler = n->dp->tx_handler = g_malloc(sizeof(EventHandler));
> +    tx_handler->opaque = n;
> +    rx_handler->opaque = n;
> +
> +    /* safely redirect receive handler */
> +    n->nic->nc.info = &net_dp_info;
> +}
> +
> +static int virtnet_dataplane_disable_notifiers(VirtIONet *n)
> +{
> +    int i, r;
> +    VirtIODevice *vdev = &n->vdev;
> +
> +    for (i = 0; i < 2; ++i) {
> +        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
> +        if (r < 0) {
> +            fprintf(stderr, "virtnet dataplane %d notifier unbinding failed:
> +                    %d\n", i, -r);
> +        }
> +    }
> +    return r;
> +}
> +
> +static int virtnet_dataplane_enable_notifiers(VirtIONet *n)
> +{
> +    int i, r;
> +    VirtIODevice *vdev = &n->vdev;
> +
> +    if (!vdev->binding->set_host_notifier) {
> +        fprintf(stderr, "binding does not support host notifiers\n");
> +        r = -ENOSYS;
> +        goto fail;
> +    }
> +    for (i = 0; i < 2; ++i) {
> +        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
> +        if (r < 0) {
> +            fprintf(stderr, "virtnet dataplane %d notifier binding failed:
> +                    %d\n", i, -r);
> +            goto fail_vq;
> +        }
> +    }
> +
> +    return 0;
> +fail_vq:
> +    while (--i >= 0) {
> +        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
> +        if (r < 0) {
> +            fprintf(stderr, "virtnet dataplane %d notifier cleanup error:
> +                    %d\n", i, -r);
> +            fflush(stderr);
> +        }
> +        assert(r >= 0);
> +    }
> +fail:
> +    return r;
> +}
> +
> +
> +static void thread_cb(EventHandler *handler, uint32_t events)
> +{
> +    EventNotifier *e = handler->notifier;
> +    event_notifier_test_and_clear(e);
> +}
> +
> +static void *working_thread(void *data)
> +{
> +    WorkThread *t = (WorkThread *)data;
> +
> +    qemu_mutex_lock(&t->lock);
> +    qemu_cond_signal(&t->cond_start);
> +    qemu_mutex_unlock(&t->lock);
> +    while (t->state == THREAD_START) {
> +        event_poll(&t->polltbl);
> +    }
> +    return NULL;
> +}
> +
> +static void init_work_thread(void)
> +{
> +    EventHandler *thread_handler = g_malloc(sizeof(EventHandler));
> +    WorkThread *t = &virt_net_thread;

Does virt_net_thread need to be global? Couldn't we hang it off of
VirtIONet somewhere and pass that in instead?

> +
> +    qemu_mutex_init(&t->lock);
> +    qemu_cond_init(&t->cond_start);
> +    event_poll_init(&t->polltbl, 4);
> +    event_notifier_init(&t->e, 0);
> +    event_poll_add(&t->polltbl, thread_handler, &t->e, thread_cb);
> +    qemu_mutex_lock(&t->lock);
> +    t->state = THREAD_START;
> +    qemu_thread_create(&t->thread, working_thread, t, QEMU_THREAD_JOINABLE);

Have you considered using separate threads for tx/rx? This would allow
you to spin/block freely in rx/tx without regard for starvation
(assuming that's an actual problem you're hitting)

> +    qemu_cond_wait(&t->cond_start, &t->lock);
> +    qemu_mutex_unlock(&t->lock);
> +}
> +
> +void virtnet_dataplane_start(VirtIONet *n)
> +{
> +    bool rslt;
> +    EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq);
> +    EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq);
> +    WorkThread *t = &virt_net_thread;
> +
> +    virtnet_dataplane_enable_notifiers(n);
> +    rslt = vring_setup(n->dp->rx_vring, &n->vdev, 0);
> +    if (!rslt) {
> +        error_report("fail to setup rx vring\n");
> +        exit(1);
> +    }
> +    vring_restore(n->dp->rx_vring,
> +                    virtio_queue_get_last_avail_idx(&n->vdev, 0));
> +    rslt = vring_setup(n->dp->tx_vring, &n->vdev, 1);
> +    if (!rslt) {
> +        error_report("fail to setup tx vring\n");
> +        exit(1);
> +    }
> +    vring_restore(n->dp->tx_vring,
> +            virtio_queue_get_last_avail_idx(&n->vdev, 1));
> +    init_work_thread();
> +
> +    event_poll_add(&t->polltbl, n->dp->rx_handler, rx_e, rx_cb);
> +    event_poll_add(&t->polltbl, n->dp->tx_handler, tx_e, tx_cb);
> +}
> +
> +void virtnet_dataplane_stop(VirtIONet *n)
> +{
> +    EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq);
> +    EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq);
> +    WorkThread *t = &virt_net_thread;
> +
> +    event_poll_del_fd(&t->polltbl, event_notifier_get_fd(rx_e));
> +    event_poll_del_fd(&t->polltbl, event_notifier_get_fd(tx_e));
> +
> +    t->state = THREAD_EXIT;
> +    event_notifier_set(&t->e);
> +    qemu_thread_join(&t->thread);
> +    virtio_queue_set_last_avail_idx(&n->vdev, 0,
> +            n->dp->rx_vring->last_avail_idx);
> +    virtio_queue_set_last_avail_idx(&n->vdev, 1,
> +            n->dp->tx_vring->last_avail_idx);
> +    vring_teardown(n->dp->rx_vring);
> +    vring_teardown(n->dp->tx_vring);
> +    virtnet_dataplane_disable_notifiers(n);
> +}
> +
> +void virtnet_dataplane_destroy(VirtIONet *n)
> +{
> +    virtnet_dataplane_stop(n);
> +    g_free(n->dp->rx_vring);
> +    g_free(n->dp->tx_vring);
> +    g_free(n->dp->rx_handler);
> +    g_free(n->dp->tx_handler);
> +    g_free(n->dp);
> +}
> diff --git a/hw/dataplane/virtio-net.h b/hw/dataplane/virtio-net.h
> new file mode 100644
> index 0000000..e50b2de
> --- /dev/null
> +++ b/hw/dataplane/virtio-net.h
> @@ -0,0 +1,26 @@
> +/* Copyright IBM, Corp. 2013
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + */
> +#ifndef VIRT_NET_DATAPLANE_H
> +#define VIRT_NET_DATAPLANE_H
> +
> +#include "event-poll.h"
> +#include "qemu/thread.h"
> +#include "hw/virtio-net.h"
> +
> +typedef enum  { THREAD_START, THREAD_EXIT
> +} WorkState;
> +
> +typedef struct WorkThread {
> +    EventPoll polltbl;
> +    QemuThread thread;
> +    EventNotifier e;
> +
> +    WorkState state;
> +    QemuMutex lock;
> +    QemuCond cond_start;
> +} WorkThread;
> +
> +extern WorkThread virt_net_thread;
> +#endif
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index 5d03b31..6bf4a40 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -26,47 +26,6 @@
>  #define MAC_TABLE_ENTRIES    64
>  #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
> 
> -typedef struct VirtIONet
> -{
> -    VirtIODevice vdev;
> -    uint8_t mac[ETH_ALEN];
> -    uint16_t status;
> -    VirtQueue *rx_vq;
> -    VirtQueue *tx_vq;
> -    VirtQueue *ctrl_vq;
> -    NICState *nic;
> -    QEMUTimer *tx_timer;
> -    QEMUBH *tx_bh;
> -    uint32_t tx_timeout;
> -    int32_t tx_burst;
> -    int tx_waiting;
> -    uint32_t has_vnet_hdr;
> -    size_t host_hdr_len;
> -    size_t guest_hdr_len;
> -    uint8_t has_ufo;
> -    struct {
> -        VirtQueueElement elem;
> -        ssize_t len;
> -    } async_tx;
> -    int mergeable_rx_bufs;
> -    uint8_t promisc;
> -    uint8_t allmulti;
> -    uint8_t alluni;
> -    uint8_t nomulti;
> -    uint8_t nouni;
> -    uint8_t nobcast;
> -    uint8_t vhost_started;
> -    struct {
> -        int in_use;
> -        int first_multi;
> -        uint8_t multi_overflow;
> -        uint8_t uni_overflow;
> -        uint8_t *macs;
> -    } mac_table;
> -    uint32_t *vlans;
> -    DeviceState *qdev;
> -} VirtIONet;
> -
>  /* TODO
>   * - we could suppress RX interrupt if we were so inclined.
>   */
> @@ -165,7 +124,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
>      }
>  }
> 
> -static void virtio_net_set_link_status(NetClientState *nc)
> +void virtio_net_set_link_status(NetClientState *nc)
>  {
>      VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
>      uint16_t old_status = n->status;
> @@ -528,8 +487,8 @@ static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
>      }
>  }
> 
> -static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
> -                           const void *buf, size_t size)
> +void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov,
> +                           int iov_cnt, const void *buf, size_t size)
>  {
>      if (n->has_vnet_hdr) {
>          /* FIXME this cast is evil */
> @@ -546,7 +505,7 @@ static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
>      }
>  }
> 
> -static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
> +int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size)
>  {
>      static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
>      static const uint8_t vlan[] = {0x81, 0x00};
> @@ -612,8 +571,9 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
>      if (!virtio_net_has_buffers(n, size + n->guest_hdr_len - n->host_hdr_len))
>          return 0;
> 
> -    if (!receive_filter(n, buf, size))
> +    if (!virtio_net_receive_filter(n, buf, size)) {
>          return size;
> +    }
> 
>      offset = i = 0;
> 
> @@ -649,7 +609,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
>                                      sizeof(mhdr.num_buffers));
>              }
> 
> -            receive_header(n, sg, elem.in_num, buf, size);
> +            virtio_net_receive_header(n, sg, elem.in_num, buf, size);
>              offset = n->host_hdr_len;
>              total += n->guest_hdr_len;
>              guest_offset = n->guest_hdr_len;
> @@ -994,7 +954,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
>      return 0;
>  }
> 
> -static void virtio_net_cleanup(NetClientState *nc)
> +void virtio_net_cleanup(NetClientState *nc)
>  {
>      VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> 
> diff --git a/hw/virtio-net.h b/hw/virtio-net.h
> index d46fb98..ed91a02 100644
> --- a/hw/virtio-net.h
> +++ b/hw/virtio-net.h
> @@ -159,4 +159,65 @@ struct virtio_net_ctrl_mac {
>          DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \
>          DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \
>          DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true)
> +
> +
> +#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
> +struct VirtIONetDataPlane;
> +#endif
> +
> +typedef struct VirtIONet {
> +    VirtIODevice vdev;
> +    uint8_t mac[ETH_ALEN];
> +    uint16_t status;
> +    VirtQueue *rx_vq;
> +    VirtQueue *tx_vq;
> +    VirtQueue *ctrl_vq;
> +    NICState *nic;
> +    QEMUTimer *tx_timer;
> +    QEMUBH *tx_bh;
> +    uint32_t tx_timeout;
> +    int32_t tx_burst;
> +    int tx_waiting;
> +    uint32_t has_vnet_hdr;
> +    size_t host_hdr_len;
> +    size_t guest_hdr_len;
> +    uint8_t has_ufo;
> +    struct {
> +        VirtQueueElement elem;
> +        ssize_t len;
> +    } async_tx;
> +    int mergeable_rx_bufs;
> +    uint8_t promisc;
> +    uint8_t allmulti;
> +    uint8_t alluni;
> +    uint8_t nomulti;
> +    uint8_t nouni;
> +    uint8_t nobcast;
> +    uint8_t vhost_started;
> +    struct {
> +        int in_use;
> +        int first_multi;
> +        uint8_t multi_overflow;
> +        uint8_t uni_overflow;
> +        uint8_t *macs;
> +    } mac_table;
> +    uint32_t *vlans;
> +
> +#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
> +    struct VirtIONetDataPlane *dp;
> +    bool dp_start;
> +#endif
> +    DeviceState *qdev;
> +} VirtIONet;
> +
> +int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size);
> +void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov,
> +                    int iov_cnt, const void *buf, size_t size);
> +void virtio_net_set_link_status(NetClientState *nc);
> +void virtio_net_cleanup(NetClientState *nc);
> +
> +#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
> +void virtnet_dataplane_create(VirtIONet *n);
> +#endif
> +
>  #endif
> -- 
> 1.7.4.4
> 
>
pingfan liu - Feb. 27, 2013, 9:36 a.m.
On Fri, Feb 22, 2013 at 4:55 AM, mdroth <mdroth@linux.vnet.ibm.com> wrote:
> On Thu, Feb 21, 2013 at 08:54:50PM +0800, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> This is a emulation to virtio-blk dataplane, which push the data
>> handling out of biglock. And it is a try to implement this process
>> in userspace, while vhost-net in kernel.
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  hw/dataplane/virtio-net.c |  422 +++++++++++++++++++++++++++++++++++++++++++++
>>  hw/dataplane/virtio-net.h |   26 +++
>>  hw/virtio-net.c           |   56 +-----
>>  hw/virtio-net.h           |   61 +++++++
>>  4 files changed, 517 insertions(+), 48 deletions(-)
>>  create mode 100644 hw/dataplane/virtio-net.c
>>  create mode 100644 hw/dataplane/virtio-net.h
>>
>> diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
>> new file mode 100644
>> index 0000000..9a1795d
>> --- /dev/null
>> +++ b/hw/dataplane/virtio-net.c
>> @@ -0,0 +1,422 @@
>> +/* Copyright IBM, Corp. 2013
>> + *
>> + * Based on vhost-net and virtio-blk dataplane code
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.
>> + */
>> +#include "hw/virtio.h"
>> +#include "qemu/iov.h"
>> +#include "vring.h"
>> +#include <linux/virtio_ring.h>
>> +#include "net/net.h"
>> +#include "net/checksum.h"
>> +#include "net/tap.h"
>> +#include "virtio-net.h"
>> +#include "qemu/error-report.h"
>> +
>> +typedef struct VirtIONetDataPlane {
>> +    int async_tx_head;
>> +    Vring *rx_vring;
>> +    Vring *tx_vring;
>> +    EventHandler *rx_handler;
>> +    EventHandler *tx_handler;
>> +    bool stop;
>> +} VirtIONetDataPlane;
>> +
>> +WorkThread virt_net_thread;
>> +
>> +#define VRING_MAX 128
>> +
>> +static int32_t virtnet_tx(VirtIONet *n, VirtQueue *vq);
>> +
>> +static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
>> +{
>> +    int ret;
>> +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
>> +
>> +    vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
>> +    ret = virtnet_tx(n, n->tx_vq);
>> +    if (ret != -EBUSY) {
>> +        vring_enable_notification(&n->vdev, n->dp->tx_vring);
>> +    }
>> +}
>> +
>> +static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
>> +{
>> +    struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
>> +    struct iovec *snd, *end = &out_iov[VRING_MAX];
>> +    int head;
>> +    unsigned int out_num, in_num, sg_num;
>> +    int ret;
>> +    int num_packets = 0;
>> +
>> +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
>> +        return num_packets;
>> +    }
>> +
>> +    assert(n->vdev.vm_running);
>> +
>> +    if (n->async_tx.elem.out_num) {
>> +        return num_packets;
>> +    }
>> +
>> +    while (true) {
>> +        head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
>> +                            &in_num);
>> +        if (head < 0) {
>> +            break;
>> +        }
>> +        snd = out_iov;
>> +        assert(n->host_hdr_len <= n->guest_hdr_len);
>> +        if (n->host_hdr_len != n->guest_hdr_len) {
>> +            sg_num = iov_copy(sg, ARRAY_SIZE(sg),
>> +                                       out_iov, out_num,
>> +                                       0, n->host_hdr_len);
>> +            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
>> +                             out_iov, out_num,
>> +                             n->guest_hdr_len, -1);
>> +            out_num = sg_num;
>> +            snd = sg;
>> +        }
>> +
>> +        ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
>> +                    virtnet_tx_complete);
>> +        if (ret == 0) {
>> +            n->dp->async_tx_head = head;
>> +            return -EBUSY;
>> +        }
>> +        vring_push(n->dp->tx_vring, head, 0);
>> +        if (num_packets++ > n->tx_burst) {
>> +            break;
>> +        }
>
> I'm not sure why we'd break here: if we're sending out lots of packets
> should we keep notifications disabled and continue sending them till
> we'd block? Is it to avoid starving the rx side?
>
Yes.
>> +    }
>> +
>> +    return num_packets;
>> +}
>> +
>> +static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
>> +{
>> +    int32 ret;
>> +    VirtIONet *n = (VirtIONet *)vdev;
>> +
>> +    /* This happens when device was stopped but VCPU wasn't. */
>> +    if (!n->vdev.vm_running) {
>> +        return;
>> +    }
>> +    vring_disable_notification(vdev, n->dp->tx_vring);
>> +    ret = virtnet_tx(n, vq);
>> +    if (ret != -EBUSY) {
>> +        vring_enable_notification(vdev, n->dp->tx_vring);
>> +    }
>> +}
>> +
>> +
>> +static int virtio_net_can_receive(NetClientState *nc)
>> +{
>> +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
>> +    if (!n->vdev.vm_running) {
>> +        return 0;
>> +    }
>> +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
>> +        return 0;
>> +    }
>> +
>> +    return 1;
>> +}
>> +
>> +/* peek but not use */
>> +static int rx_mergeable_buf_sz(VirtIONet *n)
>> +{
>> +    uint16_t start, idx, head;
>> +    int total = 0;
>> +    Vring *vring = n->dp->rx_vring;
>> +    struct vring_desc *dsc;
>> +    struct vring_desc *base;
>> +
>> +    for (start = vring->last_avail_idx; start != vring->vr.avail->idx;
>> +            start++) {
>> +        head = start%vring->vr.num;
>> +        idx = vring->vr.avail->ring[head];
>> +        if (vring->vr.desc[idx].flags & VRING_DESC_F_INDIRECT) {
>> +            base = hostmem_lookup(&vring->hostmem, vring->vr.desc[idx].addr,
>> +                    vring->vr.desc[idx].len, 0);
>> +        } else {
>> +            base = vring->vr.desc;
>> +        }
>> +        dsc = base;
>> +        do {
>> +            total += dsc->len;
>> +            if (!(dsc->flags & VRING_DESC_F_NEXT)) {
>> +                break;
>> +            }
>> +            dsc = &base[dsc->next];
>> +        } while (true);
>> +    }
>> +    return total;
>> +}
>
> NetClients usually have a set-size buffer they'll pass in, or at least
> will tell us how much they have to give us, so instead of mapping in and
> summing all the descriptor heads we can just return true if we have at
> least that much?
>
We need to decide whether the rx ring has enough buffer space or not. And as
you suggest, I think we can track the remaining size of the rx ring since
the last summing and allocation, and if there is not enough space left
again, then recheck it.
>> +
>> +static bool virtnet_has_buffers(VirtIONet *n, int bufsize)
>> +{
>> +    if (!vring_more_avail(n->dp->rx_vring)) {
>> +        return false;
>> +    }
>> +    if (n->mergeable_rx_bufs) {
>> +        if (rx_mergeable_buf_sz(n) <  bufsize) {
>> +            return false;
>> +        }
>> +    }
>> +    return true;
>> +}
>> +
>> +static ssize_t virtnet_rx(NetClientState *nc, const uint8_t *buf, size_t size)
>> +{
>> +    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
>> +    struct iovec in_vec[VIRTQUEUE_MAX_SIZE], head_iov[2], *end;
>> +    unsigned int in_num, out_num, vnet_hdr_sz;
>> +    int head;
>> +    size_t len, total, offset = 0;
>> +    uint16_t numbuff = 0;
>> +
>> +    total = offset = 0;
>> +
>> +    end = &in_vec[VIRTQUEUE_MAX_SIZE];
>> +    if (!virtio_net_receive_filter(n, buf, size)) {
>> +        return size;
>> +    }
>> +
>> +    /* enough buff ? */
>> +    if (!virtnet_has_buffers(n, size)) {
>> +        vring_enable_notification(&n->vdev, n->dp->rx_vring);
>> +        return 0;
>> +    }
>> +
>> +    while (size > offset) {
>> +        head = vring_pop(&n->vdev, n->dp->rx_vring, in_vec, end, &out_num,
>> +                &in_num);
>> +        if (head < 0) {
>> +            return 0;
>> +        }
>> +        len = 0;
>> +        if (numbuff == 0) {
>> +            virtio_net_receive_header(n, in_vec, in_num, buf, size);
>> +
>> +            if (n->mergeable_rx_bufs) {
>> +                vnet_hdr_sz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>> +            } else {
>> +                vnet_hdr_sz = sizeof(struct virtio_net_hdr);
>> +            }
>> +            iov_copy(head_iov, 2, in_vec, in_num, 0, vnet_hdr_sz);
>> +            offset += n->host_hdr_len;
>> +            total += vnet_hdr_sz;
>> +            len += vnet_hdr_sz;
>> +        }
>> +        len += iov_from_buf(in_vec, in_num, vnet_hdr_sz, buf+offset,
>> +                        size-offset);
>> +        offset += len;
>> +        total += len;
>> +        numbuff++;
>> +        /* Guest wont see used->idx until we are ready */
>> +        vring_fill(n->dp->rx_vring, head, len);
>> +    }
>> +
>> +    if (n->mergeable_rx_bufs) {
>> +        iov_from_buf(head_iov, 2,
>> +            offsetof(struct virtio_net_hdr_mrg_rxbuf, num_buffers), &numbuff,
>> +                sizeof(numbuff));
>> +    }
>> +    vring_flush(n->dp->rx_vring);
>> +
>> +    if (vring_should_notify(&n->vdev, n->dp->rx_vring)) {
>> +        virtio_irq(n->rx_vq);
>> +    }
>> +
>> +    return size;
>> +}
>> +
>> +static void tx_cb(EventHandler *handler, uint32_t events)
>> +{
>> +    VirtIONet *n = handler->opaque;
>> +
>> +    event_notifier_test_and_clear(handler->notifier);
>> +    virtnet_handle_tx(&n->vdev, n->tx_vq);
>> +}
>> +
>> +/* rvq has buffer again, push tap to fill in */
>> +static void rx_cb(EventHandler *handler, uint32_t events)
>> +{
>> +    VirtIONet *n = handler->opaque;
>> +
>> +    event_notifier_test_and_clear(handler->notifier);
>> +    qemu_flush_queued_packets(&n->nic->nc);
>> +}
>> +
>> +static NetClientInfo net_dp_info = {
>> +    .type = NET_CLIENT_OPTIONS_KIND_NIC,
>> +    .size = sizeof(NICState),
>> +    .can_receive = virtio_net_can_receive,
>> +    .receive = virtnet_rx,
>> +    .cleanup = virtio_net_cleanup,
>> +    .link_status_changed = virtio_net_set_link_status,
>> +};
>> +
>> +void virtnet_dataplane_create(VirtIONet *n)
>> +{
>> +    EventHandler *tx_handler, *rx_handler;
>> +
>> +    n->dp = g_malloc(sizeof(VirtIONetDataPlane));
>> +    n->dp->stop = false;
>> +    n->dp->rx_vring = g_malloc(sizeof(Vring));
>> +    n->dp->tx_vring = g_malloc(sizeof(Vring));
>> +    rx_handler = n->dp->rx_handler = g_malloc(sizeof(EventHandler));
>> +    tx_handler = n->dp->tx_handler = g_malloc(sizeof(EventHandler));
>> +    tx_handler->opaque = n;
>> +    rx_handler->opaque = n;
>> +
>> +    /* safely redirect receive handler */
>> +    n->nic->nc.info = &net_dp_info;
>> +}
>> +
>> +static int virtnet_dataplane_disable_notifiers(VirtIONet *n)
>> +{
>> +    int i, r;
>> +    VirtIODevice *vdev = &n->vdev;
>> +
>> +    for (i = 0; i < 2; ++i) {
>> +        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
>> +        if (r < 0) {
>> +            fprintf(stderr, "virtnet dataplane %d notifier unbinding failed:
>> +                    %d\n", i, -r);
>> +        }
>> +    }
>> +    return r;
>> +}
>> +
>> +static int virtnet_dataplane_enable_notifiers(VirtIONet *n)
>> +{
>> +    int i, r;
>> +    VirtIODevice *vdev = &n->vdev;
>> +
>> +    if (!vdev->binding->set_host_notifier) {
>> +        fprintf(stderr, "binding does not support host notifiers\n");
>> +        r = -ENOSYS;
>> +        goto fail;
>> +    }
>> +    for (i = 0; i < 2; ++i) {
>> +        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
>> +        if (r < 0) {
>> +            fprintf(stderr, "virtnet dataplane %d notifier binding failed:
>> +                    %d\n", i, -r);
>> +            goto fail_vq;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +fail_vq:
>> +    while (--i >= 0) {
>> +        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
>> +        if (r < 0) {
>> +            fprintf(stderr, "virtnet dataplane %d notifier cleanup error:
>> +                    %d\n", i, -r);
>> +            fflush(stderr);
>> +        }
>> +        assert(r >= 0);
>> +    }
>> +fail:
>> +    return r;
>> +}
>> +
>> +
>> +static void thread_cb(EventHandler *handler, uint32_t events)
>> +{
>> +    EventNotifier *e = handler->notifier;
>> +    event_notifier_test_and_clear(e);
>> +}
>> +
>> +static void *working_thread(void *data)
>> +{
>> +    WorkThread *t = (WorkThread *)data;
>> +
>> +    qemu_mutex_lock(&t->lock);
>> +    qemu_cond_signal(&t->cond_start);
>> +    qemu_mutex_unlock(&t->lock);
>> +    while (t->state == THREAD_START) {
>> +        event_poll(&t->polltbl);
>> +    }
>> +    return NULL;
>> +}
>> +
>> +static void init_work_thread(void)
>> +{
>> +    EventHandler *thread_handler = g_malloc(sizeof(EventHandler));
>> +    WorkThread *t = &virt_net_thread;
>
> Does virt_net_thread need to be global? Couldn't we hang it off of
> VirtIONet somewhere and pass that in instead?
>
Yes, the final design aim is to assign threads to virtio-net devices dynamically.

>> +
>> +    qemu_mutex_init(&t->lock);
>> +    qemu_cond_init(&t->cond_start);
>> +    event_poll_init(&t->polltbl, 4);
>> +    event_notifier_init(&t->e, 0);
>> +    event_poll_add(&t->polltbl, thread_handler, &t->e, thread_cb);
>> +    qemu_mutex_lock(&t->lock);
>> +    t->state = THREAD_START;
>> +    qemu_thread_create(&t->thread, working_thread, t, QEMU_THREAD_JOINABLE);
>
> Have you considered using separate threads for tx/rx? This would allow
> you to spin/block freely in rx/tx without regard for starvation
> (assuming that's an actual problem you're hitting)
>
Using separate threads can improve performance, but it increases the
complexity of synchronization. And the final aim is to push the handlers
of a whole set of devices — not just virtio-net — onto different threads.

Thanks and regards,
Pingfan

>> +    qemu_cond_wait(&t->cond_start, &t->lock);
>> +    qemu_mutex_unlock(&t->lock);
>> +}
>> +
>> +void virtnet_dataplane_start(VirtIONet *n)
>> +{
>> +    bool rslt;
>> +    EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq);
>> +    EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq);
>> +    WorkThread *t = &virt_net_thread;
>> +
>> +    virtnet_dataplane_enable_notifiers(n);
>> +    rslt = vring_setup(n->dp->rx_vring, &n->vdev, 0);
>> +    if (!rslt) {
>> +        error_report("fail to setup rx vring\n");
>> +        exit(1);
>> +    }
>> +    vring_restore(n->dp->rx_vring,
>> +                    virtio_queue_get_last_avail_idx(&n->vdev, 0));
>> +    rslt = vring_setup(n->dp->tx_vring, &n->vdev, 1);
>> +    if (!rslt) {
>> +        error_report("fail to setup tx vring\n");
>> +        exit(1);
>> +    }
>> +    vring_restore(n->dp->tx_vring,
>> +            virtio_queue_get_last_avail_idx(&n->vdev, 1));
>> +    init_work_thread();
>> +
>> +    event_poll_add(&t->polltbl, n->dp->rx_handler, rx_e, rx_cb);
>> +    event_poll_add(&t->polltbl, n->dp->tx_handler, tx_e, tx_cb);
>> +}
>> +
>> +void virtnet_dataplane_stop(VirtIONet *n)
>> +{
>> +    EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq);
>> +    EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq);
>> +    WorkThread *t = &virt_net_thread;
>> +
>> +    event_poll_del_fd(&t->polltbl, event_notifier_get_fd(rx_e));
>> +    event_poll_del_fd(&t->polltbl, event_notifier_get_fd(tx_e));
>> +
>> +    t->state = THREAD_EXIT;
>> +    event_notifier_set(&t->e);
>> +    qemu_thread_join(&t->thread);
>> +    virtio_queue_set_last_avail_idx(&n->vdev, 0,
>> +            n->dp->rx_vring->last_avail_idx);
>> +    virtio_queue_set_last_avail_idx(&n->vdev, 1,
>> +            n->dp->tx_vring->last_avail_idx);
>> +    vring_teardown(n->dp->rx_vring);
>> +    vring_teardown(n->dp->tx_vring);
>> +    virtnet_dataplane_disable_notifiers(n);
>> +}
>> +
>> +void virtnet_dataplane_destroy(VirtIONet *n)
>> +{
>> +    virtnet_dataplane_stop(n);
>> +    g_free(n->dp->rx_vring);
>> +    g_free(n->dp->tx_vring);
>> +    g_free(n->dp->rx_handler);
>> +    g_free(n->dp->tx_handler);
>> +    g_free(n->dp);
>> +}
>> diff --git a/hw/dataplane/virtio-net.h b/hw/dataplane/virtio-net.h
>> new file mode 100644
>> index 0000000..e50b2de
>> --- /dev/null
>> +++ b/hw/dataplane/virtio-net.h
>> @@ -0,0 +1,26 @@
>> +/* Copyright IBM, Corp. 2013
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.
>> + */
>> +#ifndef VIRT_NET_DATAPLANE_H
>> +#define VIRT_NET_DATAPLANE_H
>> +
>> +#include "event-poll.h"
>> +#include "qemu/thread.h"
>> +#include "hw/virtio-net.h"
>> +
>> +typedef enum  { THREAD_START, THREAD_EXIT
>> +} WorkState;
>> +
>> +typedef struct WorkThread {
>> +    EventPoll polltbl;
>> +    QemuThread thread;
>> +    EventNotifier e;
>> +
>> +    WorkState state;
>> +    QemuMutex lock;
>> +    QemuCond cond_start;
>> +} WorkThread;
>> +
>> +extern WorkThread virt_net_thread;
>> +#endif
>> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
>> index 5d03b31..6bf4a40 100644
>> --- a/hw/virtio-net.c
>> +++ b/hw/virtio-net.c
>> @@ -26,47 +26,6 @@
>>  #define MAC_TABLE_ENTRIES    64
>>  #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
>>
>> -typedef struct VirtIONet
>> -{
>> -    VirtIODevice vdev;
>> -    uint8_t mac[ETH_ALEN];
>> -    uint16_t status;
>> -    VirtQueue *rx_vq;
>> -    VirtQueue *tx_vq;
>> -    VirtQueue *ctrl_vq;
>> -    NICState *nic;
>> -    QEMUTimer *tx_timer;
>> -    QEMUBH *tx_bh;
>> -    uint32_t tx_timeout;
>> -    int32_t tx_burst;
>> -    int tx_waiting;
>> -    uint32_t has_vnet_hdr;
>> -    size_t host_hdr_len;
>> -    size_t guest_hdr_len;
>> -    uint8_t has_ufo;
>> -    struct {
>> -        VirtQueueElement elem;
>> -        ssize_t len;
>> -    } async_tx;
>> -    int mergeable_rx_bufs;
>> -    uint8_t promisc;
>> -    uint8_t allmulti;
>> -    uint8_t alluni;
>> -    uint8_t nomulti;
>> -    uint8_t nouni;
>> -    uint8_t nobcast;
>> -    uint8_t vhost_started;
>> -    struct {
>> -        int in_use;
>> -        int first_multi;
>> -        uint8_t multi_overflow;
>> -        uint8_t uni_overflow;
>> -        uint8_t *macs;
>> -    } mac_table;
>> -    uint32_t *vlans;
>> -    DeviceState *qdev;
>> -} VirtIONet;
>> -
>>  /* TODO
>>   * - we could suppress RX interrupt if we were so inclined.
>>   */
>> @@ -165,7 +124,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
>>      }
>>  }
>>
>> -static void virtio_net_set_link_status(NetClientState *nc)
>> +void virtio_net_set_link_status(NetClientState *nc)
>>  {
>>      VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
>>      uint16_t old_status = n->status;
>> @@ -528,8 +487,8 @@ static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
>>      }
>>  }
>>
>> -static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
>> -                           const void *buf, size_t size)
>> +void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov,
>> +                           int iov_cnt, const void *buf, size_t size)
>>  {
>>      if (n->has_vnet_hdr) {
>>          /* FIXME this cast is evil */
>> @@ -546,7 +505,7 @@ static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
>>      }
>>  }
>>
>> -static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
>> +int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size)
>>  {
>>      static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
>>      static const uint8_t vlan[] = {0x81, 0x00};
>> @@ -612,8 +571,9 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
>>      if (!virtio_net_has_buffers(n, size + n->guest_hdr_len - n->host_hdr_len))
>>          return 0;
>>
>> -    if (!receive_filter(n, buf, size))
>> +    if (!virtio_net_receive_filter(n, buf, size)) {
>>          return size;
>> +    }
>>
>>      offset = i = 0;
>>
>> @@ -649,7 +609,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
>>                                      sizeof(mhdr.num_buffers));
>>              }
>>
>> -            receive_header(n, sg, elem.in_num, buf, size);
>> +            virtio_net_receive_header(n, sg, elem.in_num, buf, size);
>>              offset = n->host_hdr_len;
>>              total += n->guest_hdr_len;
>>              guest_offset = n->guest_hdr_len;
>> @@ -994,7 +954,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
>>      return 0;
>>  }
>>
>> -static void virtio_net_cleanup(NetClientState *nc)
>> +void virtio_net_cleanup(NetClientState *nc)
>>  {
>>      VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
>>
>> diff --git a/hw/virtio-net.h b/hw/virtio-net.h
>> index d46fb98..ed91a02 100644
>> --- a/hw/virtio-net.h
>> +++ b/hw/virtio-net.h
>> @@ -159,4 +159,65 @@ struct virtio_net_ctrl_mac {
>>          DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \
>>          DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \
>>          DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true)
>> +
>> +
>> +#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
>> +struct VirtIONetDataPlane;
>> +#endif
>> +
>> +typedef struct VirtIONet {
>> +    VirtIODevice vdev;
>> +    uint8_t mac[ETH_ALEN];
>> +    uint16_t status;
>> +    VirtQueue *rx_vq;
>> +    VirtQueue *tx_vq;
>> +    VirtQueue *ctrl_vq;
>> +    NICState *nic;
>> +    QEMUTimer *tx_timer;
>> +    QEMUBH *tx_bh;
>> +    uint32_t tx_timeout;
>> +    int32_t tx_burst;
>> +    int tx_waiting;
>> +    uint32_t has_vnet_hdr;
>> +    size_t host_hdr_len;
>> +    size_t guest_hdr_len;
>> +    uint8_t has_ufo;
>> +    struct {
>> +        VirtQueueElement elem;
>> +        ssize_t len;
>> +    } async_tx;
>> +    int mergeable_rx_bufs;
>> +    uint8_t promisc;
>> +    uint8_t allmulti;
>> +    uint8_t alluni;
>> +    uint8_t nomulti;
>> +    uint8_t nouni;
>> +    uint8_t nobcast;
>> +    uint8_t vhost_started;
>> +    struct {
>> +        int in_use;
>> +        int first_multi;
>> +        uint8_t multi_overflow;
>> +        uint8_t uni_overflow;
>> +        uint8_t *macs;
>> +    } mac_table;
>> +    uint32_t *vlans;
>> +
>> +#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
>> +    struct VirtIONetDataPlane *dp;
>> +    bool dp_start;
>> +#endif
>> +    DeviceState *qdev;
>> +} VirtIONet;
>> +
>> +int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size);
>> +void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov,
>> +                    int iov_cnt, const void *buf, size_t size);
>> +void virtio_net_set_link_status(NetClientState *nc);
>> +void virtio_net_cleanup(NetClientState *nc);
>> +
>> +#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
>> +void virtnet_dataplane_create(VirtIONet *n);
>> +#endif
>> +
>>  #endif
>> --
>> 1.7.4.4
>>
>>

Patch

diff --git a/hw/dataplane/virtio-net.c b/hw/dataplane/virtio-net.c
new file mode 100644
index 0000000..9a1795d
--- /dev/null
+++ b/hw/dataplane/virtio-net.c
@@ -0,0 +1,422 @@ 
+/* Copyright IBM, Corp. 2013
+ *
+ * Based on vhost-net and virtio-blk dataplane code
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include "hw/virtio.h"
+#include "qemu/iov.h"
+#include "vring.h"
+#include <linux/virtio_ring.h>
+#include "net/net.h"
+#include "net/checksum.h"
+#include "net/tap.h"
+#include "virtio-net.h"
+#include "qemu/error-report.h"
+
+typedef struct VirtIONetDataPlane {
+    int async_tx_head;
+    Vring *rx_vring;
+    Vring *tx_vring;
+    EventHandler *rx_handler;
+    EventHandler *tx_handler;
+    bool stop;
+} VirtIONetDataPlane;
+
+WorkThread virt_net_thread;
+
+#define VRING_MAX 128
+
+static int32_t virtnet_tx(VirtIONet *n, VirtQueue *vq);
+
+static void virtnet_tx_complete(struct NetClientState *nc, ssize_t sz)
+{
+    int ret;
+    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+
+    vring_push(n->dp->tx_vring, n->dp->async_tx_head, 0);
+    ret = virtnet_tx(n, n->tx_vq);
+    if (ret != -EBUSY) {
+        vring_enable_notification(&n->vdev, n->dp->tx_vring);
+    }
+}
+
+static int virtnet_tx(VirtIONet *n, VirtQueue *vq)
+{
+    struct iovec out_iov[VRING_MAX], sg[VRING_MAX];
+    struct iovec *snd, *end = &out_iov[VRING_MAX];
+    int head;
+    unsigned int out_num, in_num, sg_num;
+    int ret;
+    int num_packets = 0;
+
+    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return num_packets;
+    }
+
+    assert(n->vdev.vm_running);
+
+    if (n->async_tx.elem.out_num) {
+        return num_packets;
+    }
+
+    while (true) {
+        head = vring_pop(&n->vdev, n->dp->tx_vring, out_iov, end, &out_num,
+                            &in_num);
+        if (head < 0) {
+            break;
+        }
+        snd = out_iov;
+        assert(n->host_hdr_len <= n->guest_hdr_len);
+        if (n->host_hdr_len != n->guest_hdr_len) {
+            sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+                                       out_iov, out_num,
+                                       0, n->host_hdr_len);
+            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
+                             out_iov, out_num,
+                             n->guest_hdr_len, -1);
+            out_num = sg_num;
+            snd = sg;
+        }
+
+        ret = qemu_sendv_packet_async(&n->nic->nc, snd, out_num,
+                    virtnet_tx_complete);
+        if (ret == 0) {
+            n->dp->async_tx_head = head;
+            return -EBUSY;
+        }
+        vring_push(n->dp->tx_vring, head, 0);
+        if (num_packets++ > n->tx_burst) {
+            break;
+        }
+    }
+
+    return num_packets;
+}
+
+static void virtnet_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
+{
+    int32 ret;
+    VirtIONet *n = (VirtIONet *)vdev;
+
+    /* This happens when device was stopped but VCPU wasn't. */
+    if (!n->vdev.vm_running) {
+        return;
+    }
+    vring_disable_notification(vdev, n->dp->tx_vring);
+    ret = virtnet_tx(n, vq);
+    if (ret != -EBUSY) {
+        vring_enable_notification(vdev, n->dp->tx_vring);
+    }
+}
+
+
+static int virtio_net_can_receive(NetClientState *nc)
+{
+    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    if (!n->vdev.vm_running) {
+        return 0;
+    }
+    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return 0;
+    }
+
+    return 1;
+}
+
+/* peek but not use */
+static int rx_mergeable_buf_sz(VirtIONet *n)
+{
+    uint16_t start, idx, head;
+    int total = 0;
+    Vring *vring = n->dp->rx_vring;
+    struct vring_desc *dsc;
+    struct vring_desc *base;
+
+    for (start = vring->last_avail_idx; start != vring->vr.avail->idx;
+            start++) {
+        head = start%vring->vr.num;
+        idx = vring->vr.avail->ring[head];
+        if (vring->vr.desc[idx].flags & VRING_DESC_F_INDIRECT) {
+            base = hostmem_lookup(&vring->hostmem, vring->vr.desc[idx].addr,
+                    vring->vr.desc[idx].len, 0);
+        } else {
+            base = vring->vr.desc;
+        }
+        dsc = base;
+        do {
+            total += dsc->len;
+            if (!(dsc->flags & VRING_DESC_F_NEXT)) {
+                break;
+            }
+            dsc = &base[dsc->next];
+        } while (true);
+    }
+    return total;
+}
+
+static bool virtnet_has_buffers(VirtIONet *n, int bufsize)
+{
+    if (!vring_more_avail(n->dp->rx_vring)) {
+        return false;
+    }
+    if (n->mergeable_rx_bufs) {
+        if (rx_mergeable_buf_sz(n) <  bufsize) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static ssize_t virtnet_rx(NetClientState *nc, const uint8_t *buf, size_t size)
+{
+    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
+    struct iovec in_vec[VIRTQUEUE_MAX_SIZE], head_iov[2], *end;
+    unsigned int in_num, out_num, vnet_hdr_sz;
+    int head;
+    size_t len, total, offset = 0;
+    uint16_t numbuff = 0;
+
+    total = offset = 0;
+
+    end = &in_vec[VIRTQUEUE_MAX_SIZE];
+    if (!virtio_net_receive_filter(n, buf, size)) {
+        return size;
+    }
+
+    /* enough buff ? */
+    if (!virtnet_has_buffers(n, size)) {
+        vring_enable_notification(&n->vdev, n->dp->rx_vring);
+        return 0;
+    }
+
+    while (size > offset) {
+        head = vring_pop(&n->vdev, n->dp->rx_vring, in_vec, end, &out_num,
+                &in_num);
+        if (head < 0) {
+            return 0;
+        }
+        len = 0;
+        if (numbuff == 0) {
+            virtio_net_receive_header(n, in_vec, in_num, buf, size);
+
+            if (n->mergeable_rx_bufs) {
+                vnet_hdr_sz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+            } else {
+                vnet_hdr_sz = sizeof(struct virtio_net_hdr);
+            }
+            iov_copy(head_iov, 2, in_vec, in_num, 0, vnet_hdr_sz);
+            offset += n->host_hdr_len;
+            total += vnet_hdr_sz;
+            len += vnet_hdr_sz;
+        }
+        len += iov_from_buf(in_vec, in_num, vnet_hdr_sz, buf+offset,
+                        size-offset);
+        offset += len;
+        total += len;
+        numbuff++;
+        /* Guest wont see used->idx until we are ready */
+        vring_fill(n->dp->rx_vring, head, len);
+    }
+
+    if (n->mergeable_rx_bufs) {
+        iov_from_buf(head_iov, 2,
+            offsetof(struct virtio_net_hdr_mrg_rxbuf, num_buffers), &numbuff,
+                sizeof(numbuff));
+    }
+    vring_flush(n->dp->rx_vring);
+
+    if (vring_should_notify(&n->vdev, n->dp->rx_vring)) {
+        virtio_irq(n->rx_vq);
+    }
+
+    return size;
+}
+
+static void tx_cb(EventHandler *handler, uint32_t events)
+{
+    VirtIONet *n = handler->opaque;
+
+    event_notifier_test_and_clear(handler->notifier);
+    virtnet_handle_tx(&n->vdev, n->tx_vq);
+}
+
+/* rvq has buffer again, push tap to fill in */
+static void rx_cb(EventHandler *handler, uint32_t events)
+{
+    VirtIONet *n = handler->opaque;
+
+    event_notifier_test_and_clear(handler->notifier);
+    qemu_flush_queued_packets(&n->nic->nc);
+}
+
+static NetClientInfo net_dp_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_NIC,
+    .size = sizeof(NICState),
+    .can_receive = virtio_net_can_receive,
+    .receive = virtnet_rx,
+    .cleanup = virtio_net_cleanup,
+    .link_status_changed = virtio_net_set_link_status,
+};
+
+void virtnet_dataplane_create(VirtIONet *n)
+{
+    EventHandler *tx_handler, *rx_handler;
+
+    n->dp = g_malloc(sizeof(VirtIONetDataPlane));
+    n->dp->stop = false;
+    n->dp->rx_vring = g_malloc(sizeof(Vring));
+    n->dp->tx_vring = g_malloc(sizeof(Vring));
+    rx_handler = n->dp->rx_handler = g_malloc(sizeof(EventHandler));
+    tx_handler = n->dp->tx_handler = g_malloc(sizeof(EventHandler));
+    tx_handler->opaque = n;
+    rx_handler->opaque = n;
+
+    /* safely redirect receive handler */
+    n->nic->nc.info = &net_dp_info;
+}
+
+static int virtnet_dataplane_disable_notifiers(VirtIONet *n)
+{
+    int i, r;
+    VirtIODevice *vdev = &n->vdev;
+
+    for (i = 0; i < 2; ++i) {
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+        if (r < 0) {
+            fprintf(stderr, "virtnet dataplane %d notifier unbinding failed:
+                    %d\n", i, -r);
+        }
+    }
+    return r;
+}
+
+static int virtnet_dataplane_enable_notifiers(VirtIONet *n)
+{
+    int i, r;
+    VirtIODevice *vdev = &n->vdev;
+
+    if (!vdev->binding->set_host_notifier) {
+        fprintf(stderr, "binding does not support host notifiers\n");
+        r = -ENOSYS;
+        goto fail;
+    }
+    for (i = 0; i < 2; ++i) {
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
+        if (r < 0) {
+            fprintf(stderr, "virtnet dataplane %d notifier binding failed:
+                    %d\n", i, -r);
+            goto fail_vq;
+        }
+    }
+
+    return 0;
+fail_vq:
+    while (--i >= 0) {
+        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+        if (r < 0) {
+            fprintf(stderr, "virtnet dataplane %d notifier cleanup error:
+                    %d\n", i, -r);
+            fflush(stderr);
+        }
+        assert(r >= 0);
+    }
+fail:
+    return r;
+}
+
+
+static void thread_cb(EventHandler *handler, uint32_t events)
+{
+    EventNotifier *e = handler->notifier;
+    event_notifier_test_and_clear(e);
+}
+
+static void *working_thread(void *data)
+{
+    WorkThread *t = (WorkThread *)data;
+
+    qemu_mutex_lock(&t->lock);
+    qemu_cond_signal(&t->cond_start);
+    qemu_mutex_unlock(&t->lock);
+    while (t->state == THREAD_START) {
+        event_poll(&t->polltbl);
+    }
+    return NULL;
+}
+
+static void init_work_thread(void)
+{
+    EventHandler *thread_handler = g_malloc(sizeof(EventHandler));
+    WorkThread *t = &virt_net_thread;
+
+    qemu_mutex_init(&t->lock);
+    qemu_cond_init(&t->cond_start);
+    event_poll_init(&t->polltbl, 4);
+    event_notifier_init(&t->e, 0);
+    event_poll_add(&t->polltbl, thread_handler, &t->e, thread_cb);
+    qemu_mutex_lock(&t->lock);
+    t->state = THREAD_START;
+    qemu_thread_create(&t->thread, working_thread, t, QEMU_THREAD_JOINABLE);
+    qemu_cond_wait(&t->cond_start, &t->lock);
+    qemu_mutex_unlock(&t->lock);
+}
+
+void virtnet_dataplane_start(VirtIONet *n)
+{
+    bool rslt;
+    EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq);
+    EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq);
+    WorkThread *t = &virt_net_thread;
+
+    virtnet_dataplane_enable_notifiers(n);
+    rslt = vring_setup(n->dp->rx_vring, &n->vdev, 0);
+    if (!rslt) {
+        error_report("fail to setup rx vring\n");
+        exit(1);
+    }
+    vring_restore(n->dp->rx_vring,
+                    virtio_queue_get_last_avail_idx(&n->vdev, 0));
+    rslt = vring_setup(n->dp->tx_vring, &n->vdev, 1);
+    if (!rslt) {
+        error_report("fail to setup tx vring\n");
+        exit(1);
+    }
+    vring_restore(n->dp->tx_vring,
+            virtio_queue_get_last_avail_idx(&n->vdev, 1));
+    init_work_thread();
+
+    event_poll_add(&t->polltbl, n->dp->rx_handler, rx_e, rx_cb);
+    event_poll_add(&t->polltbl, n->dp->tx_handler, tx_e, tx_cb);
+}
+
+void virtnet_dataplane_stop(VirtIONet *n)
+{
+    EventNotifier *rx_e = virtio_queue_get_host_notifier(n->rx_vq);
+    EventNotifier *tx_e = virtio_queue_get_host_notifier(n->tx_vq);
+    WorkThread *t = &virt_net_thread;
+
+    event_poll_del_fd(&t->polltbl, event_notifier_get_fd(rx_e));
+    event_poll_del_fd(&t->polltbl, event_notifier_get_fd(tx_e));
+
+    t->state = THREAD_EXIT;
+    event_notifier_set(&t->e);
+    qemu_thread_join(&t->thread);
+    virtio_queue_set_last_avail_idx(&n->vdev, 0,
+            n->dp->rx_vring->last_avail_idx);
+    virtio_queue_set_last_avail_idx(&n->vdev, 1,
+            n->dp->tx_vring->last_avail_idx);
+    vring_teardown(n->dp->rx_vring);
+    vring_teardown(n->dp->tx_vring);
+    virtnet_dataplane_disable_notifiers(n);
+}
+
+void virtnet_dataplane_destroy(VirtIONet *n)
+{
+    virtnet_dataplane_stop(n);
+    g_free(n->dp->rx_vring);
+    g_free(n->dp->tx_vring);
+    g_free(n->dp->rx_handler);
+    g_free(n->dp->tx_handler);
+    g_free(n->dp);
+}
diff --git a/hw/dataplane/virtio-net.h b/hw/dataplane/virtio-net.h
new file mode 100644
index 0000000..e50b2de
--- /dev/null
+++ b/hw/dataplane/virtio-net.h
@@ -0,0 +1,26 @@ 
+/* Copyright IBM, Corp. 2013
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#ifndef VIRT_NET_DATAPLANE_H
+#define VIRT_NET_DATAPLANE_H
+
+#include "event-poll.h"
+#include "qemu/thread.h"
+#include "hw/virtio-net.h"
+
+typedef enum  { THREAD_START, THREAD_EXIT
+} WorkState;
+
+typedef struct WorkThread {
+    EventPoll polltbl;
+    QemuThread thread;
+    EventNotifier e;
+
+    WorkState state;
+    QemuMutex lock;
+    QemuCond cond_start;
+} WorkThread;
+
+extern WorkThread virt_net_thread;
+#endif
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5d03b31..6bf4a40 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -26,47 +26,6 @@ 
 #define MAC_TABLE_ENTRIES    64
 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
 
-typedef struct VirtIONet
-{
-    VirtIODevice vdev;
-    uint8_t mac[ETH_ALEN];
-    uint16_t status;
-    VirtQueue *rx_vq;
-    VirtQueue *tx_vq;
-    VirtQueue *ctrl_vq;
-    NICState *nic;
-    QEMUTimer *tx_timer;
-    QEMUBH *tx_bh;
-    uint32_t tx_timeout;
-    int32_t tx_burst;
-    int tx_waiting;
-    uint32_t has_vnet_hdr;
-    size_t host_hdr_len;
-    size_t guest_hdr_len;
-    uint8_t has_ufo;
-    struct {
-        VirtQueueElement elem;
-        ssize_t len;
-    } async_tx;
-    int mergeable_rx_bufs;
-    uint8_t promisc;
-    uint8_t allmulti;
-    uint8_t alluni;
-    uint8_t nomulti;
-    uint8_t nouni;
-    uint8_t nobcast;
-    uint8_t vhost_started;
-    struct {
-        int in_use;
-        int first_multi;
-        uint8_t multi_overflow;
-        uint8_t uni_overflow;
-        uint8_t *macs;
-    } mac_table;
-    uint32_t *vlans;
-    DeviceState *qdev;
-} VirtIONet;
-
 /* TODO
  * - we could suppress RX interrupt if we were so inclined.
  */
@@ -165,7 +124,7 @@  static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
     }
 }
 
-static void virtio_net_set_link_status(NetClientState *nc)
+void virtio_net_set_link_status(NetClientState *nc)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
     uint16_t old_status = n->status;
@@ -528,8 +487,8 @@  static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
     }
 }
 
-static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
-                           const void *buf, size_t size)
+void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov,
+                           int iov_cnt, const void *buf, size_t size)
 {
     if (n->has_vnet_hdr) {
         /* FIXME this cast is evil */
@@ -546,7 +505,7 @@  static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
     }
 }
 
-static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
+int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size)
 {
     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
     static const uint8_t vlan[] = {0x81, 0x00};
@@ -612,8 +571,9 @@  static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
     if (!virtio_net_has_buffers(n, size + n->guest_hdr_len - n->host_hdr_len))
         return 0;
 
-    if (!receive_filter(n, buf, size))
+    if (!virtio_net_receive_filter(n, buf, size)) {
         return size;
+    }
 
     offset = i = 0;
 
@@ -649,7 +609,7 @@  static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
                                     sizeof(mhdr.num_buffers));
             }
 
-            receive_header(n, sg, elem.in_num, buf, size);
+            virtio_net_receive_header(n, sg, elem.in_num, buf, size);
             offset = n->host_hdr_len;
             total += n->guest_hdr_len;
             guest_offset = n->guest_hdr_len;
@@ -994,7 +954,7 @@  static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
-static void virtio_net_cleanup(NetClientState *nc)
+void virtio_net_cleanup(NetClientState *nc)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index d46fb98..ed91a02 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -159,4 +159,65 @@  struct virtio_net_ctrl_mac {
         DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \
         DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \
         DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true)
+
+
+#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
+struct VirtIONetDataPlane;
+#endif
+
+typedef struct VirtIONet {
+    VirtIODevice vdev;
+    uint8_t mac[ETH_ALEN];
+    uint16_t status;
+    VirtQueue *rx_vq;
+    VirtQueue *tx_vq;
+    VirtQueue *ctrl_vq;
+    NICState *nic;
+    QEMUTimer *tx_timer;
+    QEMUBH *tx_bh;
+    uint32_t tx_timeout;
+    int32_t tx_burst;
+    int tx_waiting;
+    uint32_t has_vnet_hdr;
+    size_t host_hdr_len;
+    size_t guest_hdr_len;
+    uint8_t has_ufo;
+    struct {
+        VirtQueueElement elem;
+        ssize_t len;
+    } async_tx;
+    int mergeable_rx_bufs;
+    uint8_t promisc;
+    uint8_t allmulti;
+    uint8_t alluni;
+    uint8_t nomulti;
+    uint8_t nouni;
+    uint8_t nobcast;
+    uint8_t vhost_started;
+    struct {
+        int in_use;
+        int first_multi;
+        uint8_t multi_overflow;
+        uint8_t uni_overflow;
+        uint8_t *macs;
+    } mac_table;
+    uint32_t *vlans;
+
+#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
+    struct VirtIONetDataPlane *dp;
+    bool dp_start;
+#endif
+    DeviceState *qdev;
+} VirtIONet;
+
+int virtio_net_receive_filter(VirtIONet *n, const uint8_t *buf, int size);
+void virtio_net_receive_header(VirtIONet *n, const struct iovec *iov,
+                    int iov_cnt, const void *buf, size_t size);
+void virtio_net_set_link_status(NetClientState *nc);
+void virtio_net_cleanup(NetClientState *nc);
+
+#ifdef CONFIG_VIRTIO_NET_DATA_PLANE
+void virtnet_dataplane_create(VirtIONet *n);
+#endif
+
 #endif