
[10/12] virtio-net: multiqueue support

Message ID 1356690724-37891-11-git-send-email-jasowang@redhat.com
State New

Commit Message

Jason Wang Dec. 28, 2012, 10:32 a.m. UTC
This patch implements both userspace and vhost support for multiple queue
virtio-net (VIRTIO_NET_F_MQ). This is done by introducing an array of
VirtIONetQueue to VirtIONet.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio-net.c |  318 ++++++++++++++++++++++++++++++++++++++++++-------------
 hw/virtio-net.h |   27 +++++-
 2 files changed, 271 insertions(+), 74 deletions(-)
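
At its core, the change replaces the single VirtIONetQueue with a per-pair array and lays the virtqueues out as rx0, tx0, rx1, tx1, ..., with the control virtqueue last. A minimal sketch of the index mapping (vq2q is taken from the patch itself; the layout comment is a summary, not patch text):

    /* Virtqueue layout assumed throughout the patch:
     *   index 2*i     -> rx virtqueue of queue pair i
     *   index 2*i + 1 -> tx virtqueue of queue pair i
     *   last index    -> control virtqueue (when VIRTIO_NET_F_CTRL_VQ)
     */
    static int vq2q(int queue_index)
    {
        return queue_index / 2;
    }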

Comments

Blue Swirl Dec. 28, 2012, 5:52 p.m. UTC | #1
On Fri, Dec 28, 2012 at 10:32 AM, Jason Wang <jasowang@redhat.com> wrote:
> This patch implements both userspace and vhost support for multiple queue
> virtio-net (VIRTIO_NET_F_MQ). This is done by introducing an array of
> VirtIONetQueue to VirtIONet.
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  hw/virtio-net.c |  318 ++++++++++++++++++++++++++++++++++++++++++-------------
>  hw/virtio-net.h |   27 +++++-
>  2 files changed, 271 insertions(+), 74 deletions(-)
>
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index c6f0915..aaeef1b 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -45,7 +45,7 @@ typedef struct VirtIONet
>      VirtIODevice vdev;
>      uint8_t mac[ETH_ALEN];
>      uint16_t status;
> -    VirtIONetQueue vq;
> +    VirtIONetQueue vqs[MAX_QUEUE_NUM];
>      VirtQueue *ctrl_vq;
>      NICState *nic;
>      uint32_t tx_timeout;
> @@ -70,14 +70,23 @@ typedef struct VirtIONet
>      } mac_table;
>      uint32_t *vlans;
>      DeviceState *qdev;
> +    int multiqueue;
> +    uint16_t max_queues;
> +    uint16_t curr_queues;
>  } VirtIONet;
>
> -static VirtIONetQueue *virtio_net_get_queue(NetClientState *nc)
> +static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
>  {
>      VirtIONet *n = qemu_get_nic_opaque(nc);
>
> -    return &n->vq;
> +    return &n->vqs[nc->queue_index];
>  }
> +
> +static int vq2q(int queue_index)
> +{
> +    return queue_index / 2;
> +}
> +
>  /* TODO
>   * - we could suppress RX interrupt if we were so inclined.
>   */
> @@ -93,6 +102,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
>      struct virtio_net_config netcfg;
>
>      stw_p(&netcfg.status, n->status);
> +    stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);
>      memcpy(netcfg.mac, n->mac, ETH_ALEN);
>      memcpy(config, &netcfg, sizeof(netcfg));
>  }
> @@ -116,31 +126,33 @@ static bool virtio_net_started(VirtIONet *n, uint8_t status)
>          (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
>  }
>
> -static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
> +static void virtio_net_vhost_status(VirtIONet *n, int queue_index,
> +                                    uint8_t status)
>  {
> -    VirtIONetQueue *q = &n->vq;
> +    NetClientState *nc = qemu_get_subqueue(n->nic, queue_index);
> +    VirtIONetQueue *q = &n->vqs[queue_index];
>
> -    if (!qemu_get_queue(n->nic)->peer) {
> +    if (!nc->peer) {
>          return;
>      }
> -    if (qemu_get_queue(n->nic)->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> +    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
>          return;
>      }
>
> -    if (!tap_get_vhost_net(qemu_get_queue(n->nic)->peer)) {
> +    if (!tap_get_vhost_net(nc->peer)) {
>          return;
>      }
> -    if (!!q->vhost_started == virtio_net_started(n, status) &&
> -                              !qemu_get_queue(n->nic)->peer->link_down) {
> +    if (!!q->vhost_started ==
> +        (virtio_net_started(n, status) && !nc->peer->link_down)) {
>          return;
>      }
>      if (!q->vhost_started) {
>          int r;
> -        if (!vhost_net_query(tap_get_vhost_net(qemu_get_queue(n->nic)->peer), &n->vdev)) {
> +        if (!vhost_net_query(tap_get_vhost_net(nc->peer), &n->vdev)) {
>              return;
>          }
> -        r = vhost_net_start(tap_get_vhost_net(qemu_get_queue(n->nic)->peer),
> -                            &n->vdev, 0);
> +        r = vhost_net_start(tap_get_vhost_net(nc->peer), &n->vdev,
> +                            queue_index * 2);
>          if (r < 0) {
>              error_report("unable to start vhost net: %d: "
>                           "falling back on userspace virtio", -r);
> @@ -148,7 +160,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
>              q->vhost_started = 1;
>          }
>      } else {
> -        vhost_net_stop(tap_get_vhost_net(qemu_get_queue(n->nic)->peer), &n->vdev);
> +        vhost_net_stop(tap_get_vhost_net(nc->peer), &n->vdev);
>          q->vhost_started = 0;
>      }
>  }
> @@ -156,26 +168,35 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
>  static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
> -    VirtIONetQueue *q = &n->vq;
> +    int i;
>
> -    virtio_net_vhost_status(n, status);
> +    for (i = 0; i < n->max_queues; i++) {
> +        VirtIONetQueue *q = &n->vqs[i];
> +        uint8_t queue_status = status;
>
> -    if (!q->tx_waiting) {
> -        return;
> -    }
> +        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
> +            queue_status = 0;
> +        }
>
> -    if (virtio_net_started(n, status) && !q->vhost_started) {
> -        if (q->tx_timer) {
> -            qemu_mod_timer(q->tx_timer,
> -                           qemu_get_clock_ns(vm_clock) + n->tx_timeout);
> -        } else {
> -            qemu_bh_schedule(q->tx_bh);
> +        virtio_net_vhost_status(n, i, queue_status);
> +
> +        if (!q->tx_waiting) {
> +            continue;
>          }
> -    } else {
> -        if (q->tx_timer) {
> -            qemu_del_timer(q->tx_timer);
> +
> +        if (virtio_net_started(n, status) && !q->vhost_started) {
> +            if (q->tx_timer) {
> +                qemu_mod_timer(q->tx_timer,
> +                               qemu_get_clock_ns(vm_clock) + n->tx_timeout);
> +            } else {
> +                qemu_bh_schedule(q->tx_bh);
> +            }
>          } else {
> -            qemu_bh_cancel(q->tx_bh);
> +            if (q->tx_timer) {
> +                qemu_del_timer(q->tx_timer);
> +            } else {
> +                qemu_bh_cancel(q->tx_bh);
> +            }
>          }
>      }
>  }
> @@ -207,6 +228,8 @@ static void virtio_net_reset(VirtIODevice *vdev)
>      n->nomulti = 0;
>      n->nouni = 0;
>      n->nobcast = 0;
> +    /* multiqueue is disabled by default */
> +    n->curr_queues = 1;
>
>      /* Flush any MAC and VLAN filter table state */
>      n->mac_table.in_use = 0;
> @@ -245,18 +268,72 @@ static int peer_has_ufo(VirtIONet *n)
>
>  static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
>  {
> +    int i;
> +    NetClientState *nc;
> +
>      n->mergeable_rx_bufs = mergeable_rx_bufs;
>
>      n->guest_hdr_len = n->mergeable_rx_bufs ?
>          sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
>
> -    if (peer_has_vnet_hdr(n) &&
> -        tap_has_vnet_hdr_len(qemu_get_queue(n->nic)->peer, n->guest_hdr_len)) {
> -        tap_set_vnet_hdr_len(qemu_get_queue(n->nic)->peer, n->guest_hdr_len);
> -        n->host_hdr_len = n->guest_hdr_len;
> +    for (i = 0; i < n->max_queues; i++) {
> +        nc = qemu_get_subqueue(n->nic, i);
> +
> +        if (peer_has_vnet_hdr(n) &&
> +            tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
> +            tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
> +            n->host_hdr_len = n->guest_hdr_len;
> +        }
>      }
>  }
>
> +static int peer_attach(VirtIONet *n, int index)
> +{
> +    NetClientState *nc = qemu_get_subqueue(n->nic, index);
> +    int ret;
> +
> +    if (!nc->peer) {
> +        ret = -1;
> +    } else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> +        ret = -1;
> +    } else {
> +        ret = tap_attach(nc->peer);
> +    }
> +
> +    return ret;
> +}
> +
> +static int peer_detach(VirtIONet *n, int index)
> +{
> +    NetClientState *nc = qemu_get_subqueue(n->nic, index);
> +    int ret;
> +
> +    if (!nc->peer) {
> +        ret = -1;
> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
> +        ret = -1;
> +    } else {
> +        ret = tap_detach(nc->peer);
> +    }
> +
> +    return ret;
> +}
> +
> +static void virtio_net_set_queues(VirtIONet *n)
> +{
> +    int i;
> +
> +    for (i = 0; i < n->max_queues; i++) {
> +        if (i < n->curr_queues) {
> +            assert(!peer_attach(n, i));
> +        } else {
> +            assert(!peer_detach(n, i));
> +        }
> +    }
> +}
> +
> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
> +
>  static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
> @@ -308,25 +385,33 @@ static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
>  static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
> +    int i;
> +
> +    virtio_net_set_multiqueue(n, !!(features & (1 << VIRTIO_NET_F_MQ)),
> +                              !!(features & (1 << VIRTIO_NET_F_CTRL_VQ)));
>
>      virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));
>
>      if (n->has_vnet_hdr) {
> -        tap_set_offload(qemu_get_queue(n->nic)->peer,
> +        tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
>                          (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
>                          (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
>                          (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
>                          (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
>                          (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
>      }
> -    if (!qemu_get_queue(n->nic)->peer ||
> -        qemu_get_queue(n->nic)->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> -        return;
> -    }
> -    if (!tap_get_vhost_net(qemu_get_queue(n->nic)->peer)) {
> -        return;
> +
> +    for (i = 0;  i < n->max_queues; i++) {
> +        NetClientState *nc = qemu_get_subqueue(n->nic, i);
> +
> +        if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> +            continue;
> +        }
> +        if (!tap_get_vhost_net(nc->peer)) {
> +            continue;
> +        }
> +        vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
>      }
> -    vhost_net_ack_features(tap_get_vhost_net(qemu_get_queue(n->nic)->peer), features);
>  }
>
>  static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
> @@ -436,6 +521,35 @@ static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
>      return VIRTIO_NET_OK;
>  }
>
> +static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
> +                                VirtQueueElement *elem)
> +{
> +    struct virtio_net_ctrl_mq s;
> +
> +    if (elem->out_num != 2 ||
> +        elem->out_sg[1].iov_len != sizeof(struct virtio_net_ctrl_mq)) {
> +        error_report("virtio-net ctrl invalid steering command");
> +        return VIRTIO_NET_ERR;
> +    }
> +
> +    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
> +        return VIRTIO_NET_ERR;
> +    }
> +
> +    memcpy(&s, elem->out_sg[1].iov_base, sizeof(struct virtio_net_ctrl_mq));
> +
> +    if (s.virtqueue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
> +        s.virtqueue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
> +        s.virtqueue_pairs > n->max_queues) {
> +        return VIRTIO_NET_ERR;
> +    }
> +
> +    n->curr_queues = s.virtqueue_pairs;
> +    virtio_net_set_queues(n);
> +    virtio_net_set_status(&n->vdev, n->vdev.status);
> +
> +    return VIRTIO_NET_OK;
> +}
>  static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
> @@ -464,6 +578,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>              status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
>          else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
>              status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);
> +        else if (ctrl.class == VIRTIO_NET_CTRL_MQ)

Please add braces.
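
i.e., per CODING_STYLE every if/else branch takes braces, so this would become something like:

    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, &elem);
    }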

> +            status = virtio_net_handle_mq(n, ctrl.cmd, &elem);
>
>          stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);
>
> @@ -477,19 +593,24 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>  static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
> +    int queue_index = vq2q(virtio_get_queue_index(vq));
>
> -    qemu_flush_queued_packets(qemu_get_queue(n->nic));
> +    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
>  }
>
>  static int virtio_net_can_receive(NetClientState *nc)
>  {
>      VirtIONet *n = qemu_get_nic_opaque(nc);
> -    VirtIONetQueue *q = virtio_net_get_queue(nc);
> +    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
>
>      if (!n->vdev.vm_running) {
>          return 0;
>      }
>
> +    if (nc->queue_index >= n->curr_queues) {
> +        return 0;
> +    }
> +
>      if (!virtio_queue_ready(q->rx_vq) ||
>          !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
>          return 0;
> @@ -620,14 +741,15 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
>  static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
>  {
>      VirtIONet *n = qemu_get_nic_opaque(nc);
> -    VirtIONetQueue *q = virtio_net_get_queue(nc);
> +    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
>      struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
>      struct virtio_net_hdr_mrg_rxbuf mhdr;
>      unsigned mhdr_cnt = 0;
>      size_t offset, i, guest_offset;
>
> -    if (!virtio_net_can_receive(qemu_get_queue(n->nic)))
> +    if (!virtio_net_can_receive(nc)) {
>          return -1;
> +    }
>
>      /* hdr_len refers to the header we supply to the guest */
>      if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
> @@ -720,7 +842,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
>  static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
>  {
>      VirtIONet *n = qemu_get_nic_opaque(nc);
> -    VirtIONetQueue *q = virtio_net_get_queue(nc);
> +    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
>
>      virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
>      virtio_notify(&n->vdev, q->tx_vq);
> @@ -737,6 +859,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
>      VirtIONet *n = q->n;
>      VirtQueueElement elem;
>      int32_t num_packets = 0;
> +    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
>      if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
>          return num_packets;
>      }
> @@ -778,8 +901,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
>
>          len = n->guest_hdr_len;
>
> -        ret = qemu_sendv_packet_async(qemu_get_queue(n->nic), out_sg, out_num,
> -                                      virtio_net_tx_complete);
> +        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
> +                                      out_sg, out_num, virtio_net_tx_complete);
>          if (ret == 0) {
>              virtio_queue_set_notification(q->tx_vq, 0);
>              q->async_tx.elem = elem;
> @@ -802,7 +925,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
>  static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
> -    VirtIONetQueue *q = &n->vq;
> +    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
>
>      /* This happens when device was stopped but VCPU wasn't. */
>      if (!n->vdev.vm_running) {
> @@ -826,7 +949,7 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
>  static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIONet *n = to_virtio_net(vdev);
> -    VirtIONetQueue *q = &n->vq;
> +    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
>
>      if (unlikely(q->tx_waiting)) {
>          return;
> @@ -894,10 +1017,49 @@ static void virtio_net_tx_bh(void *opaque)
>      }
>  }
>
> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
> +{
> +    VirtIODevice *vdev = &n->vdev;
> +    int i;
> +
> +    n->multiqueue = multiqueue;
> +
> +    if (!multiqueue)
> +        n->curr_queues = 1;

Ditto. Didn't checkpatch.pl catch these or did you not check?

> +
> +    for (i = 2; i <= n->max_queues * 2 + 1; i++) {
> +        virtio_del_queue(vdev, i);
> +    }
> +
> +    for (i = 1; i < n->max_queues; i++) {
> +        n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
> +        if (n->vqs[i].tx_timer) {
> +            n->vqs[i].tx_vq =
> +                virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
> +            n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
> +                                                   virtio_net_tx_timer,
> +                                                   &n->vqs[i]);
> +        } else {
> +            n->vqs[i].tx_vq =
> +                virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
> +            n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
> +        }
> +
> +        n->vqs[i].tx_waiting = 0;
> +        n->vqs[i].n = n;
> +    }
> +
> +    if (ctrl) {
> +        n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
> +    }
> +
> +    virtio_net_set_queues(n);
> +}
> +
>  static void virtio_net_save(QEMUFile *f, void *opaque)
>  {
>      VirtIONet *n = opaque;
> -    VirtIONetQueue *q = &n->vq;
> +    VirtIONetQueue *q = &n->vqs[0];
>
>      /* At this point, backend must be stopped, otherwise
>       * it might keep writing to memory. */
> @@ -926,9 +1088,8 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
>  static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
>  {
>      VirtIONet *n = opaque;
> -    VirtIONetQueue *q = &n->vq;
> -    int i;
> -    int ret;
> +    VirtIONetQueue *q = &n->vqs[0];
> +    int ret, i;
>
>      if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
>          return -EINVAL;
> @@ -1044,6 +1205,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>                                virtio_net_conf *net)
>  {
>      VirtIONet *n;
> +    int i;
>
>      n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
>                                          sizeof(struct virtio_net_config),
> @@ -1056,8 +1218,11 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>      n->vdev.bad_features = virtio_net_bad_features;
>      n->vdev.reset = virtio_net_reset;
>      n->vdev.set_status = virtio_net_set_status;
> -    n->vq.rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
> -    n->vq.n = n;
> +    n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
> +    n->max_queues = conf->queues;
> +    n->curr_queues = 1;
> +    n->vqs[0].n = n;
> +    n->tx_timeout = net->txtimer;
>
>      if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
>          error_report("virtio-net: "
> @@ -1067,14 +1232,14 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>      }
>
>      if (net->tx && !strcmp(net->tx, "timer")) {
> -        n->vq.tx_vq = virtio_add_queue(&n->vdev, 256,
> -                                       virtio_net_handle_tx_timer);
> -        n->vq.tx_timer = qemu_new_timer_ns(vm_clock,
> -                                           virtio_net_tx_timer, &n->vq);
> -        n->tx_timeout = net->txtimer;
> +        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
> +                                           virtio_net_handle_tx_timer);
> +        n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
> +                                               &n->vqs[0]);
>      } else {
> -        n->vq.tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
> -        n->vq.tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vq);
> +        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
> +                                           virtio_net_handle_tx_bh);
> +        n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
>      }
>      n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
>      qemu_macaddr_default_if_unset(&conf->macaddr);
> @@ -1084,7 +1249,9 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>      n->nic = qemu_new_nic(&net_virtio_info, conf, object_get_typename(OBJECT(dev)), dev->id, n);
>      peer_test_vnet_hdr(n);
>      if (peer_has_vnet_hdr(n)) {
> -        tap_using_vnet_hdr(qemu_get_queue(n->nic)->peer, 1);
> +        for (i = 0; i < n->max_queues; i++) {
> +            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, 1);
> +        }
>          n->host_hdr_len = sizeof(struct virtio_net_hdr);
>      } else {
>          n->host_hdr_len = 0;
> @@ -1092,7 +1259,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>
>      qemu_format_nic_info_str(qemu_get_queue(n->nic), conf->macaddr.a);
>
> -    n->vq.tx_waiting = 0;
> +    n->vqs[0].tx_waiting = 0;
>      n->tx_burst = net->txburst;
>      virtio_net_set_mrg_rx_bufs(n, 0);
>      n->promisc = 1; /* for compatibility */
> @@ -1113,23 +1280,28 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>  void virtio_net_exit(VirtIODevice *vdev)
>  {
>      VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
> -    VirtIONetQueue *q = &n->vq;
> +    int i;
>
>      /* This will stop vhost backend if appropriate. */
>      virtio_net_set_status(vdev, 0);
>
> -    qemu_purge_queued_packets(qemu_get_queue(n->nic));
> -
>      unregister_savevm(n->qdev, "virtio-net", n);
>
>      g_free(n->mac_table.macs);
>      g_free(n->vlans);
>
> -    if (q->tx_timer) {
> -        qemu_del_timer(q->tx_timer);
> -        qemu_free_timer(q->tx_timer);
> -    } else {
> -        qemu_bh_delete(q->tx_bh);
> +    for (i = 0; i < n->max_queues; i++) {
> +        VirtIONetQueue *q = &n->vqs[i];
> +        NetClientState *nc = qemu_get_subqueue(n->nic, i);
> +
> +        qemu_purge_queued_packets(nc);
> +
> +        if (q->tx_timer) {
> +            qemu_del_timer(q->tx_timer);
> +            qemu_free_timer(q->tx_timer);
> +        } else {
> +            qemu_bh_delete(q->tx_bh);
> +        }
>      }
>
>      qemu_del_nic(n->nic);
> diff --git a/hw/virtio-net.h b/hw/virtio-net.h
> index 36aa463..bc5857a 100644
> --- a/hw/virtio-net.h
> +++ b/hw/virtio-net.h
> @@ -44,6 +44,8 @@
>  #define VIRTIO_NET_F_CTRL_RX    18      /* Control channel RX mode support */
>  #define VIRTIO_NET_F_CTRL_VLAN  19      /* Control channel VLAN filtering */
>  #define VIRTIO_NET_F_CTRL_RX_EXTRA 20   /* Extra RX mode control support */
> +#define VIRTIO_NET_F_MQ         22      /* Device supports Receive Flow
> +                                         * Steering */
>
>  #define VIRTIO_NET_S_LINK_UP    1       /* Link is up */
>
> @@ -72,6 +74,8 @@ struct virtio_net_config
>      uint8_t mac[ETH_ALEN];
>      /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
>      uint16_t status;
> +    /* Max virtqueue pairs supported by the device */
> +    uint16_t max_virtqueue_pairs;
>  } QEMU_PACKED;
>
>  /* This is the first element of the scatter-gather list.  If you don't
> @@ -168,6 +172,26 @@ struct virtio_net_ctrl_mac {
>   #define VIRTIO_NET_CTRL_VLAN_ADD             0
>   #define VIRTIO_NET_CTRL_VLAN_DEL             1
>
> +/*
> + * Control Multiqueue
> + *
> + * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
> + * enables multiqueue, specifying the number of the transmit and
> + * receive queues that will be used. After the command is consumed and acked by
> + * the device, the device will not steer new packets on receive virtqueues
> + * other than specified nor read from transmit virtqueues other than specified.
> + * Accordingly, driver should not transmit new packets on virtqueues other than
> + * specified.
> + */
> +struct virtio_net_ctrl_mq {

VirtIONetCtrlMQ and please don't forget the typedef.
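
i.e., something along these lines:

    typedef struct VirtIONetCtrlMQ {
        uint16_t virtqueue_pairs;
    } VirtIONetCtrlMQ;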

> +    uint16_t virtqueue_pairs;
> +};
> +
> +#define VIRTIO_NET_CTRL_MQ   4
> + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
> + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
> + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
> +
>  #define DEFINE_VIRTIO_NET_FEATURES(_state, _field) \
>          DEFINE_VIRTIO_COMMON_FEATURES(_state, _field), \
>          DEFINE_PROP_BIT("csum", _state, _field, VIRTIO_NET_F_CSUM, true), \
> @@ -186,5 +210,6 @@ struct virtio_net_ctrl_mac {
>          DEFINE_PROP_BIT("ctrl_vq", _state, _field, VIRTIO_NET_F_CTRL_VQ, true), \
>          DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \
>          DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \
> -        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true)
> +        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true), \
> +        DEFINE_PROP_BIT("mq", _state, _field, VIRTIO_NET_F_MQ, true)
>  #endif
> --
> 1.7.1
>
>
Jason Wang Jan. 4, 2013, 5:12 a.m. UTC | #2
On 12/29/2012 01:52 AM, Blue Swirl wrote:
> On Fri, Dec 28, 2012 at 10:32 AM, Jason Wang <jasowang@redhat.com> wrote:
>> This patch implements both userspace and vhost support for multiple queue
>> virtio-net (VIRTIO_NET_F_MQ). This is done by introducing an array of
>> VirtIONetQueue to VirtIONet.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>  hw/virtio-net.c |  318 ++++++++++++++++++++++++++++++++++++++++++-------------
>>  hw/virtio-net.h |   27 +++++-
>>  2 files changed, 271 insertions(+), 74 deletions(-)
[...]
>>  static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>>  {
>>      VirtIONet *n = to_virtio_net(vdev);
>> @@ -464,6 +578,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>>              status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
>>          else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
>>              status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);
>> +        else if (ctrl.class == VIRTIO_NET_CTRL_MQ)
> Please add braces.

Sure.
>
>> +            status = virtio_net_handle_mq(n, ctrl.cmd, &elem);
>>
>>          stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);
>>
>> @@ -477,19 +593,24 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>>  static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
>>  {
>>      VirtIONet *n = to_virtio_net(vdev);
>> +    int queue_index = vq2q(virtio_get_queue_index(vq));
>>
>> -    qemu_flush_queued_packets(qemu_get_queue(n->nic));
>> +    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
>>  }
>>
>>  
[...]
>>
>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
>> +{
>> +    VirtIODevice *vdev = &n->vdev;
>> +    int i;
>> +
>> +    n->multiqueue = multiqueue;
>> +
>> +    if (!multiqueue)
>> +        n->curr_queues = 1;
> Ditto. Didn't checkpatch.pl catch these or did you not check?

Sorry, will add braces here. I ran checkpatch.pl, but found that some (or
a lot) of the existing code, such as this file, does not follow the
rules. So I'm not sure whether I should fix only my own code, or leave it
consistent with this file and clean it all up in the future.
>
[...]
>>  } QEMU_PACKED;
>>
>>  /* This is the first element of the scatter-gather list.  If you don't
>> @@ -168,6 +172,26 @@ struct virtio_net_ctrl_mac {
>>   #define VIRTIO_NET_CTRL_VLAN_ADD             0
>>   #define VIRTIO_NET_CTRL_VLAN_DEL             1
>>
>> +/*
>> + * Control Multiqueue
>> + *
>> + * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
>> + * enables multiqueue, specifying the number of the transmit and
>> + * receive queues that will be used. After the command is consumed and acked by
>> + * the device, the device will not steer new packets on receive virtqueues
>> + * other than specified nor read from transmit virtqueues other than specified.
>> + * Accordingly, driver should not transmit new packets  on virtqueues other than
>> + * specified.
>> + */
>> +struct virtio_net_ctrl_mq {
> VirtIONetCtrlMQ and please don't forget the typedef.

Sure, but the same question as above applies (see the other structures in this file).
>
>> +    uint16_t virtqueue_pairs;
>> +};
>> +
>> +#define VIRTIO_NET_CTRL_MQ   4
>> + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
>> + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
>> + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
>> +
>>  #define DEFINE_VIRTIO_NET_FEATURES(_state, _field) \
>>          DEFINE_VIRTIO_COMMON_FEATURES(_state, _field), \
>>          DEFINE_PROP_BIT("csum", _state, _field, VIRTIO_NET_F_CSUM, true), \
>> @@ -186,5 +210,6 @@ struct virtio_net_ctrl_mac {
>>          DEFINE_PROP_BIT("ctrl_vq", _state, _field, VIRTIO_NET_F_CTRL_VQ, true), \
>>          DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \
>>          DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \
>> -        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true)
>> +        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true), \
>> +        DEFINE_PROP_BIT("mq", _state, _field, VIRTIO_NET_F_MQ, true)
>>  #endif
>> --
>> 1.7.1
>>
>>
Blue Swirl Jan. 4, 2013, 8:41 p.m. UTC | #3
On Fri, Jan 4, 2013 at 5:12 AM, Jason Wang <jasowang@redhat.com> wrote:
> On 12/29/2012 01:52 AM, Blue Swirl wrote:
>> On Fri, Dec 28, 2012 at 10:32 AM, Jason Wang <jasowang@redhat.com> wrote:
>>> This patch implements both userspace and vhost support for multiple queue
>>> virtio-net (VIRTIO_NET_F_MQ). This is done by introducing an array of
>>> VirtIONetQueue to VirtIONet.
>>>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>> ---
>>>  hw/virtio-net.c |  318 ++++++++++++++++++++++++++++++++++++++++++-------------
>>>  hw/virtio-net.h |   27 +++++-
>>>  2 files changed, 271 insertions(+), 74 deletions(-)
> [...]
>>>  static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>>>  {
>>>      VirtIONet *n = to_virtio_net(vdev);
>>> @@ -464,6 +578,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>>>              status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
>>>          else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
>>>              status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);
>>> +        else if (ctrl.class == VIRTIO_NET_CTRL_MQ)
>> Please add braces.
>
> Sure.
>>
>>> +            status = virtio_net_handle_mq(n, ctrl.cmd, &elem);
>>>
>>>          stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);
>>>
>>> @@ -477,19 +593,24 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
>>>  static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
>>>  {
>>>      VirtIONet *n = to_virtio_net(vdev);
>>> +    int queue_index = vq2q(virtio_get_queue_index(vq));
>>>
>>> -    qemu_flush_queued_packets(qemu_get_queue(n->nic));
>>> +    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
>>>  }
>>>
>>>
> [...]
>>>
>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
>>> +{
>>> +    VirtIODevice *vdev = &n->vdev;
>>> +    int i;
>>> +
>>> +    n->multiqueue = multiqueue;
>>> +
>>> +    if (!multiqueue)
>>> +        n->curr_queues = 1;
>> Ditto. Didn't checkpatch.pl catch these or did you not check?
>
> Sorry, will add braces here. I run checkpatch.pl but finally find that
> some or lots of the existed codes (such as this file) does not obey the
> rules. So I'm not sure whether I need to correct my own codes, or left
> them as this file does and correct them all in the future.

The goal is to make the QEMU codebase conform to CODING_STYLE. Currently
this is not the case for some of the code, but we should use
opportunities like this to advance towards that goal.

>>
> [...]
>>>  } QEMU_PACKED;
>>>
>>>  /* This is the first element of the scatter-gather list.  If you don't
>>> @@ -168,6 +172,26 @@ struct virtio_net_ctrl_mac {
>>>   #define VIRTIO_NET_CTRL_VLAN_ADD             0
>>>   #define VIRTIO_NET_CTRL_VLAN_DEL             1
>>>
>>> +/*
>>> + * Control Multiqueue
>>> + *
>>> + * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
>>> + * enables multiqueue, specifying the number of the transmit and
>>> + * receive queues that will be used. After the command is consumed and acked by
>>> + * the device, the device will not steer new packets on receive virtqueues
>>> + * other than specified nor read from transmit virtqueues other than specified.
>>> + * Accordingly, driver should not transmit new packets  on virtqueues other than
>>> + * specified.
>>> + */
>>> +struct virtio_net_ctrl_mq {
>> VirtIONetCtrlMQ and please don't forget the typedef.
>
> Sure, but the same question as above. (See other structures in this file).
>>
>>> +    uint16_t virtqueue_pairs;
>>> +};
>>> +
>>> +#define VIRTIO_NET_CTRL_MQ   4
>>> + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
>>> + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
>>> + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
>>> +
>>>  #define DEFINE_VIRTIO_NET_FEATURES(_state, _field) \
>>>          DEFINE_VIRTIO_COMMON_FEATURES(_state, _field), \
>>>          DEFINE_PROP_BIT("csum", _state, _field, VIRTIO_NET_F_CSUM, true), \
>>> @@ -186,5 +210,6 @@ struct virtio_net_ctrl_mac {
>>>          DEFINE_PROP_BIT("ctrl_vq", _state, _field, VIRTIO_NET_F_CTRL_VQ, true), \
>>>          DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \
>>>          DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \
>>> -        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true)
>>> +        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true), \
>>> +        DEFINE_PROP_BIT("mq", _state, _field, VIRTIO_NET_F_MQ, true)
>>>  #endif
>>> --
>>> 1.7.1
>>>
>>>
>
Wanlong Gao Jan. 8, 2013, 9:07 a.m. UTC | #4
On 12/28/2012 06:32 PM, Jason Wang wrote:
> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
> +        ret = -1;
> +    } else {
> +        ret = tap_detach(nc->peer);
> +    }
> +
> +    return ret;
> +}
> +
> +static void virtio_net_set_queues(VirtIONet *n)
> +{
> +    int i;
> +
> +    for (i = 0; i < n->max_queues; i++) {
> +        if (i < n->curr_queues) {
> +            assert(!peer_attach(n, i));
> +        } else {
> +            assert(!peer_detach(n, i));

I hit an assert here:
qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.

Any thoughts?

Thanks,
Wanlong Gao

> +        }
> +    }
> +}
> +
> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
> +
Jason Wang Jan. 8, 2013, 9:29 a.m. UTC | #5
On 01/08/2013 05:07 PM, Wanlong Gao wrote:
> On 12/28/2012 06:32 PM, Jason Wang wrote:
>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>> +        ret = -1;
>> +    } else {
>> +        ret = tap_detach(nc->peer);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static void virtio_net_set_queues(VirtIONet *n)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < n->max_queues; i++) {
>> +        if (i < n->curr_queues) {
>> +            assert(!peer_attach(n, i));
>> +        } else {
>> +            assert(!peer_detach(n, i));
> I got a assert here,
> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>
> Any thoughts?
>
> Thanks,
> Wanlong Gao

Thanks for the testing. In which case did you hit this assertion:
migration, reboot, or just changing the number of virtqueues?

>> +        }
>> +    }
>> +}
>> +
>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>> +
Wanlong Gao Jan. 8, 2013, 9:32 a.m. UTC | #6
On 01/08/2013 05:29 PM, Jason Wang wrote:
> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>> +        ret = -1;
>>> +    } else {
>>> +        ret = tap_detach(nc->peer);
>>> +    }
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static void virtio_net_set_queues(VirtIONet *n)
>>> +{
>>> +    int i;
>>> +
>>> +    for (i = 0; i < n->max_queues; i++) {
>>> +        if (i < n->curr_queues) {
>>> +            assert(!peer_attach(n, i));
>>> +        } else {
>>> +            assert(!peer_detach(n, i));
>> I got a assert here,
>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>
>> Any thoughts?
>>
>> Thanks,
>> Wanlong Gao
> 
> Thanks for the testing, which steps or cases did you met this assertion,
> migration, reboot or just changing the number of virtqueues?

It may be because my host doesn't support multiqueue tap; I'll try with the upstream kernel again.

Thanks,
Wanlong Gao

> 
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>> +
> 
>
Wanlong Gao Jan. 8, 2013, 9:49 a.m. UTC | #7
On 01/08/2013 05:29 PM, Jason Wang wrote:
> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>> +        ret = -1;
>>> +    } else {
>>> +        ret = tap_detach(nc->peer);
>>> +    }
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static void virtio_net_set_queues(VirtIONet *n)
>>> +{
>>> +    int i;
>>> +
>>> +    for (i = 0; i < n->max_queues; i++) {
>>> +        if (i < n->curr_queues) {
>>> +            assert(!peer_attach(n, i));
>>> +        } else {
>>> +            assert(!peer_detach(n, i));
>> I got a assert here,
>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>
>> Any thoughts?
>>
>> Thanks,
>> Wanlong Gao
> 
> Thanks for the testing, which steps or cases did you met this assertion,
> migration, reboot or just changing the number of virtqueues?

I used 3.8-rc2 to test it again; I saw this tag has the multiqueue tap support.

I just can't start QEMU using -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3

I pre-opened two tap fds; did I miss something?

Thanks,
Wanlong Gao

> 
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>> +
> 
>
Jason Wang Jan. 8, 2013, 9:51 a.m. UTC | #8
On 01/08/2013 05:49 PM, Wanlong Gao wrote:
> On 01/08/2013 05:29 PM, Jason Wang wrote:
>> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>>> +        ret = -1;
>>>> +    } else {
>>>> +        ret = tap_detach(nc->peer);
>>>> +    }
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static void virtio_net_set_queues(VirtIONet *n)
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < n->max_queues; i++) {
>>>> +        if (i < n->curr_queues) {
>>>> +            assert(!peer_attach(n, i));
>>>> +        } else {
>>>> +            assert(!peer_detach(n, i));
>>> I got a assert here,
>>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>>
>>> Any thoughts?
>>>
>>> Thanks,
>>> Wanlong Gao
>> Thanks for the testing, which steps or cases did you met this assertion,
>> migration, reboot or just changing the number of virtqueues?
> I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap support.
>
> I just can't start the QEMU use  -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3
>
> I pre-opened two tap fds, did I missing something?

Nothing missed :) It should work.

Could you please try not using fd=X and letting qemu create the file
descriptors by itself? Btw, how did you create the two tap fds?

Thanks
>
> Thanks,
> Wanlong Gao
>
>>>> +        }
>>>> +    }
>>>> +}
>>>> +
>>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>>> +
>>
>
Wanlong Gao Jan. 8, 2013, 10 a.m. UTC | #9
On 01/08/2013 05:51 PM, Jason Wang wrote:
> On 01/08/2013 05:49 PM, Wanlong Gao wrote:
>> On 01/08/2013 05:29 PM, Jason Wang wrote:
>>> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>>>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>>>> +        ret = -1;
>>>>> +    } else {
>>>>> +        ret = tap_detach(nc->peer);
>>>>> +    }
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static void virtio_net_set_queues(VirtIONet *n)
>>>>> +{
>>>>> +    int i;
>>>>> +
>>>>> +    for (i = 0; i < n->max_queues; i++) {
>>>>> +        if (i < n->curr_queues) {
>>>>> +            assert(!peer_attach(n, i));
>>>>> +        } else {
>>>>> +            assert(!peer_detach(n, i));
>>>> I got a assert here,
>>>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>>>
>>>> Any thoughts?
>>>>
>>>> Thanks,
>>>> Wanlong Gao
>>> Thanks for the testing, which steps or cases did you met this assertion,
>>> migration, reboot or just changing the number of virtqueues?
>> I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap support.
>>
>> I just can't start the QEMU use  -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3
>>
>> I pre-opened two tap fds, did I missing something?
> 
> Nothing missed :) It should work.
> 
> Could you please try not use fd=X and let qemu to create the file
> descriptors by itself? Btw, how did you create the two tap fds?

Can it create the descriptors itself? I get:
qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not be initialized

I create the tap fd like this, and then dup() it to create the second and third fds, right?

	int tap_fd = open("/dev/net/tun", O_RDWR);
	int vhost_fd = open("/dev/vhost-net", O_RDWR);
	char *tap_name = "tap";
	char cmd[2048];
	char brctl[256];
	char netup[256];
	struct ifreq ifr;
	if (tap_fd < 0) {
		printf("open tun device failed\n");
		return -1;
	}
	if (vhost_fd < 0) {
		printf("open vhost-net device failed\n");
		return -1;
	}
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, tap_name, IFNAMSIZ - 1);
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;

	/*
	 * setup tap net device
	 */
	if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
		printf("setup tap net device failed\n");
		return -1;
	}

	sprintf(brctl, "brctl addif virbr0 %s", tap_name);
	sprintf(netup, "ifconfig %s up", tap_name);
	system(brctl);
	system(netup);

Thanks,
Wanlong Gao


> 
> Thanks
>>
>> Thanks,
>> Wanlong Gao
>>
>>>>> +        }
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>>>> +
>>>
>>
> 
>
Jason Wang Jan. 8, 2013, 10:14 a.m. UTC | #10
On 01/08/2013 06:00 PM, Wanlong Gao wrote:
> On 01/08/2013 05:51 PM, Jason Wang wrote:
>> On 01/08/2013 05:49 PM, Wanlong Gao wrote:
>>> On 01/08/2013 05:29 PM, Jason Wang wrote:
>>>> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>>>>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>>>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>>>>> +        ret = -1;
>>>>>> +    } else {
>>>>>> +        ret = tap_detach(nc->peer);
>>>>>> +    }
>>>>>> +
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static void virtio_net_set_queues(VirtIONet *n)
>>>>>> +{
>>>>>> +    int i;
>>>>>> +
>>>>>> +    for (i = 0; i < n->max_queues; i++) {
>>>>>> +        if (i < n->curr_queues) {
>>>>>> +            assert(!peer_attach(n, i));
>>>>>> +        } else {
>>>>>> +            assert(!peer_detach(n, i));
>>>>> I got a assert here,
>>>>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>>>>
>>>>> Any thoughts?
>>>>>
>>>>> Thanks,
>>>>> Wanlong Gao
>>>> Thanks for the testing, which steps or cases did you met this assertion,
>>>> migration, reboot or just changing the number of virtqueues?
>>> I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap support.
>>>
>>> I just can't start the QEMU use  -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3
>>>
>>> I pre-opened two tap fds, did I missing something?
>> Nothing missed :) It should work.
>>
>> Could you please try not use fd=X and let qemu to create the file
>> descriptors by itself? Btw, how did you create the two tap fds?
> Can it create descriptors itself? I get 
> qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not be initialized

You need to prepare an ifup script, which defaults to /etc/qemu-ifup
(like the following). Or you may try adding script=no:

#!/bin/sh

switch=kvmbr0

/sbin/ifconfig $1 0.0.0.0 up
/usr/sbin/brctl addif $switch $1
/usr/sbin/brctl stp $switch off

This will let qemu create a tap fd itself and connect it to a port of
the bridge called kvmbr0.
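
e.g. your original command line, just without the fd=X arguments:

    qemu-system-x86_64 ... -netdev tap,id=hostnet0,queues=2 -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3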
>
> I create the tap fd like this, and dup create the second fd, third fd, right?

The second and third fds should also be created with TUNSETIFF and the
same tap_name. Btw, you need to specify the IFF_MULTI_QUEUE flag to tell
the kernel you want to create a multiqueue tap device; otherwise the
second and third calls to TUNSETIFF will fail.
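
A minimal sketch of what each per-queue open could look like (illustrative only: error handling trimmed, the helper name is made up, and it assumes a kernel with IFF_MULTI_QUEUE support, i.e. 3.8+):

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <linux/if_tun.h>

    /* Open one queue of a multiqueue tap device. Call it once per queue
     * with the same name, then pass the resulting fds to qemu via fd=. */
    static int open_mq_tap_queue(const char *name)
    {
        struct ifreq ifr;
        int fd = open("/dev/net/tun", O_RDWR);

        if (fd < 0) {
            return -1;
        }
        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
        if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
            close(fd);
            return -1;
        }
        return fd;
    }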

Thanks
>
> 	int tap_fd = open("/dev/net/tun", O_RDWR);
> 	int vhost_fd = open("/dev/vhost-net", O_RDWR);
> 	char *tap_name = "tap";
> 	char cmd[2048];
> 	char brctl[256];
> 	char netup[256];
> 	struct ifreq ifr;
> 	if (tap_fd < 0) {
> 		printf("open tun device failed\n");
> 		return -1;
> 	}
> 	if (vhost_fd < 0) {
> 		printf("open vhost-net device failed\n");
> 		return -1;
> 	}
> 	memset(&ifr, 0, sizeof(ifr));
> 	memcpy(ifr.ifr_name, tap_name, sizeof(tap_name));
> 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
>
> 	/*
> 	 * setup tap net device
> 	 */
> 	if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
> 		printf("setup tap net device failed\n");
> 		return -1;
> 	}
>
> 	sprintf(brctl, "brctl addif virbr0 %s", tap_name);
> 	sprintf(netup, "ifconfig %s up", tap_name);
> 	system(brctl);
> 	system(netup);
>
> Thanks,
> Wanlong Gao
>
>
>> Thanks
>>> Thanks,
>>> Wanlong Gao
>>>
>>>>>> +        }
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>>>>> +
>>
Wanlong Gao Jan. 8, 2013, 11:24 a.m. UTC | #11
On 01/08/2013 06:14 PM, Jason Wang wrote:
> On 01/08/2013 06:00 PM, Wanlong Gao wrote:
>> On 01/08/2013 05:51 PM, Jason Wang wrote:
>>> On 01/08/2013 05:49 PM, Wanlong Gao wrote:
>>>> On 01/08/2013 05:29 PM, Jason Wang wrote:
>>>>> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>>>>>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>>>>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>>>>>> +        ret = -1;
>>>>>>> +    } else {
>>>>>>> +        ret = tap_detach(nc->peer);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void virtio_net_set_queues(VirtIONet *n)
>>>>>>> +{
>>>>>>> +    int i;
>>>>>>> +
>>>>>>> +    for (i = 0; i < n->max_queues; i++) {
>>>>>>> +        if (i < n->curr_queues) {
>>>>>>> +            assert(!peer_attach(n, i));
>>>>>>> +        } else {
>>>>>>> +            assert(!peer_detach(n, i));
>>>>>> I got a assert here,
>>>>>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>>>>>
>>>>>> Any thoughts?
>>>>>>
>>>>>> Thanks,
>>>>>> Wanlong Gao
>>>>> Thanks for the testing, which steps or cases did you met this assertion,
>>>>> migration, reboot or just changing the number of virtqueues?
>>>> I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap support.
>>>>
>>>> I just can't start the QEMU use  -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3
>>>>
>>>> I pre-opened two tap fds, did I missing something?
>>> Nothing missed :) It should work.
>>>
>>> Could you please try not use fd=X and let qemu to create the file
>>> descriptors by itself? Btw, how did you create the two tap fds?
>> Can it create descriptors itself? I get 
>> qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not be initialized
> 
> You need prepare an ifup script which default at /etc/qemu-ifup (like
> following). Or you may try to add a script=no after:
> 
> #!/bin/sh
> 
> switch=kvmbr0
> 
> /sbin/ifconfig $1 0.0.0.0 up
> /usr/sbin/brctl addif $switch $1
> /usr/sbin/brctl stp $switch off
> 
> This will let qemu create a tap fd itself and make it to be connected to
> a port of the bridge caled kvmbr0.
>>
>> I create the tap fd like this, and dup create the second fd, third fd, right?
> 
> The second and third fd should be created with TUNSETIFF with the same
> tap_name also. Btw, you need to specify a IFF_MULTI_QUEUE flag to tell
> the kernel you want to create a multiqueue tap device, otherwise the
> second and third calling of TUNSETIFF will fail.

Thank you for teaching me, I'll try it tomorrow.

Regards,
Wanlong Gao

> 
> Thanks
>>
>> 	int tap_fd = open("/dev/net/tun", O_RDWR);
>> 	int vhost_fd = open("/dev/vhost-net", O_RDWR);
>> 	char *tap_name = "tap";
>> 	char cmd[2048];
>> 	char brctl[256];
>> 	char netup[256];
>> 	struct ifreq ifr;
>> 	if (tap_fd < 0) {
>> 		printf("open tun device failed\n");
>> 		return -1;
>> 	}
>> 	if (vhost_fd < 0) {
>> 		printf("open vhost-net device failed\n");
>> 		return -1;
>> 	}
>> 	memset(&ifr, 0, sizeof(ifr));
>> 	memcpy(ifr.ifr_name, tap_name, sizeof(tap_name));
>> 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
>>
>> 	/*
>> 	 * setup tap net device
>> 	 */
>> 	if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
>> 		printf("setup tap net device failed\n");
>> 		return -1;
>> 	}
>>
>> 	sprintf(brctl, "brctl addif virbr0 %s", tap_name);
>> 	sprintf(netup, "ifconfig %s up", tap_name);
>> 	system(brctl);
>> 	system(netup);
>>
>> Thanks,
>> Wanlong Gao
>>
>>
>>> Thanks
>>>> Thanks,
>>>> Wanlong Gao
>>>>
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>>>>>> +
>>>
> 
>
Jason Wang Jan. 9, 2013, 3:11 a.m. UTC | #12
On 01/08/2013 07:24 PM, Wanlong Gao wrote:
> On 01/08/2013 06:14 PM, Jason Wang wrote:
>> On 01/08/2013 06:00 PM, Wanlong Gao wrote:
>>> On 01/08/2013 05:51 PM, Jason Wang wrote:
>>>> On 01/08/2013 05:49 PM, Wanlong Gao wrote:
>>>>> On 01/08/2013 05:29 PM, Jason Wang wrote:
>>>>>> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>>>>>>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>>>>>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>>>>>>> +        ret = -1;
>>>>>>>> +    } else {
>>>>>>>> +        ret = tap_detach(nc->peer);
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void virtio_net_set_queues(VirtIONet *n)
>>>>>>>> +{
>>>>>>>> +    int i;
>>>>>>>> +
>>>>>>>> +    for (i = 0; i < n->max_queues; i++) {
>>>>>>>> +        if (i < n->curr_queues) {
>>>>>>>> +            assert(!peer_attach(n, i));
>>>>>>>> +        } else {
>>>>>>>> +            assert(!peer_detach(n, i));
>>>>>>> I got a assert here,
>>>>>>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>>>>>>
>>>>>>> Any thoughts?
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Wanlong Gao
>>>>>> Thanks for the testing, which steps or cases did you met this assertion,
>>>>>> migration, reboot or just changing the number of virtqueues?
>>>>> I use the 3.8-rc2 to test it again, I saw this tag has the multi-tap support.
>>>>>
>>>>> I just can't start the QEMU use  -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3
>>>>>
>>>>> I pre-opened two tap fds, did I missing something?
>>>> Nothing missed :) It should work.
>>>>
>>>> Could you please try not use fd=X and let qemu to create the file
>>>> descriptors by itself? Btw, how did you create the two tap fds?
>>> Can it create descriptors itself? I get 
>>> qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not be initialized
>> You need prepare an ifup script which default at /etc/qemu-ifup (like
>> following). Or you may try to add a script=no after:
>>
>> #!/bin/sh
>>
>> switch=kvmbr0
>>
>> /sbin/ifconfig $1 0.0.0.0 up
>> /usr/sbin/brctl addif $switch $1
>> /usr/sbin/brctl stp $switch off
>>
>> This will let qemu create a tap fd itself and make it to be connected to
>> a port of the bridge caled kvmbr0.
>>> I create the tap fd like this, and dup create the second fd, third fd, right?
>> The second and third fd should be created with TUNSETIFF with the same
>> tap_name also. Btw, you need to specify a IFF_MULTI_QUEUE flag to tell
>> the kernel you want to create a multiqueue tap device, otherwise the
>> second and third calling of TUNSETIFF will fail.
> Thank you for teaching me, I'll try it tomorrow.
>
> Regards,
> Wanlong Gao

Thanks, the multiqueue API should be documented in
Documentation/networking/tuntap.txt. It's on my TODO list.
>
>> Thanks
>>> 	int tap_fd = open("/dev/net/tun", O_RDWR);
>>> 	int vhost_fd = open("/dev/vhost-net", O_RDWR);
>>> 	char *tap_name = "tap";
>>> 	char cmd[2048];
>>> 	char brctl[256];
>>> 	char netup[256];
>>> 	struct ifreq ifr;
>>> 	if (tap_fd < 0) {
>>> 		printf("open tun device failed\n");
>>> 		return -1;
>>> 	}
>>> 	if (vhost_fd < 0) {
>>> 		printf("open vhost-net device failed\n");
>>> 		return -1;
>>> 	}
>>> 	memset(&ifr, 0, sizeof(ifr));
>>> 	memcpy(ifr.ifr_name, tap_name, sizeof(tap_name));
>>> 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
>>>
>>> 	/*
>>> 	 * setup tap net device
>>> 	 */
>>> 	if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
>>> 		printf("setup tap net device failed\n");
>>> 		return -1;
>>> 	}
>>>
>>> 	sprintf(brctl, "brctl addif virbr0 %s", tap_name);
>>> 	sprintf(netup, "ifconfig %s up", tap_name);
>>> 	system(brctl);
>>> 	system(netup);
>>>
>>> Thanks,
>>> Wanlong Gao
>>>
>>>
>>>> Thanks
>>>>> Thanks,
>>>>> Wanlong Gao
>>>>>
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>>>>>>> +
>>
Wanlong Gao Jan. 9, 2013, 8:23 a.m. UTC | #13
On 01/08/2013 06:14 PM, Jason Wang wrote:
> On 01/08/2013 06:00 PM, Wanlong Gao wrote:
>> On 01/08/2013 05:51 PM, Jason Wang wrote:
>>> On 01/08/2013 05:49 PM, Wanlong Gao wrote:
>>>> On 01/08/2013 05:29 PM, Jason Wang wrote:
>>>>> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>>>>>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>>>>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>>>>>> +        ret = -1;
>>>>>>> +    } else {
>>>>>>> +        ret = tap_detach(nc->peer);
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void virtio_net_set_queues(VirtIONet *n)
>>>>>>> +{
>>>>>>> +    int i;
>>>>>>> +
>>>>>>> +    for (i = 0; i < n->max_queues; i++) {
>>>>>>> +        if (i < n->curr_queues) {
>>>>>>> +            assert(!peer_attach(n, i));
>>>>>>> +        } else {
>>>>>>> +            assert(!peer_detach(n, i));
>>>>>> I got an assert here,
>>>>>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>>>>>
>>>>>> Any thoughts?
>>>>>>
>>>>>> Thanks,
>>>>>> Wanlong Gao
>>>>> Thanks for the testing. In which steps or cases did you meet this
>>>>> assertion: migration, reboot, or just changing the number of virtqueues?
>>>> I used 3.8-rc2 to test it again; I saw this tag has the multi-tap support.
>>>>
>>>> I just can't start QEMU using -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3
>>>>
>>>> I pre-opened two tap fds, did I miss something?
>>> Nothing missed :) It should work.
>>>
>>> Could you please try not using fd=X and let qemu create the file
>>> descriptors by itself? Btw, how did you create the two tap fds?
>> Can it create the descriptors itself? I get:
>> qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not be initialized
> 
> You need to prepare an ifup script, which defaults to /etc/qemu-ifup, like
> the following (or you may try adding script=no to the -netdev options):
> 
> #!/bin/sh
> 
> switch=kvmbr0
> 
> /sbin/ifconfig $1 0.0.0.0 up
> /usr/sbin/brctl addif $switch $1
> /usr/sbin/brctl stp $switch off
> 
> This will let qemu create a tap fd itself and connect it to a port of the
> bridge called kvmbr0.

But how do I support multi-queue in this way?
I got a guest kernel panic when using this way and setting queues=4.

Thanks,
Wanlong Gao

>>
>> I create the tap fd like this, and dup() creates the second and third fds, right?
> 
> The second and third fds should be created with TUNSETIFF with the same
> tap_name as well. Btw, you need to specify an IFF_MULTI_QUEUE flag to tell
> the kernel you want to create a multiqueue tap device, otherwise the
> second and third calls to TUNSETIFF will fail.
> 
> Thanks
>>
>> 	int tap_fd = open("/dev/net/tun", O_RDWR);
>> 	int vhost_fd = open("/dev/vhost-net", O_RDWR);
>> 	char *tap_name = "tap";
>> 	char cmd[2048];
>> 	char brctl[256];
>> 	char netup[256];
>> 	struct ifreq ifr;
>> 	if (tap_fd < 0) {
>> 		printf("open tun device failed\n");
>> 		return -1;
>> 	}
>> 	if (vhost_fd < 0) {
>> 		printf("open vhost-net device failed\n");
>> 		return -1;
>> 	}
>> 	memset(&ifr, 0, sizeof(ifr));
>> 	strncpy(ifr.ifr_name, tap_name, IFNAMSIZ - 1);
>> 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
>>
>> 	/*
>> 	 * setup tap net device
>> 	 */
>> 	if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
>> 		printf("setup tap net device failed\n");
>> 		return -1;
>> 	}
>>
>> 	sprintf(brctl, "brctl addif virbr0 %s", tap_name);
>> 	sprintf(netup, "ifconfig %s up", tap_name);
>> 	system(brctl);
>> 	system(netup);
>>
>> Thanks,
>> Wanlong Gao
>>
>>
>>> Thanks
>>>> Thanks,
>>>> Wanlong Gao
>>>>
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>>>>>> +
Jason Wang Jan. 9, 2013, 9:30 a.m. UTC | #14
On 01/09/2013 04:23 PM, Wanlong Gao wrote:
> On 01/08/2013 06:14 PM, Jason Wang wrote:
>> On 01/08/2013 06:00 PM, Wanlong Gao wrote:
>>> On 01/08/2013 05:51 PM, Jason Wang wrote:
>>>> On 01/08/2013 05:49 PM, Wanlong Gao wrote:
>>>>> On 01/08/2013 05:29 PM, Jason Wang wrote:
>>>>>> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>>>>>>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>>>>>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>>>>>>> +        ret = -1;
>>>>>>>> +    } else {
>>>>>>>> +        ret = tap_detach(nc->peer);
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void virtio_net_set_queues(VirtIONet *n)
>>>>>>>> +{
>>>>>>>> +    int i;
>>>>>>>> +
>>>>>>>> +    for (i = 0; i < n->max_queues; i++) {
>>>>>>>> +        if (i < n->curr_queues) {
>>>>>>>> +            assert(!peer_attach(n, i));
>>>>>>>> +        } else {
>>>>>>>> +            assert(!peer_detach(n, i));
>>>>>>> I got an assert here,
>>>>>>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>>>>>>
>>>>>>> Any thoughts?
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Wanlong Gao
>>>>>> Thanks for the testing. In which steps or cases did you meet this
>>>>>> assertion: migration, reboot, or just changing the number of virtqueues?
>>>>> I used 3.8-rc2 to test it again; I saw this tag has the multi-tap support.
>>>>>
>>>>> I just can't start QEMU using -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3
>>>>>
>>>>> I pre-opened two tap fds, did I miss something?
>>>> Nothing missed :) It should work.
>>>>
>>>> Could you please try not using fd=X and let qemu create the file
>>>> descriptors by itself? Btw, how did you create the two tap fds?
>>> Can it create the descriptors itself? I get:
>>> qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not be initialized
>> You need to prepare an ifup script, which defaults to /etc/qemu-ifup, like
>> the following (or you may try adding script=no to the -netdev options):
>>
>> #!/bin/sh
>>
>> switch=kvmbr0
>>
>> /sbin/ifconfig $1 0.0.0.0 up
>> /usr/sbin/brctl addif $switch $1
>> /usr/sbin/brctl stp $switch off
>>
>> This will let qemu create a tap fd itself and connect it to a port of the
>> bridge called kvmbr0.
> But how do I support multi-queue in this way?

Qemu will create the necessary multiqueue tap by itself, see patch 0/12.
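
For instance, a command line along these lines (a sketch; the ids and the
queue count are illustrative) should be enough, with no fd= arguments at all:

qemu-system-x86_64 ... \
    -netdev tap,id=hostnet0,vhost=on,queues=4 \
    -device virtio-net-pci,netdev=hostnet0,id=net0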
> I got a guest kernel panic when using this way and setting queues=4.

Does it happen w/o or w/ an fd parameter? What's the qemu command line?
Did you meet it during boot time?

Thanks
>
> Thanks,
> Wanlong Gao
>
>>> I create the tap fd like this, and dup() creates the second and third fds, right?
>> The second and third fds should be created with TUNSETIFF with the same
>> tap_name as well. Btw, you need to specify an IFF_MULTI_QUEUE flag to tell
>> the kernel you want to create a multiqueue tap device, otherwise the
>> second and third calls to TUNSETIFF will fail.
>>
>> Thanks
>>> 	int tap_fd = open("/dev/net/tun", O_RDWR);
>>> 	int vhost_fd = open("/dev/vhost-net", O_RDWR);
>>> 	char *tap_name = "tap";
>>> 	char cmd[2048];
>>> 	char brctl[256];
>>> 	char netup[256];
>>> 	struct ifreq ifr;
>>> 	if (tap_fd < 0) {
>>> 		printf("open tun device failed\n");
>>> 		return -1;
>>> 	}
>>> 	if (vhost_fd < 0) {
>>> 		printf("open vhost-net device failed\n");
>>> 		return -1;
>>> 	}
>>> 	memset(&ifr, 0, sizeof(ifr));
>>> 	strncpy(ifr.ifr_name, tap_name, IFNAMSIZ - 1);
>>> 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
>>>
>>> 	/*
>>> 	 * setup tap net device
>>> 	 */
>>> 	if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
>>> 		printf("setup tap net device failed\n");
>>> 		return -1;
>>> 	}
>>>
>>> 	sprintf(brctl, "brctl addif virbr0 %s", tap_name);
>>> 	sprintf(netup, "ifconfig %s up", tap_name);
>>> 	system(brctl);
>>> 	system(netup);
>>>
>>> Thanks,
>>> Wanlong Gao
>>>
>>>
>>>> Thanks
>>>>> Thanks,
>>>>> Wanlong Gao
>>>>>
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>>>>>>> +
>>>>>>> --
>>>>>>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>>>>>>> the body of a message to majordomo@vger.kernel.org
>>>>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanlong Gao Jan. 9, 2013, 10:01 a.m. UTC | #15
On 01/09/2013 05:30 PM, Jason Wang wrote:
> On 01/09/2013 04:23 PM, Wanlong Gao wrote:
>> On 01/08/2013 06:14 PM, Jason Wang wrote:
>>> On 01/08/2013 06:00 PM, Wanlong Gao wrote:
>>>> On 01/08/2013 05:51 PM, Jason Wang wrote:
>>>>> On 01/08/2013 05:49 PM, Wanlong Gao wrote:
>>>>>> On 01/08/2013 05:29 PM, Jason Wang wrote:
>>>>>>> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>>>>>>>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>>>>>>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>>>>>>>> +        ret = -1;
>>>>>>>>> +    } else {
>>>>>>>>> +        ret = tap_detach(nc->peer);
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static void virtio_net_set_queues(VirtIONet *n)
>>>>>>>>> +{
>>>>>>>>> +    int i;
>>>>>>>>> +
>>>>>>>>> +    for (i = 0; i < n->max_queues; i++) {
>>>>>>>>> +        if (i < n->curr_queues) {
>>>>>>>>> +            assert(!peer_attach(n, i));
>>>>>>>>> +        } else {
>>>>>>>>> +            assert(!peer_detach(n, i));
>>>>>>>> I got an assert here,
>>>>>>>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>>>>>>>
>>>>>>>> Any thoughts?
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> Wanlong Gao
>>>>>>> Thanks for the testing. In which steps or cases did you meet this
>>>>>>> assertion: migration, reboot, or just changing the number of virtqueues?
>>>>>> I used 3.8-rc2 to test it again; I saw this tag has the multi-tap support.
>>>>>>
>>>>>> I just can't start QEMU using -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3
>>>>>>
>>>>>> I pre-opened two tap fds, did I miss something?
>>>>> Nothing missed :) It should work.
>>>>>
>>>>> Could you please try not using fd=X and let qemu create the file
>>>>> descriptors by itself? Btw, how did you create the two tap fds?
>>>> Can it create the descriptors itself? I get:
>>>> qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not be initialized
>>> You need to prepare an ifup script, which defaults to /etc/qemu-ifup, like
>>> the following (or you may try adding script=no to the -netdev options):
>>>
>>> #!/bin/sh
>>>
>>> switch=kvmbr0
>>>
>>> /sbin/ifconfig $1 0.0.0.0 up
>>> /usr/sbin/brctl addif $switch $1
>>> /usr/sbin/brctl stp $switch off
>>>
>>> This will let qemu create a tap fd itself and connect it to a port of the
>>> bridge called kvmbr0.
>> But how do I support multi-queue in this way?
> 
> Qemu will create the necessary multiqueue tap by itself, see patch 0/12.
>> I got a guest kernel panic when using this way and setting queues=4.
> 
> Does it happen w/o or w/ an fd parameter? What's the qemu command line?
> Did you meet it during boot time?

The QEMU command line is:

/work/git/qemu/x86_64-softmmu/qemu-system-x86_64 -name f17 -M pc-0.15 -enable-kvm -m 3096 \
-smp 4,sockets=4,cores=1,threads=1 \
-uuid c31a9f3e-4161-c53a-339c-5dc36d0497cb -no-user-config -nodefaults \
-chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/f17.monitor,server,nowait \
-mon chardev=charmonitor,id=monitor,mode=control \
-rtc base=utc -no-shutdown \
-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
-device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0xb,num_queues=4,hotplug=on \
-device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x5 \
-drive file=/vm/f17.img,if=none,id=drive-virtio-disk0,format=qcow2 \
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x6,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 \
-drive file=/vm2/f17-kernel.img,if=none,id=drive-virtio-disk1,format=qcow2 \
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x8,drive=drive-virtio-disk1,id=virtio-disk1 \
-drive file=/vm/virtio-scsi/scsi3.img,if=none,id=drive-scsi0-0-2-0,format=raw \
-device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=2,drive=drive-scsi0-0-2-0,id=scsi0-0-2-0,removable=on \
-drive file=/vm/virtio-scsi/scsi4.img,if=none,id=drive-scsi0-0-3-0,format=raw \
-device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=3,drive=drive-scsi0-0-3-0,id=scsi0-0-3-0 \
-drive file=/vm/virtio-scsi/scsi1.img,if=none,id=drive-scsi0-0-0-0,format=raw \
-device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0 \
-drive file=/vm/virtio-scsi/scsi2.img,if=none,id=drive-scsi0-0-1-0,format=raw \
-device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi0-0-1-0,id=scsi0-0-1-0 \
-chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 \
-chardev file,id=charserial1,path=/vm/f17.log \
-device isa-serial,chardev=charserial1,id=serial1 \
-device usb-tablet,id=input0 -vga std \
-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7 \
-netdev tap,id=hostnet0,vhost=on,queues=4 \
-device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3 \
-monitor stdio

I got a panic just after booting the system; I did nothing, waited for a while, and the guest panicked.

[   28.053004] BUG: soft lockup - CPU#1 stuck for 23s! [ip:592]
[   28.053004] Modules linked in: ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables uinput joydev microcode virtio_balloon pcspkr virtio_net i2c_piix4 i2c_core virtio_scsi virtio_blk floppy
[   28.053004] CPU 1 
[   28.053004] Pid: 592, comm: ip Not tainted 3.8.0-rc1-net+ #3 Bochs Bochs
[   28.053004] RIP: 0010:[<ffffffff8137a9ab>]  [<ffffffff8137a9ab>] virtqueue_get_buf+0xb/0x120
[   28.053004] RSP: 0018:ffff8800bc913550  EFLAGS: 00000246
[   28.053004] RAX: 0000000000000000 RBX: ffff8800bc49c000 RCX: ffff8800bc49e000
[   28.053004] RDX: 0000000000000000 RSI: ffff8800bc913584 RDI: ffff8800bcfd4000
[   28.053004] RBP: ffff8800bc913558 R08: ffff8800bcfd0800 R09: 0000000000000000
[   28.053004] R10: ffff8800bc49c000 R11: ffff880036cc4de0 R12: ffff8800bcfd4000
[   28.053004] R13: ffff8800bc913558 R14: ffffffff8137ad73 R15: 00000000000200d0
[   28.053004] FS:  00007fb27a589740(0000) GS:ffff8800c1480000(0000) knlGS:0000000000000000
[   28.053004] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   28.053004] CR2: 0000000000640530 CR3: 00000000baeff000 CR4: 00000000000006e0
[   28.053004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   28.053004] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[   28.053004] Process ip (pid: 592, threadinfo ffff8800bc912000, task ffff880036da2e20)
[   28.053004] Stack:
[   28.053004]  ffff8800bcfd0800 ffff8800bc913638 ffffffffa003e9bb ffff8800bc913656
[   28.053004]  0000000100000002 ffff8800c17ebb08 000000500000ff10 ffffea0002f244c0
[   28.053004]  0000000200000582 0000000000000000 0000000000000000 ffffea0002f244c0
[   28.053004] Call Trace:
[   28.053004]  [<ffffffffa003e9bb>] virtnet_send_command.constprop.26+0x24b/0x270 [virtio_net]
[   28.053004]  [<ffffffff812ed963>] ? sg_init_table+0x23/0x50
[   28.053004]  [<ffffffffa0040629>] virtnet_set_rx_mode+0x99/0x300 [virtio_net]
[   28.053004]  [<ffffffff8152306f>] __dev_set_rx_mode+0x5f/0xb0
[   28.053004]  [<ffffffff815230ef>] dev_set_rx_mode+0x2f/0x50
[   28.053004]  [<ffffffff815231b7>] __dev_open+0xa7/0xf0
[   28.053004]  [<ffffffff81523461>] __dev_change_flags+0xa1/0x180
[   28.053004]  [<ffffffff815235f8>] dev_change_flags+0x28/0x70
[   28.053004]  [<ffffffff8152ff20>] do_setlink+0x3b0/0xa50
[   28.053004]  [<ffffffff812fb6b1>] ? nla_parse+0x31/0xe0
[   28.053004]  [<ffffffff815325de>] rtnl_newlink+0x36e/0x580
[   28.053004]  [<ffffffff811355cc>] ? get_page_from_freelist+0x37c/0x730
[   28.053004]  [<ffffffff81531e13>] rtnetlink_rcv_msg+0x113/0x2f0
[   28.053004]  [<ffffffff8117d973>] ? __kmalloc_node_track_caller+0x63/0x1c0
[   28.053004]  [<ffffffff8151526b>] ? __alloc_skb+0x8b/0x2a0
[   28.053004]  [<ffffffff81531d00>] ? __rtnl_unlock+0x20/0x20
[   28.053004]  [<ffffffff8154b571>] netlink_rcv_skb+0xb1/0xc0
[   28.053004]  [<ffffffff8152ea05>] rtnetlink_rcv+0x25/0x40
[   28.053004]  [<ffffffff8154ae91>] netlink_unicast+0x1a1/0x220
[   28.053004]  [<ffffffff8154b211>] netlink_sendmsg+0x301/0x3c0
[   28.053004]  [<ffffffff81508530>] sock_sendmsg+0xb0/0xe0
[   28.053004]  [<ffffffff8113a45b>] ? lru_cache_add_lru+0x3b/0x60
[   28.053004]  [<ffffffff811608b7>] ? page_add_new_anon_rmap+0xc7/0x180
[   28.053004]  [<ffffffff81509efc>] __sys_sendmsg+0x3ac/0x3c0
[   28.053004]  [<ffffffff8162e47c>] ? __do_page_fault+0x23c/0x4d0
[   28.053004]  [<ffffffff8115c9ef>] ? do_brk+0x1ff/0x370
[   28.053004]  [<ffffffff8150bec9>] sys_sendmsg+0x49/0x90
[   28.053004]  [<ffffffff81632d59>] system_call_fastpath+0x16/0x1b
[   28.053004] Code: 04 0f ae f0 48 8b 47 50 5d 0f b7 50 02 66 39 57 64 0f 94 c0 c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 54 <53> 80 7f 59 00 48 89 fb 0f 85 90 00 00 00 48 8b 47 50 0f b7 50 


The QEMU tree I used is git://github.com/jasowang/qemu.git

Thanks,
Wanlong Gao

> 
> Thanks
>>
>> Thanks,
>> Wanlong Gao
>>
>>>> I create the tap fd like this, and dup() creates the second and third fds, right?
>>> The second and third fds should be created with TUNSETIFF with the same
>>> tap_name as well. Btw, you need to specify an IFF_MULTI_QUEUE flag to tell
>>> the kernel you want to create a multiqueue tap device, otherwise the
>>> second and third calls to TUNSETIFF will fail.
>>>
>>> Thanks
>>>> 	int tap_fd = open("/dev/net/tun", O_RDWR);
>>>> 	int vhost_fd = open("/dev/vhost-net", O_RDWR);
>>>> 	char *tap_name = "tap";
>>>> 	char cmd[2048];
>>>> 	char brctl[256];
>>>> 	char netup[256];
>>>> 	struct ifreq ifr;
>>>> 	if (tap_fd < 0) {
>>>> 		printf("open tun device failed\n");
>>>> 		return -1;
>>>> 	}
>>>> 	if (vhost_fd < 0) {
>>>> 		printf("open vhost-net device failed\n");
>>>> 		return -1;
>>>> 	}
>>>> 	memset(&ifr, 0, sizeof(ifr));
>>>> 	strncpy(ifr.ifr_name, tap_name, IFNAMSIZ - 1);
>>>> 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
>>>>
>>>> 	/*
>>>> 	 * setup tap net device
>>>> 	 */
>>>> 	if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
>>>> 		printf("setup tap net device failed\n");
>>>> 		return -1;
>>>> 	}
>>>>
>>>> 	sprintf(brctl, "brctl addif virbr0 %s", tap_name);
>>>> 	sprintf(netup, "ifconfig %s up", tap_name);
>>>> 	system(brctl);
>>>> 	system(netup);
>>>>
>>>> Thanks,
>>>> Wanlong Gao
>>>>
>>>>
>>>>> Thanks
>>>>>> Thanks,
>>>>>> Wanlong Gao
>>>>>>
>>>>>>>>> +        }
>>>>>>>>> +    }
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>>>>>>>> +
Jason Wang Jan. 9, 2013, 3:26 p.m. UTC | #16
On 01/09/2013 06:01 PM, Wanlong Gao wrote:
> On 01/09/2013 05:30 PM, Jason Wang wrote:
>> On 01/09/2013 04:23 PM, Wanlong Gao wrote:
>>> On 01/08/2013 06:14 PM, Jason Wang wrote:
>>>> On 01/08/2013 06:00 PM, Wanlong Gao wrote:
>>>>> On 01/08/2013 05:51 PM, Jason Wang wrote:
>>>>>> On 01/08/2013 05:49 PM, Wanlong Gao wrote:
>>>>>>> On 01/08/2013 05:29 PM, Jason Wang wrote:
>>>>>>>> On 01/08/2013 05:07 PM, Wanlong Gao wrote:
>>>>>>>>> On 12/28/2012 06:32 PM, Jason Wang wrote:
>>>>>>>>>> +    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
>>>>>>>>>> +        ret = -1;
>>>>>>>>>> +    } else {
>>>>>>>>>> +        ret = tap_detach(nc->peer);
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +    return ret;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void virtio_net_set_queues(VirtIONet *n)
>>>>>>>>>> +{
>>>>>>>>>> +    int i;
>>>>>>>>>> +
>>>>>>>>>> +    for (i = 0; i < n->max_queues; i++) {
>>>>>>>>>> +        if (i < n->curr_queues) {
>>>>>>>>>> +            assert(!peer_attach(n, i));
>>>>>>>>>> +        } else {
>>>>>>>>>> +            assert(!peer_detach(n, i));
>>>>>>>>> I got an assert here,
>>>>>>>>> qemu-system-x86_64: /work/git/qemu/hw/virtio-net.c:330: virtio_net_set_queues: Assertion `!peer_detach(n, i)' failed.
>>>>>>>>>
>>>>>>>>> Any thoughts?
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>> Wanlong Gao
>>>>>>>> Thanks for the testing. In which steps or cases did you meet this
>>>>>>>> assertion: migration, reboot, or just changing the number of virtqueues?
>>>>>>> I used 3.8-rc2 to test it again; I saw this tag has the multi-tap support.
>>>>>>>
>>>>>>> I just can't start QEMU using -netdev tap,id=hostnet0,queues=2,fd=%d,fd=%d -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3
>>>>>>>
>>>>>>> I pre-opened two tap fds, did I miss something?
>>>>>> Nothing missed :) It should work.
>>>>>>
>>>>>> Could you please try not using fd=X and let qemu create the file
>>>>>> descriptors by itself? Btw, how did you create the two tap fds?
>>>>> Can it create the descriptors itself? I get:
>>>>> qemu-system-x86_64: -netdev tap,id=hostnet0,queues=2: Device 'tap' could not be initialized
>>>> You need to prepare an ifup script, which defaults to /etc/qemu-ifup, like
>>>> the following (or you may try adding script=no to the -netdev options):
>>>>
>>>> #!/bin/sh
>>>>
>>>> switch=kvmbr0
>>>>
>>>> /sbin/ifconfig $1 0.0.0.0 up
>>>> /usr/sbin/brctl addif $switch $1
>>>> /usr/sbin/brctl stp $switch off
>>>>
>>>> This will let qemu create a tap fd itself and connect it to a port of the
>>>> bridge called kvmbr0.
>>> But how do I support multi-queue in this way?
>> Qemu will create the necessary multiqueue tap by itself, see patch 0/12.
>>> I got a guest kernel panic when using this way and setting queues=4.
>> Does it happen w/o or w/ an fd parameter? What's the qemu command line?
>> Did you meet it during boot time?
> The QEMU command line is:
>
> /work/git/qemu/x86_64-softmmu/qemu-system-x86_64 -name f17 -M pc-0.15 -enable-kvm -m 3096 \
> -smp 4,sockets=4,cores=1,threads=1 \
> -uuid c31a9f3e-4161-c53a-339c-5dc36d0497cb -no-user-config -nodefaults \
> -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/f17.monitor,server,nowait \
> -mon chardev=charmonitor,id=monitor,mode=control \
> -rtc base=utc -no-shutdown \
> -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
> -device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0xb,num_queues=4,hotplug=on \
> -device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x5 \
> -drive file=/vm/f17.img,if=none,id=drive-virtio-disk0,format=qcow2 \
> -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x6,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 \
> -drive file=/vm2/f17-kernel.img,if=none,id=drive-virtio-disk1,format=qcow2 \
> -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x8,drive=drive-virtio-disk1,id=virtio-disk1 \
> -drive file=/vm/virtio-scsi/scsi3.img,if=none,id=drive-scsi0-0-2-0,format=raw \
> -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=2,drive=drive-scsi0-0-2-0,id=scsi0-0-2-0,removable=on \
> -drive file=/vm/virtio-scsi/scsi4.img,if=none,id=drive-scsi0-0-3-0,format=raw \
> -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=3,drive=drive-scsi0-0-3-0,id=scsi0-0-3-0 \
> -drive file=/vm/virtio-scsi/scsi1.img,if=none,id=drive-scsi0-0-0-0,format=raw \
> -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0 \
> -drive file=/vm/virtio-scsi/scsi2.img,if=none,id=drive-scsi0-0-1-0,format=raw \
> -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi0-0-1-0,id=scsi0-0-1-0 \
> -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 \
> -chardev file,id=charserial1,path=/vm/f17.log \
> -device isa-serial,chardev=charserial1,id=serial1 \
> -device usb-tablet,id=input0 -vga std \
> -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7 \
> -netdev tap,id=hostnet0,vhost=on,queues=4 \
> -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:ce:7b:29,bus=pci.0,addr=0x3 \
> -monitor stdio
>
> I got a panic just after booting the system; I did nothing, waited for a while, and the guest panicked.
>
> [   28.053004] BUG: soft lockup - CPU#1 stuck for 23s! [ip:592]
> [   28.053004] Modules linked in: ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables uinput joydev microcode virtio_balloon pcspkr virtio_net i2c_piix4 i2c_core virtio_scsi virtio_blk floppy
> [   28.053004] CPU 1 
> [   28.053004] Pid: 592, comm: ip Not tainted 3.8.0-rc1-net+ #3 Bochs Bochs
> [   28.053004] RIP: 0010:[<ffffffff8137a9ab>]  [<ffffffff8137a9ab>] virtqueue_get_buf+0xb/0x120
> [   28.053004] RSP: 0018:ffff8800bc913550  EFLAGS: 00000246
> [   28.053004] RAX: 0000000000000000 RBX: ffff8800bc49c000 RCX: ffff8800bc49e000
> [   28.053004] RDX: 0000000000000000 RSI: ffff8800bc913584 RDI: ffff8800bcfd4000
> [   28.053004] RBP: ffff8800bc913558 R08: ffff8800bcfd0800 R09: 0000000000000000
> [   28.053004] R10: ffff8800bc49c000 R11: ffff880036cc4de0 R12: ffff8800bcfd4000
> [   28.053004] R13: ffff8800bc913558 R14: ffffffff8137ad73 R15: 00000000000200d0
> [   28.053004] FS:  00007fb27a589740(0000) GS:ffff8800c1480000(0000) knlGS:0000000000000000
> [   28.053004] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   28.053004] CR2: 0000000000640530 CR3: 00000000baeff000 CR4: 00000000000006e0
> [   28.053004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [   28.053004] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> [   28.053004] Process ip (pid: 592, threadinfo ffff8800bc912000, task ffff880036da2e20)
> [   28.053004] Stack:
> [   28.053004]  ffff8800bcfd0800 ffff8800bc913638 ffffffffa003e9bb ffff8800bc913656
> [   28.053004]  0000000100000002 ffff8800c17ebb08 000000500000ff10 ffffea0002f244c0
> [   28.053004]  0000000200000582 0000000000000000 0000000000000000 ffffea0002f244c0
> [   28.053004] Call Trace:
> [   28.053004]  [<ffffffffa003e9bb>] virtnet_send_command.constprop.26+0x24b/0x270 [virtio_net]
> [   28.053004]  [<ffffffff812ed963>] ? sg_init_table+0x23/0x50
> [   28.053004]  [<ffffffffa0040629>] virtnet_set_rx_mode+0x99/0x300 [virtio_net]
> [   28.053004]  [<ffffffff8152306f>] __dev_set_rx_mode+0x5f/0xb0
> [   28.053004]  [<ffffffff815230ef>] dev_set_rx_mode+0x2f/0x50
> [   28.053004]  [<ffffffff815231b7>] __dev_open+0xa7/0xf0
> [   28.053004]  [<ffffffff81523461>] __dev_change_flags+0xa1/0x180
> [   28.053004]  [<ffffffff815235f8>] dev_change_flags+0x28/0x70
> [   28.053004]  [<ffffffff8152ff20>] do_setlink+0x3b0/0xa50
> [   28.053004]  [<ffffffff812fb6b1>] ? nla_parse+0x31/0xe0
> [   28.053004]  [<ffffffff815325de>] rtnl_newlink+0x36e/0x580
> [   28.053004]  [<ffffffff811355cc>] ? get_page_from_freelist+0x37c/0x730
> [   28.053004]  [<ffffffff81531e13>] rtnetlink_rcv_msg+0x113/0x2f0
> [   28.053004]  [<ffffffff8117d973>] ? __kmalloc_node_track_caller+0x63/0x1c0
> [   28.053004]  [<ffffffff8151526b>] ? __alloc_skb+0x8b/0x2a0
> [   28.053004]  [<ffffffff81531d00>] ? __rtnl_unlock+0x20/0x20
> [   28.053004]  [<ffffffff8154b571>] netlink_rcv_skb+0xb1/0xc0
> [   28.053004]  [<ffffffff8152ea05>] rtnetlink_rcv+0x25/0x40
> [   28.053004]  [<ffffffff8154ae91>] netlink_unicast+0x1a1/0x220
> [   28.053004]  [<ffffffff8154b211>] netlink_sendmsg+0x301/0x3c0
> [   28.053004]  [<ffffffff81508530>] sock_sendmsg+0xb0/0xe0
> [   28.053004]  [<ffffffff8113a45b>] ? lru_cache_add_lru+0x3b/0x60
> [   28.053004]  [<ffffffff811608b7>] ? page_add_new_anon_rmap+0xc7/0x180
> [   28.053004]  [<ffffffff81509efc>] __sys_sendmsg+0x3ac/0x3c0
> [   28.053004]  [<ffffffff8162e47c>] ? __do_page_fault+0x23c/0x4d0
> [   28.053004]  [<ffffffff8115c9ef>] ? do_brk+0x1ff/0x370
> [   28.053004]  [<ffffffff8150bec9>] sys_sendmsg+0x49/0x90
> [   28.053004]  [<ffffffff81632d59>] system_call_fastpath+0x16/0x1b
> [   28.053004] Code: 04 0f ae f0 48 8b 47 50 5d 0f b7 50 02 66 39 57 64 0f 94 c0 c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 54 <53> 80 7f 59 00 48 89 fb 0f 85 90 00 00 00 48 8b 47 50 0f b7 50 
>
>
> The QEMU tree I used is git://github.com/jasowang/qemu.git

Thanks a lot, will try to reproduce it myself tomorrow. From the
calltrace, it looks like we sent a command to an rx/tx queue.
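
For reference, this series maps virtqueue indices 2N and 2N+1 to the rx/tx
pair of queue N, with the control vq added after all the pairs, so the
lookup in the patch below is simply:

static int vq2q(int queue_index)
{
    return queue_index / 2; /* rx/tx virtqueue index -> queue pair */
}

A control command landing on one of the rx/tx indices would be exactly the
kind of mis-routing the calltrace suggests.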
> Thanks,
> Wanlong Gao
>
>> Thanks
>>> Thanks,
>>> Wanlong Gao
>>>
>>>>> I create the tap fd like this, and dup() creates the second and third fds, right?
>>>> The second and third fds should be created with TUNSETIFF with the same
>>>> tap_name as well. Btw, you need to specify an IFF_MULTI_QUEUE flag to tell
>>>> the kernel you want to create a multiqueue tap device, otherwise the
>>>> second and third calls to TUNSETIFF will fail.
>>>>
>>>> Thanks
>>>>> 	int tap_fd = open("/dev/net/tun", O_RDWR);
>>>>> 	int vhost_fd = open("/dev/vhost-net", O_RDWR);
>>>>> 	char *tap_name = "tap";
>>>>> 	char cmd[2048];
>>>>> 	char brctl[256];
>>>>> 	char netup[256];
>>>>> 	struct ifreq ifr;
>>>>> 	if (tap_fd < 0) {
>>>>> 		printf("open tun device failed\n");
>>>>> 		return -1;
>>>>> 	}
>>>>> 	if (vhost_fd < 0) {
>>>>> 		printf("open vhost-net device failed\n");
>>>>> 		return -1;
>>>>> 	}
>>>>> 	memset(&ifr, 0, sizeof(ifr));
>>>>> 	strncpy(ifr.ifr_name, tap_name, IFNAMSIZ - 1);
>>>>> 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
>>>>>
>>>>> 	/*
>>>>> 	 * setup tap net device
>>>>> 	 */
>>>>> 	if (ioctl(tap_fd, TUNSETIFF, &ifr) < 0) {
>>>>> 		printf("setup tap net device failed\n");
>>>>> 		return -1;
>>>>> 	}
>>>>>
>>>>> 	sprintf(brctl, "brctl addif virbr0 %s", tap_name);
>>>>> 	sprintf(netup, "ifconfig %s up", tap_name);
>>>>> 	system(brctl);
>>>>> 	system(netup);
>>>>>
>>>>> Thanks,
>>>>> Wanlong Gao
>>>>>
>>>>>
>>>>>> Thanks
>>>>>>> Thanks,
>>>>>>> Wanlong Gao
>>>>>>>
>>>>>>>>>> +        }
>>>>>>>>>> +    }
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
>>>>>>>>>> +
diff mbox

Patch

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index c6f0915..aaeef1b 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -45,7 +45,7 @@  typedef struct VirtIONet
     VirtIODevice vdev;
     uint8_t mac[ETH_ALEN];
     uint16_t status;
-    VirtIONetQueue vq;
+    VirtIONetQueue vqs[MAX_QUEUE_NUM];
     VirtQueue *ctrl_vq;
     NICState *nic;
     uint32_t tx_timeout;
@@ -70,14 +70,23 @@  typedef struct VirtIONet
     } mac_table;
     uint32_t *vlans;
     DeviceState *qdev;
+    int multiqueue;
+    uint16_t max_queues;
+    uint16_t curr_queues;
 } VirtIONet;
 
-static VirtIONetQueue *virtio_net_get_queue(NetClientState *nc)
+static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 {
     VirtIONet *n = qemu_get_nic_opaque(nc);
 
-    return &n->vq;
+    return &n->vqs[nc->queue_index];
 }
+
+static int vq2q(int queue_index)
+{
+    return queue_index / 2;
+}
+
 /* TODO
  * - we could suppress RX interrupt if we were so inclined.
  */
@@ -93,6 +102,7 @@  static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
     struct virtio_net_config netcfg;
 
     stw_p(&netcfg.status, n->status);
+    stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);
     memcpy(netcfg.mac, n->mac, ETH_ALEN);
     memcpy(config, &netcfg, sizeof(netcfg));
 }
@@ -116,31 +126,33 @@  static bool virtio_net_started(VirtIONet *n, uint8_t status)
         (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
 }
 
-static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
+static void virtio_net_vhost_status(VirtIONet *n, int queue_index,
+                                    uint8_t status)
 {
-    VirtIONetQueue *q = &n->vq;
+    NetClientState *nc = qemu_get_subqueue(n->nic, queue_index);
+    VirtIONetQueue *q = &n->vqs[queue_index];
 
-    if (!qemu_get_queue(n->nic)->peer) {
+    if (!nc->peer) {
         return;
     }
-    if (qemu_get_queue(n->nic)->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
         return;
     }
 
-    if (!tap_get_vhost_net(qemu_get_queue(n->nic)->peer)) {
+    if (!tap_get_vhost_net(nc->peer)) {
         return;
     }
-    if (!!q->vhost_started == virtio_net_started(n, status) &&
-                              !qemu_get_queue(n->nic)->peer->link_down) {
+    if (!!q->vhost_started ==
+        (virtio_net_started(n, status) && !nc->peer->link_down)) {
         return;
     }
     if (!q->vhost_started) {
         int r;
-        if (!vhost_net_query(tap_get_vhost_net(qemu_get_queue(n->nic)->peer), &n->vdev)) {
+        if (!vhost_net_query(tap_get_vhost_net(nc->peer), &n->vdev)) {
             return;
         }
-        r = vhost_net_start(tap_get_vhost_net(qemu_get_queue(n->nic)->peer),
-                            &n->vdev, 0);
+        r = vhost_net_start(tap_get_vhost_net(nc->peer), &n->vdev,
+                            queue_index * 2);
         if (r < 0) {
             error_report("unable to start vhost net: %d: "
                          "falling back on userspace virtio", -r);
@@ -148,7 +160,7 @@  static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
             q->vhost_started = 1;
         }
     } else {
-        vhost_net_stop(tap_get_vhost_net(qemu_get_queue(n->nic)->peer), &n->vdev);
+        vhost_net_stop(tap_get_vhost_net(nc->peer), &n->vdev);
         q->vhost_started = 0;
     }
 }
@@ -156,26 +168,35 @@  static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 {
     VirtIONet *n = to_virtio_net(vdev);
-    VirtIONetQueue *q = &n->vq;
+    int i;
 
-    virtio_net_vhost_status(n, status);
+    for (i = 0; i < n->max_queues; i++) {
+        VirtIONetQueue *q = &n->vqs[i];
+        uint8_t queue_status = status;
 
-    if (!q->tx_waiting) {
-        return;
-    }
+        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
+            queue_status = 0;
+        }
 
-    if (virtio_net_started(n, status) && !q->vhost_started) {
-        if (q->tx_timer) {
-            qemu_mod_timer(q->tx_timer,
-                           qemu_get_clock_ns(vm_clock) + n->tx_timeout);
-        } else {
-            qemu_bh_schedule(q->tx_bh);
+        virtio_net_vhost_status(n, i, queue_status);
+
+        if (!q->tx_waiting) {
+            continue;
         }
-    } else {
-        if (q->tx_timer) {
-            qemu_del_timer(q->tx_timer);
+
+        if (virtio_net_started(n, status) && !q->vhost_started) {
+            if (q->tx_timer) {
+                qemu_mod_timer(q->tx_timer,
+                               qemu_get_clock_ns(vm_clock) + n->tx_timeout);
+            } else {
+                qemu_bh_schedule(q->tx_bh);
+            }
         } else {
-            qemu_bh_cancel(q->tx_bh);
+            if (q->tx_timer) {
+                qemu_del_timer(q->tx_timer);
+            } else {
+                qemu_bh_cancel(q->tx_bh);
+            }
         }
     }
 }
@@ -207,6 +228,8 @@  static void virtio_net_reset(VirtIODevice *vdev)
     n->nomulti = 0;
     n->nouni = 0;
     n->nobcast = 0;
+    /* multiqueue is disabled by default */
+    n->curr_queues = 1;
 
     /* Flush any MAC and VLAN filter table state */
     n->mac_table.in_use = 0;
@@ -245,18 +268,72 @@  static int peer_has_ufo(VirtIONet *n)
 
 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
 {
+    int i;
+    NetClientState *nc;
+
     n->mergeable_rx_bufs = mergeable_rx_bufs;
 
     n->guest_hdr_len = n->mergeable_rx_bufs ?
         sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
 
-    if (peer_has_vnet_hdr(n) &&
-        tap_has_vnet_hdr_len(qemu_get_queue(n->nic)->peer, n->guest_hdr_len)) {
-        tap_set_vnet_hdr_len(qemu_get_queue(n->nic)->peer, n->guest_hdr_len);
-        n->host_hdr_len = n->guest_hdr_len;
+    for (i = 0; i < n->max_queues; i++) {
+        nc = qemu_get_subqueue(n->nic, i);
+
+        if (peer_has_vnet_hdr(n) &&
+            tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
+            tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
+            n->host_hdr_len = n->guest_hdr_len;
+        }
     }
 }
 
+static int peer_attach(VirtIONet *n, int index)
+{
+    NetClientState *nc = qemu_get_subqueue(n->nic, index);
+    int ret;
+
+    if (!nc->peer) {
+        ret = -1;
+    } else if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+        ret = -1;
+    } else {
+        ret = tap_attach(nc->peer);
+    }
+
+    return ret;
+}
+
+static int peer_detach(VirtIONet *n, int index)
+{
+    NetClientState *nc = qemu_get_subqueue(n->nic, index);
+    int ret;
+
+    if (!nc->peer) {
+        ret = -1;
+    } else if (nc->peer->info->type !=  NET_CLIENT_OPTIONS_KIND_TAP) {
+        ret = -1;
+    } else {
+        ret = tap_detach(nc->peer);
+    }
+
+    return ret;
+}
+
+static void virtio_net_set_queues(VirtIONet *n)
+{
+    int i;
+
+    for (i = 0; i < n->max_queues; i++) {
+        if (i < n->curr_queues) {
+            assert(!peer_attach(n, i));
+        } else {
+            assert(!peer_detach(n, i));
+        }
+    }
+}
+
+static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
+
 static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
 {
     VirtIONet *n = to_virtio_net(vdev);
@@ -308,25 +385,33 @@  static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
 static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
 {
     VirtIONet *n = to_virtio_net(vdev);
+    int i;
+
+    virtio_net_set_multiqueue(n, !!(features & (1 << VIRTIO_NET_F_MQ)),
+                              !!(features & (1 << VIRTIO_NET_F_CTRL_VQ)));
 
     virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));
 
     if (n->has_vnet_hdr) {
-        tap_set_offload(qemu_get_queue(n->nic)->peer,
+        tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
                         (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                         (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                         (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                         (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                         (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
     }
-    if (!qemu_get_queue(n->nic)->peer ||
-        qemu_get_queue(n->nic)->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
-        return;
-    }
-    if (!tap_get_vhost_net(qemu_get_queue(n->nic)->peer)) {
-        return;
+
+    for (i = 0;  i < n->max_queues; i++) {
+        NetClientState *nc = qemu_get_subqueue(n->nic, i);
+
+        if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+            continue;
+        }
+        if (!tap_get_vhost_net(nc->peer)) {
+            continue;
+        }
+        vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
     }
-    vhost_net_ack_features(tap_get_vhost_net(qemu_get_queue(n->nic)->peer), features);
 }
 
 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
@@ -436,6 +521,35 @@  static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
     return VIRTIO_NET_OK;
 }
 
+static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
+                                VirtQueueElement *elem)
+{
+    struct virtio_net_ctrl_mq s;
+
+    if (elem->out_num != 2 ||
+        elem->out_sg[1].iov_len != sizeof(struct virtio_net_ctrl_mq)) {
+        error_report("virtio-net ctrl invalid steering command");
+        return VIRTIO_NET_ERR;
+    }
+
+    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
+        return VIRTIO_NET_ERR;
+    }
+
+    memcpy(&s, elem->out_sg[1].iov_base, sizeof(struct virtio_net_ctrl_mq));
+
+    if (s.virtqueue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+        s.virtqueue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
+        s.virtqueue_pairs > n->max_queues) {
+        return VIRTIO_NET_ERR;
+    }
+
+    n->curr_queues = s.virtqueue_pairs;
+    virtio_net_set_queues(n);
+    virtio_net_set_status(&n->vdev, n->vdev.status);
+
+    return VIRTIO_NET_OK;
+}
 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
@@ -464,6 +578,8 @@  static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
             status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
         else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
             status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);
+        else if (ctrl.class == VIRTIO_NET_CTRL_MQ)
+            status = virtio_net_handle_mq(n, ctrl.cmd, &elem);
 
         stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);
 
@@ -477,19 +593,24 @@  static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
+    int queue_index = vq2q(virtio_get_queue_index(vq));
 
-    qemu_flush_queued_packets(qemu_get_queue(n->nic));
+    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
 }
 
 static int virtio_net_can_receive(NetClientState *nc)
 {
     VirtIONet *n = qemu_get_nic_opaque(nc);
-    VirtIONetQueue *q = virtio_net_get_queue(nc);
+    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
 
     if (!n->vdev.vm_running) {
         return 0;
     }
 
+    if (nc->queue_index >= n->curr_queues) {
+        return 0;
+    }
+
     if (!virtio_queue_ready(q->rx_vq) ||
         !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
         return 0;
@@ -620,14 +741,15 @@  static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     VirtIONet *n = qemu_get_nic_opaque(nc);
-    VirtIONetQueue *q = virtio_net_get_queue(nc);
+    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
     struct virtio_net_hdr_mrg_rxbuf mhdr;
     unsigned mhdr_cnt = 0;
     size_t offset, i, guest_offset;
 
-    if (!virtio_net_can_receive(qemu_get_queue(n->nic)))
+    if (!virtio_net_can_receive(nc)) {
         return -1;
+    }
 
     /* hdr_len refers to the header we supply to the guest */
     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
@@ -720,7 +842,7 @@  static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
 {
     VirtIONet *n = qemu_get_nic_opaque(nc);
-    VirtIONetQueue *q = virtio_net_get_queue(nc);
+    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
 
     virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
     virtio_notify(&n->vdev, q->tx_vq);
@@ -737,6 +859,7 @@  static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
     VirtIONet *n = q->n;
     VirtQueueElement elem;
     int32_t num_packets = 0;
+    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
     if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
         return num_packets;
     }
@@ -778,8 +901,8 @@  static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
 
         len = n->guest_hdr_len;
 
-        ret = qemu_sendv_packet_async(qemu_get_queue(n->nic), out_sg, out_num,
-                                      virtio_net_tx_complete);
+        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
+                                      out_sg, out_num, virtio_net_tx_complete);
         if (ret == 0) {
             virtio_queue_set_notification(q->tx_vq, 0);
             q->async_tx.elem = elem;
@@ -802,7 +925,7 @@  static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
-    VirtIONetQueue *q = &n->vq;
+    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
 
     /* This happens when device was stopped but VCPU wasn't. */
     if (!n->vdev.vm_running) {
@@ -826,7 +949,7 @@  static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
-    VirtIONetQueue *q = &n->vq;
+    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
 
     if (unlikely(q->tx_waiting)) {
         return;
@@ -894,10 +1017,49 @@  static void virtio_net_tx_bh(void *opaque)
     }
 }
 
+static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
+{
+    VirtIODevice *vdev = &n->vdev;
+    int i;
+
+    n->multiqueue = multiqueue;
+
+    if (!multiqueue)
+        n->curr_queues = 1;
+
+    for (i = 2; i <= n->max_queues * 2 + 1; i++) {
+        virtio_del_queue(vdev, i);
+    }
+
+    for (i = 1; i < n->max_queues; i++) {
+        n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
+        if (n->vqs[i].tx_timer) {
+            n->vqs[i].tx_vq =
+                virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
+            n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
+                                                   virtio_net_tx_timer,
+                                                   &n->vqs[i]);
+        } else {
+            n->vqs[i].tx_vq =
+                virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
+            n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
+        }
+
+        n->vqs[i].tx_waiting = 0;
+        n->vqs[i].n = n;
+    }
+
+    if (ctrl) {
+        n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
+    }
+
+    virtio_net_set_queues(n);
+}
+
 static void virtio_net_save(QEMUFile *f, void *opaque)
 {
     VirtIONet *n = opaque;
-    VirtIONetQueue *q = &n->vq;
+    VirtIONetQueue *q = &n->vqs[0];
 
     /* At this point, backend must be stopped, otherwise
      * it might keep writing to memory. */
@@ -926,9 +1088,8 @@  static void virtio_net_save(QEMUFile *f, void *opaque)
 static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
 {
     VirtIONet *n = opaque;
-    VirtIONetQueue *q = &n->vq;
-    int i;
-    int ret;
+    VirtIONetQueue *q = &n->vqs[0];
+    int ret, i;
 
     if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
         return -EINVAL;
@@ -1044,6 +1205,7 @@  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                               virtio_net_conf *net)
 {
     VirtIONet *n;
+    int i;
 
     n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
                                         sizeof(struct virtio_net_config),
@@ -1056,8 +1218,11 @@  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
     n->vdev.bad_features = virtio_net_bad_features;
     n->vdev.reset = virtio_net_reset;
     n->vdev.set_status = virtio_net_set_status;
-    n->vq.rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
-    n->vq.n = n;
+    n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
+    n->max_queues = conf->queues;
+    n->curr_queues = 1;
+    n->vqs[0].n = n;
+    n->tx_timeout = net->txtimer;
 
     if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
         error_report("virtio-net: "
@@ -1067,14 +1232,14 @@  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
     }
 
     if (net->tx && !strcmp(net->tx, "timer")) {
-        n->vq.tx_vq = virtio_add_queue(&n->vdev, 256,
-                                       virtio_net_handle_tx_timer);
-        n->vq.tx_timer = qemu_new_timer_ns(vm_clock,
-                                           virtio_net_tx_timer, &n->vq);
-        n->tx_timeout = net->txtimer;
+        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
+                                           virtio_net_handle_tx_timer);
+        n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
+                                               &n->vqs[0]);
     } else {
-        n->vq.tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
-        n->vq.tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vq);
+        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
+                                           virtio_net_handle_tx_bh);
+        n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
     }
     n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
     qemu_macaddr_default_if_unset(&conf->macaddr);
@@ -1084,7 +1249,9 @@  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
     n->nic = qemu_new_nic(&net_virtio_info, conf, object_get_typename(OBJECT(dev)), dev->id, n);
     peer_test_vnet_hdr(n);
     if (peer_has_vnet_hdr(n)) {
-        tap_using_vnet_hdr(qemu_get_queue(n->nic)->peer, 1);
+        for (i = 0; i < n->max_queues; i++) {
+            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, 1);
+        }
         n->host_hdr_len = sizeof(struct virtio_net_hdr);
     } else {
         n->host_hdr_len = 0;
@@ -1092,7 +1259,7 @@  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
 
     qemu_format_nic_info_str(qemu_get_queue(n->nic), conf->macaddr.a);
 
-    n->vq.tx_waiting = 0;
+    n->vqs[0].tx_waiting = 0;
     n->tx_burst = net->txburst;
     virtio_net_set_mrg_rx_bufs(n, 0);
     n->promisc = 1; /* for compatibility */
@@ -1113,23 +1280,28 @@  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
 void virtio_net_exit(VirtIODevice *vdev)
 {
     VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
-    VirtIONetQueue *q = &n->vq;
+    int i;
 
     /* This will stop vhost backend if appropriate. */
     virtio_net_set_status(vdev, 0);
 
-    qemu_purge_queued_packets(qemu_get_queue(n->nic));
-
     unregister_savevm(n->qdev, "virtio-net", n);
 
     g_free(n->mac_table.macs);
     g_free(n->vlans);
 
-    if (q->tx_timer) {
-        qemu_del_timer(q->tx_timer);
-        qemu_free_timer(q->tx_timer);
-    } else {
-        qemu_bh_delete(q->tx_bh);
+    for (i = 0; i < n->max_queues; i++) {
+        VirtIONetQueue *q = &n->vqs[i];
+        NetClientState *nc = qemu_get_subqueue(n->nic, i);
+
+        qemu_purge_queued_packets(nc);
+
+        if (q->tx_timer) {
+            qemu_del_timer(q->tx_timer);
+            qemu_free_timer(q->tx_timer);
+        } else {
+            qemu_bh_delete(q->tx_bh);
+        }
     }
 
     qemu_del_nic(n->nic);
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index 36aa463..bc5857a 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -44,6 +44,8 @@ 
 #define VIRTIO_NET_F_CTRL_RX    18      /* Control channel RX mode support */
 #define VIRTIO_NET_F_CTRL_VLAN  19      /* Control channel VLAN filtering */
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20   /* Extra RX mode control support */
+#define VIRTIO_NET_F_MQ         22      /* Device supports Receive Flow
+                                         * Steering */
 
 #define VIRTIO_NET_S_LINK_UP    1       /* Link is up */
 
@@ -72,6 +74,8 @@  struct virtio_net_config
     uint8_t mac[ETH_ALEN];
     /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
     uint16_t status;
+    /* Max virtqueue pairs supported by the device */
+    uint16_t max_virtqueue_pairs;
 } QEMU_PACKED;
 
 /* This is the first element of the scatter-gather list.  If you don't
@@ -168,6 +172,26 @@  struct virtio_net_ctrl_mac {
  #define VIRTIO_NET_CTRL_VLAN_ADD             0
  #define VIRTIO_NET_CTRL_VLAN_DEL             1
 
+/*
+ * Control Multiqueue
+ *
+ * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
+ * enables multiqueue, specifying the number of the transmit and
+ * receive queues that will be used. After the command is consumed and acked by
+ * the device, the device will not steer new packets on receive virtqueues
+ * other than specified nor read from transmit virtqueues other than specified.
+ * Accordingly, the driver should not transmit new packets on virtqueues other than
+ * specified.
+ */
+struct virtio_net_ctrl_mq {
+    uint16_t virtqueue_pairs;
+};
+
+#define VIRTIO_NET_CTRL_MQ   4
+ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
+ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
+ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
+
 #define DEFINE_VIRTIO_NET_FEATURES(_state, _field) \
         DEFINE_VIRTIO_COMMON_FEATURES(_state, _field), \
         DEFINE_PROP_BIT("csum", _state, _field, VIRTIO_NET_F_CSUM, true), \
@@ -186,5 +210,6 @@  struct virtio_net_ctrl_mac {
         DEFINE_PROP_BIT("ctrl_vq", _state, _field, VIRTIO_NET_F_CTRL_VQ, true), \
         DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \
         DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \
-        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true)
+        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true), \
+        DEFINE_PROP_BIT("mq", _state, _field, VIRTIO_NET_F_MQ, true)
 #endif
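
For illustration, a guest-side sketch of exercising the command defined
above (send_ctrl_command() is a hypothetical stand-in for the driver's
control-virtqueue plumbing, not an API introduced by this patch):

static int set_queue_pairs(uint16_t pairs)
{
    struct virtio_net_ctrl_mq s;

    /* The device rejects values outside [VQ_PAIRS_MIN, VQ_PAIRS_MAX]
     * or above the max_virtqueue_pairs advertised in config space. */
    if (pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
        return -1;

    s.virtqueue_pairs = pairs;
    return send_ctrl_command(VIRTIO_NET_CTRL_MQ,
                             VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
                             &s, sizeof(s));
}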