diff mbox

[2/5] virtio-net: Limit number of packets sent per TX flush

Message ID 20100827223718.2696.64669.stgit@s20.home
State New
Headers show

Commit Message

Alex Williamson Aug. 27, 2010, 10:37 p.m. UTC
If virtio_net_flush_tx is called with notification disabled, we can
race with the guest, processing packets at the same rate as they
get produced.  The trouble is that this means we have no guaranteed
exit condition from the function and can spend minutes in there.
Currently flush_tx is only called with notification on, which seems
to limit us to one pass through the queue per call.  An upcoming
patch changes this.

One pass through the queue (256) seems to be a good default value
for this, balancing latency with throughput.  We use a signed int
for txburst because 2^31 packets in a burst would take many, many
minutes to process and it allows us to easily return a negative
value value from virtio_net_flush_tx() to indicate a back-off
or error condition.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/s390-virtio-bus.c |    4 +++-
 hw/s390-virtio-bus.h |    2 ++
 hw/syborg_virtio.c   |    6 +++++-
 hw/virtio-net.c      |   23 ++++++++++++++++-------
 hw/virtio-pci.c      |    6 +++++-
 hw/virtio.h          |    2 +-
 6 files changed, 32 insertions(+), 11 deletions(-)

Comments

Michael S. Tsirkin Aug. 31, 2010, 8:23 p.m. UTC | #1
On Fri, Aug 27, 2010 at 04:37:18PM -0600, Alex Williamson wrote:
> If virtio_net_flush_tx is called with notification disabled, we can
> race with the guest, processing packets at the same rate as they
> get produced.  The trouble is that this means we have no guaranteed
> exit condition from the function and can spend minutes in there.
> Currently flush_tx is only called with notification on, which seems
> to limit us to one pass through the queue per call.  An upcoming
> patch changes this.
> 
> One pass through the queue (256) seems to be a good default value
> for this, balancing latency with throughput.  We use a signed int
> for txburst because 2^31 packets in a burst would take many, many
> minutes to process and it allows us to easily return a negative
> value value from virtio_net_flush_tx() to indicate a back-off
> or error condition.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

It might be better not to make this configurable, and simply set
txburst = vq->num in code in a single place, than the magic
256 constant in multiuple places.
Alternatively, let's put a macro in a .h file.

It might also be a good idea to put the above motivation in comments in the code.


> ---
> 
>  hw/s390-virtio-bus.c |    4 +++-
>  hw/s390-virtio-bus.h |    2 ++
>  hw/syborg_virtio.c   |    6 +++++-
>  hw/virtio-net.c      |   23 ++++++++++++++++-------
>  hw/virtio-pci.c      |    6 +++++-
>  hw/virtio.h          |    2 +-
>  6 files changed, 32 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
> index 4da0b40..1483362 100644
> --- a/hw/s390-virtio-bus.c
> +++ b/hw/s390-virtio-bus.c
> @@ -110,7 +110,8 @@ static int s390_virtio_net_init(VirtIOS390Device *dev)
>  {
>      VirtIODevice *vdev;
>  
> -    vdev = virtio_net_init((DeviceState *)dev, &dev->nic, dev->txtimer);
> +    vdev = virtio_net_init((DeviceState *)dev, &dev->nic,
> +                           dev->txtimer, dev->txburst);
>      if (!vdev) {
>          return -1;
>      }
> @@ -328,6 +329,7 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
>      .qdev.props = (Property[]) {
>          DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
>          DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
> +        DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
>          DEFINE_PROP_END_OF_LIST(),
>      },
>  };
> diff --git a/hw/s390-virtio-bus.h b/hw/s390-virtio-bus.h
> index 922daf2..808aea0 100644
> --- a/hw/s390-virtio-bus.h
> +++ b/hw/s390-virtio-bus.h
> @@ -45,6 +45,8 @@ typedef struct VirtIOS390Device {
>      uint32_t max_virtserial_ports;
>      /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
>      uint32_t txtimer;
> +    /* Max tx packets for virtio-net to burst at a time */
> +    int32_t txburst;
>  } VirtIOS390Device;
>  
>  typedef struct VirtIOS390Bus {
> diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
> index c8d731a..7b76972 100644
> --- a/hw/syborg_virtio.c
> +++ b/hw/syborg_virtio.c
> @@ -70,6 +70,8 @@ typedef struct {
>      uint32_t host_features;
>      /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
>      uint32_t txtimer;
> +    /* Max tx packets for virtio-net to burst at a time */
> +    int32_t txburst;
>  } SyborgVirtIOProxy;
>  
>  static uint32_t syborg_virtio_readl(void *opaque, target_phys_addr_t offset)
> @@ -286,7 +288,8 @@ static int syborg_virtio_net_init(SysBusDevice *dev)
>      VirtIODevice *vdev;
>      SyborgVirtIOProxy *proxy = FROM_SYSBUS(SyborgVirtIOProxy, dev);
>  
> -    vdev = virtio_net_init(&dev->qdev, &proxy->nic, proxy->txtimer);
> +    vdev = virtio_net_init(&dev->qdev, &proxy->nic,
> +                           proxy->txtimer, proxy->txburst);
>      return syborg_virtio_init(proxy, vdev);
>  }
>  
> @@ -298,6 +301,7 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
>          DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
>          DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
>          DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
> +        DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
>          DEFINE_PROP_END_OF_LIST(),
>      }
>  };
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index 9ef29f0..ac4aa8f 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -37,6 +37,7 @@ typedef struct VirtIONet
>      NICState *nic;
>      QEMUTimer *tx_timer;
>      uint32_t tx_timeout;
> +    int32_t tx_burst;
>      int tx_timer_active;
>      uint32_t has_vnet_hdr;
>      uint8_t has_ufo;
> @@ -620,7 +621,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
>      return size;
>  }
>  
> -static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
> +static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
>  
>  static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
>  {
> @@ -636,16 +637,18 @@ static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
>  }
>  
>  /* TX */
> -static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
> +static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
>  {
>      VirtQueueElement elem;
> +    int32_t num_packets = 0;
>  
> -    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> -        return;
> +    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
> +        return num_packets;
> +    }
>  
>      if (n->async_tx.elem.out_num) {
>          virtio_queue_set_notification(n->tx_vq, 0);
> -        return;
> +        return num_packets;
>      }
>  
>      while (virtqueue_pop(vq, &elem)) {
> @@ -682,14 +685,19 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
>              virtio_queue_set_notification(n->tx_vq, 0);
>              n->async_tx.elem = elem;
>              n->async_tx.len  = len;
> -            return;
> +            return -EBUSY;
>          }
>  
>          len += ret;
>  
>          virtqueue_push(vq, &elem, len);
>          virtio_notify(&n->vdev, vq);
> +
> +        if (++num_packets >= n->tx_burst) {
> +            break;
> +        }
>      }
> +    return num_packets;
>  }
>  
>  static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
> @@ -905,7 +913,7 @@ static void virtio_net_vmstate_change(void *opaque, int running, int reason)
>  }
>  
>  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> -                              uint32_t txtimer)
> +                              uint32_t txtimer, int32_t txburst)
>  {
>      VirtIONet *n;
>  
> @@ -942,6 +950,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
>              n->tx_timeout = txtimer;
>          }
>      }
> +    n->tx_burst = txburst;
>      n->mergeable_rx_bufs = 0;
>      n->promisc = 1; /* for compatibility */
>  
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index e3b9897..e025c09 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -109,6 +109,8 @@ typedef struct {
>      uint32_t max_virtserial_ports;
>      /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
>      uint32_t txtimer;
> +    /* Max tx packets for virtio-net to burst at a time */
> +    int32_t txburst;
>  } VirtIOPCIProxy;
>  
>  /* virtio device */
> @@ -615,7 +617,8 @@ static int virtio_net_init_pci(PCIDevice *pci_dev)
>      VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
>      VirtIODevice *vdev;
>  
> -    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic, proxy->txtimer);
> +    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic,
> +                           proxy->txtimer, proxy->txburst);
>  
>      vdev->nvectors = proxy->nvectors;
>      virtio_init_pci(proxy, vdev,
> @@ -693,6 +696,7 @@ static PCIDeviceInfo virtio_info[] = {
>              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
>              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
>              DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
> +            DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
>              DEFINE_PROP_END_OF_LIST(),
>          },
>          .qdev.reset = virtio_pci_reset,
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 77d05e0..4051889 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -186,7 +186,7 @@ void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
>  /* Base devices.  */
>  VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
>  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
> -                              uint32_t txtimer);
> +                              uint32_t txtimer, int32_t txburst);
>  VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports);
>  VirtIODevice *virtio_balloon_init(DeviceState *dev);
>  #ifdef CONFIG_LINUX
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index 4da0b40..1483362 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -110,7 +110,8 @@  static int s390_virtio_net_init(VirtIOS390Device *dev)
 {
     VirtIODevice *vdev;
 
-    vdev = virtio_net_init((DeviceState *)dev, &dev->nic, dev->txtimer);
+    vdev = virtio_net_init((DeviceState *)dev, &dev->nic,
+                           dev->txtimer, dev->txburst);
     if (!vdev) {
         return -1;
     }
@@ -328,6 +329,7 @@  static VirtIOS390DeviceInfo s390_virtio_net = {
     .qdev.props = (Property[]) {
         DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
         DEFINE_PROP_UINT32("txtimer", VirtIOS390Device, txtimer, 1),
+        DEFINE_PROP_INT32("txburst", VirtIOS390Device, txburst, 256),
         DEFINE_PROP_END_OF_LIST(),
     },
 };
diff --git a/hw/s390-virtio-bus.h b/hw/s390-virtio-bus.h
index 922daf2..808aea0 100644
--- a/hw/s390-virtio-bus.h
+++ b/hw/s390-virtio-bus.h
@@ -45,6 +45,8 @@  typedef struct VirtIOS390Device {
     uint32_t max_virtserial_ports;
     /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
     uint32_t txtimer;
+    /* Max tx packets for virtio-net to burst at a time */
+    int32_t txburst;
 } VirtIOS390Device;
 
 typedef struct VirtIOS390Bus {
diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index c8d731a..7b76972 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -70,6 +70,8 @@  typedef struct {
     uint32_t host_features;
     /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
     uint32_t txtimer;
+    /* Max tx packets for virtio-net to burst at a time */
+    int32_t txburst;
 } SyborgVirtIOProxy;
 
 static uint32_t syborg_virtio_readl(void *opaque, target_phys_addr_t offset)
@@ -286,7 +288,8 @@  static int syborg_virtio_net_init(SysBusDevice *dev)
     VirtIODevice *vdev;
     SyborgVirtIOProxy *proxy = FROM_SYSBUS(SyborgVirtIOProxy, dev);
 
-    vdev = virtio_net_init(&dev->qdev, &proxy->nic, proxy->txtimer);
+    vdev = virtio_net_init(&dev->qdev, &proxy->nic,
+                           proxy->txtimer, proxy->txburst);
     return syborg_virtio_init(proxy, vdev);
 }
 
@@ -298,6 +301,7 @@  static SysBusDeviceInfo syborg_virtio_net_info = {
         DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
         DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
         DEFINE_PROP_UINT32("txtimer", SyborgVirtIOProxy, txtimer, 1),
+        DEFINE_PROP_INT32("txburst", SyborgVirtIOProxy, txburst, 256),
         DEFINE_PROP_END_OF_LIST(),
     }
 };
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 9ef29f0..ac4aa8f 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -37,6 +37,7 @@  typedef struct VirtIONet
     NICState *nic;
     QEMUTimer *tx_timer;
     uint32_t tx_timeout;
+    int32_t tx_burst;
     int tx_timer_active;
     uint32_t has_vnet_hdr;
     uint8_t has_ufo;
@@ -620,7 +621,7 @@  static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
     return size;
 }
 
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
 
 static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
 {
@@ -636,16 +637,18 @@  static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
 }
 
 /* TX */
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
 {
     VirtQueueElement elem;
+    int32_t num_packets = 0;
 
-    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
-        return;
+    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return num_packets;
+    }
 
     if (n->async_tx.elem.out_num) {
         virtio_queue_set_notification(n->tx_vq, 0);
-        return;
+        return num_packets;
     }
 
     while (virtqueue_pop(vq, &elem)) {
@@ -682,14 +685,19 @@  static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
             virtio_queue_set_notification(n->tx_vq, 0);
             n->async_tx.elem = elem;
             n->async_tx.len  = len;
-            return;
+            return -EBUSY;
         }
 
         len += ret;
 
         virtqueue_push(vq, &elem, len);
         virtio_notify(&n->vdev, vq);
+
+        if (++num_packets >= n->tx_burst) {
+            break;
+        }
     }
+    return num_packets;
 }
 
 static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
@@ -905,7 +913,7 @@  static void virtio_net_vmstate_change(void *opaque, int running, int reason)
 }
 
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
-                              uint32_t txtimer)
+                              uint32_t txtimer, int32_t txburst)
 {
     VirtIONet *n;
 
@@ -942,6 +950,7 @@  VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
             n->tx_timeout = txtimer;
         }
     }
+    n->tx_burst = txburst;
     n->mergeable_rx_bufs = 0;
     n->promisc = 1; /* for compatibility */
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index e3b9897..e025c09 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -109,6 +109,8 @@  typedef struct {
     uint32_t max_virtserial_ports;
     /* Timeout value for virtio-net txtimer, 1 = default, >1 = ns timeout */
     uint32_t txtimer;
+    /* Max tx packets for virtio-net to burst at a time */
+    int32_t txburst;
 } VirtIOPCIProxy;
 
 /* virtio device */
@@ -615,7 +617,8 @@  static int virtio_net_init_pci(PCIDevice *pci_dev)
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
     VirtIODevice *vdev;
 
-    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic, proxy->txtimer);
+    vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic,
+                           proxy->txtimer, proxy->txburst);
 
     vdev->nvectors = proxy->nvectors;
     virtio_init_pci(proxy, vdev,
@@ -693,6 +696,7 @@  static PCIDeviceInfo virtio_info[] = {
             DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
             DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
             DEFINE_PROP_UINT32("txtimer", VirtIOPCIProxy, txtimer, 1),
+            DEFINE_PROP_INT32("txburst", VirtIOPCIProxy, txburst, 256),
             DEFINE_PROP_END_OF_LIST(),
         },
         .qdev.reset = virtio_pci_reset,
diff --git a/hw/virtio.h b/hw/virtio.h
index 77d05e0..4051889 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -186,7 +186,7 @@  void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
 /* Base devices.  */
 VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
-                              uint32_t txtimer);
+                              uint32_t txtimer, int32_t txburst);
 VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports);
 VirtIODevice *virtio_balloon_init(DeviceState *dev);
 #ifdef CONFIG_LINUX