diff mbox

vhost_net: start/stop guest notifiers properly

Message ID 1408355491-42089-1-git-send-email-jasowang@redhat.com
State New
Headers show

Commit Message

Jason Wang Aug. 18, 2014, 9:51 a.m. UTC
Commit a9f98bb5ebe6fb1869321dcc58e72041ae626ad8 ("vhost: multiqueue
support") changed the order of stopping the device. Previously,
vhost_dev_stop would disable the backend and only afterwards unset the
guest notifiers. We now unset the guest notifiers while vhost is still
active. This can lose interrupts, causing guest networking to fail.

Additionally, remove the hdev->started asserts in vhost.c (in
vhost_virtqueue_pending and vhost_virtqueue_mask), since we may want to
start the guest notifiers before vhost starts and stop the guest
notifiers after vhost is stopped.

In particular, this has been observed during migration.

Reported-by: "Zhangjie (HZ)" <zhangjie14@huawei.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>

--

Zhang Jie, please test this patch to see if it fixes the issue.
---
 hw/net/vhost_net.c | 20 ++++++++++----------
 hw/virtio/vhost.c  |  2 --
 2 files changed, 10 insertions(+), 12 deletions(-)

Comments

Zhangjie (HZ) Aug. 18, 2014, 12:11 p.m. UTC | #1
On 2014/8/18 17:51, Jason Wang wrote:
> commit a9f98bb5ebe6fb1869321dcc58e72041ae626ad8 vhost: multiqueue
> support changed the order of stopping the device. Previously
> vhost_dev_stop would disable backend and only afterwards, unset guest
> notifiers. We now unset guest notifiers while vhost is still
> active. This can lose interrupts causing guest networking to fail.
> 
> Additionally, remove the hdev->started assert in vhost.c since we may
> want to start the guest notifiers before vhost starts and stop the
> guest notifiers after vhost is stopped.
> 
> In particular, this has been observed during migration.

> 
Thanks! I will test your patch today! :-)
William Dauchy Aug. 18, 2014, 1:20 p.m. UTC | #2
On Mon, Aug 18, 2014 at 11:51 AM, Jason Wang <jasowang@redhat.com> wrote:
>  err:
> @@ -254,16 +254,16 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
>      VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
>      int i, r;
>
> +    for (i = 0; i < total_queues; i++) {
> +        vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
> +    }
> +
>      r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
>      if (r < 0) {
>          fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
>          fflush(stderr);
>      }
>      assert(r >= 0);
> -
> -    for (i = 0; i < total_queues; i++) {
> -        vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
> -    }
>  }

Since commit ed8b4af ("Refactor virtio-net to use generic get_vhost_net"),
get_vhost_net is used instead of tap_get_vhost_net.

Could you rebase your patch to facilitate testing, or is the use of
tap_get_vhost_net intentional?

Thanks,
Michael S. Tsirkin Aug. 18, 2014, 7:53 p.m. UTC | #3
On Mon, Aug 18, 2014 at 05:51:31PM +0800, Jason Wang wrote:
> commit a9f98bb5ebe6fb1869321dcc58e72041ae626ad8 vhost: multiqueue
> support changed the order of stopping the device. Previously
> vhost_dev_stop would disable backend and only afterwards, unset guest
> notifiers. We now unset guest notifiers while vhost is still
> active. This can lose interrupts causing guest networking to fail.
> 
> Additionally, remove the hdev->started assert in vhost.c since we may
> want to start the guest notifiers before vhost starts and stop the
> guest notifiers after vhost is stopped.
> 
> In particular, this has been observed during migration.
> 
> Reported-by: "Zhangjie (HZ)" <zhangjie14@huawei.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>


This doesn't seem to apply to master.
Can you rebase please?
> --
> 
> Zhang Jie, please test this patch to see if it fixes the issue.
> ---
>  hw/net/vhost_net.c | 20 ++++++++++----------
>  hw/virtio/vhost.c  |  2 --
>  2 files changed, 10 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 006576d..72084ba 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -223,6 +223,12 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>          goto err;
>      }
>  
> +    r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
> +    if (r < 0) {
> +        error_report("Error binding guest notifier: %d", -r);
> +        goto err;
> +    }
> +
>      for (i = 0; i < total_queues; i++) {
>          r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i * 2);
>  
> @@ -231,12 +237,6 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>          }
>      }
>  
> -    r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
> -    if (r < 0) {
> -        error_report("Error binding guest notifier: %d", -r);
> -        goto err;
> -    }
> -
>      return 0;
>  
>  err:
> @@ -254,16 +254,16 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
>      VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
>      int i, r;
>  
> +    for (i = 0; i < total_queues; i++) {
> +        vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
> +    }
> +
>      r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
>      if (r < 0) {
>          fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
>          fflush(stderr);
>      }
>      assert(r >= 0);
> -
> -    for (i = 0; i < total_queues; i++) {
> -        vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
> -    }
>  }
>  
>  void vhost_net_cleanup(struct vhost_net *net)
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 9e336ad..d74514a 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -969,7 +969,6 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
>  bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
>  {
>      struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
> -    assert(hdev->started);
>      assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
>      return event_notifier_test_and_clear(&vq->masked_notifier);
>  }
> @@ -981,7 +980,6 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
>      struct VirtQueue *vvq = virtio_get_queue(vdev, n);
>      int r, index = n - hdev->vq_index;
>  
> -    assert(hdev->started);
>      assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
>  
>      struct vhost_vring_file file = {
> -- 
> 1.8.3.1
Jason Wang Aug. 19, 2014, 2:46 a.m. UTC | #4
On 08/18/2014 08:11 PM, Zhangjie (HZ) wrote:
> On 2014/8/18 17:51, Jason Wang wrote:
>> commit a9f98bb5ebe6fb1869321dcc58e72041ae626ad8 vhost: multiqueue
>> support changed the order of stopping the device. Previously
>> vhost_dev_stop would disable backend and only afterwards, unset guest
>> notifiers. We now unset guest notifiers while vhost is still
>> active. This can lose interrupts causing guest networking to fail.
>>
>> Additionally, remove the hdev->started assert in vhost.c since we may
>> want to start the guest notifiers before vhost starts and stop the
>> guest notifiers after vhost is stopped.
>>
>> In particular, this has been observed during migration.
> Thanks! I will have a test about your patch today! :-)

The patch was reported as not applying cleanly. I will rebase it and send
a new one.

Please test the new patch instead.

Thanks.
Jason Wang Aug. 19, 2014, 2:48 a.m. UTC | #5
On 08/18/2014 09:20 PM, William Dauchy wrote:
> On Mon, Aug 18, 2014 at 11:51 AM, Jason Wang <jasowang@redhat.com> wrote:
>>  err:
>> @@ -254,16 +254,16 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
>>      VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
>>      int i, r;
>>
>> +    for (i = 0; i < total_queues; i++) {
>> +        vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
>> +    }
>> +
>>      r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
>>      if (r < 0) {
>>          fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
>>          fflush(stderr);
>>      }
>>      assert(r >= 0);
>> -
>> -    for (i = 0; i < total_queues; i++) {
>> -        vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
>> -    }
>>  }
> since
> ed8b4af Refactor virtio-net to use generic get_vhost_net
> get_vhost_net is used instead of tap_get_vhost_net
>
> Could you rebase your patch to facilitate tests or is it intentional?

Not intentional, my tree is out of date. I will rebase the patch.

Thanks for pointing this out.
>
> Thanks,
Jason Wang Aug. 19, 2014, 2:49 a.m. UTC | #6
On 08/19/2014 03:53 AM, Michael S. Tsirkin wrote:
> On Mon, Aug 18, 2014 at 05:51:31PM +0800, Jason Wang wrote:
>> > commit a9f98bb5ebe6fb1869321dcc58e72041ae626ad8 vhost: multiqueue
>> > support changed the order of stopping the device. Previously
>> > vhost_dev_stop would disable backend and only afterwards, unset guest
>> > notifiers. We now unset guest notifiers while vhost is still
>> > active. This can lose interrupts causing guest networking to fail.
>> > 
>> > Additionally, remove the hdev->started assert in vhost.c since we may
>> > want to start the guest notifiers before vhost starts and stop the
>> > guest notifiers after vhost is stopped.
>> > 
>> > In particular, this has been observed during migration.
>> > 
>> > Reported-by: "Zhangjie (HZ)" <zhangjie14@huawei.com>
>> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> This doesn't seem to apply to master.
> Can you rebase please?

Yes, will send a new version.
diff mbox

Patch

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 006576d..72084ba 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -223,6 +223,12 @@  int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
         goto err;
     }
 
+    r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
+    if (r < 0) {
+        error_report("Error binding guest notifier: %d", -r);
+        goto err;
+    }
+
     for (i = 0; i < total_queues; i++) {
         r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i * 2);
 
@@ -231,12 +237,6 @@  int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
         }
     }
 
-    r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
-    if (r < 0) {
-        error_report("Error binding guest notifier: %d", -r);
-        goto err;
-    }
-
     return 0;
 
 err:
@@ -254,16 +254,16 @@  void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
     int i, r;
 
+    for (i = 0; i < total_queues; i++) {
+        vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
+    }
+
     r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
     if (r < 0) {
         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
         fflush(stderr);
     }
     assert(r >= 0);
-
-    for (i = 0; i < total_queues; i++) {
-        vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
-    }
 }
 
 void vhost_net_cleanup(struct vhost_net *net)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 9e336ad..d74514a 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -969,7 +969,6 @@  void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
 bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
 {
     struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
-    assert(hdev->started);
     assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
     return event_notifier_test_and_clear(&vq->masked_notifier);
 }
@@ -981,7 +980,6 @@  void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
     struct VirtQueue *vvq = virtio_get_queue(vdev, n);
     int r, index = n - hdev->vq_index;
 
-    assert(hdev->started);
     assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
 
     struct vhost_vring_file file = {