Patchwork [RFC,19/20] Insert do_event_tap() to virtio-{blk, net}, comment out assert() on cpu_single_env temporally.

login
register
mail settings
Submitter Yoshiaki Tamura
Date April 21, 2010, 5:57 a.m.
Message ID <1271829445-5328-20-git-send-email-tamura.yoshiaki@lab.ntt.co.jp>
Download mbox | patch
Permalink /patch/50630/
State New
Headers show

Comments

Yoshiaki Tamura - April 21, 2010, 5:57 a.m.
do_event_tap() is inserted to functions which actually fire outputs.
By synchronizing VMs before outputs are fired, we can failover to the
receiver upon failure.  To save VM continuously, comment out assert()
on cpu_single_env temporally.

Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
---
 hw/virtio-blk.c |    2 ++
 hw/virtio-net.c |    2 ++
 qemu-kvm.c      |    7 ++++++-
 3 files changed, 10 insertions(+), 1 deletions(-)
Anthony Liguori - April 22, 2010, 7:39 p.m.
On 04/21/2010 12:57 AM, Yoshiaki Tamura wrote:
> do_event_tap() is inserted to functions which actually fire outputs.
> By synchronizing VMs before outputs are fired, we can failover to the
> receiver upon failure.  To save VM continuously, comment out assert()
> on cpu_single_env temporally.
>
> Signed-off-by: Yoshiaki Tamura<tamura.yoshiaki@lab.ntt.co.jp>
> ---
>   hw/virtio-blk.c |    2 ++
>   hw/virtio-net.c |    2 ++
>   qemu-kvm.c      |    7 ++++++-
>   3 files changed, 10 insertions(+), 1 deletions(-)
>    

This would be better done in the generic layers (the block and net 
layers respectively).  Then it would work with virtio and emulated devices.

Regards,

Anthony Liguori

> diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
> index b80402d..1dd1c31 100644
> --- a/hw/virtio-blk.c
> +++ b/hw/virtio-blk.c
> @@ -327,6 +327,8 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>           .old_bs = NULL,
>       };
>
> +    do_event_tap();
> +
>       while ((req = virtio_blk_get_request(s))) {
>           virtio_blk_handle_request(req,&mrb);
>       }
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index 5c0093e..1a32bf3 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -667,6 +667,8 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
>   {
>       VirtIONet *n = to_virtio_net(vdev);
>
> +    do_event_tap();
> +
>       if (n->tx_timer_active) {
>           virtio_queue_set_notification(vq, 1);
>           qemu_del_timer(n->tx_timer);
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index 1414f49..769bc95 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -935,8 +935,12 @@ int kvm_run(CPUState *env)
>
>       post_kvm_run(kvm, env);
>
> +    /* TODO: we need to prevent tapping events that derived from the
> +     * same VMEXIT. This needs more info from the kernel. */
>   #if defined(KVM_CAP_COALESCED_MMIO)
>       if (kvm_state->coalesced_mmio) {
> +        /* prevent from tapping events while handling coalesced_mmio */
> +        event_tap_suspend();
>           struct kvm_coalesced_mmio_ring *ring =
>               (void *) run + kvm_state->coalesced_mmio * PAGE_SIZE;
>           while (ring->first != ring->last) {
> @@ -946,6 +950,7 @@ int kvm_run(CPUState *env)
>               smp_wmb();
>               ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
>           }
> +        event_tap_resume();
>       }
>   #endif
>
> @@ -1770,7 +1775,7 @@ static void resume_all_threads(void)
>   {
>       CPUState *penv = first_cpu;
>
> -    assert(!cpu_single_env);
> +    /* assert(!cpu_single_env); */
>
>       while (penv) {
>           penv->stop = 0;
>
Yoshiaki Tamura - April 23, 2010, 4:51 a.m.
Anthony Liguori wrote:
> On 04/21/2010 12:57 AM, Yoshiaki Tamura wrote:
>> do_event_tap() is inserted to functions which actually fire outputs.
>> By synchronizing VMs before outputs are fired, we can failover to the
>> receiver upon failure. To save VM continuously, comment out assert()
>> on cpu_single_env temporally.
>>
>> Signed-off-by: Yoshiaki Tamura<tamura.yoshiaki@lab.ntt.co.jp>
>> ---
>> hw/virtio-blk.c | 2 ++
>> hw/virtio-net.c | 2 ++
>> qemu-kvm.c | 7 ++++++-
>> 3 files changed, 10 insertions(+), 1 deletions(-)
>
> This would be better done in the generic layers (the block and net
> layers respectively). Then it would work with virtio and emulated devices.

I agree with your opinion that it's better if we can handle any emulated devices 
at once.  However, I have a question here that, if we put do_event_tap() to the 
generic layers, emulated devices state would have already been proceeded, and it 
won't be possible to reproduce those I/O after failover?  If I were wrong, I 
would be happy to move it, but if I were right, there are would be two 
approaches to overcome this:

1. Sync I/O requests to the receiver, and upon failover, release those requests 
before running the guest VM.
2. Copy the emulated devices state before starting emulating, and once it comes 
to the generic layer, start the synchronizing using the copied state.

We should also consider the guest's VCPU state.  I previously had similar 
discussion with Avi.  I would like to reconfirm his idea too.

>
> Regards,
>
> Anthony Liguori
>
>> diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
>> index b80402d..1dd1c31 100644
>> --- a/hw/virtio-blk.c
>> +++ b/hw/virtio-blk.c
>> @@ -327,6 +327,8 @@ static void virtio_blk_handle_output(VirtIODevice
>> *vdev, VirtQueue *vq)
>> .old_bs = NULL,
>> };
>>
>> + do_event_tap();
>> +
>> while ((req = virtio_blk_get_request(s))) {
>> virtio_blk_handle_request(req,&mrb);
>> }
>> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
>> index 5c0093e..1a32bf3 100644
>> --- a/hw/virtio-net.c
>> +++ b/hw/virtio-net.c
>> @@ -667,6 +667,8 @@ static void virtio_net_handle_tx(VirtIODevice
>> *vdev, VirtQueue *vq)
>> {
>> VirtIONet *n = to_virtio_net(vdev);
>>
>> + do_event_tap();
>> +
>> if (n->tx_timer_active) {
>> virtio_queue_set_notification(vq, 1);
>> qemu_del_timer(n->tx_timer);
>> diff --git a/qemu-kvm.c b/qemu-kvm.c
>> index 1414f49..769bc95 100644
>> --- a/qemu-kvm.c
>> +++ b/qemu-kvm.c
>> @@ -935,8 +935,12 @@ int kvm_run(CPUState *env)
>>
>> post_kvm_run(kvm, env);
>>
>> + /* TODO: we need to prevent tapping events that derived from the
>> + * same VMEXIT. This needs more info from the kernel. */
>> #if defined(KVM_CAP_COALESCED_MMIO)
>> if (kvm_state->coalesced_mmio) {
>> + /* prevent from tapping events while handling coalesced_mmio */
>> + event_tap_suspend();
>> struct kvm_coalesced_mmio_ring *ring =
>> (void *) run + kvm_state->coalesced_mmio * PAGE_SIZE;
>> while (ring->first != ring->last) {
>> @@ -946,6 +950,7 @@ int kvm_run(CPUState *env)
>> smp_wmb();
>> ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
>> }
>> + event_tap_resume();
>> }
>> #endif
>>
>> @@ -1770,7 +1775,7 @@ static void resume_all_threads(void)
>> {
>> CPUState *penv = first_cpu;
>>
>> - assert(!cpu_single_env);
>> + /* assert(!cpu_single_env); */
>>
>> while (penv) {
>> penv->stop = 0;
>
>
>
>

Patch

diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index b80402d..1dd1c31 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -327,6 +327,8 @@  static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         .old_bs = NULL,
     };
 
+    do_event_tap();
+
     while ((req = virtio_blk_get_request(s))) {
         virtio_blk_handle_request(req, &mrb);
     }
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5c0093e..1a32bf3 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -667,6 +667,8 @@  static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
 
+    do_event_tap();
+
     if (n->tx_timer_active) {
         virtio_queue_set_notification(vq, 1);
         qemu_del_timer(n->tx_timer);
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 1414f49..769bc95 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -935,8 +935,12 @@  int kvm_run(CPUState *env)
 
     post_kvm_run(kvm, env);
 
+    /* TODO: we need to prevent tapping events that derived from the
+     * same VMEXIT. This needs more info from the kernel. */
 #if defined(KVM_CAP_COALESCED_MMIO)
     if (kvm_state->coalesced_mmio) {
+        /* prevent from tapping events while handling coalesced_mmio */
+        event_tap_suspend();
         struct kvm_coalesced_mmio_ring *ring =
             (void *) run + kvm_state->coalesced_mmio * PAGE_SIZE;
         while (ring->first != ring->last) {
@@ -946,6 +950,7 @@  int kvm_run(CPUState *env)
             smp_wmb();
             ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
         }
+        event_tap_resume();
     }
 #endif
 
@@ -1770,7 +1775,7 @@  static void resume_all_threads(void)
 {
     CPUState *penv = first_cpu;
 
-    assert(!cpu_single_env);
+    /* assert(!cpu_single_env); */
 
     while (penv) {
         penv->stop = 0;