diff mbox

[RFC,19/20] Insert do_event_tap() to virtio-{blk, net}, comment out assert() on cpu_single_env temporally.

Message ID 1271829445-5328-20-git-send-email-tamura.yoshiaki@lab.ntt.co.jp
State New
Headers show

Commit Message

Yoshiaki Tamura April 21, 2010, 5:57 a.m. UTC
do_event_tap() is inserted to functions which actually fire outputs.
By synchronizing VMs before outputs are fired, we can failover to the
receiver upon failure.  To save VM continuously, comment out assert()
on cpu_single_env temporally.

Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
---
 hw/virtio-blk.c |    2 ++
 hw/virtio-net.c |    2 ++
 qemu-kvm.c      |    7 ++++++-
 3 files changed, 10 insertions(+), 1 deletions(-)

Comments

Anthony Liguori April 22, 2010, 7:39 p.m. UTC | #1
On 04/21/2010 12:57 AM, Yoshiaki Tamura wrote:
> do_event_tap() is inserted to functions which actually fire outputs.
> By synchronizing VMs before outputs are fired, we can failover to the
> receiver upon failure.  To save VM continuously, comment out assert()
> on cpu_single_env temporally.
>
> Signed-off-by: Yoshiaki Tamura<tamura.yoshiaki@lab.ntt.co.jp>
> ---
>   hw/virtio-blk.c |    2 ++
>   hw/virtio-net.c |    2 ++
>   qemu-kvm.c      |    7 ++++++-
>   3 files changed, 10 insertions(+), 1 deletions(-)
>    

This would be better done in the generic layers (the block and net 
layers respectively).  Then it would work with virtio and emulated devices.

Regards,

Anthony Liguori

> diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
> index b80402d..1dd1c31 100644
> --- a/hw/virtio-blk.c
> +++ b/hw/virtio-blk.c
> @@ -327,6 +327,8 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>           .old_bs = NULL,
>       };
>
> +    do_event_tap();
> +
>       while ((req = virtio_blk_get_request(s))) {
>           virtio_blk_handle_request(req,&mrb);
>       }
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index 5c0093e..1a32bf3 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -667,6 +667,8 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
>   {
>       VirtIONet *n = to_virtio_net(vdev);
>
> +    do_event_tap();
> +
>       if (n->tx_timer_active) {
>           virtio_queue_set_notification(vq, 1);
>           qemu_del_timer(n->tx_timer);
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index 1414f49..769bc95 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -935,8 +935,12 @@ int kvm_run(CPUState *env)
>
>       post_kvm_run(kvm, env);
>
> +    /* TODO: we need to prevent tapping events that derived from the
> +     * same VMEXIT. This needs more info from the kernel. */
>   #if defined(KVM_CAP_COALESCED_MMIO)
>       if (kvm_state->coalesced_mmio) {
> +        /* prevent from tapping events while handling coalesced_mmio */
> +        event_tap_suspend();
>           struct kvm_coalesced_mmio_ring *ring =
>               (void *) run + kvm_state->coalesced_mmio * PAGE_SIZE;
>           while (ring->first != ring->last) {
> @@ -946,6 +950,7 @@ int kvm_run(CPUState *env)
>               smp_wmb();
>               ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
>           }
> +        event_tap_resume();
>       }
>   #endif
>
> @@ -1770,7 +1775,7 @@ static void resume_all_threads(void)
>   {
>       CPUState *penv = first_cpu;
>
> -    assert(!cpu_single_env);
> +    /* assert(!cpu_single_env); */
>
>       while (penv) {
>           penv->stop = 0;
>
Yoshiaki Tamura April 23, 2010, 4:51 a.m. UTC | #2
Anthony Liguori wrote:
> On 04/21/2010 12:57 AM, Yoshiaki Tamura wrote:
>> do_event_tap() is inserted to functions which actually fire outputs.
>> By synchronizing VMs before outputs are fired, we can failover to the
>> receiver upon failure. To save VM continuously, comment out assert()
>> on cpu_single_env temporally.
>>
>> Signed-off-by: Yoshiaki Tamura<tamura.yoshiaki@lab.ntt.co.jp>
>> ---
>> hw/virtio-blk.c | 2 ++
>> hw/virtio-net.c | 2 ++
>> qemu-kvm.c | 7 ++++++-
>> 3 files changed, 10 insertions(+), 1 deletions(-)
>
> This would be better done in the generic layers (the block and net
> layers respectively). Then it would work with virtio and emulated devices.

I agree with your opinion that it's better if we can handle any emulated devices 
at once.  However, I have a question here that, if we put do_event_tap() to the 
generic layers, emulated devices state would have already been proceeded, and it 
won't be possible to reproduce those I/O after failover?  If I were wrong, I 
would be happy to move it, but if I were right, there are would be two 
approaches to overcome this:

1. Sync I/O requests to the receiver, and upon failover, release those requests 
before running the guest VM.
2. Copy the emulated devices state before starting emulating, and once it comes 
to the generic layer, start the synchronizing using the copied state.

We should also consider the guest's VCPU state.  I previously had similar 
discussion with Avi.  I would like to reconfirm his idea too.

>
> Regards,
>
> Anthony Liguori
>
>> diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
>> index b80402d..1dd1c31 100644
>> --- a/hw/virtio-blk.c
>> +++ b/hw/virtio-blk.c
>> @@ -327,6 +327,8 @@ static void virtio_blk_handle_output(VirtIODevice
>> *vdev, VirtQueue *vq)
>> .old_bs = NULL,
>> };
>>
>> + do_event_tap();
>> +
>> while ((req = virtio_blk_get_request(s))) {
>> virtio_blk_handle_request(req,&mrb);
>> }
>> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
>> index 5c0093e..1a32bf3 100644
>> --- a/hw/virtio-net.c
>> +++ b/hw/virtio-net.c
>> @@ -667,6 +667,8 @@ static void virtio_net_handle_tx(VirtIODevice
>> *vdev, VirtQueue *vq)
>> {
>> VirtIONet *n = to_virtio_net(vdev);
>>
>> + do_event_tap();
>> +
>> if (n->tx_timer_active) {
>> virtio_queue_set_notification(vq, 1);
>> qemu_del_timer(n->tx_timer);
>> diff --git a/qemu-kvm.c b/qemu-kvm.c
>> index 1414f49..769bc95 100644
>> --- a/qemu-kvm.c
>> +++ b/qemu-kvm.c
>> @@ -935,8 +935,12 @@ int kvm_run(CPUState *env)
>>
>> post_kvm_run(kvm, env);
>>
>> + /* TODO: we need to prevent tapping events that derived from the
>> + * same VMEXIT. This needs more info from the kernel. */
>> #if defined(KVM_CAP_COALESCED_MMIO)
>> if (kvm_state->coalesced_mmio) {
>> + /* prevent from tapping events while handling coalesced_mmio */
>> + event_tap_suspend();
>> struct kvm_coalesced_mmio_ring *ring =
>> (void *) run + kvm_state->coalesced_mmio * PAGE_SIZE;
>> while (ring->first != ring->last) {
>> @@ -946,6 +950,7 @@ int kvm_run(CPUState *env)
>> smp_wmb();
>> ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
>> }
>> + event_tap_resume();
>> }
>> #endif
>>
>> @@ -1770,7 +1775,7 @@ static void resume_all_threads(void)
>> {
>> CPUState *penv = first_cpu;
>>
>> - assert(!cpu_single_env);
>> + /* assert(!cpu_single_env); */
>>
>> while (penv) {
>> penv->stop = 0;
>
>
>
>
diff mbox

Patch

diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index b80402d..1dd1c31 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -327,6 +327,8 @@  static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         .old_bs = NULL,
     };
 
+    do_event_tap();
+
     while ((req = virtio_blk_get_request(s))) {
         virtio_blk_handle_request(req, &mrb);
     }
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5c0093e..1a32bf3 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -667,6 +667,8 @@  static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIONet *n = to_virtio_net(vdev);
 
+    do_event_tap();
+
     if (n->tx_timer_active) {
         virtio_queue_set_notification(vq, 1);
         qemu_del_timer(n->tx_timer);
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 1414f49..769bc95 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -935,8 +935,12 @@  int kvm_run(CPUState *env)
 
     post_kvm_run(kvm, env);
 
+    /* TODO: we need to prevent tapping events that derived from the
+     * same VMEXIT. This needs more info from the kernel. */
 #if defined(KVM_CAP_COALESCED_MMIO)
     if (kvm_state->coalesced_mmio) {
+        /* prevent from tapping events while handling coalesced_mmio */
+        event_tap_suspend();
         struct kvm_coalesced_mmio_ring *ring =
             (void *) run + kvm_state->coalesced_mmio * PAGE_SIZE;
         while (ring->first != ring->last) {
@@ -946,6 +950,7 @@  int kvm_run(CPUState *env)
             smp_wmb();
             ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
         }
+        event_tap_resume();
     }
 #endif
 
@@ -1770,7 +1775,7 @@  static void resume_all_threads(void)
 {
     CPUState *penv = first_cpu;
 
-    assert(!cpu_single_env);
+    /* assert(!cpu_single_env); */
 
     while (penv) {
         penv->stop = 0;