Patchwork Re: [PATCH 06/19] virtio: update last_avail_idx when inuse is decreased.

login
register
mail settings
Submitter Yoshiaki Tamura
Date Dec. 24, 2010, 11:22 a.m.
Message ID <AANLkTi=7bS=W+FZihBya-pRXR2asQ6BgSTBPcPewgHBF@mail.gmail.com>
Download mbox | patch
Permalink /patch/76629/
State New
Headers show

Comments

Yoshiaki Tamura - Dec. 24, 2010, 11:22 a.m.
2010/12/24 Michael S. Tsirkin <mst@redhat.com>:
> On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote:
>> virtio save/load is currently sending last_avail_idx, but inuse isn't.
>> This causes inconsistent state when using Kemari which replays
>> outstanding requests on the secondary.  By letting last_avail_idx to
>> be updated after inuse is decreased, it would be possible to replay
>> the outstanding requests.  Noth that live migration shouldn't be
>> affected because it waits until flushing all requests.  Also in
>> conjunction with event-tap, requests inversion should be avoided.
>>
>> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
>
> I think I understood the request inversion. My question now is,
> event-tap transfers inuse events as well, wont the same
> request be repeated twice?
>
>> ---
>>  hw/virtio.c |    8 +++++++-
>>  1 files changed, 7 insertions(+), 1 deletions(-)
>>
>> diff --git a/hw/virtio.c b/hw/virtio.c
>> index 07dbf86..f915c46 100644
>> --- a/hw/virtio.c
>> +++ b/hw/virtio.c
>> @@ -72,7 +72,7 @@ struct VirtQueue
>>      VRing vring;
>>      target_phys_addr_t pa;
>>      uint16_t last_avail_idx;
>> -    int inuse;
>> +    uint16_t inuse;
>>      uint16_t vector;
>>      void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
>>      VirtIODevice *vdev;
>> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
>>          qemu_put_be32(f, vdev->vq[i].vring.num);
>>          qemu_put_be64(f, vdev->vq[i].pa);
>>          qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
>> +        qemu_put_be16s(f, &vdev->vq[i].inuse);
>>          if (vdev->binding->save_queue)
>>              vdev->binding->save_queue(vdev->binding_opaque, i, f);
>>      }
>> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
>>          vdev->vq[i].vring.num = qemu_get_be32(f);
>>          vdev->vq[i].pa = qemu_get_be64(f);
>>          qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
>> +        qemu_get_be16s(f, &vdev->vq[i].inuse);
>> +
>> +        /* revert last_avail_idx if there are outstanding emulation. */
>
> if there are outstanding emulation -> if requests
> are outstanding in event-tap?
>
>> +        vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse;
>> +        vdev->vq[i].inuse = 0;
>>
>
> I don't understand it, if this is all we do we can equivalently
> decrement on the sender side and avoid breaking migration compatibility?

It seems I sent the old patch...  I'm really sorry.  Currently
I'm taking the approach of updating last_avail_idx later.
Decreasing it looks scary to me if the guest already knows about it.


commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9
Author: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
Date:   Mon May 17 10:36:14 2010 +0900

    virtio: update last_avail_idx when inuse is decreased.

    virtio save/load currently sends last_avail_idx, but not inuse.
    This causes inconsistent state when using Kemari, which replays
    outstanding requests on the secondary.  By letting last_avail_idx
    be updated after inuse is decreased, it becomes possible to replay
    the outstanding requests.  Note that live migration shouldn't be
    affected because it waits until all requests are flushed.  Also, in
    conjunction with event-tap, request inversion should be avoided.

    Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>



>
>>          if (vdev->vq[i].pa) {
>>              uint16_t nheads;
>> --
>> 1.7.1.2
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Michael S. Tsirkin - Dec. 24, 2010, 12:40 p.m.
On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote:
> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>:
> > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote:
> >> virtio save/load is currently sending last_avail_idx, but inuse isn't.
> >> This causes inconsistent state when using Kemari which replays
> >> outstanding requests on the secondary.  By letting last_avail_idx to
> >> be updated after inuse is decreased, it would be possible to replay
> >> the outstanding requests.  Noth that live migration shouldn't be
> >> affected because it waits until flushing all requests.  Also in
> >> conjunction with event-tap, requests inversion should be avoided.
> >>
> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
> >
> > I think I understood the request inversion. My question now is,
> > event-tap transfers inuse events as well, wont the same
> > request be repeated twice?
> >
> >> ---
> >>  hw/virtio.c |    8 +++++++-
> >>  1 files changed, 7 insertions(+), 1 deletions(-)
> >>
> >> diff --git a/hw/virtio.c b/hw/virtio.c
> >> index 07dbf86..f915c46 100644
> >> --- a/hw/virtio.c
> >> +++ b/hw/virtio.c
> >> @@ -72,7 +72,7 @@ struct VirtQueue
> >>      VRing vring;
> >>      target_phys_addr_t pa;
> >>      uint16_t last_avail_idx;
> >> -    int inuse;
> >> +    uint16_t inuse;
> >>      uint16_t vector;
> >>      void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
> >>      VirtIODevice *vdev;
> >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
> >>          qemu_put_be32(f, vdev->vq[i].vring.num);
> >>          qemu_put_be64(f, vdev->vq[i].pa);
> >>          qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
> >> +        qemu_put_be16s(f, &vdev->vq[i].inuse);
> >>          if (vdev->binding->save_queue)
> >>              vdev->binding->save_queue(vdev->binding_opaque, i, f);
> >>      }
> >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
> >>          vdev->vq[i].vring.num = qemu_get_be32(f);
> >>          vdev->vq[i].pa = qemu_get_be64(f);
> >>          qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
> >> +        qemu_get_be16s(f, &vdev->vq[i].inuse);
> >> +
> >> +        /* revert last_avail_idx if there are outstanding emulation. */
> >
> > if there are outstanding emulation -> if requests
> > are outstanding in event-tap?
> >
> >> +        vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse;
> >> +        vdev->vq[i].inuse = 0;
> >>
> >
> > I don't understand it, if this is all we do we can equivalently
> > decrement on the sender side and avoid breaking migration compatibility?
> 
> It seems I sent the old patch...  I'm really sorry.  Currently
> I'm taking the approach to update last_avai_idx later.
> Decreasing looks scary to me if the guest already knows about it.

It seems exactly the same functionally.

> commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9
> Author: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
> Date:   Mon May 17 10:36:14 2010 +0900
> 
>     virtio: update last_avail_idx when inuse is decreased.
> 
>     virtio save/load is currently sending last_avail_idx, but inuse isn't.
>     This causes inconsistent state when using Kemari which replays
>     outstanding requests on the secondary.  By letting last_avail_idx to
>     be updated after inuse is decreased, it would be possible to replay
>     the outstanding requests.  Noth that live migration shouldn't be
>     affected because it waits until flushing all requests.  Also in
>     conjunction with event-tap, requests inversion should be avoided.
> 
>     Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
> 
> diff --git a/hw/virtio.c b/hw/virtio.c
> index 07dbf86..b1586da 100644
> --- a/hw/virtio.c
> +++ b/hw/virtio.c
> @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq)
> 
>  int virtio_queue_empty(VirtQueue *vq)
>  {
> -    return vring_avail_idx(vq) == vq->last_avail_idx;
> +    return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse;
>  }
> 
>  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
>      wmb();
>      trace_virtqueue_flush(vq, count);
>      vring_used_idx_increment(vq, count);
> +    vq->last_avail_idx += count;
>      vq->inuse -= count;
>  }
> 
> @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o
>      unsigned int idx;
>      int total_bufs, in_total, out_total;
> 
> -    idx = vq->last_avail_idx;
> +    idx = vq->last_avail_idx + vq->inuse;
> 
>      total_bufs = in_total = out_total = 0;
>      while (virtqueue_num_heads(vq, idx)) {
> @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
>      unsigned int i, head, max;
>      target_phys_addr_t desc_pa = vq->vring.desc;
> 
> -    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
> +    if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse))
>          return 0;
> 
>      /* When we start there are none of either input nor output. */
> @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> 
>      max = vq->vring.num;
> 
> -    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
> +    i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse);
> 
>      if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
>          if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
> @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
>      /* Always notify when queue is empty (when feature acknowledge) */
>      if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
>          (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
> -         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
> +         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse)))
>          return;
> 
>      trace_virtio_notify(vdev, vq);
> 
> 
> >
> >>          if (vdev->vq[i].pa) {
> >>              uint16_t nheads;
> >> --
> >> 1.7.1.2
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >
Yoshiaki Tamura - Dec. 24, 2010, 1:14 p.m.
2010/12/24 Michael S. Tsirkin <mst@redhat.com>:
> On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote:
>> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>:
>> > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote:
>> >> virtio save/load is currently sending last_avail_idx, but inuse isn't.
>> >> This causes inconsistent state when using Kemari which replays
>> >> outstanding requests on the secondary.  By letting last_avail_idx to
>> >> be updated after inuse is decreased, it would be possible to replay
>> >> the outstanding requests.  Noth that live migration shouldn't be
>> >> affected because it waits until flushing all requests.  Also in
>> >> conjunction with event-tap, requests inversion should be avoided.
>> >>
>> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
>> >
>> > I think I understood the request inversion. My question now is,
>> > event-tap transfers inuse events as well, wont the same
>> > request be repeated twice?
>> >
>> >> ---
>> >>  hw/virtio.c |    8 +++++++-
>> >>  1 files changed, 7 insertions(+), 1 deletions(-)
>> >>
>> >> diff --git a/hw/virtio.c b/hw/virtio.c
>> >> index 07dbf86..f915c46 100644
>> >> --- a/hw/virtio.c
>> >> +++ b/hw/virtio.c
>> >> @@ -72,7 +72,7 @@ struct VirtQueue
>> >>      VRing vring;
>> >>      target_phys_addr_t pa;
>> >>      uint16_t last_avail_idx;
>> >> -    int inuse;
>> >> +    uint16_t inuse;
>> >>      uint16_t vector;
>> >>      void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
>> >>      VirtIODevice *vdev;
>> >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
>> >>          qemu_put_be32(f, vdev->vq[i].vring.num);
>> >>          qemu_put_be64(f, vdev->vq[i].pa);
>> >>          qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
>> >> +        qemu_put_be16s(f, &vdev->vq[i].inuse);
>> >>          if (vdev->binding->save_queue)
>> >>              vdev->binding->save_queue(vdev->binding_opaque, i, f);
>> >>      }
>> >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
>> >>          vdev->vq[i].vring.num = qemu_get_be32(f);
>> >>          vdev->vq[i].pa = qemu_get_be64(f);
>> >>          qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
>> >> +        qemu_get_be16s(f, &vdev->vq[i].inuse);
>> >> +
>> >> +        /* revert last_avail_idx if there are outstanding emulation. */
>> >
>> > if there are outstanding emulation -> if requests
>> > are outstanding in event-tap?
>> >
>> >> +        vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse;
>> >> +        vdev->vq[i].inuse = 0;
>> >>
>> >
>> > I don't understand it, if this is all we do we can equivalently
>> > decrement on the sender side and avoid breaking migration compatibility?
>>
>> It seems I sent the old patch...  I'm really sorry.  Currently
>> I'm taking the approach to update last_avai_idx later.
>> Decreasing looks scary to me if the guest already knows about it.
>
> It seems exactly the same functionally.

If it is the same, I'm fine with going with the decreasing approach.
Is it fine for the guest?  Is last_avail_idx irrelevant to the
guest's behavior?

Yoshi

>> commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9
>> Author: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
>> Date:   Mon May 17 10:36:14 2010 +0900
>>
>>     virtio: update last_avail_idx when inuse is decreased.
>>
>>     virtio save/load is currently sending last_avail_idx, but inuse isn't.
>>     This causes inconsistent state when using Kemari which replays
>>     outstanding requests on the secondary.  By letting last_avail_idx to
>>     be updated after inuse is decreased, it would be possible to replay
>>     the outstanding requests.  Noth that live migration shouldn't be
>>     affected because it waits until flushing all requests.  Also in
>>     conjunction with event-tap, requests inversion should be avoided.
>>
>>     Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
>>
>> diff --git a/hw/virtio.c b/hw/virtio.c
>> index 07dbf86..b1586da 100644
>> --- a/hw/virtio.c
>> +++ b/hw/virtio.c
>> @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq)
>>
>>  int virtio_queue_empty(VirtQueue *vq)
>>  {
>> -    return vring_avail_idx(vq) == vq->last_avail_idx;
>> +    return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse;
>>  }
>>
>>  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
>> @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
>>      wmb();
>>      trace_virtqueue_flush(vq, count);
>>      vring_used_idx_increment(vq, count);
>> +    vq->last_avail_idx += count;
>>      vq->inuse -= count;
>>  }
>>
>> @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o
>>      unsigned int idx;
>>      int total_bufs, in_total, out_total;
>>
>> -    idx = vq->last_avail_idx;
>> +    idx = vq->last_avail_idx + vq->inuse;
>>
>>      total_bufs = in_total = out_total = 0;
>>      while (virtqueue_num_heads(vq, idx)) {
>> @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
>>      unsigned int i, head, max;
>>      target_phys_addr_t desc_pa = vq->vring.desc;
>>
>> -    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
>> +    if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse))
>>          return 0;
>>
>>      /* When we start there are none of either input nor output. */
>> @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
>>
>>      max = vq->vring.num;
>>
>> -    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
>> +    i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse);
>>
>>      if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
>>          if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
>> @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
>>      /* Always notify when queue is empty (when feature acknowledge) */
>>      if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
>>          (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
>> -         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
>> +         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse)))
>>          return;
>>
>>      trace_virtio_notify(vdev, vq);
>>
>>
>> >
>> >>          if (vdev->vq[i].pa) {
>> >>              uint16_t nheads;
>> >> --
>> >> 1.7.1.2
>> > --
>> > To unsubscribe from this list: send the line "unsubscribe kvm" in
>> > the body of a message to majordomo@vger.kernel.org
>> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> >
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Michael S. Tsirkin - Dec. 24, 2010, 1:23 p.m.
On Fri, Dec 24, 2010 at 10:14:50PM +0900, Yoshiaki Tamura wrote:
> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>:
> > On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote:
> >> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>:
> >> > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote:
> >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't.
> >> >> This causes inconsistent state when using Kemari which replays
> >> >> outstanding requests on the secondary.  By letting last_avail_idx to
> >> >> be updated after inuse is decreased, it would be possible to replay
> >> >> the outstanding requests.  Noth that live migration shouldn't be
> >> >> affected because it waits until flushing all requests.  Also in
> >> >> conjunction with event-tap, requests inversion should be avoided.
> >> >>
> >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
> >> >
> >> > I think I understood the request inversion. My question now is,
> >> > event-tap transfers inuse events as well, wont the same
> >> > request be repeated twice?
> >> >
> >> >> ---
> >> >>  hw/virtio.c |    8 +++++++-
> >> >>  1 files changed, 7 insertions(+), 1 deletions(-)
> >> >>
> >> >> diff --git a/hw/virtio.c b/hw/virtio.c
> >> >> index 07dbf86..f915c46 100644
> >> >> --- a/hw/virtio.c
> >> >> +++ b/hw/virtio.c
> >> >> @@ -72,7 +72,7 @@ struct VirtQueue
> >> >>      VRing vring;
> >> >>      target_phys_addr_t pa;
> >> >>      uint16_t last_avail_idx;
> >> >> -    int inuse;
> >> >> +    uint16_t inuse;
> >> >>      uint16_t vector;
> >> >>      void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
> >> >>      VirtIODevice *vdev;
> >> >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
> >> >>          qemu_put_be32(f, vdev->vq[i].vring.num);
> >> >>          qemu_put_be64(f, vdev->vq[i].pa);
> >> >>          qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
> >> >> +        qemu_put_be16s(f, &vdev->vq[i].inuse);
> >> >>          if (vdev->binding->save_queue)
> >> >>              vdev->binding->save_queue(vdev->binding_opaque, i, f);
> >> >>      }
> >> >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
> >> >>          vdev->vq[i].vring.num = qemu_get_be32(f);
> >> >>          vdev->vq[i].pa = qemu_get_be64(f);
> >> >>          qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
> >> >> +        qemu_get_be16s(f, &vdev->vq[i].inuse);
> >> >> +
> >> >> +        /* revert last_avail_idx if there are outstanding emulation. */
> >> >
> >> > if there are outstanding emulation -> if requests
> >> > are outstanding in event-tap?
> >> >
> >> >> +        vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse;
> >> >> +        vdev->vq[i].inuse = 0;
> >> >>
> >> >
> >> > I don't understand it, if this is all we do we can equivalently
> >> > decrement on the sender side and avoid breaking migration compatibility?
> >>
> >> It seems I sent the old patch...  I'm really sorry.  Currently
> >> I'm taking the approach to update last_avai_idx later.
> >> Decreasing looks scary to me if the guest already knows about it.
> >
> > It seems exactly the same functionally.
> 
> If it is the same I'm fine to go with the decreasing approach.
> Is it fine for the guest?  Is last_avai_idx irrelevant to the
> guest's behavior?
> 
> Yoshi

At least at the moment, yes.

> >> commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9
> >> Author: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
> >> Date:   Mon May 17 10:36:14 2010 +0900
> >>
> >>     virtio: update last_avail_idx when inuse is decreased.
> >>
> >>     virtio save/load is currently sending last_avail_idx, but inuse isn't.
> >>     This causes inconsistent state when using Kemari which replays
> >>     outstanding requests on the secondary.  By letting last_avail_idx to
> >>     be updated after inuse is decreased, it would be possible to replay
> >>     the outstanding requests.  Noth that live migration shouldn't be
> >>     affected because it waits until flushing all requests.  Also in
> >>     conjunction with event-tap, requests inversion should be avoided.
> >>
> >>     Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
> >>
> >> diff --git a/hw/virtio.c b/hw/virtio.c
> >> index 07dbf86..b1586da 100644
> >> --- a/hw/virtio.c
> >> +++ b/hw/virtio.c
> >> @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq)
> >>
> >>  int virtio_queue_empty(VirtQueue *vq)
> >>  {
> >> -    return vring_avail_idx(vq) == vq->last_avail_idx;
> >> +    return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse;
> >>  }
> >>
> >>  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> >> @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
> >>      wmb();
> >>      trace_virtqueue_flush(vq, count);
> >>      vring_used_idx_increment(vq, count);
> >> +    vq->last_avail_idx += count;
> >>      vq->inuse -= count;
> >>  }
> >>
> >> @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o
> >>      unsigned int idx;
> >>      int total_bufs, in_total, out_total;
> >>
> >> -    idx = vq->last_avail_idx;
> >> +    idx = vq->last_avail_idx + vq->inuse;
> >>
> >>      total_bufs = in_total = out_total = 0;
> >>      while (virtqueue_num_heads(vq, idx)) {
> >> @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> >>      unsigned int i, head, max;
> >>      target_phys_addr_t desc_pa = vq->vring.desc;
> >>
> >> -    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
> >> +    if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse))
> >>          return 0;
> >>
> >>      /* When we start there are none of either input nor output. */
> >> @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
> >>
> >>      max = vq->vring.num;
> >>
> >> -    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
> >> +    i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse);
> >>
> >>      if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
> >>          if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
> >> @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
> >>      /* Always notify when queue is empty (when feature acknowledge) */
> >>      if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
> >>          (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
> >> -         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
> >> +         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse)))
> >>          return;
> >>
> >>      trace_virtio_notify(vdev, vq);
> >>
> >>
> >> >
> >> >>          if (vdev->vq[i].pa) {
> >> >>              uint16_t nheads;
> >> >> --
> >> >> 1.7.1.2
> >> > --
> >> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> >> > the body of a message to majordomo@vger.kernel.org
> >> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >
Yoshiaki Tamura - Dec. 24, 2010, 1:31 p.m.
2010/12/24 Michael S. Tsirkin <mst@redhat.com>:
> On Fri, Dec 24, 2010 at 10:14:50PM +0900, Yoshiaki Tamura wrote:
>> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>:
>> > On Fri, Dec 24, 2010 at 08:22:00PM +0900, Yoshiaki Tamura wrote:
>> >> 2010/12/24 Michael S. Tsirkin <mst@redhat.com>:
>> >> > On Fri, Dec 24, 2010 at 12:18:15PM +0900, Yoshiaki Tamura wrote:
>> >> >> virtio save/load is currently sending last_avail_idx, but inuse isn't.
>> >> >> This causes inconsistent state when using Kemari which replays
>> >> >> outstanding requests on the secondary.  By letting last_avail_idx to
>> >> >> be updated after inuse is decreased, it would be possible to replay
>> >> >> the outstanding requests.  Noth that live migration shouldn't be
>> >> >> affected because it waits until flushing all requests.  Also in
>> >> >> conjunction with event-tap, requests inversion should be avoided.
>> >> >>
>> >> >> Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
>> >> >
>> >> > I think I understood the request inversion. My question now is,
>> >> > event-tap transfers inuse events as well, wont the same
>> >> > request be repeated twice?
>> >> >
>> >> >> ---
>> >> >>  hw/virtio.c |    8 +++++++-
>> >> >>  1 files changed, 7 insertions(+), 1 deletions(-)
>> >> >>
>> >> >> diff --git a/hw/virtio.c b/hw/virtio.c
>> >> >> index 07dbf86..f915c46 100644
>> >> >> --- a/hw/virtio.c
>> >> >> +++ b/hw/virtio.c
>> >> >> @@ -72,7 +72,7 @@ struct VirtQueue
>> >> >>      VRing vring;
>> >> >>      target_phys_addr_t pa;
>> >> >>      uint16_t last_avail_idx;
>> >> >> -    int inuse;
>> >> >> +    uint16_t inuse;
>> >> >>      uint16_t vector;
>> >> >>      void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
>> >> >>      VirtIODevice *vdev;
>> >> >> @@ -671,6 +671,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
>> >> >>          qemu_put_be32(f, vdev->vq[i].vring.num);
>> >> >>          qemu_put_be64(f, vdev->vq[i].pa);
>> >> >>          qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
>> >> >> +        qemu_put_be16s(f, &vdev->vq[i].inuse);
>> >> >>          if (vdev->binding->save_queue)
>> >> >>              vdev->binding->save_queue(vdev->binding_opaque, i, f);
>> >> >>      }
>> >> >> @@ -710,6 +711,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
>> >> >>          vdev->vq[i].vring.num = qemu_get_be32(f);
>> >> >>          vdev->vq[i].pa = qemu_get_be64(f);
>> >> >>          qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
>> >> >> +        qemu_get_be16s(f, &vdev->vq[i].inuse);
>> >> >> +
>> >> >> +        /* revert last_avail_idx if there are outstanding emulation. */
>> >> >
>> >> > if there are outstanding emulation -> if requests
>> >> > are outstanding in event-tap?
>> >> >
>> >> >> +        vdev->vq[i].last_avail_idx -= vdev->vq[i].inuse;
>> >> >> +        vdev->vq[i].inuse = 0;
>> >> >>
>> >> >
>> >> > I don't understand it, if this is all we do we can equivalently
>> >> > decrement on the sender side and avoid breaking migration compatibility?
>> >>
>> >> It seems I sent the old patch...  I'm really sorry.  Currently
>> >> I'm taking the approach to update last_avai_idx later.
>> >> Decreasing looks scary to me if the guest already knows about it.
>> >
>> > It seems exactly the same functionally.
>>
>> If it is the same I'm fine to go with the decreasing approach.
>> Is it fine for the guest?  Is last_avai_idx irrelevant to the
>> guest's behavior?
>>
>> Yoshi
>
> At least at the moment, yes.

OK.  I'll put it in the next spin.  Thanks for your advice!

Yoshi

>
>> >> commit 8ac6ba51cc558b3bfcac7a5814d92f275ee874e9
>> >> Author: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
>> >> Date:   Mon May 17 10:36:14 2010 +0900
>> >>
>> >>     virtio: update last_avail_idx when inuse is decreased.
>> >>
>> >>     virtio save/load is currently sending last_avail_idx, but inuse isn't.
>> >>     This causes inconsistent state when using Kemari which replays
>> >>     outstanding requests on the secondary.  By letting last_avail_idx to
>> >>     be updated after inuse is decreased, it would be possible to replay
>> >>     the outstanding requests.  Noth that live migration shouldn't be
>> >>     affected because it waits until flushing all requests.  Also in
>> >>     conjunction with event-tap, requests inversion should be avoided.
>> >>
>> >>     Signed-off-by: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
>> >>
>> >> diff --git a/hw/virtio.c b/hw/virtio.c
>> >> index 07dbf86..b1586da 100644
>> >> --- a/hw/virtio.c
>> >> +++ b/hw/virtio.c
>> >> @@ -198,7 +198,7 @@ int virtio_queue_ready(VirtQueue *vq)
>> >>
>> >>  int virtio_queue_empty(VirtQueue *vq)
>> >>  {
>> >> -    return vring_avail_idx(vq) == vq->last_avail_idx;
>> >> +    return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse;
>> >>  }
>> >>
>> >>  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
>> >> @@ -238,6 +238,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
>> >>      wmb();
>> >>      trace_virtqueue_flush(vq, count);
>> >>      vring_used_idx_increment(vq, count);
>> >> +    vq->last_avail_idx += count;
>> >>      vq->inuse -= count;
>> >>  }
>> >>
>> >> @@ -306,7 +307,7 @@ int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o
>> >>      unsigned int idx;
>> >>      int total_bufs, in_total, out_total;
>> >>
>> >> -    idx = vq->last_avail_idx;
>> >> +    idx = vq->last_avail_idx + vq->inuse;
>> >>
>> >>      total_bufs = in_total = out_total = 0;
>> >>      while (virtqueue_num_heads(vq, idx)) {
>> >> @@ -386,7 +387,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
>> >>      unsigned int i, head, max;
>> >>      target_phys_addr_t desc_pa = vq->vring.desc;
>> >>
>> >> -    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
>> >> +    if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse))
>> >>          return 0;
>> >>
>> >>      /* When we start there are none of either input nor output. */
>> >> @@ -394,7 +395,7 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
>> >>
>> >>      max = vq->vring.num;
>> >>
>> >> -    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
>> >> +    i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse);
>> >>
>> >>      if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
>> >>          if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
>> >> @@ -626,7 +627,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
>> >>      /* Always notify when queue is empty (when feature acknowledge) */
>> >>      if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
>> >>          (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
>> >> -         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
>> >> +         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse)))
>> >>          return;
>> >>
>> >>      trace_virtio_notify(vdev, vq);
>> >>
>> >>
>> >> >
>> >> >>          if (vdev->vq[i].pa) {
>> >> >>              uint16_t nheads;
>> >> >> --
>> >> >> 1.7.1.2
>> >> > --
>> >> > To unsubscribe from this list: send the line "unsubscribe kvm" in
>> >> > the body of a message to majordomo@vger.kernel.org
>> >> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> >> >
>> > --
>> > To unsubscribe from this list: send the line "unsubscribe kvm" in
>> > the body of a message to majordomo@vger.kernel.org
>> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> >
>
>

Patch

diff --git a/hw/virtio.c b/hw/virtio.c
index 07dbf86..b1586da 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -198,7 +198,7 @@  int virtio_queue_ready(VirtQueue *vq)

 int virtio_queue_empty(VirtQueue *vq)
 {
-    return vring_avail_idx(vq) == vq->last_avail_idx;
+    return vring_avail_idx(vq) == vq->last_avail_idx + vq->inuse;
 }

 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
@@ -238,6 +238,7 @@  void virtqueue_flush(VirtQueue *vq, unsigned int count)
     wmb();
     trace_virtqueue_flush(vq, count);
     vring_used_idx_increment(vq, count);
+    vq->last_avail_idx += count;
     vq->inuse -= count;
 }

@@ -306,7 +307,7 @@  int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int o
     unsigned int idx;
     int total_bufs, in_total, out_total;

-    idx = vq->last_avail_idx;
+    idx = vq->last_avail_idx + vq->inuse;

     total_bufs = in_total = out_total = 0;
     while (virtqueue_num_heads(vq, idx)) {
@@ -386,7 +387,7 @@  int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
     unsigned int i, head, max;
     target_phys_addr_t desc_pa = vq->vring.desc;

-    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
+    if (!virtqueue_num_heads(vq, vq->last_avail_idx + vq->inuse))
         return 0;

     /* When we start there are none of either input nor output. */
@@ -394,7 +395,7 @@  int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)

     max = vq->vring.num;

-    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
+    i = head = virtqueue_get_head(vq, vq->last_avail_idx + vq->inuse);

     if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
         if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
@@ -626,7 +627,7 @@  void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
     /* Always notify when queue is empty (when feature acknowledge) */
     if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
         (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
-         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
+         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx + vq->inuse)))
         return;

     trace_virtio_notify(vdev, vq);