diff mbox

[v2,for-2.3] virtio-blk: correctly dirty guest memory

Message ID 1427997044-392-1-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini April 2, 2015, 5:50 p.m. UTC
After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
the zero size ultimately is used to compute virtqueue_push's len
argument.  Therefore, reads from virtio-blk devices did not
migrate their results correctly.  (Writes were okay).

Save the size in virtio_blk_handle_request, and use it when the request
is completed.

Based on a patch by Wen Congyang.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/block/dataplane/virtio-blk.c |  3 +--
 hw/block/virtio-blk.c           | 13 ++++++++++++-
 include/hw/virtio/virtio-blk.h  |  1 +
 3 files changed, 14 insertions(+), 3 deletions(-)

Comments

Michael S. Tsirkin April 2, 2015, 5:54 p.m. UTC | #1
On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote:
> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> the zero size ultimately is used to compute virtqueue_push's len
> argument.  Therefore, reads from virtio-blk devices did not
> migrate their results correctly.  (Writes were okay).
> 
> Save the size in virtio_blk_handle_request, and use it when the request
> is completed.
> 
> Based on a patch by Wen Congyang.
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

If you are touching this code anyway, maybe it makes
sense to merge Rusty's virtio len patches?
I didn't want them in 2.3 since they aren't critical,
but we are changing these lines anyway, maybe make
them correct?

> ---
>  hw/block/dataplane/virtio-blk.c |  3 +--
>  hw/block/virtio-blk.c           | 13 ++++++++++++-
>  include/hw/virtio/virtio-blk.h  |  1 +
>  3 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> index cd41478..3db139b 100644
> --- a/hw/block/dataplane/virtio-blk.c
> +++ b/hw/block/dataplane/virtio-blk.c
> @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
>      VirtIOBlockDataPlane *s = req->dev->dataplane;
>      stb_p(&req->in->status, status);
>  
> -    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
> -               req->qiov.size + sizeof(*req->in));
> +    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len);
>  
>      /* Suppress notification to guest by BH and its scheduled
>       * flag because requests are completed as a batch after io
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 000c38d..9546fd2 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
>      VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
>      req->dev = s;
>      req->qiov.size = 0;
> +    req->in_len = 0;
>      req->next = NULL;
>      req->mr_next = NULL;
>      return req;
> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
>      trace_virtio_blk_req_complete(req, status);
>  
>      stb_p(&req->in->status, status);
> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
> +    virtqueue_push(s->vq, &req->elem, req->in_len);
>      virtio_notify(vdev, s->vq);
>  }
>  
> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>          if (ret) {
>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
> +            /* Note that memory may be dirtied on read failure.  If the
> +             * virtio request is not completed here, as is the case for
> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
> +             * correctly during live migration.  While this is ugly,
> +             * it is acceptable because the device is free to write to
> +             * the memory until the request is completed (which will
> +             * happen on the other side of the migration).
> +             */
>              if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
>                  continue;
>              }
> @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
>          exit(1);
>      }
>  
> +    /* We always touch the last byte, so just see how big in_iov is.  */
> +    req->in_len = iov_size(in_iov, in_num);
>      req->in = (void *)in_iov[in_num - 1].iov_base
>                + in_iov[in_num - 1].iov_len
>                - sizeof(struct virtio_blk_inhdr);
> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
> index b3ffcd9..6bf5905 100644
> --- a/include/hw/virtio/virtio-blk.h
> +++ b/include/hw/virtio/virtio-blk.h
> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
>      struct virtio_blk_inhdr *in;
>      struct virtio_blk_outhdr out;
>      QEMUIOVector qiov;
> +    size_t in_len;
>      struct VirtIOBlockReq *next;
>      struct VirtIOBlockReq *mr_next;
>      BlockAcctCookie acct;
> -- 
> 2.3.4
Paolo Bonzini April 2, 2015, 5:57 p.m. UTC | #2
On 02/04/2015 19:54, Michael S. Tsirkin wrote:
> On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote:
>> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
>> the zero size ultimately is used to compute virtqueue_push's len
>> argument.  Therefore, reads from virtio-blk devices did not
>> migrate their results correctly.  (Writes were okay).
>>
>> Save the size in virtio_blk_handle_request, and use it when the request
>> is completed.
>>
>> Based on a patch by Wen Congyang.
>>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> If you are touching this code anyway, maybe it makes
> sense to merge Rusty's virtio len patches?
> I didn't want them in 2.3 since they aren't critical,
> but we are changing these lines anyway, maybe make
> them correct?

My patch is a strict superset of Rusty's patch 2/2.  In fact the first
version was very similar to his, but neither my v1 nor his patch covers
SCSI or flush or get-serial requests.

Rusty's patch 1/2 only adds an assertion, I think.  It's not critical
for 2.3.

Paolo

>> ---
>>  hw/block/dataplane/virtio-blk.c |  3 +--
>>  hw/block/virtio-blk.c           | 13 ++++++++++++-
>>  include/hw/virtio/virtio-blk.h  |  1 +
>>  3 files changed, 14 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
>> index cd41478..3db139b 100644
>> --- a/hw/block/dataplane/virtio-blk.c
>> +++ b/hw/block/dataplane/virtio-blk.c
>> @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
>>      VirtIOBlockDataPlane *s = req->dev->dataplane;
>>      stb_p(&req->in->status, status);
>>  
>> -    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
>> -               req->qiov.size + sizeof(*req->in));
>> +    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len);
>>  
>>      /* Suppress notification to guest by BH and its scheduled
>>       * flag because requests are completed as a batch after io
>> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
>> index 000c38d..9546fd2 100644
>> --- a/hw/block/virtio-blk.c
>> +++ b/hw/block/virtio-blk.c
>> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
>>      VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
>>      req->dev = s;
>>      req->qiov.size = 0;
>> +    req->in_len = 0;
>>      req->next = NULL;
>>      req->mr_next = NULL;
>>      return req;
>> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
>>      trace_virtio_blk_req_complete(req, status);
>>  
>>      stb_p(&req->in->status, status);
>> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
>> +    virtqueue_push(s->vq, &req->elem, req->in_len);
>>      virtio_notify(vdev, s->vq);
>>  }
>>  
>> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>>          if (ret) {
>>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
>> +            /* Note that memory may be dirtied on read failure.  If the
>> +             * virtio request is not completed here, as is the case for
>> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
>> +             * correctly during live migration.  While this is ugly,
>> +             * it is acceptable because the device is free to write to
>> +             * the memory until the request is completed (which will
>> +             * happen on the other side of the migration).
>> +             */
>>              if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
>>                  continue;
>>              }
>> @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
>>          exit(1);
>>      }
>>  
>> +    /* We always touch the last byte, so just see how big in_iov is.  */
>> +    req->in_len = iov_size(in_iov, in_num);
>>      req->in = (void *)in_iov[in_num - 1].iov_base
>>                + in_iov[in_num - 1].iov_len
>>                - sizeof(struct virtio_blk_inhdr);
>> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
>> index b3ffcd9..6bf5905 100644
>> --- a/include/hw/virtio/virtio-blk.h
>> +++ b/include/hw/virtio/virtio-blk.h
>> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
>>      struct virtio_blk_inhdr *in;
>>      struct virtio_blk_outhdr out;
>>      QEMUIOVector qiov;
>> +    size_t in_len;
>>      struct VirtIOBlockReq *next;
>>      struct VirtIOBlockReq *mr_next;
>>      BlockAcctCookie acct;
>> -- 
>> 2.3.4
Michael S. Tsirkin April 2, 2015, 6:46 p.m. UTC | #3
On Thu, Apr 02, 2015 at 07:57:22PM +0200, Paolo Bonzini wrote:
> 
> 
> On 02/04/2015 19:54, Michael S. Tsirkin wrote:
> > On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote:
> >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> >> the zero size ultimately is used to compute virtqueue_push's len
> >> argument.  Therefore, reads from virtio-blk devices did not
> >> migrate their results correctly.  (Writes were okay).
> >>
> >> Save the size in virtio_blk_handle_request, and use it when the request
> >> is completed.
> >>
> >> Based on a patch by Wen Congyang.
> >>
> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > If you are touching this code anyway, maybe it makes
> > sense to merge Rusty's virtio len patches?
> > > I didn't want them in 2.3 since they aren't critical,
> > but we are changing these lines anyway, maybe make
> > them correct?
> 
> My patch is a strict superset of Rusty's patch 2/2.  In fact the first
> version was very similar to his, but neither my v1 nor his patch covers
> SCSI or flush or get-serial requests.

Oh, true in fact.  It might be a good idea to add something like this to
the commit log:

	Additionally, the virtio spec requires that the device write at least
	len bytes to the descriptor, so that the driver can rely on bytes
	0..len-1 having been initialized by the device. Specifically, it says
	len can be used as an optimization "for drivers using untrusted
	buffers: if you do not know exactly how much has been written by the
	device, you usually have to zero the buffer to ensure no data leakage
	occurs".

	We violated this rule in two cases: on write - len should be 0,
	request size was mistakenly used - and on read error - we don't
	know whether the whole request size was written, so again len
	should be set to 0.

> 
> Rusty's patch 1/2 only adds an assertion, I think.  It's not critical
> for 2.3.
> 
> Paolo

I agree.

> >> ---
> >>  hw/block/dataplane/virtio-blk.c |  3 +--
> >>  hw/block/virtio-blk.c           | 13 ++++++++++++-
> >>  include/hw/virtio/virtio-blk.h  |  1 +
> >>  3 files changed, 14 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> >> index cd41478..3db139b 100644
> >> --- a/hw/block/dataplane/virtio-blk.c
> >> +++ b/hw/block/dataplane/virtio-blk.c
> >> @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
> >>      VirtIOBlockDataPlane *s = req->dev->dataplane;
> >>      stb_p(&req->in->status, status);
> >>  
> >> -    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
> >> -               req->qiov.size + sizeof(*req->in));
> >> +    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len);
> >>  
> >>      /* Suppress notification to guest by BH and its scheduled
> >>       * flag because requests are completed as a batch after io
> >> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> >> index 000c38d..9546fd2 100644
> >> --- a/hw/block/virtio-blk.c
> >> +++ b/hw/block/virtio-blk.c
> >> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
> >>      VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
> >>      req->dev = s;
> >>      req->qiov.size = 0;
> >> +    req->in_len = 0;
> >>      req->next = NULL;
> >>      req->mr_next = NULL;
> >>      return req;
> >> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
> >>      trace_virtio_blk_req_complete(req, status);
> >>  
> >>      stb_p(&req->in->status, status);
> >> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
> >> +    virtqueue_push(s->vq, &req->elem, req->in_len);
> >>      virtio_notify(vdev, s->vq);
> >>  }
> >>  
> >> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
> >>          if (ret) {
> >>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
> >>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
> >> +            /* Note that memory may be dirtied on read failure.  If the
> >> +             * virtio request is not completed here, as is the case for
> >> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
> >> +             * correctly during live migration.  While this is ugly,
> >> +             * it is acceptable because the device is free to write to
> >> +             * the memory until the request is completed (which will
> >> +             * happen on the other side of the migration).
> >> +             */
> >>              if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
> >>                  continue;
> >>              }
> >> @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
> >>          exit(1);
> >>      }
> >>  
> >> +    /* We always touch the last byte, so just see how big in_iov is.  */
> >> +    req->in_len = iov_size(in_iov, in_num);
> >>      req->in = (void *)in_iov[in_num - 1].iov_base
> >>                + in_iov[in_num - 1].iov_len
> >>                - sizeof(struct virtio_blk_inhdr);
> >> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
> >> index b3ffcd9..6bf5905 100644
> >> --- a/include/hw/virtio/virtio-blk.h
> >> +++ b/include/hw/virtio/virtio-blk.h
> >> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
> >>      struct virtio_blk_inhdr *in;
> >>      struct virtio_blk_outhdr out;
> >>      QEMUIOVector qiov;
> >> +    size_t in_len;
> >>      struct VirtIOBlockReq *next;
> >>      struct VirtIOBlockReq *mr_next;
> >>      BlockAcctCookie acct;
> >> -- 
> >> 2.3.4
Paolo Bonzini April 2, 2015, 6:53 p.m. UTC | #4
On 02/04/2015 20:46, Michael S. Tsirkin wrote:
> Oh, true in fact.  It might be a good idea to add something like this to
> the commit log:
> 
> 	Additionally, virtio spec requires that device writes at least
> 	len bytes to descriptor - so that driver can rely on
> 	bytes 0..len-1 being initialized by device. Specifically, it says
> 	len can be used as an optimization "for drivers using untrusted
> 	buffers: if you do not know exactly how much has been written by the
> 	device, you usually have to zero the buffer to ensure no data leakage
> 	occurs".
> 
> 	We violated this rule in two cases: on write - len should be 0,
> 	request size was mistakenly used

Should be 1 due to the status byte.

> - and on read error - we don't
> 	know whether the whole request size was written, so again len
> 	should be set to 0.

Oh no wait... my patch does not handle the read error case.

The len argument to virtqueue_push is being overloaded with two meanings:

1) a value that is >= the actual count, used to set the dirty bitmap

2) a value that should be <= the actual count, used as mentioned in your
English text above.

This is a problem for read errors, because the status byte is at the end
of the input buffers.  So (1) requires that you set len = size+1, while
(2) requires that you set len = 0.

My patch only deals with (1), which is a correctness problem for
migration, as Wen debugged.  It is a 2.3 regression.

I don't think (2) is fixable without changing the virtqueue API, and it
is not a regression.

Paolo
Michael S. Tsirkin April 2, 2015, 7:17 p.m. UTC | #5
On Thu, Apr 02, 2015 at 08:53:04PM +0200, Paolo Bonzini wrote:
> 
> 
> On 02/04/2015 20:46, Michael S. Tsirkin wrote:
> > Oh, true in fact.  It might be a good idea to add something like this to
> > the commit log:
> > 
> > 	Additionally, virtio spec requires that device writes at least
> > 	len bytes to descriptor - so that driver can rely on
> > 	bytes 0..len-1 being initialized by device. Specifically, it says
> > 	len can be used as an optimization "for drivers using untrusted
> > 	buffers: if you do not know exactly how much has been written by the
> > 	device, you usually have to zero the buffer to ensure no data leakage
> > 	occurs".
> > 
> > 	We violated this rule in two cases: on write - len should be 0,
> > 	request size was mistakenly used
> 
> Should be 1 due to the status byte.
> 
> > - and on read error - we don't
> > 	know whether the whole request size was written, so again len
> > 	should be set to 0.
> 
> Oh no wait... my patch does not handle the read error case.
> 
> The len argument to virtqueue_push is being overloaded with two meanings:
> 
> 1) a value that is >= the actual count, used to set the dirty bitmap
> 
> 2) a value that should be <= the actual count, used as mentioned in your
> English text above.
> 
> This is a problem for read errors, because the status byte is at the end
> of the input buffers.  So (1) requires that you set len = size+1, while
> (2) requires that you set len = 0.
> 
> My patch only deals with (1), which is a correctness problem for
> migration, as Wen debugged.  It is a 2.3 regression.
> 
> I don't think (2) is fixable without changing the virtqueue API, and it
> is not a regression.
> 
> Paolo

I agree here. If you respin for any reason, documenting that you
fixed (1) might be a good idea.
Li Zhijian April 3, 2015, 4:13 a.m. UTC | #6
On 04/03/2015 01:50 AM, Paolo Bonzini wrote:
> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> the zero size ultimately is used to compute virtqueue_push's len
> argument.  Therefore, reads from virtio-blk devices did not
> migrate their results correctly.  (Writes were okay).
>
> Save the size in virtio_blk_handle_request, and use it when the request
> is completed.
>
> Based on a patch by Wen Congyang.
>
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Li Zhijian <lizhijian@cn.fujitsu.com>

Thanks
Li Zhijian


> ---
>   hw/block/dataplane/virtio-blk.c |  3 +--
>   hw/block/virtio-blk.c           | 13 ++++++++++++-
>   include/hw/virtio/virtio-blk.h  |  1 +
>   3 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> index cd41478..3db139b 100644
> --- a/hw/block/dataplane/virtio-blk.c
> +++ b/hw/block/dataplane/virtio-blk.c
> @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
>       VirtIOBlockDataPlane *s = req->dev->dataplane;
>       stb_p(&req->in->status, status);
>   
> -    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
> -               req->qiov.size + sizeof(*req->in));
> +    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len);
>   
>       /* Suppress notification to guest by BH and its scheduled
>        * flag because requests are completed as a batch after io
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 000c38d..9546fd2 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
>       VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
>       req->dev = s;
>       req->qiov.size = 0;
> +    req->in_len = 0;
>       req->next = NULL;
>       req->mr_next = NULL;
>       return req;
> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
>       trace_virtio_blk_req_complete(req, status);
>   
>       stb_p(&req->in->status, status);
> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
> +    virtqueue_push(s->vq, &req->elem, req->in_len);
>       virtio_notify(vdev, s->vq);
>   }
>   
> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>           if (ret) {
>               int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>               bool is_read = !(p & VIRTIO_BLK_T_OUT);
> +            /* Note that memory may be dirtied on read failure.  If the
> +             * virtio request is not completed here, as is the case for
> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
> +             * correctly during live migration.  While this is ugly,
> +             * it is acceptable because the device is free to write to
> +             * the memory until the request is completed (which will
> +             * happen on the other side of the migration).
> +             */
>               if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
>                   continue;
>               }
> @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
>           exit(1);
>       }
>   
> +    /* We always touch the last byte, so just see how big in_iov is.  */
> +    req->in_len = iov_size(in_iov, in_num);
>       req->in = (void *)in_iov[in_num - 1].iov_base
>                 + in_iov[in_num - 1].iov_len
>                 - sizeof(struct virtio_blk_inhdr);
> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
> index b3ffcd9..6bf5905 100644
> --- a/include/hw/virtio/virtio-blk.h
> +++ b/include/hw/virtio/virtio-blk.h
> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
>       struct virtio_blk_inhdr *in;
>       struct virtio_blk_outhdr out;
>       QEMUIOVector qiov;
> +    size_t in_len;
>       struct VirtIOBlockReq *next;
>       struct VirtIOBlockReq *mr_next;
>       BlockAcctCookie acct;
Stefan Hajnoczi April 7, 2015, 2:11 p.m. UTC | #7
On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote:
> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> the zero size ultimately is used to compute virtqueue_push's len
> argument.  Therefore, reads from virtio-blk devices did not
> migrate their results correctly.  (Writes were okay).
> 
> Save the size in virtio_blk_handle_request, and use it when the request
> is completed.
> 
> Based on a patch by Wen Congyang.
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  hw/block/dataplane/virtio-blk.c |  3 +--
>  hw/block/virtio-blk.c           | 13 ++++++++++++-
>  include/hw/virtio/virtio-blk.h  |  1 +
>  3 files changed, 14 insertions(+), 3 deletions(-)

This also changes len for VIRTIO_BLK_T_SCSI_CMD and VIRTIO_BLK_T_GET_ID.
The Linux virtio_blk.ko and Windows viostor drivers ignore the value
anyway so what's the worst thing that could happen? :)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Stefan Hajnoczi April 8, 2015, 9:38 a.m. UTC | #8
On Tue, Apr 7, 2015 at 3:11 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote:
>> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
>> the zero size ultimately is used to compute virtqueue_push's len
>> argument.  Therefore, reads from virtio-blk devices did not
>> migrate their results correctly.  (Writes were okay).
>>
>> Save the size in virtio_blk_handle_request, and use it when the request
>> is completed.
>>
>> Based on a patch by Wen Congyang.
>>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  hw/block/dataplane/virtio-blk.c |  3 +--
>>  hw/block/virtio-blk.c           | 13 ++++++++++++-
>>  include/hw/virtio/virtio-blk.h  |  1 +
>>  3 files changed, 14 insertions(+), 3 deletions(-)
>
> This also changes len for VIRTIO_BLK_T_SCSI_CMD and VIRTIO_BLK_T_GET_ID.
> The Linux virtio_blk.ko and Windows viostor drivers ignore the value
> anyway so what's the worst thing that could happen? :)
>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

I'd like to merge Paolo's patch for QEMU 2.3-rc3.  It ensures that
virtio-blk unmaps and dirties "in" buffers correctly.

The Message-Id for the patch I'm referring to is
<1427997044-392-1-git-send-email-pbonzini@redhat.com>.

Any objections?  Speak now if it's critical, otherwise please send
follow-up patches for QEMU 2.4.

Stefan
Stefan Hajnoczi April 8, 2015, 9:42 a.m. UTC | #9
On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote:
> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> the zero size ultimately is used to compute virtqueue_push's len
> argument.  Therefore, reads from virtio-blk devices did not
> migrate their results correctly.  (Writes were okay).
> 
> Save the size in virtio_blk_handle_request, and use it when the request
> is completed.
> 
> Based on a patch by Wen Congyang.
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  hw/block/dataplane/virtio-blk.c |  3 +--
>  hw/block/virtio-blk.c           | 13 ++++++++++++-

Applied to my block tree for QEMU 2.3-rc3:
https://github.com/stefanha/qemu/commits/block

I can still drop it if there is an objection, so just let me know.
Several related points have been mentioned but they are orthogonal and
less urgent.  Please address them in separate QEMU 2.4 patches.

Stefan
diff mbox

Patch

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index cd41478..3db139b 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -77,8 +77,7 @@  static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
     VirtIOBlockDataPlane *s = req->dev->dataplane;
     stb_p(&req->in->status, status);
 
-    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
-               req->qiov.size + sizeof(*req->in));
+    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len);
 
     /* Suppress notification to guest by BH and its scheduled
      * flag because requests are completed as a batch after io
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d..9546fd2 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,6 +33,7 @@  VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
     VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
     req->dev = s;
     req->qiov.size = 0;
+    req->in_len = 0;
     req->next = NULL;
     req->mr_next = NULL;
     return req;
@@ -54,7 +55,7 @@  static void virtio_blk_complete_request(VirtIOBlockReq *req,
     trace_virtio_blk_req_complete(req, status);
 
     stb_p(&req->in->status, status);
-    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+    virtqueue_push(s->vq, &req->elem, req->in_len);
     virtio_notify(vdev, s->vq);
 }
 
@@ -102,6 +103,14 @@  static void virtio_blk_rw_complete(void *opaque, int ret)
         if (ret) {
             int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
             bool is_read = !(p & VIRTIO_BLK_T_OUT);
+            /* Note that memory may be dirtied on read failure.  If the
+             * virtio request is not completed here, as is the case for
+             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
+             * correctly during live migration.  While this is ugly,
+             * it is acceptable because the device is free to write to
+             * the memory until the request is completed (which will
+             * happen on the other side of the migration).
+             */
             if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
                 continue;
             }
@@ -496,6 +505,8 @@  void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
         exit(1);
     }
 
+    /* We always touch the last byte, so just see how big in_iov is.  */
+    req->in_len = iov_size(in_iov, in_num);
     req->in = (void *)in_iov[in_num - 1].iov_base
               + in_iov[in_num - 1].iov_len
               - sizeof(struct virtio_blk_inhdr);
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b3ffcd9..6bf5905 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -67,6 +67,7 @@  typedef struct VirtIOBlockReq {
     struct virtio_blk_inhdr *in;
     struct virtio_blk_outhdr out;
     QEMUIOVector qiov;
+    size_t in_len;
     struct VirtIOBlockReq *next;
     struct VirtIOBlockReq *mr_next;
     BlockAcctCookie acct;