diff mbox

virtio-blk: correctly dirty guest memory

Message ID 1427981862-19783-1-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini April 2, 2015, 1:37 p.m. UTC
After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
the zero size ultimately is used to compute virtqueue_push's len
argument.  Therefore, reads from virtio-blk devices did not
migrate their results correctly.  (Writes were okay).

Save the size in submit_requests, and use it when the request is
completed.

Based on a patch by Wen Congyang.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/block/dataplane/virtio-blk.c |  2 +-
 hw/block/virtio-blk.c           | 16 +++++++++++++++-
 include/hw/virtio/virtio-blk.h  |  1 +
 3 files changed, 17 insertions(+), 2 deletions(-)

Comments

Fam Zheng April 2, 2015, 2:39 p.m. UTC | #1
On Thu, 04/02 15:37, Paolo Bonzini wrote:
> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> the zero size ultimately is used to compute virtqueue_push's len
> argument.  Therefore, reads from virtio-blk devices did not
> migrate their results correctly.  (Writes were okay).

Can't we move qemu_iovec_destroy to virtio_blk_free_request?

Fam

> 
> Save the size in submit_requests, and use it when the request is
> completed.
> 
> Based on a patch by Wen Congyang.
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  hw/block/dataplane/virtio-blk.c |  2 +-
>  hw/block/virtio-blk.c           | 16 +++++++++++++++-
>  include/hw/virtio/virtio-blk.h  |  1 +
>  3 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> index cd41478..b37ede3 100644
> --- a/hw/block/dataplane/virtio-blk.c
> +++ b/hw/block/dataplane/virtio-blk.c
> @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
>      stb_p(&req->in->status, status);
>  
>      vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
> -               req->qiov.size + sizeof(*req->in));
> +               req->read_size + sizeof(*req->in));
>  
>      /* Suppress notification to guest by BH and its scheduled
>       * flag because requests are completed as a batch after io
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 000c38d..2f00dc4 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
>      VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
>      req->dev = s;
>      req->qiov.size = 0;
> +    req->read_size = 0;
>      req->next = NULL;
>      req->mr_next = NULL;
>      return req;
> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
>      trace_virtio_blk_req_complete(req, status);
>  
>      stb_p(&req->in->status, status);
> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
> +    virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in));
>      virtio_notify(vdev, s->vq);
>  }
>  
> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>          if (ret) {
>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
> +            /* Note that memory may be dirtied on read failure.  If the
> +             * virtio request is not completed here, as is the case for
> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
> +             * correctly during live migration.  While this is ugly,
> +             * it is acceptable because the device is free to write to
> +             * the memory until the request is completed (which will
> +             * happen on the other side of the migration).
> +             */
>              if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
>                  continue;
>              }
> @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
>      }
>  
>      if (is_write) {
> +        mrb->reqs[start]->read_size = 0;
>          blk_aio_writev(blk, sector_num, qiov, nb_sectors,
>                         virtio_blk_rw_complete, mrb->reqs[start]);
>      } else {
> +        /* Save old qiov->size, which will be used in
> +         * virtio_blk_complete_request()
> +         */
> +        mrb->reqs[start]->read_size = qiov->size;
>          blk_aio_readv(blk, sector_num, qiov, nb_sectors,
>                        virtio_blk_rw_complete, mrb->reqs[start]);
>      }
> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
> index b3ffcd9..d73ec06 100644
> --- a/include/hw/virtio/virtio-blk.h
> +++ b/include/hw/virtio/virtio-blk.h
> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
>      struct virtio_blk_inhdr *in;
>      struct virtio_blk_outhdr out;
>      QEMUIOVector qiov;
> +    size_t read_size;
>      struct VirtIOBlockReq *next;
>      struct VirtIOBlockReq *mr_next;
>      BlockAcctCookie acct;
> -- 
> 2.3.4
> 
>
Paolo Bonzini April 2, 2015, 2:51 p.m. UTC | #2
On 02/04/2015 16:39, Fam Zheng wrote:
> On Thu, 04/02 15:37, Paolo Bonzini wrote:
>> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
>> the zero size ultimately is used to compute virtqueue_push's len
>> argument.  Therefore, reads from virtio-blk devices did not
>> migrate their results correctly.  (Writes were okay).
> 
> Can't we move qemu_iovec_destroy to virtio_blk_free_request?

You would still have to add more code to differentiate reads and
writes---I think.

Paolo

> Fam
> 
>>
>> Save the size in submit_requests, and use it when the request is
>> completed.
>>
>> Based on a patch by Wen Congyang.
>>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  hw/block/dataplane/virtio-blk.c |  2 +-
>>  hw/block/virtio-blk.c           | 16 +++++++++++++++-
>>  include/hw/virtio/virtio-blk.h  |  1 +
>>  3 files changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
>> index cd41478..b37ede3 100644
>> --- a/hw/block/dataplane/virtio-blk.c
>> +++ b/hw/block/dataplane/virtio-blk.c
>> @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
>>      stb_p(&req->in->status, status);
>>  
>>      vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
>> -               req->qiov.size + sizeof(*req->in));
>> +               req->read_size + sizeof(*req->in));
>>  
>>      /* Suppress notification to guest by BH and its scheduled
>>       * flag because requests are completed as a batch after io
>> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
>> index 000c38d..2f00dc4 100644
>> --- a/hw/block/virtio-blk.c
>> +++ b/hw/block/virtio-blk.c
>> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
>>      VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
>>      req->dev = s;
>>      req->qiov.size = 0;
>> +    req->read_size = 0;
>>      req->next = NULL;
>>      req->mr_next = NULL;
>>      return req;
>> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
>>      trace_virtio_blk_req_complete(req, status);
>>  
>>      stb_p(&req->in->status, status);
>> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
>> +    virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in));
>>      virtio_notify(vdev, s->vq);
>>  }
>>  
>> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>>          if (ret) {
>>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
>>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
>> +            /* Note that memory may be dirtied on read failure.  If the
>> +             * virtio request is not completed here, as is the case for
>> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
>> +             * correctly during live migration.  While this is ugly,
>> +             * it is acceptable because the device is free to write to
>> +             * the memory until the request is completed (which will
>> +             * happen on the other side of the migration).
>> +             */
>>              if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
>>                  continue;
>>              }
>> @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
>>      }
>>  
>>      if (is_write) {
>> +        mrb->reqs[start]->read_size = 0;
>>          blk_aio_writev(blk, sector_num, qiov, nb_sectors,
>>                         virtio_blk_rw_complete, mrb->reqs[start]);
>>      } else {
>> +        /* Save old qiov->size, which will be used in
>> +         * virtio_blk_complete_request()
>> +         */
>> +        mrb->reqs[start]->read_size = qiov->size;
>>          blk_aio_readv(blk, sector_num, qiov, nb_sectors,
>>                        virtio_blk_rw_complete, mrb->reqs[start]);
>>      }
>> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
>> index b3ffcd9..d73ec06 100644
>> --- a/include/hw/virtio/virtio-blk.h
>> +++ b/include/hw/virtio/virtio-blk.h
>> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
>>      struct virtio_blk_inhdr *in;
>>      struct virtio_blk_outhdr out;
>>      QEMUIOVector qiov;
>> +    size_t read_size;
>>      struct VirtIOBlockReq *next;
>>      struct VirtIOBlockReq *mr_next;
>>      BlockAcctCookie acct;
>> -- 
>> 2.3.4
>>
>>
Fam Zheng April 2, 2015, 3:16 p.m. UTC | #3
On Thu, 04/02 16:51, Paolo Bonzini wrote:
> 
> 
> On 02/04/2015 16:39, Fam Zheng wrote:
> > On Thu, 04/02 15:37, Paolo Bonzini wrote:
> >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
> >> the zero size ultimately is used to compute virtqueue_push's len
> >> argument.  Therefore, reads from virtio-blk devices did not
> >> migrate their results correctly.  (Writes were okay).
> > 
> > Can't we move qemu_iovec_destroy to virtio_blk_free_request?
> 
> You would still have to add more code to differentiate reads and
> writes---I think.

Yeah, but the extra field will not be needed.

Fam

> 
> Paolo
> 
> > Fam
> > 
> >>
> >> Save the size in submit_requests, and use it when the request is
> >> completed.
> >>
> >> Based on a patch by Wen Congyang.
> >>
> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> >> ---
> >>  hw/block/dataplane/virtio-blk.c |  2 +-
> >>  hw/block/virtio-blk.c           | 16 +++++++++++++++-
> >>  include/hw/virtio/virtio-blk.h  |  1 +
> >>  3 files changed, 17 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> >> index cd41478..b37ede3 100644
> >> --- a/hw/block/dataplane/virtio-blk.c
> >> +++ b/hw/block/dataplane/virtio-blk.c
> >> @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
> >>      stb_p(&req->in->status, status);
> >>  
> >>      vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
> >> -               req->qiov.size + sizeof(*req->in));
> >> +               req->read_size + sizeof(*req->in));
> >>  
> >>      /* Suppress notification to guest by BH and its scheduled
> >>       * flag because requests are completed as a batch after io
> >> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> >> index 000c38d..2f00dc4 100644
> >> --- a/hw/block/virtio-blk.c
> >> +++ b/hw/block/virtio-blk.c
> >> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
> >>      VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
> >>      req->dev = s;
> >>      req->qiov.size = 0;
> >> +    req->read_size = 0;
> >>      req->next = NULL;
> >>      req->mr_next = NULL;
> >>      return req;
> >> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
> >>      trace_virtio_blk_req_complete(req, status);
> >>  
> >>      stb_p(&req->in->status, status);
> >> -    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
> >> +    virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in));
> >>      virtio_notify(vdev, s->vq);
> >>  }
> >>  
> >> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
> >>          if (ret) {
> >>              int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
> >>              bool is_read = !(p & VIRTIO_BLK_T_OUT);
> >> +            /* Note that memory may be dirtied on read failure.  If the
> >> +             * virtio request is not completed here, as is the case for
> >> +             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
> >> +             * correctly during live migration.  While this is ugly,
> >> +             * it is acceptable because the device is free to write to
> >> +             * the memory until the request is completed (which will
> >> +             * happen on the other side of the migration).
> >> +             */
> >>              if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
> >>                  continue;
> >>              }
> >> @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
> >>      }
> >>  
> >>      if (is_write) {
> >> +        mrb->reqs[start]->read_size = 0;
> >>          blk_aio_writev(blk, sector_num, qiov, nb_sectors,
> >>                         virtio_blk_rw_complete, mrb->reqs[start]);
> >>      } else {
> >> +        /* Save old qiov->size, which will be used in
> >> +         * virtio_blk_complete_request()
> >> +         */
> >> +        mrb->reqs[start]->read_size = qiov->size;
> >>          blk_aio_readv(blk, sector_num, qiov, nb_sectors,
> >>                        virtio_blk_rw_complete, mrb->reqs[start]);
> >>      }
> >> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
> >> index b3ffcd9..d73ec06 100644
> >> --- a/include/hw/virtio/virtio-blk.h
> >> +++ b/include/hw/virtio/virtio-blk.h
> >> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
> >>      struct virtio_blk_inhdr *in;
> >>      struct virtio_blk_outhdr out;
> >>      QEMUIOVector qiov;
> >> +    size_t read_size;
> >>      struct VirtIOBlockReq *next;
> >>      struct VirtIOBlockReq *mr_next;
> >>      BlockAcctCookie acct;
> >> -- 
> >> 2.3.4
> >>
> >>
Paolo Bonzini April 2, 2015, 3:21 p.m. UTC | #4
On 02/04/2015 17:16, Fam Zheng wrote:
>>>> > >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and
>>>> > >> the zero size ultimately is used to compute virtqueue_push's len
>>>> > >> argument.  Therefore, reads from virtio-blk devices did not
>>>> > >> migrate their results correctly.  (Writes were okay).
>>> > > 
>>> > > Can't we move qemu_iovec_destroy to virtio_blk_free_request?
>> > 
>> > You would still have to add more code to differentiate reads and
>> > writes---I think.
> Yeah, but the extra field will not be needed.

Can you post an alternative patch?  One small complication is that
is_write is in mrb but not in mrb->reqs[x].  virtio_blk_rw_complete is
already doing

    int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
    bool is_read = !(p & VIRTIO_BLK_T_OUT);

but only in a slow path.

Paolo
diff mbox

Patch

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index cd41478..b37ede3 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -78,7 +78,7 @@  static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
     stb_p(&req->in->status, status);
 
     vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
-               req->qiov.size + sizeof(*req->in));
+               req->read_size + sizeof(*req->in));
 
     /* Suppress notification to guest by BH and its scheduled
      * flag because requests are completed as a batch after io
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d..2f00dc4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,6 +33,7 @@  VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
     VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
     req->dev = s;
     req->qiov.size = 0;
+    req->read_size = 0;
     req->next = NULL;
     req->mr_next = NULL;
     return req;
@@ -54,7 +55,7 @@  static void virtio_blk_complete_request(VirtIOBlockReq *req,
     trace_virtio_blk_req_complete(req, status);
 
     stb_p(&req->in->status, status);
-    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+    virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in));
     virtio_notify(vdev, s->vq);
 }
 
@@ -102,6 +103,14 @@  static void virtio_blk_rw_complete(void *opaque, int ret)
         if (ret) {
             int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
             bool is_read = !(p & VIRTIO_BLK_T_OUT);
+            /* Note that memory may be dirtied on read failure.  If the
+             * virtio request is not completed here, as is the case for
+             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
+             * correctly during live migration.  While this is ugly,
+             * it is acceptable because the device is free to write to
+             * the memory until the request is completed (which will
+             * happen on the other side of the migration).
+             */
             if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
                 continue;
             }
@@ -348,9 +357,14 @@  static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
     }
 
     if (is_write) {
+        mrb->reqs[start]->read_size = 0;
         blk_aio_writev(blk, sector_num, qiov, nb_sectors,
                        virtio_blk_rw_complete, mrb->reqs[start]);
     } else {
+        /* Save old qiov->size, which will be used in
+         * virtio_blk_complete_request()
+         */
+        mrb->reqs[start]->read_size = qiov->size;
         blk_aio_readv(blk, sector_num, qiov, nb_sectors,
                       virtio_blk_rw_complete, mrb->reqs[start]);
     }
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b3ffcd9..d73ec06 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -67,6 +67,7 @@  typedef struct VirtIOBlockReq {
     struct virtio_blk_inhdr *in;
     struct virtio_blk_outhdr out;
     QEMUIOVector qiov;
+    size_t read_size;
     struct VirtIOBlockReq *next;
     struct VirtIOBlockReq *mr_next;
     BlockAcctCookie acct;