Message ID | 1427981862-19783-1-git-send-email-pbonzini@redhat.com |
---|---|
State | New |
Headers | show |
On Thu, 04/02 15:37, Paolo Bonzini wrote: > After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and > the zero size ultimately is used to compute virtqueue_push's len > argument. Therefore, reads from virtio-blk devices did not > migrate their results correctly. (Writes were okay). Can't we move qemu_iovec_destroy to virtio_blk_free_request? Fam > > Save the size in submit_requests, and use it when the request is > completed. > > Based on a patch by Wen Congyang. > > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > hw/block/dataplane/virtio-blk.c | 2 +- > hw/block/virtio-blk.c | 16 +++++++++++++++- > include/hw/virtio/virtio-blk.h | 1 + > 3 files changed, 17 insertions(+), 2 deletions(-) > > diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c > index cd41478..b37ede3 100644 > --- a/hw/block/dataplane/virtio-blk.c > +++ b/hw/block/dataplane/virtio-blk.c > @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) > stb_p(&req->in->status, status); > > vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, > - req->qiov.size + sizeof(*req->in)); > + req->read_size + sizeof(*req->in)); > > /* Suppress notification to guest by BH and its scheduled > * flag because requests are completed as a batch after io > diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c > index 000c38d..2f00dc4 100644 > --- a/hw/block/virtio-blk.c > +++ b/hw/block/virtio-blk.c > @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) > VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); > req->dev = s; > req->qiov.size = 0; > + req->read_size = 0; > req->next = NULL; > req->mr_next = NULL; > return req; > @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, > trace_virtio_blk_req_complete(req, status); > > stb_p(&req->in->status, status); > - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); > + virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in)); > virtio_notify(vdev, s->vq); > } > > @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) > if (ret) { > int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); > bool is_read = !(p & VIRTIO_BLK_T_OUT); > + /* Note that memory may be dirtied on read failure. If the > + * virtio request is not completed here, as is the case for > + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied > + * correctly during live migration. While this is ugly, > + * it is acceptable because the device is free to write to > + * the memory until the request is completed (which will > + * happen on the other side of the migration). > + */ > if (virtio_blk_handle_rw_error(req, -ret, is_read)) { > continue; > } > @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, > } > > if (is_write) { > + mrb->reqs[start]->read_size = 0; > blk_aio_writev(blk, sector_num, qiov, nb_sectors, > virtio_blk_rw_complete, mrb->reqs[start]); > } else { > + /* Save old qiov->size, which will be used in > + * virtio_blk_complete_request() > + */ > + mrb->reqs[start]->read_size = qiov->size; > blk_aio_readv(blk, sector_num, qiov, nb_sectors, > virtio_blk_rw_complete, mrb->reqs[start]); > } > diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h > index b3ffcd9..d73ec06 100644 > --- a/include/hw/virtio/virtio-blk.h > +++ b/include/hw/virtio/virtio-blk.h > @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { > struct virtio_blk_inhdr *in; > struct virtio_blk_outhdr out; > QEMUIOVector qiov; > + size_t read_size; > struct VirtIOBlockReq *next; > struct VirtIOBlockReq *mr_next; > BlockAcctCookie acct; > -- > 2.3.4 > >
On 02/04/2015 16:39, Fam Zheng wrote: > On Thu, 04/02 15:37, Paolo Bonzini wrote: >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and >> the zero size ultimately is used to compute virtqueue_push's len >> argument. Therefore, reads from virtio-blk devices did not >> migrate their results correctly. (Writes were okay). > > Can't we move qemu_iovec_destroy to virtio_blk_free_request? You would still have to add more code to differentiate reads and writes---I think. Paolo > Fam > >> >> Save the size in submit_requests, and use it when the request is >> completed. >> >> Based on a patch by Wen Congyang. >> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >> --- >> hw/block/dataplane/virtio-blk.c | 2 +- >> hw/block/virtio-blk.c | 16 +++++++++++++++- >> include/hw/virtio/virtio-blk.h | 1 + >> 3 files changed, 17 insertions(+), 2 deletions(-) >> >> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c >> index cd41478..b37ede3 100644 >> --- a/hw/block/dataplane/virtio-blk.c >> +++ b/hw/block/dataplane/virtio-blk.c >> @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) >> stb_p(&req->in->status, status); >> >> vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, >> - req->qiov.size + sizeof(*req->in)); >> + req->read_size + sizeof(*req->in)); >> >> /* Suppress notification to guest by BH and its scheduled >> * flag because requests are completed as a batch after io >> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c >> index 000c38d..2f00dc4 100644 >> --- a/hw/block/virtio-blk.c >> +++ b/hw/block/virtio-blk.c >> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) >> VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); >> req->dev = s; >> req->qiov.size = 0; >> + req->read_size = 0; >> req->next = NULL; >> req->mr_next = NULL; >> return req; >> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, >> trace_virtio_blk_req_complete(req, status); >> >> stb_p(&req->in->status, status); >> - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); >> + virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in)); >> virtio_notify(vdev, s->vq); >> } >> >> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) >> if (ret) { >> int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); >> bool is_read = !(p & VIRTIO_BLK_T_OUT); >> + /* Note that memory may be dirtied on read failure. If the >> + * virtio request is not completed here, as is the case for >> + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied >> + * correctly during live migration. While this is ugly, >> + * it is acceptable because the device is free to write to >> + * the memory until the request is completed (which will >> + * happen on the other side of the migration). >> + */ >> if (virtio_blk_handle_rw_error(req, -ret, is_read)) { >> continue; >> } >> @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, >> } >> >> if (is_write) { >> + mrb->reqs[start]->read_size = 0; >> blk_aio_writev(blk, sector_num, qiov, nb_sectors, >> virtio_blk_rw_complete, mrb->reqs[start]); >> } else { >> + /* Save old qiov->size, which will be used in >> + * virtio_blk_complete_request() >> + */ >> + mrb->reqs[start]->read_size = qiov->size; >> blk_aio_readv(blk, sector_num, qiov, nb_sectors, >> virtio_blk_rw_complete, mrb->reqs[start]); >> } >> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h >> index b3ffcd9..d73ec06 100644 >> --- a/include/hw/virtio/virtio-blk.h >> +++ b/include/hw/virtio/virtio-blk.h >> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { >> struct virtio_blk_inhdr *in; >> struct virtio_blk_outhdr out; >> QEMUIOVector qiov; >> + size_t read_size; >> struct VirtIOBlockReq *next; >> struct VirtIOBlockReq *mr_next; >> BlockAcctCookie acct; >> -- >> 2.3.4 >> >>
On Thu, 04/02 16:51, Paolo Bonzini wrote: > > > On 02/04/2015 16:39, Fam Zheng wrote: > > On Thu, 04/02 15:37, Paolo Bonzini wrote: > >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and > >> the zero size ultimately is used to compute virtqueue_push's len > >> argument. Therefore, reads from virtio-blk devices did not > >> migrate their results correctly. (Writes were okay). > > > > Can't we move qemu_iovec_destroy to virtio_blk_free_request? > > You would still have to add more code to differentiate reads and > writes---I think. Yeah, but the extra field will not be needed. Fam > > Paolo > > > Fam > > > >> > >> Save the size in submit_requests, and use it when the request is > >> completed. > >> > >> Based on a patch by Wen Congyang. > >> > >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > >> --- > >> hw/block/dataplane/virtio-blk.c | 2 +- > >> hw/block/virtio-blk.c | 16 +++++++++++++++- > >> include/hw/virtio/virtio-blk.h | 1 + > >> 3 files changed, 17 insertions(+), 2 deletions(-) > >> > >> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c > >> index cd41478..b37ede3 100644 > >> --- a/hw/block/dataplane/virtio-blk.c > >> +++ b/hw/block/dataplane/virtio-blk.c > >> @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) > >> stb_p(&req->in->status, status); > >> > >> vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, > >> - req->qiov.size + sizeof(*req->in)); > >> + req->read_size + sizeof(*req->in)); > >> > >> /* Suppress notification to guest by BH and its scheduled > >> * flag because requests are completed as a batch after io > >> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c > >> index 000c38d..2f00dc4 100644 > >> --- a/hw/block/virtio-blk.c > >> +++ b/hw/block/virtio-blk.c > >> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) > >> VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); > >> req->dev = s; > >> req->qiov.size = 0; > >> + req->read_size = 0; > >> req->next = NULL; > >> req->mr_next = NULL; > >> return req; > >> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, > >> trace_virtio_blk_req_complete(req, status); > >> > >> stb_p(&req->in->status, status); > >> - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); > >> + virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in)); > >> virtio_notify(vdev, s->vq); > >> } > >> > >> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) > >> if (ret) { > >> int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); > >> bool is_read = !(p & VIRTIO_BLK_T_OUT); > >> + /* Note that memory may be dirtied on read failure. If the > >> + * virtio request is not completed here, as is the case for > >> + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied > >> + * correctly during live migration. While this is ugly, > >> + * it is acceptable because the device is free to write to > >> + * the memory until the request is completed (which will > >> + * happen on the other side of the migration). > >> + */ > >> if (virtio_blk_handle_rw_error(req, -ret, is_read)) { > >> continue; > >> } > >> @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, > >> } > >> > >> if (is_write) { > >> + mrb->reqs[start]->read_size = 0; > >> blk_aio_writev(blk, sector_num, qiov, nb_sectors, > >> virtio_blk_rw_complete, mrb->reqs[start]); > >> } else { > >> + /* Save old qiov->size, which will be used in > >> + * virtio_blk_complete_request() > >> + */ > >> + mrb->reqs[start]->read_size = qiov->size; > >> blk_aio_readv(blk, sector_num, qiov, nb_sectors, > >> virtio_blk_rw_complete, mrb->reqs[start]); > >> } > >> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h > >> index b3ffcd9..d73ec06 100644 > >> --- a/include/hw/virtio/virtio-blk.h > >> +++ b/include/hw/virtio/virtio-blk.h > >> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { > >> struct virtio_blk_inhdr *in; > >> struct virtio_blk_outhdr out; > >> QEMUIOVector qiov; > >> + size_t read_size; > >> struct VirtIOBlockReq *next; > >> struct VirtIOBlockReq *mr_next; > >> BlockAcctCookie acct; > >> -- > >> 2.3.4 > >> > >>
On 02/04/2015 17:16, Fam Zheng wrote: >>>> > >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and >>>> > >> the zero size ultimately is used to compute virtqueue_push's len >>>> > >> argument. Therefore, reads from virtio-blk devices did not >>>> > >> migrate their results correctly. (Writes were okay). >>> > > >>> > > Can't we move qemu_iovec_destroy to virtio_blk_free_request? >> > >> > You would still have to add more code to differentiate reads and >> > writes---I think. > Yeah, but the extra field will not be needed. Can you post an alternative patch? One small complication is that is_write is in mrb but not in mrb->reqs[x]. virtio_blk_rw_complete is already doing int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); bool is_read = !(p & VIRTIO_BLK_T_OUT); but only in a slow path. Paolo
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index cd41478..b37ede3 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -78,7 +78,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) stb_p(&req->in->status, status); vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, - req->qiov.size + sizeof(*req->in)); + req->read_size + sizeof(*req->in)); /* Suppress notification to guest by BH and its scheduled * flag because requests are completed as a batch after io diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 000c38d..2f00dc4 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); req->dev = s; req->qiov.size = 0; + req->read_size = 0; req->next = NULL; req->mr_next = NULL; return req; @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, trace_virtio_blk_req_complete(req, status); stb_p(&req->in->status, status); - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); + virtqueue_push(s->vq, &req->elem, req->read_size + sizeof(*req->in)); virtio_notify(vdev, s->vq); } @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) if (ret) { int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); bool is_read = !(p & VIRTIO_BLK_T_OUT); + /* Note that memory may be dirtied on read failure. If the + * virtio request is not completed here, as is the case for + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied + * correctly during live migration. While this is ugly, + * it is acceptable because the device is free to write to + * the memory until the request is completed (which will + * happen on the other side of the migration). + */ if (virtio_blk_handle_rw_error(req, -ret, is_read)) { continue; } @@ -348,9 +357,14 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, } if (is_write) { + mrb->reqs[start]->read_size = 0; blk_aio_writev(blk, sector_num, qiov, nb_sectors, virtio_blk_rw_complete, mrb->reqs[start]); } else { + /* Save old qiov->size, which will be used in + * virtio_blk_complete_request() + */ + mrb->reqs[start]->read_size = qiov->size; blk_aio_readv(blk, sector_num, qiov, nb_sectors, virtio_blk_rw_complete, mrb->reqs[start]); } diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index b3ffcd9..d73ec06 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { struct virtio_blk_inhdr *in; struct virtio_blk_outhdr out; QEMUIOVector qiov; + size_t read_size; struct VirtIOBlockReq *next; struct VirtIOBlockReq *mr_next; BlockAcctCookie acct;