| Message ID | 1427997044-392-1-git-send-email-pbonzini@redhat.com |
|---|---|
| State | New |
| Headers | show |
On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote: > After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and > the zero size ultimately is used to compute virtqueue_push's len > argument. Therefore, reads from virtio-blk devices did not > migrate their results correctly. (Writes were okay). > > Save the size in virtio_blk_handle_request, and use it when the request > is completed. > > Based on a patch by Wen Congyang. > > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> If you are touching this code anyway, maybe it makes sense to merge Rusty's virtio len patches? I didn't want them in 2.3 since they aren't ciritical, but we are changing these lines anyway, maybe make them correct? > --- > hw/block/dataplane/virtio-blk.c | 3 +-- > hw/block/virtio-blk.c | 13 ++++++++++++- > include/hw/virtio/virtio-blk.h | 1 + > 3 files changed, 14 insertions(+), 3 deletions(-) > > diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c > index cd41478..3db139b 100644 > --- a/hw/block/dataplane/virtio-blk.c > +++ b/hw/block/dataplane/virtio-blk.c > @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) > VirtIOBlockDataPlane *s = req->dev->dataplane; > stb_p(&req->in->status, status); > > - vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, > - req->qiov.size + sizeof(*req->in)); > + vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len); > > /* Suppress notification to guest by BH and its scheduled > * flag because requests are completed as a batch after io > diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c > index 000c38d..9546fd2 100644 > --- a/hw/block/virtio-blk.c > +++ b/hw/block/virtio-blk.c > @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) > VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); > req->dev = s; > req->qiov.size = 0; > + req->in_len = 0; > req->next = NULL; > 
req->mr_next = NULL; > return req; > @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, > trace_virtio_blk_req_complete(req, status); > > stb_p(&req->in->status, status); > - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); > + virtqueue_push(s->vq, &req->elem, req->in_len); > virtio_notify(vdev, s->vq); > } > > @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) > if (ret) { > int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); > bool is_read = !(p & VIRTIO_BLK_T_OUT); > + /* Note that memory may be dirtied on read failure. If the > + * virtio request is not completed here, as is the case for > + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied > + * correctly during live migration. While this is ugly, > + * it is acceptable because the device is free to write to > + * the memory until the request is completed (which will > + * happen on the other side of the migration). > + */ > if (virtio_blk_handle_rw_error(req, -ret, is_read)) { > continue; > } > @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) > exit(1); > } > > + /* We always touch the last byte, so just see how big in_iov is. */ > + req->in_len = iov_size(in_iov, in_num); > req->in = (void *)in_iov[in_num - 1].iov_base > + in_iov[in_num - 1].iov_len > - sizeof(struct virtio_blk_inhdr); > diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h > index b3ffcd9..6bf5905 100644 > --- a/include/hw/virtio/virtio-blk.h > +++ b/include/hw/virtio/virtio-blk.h > @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { > struct virtio_blk_inhdr *in; > struct virtio_blk_outhdr out; > QEMUIOVector qiov; > + size_t in_len; > struct VirtIOBlockReq *next; > struct VirtIOBlockReq *mr_next; > BlockAcctCookie acct; > -- > 2.3.4
On 02/04/2015 19:54, Michael S. Tsirkin wrote: > On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote: >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and >> the zero size ultimately is used to compute virtqueue_push's len >> argument. Therefore, reads from virtio-blk devices did not >> migrate their results correctly. (Writes were okay). >> >> Save the size in virtio_blk_handle_request, and use it when the request >> is completed. >> >> Based on a patch by Wen Congyang. >> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > If you are touching this code anyway, maybe it makes > sense to merge Rusty's virtio len patches? > I didn't want them in 2.3 since they aren't ciritical, > but we are changing these lines anyway, maybe make > them correct? My patch is a strict superset of Rusty's patch 2/2. In fact the first version was very similar to his, but neither my v1 nor his patch covers SCSI or flush or get-serial requests. Rusty's patch 1/2 only adds an assertion, I think. It's not critical for 2.3. 
Paolo >> --- >> hw/block/dataplane/virtio-blk.c | 3 +-- >> hw/block/virtio-blk.c | 13 ++++++++++++- >> include/hw/virtio/virtio-blk.h | 1 + >> 3 files changed, 14 insertions(+), 3 deletions(-) >> >> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c >> index cd41478..3db139b 100644 >> --- a/hw/block/dataplane/virtio-blk.c >> +++ b/hw/block/dataplane/virtio-blk.c >> @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) >> VirtIOBlockDataPlane *s = req->dev->dataplane; >> stb_p(&req->in->status, status); >> >> - vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, >> - req->qiov.size + sizeof(*req->in)); >> + vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len); >> >> /* Suppress notification to guest by BH and its scheduled >> * flag because requests are completed as a batch after io >> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c >> index 000c38d..9546fd2 100644 >> --- a/hw/block/virtio-blk.c >> +++ b/hw/block/virtio-blk.c >> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) >> VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); >> req->dev = s; >> req->qiov.size = 0; >> + req->in_len = 0; >> req->next = NULL; >> req->mr_next = NULL; >> return req; >> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, >> trace_virtio_blk_req_complete(req, status); >> >> stb_p(&req->in->status, status); >> - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); >> + virtqueue_push(s->vq, &req->elem, req->in_len); >> virtio_notify(vdev, s->vq); >> } >> >> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) >> if (ret) { >> int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); >> bool is_read = !(p & VIRTIO_BLK_T_OUT); >> + /* Note that memory may be dirtied on read failure. 
If the >> + * virtio request is not completed here, as is the case for >> + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied >> + * correctly during live migration. While this is ugly, >> + * it is acceptable because the device is free to write to >> + * the memory until the request is completed (which will >> + * happen on the other side of the migration). >> + */ >> if (virtio_blk_handle_rw_error(req, -ret, is_read)) { >> continue; >> } >> @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) >> exit(1); >> } >> >> + /* We always touch the last byte, so just see how big in_iov is. */ >> + req->in_len = iov_size(in_iov, in_num); >> req->in = (void *)in_iov[in_num - 1].iov_base >> + in_iov[in_num - 1].iov_len >> - sizeof(struct virtio_blk_inhdr); >> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h >> index b3ffcd9..6bf5905 100644 >> --- a/include/hw/virtio/virtio-blk.h >> +++ b/include/hw/virtio/virtio-blk.h >> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { >> struct virtio_blk_inhdr *in; >> struct virtio_blk_outhdr out; >> QEMUIOVector qiov; >> + size_t in_len; >> struct VirtIOBlockReq *next; >> struct VirtIOBlockReq *mr_next; >> BlockAcctCookie acct; >> -- >> 2.3.4
On Thu, Apr 02, 2015 at 07:57:22PM +0200, Paolo Bonzini wrote: > > > On 02/04/2015 19:54, Michael S. Tsirkin wrote: > > On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote: > >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and > >> the zero size ultimately is used to compute virtqueue_push's len > >> argument. Therefore, reads from virtio-blk devices did not > >> migrate their results correctly. (Writes were okay). > >> > >> Save the size in virtio_blk_handle_request, and use it when the request > >> is completed. > >> > >> Based on a patch by Wen Congyang. > >> > >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > > > If you are touching this code anyway, maybe it makes > > sense to merge Rusty's virtio len patches? > > I didn't want them in 2.3 since they aren't ciritical, > > but we are changing these lines anyway, maybe make > > them correct? > > My patch is a strict superset of Rusty's patch 2/2. In fact the first > version was very similar to his, but neither my v1 nor his patch covers > SCSI or flush or get-serial requests. Oh, true in fact. It might be a good idea to add something like this to the commit log: Additionally, virtio spec requires that device writes at least len bytes to descriptor - so that driver can rely on bytes 0..len-1 being initialized by device. Specifically, it says len can be used as an optimization "for drivers using untrusted buffers: if you do not know exactly how much has been written by the device, you usually have to zero the buffer to ensure no data leakage occurs". We violated this rule in two cases: on write - len should be 0, request size was mistakenly used - and on read error - we don't know whether the whole request size was written, so again len should be set to 0. > > Rusty's patch 1/2 only adds an assertion, I think. It's not critical > for 2.3. > > Paolo I agree. 
> >> --- > >> hw/block/dataplane/virtio-blk.c | 3 +-- > >> hw/block/virtio-blk.c | 13 ++++++++++++- > >> include/hw/virtio/virtio-blk.h | 1 + > >> 3 files changed, 14 insertions(+), 3 deletions(-) > >> > >> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c > >> index cd41478..3db139b 100644 > >> --- a/hw/block/dataplane/virtio-blk.c > >> +++ b/hw/block/dataplane/virtio-blk.c > >> @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) > >> VirtIOBlockDataPlane *s = req->dev->dataplane; > >> stb_p(&req->in->status, status); > >> > >> - vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, > >> - req->qiov.size + sizeof(*req->in)); > >> + vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len); > >> > >> /* Suppress notification to guest by BH and its scheduled > >> * flag because requests are completed as a batch after io > >> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c > >> index 000c38d..9546fd2 100644 > >> --- a/hw/block/virtio-blk.c > >> +++ b/hw/block/virtio-blk.c > >> @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) > >> VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); > >> req->dev = s; > >> req->qiov.size = 0; > >> + req->in_len = 0; > >> req->next = NULL; > >> req->mr_next = NULL; > >> return req; > >> @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, > >> trace_virtio_blk_req_complete(req, status); > >> > >> stb_p(&req->in->status, status); > >> - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); > >> + virtqueue_push(s->vq, &req->elem, req->in_len); > >> virtio_notify(vdev, s->vq); > >> } > >> > >> @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) > >> if (ret) { > >> int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); > >> bool is_read = !(p & VIRTIO_BLK_T_OUT); > >> + /* Note that memory may be dirtied on read failure. 
If the > >> + * virtio request is not completed here, as is the case for > >> + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied > >> + * correctly during live migration. While this is ugly, > >> + * it is acceptable because the device is free to write to > >> + * the memory until the request is completed (which will > >> + * happen on the other side of the migration). > >> + */ > >> if (virtio_blk_handle_rw_error(req, -ret, is_read)) { > >> continue; > >> } > >> @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) > >> exit(1); > >> } > >> > >> + /* We always touch the last byte, so just see how big in_iov is. */ > >> + req->in_len = iov_size(in_iov, in_num); > >> req->in = (void *)in_iov[in_num - 1].iov_base > >> + in_iov[in_num - 1].iov_len > >> - sizeof(struct virtio_blk_inhdr); > >> diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h > >> index b3ffcd9..6bf5905 100644 > >> --- a/include/hw/virtio/virtio-blk.h > >> +++ b/include/hw/virtio/virtio-blk.h > >> @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { > >> struct virtio_blk_inhdr *in; > >> struct virtio_blk_outhdr out; > >> QEMUIOVector qiov; > >> + size_t in_len; > >> struct VirtIOBlockReq *next; > >> struct VirtIOBlockReq *mr_next; > >> BlockAcctCookie acct; > >> -- > >> 2.3.4
On 02/04/2015 20:46, Michael S. Tsirkin wrote: > Oh, true in fact. It might be a good idea to add something like this to > the commit log: > > Additionally, virtio spec requires that device writes at least > len bytes to descriptor - so that driver can rely on > bytes 0..len-1 being initialized by device. Specifically, it says > len can be used as an optimization "for drivers using untrusted > buffers: if you do not know exactly how much has been written by the > device, you usually have to zero the buffer to ensure no data leakage > occurs". > > We violated this rule in two cases: on write - len should be 0, > request size was mistakenly used Should be 1 due to the status byte. > - and on read error - we don't > know whether the whole request size was written, so again len > should be set to 0. Oh no wait... my patch does not handle the read error case. The len argument to virtqueue_push is being overloaded with two meanings: 1) a value that is >= the actual count, used to set the dirty bitmap 2) a value that should be <= the actual count, used as mentioned in your English text above. This is a problem for read errors, because the status byte is at the end of the input buffers. So (1) requires that you set len = size+1, while (2) requires that you set len = 0. My patch only deals with (1), which is a correctness problem for migration, as Wen debugged. It is a 2.3 regression. I don't think (2) is fixable without changing the virtqueue API, and it is not a regression. Paolo
On Thu, Apr 02, 2015 at 08:53:04PM +0200, Paolo Bonzini wrote: > > > On 02/04/2015 20:46, Michael S. Tsirkin wrote: > > Oh, true in fact. It might be a good idea to add something like this to > > the commit log: > > > > Additionally, virtio spec requires that device writes at least > > len bytes to descriptor - so that driver can rely on > > bytes 0..len-1 being initialized by device. Specifically, it says > > len can be used as an optimization "for drivers using untrusted > > buffers: if you do not know exactly how much has been written by the > > device, you usually have to zero the buffer to ensure no data leakage > > occurs". > > > > We violated this rule in two cases: on write - len should be 0, > > request size was mistakenly used > > Should be 1 due to the status byte. > > > - and on read error - we don't > > know whether the whole request size was written, so again len > > should be set to 0. > > Oh no wait... my patch does not handle the read error case. > > The len argument to virtqueue_push is being overloaded with two meanings: > > 1) a value that is >= the actual count, used to set the dirty bitmap > > 2) a value that should be <= the actual count, used as mentioned in your > English text above. > > This is a problem for read errors, because the status byte is at the end > of the input buffers. So (1) requires that you set len = size+1, while > (2) requires that you set len = 0. > > My patch only deals with (1), which is a correctness problem for > migration, as Wen debugged. It is a 2.3 regression. > > I don't think (2) is fixable without changing the virtqueue API, and it > is not a regression. > > Paolo I agree here. If you respin for any reason, documenting that you fixed (1) might be a good idea.
On 04/03/2015 01:50 AM, Paolo Bonzini wrote: > After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and > the zero size ultimately is used to compute virtqueue_push's len > argument. Therefore, reads from virtio-blk devices did not > migrate their results correctly. (Writes were okay). > > Save the size in virtio_blk_handle_request, and use it when the request > is completed. > > Based on a patch by Wen Congyang. > > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Tested-by: Li Zhijian <lizhijian@cn.fujitsu.com> Thanks Li Zhijian > --- > hw/block/dataplane/virtio-blk.c | 3 +-- > hw/block/virtio-blk.c | 13 ++++++++++++- > include/hw/virtio/virtio-blk.h | 1 + > 3 files changed, 14 insertions(+), 3 deletions(-) > > diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c > index cd41478..3db139b 100644 > --- a/hw/block/dataplane/virtio-blk.c > +++ b/hw/block/dataplane/virtio-blk.c > @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) > VirtIOBlockDataPlane *s = req->dev->dataplane; > stb_p(&req->in->status, status); > > - vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, > - req->qiov.size + sizeof(*req->in)); > + vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len); > > /* Suppress notification to guest by BH and its scheduled > * flag because requests are completed as a batch after io > diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c > index 000c38d..9546fd2 100644 > --- a/hw/block/virtio-blk.c > +++ b/hw/block/virtio-blk.c > @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) > VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); > req->dev = s; > req->qiov.size = 0; > + req->in_len = 0; > req->next = NULL; > req->mr_next = NULL; > return req; > @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, > trace_virtio_blk_req_complete(req, status); > > 
stb_p(&req->in->status, status); > - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); > + virtqueue_push(s->vq, &req->elem, req->in_len); > virtio_notify(vdev, s->vq); > } > > @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) > if (ret) { > int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); > bool is_read = !(p & VIRTIO_BLK_T_OUT); > + /* Note that memory may be dirtied on read failure. If the > + * virtio request is not completed here, as is the case for > + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied > + * correctly during live migration. While this is ugly, > + * it is acceptable because the device is free to write to > + * the memory until the request is completed (which will > + * happen on the other side of the migration). > + */ > if (virtio_blk_handle_rw_error(req, -ret, is_read)) { > continue; > } > @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) > exit(1); > } > > + /* We always touch the last byte, so just see how big in_iov is. */ > + req->in_len = iov_size(in_iov, in_num); > req->in = (void *)in_iov[in_num - 1].iov_base > + in_iov[in_num - 1].iov_len > - sizeof(struct virtio_blk_inhdr); > diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h > index b3ffcd9..6bf5905 100644 > --- a/include/hw/virtio/virtio-blk.h > +++ b/include/hw/virtio/virtio-blk.h > @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { > struct virtio_blk_inhdr *in; > struct virtio_blk_outhdr out; > QEMUIOVector qiov; > + size_t in_len; > struct VirtIOBlockReq *next; > struct VirtIOBlockReq *mr_next; > BlockAcctCookie acct;
On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote: > After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and > the zero size ultimately is used to compute virtqueue_push's len > argument. Therefore, reads from virtio-blk devices did not > migrate their results correctly. (Writes were okay). > > Save the size in virtio_blk_handle_request, and use it when the request > is completed. > > Based on a patch by Wen Congyang. > > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > hw/block/dataplane/virtio-blk.c | 3 +-- > hw/block/virtio-blk.c | 13 ++++++++++++- > include/hw/virtio/virtio-blk.h | 1 + > 3 files changed, 14 insertions(+), 3 deletions(-) This also changes len for VIRTIO_BLK_T_SCSI_CMD and VIRTIO_BLK_T_GET_ID. The Linux virtio_blk.ko and Windows viostor drivers ignore the value anyway so what's the worst thing that could happen? :) Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
On Tue, Apr 7, 2015 at 3:11 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote: >> After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and >> the zero size ultimately is used to compute virtqueue_push's len >> argument. Therefore, reads from virtio-blk devices did not >> migrate their results correctly. (Writes were okay). >> >> Save the size in virtio_blk_handle_request, and use it when the request >> is completed. >> >> Based on a patch by Wen Congyang. >> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >> --- >> hw/block/dataplane/virtio-blk.c | 3 +-- >> hw/block/virtio-blk.c | 13 ++++++++++++- >> include/hw/virtio/virtio-blk.h | 1 + >> 3 files changed, 14 insertions(+), 3 deletions(-) > > This also changes len for VIRTIO_BLK_T_SCSI_CMD and VIRTIO_BLK_T_GET_ID. > The Linux virtio_blk.ko and Windows viostor drivers ignore the value > anyway so what's the worst thing that could happen? :) > > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> I'd like to merge Paolo's patch for QEMU 2.3-rc3. It ensures that virtio-blk unmaps and dirties "in" buffers correctly. The Message-Id for the patch I'm referring to is <1427997044-392-1-git-send-email-pbonzini@redhat.com>. Any objections? Speak now if it's critical, otherwise please send follow-up patches for QEMU 2.4. Stefan
On Thu, Apr 02, 2015 at 07:50:44PM +0200, Paolo Bonzini wrote: > After qemu_iovec_destroy, the QEMUIOVector's size is zeroed and > the zero size ultimately is used to compute virtqueue_push's len > argument. Therefore, reads from virtio-blk devices did not > migrate their results correctly. (Writes were okay). > > Save the size in virtio_blk_handle_request, and use it when the request > is completed. > > Based on a patch by Wen Congyang. > > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > hw/block/dataplane/virtio-blk.c | 3 +-- > hw/block/virtio-blk.c | 13 ++++++++++++- Applied to my block tree for QEMU 2.3-rc3: https://github.com/stefanha/qemu/commits/block I can still drop it if there is an objection, so just let me know. Several related points have been mentioned but they are orthogonal and less urgent. Please address them in separate QEMU 2.4 patches. Stefan
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index cd41478..3db139b 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) VirtIOBlockDataPlane *s = req->dev->dataplane; stb_p(&req->in->status, status); - vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, - req->qiov.size + sizeof(*req->in)); + vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len); /* Suppress notification to guest by BH and its scheduled * flag because requests are completed as a batch after io diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 000c38d..9546fd2 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); req->dev = s; req->qiov.size = 0; + req->in_len = 0; req->next = NULL; req->mr_next = NULL; return req; @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, trace_virtio_blk_req_complete(req, status); stb_p(&req->in->status, status); - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); + virtqueue_push(s->vq, &req->elem, req->in_len); virtio_notify(vdev, s->vq); } @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) if (ret) { int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); bool is_read = !(p & VIRTIO_BLK_T_OUT); + /* Note that memory may be dirtied on read failure. If the + * virtio request is not completed here, as is the case for + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied + * correctly during live migration. While this is ugly, + * it is acceptable because the device is free to write to + * the memory until the request is completed (which will + * happen on the other side of the migration). 
+ */ if (virtio_blk_handle_rw_error(req, -ret, is_read)) { continue; } @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) exit(1); } + /* We always touch the last byte, so just see how big in_iov is. */ + req->in_len = iov_size(in_iov, in_num); req->in = (void *)in_iov[in_num - 1].iov_base + in_iov[in_num - 1].iov_len - sizeof(struct virtio_blk_inhdr); diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index b3ffcd9..6bf5905 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { struct virtio_blk_inhdr *in; struct virtio_blk_outhdr out; QEMUIOVector qiov; + size_t in_len; struct VirtIOBlockReq *next; struct VirtIOBlockReq *mr_next; BlockAcctCookie acct;