Message ID | 20191110190310.19799-3-dplotnikov@virtuozzo.com |
---|---|
State | New |
Headers | show |
Series | allow to set 'drive' property on a realized block device | expand |
On 10.11.2019 22:03, Denis Plotnikov wrote: > This allows to change (replace) the file on a block device and is useful > to workaround exclusive file access restrictions, e.g. to implement VM > migration with a shared disk stored on some storage with the exclusive > file opening model: a destination VM is started waiting for incomming > migration with a fake image drive, and later, on the last migration > phase, the fake image file is replaced with the real one. > > Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> > --- > hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++----- > 1 file changed, 77 insertions(+), 12 deletions(-) > > diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c > index c534590dcd..aaab1370a4 100644 > --- a/hw/core/qdev-properties-system.c > +++ b/hw/core/qdev-properties-system.c > @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, Property *prop, > > /* --- drive --- */ > > -static void do_parse_drive(DeviceState *dev, const char *str, void **ptr, > - const char *propname, bool iothread, Error **errp) > +static void do_parse_drive_realized(DeviceState *dev, const char *str, > + void **ptr, const char *propname, > + bool iothread, Error **errp) > +{ > + BlockBackend *blk = *ptr; > + BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); > + int ret; > + bool blk_created = false; > + > + if (!bs) { > + error_setg(errp, "Can't find blockdev '%s'", str); > + return; > + } > + > + if (!blk) { > + AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : > + qemu_get_aio_context(); > + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); > + blk_created = true; Actually, I have concerns about situation where blk=null. Is there any case when scsi-hd (or others) doesn't have a blk assigned and it's legal? > + } else { > + if (blk_bs(blk)) { > + blk_remove_bs(blk); > + } > + } > + > + ret = blk_insert_bs(blk, bs, errp); > + > + if (!ret && blk_created) { > + if (blk_attach_dev(blk, dev) < 0) { > + /* > + * Shouldn't be any errors here since we just created > + * the new blk because the device doesn't have any. > + * Leave the message here in case blk_attach_dev is changed > + */ > + error_setg(errp, "Can't attach drive '%s' to device '%s'", > + str, object_get_typename(OBJECT(dev))); > + } else { > + *ptr = blk; > + } > + } > + > + if (blk_created) { > + blk_unref(blk); > + } > +} > + > +static void do_parse_drive_unrealized(DeviceState *dev, const char *str, > + void **ptr, const char *propname, > + bool iothread, Error **errp) > { > BlockBackend *blk; > bool blk_created = false; > @@ -137,18 +184,34 @@ fail: > } > } > > -static void parse_drive(DeviceState *dev, const char *str, void **ptr, > - const char *propname, Error **errp) > -{ > - do_parse_drive(dev, str, ptr, propname, false, errp); > -} > - > -static void parse_drive_iothread(DeviceState *dev, const char *str, void **ptr, > +static void parse_drive_realized(DeviceState *dev, const char *str, void **ptr, > const char *propname, Error **errp) > { > - do_parse_drive(dev, str, ptr, propname, true, errp); > + do_parse_drive_realized(dev, str, ptr, propname, false, errp); > } > > +static void parse_drive_realized_iothread(DeviceState *dev, const char *str, > + void **ptr, const char *propname, > + Error **errp) > +{ > + do_parse_drive_realized(dev, str, ptr, propname, true, errp); > +} > + > +static void parse_drive_unrealized(DeviceState *dev, const char *str, > + void **ptr, const char *propname, > + Error **errp) > +{ > + do_parse_drive_unrealized(dev, str, ptr, propname, false, errp); > +} > + > +static void parse_drive_unrealized_iothread(DeviceState *dev, const char *str, > + void **ptr, const char *propname, > + Error **errp) > +{ > + do_parse_drive_unrealized(dev, str, ptr, propname, true, errp); > +} > + > + > static void release_drive(Object *obj, const char *name, void *opaque) > { > DeviceState *dev = DEVICE(obj); > @@ -188,13 +251,15 @@ static void get_drive(Object *obj, Visitor *v, const char *name, void *opaque, > static void set_drive(Object *obj, Visitor *v, const char *name, void *opaque, > Error **errp) > { > - set_pointer(obj, v, opaque, NULL, parse_drive, name, errp); > + set_pointer(obj, v, opaque, parse_drive_realized, parse_drive_unrealized, > + name, errp); > } > > static void set_drive_iothread(Object *obj, Visitor *v, const char *name, > void *opaque, Error **errp) > { > - set_pointer(obj, v, opaque, NULL, parse_drive_iothread, name, errp); > + set_pointer(obj, v, opaque, parse_drive_realized_iothread, > + parse_drive_unrealized_iothread, name, errp); > } > > const PropertyInfo qdev_prop_drive = {
On 10.11.2019 22:08, Denis Plotnikov wrote: > > On 10.11.2019 22:03, Denis Plotnikov wrote: >> This allows to change (replace) the file on a block device and is useful >> to workaround exclusive file access restrictions, e.g. to implement VM >> migration with a shared disk stored on some storage with the exclusive >> file opening model: a destination VM is started waiting for incomming >> migration with a fake image drive, and later, on the last migration >> phase, the fake image file is replaced with the real one. >> >> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> >> --- >> hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++----- >> 1 file changed, 77 insertions(+), 12 deletions(-) >> >> diff --git a/hw/core/qdev-properties-system.c >> b/hw/core/qdev-properties-system.c >> index c534590dcd..aaab1370a4 100644 >> --- a/hw/core/qdev-properties-system.c >> +++ b/hw/core/qdev-properties-system.c >> @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, >> Property *prop, >> /* --- drive --- */ >> -static void do_parse_drive(DeviceState *dev, const char *str, void >> **ptr, >> - const char *propname, bool iothread, >> Error **errp) >> +static void do_parse_drive_realized(DeviceState *dev, const char *str, >> + void **ptr, const char *propname, >> + bool iothread, Error **errp) >> +{ >> + BlockBackend *blk = *ptr; >> + BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); >> + int ret; >> + bool blk_created = false; >> + >> + if (!bs) { >> + error_setg(errp, "Can't find blockdev '%s'", str); >> + return; >> + } >> + >> + if (!blk) { >> + AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : >> + qemu_get_aio_context(); >> + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); >> + blk_created = true; > > Actually, I have concerns about situation where blk=null. > > Is there any case when scsi-hd (or others) doesn't have a blk assigned > and it's legal? > >> + } else { >> + if (blk_bs(blk)) { >> + blk_remove_bs(blk); >> + } >> + } >> + >> + ret = blk_insert_bs(blk, bs, errp); >> + >> + if (!ret && blk_created) { >> + if (blk_attach_dev(blk, dev) < 0) { >> + /* >> + * Shouldn't be any errors here since we just created >> + * the new blk because the device doesn't have any. >> + * Leave the message here in case blk_attach_dev is changed >> + */ >> + error_setg(errp, "Can't attach drive '%s' to device '%s'", >> + str, object_get_typename(OBJECT(dev))); >> + } else { >> + *ptr = blk; >> + } >> + } Another problem here, is that the "size" of the device dev may not match after setting a drive. So, we should update it after the drive setting. It was found, that it could be done by calling BlockDevOps.bdrv_parent_cb_resize. But I have some concerns about doing it so. In the case of virtio scsi disk we have the following callstack bdrv_parent_cb_resize calls() -> scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) -> virtio_scsi_change -> virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE, sense.asc | (sense.ascq << 8)); virtio_scsi_change pushes the event to the guest to make the guest ask for size refreshing. If I'm not mistaken, here we can get a race condition when some another request is processed with an unchanged size and then the size changing request is processed. I didn't find a better way to update device size so any comments are welcome. Thanks! Denis >> + >> + if (blk_created) { >> + blk_unref(blk); >> + } >> +} >> + >> +static void do_parse_drive_unrealized(DeviceState *dev, const char >> *str, >> + void **ptr, const char *propname, >> + bool iothread, Error **errp) >> { >> BlockBackend *blk; >> bool blk_created = false; >> @@ -137,18 +184,34 @@ fail: >> } >> } >> -static void parse_drive(DeviceState *dev, const char *str, void >> **ptr, >> - const char *propname, Error **errp) >> -{ >> - do_parse_drive(dev, str, ptr, propname, false, errp); >> -} >> - >> -static void parse_drive_iothread(DeviceState *dev, const char *str, >> void **ptr, >> +static void parse_drive_realized(DeviceState *dev, const char *str, >> void **ptr, >> const char *propname, Error **errp) >> { >> - do_parse_drive(dev, str, ptr, propname, true, errp); >> + do_parse_drive_realized(dev, str, ptr, propname, false, errp); >> } >> +static void parse_drive_realized_iothread(DeviceState *dev, const >> char *str, >> + void **ptr, const char >> *propname, >> + Error **errp) >> +{ >> + do_parse_drive_realized(dev, str, ptr, propname, true, errp); >> +} >> + >> +static void parse_drive_unrealized(DeviceState *dev, const char *str, >> + void **ptr, const char *propname, >> + Error **errp) >> +{ >> + do_parse_drive_unrealized(dev, str, ptr, propname, false, errp); >> +} >> + >> +static void parse_drive_unrealized_iothread(DeviceState *dev, const >> char *str, >> + void **ptr, const char >> *propname, >> + Error **errp) >> +{ >> + do_parse_drive_unrealized(dev, str, ptr, propname, true, errp); >> +} >> + >> + >> static void release_drive(Object *obj, const char *name, void *opaque) >> { >> DeviceState *dev = DEVICE(obj); >> @@ -188,13 +251,15 @@ static void get_drive(Object *obj, Visitor *v, >> const char *name, void *opaque, >> static void set_drive(Object *obj, Visitor *v, const char *name, >> void *opaque, >> Error **errp) >> { >> - set_pointer(obj, v, opaque, NULL, parse_drive, name, errp); >> + set_pointer(obj, v, opaque, parse_drive_realized, >> parse_drive_unrealized, >> + name, errp); >> } >> static void set_drive_iothread(Object *obj, Visitor *v, const >> char *name, >> void *opaque, Error **errp) >> { >> - set_pointer(obj, v, opaque, NULL, parse_drive_iothread, name, >> errp); >> + set_pointer(obj, v, opaque, parse_drive_realized_iothread, >> + parse_drive_unrealized_iothread, name, errp); >> } >> const PropertyInfo qdev_prop_drive = {
On 18.11.2019 13:50, Denis Plotnikov wrote: > > > On 10.11.2019 22:08, Denis Plotnikov wrote: >> >> On 10.11.2019 22:03, Denis Plotnikov wrote: >>> This allows to change (replace) the file on a block device and is >>> useful >>> to workaround exclusive file access restrictions, e.g. to implement VM >>> migration with a shared disk stored on some storage with the exclusive >>> file opening model: a destination VM is started waiting for incomming >>> migration with a fake image drive, and later, on the last migration >>> phase, the fake image file is replaced with the real one. >>> >>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> >>> --- >>> hw/core/qdev-properties-system.c | 89 >>> +++++++++++++++++++++++++++----- >>> 1 file changed, 77 insertions(+), 12 deletions(-) >>> >>> diff --git a/hw/core/qdev-properties-system.c >>> b/hw/core/qdev-properties-system.c >>> index c534590dcd..aaab1370a4 100644 >>> --- a/hw/core/qdev-properties-system.c >>> +++ b/hw/core/qdev-properties-system.c >>> @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, >>> Property *prop, >>> /* --- drive --- */ >>> -static void do_parse_drive(DeviceState *dev, const char *str, >>> void **ptr, >>> - const char *propname, bool iothread, >>> Error **errp) >>> +static void do_parse_drive_realized(DeviceState *dev, const char *str, >>> + void **ptr, const char *propname, >>> + bool iothread, Error **errp) >>> +{ >>> + BlockBackend *blk = *ptr; >>> + BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); >>> + int ret; >>> + bool blk_created = false; >>> + >>> + if (!bs) { >>> + error_setg(errp, "Can't find blockdev '%s'", str); >>> + return; >>> + } >>> + >>> + if (!blk) { >>> + AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : >>> + qemu_get_aio_context(); >>> + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); >>> + blk_created = true; >> >> Actually, I have concerns about situation where blk=null. >> >> Is there any case when scsi-hd (or others) doesn't have a blk >> assigned and it's legal? >> >>> + } else { >>> + if (blk_bs(blk)) { >>> + blk_remove_bs(blk); >>> + } >>> + } >>> + >>> + ret = blk_insert_bs(blk, bs, errp); >>> + >>> + if (!ret && blk_created) { >>> + if (blk_attach_dev(blk, dev) < 0) { >>> + /* >>> + * Shouldn't be any errors here since we just created >>> + * the new blk because the device doesn't have any. >>> + * Leave the message here in case blk_attach_dev is >>> changed >>> + */ >>> + error_setg(errp, "Can't attach drive '%s' to device >>> '%s'", >>> + str, object_get_typename(OBJECT(dev))); >>> + } else { >>> + *ptr = blk; >>> + } >>> + } > Another problem here, is that the "size" of the device dev may not > match after setting a drive. > So, we should update it after the drive setting. > It was found, that it could be done by calling > BlockDevOps.bdrv_parent_cb_resize. > > But I have some concerns about doing it so. In the case of virtio scsi > disk we have the following callstack > > bdrv_parent_cb_resize calls() -> > scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) -> > virtio_scsi_change -> > virtio_scsi_push_event(s, dev, > VIRTIO_SCSI_T_PARAM_CHANGE, > sense.asc | > (sense.ascq << 8)); > > > virtio_scsi_change pushes the event to the guest to make the guest > ask for size refreshing. > If I'm not mistaken, here we can get a race condition when some > another request is processed with an unchanged > size and then the size changing request is processed. > > I didn't find a better way to update device size so any comments are > welcome. > > Thanks! > > Denis >>> + >>> + if (blk_created) { >>> + blk_unref(blk); >>> + } >>> +} >>> + >>> +static void do_parse_drive_unrealized(DeviceState *dev, const char >>> *str, >>> + void **ptr, const char >>> *propname, >>> + bool iothread, Error **errp) >>> { >>> BlockBackend *blk; >>> bool blk_created = false; >>> @@ -137,18 +184,34 @@ fail: >>> } >>> } >>> -static void parse_drive(DeviceState *dev, const char *str, void >>> **ptr, >>> - const char *propname, Error **errp) >>> -{ >>> - do_parse_drive(dev, str, ptr, propname, false, errp); >>> -} >>> - >>> -static void parse_drive_iothread(DeviceState *dev, const char *str, >>> void **ptr, >>> +static void parse_drive_realized(DeviceState *dev, const char *str, >>> void **ptr, >>> const char *propname, Error **errp) >>> { >>> - do_parse_drive(dev, str, ptr, propname, true, errp); >>> + do_parse_drive_realized(dev, str, ptr, propname, false, errp); >>> } >>> +static void parse_drive_realized_iothread(DeviceState *dev, const >>> char *str, >>> + void **ptr, const char >>> *propname, >>> + Error **errp) >>> +{ >>> + do_parse_drive_realized(dev, str, ptr, propname, true, errp); >>> +} >>> + >>> +static void parse_drive_unrealized(DeviceState *dev, const char *str, >>> + void **ptr, const char *propname, >>> + Error **errp) >>> +{ >>> + do_parse_drive_unrealized(dev, str, ptr, propname, false, errp); >>> +} >>> + >>> +static void parse_drive_unrealized_iothread(DeviceState *dev, const >>> char *str, >>> + void **ptr, const char >>> *propname, >>> + Error **errp) >>> +{ >>> + do_parse_drive_unrealized(dev, str, ptr, propname, true, errp); >>> +} >>> + >>> + >>> static void release_drive(Object *obj, const char *name, void >>> *opaque) >>> { >>> DeviceState *dev = DEVICE(obj); >>> @@ -188,13 +251,15 @@ static void get_drive(Object *obj, Visitor *v, >>> const char *name, void *opaque, >>> static void set_drive(Object *obj, Visitor *v, const char *name, >>> void *opaque, >>> Error **errp) >>> { >>> - set_pointer(obj, v, opaque, NULL, parse_drive, name, errp); >>> + set_pointer(obj, v, opaque, parse_drive_realized, >>> parse_drive_unrealized, >>> + name, errp); >>> } >>> static void set_drive_iothread(Object *obj, Visitor *v, const >>> char *name, >>> void *opaque, Error **errp) >>> { >>> - set_pointer(obj, v, opaque, NULL, parse_drive_iothread, name, >>> errp); >>> + set_pointer(obj, v, opaque, parse_drive_realized_iothread, >>> + parse_drive_unrealized_iothread, name, errp); >>> } >>> const PropertyInfo qdev_prop_drive = { >
Am 18.11.2019 um 11:50 hat Denis Plotnikov geschrieben: > > > On 10.11.2019 22:08, Denis Plotnikov wrote: > > > > On 10.11.2019 22:03, Denis Plotnikov wrote: > >> This allows to change (replace) the file on a block device and is useful > >> to workaround exclusive file access restrictions, e.g. to implement VM > >> migration with a shared disk stored on some storage with the exclusive > >> file opening model: a destination VM is started waiting for incomming > >> migration with a fake image drive, and later, on the last migration > >> phase, the fake image file is replaced with the real one. > >> > >> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> > >> --- > >> hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++----- > >> 1 file changed, 77 insertions(+), 12 deletions(-) > >> > >> diff --git a/hw/core/qdev-properties-system.c > >> b/hw/core/qdev-properties-system.c > >> index c534590dcd..aaab1370a4 100644 > >> --- a/hw/core/qdev-properties-system.c > >> +++ b/hw/core/qdev-properties-system.c > >> @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, > >> Property *prop, > >> /* --- drive --- */ > >> -static void do_parse_drive(DeviceState *dev, const char *str, void > >> **ptr, > >> - const char *propname, bool iothread, > >> Error **errp) > >> +static void do_parse_drive_realized(DeviceState *dev, const char *str, > >> + void **ptr, const char *propname, > >> + bool iothread, Error **errp) > >> +{ > >> + BlockBackend *blk = *ptr; > >> + BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); > >> + int ret; > >> + bool blk_created = false; > >> + > >> + if (!bs) { > >> + error_setg(errp, "Can't find blockdev '%s'", str); > >> + return; > >> + } > >> + > >> + if (!blk) { > >> + AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : > >> + qemu_get_aio_context(); > >> + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); > >> + blk_created = true; > > > > Actually, I have concerns about situation where blk=null. > > > > Is there any case when scsi-hd (or others) doesn't have a blk assigned > > and it's legal? No, block devices will always have a BlockBackend, even if it doesn't have a root node inserted. > >> + } else { > >> + if (blk_bs(blk)) { > >> + blk_remove_bs(blk); > >> + } > >> + } > >> + > >> + ret = blk_insert_bs(blk, bs, errp); > >> + > >> + if (!ret && blk_created) { > >> + if (blk_attach_dev(blk, dev) < 0) { > >> + /* > >> + * Shouldn't be any errors here since we just created > >> + * the new blk because the device doesn't have any. > >> + * Leave the message here in case blk_attach_dev is changed > >> + */ > >> + error_setg(errp, "Can't attach drive '%s' to device '%s'", > >> + str, object_get_typename(OBJECT(dev))); > >> + } else { > >> + *ptr = blk; > >> + } > >> + } > Another problem here, is that the "size" of the device dev may not match > after setting a drive. > So, we should update it after the drive setting. > It was found, that it could be done by calling > BlockDevOps.bdrv_parent_cb_resize. > > But I have some concerns about doing it so. In the case of virtio scsi > disk we have the following callstack > > bdrv_parent_cb_resize calls() -> > scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) -> > virtio_scsi_change -> > virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE, > sense.asc | > (sense.ascq << 8)); I think the safest option for now (and which should solve the case you want to address) is checking whether old and new size match and returning an error otherwise. > virtio_scsi_change pushes the event to the guest to make the guest > ask for size refreshing. If I'm not mistaken, here we can get a race > condition when some another request is processed with an unchanged > size and then the size changing request is processed. I think this is actually a problem even without resizing: We need to quiesce the device between removing the old root and inserting the new one. They way to achieve this is probably by splitting blk_drain() into a blk_drain_begin()/end() and then draining the BlockBackend here while we're working on it. Kevin
On 13.12.2019 13:32, Kevin Wolf wrote: > Am 18.11.2019 um 11:50 hat Denis Plotnikov geschrieben: >> >> On 10.11.2019 22:08, Denis Plotnikov wrote: >>> On 10.11.2019 22:03, Denis Plotnikov wrote: >>>> This allows to change (replace) the file on a block device and is useful >>>> to workaround exclusive file access restrictions, e.g. to implement VM >>>> migration with a shared disk stored on some storage with the exclusive >>>> file opening model: a destination VM is started waiting for incomming >>>> migration with a fake image drive, and later, on the last migration >>>> phase, the fake image file is replaced with the real one. >>>> >>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> >>>> --- >>>> hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++----- >>>> 1 file changed, 77 insertions(+), 12 deletions(-) >>>> >>>> diff --git a/hw/core/qdev-properties-system.c >>>> b/hw/core/qdev-properties-system.c >>>> index c534590dcd..aaab1370a4 100644 >>>> --- a/hw/core/qdev-properties-system.c >>>> +++ b/hw/core/qdev-properties-system.c >>>> @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, >>>> Property *prop, >>>> /* --- drive --- */ >>>> -static void do_parse_drive(DeviceState *dev, const char *str, void >>>> **ptr, >>>> - const char *propname, bool iothread, >>>> Error **errp) >>>> +static void do_parse_drive_realized(DeviceState *dev, const char *str, >>>> + void **ptr, const char *propname, >>>> + bool iothread, Error **errp) >>>> +{ >>>> + BlockBackend *blk = *ptr; >>>> + BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); >>>> + int ret; >>>> + bool blk_created = false; >>>> + >>>> + if (!bs) { >>>> + error_setg(errp, "Can't find blockdev '%s'", str); >>>> + return; >>>> + } >>>> + >>>> + if (!blk) { >>>> + AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : >>>> + qemu_get_aio_context(); >>>> + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); >>>> + blk_created = true; >>> Actually, I have concerns about situation where blk=null. >>> >>> Is there any case when scsi-hd (or others) doesn't have a blk assigned >>> and it's legal? > No, block devices will always have a BlockBackend, even if it doesn't > have a root node inserted. > >>>> + } else { >>>> + if (blk_bs(blk)) { >>>> + blk_remove_bs(blk); >>>> + } >>>> + } >>>> + >>>> + ret = blk_insert_bs(blk, bs, errp); >>>> + >>>> + if (!ret && blk_created) { >>>> + if (blk_attach_dev(blk, dev) < 0) { >>>> + /* >>>> + * Shouldn't be any errors here since we just created >>>> + * the new blk because the device doesn't have any. >>>> + * Leave the message here in case blk_attach_dev is changed >>>> + */ >>>> + error_setg(errp, "Can't attach drive '%s' to device '%s'", >>>> + str, object_get_typename(OBJECT(dev))); >>>> + } else { >>>> + *ptr = blk; >>>> + } >>>> + } >> Another problem here, is that the "size" of the device dev may not match >> after setting a drive. >> So, we should update it after the drive setting. >> It was found, that it could be done by calling >> BlockDevOps.bdrv_parent_cb_resize. >> >> But I have some concerns about doing it so. In the case of virtio scsi >> disk we have the following callstack >> >> bdrv_parent_cb_resize calls() -> >> scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) -> >> virtio_scsi_change -> >> virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE, >> sense.asc | >> (sense.ascq << 8)); > I think the safest option for now (and which should solve the case you > want to address) is checking whether old and new size match and > returning an error otherwise. > >> virtio_scsi_change pushes the event to the guest to make the guest >> ask for size refreshing. If I'm not mistaken, here we can get a race >> condition when some another request is processed with an unchanged >> size and then the size changing request is processed. > I think this is actually a problem even without resizing: We need to > quiesce the device between removing the old root and inserting the new > one. They way to achieve this is probably by splitting blk_drain() into > a blk_drain_begin()/end() and then draining the BlockBackend here while > we're working on it. > > Kevin Why don't we use bdrv_drained_begin/end directly? This is what blk_drain does. If we want to split blk_drain we must keep track if blk's brdv isn't change otherwise we can end up with drain_begin one and drain end another bdrv if we do remove/insert in between. Another thing is should we really care about this if we have VM stopped and the sizes matched? Denis >
Am 16.12.2019 um 15:51 hat Denis Plotnikov geschrieben: > On 13.12.2019 13:32, Kevin Wolf wrote: > > Am 18.11.2019 um 11:50 hat Denis Plotnikov geschrieben: > >> Another problem here, is that the "size" of the device dev may not match > >> after setting a drive. > >> So, we should update it after the drive setting. > >> It was found, that it could be done by calling > >> BlockDevOps.bdrv_parent_cb_resize. > >> > >> But I have some concerns about doing it so. In the case of virtio scsi > >> disk we have the following callstack > >> > >> bdrv_parent_cb_resize calls() -> > >> scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) -> > >> virtio_scsi_change -> > >> virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE, > >> sense.asc | > >> (sense.ascq << 8)); > > I think the safest option for now (and which should solve the case you > > want to address) is checking whether old and new size match and > > returning an error otherwise. > > > >> virtio_scsi_change pushes the event to the guest to make the guest > >> ask for size refreshing. If I'm not mistaken, here we can get a race > >> condition when some another request is processed with an unchanged > >> size and then the size changing request is processed. > > I think this is actually a problem even without resizing: We need to > > quiesce the device between removing the old root and inserting the new > > one. They way to achieve this is probably by splitting blk_drain() into > > a blk_drain_begin()/end() and then draining the BlockBackend here while > > we're working on it. > > > > Kevin > Why don't we use bdrv_drained_begin/end directly? This is what > blk_drain does. > If we want to split blk_drain we must keep track if blk's brdv isn't > change otherwise we can end up with drain_begin one and drain end > another bdrv if we do remove/insert in between. Hmm, true, we would have to keep track of draining at the BlockBackend level and consider it in blk_remove_bs() and blk_insert_bs(). Maybe that's not worth it. If we use bdrv_drained_begin/end directly, I think we need to drain both the old and the new root node during the process. > Another thing is should we really care about this if we have VM > stopped and the sizes matched? How do we know that the VM is stopped? And why would we require this? Your patch doesn't implement or at least check this, and it seems a bit impractical for example when all you want is inserting a filter node. Kevin
On 16.12.2019 18:38, Kevin Wolf wrote: > Am 16.12.2019 um 15:51 hat Denis Plotnikov geschrieben: >> On 13.12.2019 13:32, Kevin Wolf wrote: >>> Am 18.11.2019 um 11:50 hat Denis Plotnikov geschrieben: >>>> Another problem here, is that the "size" of the device dev may not match >>>> after setting a drive. >>>> So, we should update it after the drive setting. >>>> It was found, that it could be done by calling >>>> BlockDevOps.bdrv_parent_cb_resize. >>>> >>>> But I have some concerns about doing it so. In the case of virtio scsi >>>> disk we have the following callstack >>>> >>>> bdrv_parent_cb_resize calls() -> >>>> scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) -> >>>> virtio_scsi_change -> >>>> virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE, >>>> sense.asc | >>>> (sense.ascq << 8)); >>> I think the safest option for now (and which should solve the case you >>> want to address) is checking whether old and new size match and >>> returning an error otherwise. >>> >>>> virtio_scsi_change pushes the event to the guest to make the guest >>>> ask for size refreshing. If I'm not mistaken, here we can get a race >>>> condition when some another request is processed with an unchanged >>>> size and then the size changing request is processed. >>> I think this is actually a problem even without resizing: We need to >>> quiesce the device between removing the old root and inserting the new >>> one. They way to achieve this is probably by splitting blk_drain() into >>> a blk_drain_begin()/end() and then draining the BlockBackend here while >>> we're working on it. >>> >>> Kevin >> Why don't we use bdrv_drained_begin/end directly? This is what >> blk_drain does. >> If we want to split blk_drain we must keep track if blk's brdv isn't >> change otherwise we can end up with drain_begin one and drain end >> another bdrv if we do remove/insert in between. > Hmm, true, we would have to keep track of draining at the BlockBackend > level and consider it in blk_remove_bs() and blk_insert_bs(). Maybe > that's not worth it. > > If we use bdrv_drained_begin/end directly, I think we need to drain both > the old and the new root node during the process. > >> Another thing is should we really care about this if we have VM >> stopped and the sizes matched? > How do we know that the VM is stopped? And why would we require this? I implied the scenario of VM migration over a shared storage with an exclusive file access model. The VM is stopped on drive changing phase. If there is no use to require it, than ok. Denis > Your patch doesn't implement or at least check this, and it seems a bit > impractical for example when all you want is inserting a filter node. > > Kevin
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index c534590dcd..aaab1370a4 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, Property *prop, /* --- drive --- */ -static void do_parse_drive(DeviceState *dev, const char *str, void **ptr, - const char *propname, bool iothread, Error **errp) +static void do_parse_drive_realized(DeviceState *dev, const char *str, + void **ptr, const char *propname, + bool iothread, Error **errp) +{ + BlockBackend *blk = *ptr; + BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); + int ret; + bool blk_created = false; + + if (!bs) { + error_setg(errp, "Can't find blockdev '%s'", str); + return; + } + + if (!blk) { + AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : + qemu_get_aio_context(); + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); + blk_created = true; + } else { + if (blk_bs(blk)) { + blk_remove_bs(blk); + } + } + + ret = blk_insert_bs(blk, bs, errp); + + if (!ret && blk_created) { + if (blk_attach_dev(blk, dev) < 0) { + /* + * Shouldn't be any errors here since we just created + * the new blk because the device doesn't have any. + * Leave the message here in case blk_attach_dev is changed + */ + error_setg(errp, "Can't attach drive '%s' to device '%s'", + str, object_get_typename(OBJECT(dev))); + } else { + *ptr = blk; + } + } + + if (blk_created) { + blk_unref(blk); + } +} + +static void do_parse_drive_unrealized(DeviceState *dev, const char *str, + void **ptr, const char *propname, + bool iothread, Error **errp) { BlockBackend *blk; bool blk_created = false; @@ -137,18 +184,34 @@ fail: } } -static void parse_drive(DeviceState *dev, const char *str, void **ptr, - const char *propname, Error **errp) -{ - do_parse_drive(dev, str, ptr, propname, false, errp); -} - -static void parse_drive_iothread(DeviceState *dev, const char *str, void **ptr, +static void parse_drive_realized(DeviceState *dev, const char *str, void **ptr, const char *propname, Error **errp) { - do_parse_drive(dev, str, ptr, propname, true, errp); + do_parse_drive_realized(dev, str, ptr, propname, false, errp); } +static void parse_drive_realized_iothread(DeviceState *dev, const char *str, + void **ptr, const char *propname, + Error **errp) +{ + do_parse_drive_realized(dev, str, ptr, propname, true, errp); +} + +static void parse_drive_unrealized(DeviceState *dev, const char *str, + void **ptr, const char *propname, + Error **errp) +{ + do_parse_drive_unrealized(dev, str, ptr, propname, false, errp); +} + +static void parse_drive_unrealized_iothread(DeviceState *dev, const char *str, + void **ptr, const char *propname, + Error **errp) +{ + do_parse_drive_unrealized(dev, str, ptr, propname, true, errp); +} + + static void release_drive(Object *obj, const char *name, void *opaque) { DeviceState *dev = DEVICE(obj); @@ -188,13 +251,15 @@ static void get_drive(Object *obj, Visitor *v, const char *name, void *opaque, static void set_drive(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - set_pointer(obj, v, opaque, NULL, parse_drive, name, errp); + set_pointer(obj, v, opaque, parse_drive_realized, parse_drive_unrealized, + name, errp); } static void set_drive_iothread(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - set_pointer(obj, v, opaque, NULL, parse_drive_iothread, name, errp); + set_pointer(obj, v, opaque, parse_drive_realized_iothread, + parse_drive_unrealized_iothread, name, errp); } const PropertyInfo qdev_prop_drive = {
This allows to change (replace) the file on a block device and is useful to workaround exclusive file access restrictions, e.g. to implement VM migration with a shared disk stored on some storage with the exclusive file opening model: a destination VM is started waiting for incomming migration with a fake image drive, and later, on the last migration phase, the fake image file is replaced with the real one. Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> --- hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 12 deletions(-)