diff mbox series

[v0,2/2] block: allow to set 'drive' property on a realized block device

Message ID 20191110190310.19799-3-dplotnikov@virtuozzo.com
State New
Headers show
Series allow to set 'drive' property on a realized block device | expand

Commit Message

Denis Plotnikov Nov. 10, 2019, 7:03 p.m. UTC
This allows to change (replace) the file on a block device and is useful
to workaround exclusive file access restrictions, e.g. to implement VM
migration with a shared disk stored on some storage with the exclusive
file opening model: a destination VM is started waiting for incomming
migration with a fake image drive, and later, on the last migration
phase, the fake image file is replaced with the real one.

Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
 hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++-----
 1 file changed, 77 insertions(+), 12 deletions(-)

Comments

Denis Plotnikov Nov. 10, 2019, 7:08 p.m. UTC | #1
On 10.11.2019 22:03, Denis Plotnikov wrote:
> This allows to change (replace) the file on a block device and is useful
> to workaround exclusive file access restrictions, e.g. to implement VM
> migration with a shared disk stored on some storage with the exclusive
> file opening model: a destination VM is started waiting for incomming
> migration with a fake image drive, and later, on the last migration
> phase, the fake image file is replaced with the real one.
>
> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
> ---
>   hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++-----
>   1 file changed, 77 insertions(+), 12 deletions(-)
>
> diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
> index c534590dcd..aaab1370a4 100644
> --- a/hw/core/qdev-properties-system.c
> +++ b/hw/core/qdev-properties-system.c
> @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, Property *prop,
>   
>   /* --- drive --- */
>   
> -static void do_parse_drive(DeviceState *dev, const char *str, void **ptr,
> -                           const char *propname, bool iothread, Error **errp)
> +static void do_parse_drive_realized(DeviceState *dev, const char *str,
> +                                    void **ptr, const char *propname,
> +                                    bool iothread, Error **errp)
> +{
> +    BlockBackend *blk = *ptr;
> +    BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
> +    int ret;
> +    bool blk_created = false;
> +
> +    if (!bs) {
> +        error_setg(errp, "Can't find blockdev '%s'", str);
> +        return;
> +    }
> +
> +    if (!blk) {
> +        AioContext *ctx = iothread ? bdrv_get_aio_context(bs) :
> +                                     qemu_get_aio_context();
> +        blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
> +        blk_created = true;

Actually, I have concerns about situation where blk=null.

Is there any case when scsi-hd (or others) doesn't have a blk assigned 
and it's legal?

> +    } else {
> +        if (blk_bs(blk)) {
> +            blk_remove_bs(blk);
> +        }
> +    }
> +
> +    ret = blk_insert_bs(blk, bs, errp);
> +
> +    if (!ret && blk_created) {
> +        if (blk_attach_dev(blk, dev) < 0) {
> +            /*
> +             * Shouldn't be any errors here since we just created
> +             * the new blk because the device doesn't have any.
> +             * Leave the message here in case blk_attach_dev is changed
> +             */
> +             error_setg(errp, "Can't attach drive '%s' to device '%s'",
> +                        str, object_get_typename(OBJECT(dev)));
> +        } else {
> +            *ptr = blk;
> +        }
> +    }
> +
> +    if (blk_created) {
> +        blk_unref(blk);
> +    }
> +}
> +
> +static void do_parse_drive_unrealized(DeviceState *dev, const char *str,
> +                                      void **ptr, const char *propname,
> +                                      bool iothread, Error **errp)
>   {
>       BlockBackend *blk;
>       bool blk_created = false;
> @@ -137,18 +184,34 @@ fail:
>       }
>   }
>   
> -static void parse_drive(DeviceState *dev, const char *str, void **ptr,
> -                        const char *propname, Error **errp)
> -{
> -    do_parse_drive(dev, str, ptr, propname, false, errp);
> -}
> -
> -static void parse_drive_iothread(DeviceState *dev, const char *str, void **ptr,
> +static void parse_drive_realized(DeviceState *dev, const char *str, void **ptr,
>                                    const char *propname, Error **errp)
>   {
> -    do_parse_drive(dev, str, ptr, propname, true, errp);
> +    do_parse_drive_realized(dev, str, ptr, propname, false, errp);
>   }
>   
> +static void parse_drive_realized_iothread(DeviceState *dev, const char *str,
> +                                          void **ptr, const char *propname,
> +                                          Error **errp)
> +{
> +    do_parse_drive_realized(dev, str, ptr, propname, true, errp);
> +}
> +
> +static void parse_drive_unrealized(DeviceState *dev, const char *str,
> +                                   void **ptr, const char *propname,
> +                                   Error **errp)
> +{
> +    do_parse_drive_unrealized(dev, str, ptr, propname, false, errp);
> +}
> +
> +static void parse_drive_unrealized_iothread(DeviceState *dev, const char *str,
> +                                            void **ptr, const char *propname,
> +                                            Error **errp)
> +{
> +    do_parse_drive_unrealized(dev, str, ptr, propname, true, errp);
> +}
> +
> +
>   static void release_drive(Object *obj, const char *name, void *opaque)
>   {
>       DeviceState *dev = DEVICE(obj);
> @@ -188,13 +251,15 @@ static void get_drive(Object *obj, Visitor *v, const char *name, void *opaque,
>   static void set_drive(Object *obj, Visitor *v, const char *name, void *opaque,
>                         Error **errp)
>   {
> -    set_pointer(obj, v, opaque, NULL, parse_drive, name, errp);
> +    set_pointer(obj, v, opaque, parse_drive_realized, parse_drive_unrealized,
> +                name, errp);
>   }
>   
>   static void set_drive_iothread(Object *obj, Visitor *v, const char *name,
>                                  void *opaque, Error **errp)
>   {
> -    set_pointer(obj, v, opaque, NULL, parse_drive_iothread, name, errp);
> +    set_pointer(obj, v, opaque, parse_drive_realized_iothread,
> +                parse_drive_unrealized_iothread, name, errp);
>   }
>   
>   const PropertyInfo qdev_prop_drive = {
Denis Plotnikov Nov. 18, 2019, 10:50 a.m. UTC | #2
On 10.11.2019 22:08, Denis Plotnikov wrote:
>
> On 10.11.2019 22:03, Denis Plotnikov wrote:
>> This allows to change (replace) the file on a block device and is useful
>> to workaround exclusive file access restrictions, e.g. to implement VM
>> migration with a shared disk stored on some storage with the exclusive
>> file opening model: a destination VM is started waiting for incomming
>> migration with a fake image drive, and later, on the last migration
>> phase, the fake image file is replaced with the real one.
>>
>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>> ---
>>   hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++-----
>>   1 file changed, 77 insertions(+), 12 deletions(-)
>>
>> diff --git a/hw/core/qdev-properties-system.c 
>> b/hw/core/qdev-properties-system.c
>> index c534590dcd..aaab1370a4 100644
>> --- a/hw/core/qdev-properties-system.c
>> +++ b/hw/core/qdev-properties-system.c
>> @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, 
>> Property *prop,
>>     /* --- drive --- */
>>   -static void do_parse_drive(DeviceState *dev, const char *str, void 
>> **ptr,
>> -                           const char *propname, bool iothread, 
>> Error **errp)
>> +static void do_parse_drive_realized(DeviceState *dev, const char *str,
>> +                                    void **ptr, const char *propname,
>> +                                    bool iothread, Error **errp)
>> +{
>> +    BlockBackend *blk = *ptr;
>> +    BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
>> +    int ret;
>> +    bool blk_created = false;
>> +
>> +    if (!bs) {
>> +        error_setg(errp, "Can't find blockdev '%s'", str);
>> +        return;
>> +    }
>> +
>> +    if (!blk) {
>> +        AioContext *ctx = iothread ? bdrv_get_aio_context(bs) :
>> +                                     qemu_get_aio_context();
>> +        blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
>> +        blk_created = true;
>
> Actually, I have concerns about situation where blk=null.
>
> Is there any case when scsi-hd (or others) doesn't have a blk assigned 
> and it's legal?
>
>> +    } else {
>> +        if (blk_bs(blk)) {
>> +            blk_remove_bs(blk);
>> +        }
>> +    }
>> +
>> +    ret = blk_insert_bs(blk, bs, errp);
>> +
>> +    if (!ret && blk_created) {
>> +        if (blk_attach_dev(blk, dev) < 0) {
>> +            /*
>> +             * Shouldn't be any errors here since we just created
>> +             * the new blk because the device doesn't have any.
>> +             * Leave the message here in case blk_attach_dev is changed
>> +             */
>> +             error_setg(errp, "Can't attach drive '%s' to device '%s'",
>> +                        str, object_get_typename(OBJECT(dev)));
>> +        } else {
>> +            *ptr = blk;
>> +        }
>> +    }
Another problem here, is that the "size" of the device dev may not match 
after setting a drive.
So, we should update it after the drive setting.
It was found, that it could be done by calling 
BlockDevOps.bdrv_parent_cb_resize.

But I have some concerns about doing it so. In the case of virtio scsi 
disk we have the following callstack

     bdrv_parent_cb_resize calls() ->
         scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) ->
             virtio_scsi_change ->
                 virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
                                                     sense.asc | 
(sense.ascq << 8));


virtio_scsi_change  pushes the event to the guest to make the guest ask 
for size refreshing.
If I'm not mistaken, here we can get a race condition when some another 
request is processed with an unchanged
size and then the size changing request is processed.

I didn't find a better way to update device size so any comments are 
welcome.

Thanks!

Denis
>> +
>> +    if (blk_created) {
>> +        blk_unref(blk);
>> +    }
>> +}
>> +
>> +static void do_parse_drive_unrealized(DeviceState *dev, const char 
>> *str,
>> +                                      void **ptr, const char *propname,
>> +                                      bool iothread, Error **errp)
>>   {
>>       BlockBackend *blk;
>>       bool blk_created = false;
>> @@ -137,18 +184,34 @@ fail:
>>       }
>>   }
>>   -static void parse_drive(DeviceState *dev, const char *str, void 
>> **ptr,
>> -                        const char *propname, Error **errp)
>> -{
>> -    do_parse_drive(dev, str, ptr, propname, false, errp);
>> -}
>> -
>> -static void parse_drive_iothread(DeviceState *dev, const char *str, 
>> void **ptr,
>> +static void parse_drive_realized(DeviceState *dev, const char *str, 
>> void **ptr,
>>                                    const char *propname, Error **errp)
>>   {
>> -    do_parse_drive(dev, str, ptr, propname, true, errp);
>> +    do_parse_drive_realized(dev, str, ptr, propname, false, errp);
>>   }
>>   +static void parse_drive_realized_iothread(DeviceState *dev, const 
>> char *str,
>> +                                          void **ptr, const char 
>> *propname,
>> +                                          Error **errp)
>> +{
>> +    do_parse_drive_realized(dev, str, ptr, propname, true, errp);
>> +}
>> +
>> +static void parse_drive_unrealized(DeviceState *dev, const char *str,
>> +                                   void **ptr, const char *propname,
>> +                                   Error **errp)
>> +{
>> +    do_parse_drive_unrealized(dev, str, ptr, propname, false, errp);
>> +}
>> +
>> +static void parse_drive_unrealized_iothread(DeviceState *dev, const 
>> char *str,
>> +                                            void **ptr, const char 
>> *propname,
>> +                                            Error **errp)
>> +{
>> +    do_parse_drive_unrealized(dev, str, ptr, propname, true, errp);
>> +}
>> +
>> +
>>   static void release_drive(Object *obj, const char *name, void *opaque)
>>   {
>>       DeviceState *dev = DEVICE(obj);
>> @@ -188,13 +251,15 @@ static void get_drive(Object *obj, Visitor *v, 
>> const char *name, void *opaque,
>>   static void set_drive(Object *obj, Visitor *v, const char *name, 
>> void *opaque,
>>                         Error **errp)
>>   {
>> -    set_pointer(obj, v, opaque, NULL, parse_drive, name, errp);
>> +    set_pointer(obj, v, opaque, parse_drive_realized, 
>> parse_drive_unrealized,
>> +                name, errp);
>>   }
>>     static void set_drive_iothread(Object *obj, Visitor *v, const 
>> char *name,
>>                                  void *opaque, Error **errp)
>>   {
>> -    set_pointer(obj, v, opaque, NULL, parse_drive_iothread, name, 
>> errp);
>> +    set_pointer(obj, v, opaque, parse_drive_realized_iothread,
>> +                parse_drive_unrealized_iothread, name, errp);
>>   }
>>     const PropertyInfo qdev_prop_drive = {
Denis Plotnikov Dec. 13, 2019, 7:30 a.m. UTC | #3
On 18.11.2019 13:50, Denis Plotnikov wrote:
>
>
> On 10.11.2019 22:08, Denis Plotnikov wrote:
>>
>> On 10.11.2019 22:03, Denis Plotnikov wrote:
>>> This allows to change (replace) the file on a block device and is 
>>> useful
>>> to workaround exclusive file access restrictions, e.g. to implement VM
>>> migration with a shared disk stored on some storage with the exclusive
>>> file opening model: a destination VM is started waiting for incomming
>>> migration with a fake image drive, and later, on the last migration
>>> phase, the fake image file is replaced with the real one.
>>>
>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>> ---
>>>   hw/core/qdev-properties-system.c | 89 
>>> +++++++++++++++++++++++++++-----
>>>   1 file changed, 77 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/hw/core/qdev-properties-system.c 
>>> b/hw/core/qdev-properties-system.c
>>> index c534590dcd..aaab1370a4 100644
>>> --- a/hw/core/qdev-properties-system.c
>>> +++ b/hw/core/qdev-properties-system.c
>>> @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, 
>>> Property *prop,
>>>     /* --- drive --- */
>>>   -static void do_parse_drive(DeviceState *dev, const char *str, 
>>> void **ptr,
>>> -                           const char *propname, bool iothread, 
>>> Error **errp)
>>> +static void do_parse_drive_realized(DeviceState *dev, const char *str,
>>> +                                    void **ptr, const char *propname,
>>> +                                    bool iothread, Error **errp)
>>> +{
>>> +    BlockBackend *blk = *ptr;
>>> +    BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
>>> +    int ret;
>>> +    bool blk_created = false;
>>> +
>>> +    if (!bs) {
>>> +        error_setg(errp, "Can't find blockdev '%s'", str);
>>> +        return;
>>> +    }
>>> +
>>> +    if (!blk) {
>>> +        AioContext *ctx = iothread ? bdrv_get_aio_context(bs) :
>>> +                                     qemu_get_aio_context();
>>> +        blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
>>> +        blk_created = true;
>>
>> Actually, I have concerns about situation where blk=null.
>>
>> Is there any case when scsi-hd (or others) doesn't have a blk 
>> assigned and it's legal?
>>
>>> +    } else {
>>> +        if (blk_bs(blk)) {
>>> +            blk_remove_bs(blk);
>>> +        }
>>> +    }
>>> +
>>> +    ret = blk_insert_bs(blk, bs, errp);
>>> +
>>> +    if (!ret && blk_created) {
>>> +        if (blk_attach_dev(blk, dev) < 0) {
>>> +            /*
>>> +             * Shouldn't be any errors here since we just created
>>> +             * the new blk because the device doesn't have any.
>>> +             * Leave the message here in case blk_attach_dev is 
>>> changed
>>> +             */
>>> +             error_setg(errp, "Can't attach drive '%s' to device 
>>> '%s'",
>>> +                        str, object_get_typename(OBJECT(dev)));
>>> +        } else {
>>> +            *ptr = blk;
>>> +        }
>>> +    }
> Another problem here, is that the "size" of the device dev may not 
> match after setting a drive.
> So, we should update it after the drive setting.
> It was found, that it could be done by calling 
> BlockDevOps.bdrv_parent_cb_resize.
>
> But I have some concerns about doing it so. In the case of virtio scsi 
> disk we have the following callstack
>
>     bdrv_parent_cb_resize calls() ->
>         scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) ->
>             virtio_scsi_change ->
>                 virtio_scsi_push_event(s, dev, 
> VIRTIO_SCSI_T_PARAM_CHANGE,
>                                                     sense.asc | 
> (sense.ascq << 8));
>
>
> virtio_scsi_change  pushes the event to the guest to make the guest 
> ask for size refreshing.
> If I'm not mistaken, here we can get a race condition when some 
> another request is processed with an unchanged
> size and then the size changing request is processed.
>
> I didn't find a better way to update device size so any comments are 
> welcome.
>
> Thanks!
>
> Denis
>>> +
>>> +    if (blk_created) {
>>> +        blk_unref(blk);
>>> +    }
>>> +}
>>> +
>>> +static void do_parse_drive_unrealized(DeviceState *dev, const char 
>>> *str,
>>> +                                      void **ptr, const char 
>>> *propname,
>>> +                                      bool iothread, Error **errp)
>>>   {
>>>       BlockBackend *blk;
>>>       bool blk_created = false;
>>> @@ -137,18 +184,34 @@ fail:
>>>       }
>>>   }
>>>   -static void parse_drive(DeviceState *dev, const char *str, void 
>>> **ptr,
>>> -                        const char *propname, Error **errp)
>>> -{
>>> -    do_parse_drive(dev, str, ptr, propname, false, errp);
>>> -}
>>> -
>>> -static void parse_drive_iothread(DeviceState *dev, const char *str, 
>>> void **ptr,
>>> +static void parse_drive_realized(DeviceState *dev, const char *str, 
>>> void **ptr,
>>>                                    const char *propname, Error **errp)
>>>   {
>>> -    do_parse_drive(dev, str, ptr, propname, true, errp);
>>> +    do_parse_drive_realized(dev, str, ptr, propname, false, errp);
>>>   }
>>>   +static void parse_drive_realized_iothread(DeviceState *dev, const 
>>> char *str,
>>> +                                          void **ptr, const char 
>>> *propname,
>>> +                                          Error **errp)
>>> +{
>>> +    do_parse_drive_realized(dev, str, ptr, propname, true, errp);
>>> +}
>>> +
>>> +static void parse_drive_unrealized(DeviceState *dev, const char *str,
>>> +                                   void **ptr, const char *propname,
>>> +                                   Error **errp)
>>> +{
>>> +    do_parse_drive_unrealized(dev, str, ptr, propname, false, errp);
>>> +}
>>> +
>>> +static void parse_drive_unrealized_iothread(DeviceState *dev, const 
>>> char *str,
>>> +                                            void **ptr, const char 
>>> *propname,
>>> +                                            Error **errp)
>>> +{
>>> +    do_parse_drive_unrealized(dev, str, ptr, propname, true, errp);
>>> +}
>>> +
>>> +
>>>   static void release_drive(Object *obj, const char *name, void 
>>> *opaque)
>>>   {
>>>       DeviceState *dev = DEVICE(obj);
>>> @@ -188,13 +251,15 @@ static void get_drive(Object *obj, Visitor *v, 
>>> const char *name, void *opaque,
>>>   static void set_drive(Object *obj, Visitor *v, const char *name, 
>>> void *opaque,
>>>                         Error **errp)
>>>   {
>>> -    set_pointer(obj, v, opaque, NULL, parse_drive, name, errp);
>>> +    set_pointer(obj, v, opaque, parse_drive_realized, 
>>> parse_drive_unrealized,
>>> +                name, errp);
>>>   }
>>>     static void set_drive_iothread(Object *obj, Visitor *v, const 
>>> char *name,
>>>                                  void *opaque, Error **errp)
>>>   {
>>> -    set_pointer(obj, v, opaque, NULL, parse_drive_iothread, name, 
>>> errp);
>>> +    set_pointer(obj, v, opaque, parse_drive_realized_iothread,
>>> +                parse_drive_unrealized_iothread, name, errp);
>>>   }
>>>     const PropertyInfo qdev_prop_drive = {
>
Kevin Wolf Dec. 13, 2019, 10:32 a.m. UTC | #4
Am 18.11.2019 um 11:50 hat Denis Plotnikov geschrieben:
> 
> 
> On 10.11.2019 22:08, Denis Plotnikov wrote:
> >
> > On 10.11.2019 22:03, Denis Plotnikov wrote:
> >> This allows to change (replace) the file on a block device and is useful
> >> to workaround exclusive file access restrictions, e.g. to implement VM
> >> migration with a shared disk stored on some storage with the exclusive
> >> file opening model: a destination VM is started waiting for incomming
> >> migration with a fake image drive, and later, on the last migration
> >> phase, the fake image file is replaced with the real one.
> >>
> >> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
> >> ---
> >>   hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++-----
> >>   1 file changed, 77 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/hw/core/qdev-properties-system.c 
> >> b/hw/core/qdev-properties-system.c
> >> index c534590dcd..aaab1370a4 100644
> >> --- a/hw/core/qdev-properties-system.c
> >> +++ b/hw/core/qdev-properties-system.c
> >> @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v, 
> >> Property *prop,
> >>     /* --- drive --- */
> >>   -static void do_parse_drive(DeviceState *dev, const char *str, void 
> >> **ptr,
> >> -                           const char *propname, bool iothread, 
> >> Error **errp)
> >> +static void do_parse_drive_realized(DeviceState *dev, const char *str,
> >> +                                    void **ptr, const char *propname,
> >> +                                    bool iothread, Error **errp)
> >> +{
> >> +    BlockBackend *blk = *ptr;
> >> +    BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
> >> +    int ret;
> >> +    bool blk_created = false;
> >> +
> >> +    if (!bs) {
> >> +        error_setg(errp, "Can't find blockdev '%s'", str);
> >> +        return;
> >> +    }
> >> +
> >> +    if (!blk) {
> >> +        AioContext *ctx = iothread ? bdrv_get_aio_context(bs) :
> >> +                                     qemu_get_aio_context();
> >> +        blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
> >> +        blk_created = true;
> >
> > Actually, I have concerns about situation where blk=null.
> >
> > Is there any case when scsi-hd (or others) doesn't have a blk assigned 
> > and it's legal?

No, block devices will always have a BlockBackend, even if it doesn't
have a root node inserted.

> >> +    } else {
> >> +        if (blk_bs(blk)) {
> >> +            blk_remove_bs(blk);
> >> +        }
> >> +    }
> >> +
> >> +    ret = blk_insert_bs(blk, bs, errp);
> >> +
> >> +    if (!ret && blk_created) {
> >> +        if (blk_attach_dev(blk, dev) < 0) {
> >> +            /*
> >> +             * Shouldn't be any errors here since we just created
> >> +             * the new blk because the device doesn't have any.
> >> +             * Leave the message here in case blk_attach_dev is changed
> >> +             */
> >> +             error_setg(errp, "Can't attach drive '%s' to device '%s'",
> >> +                        str, object_get_typename(OBJECT(dev)));
> >> +        } else {
> >> +            *ptr = blk;
> >> +        }
> >> +    }
> Another problem here, is that the "size" of the device dev may not match 
> after setting a drive.
> So, we should update it after the drive setting.
> It was found, that it could be done by calling 
> BlockDevOps.bdrv_parent_cb_resize.
> 
> But I have some concerns about doing it so. In the case of virtio scsi 
> disk we have the following callstack
> 
>      bdrv_parent_cb_resize calls() ->
>          scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) ->
>              virtio_scsi_change ->
>                  virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
>                                                      sense.asc | 
> (sense.ascq << 8));

I think the safest option for now (and which should solve the case you
want to address) is checking whether old and new size match and
returning an error otherwise.

> virtio_scsi_change  pushes the event to the guest to make the guest
> ask for size refreshing.  If I'm not mistaken, here we can get a race
> condition when some another request is processed with an unchanged
> size and then the size changing request is processed.

I think this is actually a problem even without resizing: We need to
quiesce the device between removing the old root and inserting the new
one. They way to achieve this is probably by splitting blk_drain() into
a blk_drain_begin()/end() and then draining the BlockBackend here while
we're working on it.

Kevin
Denis Plotnikov Dec. 16, 2019, 2:51 p.m. UTC | #5
On 13.12.2019 13:32, Kevin Wolf wrote:
> Am 18.11.2019 um 11:50 hat Denis Plotnikov geschrieben:
>>
>> On 10.11.2019 22:08, Denis Plotnikov wrote:
>>> On 10.11.2019 22:03, Denis Plotnikov wrote:
>>>> This allows to change (replace) the file on a block device and is useful
>>>> to workaround exclusive file access restrictions, e.g. to implement VM
>>>> migration with a shared disk stored on some storage with the exclusive
>>>> file opening model: a destination VM is started waiting for incomming
>>>> migration with a fake image drive, and later, on the last migration
>>>> phase, the fake image file is replaced with the real one.
>>>>
>>>> Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
>>>> ---
>>>>    hw/core/qdev-properties-system.c | 89 +++++++++++++++++++++++++++-----
>>>>    1 file changed, 77 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/hw/core/qdev-properties-system.c
>>>> b/hw/core/qdev-properties-system.c
>>>> index c534590dcd..aaab1370a4 100644
>>>> --- a/hw/core/qdev-properties-system.c
>>>> +++ b/hw/core/qdev-properties-system.c
>>>> @@ -79,8 +79,55 @@ static void set_pointer(Object *obj, Visitor *v,
>>>> Property *prop,
>>>>      /* --- drive --- */
>>>>    -static void do_parse_drive(DeviceState *dev, const char *str, void
>>>> **ptr,
>>>> -                           const char *propname, bool iothread,
>>>> Error **errp)
>>>> +static void do_parse_drive_realized(DeviceState *dev, const char *str,
>>>> +                                    void **ptr, const char *propname,
>>>> +                                    bool iothread, Error **errp)
>>>> +{
>>>> +    BlockBackend *blk = *ptr;
>>>> +    BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
>>>> +    int ret;
>>>> +    bool blk_created = false;
>>>> +
>>>> +    if (!bs) {
>>>> +        error_setg(errp, "Can't find blockdev '%s'", str);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (!blk) {
>>>> +        AioContext *ctx = iothread ? bdrv_get_aio_context(bs) :
>>>> +                                     qemu_get_aio_context();
>>>> +        blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
>>>> +        blk_created = true;
>>> Actually, I have concerns about situation where blk=null.
>>>
>>> Is there any case when scsi-hd (or others) doesn't have a blk assigned
>>> and it's legal?
> No, block devices will always have a BlockBackend, even if it doesn't
> have a root node inserted.
>
>>>> +    } else {
>>>> +        if (blk_bs(blk)) {
>>>> +            blk_remove_bs(blk);
>>>> +        }
>>>> +    }
>>>> +
>>>> +    ret = blk_insert_bs(blk, bs, errp);
>>>> +
>>>> +    if (!ret && blk_created) {
>>>> +        if (blk_attach_dev(blk, dev) < 0) {
>>>> +            /*
>>>> +             * Shouldn't be any errors here since we just created
>>>> +             * the new blk because the device doesn't have any.
>>>> +             * Leave the message here in case blk_attach_dev is changed
>>>> +             */
>>>> +             error_setg(errp, "Can't attach drive '%s' to device '%s'",
>>>> +                        str, object_get_typename(OBJECT(dev)));
>>>> +        } else {
>>>> +            *ptr = blk;
>>>> +        }
>>>> +    }
>> Another problem here, is that the "size" of the device dev may not match
>> after setting a drive.
>> So, we should update it after the drive setting.
>> It was found, that it could be done by calling
>> BlockDevOps.bdrv_parent_cb_resize.
>>
>> But I have some concerns about doing it so. In the case of virtio scsi
>> disk we have the following callstack
>>
>>       bdrv_parent_cb_resize calls() ->
>>           scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) ->
>>               virtio_scsi_change ->
>>                   virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
>>                                                       sense.asc |
>> (sense.ascq << 8));
> I think the safest option for now (and which should solve the case you
> want to address) is checking whether old and new size match and
> returning an error otherwise.
>
>> virtio_scsi_change  pushes the event to the guest to make the guest
>> ask for size refreshing.  If I'm not mistaken, here we can get a race
>> condition when some another request is processed with an unchanged
>> size and then the size changing request is processed.
> I think this is actually a problem even without resizing: We need to
> quiesce the device between removing the old root and inserting the new
> one. They way to achieve this is probably by splitting blk_drain() into
> a blk_drain_begin()/end() and then draining the BlockBackend here while
> we're working on it.
>
> Kevin
Why don't we use bdrv_drained_begin/end directly? This is what blk_drain 
does.
If we want to split blk_drain we must keep track if blk's brdv isn't 
change otherwise we can end up with drain_begin one and drain end 
another bdrv if we do remove/insert in between.

Another thing is should we really care about this if we have VM stopped 
and the sizes matched?

Denis
>
Kevin Wolf Dec. 16, 2019, 3:38 p.m. UTC | #6
Am 16.12.2019 um 15:51 hat Denis Plotnikov geschrieben:
> On 13.12.2019 13:32, Kevin Wolf wrote:
> > Am 18.11.2019 um 11:50 hat Denis Plotnikov geschrieben:
> >> Another problem here, is that the "size" of the device dev may not match
> >> after setting a drive.
> >> So, we should update it after the drive setting.
> >> It was found, that it could be done by calling
> >> BlockDevOps.bdrv_parent_cb_resize.
> >>
> >> But I have some concerns about doing it so. In the case of virtio scsi
> >> disk we have the following callstack
> >>
> >>       bdrv_parent_cb_resize calls() ->
> >>           scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) ->
> >>               virtio_scsi_change ->
> >>                   virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
> >>                                                       sense.asc |
> >> (sense.ascq << 8));
> > I think the safest option for now (and which should solve the case you
> > want to address) is checking whether old and new size match and
> > returning an error otherwise.
> >
> >> virtio_scsi_change  pushes the event to the guest to make the guest
> >> ask for size refreshing.  If I'm not mistaken, here we can get a race
> >> condition when some another request is processed with an unchanged
> >> size and then the size changing request is processed.
> > I think this is actually a problem even without resizing: We need to
> > quiesce the device between removing the old root and inserting the new
> > one. They way to achieve this is probably by splitting blk_drain() into
> > a blk_drain_begin()/end() and then draining the BlockBackend here while
> > we're working on it.
> >
> > Kevin
> Why don't we use bdrv_drained_begin/end directly? This is what
> blk_drain does.
> If we want to split blk_drain we must keep track if blk's brdv isn't
> change otherwise we can end up with drain_begin one and drain end
> another bdrv if we do remove/insert in between.

Hmm, true, we would have to keep track of draining at the BlockBackend
level and consider it in blk_remove_bs() and blk_insert_bs(). Maybe
that's not worth it.

If we use bdrv_drained_begin/end directly, I think we need to drain both
the old and the new root node during the process.

> Another thing is should we really care about this if we have VM
> stopped and the sizes matched?

How do we know that the VM is stopped? And why would we require this?
Your patch doesn't implement or at least check this, and it seems a bit
impractical for example when all you want is inserting a filter node.

Kevin
Denis Plotnikov Dec. 16, 2019, 3:58 p.m. UTC | #7
On 16.12.2019 18:38, Kevin Wolf wrote:
> Am 16.12.2019 um 15:51 hat Denis Plotnikov geschrieben:
>> On 13.12.2019 13:32, Kevin Wolf wrote:
>>> Am 18.11.2019 um 11:50 hat Denis Plotnikov geschrieben:
>>>> Another problem here, is that the "size" of the device dev may not match
>>>> after setting a drive.
>>>> So, we should update it after the drive setting.
>>>> It was found, that it could be done by calling
>>>> BlockDevOps.bdrv_parent_cb_resize.
>>>>
>>>> But I have some concerns about doing it so. In the case of virtio scsi
>>>> disk we have the following callstack
>>>>
>>>>        bdrv_parent_cb_resize calls() ->
>>>>            scsi_device_report_change(dev, SENSE_CODE(CAPACITY_CHANGED)) ->
>>>>                virtio_scsi_change ->
>>>>                    virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
>>>>                                                        sense.asc |
>>>> (sense.ascq << 8));
>>> I think the safest option for now (and which should solve the case you
>>> want to address) is checking whether old and new size match and
>>> returning an error otherwise.
>>>
>>>> virtio_scsi_change  pushes the event to the guest to make the guest
>>>> ask for size refreshing.  If I'm not mistaken, here we can get a race
>>>> condition when some another request is processed with an unchanged
>>>> size and then the size changing request is processed.
>>> I think this is actually a problem even without resizing: We need to
>>> quiesce the device between removing the old root and inserting the new
>>> one. They way to achieve this is probably by splitting blk_drain() into
>>> a blk_drain_begin()/end() and then draining the BlockBackend here while
>>> we're working on it.
>>>
>>> Kevin
>> Why don't we use bdrv_drained_begin/end directly? This is what
>> blk_drain does.
>> If we want to split blk_drain we must keep track if blk's brdv isn't
>> change otherwise we can end up with drain_begin one and drain end
>> another bdrv if we do remove/insert in between.
> Hmm, true, we would have to keep track of draining at the BlockBackend
> level and consider it in blk_remove_bs() and blk_insert_bs(). Maybe
> that's not worth it.
>
> If we use bdrv_drained_begin/end directly, I think we need to drain both
> the old and the new root node during the process.
>
>> Another thing is should we really care about this if we have VM
>> stopped and the sizes matched?
> How do we know that the VM is stopped? And why would we require this?
I implied the scenario of VM migration over a shared storage with an 
exclusive file access model.
The VM is stopped on drive changing phase.

If there is no use to require it, than ok.

Denis
> Your patch doesn't implement or at least check this, and it seems a bit
> impractical for example when all you want is inserting a filter node.
>
> Kevin
diff mbox series

Patch

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index c534590dcd..aaab1370a4 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -79,8 +79,55 @@  static void set_pointer(Object *obj, Visitor *v, Property *prop,
 
 /* --- drive --- */
 
-static void do_parse_drive(DeviceState *dev, const char *str, void **ptr,
-                           const char *propname, bool iothread, Error **errp)
+static void do_parse_drive_realized(DeviceState *dev, const char *str,
+                                    void **ptr, const char *propname,
+                                    bool iothread, Error **errp)
+{
+    BlockBackend *blk = *ptr;
+    BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
+    int ret;
+    bool blk_created = false;
+
+    if (!bs) {
+        error_setg(errp, "Can't find blockdev '%s'", str);
+        return;
+    }
+
+    if (!blk) {
+        AioContext *ctx = iothread ? bdrv_get_aio_context(bs) :
+                                     qemu_get_aio_context();
+        blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
+        blk_created = true;
+    } else {
+        if (blk_bs(blk)) {
+            blk_remove_bs(blk);
+        }
+    }
+
+    ret = blk_insert_bs(blk, bs, errp);
+
+    if (!ret && blk_created) {
+        if (blk_attach_dev(blk, dev) < 0) {
+            /*
+             * Shouldn't be any errors here since we just created
+             * the new blk because the device doesn't have any.
+             * Leave the message here in case blk_attach_dev is changed
+             */
+             error_setg(errp, "Can't attach drive '%s' to device '%s'",
+                        str, object_get_typename(OBJECT(dev)));
+        } else {
+            *ptr = blk;
+        }
+    }
+
+    if (blk_created) {
+        blk_unref(blk);
+    }
+}
+
+static void do_parse_drive_unrealized(DeviceState *dev, const char *str,
+                                      void **ptr, const char *propname,
+                                      bool iothread, Error **errp)
 {
     BlockBackend *blk;
     bool blk_created = false;
@@ -137,18 +184,34 @@  fail:
     }
 }
 
-static void parse_drive(DeviceState *dev, const char *str, void **ptr,
-                        const char *propname, Error **errp)
-{
-    do_parse_drive(dev, str, ptr, propname, false, errp);
-}
-
-static void parse_drive_iothread(DeviceState *dev, const char *str, void **ptr,
+static void parse_drive_realized(DeviceState *dev, const char *str, void **ptr,
                                  const char *propname, Error **errp)
 {
-    do_parse_drive(dev, str, ptr, propname, true, errp);
+    do_parse_drive_realized(dev, str, ptr, propname, false, errp);
 }
 
+static void parse_drive_realized_iothread(DeviceState *dev, const char *str,
+                                          void **ptr, const char *propname,
+                                          Error **errp)
+{
+    do_parse_drive_realized(dev, str, ptr, propname, true, errp);
+}
+
+static void parse_drive_unrealized(DeviceState *dev, const char *str,
+                                   void **ptr, const char *propname,
+                                   Error **errp)
+{
+    do_parse_drive_unrealized(dev, str, ptr, propname, false, errp);
+}
+
+static void parse_drive_unrealized_iothread(DeviceState *dev, const char *str,
+                                            void **ptr, const char *propname,
+                                            Error **errp)
+{
+    do_parse_drive_unrealized(dev, str, ptr, propname, true, errp);
+}
+
+
 static void release_drive(Object *obj, const char *name, void *opaque)
 {
     DeviceState *dev = DEVICE(obj);
@@ -188,13 +251,15 @@  static void get_drive(Object *obj, Visitor *v, const char *name, void *opaque,
 static void set_drive(Object *obj, Visitor *v, const char *name, void *opaque,
                       Error **errp)
 {
-    set_pointer(obj, v, opaque, NULL, parse_drive, name, errp);
+    set_pointer(obj, v, opaque, parse_drive_realized, parse_drive_unrealized,
+                name, errp);
 }
 
 static void set_drive_iothread(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
 {
-    set_pointer(obj, v, opaque, NULL, parse_drive_iothread, name, errp);
+    set_pointer(obj, v, opaque, parse_drive_realized_iothread,
+                parse_drive_unrealized_iothread, name, errp);
 }
 
 const PropertyInfo qdev_prop_drive = {