Patchwork [v9,5/6] Qemu: Framework for reopening images safely

login
register
mail settings
Submitter Supriya Kannery
Date Nov. 11, 2011, 6:48 a.m.
Message ID <20111111064818.15024.2323.sendpatchset@skannery.in.ibm.com>
Download mbox | patch
Permalink /patch/125099/
State New
Headers show

Comments

Supriya Kannery - Nov. 11, 2011, 6:48 a.m.
Struct BDRVReopenState, along with three reopen-related functions, is
introduced for handling the reopen state of images safely. This can be
extended by each of the block drivers to reopen their respective
image files.

Signed-off-by: Supriya Kannery <supriyak@linux.vnet.ibm.com>
Luiz Capitulino - Nov. 17, 2011, 1:16 p.m.
On Fri, 11 Nov 2011 12:18:18 +0530
Supriya Kannery <supriyak@linux.vnet.ibm.com> wrote:

> Struct BDRVReopenState along with three reopen related functions
> introduced for handling reopen state of images safely. This can be
> extended by each of the block drivers to reopen respective
> image files.

Shouldn't this patch come before the one introducing the QMP command?

> 
> Signed-off-by: Supriya Kannery <supriyak@linux.vnet.ibm.com>
> 
> Index: qemu/block.c
> ===================================================================
> --- qemu.orig/block.c
> +++ qemu/block.c
> @@ -696,10 +696,33 @@ unlink_and_fail:
>      return ret;
>  }
>  
> +int bdrv_reopen_prepare(BlockDriverState *bs, BDRVReopenState **prs, int flags)
> +{
> +     BlockDriver *drv = bs->drv;
> +
> +     return drv->bdrv_reopen_prepare(bs, prs, flags);
> +}
> +
> +void bdrv_reopen_commit(BlockDriverState *bs, BDRVReopenState *rs, int flags)
> +{
> +    BlockDriver *drv = bs->drv;
> +
> +    drv->bdrv_reopen_commit(bs, rs, flags);
> +    bs->open_flags = flags;
> +}
> +
> +void bdrv_reopen_abort(BlockDriverState *bs, BDRVReopenState *rs)
> +{
> +    BlockDriver *drv = bs->drv;
> +
> +    drv->bdrv_reopen_abort(bs, rs);
> +}
> +
>  int bdrv_reopen(BlockDriverState *bs, int bdrv_flags)
>  {
>      BlockDriver *drv = bs->drv;
>      int ret = 0, open_flags;
> +    BDRVReopenState *reopen_state = NULL;
>  
>      /* Quiesce IO for the given block device */
>      qemu_aio_flush();
> @@ -708,17 +731,31 @@ int bdrv_reopen(BlockDriverState *bs, in
>          qerror_report(QERR_DATA_SYNC_FAILED, bs->device_name);
>          return ret;
>      }
> -    open_flags = bs->open_flags;
> -    bdrv_close(bs);
>  
> -    ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
> -    if (ret < 0) {
> -        /* Reopen failed. Try to open with original flags */
> -        qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
> -        ret = bdrv_open(bs, bs->filename, open_flags, drv);
> +    /* Use driver specific reopen() if available */
> +    if (drv->bdrv_reopen_prepare) {
> +        ret = bdrv_reopen_prepare(bs, &reopen_state, bdrv_flags);
> +         if (ret < 0) {
> +            bdrv_reopen_abort(bs, reopen_state);
> +            qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
> +            return ret;
> +        }
> +
> +        bdrv_reopen_commit(bs, reopen_state, bdrv_flags);
> +
> +    } else {
> +       open_flags = bs->open_flags;
> +       bdrv_close(bs);
> +
> +       ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
>          if (ret < 0) {
> -            /* Reopen failed with orig and modified flags */
> -            abort();
> +            /* Reopen failed. Try to open with original flags */
> +            qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
> +            ret = bdrv_open(bs, bs->filename, open_flags, drv);
> +            if (ret < 0) {
> +                /* Reopen failed with orig and modified flags */
> +                bs->drv = NULL;
> +            }
>          }
>      }
>  
> Index: qemu/block_int.h
> ===================================================================
> --- qemu.orig/block_int.h
> +++ qemu/block_int.h
> @@ -56,6 +56,14 @@ struct BlockDriver {
>      int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
>      int (*bdrv_probe_device)(const char *filename);
>      int (*bdrv_open)(BlockDriverState *bs, int flags);
> +
> +    /* For handling image reopen for split or non-split files */
> +    int (*bdrv_reopen_prepare)(BlockDriverState *bs,
> +                               BDRVReopenState **prs,
> +                               int flags);
> +    void (*bdrv_reopen_commit)(BlockDriverState *bs, BDRVReopenState *rs,
> +                               int flags);
> +    void (*bdrv_reopen_abort)(BlockDriverState *bs, BDRVReopenState *rs);
>      int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags);
>      int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
>                       uint8_t *buf, int nb_sectors);
> @@ -213,6 +221,11 @@ struct BlockDriverState {
>      void *private;
>  };
>  
> +struct BDRVReopenState {
> +    BlockDriverState *bs;
> +    int reopen_flags;
> +};
> +
>  struct BlockDriverAIOCB {
>      AIOPool *pool;
>      BlockDriverState *bs;
> Index: qemu/qemu-common.h
> ===================================================================
> --- qemu.orig/qemu-common.h
> +++ qemu/qemu-common.h
> @@ -203,6 +203,7 @@ typedef struct NICInfo NICInfo;
>  typedef struct HCIInfo HCIInfo;
>  typedef struct AudioState AudioState;
>  typedef struct BlockDriverState BlockDriverState;
> +typedef struct BDRVReopenState BDRVReopenState;
>  typedef struct DriveInfo DriveInfo;
>  typedef struct DisplayState DisplayState;
>  typedef struct DisplayChangeListener DisplayChangeListener;
> Index: qemu/block.h
> ===================================================================
> --- qemu.orig/block.h
> +++ qemu/block.h
> @@ -105,6 +105,9 @@ int bdrv_file_open(BlockDriverState **pb
>  int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
>                BlockDriver *drv);
>  int bdrv_reopen(BlockDriverState *bs, int bdrv_flags);
> +int bdrv_reopen_prepare(BlockDriverState *bs, BDRVReopenState **prs, int flags);
> +void bdrv_reopen_commit(BlockDriverState *bs, BDRVReopenState *rs,  int flags);
> +void bdrv_reopen_abort(BlockDriverState *bs, BDRVReopenState *rs);
>  void bdrv_close(BlockDriverState *bs);
>  int bdrv_attach_dev(BlockDriverState *bs, void *dev);
>  void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
>
Stefan Hajnoczi - Nov. 17, 2011, 2:36 p.m.
On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
<supriyak@linux.vnet.ibm.com> wrote:
> @@ -708,17 +731,31 @@ int bdrv_reopen(BlockDriverState *bs, in
>         qerror_report(QERR_DATA_SYNC_FAILED, bs->device_name);
>         return ret;
>     }
> -    open_flags = bs->open_flags;
> -    bdrv_close(bs);
>
> -    ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
> -    if (ret < 0) {
> -        /* Reopen failed. Try to open with original flags */
> -        qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
> -        ret = bdrv_open(bs, bs->filename, open_flags, drv);
> +    /* Use driver specific reopen() if available */
> +    if (drv->bdrv_reopen_prepare) {

This seems weird to me because we're saying a driver may have
drv->bdrv_reopen_prepare == NULL but the public bdrv_reopen_prepare()
function doesn't check and return -ENOTSUP.

This check can be moved into bdrv_reopen_prepare().  We can test for
the -ENOTSUP return value here instead.

> +        ret = bdrv_reopen_prepare(bs, &reopen_state, bdrv_flags);
> +         if (ret < 0) {

Indentation is off here.

Stefan
supriya kannery - Nov. 21, 2011, 12:13 p.m.
Stefan Hajnoczi wrote:
> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
> <supriyak@linux.vnet.ibm.com> wrote:
>   
>> @@ -708,17 +731,31 @@ int bdrv_reopen(BlockDriverState *bs, in
>>         qerror_report(QERR_DATA_SYNC_FAILED, bs->device_name);
>>         return ret;
>>     }
>> -    open_flags = bs->open_flags;
>> -    bdrv_close(bs);
>>
>> -    ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
>> -    if (ret < 0) {
>> -        /* Reopen failed. Try to open with original flags */
>> -        qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
>> -        ret = bdrv_open(bs, bs->filename, open_flags, drv);
>> +    /* Use driver specific reopen() if available */
>> +    if (drv->bdrv_reopen_prepare) {
>>     
>
> This seems weird to me because we're saying a driver may have
> drv->bdrv_reopen_prepare == NULL but the public bdrv_reopen_prepare()
> function doesn't check and return -ENOTSUP.
>   

If drv->bdrv_reopen_prepare == NULL, then we are not calling the
public bdrv_reopen_prepare() at all. Unless we later call the public
bdrv_reopen_prepare() from elsewhere without checking
drv->bdrv_reopen_prepare, a check for -ENOTSUP inside the public one
is not needed, right?

Also, we are handling reopening even for those drivers which don't
have their own bdrv_reopen_prepare defined, by taking the "else"
control path. So the condition for reporting "ENOTSUP" shouldn't come
up as of now. Please let me know your thoughts.

> This check can be moved into bdrv_reopen_prepare().  We can test for
> the -ENOTSUP return value here instead.
>
>   
>> +        ret = bdrv_reopen_prepare(bs, &reopen_state, bdrv_flags);
>> +         if (ret < 0) {
>>     
>
> Indentation is off here.
>   

sure..will take care in next version.
> Stefan
>
>
Stefan Hajnoczi - Nov. 21, 2011, 2:31 p.m.
On Mon, Nov 21, 2011 at 12:13 PM, supriya kannery <supriyak@in.ibm.com> wrote:
> Stefan Hajnoczi wrote:
>>
>> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
>> <supriyak@linux.vnet.ibm.com> wrote:
>>
>>>
>>> @@ -708,17 +731,31 @@ int bdrv_reopen(BlockDriverState *bs, in
>>>        qerror_report(QERR_DATA_SYNC_FAILED, bs->device_name);
>>>        return ret;
>>>    }
>>> -    open_flags = bs->open_flags;
>>> -    bdrv_close(bs);
>>>
>>> -    ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
>>> -    if (ret < 0) {
>>> -        /* Reopen failed. Try to open with original flags */
>>> -        qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
>>> -        ret = bdrv_open(bs, bs->filename, open_flags, drv);
>>> +    /* Use driver specific reopen() if available */
>>> +    if (drv->bdrv_reopen_prepare) {
>>>
>>
>> This seems weird to me because we're saying a driver may have
>> drv->bdrv_reopen_prepare == NULL but the public bdrv_reopen_prepare()
>> function doesn't check and return -ENOTSUP.
>>
>
> If drv->bdrv_reopen_prepare == NULL , then we are not calling the
> publick bdrv_reopen_prepare at all. Unless we later call  public
> bdrv_reopen_prepare
> from elsewhere without checking drv->bdrv_reopen_prepare,  a check for
> -ENOTSUP inside the public one is not needed right?
>
> Also, we are handling reopening for even those drivers which don't
> have its own bdrv_reopen_prepare defined, by taking the "else"
> control path. So condition for reporting "ENOTSUP" shouldn't come
> up as of now. Please let me know your thoughts.

How does VMDK implement its prepare/commit/abort?  It needs to use the
"public" bdrv_reopen_prepare() function on its image files.

BTW I think the bdrv_reopen_*() functions should go in block_int.h and
not block.h.  They are visible to the block layer but not public to
the rest of QEMU, which must use the bdrv_reopen() interface only.

I think what's really missing is a way to tie this all together.  You
have posted raw format and raw-posix protocol patches.  But we need to
cover image formats, where VMDK is the multi-file special case and
qcow2/qed/etc are simpler but also need to be supported.

Right now anything but raw-posix is still closing and reopening.  By
adding support for image formats I think you'll find the right way to
structure this code.

Stefan
supriya kannery - Nov. 22, 2011, 10:24 a.m.
Stefan Hajnoczi wrote:
> On Mon, Nov 21, 2011 at 12:13 PM, supriya kannery <supriyak@in.ibm.com> wrote:
>   
>> Stefan Hajnoczi wrote:
>>     
>>> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
>>> <supriyak@linux.vnet.ibm.com> wrote:
>>>
>>>       
>>>> @@ -708,17 +731,31 @@ int bdrv_reopen(BlockDriverState *bs, in
>>>>        qerror_report(QERR_DATA_SYNC_FAILED, bs->device_name);
>>>>        return ret;
>>>>    }
>>>> -    open_flags = bs->open_flags;
>>>> -    bdrv_close(bs);
>>>>
>>>> -    ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
>>>> -    if (ret < 0) {
>>>> -        /* Reopen failed. Try to open with original flags */
>>>> -        qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
>>>> -        ret = bdrv_open(bs, bs->filename, open_flags, drv);
>>>> +    /* Use driver specific reopen() if available */
>>>> +    if (drv->bdrv_reopen_prepare) {
>>>>
>>>>         
>>> This seems weird to me because we're saying a driver may have
>>> drv->bdrv_reopen_prepare == NULL but the public bdrv_reopen_prepare()
>>> function doesn't check and return -ENOTSUP.
>>>
>>>       
>> If drv->bdrv_reopen_prepare == NULL , then we are not calling the
>> publick bdrv_reopen_prepare at all. Unless we later call  public
>> bdrv_reopen_prepare
>> from elsewhere without checking drv->bdrv_reopen_prepare,  a check for
>> -ENOTSUP inside the public one is not needed right?
>>
>> Also, we are handling reopening for even those drivers which don't
>> have its own bdrv_reopen_prepare defined, by taking the "else"
>> control path. So condition for reporting "ENOTSUP" shouldn't come
>> up as of now. Please let me know your thoughts.
>>     
>
> How does VMDK implement its prepare/commit/abort?  It needs to use the
> "public" bdrv_reopen_prepare() function on its image files.
>   

bdrv_reopen() is the public interface which gets called by any of the
image formats. So VMDK or any other image format has to call bdrv_reopen(),
which decides whether to call the driver-specific prepare/commit/abort or
simply close and reopen the file.

> BTW I think the bdrv_reopen_*() functions should go in block_int.h and
> not block.h.  They are visible to the block layer but not public to
> the rest of QEMU, which must use the bdrv_reopen() interface only.
>
> I think what's really missing is a way to tie this all together.  You
> have posted raw format and raw-posix protocol patches.  But we need to
> cover image formats, where VMDK is the multi-file special case and
> qcow2/qed/etc are simpler but also need to be supported.
>
> Right now anything but raw-posix is still closing and reopening.  By
> adding support for image formats I think you'll find the right way to
> structure this code.
>
>   

Since only bdrv_reopen() is public, it is declared in block.h, and the
code is structured in a similar way to how bdrv_open() is done.

The else part in bdrv_reopen() will handle reopen requests
for images other than raw for now (simply close and reopen).

-thanks, Supriya
> Stefan
>
>
Kevin Wolf - Nov. 22, 2011, 11:04 a.m.
Am 22.11.2011 11:24, schrieb supriya kannery:
> Stefan Hajnoczi wrote:
>> On Mon, Nov 21, 2011 at 12:13 PM, supriya kannery <supriyak@in.ibm.com> wrote:
>>   
>>> Stefan Hajnoczi wrote:
>>>     
>>>> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
>>>> <supriyak@linux.vnet.ibm.com> wrote:
>>>>
>>>>       
>>>>> @@ -708,17 +731,31 @@ int bdrv_reopen(BlockDriverState *bs, in
>>>>>        qerror_report(QERR_DATA_SYNC_FAILED, bs->device_name);
>>>>>        return ret;
>>>>>    }
>>>>> -    open_flags = bs->open_flags;
>>>>> -    bdrv_close(bs);
>>>>>
>>>>> -    ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
>>>>> -    if (ret < 0) {
>>>>> -        /* Reopen failed. Try to open with original flags */
>>>>> -        qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
>>>>> -        ret = bdrv_open(bs, bs->filename, open_flags, drv);
>>>>> +    /* Use driver specific reopen() if available */
>>>>> +    if (drv->bdrv_reopen_prepare) {
>>>>>
>>>>>         
>>>> This seems weird to me because we're saying a driver may have
>>>> drv->bdrv_reopen_prepare == NULL but the public bdrv_reopen_prepare()
>>>> function doesn't check and return -ENOTSUP.
>>>>
>>>>       
>>> If drv->bdrv_reopen_prepare == NULL , then we are not calling the
>>> publick bdrv_reopen_prepare at all. Unless we later call  public
>>> bdrv_reopen_prepare
>>> from elsewhere without checking drv->bdrv_reopen_prepare,  a check for
>>> -ENOTSUP inside the public one is not needed right?
>>>
>>> Also, we are handling reopening for even those drivers which don't
>>> have its own bdrv_reopen_prepare defined, by taking the "else"
>>> control path. So condition for reporting "ENOTSUP" shouldn't come
>>> up as of now. Please let me know your thoughts.
>>>     
>>
>> How does VMDK implement its prepare/commit/abort?  It needs to use the
>> "public" bdrv_reopen_prepare() function on its image files.
>>   
> 
> bdrv_reopen() is the public interface which gets called by any of the 
> image formats.
> So VMDK or any image format has to call bdrv_reopen which decides to call
> driver specific prepare/commit/abort or simply close and reopen the file.

No, that doesn't work. In order to get all-or-nothing semantics, you
need to explicitly prepare all child images and only when you know the
results of all preparations, you can decide whether to commit or abort all.

Kevin
supriya kannery - Nov. 22, 2011, 11:16 a.m.
Kevin Wolf wrote:
> Am 22.11.2011 11:24, schrieb supriya kannery:
>   
>> Stefan Hajnoczi wrote:
>>     
>>> On Mon, Nov 21, 2011 at 12:13 PM, supriya kannery <supriyak@in.ibm.com> wrote:
>>>   
>>>       
>>>> Stefan Hajnoczi wrote:
>>>>     
>>>>         
>>>>> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
>>>>> <supriyak@linux.vnet.ibm.com> wrote:
>>>>>
>>>>>       
>>>>>           
>>>>>> @@ -708,17 +731,31 @@ int bdrv_reopen(BlockDriverState *bs, in
>>>>>>        qerror_report(QERR_DATA_SYNC_FAILED, bs->device_name);
>>>>>>        return ret;
>>>>>>    }
>>>>>> -    open_flags = bs->open_flags;
>>>>>> -    bdrv_close(bs);
>>>>>>
>>>>>> -    ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
>>>>>> -    if (ret < 0) {
>>>>>> -        /* Reopen failed. Try to open with original flags */
>>>>>> -        qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
>>>>>> -        ret = bdrv_open(bs, bs->filename, open_flags, drv);
>>>>>> +    /* Use driver specific reopen() if available */
>>>>>> +    if (drv->bdrv_reopen_prepare) {
>>>>>>
>>>>>>         
>>>>>>             
>>>>> This seems weird to me because we're saying a driver may have
>>>>> drv->bdrv_reopen_prepare == NULL but the public bdrv_reopen_prepare()
>>>>> function doesn't check and return -ENOTSUP.
>>>>>
>>>>>       
>>>>>           
>>>> If drv->bdrv_reopen_prepare == NULL , then we are not calling the
>>>> publick bdrv_reopen_prepare at all. Unless we later call  public
>>>> bdrv_reopen_prepare
>>>> from elsewhere without checking drv->bdrv_reopen_prepare,  a check for
>>>> -ENOTSUP inside the public one is not needed right?
>>>>
>>>> Also, we are handling reopening for even those drivers which don't
>>>> have its own bdrv_reopen_prepare defined, by taking the "else"
>>>> control path. So condition for reporting "ENOTSUP" shouldn't come
>>>> up as of now. Please let me know your thoughts.
>>>>     
>>>>         
>>> How does VMDK implement its prepare/commit/abort?  It needs to use the
>>> "public" bdrv_reopen_prepare() function on its image files.
>>>   
>>>       
>> bdrv_reopen() is the public interface which gets called by any of the 
>> image formats.
>> So VMDK or any image format has to call bdrv_reopen which decides to call
>> driver specific prepare/commit/abort or simply close and reopen the file.
>>     
>
> No, that doesn't work. In order to get all-or-nothing semantics, you
> need to explicitly prepare all child images and only when you know the
> results of all preparations, you can decide whether to commit or abort all.
>   
bdrv_reopen_prepare/commit/abort will be implemented specifically for VMDK
in vmdk.c. Then for vmdk, drv->bdrv_reopen_prepare() will handle preparing
the child images and return success to bdrv_reopen() only if all of them
get prepared successfully. We took up the prepare/commit/abort concept
with vmdk's special case of multiple files in mind.

So it is bdrv_reopen() which is public and called by a hostcache change
request for any of the image formats. It then routes the processing to the
respective prepare/commit/abort implemented by the drivers, including VMDK.
In cases where drivers don't have their own implementation, the default
route is taken, which is simply closing and opening the file.

- thanks, Supriya
> Kevin
>
>
Stefan Hajnoczi - Nov. 22, 2011, 11:49 a.m.
On Tue, Nov 22, 2011 at 11:16 AM, supriya kannery <supriyak@in.ibm.com> wrote:
> Kevin Wolf wrote:
>>
>> Am 22.11.2011 11:24, schrieb supriya kannery:
>>
>>>
>>> Stefan Hajnoczi wrote:
>>>
>>>>
>>>> On Mon, Nov 21, 2011 at 12:13 PM, supriya kannery <supriyak@in.ibm.com>
>>>> wrote:
>>>>
>>>>>
>>>>> Stefan Hajnoczi wrote:
>>>>>
>>>>>>
>>>>>> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
>>>>>> <supriyak@linux.vnet.ibm.com> wrote:
>>>>>>
>>>>>>
>>>>>>>
>>>>>>> @@ -708,17 +731,31 @@ int bdrv_reopen(BlockDriverState *bs, in
>>>>>>>       qerror_report(QERR_DATA_SYNC_FAILED, bs->device_name);
>>>>>>>       return ret;
>>>>>>>   }
>>>>>>> -    open_flags = bs->open_flags;
>>>>>>> -    bdrv_close(bs);
>>>>>>>
>>>>>>> -    ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
>>>>>>> -    if (ret < 0) {
>>>>>>> -        /* Reopen failed. Try to open with original flags */
>>>>>>> -        qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
>>>>>>> -        ret = bdrv_open(bs, bs->filename, open_flags, drv);
>>>>>>> +    /* Use driver specific reopen() if available */
>>>>>>> +    if (drv->bdrv_reopen_prepare) {
>>>>>>>
>>>>>>>
>>>>>>
>>>>>> This seems weird to me because we're saying a driver may have
>>>>>> drv->bdrv_reopen_prepare == NULL but the public bdrv_reopen_prepare()
>>>>>> function doesn't check and return -ENOTSUP.
>>>>>>
>>>>>>
>>>>>
>>>>> If drv->bdrv_reopen_prepare == NULL , then we are not calling the
>>>>> publick bdrv_reopen_prepare at all. Unless we later call  public
>>>>> bdrv_reopen_prepare
>>>>> from elsewhere without checking drv->bdrv_reopen_prepare,  a check for
>>>>> -ENOTSUP inside the public one is not needed right?
>>>>>
>>>>> Also, we are handling reopening for even those drivers which don't
>>>>> have its own bdrv_reopen_prepare defined, by taking the "else"
>>>>> control path. So condition for reporting "ENOTSUP" shouldn't come
>>>>> up as of now. Please let me know your thoughts.
>>>>>
>>>>
>>>> How does VMDK implement its prepare/commit/abort?  It needs to use the
>>>> "public" bdrv_reopen_prepare() function on its image files.
>>>>
>>>
>>> bdrv_reopen() is the public interface which gets called by any of the
>>> image formats.
>>> So VMDK or any image format has to call bdrv_reopen which decides to call
>>> driver specific prepare/commit/abort or simply close and reopen the file.
>>>
>>
>> No, that doesn't work. In order to get all-or-nothing semantics, you
>> need to explicitly prepare all child images and only when you know the
>> results of all preparations, you can decide whether to commit or abort
>> all.
>>
>
> bdrv_reopen_prepare/commit/abort will be implemented specific to VMDK in
> vmdk.c. Then for vmdk,
> drv->bdrv_reopen_prepare() will handle  preparing child images and return
> success to bdrv_reopen ()
> only if all of them get prepared successfully.  The prepare/commit/abort
> concept we took up considering
> vmdk's special case of multiple files.
>
> So it is bdrv_reopen() which is public and called by hostcache change
> request for any of the image formats.
> It then routes the processing to respective prepare/commit/abort implemented
> by the drivers, including VMDK.
> In cases where drivers don't have their own implementation, default route is
> taken which is simply
> closing and opening the file.

VMDK must call bdrv_reopen_prepare()/bdrv_reopen_commit()/bdrv_reopen_abort()
on its child images in order to support aborting when there is a
failure half-way through.  If it used bdrv_reopen() on its child
images then it could not roll back later when there is a failure on
the next child.

My bigger picture comment was that safe reopen support for raw-posix
is great but we should be able to take advantage of that for image
formats.  I'd rather see all image formats except VMDK have safe
reopen in this series than only raw-posix and vmdk.  How about the
generic prepare/commit/abort implementation that Kevin suggested in a
previous thread - something that qcow2, qed, etc can use in order to
get the safe reopen ability?

(If we don't get safe reopen support for qcow2, qed, etc. then dynamic
hostcache changing will take the unsafe reopen path in some of the
common use cases with image files.)

Stefan
Supriya Kannery - Nov. 23, 2011, 3:52 a.m.
On 11/22/2011 05:19 PM, Stefan Hajnoczi wrote:
> On Tue, Nov 22, 2011 at 11:16 AM, supriya kannery<supriyak@in.ibm.com>  wrote:
>> Kevin Wolf wrote:
>>>
>>> Am 22.11.2011 11:24, schrieb supriya kannery:
>>>
>>>>
>>>>>
>>>>> How does VMDK implement its prepare/commit/abort?  It needs to use the
>>>>> "public" bdrv_reopen_prepare() function on its image files.
>>>>>
>>>>
>>>> bdrv_reopen() is the public interface which gets called by any of the
>>>> image formats.
>>>> So VMDK or any image format has to call bdrv_reopen which decides to call
>>>> driver specific prepare/commit/abort or simply close and reopen the file.
>>>>
>>>
>>> No, that doesn't work. In order to get all-or-nothing semantics, you
>>> need to explicitly prepare all child images and only when you know the
>>> results of all preparations, you can decide whether to commit or abort
>>> all.
>>>
>>
>> bdrv_reopen_prepare/commit/abort will be implemented specific to VMDK in
>> vmdk.c. Then for vmdk,
>> drv->bdrv_reopen_prepare() will handle  preparing child images and return
>> success to bdrv_reopen ()
>> only if all of them get prepared successfully.  The prepare/commit/abort
>> concept we took up considering
>> vmdk's special case of multiple files.
>>
>> So it is bdrv_reopen() which is public and called by hostcache change
>> request for any of the image formats.
>> It then routes the processing to respective prepare/commit/abort implemented
>> by the drivers, including VMDK.
>> In cases where drivers don't have their own implementation, default route is
>> taken which is simply
>> closing and opening the file.
>
> VMDK must call bdrv_reopen_prepare()/bdrv_reopen_commit()/bdrv_reopen_abort()
> on its child images in order to support aborting when there is a
> failure half-way through.  If it used bdrv_reopen() on its child
> images then it could not roll back later when there is a failure on
> the next child.
>

I understand both your point and Kevin's now, after looking into the vmdk.c
code related to extents.
My initial understanding was that bdrv_reopen_prepare() implemented inside
vmdk.c could be called for handling the reopening of multiple child
images. But observing how vmdk_open() is implemented for child images, I
should follow that method. So I will make
bdrv_reopen_prepare()/commit/abort in block.c usable by vmdk as well
as other required image formats (like how raw.c is using them).

- thanks, Supriya

Patch

Index: qemu/block.c
===================================================================
--- qemu.orig/block.c
+++ qemu/block.c
@@ -696,10 +696,33 @@  unlink_and_fail:
     return ret;
 }
 
+int bdrv_reopen_prepare(BlockDriverState *bs, BDRVReopenState **prs, int flags)
+{
+     BlockDriver *drv = bs->drv;
+
+     return drv->bdrv_reopen_prepare(bs, prs, flags);
+}
+
+void bdrv_reopen_commit(BlockDriverState *bs, BDRVReopenState *rs, int flags)
+{
+    BlockDriver *drv = bs->drv;
+
+    drv->bdrv_reopen_commit(bs, rs, flags);
+    bs->open_flags = flags;
+}
+
+void bdrv_reopen_abort(BlockDriverState *bs, BDRVReopenState *rs)
+{
+    BlockDriver *drv = bs->drv;
+
+    drv->bdrv_reopen_abort(bs, rs);
+}
+
 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags)
 {
     BlockDriver *drv = bs->drv;
     int ret = 0, open_flags;
+    BDRVReopenState *reopen_state = NULL;
 
     /* Quiesce IO for the given block device */
     qemu_aio_flush();
@@ -708,17 +731,31 @@  int bdrv_reopen(BlockDriverState *bs, in
         qerror_report(QERR_DATA_SYNC_FAILED, bs->device_name);
         return ret;
     }
-    open_flags = bs->open_flags;
-    bdrv_close(bs);
 
-    ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
-    if (ret < 0) {
-        /* Reopen failed. Try to open with original flags */
-        qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
-        ret = bdrv_open(bs, bs->filename, open_flags, drv);
+    /* Use driver specific reopen() if available */
+    if (drv->bdrv_reopen_prepare) {
+        ret = bdrv_reopen_prepare(bs, &reopen_state, bdrv_flags);
+         if (ret < 0) {
+            bdrv_reopen_abort(bs, reopen_state);
+            qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
+            return ret;
+        }
+
+        bdrv_reopen_commit(bs, reopen_state, bdrv_flags);
+
+    } else {
+       open_flags = bs->open_flags;
+       bdrv_close(bs);
+
+       ret = bdrv_open(bs, bs->filename, bdrv_flags, drv);
         if (ret < 0) {
-            /* Reopen failed with orig and modified flags */
-            abort();
+            /* Reopen failed. Try to open with original flags */
+            qerror_report(QERR_REOPEN_FILE_FAILED, bs->filename);
+            ret = bdrv_open(bs, bs->filename, open_flags, drv);
+            if (ret < 0) {
+                /* Reopen failed with orig and modified flags */
+                bs->drv = NULL;
+            }
         }
     }
 
Index: qemu/block_int.h
===================================================================
--- qemu.orig/block_int.h
+++ qemu/block_int.h
@@ -56,6 +56,14 @@  struct BlockDriver {
     int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
     int (*bdrv_probe_device)(const char *filename);
     int (*bdrv_open)(BlockDriverState *bs, int flags);
+
+    /* For handling image reopen for split or non-split files */
+    int (*bdrv_reopen_prepare)(BlockDriverState *bs,
+                               BDRVReopenState **prs,
+                               int flags);
+    void (*bdrv_reopen_commit)(BlockDriverState *bs, BDRVReopenState *rs,
+                               int flags);
+    void (*bdrv_reopen_abort)(BlockDriverState *bs, BDRVReopenState *rs);
     int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags);
     int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
                      uint8_t *buf, int nb_sectors);
@@ -213,6 +221,11 @@  struct BlockDriverState {
     void *private;
 };
 
+struct BDRVReopenState {
+    BlockDriverState *bs;
+    int reopen_flags;
+};
+
 struct BlockDriverAIOCB {
     AIOPool *pool;
     BlockDriverState *bs;
Index: qemu/qemu-common.h
===================================================================
--- qemu.orig/qemu-common.h
+++ qemu/qemu-common.h
@@ -203,6 +203,7 @@  typedef struct NICInfo NICInfo;
 typedef struct HCIInfo HCIInfo;
 typedef struct AudioState AudioState;
 typedef struct BlockDriverState BlockDriverState;
+typedef struct BDRVReopenState BDRVReopenState;
 typedef struct DriveInfo DriveInfo;
 typedef struct DisplayState DisplayState;
 typedef struct DisplayChangeListener DisplayChangeListener;
Index: qemu/block.h
===================================================================
--- qemu.orig/block.h
+++ qemu/block.h
@@ -105,6 +105,9 @@  int bdrv_file_open(BlockDriverState **pb
 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
               BlockDriver *drv);
 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags);
+int bdrv_reopen_prepare(BlockDriverState *bs, BDRVReopenState **prs, int flags);
+void bdrv_reopen_commit(BlockDriverState *bs, BDRVReopenState *rs,  int flags);
+void bdrv_reopen_abort(BlockDriverState *bs, BDRVReopenState *rs);
 void bdrv_close(BlockDriverState *bs);
 int bdrv_attach_dev(BlockDriverState *bs, void *dev);
 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);