Patchwork [v9,6/6] Qemu: raw posix implementation of reopen functions

login
register
mail settings
Submitter Supriya Kannery
Date Nov. 11, 2011, 6:48 a.m.
Message ID <20111111064833.15024.54047.sendpatchset@skannery.in.ibm.com>
Download mbox | patch
Permalink /patch/125100/
State New
Headers show

Comments

Supriya Kannery - Nov. 11, 2011, 6:48 a.m.
raw-posix driver changes for bdrv_reopen_xx functions to
safely reopen image files. Reopening of image files while 
changing hostcache dynamically is handled here.

Signed-off-by: Supriya Kannery <supriyak@linux.vnet.ibm.com>
Stefan Hajnoczi - Nov. 17, 2011, 2:41 p.m.
On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
<supriyak@linux.vnet.ibm.com> wrote:
> +static int raw_reopen_prepare(BlockDriverState *bs, BDRVReopenState **prs,
> +                              int flags)
> +{
> +    BDRVRawReopenState *raw_rs = g_malloc0(sizeof(BDRVRawReopenState));
> +    BDRVRawState *s = bs->opaque;
> +    int ret = 0;
> +
> +    raw_rs->reopen_state.reopen_flags = s->open_flags;
> +    raw_rs->reopen_state.bs = bs;
> +    raw_rs->reopen_fd = -1;
> +    *prs = &(raw_rs->reopen_state);
> +
> +    /* Flags that can be set using fcntl */
> +    int fcntl_flags = BDRV_O_NOCACHE;
> +
> +    if ((bs->open_flags & ~fcntl_flags) == (flags & ~fcntl_flags)) {
> +        raw_rs->reopen_fd = dup(s->fd);
> +        if (raw_rs->reopen_fd <= 0) {
> +            return -1;

return -errno;

> +        }
> +        if ((flags & BDRV_O_NOCACHE)) {
> +            raw_rs->reopen_state.reopen_flags |= O_DIRECT;
> +        } else {
> +            raw_rs->reopen_state.reopen_flags &= ~O_DIRECT;
> +        }
> +        ret = fcntl_setfl(raw_rs->reopen_fd, raw_rs->reopen_state.reopen_flags);

I wonder if this works on Solaris, FreeBSD, etc?

Perhaps there needs to be a fallback to the missing "else" case below...

> +    } else {
> +
> +        /* TBD: Handle O_DSYNC and other flags. For now return error */
> +        ret = -1;

...and this needs to be implemented.

> +    }
> +    return ret;
> +}

Stefan
supriya kannery - Nov. 21, 2011, 12:30 p.m.
Stefan Hajnoczi wrote:
> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
> <supriyak@linux.vnet.ibm.com> wrote:
>   
>> +static int raw_reopen_prepare(BlockDriverState *bs, BDRVReopenState **prs,
>> +                              int flags)
>> +{
>> +    BDRVRawReopenState *raw_rs = g_malloc0(sizeof(BDRVRawReopenState));
>> +    BDRVRawState *s = bs->opaque;
>> +    int ret = 0;
>> +
>> +    raw_rs->reopen_state.reopen_flags = s->open_flags;
>> +    raw_rs->reopen_state.bs = bs;
>> +    raw_rs->reopen_fd = -1;
>> +    *prs = &(raw_rs->reopen_state);
>> +
>> +    /* Flags that can be set using fcntl */
>> +    int fcntl_flags = BDRV_O_NOCACHE;
>> +
>> +    if ((bs->open_flags & ~fcntl_flags) == (flags & ~fcntl_flags)) {
>> +        raw_rs->reopen_fd = dup(s->fd);
>> +        if (raw_rs->reopen_fd <= 0) {
>> +            return -1;
>>     
>
> return -errno;
>
>   
>> +        }
>> +        if ((flags & BDRV_O_NOCACHE)) {
>> +            raw_rs->reopen_state.reopen_flags |= O_DIRECT;
>> +        } else {
>> +            raw_rs->reopen_state.reopen_flags &= ~O_DIRECT;
>> +        }
>> +        ret = fcntl_setfl(raw_rs->reopen_fd, raw_rs->reopen_state.reopen_flags);
>>     
>
> I wonder if this works on Solaris, FreeBSD, etc?
>
> Perhaps there needs to be a fallback to the missing "else" case below...
>
>   

ok. Will look into whether this will work on Solaris, FreeBSD etc..

>> +    } else {
>> +
>> +        /* TBD: Handle O_DSYNC and other flags. For now return error */
>> +        ret = -1;
>>     
>
> ...and this needs to be implemented.
>
>   

ok

>> +    }
>> +    return ret;
>> +}
>>     
>
> Stefan
>
>
supriya kannery - Nov. 22, 2011, 9:45 a.m.
supriya kannery wrote:
> Stefan Hajnoczi wrote:
>> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
>> <supriyak@linux.vnet.ibm.com> wrote:
>>   
>>> +        }
>>> +        if ((flags & BDRV_O_NOCACHE)) {
>>> +            raw_rs->reopen_state.reopen_flags |= O_DIRECT;
>>> +        } else {
>>> +            raw_rs->reopen_state.reopen_flags &= ~O_DIRECT;
>>> +        }
>>> +        ret = fcntl_setfl(raw_rs->reopen_fd, 
>>> raw_rs->reopen_state.reopen_flags);
>>>     
>>
>> I wonder if this works on Solaris, FreeBSD, etc?
>>
>> Perhaps there needs to be a fallback to the missing "else" case below...
>>
>>   
>
> ok. Will look into whether this will work on Solaris, FreeBSD etc..
>

This should work for all non-win Oses.
I have tested only in x86.

#ifndef _WIN32
/* Sets a specific flag */
int fcntl_setfl(int fd, int flag)
{
    int flags;

    flags = fcntl(fd, F_GETFL);

- thanks, Supriya
supriya kannery - Nov. 22, 2011, 11:30 a.m.
Stefan Hajnoczi wrote:
> On Tue, Nov 22, 2011 at 9:45 AM, supriya kannery <supriyak@in.ibm.com> wrote:
>   
>> supriya kannery wrote:
>>     
>>> Stefan Hajnoczi wrote:
>>>       
>>>> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
>>>> <supriyak@linux.vnet.ibm.com> wrote:
>>>>
>>>>         
>>>>> +        }
>>>>> +        if ((flags & BDRV_O_NOCACHE)) {
>>>>> +            raw_rs->reopen_state.reopen_flags |= O_DIRECT;
>>>>> +        } else {
>>>>> +            raw_rs->reopen_state.reopen_flags &= ~O_DIRECT;
>>>>> +        }
>>>>> +        ret = fcntl_setfl(raw_rs->reopen_fd,
>>>>> raw_rs->reopen_state.reopen_flags);
>>>>>
>>>>>           
>>>> I wonder if this works on Solaris, FreeBSD, etc?
>>>>
>>>> Perhaps there needs to be a fallback to the missing "else" case below...
>>>>
>>>>
>>>>         
>>> ok. Will look into whether this will work on Solaris, FreeBSD etc..
>>>
>>>       
>> This should work for all non-win Oses.
>> I have tested only in x86.
>>
>> #ifndef _WIN32
>> /* Sets a specific flag */
>> int fcntl_setfl(int fd, int flag)
>> {
>>   int flags;
>>
>>   flags = fcntl(fd, F_GETFL);
>>     
>
> Are you sure POSIX guarantees that O_DIRECT can be changed with
> F_SETFL?  I didn't find any statement in the specification.  It is
> possible that this code compiles but does not actually work on
> non-Linux OSes.  Did you run tests?
>   
I don't have FreeBSD and Solaris systems to use.
Referred the following man page links to verify that O_DIRECT in these
OSes can be changed using fcntl.
http://fuse4bsd.creo.hu/localcgi/man-cgi.cgi?fcntl+2
http://man-wiki.net/index.php/2:fcntl
If anybody with these systems can confirm, that would be very helpful.

-thanks, Supriya


> Stefan
>
>
Stefan Hajnoczi - Nov. 22, 2011, 11:32 a.m.
On Tue, Nov 22, 2011 at 9:45 AM, supriya kannery <supriyak@in.ibm.com> wrote:
> supriya kannery wrote:
>>
>> Stefan Hajnoczi wrote:
>>>
>>> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
>>> <supriyak@linux.vnet.ibm.com> wrote:
>>>
>>>>
>>>> +        }
>>>> +        if ((flags & BDRV_O_NOCACHE)) {
>>>> +            raw_rs->reopen_state.reopen_flags |= O_DIRECT;
>>>> +        } else {
>>>> +            raw_rs->reopen_state.reopen_flags &= ~O_DIRECT;
>>>> +        }
>>>> +        ret = fcntl_setfl(raw_rs->reopen_fd,
>>>> raw_rs->reopen_state.reopen_flags);
>>>>
>>>
>>> I wonder if this works on Solaris, FreeBSD, etc?
>>>
>>> Perhaps there needs to be a fallback to the missing "else" case below...
>>>
>>>
>>
>> ok. Will look into whether this will work on Solaris, FreeBSD etc..
>>
>
> This should work for all non-win Oses.
> I have tested only in x86.
>
> #ifndef _WIN32
> /* Sets a specific flag */
> int fcntl_setfl(int fd, int flag)
> {
>   int flags;
>
>   flags = fcntl(fd, F_GETFL);

Are you sure POSIX guarantees that O_DIRECT can be changed with
F_SETFL?  I didn't find any statement in the specification.  It is
possible that this code compiles but does not actually work on
non-Linux OSes.  Did you run tests?

Stefan
Christoph Hellwig - Nov. 22, 2011, 11:36 a.m.
On Tue, Nov 22, 2011 at 11:32:42AM +0000, Stefan Hajnoczi wrote:
> Are you sure POSIX guarantees that O_DIRECT can be changed with
> F_SETFL?  I didn't find any statement in the specification.  It is
> possible that this code compiles but does not actually work on
> non-Linux OSes.  Did you run tests?

O_DIRECT is a non-posix extention, and semantics of it will differ
greatly between the operating systems implementing it.
Stefan Hajnoczi - Nov. 22, 2011, 11:54 a.m.
On Tue, Nov 22, 2011 at 11:30 AM, supriya kannery <supriyak@in.ibm.com> wrote:
> Stefan Hajnoczi wrote:
>>
>> On Tue, Nov 22, 2011 at 9:45 AM, supriya kannery <supriyak@in.ibm.com>
>> wrote:
>>
>>>
>>> supriya kannery wrote:
>>>
>>>>
>>>> Stefan Hajnoczi wrote:
>>>>
>>>>>
>>>>> On Fri, Nov 11, 2011 at 6:48 AM, Supriya Kannery
>>>>> <supriyak@linux.vnet.ibm.com> wrote:
>>>>>
>>>>>
>>>>>>
>>>>>> +        }
>>>>>> +        if ((flags & BDRV_O_NOCACHE)) {
>>>>>> +            raw_rs->reopen_state.reopen_flags |= O_DIRECT;
>>>>>> +        } else {
>>>>>> +            raw_rs->reopen_state.reopen_flags &= ~O_DIRECT;
>>>>>> +        }
>>>>>> +        ret = fcntl_setfl(raw_rs->reopen_fd,
>>>>>> raw_rs->reopen_state.reopen_flags);
>>>>>>
>>>>>>
>>>>>
>>>>> I wonder if this works on Solaris, FreeBSD, etc?
>>>>>
>>>>> Perhaps there needs to be a fallback to the missing "else" case
>>>>> below...
>>>>>
>>>>>
>>>>>
>>>>
>>>> ok. Will look into whether this will work on Solaris, FreeBSD etc..
>>>>
>>>>
>>>
>>> This should work for all non-win Oses.
>>> I have tested only in x86.
>>>
>>> #ifndef _WIN32
>>> /* Sets a specific flag */
>>> int fcntl_setfl(int fd, int flag)
>>> {
>>>  int flags;
>>>
>>>  flags = fcntl(fd, F_GETFL);
>>>
>>
>> Are you sure POSIX guarantees that O_DIRECT can be changed with
>> F_SETFL?  I didn't find any statement in the specification.  It is
>> possible that this code compiles but does not actually work on
>> non-Linux OSes.  Did you run tests?
>>
>
> I don't have FreeBSD and Solaris systems to use.
> Referred the following man page links to verify that O_DIRECT in these
> OSes can be changed using fcntl.
> http://fuse4bsd.creo.hu/localcgi/man-cgi.cgi?fcntl+2
> http://man-wiki.net/index.php/2:fcntl
> If anybody with these systems can confirm, that would be very helpful.

Cool, thanks for sharing.  The safest would be to do a F_GETFL
afterwards and fall back to unsafe reopen if it didn't work.

Stefan

Patch

Index: qemu/block/raw.c
===================================================================
--- qemu.orig/block/raw.c
+++ qemu/block/raw.c
@@ -9,6 +9,23 @@  static int raw_open(BlockDriverState *bs
     return 0;
 }
 
+static int raw_reopen_prepare(BlockDriverState *bs, BDRVReopenState **prs,
+                              int flags)
+{
+    return bdrv_reopen_prepare(bs->file, prs, flags);
+}
+
+static void raw_reopen_commit(BlockDriverState *bs, BDRVReopenState *rs,
+                              int flags)
+{
+    bdrv_reopen_commit(bs->file, rs, flags);
+}
+
+static void raw_reopen_abort(BlockDriverState *bs, BDRVReopenState *rs)
+{
+    bdrv_reopen_abort(bs->file, rs);
+}
+
 static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
 {
@@ -107,10 +124,12 @@  static BlockDriver bdrv_raw = {
 
     /* It's really 0, but we need to make g_malloc() happy */
     .instance_size      = 1,
-
     .bdrv_open          = raw_open,
+    .bdrv_reopen_prepare
+                        = raw_reopen_prepare,
+    .bdrv_reopen_commit = raw_reopen_commit,
+    .bdrv_reopen_abort  = raw_reopen_abort,
     .bdrv_close         = raw_close,
-
     .bdrv_co_readv      = raw_co_readv,
     .bdrv_co_writev     = raw_co_writev,
     .bdrv_co_flush      = raw_co_flush,
Index: qemu/block/raw-posix.c
===================================================================
--- qemu.orig/block/raw-posix.c
+++ qemu/block/raw-posix.c
@@ -136,6 +136,11 @@  typedef struct BDRVRawState {
 #endif
 } BDRVRawState;
 
+typedef struct BDRVRawReopenState {
+    int reopen_fd;
+    BDRVReopenState reopen_state;
+} BDRVRawReopenState;
+
 static int fd_open(BlockDriverState *bs);
 static int64_t raw_getlength(BlockDriverState *bs);
 
@@ -279,6 +284,70 @@  static int raw_open(BlockDriverState *bs
     return raw_open_common(bs, filename, flags, 0);
 }
 
+static int raw_reopen_prepare(BlockDriverState *bs, BDRVReopenState **prs,
+                              int flags)
+{
+    BDRVRawReopenState *raw_rs = g_malloc0(sizeof(BDRVRawReopenState));
+    BDRVRawState *s = bs->opaque;
+    int ret = 0;
+
+    raw_rs->reopen_state.reopen_flags = s->open_flags;
+    raw_rs->reopen_state.bs = bs;
+    raw_rs->reopen_fd = -1;
+    *prs = &(raw_rs->reopen_state);
+
+    /* Flags that can be set using fcntl */
+    int fcntl_flags = BDRV_O_NOCACHE;
+
+    if ((bs->open_flags & ~fcntl_flags) == (flags & ~fcntl_flags)) {
+        raw_rs->reopen_fd = dup(s->fd);
+        if (raw_rs->reopen_fd <= 0) {
+            return -1;
+        }
+        if ((flags & BDRV_O_NOCACHE)) {
+            raw_rs->reopen_state.reopen_flags |= O_DIRECT;
+        } else {
+            raw_rs->reopen_state.reopen_flags &= ~O_DIRECT;
+        }
+        ret = fcntl_setfl(raw_rs->reopen_fd, raw_rs->reopen_state.reopen_flags);
+    } else {
+
+        /* TBD: Handle O_DSYNC and other flags. For now return error */
+        ret = -1;
+    }
+    return ret;
+}
+
+static void raw_reopen_commit(BlockDriverState *bs, BDRVReopenState *rs,
+                              int flags)
+{
+    BDRVRawReopenState *raw_rs;
+    BDRVRawState *s = bs->opaque;
+
+    raw_rs = container_of(rs, BDRVRawReopenState, reopen_state);
+
+    /* Set new flags, so replace old fd with new one */
+    close(s->fd);
+    s->fd = raw_rs->reopen_fd;
+    s->open_flags = rs->reopen_flags;
+    bs->open_flags = flags;
+
+    g_free(raw_rs);
+
+}
+
+static void raw_reopen_abort(BlockDriverState *bs, BDRVReopenState *rs)
+{
+    BDRVRawReopenState *raw_rs;
+
+    raw_rs = container_of(rs, BDRVRawReopenState, reopen_state);
+
+    if (raw_rs->reopen_fd != -1) {
+        close(raw_rs->reopen_fd);
+    }
+    g_free(raw_rs);
+}
+
 /* XXX: use host sector size if necessary with:
 #ifdef DIOCGSECTORSIZE
         {
@@ -631,6 +700,9 @@  static BlockDriver bdrv_file = {
     .instance_size = sizeof(BDRVRawState),
     .bdrv_probe = NULL, /* no probe for protocols */
     .bdrv_file_open = raw_open,
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit = raw_reopen_commit,
+    .bdrv_reopen_abort = raw_reopen_abort,
     .bdrv_close = raw_close,
     .bdrv_create = raw_create,
     .bdrv_co_discard = raw_co_discard,