Patchwork A different way to ask for readonly drive

login
register
mail settings
Submitter Naphtali Sprei
Date Dec. 14, 2009, 1:35 p.m.
Message ID <4B263F0B.90408@redhat.com>
Download mbox | patch
Permalink /patch/41106/
State New
Headers show

Comments

Naphtali Sprei - Dec. 14, 2009, 1:35 p.m.
Hi,
After feedback from Red Hat guys, I've decided to slightly modify the approach to drive's readonly.
The new approach also addresses the silent fall-back to open the drives' file as read-only when read-write fails
(permission denied) that causes unexpected behavior.
Instead of the 'readonly' boolean flag, another flag introduced (a replacement), 'read_write' with three values [on|off|try]:
read_write=on : open with read and write permission, no fall-back to read-only
read_write=off: open with read-only permission
read_write=try: open with read and write permission and if fails, fall-back to read-only (the default if nothing specified)

Suggestions for better naming for flag/values welcomed.

I've tried to explicitly pass the required flags for the bdrv_open function in callers, but probably missed some.

 Naphtali



Signed-off-by: Naphtali Sprei <nsprei@redhat.com>
---
 block.c       |   29 +++++++++++++++++------------
 block.h       |    7 +++++--
 hw/xen_disk.c |    3 ++-
 monitor.c     |    2 +-
 qemu-config.c |    4 ++--
 qemu-img.c    |   14 ++++++++------
 qemu-nbd.c    |    2 +-
 vl.c          |   42 +++++++++++++++++++++++++++++++++---------
 8 files changed, 69 insertions(+), 34 deletions(-)
Stefan Weil - Dec. 14, 2009, 3:53 p.m.
Naphtali Sprei schrieb:
> Hi,
> After feedback from Red Hat guys, I've decided to slightly modify the approach to drive's readonly.
> The new approach also addresses the silent fall-back to open the drives' file as read-only when read-write fails
> (permission denied) that causes unexpected behavior.
> Instead of the 'readonly' boolean flag, another flag introduced (a replacement), 'read_write' with three values [on|off|try]:
> read_write=on : open with read and write permission, no fall-back to read-only
> read_write=off: open with read-only permission
> read_write=try: open with read and write permission and if fails, fall-back to read-only (the default if nothing specified)
>
> Suggestions for better naming for flag/values welcomed.
>
> I've tried to explicitly pass the required flags for the bdrv_open function in callers, but probably missed some.
>
>  Naphtali
>
> ...
>   
Instead of on/off, I'd prefer the common shortcuts rw/ro.
"try" is ok, but maybe "rw-ro" is better.

So here are my suggestions:

read_write=rw
read_write=ro
read_write=rw-ro

or

access=rw
access=ro
access=rw-ro

Regards,
Stefan
Kevin Wolf - Dec. 15, 2009, 5:45 p.m.
Am 14.12.2009 14:35, schrieb Naphtali Sprei:
> Hi,
> After feedback from Red Hat guys, I've decided to slightly modify the approach to drive's readonly.
> The new approach also addresses the silent fall-back to open the drives' file as read-only when read-write fails
> (permission denied) that causes unexpected behavior.
> Instead of the 'readonly' boolean flag, another flag introduced (a replacement), 'read_write' with three values [on|off|try]:
> read_write=on : open with read and write permission, no fall-back to read-only
> read_write=off: open with read-only permission
> read_write=try: open with read and write permission and if fails, fall-back to read-only (the default if nothing specified)
> 
> Suggestions for better naming for flag/values welcomed.
> 
> I've tried to explicitly pass the required flags for the bdrv_open function in callers, but probably missed some.
> 
>  Naphtali
> 
> 
> 
> Signed-off-by: Naphtali Sprei <nsprei@redhat.com>
> ---
>  block.c       |   29 +++++++++++++++++------------
>  block.h       |    7 +++++--
>  hw/xen_disk.c |    3 ++-
>  monitor.c     |    2 +-
>  qemu-config.c |    4 ++--
>  qemu-img.c    |   14 ++++++++------
>  qemu-nbd.c    |    2 +-
>  vl.c          |   42 +++++++++++++++++++++++++++++++++---------
>  8 files changed, 69 insertions(+), 34 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 3f3496e..75788ca 100644
> --- a/block.c
> +++ b/block.c
> @@ -356,7 +356,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
>  int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
>                 BlockDriver *drv)
>  {
> -    int ret, open_flags, try_rw;
> +    int ret, open_flags;
>      char tmp_filename[PATH_MAX];
>      char backing_filename[PATH_MAX];
>  
> @@ -378,7 +378,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
>  
>          /* if there is a backing file, use it */
>          bs1 = bdrv_new("");
> -        ret = bdrv_open2(bs1, filename, 0, drv);
> +        ret = bdrv_open2(bs1, filename, BDRV_O_RDONLY, drv);
>          if (ret < 0) {
>              bdrv_delete(bs1);
>              return ret;
> @@ -445,19 +445,22 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
>          bs->enable_write_cache = 1;
>  
>      /* Note: for compatibility, we open disk image files as RDWR, and
> -       RDONLY as fallback */
> -    try_rw = !bs->read_only || bs->is_temporary;
> -    if (!(flags & BDRV_O_FILE))
> -        open_flags = (try_rw ? BDRV_O_RDWR : 0) |
> -            (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO));
> -    else
> +       RDONLY as fallback (if flag BDRV_O_RO_FBCK is on) */
> +    bs->read_only = BDRV_FLAGS_IS_RO(flags);
> +    if (!(flags & BDRV_O_FILE)) {
> +        open_flags = (flags & (BDRV_O_ACCESS | BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO));
> +        if (bs->is_temporary) { /* snapshot */
> +            open_flags |= BDRV_O_RDWR;

open_flags = (open_flags & ~BDRV_O_ACCESS) | BDRV_O_RDWR;

Yes, I know, RDONLY is 0 anyway, but still... Or move the first
open_flags line into the if and have a second one for is_temporary that
doesn't copy BDRV_O_ACCESS.

> +        }
> +    } else
>          open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
>      if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv))
>          ret = -ENOTSUP;
>      else
>          ret = drv->bdrv_open(bs, filename, open_flags);
> -    if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
> -        ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
> +    if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE) &&
> +        (flags & BDRV_O_RO_FBCK)) {
> +        ret = drv->bdrv_open(bs, filename, (open_flags & ~BDRV_O_RDWR) | BDRV_O_RDONLY);

Mask BDRV_O_ACCESS out, not only BDRV_O_RDWR.

>          bs->read_only = 1;
>      }
>      if (ret < 0) {
> @@ -481,12 +484,14 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
>          /* if there is a backing file, use it */
>          BlockDriver *back_drv = NULL;
>          bs->backing_hd = bdrv_new("");
> -        /* pass on read_only property to the backing_hd */
> -        bs->backing_hd->read_only = bs->read_only;
>          path_combine(backing_filename, sizeof(backing_filename),
>                       filename, bs->backing_file);
>          if (bs->backing_format[0] != '\0')
>              back_drv = bdrv_find_format(bs->backing_format);
> +        /* pass on ro flags to the backing_hd */
> +        bs->backing_hd->read_only =  BDRV_FLAGS_IS_RO(flags);
> +        open_flags &= ~BDRV_O_ACCESS;
> +        open_flags |= (flags & BDRV_O_ACCESS);
>          ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
>                           back_drv);
>          if (ret < 0) {
> diff --git a/block.h b/block.h
> index fa51ddf..b32c6a4 100644
> --- a/block.h
> +++ b/block.h
> @@ -28,8 +28,9 @@ typedef struct QEMUSnapshotInfo {
>  } QEMUSnapshotInfo;
>  
>  #define BDRV_O_RDONLY      0x0000
> -#define BDRV_O_RDWR        0x0002
> -#define BDRV_O_ACCESS      0x0003
> +#define BDRV_O_RDWR        0x0001
> +#define BDRV_O_ACCESS      0x0001

Is this needed? I can't see why we would need more than a flag that says
if we are read-write or not, but if you were to remove the old two-bit
field, you should do the complete cleanup. There is not reason for
BDRV_O_ACCESS to exist then any more.

In any case I don't think that saving the wasted bit belong into this
patch, so better separate it out.

> +#define BDRV_O_RO_FBCK     0x0002

Come on, we can afford the four additional letters to make it
BDRV_O_RO_FALLBACK and suddenly it becomes readable.

>  #define BDRV_O_CREAT       0x0004 /* create an empty file */
>  #define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save writes in a snapshot */
>  #define BDRV_O_FILE        0x0010 /* open as a raw file (do not try to
> @@ -41,6 +42,8 @@ typedef struct QEMUSnapshotInfo {
>  #define BDRV_O_NATIVE_AIO  0x0080 /* use native AIO instead of the thread pool */
>  
>  #define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_CACHE_WB)
> +#define BDRV_O_DFLT_OPEN   (BDRV_O_RDWR | BDRV_O_RO_FBCK)

Same here, what's wrong with spelling DEFAULT out?

> +#define BDRV_FLAGS_IS_RO(flags) ((flags & BDRV_O_ACCESS) == BDRV_O_RDONLY)
>  
>  #define BDRV_SECTOR_BITS   9
>  #define BDRV_SECTOR_SIZE   (1 << BDRV_SECTOR_BITS)
> diff --git a/hw/xen_disk.c b/hw/xen_disk.c
> index 5c55251..13688db 100644
> --- a/hw/xen_disk.c
> +++ b/hw/xen_disk.c
> @@ -610,7 +610,8 @@ static int blk_init(struct XenDevice *xendev)
>      /* read-only ? */
>      if (strcmp(blkdev->mode, "w") == 0) {
>  	mode   = O_RDWR;
> -	qflags = BDRV_O_RDWR;
> +        /* for compatibility, also allow readonly fallback, for now */
> +	qflags = BDRV_O_RDWR | BDRV_O_RO_FBCK;
>      } else {
>  	mode   = O_RDONLY;
>  	qflags = BDRV_O_RDONLY;
> diff --git a/monitor.c b/monitor.c
> index d97d529..440e17e 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -910,7 +910,7 @@ static void do_change_block(Monitor *mon, const char *device,
>      }
>      if (eject_device(mon, bs, 0) < 0)
>          return;
> -    bdrv_open2(bs, filename, 0, drv);
> +    bdrv_open2(bs, filename, BDRV_O_DFLT_OPEN, drv);
>      monitor_read_bdrv_key_start(mon, bs, NULL, NULL);
>  }
>  
> diff --git a/qemu-config.c b/qemu-config.c
> index c3203c8..b559459 100644
> --- a/qemu-config.c
> +++ b/qemu-config.c
> @@ -76,8 +76,8 @@ QemuOptsList qemu_drive_opts = {
>              .type = QEMU_OPT_STRING,
>              .help = "pci address (virtio only)",
>          },{
> -            .name = "readonly",
> -            .type = QEMU_OPT_BOOL,
> +            .name = "read_write",
> +            .type = QEMU_OPT_STRING,
>          },
>          { /* end if list */ }
>      },
> diff --git a/qemu-img.c b/qemu-img.c
> index 1d97f2e..dee3e60 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -201,7 +201,7 @@ static BlockDriverState *bdrv_new_open(const char *filename,
>      } else {
>          drv = NULL;
>      }
> -    if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
> +    if (bdrv_open2(bs, filename, BRDV_O_FLAGS | BDRV_O_DFLT_OPEN, drv) < 0) {
>          error("Could not open '%s'", filename);
>      }
>      if (bdrv_is_encrypted(bs)) {
> @@ -406,7 +406,7 @@ static int img_check(int argc, char **argv)
>      } else {
>          drv = NULL;
>      }
> -    if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
> +    if (bdrv_open2(bs, filename, BRDV_O_FLAGS | BDRV_O_RDONLY, drv) < 0) {
>          error("Could not open '%s'", filename);
>      }
>      ret = bdrv_check(bs);
> @@ -465,7 +465,7 @@ static int img_commit(int argc, char **argv)
>      } else {
>          drv = NULL;
>      }
> -    if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
> +    if (bdrv_open2(bs, filename, BRDV_O_FLAGS | BDRV_O_RDWR, drv) < 0) {
>          error("Could not open '%s'", filename);
>      }
>      ret = bdrv_commit(bs);
> @@ -884,7 +884,7 @@ static int img_info(int argc, char **argv)
>      } else {
>          drv = NULL;
>      }
> -    if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
> +    if (bdrv_open2(bs, filename, BRDV_O_FLAGS | BDRV_O_RDONLY, drv) < 0) {
>          error("Could not open '%s'", filename);
>      }
>      bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
> @@ -932,10 +932,11 @@ static int img_snapshot(int argc, char **argv)
>      BlockDriverState *bs;
>      QEMUSnapshotInfo sn;
>      char *filename, *snapshot_name = NULL;
> -    int c, ret;
> +    int c, ret, bdrv_oflags;
>      int action = 0;
>      qemu_timeval tv;
>  
> +    bdrv_oflags = BDRV_O_RDWR; /* but no read-only fallback */
>      /* Parse commandline parameters */
>      for(;;) {
>          c = getopt(argc, argv, "la:c:d:h");
> @@ -951,6 +952,7 @@ static int img_snapshot(int argc, char **argv)
>                  return 0;
>              }
>              action = SNAPSHOT_LIST;
> +            bdrv_oflags = BDRV_O_RDONLY; /* no need for RW */
>              break;
>          case 'a':
>              if (action) {
> @@ -988,7 +990,7 @@ static int img_snapshot(int argc, char **argv)
>      if (!bs)
>          error("Not enough memory");
>  
> -    if (bdrv_open2(bs, filename, 0, NULL) < 0) {
> +    if (bdrv_open2(bs, filename, bdrv_oflags, NULL) < 0) {

Not related to your patch, but I think we want to have BRDV_O_FLAGS
here, too. And we need to get rid of that typo some time. Well, I guess
something for my own to-do list.

>          error("Could not open '%s'", filename);
>      }
>  
> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index 6cdb834..64f4c68 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -212,7 +212,7 @@ int main(int argc, char **argv)
>      int opt_ind = 0;
>      int li;
>      char *end;
> -    int flags = 0;
> +    int flags = BDRV_O_DFLT_OPEN;
>      int partition = -1;
>      int ret;
>      int shared = 1;
> diff --git a/vl.c b/vl.c
> index c0d98f5..cef2d67 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2090,6 +2090,7 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque,
>                        int *fatal_error)
>  {
>      const char *buf;
> +    const char *rw_buf = 0;
>      const char *file = NULL;
>      char devname[128];
>      const char *serial;
> @@ -2104,12 +2105,12 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque,
>      int index;
>      int cache;
>      int aio = 0;
> -    int ro = 0;
>      int bdrv_flags;
>      int on_read_error, on_write_error;
>      const char *devaddr;
>      DriveInfo *dinfo;
>      int snapshot = 0;
> +    int read_write, ro_fallback;
>  
>      *fatal_error = 1;
>  
> @@ -2137,7 +2138,7 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque,
>      secs  = qemu_opt_get_number(opts, "secs", 0);
>  
>      snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
> -    ro = qemu_opt_get_bool(opts, "readonly", 0);
> +    rw_buf = qemu_opt_get(opts, "read_write");
>  
>      file = qemu_opt_get(opts, "file");
>      serial = qemu_opt_get(opts, "serial");
> @@ -2420,6 +2421,29 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque,
>          *fatal_error = 0;
>          return NULL;
>      }
> +
> +    read_write = 1;
> +    ro_fallback = 1;
> +    if (rw_buf) {
> +        if (!strcmp(rw_buf, "off")) {
> +            read_write = 0;
> +            ro_fallback = 0;
> +            if (type != IF_SCSI && type != IF_VIRTIO && type != IF_FLOPPY) {
> +                fprintf(stderr, "qemu: read_write=off flag not supported for drive of this interface\n");
> +                return NULL;
> +            }
> +        } else if (!strcmp(rw_buf, "on")) {
> +            read_write = 1;
> +            ro_fallback = 0;
> +        } else if (!strcmp(rw_buf, "try")) { /* default, but keep it explicit */
> +            read_write = 1;
> +            ro_fallback = 1;

We probably should have the check or SCSI/virtio/floppy here, too. If
the user explicitly requests that we should try read-only and it's not
available with the device I think that's a reason to exit with an error.

> +        } else {
> +            fprintf(stderr, "qemu: '%s' invalid read_write option (valid values: [on|off|try] )\n", buf);
> +            return NULL;
> +        }
> +    }
> +    
>      bdrv_flags = 0;
>      if (snapshot) {
>          bdrv_flags |= BDRV_O_SNAPSHOT;
> @@ -2436,16 +2460,16 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque,
>          bdrv_flags &= ~BDRV_O_NATIVE_AIO;
>      }
>  
> -    if (ro == 1) {
> -        if (type == IF_IDE) {
> -            fprintf(stderr, "qemu: readonly flag not supported for drive with ide interface\n");
> -            return NULL;
> -        }
> -        (void)bdrv_set_read_only(dinfo->bdrv, 1);
> +    if (read_write) {
> +        bdrv_flags |= BDRV_O_RDWR;
> +    }

If BDRV_O_ACCESS continues to exist, BDRV_O_RDWR is not a flag but a
value for that field. To make things clearer, I'd prefer something like
this then:

/* Set BDRV_O_ACCESS value */
bdrv_flags |= read_write ? BDRV_O_RDWR : BDRV_O_RDONLY;

> +    if (ro_fallback) {
> +        bdrv_flags |= BDRV_O_RO_FBCK;
>      }
> +  
>  
>      if (bdrv_open2(dinfo->bdrv, file, bdrv_flags, drv) < 0) {
> -        fprintf(stderr, "qemu: could not open disk image %s: %s\n",
> +        fprintf(stderr, "qemu: could not open disk image %s with requested permission: %s\n",
>                          file, strerror(errno));
>          return NULL;
>      }

Kevin
Jamie Lokier - Dec. 15, 2009, 6:45 p.m.
Stefan Weil wrote:
> So here are my suggestions:
> 
> read_write=rw
> read_write=ro
> read_write=rw-ro
> 
> or
> 
> access=rw
> access=ro
> access=rw-ro

Mine:

access=rw
access=ro
access=auto  (default)

-- Jamie
Stefan Weil - Dec. 15, 2009, 10:09 p.m.
Jamie Lokier schrieb:
> Stefan Weil wrote:
>   
>> So here are my suggestions:
>>
>> read_write=rw
>> read_write=ro
>> read_write=rw-ro
>>
>> or
>>
>> access=rw
>> access=ro
>> access=rw-ro
>>     
>
> Mine:
>
> access=rw
> access=ro
> access=auto  (default)
>
> -- Jamie

Looks good. Best suggestion up to now (in my personal opinion).

-- Stefan
Christoph Hellwig - Dec. 17, 2009, 10:50 a.m.
On Tue, Dec 15, 2009 at 06:45:01PM +0000, Jamie Lokier wrote:
> access=rw
> access=ro
> access=auto  (default)

Yes, that sounds like the least clumsy one.  I still think the current
implementation is a very bad default, though.
Richard W.M. Jones - Dec. 17, 2009, 11:31 a.m.
On Mon, Dec 14, 2009 at 03:35:07PM +0200, Naphtali Sprei wrote:
>  block.c       |   29 +++++++++++++++++------------
>  block.h       |    7 +++++--
>  hw/xen_disk.c |    3 ++-
>  monitor.c     |    2 +-
>  qemu-config.c |    4 ++--
>  qemu-img.c    |   14 ++++++++------
>  qemu-nbd.c    |    2 +-
>  vl.c          |   42 +++++++++++++++++++++++++++++++++---------

Please update qemu-options.hx, the -drive documentation.

As well as helping users, this also allows us (libvirt, libguestfs) to
detect whether the particular version of qemu we are using offers this
feature.

Rich.
Jamie Lokier - Dec. 17, 2009, 1:16 p.m.
Christoph Hellwig wrote:
> On Tue, Dec 15, 2009 at 06:45:01PM +0000, Jamie Lokier wrote:
> > access=rw
> > access=ro
> > access=auto  (default)
> 
> Yes, that sounds like the least clumsy one.  I still think the current
> implementation is a very bad default, though.

Without agreeing or disagreeing over whether it's a bad default :), a
usability problem occurs with the current implementation when you
deliberately "chmod 444" an image to have high confidence that it's
opened read only: When running as root, file permissions are ignored
(except sometimes on NFS).

For that reason I use "chattr +i" on all my read-only image files, to
really make sure that no qemu invocation mistake could accidentally
corrupt valuable images.  That works, but it's not very convenient.

If the "auto" method is kept, I think it would be an improvement if it
checks the file permission itself, and does not even try to open a
file O_RDWR if there are no writable permission bits - so that "chmod
444" has the same "open as read only" effect when qemu is invoked as root.

-- Jamie
Kevin Wolf - Dec. 17, 2009, 2:23 p.m.
Am 17.12.2009 14:16, schrieb Jamie Lokier:
> Christoph Hellwig wrote:
>> On Tue, Dec 15, 2009 at 06:45:01PM +0000, Jamie Lokier wrote:
>>> access=rw
>>> access=ro
>>> access=auto  (default)
>>
>> Yes, that sounds like the least clumsy one.  I still think the current
>> implementation is a very bad default, though.
> 
> Without agreeing or disagreeing over whether it's a bad default :), a
> usability problem occurs with the current implementation when you
> deliberately "chmod 444" an image to have high confidence that it's
> opened read only: When running as root, file permissions are ignored
> (except sometimes on NFS).
> 
> For that reason I use "chattr +i" on all my read-only image files, to
> really make sure that no qemu invocation mistake could accidentally
> corrupt valuable images.  That works, but it's not very convenient.
> 
> If the "auto" method is kept, I think it would be an improvement if it
> checks the file permission itself, and does not even try to open a
> file O_RDWR if there are no writable permission bits - so that "chmod
> 444" has the same "open as read only" effect when qemu is invoked as root.

I don't think that this makes sense. That you can write the file as root
is a feature of your OS and qemu has nothing to do with it. Doing
anything else than accessing it would actually be unexpected behaviour
on this OS. We're just writing an application, not a better OS.

You can decide to protect your images with the qemu readonly option and
get the protection that qemu defines, or you take the permissions of the
OS and get from the OS whatever the definition of that protection is
(including write access for root). qemu can't and shouldn't know that
you use the OS's protection but actually don't quite mean what it's
defined to be.

Kevin
Markus Armbruster - Dec. 17, 2009, 2:31 p.m.
Jamie Lokier <jamie@shareable.org> writes:

> Christoph Hellwig wrote:
>> On Tue, Dec 15, 2009 at 06:45:01PM +0000, Jamie Lokier wrote:
>> > access=rw
>> > access=ro
>> > access=auto  (default)
>> 
>> Yes, that sounds like the least clumsy one.  I still think the current
>> implementation is a very bad default, though.
>
> Without agreeing or disagreeing over whether it's a bad default :), a
> usability problem occurs with the current implementation when you
> deliberately "chmod 444" an image to have high confidence that it's
> opened read only: When running as root, file permissions are ignored
> (except sometimes on NFS).
>
> For that reason I use "chattr +i" on all my read-only image files, to
> really make sure that no qemu invocation mistake could accidentally
> corrupt valuable images.  That works, but it's not very convenient.
>
> If the "auto" method is kept, I think it would be an improvement if it
> checks the file permission itself, and does not even try to open a
> file O_RDWR if there are no writable permission bits - so that "chmod
> 444" has the same "open as read only" effect when qemu is invoked as root.

"Improving" on the kernel's permission checking easily ends in tears.

And if we change the the current "auto" behavior in incompatible ways
anyway, then my preferred change would be to just drop it.
Jamie Lokier - Dec. 17, 2009, 3:30 p.m.
Kevin Wolf wrote:
> Am 17.12.2009 14:16, schrieb Jamie Lokier:
> You can decide to protect your images with the qemu readonly option and
> get the protection that qemu defines, or you take the permissions of the
> OS and get from the OS whatever the definition of that protection is
> (including write access for root).

Note that until the latest patch, "chmod 444" was the _the_ user
interface to this feature of qemu.  It's a bad interface, but it was
the only one available.  qemu is weird like that, having external
file permissions control an internal behaviour switch.

> qemu can't and shouldn't know that you use the OS's protection but
> actually don't quite mean what it's defined to be.

Then I concur with Christopher Hellwig, and we should drop the "auto"
behaviour entirely, and force the user interface to be the qemu
command line instead of "chmod" from now one.

-- Jamie

Patch

diff --git a/block.c b/block.c
index 3f3496e..75788ca 100644
--- a/block.c
+++ b/block.c
@@ -356,7 +356,7 @@  int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
                BlockDriver *drv)
 {
-    int ret, open_flags, try_rw;
+    int ret, open_flags;
     char tmp_filename[PATH_MAX];
     char backing_filename[PATH_MAX];
 
@@ -378,7 +378,7 @@  int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
 
         /* if there is a backing file, use it */
         bs1 = bdrv_new("");
-        ret = bdrv_open2(bs1, filename, 0, drv);
+        ret = bdrv_open2(bs1, filename, BDRV_O_RDONLY, drv);
         if (ret < 0) {
             bdrv_delete(bs1);
             return ret;
@@ -445,19 +445,22 @@  int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
         bs->enable_write_cache = 1;
 
     /* Note: for compatibility, we open disk image files as RDWR, and
-       RDONLY as fallback */
-    try_rw = !bs->read_only || bs->is_temporary;
-    if (!(flags & BDRV_O_FILE))
-        open_flags = (try_rw ? BDRV_O_RDWR : 0) |
-            (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO));
-    else
+       RDONLY as fallback (if flag BDRV_O_RO_FBCK is on) */
+    bs->read_only = BDRV_FLAGS_IS_RO(flags);
+    if (!(flags & BDRV_O_FILE)) {
+        open_flags = (flags & (BDRV_O_ACCESS | BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO));
+        if (bs->is_temporary) { /* snapshot */
+            open_flags |= BDRV_O_RDWR;
+        }
+    } else
         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv))
         ret = -ENOTSUP;
     else
         ret = drv->bdrv_open(bs, filename, open_flags);
-    if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
-        ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
+    if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE) &&
+        (flags & BDRV_O_RO_FBCK)) {
+        ret = drv->bdrv_open(bs, filename, (open_flags & ~BDRV_O_RDWR) | BDRV_O_RDONLY);
         bs->read_only = 1;
     }
     if (ret < 0) {
@@ -481,12 +484,14 @@  int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
         /* if there is a backing file, use it */
         BlockDriver *back_drv = NULL;
         bs->backing_hd = bdrv_new("");
-        /* pass on read_only property to the backing_hd */
-        bs->backing_hd->read_only = bs->read_only;
         path_combine(backing_filename, sizeof(backing_filename),
                      filename, bs->backing_file);
         if (bs->backing_format[0] != '\0')
             back_drv = bdrv_find_format(bs->backing_format);
+        /* pass on ro flags to the backing_hd */
+        bs->backing_hd->read_only =  BDRV_FLAGS_IS_RO(flags);
+        open_flags &= ~BDRV_O_ACCESS;
+        open_flags |= (flags & BDRV_O_ACCESS);
         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
                          back_drv);
         if (ret < 0) {
diff --git a/block.h b/block.h
index fa51ddf..b32c6a4 100644
--- a/block.h
+++ b/block.h
@@ -28,8 +28,9 @@  typedef struct QEMUSnapshotInfo {
 } QEMUSnapshotInfo;
 
 #define BDRV_O_RDONLY      0x0000
-#define BDRV_O_RDWR        0x0002
-#define BDRV_O_ACCESS      0x0003
+#define BDRV_O_RDWR        0x0001
+#define BDRV_O_ACCESS      0x0001
+#define BDRV_O_RO_FBCK     0x0002
 #define BDRV_O_CREAT       0x0004 /* create an empty file */
 #define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save writes in a snapshot */
 #define BDRV_O_FILE        0x0010 /* open as a raw file (do not try to
@@ -41,6 +42,8 @@  typedef struct QEMUSnapshotInfo {
 #define BDRV_O_NATIVE_AIO  0x0080 /* use native AIO instead of the thread pool */
 
 #define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_CACHE_WB)
+#define BDRV_O_DFLT_OPEN   (BDRV_O_RDWR | BDRV_O_RO_FBCK)
+#define BDRV_FLAGS_IS_RO(flags) ((flags & BDRV_O_ACCESS) == BDRV_O_RDONLY)
 
 #define BDRV_SECTOR_BITS   9
 #define BDRV_SECTOR_SIZE   (1 << BDRV_SECTOR_BITS)
diff --git a/hw/xen_disk.c b/hw/xen_disk.c
index 5c55251..13688db 100644
--- a/hw/xen_disk.c
+++ b/hw/xen_disk.c
@@ -610,7 +610,8 @@  static int blk_init(struct XenDevice *xendev)
     /* read-only ? */
     if (strcmp(blkdev->mode, "w") == 0) {
 	mode   = O_RDWR;
-	qflags = BDRV_O_RDWR;
+        /* for compatibility, also allow readonly fallback, for now */
+	qflags = BDRV_O_RDWR | BDRV_O_RO_FBCK;
     } else {
 	mode   = O_RDONLY;
 	qflags = BDRV_O_RDONLY;
diff --git a/monitor.c b/monitor.c
index d97d529..440e17e 100644
--- a/monitor.c
+++ b/monitor.c
@@ -910,7 +910,7 @@  static void do_change_block(Monitor *mon, const char *device,
     }
     if (eject_device(mon, bs, 0) < 0)
         return;
-    bdrv_open2(bs, filename, 0, drv);
+    bdrv_open2(bs, filename, BDRV_O_DFLT_OPEN, drv);
     monitor_read_bdrv_key_start(mon, bs, NULL, NULL);
 }
 
diff --git a/qemu-config.c b/qemu-config.c
index c3203c8..b559459 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -76,8 +76,8 @@  QemuOptsList qemu_drive_opts = {
             .type = QEMU_OPT_STRING,
             .help = "pci address (virtio only)",
         },{
-            .name = "readonly",
-            .type = QEMU_OPT_BOOL,
+            .name = "read_write",
+            .type = QEMU_OPT_STRING,
         },
         { /* end if list */ }
     },
diff --git a/qemu-img.c b/qemu-img.c
index 1d97f2e..dee3e60 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -201,7 +201,7 @@  static BlockDriverState *bdrv_new_open(const char *filename,
     } else {
         drv = NULL;
     }
-    if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+    if (bdrv_open2(bs, filename, BRDV_O_FLAGS | BDRV_O_DFLT_OPEN, drv) < 0) {
         error("Could not open '%s'", filename);
     }
     if (bdrv_is_encrypted(bs)) {
@@ -406,7 +406,7 @@  static int img_check(int argc, char **argv)
     } else {
         drv = NULL;
     }
-    if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+    if (bdrv_open2(bs, filename, BRDV_O_FLAGS | BDRV_O_RDONLY, drv) < 0) {
         error("Could not open '%s'", filename);
     }
     ret = bdrv_check(bs);
@@ -465,7 +465,7 @@  static int img_commit(int argc, char **argv)
     } else {
         drv = NULL;
     }
-    if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+    if (bdrv_open2(bs, filename, BRDV_O_FLAGS | BDRV_O_RDWR, drv) < 0) {
         error("Could not open '%s'", filename);
     }
     ret = bdrv_commit(bs);
@@ -884,7 +884,7 @@  static int img_info(int argc, char **argv)
     } else {
         drv = NULL;
     }
-    if (bdrv_open2(bs, filename, BRDV_O_FLAGS, drv) < 0) {
+    if (bdrv_open2(bs, filename, BRDV_O_FLAGS | BDRV_O_RDONLY, drv) < 0) {
         error("Could not open '%s'", filename);
     }
     bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
@@ -932,10 +932,11 @@  static int img_snapshot(int argc, char **argv)
     BlockDriverState *bs;
     QEMUSnapshotInfo sn;
     char *filename, *snapshot_name = NULL;
-    int c, ret;
+    int c, ret, bdrv_oflags;
     int action = 0;
     qemu_timeval tv;
 
+    bdrv_oflags = BDRV_O_RDWR; /* but no read-only fallback */
     /* Parse commandline parameters */
     for(;;) {
         c = getopt(argc, argv, "la:c:d:h");
@@ -951,6 +952,7 @@  static int img_snapshot(int argc, char **argv)
                 return 0;
             }
             action = SNAPSHOT_LIST;
+            bdrv_oflags = BDRV_O_RDONLY; /* no need for RW */
             break;
         case 'a':
             if (action) {
@@ -988,7 +990,7 @@  static int img_snapshot(int argc, char **argv)
     if (!bs)
         error("Not enough memory");
 
-    if (bdrv_open2(bs, filename, 0, NULL) < 0) {
+    if (bdrv_open2(bs, filename, bdrv_oflags, NULL) < 0) {
         error("Could not open '%s'", filename);
     }
 
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 6cdb834..64f4c68 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -212,7 +212,7 @@  int main(int argc, char **argv)
     int opt_ind = 0;
     int li;
     char *end;
-    int flags = 0;
+    int flags = BDRV_O_DFLT_OPEN;
     int partition = -1;
     int ret;
     int shared = 1;
diff --git a/vl.c b/vl.c
index c0d98f5..cef2d67 100644
--- a/vl.c
+++ b/vl.c
@@ -2090,6 +2090,7 @@  DriveInfo *drive_init(QemuOpts *opts, void *opaque,
                       int *fatal_error)
 {
     const char *buf;
+    const char *rw_buf = 0;
     const char *file = NULL;
     char devname[128];
     const char *serial;
@@ -2104,12 +2105,12 @@  DriveInfo *drive_init(QemuOpts *opts, void *opaque,
     int index;
     int cache;
     int aio = 0;
-    int ro = 0;
     int bdrv_flags;
     int on_read_error, on_write_error;
     const char *devaddr;
     DriveInfo *dinfo;
     int snapshot = 0;
+    int read_write, ro_fallback;
 
     *fatal_error = 1;
 
@@ -2137,7 +2138,7 @@  DriveInfo *drive_init(QemuOpts *opts, void *opaque,
     secs  = qemu_opt_get_number(opts, "secs", 0);
 
     snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
-    ro = qemu_opt_get_bool(opts, "readonly", 0);
+    rw_buf = qemu_opt_get(opts, "read_write");
 
     file = qemu_opt_get(opts, "file");
     serial = qemu_opt_get(opts, "serial");
@@ -2420,6 +2421,29 @@  DriveInfo *drive_init(QemuOpts *opts, void *opaque,
         *fatal_error = 0;
         return NULL;
     }
+
+    read_write = 1;
+    ro_fallback = 1;
+    if (rw_buf) {
+        if (!strcmp(rw_buf, "off")) {
+            read_write = 0;
+            ro_fallback = 0;
+            if (type != IF_SCSI && type != IF_VIRTIO && type != IF_FLOPPY) {
+                fprintf(stderr, "qemu: read_write=off flag not supported for drive of this interface\n");
+                return NULL;
+            }
+        } else if (!strcmp(rw_buf, "on")) {
+            read_write = 1;
+            ro_fallback = 0;
+        } else if (!strcmp(rw_buf, "try")) { /* default, but keep it explicit */
+            read_write = 1;
+            ro_fallback = 1;
+        } else {
+            fprintf(stderr, "qemu: '%s' invalid read_write option (valid values: [on|off|try] )\n", buf);
+            return NULL;
+        }
+    }
+    
     bdrv_flags = 0;
     if (snapshot) {
         bdrv_flags |= BDRV_O_SNAPSHOT;
@@ -2436,16 +2460,16 @@  DriveInfo *drive_init(QemuOpts *opts, void *opaque,
         bdrv_flags &= ~BDRV_O_NATIVE_AIO;
     }
 
-    if (ro == 1) {
-        if (type == IF_IDE) {
-            fprintf(stderr, "qemu: readonly flag not supported for drive with ide interface\n");
-            return NULL;
-        }
-        (void)bdrv_set_read_only(dinfo->bdrv, 1);
+    if (read_write) {
+        bdrv_flags |= BDRV_O_RDWR;
+    }
+    if (ro_fallback) {
+        bdrv_flags |= BDRV_O_RO_FBCK;
     }
+  
 
     if (bdrv_open2(dinfo->bdrv, file, bdrv_flags, drv) < 0) {
-        fprintf(stderr, "qemu: could not open disk image %s: %s\n",
+        fprintf(stderr, "qemu: could not open disk image %s with requested permission: %s\n",
                         file, strerror(errno));
         return NULL;
     }