diff mbox series

raw-format: drop WRITE and RESIZE child perms when possible

Message ID 20210726122839.822900-1-stefanha@redhat.com
State New
Headers show
Series raw-format: drop WRITE and RESIZE child perms when possible | expand

Commit Message

Stefan Hajnoczi July 26, 2021, 12:28 p.m. UTC
The following command-line fails due to a permissions conflict:

  $ qemu-storage-daemon \
      --blockdev driver=nvme,node-name=nvme0,device=0000:08:00.0,namespace=1 \
      --blockdev driver=raw,node-name=l1-1,file=nvme0,offset=0,size=1073741824 \
      --blockdev driver=raw,node-name=l1-2,file=nvme0,offset=1073741824,size=1073741824 \
      --nbd-server addr.type=unix,addr.path=/tmp/nbd.sock,max-connections=2 \
      --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on \
      --export type=nbd,id=nbd-l1-2,node-name=l1-2,name=l1-2,writable=on

  qemu-storage-daemon: --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on: Permission conflict on node 'nvme0': permissions 'resize' are both required by node 'l1-1' (uses node 'nvme0' as 'file' child) and unshared by node 'l1-2' (uses node 'nvme0' as 'file' child).

The problem is that block/raw-format.c relies on bdrv_default_perms() to
set permissions on the nvme node. The default permissions add RESIZE in
anticipation of a format driver like qcow2 that needs to grow the image
file. This fails because RESIZE is unshared, so we cannot get the RESIZE
permission.

Max Reitz pointed out that block/crypto.c already handles this case by
implementing a custom ->bdrv_child_perm() function that adjusts the
result of bdrv_default_perms().

This patch takes the same approach in block/raw-format.c so that RESIZE
is only required if it's actually necessary (e.g. the parent is qcow2).

Cc: Max Reitz <mreitz@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
This is not a bug fix, so I didn't mark it for QEMU 6.1. It's new
behavior that hasn't been supported before. I want to split an NVMe
drive using the raw format's offset=/size= feature.
---
 block/raw-format.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

Comments

Vladimir Sementsov-Ogievskiy July 26, 2021, 2:41 p.m. UTC | #1
26.07.2021 15:28, Stefan Hajnoczi wrote:
> The following command-line fails due to a permissions conflict:
> 
>    $ qemu-storage-daemon \
>        --blockdev driver=nvme,node-name=nvme0,device=0000:08:00.0,namespace=1 \
>        --blockdev driver=raw,node-name=l1-1,file=nvme0,offset=0,size=1073741824 \
>        --blockdev driver=raw,node-name=l1-2,file=nvme0,offset=1073741824,size=1073741824 \
>        --nbd-server addr.type=unix,addr.path=/tmp/nbd.sock,max-connections=2 \
>        --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on \
>        --export type=nbd,id=nbd-l1-2,node-name=l1-2,name=l1-2,writable=on
> 
>    qemu-storage-daemon: --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on: Permission conflict on node 'nvme0': permissions 'resize' are both required by node 'l1-1' (uses node 'nvme0' as 'file' child) and unshared by node 'l1-2' (uses node 'nvme0' as 'file' child).
> 
> The problem is that block/raw-format.c relies on bdrv_default_perms() to
> set permissions on the nvme node. The default permissions add RESIZE in
> anticipation of a format driver like qcow2 that needs to grow the image
> file. This fails because RESIZE is unshared, so we cannot get the RESIZE
> permission.
> 
> Max Reitz pointed out that block/crypto.c already handles this case by
> implementing a custom ->bdrv_child_perm() function that adjusts the
> result of bdrv_default_perms().
> 
> This patch takes the same approach in block/raw-format.c so that RESIZE
> is only required if it's actually necessary (e.g. the parent is qcow2).
> 
> Cc: Max Reitz <mreitz@redhat.com>
> Cc: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> This is not a bug fix, so I didn't mark it for QEMU 6.1. It's new
> behavior that hasn't been supported before. I want to split an NVMe
> drive using the raw format's offset=/size= feature.
> ---
>   block/raw-format.c | 21 ++++++++++++++++++++-
>   1 file changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/block/raw-format.c b/block/raw-format.c
> index 7717578ed6..c26f493688 100644
> --- a/block/raw-format.c
> +++ b/block/raw-format.c
> @@ -580,6 +580,25 @@ static void raw_cancel_in_flight(BlockDriverState *bs)
>       bdrv_cancel_in_flight(bs->file->bs);
>   }
>   
> +static void raw_child_perm(BlockDriverState *bs, BdrvChild *c,
> +                           BdrvChildRole role,
> +                           BlockReopenQueue *reopen_queue,
> +                           uint64_t parent_perm, uint64_t parent_shared,
> +                           uint64_t *nperm, uint64_t *nshared)
> +{
> +    bdrv_default_perms(bs, c, role, reopen_queue, parent_perm,
> +                       parent_shared, nperm, nshared);
> +
> +    /*
> +     * bdrv_default_perms() may add WRITE and/or RESIZE (see comment in
> +     * bdrv_default_perms_for_storage() for an explanation) but we only need
> +     * them if they are in parent_perm. Drop WRITE and RESIZE whenever possible
> +     * to avoid permission conflicts.
> +     */
> +    *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
> +    *nperm |= parent_perm & (BLK_PERM_WRITE | BLK_PERM_RESIZE);
> +}
> +
>   BlockDriver bdrv_raw = {
>       .format_name          = "raw",
>       .instance_size        = sizeof(BDRVRawState),
> @@ -588,7 +607,7 @@ BlockDriver bdrv_raw = {
>       .bdrv_reopen_commit   = &raw_reopen_commit,
>       .bdrv_reopen_abort    = &raw_reopen_abort,
>       .bdrv_open            = &raw_open,
> -    .bdrv_child_perm      = bdrv_default_perms,
> +    .bdrv_child_perm      = raw_child_perm,
>       .bdrv_co_create_opts  = &raw_co_create_opts,
>       .bdrv_co_preadv       = &raw_co_preadv,
>       .bdrv_co_pwritev      = &raw_co_pwritev,
> 

I think it's OK:

Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>


Still, did you consider the alternative of making the bdrv_filter_default_perm() function public and just using ".bdrv_child_perm = bdrv_filter_default_perm," here?

raw_format is not considered to be a filter, but for its permissions I think it works exactly like a filter.
Kevin Wolf July 26, 2021, 3:42 p.m. UTC | #2
Am 26.07.2021 um 16:41 hat Vladimir Sementsov-Ogievskiy geschrieben:
> 26.07.2021 15:28, Stefan Hajnoczi wrote:
> > The following command-line fails due to a permissions conflict:
> > 
> >    $ qemu-storage-daemon \
> >        --blockdev driver=nvme,node-name=nvme0,device=0000:08:00.0,namespace=1 \
> >        --blockdev driver=raw,node-name=l1-1,file=nvme0,offset=0,size=1073741824 \
> >        --blockdev driver=raw,node-name=l1-2,file=nvme0,offset=1073741824,size=1073741824 \
> >        --nbd-server addr.type=unix,addr.path=/tmp/nbd.sock,max-connections=2 \
> >        --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on \
> >        --export type=nbd,id=nbd-l1-2,node-name=l1-2,name=l1-2,writable=on
> > 
> >    qemu-storage-daemon: --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on: Permission conflict on node 'nvme0': permissions 'resize' are both required by node 'l1-1' (uses node 'nvme0' as 'file' child) and unshared by node 'l1-2' (uses node 'nvme0' as 'file' child).
> > 
> > The problem is that block/raw-format.c relies on bdrv_default_perms() to
> > set permissions on the nvme node. The default permissions add RESIZE in
> > anticipation of a format driver like qcow2 that needs to grow the image
> > file. This fails because RESIZE is unshared, so we cannot get the RESIZE
> > permission.
> > 
> > Max Reitz pointed out that block/crypto.c already handles this case by
> > implementing a custom ->bdrv_child_perm() function that adjusts the
> > result of bdrv_default_perms().
> > 
> > This patch takes the same approach in block/raw-format.c so that RESIZE
> > is only required if it's actually necessary (e.g. the parent is qcow2).
> > 
> > Cc: Max Reitz <mreitz@redhat.com>
> > Cc: Kevin Wolf <kwolf@redhat.com>
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > This is not a bug fix, so I didn't mark it for QEMU 6.1. It's new
> > behavior that hasn't been supported before. I want to split an NVMe
> > drive using the raw format's offset=/size= feature.
> > ---
> >   block/raw-format.c | 21 ++++++++++++++++++++-
> >   1 file changed, 20 insertions(+), 1 deletion(-)
> > 
> > diff --git a/block/raw-format.c b/block/raw-format.c
> > index 7717578ed6..c26f493688 100644
> > --- a/block/raw-format.c
> > +++ b/block/raw-format.c
> > @@ -580,6 +580,25 @@ static void raw_cancel_in_flight(BlockDriverState *bs)
> >       bdrv_cancel_in_flight(bs->file->bs);
> >   }
> > +static void raw_child_perm(BlockDriverState *bs, BdrvChild *c,
> > +                           BdrvChildRole role,
> > +                           BlockReopenQueue *reopen_queue,
> > +                           uint64_t parent_perm, uint64_t parent_shared,
> > +                           uint64_t *nperm, uint64_t *nshared)
> > +{
> > +    bdrv_default_perms(bs, c, role, reopen_queue, parent_perm,
> > +                       parent_shared, nperm, nshared);
> > +
> > +    /*
> > +     * bdrv_default_perms() may add WRITE and/or RESIZE (see comment in
> > +     * bdrv_default_perms_for_storage() for an explanation) but we only need
> > +     * them if they are in parent_perm. Drop WRITE and RESIZE whenever possible
> > +     * to avoid permission conflicts.
> > +     */
> > +    *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
> > +    *nperm |= parent_perm & (BLK_PERM_WRITE | BLK_PERM_RESIZE);
> > +}
> > +
> >   BlockDriver bdrv_raw = {
> >       .format_name          = "raw",
> >       .instance_size        = sizeof(BDRVRawState),
> > @@ -588,7 +607,7 @@ BlockDriver bdrv_raw = {
> >       .bdrv_reopen_commit   = &raw_reopen_commit,
> >       .bdrv_reopen_abort    = &raw_reopen_abort,
> >       .bdrv_open            = &raw_open,
> > -    .bdrv_child_perm      = bdrv_default_perms,
> > +    .bdrv_child_perm      = raw_child_perm,
> >       .bdrv_co_create_opts  = &raw_co_create_opts,
> >       .bdrv_co_preadv       = &raw_co_preadv,
> >       .bdrv_co_pwritev      = &raw_co_pwritev,
> > 
> 
> I think it's OK:
> 
> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> 
> 
> Still, did you consider the alternative of making the
> bdrv_filter_default_perm() function public and just using
> ".bdrv_child_perm = bdrv_filter_default_perm," here?
> 
> raw_format is not considered to be a filter, but for its permissions I
> think it works exactly like a filter.

I had the same thought, but then commit 69dca43d6b6 explicitly made the
opposite change. I seem to remember that Max never liked raw being
treated like a filter much.

Kevin
Stefan Hajnoczi July 26, 2021, 3:59 p.m. UTC | #3
On Mon, Jul 26, 2021 at 05:42:47PM +0200, Kevin Wolf wrote:
> Am 26.07.2021 um 16:41 hat Vladimir Sementsov-Ogievskiy geschrieben:
> > 26.07.2021 15:28, Stefan Hajnoczi wrote:
> > > The following command-line fails due to a permissions conflict:
> > > 
> > >    $ qemu-storage-daemon \
> > >        --blockdev driver=nvme,node-name=nvme0,device=0000:08:00.0,namespace=1 \
> > >        --blockdev driver=raw,node-name=l1-1,file=nvme0,offset=0,size=1073741824 \
> > >        --blockdev driver=raw,node-name=l1-2,file=nvme0,offset=1073741824,size=1073741824 \
> > >        --nbd-server addr.type=unix,addr.path=/tmp/nbd.sock,max-connections=2 \
> > >        --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on \
> > >        --export type=nbd,id=nbd-l1-2,node-name=l1-2,name=l1-2,writable=on
> > > 
> > >    qemu-storage-daemon: --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on: Permission conflict on node 'nvme0': permissions 'resize' are both required by node 'l1-1' (uses node 'nvme0' as 'file' child) and unshared by node 'l1-2' (uses node 'nvme0' as 'file' child).
> > > 
> > > The problem is that block/raw-format.c relies on bdrv_default_perms() to
> > > set permissions on the nvme node. The default permissions add RESIZE in
> > > anticipation of a format driver like qcow2 that needs to grow the image
> > > file. This fails because RESIZE is unshared, so we cannot get the RESIZE
> > > permission.
> > > 
> > > Max Reitz pointed out that block/crypto.c already handles this case by
> > > implementing a custom ->bdrv_child_perm() function that adjusts the
> > > result of bdrv_default_perms().
> > > 
> > > This patch takes the same approach in block/raw-format.c so that RESIZE
> > > is only required if it's actually necessary (e.g. the parent is qcow2).
> > > 
> > > Cc: Max Reitz <mreitz@redhat.com>
> > > Cc: Kevin Wolf <kwolf@redhat.com>
> > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > > ---
> > > This is not a bug fix, so I didn't mark it for QEMU 6.1. It's new
> > > behavior that hasn't been supported before. I want to split an NVMe
> > > drive using the raw format's offset=/size= feature.
> > > ---
> > >   block/raw-format.c | 21 ++++++++++++++++++++-
> > >   1 file changed, 20 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/block/raw-format.c b/block/raw-format.c
> > > index 7717578ed6..c26f493688 100644
> > > --- a/block/raw-format.c
> > > +++ b/block/raw-format.c
> > > @@ -580,6 +580,25 @@ static void raw_cancel_in_flight(BlockDriverState *bs)
> > >       bdrv_cancel_in_flight(bs->file->bs);
> > >   }
> > > +static void raw_child_perm(BlockDriverState *bs, BdrvChild *c,
> > > +                           BdrvChildRole role,
> > > +                           BlockReopenQueue *reopen_queue,
> > > +                           uint64_t parent_perm, uint64_t parent_shared,
> > > +                           uint64_t *nperm, uint64_t *nshared)
> > > +{
> > > +    bdrv_default_perms(bs, c, role, reopen_queue, parent_perm,
> > > +                       parent_shared, nperm, nshared);
> > > +
> > > +    /*
> > > +     * bdrv_default_perms() may add WRITE and/or RESIZE (see comment in
> > > +     * bdrv_default_perms_for_storage() for an explanation) but we only need
> > > +     * them if they are in parent_perm. Drop WRITE and RESIZE whenever possible
> > > +     * to avoid permission conflicts.
> > > +     */
> > > +    *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
> > > +    *nperm |= parent_perm & (BLK_PERM_WRITE | BLK_PERM_RESIZE);
> > > +}
> > > +
> > >   BlockDriver bdrv_raw = {
> > >       .format_name          = "raw",
> > >       .instance_size        = sizeof(BDRVRawState),
> > > @@ -588,7 +607,7 @@ BlockDriver bdrv_raw = {
> > >       .bdrv_reopen_commit   = &raw_reopen_commit,
> > >       .bdrv_reopen_abort    = &raw_reopen_abort,
> > >       .bdrv_open            = &raw_open,
> > > -    .bdrv_child_perm      = bdrv_default_perms,
> > > +    .bdrv_child_perm      = raw_child_perm,
> > >       .bdrv_co_create_opts  = &raw_co_create_opts,
> > >       .bdrv_co_preadv       = &raw_co_preadv,
> > >       .bdrv_co_pwritev      = &raw_co_pwritev,
> > > 
> > 
> > I think it's OK:
> > 
> > Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> > 
> > 
> > Still, did you consider the alternative of making the
> > bdrv_filter_default_perm() function public and just using
> > ".bdrv_child_perm = bdrv_filter_default_perm," here?
> > 
> > raw_format is not considered to be a filter, but for its permissions I
> > think it works exactly like a filter.
> 
> I had the same thought, but then commit 69dca43d6b6 explicitly made the
> opposite change. I seem to remember that Max never liked raw being
> treated like a filter much.

Additionally:

  static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
  {
  ...
      /*
       * Without offset and a size limit, this driver behaves very much
       * like a filter.  With any such limit, it does not.
       */
      if (offset || has_size) {
          file_role = BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY;
      } else {
          file_role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
      }

Whether the raw child node acts as FILTERED or DATA depends on whether
offset=/size= were given.

Stefan
Hanna Czenczek Aug. 19, 2021, 1:37 p.m. UTC | #4
On 26.07.21 14:28, Stefan Hajnoczi wrote:
> The following command-line fails due to a permissions conflict:
>
>    $ qemu-storage-daemon \
>        --blockdev driver=nvme,node-name=nvme0,device=0000:08:00.0,namespace=1 \
>        --blockdev driver=raw,node-name=l1-1,file=nvme0,offset=0,size=1073741824 \
>        --blockdev driver=raw,node-name=l1-2,file=nvme0,offset=1073741824,size=1073741824 \
>        --nbd-server addr.type=unix,addr.path=/tmp/nbd.sock,max-connections=2 \
>        --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on \
>        --export type=nbd,id=nbd-l1-2,node-name=l1-2,name=l1-2,writable=on
>
>    qemu-storage-daemon: --export type=nbd,id=nbd-l1-1,node-name=l1-1,name=l1-1,writable=on: Permission conflict on node 'nvme0': permissions 'resize' are both required by node 'l1-1' (uses node 'nvme0' as 'file' child) and unshared by node 'l1-2' (uses node 'nvme0' as 'file' child).
>
> The problem is that block/raw-format.c relies on bdrv_default_perms() to
> set permissions on the nvme node. The default permissions add RESIZE in
> anticipation of a format driver like qcow2 that needs to grow the image
> file. This fails because RESIZE is unshared, so we cannot get the RESIZE
> permission.
>
> Max Reitz pointed out that block/crypto.c already handles this case by
> implementing a custom ->bdrv_child_perm() function that adjusts the
> result of bdrv_default_perms().
>
> This patch takes the same approach in block/raw-format.c so that RESIZE
> is only required if it's actually necessary (e.g. the parent is qcow2).
>
> Cc: Max Reitz <mreitz@redhat.com>
> Cc: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> This is not a bug fix, so I didn't mark it for QEMU 6.1. It's new
> behavior that hasn't been supported before. I want to split an NVMe
> drive using the raw format's offset=/size= feature.
> ---
>   block/raw-format.c | 21 ++++++++++++++++++++-
>   1 file changed, 20 insertions(+), 1 deletion(-)

Thanks, applied to my block-next branch:

https://github.com/XanClic/qemu/commits/block-next

Hanna
diff mbox series

Patch

diff --git a/block/raw-format.c b/block/raw-format.c
index 7717578ed6..c26f493688 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -580,6 +580,25 @@  static void raw_cancel_in_flight(BlockDriverState *bs)
     bdrv_cancel_in_flight(bs->file->bs);
 }
 
+static void raw_child_perm(BlockDriverState *bs, BdrvChild *c,
+                           BdrvChildRole role,
+                           BlockReopenQueue *reopen_queue,
+                           uint64_t parent_perm, uint64_t parent_shared,
+                           uint64_t *nperm, uint64_t *nshared)
+{
+    bdrv_default_perms(bs, c, role, reopen_queue, parent_perm,
+                       parent_shared, nperm, nshared);
+
+    /*
+     * bdrv_default_perms() may add WRITE and/or RESIZE (see comment in
+     * bdrv_default_perms_for_storage() for an explanation) but we only need
+     * them if they are in parent_perm. Drop WRITE and RESIZE whenever possible
+     * to avoid permission conflicts.
+     */
+    *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+    *nperm |= parent_perm & (BLK_PERM_WRITE | BLK_PERM_RESIZE);
+}
+
 BlockDriver bdrv_raw = {
     .format_name          = "raw",
     .instance_size        = sizeof(BDRVRawState),
@@ -588,7 +607,7 @@  BlockDriver bdrv_raw = {
     .bdrv_reopen_commit   = &raw_reopen_commit,
     .bdrv_reopen_abort    = &raw_reopen_abort,
     .bdrv_open            = &raw_open,
-    .bdrv_child_perm      = bdrv_default_perms,
+    .bdrv_child_perm      = raw_child_perm,
     .bdrv_co_create_opts  = &raw_co_create_opts,
     .bdrv_co_preadv       = &raw_co_preadv,
     .bdrv_co_pwritev      = &raw_co_pwritev,