diff mbox

[02/23] block: New BlockBackend

Message ID 1410336832-22160-3-git-send-email-armbru@redhat.com
State New
Headers show

Commit Message

Markus Armbruster Sept. 10, 2014, 8:13 a.m. UTC
A block device consists of a frontend device model and a backend.

A block backend has a tree of block drivers doing the actual work.
The tree is managed by the block layer.

We currently use a single abstraction BlockDriverState both for tree
nodes and the backend as a whole.  Drawbacks:

* Its API includes both stuff that makes sense only at the block
  backend level (root of the tree) and stuff that's only for use
  within the block layer.  This makes the API bigger and more complex
  than necessary.  Moreover, it's not obvious which interfaces are
  meant for device models, and which really aren't.

* Since device models keep a reference to their backend, the backend
  object can't just be destroyed.  But for media change, we need to
  replace the tree.  Our solution is to make the BlockDriverState
  generic, with actual driver state in a separate object, pointed to
  by member opaque.  That lets us replace the tree by deinitializing
  and reinitializing its root.  This special need of the root makes
  the data structure awkward everywhere in the tree.

The general plan is to separate the APIs into "block backend", for use
by device models, monitor and whatever other code dealing with block
backends, and "block driver", for use by the block layer and whatever
other code (if any) dealing with trees and tree nodes.

Code dealing with block backends, device models in particular, should
become completely oblivious of BlockDriverState.  This should let us
clean up both APIs, and the tree data structures.

This commit is a first step.  It creates a minimal "block backend"
API: type BlockBackend and functions to create, destroy and find them.
BlockBackend objects are created and destroyed, but not yet used for
anything; that'll come shortly.

BlockBackend is reference-counted.  Its reference count never exceeds
one so far, but that's going to change.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 block/Makefile.objs            |   2 +-
 block/block-backend.c          | 110 +++++++++++++++++++++++++++++++++++++++++
 blockdev.c                     |  10 +++-
 hw/block/xen_disk.c            |  11 +++++
 include/qemu/typedefs.h        |   1 +
 include/sysemu/block-backend.h |  26 ++++++++++
 qemu-img.c                     |  46 +++++++++++++++++
 qemu-io.c                      |   8 +++
 qemu-nbd.c                     |   3 +-
 9 files changed, 214 insertions(+), 3 deletions(-)
 create mode 100644 block/block-backend.c
 create mode 100644 include/sysemu/block-backend.h

Comments

Kevin Wolf Sept. 10, 2014, 9:56 a.m. UTC | #1
Am 10.09.2014 um 10:13 hat Markus Armbruster geschrieben:
> A block device consists of a frontend device model and a backend.
> 
> A block backend has a tree of block drivers doing the actual work.
> The tree is managed by the block layer.
> 
> We currently use a single abstraction BlockDriverState both for tree
> nodes and the backend as a whole.  Drawbacks:
> 
> * Its API includes both stuff that makes sense only at the block
>   backend level (root of the tree) and stuff that's only for use
>   within the block layer.  This makes the API bigger and more complex
>   than necessary.  Moreover, it's not obvious which interfaces are
>   meant for device models, and which really aren't.
> 
> * Since device models keep a reference to their backend, the backend
>   object can't just be destroyed.  But for media change, we need to
>   replace the tree.  Our solution is to make the BlockDriverState
>   generic, with actual driver state in a separate object, pointed to
>   by member opaque.  That lets us replace the tree by deinitializing
>   and reinitializing its root.  This special need of the root makes
>   the data structure awkward everywhere in the tree.
> 
> The general plan is to separate the APIs into "block backend", for use
> by device models, monitor and whatever other code dealing with block
> backends, and "block driver", for use by the block layer and whatever
> other code (if any) dealing with trees and tree nodes.
> 
> Code dealing with block backends, device models in particular, should
> become completely oblivious of BlockDriverState.  This should let us
> clean up both APIs, and the tree data structures.
> 
> This commit is a first step.  It creates a minimal "block backend"
> API: type BlockBackend and functions to create, destroy and find them.
> BlockBackend objects are created and destroyed, but not yet used for
> anything; that'll come shortly.
> 
> BlockBackend is reference-counted.  Its reference count never exceeds
> one so far, but that's going to change.
> 
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
>  block/Makefile.objs            |   2 +-
>  block/block-backend.c          | 110 +++++++++++++++++++++++++++++++++++++++++
>  blockdev.c                     |  10 +++-
>  hw/block/xen_disk.c            |  11 +++++
>  include/qemu/typedefs.h        |   1 +
>  include/sysemu/block-backend.h |  26 ++++++++++
>  qemu-img.c                     |  46 +++++++++++++++++
>  qemu-io.c                      |   8 +++
>  qemu-nbd.c                     |   3 +-
>  9 files changed, 214 insertions(+), 3 deletions(-)
>  create mode 100644 block/block-backend.c
>  create mode 100644 include/sysemu/block-backend.h
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index f45f939..a70140b 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -5,7 +5,7 @@ block-obj-y += qed-check.o
>  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
>  block-obj-$(CONFIG_QUORUM) += quorum.o
>  block-obj-y += parallels.o blkdebug.o blkverify.o
> -block-obj-y += snapshot.o qapi.o
> +block-obj-y += block-backend.o snapshot.o qapi.o
>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>  block-obj-$(CONFIG_POSIX) += raw-posix.o
>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
> diff --git a/block/block-backend.c b/block/block-backend.c
> new file mode 100644
> index 0000000..833f7d9
> --- /dev/null
> +++ b/block/block-backend.c
> @@ -0,0 +1,110 @@
> +/*
> + * QEMU Block backends
> + *
> + * Copyright (C) 2014 Red Hat, Inc.
> + *
> + * Authors:
> + *  Markus Armbruster <armbru@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */

I think we still have the long-term plan of exposing a block layer
library that can be consumed by libvirt. As the usage in qemu-io/img/nbd
shows, this will probably have to use BlockBackends, so this code is part
of the block layer core.

Considering this, using the LGPL would be more practical. Can you please
make this change for v2? (Personally, I would have used the MIT license
that the rest of the block layer uses, which also makes copying code
around cleaner license-wise, but I know you dislike it.)

> +#include "sysemu/block-backend.h"
> +#include "block/block_int.h"
> +
> +struct BlockBackend {
> +    char *name;
> +    int refcnt;
> +    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
> +};
> +
> +static QTAILQ_HEAD(, BlockBackend) blk_backends =
> +    QTAILQ_HEAD_INITIALIZER(blk_backends);
> +
> +/**
> + * blk_new:
> + * @name: name, must not be %NULL or empty
> + * @errp: return location for an error to be set on failure, or %NULL
> + *
> + * Create a new BlockBackend, with a reference count of one.  Fail if
> + * @name already exists.
> + *
> + * Returns: the BlockBackend on success, %NULL on failure
> + */
> +BlockBackend *blk_new(const char *name, Error **errp)
> +{
> +    BlockBackend *blk = g_new0(BlockBackend, 1);
> +
> +    assert(name && name[0]);
> +    if (blk_by_name(name)) {
> +        error_setg(errp, "Device with id '%s' already exists", name);
> +        return NULL;

blk is leaked here.

> +    }
> +    blk->name = g_strdup(name);
> +    blk->refcnt = 1;
> +    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
> +    return blk;
> +}
> +
> +static void blk_delete(BlockBackend *blk)
> +{
> +    assert(!blk->refcnt);
> +    QTAILQ_REMOVE(&blk_backends, blk, link);
> +    g_free(blk->name);
> +    g_free(blk);
> +}
> +
> +/**
> + * blk_ref:
> + *
> + * Increment @blk's reference count.
> + */
> +void blk_ref(BlockBackend *blk)
> +{
> +    blk->refcnt++;
> +}
> +
> +/**
> + * blk_unref:
> + *
> + * Decrement @blk's reference count.  If this drops it to zero,
> + * destroy @blk.
> + */
> +void blk_unref(BlockBackend *blk)
> +{
> +    if (blk) {
> +        g_assert(blk->refcnt > 0);

You're mixing assert() and g_assert() in this patch. Any reason for
this? If not, I think plain assert() is clearly in the majority in the
overall codebase.

> +        if (!--blk->refcnt) {
> +            blk_delete(blk);
> +        }
> +    }
> +}
> +
> +const char *blk_name(BlockBackend *blk)
> +{
> +    return blk->name;
> +}
> +
> +BlockBackend *blk_by_name(const char *name)
> +{
> +    BlockBackend *blk;
> +
> +    QTAILQ_FOREACH(blk, &blk_backends, link) {
> +        if (!strcmp(name, blk->name)) {
> +            return blk;
> +        }
> +    }
> +    return NULL;
> +}

No comment for these two non-static functions?

> +/**
> + * blk_next:
> + *
> + * Returns: the first BlockBackend if @blk is null, else @blk's next
> + * sibling, which is %NULL for the last BlockBackend
> + */
> +BlockBackend *blk_next(BlockBackend *blk)
> +{
> +    return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
> +}
> diff --git a/blockdev.c b/blockdev.c
> index 9fbd888..86596bc 100644
> --- a/blockdev.c
> +++ b/blockdev.c

Okay, so here the hard part starts: As long as the BB is completely
unused, it's very hard to review at which places one must be created and
deleted.

What was your approach to systematically find all of them?

> @@ -30,6 +30,7 @@
>   * THE SOFTWARE.
>   */
>  
> +#include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
>  #include "block/blockjob.h"
> @@ -221,6 +222,7 @@ void drive_del(DriveInfo *dinfo)
>      }
>  
>      bdrv_unref(dinfo->bdrv);
> +    blk_unref(blk_by_name(dinfo->id));
>      g_free(dinfo->id);
>      QTAILQ_REMOVE(&drives, dinfo, next);
>      g_free(dinfo->serial);
> @@ -301,6 +303,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      int ro = 0;
>      int bdrv_flags = 0;
>      int on_read_error, on_write_error;
> +    BlockBackend *blk;
>      DriveInfo *dinfo;
>      ThrottleConfig cfg;
>      int snapshot = 0;
> @@ -456,6 +459,10 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      }
>  
>      /* init */
> +    blk = blk_new(qemu_opts_id(opts), errp);
> +    if (!blk) {
> +        goto early_err;
> +    }
>      dinfo = g_malloc0(sizeof(*dinfo));
>      dinfo->id = g_strdup(qemu_opts_id(opts));
>      dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
> @@ -525,6 +532,7 @@ err:
>  bdrv_new_err:
>      g_free(dinfo->id);
>      g_free(dinfo);
> +    blk_unref(blk);
>  early_err:
>      qemu_opts_del(opts);
>  err_no_opts:
> @@ -1770,7 +1778,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>       */
>      if (bdrv_get_attached_dev(bs)) {
>          bdrv_make_anon(bs);
> -
> +        blk_unref(blk_by_name(id));
>          /* Further I/O must not pause the guest */
>          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
>                            BLOCKDEV_ON_ERROR_REPORT);

Won't we unref the BB a second time now when unplugging the device?
(drive_del() called in blockdev_auto_del())

> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 8bac7ff..730a021 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -39,6 +39,7 @@
>  #include "hw/xen/xen_backend.h"
>  #include "xen_blkif.h"
>  #include "sysemu/blockdev.h"
> +#include "sysemu/block-backend.h"
>  
>  /* ------------------------------------------------------------- */
>  
> @@ -852,12 +853,18 @@ static int blk_connect(struct XenDevice *xendev)
>      blkdev->dinfo = drive_get(IF_XEN, 0, index);
>      if (!blkdev->dinfo) {
>          Error *local_err = NULL;
> +        BlockBackend *blk;
>          BlockDriver *drv;
>  
>          /* setup via xenbus -> create new block driver instance */
>          xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
> +        blk = blk_new(blkdev->dev, NULL);
> +        if (!blk) {
> +            return -1;
> +        }
>          blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
>          if (!blkdev->bs) {
> +            blk_unref(blk);
>              return -1;
>          }
>  
> @@ -868,6 +875,7 @@ static int blk_connect(struct XenDevice *xendev)
>                            error_get_pretty(local_err));
>              error_free(local_err);
>              bdrv_unref(blkdev->bs);
> +            blk_unref(blk);
>              blkdev->bs = NULL;
>              return -1;
>          }
> @@ -983,6 +991,9 @@ static void blk_disconnect(struct XenDevice *xendev)
>      if (blkdev->bs) {
>          bdrv_detach_dev(blkdev->bs, blkdev);
>          bdrv_unref(blkdev->bs);
> +        if (!blkdev->dinfo) {
> +            blk_unref(blk_by_name(blkdev->dev));
> +        }
>          blkdev->bs = NULL;
>      }
>      xen_be_unbind_evtchn(&blkdev->xendev);
> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> index 5f20b0e..198da2e 100644
> --- a/include/qemu/typedefs.h
> +++ b/include/qemu/typedefs.h
> @@ -35,6 +35,7 @@ typedef struct MachineClass MachineClass;
>  typedef struct NICInfo NICInfo;
>  typedef struct HCIInfo HCIInfo;
>  typedef struct AudioState AudioState;
> +typedef struct BlockBackend BlockBackend;
>  typedef struct BlockDriverState BlockDriverState;
>  typedef struct DriveInfo DriveInfo;
>  typedef struct DisplayState DisplayState;
> diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
> new file mode 100644
> index 0000000..3f8371c
> --- /dev/null
> +++ b/include/sysemu/block-backend.h
> @@ -0,0 +1,26 @@
> +/*
> + * QEMU Block backends
> + *
> + * Copyright (C) 2014 Red Hat, Inc.
> + *
> + * Authors:
> + *  Markus Armbruster <armbru@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef BLOCK_BACKEND_H
> +#define BLOCK_BACKEND_H
> +
> +#include "qemu/typedefs.h"
> +#include "qapi/error.h"
> +
> +BlockBackend *blk_new(const char *name, Error **errp);
> +void blk_ref(BlockBackend *blk);
> +void blk_unref(BlockBackend *blk);
> +const char *blk_name(BlockBackend *blk);
> +BlockBackend *blk_by_name(const char *name);
> +BlockBackend *blk_next(BlockBackend *blk);
> +
> +#endif
> diff --git a/qemu-img.c b/qemu-img.c
> index 4490a22..bad3f64 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c

Won't comment on each hunk in qemu-img, but in many cases, on
bdrv_new_open() failure, blk is leaked.

> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index a56ebfc..94b9b49 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -17,7 +17,7 @@
>   */
>  
>  #include "qemu-common.h"
> -#include "block/block.h"
> +#include "sysemu/block-backend.h"
>  #include "block/block_int.h"
>  #include "block/nbd.h"
>  #include "qemu/main-loop.h"
> @@ -687,6 +687,7 @@ int main(int argc, char **argv)
>          drv = NULL;
>      }
>  
> +    blk_new("hda", &error_abort);
>      bs = bdrv_new_named("hda", &error_abort);
>  
>      srcpath = argv[optind];

Where is the matching blk_unref?

Kevin
Benoît Canet Sept. 10, 2014, 11:34 a.m. UTC | #2
The Wednesday 10 Sep 2014 à 10:13:31 (+0200), Markus Armbruster wrote :
> A block device consists of a frontend device model and a backend.
> 
> A block backend has a tree of block drivers doing the actual work.
> The tree is managed by the block layer.
> 
> We currently use a single abstraction BlockDriverState both for tree
> nodes and the backend as a whole.  Drawbacks:
> 
> * Its API includes both stuff that makes sense only at the block
>   backend level (root of the tree) and stuff that's only for use
>   within the block layer.  This makes the API bigger and more complex
>   than necessary.  Moreover, it's not obvious which interfaces are
>   meant for device models, and which really aren't.
> 
> * Since device models keep a reference to their backend, the backend
>   object can't just be destroyed.  But for media change, we need to
>   replace the tree.  Our solution is to make the BlockDriverState
>   generic, with actual driver state in a separate object, pointed to
>   by member opaque.  That lets us replace the tree by deinitializing
>   and reinitializing its root.  This special need of the root makes
>   the data structure awkward everywhere in the tree.
> 
> The general plan is to separate the APIs into "block backend", for use
> by device models, monitor and whatever other code dealing with block
> backends, and "block driver", for use by the block layer and whatever
> other code (if any) dealing with trees and tree nodes.
> 
> Code dealing with block backends, device models in particular, should
> become completely oblivious of BlockDriverState.  This should let us
> clean up both APIs, and the tree data structures.
> 
> This commit is a first step.  It creates a minimal "block backend"
> API: type BlockBackend and functions to create, destroy and find them.
> BlockBackend objects are created and destroyed, but not yet used for
> anything; that'll come shortly.
> 
> BlockBackend is reference-counted.  Its reference count never exceeds
> one so far, but that's going to change.
> 
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
>  block/Makefile.objs            |   2 +-
>  block/block-backend.c          | 110 +++++++++++++++++++++++++++++++++++++++++
>  blockdev.c                     |  10 +++-
>  hw/block/xen_disk.c            |  11 +++++
>  include/qemu/typedefs.h        |   1 +
>  include/sysemu/block-backend.h |  26 ++++++++++
>  qemu-img.c                     |  46 +++++++++++++++++
>  qemu-io.c                      |   8 +++
>  qemu-nbd.c                     |   3 +-
>  9 files changed, 214 insertions(+), 3 deletions(-)
>  create mode 100644 block/block-backend.c
>  create mode 100644 include/sysemu/block-backend.h
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index f45f939..a70140b 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -5,7 +5,7 @@ block-obj-y += qed-check.o
>  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
>  block-obj-$(CONFIG_QUORUM) += quorum.o
>  block-obj-y += parallels.o blkdebug.o blkverify.o
> -block-obj-y += snapshot.o qapi.o
> +block-obj-y += block-backend.o snapshot.o qapi.o
>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>  block-obj-$(CONFIG_POSIX) += raw-posix.o
>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
> diff --git a/block/block-backend.c b/block/block-backend.c
> new file mode 100644
> index 0000000..833f7d9
> --- /dev/null
> +++ b/block/block-backend.c
> @@ -0,0 +1,110 @@
> +/*
> + * QEMU Block backends
> + *
> + * Copyright (C) 2014 Red Hat, Inc.
> + *
> + * Authors:
> + *  Markus Armbruster <armbru@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#include "sysemu/block-backend.h"
> +#include "block/block_int.h"
> +
> +struct BlockBackend {
> +    char *name;
> +    int refcnt;
> +    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
> +};
> +
> +static QTAILQ_HEAD(, BlockBackend) blk_backends =
> +    QTAILQ_HEAD_INITIALIZER(blk_backends);
> +
> +/**
> + * blk_new:
> + * @name: name, must not be %NULL or empty
> + * @errp: return location for an error to be set on failure, or %NULL
> + *
> + * Create a new BlockBackend, with a reference count of one.  Fail if
> + * @name already exists.
> + *
> + * Returns: the BlockBackend on success, %NULL on failure
> + */
> +BlockBackend *blk_new(const char *name, Error **errp)

I am responding to the easy part first.

So here the BlockBackend is identified by a name.

> +{
> +    BlockBackend *blk = g_new0(BlockBackend, 1);
> +
> +    assert(name && name[0]);
> +    if (blk_by_name(name)) {

> +        error_setg(errp, "Device with id '%s' already exists", name);

But here, is it an id or a name?
Do we need to make a choice everywhere in the code between id and name?

> +        return NULL;
> +    }
> +    blk->name = g_strdup(name);
> +    blk->refcnt = 1;
> +    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
> +    return blk;
> +}
> +
> +static void blk_delete(BlockBackend *blk)
> +{
> +    assert(!blk->refcnt);
> +    QTAILQ_REMOVE(&blk_backends, blk, link);
> +    g_free(blk->name);
> +    g_free(blk);
> +}
> +
> +/**
> + * blk_ref:
> + *
> + * Increment @blk's reference count.
> + */
> +void blk_ref(BlockBackend *blk)
> +{

In blk_unref you take care of doing
+    if (blk) {
to make sure the user does not pass a NULL pointer.
Transforming blk into a NULL pointer is not a side effect
of blk_unref, so this test is designed to protect against user
brain damage.

If the user can be brain damaged enough to pass a NULL to blk_unref, he
could be equally stupid and pass a NULL to blk_ref.
Why not add the same test here?


> +    blk->refcnt++;
> +}
> +
> +/**
> + * blk_unref:
> + *
> + * Decrement @blk's reference count.  If this drops it to zero,
> + * destroy @blk.
> + */
> +void blk_unref(BlockBackend *blk)
> +{
> +    if (blk) {
> +        g_assert(blk->refcnt > 0);
> +        if (!--blk->refcnt) {
> +            blk_delete(blk);
> +        }
> +    }
> +}
> +
> +const char *blk_name(BlockBackend *blk)
> +{
> +    return blk->name;
> +}
> +
> +BlockBackend *blk_by_name(const char *name)
> +{
> +    BlockBackend *blk;
> +
> +    QTAILQ_FOREACH(blk, &blk_backends, link) {
> +        if (!strcmp(name, blk->name)) {
> +            return blk;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +/**
> + * blk_next:
> + *
> + * Returns: the first BlockBackend if @blk is null, else @blk's next
> + * sibling, which is %NULL for the last BlockBackend
> + */
> +BlockBackend *blk_next(BlockBackend *blk)
> +{
> +    return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
> +}
> diff --git a/blockdev.c b/blockdev.c
> index 9fbd888..86596bc 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -30,6 +30,7 @@
>   * THE SOFTWARE.
>   */
>  
> +#include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
>  #include "block/blockjob.h"
> @@ -221,6 +222,7 @@ void drive_del(DriveInfo *dinfo)
>      }
>  
>      bdrv_unref(dinfo->bdrv);
> +    blk_unref(blk_by_name(dinfo->id));

Really the mix of name and id is odd.

>      g_free(dinfo->id);
>      QTAILQ_REMOVE(&drives, dinfo, next);
>      g_free(dinfo->serial);
> @@ -301,6 +303,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      int ro = 0;
>      int bdrv_flags = 0;
>      int on_read_error, on_write_error;
> +    BlockBackend *blk;
>      DriveInfo *dinfo;
>      ThrottleConfig cfg;
>      int snapshot = 0;
> @@ -456,6 +459,10 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      }
>  
>      /* init */
> +    blk = blk_new(qemu_opts_id(opts), errp);
> +    if (!blk) {
> +        goto early_err;
> +    }
>      dinfo = g_malloc0(sizeof(*dinfo));
>      dinfo->id = g_strdup(qemu_opts_id(opts));
>      dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
> @@ -525,6 +532,7 @@ err:
>  bdrv_new_err:
>      g_free(dinfo->id);
>      g_free(dinfo);
> +    blk_unref(blk);
>  early_err:
>      qemu_opts_del(opts);
>  err_no_opts:
> @@ -1770,7 +1778,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>       */
>      if (bdrv_get_attached_dev(bs)) {
>          bdrv_make_anon(bs);
> -
> +        blk_unref(blk_by_name(id));
>          /* Further I/O must not pause the guest */
>          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
>                            BLOCKDEV_ON_ERROR_REPORT);
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 8bac7ff..730a021 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -39,6 +39,7 @@
>  #include "hw/xen/xen_backend.h"
>  #include "xen_blkif.h"
>  #include "sysemu/blockdev.h"
> +#include "sysemu/block-backend.h"
>  
>  /* ------------------------------------------------------------- */
>  
> @@ -852,12 +853,18 @@ static int blk_connect(struct XenDevice *xendev)
>      blkdev->dinfo = drive_get(IF_XEN, 0, index);
>      if (!blkdev->dinfo) {
>          Error *local_err = NULL;
> +        BlockBackend *blk;
>          BlockDriver *drv;
>  
>          /* setup via xenbus -> create new block driver instance */
>          xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
> +        blk = blk_new(blkdev->dev, NULL);
> +        if (!blk) {
> +            return -1;
> +        }
>          blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
>          if (!blkdev->bs) {
> +            blk_unref(blk);
>              return -1;
>          }
>  
> @@ -868,6 +875,7 @@ static int blk_connect(struct XenDevice *xendev)
>                            error_get_pretty(local_err));
>              error_free(local_err);
>              bdrv_unref(blkdev->bs);
> +            blk_unref(blk);
>              blkdev->bs = NULL;
>              return -1;
>          }
> @@ -983,6 +991,9 @@ static void blk_disconnect(struct XenDevice *xendev)
>      if (blkdev->bs) {
>          bdrv_detach_dev(blkdev->bs, blkdev);
>          bdrv_unref(blkdev->bs);
> +        if (!blkdev->dinfo) {
> +            blk_unref(blk_by_name(blkdev->dev));
> +        }
>          blkdev->bs = NULL;
>      }
>      xen_be_unbind_evtchn(&blkdev->xendev);
> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> index 5f20b0e..198da2e 100644
> --- a/include/qemu/typedefs.h
> +++ b/include/qemu/typedefs.h
> @@ -35,6 +35,7 @@ typedef struct MachineClass MachineClass;
>  typedef struct NICInfo NICInfo;
>  typedef struct HCIInfo HCIInfo;
>  typedef struct AudioState AudioState;
> +typedef struct BlockBackend BlockBackend;
>  typedef struct BlockDriverState BlockDriverState;
>  typedef struct DriveInfo DriveInfo;
>  typedef struct DisplayState DisplayState;
> diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
> new file mode 100644
> index 0000000..3f8371c
> --- /dev/null
> +++ b/include/sysemu/block-backend.h
> @@ -0,0 +1,26 @@
> +/*
> + * QEMU Block backends
> + *
> + * Copyright (C) 2014 Red Hat, Inc.
> + *
> + * Authors:
> + *  Markus Armbruster <armbru@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef BLOCK_BACKEND_H
> +#define BLOCK_BACKEND_H
> +
> +#include "qemu/typedefs.h"
> +#include "qapi/error.h"
> +
> +BlockBackend *blk_new(const char *name, Error **errp);
> +void blk_ref(BlockBackend *blk);
> +void blk_unref(BlockBackend *blk);
> +const char *blk_name(BlockBackend *blk);
> +BlockBackend *blk_by_name(const char *name);
> +BlockBackend *blk_next(BlockBackend *blk);
> +
> +#endif
> diff --git a/qemu-img.c b/qemu-img.c
> index 4490a22..bad3f64 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -29,6 +29,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/osdep.h"
>  #include "sysemu/sysemu.h"
> +#include "sysemu/block-backend.h"
>  #include "block/block_int.h"
>  #include "block/qapi.h"
>  #include <getopt.h>
> @@ -575,6 +576,7 @@ static int img_check(int argc, char **argv)
>      int c, ret;
>      OutputFormat output_format = OFORMAT_HUMAN;
>      const char *filename, *fmt, *output, *cache;
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      int fix = 0;
>      int flags = BDRV_O_FLAGS | BDRV_O_CHECK;
> @@ -649,6 +651,7 @@ static int img_check(int argc, char **argv)
>          return 1;
>      }
>  
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>      if (!bs) {
>          return 1;
> @@ -710,6 +713,7 @@ static int img_check(int argc, char **argv)
>  fail:
>      qapi_free_ImageCheck(check);
>      bdrv_unref(bs);
> +    blk_unref(blk);
>  
>      return ret;
>  }
> @@ -718,6 +722,7 @@ static int img_commit(int argc, char **argv)
>  {
>      int c, ret, flags;
>      const char *filename, *fmt, *cache;
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      bool quiet = false;
>  
> @@ -756,6 +761,7 @@ static int img_commit(int argc, char **argv)
>          return 1;
>      }
>  
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>      if (!bs) {
>          return 1;
> @@ -780,6 +786,7 @@ static int img_commit(int argc, char **argv)
>      }
>  
>      bdrv_unref(bs);
> +    blk_unref(blk);
>      if (ret) {
>          return 1;
>      }
> @@ -942,6 +949,7 @@ static int check_empty_sectors(BlockDriverState *bs, int64_t sect_num,
>  static int img_compare(int argc, char **argv)
>  {
>      const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
> +    BlockBackend *blk1, *blk2;
>      BlockDriverState *bs1, *bs2;
>      int64_t total_sectors1, total_sectors2;
>      uint8_t *buf1 = NULL, *buf2 = NULL;
> @@ -1011,6 +1019,7 @@ static int img_compare(int argc, char **argv)
>          goto out3;
>      }
>  
> +    blk1 = blk_new("image 1", &error_abort);
>      bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet);
>      if (!bs1) {
>          error_report("Can't open file %s", filename1);
> @@ -1018,6 +1027,7 @@ static int img_compare(int argc, char **argv)
>          goto out3;
>      }
>  
> +    blk2 = blk_new("image 2", &error_abort);
>      bs2 = bdrv_new_open("image 2", filename2, fmt2, flags, true, quiet);
>      if (!bs2) {
>          error_report("Can't open file %s", filename2);
> @@ -1184,10 +1194,12 @@ static int img_compare(int argc, char **argv)
>  
>  out:
>      bdrv_unref(bs2);
> +    blk_unref(blk2);
>      qemu_vfree(buf1);
>      qemu_vfree(buf2);
>  out2:
>      bdrv_unref(bs1);
> +    blk_unref(blk1);
>  out3:
>      qemu_progress_end();
>      return ret;
> @@ -1200,6 +1212,7 @@ static int img_convert(int argc, char **argv)
>      int progress = 0, flags, src_flags;
>      const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
>      BlockDriver *drv, *proto_drv;
> +    BlockBackend **blk = NULL, *out_blk = NULL;
>      BlockDriverState **bs = NULL, *out_bs = NULL;
>      int64_t total_sectors, nb_sectors, sector_num, bs_offset;
>      int64_t *bs_sectors = NULL;
> @@ -1354,6 +1367,7 @@ static int img_convert(int argc, char **argv)
>  
>      qemu_progress_print(0, 100);
>  
> +    blk = g_new0(BlockBackend *, bs_n);
>      bs = g_new0(BlockDriverState *, bs_n);
>      bs_sectors = g_new(int64_t, bs_n);
>  
> @@ -1361,6 +1375,7 @@ static int img_convert(int argc, char **argv)
>      for (bs_i = 0; bs_i < bs_n; bs_i++) {
>          char *id = bs_n > 1 ? g_strdup_printf("source %d", bs_i)
>                              : g_strdup("source");
> +        blk[bs_i] = blk_new(id, &error_abort);
>          bs[bs_i] = bdrv_new_open(id, argv[optind + bs_i], fmt, src_flags,
>                                   true, quiet);
>          g_free(id);
> @@ -1486,6 +1501,7 @@ static int img_convert(int argc, char **argv)
>          goto out;
>      }
>  
> +    out_blk = blk_new("target", &error_abort);
>      out_bs = bdrv_new_open("target", out_filename, out_fmt, flags, true, quiet);
>      if (!out_bs) {
>          ret = -1;
> @@ -1742,6 +1758,7 @@ out:
>      if (out_bs) {
>          bdrv_unref(out_bs);
>      }
> +    blk_unref(out_blk);
>      if (bs) {
>          for (bs_i = 0; bs_i < bs_n; bs_i++) {
>              if (bs[bs_i]) {
> @@ -1750,6 +1767,12 @@ out:
>          }
>          g_free(bs);
>      }
> +    if (blk) {
> +        for (bs_i = 0; bs_i < bs_n; bs_i++) {
> +            blk_unref(blk[bs_i]);
> +        }
> +        g_free(blk);
> +    }
>      g_free(bs_sectors);
>  fail_getopt:
>      g_free(options);
> @@ -1858,6 +1881,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>      filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
>  
>      while (filename) {
> +        BlockBackend *blk;
>          BlockDriverState *bs;
>          ImageInfo *info;
>          ImageInfoList *elem;
> @@ -1869,6 +1893,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>          }
>          g_hash_table_insert(filenames, (gpointer)filename, NULL);
>  
> +        blk = blk_new("image", &error_abort);
>          bs = bdrv_new_open("image", filename, fmt,
>                             BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
>          if (!bs) {
> @@ -1880,6 +1905,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>              error_report("%s", error_get_pretty(err));
>              error_free(err);
>              bdrv_unref(bs);
> +            blk_unref(blk);
>              goto err;
>          }
>  
> @@ -1889,6 +1915,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>          last = &elem->next;
>  
>          bdrv_unref(bs);
> +        blk_unref(blk);
>  
>          filename = fmt = NULL;
>          if (chain) {
> @@ -2082,6 +2109,7 @@ static int img_map(int argc, char **argv)
>  {
>      int c;
>      OutputFormat output_format = OFORMAT_HUMAN;
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      const char *filename, *fmt, *output;
>      int64_t length;
> @@ -2130,6 +2158,7 @@ static int img_map(int argc, char **argv)
>          return 1;
>      }
>  
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS, true, false);
>      if (!bs) {
>          return 1;
> @@ -2175,6 +2204,7 @@ static int img_map(int argc, char **argv)
>  
>  out:
>      bdrv_unref(bs);
> +    blk_unref(blk);
>      return ret < 0;
>  }
>  
> @@ -2185,6 +2215,7 @@ out:
>  
>  static int img_snapshot(int argc, char **argv)
>  {
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      QEMUSnapshotInfo sn;
>      char *filename, *snapshot_name = NULL;
> @@ -2250,6 +2281,7 @@ static int img_snapshot(int argc, char **argv)
>      filename = argv[optind++];
>  
>      /* Open the image */
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, NULL, bdrv_oflags, true, quiet);
>      if (!bs) {
>          return 1;
> @@ -2297,6 +2329,7 @@ static int img_snapshot(int argc, char **argv)
>  
>      /* Cleanup */
>      bdrv_unref(bs);
> +    blk_unref(blk);
>      if (ret) {
>          return 1;
>      }
> @@ -2305,6 +2338,7 @@ static int img_snapshot(int argc, char **argv)
>  
>  static int img_rebase(int argc, char **argv)
>  {
> +    BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
>      BlockDriverState *bs = NULL, *bs_old_backing = NULL, *bs_new_backing = NULL;
>      BlockDriver *old_backing_drv, *new_backing_drv;
>      char *filename;
> @@ -2393,6 +2427,7 @@ static int img_rebase(int argc, char **argv)
>       * Ignore the old backing file for unsafe rebase in case we want to correct
>       * the reference to a renamed or moved backing file.
>       */
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>      if (!bs) {
>          ret = -1;
> @@ -2425,6 +2460,7 @@ static int img_rebase(int argc, char **argv)
>      if (!unsafe) {
>          char backing_name[1024];
>  
> +        blk_old_backing = blk_new("old_backing", &error_abort);
>          bs_old_backing = bdrv_new_named("old_backing", &error_abort);
>          bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
>          ret = bdrv_open(&bs_old_backing, backing_name, NULL, NULL, src_flags,
> @@ -2436,6 +2472,7 @@ static int img_rebase(int argc, char **argv)
>              goto out;
>          }
>          if (out_baseimg[0]) {
> +            blk_new_backing = blk_new("new_backing", &error_abort);
>              bs_new_backing = bdrv_new_named("new_backing", &error_abort);
>              ret = bdrv_open(&bs_new_backing, out_baseimg, NULL, NULL, src_flags,
>                              new_backing_drv, &local_err);
> @@ -2614,12 +2651,15 @@ out:
>          if (bs_old_backing != NULL) {
>              bdrv_unref(bs_old_backing);
>          }
> +        blk_unref(blk_old_backing);
>          if (bs_new_backing != NULL) {
>              bdrv_unref(bs_new_backing);
>          }
> +        blk_unref(blk_new_backing);
>      }
>  
>      bdrv_unref(bs);
> +    blk_unref(blk);
>      if (ret) {
>          return 1;
>      }
> @@ -2632,6 +2672,7 @@ static int img_resize(int argc, char **argv)
>      const char *filename, *fmt, *size;
>      int64_t n, total_size;
>      bool quiet = false;
> +    BlockBackend *blk = NULL;
>      BlockDriverState *bs = NULL;
>      QemuOpts *param;
>      static QemuOptsList resize_options = {
> @@ -2708,6 +2749,7 @@ static int img_resize(int argc, char **argv)
>      n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
>      qemu_opts_del(param);
>  
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR,
>                         true, quiet);
>      if (!bs) {
> @@ -2745,6 +2787,7 @@ out:
>      if (bs) {
>          bdrv_unref(bs);
>      }
> +    blk_unref(blk);
>      if (ret) {
>          return 1;
>      }
> @@ -2760,6 +2803,7 @@ static int img_amend(int argc, char **argv)
>      const char *fmt = NULL, *filename, *cache;
>      int flags;
>      bool quiet = false;
> +    BlockBackend *blk = NULL;
>      BlockDriverState *bs = NULL;
>  
>      cache = BDRV_DEFAULT_CACHE;
> @@ -2823,6 +2867,7 @@ static int img_amend(int argc, char **argv)
>          goto out;
>      }
>  
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>      if (!bs) {
>          error_report("Could not open image '%s'", filename);
> @@ -2856,6 +2901,7 @@ out:
>      if (bs) {
>          bdrv_unref(bs);
>      }
> +    blk_unref(blk);
>      qemu_opts_del(opts);
>      qemu_opts_free(create_opts);
>      g_free(options);
> diff --git a/qemu-io.c b/qemu-io.c
> index 44c2e1c..45e5494 100644
> --- a/qemu-io.c
> +++ b/qemu-io.c
> @@ -19,6 +19,7 @@
>  #include "qemu/option.h"
>  #include "qemu/config-file.h"
>  #include "qemu/readline.h"
> +#include "sysemu/block-backend.h"
>  #include "block/block_int.h"
>  #include "trace/control.h"
>  
> @@ -26,6 +27,7 @@
>  
>  static char *progname;
>  
> +static BlockBackend *qemuio_blk;
>  static BlockDriverState *qemuio_bs;
>  
>  /* qemu-io commands passed using -c */
> @@ -37,7 +39,9 @@ static ReadLineState *readline_state;
>  static int close_f(BlockDriverState *bs, int argc, char **argv)
>  {
>      bdrv_unref(bs);
> +    blk_unref(qemuio_blk);
>      qemuio_bs = NULL;
> +    qemuio_blk = NULL;
>      return 0;
>  }
>  
> @@ -58,6 +62,7 @@ static int openfile(char *name, int flags, int growable, QDict *opts)
>          return 1;
>      }
>  
> +    qemuio_blk = blk_new("hda", &error_abort);
>      qemuio_bs = bdrv_new_named("hda", &error_abort);
>  
>      if (growable) {
> @@ -70,7 +75,9 @@ static int openfile(char *name, int flags, int growable, QDict *opts)
>                  error_get_pretty(local_err));
>          error_free(local_err);
>          bdrv_unref(qemuio_bs);
> +        blk_unref(qemuio_blk);
>          qemuio_bs = NULL;
> +        qemuio_blk = NULL;
>          return 1;
>      }
>  
> @@ -479,6 +486,7 @@ int main(int argc, char **argv)
>      if (qemuio_bs) {
>          bdrv_unref(qemuio_bs);
>      }
> +    blk_unref(qemuio_blk);
>      g_free(readline_state);
>      return 0;
>  }
> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index a56ebfc..94b9b49 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -17,7 +17,7 @@
>   */
>  
>  #include "qemu-common.h"
> -#include "block/block.h"
> +#include "sysemu/block-backend.h"
>  #include "block/block_int.h"
>  #include "block/nbd.h"
>  #include "qemu/main-loop.h"
> @@ -687,6 +687,7 @@ int main(int argc, char **argv)
>          drv = NULL;
>      }
>  
> +    blk_new("hda", &error_abort);
>      bs = bdrv_new_named("hda", &error_abort);
>  
>      srcpath = argv[optind];
> -- 
> 1.9.3
> 
>
Kevin Wolf Sept. 10, 2014, 11:44 a.m. UTC | #3
Am 10.09.2014 um 13:34 hat Benoît Canet geschrieben:
> The Wednesday 10 Sep 2014 à 10:13:31 (+0200), Markus Armbruster wrote :
> > A block device consists of a frontend device model and a backend.
> > 
> > A block backend has a tree of block drivers doing the actual work.
> > The tree is managed by the block layer.
> > 
> > We currently use a single abstraction BlockDriverState both for tree
> > nodes and the backend as a whole.  Drawbacks:
> > 
> > * Its API includes both stuff that makes sense only at the block
> >   backend level (root of the tree) and stuff that's only for use
> >   within the block layer.  This makes the API bigger and more complex
> >   than necessary.  Moreover, it's not obvious which interfaces are
> >   meant for device models, and which really aren't.
> > 
> > * Since device models keep a reference to their backend, the backend
> >   object can't just be destroyed.  But for media change, we need to
> >   replace the tree.  Our solution is to make the BlockDriverState
> >   generic, with actual driver state in a separate object, pointed to
> >   by member opaque.  That lets us replace the tree by deinitializing
> >   and reinitializing its root.  This special need of the root makes
> >   the data structure awkward everywhere in the tree.
> > 
> > The general plan is to separate the APIs into "block backend", for use
> > by device models, monitor and whatever other code dealing with block
> > backends, and "block driver", for use by the block layer and whatever
> > other code (if any) dealing with trees and tree nodes.
> > 
> > Code dealing with block backends, device models in particular, should
> > become completely oblivious of BlockDriverState.  This should let us
> > clean up both APIs, and the tree data structures.
> > 
> > This commit is a first step.  It creates a minimal "block backend"
> > API: type BlockBackend and functions to create, destroy and find them.
> > BlockBackend objects are created and destroyed, but not yet used for
> > anything; that'll come shortly.
> > 
> > BlockBackend is reference-counted.  Its reference count never exceeds
> > one so far, but that's going to change.
> > 
> > Signed-off-by: Markus Armbruster <armbru@redhat.com>

> > +/**
> > + * blk_ref:
> > + *
> > + * Increment @blk's reference count.
> > + */
> > +void blk_ref(BlockBackend *blk)
> > +{
> 
> In blk_unref you take care of doing
> +    if (blk) {
> to make sure the user does not pass a NULL pointer.
> Transforming blk into a NULL pointer is not a side effect
> of blk_unref, so this test is designed to protect against user
> brain damage.

Not really, I'd rather consider it a convenience feature, just like
you're allowed to call free(NULL) or bdrv_unref(NULL) without having a
check for != NULL everywhere. This will be handy especially in error
paths.

> If the user can be brain damaged enough to pass a NULL to blk_unref,
> he could be equally stupid and pass a NULL to blk_ref.
> Why not add the same test here?

Whereas in blk_ref() it really wouldn't make any sense.

Kevin
Benoît Canet Sept. 10, 2014, 11:51 a.m. UTC | #4
The Wednesday 10 Sep 2014 à 13:44:17 (+0200), Kevin Wolf wrote :
> Am 10.09.2014 um 13:34 hat Benoît Canet geschrieben:
> > The Wednesday 10 Sep 2014 à 10:13:31 (+0200), Markus Armbruster wrote :
> > > A block device consists of a frontend device model and a backend.
> > > 
> > > A block backend has a tree of block drivers doing the actual work.
> > > The tree is managed by the block layer.
> > > 
> > > We currently use a single abstraction BlockDriverState both for tree
> > > nodes and the backend as a whole.  Drawbacks:
> > > 
> > > * Its API includes both stuff that makes sense only at the block
> > >   backend level (root of the tree) and stuff that's only for use
> > >   within the block layer.  This makes the API bigger and more complex
> > >   than necessary.  Moreover, it's not obvious which interfaces are
> > >   meant for device models, and which really aren't.
> > > 
> > > * Since device models keep a reference to their backend, the backend
> > >   object can't just be destroyed.  But for media change, we need to
> > >   replace the tree.  Our solution is to make the BlockDriverState
> > >   generic, with actual driver state in a separate object, pointed to
> > >   by member opaque.  That lets us replace the tree by deinitializing
> > >   and reinitializing its root.  This special need of the root makes
> > >   the data structure awkward everywhere in the tree.
> > > 
> > > The general plan is to separate the APIs into "block backend", for use
> > > by device models, monitor and whatever other code dealing with block
> > > backends, and "block driver", for use by the block layer and whatever
> > > other code (if any) dealing with trees and tree nodes.
> > > 
> > > Code dealing with block backends, device models in particular, should
> > > become completely oblivious of BlockDriverState.  This should let us
> > > clean up both APIs, and the tree data structures.
> > > 
> > > This commit is a first step.  It creates a minimal "block backend"
> > > API: type BlockBackend and functions to create, destroy and find them.
> > > BlockBackend objects are created and destroyed, but not yet used for
> > > anything; that'll come shortly.
> > > 
> > > BlockBackend is reference-counted.  Its reference count never exceeds
> > > one so far, but that's going to change.
> > > 
> > > Signed-off-by: Markus Armbruster <armbru@redhat.com>
> 
> > > +/**
> > > + * blk_ref:
> > > + *
> > > + * Increment @blk's reference count.
> > > + */
> > > +void blk_ref(BlockBackend *blk)
> > > +{
> > 
> > In blk_unref you take care of doing
> > +    if (blk) {
> > to make sure the user does not pass a NULL pointer.
> > Transforming blk into a NULL pointer is not a side effect
> > of blk_unref, so this test is designed to protect against user
> > brain damage.
> 
> Not really, I'd rather consider it a convenience feature, just like
> you're allowed to call free(NULL) or bdrv_unref(NULL) without having a
> check for != NULL everywhere. This will be handy especially in error
> paths.

ok I see the spirit of it.

Benoit
> 
> > If the user can be brain damaged enough to pass a NULL to blk_unref,
> > he could be equally stupid and pass a NULL to blk_ref.
> > Why not add the same test here?
> 
> Whereas in blk_ref() it really wouldn't make any sense.
> 
> Kevin
>
Benoît Canet Sept. 10, 2014, 12:40 p.m. UTC | #5
The Wednesday 10 Sep 2014 à 10:13:31 (+0200), Markus Armbruster wrote :
> A block device consists of a frontend device model and a backend.
> 
> A block backend has a tree of block drivers doing the actual work.
> The tree is managed by the block layer.
> 
> We currently use a single abstraction BlockDriverState both for tree
> nodes and the backend as a whole.  Drawbacks:
> 
> * Its API includes both stuff that makes sense only at the block
>   backend level (root of the tree) and stuff that's only for use
>   within the block layer.  This makes the API bigger and more complex
>   than necessary.  Moreover, it's not obvious which interfaces are
>   meant for device models, and which really aren't.
> 
> * Since device models keep a reference to their backend, the backend
>   object can't just be destroyed.  But for media change, we need to
>   replace the tree.  Our solution is to make the BlockDriverState
>   generic, with actual driver state in a separate object, pointed to
>   by member opaque.  That lets us replace the tree by deinitializing
>   and reinitializing its root.  This special need of the root makes
>   the data structure awkward everywhere in the tree.
> 
> The general plan is to separate the APIs into "block backend", for use
> by device models, monitor and whatever other code dealing with block
> backends, and "block driver", for use by the block layer and whatever
> other code (if any) dealing with trees and tree nodes.
> 
> Code dealing with block backends, device models in particular, should
> become completely oblivious of BlockDriverState.  This should let us
> clean up both APIs, and the tree data structures.
> 
> This commit is a first step.  It creates a minimal "block backend"
> API: type BlockBackend and functions to create, destroy and find them.
> BlockBackend objects are created and destroyed, but not yet used for
> anything; that'll come shortly.
> 
> BlockBackend is reference-counted.  Its reference count never exceeds
> one so far, but that's going to change.
> 
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
>  block/Makefile.objs            |   2 +-
>  block/block-backend.c          | 110 +++++++++++++++++++++++++++++++++++++++++
>  blockdev.c                     |  10 +++-
>  hw/block/xen_disk.c            |  11 +++++
>  include/qemu/typedefs.h        |   1 +
>  include/sysemu/block-backend.h |  26 ++++++++++
>  qemu-img.c                     |  46 +++++++++++++++++
>  qemu-io.c                      |   8 +++
>  qemu-nbd.c                     |   3 +-
>  9 files changed, 214 insertions(+), 3 deletions(-)
>  create mode 100644 block/block-backend.c
>  create mode 100644 include/sysemu/block-backend.h
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index f45f939..a70140b 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -5,7 +5,7 @@ block-obj-y += qed-check.o
>  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
>  block-obj-$(CONFIG_QUORUM) += quorum.o
>  block-obj-y += parallels.o blkdebug.o blkverify.o
> -block-obj-y += snapshot.o qapi.o
> +block-obj-y += block-backend.o snapshot.o qapi.o
>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>  block-obj-$(CONFIG_POSIX) += raw-posix.o
>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
> diff --git a/block/block-backend.c b/block/block-backend.c
> new file mode 100644
> index 0000000..833f7d9
> --- /dev/null
> +++ b/block/block-backend.c
> @@ -0,0 +1,110 @@
> +/*
> + * QEMU Block backends
> + *
> + * Copyright (C) 2014 Red Hat, Inc.
> + *
> + * Authors:
> + *  Markus Armbruster <armbru@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#include "sysemu/block-backend.h"
> +#include "block/block_int.h"
> +
> +struct BlockBackend {
> +    char *name;
> +    int refcnt;
> +    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
> +};
> +
> +static QTAILQ_HEAD(, BlockBackend) blk_backends =
> +    QTAILQ_HEAD_INITIALIZER(blk_backends);
> +
> +/**
> + * blk_new:
> + * @name: name, must not be %NULL or empty
> + * @errp: return location for an error to be set on failure, or %NULL
> + *
> + * Create a new BlockBackend, with a reference count of one.  Fail if
> + * @name already exists.
> + *
> + * Returns: the BlockBackend on success, %NULL on failure
> + */
> +BlockBackend *blk_new(const char *name, Error **errp)
> +{
> +    BlockBackend *blk = g_new0(BlockBackend, 1);
> +
> +    assert(name && name[0]);
> +    if (blk_by_name(name)) {
> +        error_setg(errp, "Device with id '%s' already exists", name);
> +        return NULL;
> +    }
> +    blk->name = g_strdup(name);
> +    blk->refcnt = 1;
> +    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
> +    return blk;
> +}
> +
> +static void blk_delete(BlockBackend *blk)
> +{
> +    assert(!blk->refcnt);
> +    QTAILQ_REMOVE(&blk_backends, blk, link);
> +    g_free(blk->name);
> +    g_free(blk);
> +}
> +
> +/**
> + * blk_ref:
> + *
> + * Increment @blk's reference count.
> + */
> +void blk_ref(BlockBackend *blk)
> +{
> +    blk->refcnt++;
> +}
> +
> +/**
> + * blk_unref:
> + *
> + * Decrement @blk's reference count.  If this drops it to zero,
> + * destroy @blk.
> + */
> +void blk_unref(BlockBackend *blk)
> +{
> +    if (blk) {
> +        g_assert(blk->refcnt > 0);
> +        if (!--blk->refcnt) {
> +            blk_delete(blk);
> +        }
> +    }
> +}
> +
> +const char *blk_name(BlockBackend *blk)
> +{
> +    return blk->name;
> +}
> +
> +BlockBackend *blk_by_name(const char *name)
> +{
> +    BlockBackend *blk;
> +
> +    QTAILQ_FOREACH(blk, &blk_backends, link) {
> +        if (!strcmp(name, blk->name)) {
> +            return blk;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +/**
> + * blk_next:
> + *
> + * Returns: the first BlockBackend if @blk is null, else @blk's next
> + * sibling, which is %NULL for the last BlockBackend
> + */
> +BlockBackend *blk_next(BlockBackend *blk)
> +{
> +    return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
> +}
> diff --git a/blockdev.c b/blockdev.c
> index 9fbd888..86596bc 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -30,6 +30,7 @@
>   * THE SOFTWARE.
>   */
>  
> +#include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
>  #include "block/blockjob.h"
> @@ -221,6 +222,7 @@ void drive_del(DriveInfo *dinfo)
>      }
>  
>      bdrv_unref(dinfo->bdrv);
> +    blk_unref(blk_by_name(dinfo->id));
>      g_free(dinfo->id);
>      QTAILQ_REMOVE(&drives, dinfo, next);
>      g_free(dinfo->serial);
> @@ -301,6 +303,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      int ro = 0;
>      int bdrv_flags = 0;
>      int on_read_error, on_write_error;
> +    BlockBackend *blk;
>      DriveInfo *dinfo;
>      ThrottleConfig cfg;
>      int snapshot = 0;
> @@ -456,6 +459,10 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      }
>  
>      /* init */
> +    blk = blk_new(qemu_opts_id(opts), errp);
> +    if (!blk) {
> +        goto early_err;
> +    }

Here you create a new block backend, but you don't attach it to
anything yet.

So the following test further down in the code will leak it:
    if (!file || !*file) {                                                      
        if (has_driver_specific_opts) {                                         
            file = NULL;                                                        
        } else {                                                                
            QDECREF(bs_opts);                                                   
            qemu_opts_del(opts);                                                
            return dinfo;                                                       
        }                                                                       
    } 

I am sure one of your next patches fixes this, but for this
precise commit it does look like a leak.

>      dinfo = g_malloc0(sizeof(*dinfo));
>      dinfo->id = g_strdup(qemu_opts_id(opts));
>      dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
> @@ -525,6 +532,7 @@ err:
>  bdrv_new_err:
>      g_free(dinfo->id);
>      g_free(dinfo);
> +    blk_unref(blk);
>  early_err:
>      qemu_opts_del(opts);
>  err_no_opts:
> @@ -1770,7 +1778,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>       */
>      if (bdrv_get_attached_dev(bs)) {
>          bdrv_make_anon(bs);
> -
> +        blk_unref(blk_by_name(id));
>          /* Further I/O must not pause the guest */
>          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
>                            BLOCKDEV_ON_ERROR_REPORT);
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 8bac7ff..730a021 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -39,6 +39,7 @@
>  #include "hw/xen/xen_backend.h"
>  #include "xen_blkif.h"
>  #include "sysemu/blockdev.h"
> +#include "sysemu/block-backend.h"
>  
>  /* ------------------------------------------------------------- */
>  
> @@ -852,12 +853,18 @@ static int blk_connect(struct XenDevice *xendev)
>      blkdev->dinfo = drive_get(IF_XEN, 0, index);
>      if (!blkdev->dinfo) {
>          Error *local_err = NULL;
> +        BlockBackend *blk;
>          BlockDriver *drv;
>  
>          /* setup via xenbus -> create new block driver instance */
>          xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
> +        blk = blk_new(blkdev->dev, NULL);
> +        if (!blk) {
> +            return -1;
> +        }
>          blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
>          if (!blkdev->bs) {
> +            blk_unref(blk);
>              return -1;
>          }
>  
> @@ -868,6 +875,7 @@ static int blk_connect(struct XenDevice *xendev)
>                            error_get_pretty(local_err));
>              error_free(local_err);
>              bdrv_unref(blkdev->bs);
> +            blk_unref(blk);
>              blkdev->bs = NULL;
>              return -1;
>          }
> @@ -983,6 +991,9 @@ static void blk_disconnect(struct XenDevice *xendev)
>      if (blkdev->bs) {
>          bdrv_detach_dev(blkdev->bs, blkdev);
>          bdrv_unref(blkdev->bs);
> +        if (!blkdev->dinfo) {
> +            blk_unref(blk_by_name(blkdev->dev));
> +        }
>          blkdev->bs = NULL;
>      }
>      xen_be_unbind_evtchn(&blkdev->xendev);
> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> index 5f20b0e..198da2e 100644
> --- a/include/qemu/typedefs.h
> +++ b/include/qemu/typedefs.h
> @@ -35,6 +35,7 @@ typedef struct MachineClass MachineClass;
>  typedef struct NICInfo NICInfo;
>  typedef struct HCIInfo HCIInfo;
>  typedef struct AudioState AudioState;
> +typedef struct BlockBackend BlockBackend;
>  typedef struct BlockDriverState BlockDriverState;
>  typedef struct DriveInfo DriveInfo;
>  typedef struct DisplayState DisplayState;
> diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
> new file mode 100644
> index 0000000..3f8371c
> --- /dev/null
> +++ b/include/sysemu/block-backend.h
> @@ -0,0 +1,26 @@
> +/*
> + * QEMU Block backends
> + *
> + * Copyright (C) 2014 Red Hat, Inc.
> + *
> + * Authors:
> + *  Markus Armbruster <armbru@redhat.com>,
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef BLOCK_BACKEND_H
> +#define BLOCK_BACKEND_H
> +
> +#include "qemu/typedefs.h"
> +#include "qapi/error.h"
> +
> +BlockBackend *blk_new(const char *name, Error **errp);
> +void blk_ref(BlockBackend *blk);
> +void blk_unref(BlockBackend *blk);
> +const char *blk_name(BlockBackend *blk);
> +BlockBackend *blk_by_name(const char *name);
> +BlockBackend *blk_next(BlockBackend *blk);
> +
> +#endif
> diff --git a/qemu-img.c b/qemu-img.c
> index 4490a22..bad3f64 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -29,6 +29,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/osdep.h"
>  #include "sysemu/sysemu.h"
> +#include "sysemu/block-backend.h"
>  #include "block/block_int.h"
>  #include "block/qapi.h"
>  #include <getopt.h>
> @@ -575,6 +576,7 @@ static int img_check(int argc, char **argv)
>      int c, ret;
>      OutputFormat output_format = OFORMAT_HUMAN;
>      const char *filename, *fmt, *output, *cache;
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      int fix = 0;
>      int flags = BDRV_O_FLAGS | BDRV_O_CHECK;
> @@ -649,6 +651,7 @@ static int img_check(int argc, char **argv)
>          return 1;
>      }
>  

> +    blk = blk_new("image", &error_abort);
Hmm, are we so sure this will work that we don't check if (!blk)?

>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>      if (!bs) {
>          return 1;
> @@ -710,6 +713,7 @@ static int img_check(int argc, char **argv)
>  fail:
>      qapi_free_ImageCheck(check);
>      bdrv_unref(bs);
> +    blk_unref(blk);
>  
>      return ret;
>  }
> @@ -718,6 +722,7 @@ static int img_commit(int argc, char **argv)
>  {
>      int c, ret, flags;
>      const char *filename, *fmt, *cache;
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      bool quiet = false;
>  
> @@ -756,6 +761,7 @@ static int img_commit(int argc, char **argv)
>          return 1;
>      }
>  
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>      if (!bs) {
>          return 1;
> @@ -780,6 +786,7 @@ static int img_commit(int argc, char **argv)
>      }
>  
>      bdrv_unref(bs);
> +    blk_unref(blk);
>      if (ret) {
>          return 1;
>      }
> @@ -942,6 +949,7 @@ static int check_empty_sectors(BlockDriverState *bs, int64_t sect_num,
>  static int img_compare(int argc, char **argv)
>  {
>      const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
> +    BlockBackend *blk1, *blk2;
>      BlockDriverState *bs1, *bs2;
>      int64_t total_sectors1, total_sectors2;
>      uint8_t *buf1 = NULL, *buf2 = NULL;
> @@ -1011,6 +1019,7 @@ static int img_compare(int argc, char **argv)
>          goto out3;
>      }
>  
> +    blk1 = blk_new("image 1", &error_abort);
>      bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet);
>      if (!bs1) {
>          error_report("Can't open file %s", filename1);
> @@ -1018,6 +1027,7 @@ static int img_compare(int argc, char **argv)
>          goto out3;
>      }
>  
> +    blk2 = blk_new("image 2", &error_abort);
>      bs2 = bdrv_new_open("image 2", filename2, fmt2, flags, true, quiet);
>      if (!bs2) {
>          error_report("Can't open file %s", filename2);
> @@ -1184,10 +1194,12 @@ static int img_compare(int argc, char **argv)
>  
>  out:
>      bdrv_unref(bs2);
> +    blk_unref(blk2);
>      qemu_vfree(buf1);
>      qemu_vfree(buf2);
>  out2:
>      bdrv_unref(bs1);
> +    blk_unref(blk1);
>  out3:
>      qemu_progress_end();
>      return ret;
> @@ -1200,6 +1212,7 @@ static int img_convert(int argc, char **argv)
>      int progress = 0, flags, src_flags;
>      const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
>      BlockDriver *drv, *proto_drv;
> +    BlockBackend **blk = NULL, *out_blk = NULL;
>      BlockDriverState **bs = NULL, *out_bs = NULL;
>      int64_t total_sectors, nb_sectors, sector_num, bs_offset;
>      int64_t *bs_sectors = NULL;
> @@ -1354,6 +1367,7 @@ static int img_convert(int argc, char **argv)
>  
>      qemu_progress_print(0, 100);
>  
> +    blk = g_new0(BlockBackend *, bs_n);
>      bs = g_new0(BlockDriverState *, bs_n);
>      bs_sectors = g_new(int64_t, bs_n);
>  
> @@ -1361,6 +1375,7 @@ static int img_convert(int argc, char **argv)
>      for (bs_i = 0; bs_i < bs_n; bs_i++) {
>          char *id = bs_n > 1 ? g_strdup_printf("source %d", bs_i)
>                              : g_strdup("source");
> +        blk[bs_i] = blk_new(id, &error_abort);
>          bs[bs_i] = bdrv_new_open(id, argv[optind + bs_i], fmt, src_flags,
>                                   true, quiet);
>          g_free(id);
> @@ -1486,6 +1501,7 @@ static int img_convert(int argc, char **argv)
>          goto out;
>      }
>  
> +    out_blk = blk_new("target", &error_abort);
>      out_bs = bdrv_new_open("target", out_filename, out_fmt, flags, true, quiet);
>      if (!out_bs) {
>          ret = -1;
> @@ -1742,6 +1758,7 @@ out:
>      if (out_bs) {
>          bdrv_unref(out_bs);
>      }
> +    blk_unref(out_blk);
>      if (bs) {
>          for (bs_i = 0; bs_i < bs_n; bs_i++) {
>              if (bs[bs_i]) {
> @@ -1750,6 +1767,12 @@ out:
>          }
>          g_free(bs);
>      }
> +    if (blk) {
> +        for (bs_i = 0; bs_i < bs_n; bs_i++) {
> +            blk_unref(blk[bs_i]);
> +        }
> +        g_free(blk);
> +    }
>      g_free(bs_sectors);
>  fail_getopt:
>      g_free(options);
> @@ -1858,6 +1881,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>      filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
>  
>      while (filename) {
> +        BlockBackend *blk;
>          BlockDriverState *bs;
>          ImageInfo *info;
>          ImageInfoList *elem;
> @@ -1869,6 +1893,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>          }
>          g_hash_table_insert(filenames, (gpointer)filename, NULL);
>  
> +        blk = blk_new("image", &error_abort);
>          bs = bdrv_new_open("image", filename, fmt,
>                             BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
>          if (!bs) {

I think this is missing a
> +            blk_unref(blk);
in the if (!bs) branch.

> @@ -1880,6 +1905,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>              error_report("%s", error_get_pretty(err));
>              error_free(err);
>              bdrv_unref(bs);
> +            blk_unref(blk);
>              goto err;
>          }
>  
> @@ -1889,6 +1915,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>          last = &elem->next;
>  
>          bdrv_unref(bs);
> +        blk_unref(blk);
>  
>          filename = fmt = NULL;
>          if (chain) {
> @@ -2082,6 +2109,7 @@ static int img_map(int argc, char **argv)
>  {
>      int c;
>      OutputFormat output_format = OFORMAT_HUMAN;
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      const char *filename, *fmt, *output;
>      int64_t length;
> @@ -2130,6 +2158,7 @@ static int img_map(int argc, char **argv)
>          return 1;
>      }
>  
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS, true, false);
>      if (!bs) {
>          return 1;
> @@ -2175,6 +2204,7 @@ static int img_map(int argc, char **argv)
>  
>  out:
>      bdrv_unref(bs);
> +    blk_unref(blk);
>      return ret < 0;
>  }
>  
> @@ -2185,6 +2215,7 @@ out:
>  
>  static int img_snapshot(int argc, char **argv)
>  {
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      QEMUSnapshotInfo sn;
>      char *filename, *snapshot_name = NULL;
> @@ -2250,6 +2281,7 @@ static int img_snapshot(int argc, char **argv)
>      filename = argv[optind++];
>  
>      /* Open the image */
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, NULL, bdrv_oflags, true, quiet);
>      if (!bs) {
>          return 1;
> @@ -2297,6 +2329,7 @@ static int img_snapshot(int argc, char **argv)
>  
>      /* Cleanup */
>      bdrv_unref(bs);
> +    blk_unref(blk);
>      if (ret) {
>          return 1;
>      }
> @@ -2305,6 +2338,7 @@ static int img_snapshot(int argc, char **argv)
>  
>  static int img_rebase(int argc, char **argv)
>  {
> +    BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
>      BlockDriverState *bs = NULL, *bs_old_backing = NULL, *bs_new_backing = NULL;
>      BlockDriver *old_backing_drv, *new_backing_drv;
>      char *filename;
> @@ -2393,6 +2427,7 @@ static int img_rebase(int argc, char **argv)
>       * Ignore the old backing file for unsafe rebase in case we want to correct
>       * the reference to a renamed or moved backing file.
>       */
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>      if (!bs) {
>          ret = -1;
> @@ -2425,6 +2460,7 @@ static int img_rebase(int argc, char **argv)
>      if (!unsafe) {
>          char backing_name[1024];
>  
> +        blk_old_backing = blk_new("old_backing", &error_abort);
>          bs_old_backing = bdrv_new_named("old_backing", &error_abort);
>          bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
>          ret = bdrv_open(&bs_old_backing, backing_name, NULL, NULL, src_flags,
> @@ -2436,6 +2472,7 @@ static int img_rebase(int argc, char **argv)
>              goto out;
>          }
>          if (out_baseimg[0]) {
> +            blk_new_backing = blk_new("new_backing", &error_abort);
>              bs_new_backing = bdrv_new_named("new_backing", &error_abort);
>              ret = bdrv_open(&bs_new_backing, out_baseimg, NULL, NULL, src_flags,
>                              new_backing_drv, &local_err);
> @@ -2614,12 +2651,15 @@ out:
>          if (bs_old_backing != NULL) {
>              bdrv_unref(bs_old_backing);
>          }
> +        blk_unref(blk_old_backing);
>          if (bs_new_backing != NULL) {
>              bdrv_unref(bs_new_backing);
>          }
> +        blk_unref(blk_new_backing);
>      }
>  
>      bdrv_unref(bs);
> +    blk_unref(blk);
>      if (ret) {
>          return 1;
>      }
> @@ -2632,6 +2672,7 @@ static int img_resize(int argc, char **argv)
>      const char *filename, *fmt, *size;
>      int64_t n, total_size;
>      bool quiet = false;
> +    BlockBackend *blk = NULL;
>      BlockDriverState *bs = NULL;
>      QemuOpts *param;
>      static QemuOptsList resize_options = {
> @@ -2708,6 +2749,7 @@ static int img_resize(int argc, char **argv)
>      n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
>      qemu_opts_del(param);
>  
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR,
>                         true, quiet);
>      if (!bs) {
> @@ -2745,6 +2787,7 @@ out:
>      if (bs) {
>          bdrv_unref(bs);
>      }
> +    blk_unref(blk);
>      if (ret) {
>          return 1;
>      }
> @@ -2760,6 +2803,7 @@ static int img_amend(int argc, char **argv)
>      const char *fmt = NULL, *filename, *cache;
>      int flags;
>      bool quiet = false;
> +    BlockBackend *blk = NULL;
>      BlockDriverState *bs = NULL;
>  
>      cache = BDRV_DEFAULT_CACHE;
> @@ -2823,6 +2867,7 @@ static int img_amend(int argc, char **argv)
>          goto out;
>      }
>  
> +    blk = blk_new("image", &error_abort);
>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>      if (!bs) {
>          error_report("Could not open image '%s'", filename);
> @@ -2856,6 +2901,7 @@ out:
>      if (bs) {
>          bdrv_unref(bs);
>      }
> +    blk_unref(blk);
>      qemu_opts_del(opts);
>      qemu_opts_free(create_opts);
>      g_free(options);
> diff --git a/qemu-io.c b/qemu-io.c
> index 44c2e1c..45e5494 100644
> --- a/qemu-io.c
> +++ b/qemu-io.c
> @@ -19,6 +19,7 @@
>  #include "qemu/option.h"
>  #include "qemu/config-file.h"
>  #include "qemu/readline.h"
> +#include "sysemu/block-backend.h"
>  #include "block/block_int.h"
>  #include "trace/control.h"
>  
> @@ -26,6 +27,7 @@
>  
>  static char *progname;
>  
> +static BlockBackend *qemuio_blk;
>  static BlockDriverState *qemuio_bs;
>  
>  /* qemu-io commands passed using -c */
> @@ -37,7 +39,9 @@ static ReadLineState *readline_state;
>  static int close_f(BlockDriverState *bs, int argc, char **argv)
>  {
>      bdrv_unref(bs);
> +    blk_unref(qemuio_blk);
>      qemuio_bs = NULL;
> +    qemuio_blk = NULL;
>      return 0;
>  }
>  
> @@ -58,6 +62,7 @@ static int openfile(char *name, int flags, int growable, QDict *opts)
>          return 1;
>      }
>  
> +    qemuio_blk = blk_new("hda", &error_abort);

>      qemuio_bs = bdrv_new_named("hda", &error_abort);
I see: taking for granted that an allocation _will_ work is the QEMU tools style.

>  
>      if (growable) {
> @@ -70,7 +75,9 @@ static int openfile(char *name, int flags, int growable, QDict *opts)
>                  error_get_pretty(local_err));
>          error_free(local_err);
>          bdrv_unref(qemuio_bs);
> +        blk_unref(qemuio_blk);
>          qemuio_bs = NULL;
> +        qemuio_blk = NULL;
>          return 1;
>      }
>  
> @@ -479,6 +486,7 @@ int main(int argc, char **argv)
>      if (qemuio_bs) {
>          bdrv_unref(qemuio_bs);
>      }
> +    blk_unref(qemuio_blk);
>      g_free(readline_state);
>      return 0;
>  }
> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index a56ebfc..94b9b49 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -17,7 +17,7 @@
>   */
>  
>  #include "qemu-common.h"
> -#include "block/block.h"
> +#include "sysemu/block-backend.h"
>  #include "block/block_int.h"
>  #include "block/nbd.h"
>  #include "qemu/main-loop.h"
> @@ -687,6 +687,7 @@ int main(int argc, char **argv)
>          drv = NULL;
>      }
>  
> +    blk_new("hda", &error_abort);
>      bs = bdrv_new_named("hda", &error_abort);
>  
>      srcpath = argv[optind];
> -- 
> 1.9.3
> 
>
Benoît Canet Sept. 10, 2014, 12:46 p.m. UTC | #6
The Wednesday 10 Sep 2014 à 14:40:42 (+0200), Benoît Canet wrote :
> The Wednesday 10 Sep 2014 à 10:13:31 (+0200), Markus Armbruster wrote :
> > A block device consists of a frontend device model and a backend.
> > 
> > A block backend has a tree of block drivers doing the actual work.
> > The tree is managed by the block layer.
> > 
> > We currently use a single abstraction BlockDriverState both for tree
> > nodes and the backend as a whole.  Drawbacks:
> > 
> > * Its API includes both stuff that makes sense only at the block
> >   backend level (root of the tree) and stuff that's only for use
> >   within the block layer.  This makes the API bigger and more complex
> >   than necessary.  Moreover, it's not obvious which interfaces are
> >   meant for device models, and which really aren't.
> > 
> > * Since device models keep a reference to their backend, the backend
> >   object can't just be destroyed.  But for media change, we need to
> >   replace the tree.  Our solution is to make the BlockDriverState
> >   generic, with actual driver state in a separate object, pointed to
> >   by member opaque.  That lets us replace the tree by deinitializing
> >   and reinitializing its root.  This special need of the root makes
> >   the data structure awkward everywhere in the tree.
> > 
> > The general plan is to separate the APIs into "block backend", for use
> > by device models, monitor and whatever other code dealing with block
> > backends, and "block driver", for use by the block layer and whatever
> > other code (if any) dealing with trees and tree nodes.
> > 
> > Code dealing with block backends, device models in particular, should
> > become completely oblivious of BlockDriverState.  This should let us
> > clean up both APIs, and the tree data structures.
> > 
> > This commit is a first step.  It creates a minimal "block backend"
> > API: type BlockBackend and functions to create, destroy and find them.
> > BlockBackend objects are created and destroyed, but not yet used for
> > anything; that'll come shortly.
> > 
> > BlockBackend is reference-counted.  Its reference count never exceeds
> > one so far, but that's going to change.
> > 
> > Signed-off-by: Markus Armbruster <armbru@redhat.com>
> > ---
> >  block/Makefile.objs            |   2 +-
> >  block/block-backend.c          | 110 +++++++++++++++++++++++++++++++++++++++++
> >  blockdev.c                     |  10 +++-
> >  hw/block/xen_disk.c            |  11 +++++
> >  include/qemu/typedefs.h        |   1 +
> >  include/sysemu/block-backend.h |  26 ++++++++++
> >  qemu-img.c                     |  46 +++++++++++++++++
> >  qemu-io.c                      |   8 +++
> >  qemu-nbd.c                     |   3 +-
> >  9 files changed, 214 insertions(+), 3 deletions(-)
> >  create mode 100644 block/block-backend.c
> >  create mode 100644 include/sysemu/block-backend.h
> > 
> > diff --git a/block/Makefile.objs b/block/Makefile.objs
> > index f45f939..a70140b 100644
> > --- a/block/Makefile.objs
> > +++ b/block/Makefile.objs
> > @@ -5,7 +5,7 @@ block-obj-y += qed-check.o
> >  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
> >  block-obj-$(CONFIG_QUORUM) += quorum.o
> >  block-obj-y += parallels.o blkdebug.o blkverify.o
> > -block-obj-y += snapshot.o qapi.o
> > +block-obj-y += block-backend.o snapshot.o qapi.o
> >  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
> >  block-obj-$(CONFIG_POSIX) += raw-posix.o
> >  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > new file mode 100644
> > index 0000000..833f7d9
> > --- /dev/null
> > +++ b/block/block-backend.c
> > @@ -0,0 +1,110 @@
> > +/*
> > + * QEMU Block backends
> > + *
> > + * Copyright (C) 2014 Red Hat, Inc.
> > + *
> > + * Authors:
> > + *  Markus Armbruster <armbru@redhat.com>,
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > + * later.  See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "sysemu/block-backend.h"
> > +#include "block/block_int.h"
> > +
> > +struct BlockBackend {
> > +    char *name;
> > +    int refcnt;
> > +    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
> > +};
> > +
> > +static QTAILQ_HEAD(, BlockBackend) blk_backends =
> > +    QTAILQ_HEAD_INITIALIZER(blk_backends);
> > +
> > +/**
> > + * blk_new:
> > + * @name: name, must not be %NULL or empty
> > + * @errp: return location for an error to be set on failure, or %NULL
> > + *
> > + * Create a new BlockBackend, with a reference count of one.  Fail if
> > + * @name already exists.
> > + *
> > + * Returns: the BlockBackend on success, %NULL on failure
> > + */
> > +BlockBackend *blk_new(const char *name, Error **errp)
> > +{
> > +    BlockBackend *blk = g_new0(BlockBackend, 1);
> > +
> > +    assert(name && name[0]);
> > +    if (blk_by_name(name)) {
> > +        error_setg(errp, "Device with id '%s' already exists", name);
> > +        return NULL;
> > +    }
> > +    blk->name = g_strdup(name);
> > +    blk->refcnt = 1;
> > +    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
> > +    return blk;
> > +}
> > +
> > +static void blk_delete(BlockBackend *blk)
> > +{
> > +    assert(!blk->refcnt);
> > +    QTAILQ_REMOVE(&blk_backends, blk, link);
> > +    g_free(blk->name);
> > +    g_free(blk);
> > +}
> > +
> > +/**
> > + * blk_ref:
> > + *
> > + * Increment @blk's reference count.
> > + */
> > +void blk_ref(BlockBackend *blk)
> > +{
> > +    blk->refcnt++;
> > +}
> > +
> > +/**
> > + * blk_unref:
> > + *
> > + * Decrement @blk's reference count.  If this drops it to zero,
> > + * destroy @blk.
> > + */
> > +void blk_unref(BlockBackend *blk)
> > +{
> > +    if (blk) {
> > +        g_assert(blk->refcnt > 0);
> > +        if (!--blk->refcnt) {
> > +            blk_delete(blk);
> > +        }
> > +    }
> > +}
> > +
> > +const char *blk_name(BlockBackend *blk)
> > +{
> > +    return blk->name;
> > +}
> > +
> > +BlockBackend *blk_by_name(const char *name)
> > +{
> > +    BlockBackend *blk;
> > +
> > +    QTAILQ_FOREACH(blk, &blk_backends, link) {
> > +        if (!strcmp(name, blk->name)) {
> > +            return blk;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +/**
> > + * blk_next:
> > + *
> > + * Returns: the first BlockBackend if @blk is null, else @blk's next
> > + * sibling, which is %NULL for the last BlockBackend
> > + */
> > +BlockBackend *blk_next(BlockBackend *blk)
> > +{
> > +    return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
> > +}
> > diff --git a/blockdev.c b/blockdev.c
> > index 9fbd888..86596bc 100644
> > --- a/blockdev.c
> > +++ b/blockdev.c
> > @@ -30,6 +30,7 @@
> >   * THE SOFTWARE.
> >   */
> >  
> > +#include "sysemu/block-backend.h"
> >  #include "sysemu/blockdev.h"
> >  #include "hw/block/block.h"
> >  #include "block/blockjob.h"
> > @@ -221,6 +222,7 @@ void drive_del(DriveInfo *dinfo)
> >      }
> >  
> >      bdrv_unref(dinfo->bdrv);
> > +    blk_unref(blk_by_name(dinfo->id));
> >      g_free(dinfo->id);
> >      QTAILQ_REMOVE(&drives, dinfo, next);
> >      g_free(dinfo->serial);
> > @@ -301,6 +303,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
> >      int ro = 0;
> >      int bdrv_flags = 0;
> >      int on_read_error, on_write_error;
> > +    BlockBackend *blk;
> >      DriveInfo *dinfo;
> >      ThrottleConfig cfg;
> >      int snapshot = 0;
> > @@ -456,6 +459,10 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
> >      }
> >  
> >      /* init */
> > +    blk = blk_new(qemu_opts_id(opts), errp);
> > +    if (!blk) {
> > +        goto early_err;
> > +    }
> 
> Here you create a new block backend.
> And you don't attach it to anything in any way yet.
> 
> So down in the code the following test will leak it:
>     if (!file || !*file) {                                                      
>         if (has_driver_specific_opts) {                                         
>             file = NULL;                                                        
>         } else {                                                                
>             QDECREF(bs_opts);                                                   
>             qemu_opts_del(opts);                                                
>             return dinfo;                                                       
>         }                                                                       
>     } 
> 
> I am sure one of your later patches fixes this, but for this
> particular commit it does look like a leak.
> 
> >      dinfo = g_malloc0(sizeof(*dinfo));
> >      dinfo->id = g_strdup(qemu_opts_id(opts));
> >      dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
> > @@ -525,6 +532,7 @@ err:
> >  bdrv_new_err:
> >      g_free(dinfo->id);
> >      g_free(dinfo);
> > +    blk_unref(blk);
> >  early_err:
> >      qemu_opts_del(opts);
> >  err_no_opts:
> > @@ -1770,7 +1778,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
> >       */
> >      if (bdrv_get_attached_dev(bs)) {
> >          bdrv_make_anon(bs);
> > -
> > +        blk_unref(blk_by_name(id));
> >          /* Further I/O must not pause the guest */
> >          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
> >                            BLOCKDEV_ON_ERROR_REPORT);
> > diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> > index 8bac7ff..730a021 100644
> > --- a/hw/block/xen_disk.c
> > +++ b/hw/block/xen_disk.c
> > @@ -39,6 +39,7 @@
> >  #include "hw/xen/xen_backend.h"
> >  #include "xen_blkif.h"
> >  #include "sysemu/blockdev.h"
> > +#include "sysemu/block-backend.h"
> >  
> >  /* ------------------------------------------------------------- */
> >  
> > @@ -852,12 +853,18 @@ static int blk_connect(struct XenDevice *xendev)
> >      blkdev->dinfo = drive_get(IF_XEN, 0, index);
> >      if (!blkdev->dinfo) {
> >          Error *local_err = NULL;
> > +        BlockBackend *blk;
> >          BlockDriver *drv;
> >  
> >          /* setup via xenbus -> create new block driver instance */
> >          xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
> > +        blk = blk_new(blkdev->dev, NULL);
> > +        if (!blk) {
> > +            return -1;
> > +        }
> >          blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
> >          if (!blkdev->bs) {
> > +            blk_unref(blk);
> >              return -1;
> >          }
> >  
> > @@ -868,6 +875,7 @@ static int blk_connect(struct XenDevice *xendev)
> >                            error_get_pretty(local_err));
> >              error_free(local_err);
> >              bdrv_unref(blkdev->bs);
> > +            blk_unref(blk);
> >              blkdev->bs = NULL;
> >              return -1;
> >          }
> > @@ -983,6 +991,9 @@ static void blk_disconnect(struct XenDevice *xendev)
> >      if (blkdev->bs) {
> >          bdrv_detach_dev(blkdev->bs, blkdev);
> >          bdrv_unref(blkdev->bs);
> > +        if (!blkdev->dinfo) {
> > +            blk_unref(blk_by_name(blkdev->dev));
> > +        }
> >          blkdev->bs = NULL;
> >      }
> >      xen_be_unbind_evtchn(&blkdev->xendev);
> > diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> > index 5f20b0e..198da2e 100644
> > --- a/include/qemu/typedefs.h
> > +++ b/include/qemu/typedefs.h
> > @@ -35,6 +35,7 @@ typedef struct MachineClass MachineClass;
> >  typedef struct NICInfo NICInfo;
> >  typedef struct HCIInfo HCIInfo;
> >  typedef struct AudioState AudioState;
> > +typedef struct BlockBackend BlockBackend;
> >  typedef struct BlockDriverState BlockDriverState;
> >  typedef struct DriveInfo DriveInfo;
> >  typedef struct DisplayState DisplayState;
> > diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
> > new file mode 100644
> > index 0000000..3f8371c
> > --- /dev/null
> > +++ b/include/sysemu/block-backend.h
> > @@ -0,0 +1,26 @@
> > +/*
> > + * QEMU Block backends
> > + *
> > + * Copyright (C) 2014 Red Hat, Inc.
> > + *
> > + * Authors:
> > + *  Markus Armbruster <armbru@redhat.com>,
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > + * later.  See the COPYING file in the top-level directory.
> > + */
> > +
> > +#ifndef BLOCK_BACKEND_H
> > +#define BLOCK_BACKEND_H
> > +
> > +#include "qemu/typedefs.h"
> > +#include "qapi/error.h"
> > +
> > +BlockBackend *blk_new(const char *name, Error **errp);
> > +void blk_ref(BlockBackend *blk);
> > +void blk_unref(BlockBackend *blk);
> > +const char *blk_name(BlockBackend *blk);
> > +BlockBackend *blk_by_name(const char *name);
> > +BlockBackend *blk_next(BlockBackend *blk);
> > +
> > +#endif
> > diff --git a/qemu-img.c b/qemu-img.c
> > index 4490a22..bad3f64 100644
> > --- a/qemu-img.c
> > +++ b/qemu-img.c
> > @@ -29,6 +29,7 @@
> >  #include "qemu/error-report.h"
> >  #include "qemu/osdep.h"
> >  #include "sysemu/sysemu.h"
> > +#include "sysemu/block-backend.h"
> >  #include "block/block_int.h"
> >  #include "block/qapi.h"
> >  #include <getopt.h>
> > @@ -575,6 +576,7 @@ static int img_check(int argc, char **argv)
> >      int c, ret;
> >      OutputFormat output_format = OFORMAT_HUMAN;
> >      const char *filename, *fmt, *output, *cache;
> > +    BlockBackend *blk;
> >      BlockDriverState *bs;
> >      int fix = 0;
> >      int flags = BDRV_O_FLAGS | BDRV_O_CHECK;
> > @@ -649,6 +651,7 @@ static int img_check(int argc, char **argv)
> >          return 1;
> >      }
> >  
> 
> > +    blk = blk_new("image", &error_abort);
> Hmm, are we so sure this will work that we don't check if (!blk)?

OK, I understand: we are sure because we control the id and won't use the same one twice.

> 
> >      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
> >      if (!bs) {
> >          return 1;
> > @@ -710,6 +713,7 @@ static int img_check(int argc, char **argv)
> >  fail:
> >      qapi_free_ImageCheck(check);
> >      bdrv_unref(bs);
> > +    blk_unref(blk);
> >  
> >      return ret;
> >  }
> > @@ -718,6 +722,7 @@ static int img_commit(int argc, char **argv)
> >  {
> >      int c, ret, flags;
> >      const char *filename, *fmt, *cache;
> > +    BlockBackend *blk;
> >      BlockDriverState *bs;
> >      bool quiet = false;
> >  
> > @@ -756,6 +761,7 @@ static int img_commit(int argc, char **argv)
> >          return 1;
> >      }
> >  
> > +    blk = blk_new("image", &error_abort);
> >      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
> >      if (!bs) {
> >          return 1;
> > @@ -780,6 +786,7 @@ static int img_commit(int argc, char **argv)
> >      }
> >  
> >      bdrv_unref(bs);
> > +    blk_unref(blk);
> >      if (ret) {
> >          return 1;
> >      }
> > @@ -942,6 +949,7 @@ static int check_empty_sectors(BlockDriverState *bs, int64_t sect_num,
> >  static int img_compare(int argc, char **argv)
> >  {
> >      const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
> > +    BlockBackend *blk1, *blk2;
> >      BlockDriverState *bs1, *bs2;
> >      int64_t total_sectors1, total_sectors2;
> >      uint8_t *buf1 = NULL, *buf2 = NULL;
> > @@ -1011,6 +1019,7 @@ static int img_compare(int argc, char **argv)
> >          goto out3;
> >      }
> >  
> > +    blk1 = blk_new("image 1", &error_abort);
> >      bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet);
> >      if (!bs1) {
> >          error_report("Can't open file %s", filename1);
> > @@ -1018,6 +1027,7 @@ static int img_compare(int argc, char **argv)
> >          goto out3;
> >      }
> >  
> > +    blk2 = blk_new("image 2", &error_abort);
> >      bs2 = bdrv_new_open("image 2", filename2, fmt2, flags, true, quiet);
> >      if (!bs2) {
> >          error_report("Can't open file %s", filename2);
> > @@ -1184,10 +1194,12 @@ static int img_compare(int argc, char **argv)
> >  
> >  out:
> >      bdrv_unref(bs2);
> > +    blk_unref(blk2);
> >      qemu_vfree(buf1);
> >      qemu_vfree(buf2);
> >  out2:
> >      bdrv_unref(bs1);
> > +    blk_unref(blk1);
> >  out3:
> >      qemu_progress_end();
> >      return ret;
> > @@ -1200,6 +1212,7 @@ static int img_convert(int argc, char **argv)
> >      int progress = 0, flags, src_flags;
> >      const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
> >      BlockDriver *drv, *proto_drv;
> > +    BlockBackend **blk = NULL, *out_blk = NULL;
> >      BlockDriverState **bs = NULL, *out_bs = NULL;
> >      int64_t total_sectors, nb_sectors, sector_num, bs_offset;
> >      int64_t *bs_sectors = NULL;
> > @@ -1354,6 +1367,7 @@ static int img_convert(int argc, char **argv)
> >  
> >      qemu_progress_print(0, 100);
> >  
> > +    blk = g_new0(BlockBackend *, bs_n);
> >      bs = g_new0(BlockDriverState *, bs_n);
> >      bs_sectors = g_new(int64_t, bs_n);
> >  
> > @@ -1361,6 +1375,7 @@ static int img_convert(int argc, char **argv)
> >      for (bs_i = 0; bs_i < bs_n; bs_i++) {
> >          char *id = bs_n > 1 ? g_strdup_printf("source %d", bs_i)
> >                              : g_strdup("source");
> > +        blk[bs_i] = blk_new(id, &error_abort);
> >          bs[bs_i] = bdrv_new_open(id, argv[optind + bs_i], fmt, src_flags,
> >                                   true, quiet);
> >          g_free(id);
> > @@ -1486,6 +1501,7 @@ static int img_convert(int argc, char **argv)
> >          goto out;
> >      }
> >  
> > +    out_blk = blk_new("target", &error_abort);
> >      out_bs = bdrv_new_open("target", out_filename, out_fmt, flags, true, quiet);
> >      if (!out_bs) {
> >          ret = -1;
> > @@ -1742,6 +1758,7 @@ out:
> >      if (out_bs) {
> >          bdrv_unref(out_bs);
> >      }
> > +    blk_unref(out_blk);
> >      if (bs) {
> >          for (bs_i = 0; bs_i < bs_n; bs_i++) {
> >              if (bs[bs_i]) {
> > @@ -1750,6 +1767,12 @@ out:
> >          }
> >          g_free(bs);
> >      }
> > +    if (blk) {
> > +        for (bs_i = 0; bs_i < bs_n; bs_i++) {
> > +            blk_unref(blk[bs_i]);
> > +        }
> > +        g_free(blk);
> > +    }
> >      g_free(bs_sectors);
> >  fail_getopt:
> >      g_free(options);
> > @@ -1858,6 +1881,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
> >      filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
> >  
> >      while (filename) {
> > +        BlockBackend *blk;
> >          BlockDriverState *bs;
> >          ImageInfo *info;
> >          ImageInfoList *elem;
> > @@ -1869,6 +1893,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
> >          }
> >          g_hash_table_insert(filenames, (gpointer)filename, NULL);
> >  
> > +        blk = blk_new("image", &error_abort);
> >          bs = bdrv_new_open("image", filename, fmt,
> >                             BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
> >          if (!bs) {
> 
> I think this is missing a
> > +            blk_unref(blk);
> in the if (!bs) branch.
> 
> > @@ -1880,6 +1905,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
> >              error_report("%s", error_get_pretty(err));
> >              error_free(err);
> >              bdrv_unref(bs);
> > +            blk_unref(blk);
> >              goto err;
> >          }
> >  
> > @@ -1889,6 +1915,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
> >          last = &elem->next;
> >  
> >          bdrv_unref(bs);
> > +        blk_unref(blk);
> >  
> >          filename = fmt = NULL;
> >          if (chain) {
> > @@ -2082,6 +2109,7 @@ static int img_map(int argc, char **argv)
> >  {
> >      int c;
> >      OutputFormat output_format = OFORMAT_HUMAN;
> > +    BlockBackend *blk;
> >      BlockDriverState *bs;
> >      const char *filename, *fmt, *output;
> >      int64_t length;
> > @@ -2130,6 +2158,7 @@ static int img_map(int argc, char **argv)
> >          return 1;
> >      }
> >  
> > +    blk = blk_new("image", &error_abort);
> >      bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS, true, false);
> >      if (!bs) {
> >          return 1;
> > @@ -2175,6 +2204,7 @@ static int img_map(int argc, char **argv)
> >  
> >  out:
> >      bdrv_unref(bs);
> > +    blk_unref(blk);
> >      return ret < 0;
> >  }
> >  
> > @@ -2185,6 +2215,7 @@ out:
> >  
> >  static int img_snapshot(int argc, char **argv)
> >  {
> > +    BlockBackend *blk;
> >      BlockDriverState *bs;
> >      QEMUSnapshotInfo sn;
> >      char *filename, *snapshot_name = NULL;
> > @@ -2250,6 +2281,7 @@ static int img_snapshot(int argc, char **argv)
> >      filename = argv[optind++];
> >  
> >      /* Open the image */
> > +    blk = blk_new("image", &error_abort);
> >      bs = bdrv_new_open("image", filename, NULL, bdrv_oflags, true, quiet);
> >      if (!bs) {
> >          return 1;
> > @@ -2297,6 +2329,7 @@ static int img_snapshot(int argc, char **argv)
> >  
> >      /* Cleanup */
> >      bdrv_unref(bs);
> > +    blk_unref(blk);
> >      if (ret) {
> >          return 1;
> >      }
> > @@ -2305,6 +2338,7 @@ static int img_snapshot(int argc, char **argv)
> >  
> >  static int img_rebase(int argc, char **argv)
> >  {
> > +    BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
> >      BlockDriverState *bs = NULL, *bs_old_backing = NULL, *bs_new_backing = NULL;
> >      BlockDriver *old_backing_drv, *new_backing_drv;
> >      char *filename;
> > @@ -2393,6 +2427,7 @@ static int img_rebase(int argc, char **argv)
> >       * Ignore the old backing file for unsafe rebase in case we want to correct
> >       * the reference to a renamed or moved backing file.
> >       */
> > +    blk = blk_new("image", &error_abort);
> >      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
> >      if (!bs) {
> >          ret = -1;
> > @@ -2425,6 +2460,7 @@ static int img_rebase(int argc, char **argv)
> >      if (!unsafe) {
> >          char backing_name[1024];
> >  
> > +        blk_old_backing = blk_new("old_backing", &error_abort);
> >          bs_old_backing = bdrv_new_named("old_backing", &error_abort);
> >          bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
> >          ret = bdrv_open(&bs_old_backing, backing_name, NULL, NULL, src_flags,
> > @@ -2436,6 +2472,7 @@ static int img_rebase(int argc, char **argv)
> >              goto out;
> >          }
> >          if (out_baseimg[0]) {
> > +            blk_new_backing = blk_new("new_backing", &error_abort);
> >              bs_new_backing = bdrv_new_named("new_backing", &error_abort);
> >              ret = bdrv_open(&bs_new_backing, out_baseimg, NULL, NULL, src_flags,
> >                              new_backing_drv, &local_err);
> > @@ -2614,12 +2651,15 @@ out:
> >          if (bs_old_backing != NULL) {
> >              bdrv_unref(bs_old_backing);
> >          }
> > +        blk_unref(blk_old_backing);
> >          if (bs_new_backing != NULL) {
> >              bdrv_unref(bs_new_backing);
> >          }
> > +        blk_unref(blk_new_backing);
> >      }
> >  
> >      bdrv_unref(bs);
> > +    blk_unref(blk);
> >      if (ret) {
> >          return 1;
> >      }
> > @@ -2632,6 +2672,7 @@ static int img_resize(int argc, char **argv)
> >      const char *filename, *fmt, *size;
> >      int64_t n, total_size;
> >      bool quiet = false;
> > +    BlockBackend *blk = NULL;
> >      BlockDriverState *bs = NULL;
> >      QemuOpts *param;
> >      static QemuOptsList resize_options = {
> > @@ -2708,6 +2749,7 @@ static int img_resize(int argc, char **argv)
> >      n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
> >      qemu_opts_del(param);
> >  
> > +    blk = blk_new("image", &error_abort);
> >      bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR,
> >                         true, quiet);
> >      if (!bs) {
> > @@ -2745,6 +2787,7 @@ out:
> >      if (bs) {
> >          bdrv_unref(bs);
> >      }
> > +    blk_unref(blk);
> >      if (ret) {
> >          return 1;
> >      }
> > @@ -2760,6 +2803,7 @@ static int img_amend(int argc, char **argv)
> >      const char *fmt = NULL, *filename, *cache;
> >      int flags;
> >      bool quiet = false;
> > +    BlockBackend *blk = NULL;
> >      BlockDriverState *bs = NULL;
> >  
> >      cache = BDRV_DEFAULT_CACHE;
> > @@ -2823,6 +2867,7 @@ static int img_amend(int argc, char **argv)
> >          goto out;
> >      }
> >  
> > +    blk = blk_new("image", &error_abort);
> >      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
> >      if (!bs) {
> >          error_report("Could not open image '%s'", filename);
> > @@ -2856,6 +2901,7 @@ out:
> >      if (bs) {
> >          bdrv_unref(bs);
> >      }
> > +    blk_unref(blk);
> >      qemu_opts_del(opts);
> >      qemu_opts_free(create_opts);
> >      g_free(options);
> > diff --git a/qemu-io.c b/qemu-io.c
> > index 44c2e1c..45e5494 100644
> > --- a/qemu-io.c
> > +++ b/qemu-io.c
> > @@ -19,6 +19,7 @@
> >  #include "qemu/option.h"
> >  #include "qemu/config-file.h"
> >  #include "qemu/readline.h"
> > +#include "sysemu/block-backend.h"
> >  #include "block/block_int.h"
> >  #include "trace/control.h"
> >  
> > @@ -26,6 +27,7 @@
> >  
> >  static char *progname;
> >  
> > +static BlockBackend *qemuio_blk;
> >  static BlockDriverState *qemuio_bs;
> >  
> >  /* qemu-io commands passed using -c */
> > @@ -37,7 +39,9 @@ static ReadLineState *readline_state;
> >  static int close_f(BlockDriverState *bs, int argc, char **argv)
> >  {
> >      bdrv_unref(bs);
> > +    blk_unref(qemuio_blk);
> >      qemuio_bs = NULL;
> > +    qemuio_blk = NULL;
> >      return 0;
> >  }
> >  
> > @@ -58,6 +62,7 @@ static int openfile(char *name, int flags, int growable, QDict *opts)
> >          return 1;
> >      }
> >  
> > +    qemuio_blk = blk_new("hda", &error_abort);
> 
> >      qemuio_bs = bdrv_new_named("hda", &error_abort);
> I see accepting that an allocation _will_ work is the qemu tools style.
> 
> >  
> >      if (growable) {
> > @@ -70,7 +75,9 @@ static int openfile(char *name, int flags, int growable, QDict *opts)
> >                  error_get_pretty(local_err));
> >          error_free(local_err);
> >          bdrv_unref(qemuio_bs);
> > +        blk_unref(qemuio_blk);
> >          qemuio_bs = NULL;
> > +        qemuio_blk = NULL;
> >          return 1;
> >      }
> >  
> > @@ -479,6 +486,7 @@ int main(int argc, char **argv)
> >      if (qemuio_bs) {
> >          bdrv_unref(qemuio_bs);
> >      }
> > +    blk_unref(qemuio_blk);
> >      g_free(readline_state);
> >      return 0;
> >  }
> > diff --git a/qemu-nbd.c b/qemu-nbd.c
> > index a56ebfc..94b9b49 100644
> > --- a/qemu-nbd.c
> > +++ b/qemu-nbd.c
> > @@ -17,7 +17,7 @@
> >   */
> >  
> >  #include "qemu-common.h"
> > -#include "block/block.h"
> > +#include "sysemu/block-backend.h"
> >  #include "block/block_int.h"
> >  #include "block/nbd.h"
> >  #include "qemu/main-loop.h"
> > @@ -687,6 +687,7 @@ int main(int argc, char **argv)
> >          drv = NULL;
> >      }
> >  
> > +    blk_new("hda", &error_abort);
> >      bs = bdrv_new_named("hda", &error_abort);
> >  
> >      srcpath = argv[optind];
> > -- 
> > 1.9.3
> > 
> >
Markus Armbruster Sept. 11, 2014, 10:03 a.m. UTC | #7
Kevin Wolf <kwolf@redhat.com> writes:

> Am 10.09.2014 um 10:13 hat Markus Armbruster geschrieben:
>> A block device consists of a frontend device model and a backend.
>> 
>> A block backend has a tree of block drivers doing the actual work.
>> The tree is managed by the block layer.
>> 
>> We currently use a single abstraction BlockDriverState both for tree
>> nodes and the backend as a whole.  Drawbacks:
>> 
>> * Its API includes both stuff that makes sense only at the block
>>   backend level (root of the tree) and stuff that's only for use
>>   within the block layer.  This makes the API bigger and more complex
>>   than necessary.  Moreover, it's not obvious which interfaces are
>>   meant for device models, and which really aren't.
>> 
>> * Since device models keep a reference to their backend, the backend
>>   object can't just be destroyed.  But for media change, we need to
>>   replace the tree.  Our solution is to make the BlockDriverState
>>   generic, with actual driver state in a separate object, pointed to
>>   by member opaque.  That lets us replace the tree by deinitializing
>>   and reinitializing its root.  This special need of the root makes
>>   the data structure awkward everywhere in the tree.
>> 
>> The general plan is to separate the APIs into "block backend", for use
>> by device models, monitor and whatever other code dealing with block
>> backends, and "block driver", for use by the block layer and whatever
>> other code (if any) dealing with trees and tree nodes.
>> 
>> Code dealing with block backends, device models in particular, should
>> become completely oblivious of BlockDriverState.  This should let us
>> clean up both APIs, and the tree data structures.
>> 
>> This commit is a first step.  It creates a minimal "block backend"
>> API: type BlockBackend and functions to create, destroy and find them.
>> BlockBackend objects are created and destroyed, but not yet used for
>> anything; that'll come shortly.
>> 
>> BlockBackend is reference-counted.  Its reference count never exceeds
>> one so far, but that's going to change.
>> 
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>> ---
>>  block/Makefile.objs            |   2 +-
>>  block/block-backend.c          | 110 +++++++++++++++++++++++++++++++++++++++++
>>  blockdev.c                     |  10 +++-
>>  hw/block/xen_disk.c            |  11 +++++
>>  include/qemu/typedefs.h        |   1 +
>>  include/sysemu/block-backend.h |  26 ++++++++++
>>  qemu-img.c                     |  46 +++++++++++++++++
>>  qemu-io.c                      |   8 +++
>>  qemu-nbd.c                     |   3 +-
>>  9 files changed, 214 insertions(+), 3 deletions(-)
>>  create mode 100644 block/block-backend.c
>>  create mode 100644 include/sysemu/block-backend.h
>> 
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index f45f939..a70140b 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -5,7 +5,7 @@ block-obj-y += qed-check.o
>>  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
>>  block-obj-$(CONFIG_QUORUM) += quorum.o
>>  block-obj-y += parallels.o blkdebug.o blkverify.o
>> -block-obj-y += snapshot.o qapi.o
>> +block-obj-y += block-backend.o snapshot.o qapi.o
>>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>>  block-obj-$(CONFIG_POSIX) += raw-posix.o
>>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>> diff --git a/block/block-backend.c b/block/block-backend.c
>> new file mode 100644
>> index 0000000..833f7d9
>> --- /dev/null
>> +++ b/block/block-backend.c
>> @@ -0,0 +1,110 @@
>> +/*
>> + * QEMU Block backends
>> + *
>> + * Copyright (C) 2014 Red Hat, Inc.
>> + *
>> + * Authors:
>> + *  Markus Armbruster <armbru@redhat.com>,
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * later.  See the COPYING file in the top-level directory.
>> + */
>
> I think we still have the long-term plan of exposing a block layer
> library that can be consumed by libvirt. As the usage in qemu-io/img/nbd
> shows, this will probably have to use BlockBackends, so this code is part
> of the block layer core.
>
> Considering this, using the LGPL would be more practical. Can you please
> make this change for v2? (Personally, I would have used the MIT license
> that the rest of the block layer uses, which also makes copying code
> around cleaner license-wise, but I know you dislike it.)

I do.

Having to accept the Lesser GPL's leeching loophole annoys me, but the
libvirt licensing boat has long sailed.

>> +#include "sysemu/block-backend.h"
>> +#include "block/block_int.h"
>> +
>> +struct BlockBackend {
>> +    char *name;
>> +    int refcnt;
>> +    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
>> +};
>> +
>> +static QTAILQ_HEAD(, BlockBackend) blk_backends =
>> +    QTAILQ_HEAD_INITIALIZER(blk_backends);
>> +
>> +/**
>> + * blk_new:
>> + * @name: name, must not be %NULL or empty
>> + * @errp: return location for an error to be set on failure, or %NULL
>> + *
>> + * Create a new BlockBackend, with a reference count of one.  Fail if
>> + * @name already exists.
>> + *
>> + * Returns: the BlockBackend on success, %NULL on failure
>> + */
>> +BlockBackend *blk_new(const char *name, Error **errp)
>> +{
>> +    BlockBackend *blk = g_new0(BlockBackend, 1);
>> +
>> +    assert(name && name[0]);
>> +    if (blk_by_name(name)) {
>> +        error_setg(errp, "Device with id '%s' already exists", name);
>> +        return NULL;
>
> blk is leaked here.

Fixed.

>> +    }
>> +    blk->name = g_strdup(name);
>> +    blk->refcnt = 1;
>> +    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
>> +    return blk;
>> +}
>> +
>> +static void blk_delete(BlockBackend *blk)
>> +{
>> +    assert(!blk->refcnt);
>> +    QTAILQ_REMOVE(&blk_backends, blk, link);
>> +    g_free(blk->name);
>> +    g_free(blk);
>> +}
>> +
>> +/**
>> + * blk_ref:
>> + *
>> + * Increment @blk's reference count.
>> + */
>> +void blk_ref(BlockBackend *blk)
>> +{
>> +    blk->refcnt++;
>> +}
>> +
>> +/**
>> + * blk_unref:
>> + *
>> + * Decrement @blk's reference count.  If this drops it to zero,
>> + * destroy @blk.
>> + */
>> +void blk_unref(BlockBackend *blk)
>> +{
>> +    if (blk) {
>> +        g_assert(blk->refcnt > 0);
>
> You're mixing assert() and g_assert() in this patch. Any reason for
> this?

Stupidity?

>       If not, I think plain assert() is clearly in the majority in the
> overall codebase.

Fixed.

>> +        if (!--blk->refcnt) {
>> +            blk_delete(blk);
>> +        }
>> +    }
>> +}
>> +
>> +const char *blk_name(BlockBackend *blk)
>> +{
>> +    return blk->name;
>> +}
>> +
>> +BlockBackend *blk_by_name(const char *name)
>> +{
>> +    BlockBackend *blk;
>> +
>> +    QTAILQ_FOREACH(blk, &blk_backends, link) {
>> +        if (!strcmp(name, blk->name)) {
>> +            return blk;
>> +        }
>> +    }
>> +    return NULL;
>> +}
>
> No comment for these two non-static functions?

I considered the abysmal signal-to-noise ratio of their GTK-Doc-style
function comments, and balked.

Considering we're not using this style in the block layer much, what do
you think about me abandoning this GTK-doc business, and adding
*concise* function comments to all my new public functions instead?

>> +/**
>> + * blk_next:
>> + *
>> + * Returns: the first BlockBackend if @blk is null, else @blk's next
>> + * sibling, which is %NULL for the last BlockBackend
>> + */
>> +BlockBackend *blk_next(BlockBackend *blk)
>> +{
>> +    return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
>> +}
>> diff --git a/blockdev.c b/blockdev.c
>> index 9fbd888..86596bc 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>
> Okay, so here the hard part starts: As long as the BB is completely
> unused, it's very hard to review at which places one must be created and
> deleted.
>
> What was your approach to systematically find all of them?

Good question!  Fortunately, I have an answer ready :)

We want to create a BB exactly when we're creating a named BDS.  "Named"
in the sense of "in bdrv_states".

We want to destroy a BB exactly when we're destroying the BDS that
motivated its creation.

This is a baby step towards having named BDSes owned by a BB.  That'll
be done by PATCH 05.

The places creating a named BDS are all clearly visible in PATCH 01,
because I rename the function doing that to bdrv_new_named().

This patch adds a blk_new() right next to every bdrv_new_named(), except
for qemu-img.c.  qemu-img.c calls bdrv_new_named() in bdrv_new_open().
I can't easily call blk_new() there, because the callers need the new BB
to be able to destroy it, but I can't easily return the new BB in
addition to the new BDS.  So I call blk_new() right before every
bdrv_new_open() instead.

BB destruction isn't quite as obvious, because destruction of named and
nameless BDSes looks the same in the code.  Either you examine every
bdrv_unref() and figure out whether its BDS is named, and if yes, where
you can get the BB you need to unref here.  Or you figure out for every
allocation of a named BDS where it can be destroyed, and add the BB
destruction there.  That's what I did.

>> @@ -30,6 +30,7 @@
>>   * THE SOFTWARE.
>>   */
>>  
>> +#include "sysemu/block-backend.h"
>>  #include "sysemu/blockdev.h"
>>  #include "hw/block/block.h"
>>  #include "block/blockjob.h"
>> @@ -221,6 +222,7 @@ void drive_del(DriveInfo *dinfo)
>>      }
>>  
>>      bdrv_unref(dinfo->bdrv);
>> +    blk_unref(blk_by_name(dinfo->id));
>>      g_free(dinfo->id);
>>      QTAILQ_REMOVE(&drives, dinfo, next);
>>      g_free(dinfo->serial);
>> @@ -301,6 +303,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>      int ro = 0;
>>      int bdrv_flags = 0;
>>      int on_read_error, on_write_error;
>> +    BlockBackend *blk;
>>      DriveInfo *dinfo;
>>      ThrottleConfig cfg;
>>      int snapshot = 0;
>> @@ -456,6 +459,10 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>      }
>>  
>>      /* init */
>> +    blk = blk_new(qemu_opts_id(opts), errp);
>> +    if (!blk) {
>> +        goto early_err;
>> +    }
>>      dinfo = g_malloc0(sizeof(*dinfo));
>>      dinfo->id = g_strdup(qemu_opts_id(opts));
>>      dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
>> @@ -525,6 +532,7 @@ err:
>>  bdrv_new_err:
>>      g_free(dinfo->id);
>>      g_free(dinfo);
>> +    blk_unref(blk);
>>  early_err:
>>      qemu_opts_del(opts);
>>  err_no_opts:
>> @@ -1770,7 +1778,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>>       */
>>      if (bdrv_get_attached_dev(bs)) {
>>          bdrv_make_anon(bs);
>> -
>> +        blk_unref(blk_by_name(id));
>>          /* Further I/O must not pause the guest */
>>          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
>>                            BLOCKDEV_ON_ERROR_REPORT);
>
> Won't we unref the BB a second time now when unplugging the device?
> (drive_del() called in blockdev_auto_del())

Short answer: you're right, there's a bug, and I'll fix it.

Long answer: this part is hairy, because the drive_del command is badly
designed.

For historical reasons, unplugging a device model destroys the block
backends it's attached to, and this is the only way to destroy block
backends.

Aside: we're not carrying that misfeature forward to blockdev-add.

For some device models, the guest can prevent unplug.  Some users need a
way to forcibly revoke device model access to the block backend then, so
the underlying images can be safely used for something else.

drive_del lets you do that.  Unfortunately, it conflates revoking access
with destroying the backend.

Commit 9063f81 makes drive_del immediately destroy the root BDS.  Nice:
the device name becomes available for reuse immediately.  Not so nice:
the device model's pointer to the root BDS dangles, and we're prone to
crash when the memory gets reused.

Commit d22b2f4 fixed that by hiding the root BDS instead of destroying
it.  Destruction only happens on unplug.  "Hiding" means removing it
from bdrv_states and graph_bdrv_states; see bdrv_make_anon().

We should've limited the command to revoking access, avoiding this silly
hiding business.

The obvious thing to do here is match the mess: hide the BB along with
the BDS here, delete it in blockdev_auto_del().

Trouble is that hiding it makes it hard to find in blockdev_auto_del().

I tried to avoid the need to find it there by destroying it here.  On
unplug, drive_del()'s blk_unref(blk_by_name(dinfo->id)) won't do
anything, because blk_by_name() returns NULL.  *Except* when the user
has since added *another* BB with the same name!  Oops...

Simplest possible solution: I hide the BB here, and *leak* it (with a
fat FIXME comment) until it becomes easy enough to find.  I guess I can
find it right in the next patch.

>> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
>> index 8bac7ff..730a021 100644
>> --- a/hw/block/xen_disk.c
>> +++ b/hw/block/xen_disk.c
>> @@ -39,6 +39,7 @@
>>  #include "hw/xen/xen_backend.h"
>>  #include "xen_blkif.h"
>>  #include "sysemu/blockdev.h"
>> +#include "sysemu/block-backend.h"
>>  
>>  /* ------------------------------------------------------------- */
>>  
>> @@ -852,12 +853,18 @@ static int blk_connect(struct XenDevice *xendev)
>>      blkdev->dinfo = drive_get(IF_XEN, 0, index);
>>      if (!blkdev->dinfo) {
>>          Error *local_err = NULL;
>> +        BlockBackend *blk;
>>          BlockDriver *drv;
>>  
>>          /* setup via xenbus -> create new block driver instance */
>>          xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
>> +        blk = blk_new(blkdev->dev, NULL);
>> +        if (!blk) {
>> +            return -1;
>> +        }
>>          blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
>>          if (!blkdev->bs) {
>> +            blk_unref(blk);
>>              return -1;
>>          }
>>  
>> @@ -868,6 +875,7 @@ static int blk_connect(struct XenDevice *xendev)
>>                            error_get_pretty(local_err));
>>              error_free(local_err);
>>              bdrv_unref(blkdev->bs);
>> +            blk_unref(blk);
>>              blkdev->bs = NULL;
>>              return -1;
>>          }
>> @@ -983,6 +991,9 @@ static void blk_disconnect(struct XenDevice *xendev)
>>      if (blkdev->bs) {
>>          bdrv_detach_dev(blkdev->bs, blkdev);
>>          bdrv_unref(blkdev->bs);
>> +        if (!blkdev->dinfo) {
>> +            blk_unref(blk_by_name(blkdev->dev));
>> +        }
>>          blkdev->bs = NULL;
>>      }
>>      xen_be_unbind_evtchn(&blkdev->xendev);
>> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
>> index 5f20b0e..198da2e 100644
>> --- a/include/qemu/typedefs.h
>> +++ b/include/qemu/typedefs.h
>> @@ -35,6 +35,7 @@ typedef struct MachineClass MachineClass;
>>  typedef struct NICInfo NICInfo;
>>  typedef struct HCIInfo HCIInfo;
>>  typedef struct AudioState AudioState;
>> +typedef struct BlockBackend BlockBackend;
>>  typedef struct BlockDriverState BlockDriverState;
>>  typedef struct DriveInfo DriveInfo;
>>  typedef struct DisplayState DisplayState;
>> diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
>> new file mode 100644
>> index 0000000..3f8371c
>> --- /dev/null
>> +++ b/include/sysemu/block-backend.h
>> @@ -0,0 +1,26 @@
>> +/*
>> + * QEMU Block backends
>> + *
>> + * Copyright (C) 2014 Red Hat, Inc.
>> + *
>> + * Authors:
>> + *  Markus Armbruster <armbru@redhat.com>,
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * later.  See the COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef BLOCK_BACKEND_H
>> +#define BLOCK_BACKEND_H
>> +
>> +#include "qemu/typedefs.h"
>> +#include "qapi/error.h"
>> +
>> +BlockBackend *blk_new(const char *name, Error **errp);
>> +void blk_ref(BlockBackend *blk);
>> +void blk_unref(BlockBackend *blk);
>> +const char *blk_name(BlockBackend *blk);
>> +BlockBackend *blk_by_name(const char *name);
>> +BlockBackend *blk_next(BlockBackend *blk);
>> +
>> +#endif
>> diff --git a/qemu-img.c b/qemu-img.c
>> index 4490a22..bad3f64 100644
>> --- a/qemu-img.c
>> +++ b/qemu-img.c
>
> Won't comment on each hunk in qemu-img, but in many cases, on
> bdrv_new_open() failure, blk is leaked.

I'll check them systematically.

>> diff --git a/qemu-nbd.c b/qemu-nbd.c
>> index a56ebfc..94b9b49 100644
>> --- a/qemu-nbd.c
>> +++ b/qemu-nbd.c
>> @@ -17,7 +17,7 @@
>>   */
>>  
>>  #include "qemu-common.h"
>> -#include "block/block.h"
>> +#include "sysemu/block-backend.h"
>>  #include "block/block_int.h"
>>  #include "block/nbd.h"
>>  #include "qemu/main-loop.h"
>> @@ -687,6 +687,7 @@ int main(int argc, char **argv)
>>          drv = NULL;
>>      }
>>  
>> +    blk_new("hda", &error_abort);
>>      bs = bdrv_new_named("hda", &error_abort);
>>  
>>      srcpath = argv[optind];
>
> Where is the matching blk_unref?

Right next to the bdrv_unref(): nowhere :)

If you like, I can throw in a preliminary patch adding the bdrv_unref().
Then add the matching blk_unref() in this patch.
Markus Armbruster Sept. 11, 2014, 10:11 a.m. UTC | #8
Benoît Canet <benoit.canet@irqsave.net> writes:

> The Wednesday 10 Sep 2014 à 10:13:31 (+0200), Markus Armbruster wrote :
>> A block device consists of a frontend device model and a backend.
>> 
>> A block backend has a tree of block drivers doing the actual work.
>> The tree is managed by the block layer.
>> 
>> We currently use a single abstraction BlockDriverState both for tree
>> nodes and the backend as a whole.  Drawbacks:
>> 
>> * Its API includes both stuff that makes sense only at the block
>>   backend level (root of the tree) and stuff that's only for use
>>   within the block layer.  This makes the API bigger and more complex
>>   than necessary.  Moreover, it's not obvious which interfaces are
>>   meant for device models, and which really aren't.
>> 
>> * Since device models keep a reference to their backend, the backend
>>   object can't just be destroyed.  But for media change, we need to
>>   replace the tree.  Our solution is to make the BlockDriverState
>>   generic, with actual driver state in a separate object, pointed to
>>   by member opaque.  That lets us replace the tree by deinitializing
>>   and reinitializing its root.  This special need of the root makes
>>   the data structure awkward everywhere in the tree.
>> 
>> The general plan is to separate the APIs into "block backend", for use
>> by device models, monitor and whatever other code dealing with block
>> backends, and "block driver", for use by the block layer and whatever
>> other code (if any) dealing with trees and tree nodes.
>> 
>> Code dealing with block backends, device models in particular, should
>> become completely oblivious of BlockDriverState.  This should let us
>> clean up both APIs, and the tree data structures.
>> 
>> This commit is a first step.  It creates a minimal "block backend"
>> API: type BlockBackend and functions to create, destroy and find them.
>> BlockBackend objects are created and destroyed, but not yet used for
>> anything; that'll come shortly.
>> 
>> BlockBackend is reference-counted.  Its reference count never exceeds
>> one so far, but that's going to change.
>> 
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>> ---
>>  block/Makefile.objs            |   2 +-
>>  block/block-backend.c          | 110 +++++++++++++++++++++++++++++++++++++++++
>>  blockdev.c                     |  10 +++-
>>  hw/block/xen_disk.c            |  11 +++++
>>  include/qemu/typedefs.h        |   1 +
>>  include/sysemu/block-backend.h |  26 ++++++++++
>>  qemu-img.c                     |  46 +++++++++++++++++
>>  qemu-io.c                      |   8 +++
>>  qemu-nbd.c                     |   3 +-
>>  9 files changed, 214 insertions(+), 3 deletions(-)
>>  create mode 100644 block/block-backend.c
>>  create mode 100644 include/sysemu/block-backend.h
>> 
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index f45f939..a70140b 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -5,7 +5,7 @@ block-obj-y += qed-check.o
>>  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
>>  block-obj-$(CONFIG_QUORUM) += quorum.o
>>  block-obj-y += parallels.o blkdebug.o blkverify.o
>> -block-obj-y += snapshot.o qapi.o
>> +block-obj-y += block-backend.o snapshot.o qapi.o
>>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>>  block-obj-$(CONFIG_POSIX) += raw-posix.o
>>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>> diff --git a/block/block-backend.c b/block/block-backend.c
>> new file mode 100644
>> index 0000000..833f7d9
>> --- /dev/null
>> +++ b/block/block-backend.c
>> @@ -0,0 +1,110 @@
>> +/*
>> + * QEMU Block backends
>> + *
>> + * Copyright (C) 2014 Red Hat, Inc.
>> + *
>> + * Authors:
>> + *  Markus Armbruster <armbru@redhat.com>,
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * later.  See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "sysemu/block-backend.h"
>> +#include "block/block_int.h"
>> +
>> +struct BlockBackend {
>> +    char *name;
>> +    int refcnt;
>> +    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
>> +};
>> +
>> +static QTAILQ_HEAD(, BlockBackend) blk_backends =
>> +    QTAILQ_HEAD_INITIALIZER(blk_backends);
>> +
>> +/**
>> + * blk_new:
>> + * @name: name, must not be %NULL or empty
>> + * @errp: return location for an error to be set on failure, or %NULL
>> + *
>> + * Create a new BlockBackend, with a reference count of one.  Fail if
>> + * @name already exists.
>> + *
>> + * Returns: the BlockBackend on success, %NULL on failure
>> + */
>> +BlockBackend *blk_new(const char *name, Error **errp)
>
> I am responding for the easy part first.
>
> So here the blockbackend is identified by a name
>
>> +{
>> +    BlockBackend *blk = g_new0(BlockBackend, 1);
>> +
>> +    assert(name && name[0]);
>> +    if (blk_by_name(name)) {
>
>> +        error_setg(errp, "Device with id '%s' already exists", name);
>
> But here is it an id or a name ?
> Do we need to make a choice everywhere in the code between id and name ?

If we can agree on a convention to use within the block layer, I'll be
happy to follow it.

Right now, we mix "id", "device", "device name" freely.  My patch
mimics existing usage: "id" in QemuOpts and some error messages,
"device name" and its abbreviated variations in the code, the schema,
and some other error messages.

>> +        return NULL;
>> +    }
>> +    blk->name = g_strdup(name);
>> +    blk->refcnt = 1;
>> +    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
>> +    return blk;
>> +}
>> +
>> +static void blk_delete(BlockBackend *blk)
>> +{
>> +    assert(!blk->refcnt);
>> +    QTAILQ_REMOVE(&blk_backends, blk, link);
>> +    g_free(blk->name);
>> +    g_free(blk);
>> +}
>> +
>> +/**
>> + * blk_ref:
>> + *
>> + * Increment @blk's reference count.
>> + */
>> +void blk_ref(BlockBackend *blk)
>> +{
>
> In blk_unref you take care of doing
> +    if (blk) {
> to make sure the user does not pass a NULL pointer.
> Transforming blk into a NULL pointer is not a side effect
> of blk_unref so this test is designed to prevent a user
> brain damage.
>
> If the user can be brain damaged to pass a NULL to blk_unref he
> could be equally stupid passing a NULL to blk_ref.
> Why not add the same test here ?

Kevin already explained this one.

[...]
Markus Armbruster Sept. 11, 2014, 10:21 a.m. UTC | #9
Benoît Canet <benoit.canet@irqsave.net> writes:

> The Wednesday 10 Sep 2014 à 10:13:31 (+0200), Markus Armbruster wrote :
>> A block device consists of a frontend device model and a backend.
>> 
>> A block backend has a tree of block drivers doing the actual work.
>> The tree is managed by the block layer.
>> 
>> We currently use a single abstraction BlockDriverState both for tree
>> nodes and the backend as a whole.  Drawbacks:
>> 
>> * Its API includes both stuff that makes sense only at the block
>>   backend level (root of the tree) and stuff that's only for use
>>   within the block layer.  This makes the API bigger and more complex
>>   than necessary.  Moreover, it's not obvious which interfaces are
>>   meant for device models, and which really aren't.
>> 
>> * Since device models keep a reference to their backend, the backend
>>   object can't just be destroyed.  But for media change, we need to
>>   replace the tree.  Our solution is to make the BlockDriverState
>>   generic, with actual driver state in a separate object, pointed to
>>   by member opaque.  That lets us replace the tree by deinitializing
>>   and reinitializing its root.  This special need of the root makes
>>   the data structure awkward everywhere in the tree.
>> 
>> The general plan is to separate the APIs into "block backend", for use
>> by device models, monitor and whatever other code dealing with block
>> backends, and "block driver", for use by the block layer and whatever
>> other code (if any) dealing with trees and tree nodes.
>> 
>> Code dealing with block backends, device models in particular, should
>> become completely oblivious of BlockDriverState.  This should let us
>> clean up both APIs, and the tree data structures.
>> 
>> This commit is a first step.  It creates a minimal "block backend"
>> API: type BlockBackend and functions to create, destroy and find them.
>> BlockBackend objects are created and destroyed, but not yet used for
>> anything; that'll come shortly.
>> 
>> BlockBackend is reference-counted.  Its reference count never exceeds
>> one so far, but that's going to change.
>> 
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>> ---
>>  block/Makefile.objs            |   2 +-
>>  block/block-backend.c          | 110 +++++++++++++++++++++++++++++++++++++++++
>>  blockdev.c                     |  10 +++-
>>  hw/block/xen_disk.c            |  11 +++++
>>  include/qemu/typedefs.h        |   1 +
>>  include/sysemu/block-backend.h |  26 ++++++++++
>>  qemu-img.c                     |  46 +++++++++++++++++
>>  qemu-io.c                      |   8 +++
>>  qemu-nbd.c                     |   3 +-
>>  9 files changed, 214 insertions(+), 3 deletions(-)
>>  create mode 100644 block/block-backend.c
>>  create mode 100644 include/sysemu/block-backend.h
>> 
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index f45f939..a70140b 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -5,7 +5,7 @@ block-obj-y += qed-check.o
>>  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
>>  block-obj-$(CONFIG_QUORUM) += quorum.o
>>  block-obj-y += parallels.o blkdebug.o blkverify.o
>> -block-obj-y += snapshot.o qapi.o
>> +block-obj-y += block-backend.o snapshot.o qapi.o
>>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>>  block-obj-$(CONFIG_POSIX) += raw-posix.o
>>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>> diff --git a/block/block-backend.c b/block/block-backend.c
>> new file mode 100644
>> index 0000000..833f7d9
>> --- /dev/null
>> +++ b/block/block-backend.c
>> @@ -0,0 +1,110 @@
>> +/*
>> + * QEMU Block backends
>> + *
>> + * Copyright (C) 2014 Red Hat, Inc.
>> + *
>> + * Authors:
>> + *  Markus Armbruster <armbru@redhat.com>,
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * later.  See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "sysemu/block-backend.h"
>> +#include "block/block_int.h"
>> +
>> +struct BlockBackend {
>> +    char *name;
>> +    int refcnt;
>> +    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
>> +};
>> +
>> +static QTAILQ_HEAD(, BlockBackend) blk_backends =
>> +    QTAILQ_HEAD_INITIALIZER(blk_backends);
>> +
>> +/**
>> + * blk_new:
>> + * @name: name, must not be %NULL or empty
>> + * @errp: return location for an error to be set on failure, or %NULL
>> + *
>> + * Create a new BlockBackend, with a reference count of one.  Fail if
>> + * @name already exists.
>> + *
>> + * Returns: the BlockBackend on success, %NULL on failure
>> + */
>> +BlockBackend *blk_new(const char *name, Error **errp)
>> +{
>> +    BlockBackend *blk = g_new0(BlockBackend, 1);
>> +
>> +    assert(name && name[0]);
>> +    if (blk_by_name(name)) {
>> +        error_setg(errp, "Device with id '%s' already exists", name);
>> +        return NULL;
>> +    }
>> +    blk->name = g_strdup(name);
>> +    blk->refcnt = 1;
>> +    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
>> +    return blk;
>> +}
>> +
>> +static void blk_delete(BlockBackend *blk)
>> +{
>> +    assert(!blk->refcnt);
>> +    QTAILQ_REMOVE(&blk_backends, blk, link);
>> +    g_free(blk->name);
>> +    g_free(blk);
>> +}
>> +
>> +/**
>> + * blk_ref:
>> + *
>> + * Increment @blk's reference count.
>> + */
>> +void blk_ref(BlockBackend *blk)
>> +{
>> +    blk->refcnt++;
>> +}
>> +
>> +/**
>> + * blk_unref:
>> + *
>> + * Decrement @blk's reference count.  If this drops it to zero,
>> + * destroy @blk.
>> + */
>> +void blk_unref(BlockBackend *blk)
>> +{
>> +    if (blk) {
>> +        g_assert(blk->refcnt > 0);
>> +        if (!--blk->refcnt) {
>> +            blk_delete(blk);
>> +        }
>> +    }
>> +}
>> +
>> +const char *blk_name(BlockBackend *blk)
>> +{
>> +    return blk->name;
>> +}
>> +
>> +BlockBackend *blk_by_name(const char *name)
>> +{
>> +    BlockBackend *blk;
>> +
>> +    QTAILQ_FOREACH(blk, &blk_backends, link) {
>> +        if (!strcmp(name, blk->name)) {
>> +            return blk;
>> +        }
>> +    }
>> +    return NULL;
>> +}
>> +
>> +/**
>> + * blk_next:
>> + *
>> + * Returns: the first BlockBackend if @blk is null, else @blk's next
>> + * sibling, which is %NULL for the last BlockBackend
>> + */
>> +BlockBackend *blk_next(BlockBackend *blk)
>> +{
>> +    return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
>> +}
>> diff --git a/blockdev.c b/blockdev.c
>> index 9fbd888..86596bc 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -30,6 +30,7 @@
>>   * THE SOFTWARE.
>>   */
>>  
>> +#include "sysemu/block-backend.h"
>>  #include "sysemu/blockdev.h"
>>  #include "hw/block/block.h"
>>  #include "block/blockjob.h"
>> @@ -221,6 +222,7 @@ void drive_del(DriveInfo *dinfo)
>>      }
>>  
>>      bdrv_unref(dinfo->bdrv);
>> +    blk_unref(blk_by_name(dinfo->id));
>>      g_free(dinfo->id);
>>      QTAILQ_REMOVE(&drives, dinfo, next);
>>      g_free(dinfo->serial);
>> @@ -301,6 +303,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>      int ro = 0;
>>      int bdrv_flags = 0;
>>      int on_read_error, on_write_error;
>> +    BlockBackend *blk;
>>      DriveInfo *dinfo;
>>      ThrottleConfig cfg;
>>      int snapshot = 0;
>> @@ -456,6 +459,10 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>      }
>>  
>>      /* init */
>> +    blk = blk_new(qemu_opts_id(opts), errp);
>> +    if (!blk) {
>> +        goto early_err;
>> +    }
>
> Here you create a new block backend.
> And you don't attach it to anything in any way yet.

Yes.  Right before creating the root BDS.

> So down in the code the following test will leak it:
>     if (!file || !*file) {                                                      
>         if (has_driver_specific_opts) {                                         
>             file = NULL;                                                        
>         } else {                                                                
>             QDECREF(bs_opts);                                                   
>             qemu_opts_del(opts);                                                
>             return dinfo;                                                       
>         }                                                                       
>     } 

The root BDS isn't destroyed here, and therefore the BB isn't, either.

The BB will be destroyed right when the root BDS is.

> I am sure one of your later patches fixes this, but for this
> precise commit it does look like a leak.
>
>>      dinfo = g_malloc0(sizeof(*dinfo));
>>      dinfo->id = g_strdup(qemu_opts_id(opts));
>>      dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
>> @@ -525,6 +532,7 @@ err:
>>  bdrv_new_err:
>>      g_free(dinfo->id);
>>      g_free(dinfo);
>> +    blk_unref(blk);
>>  early_err:
>>      qemu_opts_del(opts);
>>  err_no_opts:
>> @@ -1770,7 +1778,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>>       */
>>      if (bdrv_get_attached_dev(bs)) {
>>          bdrv_make_anon(bs);
>> -
>> +        blk_unref(blk_by_name(id));
>>          /* Further I/O must not pause the guest */
>>          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
>>                            BLOCKDEV_ON_ERROR_REPORT);
>> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
>> index 8bac7ff..730a021 100644
>> --- a/hw/block/xen_disk.c
>> +++ b/hw/block/xen_disk.c
>> @@ -39,6 +39,7 @@
>>  #include "hw/xen/xen_backend.h"
>>  #include "xen_blkif.h"
>>  #include "sysemu/blockdev.h"
>> +#include "sysemu/block-backend.h"
>>  
>>  /* ------------------------------------------------------------- */
>>  
>> @@ -852,12 +853,18 @@ static int blk_connect(struct XenDevice *xendev)
>>      blkdev->dinfo = drive_get(IF_XEN, 0, index);
>>      if (!blkdev->dinfo) {
>>          Error *local_err = NULL;
>> +        BlockBackend *blk;
>>          BlockDriver *drv;
>>  
>>          /* setup via xenbus -> create new block driver instance */
>>          xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
>> +        blk = blk_new(blkdev->dev, NULL);
>> +        if (!blk) {
>> +            return -1;
>> +        }
>>          blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
>>          if (!blkdev->bs) {
>> +            blk_unref(blk);
>>              return -1;
>>          }
>>  
>> @@ -868,6 +875,7 @@ static int blk_connect(struct XenDevice *xendev)
>>                            error_get_pretty(local_err));
>>              error_free(local_err);
>>              bdrv_unref(blkdev->bs);
>> +            blk_unref(blk);
>>              blkdev->bs = NULL;
>>              return -1;
>>          }
>> @@ -983,6 +991,9 @@ static void blk_disconnect(struct XenDevice *xendev)
>>      if (blkdev->bs) {
>>          bdrv_detach_dev(blkdev->bs, blkdev);
>>          bdrv_unref(blkdev->bs);
>> +        if (!blkdev->dinfo) {
>> +            blk_unref(blk_by_name(blkdev->dev));
>> +        }
>>          blkdev->bs = NULL;
>>      }
>>      xen_be_unbind_evtchn(&blkdev->xendev);
>> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
>> index 5f20b0e..198da2e 100644
>> --- a/include/qemu/typedefs.h
>> +++ b/include/qemu/typedefs.h
>> @@ -35,6 +35,7 @@ typedef struct MachineClass MachineClass;
>>  typedef struct NICInfo NICInfo;
>>  typedef struct HCIInfo HCIInfo;
>>  typedef struct AudioState AudioState;
>> +typedef struct BlockBackend BlockBackend;
>>  typedef struct BlockDriverState BlockDriverState;
>>  typedef struct DriveInfo DriveInfo;
>>  typedef struct DisplayState DisplayState;
>> diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
>> new file mode 100644
>> index 0000000..3f8371c
>> --- /dev/null
>> +++ b/include/sysemu/block-backend.h
>> @@ -0,0 +1,26 @@
>> +/*
>> + * QEMU Block backends
>> + *
>> + * Copyright (C) 2014 Red Hat, Inc.
>> + *
>> + * Authors:
>> + *  Markus Armbruster <armbru@redhat.com>,
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> + * later.  See the COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef BLOCK_BACKEND_H
>> +#define BLOCK_BACKEND_H
>> +
>> +#include "qemu/typedefs.h"
>> +#include "qapi/error.h"
>> +
>> +BlockBackend *blk_new(const char *name, Error **errp);
>> +void blk_ref(BlockBackend *blk);
>> +void blk_unref(BlockBackend *blk);
>> +const char *blk_name(BlockBackend *blk);
>> +BlockBackend *blk_by_name(const char *name);
>> +BlockBackend *blk_next(BlockBackend *blk);
>> +
>> +#endif
>> diff --git a/qemu-img.c b/qemu-img.c
>> index 4490a22..bad3f64 100644
>> --- a/qemu-img.c
>> +++ b/qemu-img.c
>> @@ -29,6 +29,7 @@
>>  #include "qemu/error-report.h"
>>  #include "qemu/osdep.h"
>>  #include "sysemu/sysemu.h"
>> +#include "sysemu/block-backend.h"
>>  #include "block/block_int.h"
>>  #include "block/qapi.h"
>>  #include <getopt.h>
>> @@ -575,6 +576,7 @@ static int img_check(int argc, char **argv)
>>      int c, ret;
>>      OutputFormat output_format = OFORMAT_HUMAN;
>>      const char *filename, *fmt, *output, *cache;
>> +    BlockBackend *blk;
>>      BlockDriverState *bs;
>>      int fix = 0;
>>      int flags = BDRV_O_FLAGS | BDRV_O_CHECK;
>> @@ -649,6 +651,7 @@ static int img_check(int argc, char **argv)
>>          return 1;
>>      }
>>  
>
>> +    blk = blk_new("image", &error_abort);
> Hmm, are we so sure this will work that we don't check if (!blk)?

Matches what bdrv_new_open() does:

    bs = bdrv_new_named(id, &error_abort);

As you noted further down, the tools treat these failures as programming
errors.  That's appropriate.

>>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>>      if (!bs) {
>>          return 1;
>> @@ -710,6 +713,7 @@ static int img_check(int argc, char **argv)
>>  fail:
>>      qapi_free_ImageCheck(check);
>>      bdrv_unref(bs);
>> +    blk_unref(blk);
>>  
>>      return ret;
>>  }
>> @@ -718,6 +722,7 @@ static int img_commit(int argc, char **argv)
>>  {
>>      int c, ret, flags;
>>      const char *filename, *fmt, *cache;
>> +    BlockBackend *blk;
>>      BlockDriverState *bs;
>>      bool quiet = false;
>>  
>> @@ -756,6 +761,7 @@ static int img_commit(int argc, char **argv)
>>          return 1;
>>      }
>>  
>> +    blk = blk_new("image", &error_abort);
>>      bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
>>      if (!bs) {
>>          return 1;
>> @@ -780,6 +786,7 @@ static int img_commit(int argc, char **argv)
>>      }
>>  
>>      bdrv_unref(bs);
>> +    blk_unref(blk);
>>      if (ret) {
>>          return 1;
>>      }
>> @@ -942,6 +949,7 @@ static int check_empty_sectors(BlockDriverState *bs, int64_t sect_num,
>>  static int img_compare(int argc, char **argv)
>>  {
>>      const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
>> +    BlockBackend *blk1, *blk2;
>>      BlockDriverState *bs1, *bs2;
>>      int64_t total_sectors1, total_sectors2;
>>      uint8_t *buf1 = NULL, *buf2 = NULL;
>> @@ -1011,6 +1019,7 @@ static int img_compare(int argc, char **argv)
>>          goto out3;
>>      }
>>  
>> +    blk1 = blk_new("image 1", &error_abort);
>>      bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet);
>>      if (!bs1) {
>>          error_report("Can't open file %s", filename1);
>> @@ -1018,6 +1027,7 @@ static int img_compare(int argc, char **argv)
>>          goto out3;
>>      }
>>  
>> +    blk2 = blk_new("image 2", &error_abort);
>>      bs2 = bdrv_new_open("image 2", filename2, fmt2, flags, true, quiet);
>>      if (!bs2) {
>>          error_report("Can't open file %s", filename2);
>> @@ -1184,10 +1194,12 @@ static int img_compare(int argc, char **argv)
>>  
>>  out:
>>      bdrv_unref(bs2);
>> +    blk_unref(blk2);
>>      qemu_vfree(buf1);
>>      qemu_vfree(buf2);
>>  out2:
>>      bdrv_unref(bs1);
>> +    blk_unref(blk1);
>>  out3:
>>      qemu_progress_end();
>>      return ret;
>> @@ -1200,6 +1212,7 @@ static int img_convert(int argc, char **argv)
>>      int progress = 0, flags, src_flags;
>>      const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
>>      BlockDriver *drv, *proto_drv;
>> +    BlockBackend **blk = NULL, *out_blk = NULL;
>>      BlockDriverState **bs = NULL, *out_bs = NULL;
>>      int64_t total_sectors, nb_sectors, sector_num, bs_offset;
>>      int64_t *bs_sectors = NULL;
>> @@ -1354,6 +1367,7 @@ static int img_convert(int argc, char **argv)
>>  
>>      qemu_progress_print(0, 100);
>>  
>> +    blk = g_new0(BlockBackend *, bs_n);
>>      bs = g_new0(BlockDriverState *, bs_n);
>>      bs_sectors = g_new(int64_t, bs_n);
>>  
>> @@ -1361,6 +1375,7 @@ static int img_convert(int argc, char **argv)
>>      for (bs_i = 0; bs_i < bs_n; bs_i++) {
>>          char *id = bs_n > 1 ? g_strdup_printf("source %d", bs_i)
>>                              : g_strdup("source");
>> +        blk[bs_i] = blk_new(id, &error_abort);
>>          bs[bs_i] = bdrv_new_open(id, argv[optind + bs_i], fmt, src_flags,
>>                                   true, quiet);
>>          g_free(id);
>> @@ -1486,6 +1501,7 @@ static int img_convert(int argc, char **argv)
>>          goto out;
>>      }
>>  
>> +    out_blk = blk_new("target", &error_abort);
>>      out_bs = bdrv_new_open("target", out_filename, out_fmt, flags, true, quiet);
>>      if (!out_bs) {
>>          ret = -1;
>> @@ -1742,6 +1758,7 @@ out:
>>      if (out_bs) {
>>          bdrv_unref(out_bs);
>>      }
>> +    blk_unref(out_blk);
>>      if (bs) {
>>          for (bs_i = 0; bs_i < bs_n; bs_i++) {
>>              if (bs[bs_i]) {
>> @@ -1750,6 +1767,12 @@ out:
>>          }
>>          g_free(bs);
>>      }
>> +    if (blk) {
>> +        for (bs_i = 0; bs_i < bs_n; bs_i++) {
>> +            blk_unref(blk[bs_i]);
>> +        }
>> +        g_free(blk);
>> +    }
>>      g_free(bs_sectors);
>>  fail_getopt:
>>      g_free(options);
>> @@ -1858,6 +1881,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>>      filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
>>  
>>      while (filename) {
>> +        BlockBackend *blk;
>>          BlockDriverState *bs;
>>          ImageInfo *info;
>>          ImageInfoList *elem;
>> @@ -1869,6 +1893,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>>          }
>>          g_hash_table_insert(filenames, (gpointer)filename, NULL);
>>  
>> +        blk = blk_new("image", &error_abort);
>>          bs = bdrv_new_open("image", filename, fmt,
>>                             BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
>>          if (!bs) {
>
> I think this is missing a
>> +            blk_unref(blk);
> in the if (!bs) branch.

Yes.  Kevin noted that, too.  I'll fix it.

[...]
Markus Armbruster Sept. 11, 2014, 10:22 a.m. UTC | #10
Benoît Canet <benoit.canet@irqsave.net> writes:

> The Wednesday 10 Sep 2014 à 14:40:42 (+0200), Benoît Canet wrote :
>> The Wednesday 10 Sep 2014 à 10:13:31 (+0200), Markus Armbruster wrote :
[...]
>> > diff --git a/qemu-img.c b/qemu-img.c
>> > index 4490a22..bad3f64 100644
>> > --- a/qemu-img.c
>> > +++ b/qemu-img.c
>> > @@ -29,6 +29,7 @@
>> >  #include "qemu/error-report.h"
>> >  #include "qemu/osdep.h"
>> >  #include "sysemu/sysemu.h"
>> > +#include "sysemu/block-backend.h"
>> >  #include "block/block_int.h"
>> >  #include "block/qapi.h"
>> >  #include <getopt.h>
>> > @@ -575,6 +576,7 @@ static int img_check(int argc, char **argv)
>> >      int c, ret;
>> >      OutputFormat output_format = OFORMAT_HUMAN;
>> >      const char *filename, *fmt, *output, *cache;
>> > +    BlockBackend *blk;
>> >      BlockDriverState *bs;
>> >      int fix = 0;
>> >      int flags = BDRV_O_FLAGS | BDRV_O_CHECK;
>> > @@ -649,6 +651,7 @@ static int img_check(int argc, char **argv)
>> >          return 1;
>> >      }
>> >  
>> 
>> > +    blk = blk_new("image", &error_abort);
>> Hmm, are we so sure this will work that we don't check if (!blk)?
>
> OK, I understand: we are sure because we control the id and won't use the same one twice.

Exactly!

[...]
Markus Armbruster Sept. 11, 2014, 11:45 a.m. UTC | #11
Markus Armbruster <armbru@redhat.com> writes:

> Kevin Wolf <kwolf@redhat.com> writes:
[...]
>>> diff --git a/qemu-img.c b/qemu-img.c
>>> index 4490a22..bad3f64 100644
>>> --- a/qemu-img.c
>>> +++ b/qemu-img.c
>>
>> Won't comment on each hunk in qemu-img, but in many cases, on
>> bdrv_new_open() failure, blk is leaked.
>
> I'll check them systematically.

The leaks all go away in PATCH 03.  Fixing PATCH 02 anyway, of course.
Markus Armbruster Sept. 11, 2014, 2:38 p.m. UTC | #12
Markus Armbruster <armbru@redhat.com> writes:

> Kevin Wolf <kwolf@redhat.com> writes:
[...]
>>> diff --git a/blockdev.c b/blockdev.c
>>> index 9fbd888..86596bc 100644
>>> --- a/blockdev.c
>>> +++ b/blockdev.c
[...]
>>> @@ -1770,7 +1778,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>>>       */
>>>      if (bdrv_get_attached_dev(bs)) {
>>>          bdrv_make_anon(bs);
>>> -
>>> +        blk_unref(blk_by_name(id));
>>>          /* Further I/O must not pause the guest */
>>>          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
>>>                            BLOCKDEV_ON_ERROR_REPORT);
>>
>> Won't we unref the BB a second time now when unplugging the device?
>> (drive_del() called in blockdev_auto_del())
>
> Short answer: you're right, there's a bug, and I'll fix it.
>
> Long answer: this part is hairy, because the drive_del command is badly
> designed.
[...]
> Simplest possible solution: I hide the BB here, and *leak* it (with a
> fat FIXME comment) until it becomes easy enough to find.  I guess I can
> find it right in the next patch.

Never mind, I found a tolerable way to avoid the temporary leak.
diff mbox

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index f45f939..a70140b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -5,7 +5,7 @@  block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-$(CONFIG_QUORUM) += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
-block-obj-y += snapshot.o qapi.o
+block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
diff --git a/block/block-backend.c b/block/block-backend.c
new file mode 100644
index 0000000..833f7d9
--- /dev/null
+++ b/block/block-backend.c
@@ -0,0 +1,110 @@ 
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "sysemu/block-backend.h"
+#include "block/block_int.h"
+
+struct BlockBackend {
+    char *name;
+    int refcnt;
+    QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
+};
+
+static QTAILQ_HEAD(, BlockBackend) blk_backends =
+    QTAILQ_HEAD_INITIALIZER(blk_backends);
+
+/**
+ * blk_new:
+ * @name: name, must not be %NULL or empty
+ * @errp: return location for an error to be set on failure, or %NULL
+ *
+ * Create a new BlockBackend, with a reference count of one.  Fail if
+ * @name already exists.
+ *
+ * Returns: the BlockBackend on success, %NULL on failure
+ */
+BlockBackend *blk_new(const char *name, Error **errp)
+{
+    BlockBackend *blk = g_new0(BlockBackend, 1);
+
+    assert(name && name[0]);
+    if (blk_by_name(name)) {
+        error_setg(errp, "Device with id '%s' already exists", name);
+        return NULL;
+    }
+    blk->name = g_strdup(name);
+    blk->refcnt = 1;
+    QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
+    return blk;
+}
+
+static void blk_delete(BlockBackend *blk)
+{
+    assert(!blk->refcnt);
+    QTAILQ_REMOVE(&blk_backends, blk, link);
+    g_free(blk->name);
+    g_free(blk);
+}
+
+/**
+ * blk_ref:
+ *
+ * Increment @blk's reference count.
+ */
+void blk_ref(BlockBackend *blk)
+{
+    blk->refcnt++;
+}
+
+/**
+ * blk_unref:
+ *
+ * Decrement @blk's reference count.  If this drops it to zero,
+ * destroy @blk.
+ */
+void blk_unref(BlockBackend *blk)
+{
+    if (blk) {
+        g_assert(blk->refcnt > 0);
+        if (!--blk->refcnt) {
+            blk_delete(blk);
+        }
+    }
+}
+
+const char *blk_name(BlockBackend *blk)
+{
+    return blk->name;
+}
+
+BlockBackend *blk_by_name(const char *name)
+{
+    BlockBackend *blk;
+
+    QTAILQ_FOREACH(blk, &blk_backends, link) {
+        if (!strcmp(name, blk->name)) {
+            return blk;
+        }
+    }
+    return NULL;
+}
+
+/**
+ * blk_next:
+ *
+ * Returns: the first BlockBackend if @blk is null, else @blk's next
+ * sibling, which is %NULL for the last BlockBackend
+ */
+BlockBackend *blk_next(BlockBackend *blk)
+{
+    return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
+}
diff --git a/blockdev.c b/blockdev.c
index 9fbd888..86596bc 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -30,6 +30,7 @@ 
  * THE SOFTWARE.
  */
 
+#include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "hw/block/block.h"
 #include "block/blockjob.h"
@@ -221,6 +222,7 @@  void drive_del(DriveInfo *dinfo)
     }
 
     bdrv_unref(dinfo->bdrv);
+    blk_unref(blk_by_name(dinfo->id));
     g_free(dinfo->id);
     QTAILQ_REMOVE(&drives, dinfo, next);
     g_free(dinfo->serial);
@@ -301,6 +303,7 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     int ro = 0;
     int bdrv_flags = 0;
     int on_read_error, on_write_error;
+    BlockBackend *blk;
     DriveInfo *dinfo;
     ThrottleConfig cfg;
     int snapshot = 0;
@@ -456,6 +459,10 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     }
 
     /* init */
+    blk = blk_new(qemu_opts_id(opts), errp);
+    if (!blk) {
+        goto early_err;
+    }
     dinfo = g_malloc0(sizeof(*dinfo));
     dinfo->id = g_strdup(qemu_opts_id(opts));
     dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
@@ -525,6 +532,7 @@  err:
 bdrv_new_err:
     g_free(dinfo->id);
     g_free(dinfo);
+    blk_unref(blk);
 early_err:
     qemu_opts_del(opts);
 err_no_opts:
@@ -1770,7 +1778,7 @@  int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
      */
     if (bdrv_get_attached_dev(bs)) {
         bdrv_make_anon(bs);
-
+        blk_unref(blk_by_name(id));
         /* Further I/O must not pause the guest */
         bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
                           BLOCKDEV_ON_ERROR_REPORT);
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 8bac7ff..730a021 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -39,6 +39,7 @@ 
 #include "hw/xen/xen_backend.h"
 #include "xen_blkif.h"
 #include "sysemu/blockdev.h"
+#include "sysemu/block-backend.h"
 
 /* ------------------------------------------------------------- */
 
@@ -852,12 +853,18 @@  static int blk_connect(struct XenDevice *xendev)
     blkdev->dinfo = drive_get(IF_XEN, 0, index);
     if (!blkdev->dinfo) {
         Error *local_err = NULL;
+        BlockBackend *blk;
         BlockDriver *drv;
 
         /* setup via xenbus -> create new block driver instance */
         xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
+        blk = blk_new(blkdev->dev, NULL);
+        if (!blk) {
+            return -1;
+        }
         blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
         if (!blkdev->bs) {
+            blk_unref(blk);
             return -1;
         }
 
@@ -868,6 +875,7 @@  static int blk_connect(struct XenDevice *xendev)
                           error_get_pretty(local_err));
             error_free(local_err);
             bdrv_unref(blkdev->bs);
+            blk_unref(blk);
             blkdev->bs = NULL;
             return -1;
         }
@@ -983,6 +991,9 @@  static void blk_disconnect(struct XenDevice *xendev)
     if (blkdev->bs) {
         bdrv_detach_dev(blkdev->bs, blkdev);
         bdrv_unref(blkdev->bs);
+        if (!blkdev->dinfo) {
+            blk_unref(blk_by_name(blkdev->dev));
+        }
         blkdev->bs = NULL;
     }
     xen_be_unbind_evtchn(&blkdev->xendev);
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 5f20b0e..198da2e 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -35,6 +35,7 @@  typedef struct MachineClass MachineClass;
 typedef struct NICInfo NICInfo;
 typedef struct HCIInfo HCIInfo;
 typedef struct AudioState AudioState;
+typedef struct BlockBackend BlockBackend;
 typedef struct BlockDriverState BlockDriverState;
 typedef struct DriveInfo DriveInfo;
 typedef struct DisplayState DisplayState;
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
new file mode 100644
index 0000000..3f8371c
--- /dev/null
+++ b/include/sysemu/block-backend.h
@@ -0,0 +1,26 @@ 
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_H
+#define BLOCK_BACKEND_H
+
+#include "qemu/typedefs.h"
+#include "qapi/error.h"
+
+BlockBackend *blk_new(const char *name, Error **errp);
+void blk_ref(BlockBackend *blk);
+void blk_unref(BlockBackend *blk);
+const char *blk_name(BlockBackend *blk);
+BlockBackend *blk_by_name(const char *name);
+BlockBackend *blk_next(BlockBackend *blk);
+
+#endif
diff --git a/qemu-img.c b/qemu-img.c
index 4490a22..bad3f64 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -29,6 +29,7 @@ 
 #include "qemu/error-report.h"
 #include "qemu/osdep.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/block-backend.h"
 #include "block/block_int.h"
 #include "block/qapi.h"
 #include <getopt.h>
@@ -575,6 +576,7 @@  static int img_check(int argc, char **argv)
     int c, ret;
     OutputFormat output_format = OFORMAT_HUMAN;
     const char *filename, *fmt, *output, *cache;
+    BlockBackend *blk;
     BlockDriverState *bs;
     int fix = 0;
     int flags = BDRV_O_FLAGS | BDRV_O_CHECK;
@@ -649,6 +651,7 @@  static int img_check(int argc, char **argv)
         return 1;
     }
 
+    blk = blk_new("image", &error_abort);
     bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
     if (!bs) {
         return 1;
@@ -710,6 +713,7 @@  static int img_check(int argc, char **argv)
 fail:
     qapi_free_ImageCheck(check);
     bdrv_unref(bs);
+    blk_unref(blk);
 
     return ret;
 }
@@ -718,6 +722,7 @@  static int img_commit(int argc, char **argv)
 {
     int c, ret, flags;
     const char *filename, *fmt, *cache;
+    BlockBackend *blk;
     BlockDriverState *bs;
     bool quiet = false;
 
@@ -756,6 +761,7 @@  static int img_commit(int argc, char **argv)
         return 1;
     }
 
+    blk = blk_new("image", &error_abort);
     bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
     if (!bs) {
         return 1;
@@ -780,6 +786,7 @@  static int img_commit(int argc, char **argv)
     }
 
     bdrv_unref(bs);
+    blk_unref(blk);
     if (ret) {
         return 1;
     }
@@ -942,6 +949,7 @@  static int check_empty_sectors(BlockDriverState *bs, int64_t sect_num,
 static int img_compare(int argc, char **argv)
 {
     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
+    BlockBackend *blk1, *blk2;
     BlockDriverState *bs1, *bs2;
     int64_t total_sectors1, total_sectors2;
     uint8_t *buf1 = NULL, *buf2 = NULL;
@@ -1011,6 +1019,7 @@  static int img_compare(int argc, char **argv)
         goto out3;
     }
 
+    blk1 = blk_new("image 1", &error_abort);
     bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet);
     if (!bs1) {
         error_report("Can't open file %s", filename1);
@@ -1018,6 +1027,7 @@  static int img_compare(int argc, char **argv)
         goto out3;
     }
 
+    blk2 = blk_new("image 2", &error_abort);
     bs2 = bdrv_new_open("image 2", filename2, fmt2, flags, true, quiet);
     if (!bs2) {
         error_report("Can't open file %s", filename2);
@@ -1184,10 +1194,12 @@  static int img_compare(int argc, char **argv)
 
 out:
     bdrv_unref(bs2);
+    blk_unref(blk2);
     qemu_vfree(buf1);
     qemu_vfree(buf2);
 out2:
     bdrv_unref(bs1);
+    blk_unref(blk1);
 out3:
     qemu_progress_end();
     return ret;
@@ -1200,6 +1212,7 @@  static int img_convert(int argc, char **argv)
     int progress = 0, flags, src_flags;
     const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
     BlockDriver *drv, *proto_drv;
+    BlockBackend **blk = NULL, *out_blk = NULL;
     BlockDriverState **bs = NULL, *out_bs = NULL;
     int64_t total_sectors, nb_sectors, sector_num, bs_offset;
     int64_t *bs_sectors = NULL;
@@ -1354,6 +1367,7 @@  static int img_convert(int argc, char **argv)
 
     qemu_progress_print(0, 100);
 
+    blk = g_new0(BlockBackend *, bs_n);
     bs = g_new0(BlockDriverState *, bs_n);
     bs_sectors = g_new(int64_t, bs_n);
 
@@ -1361,6 +1375,7 @@  static int img_convert(int argc, char **argv)
     for (bs_i = 0; bs_i < bs_n; bs_i++) {
         char *id = bs_n > 1 ? g_strdup_printf("source %d", bs_i)
                             : g_strdup("source");
+        blk[bs_i] = blk_new(id, &error_abort);
         bs[bs_i] = bdrv_new_open(id, argv[optind + bs_i], fmt, src_flags,
                                  true, quiet);
         g_free(id);
@@ -1486,6 +1501,7 @@  static int img_convert(int argc, char **argv)
         goto out;
     }
 
+    out_blk = blk_new("target", &error_abort);
     out_bs = bdrv_new_open("target", out_filename, out_fmt, flags, true, quiet);
     if (!out_bs) {
         ret = -1;
@@ -1742,6 +1758,7 @@  out:
     if (out_bs) {
         bdrv_unref(out_bs);
     }
+    blk_unref(out_blk);
     if (bs) {
         for (bs_i = 0; bs_i < bs_n; bs_i++) {
             if (bs[bs_i]) {
@@ -1750,6 +1767,12 @@  out:
         }
         g_free(bs);
     }
+    if (blk) {
+        for (bs_i = 0; bs_i < bs_n; bs_i++) {
+            blk_unref(blk[bs_i]);
+        }
+        g_free(blk);
+    }
     g_free(bs_sectors);
 fail_getopt:
     g_free(options);
@@ -1858,6 +1881,7 @@  static ImageInfoList *collect_image_info_list(const char *filename,
     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
 
     while (filename) {
+        BlockBackend *blk;
         BlockDriverState *bs;
         ImageInfo *info;
         ImageInfoList *elem;
@@ -1869,6 +1893,7 @@  static ImageInfoList *collect_image_info_list(const char *filename,
         }
         g_hash_table_insert(filenames, (gpointer)filename, NULL);
 
+        blk = blk_new("image", &error_abort);
         bs = bdrv_new_open("image", filename, fmt,
                            BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
         if (!bs) {
@@ -1880,6 +1905,7 @@  static ImageInfoList *collect_image_info_list(const char *filename,
             error_report("%s", error_get_pretty(err));
             error_free(err);
             bdrv_unref(bs);
+            blk_unref(blk);
             goto err;
         }
 
@@ -1889,6 +1915,7 @@  static ImageInfoList *collect_image_info_list(const char *filename,
         last = &elem->next;
 
         bdrv_unref(bs);
+        blk_unref(blk);
 
         filename = fmt = NULL;
         if (chain) {
@@ -2082,6 +2109,7 @@  static int img_map(int argc, char **argv)
 {
     int c;
     OutputFormat output_format = OFORMAT_HUMAN;
+    BlockBackend *blk;
     BlockDriverState *bs;
     const char *filename, *fmt, *output;
     int64_t length;
@@ -2130,6 +2158,7 @@  static int img_map(int argc, char **argv)
         return 1;
     }
 
+    blk = blk_new("image", &error_abort);
     bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS, true, false);
     if (!bs) {
         return 1;
@@ -2175,6 +2204,7 @@  static int img_map(int argc, char **argv)
 
 out:
     bdrv_unref(bs);
+    blk_unref(blk);
     return ret < 0;
 }
 
@@ -2185,6 +2215,7 @@  out:
 
 static int img_snapshot(int argc, char **argv)
 {
+    BlockBackend *blk;
     BlockDriverState *bs;
     QEMUSnapshotInfo sn;
     char *filename, *snapshot_name = NULL;
@@ -2250,6 +2281,7 @@  static int img_snapshot(int argc, char **argv)
     filename = argv[optind++];
 
     /* Open the image */
+    blk = blk_new("image", &error_abort);
     bs = bdrv_new_open("image", filename, NULL, bdrv_oflags, true, quiet);
     if (!bs) {
         return 1;
@@ -2297,6 +2329,7 @@  static int img_snapshot(int argc, char **argv)
 
     /* Cleanup */
     bdrv_unref(bs);
+    blk_unref(blk);
     if (ret) {
         return 1;
     }
@@ -2305,6 +2338,7 @@  static int img_snapshot(int argc, char **argv)
 
 static int img_rebase(int argc, char **argv)
 {
+    BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
     BlockDriverState *bs = NULL, *bs_old_backing = NULL, *bs_new_backing = NULL;
     BlockDriver *old_backing_drv, *new_backing_drv;
     char *filename;
@@ -2393,6 +2427,7 @@  static int img_rebase(int argc, char **argv)
      * Ignore the old backing file for unsafe rebase in case we want to correct
      * the reference to a renamed or moved backing file.
      */
+    blk = blk_new("image", &error_abort);
     bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
     if (!bs) {
         ret = -1;
@@ -2425,6 +2460,7 @@  static int img_rebase(int argc, char **argv)
     if (!unsafe) {
         char backing_name[1024];
 
+        blk_old_backing = blk_new("old_backing", &error_abort);
         bs_old_backing = bdrv_new_named("old_backing", &error_abort);
         bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
         ret = bdrv_open(&bs_old_backing, backing_name, NULL, NULL, src_flags,
@@ -2436,6 +2472,7 @@  static int img_rebase(int argc, char **argv)
             goto out;
         }
         if (out_baseimg[0]) {
+            blk_new_backing = blk_new("new_backing", &error_abort);
             bs_new_backing = bdrv_new_named("new_backing", &error_abort);
             ret = bdrv_open(&bs_new_backing, out_baseimg, NULL, NULL, src_flags,
                             new_backing_drv, &local_err);
@@ -2614,12 +2651,15 @@  out:
         if (bs_old_backing != NULL) {
             bdrv_unref(bs_old_backing);
         }
+        blk_unref(blk_old_backing);
         if (bs_new_backing != NULL) {
             bdrv_unref(bs_new_backing);
         }
+        blk_unref(blk_new_backing);
     }
 
     bdrv_unref(bs);
+    blk_unref(blk);
     if (ret) {
         return 1;
     }
@@ -2632,6 +2672,7 @@  static int img_resize(int argc, char **argv)
     const char *filename, *fmt, *size;
     int64_t n, total_size;
     bool quiet = false;
+    BlockBackend *blk = NULL;
     BlockDriverState *bs = NULL;
     QemuOpts *param;
     static QemuOptsList resize_options = {
@@ -2708,6 +2749,7 @@  static int img_resize(int argc, char **argv)
     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
     qemu_opts_del(param);
 
+    blk = blk_new("image", &error_abort);
     bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR,
                        true, quiet);
     if (!bs) {
@@ -2745,6 +2787,7 @@  out:
     if (bs) {
         bdrv_unref(bs);
     }
+    blk_unref(blk);
     if (ret) {
         return 1;
     }
@@ -2760,6 +2803,7 @@  static int img_amend(int argc, char **argv)
     const char *fmt = NULL, *filename, *cache;
     int flags;
     bool quiet = false;
+    BlockBackend *blk = NULL;
     BlockDriverState *bs = NULL;
 
     cache = BDRV_DEFAULT_CACHE;
@@ -2823,6 +2867,7 @@  static int img_amend(int argc, char **argv)
         goto out;
     }
 
+    blk = blk_new("image", &error_abort);
     bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
     if (!bs) {
         error_report("Could not open image '%s'", filename);
@@ -2856,6 +2901,7 @@  out:
     if (bs) {
         bdrv_unref(bs);
     }
+    blk_unref(blk);
     qemu_opts_del(opts);
     qemu_opts_free(create_opts);
     g_free(options);
diff --git a/qemu-io.c b/qemu-io.c
index 44c2e1c..45e5494 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -19,6 +19,7 @@ 
 #include "qemu/option.h"
 #include "qemu/config-file.h"
 #include "qemu/readline.h"
+#include "sysemu/block-backend.h"
 #include "block/block_int.h"
 #include "trace/control.h"
 
@@ -26,6 +27,7 @@ 
 
 static char *progname;
 
+static BlockBackend *qemuio_blk;
 static BlockDriverState *qemuio_bs;
 
 /* qemu-io commands passed using -c */
@@ -37,7 +39,9 @@  static ReadLineState *readline_state;
 static int close_f(BlockDriverState *bs, int argc, char **argv)
 {
     bdrv_unref(bs);
+    blk_unref(qemuio_blk);
     qemuio_bs = NULL;
+    qemuio_blk = NULL;
     return 0;
 }
 
@@ -58,6 +62,7 @@  static int openfile(char *name, int flags, int growable, QDict *opts)
         return 1;
     }
 
+    qemuio_blk = blk_new("hda", &error_abort);
     qemuio_bs = bdrv_new_named("hda", &error_abort);
 
     if (growable) {
@@ -70,7 +75,9 @@  static int openfile(char *name, int flags, int growable, QDict *opts)
                 error_get_pretty(local_err));
         error_free(local_err);
         bdrv_unref(qemuio_bs);
+        blk_unref(qemuio_blk);
         qemuio_bs = NULL;
+        qemuio_blk = NULL;
         return 1;
     }
 
@@ -479,6 +486,7 @@  int main(int argc, char **argv)
     if (qemuio_bs) {
         bdrv_unref(qemuio_bs);
     }
+    blk_unref(qemuio_blk);
     g_free(readline_state);
     return 0;
 }
diff --git a/qemu-nbd.c b/qemu-nbd.c
index a56ebfc..94b9b49 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -17,7 +17,7 @@ 
  */
 
 #include "qemu-common.h"
-#include "block/block.h"
+#include "sysemu/block-backend.h"
 #include "block/block_int.h"
 #include "block/nbd.h"
 #include "qemu/main-loop.h"
@@ -687,6 +687,7 @@  int main(int argc, char **argv)
         drv = NULL;
     }
 
+    blk_new("hda", &error_abort);
     bs = bdrv_new_named("hda", &error_abort);
 
     srcpath = argv[optind];