diff mbox

[03/23] block: Connect BlockBackend to BlockDriverState

Message ID 1410336832-22160-4-git-send-email-armbru@redhat.com
State New
Headers show

Commit Message

Markus Armbruster Sept. 10, 2014, 8:13 a.m. UTC
The pointer from BlockBackend to BlockDriverState is a strong
reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
a weak one.

Convenience function blk_new_with_bs() creates a BlockBackend with its
BlockDriverState.  Callers have to unref both.  The commit after next
will relieve them of the need to unref the BlockDriverState.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 block.c                        |  10 ++--
 block/block-backend.c          |  79 +++++++++++++++++++++++++++++++
 blockdev.c                     |  49 +++++++++++---------
 hw/block/xen_disk.c            |   8 +---
 include/block/block_int.h      |   2 +
 include/sysemu/block-backend.h |   5 ++
 qemu-img.c                     | 103 +++++++++++++++++++++--------------------
 qemu-io.c                      |   4 +-
 qemu-nbd.c                     |   3 +-
 9 files changed, 175 insertions(+), 88 deletions(-)

Comments

Benoît Canet Sept. 10, 2014, 11:55 a.m. UTC | #1
The Wednesday 10 Sep 2014 à 10:13:32 (+0200), Markus Armbruster wrote :
> The pointer from BlockBackend to BlockDriverState is a strong
> reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
> a weak one.
> 
> Convenience function blk_new_with_bs() creates a BlockBackend with its
> BlockDriverState.  Callers have to unref both.  The commit after next
> will relieve them of the need to unref the BlockDriverState.
> 
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
>  block.c                        |  10 ++--
>  block/block-backend.c          |  79 +++++++++++++++++++++++++++++++
>  blockdev.c                     |  49 +++++++++++---------
>  hw/block/xen_disk.c            |   8 +---
>  include/block/block_int.h      |   2 +
>  include/sysemu/block-backend.h |   5 ++
>  qemu-img.c                     | 103 +++++++++++++++++++++--------------------
>  qemu-io.c                      |   4 +-
>  qemu-nbd.c                     |   3 +-
>  9 files changed, 175 insertions(+), 88 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 4b3bcd4..a6c03da 100644
> --- a/block.c
> +++ b/block.c
> @@ -2032,7 +2032,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
>   * This will modify the BlockDriverState fields, and swap contents
>   * between bs_new and bs_old. Both bs_new and bs_old are modified.
>   *
> - * bs_new is required to be anonymous.
> + * bs_new must be nameless and not attached to a BlockBackend.

1)
Does "nameless" refer to device_name or node_name ?
From the code I see it's device_name but the wording can be confusing.

maybe:
+ * bs_new must have an empty device_name and not be attached to a BlockBackend.


>   *
>   * This function does not create any image files.
>   */
> @@ -2051,8 +2051,9 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
>          QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
>      }
>  
> -    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
> +    /* bs_new must be nameless and shouldn't have anything fancy enabled */
>      assert(bs_new->device_name[0] == '\0');
> +    assert(!bs_new->blk);
>      assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
>      assert(bs_new->job == NULL);
>      assert(bs_new->dev == NULL);
> @@ -2068,8 +2069,9 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
>      bdrv_move_feature_fields(bs_old, bs_new);
>      bdrv_move_feature_fields(bs_new, &tmp);
>  
> -    /* bs_new shouldn't be in bdrv_states even after the swap!  */
> +    /* bs_new must remain nameless and unattached */
Here the proximity get rid of the ambiguity so it's ok.

>      assert(bs_new->device_name[0] == '\0');
> +    assert(!bs_new->blk);
>  
>      /* Check a few fields that should remain attached to the device */
>      assert(bs_new->dev == NULL);
> @@ -2096,7 +2098,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
>   * This will modify the BlockDriverState fields, and swap contents
>   * between bs_new and bs_top. Both bs_new and bs_top are modified.
>   *
> - * bs_new is required to be anonymous.
> + * bs_new must be nameless and not attached to a BlockBackend.
See 1)

>   *
>   * This function does not create any image files.
>   */
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 833f7d9..deccb54 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -16,6 +16,7 @@
>  struct BlockBackend {
>      char *name;
>      int refcnt;
> +    BlockDriverState *bs;
>      QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
>  };
>  
> @@ -47,9 +48,43 @@ BlockBackend *blk_new(const char *name, Error **errp)
>      return blk;
>  }
>  
> +/**
> + * blk_new_with_bs:
> + * @name: name, must not be %NULL or empty
> + * @errp: return location for an error to be set on failure, or %NULL
> + *
> + * Create a new BlockBackend, with a reference count of one, and
> + * attach a new BlockDriverState to it, also with a reference count of
> + * one.  Caller owns *both* references.
> + * TODO Let caller own only the BlockBackend reference
> + * Fail if @name already exists.
> + *
> + * Returns: the BlockBackend on success, %NULL on error
> + */
> +BlockBackend *blk_new_with_bs(const char *name, Error **errp)
> +{
> +    BlockBackend *blk;
> +    BlockDriverState *bs;
> +
> +    blk = blk_new(name, errp);
> +    if (!blk) {
> +        return NULL;
> +    }
> +
> +    bs = bdrv_new_named(name, errp);
> +    if (!bs) {
> +        blk_unref(blk);
> +        return NULL;
> +    }
> +
> +    blk_attach_bs(blk, bs);
> +    return blk;
> +}
> +
>  static void blk_delete(BlockBackend *blk)
>  {
>      assert(!blk->refcnt);
> +    blk_detach_bs(blk);
>      QTAILQ_REMOVE(&blk_backends, blk, link);
>      g_free(blk->name);
>      g_free(blk);
> @@ -70,6 +105,9 @@ void blk_ref(BlockBackend *blk)
>   *
>   * Decrement @blk's reference count.  If this drops it to zero,
>   * destroy @blk.
> + *
> + * Does *not* touch the attached BlockDriverState's reference count.
> + * TODO Decrement it!
>   */
>  void blk_unref(BlockBackend *blk)
>  {
> @@ -108,3 +146,44 @@ BlockBackend *blk_next(BlockBackend *blk)
>  {
>      return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
>  }
> +
> +/**
> + * blk_attach_bs:
> + *
> + * Attach @bs to @blk, taking over the caller's reference to @bs.
> + */
> +void blk_attach_bs(BlockBackend *blk, BlockDriverState *bs)
> +{
> +    assert(!blk->bs && !bs->blk);
> +    blk->bs = bs;
> +    bs->blk = blk;
> +}
> +
> +/**
> + * blk_bs:
> + *
> + * Returns: the BlockDriverState attached to @blk, or %NULL
> + */
> +BlockDriverState *blk_bs(BlockBackend *blk)
> +{
> +    return blk->bs;
> +}
> +
> +/**
> + * blk_detach_bs:
> + *
> + * Detach @blk's BlockDriverState, giving up its reference to the
> + * caller.
> + *
> + * Returns: the detached BlockDriverState
> + */
> +BlockDriverState *blk_detach_bs(BlockBackend *blk)
> +{
> +    BlockDriverState *bs = blk->bs;
> +
> +    if (bs) {
> +        bs->blk = NULL;
> +        blk->bs = NULL;
> +    }
> +    return bs;
> +}
> diff --git a/blockdev.c b/blockdev.c
> index 86596bc..0a0b95e 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -304,6 +304,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      int bdrv_flags = 0;
>      int on_read_error, on_write_error;
>      BlockBackend *blk;
> +    BlockDriverState *bs;
>      DriveInfo *dinfo;
>      ThrottleConfig cfg;
>      int snapshot = 0;
> @@ -459,30 +460,28 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      }
>  
>      /* init */
> -    blk = blk_new(qemu_opts_id(opts), errp);
> +    blk = blk_new_with_bs(qemu_opts_id(opts), errp);
>      if (!blk) {
>          goto early_err;
>      }
> +    bs = blk_bs(blk);
> +    bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
> +    bs->read_only = ro;
> +    bs->detect_zeroes = detect_zeroes;
> +
> +    bdrv_set_on_error(bs, on_read_error, on_write_error);
> +
> +    /* disk I/O throttling */
> +    if (throttle_enabled(&cfg)) {
> +        bdrv_io_limits_enable(bs);
> +        bdrv_set_io_limits(bs, &cfg);
> +    }
> +
>      dinfo = g_malloc0(sizeof(*dinfo));
>      dinfo->id = g_strdup(qemu_opts_id(opts));
> -    dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
> -    if (error) {
> -        error_propagate(errp, error);
> -        goto bdrv_new_err;
> -    }
> -    dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
> -    dinfo->bdrv->read_only = ro;
> -    dinfo->bdrv->detect_zeroes = detect_zeroes;
> +    dinfo->bdrv = bs;
>      QTAILQ_INSERT_TAIL(&drives, dinfo, next);
>  
> -    bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
> -
> -    /* disk I/O throttling */
> -    if (throttle_enabled(&cfg)) {
> -        bdrv_io_limits_enable(dinfo->bdrv);
> -        bdrv_set_io_limits(dinfo->bdrv, &cfg);
> -    }
> -
>      if (!file || !*file) {
>          if (has_driver_specific_opts) {
>              file = NULL;
> @@ -509,7 +508,8 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
>  
>      QINCREF(bs_opts);
> -    ret = bdrv_open(&dinfo->bdrv, file, NULL, bs_opts, bdrv_flags, drv, &error);
> +    ret = bdrv_open(&bs, file, NULL, bs_opts, bdrv_flags, drv, &error);
> +    assert(bs == blk_bs(blk));
>  
>      if (ret < 0) {
>          error_setg(errp, "could not open disk image %s: %s",
> @@ -518,8 +518,9 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>          goto err;
>      }
>  
> -    if (bdrv_key_required(dinfo->bdrv))
> +    if (bdrv_key_required(bs)) {
>          autostart = 0;
> +    }
>  
>      QDECREF(bs_opts);
>      qemu_opts_del(opts);
> @@ -529,7 +530,6 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>  err:
>      bdrv_unref(dinfo->bdrv);
>      QTAILQ_REMOVE(&drives, dinfo, next);
> -bdrv_new_err:
>      g_free(dinfo->id);
>      g_free(dinfo);
>      blk_unref(blk);
> @@ -1746,15 +1746,17 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
>  int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>  {
>      const char *id = qdict_get_str(qdict, "id");
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      AioContext *aio_context;
>      Error *local_err = NULL;
>  
> -    bs = bdrv_find(id);
> -    if (!bs) {
> +    blk = blk_by_name(id);
> +    if (!blk) {
>          error_report("Device '%s' not found", id);
>          return -1;
>      }
> +    bs = blk_bs(blk);
>  
>      aio_context = bdrv_get_aio_context(bs);
>      aio_context_acquire(aio_context);
> @@ -1777,8 +1779,9 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>       * then we can just get rid of the block driver state right here.
>       */
>      if (bdrv_get_attached_dev(bs)) {
> +        blk_detach_bs(blk);
> +        blk_unref(blk);
>          bdrv_make_anon(bs);
> -        blk_unref(blk_by_name(id));
>          /* Further I/O must not pause the guest */
>          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
>                            BLOCKDEV_ON_ERROR_REPORT);
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 730a021..51f4f3a 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -858,15 +858,11 @@ static int blk_connect(struct XenDevice *xendev)
>  
>          /* setup via xenbus -> create new block driver instance */
>          xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
> -        blk = blk_new(blkdev->dev, NULL);
> +        blk = blk_new_with_bs(blkdev->dev, NULL);
>          if (!blk) {
>              return -1;
>          }
> -        blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
> -        if (!blkdev->bs) {
> -            blk_unref(blk);
> -            return -1;
> -        }
> +        blkdev->bs = blk_bs(blk);
>  
>          drv = bdrv_find_whitelisted_format(blkdev->fileproto, readonly);
>          if (bdrv_open(&blkdev->bs, blkdev->filename, NULL, NULL, qflags,
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 8a61215..a04c852 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -323,6 +323,8 @@ struct BlockDriverState {
>      BlockDriver *drv; /* NULL means no media */
>      void *opaque;
>  
> +    BlockBackend *blk;          /* owning backend, if any */
> +
>      void *dev;                  /* attached device model, if any */
>      /* TODO change to DeviceState when all users are qdevified */
>      const BlockDevOps *dev_ops;
> diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
> index 3f8371c..539b96f 100644
> --- a/include/sysemu/block-backend.h
> +++ b/include/sysemu/block-backend.h
> @@ -17,10 +17,15 @@
>  #include "qapi/error.h"
>  
>  BlockBackend *blk_new(const char *name, Error **errp);
> +BlockBackend *blk_new_with_bs(const char *name, Error **errp);
>  void blk_ref(BlockBackend *blk);
>  void blk_unref(BlockBackend *blk);
>  const char *blk_name(BlockBackend *blk);
>  BlockBackend *blk_by_name(const char *name);
>  BlockBackend *blk_next(BlockBackend *blk);
>  
> +void blk_attach_bs(BlockBackend *blk, BlockDriverState *bs);
> +BlockDriverState *blk_detach_bs(BlockBackend *blk);
> +BlockDriverState *blk_bs(BlockBackend *blk);
> +
>  #endif
> diff --git a/qemu-img.c b/qemu-img.c
> index bad3f64..9fba5a1 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -284,20 +284,19 @@ static int print_block_option_help(const char *filename, const char *fmt)
>      return 0;
>  }
>  
> -static BlockDriverState *bdrv_new_open(const char *id,
> -                                       const char *filename,
> -                                       const char *fmt,
> -                                       int flags,
> -                                       bool require_io,
> -                                       bool quiet)
> +static BlockBackend *img_open(const char *id, const char *filename,
> +                              const char *fmt, int flags,
> +                              bool require_io, bool quiet)
>  {
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      BlockDriver *drv;
>      char password[256];
>      Error *local_err = NULL;
>      int ret;
>  
> -    bs = bdrv_new_named(id, &error_abort);
> +    blk = blk_new_with_bs(id, &error_abort);
> +    bs = blk_bs(blk);
>  
>      if (fmt) {
>          drv = bdrv_find_format(fmt);
> @@ -328,9 +327,10 @@ static BlockDriverState *bdrv_new_open(const char *id,
>              goto fail;
>          }
>      }
> -    return bs;
> +    return blk;
>  fail:
>      bdrv_unref(bs);
> +    blk_unref(blk);
>      return NULL;
>  }
>  
> @@ -651,11 +651,11 @@ static int img_check(int argc, char **argv)
>          return 1;
>      }
>  
> -    blk = blk_new("image", &error_abort);
> -    bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
> -    if (!bs) {
> +    blk = img_open("image", filename, fmt, flags, true, quiet);
> +    if (!blk) {
>          return 1;
>      }
> +    bs = blk_bs(blk);
>  
>      check = g_new0(ImageCheck, 1);
>      ret = collect_image_check(bs, check, filename, fmt, fix);
> @@ -761,11 +761,12 @@ static int img_commit(int argc, char **argv)
>          return 1;
>      }
>  
> -    blk = blk_new("image", &error_abort);
> -    bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
> -    if (!bs) {
> +    blk = img_open("image", filename, fmt, flags, true, quiet);
> +    if (!blk) {
>          return 1;
>      }
> +    bs = blk_bs(blk);
> +
>      ret = bdrv_commit(bs);
>      switch(ret) {
>      case 0:
> @@ -1019,21 +1020,21 @@ static int img_compare(int argc, char **argv)
>          goto out3;
>      }
>  
> -    blk1 = blk_new("image 1", &error_abort);
> -    bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet);
> -    if (!bs1) {
> +    blk1 = img_open("image 1", filename1, fmt1, flags, true, quiet);
> +    if (!blk1) {
>          error_report("Can't open file %s", filename1);
>          ret = 2;
>          goto out3;
>      }
> +    bs1 = blk_bs(blk1);
>  
> -    blk2 = blk_new("image 2", &error_abort);
> -    bs2 = bdrv_new_open("image 2", filename2, fmt2, flags, true, quiet);
> -    if (!bs2) {
> +    blk2 = img_open("image 2", filename2, fmt2, flags, true, quiet);
> +    if (!blk2) {
>          error_report("Can't open file %s", filename2);
>          ret = 2;
>          goto out2;
>      }
> +    bs2 = blk_bs(blk2);
>  
>      buf1 = qemu_blockalign(bs1, IO_BUF_SIZE);
>      buf2 = qemu_blockalign(bs2, IO_BUF_SIZE);
> @@ -1375,15 +1376,15 @@ static int img_convert(int argc, char **argv)
>      for (bs_i = 0; bs_i < bs_n; bs_i++) {
>          char *id = bs_n > 1 ? g_strdup_printf("source %d", bs_i)
>                              : g_strdup("source");
> -        blk[bs_i] = blk_new(id, &error_abort);
> -        bs[bs_i] = bdrv_new_open(id, argv[optind + bs_i], fmt, src_flags,
> -                                 true, quiet);
> +        blk[bs_i] = img_open(id, argv[optind + bs_i], fmt, src_flags,
> +                             true, quiet);
>          g_free(id);
> -        if (!bs[bs_i]) {
> +        if (!blk[bs_i]) {
>              error_report("Could not open '%s'", argv[optind + bs_i]);
>              ret = -1;
>              goto out;
>          }
> +        bs[bs_i] = blk_bs(blk[bs_i]);
>          bs_sectors[bs_i] = bdrv_nb_sectors(bs[bs_i]);
>          if (bs_sectors[bs_i] < 0) {
>              error_report("Could not get size of %s: %s",
> @@ -1501,12 +1502,12 @@ static int img_convert(int argc, char **argv)
>          goto out;
>      }
>  
> -    out_blk = blk_new("target", &error_abort);
> -    out_bs = bdrv_new_open("target", out_filename, out_fmt, flags, true, quiet);
> -    if (!out_bs) {
> +    out_blk = img_open("target", out_filename, out_fmt, flags, true, quiet);
> +    if (!out_blk) {
>          ret = -1;
>          goto out;
>      }
> +    out_bs = blk_bs(out_blk);
>  
>      bs_i = 0;
>      bs_offset = 0;
> @@ -1893,12 +1894,12 @@ static ImageInfoList *collect_image_info_list(const char *filename,
>          }
>          g_hash_table_insert(filenames, (gpointer)filename, NULL);
>  
> -        blk = blk_new("image", &error_abort);
> -        bs = bdrv_new_open("image", filename, fmt,
> -                           BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
> -        if (!bs) {
> +        blk = img_open("image", filename, fmt,
> +                       BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
> +        if (!blk) {
>              goto err;
>          }
> +        bs = blk_bs(blk);
>  
>          bdrv_query_image_info(bs, &info, &err);
>          if (err) {
> @@ -2158,11 +2159,11 @@ static int img_map(int argc, char **argv)
>          return 1;
>      }
>  
> -    blk = blk_new("image", &error_abort);
> -    bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS, true, false);
> -    if (!bs) {
> +    blk = img_open("image", filename, fmt, BDRV_O_FLAGS, true, false);
> +    if (!blk) {
>          return 1;
>      }
> +    bs = blk_bs(blk);
>  
>      if (output_format == OFORMAT_HUMAN) {
>          printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
> @@ -2281,11 +2282,11 @@ static int img_snapshot(int argc, char **argv)
>      filename = argv[optind++];
>  
>      /* Open the image */
> -    blk = blk_new("image", &error_abort);
> -    bs = bdrv_new_open("image", filename, NULL, bdrv_oflags, true, quiet);
> -    if (!bs) {
> +    blk = img_open("image", filename, NULL, bdrv_oflags, true, quiet);
> +    if (!blk) {
>          return 1;
>      }
> +    bs = blk_bs(blk);
>  
>      /* Perform the requested action */
>      switch(action) {
> @@ -2427,12 +2428,12 @@ static int img_rebase(int argc, char **argv)
>       * Ignore the old backing file for unsafe rebase in case we want to correct
>       * the reference to a renamed or moved backing file.
>       */
> -    blk = blk_new("image", &error_abort);
> -    bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
> -    if (!bs) {
> +    blk = img_open("image", filename, fmt, flags, true, quiet);
> +    if (!blk) {
>          ret = -1;
>          goto out;
>      }
> +    bs = blk_bs(blk);
>  
>      /* Find the right drivers for the backing files */
>      old_backing_drv = NULL;
> @@ -2460,8 +2461,8 @@ static int img_rebase(int argc, char **argv)
>      if (!unsafe) {
>          char backing_name[1024];
>  
> -        blk_old_backing = blk_new("old_backing", &error_abort);
> -        bs_old_backing = bdrv_new_named("old_backing", &error_abort);
> +        blk_old_backing = blk_new_with_bs("old_backing", &error_abort);
> +        bs_old_backing = blk_bs(blk_old_backing);
>          bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
>          ret = bdrv_open(&bs_old_backing, backing_name, NULL, NULL, src_flags,
>                          old_backing_drv, &local_err);
> @@ -2472,8 +2473,8 @@ static int img_rebase(int argc, char **argv)
>              goto out;
>          }
>          if (out_baseimg[0]) {
> -            blk_new_backing = blk_new("new_backing", &error_abort);
> -            bs_new_backing = bdrv_new_named("new_backing", &error_abort);
> +            blk_new_backing = blk_new_with_bs("new_backing", &error_abort);
> +            bs_new_backing = blk_bs(blk_new_backing);
>              ret = bdrv_open(&bs_new_backing, out_baseimg, NULL, NULL, src_flags,
>                              new_backing_drv, &local_err);
>              if (ret) {
> @@ -2749,13 +2750,13 @@ static int img_resize(int argc, char **argv)
>      n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
>      qemu_opts_del(param);
>  
> -    blk = blk_new("image", &error_abort);
> -    bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR,
> -                       true, quiet);
> -    if (!bs) {
> +    blk = img_open("image", filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR,
> +                   true, quiet);
> +    if (!blk) {
>          ret = -1;
>          goto out;
>      }
> +    bs = blk_bs(blk);
>  
>      if (relative) {
>          total_size = bdrv_getlength(bs) + n * relative;
> @@ -2867,13 +2868,13 @@ static int img_amend(int argc, char **argv)
>          goto out;
>      }
>  
> -    blk = blk_new("image", &error_abort);
> -    bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
> -    if (!bs) {
> +    blk = img_open("image", filename, fmt, flags, true, quiet);
> +    if (!blk) {
>          error_report("Could not open image '%s'", filename);
>          ret = -1;
>          goto out;
>      }
> +    bs = blk_bs(blk);
>  
>      fmt = bs->drv->format_name;
>  
> diff --git a/qemu-io.c b/qemu-io.c
> index 45e5494..ef1d3ea 100644
> --- a/qemu-io.c
> +++ b/qemu-io.c
> @@ -62,8 +62,8 @@ static int openfile(char *name, int flags, int growable, QDict *opts)
>          return 1;
>      }
>  
> -    qemuio_blk = blk_new("hda", &error_abort);
> -    qemuio_bs = bdrv_new_named("hda", &error_abort);
> +    qemuio_blk = blk_new_with_bs("hda", &error_abort);
> +    qemuio_bs = blk_bs(qemuio_blk);
>  
>      if (growable) {
>          flags |= BDRV_O_PROTOCOL;
> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index 94b9b49..8eff588 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -687,8 +687,7 @@ int main(int argc, char **argv)
>          drv = NULL;
>      }
>  
> -    blk_new("hda", &error_abort);
> -    bs = bdrv_new_named("hda", &error_abort);
> +    bs = blk_bs(blk_new_with_bs("hda", &error_abort));
>  
>      srcpath = argv[optind];
>      ret = bdrv_open(&bs, srcpath, NULL, NULL, flags, drv, &local_err);
> -- 
> 1.9.3
>
Kevin Wolf Sept. 10, 2014, 12:24 p.m. UTC | #2
Am 10.09.2014 um 10:13 hat Markus Armbruster geschrieben:
> The pointer from BlockBackend to BlockDriverState is a strong
> reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
> a weak one.
> 
> Convenience function blk_new_with_bs() creates a BlockBackend with its
> BlockDriverState.  Callers have to unref both.  The commit after next
> will relieve them of the need to unref the BlockDriverState.
> 
> Signed-off-by: Markus Armbruster <armbru@redhat.com>

I'm wondering whether this is the right direction to take. In creating a
block device, we have the following operations:

1. Create a BlockBackend
2. Create a BlockDriverState
3. Open the BlockDriverState

bdrv_open() can do 2 and 3, which is probably what we want to have in
the end - new/create and delete/close should be a single operation for
BDSes.

Combining 1 and 2 into a single function might make it harder to actually
use bdrv_open() in this way.

> diff --git a/blockdev.c b/blockdev.c
> index 86596bc..0a0b95e 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -304,6 +304,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      int bdrv_flags = 0;
>      int on_read_error, on_write_error;
>      BlockBackend *blk;
> +    BlockDriverState *bs;
>      DriveInfo *dinfo;
>      ThrottleConfig cfg;
>      int snapshot = 0;
> @@ -459,30 +460,28 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      }
>  
>      /* init */
> -    blk = blk_new(qemu_opts_id(opts), errp);
> +    blk = blk_new_with_bs(qemu_opts_id(opts), errp);
>      if (!blk) {
>          goto early_err;
>      }
> +    bs = blk_bs(blk);
> +    bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
> +    bs->read_only = ro;
> +    bs->detect_zeroes = detect_zeroes;
> +
> +    bdrv_set_on_error(bs, on_read_error, on_write_error);
> +
> +    /* disk I/O throttling */
> +    if (throttle_enabled(&cfg)) {
> +        bdrv_io_limits_enable(bs);
> +        bdrv_set_io_limits(bs, &cfg);
> +    }
> +
>      dinfo = g_malloc0(sizeof(*dinfo));
>      dinfo->id = g_strdup(qemu_opts_id(opts));
> -    dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
> -    if (error) {
> -        error_propagate(errp, error);
> -        goto bdrv_new_err;
> -    }
> -    dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
> -    dinfo->bdrv->read_only = ro;
> -    dinfo->bdrv->detect_zeroes = detect_zeroes;
> +    dinfo->bdrv = bs;
>      QTAILQ_INSERT_TAIL(&drives, dinfo, next);
>  
> -    bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
> -
> -    /* disk I/O throttling */
> -    if (throttle_enabled(&cfg)) {
> -        bdrv_io_limits_enable(dinfo->bdrv);
> -        bdrv_set_io_limits(dinfo->bdrv, &cfg);
> -    }
> -
>      if (!file || !*file) {
>          if (has_driver_specific_opts) {
>              file = NULL;

This looks correct, but it's really doing two things at once (on the one
hand changing code to use bs instead of dinfo->bdrv and reordering, on
the other hand introducing blk_new_with_bs()).

Might be worth doing in separate patches.

> @@ -509,7 +508,8 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>      bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
>  
>      QINCREF(bs_opts);
> -    ret = bdrv_open(&dinfo->bdrv, file, NULL, bs_opts, bdrv_flags, drv, &error);
> +    ret = bdrv_open(&bs, file, NULL, bs_opts, bdrv_flags, drv, &error);
> +    assert(bs == blk_bs(blk));
>  
>      if (ret < 0) {
>          error_setg(errp, "could not open disk image %s: %s",
> @@ -518,8 +518,9 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>          goto err;
>      }
>  
> -    if (bdrv_key_required(dinfo->bdrv))
> +    if (bdrv_key_required(bs)) {
>          autostart = 0;
> +    }
>  
>      QDECREF(bs_opts);
>      qemu_opts_del(opts);
> @@ -529,7 +530,6 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>  err:
>      bdrv_unref(dinfo->bdrv);
>      QTAILQ_REMOVE(&drives, dinfo, next);
> -bdrv_new_err:
>      g_free(dinfo->id);
>      g_free(dinfo);
>      blk_unref(blk);
> @@ -1746,15 +1746,17 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
>  int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>  {
>      const char *id = qdict_get_str(qdict, "id");
> +    BlockBackend *blk;
>      BlockDriverState *bs;
>      AioContext *aio_context;
>      Error *local_err = NULL;
>  
> -    bs = bdrv_find(id);
> -    if (!bs) {
> +    blk = blk_by_name(id);
> +    if (!blk) {
>          error_report("Device '%s' not found", id);
>          return -1;
>      }
> +    bs = blk_bs(blk);
>  
>      aio_context = bdrv_get_aio_context(bs);
>      aio_context_acquire(aio_context);
> @@ -1777,8 +1779,9 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>       * then we can just get rid of the block driver state right here.
>       */
>      if (bdrv_get_attached_dev(bs)) {
> +        blk_detach_bs(blk);
> +        blk_unref(blk);

We're not doing real refcounting yet, but if we did, and if the refcount
didn't drop to zero here, why would detaching the bs still be correct?

blk_delete() already takes care of detaching after this patch, so I
suppose removing the call here would be right.

>          bdrv_make_anon(bs);
> -        blk_unref(blk_by_name(id));
>          /* Further I/O must not pause the guest */
>          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
>                            BLOCKDEV_ON_ERROR_REPORT);

Kevin
Markus Armbruster Sept. 11, 2014, 10:52 a.m. UTC | #3
Benoît Canet <benoit.canet@irqsave.net> writes:

> The Wednesday 10 Sep 2014 à 10:13:32 (+0200), Markus Armbruster wrote :
>> The pointer from BlockBackend to BlockDriverState is a strong
>> reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
>> a weak one.
>> 
>> Convenience function blk_new_with_bs() creates a BlockBackend with its
>> BlockDriverState.  Callers have to unref both.  The commit after next
>> will relieve them of the need to unref the BlockDriverState.
>> 
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>> ---
>>  block.c                        |  10 ++--
>>  block/block-backend.c          |  79 +++++++++++++++++++++++++++++++
>>  blockdev.c                     |  49 +++++++++++---------
>>  hw/block/xen_disk.c            |   8 +---
>>  include/block/block_int.h      |   2 +
>>  include/sysemu/block-backend.h |   5 ++
>>  qemu-img.c                     | 103 +++++++++++++++++++++--------------------
>>  qemu-io.c                      |   4 +-
>>  qemu-nbd.c                     |   3 +-
>>  9 files changed, 175 insertions(+), 88 deletions(-)
>> 
>> diff --git a/block.c b/block.c
>> index 4b3bcd4..a6c03da 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -2032,7 +2032,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
>>   * This will modify the BlockDriverState fields, and swap contents
>>   * between bs_new and bs_old. Both bs_new and bs_old are modified.
>>   *
>> - * bs_new is required to be anonymous.
>> + * bs_new must be nameless and not attached to a BlockBackend.
>
> 1)
> Does "nameless" refer to device_name or node_name ?
> From the code I see it's device_name but the wording can be confusing.

Yes.  It's slightly better than "anonymous", because that really means
both device_name and node_name (since commit dc364f4).

The "nameless" part will go away in PATCH 08, so I'm inclined not to
worry about it now.

> maybe:
> + * bs_new must have an empty device_name and not be attached to a BlockBackend.

Can do if you think it's necessary.
Markus Armbruster Sept. 11, 2014, 3:27 p.m. UTC | #4
Kevin Wolf <kwolf@redhat.com> writes:

> Am 10.09.2014 um 10:13 hat Markus Armbruster geschrieben:
>> The pointer from BlockBackend to BlockDriverState is a strong
>> reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
>> a weak one.
>> 
>> Convenience function blk_new_with_bs() creates a BlockBackend with its
>> BlockDriverState.  Callers have to unref both.  The commit after next
>> will relieve them of the need to unref the BlockDriverState.
>> 
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>
> I'm wondering whether this is the right direction to take. In creating a
> block device, we have the following operations:
>
> 1. Create a BlockBackend
> 2. Create a BlockDriverState
> 3. Open the BlockDriverState
>
> bdrv_open() can do 2 and 3, which is probably what we want to have in
> the end - new/create and delete/close should be a single operation for
> BDSes.

Agreed.

> Combining 1 and 2 into a single function might make it harder to actually
> use bdrv_open() in this way.

Not really.

Before my series, callers all look like:

    bs = bdrv_new(name, errp);
    if (!bs) {
        [bail out]
    }
    [stuff...]
    ret = bdrv_open(&bs, [...]);
    if (ret < 0) {
        [bail out, unreffing blk and bdrv (only blk after PATCH 05]
    }

Fusing bdrv_new() and bdrv_open() into bdrv_open_new() would make it
look like:

    [stuff'...]
    bs = bdrv_open_new([...]);
    if (!bs) {
        [bail out]
    }
    [stuff''...]

Transforming [stuff...] may take some thought.

Now let's consider how my patch affects such a future fusion.  After my
patch, we have:

    blk = blk_new_with_bs(name, errp);
    if (!blk) {
        [bail out]
    }
    bs = blk_bs(blk);
    [stuff...]
    ret = bdrv_open(&bs, [...]);
    if (ret < 0) {
        [bail out, unreffing blk and bdrv (only blk after PATCH 05]
    }

Avoiding the convenience function would make it look like:

    blk = blk_new(name, errp);
    if (!blk) {
        [bail out]
    }
    bs = bdrv_new_named(name, errp);
    if (!bs) {
        [bail out, unreffing blk]
    }
    blk_attach_bs(blk, bs);
    [stuff...]
    ret = bdrv_open(&bs, [...]);
    if (ret < 0) {
        [bail out, unreffing blk and bdrv (only blk after PATCH 05]
    }

More verbose, and more complicated error handling.  Okay if it really
buys us something.  Let's fuse and see:

    [stuff'...]
    blk = blk_new(name, errp);
    if (!blk) {
        [bail out]
    }
    [stuff''...]
    ret = bdrv_open(&bs, [...]);
    if (ret < 0) {
        [bail out, unreffing blk]
    }
    [stuff'''...]

As above, transforming [stuff...] may take some thought.  Having to
inline the convenience function, however, is utterly trivial, and
doesn't make the fusing any harder.

>> diff --git a/blockdev.c b/blockdev.c
>> index 86596bc..0a0b95e 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -304,6 +304,7 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>      int bdrv_flags = 0;
>>      int on_read_error, on_write_error;
>>      BlockBackend *blk;
>> +    BlockDriverState *bs;
>>      DriveInfo *dinfo;
>>      ThrottleConfig cfg;
>>      int snapshot = 0;
>> @@ -459,30 +460,28 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>      }
>>  
>>      /* init */
>> -    blk = blk_new(qemu_opts_id(opts), errp);
>> +    blk = blk_new_with_bs(qemu_opts_id(opts), errp);
>>      if (!blk) {
>>          goto early_err;
>>      }
>> +    bs = blk_bs(blk);
>> +    bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
>> +    bs->read_only = ro;
>> +    bs->detect_zeroes = detect_zeroes;
>> +
>> +    bdrv_set_on_error(bs, on_read_error, on_write_error);
>> +
>> +    /* disk I/O throttling */
>> +    if (throttle_enabled(&cfg)) {
>> +        bdrv_io_limits_enable(bs);
>> +        bdrv_set_io_limits(bs, &cfg);
>> +    }
>> +
>>      dinfo = g_malloc0(sizeof(*dinfo));
>>      dinfo->id = g_strdup(qemu_opts_id(opts));
>> -    dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
>> -    if (error) {
>> -        error_propagate(errp, error);
>> -        goto bdrv_new_err;
>> -    }
>> -    dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
>> -    dinfo->bdrv->read_only = ro;
>> -    dinfo->bdrv->detect_zeroes = detect_zeroes;
>> +    dinfo->bdrv = bs;
>>      QTAILQ_INSERT_TAIL(&drives, dinfo, next);
>>  
>> -    bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
>> -
>> -    /* disk I/O throttling */
>> -    if (throttle_enabled(&cfg)) {
>> -        bdrv_io_limits_enable(dinfo->bdrv);
>> -        bdrv_set_io_limits(dinfo->bdrv, &cfg);
>> -    }
>> -
>>      if (!file || !*file) {
>>          if (has_driver_specific_opts) {
>>              file = NULL;
>
> This looks correct, but it's really doing two things at once (on the one
> hand changing code to use bs instead of dinfo->bdrv and reordering, on
> the other hand introducing blk_new_with_bs()).
>
> Might be worth doing in separate patches.

I'll give it a try.

>> @@ -509,7 +508,8 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>      bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
>>  
>>      QINCREF(bs_opts);
>> -    ret = bdrv_open(&dinfo->bdrv, file, NULL, bs_opts, bdrv_flags, drv, &error);
>> +    ret = bdrv_open(&bs, file, NULL, bs_opts, bdrv_flags, drv, &error);
>> +    assert(bs == blk_bs(blk));
>>  
>>      if (ret < 0) {
>>          error_setg(errp, "could not open disk image %s: %s",
>> @@ -518,8 +518,9 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>          goto err;
>>      }
>>  
>> -    if (bdrv_key_required(dinfo->bdrv))
>> +    if (bdrv_key_required(bs)) {
>>          autostart = 0;
>> +    }
>>  
>>      QDECREF(bs_opts);
>>      qemu_opts_del(opts);
>> @@ -529,7 +530,6 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
>>  err:
>>      bdrv_unref(dinfo->bdrv);
>>      QTAILQ_REMOVE(&drives, dinfo, next);
>> -bdrv_new_err:
>>      g_free(dinfo->id);
>>      g_free(dinfo);
>>      blk_unref(blk);
>> @@ -1746,15 +1746,17 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
>>  int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>>  {
>>      const char *id = qdict_get_str(qdict, "id");
>> +    BlockBackend *blk;
>>      BlockDriverState *bs;
>>      AioContext *aio_context;
>>      Error *local_err = NULL;
>>  
>> -    bs = bdrv_find(id);
>> -    if (!bs) {
>> +    blk = blk_by_name(id);
>> +    if (!blk) {
>>          error_report("Device '%s' not found", id);
>>          return -1;
>>      }
>> +    bs = blk_bs(blk);
>>  
>>      aio_context = bdrv_get_aio_context(bs);
>>      aio_context_acquire(aio_context);
>> @@ -1777,8 +1779,9 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
>>       * then we can just get rid of the block driver state right here.
>>       */
>>      if (bdrv_get_attached_dev(bs)) {
>> +        blk_detach_bs(blk);
>> +        blk_unref(blk);
>
> We're not doing real refcounting yet, but if we did, and if the refcount
> didn't drop to zero here, why would detaching the bs still be correct?
>
> blk_delete() already takes care of detaching after this patch, so I
> suppose removing the call here would be right.

I've since reworked this part to address the temporary leak you pointed
out in your review of PATCH 02.  My current tree has this hunk here:

@@ -1791,8 +1787,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
      * then we can just get rid of the block driver state right here.
      */
     if (bdrv_get_attached_dev(bs)) {
-        bdrv_make_anon(bs);
-        blk_unref(blk_by_name(id));
+        blk_make_anon(blk);
         /* Further I/O must not pause the guest */
         bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
                           BLOCKDEV_ON_ERROR_REPORT);

Suggest you take another careful look when you review v2.

>>          bdrv_make_anon(bs);
>> -        blk_unref(blk_by_name(id));
>>          /* Further I/O must not pause the guest */
>>          bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
>>                            BLOCKDEV_ON_ERROR_REPORT);
>
> Kevin
diff mbox

Patch

diff --git a/block.c b/block.c
index 4b3bcd4..a6c03da 100644
--- a/block.c
+++ b/block.c
@@ -2032,7 +2032,7 @@  static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
  * This will modify the BlockDriverState fields, and swap contents
  * between bs_new and bs_old. Both bs_new and bs_old are modified.
  *
- * bs_new is required to be anonymous.
+ * bs_new must be nameless and not attached to a BlockBackend.
  *
  * This function does not create any image files.
  */
@@ -2051,8 +2051,9 @@  void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
     }
 
-    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
+    /* bs_new must be nameless and shouldn't have anything fancy enabled */
     assert(bs_new->device_name[0] == '\0');
+    assert(!bs_new->blk);
     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
     assert(bs_new->job == NULL);
     assert(bs_new->dev == NULL);
@@ -2068,8 +2069,9 @@  void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
     bdrv_move_feature_fields(bs_old, bs_new);
     bdrv_move_feature_fields(bs_new, &tmp);
 
-    /* bs_new shouldn't be in bdrv_states even after the swap!  */
+    /* bs_new must remain nameless and unattached */
     assert(bs_new->device_name[0] == '\0');
+    assert(!bs_new->blk);
 
     /* Check a few fields that should remain attached to the device */
     assert(bs_new->dev == NULL);
@@ -2096,7 +2098,7 @@  void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
  * This will modify the BlockDriverState fields, and swap contents
  * between bs_new and bs_top. Both bs_new and bs_top are modified.
  *
- * bs_new is required to be anonymous.
+ * bs_new must be nameless and not attached to a BlockBackend.
  *
  * This function does not create any image files.
  */
diff --git a/block/block-backend.c b/block/block-backend.c
index 833f7d9..deccb54 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -16,6 +16,7 @@ 
 struct BlockBackend {
     char *name;
     int refcnt;
+    BlockDriverState *bs;
     QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
 };
 
@@ -47,9 +48,43 @@  BlockBackend *blk_new(const char *name, Error **errp)
     return blk;
 }
 
+/**
+ * blk_new_with_bs:
+ * @name: name, must not be %NULL or empty
+ * @errp: return location for an error to be set on failure, or %NULL
+ *
+ * Create a new BlockBackend, with a reference count of one, and
+ * attach a new BlockDriverState to it, also with a reference count of
+ * one.  Caller owns *both* references.
+ * TODO Let caller own only the BlockBackend reference
+ * Fail if @name already exists.
+ *
+ * Returns: the BlockBackend on success, %NULL on error
+ */
+BlockBackend *blk_new_with_bs(const char *name, Error **errp)
+{
+    BlockBackend *blk;
+    BlockDriverState *bs;
+
+    blk = blk_new(name, errp);
+    if (!blk) {
+        return NULL;
+    }
+
+    bs = bdrv_new_named(name, errp);
+    if (!bs) {
+        blk_unref(blk);
+        return NULL;
+    }
+
+    blk_attach_bs(blk, bs);
+    return blk;
+}
+
 static void blk_delete(BlockBackend *blk)
 {
     assert(!blk->refcnt);
+    blk_detach_bs(blk);
     QTAILQ_REMOVE(&blk_backends, blk, link);
     g_free(blk->name);
     g_free(blk);
@@ -70,6 +105,9 @@  void blk_ref(BlockBackend *blk)
  *
  * Decrement @blk's reference count.  If this drops it to zero,
  * destroy @blk.
+ *
+ * Does *not* touch the attached BlockDriverState's reference count.
+ * TODO Decrement it!
  */
 void blk_unref(BlockBackend *blk)
 {
@@ -108,3 +146,44 @@  BlockBackend *blk_next(BlockBackend *blk)
 {
     return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
 }
+
+/**
+ * blk_attach_bs:
+ *
+ * Attach @bs to @blk, taking over the caller's reference to @bs.
+ */
+void blk_attach_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+    assert(!blk->bs && !bs->blk);
+    blk->bs = bs;
+    bs->blk = blk;
+}
+
+/**
+ * blk_bs:
+ *
+ * Returns: the BlockDriverState attached to @blk, or %NULL
+ */
+BlockDriverState *blk_bs(BlockBackend *blk)
+{
+    return blk->bs;
+}
+
+/**
+ * blk_detach_bs:
+ *
+ * Detach @blk's BlockDriverState, giving up its reference to the
+ * caller.
+ *
+ * Returns: the detached BlockDriverState
+ */
+BlockDriverState *blk_detach_bs(BlockBackend *blk)
+{
+    BlockDriverState *bs = blk->bs;
+
+    if (bs) {
+        bs->blk = NULL;
+        blk->bs = NULL;
+    }
+    return bs;
+}
diff --git a/blockdev.c b/blockdev.c
index 86596bc..0a0b95e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -304,6 +304,7 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     int bdrv_flags = 0;
     int on_read_error, on_write_error;
     BlockBackend *blk;
+    BlockDriverState *bs;
     DriveInfo *dinfo;
     ThrottleConfig cfg;
     int snapshot = 0;
@@ -459,30 +460,28 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     }
 
     /* init */
-    blk = blk_new(qemu_opts_id(opts), errp);
+    blk = blk_new_with_bs(qemu_opts_id(opts), errp);
     if (!blk) {
         goto early_err;
     }
+    bs = blk_bs(blk);
+    bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
+    bs->read_only = ro;
+    bs->detect_zeroes = detect_zeroes;
+
+    bdrv_set_on_error(bs, on_read_error, on_write_error);
+
+    /* disk I/O throttling */
+    if (throttle_enabled(&cfg)) {
+        bdrv_io_limits_enable(bs);
+        bdrv_set_io_limits(bs, &cfg);
+    }
+
     dinfo = g_malloc0(sizeof(*dinfo));
     dinfo->id = g_strdup(qemu_opts_id(opts));
-    dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
-    if (error) {
-        error_propagate(errp, error);
-        goto bdrv_new_err;
-    }
-    dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
-    dinfo->bdrv->read_only = ro;
-    dinfo->bdrv->detect_zeroes = detect_zeroes;
+    dinfo->bdrv = bs;
     QTAILQ_INSERT_TAIL(&drives, dinfo, next);
 
-    bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
-
-    /* disk I/O throttling */
-    if (throttle_enabled(&cfg)) {
-        bdrv_io_limits_enable(dinfo->bdrv);
-        bdrv_set_io_limits(dinfo->bdrv, &cfg);
-    }
-
     if (!file || !*file) {
         if (has_driver_specific_opts) {
             file = NULL;
@@ -509,7 +508,8 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
 
     QINCREF(bs_opts);
-    ret = bdrv_open(&dinfo->bdrv, file, NULL, bs_opts, bdrv_flags, drv, &error);
+    ret = bdrv_open(&bs, file, NULL, bs_opts, bdrv_flags, drv, &error);
+    assert(bs == blk_bs(blk));
 
     if (ret < 0) {
         error_setg(errp, "could not open disk image %s: %s",
@@ -518,8 +518,9 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
         goto err;
     }
 
-    if (bdrv_key_required(dinfo->bdrv))
+    if (bdrv_key_required(bs)) {
         autostart = 0;
+    }
 
     QDECREF(bs_opts);
     qemu_opts_del(opts);
@@ -529,7 +530,6 @@  static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
 err:
     bdrv_unref(dinfo->bdrv);
     QTAILQ_REMOVE(&drives, dinfo, next);
-bdrv_new_err:
     g_free(dinfo->id);
     g_free(dinfo);
     blk_unref(blk);
@@ -1746,15 +1746,17 @@  void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
 int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
     const char *id = qdict_get_str(qdict, "id");
+    BlockBackend *blk;
     BlockDriverState *bs;
     AioContext *aio_context;
     Error *local_err = NULL;
 
-    bs = bdrv_find(id);
-    if (!bs) {
+    blk = blk_by_name(id);
+    if (!blk) {
         error_report("Device '%s' not found", id);
         return -1;
     }
+    bs = blk_bs(blk);
 
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
@@ -1777,8 +1779,9 @@  int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
      * then we can just get rid of the block driver state right here.
      */
     if (bdrv_get_attached_dev(bs)) {
+        blk_detach_bs(blk);
+        blk_unref(blk);
         bdrv_make_anon(bs);
-        blk_unref(blk_by_name(id));
         /* Further I/O must not pause the guest */
         bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
                           BLOCKDEV_ON_ERROR_REPORT);
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 730a021..51f4f3a 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -858,15 +858,11 @@  static int blk_connect(struct XenDevice *xendev)
 
         /* setup via xenbus -> create new block driver instance */
         xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
-        blk = blk_new(blkdev->dev, NULL);
+        blk = blk_new_with_bs(blkdev->dev, NULL);
         if (!blk) {
             return -1;
         }
-        blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
-        if (!blkdev->bs) {
-            blk_unref(blk);
-            return -1;
-        }
+        blkdev->bs = blk_bs(blk);
 
         drv = bdrv_find_whitelisted_format(blkdev->fileproto, readonly);
         if (bdrv_open(&blkdev->bs, blkdev->filename, NULL, NULL, qflags,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8a61215..a04c852 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -323,6 +323,8 @@  struct BlockDriverState {
     BlockDriver *drv; /* NULL means no media */
     void *opaque;
 
+    BlockBackend *blk;          /* owning backend, if any */
+
     void *dev;                  /* attached device model, if any */
     /* TODO change to DeviceState when all users are qdevified */
     const BlockDevOps *dev_ops;
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 3f8371c..539b96f 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -17,10 +17,15 @@ 
 #include "qapi/error.h"
 
 BlockBackend *blk_new(const char *name, Error **errp);
+BlockBackend *blk_new_with_bs(const char *name, Error **errp);
 void blk_ref(BlockBackend *blk);
 void blk_unref(BlockBackend *blk);
 const char *blk_name(BlockBackend *blk);
 BlockBackend *blk_by_name(const char *name);
 BlockBackend *blk_next(BlockBackend *blk);
 
+void blk_attach_bs(BlockBackend *blk, BlockDriverState *bs);
+BlockDriverState *blk_detach_bs(BlockBackend *blk);
+BlockDriverState *blk_bs(BlockBackend *blk);
+
 #endif
diff --git a/qemu-img.c b/qemu-img.c
index bad3f64..9fba5a1 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -284,20 +284,19 @@  static int print_block_option_help(const char *filename, const char *fmt)
     return 0;
 }
 
-static BlockDriverState *bdrv_new_open(const char *id,
-                                       const char *filename,
-                                       const char *fmt,
-                                       int flags,
-                                       bool require_io,
-                                       bool quiet)
+static BlockBackend *img_open(const char *id, const char *filename,
+                              const char *fmt, int flags,
+                              bool require_io, bool quiet)
 {
+    BlockBackend *blk;
     BlockDriverState *bs;
     BlockDriver *drv;
     char password[256];
     Error *local_err = NULL;
     int ret;
 
-    bs = bdrv_new_named(id, &error_abort);
+    blk = blk_new_with_bs(id, &error_abort);
+    bs = blk_bs(blk);
 
     if (fmt) {
         drv = bdrv_find_format(fmt);
@@ -328,9 +327,10 @@  static BlockDriverState *bdrv_new_open(const char *id,
             goto fail;
         }
     }
-    return bs;
+    return blk;
 fail:
     bdrv_unref(bs);
+    blk_unref(blk);
     return NULL;
 }
 
@@ -651,11 +651,11 @@  static int img_check(int argc, char **argv)
         return 1;
     }
 
-    blk = blk_new("image", &error_abort);
-    bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
-    if (!bs) {
+    blk = img_open("image", filename, fmt, flags, true, quiet);
+    if (!blk) {
         return 1;
     }
+    bs = blk_bs(blk);
 
     check = g_new0(ImageCheck, 1);
     ret = collect_image_check(bs, check, filename, fmt, fix);
@@ -761,11 +761,12 @@  static int img_commit(int argc, char **argv)
         return 1;
     }
 
-    blk = blk_new("image", &error_abort);
-    bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
-    if (!bs) {
+    blk = img_open("image", filename, fmt, flags, true, quiet);
+    if (!blk) {
         return 1;
     }
+    bs = blk_bs(blk);
+
     ret = bdrv_commit(bs);
     switch(ret) {
     case 0:
@@ -1019,21 +1020,21 @@  static int img_compare(int argc, char **argv)
         goto out3;
     }
 
-    blk1 = blk_new("image 1", &error_abort);
-    bs1 = bdrv_new_open("image 1", filename1, fmt1, flags, true, quiet);
-    if (!bs1) {
+    blk1 = img_open("image 1", filename1, fmt1, flags, true, quiet);
+    if (!blk1) {
         error_report("Can't open file %s", filename1);
         ret = 2;
         goto out3;
     }
+    bs1 = blk_bs(blk1);
 
-    blk2 = blk_new("image 2", &error_abort);
-    bs2 = bdrv_new_open("image 2", filename2, fmt2, flags, true, quiet);
-    if (!bs2) {
+    blk2 = img_open("image 2", filename2, fmt2, flags, true, quiet);
+    if (!blk2) {
         error_report("Can't open file %s", filename2);
         ret = 2;
         goto out2;
     }
+    bs2 = blk_bs(blk2);
 
     buf1 = qemu_blockalign(bs1, IO_BUF_SIZE);
     buf2 = qemu_blockalign(bs2, IO_BUF_SIZE);
@@ -1375,15 +1376,15 @@  static int img_convert(int argc, char **argv)
     for (bs_i = 0; bs_i < bs_n; bs_i++) {
         char *id = bs_n > 1 ? g_strdup_printf("source %d", bs_i)
                             : g_strdup("source");
-        blk[bs_i] = blk_new(id, &error_abort);
-        bs[bs_i] = bdrv_new_open(id, argv[optind + bs_i], fmt, src_flags,
-                                 true, quiet);
+        blk[bs_i] = img_open(id, argv[optind + bs_i], fmt, src_flags,
+                             true, quiet);
         g_free(id);
-        if (!bs[bs_i]) {
+        if (!blk[bs_i]) {
             error_report("Could not open '%s'", argv[optind + bs_i]);
             ret = -1;
             goto out;
         }
+        bs[bs_i] = blk_bs(blk[bs_i]);
         bs_sectors[bs_i] = bdrv_nb_sectors(bs[bs_i]);
         if (bs_sectors[bs_i] < 0) {
             error_report("Could not get size of %s: %s",
@@ -1501,12 +1502,12 @@  static int img_convert(int argc, char **argv)
         goto out;
     }
 
-    out_blk = blk_new("target", &error_abort);
-    out_bs = bdrv_new_open("target", out_filename, out_fmt, flags, true, quiet);
-    if (!out_bs) {
+    out_blk = img_open("target", out_filename, out_fmt, flags, true, quiet);
+    if (!out_blk) {
         ret = -1;
         goto out;
     }
+    out_bs = blk_bs(out_blk);
 
     bs_i = 0;
     bs_offset = 0;
@@ -1893,12 +1894,12 @@  static ImageInfoList *collect_image_info_list(const char *filename,
         }
         g_hash_table_insert(filenames, (gpointer)filename, NULL);
 
-        blk = blk_new("image", &error_abort);
-        bs = bdrv_new_open("image", filename, fmt,
-                           BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
-        if (!bs) {
+        blk = img_open("image", filename, fmt,
+                       BDRV_O_FLAGS | BDRV_O_NO_BACKING, false, false);
+        if (!blk) {
             goto err;
         }
+        bs = blk_bs(blk);
 
         bdrv_query_image_info(bs, &info, &err);
         if (err) {
@@ -2158,11 +2159,11 @@  static int img_map(int argc, char **argv)
         return 1;
     }
 
-    blk = blk_new("image", &error_abort);
-    bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS, true, false);
-    if (!bs) {
+    blk = img_open("image", filename, fmt, BDRV_O_FLAGS, true, false);
+    if (!blk) {
         return 1;
     }
+    bs = blk_bs(blk);
 
     if (output_format == OFORMAT_HUMAN) {
         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
@@ -2281,11 +2282,11 @@  static int img_snapshot(int argc, char **argv)
     filename = argv[optind++];
 
     /* Open the image */
-    blk = blk_new("image", &error_abort);
-    bs = bdrv_new_open("image", filename, NULL, bdrv_oflags, true, quiet);
-    if (!bs) {
+    blk = img_open("image", filename, NULL, bdrv_oflags, true, quiet);
+    if (!blk) {
         return 1;
     }
+    bs = blk_bs(blk);
 
     /* Perform the requested action */
     switch(action) {
@@ -2427,12 +2428,12 @@  static int img_rebase(int argc, char **argv)
      * Ignore the old backing file for unsafe rebase in case we want to correct
      * the reference to a renamed or moved backing file.
      */
-    blk = blk_new("image", &error_abort);
-    bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
-    if (!bs) {
+    blk = img_open("image", filename, fmt, flags, true, quiet);
+    if (!blk) {
         ret = -1;
         goto out;
     }
+    bs = blk_bs(blk);
 
     /* Find the right drivers for the backing files */
     old_backing_drv = NULL;
@@ -2460,8 +2461,8 @@  static int img_rebase(int argc, char **argv)
     if (!unsafe) {
         char backing_name[1024];
 
-        blk_old_backing = blk_new("old_backing", &error_abort);
-        bs_old_backing = bdrv_new_named("old_backing", &error_abort);
+        blk_old_backing = blk_new_with_bs("old_backing", &error_abort);
+        bs_old_backing = blk_bs(blk_old_backing);
         bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
         ret = bdrv_open(&bs_old_backing, backing_name, NULL, NULL, src_flags,
                         old_backing_drv, &local_err);
@@ -2472,8 +2473,8 @@  static int img_rebase(int argc, char **argv)
             goto out;
         }
         if (out_baseimg[0]) {
-            blk_new_backing = blk_new("new_backing", &error_abort);
-            bs_new_backing = bdrv_new_named("new_backing", &error_abort);
+            blk_new_backing = blk_new_with_bs("new_backing", &error_abort);
+            bs_new_backing = blk_bs(blk_new_backing);
             ret = bdrv_open(&bs_new_backing, out_baseimg, NULL, NULL, src_flags,
                             new_backing_drv, &local_err);
             if (ret) {
@@ -2749,13 +2750,13 @@  static int img_resize(int argc, char **argv)
     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
     qemu_opts_del(param);
 
-    blk = blk_new("image", &error_abort);
-    bs = bdrv_new_open("image", filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR,
-                       true, quiet);
-    if (!bs) {
+    blk = img_open("image", filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR,
+                   true, quiet);
+    if (!blk) {
         ret = -1;
         goto out;
     }
+    bs = blk_bs(blk);
 
     if (relative) {
         total_size = bdrv_getlength(bs) + n * relative;
@@ -2867,13 +2868,13 @@  static int img_amend(int argc, char **argv)
         goto out;
     }
 
-    blk = blk_new("image", &error_abort);
-    bs = bdrv_new_open("image", filename, fmt, flags, true, quiet);
-    if (!bs) {
+    blk = img_open("image", filename, fmt, flags, true, quiet);
+    if (!blk) {
         error_report("Could not open image '%s'", filename);
         ret = -1;
         goto out;
     }
+    bs = blk_bs(blk);
 
     fmt = bs->drv->format_name;
 
diff --git a/qemu-io.c b/qemu-io.c
index 45e5494..ef1d3ea 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -62,8 +62,8 @@  static int openfile(char *name, int flags, int growable, QDict *opts)
         return 1;
     }
 
-    qemuio_blk = blk_new("hda", &error_abort);
-    qemuio_bs = bdrv_new_named("hda", &error_abort);
+    qemuio_blk = blk_new_with_bs("hda", &error_abort);
+    qemuio_bs = blk_bs(qemuio_blk);
 
     if (growable) {
         flags |= BDRV_O_PROTOCOL;
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 94b9b49..8eff588 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -687,8 +687,7 @@  int main(int argc, char **argv)
         drv = NULL;
     }
 
-    blk_new("hda", &error_abort);
-    bs = bdrv_new_named("hda", &error_abort);
+    bs = blk_bs(blk_new_with_bs("hda", &error_abort));
 
     srcpath = argv[optind];
     ret = bdrv_open(&bs, srcpath, NULL, NULL, flags, drv, &local_err);