diff mbox

[V18,11/12] quorum: Add quorum_open() and quorum_close().

Message ID 1392725487-18330-12-git-send-email-benoit.canet@irqsave.net
State New
Headers show

Commit Message

Benoît Canet Feb. 18, 2014, 12:11 p.m. UTC
From: Benoît Canet <benoit@irqsave.net>

Example of command line:

-drive if=virtio,driver=quorum,\
children.0.file.filename=1.raw,\
children.0.node-name=1.raw,\
children.0.driver=raw,\
children.1.file.filename=2.raw,\
children.1.node-name=2.raw,\
children.1.driver=raw,\
children.2.file.filename=3.raw,\
children.2.node-name=3.raw,\
children.2.driver=raw,\
vote-threshold=2

blkverify=on with vote-threshold=2 and two files can be passed to
emulate blkverify.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/quorum.c   | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 monitor.c        |   3 ++
 qapi-schema.json |  21 +++++++-
 3 files changed, 184 insertions(+), 1 deletion(-)

Comments

Leandro Dorileo Feb. 18, 2014, 5:37 p.m. UTC | #1
On Tue, Feb 18, 2014 at 01:11:26PM +0100, Beno??t Canet wrote:
> From: Beno??t Canet <benoit@irqsave.net>
> 
> Example of command line:
> 
> -drive if=virtio,driver=quorum,\
> children.0.file.filename=1.raw,\
> children.0.node-name=1.raw,\
> children.0.driver=raw,\
> children.1.file.filename=2.raw,\
> children.1.node-name=2.raw,\
> children.1.driver=raw,\
> children.2.file.filename=3.raw,\
> children.2.node-name=3.raw,\
> children.2.driver=raw,\
> vote-threshold=2
> 
> blkverify=on with vote-threshold=2 and two files can be passed to
> emulate blkverify.
> 
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>  block/quorum.c   | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  monitor.c        |   3 ++
>  qapi-schema.json |  21 +++++++-
>  3 files changed, 184 insertions(+), 1 deletion(-)
> 
> diff --git a/block/quorum.c b/block/quorum.c
> index 40832c0..18721ba 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -20,6 +20,9 @@
>  
>  #define HASH_LENGTH 32
>  
> +#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
> +#define QUORUM_OPT_BLKVERIFY      "blkverify"
> +
>  /* This union holds a vote hash value */
>  typedef union QuorumVoteValue {
>      char h[HASH_LENGTH];       /* SHA-256 hash */
> @@ -672,12 +675,170 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
>      return false;
>  }
>  
> +static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
> +{
> +
> +    if (threshold < 1) {
> +        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
> +                  "vote-threshold", "value >= 1");
> +        return -ERANGE;
> +    }
> +
> +    if (threshold > num_children) {
> +        error_setg(errp, "threshold may not exceed children count");
> +        return -ERANGE;
> +    }
> +
> +    return 0;
> +}
> +
> +static QemuOptsList quorum_runtime_opts = {
> +    .name = "quorum",
> +    .head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
> +    .desc = {
> +        {
> +            .name = QUORUM_OPT_VOTE_THRESHOLD,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "The number of vote needed for reaching quorum",
> +        },
> +        {
> +            .name = QUORUM_OPT_BLKVERIFY,
> +            .type = QEMU_OPT_BOOL,
> +            .help = "Trigger block verify mode if set",
> +        },
> +        { /* end of list */ }
> +    },
> +};
> +
> +static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
> +                       Error **errp)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    Error *local_err = NULL;
> +    QemuOpts *opts;
> +    bool *opened;
> +    QDict *sub = NULL;
> +    QList *list = NULL;
> +    const QListEntry *lentry;
> +    const QDictEntry *dentry;
> +    int i;
> +    int ret = 0;
> +
> +    qdict_flatten(options);
> +    qdict_extract_subqdict(options, &sub, "children.");
> +    qdict_array_split(sub, &list);
> +
> +    /* count how many different children are present and validate
> +     * qdict_size(sub) address the open by reference case
> +     */
> +    s->num_children = !qlist_size(list) ? qdict_size(sub) : qlist_size(list);
> +    if (s->num_children < 2) {
> +        error_setg(&local_err,
> +                   "Number of provided children must be greater than 1");
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
> +    qemu_opts_absorb_qdict(opts, options, &local_err);
> +    if (error_is_set(&local_err)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
> +
> +    /* and validate it against s->num_children */
> +    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +
> +    /* is the driver in blkverify mode */
> +    if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
> +        s->num_children == 2 && s->threshold == 2) {
> +        s->is_blkverify = true;
> +    } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
> +        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
> +                "and using two files with vote_threshold=2\n");
> +    }
> +
> +    /* allocate the children BlockDriverState array */
> +    s->bs = g_new0(BlockDriverState *, s->num_children);
> +    opened = g_new0(bool, s->num_children);
> +
> +    /* Open by file name or options dict (command line or QMP) */
> +    if (s->num_children == qlist_size(list)) {
> +        for (i = 0, lentry = qlist_first(list); lentry;
> +            lentry = qlist_next(lentry), i++) {
> +            QDict *d = qobject_to_qdict(lentry->value);
> +            QINCREF(d);
> +            ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL, &local_err);


Shouldn't this bdrv_open call be?

ret = bdrv_open(s->bs[i], NULL, d, flags, NULL, &local_err);


> +            if (ret < 0) {
> +                goto close_exit;
> +            }
> +            opened[i] = true;
> +        }
> +    /* Open by QMP references */
> +    } else {
> +        for (i = 0, dentry = qdict_first(sub); dentry;
> +             dentry = qdict_next(sub, dentry), i++) {
> +            QString *string = qobject_to_qstring(dentry->value);
> +            ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
> +                            flags, NULL, &local_err);


This other bdrv_open() call seems to be not right as well, I think it should be:

            ret = bdrv_open(s->bs[i], qstring_get_str(string), NULL,
                            flags, NULL, &local_err);



> +            if (ret < 0) {
> +                goto close_exit;
> +            }
> +            opened[i] = true;
> +        }
> +    }
> +
> +    g_free(opened);
> +    goto exit;
> +
> +close_exit:
> +    /* cleanup on error */
> +    for (i = 0; i < s->num_children; i++) {
> +        if (!opened[i]) {
> +            continue;
> +        }
> +        bdrv_unref(s->bs[i]);
> +    }
> +    g_free(s->bs);
> +    g_free(opened);
> +exit:
> +    /* propagate error */
> +    if (error_is_set(&local_err)) {
> +        error_propagate(errp, local_err);
> +    }
> +    QDECREF(list);
> +    QDECREF(sub);
> +    return ret;
> +}
> +
> +static void quorum_close(BlockDriverState *bs)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->num_children; i++) {
> +        bdrv_unref(s->bs[i]);
> +    }
> +
> +    g_free(s->bs);
> +}
> +
>  static BlockDriver bdrv_quorum = {
>      .format_name        = "quorum",
>      .protocol_name      = "quorum",
>  
>      .instance_size      = sizeof(BDRVQuorumState),
>  
> +    .bdrv_file_open     = quorum_open,
> +    .bdrv_close         = quorum_close,
> +
> +    .authorizations     = { true, true },
> +
>      .bdrv_co_flush_to_disk = quorum_co_flush,
>  
>      .bdrv_getlength     = quorum_getlength,
> diff --git a/monitor.c b/monitor.c
> index 81ffa0f..ed5bb98 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -639,6 +639,9 @@ static void monitor_protocol_event_init(void)
>      monitor_protocol_event_throttle(QEVENT_RTC_CHANGE, 1000);
>      monitor_protocol_event_throttle(QEVENT_BALLOON_CHANGE, 1000);
>      monitor_protocol_event_throttle(QEVENT_WATCHDOG, 1000);
> +    /* limit the rate of quorum events to avoid hammering the management */
> +    monitor_protocol_event_throttle(QEVENT_QUORUM_REPORT_BAD, 1000);
> +    monitor_protocol_event_throttle(QEVENT_QUORUM_FAILURE, 1000);
>  }
>  
>  /**
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 7cfb5e5..990d0c5 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -4352,6 +4352,24 @@
>              'raw': 'BlockdevRef' } }
>  
>  ##
> +# @BlockdevOptionsQuorum
> +#
> +# Driver specific block device options for Quorum
> +#
> +# @blkverify:      #optional true if the driver must print content mismatch
> +#
> +# @children:       the children block device to use
> +#
> +# @vote_threshold: the vote limit under which a read will fail
> +#
> +# Since: 2.0
> +##
> +{ 'type': 'BlockdevOptionsQuorum',
> +  'data': { '*blkverify': 'bool',
> +            'children': [ 'BlockdevRef' ],
> +            'vote-threshold': 'int' } }
> +
> +##
>  # @BlockdevOptions
>  #
>  # Options for creating a block device.
> @@ -4390,7 +4408,8 @@
>        'vdi':        'BlockdevOptionsGenericFormat',
>        'vhdx':       'BlockdevOptionsGenericFormat',
>        'vmdk':       'BlockdevOptionsGenericCOWFormat',
> -      'vpc':        'BlockdevOptionsGenericFormat'
> +      'vpc':        'BlockdevOptionsGenericFormat',
> +      'quorum':     'BlockdevOptionsQuorum'
>    } }
>  
>  ##
> -- 
> 1.8.3.2
> 
>
Benoît Canet Feb. 18, 2014, 5:48 p.m. UTC | #2
The Tuesday 18 Feb 2014 à 17:37:08 (+0000), Leandro Dorileo wrote :
> On Tue, Feb 18, 2014 at 01:11:26PM +0100, Beno??t Canet wrote:
> > From: Beno??t Canet <benoit@irqsave.net>
> > 
> > Example of command line:
> > 
> > -drive if=virtio,driver=quorum,\
> > children.0.file.filename=1.raw,\
> > children.0.node-name=1.raw,\
> > children.0.driver=raw,\
> > children.1.file.filename=2.raw,\
> > children.1.node-name=2.raw,\
> > children.1.driver=raw,\
> > children.2.file.filename=3.raw,\
> > children.2.node-name=3.raw,\
> > children.2.driver=raw,\
> > vote-threshold=2
> > 
> > blkverify=on with vote-threshold=2 and two files can be passed to
> > emulate blkverify.
> > 
> > Signed-off-by: Benoit Canet <benoit@irqsave.net>
> > ---
> >  block/quorum.c   | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  monitor.c        |   3 ++
> >  qapi-schema.json |  21 +++++++-
> >  3 files changed, 184 insertions(+), 1 deletion(-)
> > 
> > diff --git a/block/quorum.c b/block/quorum.c
> > index 40832c0..18721ba 100644
> > --- a/block/quorum.c
> > +++ b/block/quorum.c
> > @@ -20,6 +20,9 @@
> >  
> >  #define HASH_LENGTH 32
> >  
> > +#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
> > +#define QUORUM_OPT_BLKVERIFY      "blkverify"
> > +
> >  /* This union holds a vote hash value */
> >  typedef union QuorumVoteValue {
> >      char h[HASH_LENGTH];       /* SHA-256 hash */
> > @@ -672,12 +675,170 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
> >      return false;
> >  }
> >  
> > +static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
> > +{
> > +
> > +    if (threshold < 1) {
> > +        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
> > +                  "vote-threshold", "value >= 1");
> > +        return -ERANGE;
> > +    }
> > +
> > +    if (threshold > num_children) {
> > +        error_setg(errp, "threshold may not exceed children count");
> > +        return -ERANGE;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static QemuOptsList quorum_runtime_opts = {
> > +    .name = "quorum",
> > +    .head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
> > +    .desc = {
> > +        {
> > +            .name = QUORUM_OPT_VOTE_THRESHOLD,
> > +            .type = QEMU_OPT_NUMBER,
> > +            .help = "The number of vote needed for reaching quorum",
> > +        },
> > +        {
> > +            .name = QUORUM_OPT_BLKVERIFY,
> > +            .type = QEMU_OPT_BOOL,
> > +            .help = "Trigger block verify mode if set",
> > +        },
> > +        { /* end of list */ }
> > +    },
> > +};
> > +
> > +static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
> > +                       Error **errp)
> > +{
> > +    BDRVQuorumState *s = bs->opaque;
> > +    Error *local_err = NULL;
> > +    QemuOpts *opts;
> > +    bool *opened;
> > +    QDict *sub = NULL;
> > +    QList *list = NULL;
> > +    const QListEntry *lentry;
> > +    const QDictEntry *dentry;
> > +    int i;
> > +    int ret = 0;
> > +
> > +    qdict_flatten(options);
> > +    qdict_extract_subqdict(options, &sub, "children.");
> > +    qdict_array_split(sub, &list);
> > +
> > +    /* count how many different children are present and validate
> > +     * qdict_size(sub) address the open by reference case
> > +     */
> > +    s->num_children = !qlist_size(list) ? qdict_size(sub) : qlist_size(list);
> > +    if (s->num_children < 2) {
> > +        error_setg(&local_err,
> > +                   "Number of provided children must be greater than 1");
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
> > +    qemu_opts_absorb_qdict(opts, options, &local_err);
> > +    if (error_is_set(&local_err)) {
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
> > +
> > +    /* and validate it against s->num_children */
> > +    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
> > +    if (ret < 0) {
> > +        goto exit;
> > +    }
> > +
> > +    /* is the driver in blkverify mode */
> > +    if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
> > +        s->num_children == 2 && s->threshold == 2) {
> > +        s->is_blkverify = true;
> > +    } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
> > +        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
> > +                "and using two files with vote_threshold=2\n");
> > +    }
> > +
> > +    /* allocate the children BlockDriverState array */
> > +    s->bs = g_new0(BlockDriverState *, s->num_children);
> > +    opened = g_new0(bool, s->num_children);
> > +
> > +    /* Open by file name or options dict (command line or QMP) */
> > +    if (s->num_children == qlist_size(list)) {
> > +        for (i = 0, lentry = qlist_first(list); lentry;
> > +            lentry = qlist_next(lentry), i++) {
> > +            QDict *d = qobject_to_qdict(lentry->value);
> > +            QINCREF(d);
> > +            ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL, &local_err);
> 
> 
> Shouldn't this bdrv_open call be?
> 
> ret = bdrv_open(s->bs[i], NULL, d, flags, NULL, &local_err);
> 
> 
> > +            if (ret < 0) {
> > +                goto close_exit;
> > +            }
> > +            opened[i] = true;
> > +        }
> > +    /* Open by QMP references */
> > +    } else {
> > +        for (i = 0, dentry = qdict_first(sub); dentry;
> > +             dentry = qdict_next(sub, dentry), i++) {
> > +            QString *string = qobject_to_qstring(dentry->value);
> > +            ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
> > +                            flags, NULL, &local_err);
> 
> 
> This other bdrv_open() call seems to be not right as well, I think it should be:
> 
>             ret = bdrv_open(s->bs[i], qstring_get_str(string), NULL,
>                             flags, NULL, &local_err);

These calls use the new syntax of bdrv_open based on the
"[PATCH v4 0/8] block: Integrate bdrv_file_open() into bdrv_open()" series
by Max Reitz.

Best regards

Benoît

> 
> 
> 
> > +            if (ret < 0) {
> > +                goto close_exit;
> > +            }
> > +            opened[i] = true;
> > +        }
> > +    }
> > +
> > +    g_free(opened);
> > +    goto exit;
> > +
> > +close_exit:
> > +    /* cleanup on error */
> > +    for (i = 0; i < s->num_children; i++) {
> > +        if (!opened[i]) {
> > +            continue;
> > +        }
> > +        bdrv_unref(s->bs[i]);
> > +    }
> > +    g_free(s->bs);
> > +    g_free(opened);
> > +exit:
> > +    /* propagate error */
> > +    if (error_is_set(&local_err)) {
> > +        error_propagate(errp, local_err);
> > +    }
> > +    QDECREF(list);
> > +    QDECREF(sub);
> > +    return ret;
> > +}
> > +
> > +static void quorum_close(BlockDriverState *bs)
> > +{
> > +    BDRVQuorumState *s = bs->opaque;
> > +    int i;
> > +
> > +    for (i = 0; i < s->num_children; i++) {
> > +        bdrv_unref(s->bs[i]);
> > +    }
> > +
> > +    g_free(s->bs);
> > +}
> > +
> >  static BlockDriver bdrv_quorum = {
> >      .format_name        = "quorum",
> >      .protocol_name      = "quorum",
> >  
> >      .instance_size      = sizeof(BDRVQuorumState),
> >  
> > +    .bdrv_file_open     = quorum_open,
> > +    .bdrv_close         = quorum_close,
> > +
> > +    .authorizations     = { true, true },
> > +
> >      .bdrv_co_flush_to_disk = quorum_co_flush,
> >  
> >      .bdrv_getlength     = quorum_getlength,
> > diff --git a/monitor.c b/monitor.c
> > index 81ffa0f..ed5bb98 100644
> > --- a/monitor.c
> > +++ b/monitor.c
> > @@ -639,6 +639,9 @@ static void monitor_protocol_event_init(void)
> >      monitor_protocol_event_throttle(QEVENT_RTC_CHANGE, 1000);
> >      monitor_protocol_event_throttle(QEVENT_BALLOON_CHANGE, 1000);
> >      monitor_protocol_event_throttle(QEVENT_WATCHDOG, 1000);
> > +    /* limit the rate of quorum events to avoid hammering the management */
> > +    monitor_protocol_event_throttle(QEVENT_QUORUM_REPORT_BAD, 1000);
> > +    monitor_protocol_event_throttle(QEVENT_QUORUM_FAILURE, 1000);
> >  }
> >  
> >  /**
> > diff --git a/qapi-schema.json b/qapi-schema.json
> > index 7cfb5e5..990d0c5 100644
> > --- a/qapi-schema.json
> > +++ b/qapi-schema.json
> > @@ -4352,6 +4352,24 @@
> >              'raw': 'BlockdevRef' } }
> >  
> >  ##
> > +# @BlockdevOptionsQuorum
> > +#
> > +# Driver specific block device options for Quorum
> > +#
> > +# @blkverify:      #optional true if the driver must print content mismatch
> > +#
> > +# @children:       the children block device to use
> > +#
> > +# @vote_threshold: the vote limit under which a read will fail
> > +#
> > +# Since: 2.0
> > +##
> > +{ 'type': 'BlockdevOptionsQuorum',
> > +  'data': { '*blkverify': 'bool',
> > +            'children': [ 'BlockdevRef' ],
> > +            'vote-threshold': 'int' } }
> > +
> > +##
> >  # @BlockdevOptions
> >  #
> >  # Options for creating a block device.
> > @@ -4390,7 +4408,8 @@
> >        'vdi':        'BlockdevOptionsGenericFormat',
> >        'vhdx':       'BlockdevOptionsGenericFormat',
> >        'vmdk':       'BlockdevOptionsGenericCOWFormat',
> > -      'vpc':        'BlockdevOptionsGenericFormat'
> > +      'vpc':        'BlockdevOptionsGenericFormat',
> > +      'quorum':     'BlockdevOptionsQuorum'
> >    } }
> >  
> >  ##
> > -- 
> > 1.8.3.2
> > 
> > 
> 
> -- 
> Leandro Dorileo
Max Reitz Feb. 18, 2014, 5:49 p.m. UTC | #3
On 18.02.2014 18:37, Leandro Dorileo wrote:
> On Tue, Feb 18, 2014 at 01:11:26PM +0100, Beno??t Canet wrote:
>> From: Beno??t Canet <benoit@irqsave.net>
>>
>> Example of command line:
>>
>> -drive if=virtio,driver=quorum,\
>> children.0.file.filename=1.raw,\
>> children.0.node-name=1.raw,\
>> children.0.driver=raw,\
>> children.1.file.filename=2.raw,\
>> children.1.node-name=2.raw,\
>> children.1.driver=raw,\
>> children.2.file.filename=3.raw,\
>> children.2.node-name=3.raw,\
>> children.2.driver=raw,\
>> vote-threshold=2
>>
>> blkverify=on with vote-threshold=2 and two files can be passed to
>> emulate blkverify.
>>
>> Signed-off-by: Benoit Canet <benoit@irqsave.net>
>> ---
>>   block/quorum.c   | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   monitor.c        |   3 ++
>>   qapi-schema.json |  21 +++++++-
>>   3 files changed, 184 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/quorum.c b/block/quorum.c
>> index 40832c0..18721ba 100644
>> --- a/block/quorum.c
>> +++ b/block/quorum.c
>> @@ -20,6 +20,9 @@
>>   
>>   #define HASH_LENGTH 32
>>   
>> +#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
>> +#define QUORUM_OPT_BLKVERIFY      "blkverify"
>> +
>>   /* This union holds a vote hash value */
>>   typedef union QuorumVoteValue {
>>       char h[HASH_LENGTH];       /* SHA-256 hash */
>> @@ -672,12 +675,170 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
>>       return false;
>>   }
>>   
>> +static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
>> +{
>> +
>> +    if (threshold < 1) {
>> +        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
>> +                  "vote-threshold", "value >= 1");
>> +        return -ERANGE;
>> +    }
>> +
>> +    if (threshold > num_children) {
>> +        error_setg(errp, "threshold may not exceed children count");
>> +        return -ERANGE;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static QemuOptsList quorum_runtime_opts = {
>> +    .name = "quorum",
>> +    .head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
>> +    .desc = {
>> +        {
>> +            .name = QUORUM_OPT_VOTE_THRESHOLD,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "The number of vote needed for reaching quorum",
>> +        },
>> +        {
>> +            .name = QUORUM_OPT_BLKVERIFY,
>> +            .type = QEMU_OPT_BOOL,
>> +            .help = "Trigger block verify mode if set",
>> +        },
>> +        { /* end of list */ }
>> +    },
>> +};
>> +
>> +static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
>> +                       Error **errp)
>> +{
>> +    BDRVQuorumState *s = bs->opaque;
>> +    Error *local_err = NULL;
>> +    QemuOpts *opts;
>> +    bool *opened;
>> +    QDict *sub = NULL;
>> +    QList *list = NULL;
>> +    const QListEntry *lentry;
>> +    const QDictEntry *dentry;
>> +    int i;
>> +    int ret = 0;
>> +
>> +    qdict_flatten(options);
>> +    qdict_extract_subqdict(options, &sub, "children.");
>> +    qdict_array_split(sub, &list);
>> +
>> +    /* count how many different children are present and validate
>> +     * qdict_size(sub) address the open by reference case
>> +     */
>> +    s->num_children = !qlist_size(list) ? qdict_size(sub) : qlist_size(list);
>> +    if (s->num_children < 2) {
>> +        error_setg(&local_err,
>> +                   "Number of provided children must be greater than 1");
>> +        ret = -EINVAL;
>> +        goto exit;
>> +    }
>> +
>> +    opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>> +    if (error_is_set(&local_err)) {
>> +        ret = -EINVAL;
>> +        goto exit;
>> +    }
>> +
>> +    s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
>> +
>> +    /* and validate it against s->num_children */
>> +    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
>> +    if (ret < 0) {
>> +        goto exit;
>> +    }
>> +
>> +    /* is the driver in blkverify mode */
>> +    if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
>> +        s->num_children == 2 && s->threshold == 2) {
>> +        s->is_blkverify = true;
>> +    } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
>> +        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
>> +                "and using two files with vote_threshold=2\n");
>> +    }
>> +
>> +    /* allocate the children BlockDriverState array */
>> +    s->bs = g_new0(BlockDriverState *, s->num_children);
>> +    opened = g_new0(bool, s->num_children);
>> +
>> +    /* Open by file name or options dict (command line or QMP) */
>> +    if (s->num_children == qlist_size(list)) {
>> +        for (i = 0, lentry = qlist_first(list); lentry;
>> +            lentry = qlist_next(lentry), i++) {
>> +            QDict *d = qobject_to_qdict(lentry->value);
>> +            QINCREF(d);
>> +            ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL, &local_err);
>
> Shouldn't this bdrv_open call be?
>
> ret = bdrv_open(s->bs[i], NULL, d, flags, NULL, &local_err);

This series is meant to be applied on my bdrv_open()/bdrv_file_open() 
series which added a new "reference" string parameter to bdrv_open() 
(after the filename). Giving NULL for that parameter is correct here.

>> +            if (ret < 0) {
>> +                goto close_exit;
>> +            }
>> +            opened[i] = true;
>> +        }
>> +    /* Open by QMP references */
>> +    } else {
>> +        for (i = 0, dentry = qdict_first(sub); dentry;
>> +             dentry = qdict_next(sub, dentry), i++) {
>> +            QString *string = qobject_to_qstring(dentry->value);
>> +            ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
>> +                            flags, NULL, &local_err);
>
> This other bdrv_open() call seems to be not right as well, I think it should be:
>
>              ret = bdrv_open(s->bs[i], qstring_get_str(string), NULL,
>                              flags, NULL, &local_err);

Here, the string is given as that new reference parameter. Therefore, 
this is correct as well (when applied on top of my series).

Max

>> +            if (ret < 0) {
>> +                goto close_exit;
>> +            }
>> +            opened[i] = true;
>> +        }
>> +    }
>> +
>> +    g_free(opened);
>> +    goto exit;
>> +
>> +close_exit:
>> +    /* cleanup on error */
>> +    for (i = 0; i < s->num_children; i++) {
>> +        if (!opened[i]) {
>> +            continue;
>> +        }
>> +        bdrv_unref(s->bs[i]);
>> +    }
>> +    g_free(s->bs);
>> +    g_free(opened);
>> +exit:
>> +    /* propagate error */
>> +    if (error_is_set(&local_err)) {
>> +        error_propagate(errp, local_err);
>> +    }
>> +    QDECREF(list);
>> +    QDECREF(sub);
>> +    return ret;
>> +}
>> +
>> +static void quorum_close(BlockDriverState *bs)
>> +{
>> +    BDRVQuorumState *s = bs->opaque;
>> +    int i;
>> +
>> +    for (i = 0; i < s->num_children; i++) {
>> +        bdrv_unref(s->bs[i]);
>> +    }
>> +
>> +    g_free(s->bs);
>> +}
>> +
>>   static BlockDriver bdrv_quorum = {
>>       .format_name        = "quorum",
>>       .protocol_name      = "quorum",
>>   
>>       .instance_size      = sizeof(BDRVQuorumState),
>>   
>> +    .bdrv_file_open     = quorum_open,
>> +    .bdrv_close         = quorum_close,
>> +
>> +    .authorizations     = { true, true },
>> +
>>       .bdrv_co_flush_to_disk = quorum_co_flush,
>>   
>>       .bdrv_getlength     = quorum_getlength,
>> diff --git a/monitor.c b/monitor.c
>> index 81ffa0f..ed5bb98 100644
>> --- a/monitor.c
>> +++ b/monitor.c
>> @@ -639,6 +639,9 @@ static void monitor_protocol_event_init(void)
>>       monitor_protocol_event_throttle(QEVENT_RTC_CHANGE, 1000);
>>       monitor_protocol_event_throttle(QEVENT_BALLOON_CHANGE, 1000);
>>       monitor_protocol_event_throttle(QEVENT_WATCHDOG, 1000);
>> +    /* limit the rate of quorum events to avoid hammering the management */
>> +    monitor_protocol_event_throttle(QEVENT_QUORUM_REPORT_BAD, 1000);
>> +    monitor_protocol_event_throttle(QEVENT_QUORUM_FAILURE, 1000);
>>   }
>>   
>>   /**
>> diff --git a/qapi-schema.json b/qapi-schema.json
>> index 7cfb5e5..990d0c5 100644
>> --- a/qapi-schema.json
>> +++ b/qapi-schema.json
>> @@ -4352,6 +4352,24 @@
>>               'raw': 'BlockdevRef' } }
>>   
>>   ##
>> +# @BlockdevOptionsQuorum
>> +#
>> +# Driver specific block device options for Quorum
>> +#
>> +# @blkverify:      #optional true if the driver must print content mismatch
>> +#
>> +# @children:       the children block device to use
>> +#
>> +# @vote_threshold: the vote limit under which a read will fail
>> +#
>> +# Since: 2.0
>> +##
>> +{ 'type': 'BlockdevOptionsQuorum',
>> +  'data': { '*blkverify': 'bool',
>> +            'children': [ 'BlockdevRef' ],
>> +            'vote-threshold': 'int' } }
>> +
>> +##
>>   # @BlockdevOptions
>>   #
>>   # Options for creating a block device.
>> @@ -4390,7 +4408,8 @@
>>         'vdi':        'BlockdevOptionsGenericFormat',
>>         'vhdx':       'BlockdevOptionsGenericFormat',
>>         'vmdk':       'BlockdevOptionsGenericCOWFormat',
>> -      'vpc':        'BlockdevOptionsGenericFormat'
>> +      'vpc':        'BlockdevOptionsGenericFormat',
>> +      'quorum':     'BlockdevOptionsQuorum'
>>     } }
>>   
>>   ##
>> -- 
>> 1.8.3.2
>>
>>
Max Reitz Feb. 18, 2014, 6:12 p.m. UTC | #4
On 18.02.2014 13:11, Benoît Canet wrote:
> From: Benoît Canet <benoit@irqsave.net>
>
> Example of command line:
>
> -drive if=virtio,driver=quorum,\
> children.0.file.filename=1.raw,\
> children.0.node-name=1.raw,\
> children.0.driver=raw,\
> children.1.file.filename=2.raw,\
> children.1.node-name=2.raw,\
> children.1.driver=raw,\
> children.2.file.filename=3.raw,\
> children.2.node-name=3.raw,\
> children.2.driver=raw,\
> vote-threshold=2
>
> blkverify=on with vote-threshold=2 and two files can be passed to
> emulate blkverify.
>
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>   block/quorum.c   | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   monitor.c        |   3 ++
>   qapi-schema.json |  21 +++++++-
>   3 files changed, 184 insertions(+), 1 deletion(-)

Reviewed-by: Max Reitz <mreitz@redhat.com>
Leandro Dorileo Feb. 18, 2014, 7:39 p.m. UTC | #5
On Tue, Feb 18, 2014 at 06:48:27PM +0100, Benoît Canet wrote:
> The Tuesday 18 Feb 2014 à 17:37:08 (+0000), Leandro Dorileo wrote :
> > On Tue, Feb 18, 2014 at 01:11:26PM +0100, Beno??t Canet wrote:
> > > From: Beno??t Canet <benoit@irqsave.net>
> > > 
> > > Example of command line:
> > > 
> > > -drive if=virtio,driver=quorum,\
> > > children.0.file.filename=1.raw,\
> > > children.0.node-name=1.raw,\
> > > children.0.driver=raw,\
> > > children.1.file.filename=2.raw,\
> > > children.1.node-name=2.raw,\
> > > children.1.driver=raw,\
> > > children.2.file.filename=3.raw,\
> > > children.2.node-name=3.raw,\
> > > children.2.driver=raw,\
> > > vote-threshold=2
> > > 
> > > blkverify=on with vote-threshold=2 and two files can be passed to
> > > emulate blkverify.
> > > 
> > > Signed-off-by: Benoit Canet <benoit@irqsave.net>
> > > ---
> > >  block/quorum.c   | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  monitor.c        |   3 ++
> > >  qapi-schema.json |  21 +++++++-
> > >  3 files changed, 184 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/block/quorum.c b/block/quorum.c
> > > index 40832c0..18721ba 100644
> > > --- a/block/quorum.c
> > > +++ b/block/quorum.c
> > > @@ -20,6 +20,9 @@
> > >  
> > >  #define HASH_LENGTH 32
> > >  
> > > +#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
> > > +#define QUORUM_OPT_BLKVERIFY      "blkverify"
> > > +
> > >  /* This union holds a vote hash value */
> > >  typedef union QuorumVoteValue {
> > >      char h[HASH_LENGTH];       /* SHA-256 hash */
> > > @@ -672,12 +675,170 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
> > >      return false;
> > >  }
> > >  
> > > +static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
> > > +{
> > > +
> > > +    if (threshold < 1) {
> > > +        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
> > > +                  "vote-threshold", "value >= 1");
> > > +        return -ERANGE;
> > > +    }
> > > +
> > > +    if (threshold > num_children) {
> > > +        error_setg(errp, "threshold may not exceed children count");
> > > +        return -ERANGE;
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static QemuOptsList quorum_runtime_opts = {
> > > +    .name = "quorum",
> > > +    .head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
> > > +    .desc = {
> > > +        {
> > > +            .name = QUORUM_OPT_VOTE_THRESHOLD,
> > > +            .type = QEMU_OPT_NUMBER,
> > > +            .help = "The number of vote needed for reaching quorum",
> > > +        },
> > > +        {
> > > +            .name = QUORUM_OPT_BLKVERIFY,
> > > +            .type = QEMU_OPT_BOOL,
> > > +            .help = "Trigger block verify mode if set",
> > > +        },
> > > +        { /* end of list */ }
> > > +    },
> > > +};
> > > +
> > > +static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
> > > +                       Error **errp)
> > > +{
> > > +    BDRVQuorumState *s = bs->opaque;
> > > +    Error *local_err = NULL;
> > > +    QemuOpts *opts;
> > > +    bool *opened;
> > > +    QDict *sub = NULL;
> > > +    QList *list = NULL;
> > > +    const QListEntry *lentry;
> > > +    const QDictEntry *dentry;
> > > +    int i;
> > > +    int ret = 0;
> > > +
> > > +    qdict_flatten(options);
> > > +    qdict_extract_subqdict(options, &sub, "children.");
> > > +    qdict_array_split(sub, &list);
> > > +
> > > +    /* count how many different children are present and validate
> > > +     * qdict_size(sub) address the open by reference case
> > > +     */
> > > +    s->num_children = !qlist_size(list) ? qdict_size(sub) : qlist_size(list);
> > > +    if (s->num_children < 2) {
> > > +        error_setg(&local_err,
> > > +                   "Number of provided children must be greater than 1");
> > > +        ret = -EINVAL;
> > > +        goto exit;
> > > +    }
> > > +
> > > +    opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
> > > +    qemu_opts_absorb_qdict(opts, options, &local_err);
> > > +    if (error_is_set(&local_err)) {
> > > +        ret = -EINVAL;
> > > +        goto exit;
> > > +    }
> > > +
> > > +    s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
> > > +
> > > +    /* and validate it against s->num_children */
> > > +    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
> > > +    if (ret < 0) {
> > > +        goto exit;
> > > +    }
> > > +
> > > +    /* is the driver in blkverify mode */
> > > +    if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
> > > +        s->num_children == 2 && s->threshold == 2) {
> > > +        s->is_blkverify = true;
> > > +    } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
> > > +        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
> > > +                "and using two files with vote_threshold=2\n");
> > > +    }
> > > +
> > > +    /* allocate the children BlockDriverState array */
> > > +    s->bs = g_new0(BlockDriverState *, s->num_children);
> > > +    opened = g_new0(bool, s->num_children);
> > > +
> > > +    /* Open by file name or options dict (command line or QMP) */
> > > +    if (s->num_children == qlist_size(list)) {
> > > +        for (i = 0, lentry = qlist_first(list); lentry;
> > > +            lentry = qlist_next(lentry), i++) {
> > > +            QDict *d = qobject_to_qdict(lentry->value);
> > > +            QINCREF(d);
> > > +            ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL, &local_err);
> > 
> > 
> > Shouldn't this bdrv_open call be?
> > 
> > ret = bdrv_open(s->bs[i], NULL, d, flags, NULL, &local_err);
> > 
> > 
> > > +            if (ret < 0) {
> > > +                goto close_exit;
> > > +            }
> > > +            opened[i] = true;
> > > +        }
> > > +    /* Open by QMP references */
> > > +    } else {
> > > +        for (i = 0, dentry = qdict_first(sub); dentry;
> > > +             dentry = qdict_next(sub, dentry), i++) {
> > > +            QString *string = qobject_to_qstring(dentry->value);
> > > +            ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
> > > +                            flags, NULL, &local_err);
> > 
> > 
> > This other bdrv_open() call seems to be not right as well, I think it should be:
> > 
> >             ret = bdrv_open(s->bs[i], qstring_get_str(string), NULL,
> >                             flags, NULL, &local_err);
> 
> These calls use the new syntax of bdrv_open based on the
> "[PATCH v4 0/8] block: Integrate bdrv_file_open() into bdrv_open()" series
> by Max Reitz.
> 


Yeah, I've missed that one :)


> Best regards
> 
> Benoît
> 
> > 
> > 
> > 
> > > +            if (ret < 0) {
> > > +                goto close_exit;
> > > +            }
> > > +            opened[i] = true;
> > > +        }
> > > +    }
> > > +
> > > +    g_free(opened);
> > > +    goto exit;
> > > +
> > > +close_exit:
> > > +    /* cleanup on error */
> > > +    for (i = 0; i < s->num_children; i++) {
> > > +        if (!opened[i]) {
> > > +            continue;
> > > +        }
> > > +        bdrv_unref(s->bs[i]);
> > > +    }
> > > +    g_free(s->bs);
> > > +    g_free(opened);
> > > +exit:
> > > +    /* propagate error */
> > > +    if (error_is_set(&local_err)) {
> > > +        error_propagate(errp, local_err);
> > > +    }
> > > +    QDECREF(list);
> > > +    QDECREF(sub);
> > > +    return ret;
> > > +}
> > > +
> > > +static void quorum_close(BlockDriverState *bs)
> > > +{
> > > +    BDRVQuorumState *s = bs->opaque;
> > > +    int i;
> > > +
> > > +    for (i = 0; i < s->num_children; i++) {
> > > +        bdrv_unref(s->bs[i]);
> > > +    }
> > > +
> > > +    g_free(s->bs);
> > > +}
> > > +
> > >  static BlockDriver bdrv_quorum = {
> > >      .format_name        = "quorum",
> > >      .protocol_name      = "quorum",
> > >  
> > >      .instance_size      = sizeof(BDRVQuorumState),
> > >  
> > > +    .bdrv_file_open     = quorum_open,
> > > +    .bdrv_close         = quorum_close,
> > > +
> > > +    .authorizations     = { true, true },
> > > +
> > >      .bdrv_co_flush_to_disk = quorum_co_flush,
> > >  
> > >      .bdrv_getlength     = quorum_getlength,
> > > diff --git a/monitor.c b/monitor.c
> > > index 81ffa0f..ed5bb98 100644
> > > --- a/monitor.c
> > > +++ b/monitor.c
> > > @@ -639,6 +639,9 @@ static void monitor_protocol_event_init(void)
> > >      monitor_protocol_event_throttle(QEVENT_RTC_CHANGE, 1000);
> > >      monitor_protocol_event_throttle(QEVENT_BALLOON_CHANGE, 1000);
> > >      monitor_protocol_event_throttle(QEVENT_WATCHDOG, 1000);
> > > +    /* limit the rate of quorum events to avoid hammering the management */
> > > +    monitor_protocol_event_throttle(QEVENT_QUORUM_REPORT_BAD, 1000);
> > > +    monitor_protocol_event_throttle(QEVENT_QUORUM_FAILURE, 1000);
> > >  }
> > >  
> > >  /**
> > > diff --git a/qapi-schema.json b/qapi-schema.json
> > > index 7cfb5e5..990d0c5 100644
> > > --- a/qapi-schema.json
> > > +++ b/qapi-schema.json
> > > @@ -4352,6 +4352,24 @@
> > >              'raw': 'BlockdevRef' } }
> > >  
> > >  ##
> > > +# @BlockdevOptionsQuorum
> > > +#
> > > +# Driver specific block device options for Quorum
> > > +#
> > > +# @blkverify:      #optional true if the driver must print content mismatch
> > > +#
> > > +# @children:       the children block device to use
> > > +#
> > > +# @vote_threshold: the vote limit under which a read will fail
> > > +#
> > > +# Since: 2.0
> > > +##
> > > +{ 'type': 'BlockdevOptionsQuorum',
> > > +  'data': { '*blkverify': 'bool',
> > > +            'children': [ 'BlockdevRef' ],
> > > +            'vote-threshold': 'int' } }
> > > +
> > > +##
> > >  # @BlockdevOptions
> > >  #
> > >  # Options for creating a block device.
> > > @@ -4390,7 +4408,8 @@
> > >        'vdi':        'BlockdevOptionsGenericFormat',
> > >        'vhdx':       'BlockdevOptionsGenericFormat',
> > >        'vmdk':       'BlockdevOptionsGenericCOWFormat',
> > > -      'vpc':        'BlockdevOptionsGenericFormat'
> > > +      'vpc':        'BlockdevOptionsGenericFormat',
> > > +      'quorum':     'BlockdevOptionsQuorum'
> > >    } }
> > >  
> > >  ##
> > > -- 
> > > 1.8.3.2
> > > 
> > > 
> > 
> > -- 
> > Leandro Dorileo
diff mbox

Patch

diff --git a/block/quorum.c b/block/quorum.c
index 40832c0..18721ba 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -20,6 +20,9 @@ 
 
 #define HASH_LENGTH 32
 
+#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
+#define QUORUM_OPT_BLKVERIFY      "blkverify"
+
 /* This union holds a vote hash value */
 typedef union QuorumVoteValue {
     char h[HASH_LENGTH];       /* SHA-256 hash */
@@ -672,12 +675,170 @@  static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
     return false;
 }
 
+static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
+{
+
+    if (threshold < 1) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+                  "vote-threshold", "value >= 1");
+        return -ERANGE;
+    }
+
+    if (threshold > num_children) {
+        error_setg(errp, "threshold may not exceed children count");
+        return -ERANGE;
+    }
+
+    return 0;
+}
+
+static QemuOptsList quorum_runtime_opts = {
+    .name = "quorum",
+    .head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
+    .desc = {
+        {
+            .name = QUORUM_OPT_VOTE_THRESHOLD,
+            .type = QEMU_OPT_NUMBER,
+            .help = "The number of vote needed for reaching quorum",
+        },
+        {
+            .name = QUORUM_OPT_BLKVERIFY,
+            .type = QEMU_OPT_BOOL,
+            .help = "Trigger block verify mode if set",
+        },
+        { /* end of list */ }
+    },
+};
+
+static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
+                       Error **errp)
+{
+    BDRVQuorumState *s = bs->opaque;
+    Error *local_err = NULL;
+    QemuOpts *opts;
+    bool *opened;
+    QDict *sub = NULL;
+    QList *list = NULL;
+    const QListEntry *lentry;
+    const QDictEntry *dentry;
+    int i;
+    int ret = 0;
+
+    qdict_flatten(options);
+    qdict_extract_subqdict(options, &sub, "children.");
+    qdict_array_split(sub, &list);
+
+    /* count how many different children are present and validate
+     * qdict_size(sub) address the open by reference case
+     */
+    s->num_children = !qlist_size(list) ? qdict_size(sub) : qlist_size(list);
+    if (s->num_children < 2) {
+        error_setg(&local_err,
+                   "Number of provided children must be greater than 1");
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (error_is_set(&local_err)) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
+
+    /* and validate it against s->num_children */
+    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    /* is the driver in blkverify mode */
+    if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
+        s->num_children == 2 && s->threshold == 2) {
+        s->is_blkverify = true;
+    } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
+        fprintf(stderr, "blkverify mode is set by setting blkverify=on "
+                "and using two files with vote_threshold=2\n");
+    }
+
+    /* allocate the children BlockDriverState array */
+    s->bs = g_new0(BlockDriverState *, s->num_children);
+    opened = g_new0(bool, s->num_children);
+
+    /* Open by file name or options dict (command line or QMP) */
+    if (s->num_children == qlist_size(list)) {
+        for (i = 0, lentry = qlist_first(list); lentry;
+            lentry = qlist_next(lentry), i++) {
+            QDict *d = qobject_to_qdict(lentry->value);
+            QINCREF(d);
+            ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL, &local_err);
+            if (ret < 0) {
+                goto close_exit;
+            }
+            opened[i] = true;
+        }
+    /* Open by QMP references */
+    } else {
+        for (i = 0, dentry = qdict_first(sub); dentry;
+             dentry = qdict_next(sub, dentry), i++) {
+            QString *string = qobject_to_qstring(dentry->value);
+            ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
+                            flags, NULL, &local_err);
+            if (ret < 0) {
+                goto close_exit;
+            }
+            opened[i] = true;
+        }
+    }
+
+    g_free(opened);
+    goto exit;
+
+close_exit:
+    /* cleanup on error */
+    for (i = 0; i < s->num_children; i++) {
+        if (!opened[i]) {
+            continue;
+        }
+        bdrv_unref(s->bs[i]);
+    }
+    g_free(s->bs);
+    g_free(opened);
+exit:
+    /* propagate error */
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+    }
+    QDECREF(list);
+    QDECREF(sub);
+    return ret;
+}
+
+static void quorum_close(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        bdrv_unref(s->bs[i]);
+    }
+
+    g_free(s->bs);
+}
+
 static BlockDriver bdrv_quorum = {
     .format_name        = "quorum",
     .protocol_name      = "quorum",
 
     .instance_size      = sizeof(BDRVQuorumState),
 
+    .bdrv_file_open     = quorum_open,
+    .bdrv_close         = quorum_close,
+
+    .authorizations     = { true, true },
+
     .bdrv_co_flush_to_disk = quorum_co_flush,
 
     .bdrv_getlength     = quorum_getlength,
diff --git a/monitor.c b/monitor.c
index 81ffa0f..ed5bb98 100644
--- a/monitor.c
+++ b/monitor.c
@@ -639,6 +639,9 @@  static void monitor_protocol_event_init(void)
     monitor_protocol_event_throttle(QEVENT_RTC_CHANGE, 1000);
     monitor_protocol_event_throttle(QEVENT_BALLOON_CHANGE, 1000);
     monitor_protocol_event_throttle(QEVENT_WATCHDOG, 1000);
+    /* limit the rate of quorum events to avoid hammering the management */
+    monitor_protocol_event_throttle(QEVENT_QUORUM_REPORT_BAD, 1000);
+    monitor_protocol_event_throttle(QEVENT_QUORUM_FAILURE, 1000);
 }
 
 /**
diff --git a/qapi-schema.json b/qapi-schema.json
index 7cfb5e5..990d0c5 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4352,6 +4352,24 @@ 
             'raw': 'BlockdevRef' } }
 
 ##
+# @BlockdevOptionsQuorum
+#
+# Driver specific block device options for Quorum
+#
+# @blkverify:      #optional true if the driver must print content mismatch
+#
+# @children:       the children block device to use
+#
+# @vote_threshold: the vote limit under which a read will fail
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsQuorum',
+  'data': { '*blkverify': 'bool',
+            'children': [ 'BlockdevRef' ],
+            'vote-threshold': 'int' } }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.
@@ -4390,7 +4408,8 @@ 
       'vdi':        'BlockdevOptionsGenericFormat',
       'vhdx':       'BlockdevOptionsGenericFormat',
       'vmdk':       'BlockdevOptionsGenericCOWFormat',
-      'vpc':        'BlockdevOptionsGenericFormat'
+      'vpc':        'BlockdevOptionsGenericFormat',
+      'quorum':     'BlockdevOptionsQuorum'
   } }
 
 ##