diff mbox

[V10,12/13] quorum: Add quorum_open() and quorum_close().

Message ID 1390927974-31325-13-git-send-email-benoit.canet@irqsave.net
State New
Headers show

Commit Message

Benoît Canet Jan. 28, 2014, 4:52 p.m. UTC
From: Benoît Canet <benoit@irqsave.net>

Example of command line:
-drive if=virtio,file.driver=quorum,\
file.children.0.file.filename=1.raw,\
file.children.0.node-name=1.raw,\
file.children.0.driver=raw,\
file.children.1.file.filename=2.raw,\
file.children.1.node-name=2.raw,\
file.children.1.driver=raw,\
file.children.2.file.filename=3.raw,\
file.children.2.node-name=3.raw,\
file.children.2.driver=raw,\
file.vote_threshold=2

file.blkverify=on with file.vote_threshold=2 and two files can be passed to
emulated blkverify.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/quorum.c   | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qapi-schema.json |  21 +++-
 2 files changed, 328 insertions(+), 1 deletion(-)

Comments

Max Reitz Feb. 2, 2014, 11:09 p.m. UTC | #1
On 28.01.2014 17:52, Benoît Canet wrote:
> From: Benoît Canet <benoit@irqsave.net>
>
> Example of command line:
> -drive if=virtio,file.driver=quorum,\
> file.children.0.file.filename=1.raw,\
> file.children.0.node-name=1.raw,\
> file.children.0.driver=raw,\
> file.children.1.file.filename=2.raw,\
> file.children.1.node-name=2.raw,\
> file.children.1.driver=raw,\
> file.children.2.file.filename=3.raw,\
> file.children.2.node-name=3.raw,\
> file.children.2.driver=raw,\
> file.vote_threshold=2
>
> file.blkverify=on with file.vote_threshold=2 and two files can be passed to
> emulated blkverify.
>
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>   block/quorum.c   | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   qapi-schema.json |  21 +++-
>   2 files changed, 328 insertions(+), 1 deletion(-)
>
> diff --git a/block/quorum.c b/block/quorum.c
> index e7b2090..0c0d630 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -17,8 +17,12 @@
>   #include <gnutls/crypto.h>
>   #include "block/block_int.h"
>   #include "qapi/qmp/qjson.h"
> +#include "qapi/qmp/types.h"
> +#include "qemu-common.h"
>   
>   #define HASH_LENGTH 32
> +#define KEY_PREFIX "children."
> +#define KEY_FILENAME_SUFFIX ".file.filename"
>   
>   /* This union holds a vote hash value */
>   typedef union QuorumVoteValue {
> @@ -702,12 +706,316 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
>       return false;
>   }

Perhaps you could make use of qdict_extract_subqdict() and 
qdict_array_split() functions here...? That is, 
qdict_extract_subqdict(..., "children.") and then qdict_array_split() on 
the result?

> +static int quorum_match_key(const char *key,
> +                            char **key_prefix)
> +{
> +    const char *start;
> +    char *next;
> +    unsigned long long idx;
> +    int ret;
> +
> +    *key_prefix = NULL;
> +
> +    /* the following code is a translation of the following pseudo code:
> +     *       match = key.match('(^children\.(\d+)\.)$suffix)
> +     *       if not match:
> +     *           return -1;
> +     *       key_prefix = match.outer_match()
> +     *       idx = match.inner_match()
> +     *
> +     * note: we also match the .file suffix to avoid futher checkings
> +     */
> +
> +    /* this key is not a child */
> +    if (strncmp(key, KEY_PREFIX, strlen(KEY_PREFIX))) {
> +        return -1;
> +    }
> +
> +    /* take first char after prefix */
> +    start = key + strlen(KEY_PREFIX);
> +
> +    /* if the string end here -> scan fail */
> +    if (start[0] == '\0') {
> +        return -1;
> +    }
> +
> +    /* try to retrieve the index */
> +    ret = parse_uint(start, &idx, &next, 10);
> +
> +    /* no int found -> scan fail */
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    /* we are taking a reference via QMP */
> +    if (next - key == strlen(key)) {

"if (!*next)" would probably accomplish the same (or "if next[0] == 
'\0')" to keep your coding style) without having to call strlen().

> +        *key_prefix = g_strdup(key);
> +        return idx;
> +    }
> +
> +    /* match the suffix to avoid matching the same idx
> +     * multiple times and be required to do more checks later
> +     */
> +    if (strncmp(next, KEY_FILENAME_SUFFIX, strlen(KEY_FILENAME_SUFFIX))) {

As stated in my review from October, you may use sizeof() to avoid strlen().

> +        return -1;
> +    }
> +
> +    /* do not include '.' */
> +    int len = next - key;
> +    *key_prefix = g_strndup(key, len);
> +
> +    return idx;
> +}
> +
> +static QDict *quorum_get_children_idx(const QDict *options)
> +{
> +    const QDictEntry *ent;
> +    QDict *result;
> +    char *key_prefix;
> +    int idx;
> +
> +    result = qdict_new();
> +
> +    for (ent = qdict_first(options); ent; ent = qdict_next(options, ent)) {
> +        const char *key = qdict_entry_key(ent);
> +        idx = quorum_match_key(key,
> +                               &key_prefix);
> +
> +        /* if the result zero or positive we got a key */
> +        if (idx < 0) {
> +            continue;
> +        }
> +
> +        qdict_put(result, key_prefix, qint_from_int(idx));
> +    }
> +
> +    return result;
> +}
> +
> +static int quorum_fill_validation_array(bool *array,
> +                                        const QDict *dict,
> +                                        int total,
> +                                        Error **errp)
> +{
> +    const QDictEntry *ent;
> +
> +    /* fill the checking array with children indexes */
> +    for (ent = qdict_first(dict); ent; ent = qdict_next(dict, ent)) {
> +        const char *key = qdict_entry_key(ent);
> +        int idx = qdict_get_int(dict, key);
> +
> +        if (idx < 0 || idx >= total) {
> +            error_setg(errp,
> +                "Children index must be between 0 and children count -1");

Quote from October: I'd prefer "- 1" instead of "-1" (or even "Child 
index must be an unsigned integer smaller than the children count").

> +            return -ERANGE;
> +        }
> +
> +        array[idx] = true;
> +    }
> +
> +    return 0;
> +}
> +
> +static int quorum_valid_indexes(const bool *array, int total, Error **errp)
> +{
> +    int i;
> +
> +    for (i = 0; i < total; i++) {
> +        if (array[i] == true) {
> +            continue;
> +        }
> +
> +        error_setg(errp,
> +            "All child indexes between 0 and children count -1  must be "
> +            " used");

Again, I'd prefer "- 1"; then, there are two spaces to the left of 
"must" and two spaces after "be".

> +        return -ERANGE;
> +    }
> +
> +    return 0;
> +}
> +
> +static int quorum_valid_children_indexes(const QDict *dict,
> +                                         int total,
> +                                         Error **errp)
> +{
> +    bool *array;
> +    int ret = 0;;
> +
> +    /* allocate indexes checking array and put false in it */
> +    array = g_new0(bool, total);
> +
> +    ret = quorum_fill_validation_array(array, dict, total, errp);
> +    if (ret < 0) {
> +        goto free_exit;
> +    }
> +
> +    ret = quorum_valid_indexes(array, total, errp);
> +free_exit:
> +    g_free(array);
> +    return ret;
> +}
> +
> +static int quorum_valid_threshold(int threshold,
> +                                  int total,
> +                                  Error **errp)
> +{
> +
> +    if (threshold < 1) {
> +        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
> +                  "vote-threshold", "value >= 1");
> +        return -ERANGE;
> +    }
> +
> +    if (threshold > total) {
> +        error_setg(errp, "threshold <= children count must be true");

Quote from October: Well, while this might technically be true, I'd 
rather go for "threshold may not exceed children count" instead. ;-)

> +        return -ERANGE;
> +    }
> +
> +    return 0;
> +}
> +
> +static int quorum_open(BlockDriverState *bs,
> +                       QDict *options,
> +                       int flags,
> +                       Error **errp)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    Error *local_err = NULL;
> +    const QDictEntry *ent;
> +    QDict *idx_dict;
> +    bool *opened;
> +    const char *value;
> +    char *next;
> +    int i;
> +    int ret = 0;
> +
> +    /* get a dict of children indexes for validation */
> +    idx_dict = quorum_get_children_idx(options);
> +
> +    /* count how many different children indexes are present and validate */
> +    s->total = qdict_size(idx_dict);
> +    if (s->total < 2) {
> +        error_setg(&local_err,
> +                   "Number of provided children must be greater than 1");
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    /* validate that the set of index is coherent */
> +    ret = quorum_valid_children_indexes(idx_dict, s->total, &local_err);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +
> +    ret = qdict_get_try_int(options, "vote-threshold", -1);
> +    /* from QMP */
> +    if (ret != -1) {
> +        qdict_del(options, "vote-threshold");
> +        s->threshold = ret;
> +    /* from command line */
> +    } else {
> +        /* retrieve the threshold option from the command line */
> +        value = qdict_get_try_str(options, "vote_threshold");
> +        if (!value) {
> +            error_setg(&local_err,
> +                       "vote_threshold must be provided");
> +            ret = -EINVAL;
> +            goto exit;
> +        }
> +        qdict_del(options, "vote_threshold");
> +
> +        ret = parse_uint(value, (unsigned long long *) &s->threshold, &next, 10);

I don't like this cast at all...

> +
> +        /* no int found -> scan fail */
> +        if (ret < 0) {
> +            error_setg(&local_err,
> +                       "invalid voter_threshold specified");

*vote_threshold

> +            ret = -EINVAL;
> +            goto exit;
> +        }
> +    }
> +
> +    /* and validate it againts s->total */

Still *against

> +    ret = quorum_valid_threshold(s->threshold, s->total, &local_err);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +
> +    /* is the driver in blkverify mode */
> +    value = qdict_get_try_str(options, "blkverify");
> +    if (value && !strcmp(value, "on")  &&
> +        s->total == 2 && s->threshold == 2) {
> +        s->is_blkverify = true;
> +    }

Quote from October: If the user specifies anything different from "on" 
or if he does and s->total or s->threshold is not 2, quorum will 
silently work in non-blkverify mode without telling the user. So I'd 
emit a message here if blkverify has been specified and its value is 
different from "off" but s->is_blkverify remains false (i.e., “else if 
(value && strcmp(value, "off")) { fprintf(stderr, "[Some warning]"); }”).

> +    qdict_del(options, "blkverify");
> +
> +    /* allocate the children BlockDriverState array */
> +    s->bs = g_new0(BlockDriverState *, s->total);
> +    opened = g_new0(bool, s->total);
> +
> +    /* open children bs */
> +    for (ent = qdict_first(idx_dict);
> +         ent; ent = qdict_next(idx_dict, ent)) {

This condition fits on a single line.

> +        const char *key = qdict_entry_key(ent);
> +        int idx = qdict_get_int(idx_dict, key);
> +        ret = bdrv_open_image(&s->bs[idx],
> +                              NULL,
> +                              options,
> +                              key,
> +                              flags,
> +                              false,
> +                              &local_err);

This takes two, but definitely not seven.

> +        if (ret < 0) {
> +            goto close_exit;
> +        }
> +        opened[idx] = true;
> +    }
> +
> +    g_free(opened);
> +    goto exit;
> +
> +close_exit:
> +    /* cleanup on error */
> +    for (i = 0; i < s->total; i++) {
> +        if (!opened[i]) {
> +            continue;
> +        }
> +        bdrv_close(s->bs[i]);

You'll probably want bdrv_unref() here.

> +    }
> +    g_free(s->bs);
> +    g_free(opened);
> +exit:
> +    /* propagate error */
> +    if (error_is_set(&local_err)) {
> +        error_propagate(errp, local_err);
> +    }
> +    return ret;
> +}
> +
> +static void quorum_close(BlockDriverState *bs)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->total; i++) {
> +        /* Ensure writes reach stable storage */
> +        bdrv_flush(s->bs[i]);
> +        /* close manually because we'll free s->bs */
> +        bdrv_close(s->bs[i]);

Again, bdrv_unref().

> +    }
> +
> +    g_free(s->bs);
> +}
> +
>   static BlockDriver bdrv_quorum = {
>       .format_name        = "quorum",
>       .protocol_name      = "quorum",
>   
>       .instance_size      = sizeof(BDRVQuorumState),
>   
> +    .bdrv_file_open     = quorum_open,
> +    .bdrv_close         = quorum_close,
> +
>       .bdrv_co_flush_to_disk = quorum_co_flush,
>   
>       .bdrv_getlength     = quorum_getlength,
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 05ced9d..903a3a0 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -4352,6 +4352,24 @@
>               'raw': 'BlockdevRef' } }
>   
>   ##
> +# @BlockdevOptionsQuorum
> +#
> +# Driver specific block device options for Quorum
> +#
> +# @blkverify:      #optional true if the driver must print content mismatch
> +#
> +# @children:       the children block device to use
> +#
> +# @vote_threshold: the vote limit under which a read will fail
> +#
> +# Since: 2.0
> +##
> +{ 'type': 'BlockdevOptionsQuorum',
> +  'data': { '*blkverify': 'bool',
> +            'children': [ 'BlockdevRef' ],
> +            'vote-threshold': 'int' } }
> +
> +##
>   # @BlockdevOptions
>   #
>   # Options for creating a block device.
> @@ -4389,7 +4407,8 @@
>         'vdi':        'BlockdevOptionsGenericFormat',
>         'vhdx':       'BlockdevOptionsGenericFormat',
>         'vmdk':       'BlockdevOptionsGenericCOWFormat',
> -      'vpc':        'BlockdevOptionsGenericFormat'
> +      'vpc':        'BlockdevOptionsGenericFormat',
> +      'quorum':     'BlockdevOptionsQuorum'
>     } }
>   
>   ##

As I've said before, I'd really like it if you could abandon all those 
functions for parsing the incoming options QDict in favor of 
qdict_extract_subqdict() and qdict_array_split() which will most likely 
do the job just fine; and, in fact, I have written qdict_array_split() 
with Quorum in mind. ;-)

Max
Benoît Canet Feb. 3, 2014, 12:21 p.m. UTC | #2
Le Monday 03 Feb 2014 à 00:09:28 (+0100), Max Reitz a écrit :
> On 28.01.2014 17:52, Benoît Canet wrote:
> >From: Benoît Canet <benoit@irqsave.net>
> >
> >Example of command line:
> >-drive if=virtio,file.driver=quorum,\
> >file.children.0.file.filename=1.raw,\
> >file.children.0.node-name=1.raw,\
> >file.children.0.driver=raw,\
> >file.children.1.file.filename=2.raw,\
> >file.children.1.node-name=2.raw,\
> >file.children.1.driver=raw,\
> >file.children.2.file.filename=3.raw,\
> >file.children.2.node-name=3.raw,\
> >file.children.2.driver=raw,\
> >file.vote_threshold=2
> >
> >file.blkverify=on with file.vote_threshold=2 and two files can be passed to
> >emulated blkverify.
> >
> >Signed-off-by: Benoit Canet <benoit@irqsave.net>
> >---
> >  block/quorum.c   | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  qapi-schema.json |  21 +++-
> >  2 files changed, 328 insertions(+), 1 deletion(-)
> >
> >diff --git a/block/quorum.c b/block/quorum.c
> >index e7b2090..0c0d630 100644
> >--- a/block/quorum.c
> >+++ b/block/quorum.c
> >@@ -17,8 +17,12 @@
> >  #include <gnutls/crypto.h>
> >  #include "block/block_int.h"
> >  #include "qapi/qmp/qjson.h"
> >+#include "qapi/qmp/types.h"
> >+#include "qemu-common.h"
> >  #define HASH_LENGTH 32
> >+#define KEY_PREFIX "children."
> >+#define KEY_FILENAME_SUFFIX ".file.filename"
> >  /* This union holds a vote hash value */
> >  typedef union QuorumVoteValue {
> >@@ -702,12 +706,316 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
> >      return false;
> >  }
> 
> Perhaps you could make use of qdict_extract_subqdict() and
> qdict_array_split() functions here...? That is,
> qdict_extract_subqdict(..., "children.") and then
> qdict_array_split() on the result?
> 
> >+static int quorum_match_key(const char *key,
> >+                            char **key_prefix)
> >+{
> >+    const char *start;
> >+    char *next;
> >+    unsigned long long idx;
> >+    int ret;
> >+
> >+    *key_prefix = NULL;
> >+
> >+    /* the following code is a translation of the following pseudo code:
> >+     *       match = key.match('(^children\.(\d+)\.)$suffix)
> >+     *       if not match:
> >+     *           return -1;
> >+     *       key_prefix = match.outer_match()
> >+     *       idx = match.inner_match()
> >+     *
> >+     * note: we also match the .file suffix to avoid futher checkings
> >+     */
> >+
> >+    /* this key is not a child */
> >+    if (strncmp(key, KEY_PREFIX, strlen(KEY_PREFIX))) {
> >+        return -1;
> >+    }
> >+
> >+    /* take first char after prefix */
> >+    start = key + strlen(KEY_PREFIX);
> >+
> >+    /* if the string end here -> scan fail */
> >+    if (start[0] == '\0') {
> >+        return -1;
> >+    }
> >+
> >+    /* try to retrieve the index */
> >+    ret = parse_uint(start, &idx, &next, 10);
> >+
> >+    /* no int found -> scan fail */
> >+    if (ret < 0) {
> >+        return -1;
> >+    }
> >+
> >+    /* we are taking a reference via QMP */
> >+    if (next - key == strlen(key)) {
> 
> "if (!*next)" would probably accomplish the same (or "if next[0] ==
> '\0')" to keep your coding style) without having to call strlen().
> 
> >+        *key_prefix = g_strdup(key);
> >+        return idx;
> >+    }
> >+
> >+    /* match the suffix to avoid matching the same idx
> >+     * multiple times and be required to do more checks later
> >+     */
> >+    if (strncmp(next, KEY_FILENAME_SUFFIX, strlen(KEY_FILENAME_SUFFIX))) {
> 
> As stated in my review from October, you may use sizeof() to avoid strlen().
> 
> >+        return -1;
> >+    }
> >+
> >+    /* do not include '.' */
> >+    int len = next - key;
> >+    *key_prefix = g_strndup(key, len);
> >+
> >+    return idx;
> >+}
> >+
> >+static QDict *quorum_get_children_idx(const QDict *options)
> >+{
> >+    const QDictEntry *ent;
> >+    QDict *result;
> >+    char *key_prefix;
> >+    int idx;
> >+
> >+    result = qdict_new();
> >+
> >+    for (ent = qdict_first(options); ent; ent = qdict_next(options, ent)) {
> >+        const char *key = qdict_entry_key(ent);
> >+        idx = quorum_match_key(key,
> >+                               &key_prefix);
> >+
> >+        /* if the result zero or positive we got a key */
> >+        if (idx < 0) {
> >+            continue;
> >+        }
> >+
> >+        qdict_put(result, key_prefix, qint_from_int(idx));
> >+    }
> >+
> >+    return result;
> >+}
> >+
> >+static int quorum_fill_validation_array(bool *array,
> >+                                        const QDict *dict,
> >+                                        int total,
> >+                                        Error **errp)
> >+{
> >+    const QDictEntry *ent;
> >+
> >+    /* fill the checking array with children indexes */
> >+    for (ent = qdict_first(dict); ent; ent = qdict_next(dict, ent)) {
> >+        const char *key = qdict_entry_key(ent);
> >+        int idx = qdict_get_int(dict, key);
> >+
> >+        if (idx < 0 || idx >= total) {
> >+            error_setg(errp,
> >+                "Children index must be between 0 and children count -1");
> 
> Quote from October: I'd prefer "- 1" instead of "-1" (or even "Child
> index must be an unsigned integer smaller than the children count").
> 
> >+            return -ERANGE;
> >+        }
> >+
> >+        array[idx] = true;
> >+    }
> >+
> >+    return 0;
> >+}
> >+
> >+static int quorum_valid_indexes(const bool *array, int total, Error **errp)
> >+{
> >+    int i;
> >+
> >+    for (i = 0; i < total; i++) {
> >+        if (array[i] == true) {
> >+            continue;
> >+        }
> >+
> >+        error_setg(errp,
> >+            "All child indexes between 0 and children count -1  must be "
> >+            " used");
> 
> Again, I'd prefer "- 1"; then, there are two spaces to the left of
> "must" and two spaces after "be".
> 
> >+        return -ERANGE;
> >+    }
> >+
> >+    return 0;
> >+}
> >+
> >+static int quorum_valid_children_indexes(const QDict *dict,
> >+                                         int total,
> >+                                         Error **errp)
> >+{
> >+    bool *array;
> >+    int ret = 0;;
> >+
> >+    /* allocate indexes checking array and put false in it */
> >+    array = g_new0(bool, total);
> >+
> >+    ret = quorum_fill_validation_array(array, dict, total, errp);
> >+    if (ret < 0) {
> >+        goto free_exit;
> >+    }
> >+
> >+    ret = quorum_valid_indexes(array, total, errp);
> >+free_exit:
> >+    g_free(array);
> >+    return ret;
> >+}
> >+
> >+static int quorum_valid_threshold(int threshold,
> >+                                  int total,
> >+                                  Error **errp)
> >+{
> >+
> >+    if (threshold < 1) {
> >+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
> >+                  "vote-threshold", "value >= 1");
> >+        return -ERANGE;
> >+    }
> >+
> >+    if (threshold > total) {
> >+        error_setg(errp, "threshold <= children count must be true");
> 
> Quote from October: Well, while this might technically be true, I'd
> rather go for "threshold may not exceed children count" instead. ;-)
> 
> >+        return -ERANGE;
> >+    }
> >+
> >+    return 0;
> >+}
> >+
> >+static int quorum_open(BlockDriverState *bs,
> >+                       QDict *options,
> >+                       int flags,
> >+                       Error **errp)
> >+{
> >+    BDRVQuorumState *s = bs->opaque;
> >+    Error *local_err = NULL;
> >+    const QDictEntry *ent;
> >+    QDict *idx_dict;
> >+    bool *opened;
> >+    const char *value;
> >+    char *next;
> >+    int i;
> >+    int ret = 0;
> >+
> >+    /* get a dict of children indexes for validation */
> >+    idx_dict = quorum_get_children_idx(options);
> >+
> >+    /* count how many different children indexes are present and validate */
> >+    s->total = qdict_size(idx_dict);
> >+    if (s->total < 2) {
> >+        error_setg(&local_err,
> >+                   "Number of provided children must be greater than 1");
> >+        ret = -EINVAL;
> >+        goto exit;
> >+    }
> >+
> >+    /* validate that the set of index is coherent */
> >+    ret = quorum_valid_children_indexes(idx_dict, s->total, &local_err);
> >+    if (ret < 0) {
> >+        goto exit;
> >+    }
> >+
> >+    ret = qdict_get_try_int(options, "vote-threshold", -1);
> >+    /* from QMP */
> >+    if (ret != -1) {
> >+        qdict_del(options, "vote-threshold");
> >+        s->threshold = ret;
> >+    /* from command line */
> >+    } else {
> >+        /* retrieve the threshold option from the command line */
> >+        value = qdict_get_try_str(options, "vote_threshold");
> >+        if (!value) {
> >+            error_setg(&local_err,
> >+                       "vote_threshold must be provided");
> >+            ret = -EINVAL;
> >+            goto exit;
> >+        }
> >+        qdict_del(options, "vote_threshold");
> >+
> >+        ret = parse_uint(value, (unsigned long long *) &s->threshold, &next, 10);
> 
> I don't like this cast at all...
> 
> >+
> >+        /* no int found -> scan fail */
> >+        if (ret < 0) {
> >+            error_setg(&local_err,
> >+                       "invalid voter_threshold specified");
> 
> *vote_threshold
> 
> >+            ret = -EINVAL;
> >+            goto exit;
> >+        }
> >+    }
> >+
> >+    /* and validate it againts s->total */
> 
> Still *against
> 
> >+    ret = quorum_valid_threshold(s->threshold, s->total, &local_err);
> >+    if (ret < 0) {
> >+        goto exit;
> >+    }
> >+
> >+    /* is the driver in blkverify mode */
> >+    value = qdict_get_try_str(options, "blkverify");
> >+    if (value && !strcmp(value, "on")  &&
> >+        s->total == 2 && s->threshold == 2) {
> >+        s->is_blkverify = true;
> >+    }
> 
> Quote from October: If the user specifies anything different from
> "on" or if he does and s->total or s->threshold is not 2, quorum
> will silently work in non-blkverify mode without telling the user.
> So I'd emit a message here if blkverify has been specified and its
> value is different from "off" but s->is_blkverify remains false
> (i.e., “else if (value && strcmp(value, "off")) { fprintf(stderr,
> "[Some warning]"); }”).
> 
> >+    qdict_del(options, "blkverify");
> >+
> >+    /* allocate the children BlockDriverState array */
> >+    s->bs = g_new0(BlockDriverState *, s->total);
> >+    opened = g_new0(bool, s->total);
> >+
> >+    /* open children bs */
> >+    for (ent = qdict_first(idx_dict);
> >+         ent; ent = qdict_next(idx_dict, ent)) {
> 
> This condition fits on a single line.
> 
> >+        const char *key = qdict_entry_key(ent);
> >+        int idx = qdict_get_int(idx_dict, key);
> >+        ret = bdrv_open_image(&s->bs[idx],
> >+                              NULL,
> >+                              options,
> >+                              key,
> >+                              flags,
> >+                              false,
> >+                              &local_err);
> 
> This takes two, but definitely not seven.
> 
> >+        if (ret < 0) {
> >+            goto close_exit;
> >+        }
> >+        opened[idx] = true;
> >+    }
> >+
> >+    g_free(opened);
> >+    goto exit;
> >+
> >+close_exit:
> >+    /* cleanup on error */
> >+    for (i = 0; i < s->total; i++) {
> >+        if (!opened[i]) {
> >+            continue;
> >+        }
> >+        bdrv_close(s->bs[i]);
> 
> You'll probably want bdrv_unref() here.

I don't think so because to simplify memory management s->bs is an array
containing all the allocated bs and is freed at once.
So should bdrv_unref still be used ?

> 
> >+    }
> >+    g_free(s->bs);
> >+    g_free(opened);
> >+exit:
> >+    /* propagate error */
> >+    if (error_is_set(&local_err)) {
> >+        error_propagate(errp, local_err);
> >+    }
> >+    return ret;
> >+}
> >+
> >+static void quorum_close(BlockDriverState *bs)
> >+{
> >+    BDRVQuorumState *s = bs->opaque;
> >+    int i;
> >+
> >+    for (i = 0; i < s->total; i++) {
> >+        /* Ensure writes reach stable storage */
> >+        bdrv_flush(s->bs[i]);
> >+        /* close manually because we'll free s->bs */
> >+        bdrv_close(s->bs[i]);
> 
> Again, bdrv_unref().
> 
> >+    }
> >+
> >+    g_free(s->bs);
> >+}
> >+
> >  static BlockDriver bdrv_quorum = {
> >      .format_name        = "quorum",
> >      .protocol_name      = "quorum",
> >      .instance_size      = sizeof(BDRVQuorumState),
> >+    .bdrv_file_open     = quorum_open,
> >+    .bdrv_close         = quorum_close,
> >+
> >      .bdrv_co_flush_to_disk = quorum_co_flush,
> >      .bdrv_getlength     = quorum_getlength,
> >diff --git a/qapi-schema.json b/qapi-schema.json
> >index 05ced9d..903a3a0 100644
> >--- a/qapi-schema.json
> >+++ b/qapi-schema.json
> >@@ -4352,6 +4352,24 @@
> >              'raw': 'BlockdevRef' } }
> >  ##
> >+# @BlockdevOptionsQuorum
> >+#
> >+# Driver specific block device options for Quorum
> >+#
> >+# @blkverify:      #optional true if the driver must print content mismatch
> >+#
> >+# @children:       the children block device to use
> >+#
> >+# @vote_threshold: the vote limit under which a read will fail
> >+#
> >+# Since: 2.0
> >+##
> >+{ 'type': 'BlockdevOptionsQuorum',
> >+  'data': { '*blkverify': 'bool',
> >+            'children': [ 'BlockdevRef' ],
> >+            'vote-threshold': 'int' } }
> >+
> >+##
> >  # @BlockdevOptions
> >  #
> >  # Options for creating a block device.
> >@@ -4389,7 +4407,8 @@
> >        'vdi':        'BlockdevOptionsGenericFormat',
> >        'vhdx':       'BlockdevOptionsGenericFormat',
> >        'vmdk':       'BlockdevOptionsGenericCOWFormat',
> >-      'vpc':        'BlockdevOptionsGenericFormat'
> >+      'vpc':        'BlockdevOptionsGenericFormat',
> >+      'quorum':     'BlockdevOptionsQuorum'
> >    } }
> >  ##
> 
> As I've said before, I'd really like it if you could abandon all
> those functions for parsing the incoming options QDict in favor of
> qdict_extract_subqdict() and qdict_array_split() which will most
> likely do the job just fine; and, in fact, I have written
> qdict_array_split() with Quorum in mind. ;-)
> 
> Max
>
Benoît Canet Feb. 3, 2014, 12:43 p.m. UTC | #3
Le Monday 03 Feb 2014 à 00:09:28 (+0100), Max Reitz a écrit :
> On 28.01.2014 17:52, Benoît Canet wrote:
> >From: Benoît Canet <benoit@irqsave.net>
> >
> >Example of command line:
> >-drive if=virtio,file.driver=quorum,\
> >file.children.0.file.filename=1.raw,\
> >file.children.0.node-name=1.raw,\
> >file.children.0.driver=raw,\
> >file.children.1.file.filename=2.raw,\
> >file.children.1.node-name=2.raw,\
> >file.children.1.driver=raw,\
> >file.children.2.file.filename=3.raw,\
> >file.children.2.node-name=3.raw,\
> >file.children.2.driver=raw,\
> >file.vote_threshold=2
> >
> >file.blkverify=on with file.vote_threshold=2 and two files can be passed to
> >emulated blkverify.
> >
> >Signed-off-by: Benoit Canet <benoit@irqsave.net>
> >---
> >  block/quorum.c   | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  qapi-schema.json |  21 +++-
> >  2 files changed, 328 insertions(+), 1 deletion(-)
> >
> >diff --git a/block/quorum.c b/block/quorum.c
> >index e7b2090..0c0d630 100644
> >--- a/block/quorum.c
> >+++ b/block/quorum.c
> >@@ -17,8 +17,12 @@
> >  #include <gnutls/crypto.h>
> >  #include "block/block_int.h"
> >  #include "qapi/qmp/qjson.h"
> >+#include "qapi/qmp/types.h"
> >+#include "qemu-common.h"
> >  #define HASH_LENGTH 32
> >+#define KEY_PREFIX "children."
> >+#define KEY_FILENAME_SUFFIX ".file.filename"
> >  /* This union holds a vote hash value */
> >  typedef union QuorumVoteValue {
> >@@ -702,12 +706,316 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
> >      return false;
> >  }
> 

As a general stance sorry to have missed this mail of review from october.
It was not done on purpose.

Best regards

Benoît

> Perhaps you could make use of qdict_extract_subqdict() and
> qdict_array_split() functions here...? That is,
> qdict_extract_subqdict(..., "children.") and then
> qdict_array_split() on the result?
> 
> >+static int quorum_match_key(const char *key,
> >+                            char **key_prefix)
> >+{
> >+    const char *start;
> >+    char *next;
> >+    unsigned long long idx;
> >+    int ret;
> >+
> >+    *key_prefix = NULL;
> >+
> >+    /* the following code is a translation of the following pseudo code:
> >+     *       match = key.match('(^children\.(\d+)\.)$suffix)
> >+     *       if not match:
> >+     *           return -1;
> >+     *       key_prefix = match.outer_match()
> >+     *       idx = match.inner_match()
> >+     *
> >+     * note: we also match the .file suffix to avoid futher checkings
> >+     */
> >+
> >+    /* this key is not a child */
> >+    if (strncmp(key, KEY_PREFIX, strlen(KEY_PREFIX))) {
> >+        return -1;
> >+    }
> >+
> >+    /* take first char after prefix */
> >+    start = key + strlen(KEY_PREFIX);
> >+
> >+    /* if the string end here -> scan fail */
> >+    if (start[0] == '\0') {
> >+        return -1;
> >+    }
> >+
> >+    /* try to retrieve the index */
> >+    ret = parse_uint(start, &idx, &next, 10);
> >+
> >+    /* no int found -> scan fail */
> >+    if (ret < 0) {
> >+        return -1;
> >+    }
> >+
> >+    /* we are taking a reference via QMP */
> >+    if (next - key == strlen(key)) {
> 
> "if (!*next)" would probably accomplish the same (or "if next[0] ==
> '\0')" to keep your coding style) without having to call strlen().
> 
> >+        *key_prefix = g_strdup(key);
> >+        return idx;
> >+    }
> >+
> >+    /* match the suffix to avoid matching the same idx
> >+     * multiple times and be required to do more checks later
> >+     */
> >+    if (strncmp(next, KEY_FILENAME_SUFFIX, strlen(KEY_FILENAME_SUFFIX))) {
> 
> As stated in my review from October, you may use sizeof() to avoid strlen().
> 
> >+        return -1;
> >+    }
> >+
> >+    /* do not include '.' */
> >+    int len = next - key;
> >+    *key_prefix = g_strndup(key, len);
> >+
> >+    return idx;
> >+}
> >+
> >+static QDict *quorum_get_children_idx(const QDict *options)
> >+{
> >+    const QDictEntry *ent;
> >+    QDict *result;
> >+    char *key_prefix;
> >+    int idx;
> >+
> >+    result = qdict_new();
> >+
> >+    for (ent = qdict_first(options); ent; ent = qdict_next(options, ent)) {
> >+        const char *key = qdict_entry_key(ent);
> >+        idx = quorum_match_key(key,
> >+                               &key_prefix);
> >+
> >+        /* if the result zero or positive we got a key */
> >+        if (idx < 0) {
> >+            continue;
> >+        }
> >+
> >+        qdict_put(result, key_prefix, qint_from_int(idx));
> >+    }
> >+
> >+    return result;
> >+}
> >+
> >+static int quorum_fill_validation_array(bool *array,
> >+                                        const QDict *dict,
> >+                                        int total,
> >+                                        Error **errp)
> >+{
> >+    const QDictEntry *ent;
> >+
> >+    /* fill the checking array with children indexes */
> >+    for (ent = qdict_first(dict); ent; ent = qdict_next(dict, ent)) {
> >+        const char *key = qdict_entry_key(ent);
> >+        int idx = qdict_get_int(dict, key);
> >+
> >+        if (idx < 0 || idx >= total) {
> >+            error_setg(errp,
> >+                "Children index must be between 0 and children count -1");
> 
> Quote from October: I'd prefer "- 1" instead of "-1" (or even "Child
> index must be an unsigned integer smaller than the children count").
> 
> >+            return -ERANGE;
> >+        }
> >+
> >+        array[idx] = true;
> >+    }
> >+
> >+    return 0;
> >+}
> >+
> >+static int quorum_valid_indexes(const bool *array, int total, Error **errp)
> >+{
> >+    int i;
> >+
> >+    for (i = 0; i < total; i++) {
> >+        if (array[i] == true) {
> >+            continue;
> >+        }
> >+
> >+        error_setg(errp,
> >+            "All child indexes between 0 and children count -1  must be "
> >+            " used");
> 
> Again, I'd prefer "- 1"; then, there are two spaces to the left of
> "must" and two spaces after "be".
> 
> >+        return -ERANGE;
> >+    }
> >+
> >+    return 0;
> >+}
> >+
> >+static int quorum_valid_children_indexes(const QDict *dict,
> >+                                         int total,
> >+                                         Error **errp)
> >+{
> >+    bool *array;
> >+    int ret = 0;;
> >+
> >+    /* allocate indexes checking array and put false in it */
> >+    array = g_new0(bool, total);
> >+
> >+    ret = quorum_fill_validation_array(array, dict, total, errp);
> >+    if (ret < 0) {
> >+        goto free_exit;
> >+    }
> >+
> >+    ret = quorum_valid_indexes(array, total, errp);
> >+free_exit:
> >+    g_free(array);
> >+    return ret;
> >+}
> >+
> >+static int quorum_valid_threshold(int threshold,
> >+                                  int total,
> >+                                  Error **errp)
> >+{
> >+
> >+    if (threshold < 1) {
> >+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
> >+                  "vote-threshold", "value >= 1");
> >+        return -ERANGE;
> >+    }
> >+
> >+    if (threshold > total) {
> >+        error_setg(errp, "threshold <= children count must be true");
> 
> Quote from October: Well, while this might technically be true, I'd
> rather go for "threshold may not exceed children count" instead. ;-)
> 
> >+        return -ERANGE;
> >+    }
> >+
> >+    return 0;
> >+}
> >+
> >+static int quorum_open(BlockDriverState *bs,
> >+                       QDict *options,
> >+                       int flags,
> >+                       Error **errp)
> >+{
> >+    BDRVQuorumState *s = bs->opaque;
> >+    Error *local_err = NULL;
> >+    const QDictEntry *ent;
> >+    QDict *idx_dict;
> >+    bool *opened;
> >+    const char *value;
> >+    char *next;
> >+    int i;
> >+    int ret = 0;
> >+
> >+    /* get a dict of children indexes for validation */
> >+    idx_dict = quorum_get_children_idx(options);
> >+
> >+    /* count how many different children indexes are present and validate */
> >+    s->total = qdict_size(idx_dict);
> >+    if (s->total < 2) {
> >+        error_setg(&local_err,
> >+                   "Number of provided children must be greater than 1");
> >+        ret = -EINVAL;
> >+        goto exit;
> >+    }
> >+
> >+    /* validate that the set of index is coherent */
> >+    ret = quorum_valid_children_indexes(idx_dict, s->total, &local_err);
> >+    if (ret < 0) {
> >+        goto exit;
> >+    }
> >+
> >+    ret = qdict_get_try_int(options, "vote-threshold", -1);
> >+    /* from QMP */
> >+    if (ret != -1) {
> >+        qdict_del(options, "vote-threshold");
> >+        s->threshold = ret;
> >+    /* from command line */
> >+    } else {
> >+        /* retrieve the threshold option from the command line */
> >+        value = qdict_get_try_str(options, "vote_threshold");
> >+        if (!value) {
> >+            error_setg(&local_err,
> >+                       "vote_threshold must be provided");
> >+            ret = -EINVAL;
> >+            goto exit;
> >+        }
> >+        qdict_del(options, "vote_threshold");
> >+
> >+        ret = parse_uint(value, (unsigned long long *) &s->threshold, &next, 10);
> 
> I don't like this cast at all...
> 
> >+
> >+        /* no int found -> scan fail */
> >+        if (ret < 0) {
> >+            error_setg(&local_err,
> >+                       "invalid voter_threshold specified");
> 
> *vote_threshold
> 
> >+            ret = -EINVAL;
> >+            goto exit;
> >+        }
> >+    }
> >+
> >+    /* and validate it againts s->total */
> 
> Still *against
> 
> >+    ret = quorum_valid_threshold(s->threshold, s->total, &local_err);
> >+    if (ret < 0) {
> >+        goto exit;
> >+    }
> >+
> >+    /* is the driver in blkverify mode */
> >+    value = qdict_get_try_str(options, "blkverify");
> >+    if (value && !strcmp(value, "on")  &&
> >+        s->total == 2 && s->threshold == 2) {
> >+        s->is_blkverify = true;
> >+    }
> 
> Quote from October: If the user specifies anything different from
> "on" or if he does and s->total or s->threshold is not 2, quorum
> will silently work in non-blkverify mode without telling the user.
> So I'd emit a message here if blkverify has been specified and its
> value is different from "off" but s->is_blkverify remains false
> (i.e., “else if (value && strcmp(value, "off")) { fprintf(stderr,
> "[Some warning]"); }”).
> 
> >+    qdict_del(options, "blkverify");
> >+
> >+    /* allocate the children BlockDriverState array */
> >+    s->bs = g_new0(BlockDriverState *, s->total);
> >+    opened = g_new0(bool, s->total);
> >+
> >+    /* open children bs */
> >+    for (ent = qdict_first(idx_dict);
> >+         ent; ent = qdict_next(idx_dict, ent)) {
> 
> This condition fits on a single line.
> 
> >+        const char *key = qdict_entry_key(ent);
> >+        int idx = qdict_get_int(idx_dict, key);
> >+        ret = bdrv_open_image(&s->bs[idx],
> >+                              NULL,
> >+                              options,
> >+                              key,
> >+                              flags,
> >+                              false,
> >+                              &local_err);
> 
> This takes two, but definitely not seven.
> 
> >+        if (ret < 0) {
> >+            goto close_exit;
> >+        }
> >+        opened[idx] = true;
> >+    }
> >+
> >+    g_free(opened);
> >+    goto exit;
> >+
> >+close_exit:
> >+    /* cleanup on error */
> >+    for (i = 0; i < s->total; i++) {
> >+        if (!opened[i]) {
> >+            continue;
> >+        }
> >+        bdrv_close(s->bs[i]);
> 
> You'll probably want bdrv_unref() here.
> 
> >+    }
> >+    g_free(s->bs);
> >+    g_free(opened);
> >+exit:
> >+    /* propagate error */
> >+    if (error_is_set(&local_err)) {
> >+        error_propagate(errp, local_err);
> >+    }
> >+    return ret;
> >+}
> >+
> >+static void quorum_close(BlockDriverState *bs)
> >+{
> >+    BDRVQuorumState *s = bs->opaque;
> >+    int i;
> >+
> >+    for (i = 0; i < s->total; i++) {
> >+        /* Ensure writes reach stable storage */
> >+        bdrv_flush(s->bs[i]);
> >+        /* close manually because we'll free s->bs */
> >+        bdrv_close(s->bs[i]);
> 
> Again, bdrv_unref().
> 
> >+    }
> >+
> >+    g_free(s->bs);
> >+}
> >+
> >  static BlockDriver bdrv_quorum = {
> >      .format_name        = "quorum",
> >      .protocol_name      = "quorum",
> >      .instance_size      = sizeof(BDRVQuorumState),
> >+    .bdrv_file_open     = quorum_open,
> >+    .bdrv_close         = quorum_close,
> >+
> >      .bdrv_co_flush_to_disk = quorum_co_flush,
> >      .bdrv_getlength     = quorum_getlength,
> >diff --git a/qapi-schema.json b/qapi-schema.json
> >index 05ced9d..903a3a0 100644
> >--- a/qapi-schema.json
> >+++ b/qapi-schema.json
> >@@ -4352,6 +4352,24 @@
> >              'raw': 'BlockdevRef' } }
> >  ##
> >+# @BlockdevOptionsQuorum
> >+#
> >+# Driver specific block device options for Quorum
> >+#
> >+# @blkverify:      #optional true if the driver must print content mismatch
> >+#
> >+# @children:       the children block device to use
> >+#
> >+# @vote_threshold: the vote limit under which a read will fail
> >+#
> >+# Since: 2.0
> >+##
> >+{ 'type': 'BlockdevOptionsQuorum',
> >+  'data': { '*blkverify': 'bool',
> >+            'children': [ 'BlockdevRef' ],
> >+            'vote-threshold': 'int' } }
> >+
> >+##
> >  # @BlockdevOptions
> >  #
> >  # Options for creating a block device.
> >@@ -4389,7 +4407,8 @@
> >        'vdi':        'BlockdevOptionsGenericFormat',
> >        'vhdx':       'BlockdevOptionsGenericFormat',
> >        'vmdk':       'BlockdevOptionsGenericCOWFormat',
> >-      'vpc':        'BlockdevOptionsGenericFormat'
> >+      'vpc':        'BlockdevOptionsGenericFormat',
> >+      'quorum':     'BlockdevOptionsQuorum'
> >    } }
> >  ##
> 
> As I've said before, I'd really like it if you could abandon all
> those functions for parsing the incoming options QDict in favor of
> qdict_extract_subqdict() and qdict_array_split() which will most
> likely do the job just fine; and, in fact, I have written
> qdict_array_split() with Quorum in mind. ;-)
> 
> Max
>
Benoît Canet Feb. 3, 2014, 12:49 p.m. UTC | #4
Le Monday 03 Feb 2014 à 13:21:18 (+0100), Benoît Canet a écrit :
> Le Monday 03 Feb 2014 à 00:09:28 (+0100), Max Reitz a écrit :
> > On 28.01.2014 17:52, Benoît Canet wrote:
> > >From: Benoît Canet <benoit@irqsave.net>
> > >
> > >Example of command line:
> > >-drive if=virtio,file.driver=quorum,\
> > >file.children.0.file.filename=1.raw,\
> > >file.children.0.node-name=1.raw,\
> > >file.children.0.driver=raw,\
> > >file.children.1.file.filename=2.raw,\
> > >file.children.1.node-name=2.raw,\
> > >file.children.1.driver=raw,\
> > >file.children.2.file.filename=3.raw,\
> > >file.children.2.node-name=3.raw,\
> > >file.children.2.driver=raw,\
> > >file.vote_threshold=2
> > >
> > >file.blkverify=on with file.vote_threshold=2 and two files can be passed to
> > >emulated blkverify.
> > >
> > >Signed-off-by: Benoit Canet <benoit@irqsave.net>
> > >---
> > >  block/quorum.c   | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  qapi-schema.json |  21 +++-
> > >  2 files changed, 328 insertions(+), 1 deletion(-)
> > >
> > >diff --git a/block/quorum.c b/block/quorum.c
> > >index e7b2090..0c0d630 100644
> > >--- a/block/quorum.c
> > >+++ b/block/quorum.c
> > >@@ -17,8 +17,12 @@
> > >  #include <gnutls/crypto.h>
> > >  #include "block/block_int.h"
> > >  #include "qapi/qmp/qjson.h"
> > >+#include "qapi/qmp/types.h"
> > >+#include "qemu-common.h"
> > >  #define HASH_LENGTH 32
> > >+#define KEY_PREFIX "children."
> > >+#define KEY_FILENAME_SUFFIX ".file.filename"
> > >  /* This union holds a vote hash value */
> > >  typedef union QuorumVoteValue {
> > >@@ -702,12 +706,316 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
> > >      return false;
> > >  }
> > 
> > Perhaps you could make use of qdict_extract_subqdict() and
> > qdict_array_split() functions here...? That is,
> > qdict_extract_subqdict(..., "children.") and then
> > qdict_array_split() on the result?
> > 
> > >+static int quorum_match_key(const char *key,
> > >+                            char **key_prefix)
> > >+{
> > >+    const char *start;
> > >+    char *next;
> > >+    unsigned long long idx;
> > >+    int ret;
> > >+
> > >+    *key_prefix = NULL;
> > >+
> > >+    /* the following code is a translation of the following pseudo code:
> > >+     *       match = key.match('(^children\.(\d+)\.)$suffix)
> > >+     *       if not match:
> > >+     *           return -1;
> > >+     *       key_prefix = match.outer_match()
> > >+     *       idx = match.inner_match()
> > >+     *
> > >+     * note: we also match the .file suffix to avoid futher checkings
> > >+     */
> > >+
> > >+    /* this key is not a child */
> > >+    if (strncmp(key, KEY_PREFIX, strlen(KEY_PREFIX))) {
> > >+        return -1;
> > >+    }
> > >+
> > >+    /* take first char after prefix */
> > >+    start = key + strlen(KEY_PREFIX);
> > >+
> > >+    /* if the string end here -> scan fail */
> > >+    if (start[0] == '\0') {
> > >+        return -1;
> > >+    }
> > >+
> > >+    /* try to retrieve the index */
> > >+    ret = parse_uint(start, &idx, &next, 10);
> > >+
> > >+    /* no int found -> scan fail */
> > >+    if (ret < 0) {
> > >+        return -1;
> > >+    }
> > >+
> > >+    /* we are taking a reference via QMP */
> > >+    if (next - key == strlen(key)) {
> > 
> > "if (!*next)" would probably accomplish the same (or "if next[0] ==
> > '\0')" to keep your coding style) without having to call strlen().
> > 
> > >+        *key_prefix = g_strdup(key);
> > >+        return idx;
> > >+    }
> > >+
> > >+    /* match the suffix to avoid matching the same idx
> > >+     * multiple times and be required to do more checks later
> > >+     */
> > >+    if (strncmp(next, KEY_FILENAME_SUFFIX, strlen(KEY_FILENAME_SUFFIX))) {
> > 
> > As stated in my review from October, you may use sizeof() to avoid strlen().
> > 
> > >+        return -1;
> > >+    }
> > >+
> > >+    /* do not include '.' */
> > >+    int len = next - key;
> > >+    *key_prefix = g_strndup(key, len);
> > >+
> > >+    return idx;
> > >+}
> > >+
> > >+static QDict *quorum_get_children_idx(const QDict *options)
> > >+{
> > >+    const QDictEntry *ent;
> > >+    QDict *result;
> > >+    char *key_prefix;
> > >+    int idx;
> > >+
> > >+    result = qdict_new();
> > >+
> > >+    for (ent = qdict_first(options); ent; ent = qdict_next(options, ent)) {
> > >+        const char *key = qdict_entry_key(ent);
> > >+        idx = quorum_match_key(key,
> > >+                               &key_prefix);
> > >+
> > >+        /* if the result zero or positive we got a key */
> > >+        if (idx < 0) {
> > >+            continue;
> > >+        }
> > >+
> > >+        qdict_put(result, key_prefix, qint_from_int(idx));
> > >+    }
> > >+
> > >+    return result;
> > >+}
> > >+
> > >+static int quorum_fill_validation_array(bool *array,
> > >+                                        const QDict *dict,
> > >+                                        int total,
> > >+                                        Error **errp)
> > >+{
> > >+    const QDictEntry *ent;
> > >+
> > >+    /* fill the checking array with children indexes */
> > >+    for (ent = qdict_first(dict); ent; ent = qdict_next(dict, ent)) {
> > >+        const char *key = qdict_entry_key(ent);
> > >+        int idx = qdict_get_int(dict, key);
> > >+
> > >+        if (idx < 0 || idx >= total) {
> > >+            error_setg(errp,
> > >+                "Children index must be between 0 and children count -1");
> > 
> > Quote from October: I'd prefer "- 1" instead of "-1" (or even "Child
> > index must be an unsigned integer smaller than the children count").
> > 
> > >+            return -ERANGE;
> > >+        }
> > >+
> > >+        array[idx] = true;
> > >+    }
> > >+
> > >+    return 0;
> > >+}
> > >+
> > >+static int quorum_valid_indexes(const bool *array, int total, Error **errp)
> > >+{
> > >+    int i;
> > >+
> > >+    for (i = 0; i < total; i++) {
> > >+        if (array[i] == true) {
> > >+            continue;
> > >+        }
> > >+
> > >+        error_setg(errp,
> > >+            "All child indexes between 0 and children count -1  must be "
> > >+            " used");
> > 
> > Again, I'd prefer "- 1"; then, there are two spaces to the left of
> > "must" and two spaces after "be".
> > 
> > >+        return -ERANGE;
> > >+    }
> > >+
> > >+    return 0;
> > >+}
> > >+
> > >+static int quorum_valid_children_indexes(const QDict *dict,
> > >+                                         int total,
> > >+                                         Error **errp)
> > >+{
> > >+    bool *array;
> > >+    int ret = 0;;
> > >+
> > >+    /* allocate indexes checking array and put false in it */
> > >+    array = g_new0(bool, total);
> > >+
> > >+    ret = quorum_fill_validation_array(array, dict, total, errp);
> > >+    if (ret < 0) {
> > >+        goto free_exit;
> > >+    }
> > >+
> > >+    ret = quorum_valid_indexes(array, total, errp);
> > >+free_exit:
> > >+    g_free(array);
> > >+    return ret;
> > >+}
> > >+
> > >+static int quorum_valid_threshold(int threshold,
> > >+                                  int total,
> > >+                                  Error **errp)
> > >+{
> > >+
> > >+    if (threshold < 1) {
> > >+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
> > >+                  "vote-threshold", "value >= 1");
> > >+        return -ERANGE;
> > >+    }
> > >+
> > >+    if (threshold > total) {
> > >+        error_setg(errp, "threshold <= children count must be true");
> > 
> > Quote from October: Well, while this might technically be true, I'd
> > rather go for "threshold may not exceed children count" instead. ;-)
> > 
> > >+        return -ERANGE;
> > >+    }
> > >+
> > >+    return 0;
> > >+}
> > >+
> > >+static int quorum_open(BlockDriverState *bs,
> > >+                       QDict *options,
> > >+                       int flags,
> > >+                       Error **errp)
> > >+{
> > >+    BDRVQuorumState *s = bs->opaque;
> > >+    Error *local_err = NULL;
> > >+    const QDictEntry *ent;
> > >+    QDict *idx_dict;
> > >+    bool *opened;
> > >+    const char *value;
> > >+    char *next;
> > >+    int i;
> > >+    int ret = 0;
> > >+
> > >+    /* get a dict of children indexes for validation */
> > >+    idx_dict = quorum_get_children_idx(options);
> > >+
> > >+    /* count how many different children indexes are present and validate */
> > >+    s->total = qdict_size(idx_dict);
> > >+    if (s->total < 2) {
> > >+        error_setg(&local_err,
> > >+                   "Number of provided children must be greater than 1");
> > >+        ret = -EINVAL;
> > >+        goto exit;
> > >+    }
> > >+
> > >+    /* validate that the set of index is coherent */
> > >+    ret = quorum_valid_children_indexes(idx_dict, s->total, &local_err);
> > >+    if (ret < 0) {
> > >+        goto exit;
> > >+    }
> > >+
> > >+    ret = qdict_get_try_int(options, "vote-threshold", -1);
> > >+    /* from QMP */
> > >+    if (ret != -1) {
> > >+        qdict_del(options, "vote-threshold");
> > >+        s->threshold = ret;
> > >+    /* from command line */
> > >+    } else {
> > >+        /* retrieve the threshold option from the command line */
> > >+        value = qdict_get_try_str(options, "vote_threshold");
> > >+        if (!value) {
> > >+            error_setg(&local_err,
> > >+                       "vote_threshold must be provided");
> > >+            ret = -EINVAL;
> > >+            goto exit;
> > >+        }
> > >+        qdict_del(options, "vote_threshold");
> > >+
> > >+        ret = parse_uint(value, (unsigned long long *) &s->threshold, &next, 10);
> > 
> > I don't like this cast at all...
> > 
> > >+
> > >+        /* no int found -> scan fail */
> > >+        if (ret < 0) {
> > >+            error_setg(&local_err,
> > >+                       "invalid voter_threshold specified");
> > 
> > *vote_threshold
> > 
> > >+            ret = -EINVAL;
> > >+            goto exit;
> > >+        }
> > >+    }
> > >+
> > >+    /* and validate it againts s->total */
> > 
> > Still *against
> > 
> > >+    ret = quorum_valid_threshold(s->threshold, s->total, &local_err);
> > >+    if (ret < 0) {
> > >+        goto exit;
> > >+    }
> > >+
> > >+    /* is the driver in blkverify mode */
> > >+    value = qdict_get_try_str(options, "blkverify");
> > >+    if (value && !strcmp(value, "on")  &&
> > >+        s->total == 2 && s->threshold == 2) {
> > >+        s->is_blkverify = true;
> > >+    }
> > 
> > Quote from October: If the user specifies anything different from
> > "on" or if he does and s->total or s->threshold is not 2, quorum
> > will silently work in non-blkverify mode without telling the user.
> > So I'd emit a message here if blkverify has been specified and its
> > value is different from "off" but s->is_blkverify remains false
> > (i.e., “else if (value && strcmp(value, "off")) { fprintf(stderr,
> > "[Some warning]"); }”).
> > 
> > >+    qdict_del(options, "blkverify");
> > >+
> > >+    /* allocate the children BlockDriverState array */
> > >+    s->bs = g_new0(BlockDriverState *, s->total);
> > >+    opened = g_new0(bool, s->total);
> > >+
> > >+    /* open children bs */
> > >+    for (ent = qdict_first(idx_dict);
> > >+         ent; ent = qdict_next(idx_dict, ent)) {
> > 
> > This condition fits on a single line.
> > 
> > >+        const char *key = qdict_entry_key(ent);
> > >+        int idx = qdict_get_int(idx_dict, key);
> > >+        ret = bdrv_open_image(&s->bs[idx],
> > >+                              NULL,
> > >+                              options,
> > >+                              key,
> > >+                              flags,
> > >+                              false,
> > >+                              &local_err);
> > 
> > This takes two, but definitely not seven.
> > 
> > >+        if (ret < 0) {
> > >+            goto close_exit;
> > >+        }
> > >+        opened[idx] = true;
> > >+    }
> > >+
> > >+    g_free(opened);
> > >+    goto exit;
> > >+
> > >+close_exit:
> > >+    /* cleanup on error */
> > >+    for (i = 0; i < s->total; i++) {
> > >+        if (!opened[i]) {
> > >+            continue;
> > >+        }
> > >+        bdrv_close(s->bs[i]);
> > 
> > You'll probably want bdrv_unref() here.
> 
> I don't think so because to simplify memory management s->bs is an array
> containing all the allocated bs and is freed at once.
> So should bdrv_unref still be used ?
I double checked you are right :)

> 
> > 
> > >+    }
> > >+    g_free(s->bs);
> > >+    g_free(opened);
> > >+exit:
> > >+    /* propagate error */
> > >+    if (error_is_set(&local_err)) {
> > >+        error_propagate(errp, local_err);
> > >+    }
> > >+    return ret;
> > >+}
> > >+
> > >+static void quorum_close(BlockDriverState *bs)
> > >+{
> > >+    BDRVQuorumState *s = bs->opaque;
> > >+    int i;
> > >+
> > >+    for (i = 0; i < s->total; i++) {
> > >+        /* Ensure writes reach stable storage */
> > >+        bdrv_flush(s->bs[i]);
> > >+        /* close manually because we'll free s->bs */
> > >+        bdrv_close(s->bs[i]);
> > 
> > Again, bdrv_unref().
> > 
> > >+    }
> > >+
> > >+    g_free(s->bs);
> > >+}
> > >+
> > >  static BlockDriver bdrv_quorum = {
> > >      .format_name        = "quorum",
> > >      .protocol_name      = "quorum",
> > >      .instance_size      = sizeof(BDRVQuorumState),
> > >+    .bdrv_file_open     = quorum_open,
> > >+    .bdrv_close         = quorum_close,
> > >+
> > >      .bdrv_co_flush_to_disk = quorum_co_flush,
> > >      .bdrv_getlength     = quorum_getlength,
> > >diff --git a/qapi-schema.json b/qapi-schema.json
> > >index 05ced9d..903a3a0 100644
> > >--- a/qapi-schema.json
> > >+++ b/qapi-schema.json
> > >@@ -4352,6 +4352,24 @@
> > >              'raw': 'BlockdevRef' } }
> > >  ##
> > >+# @BlockdevOptionsQuorum
> > >+#
> > >+# Driver specific block device options for Quorum
> > >+#
> > >+# @blkverify:      #optional true if the driver must print content mismatch
> > >+#
> > >+# @children:       the children block device to use
> > >+#
> > >+# @vote_threshold: the vote limit under which a read will fail
> > >+#
> > >+# Since: 2.0
> > >+##
> > >+{ 'type': 'BlockdevOptionsQuorum',
> > >+  'data': { '*blkverify': 'bool',
> > >+            'children': [ 'BlockdevRef' ],
> > >+            'vote-threshold': 'int' } }
> > >+
> > >+##
> > >  # @BlockdevOptions
> > >  #
> > >  # Options for creating a block device.
> > >@@ -4389,7 +4407,8 @@
> > >        'vdi':        'BlockdevOptionsGenericFormat',
> > >        'vhdx':       'BlockdevOptionsGenericFormat',
> > >        'vmdk':       'BlockdevOptionsGenericCOWFormat',
> > >-      'vpc':        'BlockdevOptionsGenericFormat'
> > >+      'vpc':        'BlockdevOptionsGenericFormat',
> > >+      'quorum':     'BlockdevOptionsQuorum'
> > >    } }
> > >  ##
> > 
> > As I've said before, I'd really like it if you could abandon all
> > those functions for parsing the incoming options QDict in favor of
> > qdict_extract_subqdict() and qdict_array_split() which will most
> > likely do the job just fine; and, in fact, I have written
> > qdict_array_split() with Quorum in mind. ;-)
> > 
> > Max
> > 
>
diff mbox

Patch

diff --git a/block/quorum.c b/block/quorum.c
index e7b2090..0c0d630 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -17,8 +17,12 @@ 
 #include <gnutls/crypto.h>
 #include "block/block_int.h"
 #include "qapi/qmp/qjson.h"
+#include "qapi/qmp/types.h"
+#include "qemu-common.h"
 
 #define HASH_LENGTH 32
+#define KEY_PREFIX "children."
+#define KEY_FILENAME_SUFFIX ".file.filename"
 
 /* This union holds a vote hash value */
 typedef union QuorumVoteValue {
@@ -702,12 +706,316 @@  static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
     return false;
 }
 
+static int quorum_match_key(const char *key,
+                            char **key_prefix)
+{
+    const char *start;
+    char *next;
+    unsigned long long idx;
+    int ret;
+
+    *key_prefix = NULL;
+
+    /* the following code is a translation of the following pseudo code:
+     *       match = key.match('(^children\.(\d+)\.)$suffix)
+     *       if not match:
+     *           return -1;
+     *       key_prefix = match.outer_match()
+     *       idx = match.inner_match()
+     *
+     * note: we also match the .file suffix to avoid futher checkings
+     */
+
+    /* this key is not a child */
+    if (strncmp(key, KEY_PREFIX, strlen(KEY_PREFIX))) {
+        return -1;
+    }
+
+    /* take first char after prefix */
+    start = key + strlen(KEY_PREFIX);
+
+    /* if the string end here -> scan fail */
+    if (start[0] == '\0') {
+        return -1;
+    }
+
+    /* try to retrieve the index */
+    ret = parse_uint(start, &idx, &next, 10);
+
+    /* no int found -> scan fail */
+    if (ret < 0) {
+        return -1;
+    }
+
+    /* we are taking a reference via QMP */
+    if (next - key == strlen(key)) {
+        *key_prefix = g_strdup(key);
+        return idx;
+    }
+
+    /* match the suffix to avoid matching the same idx
+     * multiple times and be required to do more checks later
+     */
+    if (strncmp(next, KEY_FILENAME_SUFFIX, strlen(KEY_FILENAME_SUFFIX))) {
+        return -1;
+    }
+
+    /* do not include '.' */
+    int len = next - key;
+    *key_prefix = g_strndup(key, len);
+
+    return idx;
+}
+
+static QDict *quorum_get_children_idx(const QDict *options)
+{
+    const QDictEntry *ent;
+    QDict *result;
+    char *key_prefix;
+    int idx;
+
+    result = qdict_new();
+
+    for (ent = qdict_first(options); ent; ent = qdict_next(options, ent)) {
+        const char *key = qdict_entry_key(ent);
+        idx = quorum_match_key(key,
+                               &key_prefix);
+
+        /* if the result zero or positive we got a key */
+        if (idx < 0) {
+            continue;
+        }
+
+        qdict_put(result, key_prefix, qint_from_int(idx));
+    }
+
+    return result;
+}
+
+static int quorum_fill_validation_array(bool *array,
+                                        const QDict *dict,
+                                        int total,
+                                        Error **errp)
+{
+    const QDictEntry *ent;
+
+    /* fill the checking array with children indexes */
+    for (ent = qdict_first(dict); ent; ent = qdict_next(dict, ent)) {
+        const char *key = qdict_entry_key(ent);
+        int idx = qdict_get_int(dict, key);
+
+        if (idx < 0 || idx >= total) {
+            error_setg(errp,
+                "Children index must be between 0 and children count -1");
+            return -ERANGE;
+        }
+
+        array[idx] = true;
+    }
+
+    return 0;
+}
+
+static int quorum_valid_indexes(const bool *array, int total, Error **errp)
+{
+    int i;
+
+    for (i = 0; i < total; i++) {
+        if (array[i] == true) {
+            continue;
+        }
+
+        error_setg(errp,
+            "All child indexes between 0 and children count -1  must be "
+            " used");
+        return -ERANGE;
+    }
+
+    return 0;
+}
+
+static int quorum_valid_children_indexes(const QDict *dict,
+                                         int total,
+                                         Error **errp)
+{
+    bool *array;
+    int ret = 0;;
+
+    /* allocate indexes checking array and put false in it */
+    array = g_new0(bool, total);
+
+    ret = quorum_fill_validation_array(array, dict, total, errp);
+    if (ret < 0) {
+        goto free_exit;
+    }
+
+    ret = quorum_valid_indexes(array, total, errp);
+free_exit:
+    g_free(array);
+    return ret;
+}
+
+static int quorum_valid_threshold(int threshold,
+                                  int total,
+                                  Error **errp)
+{
+
+    if (threshold < 1) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+                  "vote-threshold", "value >= 1");
+        return -ERANGE;
+    }
+
+    if (threshold > total) {
+        error_setg(errp, "threshold <= children count must be true");
+        return -ERANGE;
+    }
+
+    return 0;
+}
+
+static int quorum_open(BlockDriverState *bs,
+                       QDict *options,
+                       int flags,
+                       Error **errp)
+{
+    BDRVQuorumState *s = bs->opaque;
+    Error *local_err = NULL;
+    const QDictEntry *ent;
+    QDict *idx_dict;
+    bool *opened;
+    const char *value;
+    char *next;
+    int i;
+    int ret = 0;
+
+    /* get a dict of children indexes for validation */
+    idx_dict = quorum_get_children_idx(options);
+
+    /* count how many different children indexes are present and validate */
+    s->total = qdict_size(idx_dict);
+    if (s->total < 2) {
+        error_setg(&local_err,
+                   "Number of provided children must be greater than 1");
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    /* validate that the set of index is coherent */
+    ret = quorum_valid_children_indexes(idx_dict, s->total, &local_err);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    ret = qdict_get_try_int(options, "vote-threshold", -1);
+    /* from QMP */
+    if (ret != -1) {
+        qdict_del(options, "vote-threshold");
+        s->threshold = ret;
+    /* from command line */
+    } else {
+        /* retrieve the threshold option from the command line */
+        value = qdict_get_try_str(options, "vote_threshold");
+        if (!value) {
+            error_setg(&local_err,
+                       "vote_threshold must be provided");
+            ret = -EINVAL;
+            goto exit;
+        }
+        qdict_del(options, "vote_threshold");
+
+        ret = parse_uint(value, (unsigned long long *) &s->threshold, &next, 10);
+
+        /* no int found -> scan fail */
+        if (ret < 0) {
+            error_setg(&local_err,
+                       "invalid voter_threshold specified");
+            ret = -EINVAL;
+            goto exit;
+        }
+    }
+
+    /* and validate it againts s->total */
+    ret = quorum_valid_threshold(s->threshold, s->total, &local_err);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    /* is the driver in blkverify mode */
+    value = qdict_get_try_str(options, "blkverify");
+    if (value && !strcmp(value, "on")  &&
+        s->total == 2 && s->threshold == 2) {
+        s->is_blkverify = true;
+    }
+    qdict_del(options, "blkverify");
+
+    /* allocate the children BlockDriverState array */
+    s->bs = g_new0(BlockDriverState *, s->total);
+    opened = g_new0(bool, s->total);
+
+    /* open children bs */
+    for (ent = qdict_first(idx_dict);
+         ent; ent = qdict_next(idx_dict, ent)) {
+        const char *key = qdict_entry_key(ent);
+        int idx = qdict_get_int(idx_dict, key);
+        ret = bdrv_open_image(&s->bs[idx],
+                              NULL,
+                              options,
+                              key,
+                              flags,
+                              false,
+                              &local_err);
+        if (ret < 0) {
+            goto close_exit;
+        }
+        opened[idx] = true;
+    }
+
+    g_free(opened);
+    goto exit;
+
+close_exit:
+    /* cleanup on error */
+    for (i = 0; i < s->total; i++) {
+        if (!opened[i]) {
+            continue;
+        }
+        bdrv_close(s->bs[i]);
+    }
+    g_free(s->bs);
+    g_free(opened);
+exit:
+    /* propagate error */
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+    }
+    return ret;
+}
+
+static void quorum_close(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->total; i++) {
+        /* Ensure writes reach stable storage */
+        bdrv_flush(s->bs[i]);
+        /* close manually because we'll free s->bs */
+        bdrv_close(s->bs[i]);
+    }
+
+    g_free(s->bs);
+}
+
 static BlockDriver bdrv_quorum = {
     .format_name        = "quorum",
     .protocol_name      = "quorum",
 
     .instance_size      = sizeof(BDRVQuorumState),
 
+    .bdrv_file_open     = quorum_open,
+    .bdrv_close         = quorum_close,
+
     .bdrv_co_flush_to_disk = quorum_co_flush,
 
     .bdrv_getlength     = quorum_getlength,
diff --git a/qapi-schema.json b/qapi-schema.json
index 05ced9d..903a3a0 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4352,6 +4352,24 @@ 
             'raw': 'BlockdevRef' } }
 
 ##
+# @BlockdevOptionsQuorum
+#
+# Driver specific block device options for Quorum
+#
+# @blkverify:      #optional true if the driver must print content mismatch
+#
+# @children:       the children block device to use
+#
+# @vote_threshold: the vote limit under which a read will fail
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsQuorum',
+  'data': { '*blkverify': 'bool',
+            'children': [ 'BlockdevRef' ],
+            'vote-threshold': 'int' } }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.
@@ -4389,7 +4407,8 @@ 
       'vdi':        'BlockdevOptionsGenericFormat',
       'vhdx':       'BlockdevOptionsGenericFormat',
       'vmdk':       'BlockdevOptionsGenericCOWFormat',
-      'vpc':        'BlockdevOptionsGenericFormat'
+      'vpc':        'BlockdevOptionsGenericFormat',
+      'quorum':     'BlockdevOptionsQuorum'
   } }
 
 ##