diff mbox

[V9,06/11] quorum: Add quorum mechanism.

Message ID 1380717564-11098-7-git-send-email-benoit@irqsave.net
State New
Headers show

Commit Message

Benoît Canet Oct. 2, 2013, 12:39 p.m. UTC
Use gnutls's SHA-256 to compare versions.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/Makefile.objs       |   2 +-
 block/quorum.c            | 321 +++++++++++++++++++++++++++++++++++++++++++++-
 configure                 |  36 ++++++
 include/monitor/monitor.h |   2 +
 monitor.c                 |   2 +
 5 files changed, 361 insertions(+), 2 deletions(-)

Comments

Max Reitz Oct. 4, 2013, 2:48 p.m. UTC | #1
On 2013-10-02 14:39, Benoît Canet wrote:
> Use gnutls's SHA-256 to compare versions.
Wouldn't CRC32 suffice? (I don't really oppose using SHA, but taking in 
gnutls as a dependency just for comparing several memory areas seems a 
bit much to me)

> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>   block/Makefile.objs       |   2 +-
>   block/quorum.c            | 321 +++++++++++++++++++++++++++++++++++++++++++++-
>   configure                 |  36 ++++++
>   include/monitor/monitor.h |   2 +
>   monitor.c                 |   2 +
>   5 files changed, 361 insertions(+), 2 deletions(-)
>
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 05a65c2..adcdc21 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
>   block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>   block-obj-y += qed-check.o
>   block-obj-y += vhdx.o
> -block-obj-y += quorum.o
> +block-obj-$(CONFIG_QUORUM) += quorum.o
>   block-obj-y += parallels.o blkdebug.o blkverify.o
>   block-obj-y += snapshot.o qapi.o
>   block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
> diff --git a/block/quorum.c b/block/quorum.c
> index f0fc0e9..e235ac1 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -13,7 +13,43 @@
>    * See the COPYING file in the top-level directory.
>    */
>   
> +#include <gnutls/gnutls.h>
> +#include <gnutls/crypto.h>
>   #include "block/block_int.h"
> +#include "qapi/qmp/qjson.h"
> +
> +#define HASH_LENGTH 32
> +
> +/* This union hold a vote hash value */
*holds

> +typedef union QuorumVoteValue {
> +    char h[HASH_LENGTH];       /* SHA-256 hash */
> +    int64_t l;                 /* simpler 64 bits hash */
> +} QuorumVoteValue;
> +
> +/* A vote item */
> +typedef struct QuorumVoteItem {
> +    int index;
> +    QLIST_ENTRY(QuorumVoteItem) next;
> +} QuorumVoteItem;
> +
> +/* this structure is a vote version. A version is the set of vote sharing the
*set of votes

> + * same vote value.
> + * The set of vote will be tracked with the items field and it's count is
*set of votes or *vote set; also s/it's count/its cardinality/ or 
something like that

> + * vote_count.
> + */
> +typedef struct QuorumVoteVersion {
> +    QuorumVoteValue value;
> +    int index;
> +    int vote_count;
> +    QLIST_HEAD(, QuorumVoteItem) items;
> +    QLIST_ENTRY(QuorumVoteVersion) next;
> +} QuorumVoteVersion;
> +
> +/* this structure hold a group of vote versions together */
*holds

> +typedef struct QuorumVotes {
> +    QLIST_HEAD(, QuorumVoteVersion) vote_list;
> +    int (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
> +} QuorumVotes;
>   
>   /* the following structure hold the state of one quorum instance */
>   typedef struct {
> @@ -60,10 +96,14 @@ struct QuorumAIOCB {
>       int success_count;          /* number of successfully completed AIOCB */
>       bool *finished;             /* completion signal for cancel */
>   
> +    QuorumVotes votes;
> +
>       bool is_read;
>       int vote_ret;
>   };
>   
> +static void quorum_vote(QuorumAIOCB *acb);
> +
>   static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
>   {
>       QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
> @@ -111,6 +151,10 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
>           acb->aios[i].ret = 0;
>       }
>   
> +    if (acb->vote_ret) {
> +        ret = acb->vote_ret;
> +    }
> +
>       acb->common.cb(acb->common.opaque, ret);
>       if (acb->finished) {
>           *acb->finished = true;
> @@ -122,6 +166,11 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
>       qemu_aio_release(acb);
>   }
>   
> +static int quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
> +{
> +    return memcmp(a->h, b->h, HASH_LENGTH);
> +}
> +
>   static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
>                                      BlockDriverState *bs,
>                                      QEMUIOVector *qiov,
> @@ -141,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
>       acb->count = 0;
>       acb->success_count = 0;
>       acb->finished = NULL;
> +    acb->votes.compare = quorum_sha256_compare;
>       acb->is_read = false;
>       acb->vote_ret = 0;
>   
> @@ -170,9 +220,278 @@ static void quorum_aio_cb(void *opaque, int ret)
>           return;
>       }
>   
> +    /* Do the vote on read */
> +    if (acb->is_read) {
> +        quorum_vote(acb);
> +    }
> +
>       quorum_aio_finalize(acb);
>   }
>   
> +static void quorum_report_bad(QuorumAIOCB *acb, int index)
> +{
> +    QObject *data;
> +    data = qobject_from_jsonf("{ 'children-index': %i"
I'd prefer child-index. Generally, remember that the singular of 
"children" is "child".

> +                              ", 'sector-num': %" PRId64
> +                              ", 'sectors-count': %i }",
> +                              index,
> +                              acb->sector_num,
> +                              acb->nb_sectors);
> +    monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
How about decrementing the refcount of data?

> +}
> +
> +static void quorum_report_failure(QuorumAIOCB *acb)
> +{
> +    QObject *data;
> +    data = qobject_from_jsonf("{ 'sector-num': %" PRId64
> +                              ", 'sectors-count': %i }",
> +                              acb->sector_num,
> +                              acb->nb_sectors);
> +    monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
Same here.

> +}
> +
> +static void quorum_report_bad_versions(QuorumAIOCB *acb,
> +                                      QuorumVoteValue *value)
> +{
> +    QuorumVoteVersion *version;
> +    QuorumVoteItem *item;
> +
> +    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> +        if (!acb->votes.compare(&version->value, value)) {
> +            continue;
> +        }
> +        QLIST_FOREACH(item, &version->items, next) {
> +            quorum_report_bad(acb, item->index);
> +        }
> +    }
> +}
> +
> +static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
> +{
> +    int i;
> +    assert(dest->niov == source->niov);
> +    assert(dest->size == source->size);
> +    for (i = 0; i < source->niov; i++) {
> +        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
> +        memcpy(dest->iov[i].iov_base,
> +               source->iov[i].iov_base,
> +               source->iov[i].iov_len);
> +    }
> +}
> +
> +static void quorum_count_vote(QuorumVotes *votes,
> +                              QuorumVoteValue *value,
> +                              int index)
> +{
> +    QuorumVoteVersion *v = NULL, *version = NULL;
> +    QuorumVoteItem *item;
> +
> +    /* look if we have something with this hash */
> +    QLIST_FOREACH(v, &votes->vote_list, next) {
> +        if (!votes->compare(&v->value, value)) {
> +            version = v;
> +            break;
> +        }
> +    }
> +
> +    /* It's a version not yet in the list add it */
> +    if (!version) {
> +        version = g_new0(QuorumVoteVersion, 1);
> +        QLIST_INIT(&version->items);
> +        memcpy(&version->value, value, sizeof(version->value));
> +        version->index = index;
> +        version->vote_count = 0;
> +        QLIST_INSERT_HEAD(&votes->vote_list, version, next);
> +    }
> +
> +    version->vote_count++;
> +
> +    item = g_new0(QuorumVoteItem, 1);
> +    item->index = index;
> +    QLIST_INSERT_HEAD(&version->items, item, next);
> +}
> +
> +static void quorum_free_vote_list(QuorumVotes *votes)
> +{
> +    QuorumVoteVersion *version, *next_version;
> +    QuorumVoteItem *item, *next_item;
> +
> +    QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
> +        QLIST_REMOVE(version, next);
> +        QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
> +            QLIST_REMOVE(item, next);
> +            g_free(item);
> +        }
> +        g_free(version);
> +    }
> +}
> +
> +static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
> +{
> +    int j, ret;
> +    gnutls_hash_hd_t dig;
> +    QEMUIOVector *qiov = &acb->aios[i].qiov;
> +
> +    ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
> +
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    for (j = 0; j < qiov->niov; j++) {
> +        ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
> +        if (ret < 0) {
> +            return ret;
gnutls_hash_deinit?

> +        }
> +    }
> +
> +    gnutls_hash_deinit(dig, (void *) hash);
> +
> +    return 0;
> +}
> +
> +static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
> +{
> +    int i = 0;
> +    QuorumVoteVersion *candidate, *winner = NULL;
> +
> +    QLIST_FOREACH(candidate, &votes->vote_list, next) {
> +        if (candidate->vote_count > i) {
> +            i = candidate->vote_count;
> +            winner = candidate;
> +        }
> +    }
> +
> +    return winner;
> +}
> +
> +static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
> +{
> +    int i;
> +    int result;
> +
> +    assert(a->niov == b->niov);
> +    for (i = 0; i < a->niov; i++) {
> +        assert(a->iov[i].iov_len == b->iov[i].iov_len);
> +        result = memcmp(a->iov[i].iov_base,
> +                        b->iov[i].iov_base,
> +                        a->iov[i].iov_len);
> +        if (result) {
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
> +static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
> +                                          const char *fmt, ...)
> +{
> +    va_list ap;
> +
> +    va_start(ap, fmt);
> +    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
> +            acb->sector_num, acb->nb_sectors);
> +    vfprintf(stderr, fmt, ap);
> +    fprintf(stderr, "\n");
> +    va_end(ap);
> +    exit(1);
> +}
> +
> +static bool quorum_compare(QuorumAIOCB *acb,
> +                           QEMUIOVector *a,
> +                           QEMUIOVector *b)
> +{
> +    BDRVQuorumState *s = acb->bqs;
> +    ssize_t offset;
> +
> +    /* This driver will replace blkverify in this particular case */
> +    if (s->is_blkverify) {
> +        offset = qemu_iovec_compare(a, b);
> +        if (offset != -1) {
> +            quorum_err(acb, "contents mismatch in sector %" PRId64,
> +                       acb->sector_num +
> +                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
> +        }
> +        return true;
> +    }
> +
> +    return quorum_iovec_compare(a, b);
> +}
> +
> +static void quorum_vote(QuorumAIOCB *acb)
> +{
> +    bool quorum = true;
> +    int i, j, ret;
> +    QuorumVoteValue hash;
> +    BDRVQuorumState *s = acb->bqs;
> +    QuorumVoteVersion *winner;
> +
> +    /* get the index of the first successful read */
> +    for (i = 0; i < s->total; i++) {
> +        if (!acb->aios[i].ret) {
> +            break;
> +        }
> +    }
> +
> +    /* compare this read with all other successful read looking for quorum */
> +    for (j = i + 1; j < s->total; j++) {
> +        if (acb->aios[j].ret) {
> +            continue;
> +        }
> +        quorum = quorum_compare(acb, &acb->aios[i].qiov, &acb->aios[j].qiov);
> +        if (!quorum) {
> +            break;
> +       }
> +    }
> +
> +    /* Every successful read agrees -> Quorum */
> +    if (quorum) {
> +        quorum_copy_qiov(acb->qiov, &acb->aios[i].qiov);
If no reads were successful at all, quorum will be true and i will be 
s->total. Therefore, this will result in an array access with an index 
out of bounds here.

Generally, why is this function executed at all if any read failed? If a 
read fails, the quorum read function will return an error, thus the 
caller will ignore the result anyway.

> +        return;
> +    }
> +
> +    /* compute hashs for each successful read, also store indexes */
> +    for (i = 0; i < s->total; i++) {
> +        if (acb->aios[i].ret) {
> +            continue;
> +        }
> +        ret = quorum_compute_hash(acb, i, &hash);
> +        /* if ever the hash computation failed */
> +        if (ret < 0) {
> +            acb->vote_ret = ret;
> +            goto free_exit;
> +        }
> +        quorum_count_vote(&acb->votes, &hash, i);
> +    }
> +
> +    /* vote to select the most represented version */
> +    winner = quorum_get_vote_winner(&acb->votes);
> +    /* every vote version are differents -> error */
> +    if (!winner) {
> +        quorum_report_failure(acb);
> +        acb->vote_ret = -EIO;
> +        goto free_exit;
> +    }
> +
> +    /* if the winner count is smaller then threshold read fail */
s/then/than/, s/ read fail/, the read fails/

> +    if (winner->vote_count < s->threshold) {
> +        quorum_report_failure(acb);
> +        acb->vote_ret = -EIO;
> +        goto free_exit;
> +    }
> +
> +    /* we have a winner: copy it */
> +    quorum_copy_qiov(acb->qiov, &acb->aios[winner->index].qiov);
> +
> +    /* some versions are bad print them */
> +    quorum_report_bad_versions(acb, &winner->value);
> +
> +free_exit:
> +    /* free lists */
> +    quorum_free_vote_list(&acb->votes);
> +}
> +
>   static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
>                                            int64_t sector_num,
>                                            QEMUIOVector *qiov,
> @@ -194,7 +513,7 @@ static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
>       }
>   
>       for (i = 0; i < s->total; i++) {
> -        bdrv_aio_readv(&s->bs[i], sector_num, qiov, nb_sectors,
> +        bdrv_aio_readv(&s->bs[i], sector_num, &acb->aios[i].qiov, nb_sectors,
>                          quorum_aio_cb, &acb->aios[i]);
>       }
>   
> diff --git a/configure b/configure
> index 23dbaaf..efbe7d9 100755
> --- a/configure
> +++ b/configure
> @@ -246,6 +246,7 @@ gtk=""
>   gtkabi="2.0"
>   tpm="no"
>   libssh2=""
> +quorum="no"
>   
>   # parse CC options first
>   for opt do
> @@ -972,6 +973,10 @@ for opt do
>     ;;
>     --enable-libssh2) libssh2="yes"
>     ;;
> +  --disable-quorum) quorum="no"
> +  ;;
> +  --enable-quorum) quorum="yes"
> +  ;;
>     *) echo "ERROR: unknown option $opt"; show_help="yes"
>     ;;
>     esac
> @@ -1203,6 +1208,8 @@ echo "  --gcov=GCOV              use specified gcov [$gcov_tool]"
>   echo "  --enable-tpm             enable TPM support"
>   echo "  --disable-libssh2        disable ssh block device support"
>   echo "  --enable-libssh2         enable ssh block device support"
> +echo "  --disable-quorum         disable quorum block filter support"
> +echo "  --enable-quorum          enable quorum block filter support"
>   echo ""
>   echo "NOTE: The object files are built at the place where configure is launched"
>   exit 1
> @@ -1895,6 +1902,30 @@ EOF
>   fi
>   
>   ##########################################
> +# Quorum probe (check for gnutls)
> +if test "$quorum" != "no" ; then
> +cat > $TMPC <<EOF
> +#include <gnutls/gnutls.h>
> +#include <gnutls/crypto.h>
> +int main(void) {char data[4096], digest[32];
> +gnutls_hash_fast(GNUTLS_DIG_SHA256, data, 4096, digest);
> +return 0;
> +}
> +EOF
> +quorum_tls_cflags=`$pkg_config --cflags gnutls 2> /dev/null`
> +quorum_tls_libs=`$pkg_config --libs gnutls 2> /dev/null`
> +if compile_prog "$quorum_tls_cflags" "$quorum_tls_libs" ; then
> +  qcow_tls=yes
> +  libs_softmmu="$quorum_tls_libs $libs_softmmu"
> +  libs_tools="$quorum_tls_libs $libs_softmmu"
> +  QEMU_CFLAGS="$QEMU_CFLAGS $quorum_tls_cflags"
> +else
> +  echo "gnutls > 2.10.0 required to compile Quorum"
> +  exit 1
> +fi
> +fi
> +
> +##########################################
>   # VNC SASL detection
>   if test "$vnc" = "yes" -a "$vnc_sasl" != "no" ; then
>     cat > $TMPC <<EOF
> @@ -3758,6 +3789,7 @@ echo "TPM support       $tpm"
>   echo "libssh2 support   $libssh2"
>   echo "TPM passthrough   $tpm_passthrough"
>   echo "QOM debugging     $qom_cast_debug"
> +echo "Quorum            $quorum"
>   
>   if test "$sdl_too_old" = "yes"; then
>   echo "-> Your SDL version is too old - please upgrade to have SDL support"
> @@ -4156,6 +4188,10 @@ if test "$libssh2" = "yes" ; then
>     echo "CONFIG_LIBSSH2=y" >> $config_host_mak
>   fi
>   
> +if test "$quorum" = "yes" ; then
> +  echo "CONFIG_QUORUM=y" >> $config_host_mak
> +fi
> +
>   if test "$virtio_blk_data_plane" = "yes" ; then
>     echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
>   fi
> diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> index 10fa0e3..51902ed 100644
> --- a/include/monitor/monitor.h
> +++ b/include/monitor/monitor.h
> @@ -49,6 +49,8 @@ typedef enum MonitorEvent {
>       QEVENT_SPICE_MIGRATE_COMPLETED,
>       QEVENT_GUEST_PANICKED,
>       QEVENT_BLOCK_IMAGE_CORRUPTED,
> +    QEVENT_QUORUM_FAILURE,
> +    QEVENT_QUORUM_REPORT_BAD,
>   
>       /* Add to 'monitor_event_names' array in monitor.c when
>        * defining new events here */
> diff --git a/monitor.c b/monitor.c
> index 74f3f1b..89d139f 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -507,6 +507,8 @@ static const char *monitor_event_names[] = {
>       [QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED",
>       [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED",
>       [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED",
> +    [QEVENT_QUORUM_FAILURE] = "QUORUM_FAILURE",
> +    [QEVENT_QUORUM_REPORT_BAD] = "QUORUM_REPORT_BAD",
Could you document these events in docs/qmp/qmp-events.txt?

Max

>   };
>   QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
>
Benoît Canet Oct. 28, 2013, 12:31 p.m. UTC | #2
Le Friday 04 Oct 2013 à 16:48:12 (+0200), Max Reitz a écrit :
> On 2013-10-02 14:39, Benoît Canet wrote:
> >Use gnutls's SHA-256 to compare versions.
> Wouldn't CRC32 suffice? (I don't really oppose using SHA, but taking
> in gnutls as a dependency just for comparing several memory areas
> seems a bit much to me)

Initially it gzip's addler32 was used but someone was concerned with the risk
of collisions.
Anyway the code fallback using hashes only when something wrong is detected so
it won't impact the normal case.

Best regards

Benoît

> 
> >Signed-off-by: Benoit Canet <benoit@irqsave.net>
> >---
> >  block/Makefile.objs       |   2 +-
> >  block/quorum.c            | 321 +++++++++++++++++++++++++++++++++++++++++++++-
> >  configure                 |  36 ++++++
> >  include/monitor/monitor.h |   2 +
> >  monitor.c                 |   2 +
> >  5 files changed, 361 insertions(+), 2 deletions(-)
> >
> >diff --git a/block/Makefile.objs b/block/Makefile.objs
> >index 05a65c2..adcdc21 100644
> >--- a/block/Makefile.objs
> >+++ b/block/Makefile.objs
> >@@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
> >  block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
> >  block-obj-y += qed-check.o
> >  block-obj-y += vhdx.o
> >-block-obj-y += quorum.o
> >+block-obj-$(CONFIG_QUORUM) += quorum.o
> >  block-obj-y += parallels.o blkdebug.o blkverify.o
> >  block-obj-y += snapshot.o qapi.o
> >  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
> >diff --git a/block/quorum.c b/block/quorum.c
> >index f0fc0e9..e235ac1 100644
> >--- a/block/quorum.c
> >+++ b/block/quorum.c
> >@@ -13,7 +13,43 @@
> >   * See the COPYING file in the top-level directory.
> >   */
> >+#include <gnutls/gnutls.h>
> >+#include <gnutls/crypto.h>
> >  #include "block/block_int.h"
> >+#include "qapi/qmp/qjson.h"
> >+
> >+#define HASH_LENGTH 32
> >+
> >+/* This union hold a vote hash value */
> *holds
> 
> >+typedef union QuorumVoteValue {
> >+    char h[HASH_LENGTH];       /* SHA-256 hash */
> >+    int64_t l;                 /* simpler 64 bits hash */
> >+} QuorumVoteValue;
> >+
> >+/* A vote item */
> >+typedef struct QuorumVoteItem {
> >+    int index;
> >+    QLIST_ENTRY(QuorumVoteItem) next;
> >+} QuorumVoteItem;
> >+
> >+/* this structure is a vote version. A version is the set of vote sharing the
> *set of votes
> 
> >+ * same vote value.
> >+ * The set of vote will be tracked with the items field and it's count is
> *set of votes or *vote set; also s/it's count/its cardinality/ or
> something like that
> 
> >+ * vote_count.
> >+ */
> >+typedef struct QuorumVoteVersion {
> >+    QuorumVoteValue value;
> >+    int index;
> >+    int vote_count;
> >+    QLIST_HEAD(, QuorumVoteItem) items;
> >+    QLIST_ENTRY(QuorumVoteVersion) next;
> >+} QuorumVoteVersion;
> >+
> >+/* this structure hold a group of vote versions together */
> *holds
> 
> >+typedef struct QuorumVotes {
> >+    QLIST_HEAD(, QuorumVoteVersion) vote_list;
> >+    int (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
> >+} QuorumVotes;
> >  /* the following structure hold the state of one quorum instance */
> >  typedef struct {
> >@@ -60,10 +96,14 @@ struct QuorumAIOCB {
> >      int success_count;          /* number of successfully completed AIOCB */
> >      bool *finished;             /* completion signal for cancel */
> >+    QuorumVotes votes;
> >+
> >      bool is_read;
> >      int vote_ret;
> >  };
> >+static void quorum_vote(QuorumAIOCB *acb);
> >+
> >  static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
> >  {
> >      QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
> >@@ -111,6 +151,10 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
> >          acb->aios[i].ret = 0;
> >      }
> >+    if (acb->vote_ret) {
> >+        ret = acb->vote_ret;
> >+    }
> >+
> >      acb->common.cb(acb->common.opaque, ret);
> >      if (acb->finished) {
> >          *acb->finished = true;
> >@@ -122,6 +166,11 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
> >      qemu_aio_release(acb);
> >  }
> >+static int quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
> >+{
> >+    return memcmp(a->h, b->h, HASH_LENGTH);
> >+}
> >+
> >  static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
> >                                     BlockDriverState *bs,
> >                                     QEMUIOVector *qiov,
> >@@ -141,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
> >      acb->count = 0;
> >      acb->success_count = 0;
> >      acb->finished = NULL;
> >+    acb->votes.compare = quorum_sha256_compare;
> >      acb->is_read = false;
> >      acb->vote_ret = 0;
> >@@ -170,9 +220,278 @@ static void quorum_aio_cb(void *opaque, int ret)
> >          return;
> >      }
> >+    /* Do the vote on read */
> >+    if (acb->is_read) {
> >+        quorum_vote(acb);
> >+    }
> >+
> >      quorum_aio_finalize(acb);
> >  }
> >+static void quorum_report_bad(QuorumAIOCB *acb, int index)
> >+{
> >+    QObject *data;
> >+    data = qobject_from_jsonf("{ 'children-index': %i"
> I'd prefer child-index. Generally, remember that the singular of
> "children" is "child".
> 
> >+                              ", 'sector-num': %" PRId64
> >+                              ", 'sectors-count': %i }",
> >+                              index,
> >+                              acb->sector_num,
> >+                              acb->nb_sectors);
> >+    monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
> How about decrementing the refcount of data?
> 
> >+}
> >+
> >+static void quorum_report_failure(QuorumAIOCB *acb)
> >+{
> >+    QObject *data;
> >+    data = qobject_from_jsonf("{ 'sector-num': %" PRId64
> >+                              ", 'sectors-count': %i }",
> >+                              acb->sector_num,
> >+                              acb->nb_sectors);
> >+    monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
> Same here.
> 
> >+}
> >+
> >+static void quorum_report_bad_versions(QuorumAIOCB *acb,
> >+                                      QuorumVoteValue *value)
> >+{
> >+    QuorumVoteVersion *version;
> >+    QuorumVoteItem *item;
> >+
> >+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> >+        if (!acb->votes.compare(&version->value, value)) {
> >+            continue;
> >+        }
> >+        QLIST_FOREACH(item, &version->items, next) {
> >+            quorum_report_bad(acb, item->index);
> >+        }
> >+    }
> >+}
> >+
> >+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
> >+{
> >+    int i;
> >+    assert(dest->niov == source->niov);
> >+    assert(dest->size == source->size);
> >+    for (i = 0; i < source->niov; i++) {
> >+        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
> >+        memcpy(dest->iov[i].iov_base,
> >+               source->iov[i].iov_base,
> >+               source->iov[i].iov_len);
> >+    }
> >+}
> >+
> >+static void quorum_count_vote(QuorumVotes *votes,
> >+                              QuorumVoteValue *value,
> >+                              int index)
> >+{
> >+    QuorumVoteVersion *v = NULL, *version = NULL;
> >+    QuorumVoteItem *item;
> >+
> >+    /* look if we have something with this hash */
> >+    QLIST_FOREACH(v, &votes->vote_list, next) {
> >+        if (!votes->compare(&v->value, value)) {
> >+            version = v;
> >+            break;
> >+        }
> >+    }
> >+
> >+    /* It's a version not yet in the list add it */
> >+    if (!version) {
> >+        version = g_new0(QuorumVoteVersion, 1);
> >+        QLIST_INIT(&version->items);
> >+        memcpy(&version->value, value, sizeof(version->value));
> >+        version->index = index;
> >+        version->vote_count = 0;
> >+        QLIST_INSERT_HEAD(&votes->vote_list, version, next);
> >+    }
> >+
> >+    version->vote_count++;
> >+
> >+    item = g_new0(QuorumVoteItem, 1);
> >+    item->index = index;
> >+    QLIST_INSERT_HEAD(&version->items, item, next);
> >+}
> >+
> >+static void quorum_free_vote_list(QuorumVotes *votes)
> >+{
> >+    QuorumVoteVersion *version, *next_version;
> >+    QuorumVoteItem *item, *next_item;
> >+
> >+    QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
> >+        QLIST_REMOVE(version, next);
> >+        QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
> >+            QLIST_REMOVE(item, next);
> >+            g_free(item);
> >+        }
> >+        g_free(version);
> >+    }
> >+}
> >+
> >+static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
> >+{
> >+    int j, ret;
> >+    gnutls_hash_hd_t dig;
> >+    QEMUIOVector *qiov = &acb->aios[i].qiov;
> >+
> >+    ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
> >+
> >+    if (ret < 0) {
> >+        return ret;
> >+    }
> >+
> >+    for (j = 0; j < qiov->niov; j++) {
> >+        ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
> >+        if (ret < 0) {
> >+            return ret;
> gnutls_hash_deinit?
> 
> >+        }
> >+    }
> >+
> >+    gnutls_hash_deinit(dig, (void *) hash);
> >+
> >+    return 0;
> >+}
> >+
> >+static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
> >+{
> >+    int i = 0;
> >+    QuorumVoteVersion *candidate, *winner = NULL;
> >+
> >+    QLIST_FOREACH(candidate, &votes->vote_list, next) {
> >+        if (candidate->vote_count > i) {
> >+            i = candidate->vote_count;
> >+            winner = candidate;
> >+        }
> >+    }
> >+
> >+    return winner;
> >+}
> >+
> >+static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
> >+{
> >+    int i;
> >+    int result;
> >+
> >+    assert(a->niov == b->niov);
> >+    for (i = 0; i < a->niov; i++) {
> >+        assert(a->iov[i].iov_len == b->iov[i].iov_len);
> >+        result = memcmp(a->iov[i].iov_base,
> >+                        b->iov[i].iov_base,
> >+                        a->iov[i].iov_len);
> >+        if (result) {
> >+            return false;
> >+        }
> >+    }
> >+
> >+    return true;
> >+}
> >+
> >+static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
> >+                                          const char *fmt, ...)
> >+{
> >+    va_list ap;
> >+
> >+    va_start(ap, fmt);
> >+    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
> >+            acb->sector_num, acb->nb_sectors);
> >+    vfprintf(stderr, fmt, ap);
> >+    fprintf(stderr, "\n");
> >+    va_end(ap);
> >+    exit(1);
> >+}
> >+
> >+static bool quorum_compare(QuorumAIOCB *acb,
> >+                           QEMUIOVector *a,
> >+                           QEMUIOVector *b)
> >+{
> >+    BDRVQuorumState *s = acb->bqs;
> >+    ssize_t offset;
> >+
> >+    /* This driver will replace blkverify in this particular case */
> >+    if (s->is_blkverify) {
> >+        offset = qemu_iovec_compare(a, b);
> >+        if (offset != -1) {
> >+            quorum_err(acb, "contents mismatch in sector %" PRId64,
> >+                       acb->sector_num +
> >+                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
> >+        }
> >+        return true;
> >+    }
> >+
> >+    return quorum_iovec_compare(a, b);
> >+}
> >+
> >+static void quorum_vote(QuorumAIOCB *acb)
> >+{
> >+    bool quorum = true;
> >+    int i, j, ret;
> >+    QuorumVoteValue hash;
> >+    BDRVQuorumState *s = acb->bqs;
> >+    QuorumVoteVersion *winner;
> >+
> >+    /* get the index of the first successful read */
> >+    for (i = 0; i < s->total; i++) {
> >+        if (!acb->aios[i].ret) {
> >+            break;
> >+        }
> >+    }
> >+
> >+    /* compare this read with all other successful read looking for quorum */
> >+    for (j = i + 1; j < s->total; j++) {
> >+        if (acb->aios[j].ret) {
> >+            continue;
> >+        }
> >+        quorum = quorum_compare(acb, &acb->aios[i].qiov, &acb->aios[j].qiov);
> >+        if (!quorum) {
> >+            break;
> >+       }
> >+    }
> >+
> >+    /* Every successful read agrees -> Quorum */
> >+    if (quorum) {
> >+        quorum_copy_qiov(acb->qiov, &acb->aios[i].qiov);
> If no reads were successful at all, quorum will be true and i will
> be s->total. Therefore, this will result in an array access with an
> index out of bounds here.
> 
> Generally, why is this function executed at all if any read failed?
> If a read fails, the quorum read function will return an error, thus
> the caller will ignore the result anyway.
> 
> >+        return;
> >+    }
> >+
> >+    /* compute hashs for each successful read, also store indexes */
> >+    for (i = 0; i < s->total; i++) {
> >+        if (acb->aios[i].ret) {
> >+            continue;
> >+        }
> >+        ret = quorum_compute_hash(acb, i, &hash);
> >+        /* if ever the hash computation failed */
> >+        if (ret < 0) {
> >+            acb->vote_ret = ret;
> >+            goto free_exit;
> >+        }
> >+        quorum_count_vote(&acb->votes, &hash, i);
> >+    }
> >+
> >+    /* vote to select the most represented version */
> >+    winner = quorum_get_vote_winner(&acb->votes);
> >+    /* every vote version are differents -> error */
> >+    if (!winner) {
> >+        quorum_report_failure(acb);
> >+        acb->vote_ret = -EIO;
> >+        goto free_exit;
> >+    }
> >+
> >+    /* if the winner count is smaller then threshold read fail */
> s/then/than/, s/ read fail/, the read fails/
> 
> >+    if (winner->vote_count < s->threshold) {
> >+        quorum_report_failure(acb);
> >+        acb->vote_ret = -EIO;
> >+        goto free_exit;
> >+    }
> >+
> >+    /* we have a winner: copy it */
> >+    quorum_copy_qiov(acb->qiov, &acb->aios[winner->index].qiov);
> >+
> >+    /* some versions are bad print them */
> >+    quorum_report_bad_versions(acb, &winner->value);
> >+
> >+free_exit:
> >+    /* free lists */
> >+    quorum_free_vote_list(&acb->votes);
> >+}
> >+
> >  static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
> >                                           int64_t sector_num,
> >                                           QEMUIOVector *qiov,
> >@@ -194,7 +513,7 @@ static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
> >      }
> >      for (i = 0; i < s->total; i++) {
> >-        bdrv_aio_readv(&s->bs[i], sector_num, qiov, nb_sectors,
> >+        bdrv_aio_readv(&s->bs[i], sector_num, &acb->aios[i].qiov, nb_sectors,
> >                         quorum_aio_cb, &acb->aios[i]);
> >      }
> >diff --git a/configure b/configure
> >index 23dbaaf..efbe7d9 100755
> >--- a/configure
> >+++ b/configure
> >@@ -246,6 +246,7 @@ gtk=""
> >  gtkabi="2.0"
> >  tpm="no"
> >  libssh2=""
> >+quorum="no"
> >  # parse CC options first
> >  for opt do
> >@@ -972,6 +973,10 @@ for opt do
> >    ;;
> >    --enable-libssh2) libssh2="yes"
> >    ;;
> >+  --disable-quorum) quorum="no"
> >+  ;;
> >+  --enable-quorum) quorum="yes"
> >+  ;;
> >    *) echo "ERROR: unknown option $opt"; show_help="yes"
> >    ;;
> >    esac
> >@@ -1203,6 +1208,8 @@ echo "  --gcov=GCOV              use specified gcov [$gcov_tool]"
> >  echo "  --enable-tpm             enable TPM support"
> >  echo "  --disable-libssh2        disable ssh block device support"
> >  echo "  --enable-libssh2         enable ssh block device support"
> >+echo "  --disable-quorum         disable quorum block filter support"
> >+echo "  --enable-quorum          enable quorum block filter support"
> >  echo ""
> >  echo "NOTE: The object files are built at the place where configure is launched"
> >  exit 1
> >@@ -1895,6 +1902,30 @@ EOF
> >  fi
> >  ##########################################
> >+# Quorum probe (check for gnutls)
> >+if test "$quorum" != "no" ; then
> >+cat > $TMPC <<EOF
> >+#include <gnutls/gnutls.h>
> >+#include <gnutls/crypto.h>
> >+int main(void) {char data[4096], digest[32];
> >+gnutls_hash_fast(GNUTLS_DIG_SHA256, data, 4096, digest);
> >+return 0;
> >+}
> >+EOF
> >+quorum_tls_cflags=`$pkg_config --cflags gnutls 2> /dev/null`
> >+quorum_tls_libs=`$pkg_config --libs gnutls 2> /dev/null`
> >+if compile_prog "$quorum_tls_cflags" "$quorum_tls_libs" ; then
> >+  qcow_tls=yes
> >+  libs_softmmu="$quorum_tls_libs $libs_softmmu"
> >+  libs_tools="$quorum_tls_libs $libs_softmmu"
> >+  QEMU_CFLAGS="$QEMU_CFLAGS $quorum_tls_cflags"
> >+else
> >+  echo "gnutls > 2.10.0 required to compile Quorum"
> >+  exit 1
> >+fi
> >+fi
> >+
> >+##########################################
> >  # VNC SASL detection
> >  if test "$vnc" = "yes" -a "$vnc_sasl" != "no" ; then
> >    cat > $TMPC <<EOF
> >@@ -3758,6 +3789,7 @@ echo "TPM support       $tpm"
> >  echo "libssh2 support   $libssh2"
> >  echo "TPM passthrough   $tpm_passthrough"
> >  echo "QOM debugging     $qom_cast_debug"
> >+echo "Quorum            $quorum"
> >  if test "$sdl_too_old" = "yes"; then
> >  echo "-> Your SDL version is too old - please upgrade to have SDL support"
> >@@ -4156,6 +4188,10 @@ if test "$libssh2" = "yes" ; then
> >    echo "CONFIG_LIBSSH2=y" >> $config_host_mak
> >  fi
> >+if test "$quorum" = "yes" ; then
> >+  echo "CONFIG_QUORUM=y" >> $config_host_mak
> >+fi
> >+
> >  if test "$virtio_blk_data_plane" = "yes" ; then
> >    echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
> >  fi
> >diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> >index 10fa0e3..51902ed 100644
> >--- a/include/monitor/monitor.h
> >+++ b/include/monitor/monitor.h
> >@@ -49,6 +49,8 @@ typedef enum MonitorEvent {
> >      QEVENT_SPICE_MIGRATE_COMPLETED,
> >      QEVENT_GUEST_PANICKED,
> >      QEVENT_BLOCK_IMAGE_CORRUPTED,
> >+    QEVENT_QUORUM_FAILURE,
> >+    QEVENT_QUORUM_REPORT_BAD,
> >      /* Add to 'monitor_event_names' array in monitor.c when
> >       * defining new events here */
> >diff --git a/monitor.c b/monitor.c
> >index 74f3f1b..89d139f 100644
> >--- a/monitor.c
> >+++ b/monitor.c
> >@@ -507,6 +507,8 @@ static const char *monitor_event_names[] = {
> >      [QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED",
> >      [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED",
> >      [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED",
> >+    [QEVENT_QUORUM_FAILURE] = "QUORUM_FAILURE",
> >+    [QEVENT_QUORUM_REPORT_BAD] = "QUORUM_REPORT_BAD",
> Could you document these events in docs/qmp/qmp-events.txt?
> 
> Max
> 
> >  };
> >  QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
> 
>
Benoît Canet Oct. 28, 2013, 1:04 p.m. UTC | #3
Le Friday 04 Oct 2013 à 16:48:12 (+0200), Max Reitz a écrit :
> On 2013-10-02 14:39, Benoît Canet wrote:
> >Use gnutls's SHA-256 to compare versions.
> Wouldn't CRC32 suffice? (I don't really oppose using SHA, but taking
> in gnutls as a dependency just for comparing several memory areas
> seems a bit much to me)
> 
> >Signed-off-by: Benoit Canet <benoit@irqsave.net>
> >---
> >  block/Makefile.objs       |   2 +-
> >  block/quorum.c            | 321 +++++++++++++++++++++++++++++++++++++++++++++-
> >  configure                 |  36 ++++++
> >  include/monitor/monitor.h |   2 +
> >  monitor.c                 |   2 +
> >  5 files changed, 361 insertions(+), 2 deletions(-)
> >
> >diff --git a/block/Makefile.objs b/block/Makefile.objs
> >index 05a65c2..adcdc21 100644
> >--- a/block/Makefile.objs
> >+++ b/block/Makefile.objs
> >@@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
> >  block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
> >  block-obj-y += qed-check.o
> >  block-obj-y += vhdx.o
> >-block-obj-y += quorum.o
> >+block-obj-$(CONFIG_QUORUM) += quorum.o
> >  block-obj-y += parallels.o blkdebug.o blkverify.o
> >  block-obj-y += snapshot.o qapi.o
> >  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
> >diff --git a/block/quorum.c b/block/quorum.c
> >index f0fc0e9..e235ac1 100644
> >--- a/block/quorum.c
> >+++ b/block/quorum.c
> >@@ -13,7 +13,43 @@
> >   * See the COPYING file in the top-level directory.
> >   */
> >+#include <gnutls/gnutls.h>
> >+#include <gnutls/crypto.h>
> >  #include "block/block_int.h"
> >+#include "qapi/qmp/qjson.h"
> >+
> >+#define HASH_LENGTH 32
> >+
> >+/* This union hold a vote hash value */
> *holds
> 
> >+typedef union QuorumVoteValue {
> >+    char h[HASH_LENGTH];       /* SHA-256 hash */
> >+    int64_t l;                 /* simpler 64 bits hash */
> >+} QuorumVoteValue;
> >+
> >+/* A vote item */
> >+typedef struct QuorumVoteItem {
> >+    int index;
> >+    QLIST_ENTRY(QuorumVoteItem) next;
> >+} QuorumVoteItem;
> >+
> >+/* this structure is a vote version. A version is the set of vote sharing the
> *set of votes
> 
> >+ * same vote value.
> >+ * The set of vote will be tracked with the items field and it's count is
> *set of votes or *vote set; also s/it's count/its cardinality/ or
> something like that
> 
> >+ * vote_count.
> >+ */
> >+typedef struct QuorumVoteVersion {
> >+    QuorumVoteValue value;
> >+    int index;
> >+    int vote_count;
> >+    QLIST_HEAD(, QuorumVoteItem) items;
> >+    QLIST_ENTRY(QuorumVoteVersion) next;
> >+} QuorumVoteVersion;
> >+
> >+/* this structure hold a group of vote versions together */
> *holds
> 
> >+typedef struct QuorumVotes {
> >+    QLIST_HEAD(, QuorumVoteVersion) vote_list;
> >+    int (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
> >+} QuorumVotes;
> >  /* the following structure hold the state of one quorum instance */
> >  typedef struct {
> >@@ -60,10 +96,14 @@ struct QuorumAIOCB {
> >      int success_count;          /* number of successfully completed AIOCB */
> >      bool *finished;             /* completion signal for cancel */
> >+    QuorumVotes votes;
> >+
> >      bool is_read;
> >      int vote_ret;
> >  };
> >+static void quorum_vote(QuorumAIOCB *acb);
> >+
> >  static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
> >  {
> >      QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
> >@@ -111,6 +151,10 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
> >          acb->aios[i].ret = 0;
> >      }
> >+    if (acb->vote_ret) {
> >+        ret = acb->vote_ret;
> >+    }
> >+
> >      acb->common.cb(acb->common.opaque, ret);
> >      if (acb->finished) {
> >          *acb->finished = true;
> >@@ -122,6 +166,11 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
> >      qemu_aio_release(acb);
> >  }
> >+static int quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
> >+{
> >+    return memcmp(a->h, b->h, HASH_LENGTH);
> >+}
> >+
> >  static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
> >                                     BlockDriverState *bs,
> >                                     QEMUIOVector *qiov,
> >@@ -141,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
> >      acb->count = 0;
> >      acb->success_count = 0;
> >      acb->finished = NULL;
> >+    acb->votes.compare = quorum_sha256_compare;
> >      acb->is_read = false;
> >      acb->vote_ret = 0;
> >@@ -170,9 +220,278 @@ static void quorum_aio_cb(void *opaque, int ret)
> >          return;
> >      }
> >+    /* Do the vote on read */
> >+    if (acb->is_read) {
> >+        quorum_vote(acb);
> >+    }
> >+
> >      quorum_aio_finalize(acb);
> >  }
> >+static void quorum_report_bad(QuorumAIOCB *acb, int index)
> >+{
> >+    QObject *data;
> >+    data = qobject_from_jsonf("{ 'children-index': %i"
> I'd prefer child-index. Generally, remember that the singular of
> "children" is "child".
> 
> >+                              ", 'sector-num': %" PRId64
> >+                              ", 'sectors-count': %i }",
> >+                              index,
> >+                              acb->sector_num,
> >+                              acb->nb_sectors);
> >+    monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
> How about decrementing the refcount of data?
> 
> >+}
> >+
> >+static void quorum_report_failure(QuorumAIOCB *acb)
> >+{
> >+    QObject *data;
> >+    data = qobject_from_jsonf("{ 'sector-num': %" PRId64
> >+                              ", 'sectors-count': %i }",
> >+                              acb->sector_num,
> >+                              acb->nb_sectors);
> >+    monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
> Same here.
> 
> >+}
> >+
> >+static void quorum_report_bad_versions(QuorumAIOCB *acb,
> >+                                      QuorumVoteValue *value)
> >+{
> >+    QuorumVoteVersion *version;
> >+    QuorumVoteItem *item;
> >+
> >+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> >+        if (!acb->votes.compare(&version->value, value)) {
> >+            continue;
> >+        }
> >+        QLIST_FOREACH(item, &version->items, next) {
> >+            quorum_report_bad(acb, item->index);
> >+        }
> >+    }
> >+}
> >+
> >+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
> >+{
> >+    int i;
> >+    assert(dest->niov == source->niov);
> >+    assert(dest->size == source->size);
> >+    for (i = 0; i < source->niov; i++) {
> >+        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
> >+        memcpy(dest->iov[i].iov_base,
> >+               source->iov[i].iov_base,
> >+               source->iov[i].iov_len);
> >+    }
> >+}
> >+
> >+static void quorum_count_vote(QuorumVotes *votes,
> >+                              QuorumVoteValue *value,
> >+                              int index)
> >+{
> >+    QuorumVoteVersion *v = NULL, *version = NULL;
> >+    QuorumVoteItem *item;
> >+
> >+    /* look if we have something with this hash */
> >+    QLIST_FOREACH(v, &votes->vote_list, next) {
> >+        if (!votes->compare(&v->value, value)) {
> >+            version = v;
> >+            break;
> >+        }
> >+    }
> >+
> >+    /* It's a version not yet in the list add it */
> >+    if (!version) {
> >+        version = g_new0(QuorumVoteVersion, 1);
> >+        QLIST_INIT(&version->items);
> >+        memcpy(&version->value, value, sizeof(version->value));
> >+        version->index = index;
> >+        version->vote_count = 0;
> >+        QLIST_INSERT_HEAD(&votes->vote_list, version, next);
> >+    }
> >+
> >+    version->vote_count++;
> >+
> >+    item = g_new0(QuorumVoteItem, 1);
> >+    item->index = index;
> >+    QLIST_INSERT_HEAD(&version->items, item, next);
> >+}
> >+
> >+static void quorum_free_vote_list(QuorumVotes *votes)
> >+{
> >+    QuorumVoteVersion *version, *next_version;
> >+    QuorumVoteItem *item, *next_item;
> >+
> >+    QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
> >+        QLIST_REMOVE(version, next);
> >+        QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
> >+            QLIST_REMOVE(item, next);
> >+            g_free(item);
> >+        }
> >+        g_free(version);
> >+    }
> >+}
> >+
> >+static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
> >+{
> >+    int j, ret;
> >+    gnutls_hash_hd_t dig;
> >+    QEMUIOVector *qiov = &acb->aios[i].qiov;
> >+
> >+    ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
> >+
> >+    if (ret < 0) {
> >+        return ret;
> >+    }
> >+
> >+    for (j = 0; j < qiov->niov; j++) {
> >+        ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
> >+        if (ret < 0) {
> >+            return ret;
> gnutls_hash_deinit?
> 
> >+        }
> >+    }
> >+
> >+    gnutls_hash_deinit(dig, (void *) hash);
> >+
> >+    return 0;
> >+}
> >+
> >+static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
> >+{
> >+    int i = 0;
> >+    QuorumVoteVersion *candidate, *winner = NULL;
> >+
> >+    QLIST_FOREACH(candidate, &votes->vote_list, next) {
> >+        if (candidate->vote_count > i) {
> >+            i = candidate->vote_count;
> >+            winner = candidate;
> >+        }
> >+    }
> >+
> >+    return winner;
> >+}
> >+
> >+static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
> >+{
> >+    int i;
> >+    int result;
> >+
> >+    assert(a->niov == b->niov);
> >+    for (i = 0; i < a->niov; i++) {
> >+        assert(a->iov[i].iov_len == b->iov[i].iov_len);
> >+        result = memcmp(a->iov[i].iov_base,
> >+                        b->iov[i].iov_base,
> >+                        a->iov[i].iov_len);
> >+        if (result) {
> >+            return false;
> >+        }
> >+    }
> >+
> >+    return true;
> >+}
> >+
> >+static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
> >+                                          const char *fmt, ...)
> >+{
> >+    va_list ap;
> >+
> >+    va_start(ap, fmt);
> >+    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
> >+            acb->sector_num, acb->nb_sectors);
> >+    vfprintf(stderr, fmt, ap);
> >+    fprintf(stderr, "\n");
> >+    va_end(ap);
> >+    exit(1);
> >+}
> >+
> >+static bool quorum_compare(QuorumAIOCB *acb,
> >+                           QEMUIOVector *a,
> >+                           QEMUIOVector *b)
> >+{
> >+    BDRVQuorumState *s = acb->bqs;
> >+    ssize_t offset;
> >+
> >+    /* This driver will replace blkverify in this particular case */
> >+    if (s->is_blkverify) {
> >+        offset = qemu_iovec_compare(a, b);
> >+        if (offset != -1) {
> >+            quorum_err(acb, "contents mismatch in sector %" PRId64,
> >+                       acb->sector_num +
> >+                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
> >+        }
> >+        return true;
> >+    }
> >+
> >+    return quorum_iovec_compare(a, b);
> >+}
> >+
> >+static void quorum_vote(QuorumAIOCB *acb)
> >+{
> >+    bool quorum = true;
> >+    int i, j, ret;
> >+    QuorumVoteValue hash;
> >+    BDRVQuorumState *s = acb->bqs;
> >+    QuorumVoteVersion *winner;
> >+
> >+    /* get the index of the first successful read */
> >+    for (i = 0; i < s->total; i++) {
> >+        if (!acb->aios[i].ret) {
> >+            break;
> >+        }
> >+    }
> >+
> >+    /* compare this read with all other successful read looking for quorum */
> >+    for (j = i + 1; j < s->total; j++) {
> >+        if (acb->aios[j].ret) {
> >+            continue;
> >+        }
> >+        quorum = quorum_compare(acb, &acb->aios[i].qiov, &acb->aios[j].qiov);
> >+        if (!quorum) {
> >+            break;
> >+       }
> >+    }
> >+
> >+    /* Every successful read agrees -> Quorum */
> >+    if (quorum) {
> >+        quorum_copy_qiov(acb->qiov, &acb->aios[i].qiov);
> If no reads were successful at all, quorum will be true and i will
> be s->total. Therefore, this will result in an array access with an
> index out of bounds here.
Thanks a lot for spotting this.

> 
> Generally, why is this function executed at all if any read failed?
> If a read fails, the quorum read function will return an error, thus
> the caller will ignore the result anyway.

I should if there is a quorum between the successfull reads; if all the success
ful reads agrees and their count is greater than threshold.

> 
> >+        return;
> >+    }
> >+
> >+    /* compute hashs for each successful read, also store indexes */
> >+    for (i = 0; i < s->total; i++) {
> >+        if (acb->aios[i].ret) {
> >+            continue;
> >+        }
> >+        ret = quorum_compute_hash(acb, i, &hash);
> >+        /* if ever the hash computation failed */
> >+        if (ret < 0) {
> >+            acb->vote_ret = ret;
> >+            goto free_exit;
> >+        }
> >+        quorum_count_vote(&acb->votes, &hash, i);
> >+    }
> >+
> >+    /* vote to select the most represented version */
> >+    winner = quorum_get_vote_winner(&acb->votes);
> >+    /* every vote version are differents -> error */
> >+    if (!winner) {
> >+        quorum_report_failure(acb);
> >+        acb->vote_ret = -EIO;
> >+        goto free_exit;
> >+    }
> >+
> >+    /* if the winner count is smaller then threshold read fail */
> s/then/than/, s/ read fail/, the read fails/
> 
> >+    if (winner->vote_count < s->threshold) {
> >+        quorum_report_failure(acb);
> >+        acb->vote_ret = -EIO;
> >+        goto free_exit;
> >+    }
> >+
> >+    /* we have a winner: copy it */
> >+    quorum_copy_qiov(acb->qiov, &acb->aios[winner->index].qiov);
> >+
> >+    /* some versions are bad print them */
> >+    quorum_report_bad_versions(acb, &winner->value);
> >+
> >+free_exit:
> >+    /* free lists */
> >+    quorum_free_vote_list(&acb->votes);
> >+}
> >+
> >  static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
> >                                           int64_t sector_num,
> >                                           QEMUIOVector *qiov,
> >@@ -194,7 +513,7 @@ static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
> >      }
> >      for (i = 0; i < s->total; i++) {
> >-        bdrv_aio_readv(&s->bs[i], sector_num, qiov, nb_sectors,
> >+        bdrv_aio_readv(&s->bs[i], sector_num, &acb->aios[i].qiov, nb_sectors,
> >                         quorum_aio_cb, &acb->aios[i]);
> >      }
> >diff --git a/configure b/configure
> >index 23dbaaf..efbe7d9 100755
> >--- a/configure
> >+++ b/configure
> >@@ -246,6 +246,7 @@ gtk=""
> >  gtkabi="2.0"
> >  tpm="no"
> >  libssh2=""
> >+quorum="no"
> >  # parse CC options first
> >  for opt do
> >@@ -972,6 +973,10 @@ for opt do
> >    ;;
> >    --enable-libssh2) libssh2="yes"
> >    ;;
> >+  --disable-quorum) quorum="no"
> >+  ;;
> >+  --enable-quorum) quorum="yes"
> >+  ;;
> >    *) echo "ERROR: unknown option $opt"; show_help="yes"
> >    ;;
> >    esac
> >@@ -1203,6 +1208,8 @@ echo "  --gcov=GCOV              use specified gcov [$gcov_tool]"
> >  echo "  --enable-tpm             enable TPM support"
> >  echo "  --disable-libssh2        disable ssh block device support"
> >  echo "  --enable-libssh2         enable ssh block device support"
> >+echo "  --disable-quorum         disable quorum block filter support"
> >+echo "  --enable-quorum          enable quorum block filter support"
> >  echo ""
> >  echo "NOTE: The object files are built at the place where configure is launched"
> >  exit 1
> >@@ -1895,6 +1902,30 @@ EOF
> >  fi
> >  ##########################################
> >+# Quorum probe (check for gnutls)
> >+if test "$quorum" != "no" ; then
> >+cat > $TMPC <<EOF
> >+#include <gnutls/gnutls.h>
> >+#include <gnutls/crypto.h>
> >+int main(void) {char data[4096], digest[32];
> >+gnutls_hash_fast(GNUTLS_DIG_SHA256, data, 4096, digest);
> >+return 0;
> >+}
> >+EOF
> >+quorum_tls_cflags=`$pkg_config --cflags gnutls 2> /dev/null`
> >+quorum_tls_libs=`$pkg_config --libs gnutls 2> /dev/null`
> >+if compile_prog "$quorum_tls_cflags" "$quorum_tls_libs" ; then
> >+  qcow_tls=yes
> >+  libs_softmmu="$quorum_tls_libs $libs_softmmu"
> >+  libs_tools="$quorum_tls_libs $libs_softmmu"
> >+  QEMU_CFLAGS="$QEMU_CFLAGS $quorum_tls_cflags"
> >+else
> >+  echo "gnutls > 2.10.0 required to compile Quorum"
> >+  exit 1
> >+fi
> >+fi
> >+
> >+##########################################
> >  # VNC SASL detection
> >  if test "$vnc" = "yes" -a "$vnc_sasl" != "no" ; then
> >    cat > $TMPC <<EOF
> >@@ -3758,6 +3789,7 @@ echo "TPM support       $tpm"
> >  echo "libssh2 support   $libssh2"
> >  echo "TPM passthrough   $tpm_passthrough"
> >  echo "QOM debugging     $qom_cast_debug"
> >+echo "Quorum            $quorum"
> >  if test "$sdl_too_old" = "yes"; then
> >  echo "-> Your SDL version is too old - please upgrade to have SDL support"
> >@@ -4156,6 +4188,10 @@ if test "$libssh2" = "yes" ; then
> >    echo "CONFIG_LIBSSH2=y" >> $config_host_mak
> >  fi
> >+if test "$quorum" = "yes" ; then
> >+  echo "CONFIG_QUORUM=y" >> $config_host_mak
> >+fi
> >+
> >  if test "$virtio_blk_data_plane" = "yes" ; then
> >    echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
> >  fi
> >diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> >index 10fa0e3..51902ed 100644
> >--- a/include/monitor/monitor.h
> >+++ b/include/monitor/monitor.h
> >@@ -49,6 +49,8 @@ typedef enum MonitorEvent {
> >      QEVENT_SPICE_MIGRATE_COMPLETED,
> >      QEVENT_GUEST_PANICKED,
> >      QEVENT_BLOCK_IMAGE_CORRUPTED,
> >+    QEVENT_QUORUM_FAILURE,
> >+    QEVENT_QUORUM_REPORT_BAD,
> >      /* Add to 'monitor_event_names' array in monitor.c when
> >       * defining new events here */
> >diff --git a/monitor.c b/monitor.c
> >index 74f3f1b..89d139f 100644
> >--- a/monitor.c
> >+++ b/monitor.c
> >@@ -507,6 +507,8 @@ static const char *monitor_event_names[] = {
> >      [QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED",
> >      [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED",
> >      [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED",
> >+    [QEVENT_QUORUM_FAILURE] = "QUORUM_FAILURE",
> >+    [QEVENT_QUORUM_REPORT_BAD] = "QUORUM_REPORT_BAD",
> Could you document these events in docs/qmp/qmp-events.txt?
> 
> Max
> 
> >  };
> >  QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
> 
>
Max Reitz Oct. 29, 2013, 4:42 p.m. UTC | #4
Am 28.10.2013 13:31, schrieb Benoît Canet:
> Le Friday 04 Oct 2013 à 16:48:12 (+0200), Max Reitz a écrit :
>> On 2013-10-02 14:39, Benoît Canet wrote:
>>> Use gnutls's SHA-256 to compare versions.
>> Wouldn't CRC32 suffice? (I don't really oppose using SHA, but taking
>> in gnutls as a dependency just for comparing several memory areas
>> seems a bit much to me)
> Initially it gzip's addler32 was used but someone was concerned with the risk
> of collisions.
> Anyway the code fallback using hashes only when something wrong is detected so
> it won't impact the normal case.
>
> Best regards
>
> Benoît

Yes, that's correct, but it adds a new dependency to qemu. Personally, I
am unable to decide whether this is better than having a higher risk of
collisions with CRC, so I'll leave the decision to someone more
qualified (like you). ;-)

Max
Max Reitz Oct. 29, 2013, 5:34 p.m. UTC | #5
Am 28.10.2013 14:04, schrieb Benoît Canet:
> Le Friday 04 Oct 2013 à 16:48:12 (+0200), Max Reitz a écrit :
>> On 2013-10-02 14:39, Benoît Canet wrote:
>>> Use gnutls's SHA-256 to compare versions.
>> Wouldn't CRC32 suffice? (I don't really oppose using SHA, but taking
>> in gnutls as a dependency just for comparing several memory areas
>> seems a bit much to me)
>>
>>> Signed-off-by: Benoit Canet <benoit@irqsave.net>
>>> ---
>>>  block/Makefile.objs       |   2 +-
>>>  block/quorum.c            | 321 +++++++++++++++++++++++++++++++++++++++++++++-
>>>  configure                 |  36 ++++++
>>>  include/monitor/monitor.h |   2 +
>>>  monitor.c                 |   2 +
>>>  5 files changed, 361 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>>> index 05a65c2..adcdc21 100644
>>> --- a/block/Makefile.objs
>>> +++ b/block/Makefile.objs
>>> @@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
>>>  block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>>>  block-obj-y += qed-check.o
>>>  block-obj-y += vhdx.o
>>> -block-obj-y += quorum.o
>>> +block-obj-$(CONFIG_QUORUM) += quorum.o
>>>  block-obj-y += parallels.o blkdebug.o blkverify.o
>>>  block-obj-y += snapshot.o qapi.o
>>>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>>> diff --git a/block/quorum.c b/block/quorum.c
>>> index f0fc0e9..e235ac1 100644
>>> --- a/block/quorum.c
>>> +++ b/block/quorum.c
>>> @@ -13,7 +13,43 @@
>>>   * See the COPYING file in the top-level directory.
>>>   */
>>> +#include <gnutls/gnutls.h>
>>> +#include <gnutls/crypto.h>
>>>  #include "block/block_int.h"
>>> +#include "qapi/qmp/qjson.h"
>>> +
>>> +#define HASH_LENGTH 32
>>> +
>>> +/* This union hold a vote hash value */
>> *holds
>>
>>> +typedef union QuorumVoteValue {
>>> +    char h[HASH_LENGTH];       /* SHA-256 hash */
>>> +    int64_t l;                 /* simpler 64 bits hash */
>>> +} QuorumVoteValue;
>>> +
>>> +/* A vote item */
>>> +typedef struct QuorumVoteItem {
>>> +    int index;
>>> +    QLIST_ENTRY(QuorumVoteItem) next;
>>> +} QuorumVoteItem;
>>> +
>>> +/* this structure is a vote version. A version is the set of vote sharing the
>> *set of votes
>>
>>> + * same vote value.
>>> + * The set of vote will be tracked with the items field and it's count is
>> *set of votes or *vote set; also s/it's count/its cardinality/ or
>> something like that
>>
>>> + * vote_count.
>>> + */
>>> +typedef struct QuorumVoteVersion {
>>> +    QuorumVoteValue value;
>>> +    int index;
>>> +    int vote_count;
>>> +    QLIST_HEAD(, QuorumVoteItem) items;
>>> +    QLIST_ENTRY(QuorumVoteVersion) next;
>>> +} QuorumVoteVersion;
>>> +
>>> +/* this structure hold a group of vote versions together */
>> *holds
>>
>>> +typedef struct QuorumVotes {
>>> +    QLIST_HEAD(, QuorumVoteVersion) vote_list;
>>> +    int (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
>>> +} QuorumVotes;
>>>  /* the following structure hold the state of one quorum instance */
>>>  typedef struct {
>>> @@ -60,10 +96,14 @@ struct QuorumAIOCB {
>>>      int success_count;          /* number of successfully completed AIOCB */
>>>      bool *finished;             /* completion signal for cancel */
>>> +    QuorumVotes votes;
>>> +
>>>      bool is_read;
>>>      int vote_ret;
>>>  };
>>> +static void quorum_vote(QuorumAIOCB *acb);
>>> +
>>>  static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
>>>  {
>>>      QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
>>> @@ -111,6 +151,10 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
>>>          acb->aios[i].ret = 0;
>>>      }
>>> +    if (acb->vote_ret) {
>>> +        ret = acb->vote_ret;
>>> +    }
>>> +
>>>      acb->common.cb(acb->common.opaque, ret);
>>>      if (acb->finished) {
>>>          *acb->finished = true;
>>> @@ -122,6 +166,11 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
>>>      qemu_aio_release(acb);
>>>  }
>>> +static int quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
>>> +{
>>> +    return memcmp(a->h, b->h, HASH_LENGTH);
>>> +}
>>> +
>>>  static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
>>>                                     BlockDriverState *bs,
>>>                                     QEMUIOVector *qiov,
>>> @@ -141,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
>>>      acb->count = 0;
>>>      acb->success_count = 0;
>>>      acb->finished = NULL;
>>> +    acb->votes.compare = quorum_sha256_compare;
>>>      acb->is_read = false;
>>>      acb->vote_ret = 0;
>>> @@ -170,9 +220,278 @@ static void quorum_aio_cb(void *opaque, int ret)
>>>          return;
>>>      }
>>> +    /* Do the vote on read */
>>> +    if (acb->is_read) {
>>> +        quorum_vote(acb);
>>> +    }
>>> +
>>>      quorum_aio_finalize(acb);
>>>  }
>>> +static void quorum_report_bad(QuorumAIOCB *acb, int index)
>>> +{
>>> +    QObject *data;
>>> +    data = qobject_from_jsonf("{ 'children-index': %i"
>> I'd prefer child-index. Generally, remember that the singular of
>> "children" is "child".
>>
>>> +                              ", 'sector-num': %" PRId64
>>> +                              ", 'sectors-count': %i }",
>>> +                              index,
>>> +                              acb->sector_num,
>>> +                              acb->nb_sectors);
>>> +    monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
>> How about decrementing the refcount of data?
>>
>>> +}
>>> +
>>> +static void quorum_report_failure(QuorumAIOCB *acb)
>>> +{
>>> +    QObject *data;
>>> +    data = qobject_from_jsonf("{ 'sector-num': %" PRId64
>>> +                              ", 'sectors-count': %i }",
>>> +                              acb->sector_num,
>>> +                              acb->nb_sectors);
>>> +    monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
>> Same here.
>>
>>> +}
>>> +
>>> +static void quorum_report_bad_versions(QuorumAIOCB *acb,
>>> +                                      QuorumVoteValue *value)
>>> +{
>>> +    QuorumVoteVersion *version;
>>> +    QuorumVoteItem *item;
>>> +
>>> +    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
>>> +        if (!acb->votes.compare(&version->value, value)) {
>>> +            continue;
>>> +        }
>>> +        QLIST_FOREACH(item, &version->items, next) {
>>> +            quorum_report_bad(acb, item->index);
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
>>> +{
>>> +    int i;
>>> +    assert(dest->niov == source->niov);
>>> +    assert(dest->size == source->size);
>>> +    for (i = 0; i < source->niov; i++) {
>>> +        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
>>> +        memcpy(dest->iov[i].iov_base,
>>> +               source->iov[i].iov_base,
>>> +               source->iov[i].iov_len);
>>> +    }
>>> +}
>>> +
>>> +static void quorum_count_vote(QuorumVotes *votes,
>>> +                              QuorumVoteValue *value,
>>> +                              int index)
>>> +{
>>> +    QuorumVoteVersion *v = NULL, *version = NULL;
>>> +    QuorumVoteItem *item;
>>> +
>>> +    /* look if we have something with this hash */
>>> +    QLIST_FOREACH(v, &votes->vote_list, next) {
>>> +        if (!votes->compare(&v->value, value)) {
>>> +            version = v;
>>> +            break;
>>> +        }
>>> +    }
>>> +
>>> +    /* It's a version not yet in the list add it */
>>> +    if (!version) {
>>> +        version = g_new0(QuorumVoteVersion, 1);
>>> +        QLIST_INIT(&version->items);
>>> +        memcpy(&version->value, value, sizeof(version->value));
>>> +        version->index = index;
>>> +        version->vote_count = 0;
>>> +        QLIST_INSERT_HEAD(&votes->vote_list, version, next);
>>> +    }
>>> +
>>> +    version->vote_count++;
>>> +
>>> +    item = g_new0(QuorumVoteItem, 1);
>>> +    item->index = index;
>>> +    QLIST_INSERT_HEAD(&version->items, item, next);
>>> +}
>>> +
>>> +static void quorum_free_vote_list(QuorumVotes *votes)
>>> +{
>>> +    QuorumVoteVersion *version, *next_version;
>>> +    QuorumVoteItem *item, *next_item;
>>> +
>>> +    QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
>>> +        QLIST_REMOVE(version, next);
>>> +        QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
>>> +            QLIST_REMOVE(item, next);
>>> +            g_free(item);
>>> +        }
>>> +        g_free(version);
>>> +    }
>>> +}
>>> +
>>> +static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
>>> +{
>>> +    int j, ret;
>>> +    gnutls_hash_hd_t dig;
>>> +    QEMUIOVector *qiov = &acb->aios[i].qiov;
>>> +
>>> +    ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
>>> +
>>> +    if (ret < 0) {
>>> +        return ret;
>>> +    }
>>> +
>>> +    for (j = 0; j < qiov->niov; j++) {
>>> +        ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
>>> +        if (ret < 0) {
>>> +            return ret;
>> gnutls_hash_deinit?
>>
>>> +        }
>>> +    }
>>> +
>>> +    gnutls_hash_deinit(dig, (void *) hash);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
>>> +{
>>> +    int i = 0;
>>> +    QuorumVoteVersion *candidate, *winner = NULL;
>>> +
>>> +    QLIST_FOREACH(candidate, &votes->vote_list, next) {
>>> +        if (candidate->vote_count > i) {
>>> +            i = candidate->vote_count;
>>> +            winner = candidate;
>>> +        }
>>> +    }
>>> +
>>> +    return winner;
>>> +}
>>> +
>>> +static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
>>> +{
>>> +    int i;
>>> +    int result;
>>> +
>>> +    assert(a->niov == b->niov);
>>> +    for (i = 0; i < a->niov; i++) {
>>> +        assert(a->iov[i].iov_len == b->iov[i].iov_len);
>>> +        result = memcmp(a->iov[i].iov_base,
>>> +                        b->iov[i].iov_base,
>>> +                        a->iov[i].iov_len);
>>> +        if (result) {
>>> +            return false;
>>> +        }
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>> +
>>> +static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
>>> +                                          const char *fmt, ...)
>>> +{
>>> +    va_list ap;
>>> +
>>> +    va_start(ap, fmt);
>>> +    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
>>> +            acb->sector_num, acb->nb_sectors);
>>> +    vfprintf(stderr, fmt, ap);
>>> +    fprintf(stderr, "\n");
>>> +    va_end(ap);
>>> +    exit(1);
>>> +}
>>> +
>>> +static bool quorum_compare(QuorumAIOCB *acb,
>>> +                           QEMUIOVector *a,
>>> +                           QEMUIOVector *b)
>>> +{
>>> +    BDRVQuorumState *s = acb->bqs;
>>> +    ssize_t offset;
>>> +
>>> +    /* This driver will replace blkverify in this particular case */
>>> +    if (s->is_blkverify) {
>>> +        offset = qemu_iovec_compare(a, b);
>>> +        if (offset != -1) {
>>> +            quorum_err(acb, "contents mismatch in sector %" PRId64,
>>> +                       acb->sector_num +
>>> +                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
>>> +        }
>>> +        return true;
>>> +    }
>>> +
>>> +    return quorum_iovec_compare(a, b);
>>> +}
>>> +
>>> +static void quorum_vote(QuorumAIOCB *acb)
>>> +{
>>> +    bool quorum = true;
>>> +    int i, j, ret;
>>> +    QuorumVoteValue hash;
>>> +    BDRVQuorumState *s = acb->bqs;
>>> +    QuorumVoteVersion *winner;
>>> +
>>> +    /* get the index of the first successful read */
>>> +    for (i = 0; i < s->total; i++) {
>>> +        if (!acb->aios[i].ret) {
>>> +            break;
>>> +        }
>>> +    }
>>> +
>>> +    /* compare this read with all other successful read looking for quorum */
>>> +    for (j = i + 1; j < s->total; j++) {
>>> +        if (acb->aios[j].ret) {
>>> +            continue;
>>> +        }
>>> +        quorum = quorum_compare(acb, &acb->aios[i].qiov, &acb->aios[j].qiov);
>>> +        if (!quorum) {
>>> +            break;
>>> +       }
>>> +    }
>>> +
>>> +    /* Every successful read agrees -> Quorum */
>>> +    if (quorum) {
>>> +        quorum_copy_qiov(acb->qiov, &acb->aios[i].qiov);
>> If no reads were successful at all, quorum will be true and i will
>> be s->total. Therefore, this will result in an array access with an
>> index out of bounds here.
> Thanks a lot for spotting this.
>
>> Generally, why is this function executed at all if any read failed?
>> If a read fails, the quorum read function will return an error, thus
>> the caller will ignore the result anyway.
> I should if there is a quorum between the successfull reads; if all the success
> ful reads agrees and their count is greater than threshold.

Ah, I see it now; quorum_aio_readv() returns "success" if more reads
than the threshould succeeded (and return the same result). I thought it
would fail as soon as just a single read operation failed – sorry, my fault.

Max
diff mbox

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 05a65c2..adcdc21 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,7 +3,7 @@  block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += vhdx.o
-block-obj-y += quorum.o
+block-obj-$(CONFIG_QUORUM) += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
 block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
diff --git a/block/quorum.c b/block/quorum.c
index f0fc0e9..e235ac1 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -13,7 +13,43 @@ 
  * See the COPYING file in the top-level directory.
  */
 
+#include <gnutls/gnutls.h>
+#include <gnutls/crypto.h>
 #include "block/block_int.h"
+#include "qapi/qmp/qjson.h"
+
+#define HASH_LENGTH 32
+
+/* This union hold a vote hash value */
+typedef union QuorumVoteValue {
+    char h[HASH_LENGTH];       /* SHA-256 hash */
+    int64_t l;                 /* simpler 64 bits hash */
+} QuorumVoteValue;
+
+/* A vote item */
+typedef struct QuorumVoteItem {
+    int index;
+    QLIST_ENTRY(QuorumVoteItem) next;
+} QuorumVoteItem;
+
+/* this structure is a vote version. A version is the set of vote sharing the
+ * same vote value.
+ * The set of vote will be tracked with the items field and it's count is
+ * vote_count.
+ */
+typedef struct QuorumVoteVersion {
+    QuorumVoteValue value;
+    int index;
+    int vote_count;
+    QLIST_HEAD(, QuorumVoteItem) items;
+    QLIST_ENTRY(QuorumVoteVersion) next;
+} QuorumVoteVersion;
+
+/* this structure hold a group of vote versions together */
+typedef struct QuorumVotes {
+    QLIST_HEAD(, QuorumVoteVersion) vote_list;
+    int (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
+} QuorumVotes;
 
 /* the following structure hold the state of one quorum instance */
 typedef struct {
@@ -60,10 +96,14 @@  struct QuorumAIOCB {
     int success_count;          /* number of successfully completed AIOCB */
     bool *finished;             /* completion signal for cancel */
 
+    QuorumVotes votes;
+
     bool is_read;
     int vote_ret;
 };
 
+static void quorum_vote(QuorumAIOCB *acb);
+
 static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
@@ -111,6 +151,10 @@  static void quorum_aio_finalize(QuorumAIOCB *acb)
         acb->aios[i].ret = 0;
     }
 
+    if (acb->vote_ret) {
+        ret = acb->vote_ret;
+    }
+
     acb->common.cb(acb->common.opaque, ret);
     if (acb->finished) {
         *acb->finished = true;
@@ -122,6 +166,11 @@  static void quorum_aio_finalize(QuorumAIOCB *acb)
     qemu_aio_release(acb);
 }
 
+static int quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
+{
+    return memcmp(a->h, b->h, HASH_LENGTH);
+}
+
 static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
                                    BlockDriverState *bs,
                                    QEMUIOVector *qiov,
@@ -141,6 +190,7 @@  static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
     acb->count = 0;
     acb->success_count = 0;
     acb->finished = NULL;
+    acb->votes.compare = quorum_sha256_compare;
     acb->is_read = false;
     acb->vote_ret = 0;
 
@@ -170,9 +220,278 @@  static void quorum_aio_cb(void *opaque, int ret)
         return;
     }
 
+    /* Do the vote on read */
+    if (acb->is_read) {
+        quorum_vote(acb);
+    }
+
     quorum_aio_finalize(acb);
 }
 
+static void quorum_report_bad(QuorumAIOCB *acb, int index)
+{
+    QObject *data;
+    data = qobject_from_jsonf("{ 'children-index': %i"
+                              ", 'sector-num': %" PRId64
+                              ", 'sectors-count': %i }",
+                              index,
+                              acb->sector_num,
+                              acb->nb_sectors);
+    monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
+}
+
+static void quorum_report_failure(QuorumAIOCB *acb)
+{
+    QObject *data;
+    data = qobject_from_jsonf("{ 'sector-num': %" PRId64
+                              ", 'sectors-count': %i }",
+                              acb->sector_num,
+                              acb->nb_sectors);
+    monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
+}
+
+static void quorum_report_bad_versions(QuorumAIOCB *acb,
+                                      QuorumVoteValue *value)
+{
+    QuorumVoteVersion *version;
+    QuorumVoteItem *item;
+
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (!acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            quorum_report_bad(acb, item->index);
+        }
+    }
+}
+
+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+    int i;
+    assert(dest->niov == source->niov);
+    assert(dest->size == source->size);
+    for (i = 0; i < source->niov; i++) {
+        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
+        memcpy(dest->iov[i].iov_base,
+               source->iov[i].iov_base,
+               source->iov[i].iov_len);
+    }
+}
+
+static void quorum_count_vote(QuorumVotes *votes,
+                              QuorumVoteValue *value,
+                              int index)
+{
+    QuorumVoteVersion *v = NULL, *version = NULL;
+    QuorumVoteItem *item;
+
+    /* look if we have something with this hash */
+    QLIST_FOREACH(v, &votes->vote_list, next) {
+        if (!votes->compare(&v->value, value)) {
+            version = v;
+            break;
+        }
+    }
+
+    /* It's a version not yet in the list add it */
+    if (!version) {
+        version = g_new0(QuorumVoteVersion, 1);
+        QLIST_INIT(&version->items);
+        memcpy(&version->value, value, sizeof(version->value));
+        version->index = index;
+        version->vote_count = 0;
+        QLIST_INSERT_HEAD(&votes->vote_list, version, next);
+    }
+
+    version->vote_count++;
+
+    item = g_new0(QuorumVoteItem, 1);
+    item->index = index;
+    QLIST_INSERT_HEAD(&version->items, item, next);
+}
+
+static void quorum_free_vote_list(QuorumVotes *votes)
+{
+    QuorumVoteVersion *version, *next_version;
+    QuorumVoteItem *item, *next_item;
+
+    QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
+        QLIST_REMOVE(version, next);
+        QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
+            QLIST_REMOVE(item, next);
+            g_free(item);
+        }
+        g_free(version);
+    }
+}
+
+static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
+{
+    int j, ret;
+    gnutls_hash_hd_t dig;
+    QEMUIOVector *qiov = &acb->aios[i].qiov;
+
+    ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    for (j = 0; j < qiov->niov; j++) {
+        ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    gnutls_hash_deinit(dig, (void *) hash);
+
+    return 0;
+}
+
+static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
+{
+    int i = 0;
+    QuorumVoteVersion *candidate, *winner = NULL;
+
+    QLIST_FOREACH(candidate, &votes->vote_list, next) {
+        if (candidate->vote_count > i) {
+            i = candidate->vote_count;
+            winner = candidate;
+        }
+    }
+
+    return winner;
+}
+
+static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+    int i;
+    int result;
+
+    assert(a->niov == b->niov);
+    for (i = 0; i < a->niov; i++) {
+        assert(a->iov[i].iov_len == b->iov[i].iov_len);
+        result = memcmp(a->iov[i].iov_base,
+                        b->iov[i].iov_base,
+                        a->iov[i].iov_len);
+        if (result) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
+                                          const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
+            acb->sector_num, acb->nb_sectors);
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
+    va_end(ap);
+    exit(1);
+}
+
+static bool quorum_compare(QuorumAIOCB *acb,
+                           QEMUIOVector *a,
+                           QEMUIOVector *b)
+{
+    BDRVQuorumState *s = acb->bqs;
+    ssize_t offset;
+
+    /* This driver will replace blkverify in this particular case */
+    if (s->is_blkverify) {
+        offset = qemu_iovec_compare(a, b);
+        if (offset != -1) {
+            quorum_err(acb, "contents mismatch in sector %" PRId64,
+                       acb->sector_num +
+                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
+        }
+        return true;
+    }
+
+    return quorum_iovec_compare(a, b);
+}
+
+static void quorum_vote(QuorumAIOCB *acb)
+{
+    bool quorum = true;
+    int i, j, ret;
+    QuorumVoteValue hash;
+    BDRVQuorumState *s = acb->bqs;
+    QuorumVoteVersion *winner;
+
+    /* get the index of the first successful read */
+    for (i = 0; i < s->total; i++) {
+        if (!acb->aios[i].ret) {
+            break;
+        }
+    }
+
+    /* compare this read with all other successful read looking for quorum */
+    for (j = i + 1; j < s->total; j++) {
+        if (acb->aios[j].ret) {
+            continue;
+        }
+        quorum = quorum_compare(acb, &acb->aios[i].qiov, &acb->aios[j].qiov);
+        if (!quorum) {
+            break;
+       }
+    }
+
+    /* Every successful read agrees -> Quorum */
+    if (quorum) {
+        quorum_copy_qiov(acb->qiov, &acb->aios[i].qiov);
+        return;
+    }
+
+    /* compute hashs for each successful read, also store indexes */
+    for (i = 0; i < s->total; i++) {
+        if (acb->aios[i].ret) {
+            continue;
+        }
+        ret = quorum_compute_hash(acb, i, &hash);
+        /* if ever the hash computation failed */
+        if (ret < 0) {
+            acb->vote_ret = ret;
+            goto free_exit;
+        }
+        quorum_count_vote(&acb->votes, &hash, i);
+    }
+
+    /* vote to select the most represented version */
+    winner = quorum_get_vote_winner(&acb->votes);
+    /* every vote version are differents -> error */
+    if (!winner) {
+        quorum_report_failure(acb);
+        acb->vote_ret = -EIO;
+        goto free_exit;
+    }
+
+    /* if the winner count is smaller then threshold read fail */
+    if (winner->vote_count < s->threshold) {
+        quorum_report_failure(acb);
+        acb->vote_ret = -EIO;
+        goto free_exit;
+    }
+
+    /* we have a winner: copy it */
+    quorum_copy_qiov(acb->qiov, &acb->aios[winner->index].qiov);
+
+    /* some versions are bad print them */
+    quorum_report_bad_versions(acb, &winner->value);
+
+free_exit:
+    /* free lists */
+    quorum_free_vote_list(&acb->votes);
+}
+
 static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
                                          int64_t sector_num,
                                          QEMUIOVector *qiov,
@@ -194,7 +513,7 @@  static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
     }
 
     for (i = 0; i < s->total; i++) {
-        bdrv_aio_readv(&s->bs[i], sector_num, qiov, nb_sectors,
+        bdrv_aio_readv(&s->bs[i], sector_num, &acb->aios[i].qiov, nb_sectors,
                        quorum_aio_cb, &acb->aios[i]);
     }
 
diff --git a/configure b/configure
index 23dbaaf..efbe7d9 100755
--- a/configure
+++ b/configure
@@ -246,6 +246,7 @@  gtk=""
 gtkabi="2.0"
 tpm="no"
 libssh2=""
+quorum="no"
 
 # parse CC options first
 for opt do
@@ -972,6 +973,10 @@  for opt do
   ;;
   --enable-libssh2) libssh2="yes"
   ;;
+  --disable-quorum) quorum="no"
+  ;;
+  --enable-quorum) quorum="yes"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
@@ -1203,6 +1208,8 @@  echo "  --gcov=GCOV              use specified gcov [$gcov_tool]"
 echo "  --enable-tpm             enable TPM support"
 echo "  --disable-libssh2        disable ssh block device support"
 echo "  --enable-libssh2         enable ssh block device support"
+echo "  --disable-quorum         disable quorum block filter support"
+echo "  --enable-quorum          enable quorum block filter support"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -1895,6 +1902,30 @@  EOF
 fi
 
 ##########################################
+# Quorum probe (check for gnutls)
+if test "$quorum" != "no" ; then
+cat > $TMPC <<EOF
+#include <gnutls/gnutls.h>
+#include <gnutls/crypto.h>
+int main(void) {char data[4096], digest[32];
+gnutls_hash_fast(GNUTLS_DIG_SHA256, data, 4096, digest);
+return 0;
+}
+EOF
+quorum_tls_cflags=`$pkg_config --cflags gnutls 2> /dev/null`
+quorum_tls_libs=`$pkg_config --libs gnutls 2> /dev/null`
+if compile_prog "$quorum_tls_cflags" "$quorum_tls_libs" ; then
+  qcow_tls=yes
+  libs_softmmu="$quorum_tls_libs $libs_softmmu"
+  libs_tools="$quorum_tls_libs $libs_softmmu"
+  QEMU_CFLAGS="$QEMU_CFLAGS $quorum_tls_cflags"
+else
+  echo "gnutls > 2.10.0 required to compile Quorum"
+  exit 1
+fi
+fi
+
+##########################################
 # VNC SASL detection
 if test "$vnc" = "yes" -a "$vnc_sasl" != "no" ; then
   cat > $TMPC <<EOF
@@ -3758,6 +3789,7 @@  echo "TPM support       $tpm"
 echo "libssh2 support   $libssh2"
 echo "TPM passthrough   $tpm_passthrough"
 echo "QOM debugging     $qom_cast_debug"
+echo "Quorum            $quorum"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -4156,6 +4188,10 @@  if test "$libssh2" = "yes" ; then
   echo "CONFIG_LIBSSH2=y" >> $config_host_mak
 fi
 
+if test "$quorum" = "yes" ; then
+  echo "CONFIG_QUORUM=y" >> $config_host_mak
+fi
+
 if test "$virtio_blk_data_plane" = "yes" ; then
   echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
 fi
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 10fa0e3..51902ed 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -49,6 +49,8 @@  typedef enum MonitorEvent {
     QEVENT_SPICE_MIGRATE_COMPLETED,
     QEVENT_GUEST_PANICKED,
     QEVENT_BLOCK_IMAGE_CORRUPTED,
+    QEVENT_QUORUM_FAILURE,
+    QEVENT_QUORUM_REPORT_BAD,
 
     /* Add to 'monitor_event_names' array in monitor.c when
      * defining new events here */
diff --git a/monitor.c b/monitor.c
index 74f3f1b..89d139f 100644
--- a/monitor.c
+++ b/monitor.c
@@ -507,6 +507,8 @@  static const char *monitor_event_names[] = {
     [QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED",
     [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED",
     [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED",
+    [QEVENT_QUORUM_FAILURE] = "QUORUM_FAILURE",
+    [QEVENT_QUORUM_REPORT_BAD] = "QUORUM_REPORT_BAD",
 };
 QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)