Patchwork [RFC,V2,10/10] quorum: Add quorum mechanism.

login
register
mail settings
Submitter Benoit Canet
Date Aug. 7, 2012, 1:44 p.m.
Message ID <1344347073-7773-11-git-send-email-benoit@irqsave.net>
Download mbox | patch
Permalink /patch/175641/
State New
Headers show

Comments

Benoit Canet - Aug. 7, 2012, 1:44 p.m.
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/quorum.c |   84 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 6 deletions(-)
Stefan Hajnoczi - Aug. 8, 2012, 3:54 p.m.
On Tue, Aug 7, 2012 at 2:44 PM, Benoît Canet <benoit.canet@gmail.com> wrote:
> +static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
> +{
> +    int i;
> +    for (i = 0; i < source->niov; i++) {
> +        memcpy(dest->iov[i].iov_base,
> +               source->iov[i].iov_base,
> +               source->iov[i].iov_len);
> +        dest->iov[i].iov_len = source->iov[i].iov_len;
> +    }
> +    dest->niov = source->niov;
> +    dest->nalloc = source->nalloc;
> +    dest->size = source->size;

dest and source must be compatible.  Their element lengths must be identical.

Therefore I suggest dropping the assignments and replacing them with
assert(3) calls that remind us that we know they are compatible.

> +}
> +
> +static void quorum_vote(QuorumAIOCB *acb)
> +{
> +    ssize_t a_b, b_c, a_c;
> +    a_b = blkverify_iovec_compare(&acb->qiovs[0], &acb->qiovs[1]);
> +    b_c = blkverify_iovec_compare(&acb->qiovs[1], &acb->qiovs[2]);
> +
> +    /* Three vector identical -> quorum */
> +    if (a_b == b_c && a_b == -1) {
> +        quorum_copy_qiov(acb->qiov, &acb->qiovs[0]); /*clone a */
> +        return;
> +    }
> +    if (a_b == -1) {
> +        quorum_print_bad(acb, "C");
> +        quorum_copy_qiov(acb->qiov, &acb->qiovs[0]); /*clone a */
> +        return;
> +    }
> +    if (b_c == -1) {
> +        quorum_print_bad(acb, "A");
> +        quorum_copy_qiov(acb->qiov, &acb->qiovs[1]); /*clone b */
> +        return;
> +    }
> +    a_c = blkverify_iovec_compare(&acb->qiovs[0], &acb->qiovs[2]);
> +    if (a_c == -1) {
> +        quorum_print_bad(acb, "B");
> +        quorum_copy_qiov(acb->qiov, &acb->qiovs[0]); /*clone a */
> +        return;
> +    }
> +    quorum_print_failure(acb);
> +    acb->vote_ret = -EIO;
>  }

In the common case the comparison will succeed, so we could use acb->qiov
as acb->qiovs[0] (a's qiov).  In that case we wouldn't need to copy
the data.  If you feel this will complicate things, you could leave a
comment so someone can add it in the future, if necessary.

Patch

diff --git a/block/quorum.c b/block/quorum.c
index 13804c1..5914141 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -174,7 +174,7 @@  static int quorum_check_ret(QuorumAIOCB *acb)
 static void quorum_aio_bh(void *opaque)
 {
     QuorumAIOCB *acb = opaque;
-    int i;
+    int i, ret;
 
     for (i = 0; i <= 2; i++) {
         if (acb->aios[i].buf) {
@@ -184,7 +184,12 @@  static void quorum_aio_bh(void *opaque)
     }
 
     qemu_bh_delete(acb->bh);
-    acb->common.cb(acb->common.opaque, quorum_check_ret(acb));
+    if (acb->vote_ret) {
+        ret = acb->vote_ret;
+    } else {
+        ret = quorum_check_ret(acb);
+    }
+    acb->common.cb(acb->common.opaque, ret);
     if (acb->finished) {
         *acb->finished = true;
     }
@@ -226,10 +231,75 @@  static void quorum_aio_cb(void *opaque, int ret)
     sacb->ret = ret;
     acb->count++;
     assert(acb->count <= 3);
-    if (acb->count == 3) {
-        acb->bh = qemu_bh_new(quorum_aio_bh, acb);
-        qemu_bh_schedule(acb->bh);
+    if (acb->count < 3) {
+        return;
     }
+
+    /* Do the quorum */
+    if (acb->vote) {
+        acb->vote(acb);
+    }
+
+    acb->bh = qemu_bh_new(quorum_aio_bh, acb);
+    qemu_bh_schedule(acb->bh);
+}
+
+static void quorum_print_bad(QuorumAIOCB *acb, const char *filename)
+{
+    fprintf(stderr, "quorum: corrected error in quorum file %s: sector_num=%"
+            PRId64 " nb_sectors=%i\n", filename, acb->sector_num,
+            acb->nb_sectors);
+}
+
+static void quorum_print_failure(QuorumAIOCB *acb)
+{
+    fprintf(stderr, "quorum: failure sector_num=%" PRId64 " nb_sectors=%i\n",
+            acb->sector_num, acb->nb_sectors);
+}
+
+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+    int i;
+    for (i = 0; i < source->niov; i++) {
+        memcpy(dest->iov[i].iov_base,
+               source->iov[i].iov_base,
+               source->iov[i].iov_len);
+        dest->iov[i].iov_len = source->iov[i].iov_len;
+    }
+    dest->niov = source->niov;
+    dest->nalloc = source->nalloc;
+    dest->size = source->size;
+}
+
+static void quorum_vote(QuorumAIOCB *acb)
+{
+    ssize_t a_b, b_c, a_c;
+    a_b = blkverify_iovec_compare(&acb->qiovs[0], &acb->qiovs[1]);
+    b_c = blkverify_iovec_compare(&acb->qiovs[1], &acb->qiovs[2]);
+
+    /* Three vector identical -> quorum */
+    if (a_b == b_c && a_b == -1) {
+        quorum_copy_qiov(acb->qiov, &acb->qiovs[0]); /*clone a */
+        return;
+    }
+    if (a_b == -1) {
+        quorum_print_bad(acb, "C");
+        quorum_copy_qiov(acb->qiov, &acb->qiovs[0]); /*clone a */
+        return;
+    }
+    if (b_c == -1) {
+        quorum_print_bad(acb, "A");
+        quorum_copy_qiov(acb->qiov, &acb->qiovs[1]); /*clone b */
+        return;
+    }
+    a_c = blkverify_iovec_compare(&acb->qiovs[0], &acb->qiovs[2]);
+    if (a_c == -1) {
+        quorum_print_bad(acb, "B");
+        quorum_copy_qiov(acb->qiov, &acb->qiovs[0]); /*clone a */
+        return;
+    }
+    quorum_print_failure(acb);
+    acb->vote_ret = -EIO;
 }
 
 static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
@@ -244,6 +314,8 @@  static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
                                       nb_sectors, cb, opaque);
     int i;
 
+    acb->vote = quorum_vote;
+
     for (i = 0; i <= 2; i++) {
         acb->aios[i].buf = qemu_blockalign(bs->file, qiov->size);
         qemu_iovec_init(&acb->qiovs[i], qiov->niov);
@@ -251,7 +323,7 @@  static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
     }
 
     for (i = 0; i <= 2; i++) {
-        bdrv_aio_readv(s->bs[i], sector_num, qiov, nb_sectors,
+        bdrv_aio_readv(s->bs[i], sector_num, &acb->qiovs[i], nb_sectors,
                        quorum_aio_cb, &acb->aios[i]);
     }