diff mbox

[7/9] virtio-blk: live migrate s->rq with multiqueue

Message ID 1463787632-7241-8-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi May 20, 2016, 11:40 p.m. UTC
Each request in s->rq belongs to a virtqueue.  When multiqueue is
enabled we can no longer default to the first virtqueue.  Explicitly
migrate virtqueue indices when needed.

The migration stream looks like this:

  [s->rq][mq_rq_indices, ~QEMU_VM_SUBSECTION][virtio subsections]

This patch adds the mq_rq_indices subsection.  A terminator byte
(~QEMU_VM_SUBSECTION) must be emitted after the subsection since the
generic virtio subsections follow and vmstate_load_state() would attempt
to load them too.

This change preserves migration compatibility as follows:

1. Old -> new: multiqueue is not be enabled so the mq_rq_indices
   subsection is not in the migration stream.  virtio_blk_load_device()
   attempts to load subsections but fails since any subsection is a
   generic virtio subsection and we continue.  The generic virtio code
   will then load the subsection that we failed to load.

2. New -> old: when multiqueue is disabled the migration stream is
   unchanged and therefore compatible.  When multiqueue is enabled the
   generic virtio subsection loading safely fails when it hits
   virtio_blk/mq_rq_indices.

In summary, the only failure case is when multiqueue is enabled in a new
QEMU and we migrate to an old QEMU.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/virtio-blk.c          | 123 +++++++++++++++++++++++++++++++++++++++--
 include/hw/virtio/virtio-blk.h |   5 ++
 2 files changed, 123 insertions(+), 5 deletions(-)

Comments

Paolo Bonzini May 21, 2016, 3:37 p.m. UTC | #1
On 21/05/2016 01:40, Stefan Hajnoczi wrote:
>      while (req) {
>          qemu_put_sbyte(f, 1);

Could you just put an extra 32-bit queue id here if num_queues > 1?  A
guest with num_queues > 1 cannot be started on pre-2.7 QEMU, so you can
change the migration format (if virtio were using vmstate, it would be a
VMSTATE_UINT32_TEST).

Thanks,

Paolo

>          qemu_put_virtqueue_element(f, &req->elem);
>          req = req->next;
> +        s->num_rq++;
>      }
>      qemu_put_sbyte(f, 0);
> +
> +    /* In order to distinguish virtio-blk subsections from the generic virtio
> +     * device subsections that follow we emit a terminating byte.  Old versions
> +     * of QEMU don't expect the terminating byte so, for compatibility, only
> +     * write it when virtio-blk subsections are needed.
> +     */
> +    if (virtio_blk_mq_rq_indices_needed(s)) {
> +        uint32_t i;
> +
> +        s->mq_rq_indices = g_new(uint32_t, s->num_rq);
> +        req = s->rq;
> +        for (i = 0; i < s->num_rq; i++) {
> +            s->mq_rq_indices[i] = virtio_get_queue_index(req->vq);
> +            req = req->next;
> +        }
> +
> +        vmstate_save_state(f, &virtio_blk_vmstate, s, NULL);
> +        qemu_put_ubyte(f, ~QEMU_VM_SUBSECTION);
> +
> +        g_free(s->mq_rq_indices);
> +        s->mq_rq_indices = NULL;
Stefan Hajnoczi May 27, 2016, 9:42 p.m. UTC | #2
On Sat, May 21, 2016 at 05:37:27PM +0200, Paolo Bonzini wrote:
> 
> 
> On 21/05/2016 01:40, Stefan Hajnoczi wrote:
> >      while (req) {
> >          qemu_put_sbyte(f, 1);
> 
> Could you just put an extra 32-bit queue id here if num_queues > 1?  A
> guest with num_queues > 1 cannot be started on pre-2.7 QEMU, so you can
> change the migration format (if virtio were using vmstate, it would be a
> VMSTATE_UINT32_TEST).

That's a good point: the same command-line would not work on old QEMU so
there is no possibility of new->old migration in the first place!

Your solution is much simpler than my hack (which took me a long time to
find and I was kind of proud of).  Will change for v2.

Stefan
diff mbox

Patch

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index ab0f589..64b4185 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -16,6 +16,7 @@ 
 #include "qemu-common.h"
 #include "qemu/iov.h"
 #include "qemu/error-report.h"
+#include "migration/migration.h"
 #include "trace.h"
 #include "hw/block/block.h"
 #include "sysemu/block-backend.h"
@@ -823,6 +824,42 @@  static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
     }
 }
 
+static bool virtio_blk_mq_rq_indices_needed(void *opaque)
+{
+    VirtIOBlock *s = opaque;
+
+    return s->conf.num_queues && s->rq;
+}
+
+/* Array of virtqueue indices for requests in s->rq */
+static const VMStateDescription vmstate_virtio_blk_mq_rq_indices = {
+    .name = "virtio_blk/mq_rq_indices",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .needed = virtio_blk_mq_rq_indices_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(num_rq, VirtIOBlock),
+        VMSTATE_VARRAY_UINT32_ALLOC(mq_rq_indices, VirtIOBlock, num_rq, 1,
+                                    vmstate_info_uint32, uint32_t),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription virtio_blk_vmstate = {
+    .name = "virtio_blk",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_blk_mq_rq_indices,
+        NULL
+    }
+};
+
 static void virtio_blk_save(QEMUFile *f, void *opaque)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
@@ -842,12 +879,36 @@  static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
     VirtIOBlock *s = VIRTIO_BLK(vdev);
     VirtIOBlockReq *req = s->rq;
 
+    s->num_rq = 0;
     while (req) {
         qemu_put_sbyte(f, 1);
         qemu_put_virtqueue_element(f, &req->elem);
         req = req->next;
+        s->num_rq++;
     }
     qemu_put_sbyte(f, 0);
+
+    /* In order to distinguish virtio-blk subsections from the generic virtio
+     * device subsections that follow we emit a terminating byte.  Old versions
+     * of QEMU don't expect the terminating byte so, for compatibility, only
+     * write it when virtio-blk subsections are needed.
+     */
+    if (virtio_blk_mq_rq_indices_needed(s)) {
+        uint32_t i;
+
+        s->mq_rq_indices = g_new(uint32_t, s->num_rq);
+        req = s->rq;
+        for (i = 0; i < s->num_rq; i++) {
+            s->mq_rq_indices[i] = virtio_get_queue_index(req->vq);
+            req = req->next;
+        }
+
+        vmstate_save_state(f, &virtio_blk_vmstate, s, NULL);
+        qemu_put_ubyte(f, ~QEMU_VM_SUBSECTION);
+
+        g_free(s->mq_rq_indices);
+        s->mq_rq_indices = NULL;
+    }
 }
 
 static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
@@ -865,16 +926,68 @@  static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
                                   int version_id)
 {
     VirtIOBlock *s = VIRTIO_BLK(vdev);
+    VirtQueue *vq0 = virtio_get_queue(vdev, 0);
+    VirtIOBlockReq **tailp = (VirtIOBlockReq **)&s->rq;
+    VirtIOBlockReq *req;
+    uint32_t num_rq = 0;
+    int ret;
 
     while (qemu_get_sbyte(f)) {
-        VirtIOBlockReq *req;
         req = qemu_get_virtqueue_element(f, sizeof(VirtIOBlockReq));
-        virtio_blk_init_request(s, s->vq, req);
-        req->next = s->rq;
-        s->rq = req;
+
+        /* Virtqueue is adjusted by a subsection in the multiqueue case */
+        virtio_blk_init_request(s, vq0, req);
+
+        *tailp = req;
+        tailp = &req->next;
+        num_rq++;
+    }
+
+    s->num_rq = 0;
+    s->mq_rq_indices = NULL;
+    ret = vmstate_load_state(f, &virtio_blk_vmstate, s, 1);
+    if (ret == 0) {
+        uint32_t i;
+
+        if (qemu_peek_byte(f, 0) != (uint8_t)~QEMU_VM_SUBSECTION) {
+            if (s->num_rq != 0) {
+                ret = -EINVAL; /* unexpected terminator byte */
+            } else {
+                ret = 0; /* no subsection for us or generic virtio */
+            }
+            goto out;
+        }
+        qemu_file_skip(f, 1);
+
+        if (num_rq != s->num_rq) {
+            ret = -EINVAL;
+            goto out;
+        }
+
+        req = s->rq;
+        for (i = 0; i < num_rq; i++) {
+            uint32_t idx = s->mq_rq_indices[i];
+
+            if (idx >= s->conf.num_queues) {
+                ret = -EINVAL;
+                goto out;
+            }
+
+            req->vq = virtio_get_queue(vdev, idx);
+            req = req->next;
+        }
+    } else if (ret == -ENOENT) {
+        /* This could be the generic virtio subsections, ignore and let the
+         * virtio code have a try.  If that fails too then load will really
+         * fail.
+         */
+        ret = 0;
     }
 
-    return 0;
+out:
+    g_free(s->mq_rq_indices);
+    s->mq_rq_indices = NULL;
+    return ret;
 }
 
 static void virtio_blk_resize(void *opaque)
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b6e7860..0bf9ebc 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -49,6 +49,11 @@  typedef struct VirtIOBlock {
     BlockBackend *blk;
     VirtQueue *vq;
     void *rq;
+
+    /* The following two fields are used only during save/load */
+    uint32_t num_rq;
+    uint32_t *mq_rq_indices;
+
     QEMUBH *bh;
     QEMUBH *batch_notify_bh;
     unsigned long *batch_notify_vqs;