Patchwork [4/5] Reopen files after migration

login
register
mail settings
Submitter Juan Quintela
Date Jan. 4, 2011, 2:33 p.m.
Message ID <7fef2ab89110849603902ac62d8c164cd3ff01d2.1294150511.git.quintela@redhat.com>
Download mbox | patch
Permalink /patch/77477/
State New
Headers show

Comments

Juan Quintela - Jan. 4, 2011, 2:33 p.m.
We need to invalidate the Read Cache on the destination, otherwise we
have corruption.  Easy way to reproduce it is:

- create an qcow2 images
- start qemu on destination of migration (qemu .... -incoming tcp:...)
- start qemu on source of migration and do one install.
- migrate at the end of install (when lot of disk IO has happened).

Destination of migration has a local copy of the L1/L2 tables that existed
at the beginning, before the install started.  We have disk corruption at
this point.  The solution (for NFS) is to just re-open the file.  Operations
have to happen in this order:

- source of migration: flush()
- destination: close(file);
- destination: open(file)

it is not necesary that source of migration close the file.

Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 blockdev.c  |   42 +++++++++++++++++++++++++++++++++++++-----
 blockdev.h  |    6 ++++++
 migration.c |    6 ++++++
 3 files changed, 49 insertions(+), 5 deletions(-)
Blue Swirl - Jan. 4, 2011, 7:05 p.m.
On Tue, Jan 4, 2011 at 2:33 PM, Juan Quintela <quintela@redhat.com> wrote:
> We need to invalidate the Read Cache on the destination, otherwise we
> have corruption.  Easy way to reproduce it is:
>
> - create an qcow2 images
> - start qemu on destination of migration (qemu .... -incoming tcp:...)
> - start qemu on source of migration and do one install.
> - migrate at the end of install (when lot of disk IO has happened).
>
> Destination of migration has a local copy of the L1/L2 tables that existed
> at the beginning, before the install started.  We have disk corruption at
> this point.  The solution (for NFS) is to just re-open the file.  Operations
> have to happen in this order:
>
> - source of migration: flush()
> - destination: close(file);
> - destination: open(file)
>
> it is not necesary that source of migration close the file.
>
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> ---
>  blockdev.c  |   42 +++++++++++++++++++++++++++++++++++++-----
>  blockdev.h  |    6 ++++++
>  migration.c |    6 ++++++
>  3 files changed, 49 insertions(+), 5 deletions(-)
>
> diff --git a/blockdev.c b/blockdev.c
> index f9bb659..3f3df7a 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -116,6 +116,7 @@ void drive_uninit(DriveInfo *dinfo)
>     bdrv_delete(dinfo->bdrv);
>     QTAILQ_REMOVE(&drives, dinfo, next);
>     qemu_free(dinfo->id);
> +    qemu_free(dinfo->file);
>     qemu_free(dinfo);
>  }
>
> @@ -136,6 +137,36 @@ static int parse_block_error_action(const char *buf, int is_read)
>     }
>  }
>
> +static int drive_open(DriveInfo *dinfo)
> +{
> +    int res = bdrv_open(dinfo->bdrv, dinfo->file, dinfo->bdrv_flags, dinfo->drv);
> +
> +    if (res < 0) {
> +        fprintf(stderr, "qemu: could not open disk image %s: %s\n",
> +                        dinfo->file, strerror(errno));
> +    }
> +    return res;
> +}
> +
> +int drives_reinit(void)
> +{
> +    DriveInfo *dinfo;
> +
> +    QTAILQ_FOREACH(dinfo, &drives, next) {
> +        if (dinfo->opened && !bdrv_is_read_only(dinfo->bdrv)) {
> +            int res;
> +            bdrv_close(dinfo->bdrv);
> +            res = drive_open(dinfo);
> +            if (res) {
> +                   fprintf(stderr, "qemu: re-open of %s failed wth error %d\n",
> +                           dinfo->file, res);
> +                   return res;
> +           }
> +        }
> +    }
> +    return 0;
> +}
> +
>  DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error)
>  {
>     const char *buf;
> @@ -156,7 +187,6 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error)
>     const char *devaddr;
>     DriveInfo *dinfo;
>     int snapshot = 0;
> -    int ret;
>
>     *fatal_error = 1;
>
> @@ -487,10 +517,12 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error)
>
>     bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
>
> -    ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
> -    if (ret < 0) {
> -        fprintf(stderr, "qemu: could not open disk image %s: %s\n",
> -                        file, strerror(-ret));
> +    dinfo->file = qemu_strdup(file);
> +    dinfo->bdrv_flags = bdrv_flags;
> +    dinfo->drv = drv;
> +    dinfo->opened = 1;
> +
> +    if (drive_open(dinfo) < 0) {
>         goto error;
>     }
>
> diff --git a/blockdev.h b/blockdev.h
> index 4536b5c..e009fee 100644
> --- a/blockdev.h
> +++ b/blockdev.h
> @@ -29,6 +29,10 @@ struct DriveInfo {
>     QemuOpts *opts;
>     char serial[BLOCK_SERIAL_STRLEN + 1];
>     QTAILQ_ENTRY(DriveInfo) next;
> +    int opened;
> +    int bdrv_flags;
> +    char *file;
> +    BlockDriver *drv;
>  };
>
>  #define MAX_IDE_DEVS   2
> @@ -42,6 +46,8 @@ DriveInfo *drive_get_by_blockdev(BlockDriverState *bs);
>  QemuOpts *drive_add(const char *file, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
>  DriveInfo *drive_init(QemuOpts *arg, int default_to_scsi, int *fatal_error);
>
> +extern int drives_reinit(void);

'extern' is useless for functions.
Markus Armbruster - Jan. 5, 2011, 3:52 p.m.
Juan Quintela <quintela@redhat.com> writes:

> We need to invalidate the Read Cache on the destination, otherwise we
> have corruption.  Easy way to reproduce it is:
>
> - create an qcow2 images
> - start qemu on destination of migration (qemu .... -incoming tcp:...)
> - start qemu on source of migration and do one install.
> - migrate at the end of install (when lot of disk IO has happened).
>
> Destination of migration has a local copy of the L1/L2 tables that existed
> at the beginning, before the install started.  We have disk corruption at
> this point.  The solution (for NFS) is to just re-open the file.  Operations
> have to happen in this order:
>
> - source of migration: flush()
> - destination: close(file);
> - destination: open(file)
>
> it is not necesary that source of migration close the file.
>
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> ---
>  blockdev.c  |   42 +++++++++++++++++++++++++++++++++++++-----
>  blockdev.h  |    6 ++++++
>  migration.c |    6 ++++++
>  3 files changed, 49 insertions(+), 5 deletions(-)
>
> diff --git a/blockdev.c b/blockdev.c
> index f9bb659..3f3df7a 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -116,6 +116,7 @@ void drive_uninit(DriveInfo *dinfo)
>      bdrv_delete(dinfo->bdrv);
>      QTAILQ_REMOVE(&drives, dinfo, next);
>      qemu_free(dinfo->id);
> +    qemu_free(dinfo->file);
>      qemu_free(dinfo);
>  }
>
> @@ -136,6 +137,36 @@ static int parse_block_error_action(const char *buf, int is_read)
>      }
>  }
>
> +static int drive_open(DriveInfo *dinfo)
> +{
> +    int res = bdrv_open(dinfo->bdrv, dinfo->file, dinfo->bdrv_flags, dinfo->drv);
> +
> +    if (res < 0) {
> +        fprintf(stderr, "qemu: could not open disk image %s: %s\n",
> +                        dinfo->file, strerror(errno));
> +    }
> +    return res;
> +}
> +
> +int drives_reinit(void)
> +{
> +    DriveInfo *dinfo;
> +
> +    QTAILQ_FOREACH(dinfo, &drives, next) {
> +        if (dinfo->opened && !bdrv_is_read_only(dinfo->bdrv)) {
> +            int res;
> +            bdrv_close(dinfo->bdrv);
> +            res = drive_open(dinfo);
> +            if (res) {
> +		    fprintf(stderr, "qemu: re-open of %s failed wth error %d\n",
> +			    dinfo->file, res);
> +		    return res;
> +	    }
> +        }
> +    }
> +    return 0;
> +}
> +

Shouldn't this live one layer down, in block.c?

We already have two reopens there, in bdrv_commit() and
bdrv_snapshot_goto().

I reduced DriveInfo to a mere helper object for drive_init() & friends
(see commit f8b6cc00, for instance).  Real block work should not use
it.

>  DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error)
>  {
>      const char *buf;
> @@ -156,7 +187,6 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error)
>      const char *devaddr;
>      DriveInfo *dinfo;
>      int snapshot = 0;
> -    int ret;
>
>      *fatal_error = 1;
>
> @@ -487,10 +517,12 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error)
>
>      bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
>
> -    ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
> -    if (ret < 0) {
> -        fprintf(stderr, "qemu: could not open disk image %s: %s\n",
> -                        file, strerror(-ret));
> +    dinfo->file = qemu_strdup(file);
> +    dinfo->bdrv_flags = bdrv_flags;
> +    dinfo->drv = drv;
> +    dinfo->opened = 1;
> +
> +    if (drive_open(dinfo) < 0) {
>          goto error;
>      }
>
> diff --git a/blockdev.h b/blockdev.h
> index 4536b5c..e009fee 100644
> --- a/blockdev.h
> +++ b/blockdev.h
> @@ -29,6 +29,10 @@ struct DriveInfo {
>      QemuOpts *opts;
>      char serial[BLOCK_SERIAL_STRLEN + 1];
>      QTAILQ_ENTRY(DriveInfo) next;
> +    int opened;

Could you explain why you need this member?

> +    int bdrv_flags;

Use BlockDriverState's open_flags, like the other reopens?

> +    char *file;

BlockDriverState's filename?

> +    BlockDriver *drv;

BlockDriverState's drv?

>  };
>
>  #define MAX_IDE_DEVS	2
> @@ -42,6 +46,8 @@ DriveInfo *drive_get_by_blockdev(BlockDriverState *bs);
>  QemuOpts *drive_add(const char *file, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
>  DriveInfo *drive_init(QemuOpts *arg, int default_to_scsi, int *fatal_error);
>
> +extern int drives_reinit(void);
> +
>  /* device-hotplug */
>
>  DriveInfo *add_init_drive(const char *opts);
> diff --git a/migration.c b/migration.c
> index a8b65e5..fdff804 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -17,6 +17,7 @@
>  #include "buffered_file.h"
>  #include "sysemu.h"
>  #include "block.h"
> +#include "blockdev.h"
>  #include "qemu_socket.h"
>  #include "block-migration.h"
>  #include "qemu-objects.h"
> @@ -69,6 +70,11 @@ void process_incoming_migration(QEMUFile *f)
>
>      incoming_expected = false;
>
> +    if (drives_reinit() != 0) {
> +        fprintf(stderr, "reopening of drives failed\n");
> +        exit(1);
> +    }
> +
>      if (autostart)
>          vm_start();
>  }

Patch

diff --git a/blockdev.c b/blockdev.c
index f9bb659..3f3df7a 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -116,6 +116,7 @@  void drive_uninit(DriveInfo *dinfo)
     bdrv_delete(dinfo->bdrv);
     QTAILQ_REMOVE(&drives, dinfo, next);
     qemu_free(dinfo->id);
+    qemu_free(dinfo->file);
     qemu_free(dinfo);
 }

@@ -136,6 +137,36 @@  static int parse_block_error_action(const char *buf, int is_read)
     }
 }

+static int drive_open(DriveInfo *dinfo)
+{
+    int res = bdrv_open(dinfo->bdrv, dinfo->file, dinfo->bdrv_flags, dinfo->drv);
+
+    if (res < 0) {
+        fprintf(stderr, "qemu: could not open disk image %s: %s\n",
+                        dinfo->file, strerror(errno));
+    }
+    return res;
+}
+
+int drives_reinit(void)
+{
+    DriveInfo *dinfo;
+
+    QTAILQ_FOREACH(dinfo, &drives, next) {
+        if (dinfo->opened && !bdrv_is_read_only(dinfo->bdrv)) {
+            int res;
+            bdrv_close(dinfo->bdrv);
+            res = drive_open(dinfo);
+            if (res) {
+		    fprintf(stderr, "qemu: re-open of %s failed wth error %d\n",
+			    dinfo->file, res);
+		    return res;
+	    }
+        }
+    }
+    return 0;
+}
+
 DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error)
 {
     const char *buf;
@@ -156,7 +187,6 @@  DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error)
     const char *devaddr;
     DriveInfo *dinfo;
     int snapshot = 0;
-    int ret;

     *fatal_error = 1;

@@ -487,10 +517,12 @@  DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi, int *fatal_error)

     bdrv_flags |= ro ? 0 : BDRV_O_RDWR;

-    ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
-    if (ret < 0) {
-        fprintf(stderr, "qemu: could not open disk image %s: %s\n",
-                        file, strerror(-ret));
+    dinfo->file = qemu_strdup(file);
+    dinfo->bdrv_flags = bdrv_flags;
+    dinfo->drv = drv;
+    dinfo->opened = 1;
+
+    if (drive_open(dinfo) < 0) {
         goto error;
     }

diff --git a/blockdev.h b/blockdev.h
index 4536b5c..e009fee 100644
--- a/blockdev.h
+++ b/blockdev.h
@@ -29,6 +29,10 @@  struct DriveInfo {
     QemuOpts *opts;
     char serial[BLOCK_SERIAL_STRLEN + 1];
     QTAILQ_ENTRY(DriveInfo) next;
+    int opened;
+    int bdrv_flags;
+    char *file;
+    BlockDriver *drv;
 };

 #define MAX_IDE_DEVS	2
@@ -42,6 +46,8 @@  DriveInfo *drive_get_by_blockdev(BlockDriverState *bs);
 QemuOpts *drive_add(const char *file, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
 DriveInfo *drive_init(QemuOpts *arg, int default_to_scsi, int *fatal_error);

+extern int drives_reinit(void);
+
 /* device-hotplug */

 DriveInfo *add_init_drive(const char *opts);
diff --git a/migration.c b/migration.c
index a8b65e5..fdff804 100644
--- a/migration.c
+++ b/migration.c
@@ -17,6 +17,7 @@ 
 #include "buffered_file.h"
 #include "sysemu.h"
 #include "block.h"
+#include "blockdev.h"
 #include "qemu_socket.h"
 #include "block-migration.h"
 #include "qemu-objects.h"
@@ -69,6 +70,11 @@  void process_incoming_migration(QEMUFile *f)

     incoming_expected = false;

+    if (drives_reinit() != 0) {
+        fprintf(stderr, "reopening of drives failed\n");
+        exit(1);
+    }
+
     if (autostart)
         vm_start();
 }