Patchwork [2/2] drive_open: Add invalidate option for block devices

login
register
mail settings
Submitter Juan Quintela
Date Nov. 9, 2011, 7:16 p.m.
Message ID <8920a92ef9be3313494009853d25bae9cc5a1c54.1320865627.git.quintela@redhat.com>
Download mbox | patch
Permalink /patch/124663/
State New
Headers show

Comments

Juan Quintela - Nov. 9, 2011, 7:16 p.m.
Linux allows to invalidate block devices.  This is needed for the incoming
migration part.

Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 block.h           |    2 ++
 block/raw-posix.c |   24 ++++++++++++++++++++++++
 blockdev.c        |    8 ++++----
 3 files changed, 30 insertions(+), 4 deletions(-)
Kevin Wolf - Nov. 10, 2011, 11:33 a.m.
Am 09.11.2011 20:16, schrieb Juan Quintela:
> Linux allows to invalidate block devices.  This is needed for the incoming
> migration part.
> 
> Signed-off-by: Juan Quintela <quintela@redhat.com>

I think Christoph said that this ioctl kills ramdisks? Or was that
something different?

Kevin
Juan Quintela - Nov. 10, 2011, 4:45 p.m.
Kevin Wolf <kwolf@redhat.com> wrote:
> Am 09.11.2011 20:16, schrieb Juan Quintela:
>> Linux allows to invalidate block devices.  This is needed for the incoming
>> migration part.
>> 
>> Signed-off-by: Juan Quintela <quintela@redhat.com>
>
> I think Christoph said that this ioctl kills ramdisks? Or was that
> something different?

On patch 0/2 I said that I was not proposing this: it "kind of" fixed
the problem for iSCSI and Linux, but it is not "reliable".

Patch 2/2 was to start discussion, and to show the problem we had to
fix, not a "solution".

As said during the discussion:
- clustered filesystems: they are good
- non-coherent shared storage (NFS, iSCSI, ...): needs cache=none,
  anything else is insane
- formats (qcow2): needs reopen, or at least reload metadata.

And the discussion is how to go from here.

Later, Juan.

Patch

diff --git a/block.h b/block.h
index 38cd748..517b446 100644
--- a/block.h
+++ b/block.h
@@ -61,6 +61,8 @@  typedef struct BlockDevOps {
 #define BDRV_O_NATIVE_AIO  0x0080 /* use native AIO instead of the thread pool */
 #define BDRV_O_NO_BACKING  0x0100 /* don't open the backing file */
 #define BDRV_O_NO_FLUSH    0x0200 /* disable flushing on this disk */
+#define BDRV_O_INVALIDATE  0x0400 /* invalidate buffer cache for this device.
+                                     re-read things from server */

 #define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index a3de373..84303a0 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -52,6 +52,7 @@ 
 #include <sys/param.h>
 #include <linux/cdrom.h>
 #include <linux/fd.h>
+#include <linux/fs.h>
 #endif
 #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
 #include <sys/disk.h>
@@ -218,6 +219,29 @@  static int raw_open_common(BlockDriverState *bs, const char *filename,
     s->fd = fd;
     s->aligned_buf = NULL;

+#ifdef __linux__
+    if ((bdrv_flags & BDRV_O_INVALIDATE)) {
+        struct stat buf;
+        int res;
+
+        res = fstat(fd, &buf);
+
+        if (res < 0) {
+            return -errno;
+        }
+
+        if (S_ISBLK(buf.st_mode)) {
+            printf("we are in a block device: %s\n", filename);
+            res = ioctl(fd, BLKFLSBUF, 0);
+            if (res < 0) {
+                fprintf(stderr, "qemu: buffer invalidation of %s"
+                        " failed with error %d\n", filename, errno);
+                return -errno;
+            }
+        }
+    }
+#endif /* __linux__ */
+
     if ((bdrv_flags & BDRV_O_NOCACHE)) {
         /*
          * Allocate a buffer for read/modify/write cycles.  Chose the size
diff --git a/blockdev.c b/blockdev.c
index a10de7a..ea02ee7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -217,10 +217,10 @@  static int parse_block_error_action(const char *buf, int is_read)
     }
 }

-static int drive_open(DriveInfo *dinfo)
+static int drive_open(DriveInfo *dinfo, int extra_flags)
 {
     int res = bdrv_open(dinfo->bdrv, dinfo->file,
-                        dinfo->bdrv_flags, dinfo->drv);
+                        dinfo->bdrv_flags | extra_flags, dinfo->drv);

     if (res < 0) {
         fprintf(stderr, "qemu: could not open disk image %s: %s\n",
@@ -237,7 +237,7 @@  int drives_reinit(void)
         if (dinfo->opened && !bdrv_is_read_only(dinfo->bdrv)) {
             int res;
             bdrv_close(dinfo->bdrv);
-            res = drive_open(dinfo);
+            res = drive_open(dinfo, BDRV_O_INVALIDATE);
             if (res) {
                 fprintf(stderr, "qemu: re-open of %s failed with error %d\n",
                         dinfo->file, res);
@@ -550,7 +550,7 @@  DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
     dinfo->drv = drv;
     dinfo->opened = 1;

-    if (drive_open(dinfo) < 0) {
+    if (drive_open(dinfo, 0) < 0) {
         goto err;
     }