@@ -23,7 +23,7 @@ block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow
block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-nested-y += qed-check.o
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
-block-nested-y += blksim.o
+block-nested-y += blksim.o fvd.o
block-nested-$(CONFIG_WIN32) += raw-win32.o
block-nested-$(CONFIG_POSIX) += raw-posix.o
block-nested-$(CONFIG_CURL) += curl.o
new file mode 100644
@@ -0,0 +1,21 @@
+/*
+ * QEMU Fast Virtual Disk Format bdrv_create()
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * Placeholder for the image-creation callback (wired up as .bdrv_create).
+ *
+ * Neither argument is examined; the call always fails with -ENOTSUP.
+ */
+static int fvd_create(const char *filename, QEMUOptionParameter *options)
+{
+    (void)filename;
+    (void)options;
+
+    return -ENOTSUP;
+}
+
+/*
+ * Option table handed to the block layer as .create_options.  It currently
+ * holds a single NULL entry (presumably the list terminator -- confirm
+ * against qemu-option.h).
+ */
+static QEMUOptionParameter fvd_create_options[] = {
+    { NULL },
+};
new file mode 100644
@@ -0,0 +1,24 @@
+/*
+ * QEMU Fast Virtual Disk Format bdrv_flush() and bdrv_aio_flush()
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * Placeholder for the asynchronous flush callback (.bdrv_aio_flush).
+ *
+ * No request is started and cb is never invoked; NULL is returned
+ * unconditionally.
+ */
+static BlockDriverAIOCB *fvd_aio_flush(BlockDriverState *bs,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque)
+{
+    BlockDriverAIOCB *acb = NULL;
+
+    (void)bs;
+    (void)cb;
+    (void)opaque;
+
+    return acb;
+}
+
+/*
+ * Placeholder for the synchronous flush callback (.bdrv_flush).
+ *
+ * bs is not touched; the call always fails with -ENOTSUP.
+ */
+static int fvd_flush(BlockDriverState *bs)
+{
+    (void)bs;
+
+    return -ENOTSUP;
+}
new file mode 100644
@@ -0,0 +1,37 @@
+/*
+ * QEMU Fast Virtual Disk Format Misc Functions of BlockDriver Interface
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * Placeholder for the close callback (.bdrv_close).
+ *
+ * The skeleton driver holds no resources, so there is nothing to release.
+ */
+static void fvd_close(BlockDriverState *bs)
+{
+    (void)bs;
+}
+
+/*
+ * Placeholder for the format-probing callback (.bdrv_probe).
+ *
+ * The buffer and file name are ignored and 0 is returned for every input.
+ */
+static int fvd_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const int score = 0;
+
+    (void)buf;
+    (void)buf_size;
+    (void)filename;
+
+    return score;
+}
+
+/*
+ * Placeholder for the allocation-status callback (.bdrv_is_allocated).
+ *
+ * Always returns 0.  The out-parameter is nevertheless written, so that a
+ * caller which consumes *pnum after the call never reads an indeterminate
+ * value: the whole requested range, nb_sectors, is stored there.
+ *
+ * bs and sector_num are not examined.  pnum may be NULL, in which case
+ * nothing is stored.
+ */
+static int fvd_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                            int nb_sectors, int *pnum)
+{
+    (void)bs;
+    (void)sector_num;
+
+    if (pnum != NULL) {
+        *pnum = nb_sectors;
+    }
+
+    return 0;
+}
+
+/*
+ * Placeholder for the image-information callback (.bdrv_get_info).
+ *
+ * bdi is left untouched; the call always fails with -ENOTSUP.
+ */
+static int fvd_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    (void)bs;
+    (void)bdi;
+
+    return -ENOTSUP;
+}
+
+/*
+ * Placeholder for the zero-initialisation query (.bdrv_has_zero_init).
+ *
+ * bs is not examined; 0 is returned unconditionally.
+ */
+static int fvd_has_zero_init(BlockDriverState *bs)
+{
+    const int zero_init = 0;
+
+    (void)bs;
+
+    return zero_init;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/*
+ * QEMU Fast Virtual Disk Format bdrv_file_open()
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * Placeholder for the open callback (wired up as .bdrv_file_open).
+ *
+ * None of the arguments is examined; the call always fails with -ENOTSUP.
+ */
+static int fvd_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    (void)bs;
+    (void)filename;
+    (void)flags;
+
+    return -ENOTSUP;
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/*
+ * QEMU Fast Virtual Disk Format bdrv_aio_readv()
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * Placeholder for the asynchronous vectored-read callback (.bdrv_aio_readv).
+ *
+ * No request is started, qiov is not filled and cb is never invoked; NULL is
+ * returned unconditionally.
+ */
+static BlockDriverAIOCB *fvd_aio_readv(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       QEMUIOVector *qiov,
+                                       int nb_sectors,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque)
+{
+    BlockDriverAIOCB *acb = NULL;
+
+    (void)bs;
+    (void)sector_num;
+    (void)qiov;
+    (void)nb_sectors;
+    (void)cb;
+    (void)opaque;
+
+    return acb;
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/*
+ * QEMU Fast Virtual Disk Format bdrv_update()
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * Placeholder for the image-update callback (.bdrv_update).
+ *
+ * Neither argument is examined; the call always fails with -ENOTSUP.
+ */
+static int fvd_update(BlockDriverState *bs, QEMUOptionParameter *options)
+{
+    (void)bs;
+    (void)options;
+
+    return -ENOTSUP;
+}
+
+/*
+ * Option table handed to the block layer as .update_options.  It currently
+ * holds a single NULL entry (presumably the list terminator -- confirm
+ * against qemu-option.h).
+ */
+static QEMUOptionParameter fvd_update_options[] = {
+    { NULL },
+};
new file mode 100644
@@ -0,0 +1,21 @@
+/*
+ * QEMU Fast Virtual Disk Format bdrv_aio_writev()
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * Placeholder for the asynchronous vectored-write callback
+ * (.bdrv_aio_writev).
+ *
+ * No request is started, qiov is not read and cb is never invoked; NULL is
+ * returned unconditionally.
+ */
+static BlockDriverAIOCB *fvd_aio_writev(BlockDriverState *bs,
+                                        int64_t sector_num,
+                                        QEMUIOVector *qiov,
+                                        int nb_sectors,
+                                        BlockDriverCompletionFunc *cb,
+                                        void *opaque)
+{
+    BlockDriverAIOCB *acb = NULL;
+
+    (void)bs;
+    (void)sector_num;
+    (void)qiov;
+    (void)nb_sectors;
+    (void)cb;
+    (void)opaque;
+
+    return acb;
+}
new file mode 100644
@@ -0,0 +1,60 @@
+/*
+ * QEMU Fast Virtual Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*=============================================================================
+ * See the following companion papers for a detailed description of FVD:
+ * 1. The so-called "FVD-cow paper":
+ * "FVD: a High-Performance Virtual Machine Image Format for Cloud",
+ * by Chunqiang Tang, 2010.
+ * 2. The so-called "FVD-compact paper":
+ * "FVD: a High-Performance Virtual Machine Image Format for Cloud
+ * with Sparse Image Capability", by Chunqiang Tang, 2010.
+ *============================================================================*/
+
+#include "block/fvd.h"
+
+/* Use include to avoid exposing too many FVD symbols, and to allow inline
+ * function optimization. */
+#include "block/fvd-flush.c"
+#include "block/fvd-update.c"
+#include "block/fvd-misc.c"
+#include "block/fvd-create.c"
+#include "block/fvd-open.c"
+#include "block/fvd-read.c"
+#include "block/fvd-write.c"
+
+/*
+ * Driver description registered with the block layer under the format name
+ * "fvd".  Every callback below is one of the fvd_* functions pulled in by
+ * the #include directives above.
+ */
+static BlockDriver bdrv_fvd = {
+    .format_name        = "fvd",
+    .instance_size      = sizeof(BDRVFvdState),
+
+    /* Image life cycle. */
+    .bdrv_probe         = fvd_probe,
+    .bdrv_create        = fvd_create,
+    .bdrv_file_open     = fvd_open,
+    .bdrv_close         = fvd_close,
+    .bdrv_update        = fvd_update,
+
+    /* Data path. */
+    .bdrv_aio_readv     = fvd_aio_readv,
+    .bdrv_aio_writev    = fvd_aio_writev,
+    .bdrv_aio_flush     = fvd_aio_flush,
+    .bdrv_flush         = fvd_flush,
+
+    /* Queries. */
+    .bdrv_is_allocated  = fvd_is_allocated,
+    .bdrv_get_info      = fvd_get_info,
+    .bdrv_has_zero_init = fvd_has_zero_init,
+
+    /* Option tables. */
+    .create_options     = fvd_create_options,
+    .update_options     = fvd_update_options,
+};
+
+/*
+ * Hand the FVD driver description to the block layer.  The function is
+ * hooked up through block_init() immediately below.
+ */
+static void bdrv_fvd_init(void)
+{
+    BlockDriver *drv = &bdrv_fvd;
+
+    bdrv_register(drv);
+}
+
+block_init(bdrv_fvd_init);
new file mode 100644
@@ -0,0 +1,171 @@
+/*
+ * QEMU Fast Virtual Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Chunqiang Tang <ctang@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "block_int.h"
+#include "osdep.h"
+#include "qemu-option.h"
+#include "qemu-timer.h"
+#include "block.h"
+#include "qemu-queue.h"
+#include "qemu-common.h"
+
+/* Constants of the FVD image format. */
+enum {
+    /* The characters 'F', 'V', 'D', '\0', with 'F' in the lowest byte. */
+    FVD_MAGIC = (('F' << 0) | ('V' << 8) | ('D' << 16) | ('\0' << 24)),
+    FVD_VERSION = 1,
+    INCOMPATIBLE_FEATURES_SPACE = 4096,     /* in bytes. */
+    DEF_PAGE_SIZE = 4096,                   /* in bytes. */
+};
+
+/*
+ * The FVD format consists of the following fields in little endian:
+ * + Header fields of FvdHeader.
+ * + Bitmap, starting on a 4KB page boundary at a location specified by
+ * FvdHeader.bitmap_offset.
+ * + Journal, starting on a 4KB page boundary at a location specified by
+ * FvdHeader.journal_offset.
+ * + Table, starting on a 4KB page boundary at a location specified by
+ * FvdHeader.table_offset. When expanding the size of an existing FVD
+ * image, the table can be expanded to borrow space from the next,
+ * "virtual disk data" section, by relocating some data chunks.
+ * + Virtual disk data, starting on a 4KB page boundary. Optionally, disk
+ * data can be stored in a separate data file specified by
+ * FvdHeader.data_file.
+ */
+typedef struct __attribute__ ((__packed__)) FvdHeader {
+ uint32_t magic; /* FVD_MAGIC */
+
+ /* Size of FvdHeader in bytes, rounded up to DEF_PAGE_SIZE. A new FVD
+ * version may add fields to FvdHeader and hence need to increase
+ * header_size. When an old FVD version reads an image created by a new
+ * FVD version, the old version only reads the beginning part of FvdHeader
+ * that it can understand and ignores the new fields at the end of
+ * FvdHeader. */
+ uint32_t header_size;
+
+ /* Version of the FVD software that created the image. */
+ uint32_t create_version;
+
+ /* Version of the FVD software that opened the image most recently. This
+ * field is for forward compatibility. Consider one example. Suppose FVD
+ * version N+1 introduces a compatible feature, e.g., adding a
+ * 'last_modified' timestamp into the FVD image header. Even if FVD
+ * version N is unaware of this new feature, it can still open an image
+ * created by FVD version N+1 without problem, but won't update the
+ * last_modified field. FVD version N sets the image's
+ * 'last_open_version=N' when it opens the image. When FVD version N+1
+ * opens this image, it knows that the 'last_modified' field cannot be
+ * trusted and may take some actions accordingly, e.g., being conservative
+ * in some optimization heuristics that depend on the value of
+ * 'last_modified' to avoid making the optimization counterproductive. */
+ uint32_t last_open_version;
+
+ uint64_t virtual_disk_size; /* in bytes. Disk size perceived by the VM. */
+ uint64_t data_offset; /* in bytes. Aligned on DEF_PAGE_SIZE. */
+
+ /* Data can be optionally stored in a different file. */
+ char data_file[1024];
+ char data_file_fmt[16];
+
+ /* Base image. */
+ char base_img[1024];
+ char base_img_fmt[16];
+ uint64_t base_img_size; /* in bytes. */
+
+ /* Bitmap for base image. */
+ uint64_t bitmap_offset; /* in bytes. Aligned on DEF_PAGE_SIZE. */
+ uint64_t bitmap_size; /* in bytes. Rounded up to DEF_PAGE_SIZE. */
+ uint64_t block_size; /* in bytes. */
+
+ /* Journal */
+ uint64_t journal_offset; /* in bytes. */
+ uint64_t journal_size; /* in bytes. On-disk journal size. */
+ uint32_t clean_shutdown; /* true if VM's last shutdown was graceful. */
+ uint64_t stable_journal_epoch; /* Needed only if a chunk can be relocated.*/
+ uint64_t journal_buf_size; /* in bytes. In-memory buffer size. */
+ uint64_t journal_clean_buf_period; /* in milliseconds. */
+
+ /* Table for compact image. */
+ uint64_t table_offset; /* in bytes. Aligned on DEF_PAGE_SIZE. */
+ uint64_t table_size; /* in bytes. Rounded up to DEF_PAGE_SIZE. */
+ uint64_t chunk_size; /* in bytes. */
+ uint64_t storage_grow_unit; /* in bytes. */
+ char add_storage_cmd[1024];
+ uint32_t chunks_relocated; /* Affect bdrv_has_zero_init(). */
+
+ /* Copy-on-read */
+ uint32_t copy_on_read; /* true or false */
+ uint64_t max_outstanding_copy_on_read_data; /* in bytes. */
+
+ /* Prefetching. */
+ int64_t prefetch_start_delay; /* in seconds. -1 means disabled. */
+ uint32_t base_img_fully_prefetched; /* true or false. */
+ uint32_t num_prefetch_slots; /* Max number of outstanding prefetch writes. */
+ uint64_t bytes_per_prefetch; /* For whole image prefetching. */
+ uint64_t prefetch_read_throughput_measure_time; /* in milliseconds. */
+ uint64_t prefetch_write_throughput_measure_time; /* in milliseconds. */
+ uint64_t prefetch_min_read_throughput; /* in KB/second. */
+ uint64_t prefetch_min_write_throughput; /* in KB/second. */
+ uint64_t prefetch_max_read_throughput; /* in KB/second. */
+ uint64_t prefetch_max_write_throughput; /* in KB/second. */
+ uint64_t prefetch_throttle_time; /* in milliseconds. */
+
+ /* need_zero_init is true if the image mandates that the storage layer
+ * (BDRVFvdState.fvd_data) must return true for bdrv_has_zero_init().
+ * This is the case if the optimization described in Section 3.3.3 of the
+ * FVD-cow paper is enabled (see function search_holes()). If 'qemu-img
+ * create' sets need_zero_init to true, 'qemu-img update' can be used to
+ * manually reset it to false, if the user always manually pre-fills the
+ * storage (e.g., a raw partition) with zeros. If the image is stored on a
+ * file system, it already supports zero_init, and hence there is no need
+ * to manually manipulate this field. */
+ uint32_t need_zero_init;
+
+ /* This field enables adding incompatible features. For example, suppose
+ * FVD version N+1 adds image compression. A compressed image cannot be
+ * opened by FVD version N. Suppose in FVD version N, the value of
+ * INCOMPATIBLE_FEATURES_SPACE is 4096. Introducing image compression
+ * in FVD version N+1 causes the following changes to the header.
+ * In FVD version N:
+ * uint8_t incompatible_features[4096];
+ * In FVD version N+1:
+ * uint8_t image_compressed;
+ * uint8_t incompatible_features[4095];
+ *
+ * When any FVD version X opens an image, it always scans through the
+ * entire array of 'incompatible_features', although the size of
+ * INCOMPATIBLE_FEATURES_SPACE may be different for different FVD
+ * versions. If any bit of 'incompatible_features' is non-zero, FVD
+ * version X refuses to open the image. In the example above, if FVD
+ * version N+1 creates a non-compressed image, it sets
+ * 'image_compressed=0', which then still allows FVD version N to open the
+ * image. Instead of using one byte to represent a new feature, it can
+ * also use one bit to represent a new feature, which then allows a total
+ * of 32768 incompatible features to be added in the future.
+ */
+ uint8_t incompatible_features[INCOMPATIBLE_FEATURES_SPACE];
+
+ /* When a new FVD version introduces a new feature (which may or may not
+ * be backward compatible), an arbitrary number of new fields can be added
+ * to the image header, but those new fields must be added at the end of
+ * 'FvdHeader'. Old FVD versions simply won't read or write those new
+ * fields. Old FVD versions can still correctly access the bitmap, the
+ * journal, and the table, because no FVD version assumes a fixed header
+ * size, but instead accesses the bitmap, the journal, and the table
+ * through bitmap_offset, journal_offset, and table_offset, respectively.
+ * Similarly, if a new data structure of a variable size is added to the
+ * image header in the future, it must also be indexed by an offset field
+ * and a size field. */
+} FvdHeader;
+
+/*
+ * Per-image driver state; bdrv_fvd.instance_size is sizeof() this type.
+ *
+ * The skeleton driver keeps no state yet.  A struct without members is not
+ * valid ISO C: GCC accepts it only as an extension and gives it size zero,
+ * which would make instance_size zero as well.  The placeholder member keeps
+ * the type portable and its size non-zero until real fields are added.
+ */
+typedef struct BDRVFvdState {
+    char placeholder;   /* Remove once the first real field is added. */
+} BDRVFvdState;
This patch is part of the Fast Virtual Disk (FVD) proposal. See http://wiki.qemu.org/Features/FVD. This patch adds the skeleton of the block device driver for Fast Virtual Disk (FVD). Signed-off-by: Chunqiang Tang <ctang@us.ibm.com> --- Makefile.objs | 2 +- block/fvd-create.c | 21 +++++++ block/fvd-flush.c | 24 +++++++ block/fvd-misc.c | 37 +++++++++++ block/fvd-open.c | 17 +++++ block/fvd-read.c | 21 +++++++ block/fvd-update.c | 21 +++++++ block/fvd-write.c | 21 +++++++ block/fvd.c | 60 ++++++++++++++++++ block/fvd.h | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 394 insertions(+), 1 deletions(-) create mode 100644 block/fvd-create.c create mode 100644 block/fvd-flush.c create mode 100644 block/fvd-misc.c create mode 100644 block/fvd-open.c create mode 100644 block/fvd-read.c create mode 100644 block/fvd-update.c create mode 100644 block/fvd-write.c create mode 100644 block/fvd.c create mode 100644 block/fvd.h