diff mbox

[09/26] FVD: add impl of interface bdrv_create()

Message ID 1298673486-3573-9-git-send-email-ctang@us.ibm.com
State New
Headers show

Commit Message

Chunqiang Tang Feb. 25, 2011, 10:37 p.m. UTC
This patch is part of the Fast Virtual Disk (FVD) proposal.
See http://wiki.qemu.org/Features/FVD.

This patch adds FVD's implementation of the bdrv_create() interface. It
supports FVD image creation.

Signed-off-by: Chunqiang Tang <ctang@us.ibm.com>
---
 block/fvd-create.c  |  702 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 block/fvd-journal.c |    5 +
 block/fvd.c         |    2 +-
 3 files changed, 707 insertions(+), 2 deletions(-)
diff mbox

Patch

diff --git a/block/fvd-create.c b/block/fvd-create.c
index 5593cea..c8912aa 100644
--- a/block/fvd-create.c
+++ b/block/fvd-create.c
@@ -11,11 +11,711 @@ 
  *
  */
 
+static void fvd_header_cpu_to_le(FvdHeader * header);
+static inline int64_t calc_min_journal_size(int64_t table_entries);
+static inline int search_empty_blocks(int fd, uint8_t * bitmap,
+                                      BlockDriverState * bs,
+                                      int64_t nb_sectors,
+                                      int32_t hole_size,
+                                      int32_t block_size);
+
 static int fvd_create(const char *filename, QEMUOptionParameter * options)
 {
-    return -ENOTSUP;
+    int fd, ret = 0;
+    FvdHeader *header;
+    int64_t virtual_disk_size = DEF_PAGE_SIZE;
+    int32_t header_size;
+    const char *base_img = NULL;
+    const char *base_img_fmt = NULL;
+    const char *data_file = NULL;
+    const char *data_file_fmt = NULL;
+    int32_t hole_size = 0;
+    int copy_on_read = false;
+    int prefetch_start_delay = -1;
+    BlockDriverState *bs = NULL;
+    int bitmap_size = 0;
+    int64_t base_img_size = 0;
+    int64_t table_size = 0;
+    int64_t journal_size = 0;
+    int32_t block_size = 0;
+    int compact_image = false;
+    uint64_t max_copy_on_read = MAX_OUTSTANDING_COPY_ON_READ_DATA;
+    uint32_t num_prefetch_slots = NUM_PREFETCH_SLOTS;
+    uint64_t bytes_per_prefetch = BYTES_PER_PREFETCH;
+    uint64_t prefetch_throttle_time = PREFETCH_THROTTLING_TIME;
+    uint64_t prefetch_read_measure_time = PREFETCH_MIN_MEASURE_READ_TIME;
+    uint64_t prefetch_write_measure_time = PREFETCH_MIN_MEASURE_WRITE_TIME;
+    uint64_t prefetch_min_read_throughput = PREFETCH_MIN_READ_THROUGHPUT;
+    uint64_t prefetch_min_write_throughput = PREFETCH_MIN_WRITE_THROUGHPUT;
+    uint64_t prefetch_max_read_throughput = PREFETCH_MAX_READ_THROUGHPUT;
+    uint64_t prefetch_max_write_throughput = PREFETCH_MAX_WRITE_THROUGHPUT;
+
+    header_size = sizeof(FvdHeader);
+    header_size = ROUND_UP(header_size, DEF_PAGE_SIZE);
+    header = my_qemu_mallocz(header_size);
+    header->header_size = header_size;
+
+    /* Read out options */
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            virtual_disk_size = options->value.n;
+        } else if (!strcmp(options->name, "prefetch_start_delay")) {
+            if (options->value.n <= 0) {
+                prefetch_start_delay = -1;
+            } else {
+                prefetch_start_delay = options->value.n;
+            }
+        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+            base_img = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
+            base_img_fmt = options->value.s;
+        } else if (!strcmp(options->name, "copy_on_read")) {
+            copy_on_read = options->value.n;
+        } else if (!strcmp(options->name, "data_file")) {
+            data_file = options->value.s;
+        } else if (!strcmp(options->name, "data_file_fmt")) {
+            data_file_fmt = options->value.s;
+        } else if (!strcmp(options->name, "optimize_empty_block")) {
+            hole_size = options->value.n;
+        } else if (!strcmp(options->name, "compact_image")) {
+            compact_image = options->value.n;
+        } else if (!strcmp(options->name, "block_size")) {
+            block_size = options->value.n;
+        } else if (!strcmp(options->name, "chunk_size")) {
+            header->chunk_size = options->value.n;
+        } else if (!strcmp(options->name, "journal_size")) {
+            journal_size = options->value.n;
+        } else if (!strcmp(options->name, "journal_buf_size")) {
+            header->journal_buf_size = options->value.n;
+        } else if (!strcmp(options->name, "journal_clean_buf_period")) {
+            header->journal_clean_buf_period = options->value.n;
+        } else if (!strcmp(options->name, "storage_grow_unit")) {
+            header->storage_grow_unit = options->value.n;
+        } else if (!strcmp(options->name, "add_storage_cmd") &&
+                   options->value.s) {
+            pstrcpy(header->add_storage_cmd, sizeof(header->add_storage_cmd),
+                    options->value.s);
+        } else if (!strcmp(options->name, "num_prefetch_slots") &&
+                   options->value.n > 0) {
+            num_prefetch_slots = options->value.n;
+        } else if (!strcmp(options->name, "bytes_per_prefetch") &&
+                   options->value.n > 0) {
+            bytes_per_prefetch = options->value.n;
+        } else if (!strcmp(options->name, "prefetch_throttle_time") &&
+                   options->value.n > 0) {
+            prefetch_throttle_time = options->value.n;
+        } else if (!strcmp(options->name,
+                           "prefetch_read_throughput_measure_time") &&
+                   options->value.n > 0) {
+            prefetch_read_measure_time = options->value.n;
+        } else if (!strcmp(options->name,
+                           "prefetch_write_throughput_measure_time") &&
+                   options->value.n > 0) {
+            prefetch_write_measure_time = options->value.n;
+        } else if (!strcmp(options->name,
+                           "prefetch_min_read_throughput") &&
+                   options->value.n > 0) {
+            prefetch_min_read_throughput = options->value.n;
+        } else if (!strcmp(options->name,
+                           "prefetch_min_write_throughput") &&
+                   options->value.n > 0) {
+            prefetch_min_write_throughput = options->value.n;
+        } else if (!strcmp(options->name,
+                           "prefetch_max_read_throughput") &&
+                   options->value.n > 0) {
+            prefetch_max_read_throughput = options->value.n;
+        } else if (!strcmp(options->name,
+                           "prefetch_max_write_throughput") &&
+                   options->value.n > 0) {
+            prefetch_max_write_throughput = options->value.n;
+        } else if (!strcmp(options->name,
+                           "max_outstanding_copy_on_read_data") &&
+                   options->value.n > 0) {
+            max_copy_on_read = options->value.n;
+        }
+        options++;
+    }
+
+    virtual_disk_size = ROUND_UP(virtual_disk_size, 512);
+
+    /* Check if arguments are valid. */
+    if (base_img && strlen(base_img) > 1023) {
+        fprintf(stderr, "The base image name is longer than 1023 characters, "
+                "which is not allowed.\n");
+        return -EINVAL;
+    }
+
+    if (base_img && hole_size > 0) {
+        if (compact_image) {
+            fprintf(stderr, "compact_image and optimize_empty_block cannot be "
+                    "enabled together. Please disable optimize_empty_block.\n");
+            return -EINVAL;
+        }
+        header->need_zero_init = true;
+    } else {
+        header->need_zero_init = false;
+    }
+
+    if (data_file) {
+        pstrcpy(header->data_file, 1024, data_file);
+        if (data_file_fmt) {
+            pstrcpy(header->data_file_fmt, 16, data_file_fmt);
+        }
+    }
+
+    header->magic = FVD_MAGIC;
+    header->last_open_version = header->create_version = FVD_VERSION;
+    header->virtual_disk_size = virtual_disk_size;
+    header->clean_shutdown = true;
+
+    if (!base_img) {
+        header->base_img_fully_prefetched = true;
+    } else {
+        /* Handle base image. */
+        int ret;
+
+        bs = bdrv_new("");
+        if (!bs) {
+            fprintf(stderr, "Failed to create a new block driver\n");
+            return -EIO;
+        }
+
+        pstrcpy(header->base_img, 1024, base_img);
+        if (base_img_fmt) {
+            pstrcpy(header->base_img_fmt, 16, base_img_fmt);
+            BlockDriver *drv = bdrv_find_format(base_img_fmt);
+            if (!drv) {
+                fprintf(stderr, "Failed to find driver for format '%s'\n",
+                        base_img_fmt);
+                return -EINVAL;
+            }
+            ret = bdrv_open(bs, base_img, 0, drv);
+        } else {
+            ret = bdrv_open(bs, base_img, 0, NULL);
+        }
+
+        if (ret < 0) {
+            fprintf(stderr, "Failed to open the base image %s\n", base_img);
+            return -EIO;
+        }
+
+        base_img_size = bdrv_getlength(bs);
+        base_img_size = MIN(virtual_disk_size, base_img_size);
+        base_img_size = ROUND_UP(base_img_size, 512);
+
+        if (block_size <= 0) {
+            /* No block size is provided. Find the smallest block size that
+             * does not make the bitmap too big. */
+            block_size = 512;
+            while (1) {
+                int64_t blocks = (base_img_size + block_size - 1) / block_size;
+                bitmap_size = (blocks + 7) / 8;
+                if (bitmap_size <= MODERATE_BITMAP_SIZE) {
+                    break;
+                }
+                block_size *= 2;
+            }
+        } else {
+            block_size = ROUND_UP(block_size, 512);
+            int64_t blocks = (base_img_size + block_size - 1) / block_size;
+            bitmap_size = (blocks + 7) / 8;
+        }
+
+        bitmap_size = ROUND_UP(bitmap_size, DEF_PAGE_SIZE);
+        header->bitmap_size = bitmap_size;
+        header->block_size = block_size;
+        header->bitmap_offset = header_size;
+        header->base_img_size = base_img_size;
+        header->max_outstanding_copy_on_read_data = max_copy_on_read;
+        header->copy_on_read = copy_on_read;
+        header->prefetch_start_delay = prefetch_start_delay;
+        header->num_prefetch_slots = num_prefetch_slots;
+        header->bytes_per_prefetch = ROUND_UP(bytes_per_prefetch, block_size);
+        header->prefetch_throttle_time = prefetch_throttle_time;
+        header->prefetch_read_throughput_measure_time =
+                                        prefetch_read_measure_time;
+        header->prefetch_write_throughput_measure_time =
+                                        prefetch_write_measure_time;
+        header->prefetch_min_read_throughput = prefetch_min_read_throughput;
+        header->prefetch_min_write_throughput = prefetch_min_write_throughput;
+        header->prefetch_max_read_throughput = prefetch_max_read_throughput;
+        header->prefetch_max_write_throughput = prefetch_max_write_throughput;
+        header->base_img_fully_prefetched = false;
+    }
+
+    /* Set the table size. */
+    if (compact_image) {
+        if (header->chunk_size <= 0) {
+            header->chunk_size = CHUNK_SIZE;
+        }
+        if (base_img) {
+            /* chunk_size must be a multiple of block_size. */
+            header->chunk_size = ROUND_UP(header->chunk_size, block_size);
+        } else {
+            header->chunk_size = ROUND_UP(header->chunk_size, DEF_PAGE_SIZE);
+        }
+
+        if (header->storage_grow_unit <= 0) {
+            header->storage_grow_unit = STORAGE_GROW_UNIT;
+        }
+        if (header->storage_grow_unit < header->chunk_size) {
+            header->storage_grow_unit = header->chunk_size;
+        }
+        int64_t table_entries =
+            (virtual_disk_size + header->chunk_size - 1) / header->chunk_size;
+        table_size = sizeof(uint32_t) * table_entries;
+        table_size = ROUND_UP(table_size, DEF_PAGE_SIZE);
+        if (table_size > 0) {
+            header->table_size = table_size;
+        }
+    }
+
+    /* Set the journal size. */
+    if (bitmap_size <= 0 && table_size <= 0) {
+        header->journal_size = 0;       /* No need to use journal. */
+    } else if (journal_size < 0) {
+        /* Disable the use of journal, which reduces overhead but may cause
+         * data corruption if the host crashes. This is a valid configuration
+         * for some use cases, where data integrity is not critical.  */
+        header->journal_size = 0;
+    } else {
+        if (journal_size == 0) {
+            /* No journal size is specified. Use a default size. */
+            journal_size = JOURNAL_SIZE;
+        }
+        if (table_size > 0) {
+            /* Make sure that the journal is at least large enough to record
+             * all table changes in one shot, which is the extremely unlikely
+             * worst case. */
+            int64_t vsize = virtual_disk_size + header->chunk_size - 1;
+            int64_t table_entries = vsize / header->chunk_size;
+            int64_t min_journal_size = calc_min_journal_size(table_entries);
+            if (journal_size < min_journal_size) {
+                journal_size = min_journal_size;
+            }
+        }
+        journal_size = ROUND_UP(journal_size, DEF_PAGE_SIZE);
+        header->journal_size = journal_size;
+        header->journal_offset = header_size + bitmap_size;
+
+        if (header->journal_buf_size <= 0) {
+            header->journal_buf_size = JOURNAL_BUF_SIZE;
+        }
+        header->journal_buf_size = ROUND_UP(header->journal_buf_size, 512);
+        if (header->journal_buf_size > header->journal_size) {
+            header->journal_buf_size = header->journal_size;
+        }
+        if (header->journal_clean_buf_period == 0) {
+            header->journal_clean_buf_period = JOURNAL_CLEAN_BUF_PERIOD;
+        }
+    }
+
+    if (table_size > 0) {
+        /* Table is located right before the data region. When expanding the
+         * size of an existing FVD image, the table can be expanded to borrow
+         * space from the data region, by relocating some data chunks. */
+        header->table_offset = header_size + bitmap_size + journal_size;
+    }
+
+    header->data_offset = header_size + bitmap_size + table_size +
+        MAX(0, journal_size);
+
+    /* Create the image file. */
+    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+    if (fd < 0) {
+        fprintf(stderr, "Failed to open %s\n", filename);
+        goto fail;
+    }
+    fvd_header_cpu_to_le(header);
+
+    if (qemu_write_full(fd, header, header_size) != header_size) {
+        fprintf(stderr, "Failed to write the header of %s\n", filename);
+        goto fail;
+    }
+
+    /* Initialize the bitmap. */
+    if (bitmap_size > 0) {
+        uint8_t *bitmap = my_qemu_mallocz(bitmap_size);
+        if (hole_size > 0) {
+            if ((ret = search_empty_blocks(fd, bitmap, bs, base_img_size / 512,
+                                           hole_size, block_size))) {
+                goto fail;
+            }
+        }
+
+        ret = qemu_write_full(fd, bitmap, bitmap_size);
+        my_qemu_free(bitmap);
+        if (ret != bitmap_size) {
+            fprintf(stderr, "Failed to zero out the bitmap of %s\n", filename);
+            goto fail;
+        }
+    }
+
+    /* Initialize the journal. */
+    if (journal_size > 0) {
+        uint8_t *empty_journal = my_qemu_malloc(journal_size);
+        memset(empty_journal, 0xA5, journal_size); /* EMPTY_JRECORD */
+        ret = qemu_write_full(fd, empty_journal, journal_size);
+        my_qemu_free(empty_journal);
+        if (ret != journal_size) {
+            fprintf(stderr, "Failed to initialize the journal\n");
+            goto fail;
+        }
+    }
+
+    /* Initialize the table. */
+    if (table_size > 0) {
+        /* Set all entries to EMPTY_TABLE (0xFFFFFFFF). */
+        uint8_t *empty_table = my_qemu_malloc(table_size);
+        memset(empty_table, 0xFF, table_size);
+        ret = qemu_write_full(fd, empty_table, table_size);
+        my_qemu_free(empty_table);
+        if (ret != table_size) {
+            fprintf(stderr, "Failed to write the table of %s\n.", filename);
+            goto fail;
+        }
+    }
+
+    if (bs) {
+        bdrv_close(bs);
+    }
+    my_qemu_free(header);
+    return 0;
+
+fail:
+    if (bs) {
+        bdrv_close(bs);
+    }
+    close(fd);
+    my_qemu_free(header);
+    return -EIO;
 }
 
+/* For the optimization called "free write to zero-filled blocks". See Section
+ * 3.3.3 of the FVD-cow paper. Briefly, it finds zero-filled blocks in the
+ * base image and sets the corresponding bits in the bitmap to one.
+ *
+ * fd          - descriptor of the image being created (not used here).
+ * bitmap      - bitmap to update; one bit per block of block_size bytes.
+ * bs          - the opened base image to scan.
+ * nb_sectors  - number of 512-byte sectors of the base image to scan.
+ * hole_size   - minimum size in bytes of a zero-filled region; it is raised
+ *               to at least block_size and rounded up to a multiple of it.
+ * block_size  - bitmap granularity in bytes.
+ *
+ * The base image is scanned in units of hole_size bytes; a trailing region
+ * shorter than one hole is not examined.  Returns 0 on success and -EIO if
+ * reading the base image fails. */
+static inline int search_empty_blocks(int fd, uint8_t * bitmap,
+                                      BlockDriverState * bs,
+                                      int64_t nb_sectors,
+                                      int32_t hole_size,
+                                      int32_t block_size)
+{
+    printf("Searching empty blocks in the base image. Please wait...");
+    fflush(stdout);
+
+    if (hole_size < block_size) {
+        hole_size = block_size;
+    }
+    hole_size = ROUND_UP(hole_size, block_size);
+    const int sectors_per_hole = hole_size / 512;
+    const int sectors_per_block = block_size / 512;
+    const int num_int64_in_hole = hole_size / 8;
+    int64_t hole_count = 0;
+    int i;
+    int64_t sec = 0;
+    uint8_t *p = my_qemu_blockalign(bs, hole_size);
+
+    /* nb_sectors counts sectors, so it must be rounded down to a whole number
+     * of holes expressed in sectors, not in bytes. */
+    nb_sectors = ROUND_DOWN(nb_sectors, sectors_per_hole);
+
+    while (sec < nb_sectors) {
+        const int64_t *q;
+
+        if (bdrv_read(bs, sec, p, sectors_per_hole) < 0) {
+            fprintf(stderr, "Error in reading the base image\n");
+            my_qemu_vfree(p);
+            return -EIO;
+        }
+
+        /* All zeros? Scan eight bytes at a time. */
+        q = (const int64_t *) p;
+        for (i = 0; i < num_int64_in_hole; i++) {
+            if (q[i] != 0) {
+                break;
+            }
+        }
+
+        if (i < num_int64_in_hole) {
+            /* This is not a hole. */
+            sec += sectors_per_hole;
+            continue;
+        }
+
+        /* These sectors consist of only zeros.  Set the flag of every block
+         * in the hole to indicate that there is no need to read it from the
+         * base image.  See Section 3.3.3 of the FVD-cow paper for the
+         * rationale. */
+        hole_count++;
+        const int64_t end = sec + sectors_per_hole;
+        while (sec < end) {
+            const int64_t block_num = sec / sectors_per_block;
+            const int64_t bitmap_byte_offset = block_num / 8;
+            const uint8_t mask = (uint8_t) (0x01 << (block_num % 8));
+
+            bitmap[bitmap_byte_offset] |= mask;
+            sec += sectors_per_block;
+        }
+    }
+
+    printf("\nFound %"PRId64" zero-filled empty regions.\n", hole_count);
+    my_qemu_vfree(p);
+    return 0;
+}
+
+/* Convert, in place, the integer fields of an FVD header from host byte
+ * order to the little-endian order used on disk.  The swaps are grouped by
+ * the width used for each field; every field is converted exactly once.
+ * NOTE(review): the widths are taken from the conversion calls themselves;
+ * confirm them against the FvdHeader declaration. */
+static void fvd_header_cpu_to_le(FvdHeader * header)
+{
+    /* Fields converted as 32-bit values. */
+    cpu_to_le32s(&header->magic);
+    cpu_to_le32s(&header->header_size);
+    cpu_to_le32s(&header->create_version);
+    cpu_to_le32s(&header->last_open_version);
+    cpu_to_le32s((uint32_t *) &header->base_img_fully_prefetched);
+    cpu_to_le32s((uint32_t *) &header->copy_on_read);
+    cpu_to_le32s((uint32_t *) &header->need_zero_init);
+    cpu_to_le32s((uint32_t *) &header->num_prefetch_slots);
+    cpu_to_le32s((uint32_t *) &header->clean_shutdown);
+
+    /* Fields converted as 64-bit values: image geometry and layout. */
+    cpu_to_le64s((uint64_t *) &header->data_offset);
+    cpu_to_le64s((uint64_t *) &header->virtual_disk_size);
+    cpu_to_le64s((uint64_t *) &header->base_img_size);
+    cpu_to_le64s((uint64_t *) &header->max_outstanding_copy_on_read_data);
+    cpu_to_le64s((uint64_t *) &header->bitmap_offset);
+    cpu_to_le64s((uint64_t *) &header->bitmap_size);
+    cpu_to_le64s((uint64_t *) &header->block_size);
+    cpu_to_le64s((uint64_t *) &header->chunk_size);
+    cpu_to_le64s((uint64_t *) &header->storage_grow_unit);
+    cpu_to_le64s((uint64_t *) &header->table_offset);
+
+    /* Fields converted as 64-bit values: prefetch tunables. */
+    cpu_to_le64s((uint64_t *) &header->prefetch_start_delay);
+    cpu_to_le64s((uint64_t *) &header->bytes_per_prefetch);
+    cpu_to_le64s((uint64_t *) &header->prefetch_throttle_time);
+    cpu_to_le64s((uint64_t *) &header->prefetch_read_throughput_measure_time);
+    cpu_to_le64s((uint64_t *) &header->prefetch_write_throughput_measure_time);
+    cpu_to_le64s((uint64_t *) &header->prefetch_min_read_throughput);
+    cpu_to_le64s((uint64_t *) &header->prefetch_min_write_throughput);
+    cpu_to_le64s((uint64_t *) &header->prefetch_max_read_throughput);
+    cpu_to_le64s((uint64_t *) &header->prefetch_max_write_throughput);
+
+    /* Fields converted as 64-bit values: journal. */
+    cpu_to_le64s((uint64_t *) &header->journal_offset);
+    cpu_to_le64s((uint64_t *) &header->journal_size);
+    cpu_to_le64s((uint64_t *) &header->stable_journal_epoch);
+    cpu_to_le64s((uint64_t *) &header->journal_buf_size);
+    cpu_to_le64s((uint64_t *) &header->journal_clean_buf_period);
+}
+
+/* Convert, in place, the integer fields of an FVD header from the
+ * little-endian on-disk order to host byte order.  This must be the exact
+ * inverse of fvd_header_cpu_to_le(): every field is swapped with the same
+ * width that is used when the header is written.  In particular,
+ * num_prefetch_slots is written as a 32-bit value, so it is read back as a
+ * 32-bit value too; swapping it as 64 bits would corrupt it and the bytes
+ * that follow it on big-endian hosts.
+ * NOTE(review): the widths are taken from fvd_header_cpu_to_le(); confirm
+ * them against the FvdHeader declaration. */
+static void fvd_header_le_to_cpu(FvdHeader * header)
+{
+    /* Fields converted as 32-bit values. */
+    le32_to_cpus(&header->magic);
+    le32_to_cpus(&header->header_size);
+    le32_to_cpus(&header->create_version);
+    le32_to_cpus(&header->last_open_version);
+    le32_to_cpus((uint32_t *) &header->base_img_fully_prefetched);
+    le32_to_cpus((uint32_t *) &header->copy_on_read);
+    le32_to_cpus((uint32_t *) &header->need_zero_init);
+    le32_to_cpus((uint32_t *) &header->num_prefetch_slots);
+    le32_to_cpus((uint32_t *) &header->clean_shutdown);
+
+    /* Fields converted as 64-bit values: image geometry and layout. */
+    le64_to_cpus((uint64_t *) &header->data_offset);
+    le64_to_cpus((uint64_t *) &header->virtual_disk_size);
+    le64_to_cpus((uint64_t *) &header->base_img_size);
+    le64_to_cpus((uint64_t *) &header->max_outstanding_copy_on_read_data);
+    le64_to_cpus((uint64_t *) &header->bitmap_offset);
+    le64_to_cpus((uint64_t *) &header->bitmap_size);
+    le64_to_cpus((uint64_t *) &header->block_size);
+    le64_to_cpus((uint64_t *) &header->chunk_size);
+    le64_to_cpus((uint64_t *) &header->storage_grow_unit);
+    le64_to_cpus((uint64_t *) &header->table_offset);
+
+    /* Fields converted as 64-bit values: prefetch tunables. */
+    le64_to_cpus((uint64_t *) &header->prefetch_start_delay);
+    le64_to_cpus((uint64_t *) &header->bytes_per_prefetch);
+    le64_to_cpus((uint64_t *) &header->prefetch_throttle_time);
+    le64_to_cpus((uint64_t *) &header->prefetch_read_throughput_measure_time);
+    le64_to_cpus((uint64_t *) &header->prefetch_write_throughput_measure_time);
+    le64_to_cpus((uint64_t *) &header->prefetch_min_read_throughput);
+    le64_to_cpus((uint64_t *) &header->prefetch_min_write_throughput);
+    le64_to_cpus((uint64_t *) &header->prefetch_max_read_throughput);
+    le64_to_cpus((uint64_t *) &header->prefetch_max_write_throughput);
+
+    /* Fields converted as 64-bit values: journal. */
+    le64_to_cpus((uint64_t *) &header->journal_offset);
+    le64_to_cpus((uint64_t *) &header->journal_size);
+    le64_to_cpus((uint64_t *) &header->stable_journal_epoch);
+    le64_to_cpus((uint64_t *) &header->journal_buf_size);
+    le64_to_cpus((uint64_t *) &header->journal_clean_buf_period);
+}
+
+/* Load the FVD header from s->fvd_metadata into 'header' and convert it to
+ * host byte order.  Images created by other FVD versions may carry a header
+ * of a different size, so only the part that both versions share is read.
+ * Returns 0 on success, or -EIO if a read fails or the magic number does not
+ * match. */
+static int read_fvd_header(BDRVFvdState * s, FvdHeader * header)
+{
+    int shared_len;
+
+    /* First pass: fetch the leading 512 bytes just to learn the size that
+     * the creating FVD version recorded in header->header_size. */
+    if (bdrv_pread(s->fvd_metadata, 0, header, 512) != 512) {
+        goto read_failed;
+    }
+    le32_to_cpus(&header->header_size);
+
+    /* Second pass: re-read from offset 0, limited to the portion of the
+     * header understood by both the creator and this version. */
+    shared_len = MIN(header->header_size, sizeof(FvdHeader));
+    if (bdrv_pread(s->fvd_metadata, 0, header, shared_len) != shared_len) {
+        goto read_failed;
+    }
+
+    fvd_header_le_to_cpu(header);
+
+    if (header->magic == FVD_MAGIC) {
+        return 0;
+    }
+
+    fprintf(stderr, "Error: image does not have the correct FVD format "
+            "magic number in header\n");
+    return -EIO;
+
+read_failed:
+    fprintf(stderr, "Failed to read the FVD header.\n");
+    return -EIO;
+}
+
+/* Write 'header' back to the start of s->fvd_metadata.  Because headers of
+ * different FVD versions may differ in size, only the portion shared by the
+ * creating version and this version is written.  Note that 'header' is
+ * converted to little-endian in place and is left in that form on return.
+ * Returns 0 on success and -EIO on a failed or short write. */
+static int update_fvd_header(BDRVFvdState * s, FvdHeader * header)
+{
+    /* The length must be taken before the byte-order conversion below,
+     * while header->header_size is still in host order. */
+    const int shared_len = MIN(header->header_size, sizeof(FvdHeader));
+
+    fvd_header_cpu_to_le(header);
+
+    if (bdrv_pwrite(s->fvd_metadata, 0, header, shared_len) == shared_len) {
+        return 0;
+    }
+
+    fprintf(stderr, "Failed to update the FVD header.\n");
+    ASSERT(false);
+    return -EIO;
+}
+
+/* Re-read the on-disk header, record FVD_VERSION as the last version that
+ * opened the image, set clean_shutdown to 'clean', and write the header
+ * back.  Failures to read or write the header are silently ignored; if the
+ * final flush fails, further writes are prohibited through
+ * s->metadata_err_prohibit_write. */
+static inline void update_clean_shutdown_flag(BDRVFvdState * s, int clean)
+{
+    FvdHeader header;
+
+    if (read_fvd_header(s, &header) != 0) {
+        return;
+    }
+
+    header.last_open_version = FVD_VERSION;
+    header.clean_shutdown = clean;
+    if (update_fvd_header(s, &header) != 0) {
+        return;
+    }
+
+    QDEBUG("Set clean_shutdown to %s\n", BOOL(clean));
+    if (bdrv_flush(s->fvd_metadata)) {
+        s->metadata_err_prohibit_write = true;
+    }
+}
+
+
 static QEMUOptionParameter fvd_create_options[] = {
+    {
+     .name = BLOCK_OPT_SIZE,
+     .type = OPT_SIZE,
+     .help = "Virtual disk size"},
+    {
+     .name = "compact_image",
+     .type = OPT_FLAG,
+     .help = "compact_image=on|off"},
+    {
+     .name = "block_size",
+     .type = OPT_SIZE,
+     .help = "Block size"},
+    {
+     .name = "chunk_size",
+     .type = OPT_SIZE,
+     .help = "Chunk size"},
+    {
+     .name = "storage_grow_unit",
+     .type = OPT_SIZE,
+     .help = "Storage grow unit"},
+    {
+     .name = "add_storage_cmd",
+     .type = OPT_STRING,
+     .help = "Command to add storage when running out of space"},
+    {
+     .name = BLOCK_OPT_BACKING_FILE,
+     .type = OPT_STRING,
+     .help = "File name of a backing image"},
+    {
+     .name = BLOCK_OPT_BACKING_FMT,
+     .type = OPT_STRING,
+     .help = "Image format of the backing image"},
+    {
+     .name = "data_file",
+     .type = OPT_STRING,
+     .help = "File name of a data file"},
+    {
+     .name = "data_file_fmt",
+     .type = OPT_STRING,
+     .help = "Image format of the data file"},
+    {
+     .name = "copy_on_read",
+     .type = OPT_FLAG,
+     .help = "copy_on_read=on|off"},
+    {
+     .name = "prefetch_start_delay",
+     .type = OPT_NUMBER,
+     .help = "Delay in seconds before starting whole image prefetching. "},
+    {
+     .name = "journal_size",
+     .type = OPT_SIZE,
+     .help = "Journal size"},
+    {
+     .name = "max_outstanding_copy_on_read_data",
+     .type = OPT_SIZE,
+     .help = "copy_on_read is temporarily disabled when the unsaved data "
+     "exceed this threshold (in bytes)"},
+    {
+     .name = "journal_buf_size",
+     .type = OPT_SIZE,
+     .help = "size of in-memory journal buffer (in bytes)"},
+    {
+     .name = "journal_clean_buf_period",
+     .type = OPT_NUMBER,
+     .help = "(milliseconds)"},
+    {
+     .name = "num_prefetch_slots",
+     .type = OPT_NUMBER,
+     .help = "number of concurrent prefetches allowed"},
+    {
+     .name = "bytes_per_prefetch",
+     .type = OPT_NUMBER,
+     .help = "data to read per prefetch"},
+    {
+     .name = "bytes_per_prefetch",
+     .type = OPT_NUMBER,
+     .help = "data to read per prefetch"},
+    {
+     .name = "prefetch_over_threshold_throttle_time",
+     .type = OPT_NUMBER,
+     .help = "in milliseconds"},
+    {
+     .name = "prefetch_read_throughput_measure_time",
+     .type = OPT_NUMBER,
+     .help = "in milliseconds"},
+    {
+     .name = "prefetch_write_throughput_measure_time",
+     .type = OPT_NUMBER,
+     .help = "in milliseconds"},
+    {
+     .name = "prefetch_min_read_throughput",
+     .type = OPT_NUMBER,
+     .help = "in KB/s"},
+    {
+     .name = "prefetch_max_read_throughput",
+     .type = OPT_NUMBER,
+     .help = "in KB/s"},
+    {
+     .name = "prefetch_min_write_throughput",
+     .type = OPT_NUMBER,
+     .help = "in KB/s"},
+    {
+     .name = "prefetch_max_write_throughput",
+     .type = OPT_NUMBER,
+     .help = "in KB/s"},
+    {
+     .name = "optimize_empty_block",
+     .type = OPT_SIZE,
+     .help = "Minimum size (in bytes) of a zero-filled region whose state will "
+     "be preset in the bitmap. Settingto 0 turns off this optimization"},
     {NULL}
 };
diff --git a/block/fvd-journal.c b/block/fvd-journal.c
index 5824e35..246f425 100644
--- a/block/fvd-journal.c
+++ b/block/fvd-journal.c
@@ -17,6 +17,11 @@  static bool emulate_host_crash = true;
 static bool emulate_host_crash = false;
 #endif
 
+/* Return the minimum journal size, in bytes, for an image whose table has
+ * 'table_entries' entries.
+ * NOTE(review): table_entries is not consulted here and a fixed 512 bytes is
+ * returned; presumably a later patch supplies the real computation -- confirm. */
+static inline int64_t calc_min_journal_size(int64_t table_entries)
+{
+    const int64_t min_journal_bytes = 512;
+
+    return min_journal_bytes;
+}
+
 void fvd_emulate_host_crash(bool cond)
 {
     emulate_host_crash = cond;
diff --git a/block/fvd.c b/block/fvd.c
index 13fe940..d6263e7 100644
--- a/block/fvd.c
+++ b/block/fvd.c
@@ -27,13 +27,13 @@ 
  * function optimization. */
 #include "block/fvd-debug.c"
 #include "block/fvd-flush.c"
-#include "block/fvd-update.c"
 #include "block/fvd-misc.c"
 #include "block/fvd-create.c"
 #include "block/fvd-open.c"
 #include "block/fvd-read.c"
 #include "block/fvd-write.c"
 #include "block/fvd-journal.c"
+#include "block/fvd-update.c"
 
 static BlockDriver bdrv_fvd = {
     .format_name = "fvd",