Patchwork [v8,10/12] VMDK: create different subformats

login
register
mail settings
Submitter Feiran Zheng
Date July 5, 2011, 11:31 a.m.
Message ID <1309865478-32766-11-git-send-email-famcool@gmail.com>
Download mbox | patch
Permalink /patch/103262/
State New
Headers show

Comments

Feiran Zheng - July 5, 2011, 11:31 a.m.
Add create option 'format', with enums:
    monolithicSparse
    monolithicFlat
    twoGbMaxExtentSparse
    twoGbMaxExtentFlat
Each creates a subformat image file. The default is monolithiSparse.

Signed-off-by: Fam Zheng <famcool@gmail.com>
---
 block/vmdk.c |  561 ++++++++++++++++++++++++++++++++++------------------------
 block_int.h  |    1 +
 2 files changed, 330 insertions(+), 232 deletions(-)
Stefan Hajnoczi - July 8, 2011, 3:29 p.m.
On Tue, Jul 5, 2011 at 12:31 PM, Fam Zheng <famcool@gmail.com> wrote:
> Add create option 'format', with enums:

The -drive format=... option exists in QEMU today to specify the image
format of a file.  I think adding a format=... creation option may
lead to confusion.

How about subformat=... or type=...?

> Each creates a subformat image file. The default is monolithiSparse.

s/monolithiSparse/monolithicSparse/

> @@ -243,168 +243,6 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
>     return 1;
>  }
>
> -static int vmdk_snapshot_create(const char *filename, const char *backing_file)

Is this function really not needed anymore?

> @@ -1189,28 +990,317 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
>         }
>     }
>
> -    /* compose the descriptor */
> -    real_filename = filename;
> -    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
> -        real_filename = temp_str + 1;
> -    if ((temp_str = strrchr(real_filename, '/')) != NULL)
> -        real_filename = temp_str + 1;
> -    if ((temp_str = strrchr(real_filename, ':')) != NULL)
> -        real_filename = temp_str + 1;
> -    snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
> -             total_size, real_filename,
> -             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
> -             total_size / (int64_t)(63 * 16));
> -
> -    /* write the descriptor */
> -    lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
> -    ret = qemu_write_full(fd, desc, strlen(desc));
> -    if (ret != strlen(desc)) {
> +    filesize -= filesize;

What is the point of setting filesize to zero?

> +    ret = 0;
> + exit:
> +    close(fd);
> +    return ret;
> +}
> +
> +static int vmdk_create_flat(const char *filename, int64_t filesize)
> +{
> +    int fd, ret;
> +
> +    fd = open(
> +            filename,
> +            O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
> +            0644);
> +    if (fd < 0) {
> +        return -errno;
> +    }
> +    ret = ftruncate(fd, filesize);
> +    if (ret) {
>         ret = -errno;
> -        goto exit;
> +        close(fd);
> +        return -errno;

errno is a global variable that may be modified by any errno-using
library function.  Its value may be changed by close(2) (even if there
is no error closing the fd).  Therefore please do return ret instead
of return -errno.

>     }
> +    close(fd);
> +    return 0;
> +}
>
> -    ret = 0;
> +static int filename_decompose(const char *filename, char *path, char *prefix,
> +        char *postfix, int buf_len)

Memory sizes (e.g. buffer size) should be size_t (which is unsigned)
instead of int.

> +{
> +    const char *p, *q;
> +
> +    if (filename == NULL || !strlen(filename)) {
> +        fprintf(stderr, "Vmdk: wrong filename (%s)\n", filename);

Printing filename doesn't make sense since filename is either NULL or
"".  Also note that fprintf(..., "%s", NULL) is undefined and may
crash on some platforms (e.g. Solaris).

> +        return -1;
> +    }
> +    p = strrchr(filename, '/');
> +    if (p == NULL) {
> +        p = strrchr(filename, '\\');
> +    }
> +    if (p == NULL) {
> +        p = strrchr(filename, ':');
> +    }
> +    if (p != NULL) {
> +        p++;
> +        if (p - filename >= buf_len) {
> +            return -1;
> +        }
> +        strncpy(path, filename, p - filename);
> +        path[p - filename] = 0;
> +    } else {
> +        p = filename;
> +        path[0] = '\0';
> +    }
> +    q = strrchr(p, '.');
> +    if (q == NULL) {
> +        pstrcpy(prefix, buf_len, p);
> +        postfix[0] = '\0';
> +    } else {

No check for prefix buf_len here.  Imagine filename has no '/', '\\',
or ':' but it does have a '.'.  It is possible to overflow prefix.

> +        strncpy(prefix, p, q - p);
> +        prefix[q - p] = '\0';
> +        pstrcpy(postfix, buf_len, q);
> +    }
> +    return 0;
> +}
> +
> +static int relative_path(char *dest, int dest_size,
> +        const char *base, const char *target)
> +{
> +    int i = 0;
> +    int n = 0;
> +    const char *p, *q;
> +#ifdef _WIN32
> +    const char *sep = "\\";
> +#else
> +    const char *sep = "/";
> +#endif
> +
> +    if (!(dest && base && target)) {
> +        return -1;
> +    }
> +    if (path_is_absolute(target)) {
> +        dest[dest_size - 1] = '\0';
> +        strncpy(dest, target, dest_size - 1);
> +        return 0;
> +    }
> +    while (base[i] == target[i]) {
> +        i++;
> +    }
> +    p = &base[i];
> +    q = &target[i];
> +    while (*p) {
> +        if (*p == *sep) {
> +            n++;
> +        }
> +        p++;
> +    }
> +    dest[0] = '\0';
> +    for (; n; n--) {
> +        pstrcat(dest, dest_size, "..");
> +        pstrcat(dest, dest_size, sep);
> +    }
> +    pstrcat(dest, dest_size, q);
> +    return 0;
> +}
> +
> +static int vmdk_create(const char *filename, QEMUOptionParameter *options)
> +{
> +    int fd = -1;
> +    char desc[4096];
> +    int64_t total_size = 0;
> +    const char *backing_file = NULL;
> +    const char *fmt = NULL;
> +    int flags = 0;
> +    int ret = 0;
> +    char ext_desc_lines[1024] = "";
> +    char path[1024], prefix[1024], postfix[1024];
> +    const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
> +    const char desc_template[] =
> +        "# Disk DescriptorFile\n"
> +        "version=1\n"
> +        "CID=%x\n"
> +        "parentCID=%x\n"
> +        "createType=\"%s\"\n"
> +        "%s"
> +        "\n"
> +        "# Extent description\n"
> +        "%s"
> +        "\n"
> +        "# The Disk Data Base\n"
> +        "#DDB\n"
> +        "\n"
> +        "ddb.virtualHWVersion = \"%d\"\n"
> +        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
> +        "ddb.geometry.heads = \"16\"\n"
> +        "ddb.geometry.sectors = \"63\"\n"
> +        "ddb.adapterType = \"ide\"\n";
> +
> +    if (filename_decompose(filename, path, prefix, postfix, 1024)) {

Please don't hardcode buffer sizes, if one of path, prefix, postfix
ever needs to be changed then this code also needs to be updated.  I
suggest defining a constant and using it everywhere.

> +        return -EINVAL;
> +    }
> +    /* Read out options */
> +    while (options && options->name) {
> +        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
> +            total_size = options->value.n;
> +        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
> +            backing_file = options->value.s;
> +        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
> +            flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
> +        } else if (!strcmp(options->name, BLOCK_OPT_FMT)) {
> +            fmt = options->value.s;
> +        }
> +        options++;
> +    }
> +    if (!fmt) {
> +        fmt = "monolithicSparse";
> +    }
> +    if (!strcmp(fmt, "monolithicFlat") || !strcmp(fmt, "twoGbMaxExtentFlat")) {
> +        bool split = strcmp(fmt, "monolithicFlat");
> +        const char desc_line_templ[] = "RW %lld FLAT \"%s\" 0\n";
> +        int64_t filesize = total_size;
> +        int idx = 1;
> +
> +        if (backing_file) {
> +            /* not supporting backing file for flat image */
> +            return -ENOTSUP;
> +        }
> +        while (filesize > 0) {
> +            char desc_line[1024];
> +            char ext_filename[1024];
> +            char desc_filename[1024];

Buffer sizes again.

> +            int64_t size = filesize;
> +
> +            if (split && size > split_size) {
> +                size = split_size;
> +            }
> +            if (split) {
> +                sprintf(desc_filename, "%s-f%03d%s",
> +                        prefix, idx++, postfix);

snprintf?

> +                sprintf(ext_filename, "%s%s",
> +                        path, desc_filename);
> +            } else {
> +                sprintf(desc_filename, "%s-flat%s",
> +                        prefix, postfix);
> +                sprintf(ext_filename, "%s%s",
> +                        path, desc_filename);
> +            }
> +            if (vmdk_create_flat(ext_filename, size)) {
> +                return -EINVAL;
> +            }
> +            filesize -= size;
> +
> +            /* Format description line */
> +            snprintf(desc_line, 1024,
> +                        desc_line_templ, size / 512, desc_filename);

Here sizeof(desc_line) should be used as the buffer size to avoid
duplicating it.  Same thing below in the code.

> +            pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line);
> +        }
> +
> +        /* generate descriptor file */
> +        snprintf(desc, sizeof(desc), desc_template,
> +                (unsigned int)time(NULL),
> +                0xffffffff,
> +                fmt,
> +                "",
> +                ext_desc_lines,
> +                (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
> +                total_size / (int64_t)(63 * 16 * 512));
> +        fd = open(
> +                filename,
> +                O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
> +                0644);
> +        if (fd < 0) {
> +            return -errno;
> +        }
> +        ret = qemu_write_full(fd, desc, strlen(desc));
> +        if (ret != strlen(desc)) {
> +            ret = -errno;
> +            goto exit;
> +        }
> +        ret = 0;
> +    } else if (!strcmp(fmt, "monolithicSparse")
> +                || !strcmp(fmt, "twoGbMaxExtentSparse")) {
> +        int ret;
> +        int fd = 0;
> +        int idx = 1;
> +        int64_t filesize = total_size;
> +        const char desc_line_templ[] = "RW %lld SPARSE \"%s\"\n";
> +        char desc_line[1024];
> +        char desc_filename[1024];
> +        char ext_filename[1024];
> +        bool split = strcmp(fmt, "monolithicSparse");
> +        char parent_desc_line[1024] = "";
> +        uint32_t parent_cid = 0xffffffff;
> +
> +        if (backing_file) {
> +            char parent_filename[1024];
> +            BlockDriverState *bs = bdrv_new("");
> +            ret = bdrv_open(bs, backing_file, 0, NULL);
> +            if (ret != 0) {
> +                bdrv_delete(bs);
> +                return ret;
> +            }
> +            filesize = bdrv_getlength(bs);
> +            parent_cid = vmdk_read_cid(bs, 0);

This assumes that the backing file is vmdk.  Where was that checked?

Stefan
Feiran Zheng - July 9, 2011, 12:09 p.m.
On Fri, Jul 8, 2011 at 11:29 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Tue, Jul 5, 2011 at 12:31 PM, Fam Zheng <famcool@gmail.com> wrote:
>> Add create option 'format', with enums:
>
> The -drive format=... option exists in QEMU today to specify the image
> format of a file.  I think adding a format=... creation option may
> lead to confusion.
>
> How about subformat=... or type=...?
>
>> Each creates a subformat image file. The default is monolithiSparse.
>
> s/monolithiSparse/monolithicSparse/
>
>> @@ -243,168 +243,6 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
>>     return 1;
>>  }
>>
>> -static int vmdk_snapshot_create(const char *filename, const char *backing_file)
>
> Is this function really not needed anymore?
>
I think so. This function is not used to create snapshot, actually
it's called for backing file. It copies header and L1 table, allocate
L2 tables and leave them zero. This is equivalent to creating a new
extent. The only difference is to set descriptor options parentCID and
parantFileNameHint.

>> @@ -1189,28 +990,317 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
>>         }
>>     }
>>
>> -    /* compose the descriptor */
>> -    real_filename = filename;
>> -    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
>> -        real_filename = temp_str + 1;
>> -    if ((temp_str = strrchr(real_filename, '/')) != NULL)
>> -        real_filename = temp_str + 1;
>> -    if ((temp_str = strrchr(real_filename, ':')) != NULL)
>> -        real_filename = temp_str + 1;
>> -    snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
>> -             total_size, real_filename,
>> -             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
>> -             total_size / (int64_t)(63 * 16));
>> -
>> -    /* write the descriptor */
>> -    lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
>> -    ret = qemu_write_full(fd, desc, strlen(desc));
>> -    if (ret != strlen(desc)) {
>> +    filesize -= filesize;
>
> What is the point of setting filesize to zero?
>
>> +    ret = 0;
>> + exit:
>> +    close(fd);
>> +    return ret;
>> +}
>> +
>> +static int vmdk_create_flat(const char *filename, int64_t filesize)
>> +{
>> +    int fd, ret;
>> +
>> +    fd = open(
>> +            filename,
>> +            O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
>> +            0644);
>> +    if (fd < 0) {
>> +        return -errno;
>> +    }
>> +    ret = ftruncate(fd, filesize);
>> +    if (ret) {
>>         ret = -errno;
>> -        goto exit;
>> +        close(fd);
>> +        return -errno;
>
> errno is a global variable that may be modified by any errno-using
> library function.  Its value may be changed by close(2) (even if there
> is no error closing the fd).  Therefore please do return ret instead
> of return -errno.
>
>>     }
>> +    close(fd);
>> +    return 0;
>> +}
>>
>> -    ret = 0;
>> +static int filename_decompose(const char *filename, char *path, char *prefix,
>> +        char *postfix, int buf_len)
>
> Memory sizes (e.g. buffer size) should be size_t (which is unsigned)
> instead of int.
>
>> +{
>> +    const char *p, *q;
>> +
>> +    if (filename == NULL || !strlen(filename)) {
>> +        fprintf(stderr, "Vmdk: wrong filename (%s)\n", filename);
>
> Printing filename doesn't make sense since filename is either NULL or
> "".  Also note that fprintf(..., "%s", NULL) is undefined and may
> crash on some platforms (e.g. Solaris).
>
>> +        return -1;
>> +    }
>> +    p = strrchr(filename, '/');
>> +    if (p == NULL) {
>> +        p = strrchr(filename, '\\');
>> +    }
>> +    if (p == NULL) {
>> +        p = strrchr(filename, ':');
>> +    }
>> +    if (p != NULL) {
>> +        p++;
>> +        if (p - filename >= buf_len) {
>> +            return -1;
>> +        }
>> +        strncpy(path, filename, p - filename);
>> +        path[p - filename] = 0;
>> +    } else {
>> +        p = filename;
>> +        path[0] = '\0';
>> +    }
>> +    q = strrchr(p, '.');
>> +    if (q == NULL) {
>> +        pstrcpy(prefix, buf_len, p);
>> +        postfix[0] = '\0';
>> +    } else {
>
> No check for prefix buf_len here.  Imagine filename has no '/', '\\',
> or ':' but it does have a '.'.  It is possible to overflow prefix.
>
>> +        strncpy(prefix, p, q - p);
>> +        prefix[q - p] = '\0';
>> +        pstrcpy(postfix, buf_len, q);
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int relative_path(char *dest, int dest_size,
>> +        const char *base, const char *target)
>> +{
>> +    int i = 0;
>> +    int n = 0;
>> +    const char *p, *q;
>> +#ifdef _WIN32
>> +    const char *sep = "\\";
>> +#else
>> +    const char *sep = "/";
>> +#endif
>> +
>> +    if (!(dest && base && target)) {
>> +        return -1;
>> +    }
>> +    if (path_is_absolute(target)) {
>> +        dest[dest_size - 1] = '\0';
>> +        strncpy(dest, target, dest_size - 1);
>> +        return 0;
>> +    }
>> +    while (base[i] == target[i]) {
>> +        i++;
>> +    }
>> +    p = &base[i];
>> +    q = &target[i];
>> +    while (*p) {
>> +        if (*p == *sep) {
>> +            n++;
>> +        }
>> +        p++;
>> +    }
>> +    dest[0] = '\0';
>> +    for (; n; n--) {
>> +        pstrcat(dest, dest_size, "..");
>> +        pstrcat(dest, dest_size, sep);
>> +    }
>> +    pstrcat(dest, dest_size, q);
>> +    return 0;
>> +}
>> +
>> +static int vmdk_create(const char *filename, QEMUOptionParameter *options)
>> +{
>> +    int fd = -1;
>> +    char desc[4096];
>> +    int64_t total_size = 0;
>> +    const char *backing_file = NULL;
>> +    const char *fmt = NULL;
>> +    int flags = 0;
>> +    int ret = 0;
>> +    char ext_desc_lines[1024] = "";
>> +    char path[1024], prefix[1024], postfix[1024];
>> +    const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
>> +    const char desc_template[] =
>> +        "# Disk DescriptorFile\n"
>> +        "version=1\n"
>> +        "CID=%x\n"
>> +        "parentCID=%x\n"
>> +        "createType=\"%s\"\n"
>> +        "%s"
>> +        "\n"
>> +        "# Extent description\n"
>> +        "%s"
>> +        "\n"
>> +        "# The Disk Data Base\n"
>> +        "#DDB\n"
>> +        "\n"
>> +        "ddb.virtualHWVersion = \"%d\"\n"
>> +        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
>> +        "ddb.geometry.heads = \"16\"\n"
>> +        "ddb.geometry.sectors = \"63\"\n"
>> +        "ddb.adapterType = \"ide\"\n";
>> +
>> +    if (filename_decompose(filename, path, prefix, postfix, 1024)) {
>
> Please don't hardcode buffer sizes, if one of path, prefix, postfix
> ever needs to be changed then this code also needs to be updated.  I
> suggest defining a constant and using it everywhere.
>
>> +        return -EINVAL;
>> +    }
>> +    /* Read out options */
>> +    while (options && options->name) {
>> +        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
>> +            total_size = options->value.n;
>> +        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
>> +            backing_file = options->value.s;
>> +        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
>> +            flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
>> +        } else if (!strcmp(options->name, BLOCK_OPT_FMT)) {
>> +            fmt = options->value.s;
>> +        }
>> +        options++;
>> +    }
>> +    if (!fmt) {
>> +        fmt = "monolithicSparse";
>> +    }
>> +    if (!strcmp(fmt, "monolithicFlat") || !strcmp(fmt, "twoGbMaxExtentFlat")) {
>> +        bool split = strcmp(fmt, "monolithicFlat");
>> +        const char desc_line_templ[] = "RW %lld FLAT \"%s\" 0\n";
>> +        int64_t filesize = total_size;
>> +        int idx = 1;
>> +
>> +        if (backing_file) {
>> +            /* not supporting backing file for flat image */
>> +            return -ENOTSUP;
>> +        }
>> +        while (filesize > 0) {
>> +            char desc_line[1024];
>> +            char ext_filename[1024];
>> +            char desc_filename[1024];
>
> Buffer sizes again.
>
>> +            int64_t size = filesize;
>> +
>> +            if (split && size > split_size) {
>> +                size = split_size;
>> +            }
>> +            if (split) {
>> +                sprintf(desc_filename, "%s-f%03d%s",
>> +                        prefix, idx++, postfix);
>
> snprintf?
>
>> +                sprintf(ext_filename, "%s%s",
>> +                        path, desc_filename);
>> +            } else {
>> +                sprintf(desc_filename, "%s-flat%s",
>> +                        prefix, postfix);
>> +                sprintf(ext_filename, "%s%s",
>> +                        path, desc_filename);
>> +            }
>> +            if (vmdk_create_flat(ext_filename, size)) {
>> +                return -EINVAL;
>> +            }
>> +            filesize -= size;
>> +
>> +            /* Format description line */
>> +            snprintf(desc_line, 1024,
>> +                        desc_line_templ, size / 512, desc_filename);
>
> Here sizeof(desc_line) should be used as the buffer size to avoid
> duplicating it.  Same thing below in the code.
>
>> +            pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line);
>> +        }
>> +
>> +        /* generate descriptor file */
>> +        snprintf(desc, sizeof(desc), desc_template,
>> +                (unsigned int)time(NULL),
>> +                0xffffffff,
>> +                fmt,
>> +                "",
>> +                ext_desc_lines,
>> +                (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
>> +                total_size / (int64_t)(63 * 16 * 512));
>> +        fd = open(
>> +                filename,
>> +                O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
>> +                0644);
>> +        if (fd < 0) {
>> +            return -errno;
>> +        }
>> +        ret = qemu_write_full(fd, desc, strlen(desc));
>> +        if (ret != strlen(desc)) {
>> +            ret = -errno;
>> +            goto exit;
>> +        }
>> +        ret = 0;
>> +    } else if (!strcmp(fmt, "monolithicSparse")
>> +                || !strcmp(fmt, "twoGbMaxExtentSparse")) {
>> +        int ret;
>> +        int fd = 0;
>> +        int idx = 1;
>> +        int64_t filesize = total_size;
>> +        const char desc_line_templ[] = "RW %lld SPARSE \"%s\"\n";
>> +        char desc_line[1024];
>> +        char desc_filename[1024];
>> +        char ext_filename[1024];
>> +        bool split = strcmp(fmt, "monolithicSparse");
>> +        char parent_desc_line[1024] = "";
>> +        uint32_t parent_cid = 0xffffffff;
>> +
>> +        if (backing_file) {
>> +            char parent_filename[1024];
>> +            BlockDriverState *bs = bdrv_new("");
>> +            ret = bdrv_open(bs, backing_file, 0, NULL);
>> +            if (ret != 0) {
>> +                bdrv_delete(bs);
>> +                return ret;
>> +            }
>> +            filesize = bdrv_getlength(bs);
>> +            parent_cid = vmdk_read_cid(bs, 0);
>
> This assumes that the backing file is vmdk.  Where was that checked?
>
> Stefan
>

Patch

diff --git a/block/vmdk.c b/block/vmdk.c
index 2183ace..0f51332 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -156,8 +156,8 @@  static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
 #define CHECK_CID 1
 
 #define SECTOR_SIZE 512
-#define DESC_SIZE 20*SECTOR_SIZE	// 20 sectors of 512 bytes each
-#define HEADER_SIZE 512   			// first sector of 512 bytes
+#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
+#define HEADER_SIZE 512                 /* first sector of 512 bytes */
 
 static void vmdk_free_extents(BlockDriverState *bs)
 {
@@ -243,168 +243,6 @@  static int vmdk_is_cid_valid(BlockDriverState *bs)
     return 1;
 }
 
-static int vmdk_snapshot_create(const char *filename, const char *backing_file)
-{
-    int snp_fd, p_fd;
-    int ret;
-    uint32_t p_cid;
-    char *p_name, *gd_buf, *rgd_buf;
-    const char *real_filename, *temp_str;
-    VMDK4Header header;
-    uint32_t gde_entries, gd_size;
-    int64_t gd_offset, rgd_offset, capacity, gt_size;
-    char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
-    static const char desc_template[] =
-    "# Disk DescriptorFile\n"
-    "version=1\n"
-    "CID=%x\n"
-    "parentCID=%x\n"
-    "createType=\"monolithicSparse\"\n"
-    "parentFileNameHint=\"%s\"\n"
-    "\n"
-    "# Extent description\n"
-    "RW %u SPARSE \"%s\"\n"
-    "\n"
-    "# The Disk Data Base \n"
-    "#DDB\n"
-    "\n";
-
-    snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
-    if (snp_fd < 0)
-        return -errno;
-    p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
-    if (p_fd < 0) {
-        close(snp_fd);
-        return -errno;
-    }
-
-    /* read the header */
-    if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
-        ret = -errno;
-        goto fail;
-    }
-    if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
-        ret = -errno;
-        goto fail;
-    }
-
-    /* write the header */
-    if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
-        ret = -errno;
-        goto fail;
-    }
-    if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
-        ret = -errno;
-        goto fail;
-    }
-
-    memset(&header, 0, sizeof(header));
-    memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
-
-    if (ftruncate(snp_fd, header.grain_offset << 9)) {
-        ret = -errno;
-        goto fail;
-    }
-    /* the descriptor offset = 0x200 */
-    if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
-        ret = -errno;
-        goto fail;
-    }
-    if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
-        ret = -errno;
-        goto fail;
-    }
-
-    if ((p_name = strstr(p_desc,"CID")) != NULL) {
-        p_name += sizeof("CID");
-        sscanf(p_name,"%x",&p_cid);
-    }
-
-    real_filename = filename;
-    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
-        real_filename = temp_str + 1;
-    if ((temp_str = strrchr(real_filename, '/')) != NULL)
-        real_filename = temp_str + 1;
-    if ((temp_str = strrchr(real_filename, ':')) != NULL)
-        real_filename = temp_str + 1;
-
-    snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
-             (uint32_t)header.capacity, real_filename);
-
-    /* write the descriptor */
-    if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
-        ret = -errno;
-        goto fail;
-    }
-    if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
-        ret = -errno;
-        goto fail;
-    }
-
-    gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
-    rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
-    capacity = header.capacity * SECTOR_SIZE;       // Extent size
-    /*
-     * Each GDE span 32M disk, means:
-     * 512 GTE per GT, each GTE points to grain
-     */
-    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
-    if (!gt_size) {
-        ret = -EINVAL;
-        goto fail;
-    }
-    gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
-    gd_size = gde_entries * sizeof(uint32_t);
-
-    /* write RGD */
-    rgd_buf = qemu_malloc(gd_size);
-    if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
-        ret = -errno;
-        goto fail_rgd;
-    }
-    if (read(p_fd, rgd_buf, gd_size) != gd_size) {
-        ret = -errno;
-        goto fail_rgd;
-    }
-    if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
-        ret = -errno;
-        goto fail_rgd;
-    }
-    if (write(snp_fd, rgd_buf, gd_size) == -1) {
-        ret = -errno;
-        goto fail_rgd;
-    }
-
-    /* write GD */
-    gd_buf = qemu_malloc(gd_size);
-    if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
-        ret = -errno;
-        goto fail_gd;
-    }
-    if (read(p_fd, gd_buf, gd_size) != gd_size) {
-        ret = -errno;
-        goto fail_gd;
-    }
-    if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
-        ret = -errno;
-        goto fail_gd;
-    }
-    if (write(snp_fd, gd_buf, gd_size) == -1) {
-        ret = -errno;
-        goto fail_gd;
-    }
-    ret = 0;
-
-fail_gd:
-    qemu_free(gd_buf);
-fail_rgd:
-    qemu_free(rgd_buf);
-fail:
-    close(p_fd);
-    close(snp_fd);
-    return ret;
-}
-
 static int vmdk_parent_open(BlockDriverState *bs)
 {
     char *p_name;
@@ -1059,68 +897,32 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
     return 0;
 }
 
-static int vmdk_create(const char *filename, QEMUOptionParameter *options)
+
+static int vmdk_create_sparse(const char *filename, int64_t filesize)
 {
-    int fd, i;
+    int ret, i;
+    int fd = 0;
     VMDK4Header header;
     uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
-    static const char desc_template[] =
-        "# Disk DescriptorFile\n"
-        "version=1\n"
-        "CID=%x\n"
-        "parentCID=ffffffff\n"
-        "createType=\"monolithicSparse\"\n"
-        "\n"
-        "# Extent description\n"
-        "RW %" PRId64 " SPARSE \"%s\"\n"
-        "\n"
-        "# The Disk Data Base \n"
-        "#DDB\n"
-        "\n"
-        "ddb.virtualHWVersion = \"%d\"\n"
-        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
-        "ddb.geometry.heads = \"16\"\n"
-        "ddb.geometry.sectors = \"63\"\n"
-        "ddb.adapterType = \"ide\"\n";
-    char desc[1024];
-    const char *real_filename, *temp_str;
-    int64_t total_size = 0;
-    const char *backing_file = NULL;
-    int flags = 0;
-    int ret;
 
-    // Read out options
-    while (options && options->name) {
-        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
-            total_size = options->value.n / 512;
-        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
-            backing_file = options->value.s;
-        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
-            flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
-        }
-        options++;
-    }
-
-    /* XXX: add support for backing file */
-    if (backing_file) {
-        return vmdk_snapshot_create(filename, backing_file);
-    }
-
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-              0644);
+    fd = open(
+        filename,
+        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+        0644);
     if (fd < 0)
         return -errno;
     magic = cpu_to_be32(VMDK4_MAGIC);
     memset(&header, 0, sizeof(header));
     header.version = 1;
     header.flags = 3; /* ?? */
-    header.capacity = total_size;
+    header.capacity = filesize / 512;
     header.granularity = 128;
     header.num_gtes_per_gte = 512;
 
-    grains = (total_size + header.granularity - 1) / header.granularity;
+    grains = (filesize / 512 + header.granularity - 1) / header.granularity;
     gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
-    gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
+    gt_count =
+        (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
     gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
 
     header.desc_offset = 1;
@@ -1131,7 +933,6 @@  static int vmdk_create(const char *filename, QEMUOptionParameter *options)
        ((header.gd_offset + gd_size + (gt_size * gt_count) +
          header.granularity - 1) / header.granularity) *
         header.granularity;
-
     /* swap endianness for all header fields */
     header.version = cpu_to_le32(header.version);
     header.flags = cpu_to_le32(header.flags);
@@ -1189,28 +990,317 @@  static int vmdk_create(const char *filename, QEMUOptionParameter *options)
         }
     }
 
-    /* compose the descriptor */
-    real_filename = filename;
-    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
-        real_filename = temp_str + 1;
-    if ((temp_str = strrchr(real_filename, '/')) != NULL)
-        real_filename = temp_str + 1;
-    if ((temp_str = strrchr(real_filename, ':')) != NULL)
-        real_filename = temp_str + 1;
-    snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
-             total_size, real_filename,
-             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
-             total_size / (int64_t)(63 * 16));
-
-    /* write the descriptor */
-    lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
-    ret = qemu_write_full(fd, desc, strlen(desc));
-    if (ret != strlen(desc)) {
+    filesize -= filesize;
+    ret = 0;
+ exit:
+    close(fd);
+    return ret;
+}
+
+static int vmdk_create_flat(const char *filename, int64_t filesize)
+{
+    int fd, ret;
+
+    fd = open(
+            filename,
+            O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+            0644);
+    if (fd < 0) {
+        return -errno;
+    }
+    ret = ftruncate(fd, filesize);
+    if (ret) {
         ret = -errno;
-        goto exit;
+        close(fd);
+        return -errno;
     }
+    close(fd);
+    return 0;
+}
 
-    ret = 0;
+static int filename_decompose(const char *filename, char *path, char *prefix,
+        char *postfix, int buf_len)
+{
+    const char *p, *q;
+
+    if (filename == NULL || !strlen(filename)) {
+        fprintf(stderr, "Vmdk: wrong filename (%s)\n", filename);
+        return -1;
+    }
+    p = strrchr(filename, '/');
+    if (p == NULL) {
+        p = strrchr(filename, '\\');
+    }
+    if (p == NULL) {
+        p = strrchr(filename, ':');
+    }
+    if (p != NULL) {
+        p++;
+        if (p - filename >= buf_len) {
+            return -1;
+        }
+        strncpy(path, filename, p - filename);
+        path[p - filename] = 0;
+    } else {
+        p = filename;
+        path[0] = '\0';
+    }
+    q = strrchr(p, '.');
+    if (q == NULL) {
+        pstrcpy(prefix, buf_len, p);
+        postfix[0] = '\0';
+    } else {
+        strncpy(prefix, p, q - p);
+        prefix[q - p] = '\0';
+        pstrcpy(postfix, buf_len, q);
+    }
+    return 0;
+}
+
+static int relative_path(char *dest, int dest_size,
+        const char *base, const char *target)
+{
+    int i = 0;
+    int n = 0;
+    const char *p, *q;
+#ifdef _WIN32
+    const char *sep = "\\";
+#else
+    const char *sep = "/";
+#endif
+
+    if (!(dest && base && target)) {
+        return -1;
+    }
+    if (path_is_absolute(target)) {
+        dest[dest_size - 1] = '\0';
+        strncpy(dest, target, dest_size - 1);
+        return 0;
+    }
+    while (base[i] == target[i]) {
+        i++;
+    }
+    p = &base[i];
+    q = &target[i];
+    while (*p) {
+        if (*p == *sep) {
+            n++;
+        }
+        p++;
+    }
+    dest[0] = '\0';
+    for (; n; n--) {
+        pstrcat(dest, dest_size, "..");
+        pstrcat(dest, dest_size, sep);
+    }
+    pstrcat(dest, dest_size, q);
+    return 0;
+}
+
+static int vmdk_create(const char *filename, QEMUOptionParameter *options)
+{
+    int fd = -1;
+    char desc[4096];
+    int64_t total_size = 0;
+    const char *backing_file = NULL;
+    const char *fmt = NULL;
+    int flags = 0;
+    int ret = 0;
+    char ext_desc_lines[1024] = "";
+    char path[1024], prefix[1024], postfix[1024];
+    const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
+    const char desc_template[] =
+        "# Disk DescriptorFile\n"
+        "version=1\n"
+        "CID=%x\n"
+        "parentCID=%x\n"
+        "createType=\"%s\"\n"
+        "%s"
+        "\n"
+        "# Extent description\n"
+        "%s"
+        "\n"
+        "# The Disk Data Base\n"
+        "#DDB\n"
+        "\n"
+        "ddb.virtualHWVersion = \"%d\"\n"
+        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
+        "ddb.geometry.heads = \"16\"\n"
+        "ddb.geometry.sectors = \"63\"\n"
+        "ddb.adapterType = \"ide\"\n";
+
+    if (filename_decompose(filename, path, prefix, postfix, 1024)) {
+        return -EINVAL;
+    }
+    /* Read out options */
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n;
+        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+            backing_file = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
+            flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
+        } else if (!strcmp(options->name, BLOCK_OPT_FMT)) {
+            fmt = options->value.s;
+        }
+        options++;
+    }
+    if (!fmt) {
+        fmt = "monolithicSparse";
+    }
+    if (!strcmp(fmt, "monolithicFlat") || !strcmp(fmt, "twoGbMaxExtentFlat")) {
+        bool split = strcmp(fmt, "monolithicFlat");
+        const char desc_line_templ[] = "RW %lld FLAT \"%s\" 0\n";
+        int64_t filesize = total_size;
+        int idx = 1;
+
+        if (backing_file) {
+            /* not supporting backing file for flat image */
+            return -ENOTSUP;
+        }
+        while (filesize > 0) {
+            char desc_line[1024];
+            char ext_filename[1024];
+            char desc_filename[1024];
+            int64_t size = filesize;
+
+            if (split && size > split_size) {
+                size = split_size;
+            }
+            if (split) {
+                sprintf(desc_filename, "%s-f%03d%s",
+                        prefix, idx++, postfix);
+                sprintf(ext_filename, "%s%s",
+                        path, desc_filename);
+            } else {
+                sprintf(desc_filename, "%s-flat%s",
+                        prefix, postfix);
+                sprintf(ext_filename, "%s%s",
+                        path, desc_filename);
+            }
+            if (vmdk_create_flat(ext_filename, size)) {
+                return -EINVAL;
+            }
+            filesize -= size;
+
+            /* Format description line */
+            snprintf(desc_line, 1024,
+                        desc_line_templ, size / 512, desc_filename);
+            pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line);
+        }
+
+        /* generate descriptor file */
+        snprintf(desc, sizeof(desc), desc_template,
+                (unsigned int)time(NULL),
+                0xffffffff,
+                fmt,
+                "",
+                ext_desc_lines,
+                (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
+                total_size / (int64_t)(63 * 16 * 512));
+        fd = open(
+                filename,
+                O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+                0644);
+        if (fd < 0) {
+            return -errno;
+        }
+        ret = qemu_write_full(fd, desc, strlen(desc));
+        if (ret != strlen(desc)) {
+            ret = -errno;
+            goto exit;
+        }
+        ret = 0;
+    } else if (!strcmp(fmt, "monolithicSparse")
+                || !strcmp(fmt, "twoGbMaxExtentSparse")) {
+        int ret;
+        int fd = 0;
+        int idx = 1;
+        int64_t filesize = total_size;
+        const char desc_line_templ[] = "RW %lld SPARSE \"%s\"\n";
+        char desc_line[1024];
+        char desc_filename[1024];
+        char ext_filename[1024];
+        bool split = strcmp(fmt, "monolithicSparse");
+        char parent_desc_line[1024] = "";
+        uint32_t parent_cid = 0xffffffff;
+
+        if (backing_file) {
+            char parent_filename[1024];
+            BlockDriverState *bs = bdrv_new("");
+            ret = bdrv_open(bs, backing_file, 0, NULL);
+            if (ret != 0) {
+                bdrv_delete(bs);
+                return ret;
+            }
+            filesize = bdrv_getlength(bs);
+            parent_cid = vmdk_read_cid(bs, 0);
+            bdrv_delete(bs);
+            relative_path(parent_filename, 1024, filename, backing_file);
+            snprintf(parent_desc_line, sizeof(parent_desc_line),
+                    "parentFileNameHint=\"%s\"", parent_filename);
+        }
+        while (filesize > 0) {
+            int64_t size = filesize;
+            if (split && size > split_size) {
+                size = split_size;
+            }
+
+            if (split) {
+                snprintf(desc_filename, 1024,
+                        "%s-s%03d%s", prefix, idx++, postfix);
+                snprintf(ext_filename, 1024, "%s%s", path, desc_filename);
+            } else {
+                snprintf(desc_filename, 1024, "%s%s", prefix, postfix);
+                snprintf(ext_filename, 1024, "%s%s", path, desc_filename);
+            }
+            ret = vmdk_create_sparse(ext_filename, size);
+            if (ret != 0) {
+                return ret;
+            }
+            filesize -= size;
+            snprintf(desc_line, 1024,
+                    desc_line_templ, size / 512, desc_filename);
+            pstrcat(ext_desc_lines, sizeof(ext_desc_lines), desc_line);
+        }
+        /* generate descriptor file */
+        snprintf(desc, sizeof(desc), desc_template,
+                (unsigned int)time(NULL),
+                parent_cid,
+                fmt,
+                parent_desc_line,
+                ext_desc_lines,
+                (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
+                total_size / (int64_t)(63 * 16 * 512));
+        if (split) {
+            fd = open(
+                    filename,
+                    O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+                    0644);
+        } else {
+            fd = open(
+                    filename,
+                    O_WRONLY | O_BINARY | O_LARGEFILE,
+                    0644);
+        }
+        if (fd < 0) {
+            return -errno;
+        }
+        /* the descriptor offset = 0x200 */
+        if (!split && 0x200 != lseek(fd, 0x200, SEEK_SET)) {
+            ret = -errno;
+            goto exit;
+        }
+        ret = qemu_write_full(fd, desc, strlen(desc));
+        if (ret != strlen(desc)) {
+            ret = -errno;
+            goto exit;
+        }
+        ret = 0;
+    } else {
+        fprintf(stderr, "Vmdk: unsupported format (%s)\n", fmt);
+        return -ENOTSUP;
+    }
 exit:
     close(fd);
     return ret;
@@ -1253,6 +1343,13 @@  static QEMUOptionParameter vmdk_create_options[] = {
         .type = OPT_FLAG,
         .help = "VMDK version 6 image"
     },
+    {
+        .name = BLOCK_OPT_FMT,
+        .type = OPT_STRING,
+        .help =
+            "VMDK flat extent format, can be one of "
+            "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat} "
+    },
     { NULL }
 };
 
diff --git a/block_int.h b/block_int.h
index 1e265d2..e870c33 100644
--- a/block_int.h
+++ b/block_int.h
@@ -39,6 +39,7 @@ 
 #define BLOCK_OPT_CLUSTER_SIZE  "cluster_size"
 #define BLOCK_OPT_TABLE_SIZE    "table_size"
 #define BLOCK_OPT_PREALLOC      "preallocation"
+#define BLOCK_OPT_FMT           "format"
 
 typedef struct AIOPool {
     void (*cancel)(BlockDriverAIOCB *acb);