Patchwork [v6,01/12] VMDK: introduce VmdkExtent

login
register
mail settings
Submitter Feiran Zheng
Date July 1, 2011, 4:55 a.m.
Message ID <1309496142-14228-2-git-send-email-famcool@gmail.com>
Download mbox | patch
Permalink /patch/102859/
State New
Headers show

Comments

Feiran Zheng - July 1, 2011, 4:55 a.m.
Introduced VmdkExtent array into BDRVVmdkState, enable holding multiple
image extents for multiple file image support.

Signed-off-by: Fam Zheng <famcool@gmail.com>
---
 block/vmdk.c |  355 +++++++++++++++++++++++++++++++++++++++++-----------------
 1 files changed, 252 insertions(+), 103 deletions(-)
Kevin Wolf - July 1, 2011, 12:28 p.m.
Am 01.07.2011 06:55, schrieb Fam Zheng:
> Introduced VmdkExtent array into BDRVVmdkState, enable holding multiple
> image extents for multiple file image support.
> 
> Signed-off-by: Fam Zheng <famcool@gmail.com>
> ---
>  block/vmdk.c |  355 +++++++++++++++++++++++++++++++++++++++++-----------------
>  1 files changed, 252 insertions(+), 103 deletions(-)
> 
> diff --git a/block/vmdk.c b/block/vmdk.c
> index 922b23d..f26137d 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -60,7 +60,11 @@ typedef struct {
>  
>  #define L2_CACHE_SIZE 16
>  
> -typedef struct BDRVVmdkState {
> +typedef struct VmdkExtent {
> +    BlockDriverState *file;
> +    bool flat;
> +    int64_t sectors;
> +    int64_t end_sector;
>      int64_t l1_table_offset;
>      int64_t l1_backup_table_offset;
>      uint32_t *l1_table;
> @@ -74,7 +78,12 @@ typedef struct BDRVVmdkState {
>      uint32_t l2_cache_counts[L2_CACHE_SIZE];
>  
>      unsigned int cluster_sectors;
> +} VmdkExtent;
> +
> +typedef struct BDRVVmdkState {
> +    int num_extents;
>      uint32_t parent_cid;
> +    VmdkExtent *extents;
>  } BDRVVmdkState;
>  
>  typedef struct VmdkMetaData {
> @@ -105,6 +114,19 @@ static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
>  #define DESC_SIZE 20*SECTOR_SIZE	// 20 sectors of 512 bytes each
>  #define HEADER_SIZE 512   			// first sector of 512 bytes
>  
> +static void vmdk_free_extents(BlockDriverState *bs)
> +{
> +    int i;
> +    BDRVVmdkState *s = bs->opaque;
> +
> +    for (i = 0; i < s->num_extents; i++) {
> +        qemu_free(s->extents[i].l1_table);
> +        qemu_free(s->extents[i].l2_cache);
> +        qemu_free(s->extents[i].l1_backup_table);
> +    }
> +    qemu_free(s->extents);
> +}
> +
>  static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
>  {
>      char desc[DESC_SIZE];
> @@ -156,8 +178,8 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
>  
>  static int vmdk_is_cid_valid(BlockDriverState *bs)
>  {
> -#ifdef CHECK_CID
>      BDRVVmdkState *s = bs->opaque;
> +#ifdef CHECK_CID
>      BlockDriverState *p_bs = bs->backing_hd;
>      uint32_t cur_pcid;

Hm, intentional? The only thing this seems to change is that it causes
an 'unused variable' warning of CHECK_CID isn't defined.

> @@ -358,11 +380,50 @@ static int vmdk_parent_open(BlockDriverState *bs)
>      return 0;
>  }
>  
> +/* Create and append extent to the extent array. Return the added VmdkExtent
> + * address. return NULL if allocation failed. */
> +static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
> +                           BlockDriverState *file, bool flat, int64_t sectors,
> +                           int64_t l1_offset, int64_t l1_backup_offset,
> +                           uint32_t l1_size,
> +                           int l2_size, unsigned int cluster_sectors)
> +{
> +    VmdkExtent *extent;
> +    BDRVVmdkState *s = bs->opaque;
> +
> +    s->extents = qemu_realloc(s->extents,
> +                              (s->num_extents + 1) * sizeof(VmdkExtent));
> +    extent = &s->extents[s->num_extents];
> +    s->num_extents++;
> +
> +    memset(extent, 0, sizeof(VmdkExtent));
> +    extent->file = file;
> +    extent->flat = flat;
> +    extent->sectors = sectors;
> +    extent->l1_table_offset = l1_offset;
> +    extent->l1_backup_table_offset = l1_backup_offset;
> +    extent->l1_size = l1_size;
> +    extent->l1_entry_sectors = l2_size * cluster_sectors;
> +    extent->l2_size = l2_size;
> +    extent->cluster_sectors = cluster_sectors;
> +
> +    if (s->num_extents > 1) {
> +        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
> +    } else {
> +        extent->end_sector = extent->sectors;
> +    }
> +    bs->total_sectors = extent->end_sector;

This means that extents must be added in ascending order, so that the
latest extent describes the highest sector numbers. Should be documented.

> +    return extent;
> +}
> +
> +
>  static int vmdk_open(BlockDriverState *bs, int flags)
>  {
>      BDRVVmdkState *s = bs->opaque;
>      uint32_t magic;
> -    int l1_size, i;
> +    int i;
> +    uint32_t l1_size, l1_entry_sectors;
> +    VmdkExtent *extent = NULL;
>  
>      if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
>          goto fail;
> @@ -370,32 +431,34 @@ static int vmdk_open(BlockDriverState *bs, int flags)
>      magic = be32_to_cpu(magic);
>      if (magic == VMDK3_MAGIC) {
>          VMDK3Header header;
> -
> -        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
> +        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
> +                != sizeof(header)) {
>              goto fail;
> -        s->cluster_sectors = le32_to_cpu(header.granularity);
> -        s->l2_size = 1 << 9;
> -        s->l1_size = 1 << 6;
> -        bs->total_sectors = le32_to_cpu(header.disk_sectors);
> -        s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
> -        s->l1_backup_table_offset = 0;
> -        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
> +        }
> +        extent = vmdk_add_extent(bs, bs->file, false,
> +                              le32_to_cpu(header.disk_sectors),
> +                              le32_to_cpu(header.l1dir_offset) << 9, 0,
> +                              1 << 6, 1 << 9, le32_to_cpu(header.granularity));
>      } else if (magic == VMDK4_MAGIC) {
>          VMDK4Header header;
> -
> -        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
> +        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
> +                != sizeof(header)) {
>              goto fail;
> -        bs->total_sectors = le64_to_cpu(header.capacity);
> -        s->cluster_sectors = le64_to_cpu(header.granularity);
> -        s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
> -        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
> -        if (s->l1_entry_sectors <= 0)
> +        }
> +        l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
> +                            * le64_to_cpu(header.granularity);
> +        l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
> +                    / l1_entry_sectors;
> +        extent = vmdk_add_extent(bs, bs->file, false,
> +                              le64_to_cpu(header.capacity),
> +                              le64_to_cpu(header.gd_offset) << 9,
> +                              le64_to_cpu(header.rgd_offset) << 9,
> +                              l1_size,
> +                              le32_to_cpu(header.num_gtes_per_gte),
> +                              le64_to_cpu(header.granularity));
> +        if (extent->l1_entry_sectors <= 0) {
>              goto fail;
> -        s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
> -            / s->l1_entry_sectors;
> -        s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
> -        s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
> -
> +        }
>          // try to open parent images, if exist
>          if (vmdk_parent_open(bs) != 0)
>              goto fail;
> @@ -405,41 +468,56 @@ static int vmdk_open(BlockDriverState *bs, int flags)
>          goto fail;
>      }
>  
> +    /* sum up the total sectors */
> +    bs->total_sectors = 0;
> +    for (i = 0; i < s->num_extents; i++) {
> +        bs->total_sectors += s->extents[i].sectors;
> +    }

Is this required? Doesn't vmdk_add_extent already update bs->total_sectors?

> +
>      /* read the L1 table */
> -    l1_size = s->l1_size * sizeof(uint32_t);
> -    s->l1_table = qemu_malloc(l1_size);
> -    if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
> +    l1_size = extent->l1_size * sizeof(uint32_t);
> +    extent->l1_table = qemu_malloc(l1_size);
> +    if (bdrv_pread(bs->file,
> +            extent->l1_table_offset,
> +            extent->l1_table,
> +            l1_size)
> +        != l1_size) {
>          goto fail;
> -    for(i = 0; i < s->l1_size; i++) {
> -        le32_to_cpus(&s->l1_table[i]);
> +    }
> +    for (i = 0; i < extent->l1_size; i++) {
> +        le32_to_cpus(&extent->l1_table[i]);
>      }
>  
> -    if (s->l1_backup_table_offset) {
> -        s->l1_backup_table = qemu_malloc(l1_size);
> -        if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
> +    if (extent->l1_backup_table_offset) {
> +        extent->l1_backup_table = qemu_malloc(l1_size);
> +        if (bdrv_pread(bs->file,
> +                    extent->l1_backup_table_offset,
> +                    extent->l1_backup_table,
> +                    l1_size)
> +                != l1_size) {
>              goto fail;
> -        for(i = 0; i < s->l1_size; i++) {
> -            le32_to_cpus(&s->l1_backup_table[i]);
> +        }
> +        for (i = 0; i < extent->l1_size; i++) {
> +            le32_to_cpus(&extent->l1_backup_table[i]);
>          }
>      }
>  
> -    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
> +    extent->l2_cache =
> +        qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
>      return 0;
>   fail:
> -    qemu_free(s->l1_backup_table);
> -    qemu_free(s->l1_table);
> -    qemu_free(s->l2_cache);
> +    vmdk_free_extents(bs);
>      return -1;
>  }
>  
> -static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
> -                                   uint64_t offset, int allocate);
> -
> -static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
> -                             uint64_t offset, int allocate)
> +static int get_whole_cluster(BlockDriverState *bs,
> +                VmdkExtent *extent,
> +                uint64_t cluster_offset,
> +                uint64_t offset,
> +                bool allocate)
>  {
> -    BDRVVmdkState *s = bs->opaque;
> -    uint8_t  whole_grain[s->cluster_sectors*512];        // 128 sectors * 512 bytes each = grain size 64KB
> +    /* 128 sectors * 512 bytes each = grain size 64KB */
> +    uint8_t  whole_grain[extent->cluster_sectors * 512];
>  
>      // we will be here if it's first write on non-exist grain(cluster).
>      // try to read from parent image, if exist
> @@ -450,14 +528,14 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
>              return -1;
>  
>          ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
> -            s->cluster_sectors);
> +                extent->cluster_sectors);
>          if (ret < 0) {
>              return -1;
>          }
>  
>          //Write grain only into the active image
> -        ret = bdrv_write(bs->file, cluster_offset, whole_grain,
> -            s->cluster_sectors);
> +        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
> +                extent->cluster_sectors);
>          if (ret < 0) {
>              return -1;
>          }
> @@ -465,29 +543,39 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
>      return 0;
>  }
>  
> -static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
> +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
>  {
> -    BDRVVmdkState *s = bs->opaque;
> -
>      /* update L2 table */
> -    if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
> -                    &(m_data->offset), sizeof(m_data->offset)) < 0)
> +    if (bdrv_pwrite_sync(
> +                extent->file,
> +                ((int64_t)m_data->l2_offset * 512)
> +                    + (m_data->l2_index * sizeof(m_data->offset)),
> +                &(m_data->offset),
> +                sizeof(m_data->offset)
> +            ) < 0) {
>          return -1;
> +    }
>      /* update backup L2 table */
> -    if (s->l1_backup_table_offset != 0) {
> -        m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
> -        if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
> -                        &(m_data->offset), sizeof(m_data->offset)) < 0)
> +    if (extent->l1_backup_table_offset != 0) {
> +        m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
> +        if (bdrv_pwrite_sync(
> +                    extent->file,
> +                    ((int64_t)m_data->l2_offset * 512)
> +                        + (m_data->l2_index * sizeof(m_data->offset)),
> +                    &(m_data->offset), sizeof(m_data->offset)
> +                ) < 0) {
>              return -1;
> +        }
>      }
>  
>      return 0;
>  }
>  
> -static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
> -                                   uint64_t offset, int allocate)
> +static uint64_t get_cluster_offset(BlockDriverState *bs,
> +                                    VmdkExtent *extent,
> +                                    VmdkMetaData *m_data,
> +                                    uint64_t offset, int allocate)
>  {
> -    BDRVVmdkState *s = bs->opaque;
>      unsigned int l1_index, l2_offset, l2_index;
>      int min_index, i, j;
>      uint32_t min_count, *l2_table, tmp = 0;
> @@ -496,21 +584,23 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
>      if (m_data)
>          m_data->valid = 0;
>  
> -    l1_index = (offset >> 9) / s->l1_entry_sectors;
> -    if (l1_index >= s->l1_size)
> +    l1_index = (offset >> 9) / extent->l1_entry_sectors;
> +    if (l1_index >= extent->l1_size) {
>          return 0;
> -    l2_offset = s->l1_table[l1_index];
> -    if (!l2_offset)
> +    }
> +    l2_offset = extent->l1_table[l1_index];
> +    if (!l2_offset) {
>          return 0;
> +    }
>      for(i = 0; i < L2_CACHE_SIZE; i++) {
> -        if (l2_offset == s->l2_cache_offsets[i]) {
> +        if (l2_offset == extent->l2_cache_offsets[i]) {
>              /* increment the hit count */
> -            if (++s->l2_cache_counts[i] == 0xffffffff) {
> +            if (++extent->l2_cache_counts[i] == 0xffffffff) {
>                  for(j = 0; j < L2_CACHE_SIZE; j++) {
> -                    s->l2_cache_counts[j] >>= 1;
> +                    extent->l2_cache_counts[j] >>= 1;
>                  }
>              }
> -            l2_table = s->l2_cache + (i * s->l2_size);
> +            l2_table = extent->l2_cache + (i * extent->l2_size);
>              goto found;
>          }
>      }
> @@ -518,20 +608,25 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
>      min_index = 0;
>      min_count = 0xffffffff;
>      for(i = 0; i < L2_CACHE_SIZE; i++) {
> -        if (s->l2_cache_counts[i] < min_count) {
> -            min_count = s->l2_cache_counts[i];
> +        if (extent->l2_cache_counts[i] < min_count) {
> +            min_count = extent->l2_cache_counts[i];
>              min_index = i;
>          }
>      }
> -    l2_table = s->l2_cache + (min_index * s->l2_size);
> -    if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
> -                                                                        s->l2_size * sizeof(uint32_t))
> +    l2_table = extent->l2_cache + (min_index * extent->l2_size);
> +    if (bdrv_pread(
> +                extent->file,
> +                (int64_t)l2_offset * 512,
> +                l2_table,
> +                extent->l2_size * sizeof(uint32_t)
> +            ) != extent->l2_size * sizeof(uint32_t)) {
>          return 0;
> +    }
>  
> -    s->l2_cache_offsets[min_index] = l2_offset;
> -    s->l2_cache_counts[min_index] = 1;
> +    extent->l2_cache_offsets[min_index] = l2_offset;
> +    extent->l2_cache_counts[min_index] = 1;
>   found:
> -    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
> +    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
>      cluster_offset = le32_to_cpu(l2_table[l2_index]);
>  
>      if (!cluster_offset) {
> @@ -539,8 +634,11 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
>              return 0;
>  
>          // Avoid the L2 tables update for the images that have snapshots.
> -        cluster_offset = bdrv_getlength(bs->file);
> -        bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
> +        cluster_offset = bdrv_getlength(extent->file);
> +        bdrv_truncate(
> +            extent->file,
> +            cluster_offset + (extent->cluster_sectors << 9)
> +        );
>  
>          cluster_offset >>= 9;
>          tmp = cpu_to_le32(cluster_offset);
> @@ -551,7 +649,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
>           * This problem may occur because of insufficient space on host disk
>           * or inappropriate VM shutdown.
>           */
> -        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
> +        if (get_whole_cluster(
> +                bs, extent, cluster_offset, offset, allocate) == -1)
>              return 0;
>  
>          if (m_data) {
> @@ -566,33 +665,69 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
>      return cluster_offset;
>  }
>  
> +static VmdkExtent *find_extent(BDRVVmdkState *s,
> +                                int64_t sector_num, VmdkExtent *start_hint)
> +{
> +    VmdkExtent *extent = start_hint;
> +
> +    if (!extent) {
> +        extent = &s->extents[0];
> +    }
> +    while (extent < &s->extents[s->num_extents]) {
> +        if (sector_num < extent->end_sector) {
> +            return extent;
> +        }
> +        extent++;
> +    }

This relies on the right order in the array again. Worth a comment in
the declaration of the array in BDRVVmdkState.

The rest of the patch looks good. None of the issues are really
critical, so if your prefer to do the changes on top of your current
series, that works for me, too.

Kevin

Patch

diff --git a/block/vmdk.c b/block/vmdk.c
index 922b23d..f26137d 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -60,7 +60,11 @@  typedef struct {
 
 #define L2_CACHE_SIZE 16
 
-typedef struct BDRVVmdkState {
+typedef struct VmdkExtent {
+    BlockDriverState *file;
+    bool flat;
+    int64_t sectors;
+    int64_t end_sector;
     int64_t l1_table_offset;
     int64_t l1_backup_table_offset;
     uint32_t *l1_table;
@@ -74,7 +78,12 @@  typedef struct BDRVVmdkState {
     uint32_t l2_cache_counts[L2_CACHE_SIZE];
 
     unsigned int cluster_sectors;
+} VmdkExtent;
+
+typedef struct BDRVVmdkState {
+    int num_extents;
     uint32_t parent_cid;
+    VmdkExtent *extents;
 } BDRVVmdkState;
 
 typedef struct VmdkMetaData {
@@ -105,6 +114,19 @@  static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
 #define DESC_SIZE 20*SECTOR_SIZE	// 20 sectors of 512 bytes each
 #define HEADER_SIZE 512   			// first sector of 512 bytes
 
+static void vmdk_free_extents(BlockDriverState *bs)
+{
+    int i;
+    BDRVVmdkState *s = bs->opaque;
+
+    for (i = 0; i < s->num_extents; i++) {
+        qemu_free(s->extents[i].l1_table);
+        qemu_free(s->extents[i].l2_cache);
+        qemu_free(s->extents[i].l1_backup_table);
+    }
+    qemu_free(s->extents);
+}
+
 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
 {
     char desc[DESC_SIZE];
@@ -156,8 +178,8 @@  static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
 
 static int vmdk_is_cid_valid(BlockDriverState *bs)
 {
-#ifdef CHECK_CID
     BDRVVmdkState *s = bs->opaque;
+#ifdef CHECK_CID
     BlockDriverState *p_bs = bs->backing_hd;
     uint32_t cur_pcid;
 
@@ -358,11 +380,50 @@  static int vmdk_parent_open(BlockDriverState *bs)
     return 0;
 }
 
+/* Create and append extent to the extent array. Return the added VmdkExtent
+ * address. return NULL if allocation failed. */
+static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
+                           BlockDriverState *file, bool flat, int64_t sectors,
+                           int64_t l1_offset, int64_t l1_backup_offset,
+                           uint32_t l1_size,
+                           int l2_size, unsigned int cluster_sectors)
+{
+    VmdkExtent *extent;
+    BDRVVmdkState *s = bs->opaque;
+
+    s->extents = qemu_realloc(s->extents,
+                              (s->num_extents + 1) * sizeof(VmdkExtent));
+    extent = &s->extents[s->num_extents];
+    s->num_extents++;
+
+    memset(extent, 0, sizeof(VmdkExtent));
+    extent->file = file;
+    extent->flat = flat;
+    extent->sectors = sectors;
+    extent->l1_table_offset = l1_offset;
+    extent->l1_backup_table_offset = l1_backup_offset;
+    extent->l1_size = l1_size;
+    extent->l1_entry_sectors = l2_size * cluster_sectors;
+    extent->l2_size = l2_size;
+    extent->cluster_sectors = cluster_sectors;
+
+    if (s->num_extents > 1) {
+        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
+    } else {
+        extent->end_sector = extent->sectors;
+    }
+    bs->total_sectors = extent->end_sector;
+    return extent;
+}
+
+
 static int vmdk_open(BlockDriverState *bs, int flags)
 {
     BDRVVmdkState *s = bs->opaque;
     uint32_t magic;
-    int l1_size, i;
+    int i;
+    uint32_t l1_size, l1_entry_sectors;
+    VmdkExtent *extent = NULL;
 
     if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
         goto fail;
@@ -370,32 +431,34 @@  static int vmdk_open(BlockDriverState *bs, int flags)
     magic = be32_to_cpu(magic);
     if (magic == VMDK3_MAGIC) {
         VMDK3Header header;
-
-        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
+        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
+                != sizeof(header)) {
             goto fail;
-        s->cluster_sectors = le32_to_cpu(header.granularity);
-        s->l2_size = 1 << 9;
-        s->l1_size = 1 << 6;
-        bs->total_sectors = le32_to_cpu(header.disk_sectors);
-        s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
-        s->l1_backup_table_offset = 0;
-        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
+        }
+        extent = vmdk_add_extent(bs, bs->file, false,
+                              le32_to_cpu(header.disk_sectors),
+                              le32_to_cpu(header.l1dir_offset) << 9, 0,
+                              1 << 6, 1 << 9, le32_to_cpu(header.granularity));
     } else if (magic == VMDK4_MAGIC) {
         VMDK4Header header;
-
-        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
+        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
+                != sizeof(header)) {
             goto fail;
-        bs->total_sectors = le64_to_cpu(header.capacity);
-        s->cluster_sectors = le64_to_cpu(header.granularity);
-        s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
-        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
-        if (s->l1_entry_sectors <= 0)
+        }
+        l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
+                            * le64_to_cpu(header.granularity);
+        l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
+                    / l1_entry_sectors;
+        extent = vmdk_add_extent(bs, bs->file, false,
+                              le64_to_cpu(header.capacity),
+                              le64_to_cpu(header.gd_offset) << 9,
+                              le64_to_cpu(header.rgd_offset) << 9,
+                              l1_size,
+                              le32_to_cpu(header.num_gtes_per_gte),
+                              le64_to_cpu(header.granularity));
+        if (extent->l1_entry_sectors <= 0) {
             goto fail;
-        s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
-            / s->l1_entry_sectors;
-        s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
-        s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
-
+        }
         // try to open parent images, if exist
         if (vmdk_parent_open(bs) != 0)
             goto fail;
@@ -405,41 +468,56 @@  static int vmdk_open(BlockDriverState *bs, int flags)
         goto fail;
     }
 
+    /* sum up the total sectors */
+    bs->total_sectors = 0;
+    for (i = 0; i < s->num_extents; i++) {
+        bs->total_sectors += s->extents[i].sectors;
+    }
+
     /* read the L1 table */
-    l1_size = s->l1_size * sizeof(uint32_t);
-    s->l1_table = qemu_malloc(l1_size);
-    if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
+    l1_size = extent->l1_size * sizeof(uint32_t);
+    extent->l1_table = qemu_malloc(l1_size);
+    if (bdrv_pread(bs->file,
+            extent->l1_table_offset,
+            extent->l1_table,
+            l1_size)
+        != l1_size) {
         goto fail;
-    for(i = 0; i < s->l1_size; i++) {
-        le32_to_cpus(&s->l1_table[i]);
+    }
+    for (i = 0; i < extent->l1_size; i++) {
+        le32_to_cpus(&extent->l1_table[i]);
     }
 
-    if (s->l1_backup_table_offset) {
-        s->l1_backup_table = qemu_malloc(l1_size);
-        if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
+    if (extent->l1_backup_table_offset) {
+        extent->l1_backup_table = qemu_malloc(l1_size);
+        if (bdrv_pread(bs->file,
+                    extent->l1_backup_table_offset,
+                    extent->l1_backup_table,
+                    l1_size)
+                != l1_size) {
             goto fail;
-        for(i = 0; i < s->l1_size; i++) {
-            le32_to_cpus(&s->l1_backup_table[i]);
+        }
+        for (i = 0; i < extent->l1_size; i++) {
+            le32_to_cpus(&extent->l1_backup_table[i]);
         }
     }
 
-    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
+    extent->l2_cache =
+        qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
     return 0;
  fail:
-    qemu_free(s->l1_backup_table);
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
+    vmdk_free_extents(bs);
     return -1;
 }
 
-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
-                                   uint64_t offset, int allocate);
-
-static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
-                             uint64_t offset, int allocate)
+static int get_whole_cluster(BlockDriverState *bs,
+                VmdkExtent *extent,
+                uint64_t cluster_offset,
+                uint64_t offset,
+                bool allocate)
 {
-    BDRVVmdkState *s = bs->opaque;
-    uint8_t  whole_grain[s->cluster_sectors*512];        // 128 sectors * 512 bytes each = grain size 64KB
+    /* 128 sectors * 512 bytes each = grain size 64KB */
+    uint8_t  whole_grain[extent->cluster_sectors * 512];
 
     // we will be here if it's first write on non-exist grain(cluster).
     // try to read from parent image, if exist
@@ -450,14 +528,14 @@  static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
             return -1;
 
         ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
-            s->cluster_sectors);
+                extent->cluster_sectors);
         if (ret < 0) {
             return -1;
         }
 
         //Write grain only into the active image
-        ret = bdrv_write(bs->file, cluster_offset, whole_grain,
-            s->cluster_sectors);
+        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
+                extent->cluster_sectors);
         if (ret < 0) {
             return -1;
         }
@@ -465,29 +543,39 @@  static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
     return 0;
 }
 
-static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
+static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
 {
-    BDRVVmdkState *s = bs->opaque;
-
     /* update L2 table */
-    if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
-                    &(m_data->offset), sizeof(m_data->offset)) < 0)
+    if (bdrv_pwrite_sync(
+                extent->file,
+                ((int64_t)m_data->l2_offset * 512)
+                    + (m_data->l2_index * sizeof(m_data->offset)),
+                &(m_data->offset),
+                sizeof(m_data->offset)
+            ) < 0) {
         return -1;
+    }
     /* update backup L2 table */
-    if (s->l1_backup_table_offset != 0) {
-        m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
-        if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
-                        &(m_data->offset), sizeof(m_data->offset)) < 0)
+    if (extent->l1_backup_table_offset != 0) {
+        m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
+        if (bdrv_pwrite_sync(
+                    extent->file,
+                    ((int64_t)m_data->l2_offset * 512)
+                        + (m_data->l2_index * sizeof(m_data->offset)),
+                    &(m_data->offset), sizeof(m_data->offset)
+                ) < 0) {
             return -1;
+        }
     }
 
     return 0;
 }
 
-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
-                                   uint64_t offset, int allocate)
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+                                    VmdkExtent *extent,
+                                    VmdkMetaData *m_data,
+                                    uint64_t offset, int allocate)
 {
-    BDRVVmdkState *s = bs->opaque;
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
     uint32_t min_count, *l2_table, tmp = 0;
@@ -496,21 +584,23 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
     if (m_data)
         m_data->valid = 0;
 
-    l1_index = (offset >> 9) / s->l1_entry_sectors;
-    if (l1_index >= s->l1_size)
+    l1_index = (offset >> 9) / extent->l1_entry_sectors;
+    if (l1_index >= extent->l1_size) {
         return 0;
-    l2_offset = s->l1_table[l1_index];
-    if (!l2_offset)
+    }
+    l2_offset = extent->l1_table[l1_index];
+    if (!l2_offset) {
         return 0;
+    }
     for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == s->l2_cache_offsets[i]) {
+        if (l2_offset == extent->l2_cache_offsets[i]) {
             /* increment the hit count */
-            if (++s->l2_cache_counts[i] == 0xffffffff) {
+            if (++extent->l2_cache_counts[i] == 0xffffffff) {
                 for(j = 0; j < L2_CACHE_SIZE; j++) {
-                    s->l2_cache_counts[j] >>= 1;
+                    extent->l2_cache_counts[j] >>= 1;
                 }
             }
-            l2_table = s->l2_cache + (i * s->l2_size);
+            l2_table = extent->l2_cache + (i * extent->l2_size);
             goto found;
         }
     }
@@ -518,20 +608,25 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
     min_index = 0;
     min_count = 0xffffffff;
     for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (s->l2_cache_counts[i] < min_count) {
-            min_count = s->l2_cache_counts[i];
+        if (extent->l2_cache_counts[i] < min_count) {
+            min_count = extent->l2_cache_counts[i];
             min_index = i;
         }
     }
-    l2_table = s->l2_cache + (min_index * s->l2_size);
-    if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
-                                                                        s->l2_size * sizeof(uint32_t))
+    l2_table = extent->l2_cache + (min_index * extent->l2_size);
+    if (bdrv_pread(
+                extent->file,
+                (int64_t)l2_offset * 512,
+                l2_table,
+                extent->l2_size * sizeof(uint32_t)
+            ) != extent->l2_size * sizeof(uint32_t)) {
         return 0;
+    }
 
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
+    extent->l2_cache_offsets[min_index] = l2_offset;
+    extent->l2_cache_counts[min_index] = 1;
  found:
-    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
+    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
     cluster_offset = le32_to_cpu(l2_table[l2_index]);
 
     if (!cluster_offset) {
@@ -539,8 +634,11 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
             return 0;
 
         // Avoid the L2 tables update for the images that have snapshots.
-        cluster_offset = bdrv_getlength(bs->file);
-        bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
+        cluster_offset = bdrv_getlength(extent->file);
+        bdrv_truncate(
+            extent->file,
+            cluster_offset + (extent->cluster_sectors << 9)
+        );
 
         cluster_offset >>= 9;
         tmp = cpu_to_le32(cluster_offset);
@@ -551,7 +649,8 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
          * This problem may occur because of insufficient space on host disk
          * or inappropriate VM shutdown.
          */
-        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
+        if (get_whole_cluster(
+                bs, extent, cluster_offset, offset, allocate) == -1)
             return 0;
 
         if (m_data) {
@@ -566,33 +665,69 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
     return cluster_offset;
 }
 
+static VmdkExtent *find_extent(BDRVVmdkState *s,
+                                int64_t sector_num, VmdkExtent *start_hint)
+{
+    VmdkExtent *extent = start_hint;
+
+    if (!extent) {
+        extent = &s->extents[0];
+    }
+    while (extent < &s->extents[s->num_extents]) {
+        if (sector_num < extent->end_sector) {
+            return extent;
+        }
+        extent++;
+    }
+    return NULL;
+}
+
 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors, int *pnum)
 {
     BDRVVmdkState *s = bs->opaque;
-    int index_in_cluster, n;
-    uint64_t cluster_offset;
 
-    cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
-    index_in_cluster = sector_num % s->cluster_sectors;
-    n = s->cluster_sectors - index_in_cluster;
+    int64_t index_in_cluster, n, ret;
+    uint64_t offset;
+    VmdkExtent *extent;
+
+    extent = find_extent(s, sector_num, NULL);
+    if (!extent) {
+        return 0;
+    }
+    if (extent->flat) {
+        n = extent->end_sector - sector_num;
+        ret = 1;
+    } else {
+        offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
+        index_in_cluster = sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
+        ret = offset ? 1 : 0;
+    }
     if (n > nb_sectors)
         n = nb_sectors;
     *pnum = n;
-    return (cluster_offset != 0);
+    return ret;
 }
 
 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
     BDRVVmdkState *s = bs->opaque;
-    int index_in_cluster, n, ret;
+    int ret;
+    uint64_t n, index_in_cluster;
+    VmdkExtent *extent = NULL;
     uint64_t cluster_offset;
 
     while (nb_sectors > 0) {
-        cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
-        index_in_cluster = sector_num % s->cluster_sectors;
-        n = s->cluster_sectors - index_in_cluster;
+        extent = find_extent(s, sector_num, extent);
+        if (!extent) {
+            return -EIO;
+        }
+        cluster_offset = get_cluster_offset(
+                            bs, extent, NULL, sector_num << 9, 0);
+        index_in_cluster = sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
         if (n > nb_sectors)
             n = nb_sectors;
         if (!cluster_offset) {
@@ -621,10 +756,12 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                      const uint8_t *buf, int nb_sectors)
 {
     BDRVVmdkState *s = bs->opaque;
-    VmdkMetaData m_data;
-    int index_in_cluster, n;
+    VmdkExtent *extent = NULL;
+    int n;
+    int64_t index_in_cluster;
     uint64_t cluster_offset;
     static int cid_update = 0;
+    VmdkMetaData m_data;
 
     if (sector_num > bs->total_sectors) {
         fprintf(stderr,
@@ -635,20 +772,35 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
     }
 
     while (nb_sectors > 0) {
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n = s->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors)
-            n = nb_sectors;
-        cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
-        if (!cluster_offset)
+        extent = find_extent(s, sector_num, extent);
+        if (!extent) {
+            return -EIO;
+        }
+        cluster_offset = get_cluster_offset(
+                                bs,
+                                extent,
+                                &m_data,
+                                sector_num << 9, 1);
+        if (!cluster_offset) {
             return -1;
+        }
+        index_in_cluster = sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
+        if (n > nb_sectors) {
+            n = nb_sectors;
+        }
 
-        if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
+        if (bdrv_pwrite(bs->file,
+                        cluster_offset + index_in_cluster * 512,
+                        buf, n * 512)
+                != n * 512) {
             return -1;
+        }
         if (m_data.valid) {
             /* update L2 tables */
-            if (vmdk_L2update(bs, &m_data) == -1)
+            if (vmdk_L2update(extent, &m_data) == -1) {
                 return -1;
+            }
         }
         nb_sectors -= n;
         sector_num += n;
@@ -822,10 +974,7 @@  exit:
 
 static void vmdk_close(BlockDriverState *bs)
 {
-    BDRVVmdkState *s = bs->opaque;
-
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
+    vmdk_free_extents(bs);
 }
 
 static int vmdk_flush(BlockDriverState *bs)