Patchwork [v9,01/12] VMDK: introduce VmdkExtent

login
register
mail settings
Submitter Feiran Zheng
Date July 12, 2011, 11:56 a.m.
Message ID <1310471799-20055-2-git-send-email-famcool@gmail.com>
Download mbox | patch
Permalink /patch/104365/
State New
Headers show

Comments

Feiran Zheng - July 12, 2011, 11:56 a.m.
Introduced VmdkExtent array into BDRVVmdkState, enable holding multiple
image extents for multiple file image support.

Signed-off-by: Fam Zheng <famcool@gmail.com>
---
 block/vmdk.c |  348 +++++++++++++++++++++++++++++++++++++++++-----------------
 1 files changed, 246 insertions(+), 102 deletions(-)

Patch

diff --git a/block/vmdk.c b/block/vmdk.c
index 922b23d..3b78583 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -60,7 +60,11 @@  typedef struct {
 
 #define L2_CACHE_SIZE 16
 
-typedef struct BDRVVmdkState {
+typedef struct VmdkExtent {
+    BlockDriverState *file;
+    bool flat;
+    int64_t sectors;
+    int64_t end_sector;
     int64_t l1_table_offset;
     int64_t l1_backup_table_offset;
     uint32_t *l1_table;
@@ -74,7 +78,13 @@  typedef struct BDRVVmdkState {
     uint32_t l2_cache_counts[L2_CACHE_SIZE];
 
     unsigned int cluster_sectors;
+} VmdkExtent;
+
+typedef struct BDRVVmdkState {
     uint32_t parent_cid;
+    int num_extents;
+    /* Extent array with num_extents entries, ascend ordered by address */
+    VmdkExtent *extents;
 } BDRVVmdkState;
 
 typedef struct VmdkMetaData {
@@ -105,6 +115,19 @@  static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
 #define DESC_SIZE 20*SECTOR_SIZE	// 20 sectors of 512 bytes each
 #define HEADER_SIZE 512   			// first sector of 512 bytes
 
+static void vmdk_free_extents(BlockDriverState *bs)
+{
+    int i;
+    BDRVVmdkState *s = bs->opaque;
+
+    for (i = 0; i < s->num_extents; i++) {
+        qemu_free(s->extents[i].l1_table);
+        qemu_free(s->extents[i].l2_cache);
+        qemu_free(s->extents[i].l1_backup_table);
+    }
+    qemu_free(s->extents);
+}
+
 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
 {
     char desc[DESC_SIZE];
@@ -358,11 +381,50 @@  static int vmdk_parent_open(BlockDriverState *bs)
     return 0;
 }
 
+/* Create and append extent to the extent array. Return the added VmdkExtent
+ * address. return NULL if allocation failed. */
+static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
+                           BlockDriverState *file, bool flat, int64_t sectors,
+                           int64_t l1_offset, int64_t l1_backup_offset,
+                           uint32_t l1_size,
+                           int l2_size, unsigned int cluster_sectors)
+{
+    VmdkExtent *extent;
+    BDRVVmdkState *s = bs->opaque;
+
+    s->extents = qemu_realloc(s->extents,
+                              (s->num_extents + 1) * sizeof(VmdkExtent));
+    extent = &s->extents[s->num_extents];
+    s->num_extents++;
+
+    memset(extent, 0, sizeof(VmdkExtent));
+    extent->file = file;
+    extent->flat = flat;
+    extent->sectors = sectors;
+    extent->l1_table_offset = l1_offset;
+    extent->l1_backup_table_offset = l1_backup_offset;
+    extent->l1_size = l1_size;
+    extent->l1_entry_sectors = l2_size * cluster_sectors;
+    extent->l2_size = l2_size;
+    extent->cluster_sectors = cluster_sectors;
+
+    if (s->num_extents > 1) {
+        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
+    } else {
+        extent->end_sector = extent->sectors;
+    }
+    bs->total_sectors = extent->end_sector;
+    return extent;
+}
+
+
 static int vmdk_open(BlockDriverState *bs, int flags)
 {
     BDRVVmdkState *s = bs->opaque;
     uint32_t magic;
-    int l1_size, i;
+    int i;
+    uint32_t l1_size, l1_entry_sectors;
+    VmdkExtent *extent = NULL;
 
     if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
         goto fail;
@@ -370,32 +432,34 @@  static int vmdk_open(BlockDriverState *bs, int flags)
     magic = be32_to_cpu(magic);
     if (magic == VMDK3_MAGIC) {
         VMDK3Header header;
-
-        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
+        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
+                != sizeof(header)) {
             goto fail;
-        s->cluster_sectors = le32_to_cpu(header.granularity);
-        s->l2_size = 1 << 9;
-        s->l1_size = 1 << 6;
-        bs->total_sectors = le32_to_cpu(header.disk_sectors);
-        s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
-        s->l1_backup_table_offset = 0;
-        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
+        }
+        extent = vmdk_add_extent(bs, bs->file, false,
+                              le32_to_cpu(header.disk_sectors),
+                              le32_to_cpu(header.l1dir_offset) << 9, 0,
+                              1 << 6, 1 << 9, le32_to_cpu(header.granularity));
     } else if (magic == VMDK4_MAGIC) {
         VMDK4Header header;
-
-        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
+        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
+                != sizeof(header)) {
             goto fail;
-        bs->total_sectors = le64_to_cpu(header.capacity);
-        s->cluster_sectors = le64_to_cpu(header.granularity);
-        s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
-        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
-        if (s->l1_entry_sectors <= 0)
+        }
+        l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
+                            * le64_to_cpu(header.granularity);
+        l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
+                    / l1_entry_sectors;
+        extent = vmdk_add_extent(bs, bs->file, false,
+                              le64_to_cpu(header.capacity),
+                              le64_to_cpu(header.gd_offset) << 9,
+                              le64_to_cpu(header.rgd_offset) << 9,
+                              l1_size,
+                              le32_to_cpu(header.num_gtes_per_gte),
+                              le64_to_cpu(header.granularity));
+        if (extent->l1_entry_sectors <= 0) {
             goto fail;
-        s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
-            / s->l1_entry_sectors;
-        s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
-        s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
-
+        }
         // try to open parent images, if exist
         if (vmdk_parent_open(bs) != 0)
             goto fail;
@@ -406,40 +470,49 @@  static int vmdk_open(BlockDriverState *bs, int flags)
     }
 
     /* read the L1 table */
-    l1_size = s->l1_size * sizeof(uint32_t);
-    s->l1_table = qemu_malloc(l1_size);
-    if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
+    l1_size = extent->l1_size * sizeof(uint32_t);
+    extent->l1_table = qemu_malloc(l1_size);
+    if (bdrv_pread(bs->file,
+            extent->l1_table_offset,
+            extent->l1_table,
+            l1_size)
+        != l1_size) {
         goto fail;
-    for(i = 0; i < s->l1_size; i++) {
-        le32_to_cpus(&s->l1_table[i]);
+    }
+    for (i = 0; i < extent->l1_size; i++) {
+        le32_to_cpus(&extent->l1_table[i]);
     }
 
-    if (s->l1_backup_table_offset) {
-        s->l1_backup_table = qemu_malloc(l1_size);
-        if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
+    if (extent->l1_backup_table_offset) {
+        extent->l1_backup_table = qemu_malloc(l1_size);
+        if (bdrv_pread(bs->file,
+                    extent->l1_backup_table_offset,
+                    extent->l1_backup_table,
+                    l1_size)
+                != l1_size) {
             goto fail;
-        for(i = 0; i < s->l1_size; i++) {
-            le32_to_cpus(&s->l1_backup_table[i]);
+        }
+        for (i = 0; i < extent->l1_size; i++) {
+            le32_to_cpus(&extent->l1_backup_table[i]);
         }
     }
 
-    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
+    extent->l2_cache =
+        qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
     return 0;
  fail:
-    qemu_free(s->l1_backup_table);
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
+    vmdk_free_extents(bs);
     return -1;
 }
 
-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
-                                   uint64_t offset, int allocate);
-
-static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
-                             uint64_t offset, int allocate)
+static int get_whole_cluster(BlockDriverState *bs,
+                VmdkExtent *extent,
+                uint64_t cluster_offset,
+                uint64_t offset,
+                bool allocate)
 {
-    BDRVVmdkState *s = bs->opaque;
-    uint8_t  whole_grain[s->cluster_sectors*512];        // 128 sectors * 512 bytes each = grain size 64KB
+    /* 128 sectors * 512 bytes each = grain size 64KB */
+    uint8_t  whole_grain[extent->cluster_sectors * 512];
 
     // we will be here if it's first write on non-exist grain(cluster).
     // try to read from parent image, if exist
@@ -450,14 +523,14 @@  static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
             return -1;
 
         ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
-            s->cluster_sectors);
+                extent->cluster_sectors);
         if (ret < 0) {
             return -1;
         }
 
         //Write grain only into the active image
-        ret = bdrv_write(bs->file, cluster_offset, whole_grain,
-            s->cluster_sectors);
+        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
+                extent->cluster_sectors);
         if (ret < 0) {
             return -1;
         }
@@ -465,29 +538,39 @@  static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
     return 0;
 }
 
-static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
+static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
 {
-    BDRVVmdkState *s = bs->opaque;
-
     /* update L2 table */
-    if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
-                    &(m_data->offset), sizeof(m_data->offset)) < 0)
+    if (bdrv_pwrite_sync(
+                extent->file,
+                ((int64_t)m_data->l2_offset * 512)
+                    + (m_data->l2_index * sizeof(m_data->offset)),
+                &(m_data->offset),
+                sizeof(m_data->offset)
+            ) < 0) {
         return -1;
+    }
     /* update backup L2 table */
-    if (s->l1_backup_table_offset != 0) {
-        m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
-        if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
-                        &(m_data->offset), sizeof(m_data->offset)) < 0)
+    if (extent->l1_backup_table_offset != 0) {
+        m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
+        if (bdrv_pwrite_sync(
+                    extent->file,
+                    ((int64_t)m_data->l2_offset * 512)
+                        + (m_data->l2_index * sizeof(m_data->offset)),
+                    &(m_data->offset), sizeof(m_data->offset)
+                ) < 0) {
             return -1;
+        }
     }
 
     return 0;
 }
 
-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
-                                   uint64_t offset, int allocate)
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+                                    VmdkExtent *extent,
+                                    VmdkMetaData *m_data,
+                                    uint64_t offset, int allocate)
 {
-    BDRVVmdkState *s = bs->opaque;
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
     uint32_t min_count, *l2_table, tmp = 0;
@@ -496,21 +579,23 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
     if (m_data)
         m_data->valid = 0;
 
-    l1_index = (offset >> 9) / s->l1_entry_sectors;
-    if (l1_index >= s->l1_size)
+    l1_index = (offset >> 9) / extent->l1_entry_sectors;
+    if (l1_index >= extent->l1_size) {
         return 0;
-    l2_offset = s->l1_table[l1_index];
-    if (!l2_offset)
+    }
+    l2_offset = extent->l1_table[l1_index];
+    if (!l2_offset) {
         return 0;
+    }
     for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == s->l2_cache_offsets[i]) {
+        if (l2_offset == extent->l2_cache_offsets[i]) {
             /* increment the hit count */
-            if (++s->l2_cache_counts[i] == 0xffffffff) {
+            if (++extent->l2_cache_counts[i] == 0xffffffff) {
                 for(j = 0; j < L2_CACHE_SIZE; j++) {
-                    s->l2_cache_counts[j] >>= 1;
+                    extent->l2_cache_counts[j] >>= 1;
                 }
             }
-            l2_table = s->l2_cache + (i * s->l2_size);
+            l2_table = extent->l2_cache + (i * extent->l2_size);
             goto found;
         }
     }
@@ -518,20 +603,25 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
     min_index = 0;
     min_count = 0xffffffff;
     for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (s->l2_cache_counts[i] < min_count) {
-            min_count = s->l2_cache_counts[i];
+        if (extent->l2_cache_counts[i] < min_count) {
+            min_count = extent->l2_cache_counts[i];
             min_index = i;
         }
     }
-    l2_table = s->l2_cache + (min_index * s->l2_size);
-    if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
-                                                                        s->l2_size * sizeof(uint32_t))
+    l2_table = extent->l2_cache + (min_index * extent->l2_size);
+    if (bdrv_pread(
+                extent->file,
+                (int64_t)l2_offset * 512,
+                l2_table,
+                extent->l2_size * sizeof(uint32_t)
+            ) != extent->l2_size * sizeof(uint32_t)) {
         return 0;
+    }
 
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
+    extent->l2_cache_offsets[min_index] = l2_offset;
+    extent->l2_cache_counts[min_index] = 1;
  found:
-    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
+    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
     cluster_offset = le32_to_cpu(l2_table[l2_index]);
 
     if (!cluster_offset) {
@@ -539,8 +629,11 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
             return 0;
 
         // Avoid the L2 tables update for the images that have snapshots.
-        cluster_offset = bdrv_getlength(bs->file);
-        bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
+        cluster_offset = bdrv_getlength(extent->file);
+        bdrv_truncate(
+            extent->file,
+            cluster_offset + (extent->cluster_sectors << 9)
+        );
 
         cluster_offset >>= 9;
         tmp = cpu_to_le32(cluster_offset);
@@ -551,7 +644,8 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
          * This problem may occur because of insufficient space on host disk
          * or inappropriate VM shutdown.
          */
-        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
+        if (get_whole_cluster(
+                bs, extent, cluster_offset, offset, allocate) == -1)
             return 0;
 
         if (m_data) {
@@ -566,33 +660,69 @@  static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
     return cluster_offset;
 }
 
+static VmdkExtent *find_extent(BDRVVmdkState *s,
+                                int64_t sector_num, VmdkExtent *start_hint)
+{
+    VmdkExtent *extent = start_hint;
+
+    if (!extent) {
+        extent = &s->extents[0];
+    }
+    while (extent < &s->extents[s->num_extents]) {
+        if (sector_num < extent->end_sector) {
+            return extent;
+        }
+        extent++;
+    }
+    return NULL;
+}
+
 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors, int *pnum)
 {
     BDRVVmdkState *s = bs->opaque;
-    int index_in_cluster, n;
-    uint64_t cluster_offset;
 
-    cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
-    index_in_cluster = sector_num % s->cluster_sectors;
-    n = s->cluster_sectors - index_in_cluster;
+    int64_t index_in_cluster, n, ret;
+    uint64_t offset;
+    VmdkExtent *extent;
+
+    extent = find_extent(s, sector_num, NULL);
+    if (!extent) {
+        return 0;
+    }
+    if (extent->flat) {
+        n = extent->end_sector - sector_num;
+        ret = 1;
+    } else {
+        offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
+        index_in_cluster = sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
+        ret = offset ? 1 : 0;
+    }
     if (n > nb_sectors)
         n = nb_sectors;
     *pnum = n;
-    return (cluster_offset != 0);
+    return ret;
 }
 
 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
     BDRVVmdkState *s = bs->opaque;
-    int index_in_cluster, n, ret;
+    int ret;
+    uint64_t n, index_in_cluster;
+    VmdkExtent *extent = NULL;
     uint64_t cluster_offset;
 
     while (nb_sectors > 0) {
-        cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
-        index_in_cluster = sector_num % s->cluster_sectors;
-        n = s->cluster_sectors - index_in_cluster;
+        extent = find_extent(s, sector_num, extent);
+        if (!extent) {
+            return -EIO;
+        }
+        cluster_offset = get_cluster_offset(
+                            bs, extent, NULL, sector_num << 9, 0);
+        index_in_cluster = sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
         if (n > nb_sectors)
             n = nb_sectors;
         if (!cluster_offset) {
@@ -621,10 +751,12 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                      const uint8_t *buf, int nb_sectors)
 {
     BDRVVmdkState *s = bs->opaque;
-    VmdkMetaData m_data;
-    int index_in_cluster, n;
+    VmdkExtent *extent = NULL;
+    int n;
+    int64_t index_in_cluster;
     uint64_t cluster_offset;
     static int cid_update = 0;
+    VmdkMetaData m_data;
 
     if (sector_num > bs->total_sectors) {
         fprintf(stderr,
@@ -635,20 +767,35 @@  static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
     }
 
     while (nb_sectors > 0) {
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n = s->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors)
-            n = nb_sectors;
-        cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
-        if (!cluster_offset)
+        extent = find_extent(s, sector_num, extent);
+        if (!extent) {
+            return -EIO;
+        }
+        cluster_offset = get_cluster_offset(
+                                bs,
+                                extent,
+                                &m_data,
+                                sector_num << 9, 1);
+        if (!cluster_offset) {
             return -1;
+        }
+        index_in_cluster = sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
+        if (n > nb_sectors) {
+            n = nb_sectors;
+        }
 
-        if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
+        if (bdrv_pwrite(bs->file,
+                        cluster_offset + index_in_cluster * 512,
+                        buf, n * 512)
+                != n * 512) {
             return -1;
+        }
         if (m_data.valid) {
             /* update L2 tables */
-            if (vmdk_L2update(bs, &m_data) == -1)
+            if (vmdk_L2update(extent, &m_data) == -1) {
                 return -1;
+            }
         }
         nb_sectors -= n;
         sector_num += n;
@@ -822,10 +969,7 @@  exit:
 
 static void vmdk_close(BlockDriverState *bs)
 {
-    BDRVVmdkState *s = bs->opaque;
-
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
+    vmdk_free_extents(bs);
 }
 
 static int vmdk_flush(BlockDriverState *bs)