Patchwork [4/5] block: Virtual Bridges VERDE GOW disk image format, GOW version 2 and version 3 support implementation

login
register
mail settings
Submitter Leonardo E. Reiter
Date March 8, 2012, 10:16 p.m.
Message ID <CA+BHXkK1TwN1gNW_ApEPbt4NmXKmjnFMiBWeCeaPFus_OxJm+w@mail.gmail.com>
Download mbox | patch
Permalink /patch/145676/
State New
Headers show

Comments

Leonardo E. Reiter - March 8, 2012, 10:16 p.m.
commit aef59f96db2ea9dcfdea1b0e1f3c7e05843c5f1a
Author: Leonardo E. Reiter <lreiter@vbridges.com>
Date:   Thu Mar 8 16:02:36 2012 -0600

    Virtual Bridges VERDE GOW version 2 and 3 disk image format
implementation
    Signed-off-by: Leonardo E. Reiter <lreiter@vbridges.com>

+    .bdrv_aio_readv     = gow23_aio_readv,
+    .bdrv_aio_writev    = gow23_aio_writev,
+    .bdrv_aio_flush     = gow23_aio_flush,
+    .create_options     = gow23_create_options,
+};
+
+/* Module init hook: registers the bdrv_gow2 and bdrv_gow3 drivers with the
+ * block layer. */
+static void bdrv_gow23_init(void)
+{
+    bdrv_register(&bdrv_gow2);
+    bdrv_register(&bdrv_gow3);
+}
+
+block_init(bdrv_gow23_init);
+
+#endif  /* ! _WIN32 */
+
Stefan Hajnoczi - March 9, 2012, 11:48 a.m.
On Thu, Mar 8, 2012 at 10:16 PM, Leonardo E. Reiter
<lreiter@vbridges.com> wrote:
> +    s->full_block = calloc(1, GOW_BLOCKSIZE);

Please use g_malloc0(GOW_BLOCKSIZE) and g_free() instead of standard
library memory allocation functions.  The glib memory management
functions are a bit richer than the standard library functions and
include tracing which is useful for memory leak debugging.

There is a special function for allocating disk I/O buffers:
void *qemu_blockalign(BlockDriverState *bs, size_t size)

It honors memory alignment requirements which are necessary when
opening the image file with O_DIRECT.  You can use qemu_vfree() to
free such buffers.

Stefan

Patch

diff --git a/block/gow23.c b/block/gow23.c
new file mode 100644
index 0000000..9968fa7
--- /dev/null
+++ b/block/gow23.c
@@ -0,0 +1,664 @@ 
+/*
+ * Virtual Bridges Grow-on-Write versions 2, 3 block driver for QEMU
+ *
+ * Copyright (c) 2008-2009 Leonardo E. Reiter
+ * Copyright (c) 1984-2012 Virtual Bridges, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef _WIN32
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <time.h>
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+
+#include "gow_int.h"
+
+/* Per-image driver state shared by the GOW2 and GOW3 drivers. */
+typedef struct {
+    BlockDriverState *hd;   /* not referenced in the code visible here */
+    gow3_header_t *header;  /* image header, mmap()ed in gow23_open_common */
+    int is_gow3;            /* 1 when opened by gow3_open, 0 by gow2_open */
+    int fd;                 /* descriptor copied out of bs->file->opaque */
+    int64_t base;           /* GOW2_HEADER_SIZE or GOW3_HEADER_SIZE */
+    uint8_t *full_block;    /* GOW_BLOCKSIZE scratch for full-block writes */
+    int read_only;          /* not referenced in the code visible here */
+
+    /* gow3-specific */
+    uint32_t version;       /* version stamped on blocks written from now on */
+    uint32_t carry;         /* carry value, bumped when version wraps to 0 */
+    int      cset;          /* non-zero: carry still has to go into the header */
+} gow23_state_t;
+
+/*
+ * Common probe helper for the GOW2 and GOW3 drivers.
+ *
+ * buf, buf_size: leading bytes of the image as passed to the probe callback.
+ * filename:      unused.
+ * magic:         expected magic number (GOW2_MAGIC or GOW3_MAGIC), host order.
+ *
+ * Returns 10000 when the magic matches and the size/allocated fields are
+ * consistent (size <= GOW_MAXBLOCKS, allocated <= size), otherwise 0.
+ */
+static int gow23_probe_common(const uint8_t *buf, int buf_size,
+        const char *filename, uint32_t magic)
+{
+    const gow3_header_t *header = (const void *)buf;
+    size_t avail = buf_size > 0 ? (size_t) buf_size : 0;
+    uint32_t size;
+    uint32_t allocated;
+
+    /* never read header fields that the probe buffer does not contain */
+    if (avail < offsetof(gow3_header_t, magic) + sizeof(header->magic) ||
+            avail < offsetof(gow3_header_t, size) + sizeof(header->size) ||
+            avail < offsetof(gow3_header_t, allocated)
+                    + sizeof(header->allocated)) {
+        return 0;
+    }
+
+    /* the on-disk fields are little-endian: convert them, not the constant */
+    if (le32_to_cpu(header->magic) != magic) {
+        return 0;
+    }
+
+    size = le32_to_cpu(header->size);
+    allocated = le32_to_cpu(header->allocated);
+    if (size <= GOW_MAXBLOCKS && allocated <= size) {
+        return 10000;
+    }
+    return 0;
+}
+
+/* Probe callback of the "gow2" driver: defers to the common helper with
+ * GOW2_MAGIC as the expected magic number. */
+static int gow2_probe(const uint8_t *buf, int buf_size,
+        const char *filename)
+{
+    int score = gow23_probe_common(buf, buf_size, filename, GOW2_MAGIC);
+
+    return score;
+}
+
+/* Probe callback of the "gow3" driver: defers to the common helper with
+ * GOW3_MAGIC as the expected magic number. */
+static int gow3_probe(const uint8_t *buf, int buf_size,
+        const char *filename)
+{
+    int score = gow23_probe_common(buf, buf_size, filename, GOW3_MAGIC);
+
+    return score;
+}
+
+/*
+ * Open steps shared by gow2_open and gow3_open.
+ *
+ * bs:    block driver state; bs->opaque is the gow23_state_t to fill in.
+ * flags: open flags as passed to the driver's open callback.
+ * len:   number of header bytes to mmap() from the start of the image.
+ *
+ * Allocates the full-block scratch buffer, maps the image header and derives
+ * bs->total_sectors from the header's block count.
+ *
+ * Returns 0 on success, -EIO when the allocation or the mapping fails; on
+ * failure nothing allocated here is left behind.
+ */
+static int gow23_open_common(BlockDriverState *bs, int flags, size_t len)
+{
+    gow23_state_t *s = bs->opaque;
+    int prot = PROT_READ;
+
+    /* the callback receives block-layer BDRV_O_* flags, not open(2) flags */
+    if (flags & BDRV_O_RDWR) {
+        prot |= PROT_WRITE;
+    }
+
+    /* WARNING: relying on fd being the first field in BDRVRawState */
+    s->fd = *((int *) bs->file->opaque);
+
+    s->full_block = calloc(1, GOW_BLOCKSIZE);
+    if (s->full_block == NULL) {
+        return -EIO;
+    }
+
+    s->header = mmap(NULL, len, prot, MAP_SHARED, s->fd, 0);
+    if (s->header == MAP_FAILED) {
+        /* do not leak the scratch buffer when the mapping fails */
+        free(s->full_block);
+        s->full_block = NULL;
+        s->header = NULL;
+        return -EIO;
+    }
+
+    /* calculate total sectors */
+    bs->total_sectors = ((int64_t) le32_to_cpu(s->header->size)
+            * GOW_BLOCKSIZE) >> 9;
+
+    return 0;
+}
+
+/*
+ * Open callback of the "gow2" driver.
+ *
+ * Marks the state as non-GOW3, runs the common open path with a GOW2-sized
+ * header mapping and records GOW2_HEADER_SIZE as the base.
+ * Returns 0 on success or the negative error from gow23_open_common.
+ */
+static int gow2_open(BlockDriverState *bs, int flags)
+{
+    gow23_state_t *state = bs->opaque;
+    int rc;
+
+    state->is_gow3 = 0;
+
+    rc = gow23_open_common(bs, flags, sizeof(gow2_header_t));
+    if (rc >= 0) {
+        state->base = GOW2_HEADER_SIZE;
+        rc = 0;
+    }
+
+    return rc;
+}
+
+/*
+ * Open callback of the "gow3" driver.
+ *
+ * After the common open path succeeds, the session's block version is set to
+ * one more than the highest version found in the header's ver[] table (never
+ * lower than 2).  If that increment wraps to 0, the version restarts at 1,
+ * the carry value is bumped and cset is raised so the new carry gets stored
+ * by a later write.
+ * Returns 0 on success or the negative error from gow23_open_common.
+ */
+static int gow3_open(BlockDriverState *bs, int flags)
+{
+    gow23_state_t *state = bs->opaque;
+    uint32_t highest = 1;
+    uint32_t ver;
+    int rc;
+    int i;
+
+    state->is_gow3 = 1;
+
+    rc = gow23_open_common(bs, flags, sizeof(gow3_header_t));
+    if (rc < 0) {
+        return rc;
+    }
+
+    state->base = GOW3_HEADER_SIZE;
+    state->cset = 0;
+    state->carry = le32_to_cpu(state->header->carry);
+
+    /* find the highest block version currently recorded in the image */
+    for (i = 0; i < GOW_MAXBLOCKS; i++) {
+        ver = le32_to_cpu(state->header->ver[i]);
+        if (ver > highest) {
+            highest = ver;
+        }
+    }
+
+    state->version = highest + 1;
+    if (state->version == 0) {
+        /* version wrapped (highly unlikely in any lifetime);
+         * restart at 1 and increment the carry value */
+        state->version = 1;
+        state->carry += 1;
+        state->cset = 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Create a new, empty GOW2 or GOW3 image.
+ *
+ * filename: path of the image file to create (truncated if it exists).
+ * options:  creation options; size is required, while a backing file or
+ *           encryption request is rejected.
+ * is_gow3:  non-zero to write a GOW3 header, zero for GOW2.
+ *
+ * The image consists of the header only: every map entry is set to
+ * GOW_FREE_BLOCK and, for GOW3, every block version to 1 and the signature
+ * to pseudo-random data.
+ *
+ * Returns 0 on success, -ENOTSUP for unsupported options or a block count
+ * outside 1..GOW_MAXBLOCKS, and -EIO for allocation, open, write or close
+ * failures.
+ */
+static int gow23_create_common(const char *filename,
+        QEMUOptionParameter *options, int is_gow3)
+{
+    size_t header_size = is_gow3 ? GOW3_HEADER_SIZE : GOW2_HEADER_SIZE;
+    int64_t total_size = 0;     /* in 512-byte sectors */
+    const char *backing_file = NULL;
+    gow3_header_t *header;
+    int64_t blocks;
+    ssize_t written;
+    int flags = 0;
+    int index;
+    int ret = 0;
+    int fd;
+
+    /* Read out options */
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n / 512;
+        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+            backing_file = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
+            flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
+        }
+        options++;
+    }
+
+    /* validate before allocating or touching the file system */
+    blocks = total_size / (GOW_BLOCKSIZE >> 9);
+    if (flags || backing_file || blocks < 1 || blocks > GOW_MAXBLOCKS) {
+        return -ENOTSUP;
+    }
+
+    header = calloc(1, header_size);
+    if (header == NULL) {
+        return -EIO;
+    }
+
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+    if (fd < 0) {
+        free(header);
+        return -EIO;
+    }
+
+    /* create and write the header */
+    header->magic = cpu_to_le32(is_gow3 ? GOW3_MAGIC : GOW2_MAGIC);
+    header->size = cpu_to_le32(((uint32_t) blocks));
+    header->allocated = 0;
+
+    if (is_gow3) {
+        /* fill the signature one long at a time with random() output */
+        long int *sptr = (long int *) header->signature;
+        srandom((unsigned int) time(NULL));
+        for (index = 0;
+                index < (int) (sizeof(header->signature) / sizeof(long int));
+                ++index, ++sptr) {
+            *sptr = random();
+        }
+        header->carry = 0;
+        header->maint = 0;
+    }
+
+    for (index = 0; index < GOW_MAXBLOCKS; index++) {
+        header->map[index] = GOW_FREE_BLOCK;
+        if (is_gow3) {
+            header->ver[index] = cpu_to_le32(1);
+        }
+    }
+
+    written = write(fd, header, header_size);
+    if (written < 0 || (size_t) written != header_size) {
+        ret = -EIO;
+    }
+
+    /* close() may report a deferred write error; do not lose it */
+    if (close(fd) < 0) {
+        ret = -EIO;
+    }
+    free(header);
+
+    return ret;
+}
+
+/* Create callback of the "gow2" driver: builds a GOW2 image through the
+ * common creation helper and passes its result through unchanged. */
+static int gow2_create(const char *filename, QEMUOptionParameter *options)
+{
+    return gow23_create_common(filename, options, 0);
+}
+
+/* Create callback of the "gow3" driver: builds a GOW3 image through the
+ * common creation helper and passes its result through unchanged. */
+static int gow3_create(const char *filename, QEMUOptionParameter *options)
+{
+    return gow23_create_common(filename, options, 1);
+}
+
+/* Flush the underlying image file; returns whatever bdrv_flush() reports. */
+static int gow23_flush(BlockDriverState *bs)
+{
+    BlockDriverState *image_file = bs->file;
+
+    return bdrv_flush(image_file);
+}
+
+/* Asynchronous flush: forwarded unchanged to the underlying image file,
+ * which invokes cb(opaque, ret) on completion. */
+static BlockDriverAIOCB *gow23_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverState *image_file = bs->file;
+
+    return bdrv_aio_flush(image_file, cb, opaque);
+}
+
+/*
+ * Truncate callback: change the virtual size recorded in the image header.
+ *
+ * bs:     block driver state of the image.
+ * offset: new virtual size in bytes; rounded up to whole blocks.
+ *
+ * Only the header is rewritten; the image file length is left alone.
+ *
+ * Returns -ENOTSUP when the resulting block count is outside
+ * 1..GOW_MAXBLOCKS, otherwise the result of flushing the image file
+ * (0 on success).
+ */
+static int gow23_truncate(BlockDriverState *bs, int64_t offset)
+{
+    gow23_state_t *s = bs->opaque;
+    int64_t size;
+    uint32_t new_size;
+    uint32_t allocated;
+    uint32_t index;
+
+    /* reject out-of-range offsets up front so the rounding below cannot
+     * overflow; both cases were already answered with -ENOTSUP */
+    if (offset < 0 || offset > (int64_t) GOW_MAXBLOCKS * GOW_BLOCKSIZE) {
+        return -ENOTSUP;
+    }
+
+    /* calculate size in blocks, rounding up to nearest block */
+    size = (offset + (GOW_BLOCKSIZE - 1)) / GOW_BLOCKSIZE;
+    if (size < 1 || size > GOW_MAXBLOCKS) {
+        return -ENOTSUP;
+    }
+    new_size = (uint32_t) size;
+
+    /* adjust header */
+    s->header->size = cpu_to_le32(new_size);
+    allocated = le32_to_cpu(s->header->allocated);
+    if (allocated > new_size) {
+        allocated = new_size;
+        s->header->allocated = cpu_to_le32(allocated);
+    }
+
+    /* clear unallocated blocks, if any */
+    /* NOTE(review): this walks map[] from the allocated count up to the new
+     * size; whether those indices are the right entries to reset depends on
+     * the map layout defined in gow_int.h - confirm. */
+    for (index = allocated; index < new_size; ++index) {
+        s->header->map[index] = GOW_FREE_BLOCK;
+        if (s->is_gow3) {
+            s->header->ver[index] = cpu_to_le32(1);
+        }
+    }
+
+    /* XXX: actual image length doesn't change, even if making it smaller
+     * since the exact location of the blocks in the image file may be
+     * out of order */
+
+    /* report a failed flush instead of claiming success */
+    return gow23_flush(bs);
+}
+
+/* Per-request state for one asynchronous read or write. */
+typedef struct {
+    BlockDriverAIOCB common;    /* generic AIOCB; container_of() anchor */
+
+    int64_t sector;             /* next guest sector to process */
+    uint8_t *ptr;               /* current position in the data buffer */
+    void *orig_ptr;             /* bounce buffer start (only if niov > 1) */
+    int left;                   /* sectors still to process */
+
+    QEMUIOVector *qiov;         /* caller's I/O vector */
+    struct iovec hd_iov;        /* single iovec for the request on bs->file */
+    bool is_write;              /* selects the write or the read callback */
+    QEMUBH *bh;                 /* bottom half that starts the request */
+    QEMUIOVector hd_qiov;       /* I/O vector wrapping hd_iov */
+    BlockDriverAIOCB *hd_aiocb; /* request in flight on bs->file, or NULL */
+} gow23_aiocb_t;
+
+/* Cancel hook of the AIO pool: cancels the request in flight on the
+ * underlying file, if there is one, then releases the AIOCB. */
+static void gow23_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    gow23_aiocb_t *req = container_of(blockacb, gow23_aiocb_t, common);
+
+    if (req->hd_aiocb != NULL) {
+        bdrv_aio_cancel(req->hd_aiocb);
+    }
+
+    qemu_aio_release(req);
+}
+
+/* AIOCB pool used by gow23_aio_setup: fixes the per-request allocation size
+ * and the cancel hook. */
+static AIOPool gow23_aio_pool = {
+    .aiocb_size         = sizeof(gow23_aiocb_t),
+    .cancel             = gow23_aio_cancel,
+};
+
+/*
+ * Allocate and initialise the request state shared by reads and writes.
+ *
+ * bs, cb, opaque: passed on to qemu_aio_get().
+ * sector_num:     first guest sector of the request.
+ * qiov:           caller's I/O vector.
+ * nb_sectors:     number of sectors to transfer.
+ * is_write:       non-zero for a write request.
+ *
+ * A single-element vector is used in place.  A multi-element vector gets a
+ * contiguous bounce buffer (acb->orig_ptr), pre-filled with the caller's
+ * data for writes; the completion paths free it with qemu_vfree().
+ *
+ * Returns the new request, or NULL if qemu_aio_get() returned none.
+ */
+static gow23_aiocb_t *gow23_aio_setup(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
+{
+    gow23_aiocb_t *acb = qemu_aio_get(&gow23_aio_pool, bs, cb, opaque);
+    if (!acb) {
+        return NULL;
+    }
+
+    /* Start from a known state: gow23_schedule_bh() fails when acb->bh is
+     * non-NULL, so neither field may carry over a stale value. */
+    acb->bh = NULL;
+    acb->orig_ptr = NULL;
+
+    acb->hd_aiocb = NULL;
+    acb->sector = sector_num;
+    acb->qiov = qiov;
+    acb->is_write = is_write;
+    if (qiov->niov > 1) {
+        acb->ptr = acb->orig_ptr = qemu_blockalign(bs, qiov->size);
+        if (is_write) {
+            qemu_iovec_to_buffer(qiov, acb->ptr);
+        }
+    } else {
+        acb->ptr = (uint8_t *)qiov->iov->iov_base;
+    }
+    acb->left = nb_sectors;
+
+    return acb;
+}
+
+static void gow23_aio_write_cb(void *opaque, int ret);
+static void gow23_aio_read_cb(void *opaque, int ret);
+
+/* Bottom-half handler: disposes of the bottom half and starts the request
+ * by calling the read or write callback with a result of 0. */
+static void gow23_aio_rw_bh(void *opaque)
+{
+    gow23_aiocb_t *req = opaque;
+    QEMUBH *bh = req->bh;
+
+    req->bh = NULL;
+    qemu_bh_delete(bh);
+
+    if (!req->is_write) {
+        gow23_aio_read_cb(opaque, 0);
+    } else {
+        gow23_aio_write_cb(opaque, 0);
+    }
+}
+
+/*
+ * Create and schedule a bottom half that will run cb(acb).
+ *
+ * Returns 0 on success, -EIO if the request already owns a bottom half or a
+ * new one cannot be created.
+ */
+static int gow23_schedule_bh(QEMUBHFunc *cb, gow23_aiocb_t *acb)
+{
+    QEMUBH *bh;
+
+    if (acb->bh != NULL) {
+        return -EIO;
+    }
+
+    bh = qemu_bh_new(cb, acb);
+    acb->bh = bh;
+    if (bh == NULL) {
+        return -EIO;
+    }
+
+    qemu_bh_schedule(bh);
+    return 0;
+}
+
+/* Advance the request past `sectors` sectors: moves the buffer pointer and
+ * sector position forward and lowers the remaining count.  Should the count
+ * drop below zero, both the count and the sector position are reset to 0. */
+static void setup_next_block(gow23_aiocb_t *acb, uint32_t sectors)
+{
+    acb->sector += sectors;
+    acb->left -= sectors;
+    acb->ptr += (sectors << 9);
+
+    if (acb->left >= 0) {
+        return;
+    }
+
+    acb->left = 0;
+    acb->sector = 0;
+}
+
+/*
+ * Read state machine / completion callback.
+ *
+ * opaque: the gow23_aiocb_t of the request.
+ * ret:    0 on the initial call from the bottom half, afterwards the result
+ *         of the read issued on bs->file.
+ *
+ * Each pass handles one span of at most BLOCK_SECTORS(offset) sectors.  The
+ * span is zero-filled first; spans in a GOW_FREE_BLOCK block, or lying
+ * entirely past the end of the image file, stay zero and the loop moves on.
+ * Otherwise a read is issued on bs->file and this function runs again when
+ * it completes.  When nothing is left, or on error, the caller's callback
+ * is invoked and the request is released.
+ */
+static void gow23_aio_read_cb(void *opaque, int ret)
+{
+    gow23_aiocb_t *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+    gow23_state_t *s = bs->opaque;
+
+    acb->hd_aiocb = NULL;
+    if (ret < 0) {
+        goto done;
+    }
+
+redo:
+    if (acb->left == 0) {
+        /* request completed */
+        ret = 0;
+        goto done;
+    }
+
+    /* prepare next AIO request */
+    int64_t sector = acb->sector;
+    uint32_t block = BLOCK_NUM(sector);
+    uint32_t offset = BLOCK_OFFSET(sector);
+    uint32_t sectors = BLOCK_SECTORS(offset);
+    if (sectors > acb->left) {
+        sectors = acb->left;
+    }
+
+    /* pre-zero the span so free blocks and short reads return zeroes */
+    memset(acb->ptr, 0, sectors << 9);
+    if (s->header->map[block] == GOW_FREE_BLOCK) {
+        setup_next_block(acb, sectors);
+        goto redo;
+    }
+
+    /* avoid reading past the end of the file, which can happen with a
+     * legacy image that is not 64kb aligned */
+    /* NOTE(review): the lseek() result is not checked for failure (-1) */
+    int64_t ssector = IMG_OFFSET(block, offset) >> 9;
+    int64_t esector = lseek(s->fd, 0, SEEK_END) >> 9;
+    uint32_t rsectors;
+    if ((ssector + sectors) > esector) {
+        if ((esector - ssector) < 1) {
+            /* span lies entirely past EOF: keep the zeroes, move on */
+            setup_next_block(acb, sectors);
+            goto redo;
+        } else {
+            /* span straddles EOF: read only the part that exists */
+            rsectors = (uint32_t) (esector - ssector);
+        }
+    } else {
+        rsectors = sectors;
+    }
+
+    acb->hd_iov.iov_base = (void *)acb->ptr;
+    acb->hd_iov.iov_len = rsectors * 512;
+    qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+    acb->hd_aiocb = bdrv_aio_readv(bs->file,
+            ssector,
+            &acb->hd_qiov,
+            rsectors,
+            gow23_aio_read_cb, acb);
+
+    if (acb->hd_aiocb == NULL) {
+        ret = -EIO;
+        goto done;
+    }
+
+    /* advance by the full span, even if fewer sectors were requested */
+    setup_next_block(acb, sectors);
+    return;
+
+done:
+    /* multi-element vectors went through a bounce buffer: copy back, free */
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_ptr, acb->qiov->size);
+        qemu_vfree(acb->orig_ptr);
+    }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
+}
+
+/*
+ * Asynchronous read entry point.
+ *
+ * Sets up the request state and schedules a bottom half that starts the
+ * read.  Returns the request's AIOCB, or NULL when setup or scheduling
+ * fails (a bounce buffer allocated during setup is freed in that case).
+ */
+static BlockDriverAIOCB *gow23_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    gow23_aiocb_t *req;
+
+    req = gow23_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+    if (req == NULL) {
+        return NULL;
+    }
+
+    if (gow23_schedule_bh(gow23_aio_rw_bh, req) >= 0) {
+        return &req->common;
+    }
+
+    /* scheduling failed: undo the setup */
+    if (req->qiov->niov > 1) {
+        qemu_vfree(req->orig_ptr);
+    }
+    qemu_aio_release(req);
+    return NULL;
+}
+
+/*
+ * Write state machine / completion callback.
+ *
+ * opaque: the gow23_aiocb_t of the request.
+ * ret:    0 on the initial call from the bottom half, afterwards the result
+ *         of the write issued on bs->file.
+ *
+ * Each pass handles one span of at most BLOCK_SECTORS(offset) sectors.  An
+ * all-zero span aimed at a GOW_FREE_BLOCK block is skipped without
+ * allocating.  Otherwise a free block is allocated in the header first and,
+ * if the span does not cover the whole block, the block is written in full
+ * from the zero-padded s->full_block buffer.  For GOW3 the block's version
+ * (and a pending carry) is stored in the header.  When nothing is left, or
+ * on error, the caller's callback is invoked and the request is released.
+ */
+static void gow23_aio_write_cb(void *opaque, int ret)
+{
+    gow23_aiocb_t *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+    gow23_state_t *s = bs->opaque;
+
+    acb->hd_aiocb = NULL;
+
+    if (ret < 0) {
+        goto done;
+    }
+
+redo:
+    if (acb->left == 0) {
+        /* request completed */
+        ret = 0;
+        goto done;
+    }
+
+    int64_t sector = acb->sector;
+    uint32_t block = BLOCK_NUM(sector);
+    uint32_t offset = BLOCK_OFFSET(sector);
+    uint32_t sectors = BLOCK_SECTORS(offset);
+    if (sectors > acb->left) {
+        sectors = acb->left;
+    }
+
+
+    int new_block = 0;
+    if (s->header->map[block] == GOW_FREE_BLOCK) {
+        /* if it's zero-filled buffer, don't actually allocate */
+        uint32_t *wb;
+        int index;
+        int wlen = ((sectors << 9) / sizeof(uint32_t));
+
+        /* scan the span one 32-bit word at a time for a non-zero word */
+        for (index = 0, wb = (uint32_t *) acb->ptr;
+                index < wlen && *wb == 0; ++index, ++wb) {
+            /* empty */;
+        }
+        if (index >= wlen) {
+            setup_next_block(acb, sectors);
+            goto redo;
+        }
+        /* allocate the block */
+        if (le32_to_cpu(s->header->allocated) >=
le32_to_cpu(s->header->size)) {
+            /* image full */
+            ret = -ENOSPC;
+            goto done;
+        }
+        /* the map entry receives the current allocated count (stored as
+         * is), then the count is incremented */
+        s->header->map[block] = s->header->allocated;
+        uint32_t allocated = le32_to_cpu(s->header->allocated);
+        s->header->allocated = cpu_to_le32(allocated + 1);
+        new_block = 1;
+    }
+
+    int64_t wr_offset = IMG_OFFSET(block, offset) >> 9;
+    uint8_t *wr_data = acb->ptr;
+    uint32_t wr_len = sectors;
+
+    if (new_block && (sectors < (GOW_BLOCKSIZE/512))) {
+        /* write the full block */
+        /* NOTE(review): s->full_block is one buffer per image and stays in
+         * use until the write below completes - confirm that no second
+         * request can reach this point in the meantime. */
+        wr_offset = IMG_OFFSET(block, 0) >> 9;;
+        wr_data = s->full_block;
+        wr_len = GOW_BLOCKSIZE/512;
+
+        memset(wr_data, 0, GOW_BLOCKSIZE);
+        memcpy(wr_data + offset, acb->ptr, sectors * 512);
+    }
+
+    if (s->is_gow3) {
+        /* update block version */
+        s->header->ver[block] = cpu_to_le32(s->version);
+        if (s->cset) {
+            /* store the carry bumped at open time, once */
+            s->header->carry = cpu_to_le32(s->carry);
+            s->cset = 0;
+        }
+    }
+
+    acb->hd_iov.iov_base = (void *)wr_data;
+    acb->hd_iov.iov_len = wr_len * 512;
+    qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+    acb->hd_aiocb = bdrv_aio_writev(bs->file,
+            wr_offset,
+            &acb->hd_qiov, /* wr_data, */
+            wr_len,
+            gow23_aio_write_cb, acb);
+
+    if (acb->hd_aiocb == NULL) {
+        ret = -EIO;
+        goto done;
+    }
+    /* advance by the caller's span, not by the padded full-block length */
+    setup_next_block(acb, sectors);
+    return;
+
+done:
+    /* multi-element vectors went through a bounce buffer: free it */
+    if (acb->qiov->niov > 1) {
+        qemu_vfree(acb->orig_ptr);
+    }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
+}
+
+/*
+ * Asynchronous write entry point.
+ *
+ * Sets up the request state and schedules a bottom half that starts the
+ * write.  Returns the request's AIOCB, or NULL when setup or scheduling
+ * fails (a bounce buffer allocated during setup is freed in that case).
+ */
+static BlockDriverAIOCB *gow23_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    gow23_aiocb_t *req;
+
+    req = gow23_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+    if (req == NULL) {
+        return NULL;
+    }
+
+    if (gow23_schedule_bh(gow23_aio_rw_bh, req) >= 0) {
+        return &req->common;
+    }
+
+    /* scheduling failed: undo the setup */
+    if (req->qiov->niov > 1) {
+        qemu_vfree(req->orig_ptr);
+    }
+    qemu_aio_release(req);
+    return NULL;
+}
+
+/*
+ * Close callback shared by both drivers: unmaps the image header and frees
+ * the full-block scratch buffer.
+ *
+ * s->fd is deliberately NOT closed here.  It is a copy of the descriptor
+ * held in bs->file's private state (taken in gow23_open_common), so it
+ * belongs to bs->file; closing it here as well would close the same
+ * descriptor twice once bs->file is closed.
+ */
+static void gow23_close(BlockDriverState *bs)
+{
+    gow23_state_t *s = bs->opaque;
+
+    if (s->header != NULL && s->header != MAP_FAILED) {
+        munmap((void *) s->header, s->is_gow3 ? sizeof(gow3_header_t)
+                : sizeof(gow2_header_t));
+    }
+    s->header = NULL;
+
+    /* forget the borrowed descriptor; 0 would be a valid fd, so use -1 */
+    s->fd = -1;
+
+    free(s->full_block);
+    s->full_block = NULL;
+}
+
+/* Creation options advertised by both drivers; the list ends with a
+ * NULL-named entry.  Note that gow23_create_common answers -ENOTSUP whenever
+ * a backing file is actually supplied. */
+static QEMUOptionParameter gow23_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    {
+        .name = BLOCK_OPT_BACKING_FILE,
+        .type = OPT_STRING,
+        .help = "File name of a base image"
+    },
+    { NULL }
+};
+
+/* Driver table for the "gow2" format.  Only probe, open and create are
+ * GOW2-specific; close, truncate and the AIO callbacks are the shared
+ * gow23_* implementations. */
+static BlockDriver bdrv_gow2 = {
+    .format_name        = "gow2",
+    .instance_size      = sizeof(gow23_state_t),
+    .bdrv_probe         = gow2_probe,
+    .bdrv_open          = gow2_open,
+    .bdrv_close         = gow23_close,
+    .bdrv_create        = gow2_create,
+
+    .bdrv_truncate      = gow23_truncate,
+
+    .bdrv_aio_readv     = gow23_aio_readv,
+    .bdrv_aio_writev    = gow23_aio_writev,
+    .bdrv_aio_flush     = gow23_aio_flush,
+    .create_options     = gow23_create_options,
+};
+
+static BlockDriver bdrv_gow3 = {
+    .format_name        = "gow3",
+    .instance_size      = sizeof(gow23_state_t),
+    .bdrv_probe         = gow3_probe,
+    .bdrv_open          = gow3_open,
+    .bdrv_close         = gow23_close,
+    .bdrv_create        = gow3_create,
+
+    .bdrv_truncate      = gow23_truncate,
+