Patchwork [v6,10/20] block: vhdx - add log write support

login
register
mail settings
Submitter Jeff Cody
Date Sept. 25, 2013, 9:02 p.m.
Message ID <c03ab72ff15e66ca3f06066269ce00f64b42b10b.1380141614.git.jcody@redhat.com>
Download mbox | patch
Permalink /patch/278031/
State New
Headers show

Comments

Jeff Cody - Sept. 25, 2013, 9:02 p.m.
This adds support for writing to the VHDX log.

For spec details, see VHDX Specification Format v1.00:
https://www.microsoft.com/en-us/download/details.aspx?id=34750

There are a few limitations to this log support:
1.) There is no caching yet
2.) The log is flushed after each entry

The primary write interface, vhdx_log_write_and_flush(), performs a log
write followed by an immediate flush of the log.

As each log entry sector is a minimum of 4KB, partial sector writes are
filled in with data from the disk write destination.

If the current file log GUID is 0, a new GUID is generated and updated
in the header.

Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/vhdx-log.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 block/vhdx.h     |   3 +
 2 files changed, 279 insertions(+)
Stefan Hajnoczi - Oct. 1, 2013, 1:04 p.m.
On Wed, Sep 25, 2013 at 05:02:55PM -0400, Jeff Cody wrote:
> +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
> +                          void *data, uint32_t length, uint64_t offset)
> +{
> +    int ret = 0;
> +    void *buffer = NULL;
> +    void *merged_sector = NULL;
> +    void *data_tmp, *sector_write;
> +    unsigned int i;
> +    int sector_offset;
> +    uint32_t desc_sectors, sectors, total_length;
> +    uint32_t sectors_written = 0;
> +    uint32_t aligned_length;
> +    uint32_t leading_length = 0;
> +    uint32_t trailing_length = 0;
> +    uint32_t partial_sectors = 0;
> +    uint32_t bytes_written = 0;
> +    uint64_t file_offset;
> +    VHDXHeader *header;
> +    VHDXLogEntryHeader new_hdr;
> +    VHDXLogDescriptor *new_desc = NULL;
> +    VHDXLogDataSector *data_sector = NULL;
> +    MSGUID new_guid = { 0 };
> +
> +    header = s->headers[s->curr_header];
> +
> +    /* need to have offset read data, and be on 4096 byte boundary */
> +
> +    if (length > header->log_length) {
> +        /* no log present.  we could create a log here instead of failing */
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    if (guid_eq(header->log_guid, zero_guid)) {
> +        vhdx_guid_generate(&new_guid);
> +        vhdx_update_headers(bs, s, false, &new_guid);
> +    } else {
> +        /* currently, we require that the log be flushed after
> +         * every write. */
> +        ret = -ENOTSUP;

Missing goto exit?
Jeff Cody - Oct. 1, 2013, 1:26 p.m.
On Tue, Oct 01, 2013 at 03:04:15PM +0200, Stefan Hajnoczi wrote:
> On Wed, Sep 25, 2013 at 05:02:55PM -0400, Jeff Cody wrote:
> > +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
> > +                          void *data, uint32_t length, uint64_t offset)
> > +{
> > +    int ret = 0;
> > +    void *buffer = NULL;
> > +    void *merged_sector = NULL;
> > +    void *data_tmp, *sector_write;
> > +    unsigned int i;
> > +    int sector_offset;
> > +    uint32_t desc_sectors, sectors, total_length;
> > +    uint32_t sectors_written = 0;
> > +    uint32_t aligned_length;
> > +    uint32_t leading_length = 0;
> > +    uint32_t trailing_length = 0;
> > +    uint32_t partial_sectors = 0;
> > +    uint32_t bytes_written = 0;
> > +    uint64_t file_offset;
> > +    VHDXHeader *header;
> > +    VHDXLogEntryHeader new_hdr;
> > +    VHDXLogDescriptor *new_desc = NULL;
> > +    VHDXLogDataSector *data_sector = NULL;
> > +    MSGUID new_guid = { 0 };
> > +
> > +    header = s->headers[s->curr_header];
> > +
> > +    /* need to have offset read data, and be on 4096 byte boundary */
> > +
> > +    if (length > header->log_length) {
> > +        /* no log present.  we could create a log here instead of failing */
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    if (guid_eq(header->log_guid, zero_guid)) {
> > +        vhdx_guid_generate(&new_guid);
> > +        vhdx_update_headers(bs, s, false, &new_guid);
> > +    } else {
> > +        /* currently, we require that the log be flushed after
> > +         * every write. */
> > +        ret = -ENOTSUP;
> 
> Missing goto exit?

Oof!  Yes it is, thanks.
Stefan Hajnoczi - Oct. 1, 2013, 1:30 p.m.
On Wed, Sep 25, 2013 at 05:02:55PM -0400, Jeff Cody wrote:
> +/* Perform a log write, and then immediately flush the entire log */
> +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
> +                             void *data, uint32_t length, uint64_t offset)
> +{
> +    int ret = 0;
> +    VHDXLogSequence logs = { .valid = true,
> +                             .count = 1,
> +                             .hdr = { 0 } };
> +
> +
> +    ret = vhdx_log_write(bs, s, data, length, offset);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +    logs.log = s->log;

I don't see a bdrv_co_flush() that ensures the log has been persisted
before we begin applying log entries to the file.

Without the flush there is no guarantee that the log entries will be
accessible in case of power failure during vhdx_log_write_and_flush().

Stefan

Patch

diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index 10a87cc..09cb80b 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -156,6 +156,55 @@  exit:
     return ret;
 }
 
+/* Writes num_sectors to the log (all log sectors are 4096 bytes),
+ * from buffer 'buffer'.  Upon return, *sectors_written will have been
+ * incremented once for every sector successfully written; it is not
+ * reset here, so the caller must initialize it.
+ *
+ * Writing starts at log->write.  If advancing the write index would make
+ * it equal to log->read the log is full: the loop stops early and no
+ * error is reported, so a caller detects this case by comparing
+ * *sectors_written against num_sectors.  log->write is only advanced
+ * past sectors that were actually written.
+ *
+ * It is assumed that 'buffer' is at least 4096*num_sectors large.
+ *
+ * 0 is returned on success, -errno otherwise */
+static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+                                  uint32_t *sectors_written, void *buffer,
+                                  uint32_t num_sectors)
+{
+    int ret = 0;
+    uint64_t offset;
+    uint32_t write;
+    void *buffer_tmp;
+    BDRVVHDXState *s = bs->opaque;
+
+    ret = vhdx_user_visible_write(bs, s);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    write = log->write;
+
+    buffer_tmp = buffer;
+    while (num_sectors) {
+
+        offset = log->offset + write;
+        write = vhdx_log_inc_idx(write, log->length);
+        if (write == log->read) {
+            /* full */
+            break;
+        }
+        ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE);
+        if (ret < 0) {
+            goto exit;
+        }
+        buffer_tmp += VHDX_LOG_SECTOR_SIZE;
+
+        /* commit the new write index only once the sector is written */
+        log->write = write;
+        *sectors_written = *sectors_written + 1;
+        num_sectors--;
+    }
+
+    /* The calls above may report success with a positive value; honor the
+     * documented contract and return exactly 0 on success */
+    ret = 0;
+
+exit:
+    return ret;
+}
+
+
 /* Validates a log entry header */
 static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
                                   BDRVVHDXState *s)
@@ -721,3 +770,230 @@  exit:
 }
 
 
+
+/* Pack one raw log sector from 'data' into a descriptor / data sector pair.
+ *
+ * The raw sector is consumed as 8 + 4084 + 4 = 4096 bytes: the first 8
+ * bytes are stored in desc->leading_bytes, the next 4084 in sector->data,
+ * and the final 4 in desc->trailing_bytes.  'seq' is split into the data
+ * sector's sequence_high / sequence_low halves and the data signature is
+ * set, after which both structures are run through their le_export
+ * helpers. */
+static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
+                                      VHDXLogDataSector *sector, void *data,
+                                      uint64_t seq)
+{
+    const uint8_t *raw = data;
+
+    /* head: the first 8 bytes are carried by the descriptor */
+    memcpy(&desc->leading_bytes, raw, 8);
+    cpu_to_le64s(&desc->leading_bytes);
+
+    /* body: the middle 4084 bytes are carried by the data sector */
+    memcpy(sector->data, raw + 8, 4084);
+
+    /* tail: the last 4 bytes are carried by the descriptor */
+    memcpy(&desc->trailing_bytes, raw + 8 + 4084, 4);
+    cpu_to_le32s(&desc->trailing_bytes);
+
+    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
+    sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
+    sector->sequence_high  = (uint32_t) (seq >> 32);
+
+    vhdx_log_desc_le_export(desc);
+    vhdx_log_data_le_export(sector);
+}
+
+
+/* Build one log entry describing a write of 'length' bytes from 'data' to
+ * file offset 'offset', and write that entry to the log.
+ *
+ * The entry consists of an entry header, one descriptor per data sector,
+ * and the data sectors themselves.  Log sectors are VHDX_LOG_SECTOR_SIZE
+ * bytes, so an unaligned head and/or tail of the write is merged with the
+ * bytes currently stored at the destination to form complete sectors.
+ *
+ * Fails with -EINVAL if 'length' exceeds the header's log_length or if the
+ * log could not take the whole entry, and with -ENOTSUP if the current
+ * header already carries a non-zero log GUID.  On success the log sequence
+ * number is incremented and the log tail is moved to the write index.
+ *
+ * 0 is returned on success, -errno otherwise */
+static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
+                          void *data, uint32_t length, uint64_t offset)
+{
+    int ret = 0;
+    void *buffer = NULL;
+    void *merged_sector = NULL;
+    void *data_tmp, *sector_write;
+    unsigned int i;
+    int sector_offset;
+    uint32_t desc_sectors, sectors, total_length;
+    uint32_t sectors_written = 0;
+    uint32_t aligned_length;
+    uint32_t leading_length = 0;
+    uint32_t trailing_length = 0;
+    uint32_t partial_sectors = 0;
+    uint32_t bytes_written = 0;
+    uint64_t file_offset;
+    VHDXHeader *header;
+    VHDXLogEntryHeader new_hdr;
+    VHDXLogDescriptor *new_desc = NULL;
+    VHDXLogDataSector *data_sector = NULL;
+    MSGUID new_guid = { 0 };
+
+    header = s->headers[s->curr_header];
+
+    /* 'offset' and 'length' need not be sector aligned; partial sectors
+     * are completed with data read from the file further below */
+
+    if (length > header->log_length) {
+        /* no log present.  we could create a log here instead of failing */
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    if (guid_eq(header->log_guid, zero_guid)) {
+        vhdx_guid_generate(&new_guid);
+        /* NOTE(review): the result of vhdx_update_headers() is not checked;
+         * if it can fail, the error should be propagated here - confirm
+         * against its declaration */
+        vhdx_update_headers(bs, s, false, &new_guid);
+    } else {
+        /* currently, we require that the log be flushed after
+         * every write. */
+        ret = -ENOTSUP;
+        goto exit;
+    }
+
+    /* 0 is an invalid sequence number, but may also represent the first
+     * log write (or a wrapped seq) */
+    if (s->log.sequence == 0) {
+        s->log.sequence = 1;
+    }
+
+    sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
+    file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
+
+    aligned_length = length;
+
+    /* add in the unaligned head and tail bytes */
+    if (sector_offset) {
+        leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
+        leading_length = leading_length > length ? length : leading_length;
+        aligned_length -= leading_length;
+        partial_sectors++;
+    }
+
+    sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
+    trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
+    if (trailing_length) {
+        partial_sectors++;
+    }
+
+    sectors += partial_sectors;
+
+    /* sectors is now how many sectors the data itself takes, not
+     * including the header and descriptor metadata */
+
+    /* NOTE(review): bdrv_getlength() is called twice and its result is not
+     * checked for an error value before being stored - confirm whether a
+     * failure needs to be handled here */
+    new_hdr = (VHDXLogEntryHeader) {
+                .signature           = VHDX_LOG_SIGNATURE,
+                .tail                = s->log.tail,
+                .sequence_number     = s->log.sequence,
+                .descriptor_count    = sectors,
+                .reserved            = 0,
+                .flushed_file_offset = bdrv_getlength(bs->file),
+                .last_file_offset    = bdrv_getlength(bs->file),
+              };
+
+    new_hdr.log_guid = header->log_guid;
+
+    desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
+
+    total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
+    new_hdr.entry_length = total_length;
+
+    vhdx_log_entry_hdr_le_export(&new_hdr);
+
+    buffer = qemu_blockalign(bs, total_length);
+    /* The header and descriptors do not necessarily fill the descriptor
+     * sectors; clear the buffer so that the unused space is checksummed
+     * and written with defined contents */
+    memset(buffer, 0, total_length);
+    memcpy(buffer, &new_hdr, sizeof(new_hdr));
+
+    new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr));
+    data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
+    data_tmp = data;
+
+    /* All log sectors are 4KB, so for any partial sectors we must
+     * merge the data with preexisting data from the final file
+     * destination */
+    merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+    for (i = 0; i < sectors; i++) {
+        new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
+        new_desc->sequence_number = s->log.sequence;
+        new_desc->file_offset     = file_offset;
+
+        if (i == 0 && leading_length) {
+            /* partial sector at the front of the buffer: start from the
+             * whole existing sector and overlay the new bytes at the
+             * in-sector offset */
+            ret = bdrv_pread(bs->file, file_offset, merged_sector,
+                             VHDX_LOG_SECTOR_SIZE);
+            if (ret < 0) {
+                goto exit;
+            }
+            memcpy(merged_sector + sector_offset, data_tmp, leading_length);
+            bytes_written = leading_length;
+            sector_write = merged_sector;
+        } else if (i == sectors - 1 && trailing_length) {
+            /* partial sector at the end of the buffer: the new bytes
+             * occupy the first trailing_length bytes, so the existing
+             * bytes that must be preserved are the ones that follow them
+             * in the file */
+            ret = bdrv_pread(bs->file,
+                            file_offset + trailing_length,
+                            merged_sector + trailing_length,
+                            VHDX_LOG_SECTOR_SIZE - trailing_length);
+            if (ret < 0) {
+                goto exit;
+            }
+            memcpy(merged_sector, data_tmp, trailing_length);
+            bytes_written = trailing_length;
+            sector_write = merged_sector;
+        } else {
+            /* full sector, taken straight from the caller's data */
+            bytes_written = VHDX_LOG_SECTOR_SIZE;
+            sector_write = data_tmp;
+        }
+
+        /* populate the raw sector data into the proper structures,
+         * as well as update the descriptor, and convert to proper
+         * endianness */
+        vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
+                                  s->log.sequence);
+
+        data_tmp += bytes_written;
+        data_sector++;
+        new_desc++;
+        file_offset += VHDX_LOG_SECTOR_SIZE;
+    }
+
+    /* checksum covers entire entry, from the log header through the
+     * last data sector */
+    vhdx_update_checksum(buffer, total_length,
+                         offsetof(VHDXLogEntryHeader, checksum));
+    cpu_to_le32s((uint32_t *)(buffer + 4));
+
+    /* now write to the log */
+    ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
+                                 desc_sectors + sectors);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    if (sectors_written != desc_sectors + sectors) {
+        /* instead of failing, we could flush the log here */
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    s->log.sequence++;
+    /* write new tail */
+    s->log.tail = s->log.write;
+
+    /* earlier calls may have left a positive value in ret */
+    ret = 0;
+
+exit:
+    qemu_vfree(buffer);
+    qemu_vfree(merged_sector);
+    return ret;
+}
+
+/* Perform a log write, and then immediately flush the entire log.
+ *
+ * 'length' bytes from 'data', destined for file offset 'offset', are
+ * written as a log entry by vhdx_log_write().  The image is then flushed
+ * so that the entry is on stable storage before vhdx_log_flush() is run on
+ * a one-entry sequence built from the current log state.  On success the
+ * log state left behind by vhdx_log_flush() is stored back into s->log.
+ *
+ * Returns a negative errno value on failure. */
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                             void *data, uint32_t length, uint64_t offset)
+{
+    int ret = 0;
+    VHDXLogSequence logs = { .valid = true,
+                             .count = 1,
+                             .hdr = { 0 } };
+
+
+    ret = vhdx_log_write(bs, s, data, length, offset);
+    if (ret < 0) {
+        goto exit;
+    }
+    logs.log = s->log;
+
+    /* Make sure the log entry is stable on disk before it is flushed to
+     * its final location; otherwise the entry may not be available for
+     * replay after a power failure part way through the flush */
+    ret = bdrv_flush(bs);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    ret = vhdx_log_flush(bs, s, &logs);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    s->log = logs.log;
+
+exit:
+    return ret;
+}
+
diff --git a/block/vhdx.h b/block/vhdx.h
index 831aa13..4f32c01 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -396,6 +396,9 @@  bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
 
 int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed);
 
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                             void *data, uint32_t length, uint64_t offset);
+
 static inline void leguid_to_cpus(MSGUID *guid)
 {
     le32_to_cpus(&guid->data1);