Patchwork [8/9] block: vhdx - add log write support

login
register
mail settings
Submitter Jeff Cody
Date July 24, 2013, 5:54 p.m.
Message ID <4a2d211444689a51f9e85bb8b2975e5551442ab9.1374687002.git.jcody@redhat.com>
Download mbox | patch
Permalink /patch/261492/
State New
Headers show

Comments

Jeff Cody - July 24, 2013, 5:54 p.m.
This adds support for writing to the VHDX log.

For spec details, see VHDX Specification Format v1.00:
https://www.microsoft.com/en-us/download/details.aspx?id=34750

There are a few limitations to this log support:
1.) There is no caching yet
2.) The log is flushed after each entry

The primary write interface, vhdx_log_write_and_flush(), performs a log
write followed by an immediate flush of the log.

As each log entry sector is a minimum of 4KB, partial sector writes are
filled in with data from the disk write destination.

If the current file log GUID is 0, a new GUID is generated and updated
in the header.

Signed-off-by: Jeff Cody <jcody@redhat.com>
---
 block/vhdx-log.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 block/vhdx.h     |   3 +
 2 files changed, 276 insertions(+)
Fam Zheng - July 30, 2013, 3:57 a.m.
On Wed, 07/24 13:54, Jeff Cody wrote:
> This adds support for writing to the VHDX log.
> 
> For spec details, see VHDX Specification Format v1.00:
> https://www.microsoft.com/en-us/download/details.aspx?id=34750
> 
> There are a few limitations to this log support:
> 1.) There is no caching yet
> 2.) The log is flushed after each entry
> 
> The primary write interface, vhdx_log_write_and_flush(), performs a log
> write followed by an immediate flush of the log.
> 
> As each log entry sector is a minimum of 4KB, partial sector writes are
> filled in with data from the disk write destination.
> 
> If the current file log GUID is 0, a new GUID is generated and updated
> in the header.
> 
> Signed-off-by: Jeff Cody <jcody@redhat.com>
> ---
>  block/vhdx-log.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/vhdx.h     |   3 +
>  2 files changed, 276 insertions(+)
> 
> diff --git a/block/vhdx-log.c b/block/vhdx-log.c
> index 89b9000..786b393 100644
> --- a/block/vhdx-log.c
> +++ b/block/vhdx-log.c
> @@ -170,6 +170,53 @@ exit:
>      return ret;
>  }
>  
> +/* Writes num_sectors to the log (all log sectors are 4096 bytes),
> + * from buffer 'buffer'.  Upon return, *sectors_written will contain
> + * the number of sectors successfully written.
> + *
> + * It is assumed that 'buffer' is at least 4096*num_sectors large.
> + *
> + * 0 is returned on success, -errno otherwise */
> +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
> +                                  uint32_t *sectors_written, void *buffer,
> +                                  uint32_t num_sectors)
> +{
> +    int ret = 0;
> +    uint64_t offset;
> +    uint32_t write;
> +    void *buffer_tmp;
> +    BDRVVHDXState *s = bs->opaque;
> +
> +    vhdx_user_visible_write(bs, s);
> +
> +    write = log->write;
> +
> +    buffer_tmp = buffer;
> +    while (num_sectors) {
> +
> +        offset = log->offset + write;
> +        write = vhdx_log_inc_idx(write, log->length);
> +        if (write == log->read) {
> +            /* full */
> +            break;
> +        }
> +        ret = bdrv_pwrite_sync(bs->file, offset, buffer_tmp,
> +                               VHDX_LOG_SECTOR_SIZE);
> +        if (ret < 0) {
> +            goto exit;
> +        }
> +        buffer_tmp += VHDX_LOG_SECTOR_SIZE;
> +
> +        log->write = write;
> +        *sectors_written = *sectors_written + 1;
> +        num_sectors--;
> +    }
> +
> +exit:
> +    return ret;
> +}
> +
> +
>  /* Validates a log entry header */
>  static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
>                                    BDRVVHDXState *s)
> @@ -732,3 +779,229 @@ exit:
>      return ret;
>  }
>  
> +
> +
> +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
> +                                      VHDXLogDataSector *sector, void *data,
> +                                      uint64_t seq)
> +{
> +    memcpy(&desc->leading_bytes, data, 8);
> +    data += 8;
> +    cpu_to_le64s(&desc->leading_bytes);
> +    memcpy(sector->data, data, 4084);
> +    data += 4084;
> +    memcpy(&desc->trailing_bytes, data, 4);
> +    cpu_to_le32s(&desc->trailing_bytes);
> +    data += 4;
> +
> +    sector->sequence_high  = (uint32_t) (seq >> 32);
> +    sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
> +    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
> +
> +    vhdx_log_desc_le_export(desc);
> +    vhdx_log_data_le_export(sector);
> +}
> +
> +
> +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
> +                          void *data, uint32_t length, uint64_t offset)
> +{
> +    int ret = 0;
> +    void *buffer = NULL;
> +    void *merged_sector = NULL;
> +    void *data_tmp, *sector_write;
> +    unsigned int i;
> +    int sector_offset;
> +    uint32_t desc_sectors, sectors, total_length;
> +    uint32_t sectors_written = 0;
> +    uint32_t aligned_length;
> +    uint32_t leading_length = 0;
> +    uint32_t trailing_length = 0;
> +    uint32_t partial_sectors = 0;
> +    uint32_t bytes_written = 0;
> +    uint64_t file_offset;
> +    VHDXHeader *header;
> +    VHDXLogEntryHeader new_hdr;
> +    VHDXLogDescriptor *new_desc = NULL;
> +    VHDXLogDataSector *data_sector = NULL;
> +    MSGUID new_guid = { 0 };
> +
> +    header = s->headers[s->curr_header];
> +
> +    /* need to have offset read data, and be on 4096 byte boundary */
> +
> +    if (length > header->log_length) {
> +        /* no log present.  we could create a log here instead of failing */

Does a newly created VHDX file have allocated log sectors?

> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    if (vhdx_log_guid_is_zero(&header->log_guid)) {
> +        vhdx_guid_generate(&new_guid);
> +        vhdx_update_headers(bs, s, false, &new_guid);
> +    } else {
> +        /* currently, we require that the log be flushed after
> +         * every write. */
> +        ret = -ENOTSUP;

Can we make an assertion here?

> +    }
> +
> +    /* 0 is an invalid sequence number, but may also represent the first
> +     * log write (or a wrapped seq) */
> +    if (s->log.sequence == 0) {
> +        s->log.sequence = 1;
> +    }
> +
> +    sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
> +    file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
> +
> +    aligned_length = length;
> +
> +    /* add in the unaligned head and tail bytes */
> +    if (sector_offset) {
> +        leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
> +        leading_length = leading_length > length ? length : leading_length;
> +        aligned_length -= leading_length;
> +        partial_sectors++;
> +    }
> +
> +    sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
> +    trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
> +    if (trailing_length) {
> +        partial_sectors++;
> +    }
> +
> +    sectors += partial_sectors;
> +
> +    /* sectors is now how many sectors the data itself takes, not
> +     * including the header and descriptor metadata */
> +
> +    new_hdr = (VHDXLogEntryHeader) {
> +                .signature           = VHDX_LOG_SIGNATURE,
> +                .tail                = s->log.tail,
> +                .sequence_number     = s->log.sequence,
> +                .descriptor_count    = sectors,
> +                .reserved            = 0,
> +                .flushed_file_offset = bdrv_getlength(bs->file),
> +                .last_file_offset    = bdrv_getlength(bs->file),
> +              };
> +
> +    memcpy(&new_hdr.log_guid, &header->log_guid, sizeof(MSGUID));
> +
> +    desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
> +
> +    total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
> +    new_hdr.entry_length = total_length;
> +
> +    vhdx_log_entry_hdr_le_export(&new_hdr);
> +
> +    buffer = qemu_blockalign(bs, total_length);
> +    memcpy(buffer, &new_hdr, sizeof(new_hdr));
> +
> +    new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr));
> +    data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
> +    data_tmp = data;
> +
> +    /* All log sectors are 4KB, so for any partial sectors we must
> +     * merge the data with preexisting data from the final file
> +     * destination */
> +    merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
> +
> +    for (i = 0; i < sectors; i++) {
> +        new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
> +        new_desc->sequence_number = s->log.sequence;
> +        new_desc->file_offset     = file_offset;
> +
> +        if (i == 0 && leading_length) {
> +            /* partial sector at the front of the buffer */
> +            ret = bdrv_pread(bs->file, file_offset, merged_sector,
> +                             VHDX_LOG_SECTOR_SIZE);
> +            if (ret < 0) {
> +                goto exit;
> +            }
> +            memcpy(merged_sector + sector_offset, data_tmp, leading_length);
> +            bytes_written = leading_length;
> +            sector_write = merged_sector;
> +        } else if (i == sectors - 1 && trailing_length) {
> +            /* partial sector at the end of the buffer */
> +            ret = bdrv_pread(bs->file,
> +                            file_offset,
> +                            merged_sector + trailing_length,
> +                            VHDX_LOG_SECTOR_SIZE - trailing_length);
> +            if (ret < 0) {
> +                goto exit;
> +            }
> +            memcpy(merged_sector, data_tmp, trailing_length);
> +            bytes_written = trailing_length;
> +            sector_write = merged_sector;
> +        } else {
> +            bytes_written = VHDX_LOG_SECTOR_SIZE;
> +            sector_write = data_tmp;
> +        }
> +
> +        /* populate the raw sector data into the proper structures,
> +         * as well as update the descriptor, and convert to proper
> +         * endianness */
> +        vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
> +                                  s->log.sequence);
> +
> +        data_tmp += bytes_written;
> +        data_sector++;
> +        new_desc++;
> +        file_offset += VHDX_LOG_SECTOR_SIZE;
> +    }
> +
> +    /* checksum covers entire entry, from the log header through the
> +     * last data sector */
> +    vhdx_update_checksum(buffer, total_length, 4);
> +    cpu_to_le32s((uint32_t *)(buffer + 4));
> +
> +    /* now write to the log */
> +    vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
> +                           desc_sectors + sectors);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +
> +    if (sectors_written != desc_sectors + sectors) {
> +        /* instead of failing, we could flush the log here */
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    s->log.sequence++;
> +    /* write new tail */
> +    s->log.tail = s->log.write;
> +
> +exit:
> +    qemu_vfree(buffer);
> +    qemu_vfree(merged_sector);
> +    return ret;
> +}
> +
> +/* Perform a log write, and then immediately flush the entire log */
> +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
> +                             void *data, uint32_t length, uint64_t offset)
> +{
> +    int ret = 0;
> +    VHDXLogSequence logs = { .valid = true,
> +                             .count = 1,
> +                             .hdr = { 0 } };
> +
> +
> +    ret = vhdx_log_write(bs, s, data, length, offset);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +    logs.log = s->log;
> +
> +    ret = vhdx_log_flush(bs, s, &logs);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +
> +    s->log = logs.log;
> +
> +exit:
> +    return ret;
> +}
> +
> diff --git a/block/vhdx.h b/block/vhdx.h
> index 24b126e..b210efc 100644
> --- a/block/vhdx.h
> +++ b/block/vhdx.h
> @@ -393,6 +393,9 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
>  
>  int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s);
>  
> +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
> +                             void *data, uint32_t length, uint64_t offset);
> +
>  static inline void leguid_to_cpus(MSGUID *guid)
>  {
>      le32_to_cpus(&guid->data1);
> -- 
> 1.8.1.4
> 
>
Jeff Cody - July 30, 2013, 2:11 p.m.
On Tue, Jul 30, 2013 at 11:57:20AM +0800, Fam Zheng wrote:
> On Wed, 07/24 13:54, Jeff Cody wrote:
> > This adds support for writing to the VHDX log.
> > 
> > For spec details, see VHDX Specification Format v1.00:
> > https://www.microsoft.com/en-us/download/details.aspx?id=34750
> > 
> > There are a few limitations to this log support:
> > 1.) There is no caching yet
> > 2.) The log is flushed after each entry
> > 
> > The primary write interface, vhdx_log_write_and_flush(), performs a log
> > write followed by an immediate flush of the log.
> > 
> > As each log entry sector is a minimum of 4KB, partial sector writes are
> > filled in with data from the disk write destination.
> > 
> > If the current file log GUID is 0, a new GUID is generated and updated
> > in the header.
> > 
> > Signed-off-by: Jeff Cody <jcody@redhat.com>
> > ---
> >  block/vhdx-log.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  block/vhdx.h     |   3 +
> >  2 files changed, 276 insertions(+)
> > 
> > diff --git a/block/vhdx-log.c b/block/vhdx-log.c
> > index 89b9000..786b393 100644
> > --- a/block/vhdx-log.c
> > +++ b/block/vhdx-log.c
> > @@ -170,6 +170,53 @@ exit:
> >      return ret;
> >  }
> >  
> > +/* Writes num_sectors to the log (all log sectors are 4096 bytes),
> > + * from buffer 'buffer'.  Upon return, *sectors_written will contain
> > + * the number of sectors successfully written.
> > + *
> > + * It is assumed that 'buffer' is at least 4096*num_sectors large.
> > + *
> > + * 0 is returned on success, -errno otherwise */
> > +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
> > +                                  uint32_t *sectors_written, void *buffer,
> > +                                  uint32_t num_sectors)
> > +{
> > +    int ret = 0;
> > +    uint64_t offset;
> > +    uint32_t write;
> > +    void *buffer_tmp;
> > +    BDRVVHDXState *s = bs->opaque;
> > +
> > +    vhdx_user_visible_write(bs, s);
> > +
> > +    write = log->write;
> > +
> > +    buffer_tmp = buffer;
> > +    while (num_sectors) {
> > +
> > +        offset = log->offset + write;
> > +        write = vhdx_log_inc_idx(write, log->length);
> > +        if (write == log->read) {
> > +            /* full */
> > +            break;
> > +        }
> > +        ret = bdrv_pwrite_sync(bs->file, offset, buffer_tmp,
> > +                               VHDX_LOG_SECTOR_SIZE);
> > +        if (ret < 0) {
> > +            goto exit;
> > +        }
> > +        buffer_tmp += VHDX_LOG_SECTOR_SIZE;
> > +
> > +        log->write = write;
> > +        *sectors_written = *sectors_written + 1;
> > +        num_sectors--;
> > +    }
> > +
> > +exit:
> > +    return ret;
> > +}
> > +
> > +
> >  /* Validates a log entry header */
> >  static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
> >                                    BDRVVHDXState *s)
> > @@ -732,3 +779,229 @@ exit:
> >      return ret;
> >  }
> >  
> > +
> > +
> > +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
> > +                                      VHDXLogDataSector *sector, void *data,
> > +                                      uint64_t seq)
> > +{
> > +    memcpy(&desc->leading_bytes, data, 8);
> > +    data += 8;
> > +    cpu_to_le64s(&desc->leading_bytes);
> > +    memcpy(sector->data, data, 4084);
> > +    data += 4084;
> > +    memcpy(&desc->trailing_bytes, data, 4);
> > +    cpu_to_le32s(&desc->trailing_bytes);
> > +    data += 4;
> > +
> > +    sector->sequence_high  = (uint32_t) (seq >> 32);
> > +    sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
> > +    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
> > +
> > +    vhdx_log_desc_le_export(desc);
> > +    vhdx_log_data_le_export(sector);
> > +}
> > +
> > +
> > +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
> > +                          void *data, uint32_t length, uint64_t offset)
> > +{
> > +    int ret = 0;
> > +    void *buffer = NULL;
> > +    void *merged_sector = NULL;
> > +    void *data_tmp, *sector_write;
> > +    unsigned int i;
> > +    int sector_offset;
> > +    uint32_t desc_sectors, sectors, total_length;
> > +    uint32_t sectors_written = 0;
> > +    uint32_t aligned_length;
> > +    uint32_t leading_length = 0;
> > +    uint32_t trailing_length = 0;
> > +    uint32_t partial_sectors = 0;
> > +    uint32_t bytes_written = 0;
> > +    uint64_t file_offset;
> > +    VHDXHeader *header;
> > +    VHDXLogEntryHeader new_hdr;
> > +    VHDXLogDescriptor *new_desc = NULL;
> > +    VHDXLogDataSector *data_sector = NULL;
> > +    MSGUID new_guid = { 0 };
> > +
> > +    header = s->headers[s->curr_header];
> > +
> > +    /* need to have offset read data, and be on 4096 byte boundary */
> > +
> > +    if (length > header->log_length) {
> > +        /* no log present.  we could create a log here instead of failing */
> 
> Does newly created vhdx have allocated log sectors?
> 

I don't know of any way to make Hyper-V create a file without an
allocated log area (I believe with the files I've generated, it
allocates a 1MB log between the header and the BAT region).

The spec says that "LogLength" in the header should be a multiple of
1MB.  And technically, 0 is a multiple of every number, so when
parsing the header I don't fail out on a zero-length log.  In
practice, I don't think Hyper-V creates files with zero-length logs,
but I don't think the spec rules it out.

So we could either allocate a log in the file at this point, or fail.

> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    if (vhdx_log_guid_is_zero(&header->log_guid)) {
> > +        vhdx_guid_generate(&new_guid);
> > +        vhdx_update_headers(bs, s, false, &new_guid);
> > +    } else {
> > +        /* currently, we require that the log be flushed after
> > +         * every write. */
> > +        ret = -ENOTSUP;
> 
> Can we make an assertion here?
>

I don't know if we should assert here - the VM could certainly
continue on if this is not the primary drive.

> > +    }
> > +
> > +    /* 0 is an invalid sequence number, but may also represent the first
> > +     * log write (or a wrapped seq) */
> > +    if (s->log.sequence == 0) {
> > +        s->log.sequence = 1;
> > +    }
> > +
> > +    sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
> > +    file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
> > +
> > +    aligned_length = length;
> > +
> > +    /* add in the unaligned head and tail bytes */
> > +    if (sector_offset) {
> > +        leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
> > +        leading_length = leading_length > length ? length : leading_length;
> > +        aligned_length -= leading_length;
> > +        partial_sectors++;
> > +    }
> > +
> > +    sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
> > +    trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
> > +    if (trailing_length) {
> > +        partial_sectors++;
> > +    }
> > +
> > +    sectors += partial_sectors;
> > +
> > +    /* sectors is now how many sectors the data itself takes, not
> > +     * including the header and descriptor metadata */
> > +
> > +    new_hdr = (VHDXLogEntryHeader) {
> > +                .signature           = VHDX_LOG_SIGNATURE,
> > +                .tail                = s->log.tail,
> > +                .sequence_number     = s->log.sequence,
> > +                .descriptor_count    = sectors,
> > +                .reserved            = 0,
> > +                .flushed_file_offset = bdrv_getlength(bs->file),
> > +                .last_file_offset    = bdrv_getlength(bs->file),
> > +              };
> > +
> > +    memcpy(&new_hdr.log_guid, &header->log_guid, sizeof(MSGUID));
> > +
> > +    desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
> > +
> > +    total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
> > +    new_hdr.entry_length = total_length;
> > +
> > +    vhdx_log_entry_hdr_le_export(&new_hdr);
> > +
> > +    buffer = qemu_blockalign(bs, total_length);
> > +    memcpy(buffer, &new_hdr, sizeof(new_hdr));
> > +
> > +    new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr));
> > +    data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
> > +    data_tmp = data;
> > +
> > +    /* All log sectors are 4KB, so for any partial sectors we must
> > +     * merge the data with preexisting data from the final file
> > +     * destination */
> > +    merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
> > +
> > +    for (i = 0; i < sectors; i++) {
> > +        new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
> > +        new_desc->sequence_number = s->log.sequence;
> > +        new_desc->file_offset     = file_offset;
> > +
> > +        if (i == 0 && leading_length) {
> > +            /* partial sector at the front of the buffer */
> > +            ret = bdrv_pread(bs->file, file_offset, merged_sector,
> > +                             VHDX_LOG_SECTOR_SIZE);
> > +            if (ret < 0) {
> > +                goto exit;
> > +            }
> > +            memcpy(merged_sector + sector_offset, data_tmp, leading_length);
> > +            bytes_written = leading_length;
> > +            sector_write = merged_sector;
> > +        } else if (i == sectors - 1 && trailing_length) {
> > +            /* partial sector at the end of the buffer */
> > +            ret = bdrv_pread(bs->file,
> > +                            file_offset,
> > +                            merged_sector + trailing_length,
> > +                            VHDX_LOG_SECTOR_SIZE - trailing_length);
> > +            if (ret < 0) {
> > +                goto exit;
> > +            }
> > +            memcpy(merged_sector, data_tmp, trailing_length);
> > +            bytes_written = trailing_length;
> > +            sector_write = merged_sector;
> > +        } else {
> > +            bytes_written = VHDX_LOG_SECTOR_SIZE;
> > +            sector_write = data_tmp;
> > +        }
> > +
> > +        /* populate the raw sector data into the proper structures,
> > +         * as well as update the descriptor, and convert to proper
> > +         * endianness */
> > +        vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
> > +                                  s->log.sequence);
> > +
> > +        data_tmp += bytes_written;
> > +        data_sector++;
> > +        new_desc++;
> > +        file_offset += VHDX_LOG_SECTOR_SIZE;
> > +    }
> > +
> > +    /* checksum covers entire entry, from the log header through the
> > +     * last data sector */
> > +    vhdx_update_checksum(buffer, total_length, 4);
> > +    cpu_to_le32s((uint32_t *)(buffer + 4));
> > +
> > +    /* now write to the log */
> > +    vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
> > +                           desc_sectors + sectors);
> > +    if (ret < 0) {
> > +        goto exit;
> > +    }
> > +
> > +    if (sectors_written != desc_sectors + sectors) {
> > +        /* instead of failing, we could flush the log here */
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    s->log.sequence++;
> > +    /* write new tail */
> > +    s->log.tail = s->log.write;
> > +
> > +exit:
> > +    qemu_vfree(buffer);
> > +    qemu_vfree(merged_sector);
> > +    return ret;
> > +}
> > +
> > +/* Perform a log write, and then immediately flush the entire log */
> > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
> > +                             void *data, uint32_t length, uint64_t offset)
> > +{
> > +    int ret = 0;
> > +    VHDXLogSequence logs = { .valid = true,
> > +                             .count = 1,
> > +                             .hdr = { 0 } };
> > +
> > +
> > +    ret = vhdx_log_write(bs, s, data, length, offset);
> > +    if (ret < 0) {
> > +        goto exit;
> > +    }
> > +    logs.log = s->log;
> > +
> > +    ret = vhdx_log_flush(bs, s, &logs);
> > +    if (ret < 0) {
> > +        goto exit;
> > +    }
> > +
> > +    s->log = logs.log;
> > +
> > +exit:
> > +    return ret;
> > +}
> > +
> > diff --git a/block/vhdx.h b/block/vhdx.h
> > index 24b126e..b210efc 100644
> > --- a/block/vhdx.h
> > +++ b/block/vhdx.h
> > @@ -393,6 +393,9 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
> >  
> >  int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s);
> >  
> > +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
> > +                             void *data, uint32_t length, uint64_t offset);
> > +
> >  static inline void leguid_to_cpus(MSGUID *guid)
> >  {
> >      le32_to_cpus(&guid->data1);
> > -- 
> > 1.8.1.4
> > 
> > 
> 
> -- 
> Fam

Patch

diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index 89b9000..786b393 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -170,6 +170,53 @@  exit:
     return ret;
 }
 
+/* Writes num_sectors to the log (all log sectors are 4096 bytes),
+ * from buffer 'buffer'.  Upon return, *sectors_written will contain
+ * the number of sectors successfully written.
+ *
+ * It is assumed that 'buffer' is at least 4096*num_sectors large.
+ *
+ * 0 is returned on success, -errno otherwise */
+static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+                                  uint32_t *sectors_written, void *buffer,
+                                  uint32_t num_sectors)
+{
+    int ret = 0;
+    uint64_t offset;
+    uint32_t write;
+    void *buffer_tmp;
+    BDRVVHDXState *s = bs->opaque;
+
+    vhdx_user_visible_write(bs, s);
+
+    write = log->write;
+
+    buffer_tmp = buffer;
+    while (num_sectors) {
+
+        offset = log->offset + write;
+        write = vhdx_log_inc_idx(write, log->length);
+        if (write == log->read) {
+            /* full */
+            break;
+        }
+        ret = bdrv_pwrite_sync(bs->file, offset, buffer_tmp,
+                               VHDX_LOG_SECTOR_SIZE);
+        if (ret < 0) {
+            goto exit;
+        }
+        buffer_tmp += VHDX_LOG_SECTOR_SIZE;
+
+        log->write = write;
+        *sectors_written = *sectors_written + 1;
+        num_sectors--;
+    }
+
+exit:
+    return ret;
+}
+
+
 /* Validates a log entry header */
 static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
                                   BDRVVHDXState *s)
@@ -732,3 +779,229 @@  exit:
     return ret;
 }
 
+
+
+static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
+                                      VHDXLogDataSector *sector, void *data,
+                                      uint64_t seq)
+{
+    memcpy(&desc->leading_bytes, data, 8);
+    data += 8;
+    cpu_to_le64s(&desc->leading_bytes);
+    memcpy(sector->data, data, 4084);
+    data += 4084;
+    memcpy(&desc->trailing_bytes, data, 4);
+    cpu_to_le32s(&desc->trailing_bytes);
+    data += 4;
+
+    sector->sequence_high  = (uint32_t) (seq >> 32);
+    sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
+    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
+
+    vhdx_log_desc_le_export(desc);
+    vhdx_log_data_le_export(sector);
+}
+
+
+static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
+                          void *data, uint32_t length, uint64_t offset)
+{
+    int ret = 0;
+    void *buffer = NULL;
+    void *merged_sector = NULL;
+    void *data_tmp, *sector_write;
+    unsigned int i;
+    int sector_offset;
+    uint32_t desc_sectors, sectors, total_length;
+    uint32_t sectors_written = 0;
+    uint32_t aligned_length;
+    uint32_t leading_length = 0;
+    uint32_t trailing_length = 0;
+    uint32_t partial_sectors = 0;
+    uint32_t bytes_written = 0;
+    uint64_t file_offset;
+    VHDXHeader *header;
+    VHDXLogEntryHeader new_hdr;
+    VHDXLogDescriptor *new_desc = NULL;
+    VHDXLogDataSector *data_sector = NULL;
+    MSGUID new_guid = { 0 };
+
+    header = s->headers[s->curr_header];
+
+    /* need to have offset read data, and be on 4096 byte boundary */
+
+    if (length > header->log_length) {
+        /* no log present.  we could create a log here instead of failing */
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    if (vhdx_log_guid_is_zero(&header->log_guid)) {
+        vhdx_guid_generate(&new_guid);
+        vhdx_update_headers(bs, s, false, &new_guid);
+    } else {
+        /* currently, we require that the log be flushed after
+         * every write. */
+        ret = -ENOTSUP;
+    }
+
+    /* 0 is an invalid sequence number, but may also represent the first
+     * log write (or a wrapped seq) */
+    if (s->log.sequence == 0) {
+        s->log.sequence = 1;
+    }
+
+    sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
+    file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
+
+    aligned_length = length;
+
+    /* add in the unaligned head and tail bytes */
+    if (sector_offset) {
+        leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
+        leading_length = leading_length > length ? length : leading_length;
+        aligned_length -= leading_length;
+        partial_sectors++;
+    }
+
+    sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
+    trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
+    if (trailing_length) {
+        partial_sectors++;
+    }
+
+    sectors += partial_sectors;
+
+    /* sectors is now how many sectors the data itself takes, not
+     * including the header and descriptor metadata */
+
+    new_hdr = (VHDXLogEntryHeader) {
+                .signature           = VHDX_LOG_SIGNATURE,
+                .tail                = s->log.tail,
+                .sequence_number     = s->log.sequence,
+                .descriptor_count    = sectors,
+                .reserved            = 0,
+                .flushed_file_offset = bdrv_getlength(bs->file),
+                .last_file_offset    = bdrv_getlength(bs->file),
+              };
+
+    memcpy(&new_hdr.log_guid, &header->log_guid, sizeof(MSGUID));
+
+    desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
+
+    total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
+    new_hdr.entry_length = total_length;
+
+    vhdx_log_entry_hdr_le_export(&new_hdr);
+
+    buffer = qemu_blockalign(bs, total_length);
+    memcpy(buffer, &new_hdr, sizeof(new_hdr));
+
+    new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr));
+    data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
+    data_tmp = data;
+
+    /* All log sectors are 4KB, so for any partial sectors we must
+     * merge the data with preexisting data from the final file
+     * destination */
+    merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+    for (i = 0; i < sectors; i++) {
+        new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
+        new_desc->sequence_number = s->log.sequence;
+        new_desc->file_offset     = file_offset;
+
+        if (i == 0 && leading_length) {
+            /* partial sector at the front of the buffer */
+            ret = bdrv_pread(bs->file, file_offset, merged_sector,
+                             VHDX_LOG_SECTOR_SIZE);
+            if (ret < 0) {
+                goto exit;
+            }
+            memcpy(merged_sector + sector_offset, data_tmp, leading_length);
+            bytes_written = leading_length;
+            sector_write = merged_sector;
+        } else if (i == sectors - 1 && trailing_length) {
+            /* partial sector at the end of the buffer */
+            ret = bdrv_pread(bs->file,
+                            file_offset,
+                            merged_sector + trailing_length,
+                            VHDX_LOG_SECTOR_SIZE - trailing_length);
+            if (ret < 0) {
+                goto exit;
+            }
+            memcpy(merged_sector, data_tmp, trailing_length);
+            bytes_written = trailing_length;
+            sector_write = merged_sector;
+        } else {
+            bytes_written = VHDX_LOG_SECTOR_SIZE;
+            sector_write = data_tmp;
+        }
+
+        /* populate the raw sector data into the proper structures,
+         * as well as update the descriptor, and convert to proper
+         * endianness */
+        vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
+                                  s->log.sequence);
+
+        data_tmp += bytes_written;
+        data_sector++;
+        new_desc++;
+        file_offset += VHDX_LOG_SECTOR_SIZE;
+    }
+
+    /* checksum covers entire entry, from the log header through the
+     * last data sector */
+    vhdx_update_checksum(buffer, total_length, 4);
+    cpu_to_le32s((uint32_t *)(buffer + 4));
+
+    /* now write to the log */
+    vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
+                           desc_sectors + sectors);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    if (sectors_written != desc_sectors + sectors) {
+        /* instead of failing, we could flush the log here */
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    s->log.sequence++;
+    /* write new tail */
+    s->log.tail = s->log.write;
+
+exit:
+    qemu_vfree(buffer);
+    qemu_vfree(merged_sector);
+    return ret;
+}
+
+/* Perform a log write, and then immediately flush the entire log */
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                             void *data, uint32_t length, uint64_t offset)
+{
+    int ret = 0;
+    VHDXLogSequence logs = { .valid = true,
+                             .count = 1,
+                             .hdr = { 0 } };
+
+
+    ret = vhdx_log_write(bs, s, data, length, offset);
+    if (ret < 0) {
+        goto exit;
+    }
+    logs.log = s->log;
+
+    ret = vhdx_log_flush(bs, s, &logs);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    s->log = logs.log;
+
+exit:
+    return ret;
+}
+
diff --git a/block/vhdx.h b/block/vhdx.h
index 24b126e..b210efc 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -393,6 +393,9 @@  bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
 
 int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s);
 
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                             void *data, uint32_t length, uint64_t offset);
+
 static inline void leguid_to_cpus(MSGUID *guid)
 {
     le32_to_cpus(&guid->data1);