diff mbox

[RFC,6/8] Add VMA backup archive writer Python module

Message ID 1362867748-30528-7-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi March 9, 2013, 10:22 p.m. UTC
The vma module provides an interface for writing VMA backup archives:

  writer = vma.Writer(open('test.vma', 'wb'))
  writer.add_config('guest.xml', '<guest></guest>')
  stream_id = writer.add_stream('foo',  # name
                                65536)  # size
  writer.write(stream_id, 0, '\0' * 32768)
  writer.write(stream_id, 32768, '\1' * 32768)
  writer.close()

The Writer handles sequential writes that are not cluster-aligned.  This
is typically only the vmstate.  Disk writes are 64 KB aligned in
practice.

VMA supports zero regions within a 64 KB cluster.  The vma module does
not implement this; the full cluster is always written.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 vma.py | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 236 insertions(+)
 create mode 100644 vma.py
diff mbox

Patch

diff --git a/vma.py b/vma.py
new file mode 100644
index 0000000..236ba14
--- /dev/null
+++ b/vma.py
@@ -0,0 +1,236 @@ 
+# VMA writer module
+#
+# Copyright 2013 Red Hat, Inc. and/or its affiliates
+#
+# Authors:
+#   Stefan Hajnoczi <stefanha@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+import array
+import struct
+import hashlib
+import uuid
+import time
+
+__all__ = ['Writer']
+
+# Magic numbers: ASCII 'VMA\0' and 'VMAE' read as big-endian 32-bit values
+VMA_MAGIC = 0x564d4100
+VMA_EXTENT_MAGIC = 0x564d4145
+VMA_VERSION = 1
+
+# Sizing limits used when laying out the header and the extents
+VMA_MAX_CONFIGS = 256            # slots in each config pointer table
+VMA_CLUSTER_SIZE = 64 * 1024     # bytes of stream data per cluster
+VMA_BLOCKS_PER_EXTENT = 59       # blockinfo slots (clusters) per extent
+
+# Fixed-size records, big-endian:
+#   header:   magic, version, uuid[16], ctime, md5sum[16],
+#             blob buffer offset, blob buffer size, header size
+#   dev_info: name blob offset, 32-bit zero, size, two 64-bit zeros
+#   extent:   magic, 2 pad bytes, block count, uuid[16], md5sum[16]
+header_struct = struct.Struct('>II16cQ16cIII')
+dev_info_struct = struct.Struct('>IIQQQ')
+extent_struct = struct.Struct('>I2xH16c16c')
+
+# Scalar helpers; note that blob lengths are little-endian
+le16_struct = struct.Struct('<H')
+be32_struct = struct.Struct('>I')
+be64_struct = struct.Struct('>Q')
+
+class Writer(object):
+    '''Write a VMA backup archive to a file object.
+
+    Register configs and streams with add_config()/add_stream(), feed
+    stream data through write() and finish with close().  The archive
+    header is emitted on the first write() (or by close() when nothing
+    was written), so configs and streams must be registered before that.
+
+    All data is handled as byte strings.  Config/stream names and config
+    data given as text are encoded as UTF-8.
+    '''
+
+    # Device info slot 0 is left zeroed, so 255 of the 256 slots are usable
+    MAX_STREAMS = 255
+
+    def __init__(self, fobj):
+        '''Create a writer that emits the archive to fobj.
+
+        fobj is a file-like object with a write() method that accepts
+        byte strings.  It is never closed by this class.
+        '''
+        self.fobj = fobj
+        self.uuid = uuid.uuid4().bytes
+        self.streams = []        # (name, size); stream id is index + 1
+        # The first blob byte is a placeholder so no blob gets offset 0,
+        # which is the value found in unused config pointer slots.
+        self.blobs = [b'\0']
+        self.blob_offset = 1
+        self.config_names = []   # blob offsets of config names
+        self.config_data = []    # blob offsets of config contents
+        self.header_written = False
+        self.align_bufs = {}     # stream id -> (bufs, start offset, nbytes)
+        self.extent = []         # pending (stream id, offset, cluster data)
+
+    @staticmethod
+    def _to_bytes(value):
+        '''Return value as a byte string, encoding text as UTF-8'''
+        if isinstance(value, bytes):
+            return value
+        return value.encode('utf-8')
+
+    @staticmethod
+    def _byte_fields(buf):
+        '''Split a byte string into 1-byte strings for 'c' struct fields'''
+        return tuple(buf[i:i + 1] for i in range(len(buf)))
+
+    @staticmethod
+    def _insert_md5(buf, offset):
+        '''Return buf with its MD5 digest stored at offset.
+
+        The digest is computed over buf as passed in, i.e. with the 16
+        byte checksum field still zeroed.
+        '''
+        digest = hashlib.md5(buf).digest()
+        return buf[:offset] + digest + buf[offset + len(digest):]
+
+    def _check_header_pending(self):
+        '''Raise RuntimeError if the header has already been written'''
+        if self.header_written:
+            raise RuntimeError('configs and streams must be added before '
+                               'the first write()')
+
+    def alloc_blob(self, blob):
+        '''Return allocated blob buffer offset
+
+        The blob is stored with a 16-bit little-endian length prefix, so
+        struct.error is raised for blobs longer than 65535 bytes.
+        '''
+        blob = self._to_bytes(blob)
+        offset = self.blob_offset
+        self.blobs.append(le16_struct.pack(len(blob)))
+        self.blobs.append(blob)
+        self.blob_offset += le16_struct.size + len(blob)
+        return offset
+
+    def alloc_blob_str(self, s):
+        '''Return allocated blob buffer offset for NUL-terminated string'''
+        return self.alloc_blob(self._to_bytes(s) + b'\0')
+
+    def build_dev_info(self):
+        '''Return a buffer with device infos
+
+        Allocates one name blob per stream as a side effect, so this must
+        run before build_blob_buffer() and only once.
+        '''
+        bufs = [b'\0' * dev_info_struct.size]  # slot 0 is never used
+        for name, size in self.streams:
+            name_ptr = self.alloc_blob_str(name)
+            bufs.append(dev_info_struct.pack(name_ptr, 0, size, 0, 0))
+        unused = self.MAX_STREAMS - len(self.streams)
+        bufs.append(b'\0' * (unused * dev_info_struct.size))
+        return b''.join(bufs)
+
+    def build_blob_buffer(self):
+        '''Return a buffer with blob data'''
+        return b''.join(self.blobs)
+
+    def build_config(self):
+        '''Return a buffer with config names and data
+
+        The result is two tables of VMA_MAX_CONFIGS big-endian 32-bit
+        blob offsets (names first, then data), zero-padded.
+        '''
+        bufs = []
+        for table in (self.config_names, self.config_data):
+            for ptr in table:
+                bufs.append(be32_struct.pack(ptr))
+            unused = VMA_MAX_CONFIGS - len(table)
+            bufs.append(b'\0' * (unused * be32_struct.size))
+        return b''.join(bufs)
+
+    def write_header(self):
+        '''Build, checksum and write the archive header'''
+        # Build header pieces; dev infos allocate blobs, so the blob
+        # buffer has to be built last.
+        config = self.build_config()
+        dev_info = self.build_dev_info()
+        blob_buffer = self.build_blob_buffer()
+
+        reserved = b'\0' * 1984
+        dev_info_pad = b'\0' * 4  # VMAHeader.dev_info is unaligned (vma.h bug)
+
+        # Size the header
+        blob_buffer_offset = header_struct.size + len(reserved) + \
+                             len(config) + len(dev_info_pad) + len(dev_info)
+        header_size = blob_buffer_offset + len(blob_buffer)
+
+        # Build header without checksum.  time.time() is already seconds
+        # since the epoch in UTC; mktime(gmtime()) would be off by the
+        # local UTC offset.
+        fields = (VMA_MAGIC,
+                  VMA_VERSION) + \
+                 self._byte_fields(self.uuid) + \
+                 (int(time.time()),) + \
+                 self._byte_fields(b'\0' * 16) + \
+                 (blob_buffer_offset,
+                  len(blob_buffer),
+                  header_size)
+        header = header_struct.pack(*fields)
+
+        # Checksum header; md5sum follows magic, version, uuid and ctime
+        buf = b''.join([header,
+                        reserved,
+                        config,
+                        dev_info_pad,
+                        dev_info,
+                        blob_buffer])
+        self.fobj.write(self._insert_md5(buf, 32))
+        self.header_written = True
+
+    def add_config(self, name, data):
+        '''Add a config file called name with contents data.
+
+        Raises RuntimeError once the header has been written and
+        ValueError when all VMA_MAX_CONFIGS slots are taken.
+        '''
+        self._check_header_pending()
+        if len(self.config_names) >= VMA_MAX_CONFIGS:
+            raise ValueError('too many configs (max %d)' % VMA_MAX_CONFIGS)
+        name_ptr = self.alloc_blob_str(name)
+        data_ptr = self.alloc_blob(data)
+        self.config_names.append(name_ptr)
+        self.config_data.append(data_ptr)
+
+    def add_stream(self, name, size):
+        '''Register a stream of size bytes and return its stream id.
+
+        Stream ids start at 1.  Raises RuntimeError once the header has
+        been written and ValueError when MAX_STREAMS is exceeded.
+        '''
+        self._check_header_pending()
+        if len(self.streams) >= self.MAX_STREAMS:
+            raise ValueError('too many streams (max %d)' % self.MAX_STREAMS)
+        self.streams.append((name, size))
+        return len(self.streams)
+
+    def build_blockinfo(self):
+        '''Return a blockinfo buffer for the current extent'''
+        bufs = []
+        for stream_id, offset, _ in self.extent:
+            # 16-bit mask (all ones: the full cluster is always written),
+            # stream id, then the cluster number within the stream
+            bufs.append(be64_struct.pack(0xffff000000000000 |
+                                         (stream_id << 32) |
+                                         (offset // VMA_CLUSTER_SIZE)))
+        unused = VMA_BLOCKS_PER_EXTENT - len(self.extent)
+        bufs.append(b'\0' * (unused * be64_struct.size))
+        return b''.join(bufs)
+
+    def write_extent(self):
+        '''Write the current extent (header plus clusters) and reset it'''
+        blockinfo = self.build_blockinfo()
+        # block_count is in units of 4096 bytes
+        block_count = len(self.extent) * (VMA_CLUSTER_SIZE // 4096)
+
+        # Build header without checksum
+        fields = (VMA_EXTENT_MAGIC,
+                  block_count) + \
+                 self._byte_fields(self.uuid) + \
+                 self._byte_fields(b'\0' * 16)
+        header = extent_struct.pack(*fields)
+
+        # Checksum header; md5sum follows magic, pad, block count, uuid
+        buf = self._insert_md5(b''.join([header, blockinfo]), 24)
+
+        self.fobj.write(buf)
+        for _, _, data in self.extent:
+            self.fobj.write(data)
+
+        self.extent = []
+
+    def append_cluster(self, stream_id, offset, data):
+        '''Append one cluster to the current extent'''
+        self.extent.append((stream_id, offset, data))
+        if len(self.extent) == VMA_BLOCKS_PER_EXTENT:
+            self.write_extent()
+
+    def align_write(self, stream_id, offset, data):
+        '''Buffer writes whose length is not cluster-aligned (vmstate)
+
+        Returns (need_more, offset, data).  need_more is True when less
+        than one cluster is buffered and nothing can be written yet;
+        otherwise offset/data describe a whole number of clusters.
+        Raises ValueError if a buffered stream is not written
+        sequentially.
+        '''
+        # Fast path for aligned writes
+        mod = len(data) % VMA_CLUSTER_SIZE
+        if stream_id not in self.align_bufs and mod == 0:
+            return False, offset, data
+
+        # Add data to buffer
+        bufs, start, total = self.align_bufs.get(stream_id, ([], offset, 0))
+        if start + total != offset:
+            raise ValueError('unaligned writes to stream %r must be '
+                             'sequential: expected offset %d, got %d' %
+                             (stream_id, start + total, offset))
+        bufs.append(data)
+        total += len(data)
+
+        # Stop if we don't have a cluster yet
+        if total < VMA_CLUSTER_SIZE:
+            self.align_bufs[stream_id] = (bufs, start, total)
+            return True, None, None
+
+        # Take as many clusters as possible, keep the remainder buffered
+        end = (total // VMA_CLUSTER_SIZE) * VMA_CLUSTER_SIZE
+        pending = b''.join(bufs)
+        aligned, rest = pending[:end], pending[end:]
+        bufs = [rest] if rest else []
+        self.align_bufs[stream_id] = (bufs, start + end, total - end)
+        return False, start, aligned
+
+    def write(self, stream_id, offset, data):
+        '''Write data (a byte string) at offset of stream stream_id.
+
+        Writes whose length is a multiple of the cluster size are queued
+        directly; other writes are buffered and must be sequential.
+        Raises ValueError for a stream id not returned by add_stream().
+        '''
+        if not 1 <= stream_id <= len(self.streams):
+            raise ValueError('unknown stream id %r' % (stream_id,))
+
+        if not self.header_written:
+            self.write_header()
+
+        need_more, offset, data = self.align_write(stream_id, offset, data)
+        if need_more:
+            return
+
+        # Slice by position instead of repeatedly copying the remainder
+        whole = len(data) - len(data) % VMA_CLUSTER_SIZE
+        for pos in range(0, whole, VMA_CLUSTER_SIZE):
+            self.append_cluster(stream_id, offset + pos,
+                                data[pos:pos + VMA_CLUSTER_SIZE])
+
+    def close(self):
+        '''Flush buffered data and the final extent.
+
+        Writes the header first if no write() happened, so that an
+        archive without stream data still has one.  The underlying file
+        object is left open.
+        '''
+        if not self.header_written:
+            self.write_header()
+
+        # Flush unaligned data, zero-padded to a full cluster
+        for stream_id in sorted(self.align_bufs):
+            bufs, start, total = self.align_bufs[stream_id]
+            if total == 0:
+                # Everything was already written; a padding cluster here
+                # would lie past the end of the data.
+                continue
+            assert total < VMA_CLUSTER_SIZE
+            padding = VMA_CLUSTER_SIZE - total
+            bufs.append(b'\0' * padding)
+            self.append_cluster(stream_id, start, b''.join(bufs))
+        self.align_bufs = {}
+
+        # Write final extent, if necessary
+        if self.extent:
+            self.write_extent()