diff mbox

[v5,6/6] BitmapLog: python script for extracting bitmap from a binary file

Message ID 1406862751-24008-7-git-send-email-sanidhya.iiith@gmail.com
State New
Headers show

Commit Message

Sanidhya Kashyap Aug. 1, 2014, 3:12 a.m. UTC
The script not only extracts the bitmap from the file but also draws a figure
if required. Currently, a figure is drawn for every bitmap. Later, I'll change
it to draw the figure for different blocks.

The picture is drawn by generating a matrix of 0s and 1s from the bitmap. The
dimensions are calculated from the total number of bitmap pages and are
sqrt(total pages) x (sqrt(total pages) + 1). The white parts indicate non-dirtied
regions, while the black parts indicate dirtied regions.

The Python code requires some libraries, such as numpy, pylab and math, to generate
the images.

Signed-off-by: Sanidhya Kashyap <sanidhya.iiith@gmail.com>
---
 scripts/extract-bitmap.py | 144 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100755 scripts/extract-bitmap.py

Comments

Dr. David Alan Gilbert Aug. 12, 2014, 12:36 p.m. UTC | #1
* Sanidhya Kashyap (sanidhya.iiith@gmail.com) wrote:
> The file not only extracts the bitmap from the file but also draws the figure
> if required. Currently, figure is drawn for all the bitmaps. Later, I'll make
> the change to draw for different blocks.
> 
> The picture is drawn by generating a matrix of 0s and 1s from the bitmap. The
> dimensions are calculated on the basis of total bitmap pages which is represented
> as sqrt(total pages) X (sqrt(total pages) + 1). The white parts indicate non dirtied
> region while the black - dirtied region.
> 
> The python code requires some libraries such as numpy, pylab and math to generate
> the images.

This is interesting; I've tried this out with a Fedora boot and recorded about 240
frames; the script has problems when trying to convert that big a recording to images;
  1) It loads it all into memory - so it used about 2GB of RAM
  2) It opens each image as a window - so I then had to close about 240 windows

I fiddled with it a bit and created .png files, and then stitched these together
with imagemagick to create an animation.  It's quite interesting to watch the OS boot
and then see me log in and start Google's stressapptest, which suddenly changes
the whole of memory.

I suggest making it dump each image to a file in a similar way and making it free the
python image data after each frame, then there shouldn't be a size limit.
(I did try and upload the image, but QEMU's wiki didn't like the size)

Dave


> 
> Signed-off-by: Sanidhya Kashyap <sanidhya.iiith@gmail.com>
> ---
>  scripts/extract-bitmap.py | 144 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 144 insertions(+)
>  create mode 100755 scripts/extract-bitmap.py
> 
> diff --git a/scripts/extract-bitmap.py b/scripts/extract-bitmap.py
> new file mode 100755
> index 0000000..942deca
> --- /dev/null
> +++ b/scripts/extract-bitmap.py
> @@ -0,0 +1,144 @@
> +#!/usr/bin/python
> +# This python script helps in extracting the dirty bitmap present
> +# in the file after executing the log-dirty-bitmap command either
> +# from the qmp or hmp interface. This file only processes binary
> +# file obtained via command.
> +#
> +# Copyright (C) 2014 Sanidhya Kashyap <sanidhya.iiith@gmail.com>
> +#
> +# Authors:
> +#       Sanidhya Kashyap
> +#
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or later.
> +
> +import struct
> +import argparse
> +from functools import partial
> +from math import sqrt
> +from numpy import array
> +from pylab import figure,imshow,show,gray
> +
> +long_bytes = 8
> +byte_size = 8
> +int_bytes = 4
> +complete_bitmap_list = []
> +block_list = []
> +
> +def get_unsigned_long_integer(value):
> +	return struct.unpack('<Q', value)[0]
> +
> +def get_long_integer(value):
> +	return struct.unpack('<q', value)[0]
> +
> +def get_integer(value):
> +	return struct.unpack('<i', value)[0]
> +
> +def get_char(value):
> +	return struct.unpack('<c', value)[0]
> +
> +def get_string(value, length):
> +	name = struct.unpack('<'+str(length)+'s', value)[0]
> +	for i in range(len(name)):
> +		if name[i] == '\x00':
> +			return name[:i]
> +
> +def dec2bin(decimal):
> +    bin_value = bin(decimal)[2:]
> +    if len(bin_value) < long_bytes * byte_size:
> +        add_zeroes = long_bytes * byte_size - len(bin_value)
> +        for i in range(add_zeroes):
> +            bin_value += "0"
> +    return str(bin_value)
> +
> +def get_bitmap_length(ram_bitmap_pages):
> +    bitmap_length = ram_bitmap_pages / (long_bytes * byte_size)
> +    if ram_bitmap_pages % (long_bytes * byte_size) != 0:
> +        bitmap_length += 1
> +    return bitmap_length
> +
> +def dump_ram_block_info(infile):
> +    total_blocks = get_integer(infile.read(int_bytes))
> +    for i in range(total_blocks):
> +        block_name_length = get_integer(infile.read(int_bytes))
> +        block_name = get_string(infile.read(block_name_length), block_name_length)
> +        block_offset = get_unsigned_long_integer(infile.read(long_bytes))
> +        block_length = get_unsigned_long_integer(infile.read(long_bytes))
> +        block_list.append(dict(name=block_name, offset=block_offset, length=block_length))
> +
> +def generate_images():
> +    r = 0
> +    for list in complete_bitmap_list:
> +        all_digits = ""
> +        for element in list:
> +            all_digits += dec2bin(element)
> +        l = len(all_digits)
> +        sqrtvalue = int(sqrt(l))
> +        for i in range(sqrtvalue * (sqrtvalue+1)-l):
> +            all_digits+="0"
> +
> +        v = []
> +        l = len(all_digits)
> +        for i in range(sqrtvalue+1):
> +            v1 = []
> +            for j in range(sqrtvalue):
> +                v1.append(int(all_digits[i*sqrtvalue+j]))
> +            v.append(v1)
> +
> +        im_array = array(v)
> +        figure(r)
> +        imshow(im_array, cmap=gray())
> +        r += 1
> +    show()
> +
> +def dump_bitmap(infile, draw):
> +    marker = 'M'
> +    count = 0
> +    value = ' '
> +    current_ram_bitmap_pages = 0
> +    prev_ram_bitmap_pages = 0
> +    while True:
> +        if len(value) == 0  or marker != 'M':
> +            print "issue with the dump"
> +            return
> +        bitmap_page_raw_value = infile.read(long_bytes)
> +        if not bitmap_page_raw_value:
> +            break
> +        current_ram_bitmap_pages = get_long_integer(bitmap_page_raw_value)
> +        if current_ram_bitmap_pages != prev_ram_bitmap_pages:
> +            prev_ram_bitmap_pages = current_ram_bitmap_pages
> +            dump_ram_block_info(infile)
> +
> +        bitmap_length = get_bitmap_length(current_ram_bitmap_pages)
> +        bitmap_list = []
> +        bitmap_raw_value = infile.read(long_bytes * bitmap_length)
> +        if not bitmap_raw_value:
> +            break
> +        count+=1
> +        for i in range(bitmap_length):
> +            mark = i * long_bytes
> +            bitmap_list.append((get_unsigned_long_integer(bitmap_raw_value[mark:mark+long_bytes])))
> +        complete_bitmap_list.append(bitmap_list)
> +        value = infile.read(1)
> +        marker = get_char(value)
> +    if draw is True:
> +        generate_images()
> +    else:
> +        print complete_bitmap_list
> +
> +def main():
> +    extracter = argparse.ArgumentParser(description='Extract dirty bitmap from binary file.')
> +    extracter.add_argument('infile', help='Input file to extract the bitmap')
> +    extracter.add_argument('-d', action='store_true', dest='draw', default=False,
> +            help='Draw a black and white image of the processed dirty bitmap')
> +    args = extracter.parse_args()
> +    print 'The filename is {}'.format(args.infile)
> +
> +    infile = open(format(args.infile), 'rb')
> +
> +    dump_bitmap(infile, args.draw);
> +
> +    infile.close()
> +
> +if __name__ == '__main__':
> +    main()
> -- 
> 1.9.3
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert Aug. 12, 2014, 2:04 p.m. UTC | #2
* Dr. David Alan Gilbert (dgilbert@redhat.com) wrote:
> * Sanidhya Kashyap (sanidhya.iiith@gmail.com) wrote:
> > The file not only extracts the bitmap from the file but also draws the figure
> > if required. Currently, figure is drawn for all the bitmaps. Later, I'll make
> > the change to draw for different blocks.
> > 
> > The picture is drawn by generating a matrix of 0s and 1s from the bitmap. The
> > dimensions are calculated on the basis of total bitmap pages which is represented
> > as sqrt(total pages) X (sqrt(total pages) + 1). The white parts indicate non dirtied
> > region while the black - dirtied region.
> > 
> > The python code requires some libraries such as numpy, pylab and math to generate
> > the images.
> 
> This is interesting; I've tried this out with a Fedora boot and recorded about 240
> frames; the script has problems when trying to convert that big a recording to images;
>   1) It loads it all into memory - so it used about 2GB of RAM
>   2) It opens each image as a window - so I then had to close about 240 windows
> 
> I fiddled with it a bit and created .png files, and then stitched these together
> with imagemagick to create an animation.  It's quite interesting to watch the OS boot
> and then see me log in and start Google's stressapptest, which suddenly changes
> the whole of memory.
> 
> I suggest making it dump each image to a file in a similar way and making it free the
> python image data after each frame, then there shouldn't be a size limit.
> (I did try and upload the image, but QEMU's wiki didn't like the size)

and here's the (scaled down for size) looping gif:
http://wiki.qemu-project.org/images/9/9a/Fedboot50.gif

Dave

> 
> Dave
> 
> 
> > 
> > Signed-off-by: Sanidhya Kashyap <sanidhya.iiith@gmail.com>
> > ---
> >  scripts/extract-bitmap.py | 144 ++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 144 insertions(+)
> >  create mode 100755 scripts/extract-bitmap.py
> > 
> > diff --git a/scripts/extract-bitmap.py b/scripts/extract-bitmap.py
> > new file mode 100755
> > index 0000000..942deca
> > --- /dev/null
> > +++ b/scripts/extract-bitmap.py
> > @@ -0,0 +1,144 @@
> > +#!/usr/bin/python
> > +# This python script helps in extracting the dirty bitmap present
> > +# in the file after executing the log-dirty-bitmap command either
> > +# from the qmp or hmp interface. This file only processes binary
> > +# file obtained via command.
> > +#
> > +# Copyright (C) 2014 Sanidhya Kashyap <sanidhya.iiith@gmail.com>
> > +#
> > +# Authors:
> > +#       Sanidhya Kashyap
> > +#
> > +#
> > +# This work is licensed under the terms of the GNU GPL, version 2 or later.
> > +
> > +import struct
> > +import argparse
> > +from functools import partial
> > +from math import sqrt
> > +from numpy import array
> > +from pylab import figure,imshow,show,gray
> > +
> > +long_bytes = 8
> > +byte_size = 8
> > +int_bytes = 4
> > +complete_bitmap_list = []
> > +block_list = []
> > +
> > +def get_unsigned_long_integer(value):
> > +	return struct.unpack('<Q', value)[0]
> > +
> > +def get_long_integer(value):
> > +	return struct.unpack('<q', value)[0]
> > +
> > +def get_integer(value):
> > +	return struct.unpack('<i', value)[0]
> > +
> > +def get_char(value):
> > +	return struct.unpack('<c', value)[0]
> > +
> > +def get_string(value, length):
> > +	name = struct.unpack('<'+str(length)+'s', value)[0]
> > +	for i in range(len(name)):
> > +		if name[i] == '\x00':
> > +			return name[:i]
> > +
> > +def dec2bin(decimal):
> > +    bin_value = bin(decimal)[2:]
> > +    if len(bin_value) < long_bytes * byte_size:
> > +        add_zeroes = long_bytes * byte_size - len(bin_value)
> > +        for i in range(add_zeroes):
> > +            bin_value += "0"
> > +    return str(bin_value)
> > +
> > +def get_bitmap_length(ram_bitmap_pages):
> > +    bitmap_length = ram_bitmap_pages / (long_bytes * byte_size)
> > +    if ram_bitmap_pages % (long_bytes * byte_size) != 0:
> > +        bitmap_length += 1
> > +    return bitmap_length
> > +
> > +def dump_ram_block_info(infile):
> > +    total_blocks = get_integer(infile.read(int_bytes))
> > +    for i in range(total_blocks):
> > +        block_name_length = get_integer(infile.read(int_bytes))
> > +        block_name = get_string(infile.read(block_name_length), block_name_length)
> > +        block_offset = get_unsigned_long_integer(infile.read(long_bytes))
> > +        block_length = get_unsigned_long_integer(infile.read(long_bytes))
> > +        block_list.append(dict(name=block_name, offset=block_offset, length=block_length))
> > +
> > +def generate_images():
> > +    r = 0
> > +    for list in complete_bitmap_list:
> > +        all_digits = ""
> > +        for element in list:
> > +            all_digits += dec2bin(element)
> > +        l = len(all_digits)
> > +        sqrtvalue = int(sqrt(l))
> > +        for i in range(sqrtvalue * (sqrtvalue+1)-l):
> > +            all_digits+="0"
> > +
> > +        v = []
> > +        l = len(all_digits)
> > +        for i in range(sqrtvalue+1):
> > +            v1 = []
> > +            for j in range(sqrtvalue):
> > +                v1.append(int(all_digits[i*sqrtvalue+j]))
> > +            v.append(v1)
> > +
> > +        im_array = array(v)
> > +        figure(r)
> > +        imshow(im_array, cmap=gray())
> > +        r += 1
> > +    show()
> > +
> > +def dump_bitmap(infile, draw):
> > +    marker = 'M'
> > +    count = 0
> > +    value = ' '
> > +    current_ram_bitmap_pages = 0
> > +    prev_ram_bitmap_pages = 0
> > +    while True:
> > +        if len(value) == 0  or marker != 'M':
> > +            print "issue with the dump"
> > +            return
> > +        bitmap_page_raw_value = infile.read(long_bytes)
> > +        if not bitmap_page_raw_value:
> > +            break
> > +        current_ram_bitmap_pages = get_long_integer(bitmap_page_raw_value)
> > +        if current_ram_bitmap_pages != prev_ram_bitmap_pages:
> > +            prev_ram_bitmap_pages = current_ram_bitmap_pages
> > +            dump_ram_block_info(infile)
> > +
> > +        bitmap_length = get_bitmap_length(current_ram_bitmap_pages)
> > +        bitmap_list = []
> > +        bitmap_raw_value = infile.read(long_bytes * bitmap_length)
> > +        if not bitmap_raw_value:
> > +            break
> > +        count+=1
> > +        for i in range(bitmap_length):
> > +            mark = i * long_bytes
> > +            bitmap_list.append((get_unsigned_long_integer(bitmap_raw_value[mark:mark+long_bytes])))
> > +        complete_bitmap_list.append(bitmap_list)
> > +        value = infile.read(1)
> > +        marker = get_char(value)
> > +    if draw is True:
> > +        generate_images()
> > +    else:
> > +        print complete_bitmap_list
> > +
> > +def main():
> > +    extracter = argparse.ArgumentParser(description='Extract dirty bitmap from binary file.')
> > +    extracter.add_argument('infile', help='Input file to extract the bitmap')
> > +    extracter.add_argument('-d', action='store_true', dest='draw', default=False,
> > +            help='Draw a black and white image of the processed dirty bitmap')
> > +    args = extracter.parse_args()
> > +    print 'The filename is {}'.format(args.infile)
> > +
> > +    infile = open(format(args.infile), 'rb')
> > +
> > +    dump_bitmap(infile, args.draw);
> > +
> > +    infile.close()
> > +
> > +if __name__ == '__main__':
> > +    main()
> > -- 
> > 1.9.3
> > 
> > 
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox

Patch

diff --git a/scripts/extract-bitmap.py b/scripts/extract-bitmap.py
new file mode 100755
index 0000000..942deca
--- /dev/null
+++ b/scripts/extract-bitmap.py
@@ -0,0 +1,144 @@ 
+#!/usr/bin/python
+# This python script helps in extracting the dirty bitmap present
+# in the file after executing the log-dirty-bitmap command either
+# from the qmp or hmp interface. This file only processes binary
+# file obtained via command.
+#
+# Copyright (C) 2014 Sanidhya Kashyap <sanidhya.iiith@gmail.com>
+#
+# Authors:
+#       Sanidhya Kashyap
+#
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+
+import struct
+import argparse
+from functools import partial
+from math import sqrt
+from numpy import array
+from pylab import figure,imshow,show,gray
+
+long_bytes = 8
+byte_size = 8
+int_bytes = 4
+complete_bitmap_list = []
+block_list = []
+
+def get_unsigned_long_integer(value):
+	return struct.unpack('<Q', value)[0]
+
+def get_long_integer(value):
+	return struct.unpack('<q', value)[0]
+
+def get_integer(value):
+	return struct.unpack('<i', value)[0]
+
+def get_char(value):
+	return struct.unpack('<c', value)[0]
+
+def get_string(value, length):
+	name = struct.unpack('<'+str(length)+'s', value)[0]
+	for i in range(len(name)):
+		if name[i] == '\x00':
+			return name[:i]
+
+def dec2bin(decimal):
+    bin_value = bin(decimal)[2:]
+    if len(bin_value) < long_bytes * byte_size:
+        add_zeroes = long_bytes * byte_size - len(bin_value)
+        for i in range(add_zeroes):
+            bin_value += "0"
+    return str(bin_value)
+
+def get_bitmap_length(ram_bitmap_pages):
+    bitmap_length = ram_bitmap_pages / (long_bytes * byte_size)
+    if ram_bitmap_pages % (long_bytes * byte_size) != 0:
+        bitmap_length += 1
+    return bitmap_length
+
+def dump_ram_block_info(infile):
+    total_blocks = get_integer(infile.read(int_bytes))
+    for i in range(total_blocks):
+        block_name_length = get_integer(infile.read(int_bytes))
+        block_name = get_string(infile.read(block_name_length), block_name_length)
+        block_offset = get_unsigned_long_integer(infile.read(long_bytes))
+        block_length = get_unsigned_long_integer(infile.read(long_bytes))
+        block_list.append(dict(name=block_name, offset=block_offset, length=block_length))
+
+def generate_images():
+    r = 0
+    for list in complete_bitmap_list:
+        all_digits = ""
+        for element in list:
+            all_digits += dec2bin(element)
+        l = len(all_digits)
+        sqrtvalue = int(sqrt(l))
+        for i in range(sqrtvalue * (sqrtvalue+1)-l):
+            all_digits+="0"
+
+        v = []
+        l = len(all_digits)
+        for i in range(sqrtvalue+1):
+            v1 = []
+            for j in range(sqrtvalue):
+                v1.append(int(all_digits[i*sqrtvalue+j]))
+            v.append(v1)
+
+        im_array = array(v)
+        figure(r)
+        imshow(im_array, cmap=gray())
+        r += 1
+    show()
+
+def dump_bitmap(infile, draw):
+    marker = 'M'
+    count = 0
+    value = ' '
+    current_ram_bitmap_pages = 0
+    prev_ram_bitmap_pages = 0
+    while True:
+        if len(value) == 0  or marker != 'M':
+            print "issue with the dump"
+            return
+        bitmap_page_raw_value = infile.read(long_bytes)
+        if not bitmap_page_raw_value:
+            break
+        current_ram_bitmap_pages = get_long_integer(bitmap_page_raw_value)
+        if current_ram_bitmap_pages != prev_ram_bitmap_pages:
+            prev_ram_bitmap_pages = current_ram_bitmap_pages
+            dump_ram_block_info(infile)
+
+        bitmap_length = get_bitmap_length(current_ram_bitmap_pages)
+        bitmap_list = []
+        bitmap_raw_value = infile.read(long_bytes * bitmap_length)
+        if not bitmap_raw_value:
+            break
+        count+=1
+        for i in range(bitmap_length):
+            mark = i * long_bytes
+            bitmap_list.append((get_unsigned_long_integer(bitmap_raw_value[mark:mark+long_bytes])))
+        complete_bitmap_list.append(bitmap_list)
+        value = infile.read(1)
+        marker = get_char(value)
+    if draw is True:
+        generate_images()
+    else:
+        print complete_bitmap_list
+
+def main():
+    extracter = argparse.ArgumentParser(description='Extract dirty bitmap from binary file.')
+    extracter.add_argument('infile', help='Input file to extract the bitmap')
+    extracter.add_argument('-d', action='store_true', dest='draw', default=False,
+            help='Draw a black and white image of the processed dirty bitmap')
+    args = extracter.parse_args()
+    print 'The filename is {}'.format(args.infile)
+
+    infile = open(format(args.infile), 'rb')
+
+    dump_bitmap(infile, args.draw);
+
+    infile.close()
+
+if __name__ == '__main__':
+    main()