From patchwork Fri Aug 15 17:07:01 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Stefan Hajnoczi X-Patchwork-Id: 380395 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 9A063140097 for ; Sat, 16 Aug 2014 03:39:21 +1000 (EST) Received: from localhost ([::1]:60919 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XILTD-000447-OY for incoming@patchwork.ozlabs.org; Fri, 15 Aug 2014 13:39:19 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:50763) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XIL07-00005H-4h for qemu-devel@nongnu.org; Fri, 15 Aug 2014 13:09:19 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1XIL02-0000bN-3f for qemu-devel@nongnu.org; Fri, 15 Aug 2014 13:09:14 -0400 Received: from mx1.redhat.com ([209.132.183.28]:51830) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XIL00-0000bF-I7 for qemu-devel@nongnu.org; Fri, 15 Aug 2014 13:09:10 -0400 Received: from int-mx10.intmail.prod.int.phx2.redhat.com (int-mx10.intmail.prod.int.phx2.redhat.com [10.5.11.23]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id s7FH90JC025694 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK); Fri, 15 Aug 2014 13:09:00 -0400 Received: from localhost (ovpn-112-32.ams2.redhat.com [10.36.112.32]) by int-mx10.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id s7FH8wi7002088; Fri, 15 Aug 2014 13:08:59 -0400 From: Stefan Hajnoczi To: Date: Fri, 15 Aug 2014 18:07:01 +0100 Message-Id: <1408122422-13935-55-git-send-email-stefanha@redhat.com> In-Reply-To: <1408122422-13935-1-git-send-email-stefanha@redhat.com> References: <1408122422-13935-1-git-send-email-stefanha@redhat.com> X-Scanned-By: MIMEDefang 2.68 on 10.5.11.23 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x X-Received-From: 209.132.183.28 Cc: Peter Maydell , Maria Kustova , Maria Kustova , Stefan Hajnoczi Subject: [Qemu-devel] [PULL 54/55] image-fuzzer: Reduce number of generator functions in __init__ X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Maria Kustova Some issues can be found only when a fuzzed image has a partial structure, e.g. has L1/L2 tables but no refcount ones. Generation of an entirely defined image limits these cases. Now the Image constructor creates only a header and a backing file name (if any), other image elements are generated in the 'create_image' API. Signed-off-by: Maria Kustova Signed-off-by: Stefan Hajnoczi --- tests/image-fuzzer/qcow2/layout.py | 304 +++++++++++++++++-------------------- 1 file changed, 143 insertions(+), 161 deletions(-) diff --git a/tests/image-fuzzer/qcow2/layout.py b/tests/image-fuzzer/qcow2/layout.py index deed9ea..730c771 100644 --- a/tests/image-fuzzer/qcow2/layout.py +++ b/tests/image-fuzzer/qcow2/layout.py @@ -21,6 +21,7 @@ import struct import fuzz from math import ceil from os import urandom +from itertools import chain MAX_IMAGE_SIZE = 10 * (1 << 20) # Standard sizes @@ -36,7 +37,7 @@ class Field(object): of value necessary for its packing to binary form, an offset from the beginning of the image, a value and a name. - The field can be iterated as a list [format, offset, value]. + The field can be iterated as a list [format, offset, value, name]. """ __slots__ = ('fmt', 'offset', 'value', 'name') @@ -48,7 +49,7 @@ class Field(object): self.name = name def __iter__(self): - return iter([self.fmt, self.offset, self.value]) + return iter([self.fmt, self.offset, self.value, self.name]) def __repr__(self): return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \ @@ -59,15 +60,14 @@ class FieldsList(object): """List of fields. - The class allows access to a field in the list by its name and joins - several list in one via in-place addition. + The class allows access to a field in the list by its name. """ def __init__(self, meta_data=None): if meta_data is None: self.data = [] else: - self.data = [Field(f[0], f[1], f[2], f[3]) + self.data = [Field(*f) for f in meta_data] def __getitem__(self, name): @@ -76,10 +76,6 @@ class FieldsList(object): def __iter__(self): return iter(self.data) - def __iadd__(self, other): - self.data += other.data - return self - def __len__(self): return len(self.data) @@ -93,75 +89,31 @@ class Image(object): a file. """ - @staticmethod - def _size_params(): - """Generate a random image size aligned to a random correct - cluster size. - """ - cluster_bits = random.randrange(9, 21) - cluster_size = 1 << cluster_bits - img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size) - return (cluster_bits, img_size) - - @staticmethod - def _get_available_clusters(used, number): - """Return a set of indices of not allocated clusters. - - 'used' contains indices of currently allocated clusters. - All clusters that cannot be allocated between 'used' clusters will have - indices appended to the end of 'used'. + def __init__(self, backing_file_name=None): + """Create a random valid qcow2 image with the correct header and stored + backing file name. """ - append_id = max(used) + 1 - free = set(range(1, append_id)) - used - if len(free) >= number: - return set(random.sample(free, number)) - else: - return free | set(range(append_id, append_id + number - len(free))) - - @staticmethod - def _get_adjacent_clusters(used, size): - """Return an index of the first cluster in the sequence of free ones. - - 'used' contains indices of currently allocated clusters. 'size' is the - length of the sequence of free clusters. - If the sequence of 'size' is not available between 'used' clusters, its - first index will be append to the end of 'used'. - """ - def get_cluster_id(lst, length): - """Return the first index of the sequence of the specified length - or None if the sequence cannot be inserted in the list. - """ - if len(lst) != 0: - pairs = [] - pair = (lst[0], 1) - for i in range(1, len(lst)): - if lst[i] == lst[i-1] + 1: - pair = (lst[i], pair[1] + 1) - else: - pairs.append(pair) - pair = (lst[i], 1) - pairs.append(pair) - random.shuffle(pairs) - for x, s in pairs: - if s >= length: - return x - length + 1 - return None - - append_id = max(used) + 1 - free = list(set(range(1, append_id)) - used) - idx = get_cluster_id(free, size) - if idx is None: - return append_id - else: - return idx + cluster_bits, self.image_size = self._size_params() + self.cluster_size = 1 << cluster_bits + self.header = FieldsList() + self.backing_file_name = FieldsList() + self.backing_file_format = FieldsList() + self.feature_name_table = FieldsList() + self.end_of_extension_area = FieldsList() + self.l2_tables = FieldsList() + self.l1_table = FieldsList() + self.ext_offset = 0 + self.create_header(cluster_bits, backing_file_name) + self.set_backing_file_name(backing_file_name) + self.data_clusters = self._alloc_data(self.image_size, + self.cluster_size) + # Percentage of fields will be fuzzed + self.bias = random.uniform(0.2, 0.5) - @staticmethod - def _alloc_data(img_size, cluster_size): - """Return a set of random indices of clusters allocated for guest data. - """ - num_of_cls = img_size/cluster_size - return set(random.sample(range(1, num_of_cls + 1), - random.randint(0, num_of_cls))) + def __iter__(self): + return chain(self.header, self.backing_file_format, + self.feature_name_table, self.end_of_extension_area, + self.backing_file_name, self.l1_table, self.l2_tables) def create_header(self, cluster_bits, backing_file_name=None): """Generate a random valid header.""" @@ -196,12 +148,12 @@ class Image(object): random.getrandbits(2) self.header['compatible_features'][0].value = random.getrandbits(1) self.header['header_length'][0].value = 104 - - max_header_len = struct.calcsize( + # Extensions start at the header last field offset and the field size + self.ext_offset = struct.calcsize( self.header['header_length'][0].fmt) + \ self.header['header_length'][0].offset end_of_extension_area_len = 2 * UINT32_S - free_space = self.cluster_size - max_header_len - \ + free_space = self.cluster_size - self.ext_offset - \ end_of_extension_area_len # If the backing file name specified and there is enough space for it # in the first cluster, then it's placed in the very end of the first @@ -224,24 +176,16 @@ class Image(object): [data_fmt, self.header['backing_file_offset'][0].value, backing_file_name, 'bf_name'] ]) - else: - self.backing_file_name = FieldsList() def set_backing_file_format(self, backing_file_fmt=None): - """Generate the header extension for the backing file - format. - """ - self.backing_file_format = FieldsList() - offset = struct.calcsize(self.header['header_length'][0].fmt) + \ - self.header['header_length'][0].offset - + """Generate the header extension for the backing file format.""" if backing_file_fmt is not None: # Calculation of the free space available in the first cluster end_of_extension_area_len = 2 * UINT32_S high_border = (self.header['backing_file_offset'][0].value or (self.cluster_size - 1)) - \ end_of_extension_area_len - free_space = high_border - offset + free_space = high_border - self.ext_offset ext_size = 2 * UINT32_S + ((len(backing_file_fmt) + 7) & ~7) if free_space >= ext_size: @@ -249,18 +193,19 @@ class Image(object): ext_data_fmt = '>' + str(ext_data_len) + 's' ext_padding_len = 7 - (ext_data_len - 1) % 8 self.backing_file_format = FieldsList([ - ['>I', offset, 0xE2792ACA, 'ext_magic'], - ['>I', offset + UINT32_S, ext_data_len, 'ext_length'], - [ext_data_fmt, offset + UINT32_S * 2, backing_file_fmt, - 'bf_format'] + ['>I', self.ext_offset, 0xE2792ACA, 'ext_magic'], + ['>I', self.ext_offset + UINT32_S, ext_data_len, + 'ext_length'], + [ext_data_fmt, self.ext_offset + UINT32_S * 2, + backing_file_fmt, 'bf_format'] ]) - offset = self.backing_file_format['bf_format'][0].offset + \ - struct.calcsize(self.backing_file_format[ - 'bf_format'][0].fmt) + ext_padding_len - - return offset + self.ext_offset = \ + struct.calcsize( + self.backing_file_format['bf_format'][0].fmt) + \ + ext_padding_len + \ + self.backing_file_format['bf_format'][0].offset - def create_feature_name_table(self, offset): + def create_feature_name_table(self): """Generate a random header extension for names of features used in the image. """ @@ -272,7 +217,7 @@ class Image(object): high_border = (self.header['backing_file_offset'][0].value or (self.cluster_size - 1)) - \ end_of_extension_area_len - free_space = high_border - offset + free_space = high_border - self.ext_offset # Sum of sizes of 'magic' and 'length' header extension fields ext_header_len = 2 * UINT32_S fnt_entry_size = 6 * UINT64_S @@ -281,7 +226,7 @@ class Image(object): if not num_fnt_entries == 0: feature_tables = [] feature_ids = [] - inner_offset = offset + ext_header_len + inner_offset = self.ext_offset + ext_header_len feat_name = 'some cool feature' while len(feature_tables) < num_fnt_entries * 3: feat_type, feat_bit = gen_feat_ids() @@ -301,24 +246,20 @@ class Image(object): # No padding for the extension is necessary, because # the extension length is multiple of 8 self.feature_name_table = FieldsList([ - ['>I', offset, 0x6803f857, 'ext_magic'], + ['>I', self.ext_offset, 0x6803f857, 'ext_magic'], # One feature table contains 3 fields and takes 48 bytes - ['>I', offset + UINT32_S, len(feature_tables) / 3 * 48, - 'ext_length'] + ['>I', self.ext_offset + UINT32_S, + len(feature_tables) / 3 * 48, 'ext_length'] ] + feature_tables) - offset = inner_offset - else: - self.feature_name_table = FieldsList() - - return offset + self.ext_offset = inner_offset - def set_end_of_extension_area(self, offset): + def set_end_of_extension_area(self): """Generate a mandatory header extension marking end of header extensions. """ self.end_of_extension_area = FieldsList([ - ['>I', offset, 0, 'ext_magic'], - ['>I', offset + UINT32_S, 0, 'ext_length'] + ['>I', self.ext_offset, 0, 'ext_magic'], + ['>I', self.ext_offset + UINT32_S, 0, 'ext_length'] ]) def create_l_structures(self): @@ -352,7 +293,7 @@ class Image(object): l1 = [['>Q', l1_offset, 0, 'l1_entry']] l2 = [] else: - meta_data = set([0]) + meta_data = self._get_metadata() guest_clusters = random.sample(range(self.image_size / self.cluster_size), len(self.data_clusters)) @@ -389,49 +330,15 @@ class Image(object): float(self.cluster_size**2))) self.header['l1_table_offset'][0].value = l1_offset - def __init__(self, backing_file_name=None, backing_file_fmt=None): - """Create a random valid qcow2 image with the correct inner structure - and allowable values. - """ - cluster_bits, self.image_size = self._size_params() - self.cluster_size = 1 << cluster_bits - self.create_header(cluster_bits, backing_file_name) - self.set_backing_file_name(backing_file_name) - offset = self.set_backing_file_format(backing_file_fmt) - offset = self.create_feature_name_table(offset) - self.set_end_of_extension_area(offset) - self.data_clusters = self._alloc_data(self.image_size, - self.cluster_size) - self.create_l_structures() - # Container for entire image - self.data = FieldsList() - # Percentage of fields will be fuzzed - self.bias = random.uniform(0.2, 0.5) - - def __iter__(self): - return iter([self.header, - self.backing_file_format, - self.feature_name_table, - self.end_of_extension_area, - self.backing_file_name, - self.l1_table, - self.l2_tables]) - - def _join(self): - """Join all image structure elements as header, tables, etc in one - list of fields. - """ - if len(self.data) == 0: - for v in self: - self.data += v - def fuzz(self, fields_to_fuzz=None): """Fuzz an image by corrupting values of a random subset of its fields. Without parameters the method fuzzes an entire image. + If 'fields_to_fuzz' is specified then only fields in this list will be fuzzed. 'fields_to_fuzz' can contain both individual fields and more general image elements as a header or tables. + In the first case the field will be fuzzed always. In the second a random subset of fields will be selected and fuzzed. """ @@ -442,8 +349,7 @@ class Image(object): return random.random() < self.bias if fields_to_fuzz is None: - self._join() - for field in self.data: + for field in self: if coin(): field.value = getattr(fuzz, field.name)(field.value) else: @@ -454,20 +360,15 @@ class Image(object): field.value = getattr(fuzz, field.name)(field.value) else: + # If fields with the requested name were not generated + # getattr(self, item[0])[item[1]] returns an empty list for field in getattr(self, item[0])[item[1]]: - try: - field.value = getattr(fuzz, field.name)( - field.value) - except AttributeError: - # Some fields can be skipped depending on - # their prerequisites - pass + field.value = getattr(fuzz, field.name)(field.value) def write(self, filename): """Write an entire image to the file.""" image_file = open(filename, 'w') - self._join() - for field in self.data: + for field in self: image_file.seek(field.offset) image_file.write(struct.pack(field.fmt, field.value)) @@ -484,11 +385,92 @@ class Image(object): image_file.write("\0") image_file.close() + @staticmethod + def _size_params(): + """Generate a random image size aligned to a random correct + cluster size. + """ + cluster_bits = random.randrange(9, 21) + cluster_size = 1 << cluster_bits + img_size = random.randrange(0, MAX_IMAGE_SIZE + 1, cluster_size) + return (cluster_bits, img_size) + + @staticmethod + def _get_available_clusters(used, number): + """Return a set of indices of not allocated clusters. + + 'used' contains indices of currently allocated clusters. + All clusters that cannot be allocated between 'used' clusters will have + indices appended to the end of 'used'. + """ + append_id = max(used) + 1 + free = set(range(1, append_id)) - used + if len(free) >= number: + return set(random.sample(free, number)) + else: + return free | set(range(append_id, append_id + number - len(free))) + + @staticmethod + def _get_adjacent_clusters(used, size): + """Return an index of the first cluster in the sequence of free ones. + + 'used' contains indices of currently allocated clusters. 'size' is the + length of the sequence of free clusters. + If the sequence of 'size' is not available between 'used' clusters, its + first index will be append to the end of 'used'. + """ + def get_cluster_id(lst, length): + """Return the first index of the sequence of the specified length + or None if the sequence cannot be inserted in the list. + """ + if len(lst) != 0: + pairs = [] + pair = (lst[0], 1) + for i in range(1, len(lst)): + if lst[i] == lst[i-1] + 1: + pair = (lst[i], pair[1] + 1) + else: + pairs.append(pair) + pair = (lst[i], 1) + pairs.append(pair) + random.shuffle(pairs) + for x, s in pairs: + if s >= length: + return x - length + 1 + return None + + append_id = max(used) + 1 + free = list(set(range(1, append_id)) - used) + idx = get_cluster_id(free, size) + if idx is None: + return append_id + else: + return idx + + @staticmethod + def _alloc_data(img_size, cluster_size): + """Return a set of random indices of clusters allocated for guest data. + """ + num_of_cls = img_size/cluster_size + return set(random.sample(range(1, num_of_cls + 1), + random.randint(0, num_of_cls))) + + def _get_metadata(self): + """Return indices of clusters allocated for image metadata.""" + ids = set() + for x in self: + ids.add(x.offset/self.cluster_size) + return ids + def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None, fields_to_fuzz=None): """Create a fuzzed image and write it to the specified file.""" - image = Image(backing_file_name, backing_file_fmt) + image = Image(backing_file_name) + image.set_backing_file_format(backing_file_fmt) + image.create_feature_name_table() + image.set_end_of_extension_area() + image.create_l_structures() image.fuzz(fields_to_fuzz) image.write(test_img_path) return image.image_size