From patchwork Wed Oct 31 22:36:09 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Michael Roth X-Patchwork-Id: 196049 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 1DC322C0093 for ; Thu, 1 Nov 2012 10:18:42 +1100 (EST) Received: from localhost ([::1]:48523 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TThYW-00031c-6l for incoming@patchwork.ozlabs.org; Wed, 31 Oct 2012 19:18:40 -0400 Received: from eggs.gnu.org ([208.118.235.92]:34970) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TTguN-00024C-Qj for qemu-devel@nongnu.org; Wed, 31 Oct 2012 18:37:18 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1TTguI-0005uC-QF for qemu-devel@nongnu.org; Wed, 31 Oct 2012 18:37:11 -0400 Received: from mail-ia0-f173.google.com ([209.85.210.173]:37027) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TTguI-0005ku-LJ for qemu-devel@nongnu.org; Wed, 31 Oct 2012 18:37:06 -0400 Received: by mail-ia0-f173.google.com with SMTP id m10so1511545iam.4 for ; Wed, 31 Oct 2012 15:37:06 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=sender:from:to:cc:subject:date:message-id:x-mailer:in-reply-to :references; bh=fE31ORM8eStwDAaZXmEszAFX5PzXvDvXy7yKCawBNO4=; b=Tx2CXOCX8DfTN9Wge0nNf26WpFmBwF2rhspOZjx2F+FInHmWfUFOy9Wd3vvwXyNdz4 rL4CxnCF8slyzH+l/0pL59DKXofH5FaWJl2GbRaw4cjk6nh+/zWS8uJTfNftSQ9HrOBb 2EGSNyuuSX2LVPVVUVjYNJpH9YW14MqgqpZGJoJKQaYT4ODT9B4HmZk6C3QIOVuWqYol 4ffRhGI3lXVWPu9jBMbyLwwrt0RPo+GsNc1Qqo9ecOdl2kPX9WCCp3sLDBciLyX7ORsP BfD7y7/rdNXgShQY9x8yHu5YxuUQo+93PEfxBB1rwxpk+BiApcAIYbvhMkN+6fT6bIJ6 zDCw== Received: by 
10.50.152.197 with SMTP id va5mr2823842igb.12.1351723026379; Wed, 31 Oct 2012 15:37:06 -0700 (PDT) Received: from loki.morrigu.org (cpe-72-179-62-111.austin.res.rr.com. [72.179.62.111]) by mx.google.com with ESMTPS id hg2sm11556858igc.3.2012.10.31.15.37.04 (version=TLSv1/SSLv3 cipher=OTHER); Wed, 31 Oct 2012 15:37:05 -0700 (PDT) From: Michael Roth To: qemu-devel@nongnu.org Date: Wed, 31 Oct 2012 17:36:09 -0500 Message-Id: <1351722972-17801-26-git-send-email-mdroth@linux.vnet.ibm.com> X-Mailer: git-send-email 1.7.9.5 In-Reply-To: <1351722972-17801-1-git-send-email-mdroth@linux.vnet.ibm.com> References: <1351722972-17801-1-git-send-email-mdroth@linux.vnet.ibm.com> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 209.85.210.173 Cc: kwolf@redhat.com, peter.maydell@linaro.org, aliguori@us.ibm.com, blauwirbel@gmail.com, pbonzini@redhat.com Subject: [Qemu-devel] [PATCH 25/28] qidl: add C parser (based on QC parser) X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org This introduces the QIDL parser to process QIDL annotations in C files. This code is mostly a straight import from qc.git, with some reworking to handle the declaration/annotation format and lexer we're using for QEMU. Reviewed-by: Paolo Bonzini Signed-off-by: Michael Roth --- scripts/qidl_parser.py | 426 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 426 insertions(+) create mode 100644 scripts/qidl_parser.py diff --git a/scripts/qidl_parser.py b/scripts/qidl_parser.py new file mode 100644 index 0000000..dfadb72 --- /dev/null +++ b/scripts/qidl_parser.py @@ -0,0 +1,426 @@ +# +# QEMU IDL Parser +# +# Copyright IBM, Corp. 
#
# QEMU IDL Parser
#
# Copyright IBM, Corp. 2012
#
# Authors:
#  Anthony Liguori
#  Michael Roth
#
# This work is licensed under the terms of the GNU GPLv2 or later.
# See the COPYING file in the top-level directory.

"""Parse QIDL-annotated C struct declarations into JSON-serialisable nodes.

Every ``parse_*`` function takes ``l``, the ``CLexer`` instance created in
``parse_file``, and consumes tokens from it.  Parsed declarations and fields
are plain dictionaries.
"""

import json
import sys

# 'serialize' annotation types that only set a boolean flag on the field.
_SERIALIZE_FLAGS = {
    'derived': 'is_derived',
    'immutable': 'is_immutable',
    'elsewhere': 'is_elsewhere',
    'broken': 'is_broken',
    'optional': 'is_optional',
}


def _to_int_if_digits(expression):
    """Return ``int(expression)`` for an all-digit string, else the string."""
    if expression.isdigit():
        return int(expression)
    return expression


def _node_fields(node):
    """Return the field list of a top-level struct or typedef node.

    Raises Exception if the node is neither a typedef nor a struct.
    """
    if 'typedef' in node:
        return node['type']['fields']
    if 'struct' in node:
        return node['fields']
    raise Exception("top-level neither typedef nor struct")


def process_annotation(node, params, set_use_default_tag=False):
    """Apply one QIDL annotation, given as a parameter list, to a field node.

    params[0] is the annotation group ('serialize' or 'property'); the
    remaining entries depend on the group.  When set_use_default_tag is true,
    any 'serialize' type other than 'size_is'/'string' clears the field's
    'use_default_tag' flag.  Unknown groups and types are ignored.

    Returns the (mutated) node.  Raises Exception on a malformed annotation.
    """
    if not params:
        raise Exception("empty QIDL annotation")

    annotation_group = params[0]
    if annotation_group == 'serialize':
        if len(params) < 2:
            raise Exception("'serialize' annotation is missing its type")
        annotation_type = params[1]
        if set_use_default_tag and annotation_type not in ('size_is', 'string'):
            node['use_default_tag'] = False

        if annotation_type in _SERIALIZE_FLAGS:
            node[_SERIALIZE_FLAGS[annotation_type]] = True
        elif annotation_type == 'size_is':
            if len(params) < 3:
                raise Exception("'size_is' annotation is missing its "
                                "size expression")
            node['is_array'] = True
            node['array_size'] = _to_int_if_digits(params[2])
        elif annotation_type == 'string':
            if node['type'] != 'char':
                raise Exception("expected char[] for q_string, got %s"
                                % node['type'])
            node['is_string'] = True
    elif annotation_group == 'property':
        node['is_property'] = True
        node.setdefault('property_fields', []).append(params[1:])

    return node


def parse_annotations(l, node):
    """Consume consecutive ``QIDL(...)`` annotations and apply them to node.

    Parameters are split on every ',' token; the text of nested parentheses
    is kept inside the parameter it belongs to.  Returns the node.
    """
    while l.check_token('symbol', 'QIDL'):
        params = []
        l.pop()

        l.pop_expected('operator', '(')
        open_parens = 1
        param = ""
        while open_parens:
            if l.check_token('operator', ','):
                params.append(param)
                param = ""
                l.pop()
                continue

            if l.check_token('operator', '('):
                open_parens += 1
            elif l.check_token('operator', ')'):
                open_parens -= 1

            # The closing ')' of the annotation itself is not part of a param.
            if open_parens > 0:
                param += l.peek()

            l.pop()

        if param != "":
            params.append(param)

        try:
            # These are the explicit, user-written annotations, so they are
            # the ones that must be able to clear 'use_default_tag'; the
            # defaults applied in parse_declaration pass False.
            node = process_annotation(node, params, True)
        except Exception as e:
            raise Exception("%s: %s" % (l, str(e)))

    return node


def parse_modifiers(l, node):
    """Skip leading const/volatile tokens, recording 'is_const' on node."""
    modifiers = ['const', 'volatile']
    while l.peek_type() in modifiers:
        if l.check_token('const', 'const'):
            node['is_const'] = True
        l.pop()


def parse_type(l):
    """Parse the type part of a member declaration into a new field node.

    Handles const/volatile, struct/union/enum tags, ``unsigned`` and one
    level of '*'.  An embedded ``{ ... }`` body is skipped and the node is
    flagged 'is_nested_decl'.  Annotations following the type are applied.
    """
    node = {'use_default_tag': True}
    unsigned_types = ['char', 'short', 'int', 'long']
    type_complete = False
    typename = ''

    parse_modifiers(l, node)

    if l.check_token('struct', 'struct') or l.check_token('union', 'union'):
        if l.check_token('union', 'union'):
            node['is_union'] = True
        typename += l.pop() + ' '
        if not l.check_token('operator', '{'):
            typename += l.pop_expected('symbol')
            type_complete = True

    if l.check_token('unsigned', 'unsigned'):
        typename += l.pop()
        # any() rather than filter(): a filter object is always truthy on
        # Python 3, which would make a bare 'unsigned' impossible to detect.
        if any(l.check_token('symbol', t) for t in unsigned_types):
            typename += ' '
        else:
            typename += ' int'
            type_complete = True

    if l.check_token('enum', 'enum'):
        typename += l.pop() + ' '

    # we don't currently handle embedded struct/union declarations, skip them
    # for now
    if l.check_token('operator', '{'):
        open_braces = 1
        while open_braces:
            l.pop()
            if l.check_token('operator', '{'):
                open_braces += 1
            elif l.check_token('operator', '}'):
                open_braces -= 1
        l.pop()
        # NOTE(review): this appends an empty string; presumably a
        # placeholder name for the anonymous type was lost -- confirm
        # against the upstream file.
        typename += ""
        node['is_nested_decl'] = True
    elif l.check_token('operator', '*'):
        l.pop()
        node['is_pointer'] = True
    elif not type_complete:
        typename += l.pop_expected('symbol')

    node['type'] = typename

    node = parse_annotations(l, node)

    if l.check_token('operator', '*'):
        l.pop()
        node['is_pointer'] = True

    return node


def parse_var_decl(l, repeating_type=None):
    """Parse one declarator (optionally preceded by its type).

    repeating_type is the type name to reuse for the second and later
    declarators of a comma-separated list; when None the type is parsed
    from the token stream.  Handles function pointers, pointers, anonymous
    union members and array dimensions, then any trailing annotations.

    Raises Exception for an unnamed member that is not a union.
    """
    if repeating_type is None:
        node = parse_type(l)
    else:
        node = {'type': repeating_type, 'use_default_tag': True}

    if l.check_token('operator', '('):
        node['is_function'] = True
        l.pop()
        l.pop_expected('operator', '*')
        variable = l.pop_expected('symbol')
        l.pop_expected('operator', ')')

        # skip the param list since we don't use it currently
        l.pop_expected('operator', '(')
        open_parens = 1
        while open_parens:
            if l.check_token('operator', '('):
                open_parens += 1
            elif l.check_token('operator', ')'):
                open_parens -= 1
            l.pop()
    elif l.check_token('operator', ';'):
        if node.get('is_union'):
            variable = None
        else:
            raise Exception("%s: unnamed, non-union struct member" % l)
    elif l.check_token('operator', '*'):
        node['is_pointer'] = True
        l.pop()
        # Each further '*' adds one level of nesting.  The counter is kept
        # under the single key 'pointer_nesting'.
        while l.check_token('operator', '*'):
            node['pointer_nesting'] = node.get('pointer_nesting', 0) + 1
            l.pop()
        variable = l.pop_expected('symbol')
    else:
        variable = l.pop_expected('symbol')
    node['variable'] = variable

    dim_expressions = []
    while l.check_token('operator', '['):
        l.pop()
        dim_expression = ''
        while not l.check_token('operator', ']'):
            dim_expression += l.pop()
        l.pop_expected('operator', ']')
        dim_expressions.append(_to_int_if_digits(dim_expression))

    if dim_expressions:
        if len(dim_expressions) == 1:
            dimensions = dim_expressions[0]
        else:
            dimensions = dim_expressions
        if 'is_array' not in node:
            node['is_array'] = True
            node['array_size'] = dimensions
        else:
            # An annotation already marked this an array (and set its
            # size), so the declared dimensions are only the capacity.
            node['array_capacity'] = dimensions

    node = parse_annotations(l, node)

    return node


def parse_struct(l):
    """Parse ``struct [name] { members }`` into a struct node."""
    l.pop_expected('struct', 'struct')

    name = None
    if l.check_token('symbol'):
        name = l.pop()

    l.pop_expected('operator', '{')

    nodes = []
    while not l.check_token('operator', '}'):
        node = parse_var_decl(l)
        nodes.append(node)
        # "int a, b;" -- later declarators reuse the first one's type.
        while l.check_token('operator', ','):
            l.pop()
            node = parse_var_decl(l, node['type'])
            nodes.append(node)

        l.pop_expected('operator', ';')

    l.pop()

    return {'struct': name, 'fields': nodes}


def parse_typedef(l):
    """Parse ``typedef struct ... { ... } Name`` into a typedef node."""
    l.pop_expected('typedef', 'typedef')

    node = parse_struct(l)
    typename = l.pop_expected('symbol')

    return {'typedef': typename, 'type': node}


def get_default_tag(params):
    """Return the entry following 'serialize' in params, else "standard"."""
    try:
        return params[params.index('serialize') + 1]
    except (ValueError, IndexError):
        # No 'serialize' entry, or nothing after it.
        return "standard"


def parse_params(l):
    """Collect the tokens inside the next parenthesised macro argument list.

    Tokens before the opening '(' are skipped, ',' tokens are dropped and
    the closing ')' is consumed.

    Raises Exception if the input ends inside the parentheses.
    """
    params = []
    parens = 0
    while not l.eof():
        if l.check_token('operator', '('):
            parens += 1
        elif l.check_token('operator', ')'):
            parens -= 1
            if parens == 0:
                break
        elif parens > 0:
            if not l.check_token('operator', ','):
                params.append(l.peek())
        l.pop()
    # Report unbalanced input before trying to consume the closing ')'.
    if parens != 0:
        raise Exception("%s: unmatched parenthesis in QIDL macro" % l)
    l.pop_expected('operator', ')')

    return params


def parse_immutable_types(l):
    """Return the type names listed in a QIDL_IMMUTABLE_TYPES(...) macro."""
    return list(parse_params(l))


def parse_declaration_params(l):
    """Parse a QIDL_START* macro invocation into a declaration-info dict.

    The result has the keys 'id', 'do_state', 'do_properties', 'implement',
    'public' and 'default_tag'.  Raises Exception if the macro has no id.
    """
    implement = True
    is_public = False
    if l.check_token('symbol', 'QIDL_START_IMPLEMENTATION'):
        l.pop()
    elif l.check_token('symbol', 'QIDL_START_PUBLIC'):
        implement = False
        is_public = True
        l.pop()
    else:
        l.pop_expected('symbol', 'QIDL_START')

    params = parse_params(l)
    if not params:
        raise Exception("%s: QIDL declaration is missing its id" % l)

    return {
        'id': params[0],
        'do_state': "skip_state" not in params,
        'do_properties': "skip_properties" not in params,
        'implement': implement,
        'public': is_public,
        'default_tag': get_default_tag(params),
    }


def immutable_by_default(field):
    """Return True for const fields and function pointers."""
    return bool(field.get('is_const') or field.get('is_function'))


def parse_declaration(l):
    """Parse a QIDL_START*(...) macro and the struct/typedef that follows.

    Fields still flagged 'use_default_tag' get either 'immutable' (const and
    function-pointer fields) or the declaration's default tag applied.

    Raises Exception for anything other than a struct or typedef.
    """
    declaration_info = parse_declaration_params(l)

    if l.check_token('typedef'):
        node = parse_typedef(l)
    elif l.check_token('struct'):
        node = parse_struct(l)
    else:
        raise Exception("%s: unsupported QIDL declaration" % l)

    default_tag = declaration_info['default_tag']
    # A typedef node keeps its fields under node['type'], not on the node.
    for field in _node_fields(node):
        if not field['use_default_tag']:
            continue
        if immutable_by_default(field):
            tag = 'immutable'
        else:
            tag = default_tag
        try:
            process_annotation(field, ['serialize', tag], False)
        except Exception as e:
            raise Exception("%s: %s" % (l, str(e)))

    l.pop_expected('operator', ';')
    for key in ('id', 'do_state', 'do_properties', 'implement', 'public'):
        node[key] = declaration_info[key]

    return node


def find_node(nodes, name):
    """Return the first node whose 'id' equals name, or None."""
    for node in nodes:
        if node['id'] == name:
            return node
    return None


# For any fields which have not been explicitly marked otherwise,
# mark them immutable if they're in QIDL_IMMUTABLE_TYPES()
def process_immutable_list(nodes, immutable_types):
    """Flag default-tagged fields whose type is in immutable_types.

    Raises Exception if a node is neither a typedef nor a struct.
    """
    for node in nodes:
        for field in _node_fields(node):
            if field.get('use_default_tag') and field['type'] in immutable_types:
                field['is_immutable'] = True


def parse_file(f):
    """Parse every QIDL declaration in the open file f; return the nodes.

    Raises Exception if a QIDL_START_IMPLEMENTATION() names an id that has
    not been declared earlier in the input.
    """
    # Imported here, by its only user, so the annotation helpers above can
    # be imported without the project's lexer module on the path.
    from lexer import CLexer, Input

    nodes = []
    filtered_tokens = ['whitespace', 'comment', 'directive']
    immutable_types = []
    l = CLexer(Input(f), filtered_tokens)
    while not l.eof():
        line = l.peek_line()
        # Must be tested before the generic "QIDL_START" prefix below.
        if line.startswith("QIDL_START_IMPLEMENTATION("):
            info = parse_declaration_params(l)
            node = find_node(nodes, info['id'])
            if node is None:
                raise Exception("%s: QIDL_START_IMPLEMENTATION(%s) has no "
                                "matching declaration" % (l, info['id']))
            node['implement'] = info['implement']
            node['do_state'] = info['do_state']
            node['do_properties'] = info['do_properties']
        elif line.startswith("QIDL_START"):
            nodes.append(parse_declaration(l))
        elif line.startswith("QIDL_IMMUTABLE_TYPES("):
            immutable_types = parse_immutable_types(l)
        else:
            l.pop_line()
    process_immutable_list(nodes, immutable_types)
    return nodes


def main():
    """Read C source from stdin and print the parsed nodes as JSON."""
    nodes = parse_file(sys.stdin)
    print(json.dumps(nodes, sort_keys=True, indent=2))


if __name__ == '__main__':
    main()